-
Notifications
You must be signed in to change notification settings - Fork 84
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
the executable codes for caelus
- Loading branch information
0 parents
commit 79ea1bf
Showing
279 changed files
with
40,231 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
.idea/ | ||
_output/ |
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
.PHONY: build | ||
build: format | ||
./hack/build.sh | ||
|
||
.PHONY: format | ||
format: | ||
./hack/format.sh | ||
|
||
.PHONY: test | ||
test: | ||
./hack/test.sh | ||
|
||
.PHONY: clean | ||
clean: | ||
./hack/clean.sh | ||
|
||
.PHONY: image | ||
image: build | ||
./hack/image.sh |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
## Caelus | ||
[![GitHub license](https://img.shields.io/badge/license-Apache--2.0-brightgreen)](https://github.com/Tencent/caelus/blob/master/LICENSE) | ||
[![Release](https://img.shields.io/github/v/release/Tencent/caelus.svg)](https://github.com/Tencent/caelus/releases) | ||
[![PRs Welcome](https://img.shields.io/badge/PRs-welcome-brightgreen.svg)](https://github.com/Tencent/caelus/pulls) | ||
|
||
Caelus is a set of Kubernetes solutions for reusing idle resources of nodes by running extra batch jobs, these resources come from | ||
the underutilization of online jobs, especially during low traffic periods. To make batch jobs compatible with online jobs, | ||
caelus dynamically manages multiple resource isolation mechanisms and also checks abnormalities of various metrics. | ||
Batch jobs will be throttled or even killed if interference detected. | ||
|
||
## Features | ||
|
||
* Collect various metrics, including node resources, cgroup resources and online jobs latency | ||
|
||
* Batch jobs could be running on YARN or Kubernetes | ||
|
||
* Predict total resource usages of the node, including online jobs and kernel modules, such as slab | ||
|
||
* Dynamically manage multiple resource isolation mechanisms, such as CPU, memory, and disk space | ||
|
||
* Dynamically check abnormalities of various metrics, such as CPU usage or online jobs latency | ||
|
||
* Throttle or even kill batch jobs when resource pressure or latency spike detected | ||
|
||
* Prometheus metrics supported | ||
|
||
* Alarm supported | ||
|
||
## Usage | ||
|
||
Find more usage at [Tutorial.md](doc/tutorial.md). The project also have two attached tools: | ||
|
||
#### nm_operator | ||
|
||
[nm_operator](doc/nm_operator.md) is used to execute YARN commands in the way of remote API. | ||
|
||
#### metric_adapter | ||
|
||
[metric_adapter](doc/metric_adapter.md) is used to collect more application metrics with adapter extension. | ||
|
||
|
||
## Getting started | ||
|
||
### build | ||
|
||
``` sh | ||
# binary build, which generates binary under _output/bin/ | ||
$ make build | ||
|
||
# image build | ||
$ make image | ||
|
||
# run unit test | ||
$ make test | ||
``` | ||
|
||
### Run | ||
|
||
```sh | ||
# running in script | ||
$ caelus --config=hack/config/caelus.json --hostname-override=xxx --v=2 | ||
|
||
# running in image | ||
$ kubectl create -f hack/yaml/caelus.json | ||
$ kubectl label node colation=true | ||
$ kubectl -n kube-system get daemonset | ||
``` | ||
|
||
## Contributing | ||
For more information about contributing issues or pull requests, see our [Contributing to Caelus](doc/contributing.md). | ||
|
||
## License | ||
Caelus is under the Apache License 2.0. See the [License](LICENSE) file for details. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,128 @@ | ||
/* | ||
* Copyright (c) 2021 THL A29 Limited, a Tencent company. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* | ||
* You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package app | ||
|
||
import ( | ||
"flag" | ||
"fmt" | ||
"net" | ||
"net/http" | ||
"net/http/pprof" | ||
"time" | ||
|
||
"github.com/tencent/caelus/cmd/caelus/context" | ||
"github.com/tencent/caelus/pkg/caelus/metrics" | ||
"github.com/tencent/caelus/pkg/caelus/metrics/outer/serverrequest" | ||
"github.com/tencent/caelus/pkg/caelus/metrics/outer/textfile" | ||
"github.com/tencent/caelus/pkg/caelus/statestore" | ||
"github.com/tencent/caelus/pkg/version/verflag" | ||
|
||
"github.com/prometheus/client_golang/prometheus" | ||
"github.com/prometheus/client_golang/prometheus/promhttp" | ||
"github.com/spf13/pflag" | ||
"k8s.io/klog" | ||
) | ||
|
||
// ApiOption describe API related data | ||
type ApiOption struct { | ||
// Profiling enable pprof debug | ||
Profiling bool | ||
InsecureAddress string | ||
InsecurePort string | ||
// statsMetric describe metrics which need to show in prometheus mode | ||
statsMetric map[metrics.StatsMetric]interface{} | ||
// prometheusRegistry used for register prometheus metrics | ||
prometheusRegistry *prometheus.Registry | ||
} | ||
|
||
// AddFlags describe API related flags | ||
func (a *ApiOption) AddFlags(fs *pflag.FlagSet) { | ||
fs.BoolVar(&a.Profiling, "profiling", true, | ||
"Enable profiling via web interface host:port/debug/pprof/. Default is true") | ||
fs.StringVar(&a.InsecureAddress, "insecure-bind-address", "127.0.0.1", | ||
"The IP address on which to serve the --insecure-port. Defaults to localhost") | ||
fs.StringVar(&a.InsecurePort, "insecure-port", "10030", | ||
"The port on which to serve unsecured, unauthenticated access. Default 10030") | ||
} | ||
|
||
// Init function init metrics collecting manager | ||
func (a *ApiOption) Init(ctx *context.CaelusContext) error { | ||
a.statsMetric = make(map[metrics.StatsMetric]interface{}) | ||
a.prometheusRegistry = prometheus.NewRegistry() | ||
return nil | ||
} | ||
|
||
// Run main loop, now nothing to do | ||
func (a *ApiOption) Run(stopCh <-chan struct{}) error { | ||
return nil | ||
} | ||
|
||
func (a *ApiOption) loadStatsMetric(name metrics.StatsMetric, stat interface{}) { | ||
a.statsMetric[name] = stat | ||
} | ||
|
||
// RegisterServer register API route | ||
func (a *ApiOption) RegisterServer(node string) error { | ||
// start server | ||
mux := http.NewServeMux() | ||
if a.Profiling { | ||
mux.HandleFunc("/debug/pprof/", pprof.Index) | ||
mux.HandleFunc("/debug/pprof/profile", pprof.Profile) | ||
mux.HandleFunc("/debug/pprof/symbol", pprof.Symbol) | ||
} | ||
|
||
// register prometheus metrics | ||
metrics.RegisterTotalMetrics(a.prometheusRegistry) | ||
textfile.RegisterTextFileMetrics(a.prometheusRegistry) | ||
stStore := a.statsMetric[metrics.StatsMetricStore].(statestore.StateStore) | ||
serverrequest.RegisterRequestMetrics(a.prometheusRegistry, stStore) | ||
a.prometheusRegistry.MustRegister(metrics.NewPrometheusCollector(node, a.statsMetric, | ||
metrics.ResourceMetricsConfig())) | ||
mux.Handle("/metrics", promhttp.HandlerFor(a.prometheusRegistry, promhttp.HandlerOpts{ | ||
ErrorHandling: promhttp.ContinueOnError})) | ||
|
||
// register version api | ||
mux.HandleFunc("/version", verflag.RequestVersion) | ||
|
||
mux.HandleFunc("/klog", func(w http.ResponseWriter, r *http.Request) { | ||
v := r.URL.Query().Get("v") | ||
if v != "" { | ||
flag.Lookup("v").Value.Set(v) | ||
klog.Infof("set log level to %v", v) | ||
} | ||
}) | ||
|
||
handler := http.TimeoutHandler(mux, time.Duration(1*time.Minute), "time out") | ||
httpServer := &http.Server{ | ||
Handler: handler, | ||
MaxHeaderBytes: 1 << 20, | ||
} | ||
|
||
insecureLocation := net.JoinHostPort(a.InsecureAddress, a.InsecurePort) | ||
listener, err := net.Listen("tcp", insecureLocation) | ||
if err != nil { | ||
return fmt.Errorf("listen(%s) err: %v", insecureLocation, err) | ||
} | ||
|
||
go func() { | ||
err = httpServer.Serve(listener) | ||
if err != nil { | ||
err = fmt.Errorf("start listen server err: %v", err) | ||
} | ||
}() | ||
|
||
return err | ||
} |
Oops, something went wrong.