Skip to content
This repository has been archived by the owner on Jun 13, 2023. It is now read-only.

Commit

Permalink
Add a way to check if the dns is ready (#81)
Browse files Browse the repository at this point in the history
* readiness: add a way to check if the dns is ready
* dns: include a prometheus counter
  • Loading branch information
JulienBalestra authored Jul 2, 2018
1 parent a7d20c4 commit 24008b0
Show file tree
Hide file tree
Showing 311 changed files with 40,671 additions and 96 deletions.
5 changes: 0 additions & 5 deletions .ci/sonobuoy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,6 @@ tar -xzvf sonobuoy.tar.gz
rm -v sonobuoy.tar.gz

set +e
while true
do
kubectl get po kube-controller-manager -n kube-system -o json | jq -re '. | select(.status.phase=="Running")' && break
sleep 5
done

./sonobuoy run --mode Quick --skip-preflight || exit $?

Expand Down
2 changes: 1 addition & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ jobs:

- run:
name: run
command: sudo ./pupernetes daemon run sandbox/ --job-type systemd -v 5 --kubectl-link /usr/local/bin/kubectl --bind-address 0.0.0.0:8989 --kubeconfig-path /home/circleci/.kube/config
command: sudo ./pupernetes daemon run sandbox/ --job-type systemd -v 5 --kubectl-link /usr/local/bin/kubectl --bind-address 0.0.0.0:8989 --kubeconfig-path /home/circleci/.kube/config --dns-check

- run:
name: kubectl
Expand Down
35 changes: 23 additions & 12 deletions Gopkg.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions LICENSE-3rdparty.csv
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ core,"github.com/inconshreveable/mousetrap",Apache-2.0
core,"github.com/json-iterator/go",MIT
core,"github.com/magiconair/properties",FreeBSD
core,"github.com/matttproud/golang_protobuf_extensions/pbutil",Apache-2.0
core,"github.com/miekg/dns",NewBSD
core,"github.com/mitchellh/go-homedir",MIT
core,"github.com/mitchellh/mapstructure",MIT
core,"github.com/modern-go/concurrent",Apache-2.0
Expand Down Expand Up @@ -83,11 +84,18 @@ core,"github.com/spf13/pflag",NewBSD
core,"github.com/spf13/viper",MIT
core,"github.com/stretchr/testify/assert",MIT
core,"github.com/stretchr/testify/require",MIT
core,"golang.org/x/crypto/ed25519",NewBSD
core,"golang.org/x/crypto/ed25519/internal/edwards25519",NewBSD
core,"golang.org/x/crypto/ssh/terminal",NewBSD
core,"golang.org/x/net/bpf",NewBSD
core,"golang.org/x/net/context",NewBSD
core,"golang.org/x/net/http2",NewBSD
core,"golang.org/x/net/http2/hpack",NewBSD
core,"golang.org/x/net/idna",NewBSD
core,"golang.org/x/net/internal/iana",NewBSD
core,"golang.org/x/net/internal/socket",NewBSD
core,"golang.org/x/net/ipv4",NewBSD
core,"golang.org/x/net/ipv6",NewBSD
core,"golang.org/x/net/lex/httplex",NewBSD
core,"golang.org/x/sys/unix",NewBSD
core,"golang.org/x/sys/windows",NewBSD
Expand Down
34 changes: 24 additions & 10 deletions cmd/cli/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,24 +78,27 @@ func NewCommand() (*cobra.Command, *int) {
Args: cobra.ExactArgs(1), // basePathDirectory
Example: fmt.Sprintf(`
# Setup and run the environment with the default options:
%s run state/
%s run /opt/state/
# Clean all the environment, setup and run the environment:
%s run state/ -c all
%s run /opt/state/ -c all
# Clean everything but the binaries, setup and run the environment:
%s run state/ -c etcd,kubectl,kubelet,manifests,network,secrets,systemd,mounts
%s run /opt/state/ -c etcd,kubectl,kubelet,manifests,network,secrets,systemd,mounts
# Setup and run the environment with a 5 minutes timeout:
%s run state/ --timeout 5m
%s run /opt/state/ --run-timeout 5m
# Setup and run the environment, then guarantee a kubelet garbage collection during the drain phase:
%s run state/ --gc 1m
%s run /opt/state/ --gc 1m
# Setup and run the environment as a systemd service:
# Get logs with "journalctl -o cat -efu %s"
# Get status with "systemctl status %s --no-pager"
%s run state/ --%s %s
%s run /opt/state/ --%s %s
# Setup and run the environment with a readiness on dns:
%s run /opt/state/ --dns-check --dns-queries quay.io.,coredns.kube-system.svc.cluster.local.
`,
daemonName,
daemonName,
Expand All @@ -107,6 +110,7 @@ func NewCommand() (*cobra.Command, *int) {
daemonName,
config.JobTypeKey,
config.JobSystemd,
daemonName,
),
Run: func(cmd *cobra.Command, args []string) {
// Manage self start in systemd
Expand Down Expand Up @@ -138,7 +142,11 @@ func NewCommand() (*cobra.Command, *int) {
exitCode = 1
return
}
r, err := run.NewRunner(env, config.ViperConfig.GetDuration("run-timeout"), config.ViperConfig.GetDuration("gc"))
var dnsQuery []string
if config.ViperConfig.GetBool("dns-check") {
dnsQuery = config.ViperConfig.GetStringSlice("dns-queries")
}
r, err := run.NewRunner(env, config.ViperConfig.GetDuration("run-timeout"), config.ViperConfig.GetDuration("gc"), dnsQuery)
if err != nil {
exitCode = 2
return
Expand All @@ -158,13 +166,13 @@ func NewCommand() (*cobra.Command, *int) {
Args: cobra.ExactArgs(1), // basePathDirectory
Example: fmt.Sprintf(`
# Clean the environment default:
%s clean state/
%s clean /opt/state/
# Clean everything:
%s clean state/ -c all
%s clean /opt/state/ -c all
# Clean the etcd data-dir, the network configuration and the secrets:
%s clean state/ -c etcd,network,secrets
%s clean /opt/state/ -c etcd,network,secrets
`,
daemonName,
daemonName,
Expand Down Expand Up @@ -318,6 +326,12 @@ func NewCommand() (*cobra.Command, *int) {
runCommand.PersistentFlags().String(config.JobTypeKey, config.ViperConfig.GetString(config.JobTypeKey), fmt.Sprintf("type of job: %s or %s", config.JobForeground, config.JobSystemd))
config.ViperConfig.BindPFlag(config.JobTypeKey, runCommand.PersistentFlags().Lookup(config.JobTypeKey))

runCommand.PersistentFlags().StringSlice("dns-queries", config.ViperConfig.GetStringSlice("dns-queries"), "dns queries for readiness, coma-separated values")
config.ViperConfig.BindPFlag("dns-queries", runCommand.PersistentFlags().Lookup("dns-queries"))

runCommand.PersistentFlags().Bool("dns-check", config.ViperConfig.GetBool("dns-check"), "needed dns queries to notify readiness")
config.ViperConfig.BindPFlag("dns-check", runCommand.PersistentFlags().Lookup("dns-check"))

// Reset
rootCommand.AddCommand(resetCommand)
resetCommand.PersistentFlags().String("api-address", config.ViperConfig.GetString("api-address"), fmt.Sprintf("address for the %s API ip:port", programName))
Expand Down
1 change: 1 addition & 0 deletions docs/metrics.csv
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ name,type,help
"process_resident_memory_bytes","GAUGE","Resident memory size in bytes."
"process_start_time_seconds","GAUGE","Start time of the process since unix epoch in seconds."
"process_virtual_memory_bytes","GAUGE","Virtual memory size in bytes."
"pupernetes_dns_failures","COUNTER","Total number of dns query failures"
"pupernetes_kubelet_api_pods_running","GAUGE","Number of kubelet API pods running"
"pupernetes_kubelet_logs_pods_running","GAUGE","Number of kubelet logs pods running"
"pupernetes_kubelet_probe_failures","COUNTER","Total number of kubelet probe failures"
Expand Down
6 changes: 3 additions & 3 deletions docs/pupernetes_daemon_clean.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ pupernetes daemon clean [directory] [flags]
```
# Clean the environment default:
pupernetes daemon clean state/
pupernetes daemon clean /opt/state/
# Clean everything:
pupernetes daemon clean state/ -c all
pupernetes daemon clean /opt/state/ -c all
# Clean the etcd data-dir, the network configuration and the secrets:
pupernetes daemon clean state/ -c etcd,network,secrets
pupernetes daemon clean /opt/state/ -c etcd,network,secrets
```

Expand Down
17 changes: 11 additions & 6 deletions docs/pupernetes_daemon_run.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,31 +15,36 @@ pupernetes daemon run [directory] [flags]
```
# Setup and run the environment with the default options:
pupernetes daemon run state/
pupernetes daemon run /opt/state/
# Clean all the environment, setup and run the environment:
pupernetes daemon run state/ -c all
pupernetes daemon run /opt/state/ -c all
# Clean everything but the binaries, setup and run the environment:
pupernetes daemon run state/ -c etcd,kubectl,kubelet,manifests,network,secrets,systemd,mounts
pupernetes daemon run /opt/state/ -c etcd,kubectl,kubelet,manifests,network,secrets,systemd,mounts
# Setup and run the environment with a 5 minutes timeout:
pupernetes daemon run state/ --timeout 5m
pupernetes daemon run /opt/state/ --run-timeout 5m
# Setup and run the environment, then guarantee a kubelet garbage collection during the drain phase:
pupernetes daemon run state/ --gc 1m
pupernetes daemon run /opt/state/ --gc 1m
# Setup and run the environment as a systemd service:
# Get logs with "journalctl -o cat -efu pupernetes"
# Get status with "systemctl status pupernetes --no-pager"
pupernetes daemon run state/ --job-type systemd
pupernetes daemon run /opt/state/ --job-type systemd
# Setup and run the environment with a readiness on dns:
pupernetes daemon run /opt/state/ --dns-check --dns-queries quay.io.,coredns.kube-system.svc.cluster.local.
```

### Options

```
--bind-address string bind address for pupernetes API ip:port (default "127.0.0.1:8989")
--dns-check needed dns queries to notify readiness
--dns-queries stringSlice dns queries for readiness, comma-separated values (default [coredns.kube-system.svc.cluster.local.])
-d, --drain string drain options after run: iptables,kubeletgc,pods,all,none (default "all")
--gc duration grace period for the kubelet GC trigger when draining run, no-op if not draining (default 1m0s)
-h, --help help for run
Expand Down
2 changes: 2 additions & 0 deletions pkg/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,6 @@ func init() {
ViperConfig.SetDefault("wait-timeout", time.Minute*15)
ViperConfig.SetDefault("client-timeout", time.Minute*1)
ViperConfig.SetDefault("kubeconfig-path", "")
ViperConfig.SetDefault("dns-queries", []string{"coredns.kube-system.svc.cluster.local."})
ViperConfig.SetDefault("dns-check", false)
}
23 changes: 0 additions & 23 deletions pkg/run/kubectl.go

This file was deleted.

74 changes: 74 additions & 0 deletions pkg/run/readiness.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Unless explicitly stated otherwise all files in this repository are licensed
// under the Apache License Version 2.0.
// This product includes software developed at Datadog (https://www.datadoghq.com/).
// Copyright 2018 Datadog, Inc.

package run

import (
"fmt"
"github.com/golang/glog"
"github.com/miekg/dns"
"os/exec"
"strings"
"time"
)

// applyManifests runs "kubectl apply -f" through the hyperkube binary on the
// manifests path reported by the environment, using the insecure kubeconfig.
//
// It is a no-op when the state already reports kubectl as applied. On success
// the state is marked as applied; on failure the command error is returned and
// the combined stdout/stderr of the command is logged.
func (r *Runtime) applyManifests() error {
	if r.state.IsKubectlApplied() {
		glog.V(5).Infof("Kubectl is already applied")
		return nil
	}

	glog.Infof("Calling kubectl apply -f %s ...", r.env.GetManifestsPathToApply())

	hyperkube := r.env.GetHyperkubePath()
	kubectlArgs := []string{
		"kubectl",
		"--kubeconfig", r.env.GetKubeconfigInsecurePath(),
		"apply",
		"-f", r.env.GetManifestsPathToApply(),
	}
	raw, applyErr := exec.Command(hyperkube, kubectlArgs...).CombinedOutput()
	combined := string(raw)

	if applyErr == nil {
		glog.V(2).Infof("Successfully applied manifests:\n%s", combined)
		r.state.SetKubectlApplied()
		return nil
	}

	glog.Errorf("Cannot apply manifests %v:\n%s", applyErr, combined)
	return applyErr
}

// checkInClusterDNS sends an A query for each configured readiness name to the
// cluster DNS IP on port 53 and stops at the first failure.
//
// It returns nil when no query is configured or when every query gets at least
// one answer. It returns a non-nil error when the exchange fails or when a
// response carries no answer; in both cases the failure is also recorded via
// r.state.SetDNSLastError.
func (r *Runtime) checkInClusterDNS() error {
	if len(r.dnsQueriesForReadiness) == 0 {
		glog.V(2).Infof("No dns query supplied for readiness condition, skipping")
		return nil
	}
	c := dns.Client{Timeout: time.Millisecond * 500}
	for _, query := range r.dnsQueriesForReadiness {
		if !strings.HasSuffix(query, ".") {
			// dns: domain must be fully qualified
			query = query + "."
		}
		dnsMessage := &dns.Msg{}
		dnsMessage.SetQuestion(query, dns.TypeA)
		resp, _, err := c.Exchange(dnsMessage, r.env.GetDNSClusterIP()+":53")
		if err != nil {
			glog.V(4).Infof("Cannot run DNS query: %v", err)
			// err message can be like:
			// - read udp 192.168.1.12:60449->192.168.254.2:53: i/o timeout
			// - write udp 192.168.1.12:42766->192.168.254.2:53: write: operation not permitted
			// Keep only the part starting at "->" so the stored message does not
			// include the local address and port.
			i := strings.Index(err.Error(), "->")
			if i == -1 {
				// log all messages if the basic parsing failed,
				// this is not ideal but enough for this use case
				i = 0
				glog.Warningf("DNS error: %v, this is blocking the readiness", err)
			}
			r.state.SetDNSLastError(fmt.Sprintf("query %s %s", query, err.Error()[i:]))
			return err
		}
		if len(resp.Answer) == 0 {
			r.state.SetDNSLastError("No DNS results for " + query)
			// err is nil on this path: returning it would report the check as
			// successful and skip the remaining queries, so build a real error.
			return fmt.Errorf("no DNS results for %s", query)
		}
		dnsResults := make([]string, 0, len(resp.Answer))
		for _, ans := range resp.Answer {
			dnsResults = append(dnsResults, strings.Replace(ans.String(), "\t", " ", -1))
		}
		glog.V(2).Infof("DNS query: %s", strings.Join(dnsResults, " "))
	}
	return nil
}
Loading

0 comments on commit 24008b0

Please sign in to comment.