-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcheck_proc_timed_hang
executable file
·119 lines (102 loc) · 3.85 KB
/
check_proc_timed_hang
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
#!/usr/bin/python3
# Copyright (c) 2019 SUSE LLC All rights reserved.
#
# check_proc_timed_hang is free software: you can redistribute it
# and/or modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation, version 2 of
# the License.
#
# check_proc_timed_hang is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
# of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with check_proc_timed_hang. If not, see
# <http://www.gnu.org/licenses/>.
#
"""
usage: check_proc_timed_hang -h | --help
check_proc_timed_hang -p <process> [-k <kill_signal>] [-t <run_time_allowance>]
options:
-h --help
Show help
-p --process-name <process>
The name of the process to be monitored
-k --kill-signal <kill_signal>
If provided the monitored process will be killed with the
provided signal
-t --run-time-allowance <run_time_allowance>
The runtime allotted for the process. Specify a numeric value followed
by s (for seconds), m (for minutes), h (for hours), or d (for days).
A value of 10m would expect the process to run for 10m. If the process
is still running after 10m an attempt will be made to kill it with
the given signal if provided. If either the kill attempt fails or
no kill instruction is provided the process will be marked as critical.
The default allotted runtime is 1 hour. [default: 1h]
"""
import datetime
import os
import subprocess
import sys
import time

import dateutil.relativedelta
import psutil
from docopt import docopt
# Parse the command line against the usage text in the module docstring
# and pull out the three option values used by the rest of the script
command_args = docopt(__doc__)
kill_signal, process_name, allowed_runtime = [
    command_args.get(option_name)
    for option_name in (
        '--kill-signal', '--process-name', '--run-time-allowance'
    )
]
# Exit codes reported back to Nagios, numbered 0 through 3 in this order
OK, WARNING, CRITICAL, UNKNOWN = range(4)
def get_process_by_name(process_name):
    """
    Look up the single running process whose command line matches.

    A process matches when process_name occurs anywhere in its
    space-joined command line. The process running this check is
    never considered a match.

    :param str process_name: text to search for in the command lines

    :return:
        dict with the keys pid, ppid, name, cmdline and create_time of
        the matching process, or None if no process matched

    If more than one process matches, the matches are printed and the
    script exits with the CRITICAL state.
    """
    own_pid = os.getpid()
    processes = []
    for process in psutil.process_iter():
        try:
            pinfo = process.as_dict(
                attrs=['pid', 'ppid', 'name', 'cmdline', 'create_time']
            )
        except psutil.NoSuchProcess:
            # The process exited while the process table was iterated
            continue
        # as_dict() substitutes None for attributes it is not allowed to
        # read (access denied, zombie process). Treat such a command
        # line as empty instead of failing in join()
        cmdline = " ".join(pinfo['cmdline'] or [])
        if process_name in cmdline and pinfo['pid'] != own_pid:
            processes.append(pinfo)
    if len(processes) > 1:
        print(
            "Multiple processes found: " + ", ".join(
                '"%s" (pid=%s)' % (" ".join(p['cmdline'] or []), p["pid"])
                for p in processes
            )
        )
        sys.exit(CRITICAL)
    return next(iter(processes), None)
# Main check flow: find the process, work out when its runtime allowance
# expires and report (optionally after a kill attempt) the Nagios state
process_info = get_process_by_name(process_name)
if not process_info:
    print('Process %s is not running' % process_name)
    sys.exit(OK)

# Split the allowance, e.g. "10m", into amount and unit. Anything that is
# not a whole number followed by s, m, h or d is a configuration error and
# reported as UNKNOWN instead of being silently interpreted as days
runtime_units = {'s': 'seconds', 'm': 'minutes', 'h': 'hours', 'd': 'days'}
try:
    runtime_unit = runtime_units[allowed_runtime[-1]]
    runtime_amount = int(allowed_runtime[:-1])
except (IndexError, KeyError, ValueError):
    msg = 'Invalid run time allowance "%s", expected a number followed by s, m, h or d'
    print(msg % allowed_runtime)
    sys.exit(UNKNOWN)

create_time = datetime.datetime.fromtimestamp(process_info['create_time'])
expire_time = create_time + dateutil.relativedelta.relativedelta(
    **{runtime_unit: runtime_amount}
)

if expire_time >= datetime.datetime.now():
    msg = 'Process "%s" within runtime allotment'
    print(msg % process_name)
    sys.exit(OK)

# We have exceeded the specified runtime allowance
if kill_signal:
    try:
        # Pass the arguments as a list so the signal given on the
        # command line is never interpreted by a shell
        subprocess.call(
            ['kill', '-%s' % kill_signal, str(process_info['pid'])]
        )
    except OSError:
        # The kill command could not be started. The existence check
        # below reports the process as still running in that case
        pass
    # Give the process a moment to terminate before checking on it
    time.sleep(1)
    if psutil.pid_exists(process_info['pid']):
        msg = 'Process "%s" still running after kill attempt'
        print(msg % process_name)
        sys.exit(CRITICAL)
    msg = 'Process "%s" successfully removed'
    print(msg % process_name)
    sys.exit(OK)

# NOTE(review): the usage text states the process is marked critical when
# no kill instruction is provided, yet WARNING is reported here. The
# existing exit state is kept; confirm which of the two is intended
msg = 'Process "%s" exceeded expiration time "%s"'
print(msg % (process_name, expire_time))
sys.exit(WARNING)