1
2
3
4
5
6
7
8
9
10
11 __doc__ = """ps
12 Interpret the output from the ps command and provide performance data for
13 CPU utilization, total RSS and the number of processes that match the
14 /Process tree definitions.
15 """
16
17 import re
18 import logging
19 log = logging.getLogger("zen.ps")
20
21 import Globals
22 from Products.ZenRRD.CommandParser import CommandParser
23 from Products.ZenEvents.ZenEventClasses import Status_OSProcess
24
25
26 AllPids = {}  # (device, process name) -> set of pids stored the last time results were processed for that key
27 emptySet = set()  # default returned by AllPids lookups when no pids have been stored for a key yet
28
29 -class ps(CommandParser):
30
38
44
45 - def getMatches(self, matchers, procName, cmdAndArgs):
59
61 """
62 Process a non-empty line of ps output and return the
63 standard info.
64
65 @parameter line: one line of ps output
66 @type line: text
67 @return: pid, rss, cpu, cmdAndArgs (i.e. full process name)
68 @rtype: tuple
69 """
70 pid, rss, cpu, cmdAndArgs = line.split(None, 3)
71 return pid, rss, cpu, cmdAndArgs
72
74 """
75 Group processes per datapoint
76 """
77 dpsToProcs = {}
78 for line in output.split('\n')[1:]:
79 if not line:
80 continue
81
82 try:
83 pid, rss, cpu, cmdAndArgs = self.getProcInfo(line)
84 log.debug("line '%s' -> pid=%s " \
85 "rss=%s cpu=%s cmdAndArgs=%s",
86 line, pid, rss, cpu, cmdAndArgs)
87
88 except (SystemExit, KeyboardInterrupt): raise
89 except:
90 log.warn("Unable to parse entry '%s'", line)
91 continue
92
93 try:
94 procName = cmdAndArgs.split()[0]
95 matches = self.getMatches(matchers, procName, cmdAndArgs)
96
97 if not matches:
98 continue
99
100 days = 0
101 if cpu.find('-') > -1:
102 days, cpu = cpu.split('-')
103 days = int(days)
104 cpu = map(int, cpu.split(':'))
105 if len(cpu) == 3:
106 cpu = (days * 24 * 60 * 60 +
107 cpu[0] * 60 * 60 +
108 cpu[1] * 60 +
109 cpu[2])
110 elif len(cpu) == 2:
111 cpu = (days * 24 * 60 * 60 +
112 cpu[0] * 60 +
113 cpu[1])
114
115
116
117 cpu *= 100
118
119 rss = int(rss)
120 pid = int(pid)
121
122 for dp in matches:
123 procInfo = dict(procName=procName,
124 cmdAndArgs=cmdAndArgs, rss=0.0, cpu=0.0,
125 pids=set())
126 procInfo = dpsToProcs.setdefault(dp, procInfo)
127 procInfo['rss'] += rss
128 procInfo['cpu'] += cpu
129 procInfo['pids'].add(pid)
130
131 except (SystemExit, KeyboardInterrupt): raise
132 except:
133 log.exception("Unable to convert entry data pid=%s " \
134 "rss=%s cpu=%s cmdAndArgs=%s",
135 pid, rss, cpu, cmdAndArgs)
136 continue
137 return dpsToProcs
138
139
141
142
143 matchers = {}
144 for dp in cmd.points:
145 matchers[dp] = re.compile(re.escape(dp.data['processName']))
146
147 dpsToProcs = self.groupProcs(matchers, cmd.result.output)
148
149
150 for dp in cmd.points:
151 process = dp.data['processName']
152 failSeverity = dp.data['failSeverity']
153 procInfo = dpsToProcs.get(dp, None)
154 if not procInfo:
155 self.sendEvent(results,
156 summary='Process not running: ' + process,
157 component=process,
158 severity=failSeverity)
159 log.debug("device:%s, command: %s, procInfo: %r, failSeverity: %r, process: %s, dp: %r",
160 cmd.deviceConfig.device,
161 cmd.command,
162 procInfo,
163 failSeverity,
164 process,
165 dp)
166 else:
167 if 'cpu' in dp.id:
168 results.values.append( (dp, procInfo['cpu']) )
169 if 'mem' in dp.id:
170 results.values.append( (dp, procInfo['rss']) )
171 if 'count' in dp.id:
172 results.values.append( (dp, len(procInfo['pids'])) )
173
174
175
176
177 device = cmd.deviceConfig.device
178 before = AllPids.get( (device, process), emptySet)
179 after = set()
180 if procInfo:
181 after = procInfo['pids']
182
183 alertOnRestart = dp.data['alertOnRestart']
184
185 if before != after:
186 if len(before) > len(after) and alertOnRestart:
187 pids = ', '.join(map(str, before - after))
188 self.sendEvent(results,
189 summary='Pid(s) %s stopped: %s' % (pids, process),
190 component=process,
191 severity=failSeverity)
192 if len(before) == len(after) and alertOnRestart:
193
194 pids = ', '.join(map(str, before - after))
195 self.sendEvent(results,
196 summary='Pid(s) %s restarted: %s' % (pids, process),
197 component=process,
198 severity=failSeverity)
199 if len(before) < len(after):
200 if len(before) == 0:
201 self.sendEvent(results,
202 summary='Process running: %s' % process,
203 component=process,
204 severity=0)
205
206 AllPids[device, process] = after
207