1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 __doc__='''zenactions
16
17 Turn events into notifications (pages, emails).
18
19 $Id$
20 '''
21
22 __version__ = "$Revision$"[11:-2]
23
24
25 import socket
26 import time
27 from sets import Set
28 import Globals
29
30 from ZODB.POSException import POSError
31 from _mysql_exceptions import OperationalError, ProgrammingError
32
33 from Products.ZenUtils.ZCmdBase import ZCmdBase
34 from Products.ZenUtils.ZenTales import talesCompile, getEngine
35 from ZenEventClasses import App_Start, App_Stop, Status_Heartbeat
36 from ZenEventClasses import Cmd_Fail
37 import Event
38 from Schedule import Schedule
39 from UpdateCheck import UpdateCheck
40 from Products.ZenUtils import Utils
41 from twisted.internet import reactor
42 from twisted.internet.protocol import ProcessProtocol
43 from email.Utils import formatdate
44
45 DEFAULT_MONITOR = "localhost"
46
48 return s[0:1].upper() + s[1:]
49
51
58
70
72 self.server.log.debug("Command finished: %s" % reason.getErrorMessage())
73 code = 1
74 try:
75 code = reason.value.exitCode
76 except AttributeError:
77 pass
78
79
80 if self.timeout:
81 self.timeout.cancel()
82 self.timeout = None
83
84 if code == 0:
85 cmdData = self.data or "<command produced no output>"
86 self.server.log.debug("Command %s says: %s", self.cmd.id, cmdData)
87 self.server.sendEvent(Event.Event(
88 device=self.server.options.monitor,
89 eventClass=Cmd_Fail,
90 severity=Event.Clear,
91 component="zenactions",
92 eventKey=self.cmd.id,
93 summary="Command succeeded: %s: %s" % (
94 self.cmd.id, cmdData),
95 ))
96 else:
97 cmdError = self.error or "<command produced no output>"
98 self.server.log.error("Command %s says %s", self.cmd.id, cmdError)
99 self.server.sendEvent(Event.Event(
100 device=self.server.options.monitor,
101 eventClass=Cmd_Fail,
102 severity=Event.Error,
103 component="zenactions",
104 eventKey=self.cmd.id,
105 summary="Error running: %s: %s" % (
106 self.cmd.id, cmdError),
107 ))
108
111
114
115
117 """
118 Take actions based on events in the event manager.
119 Start off by sending emails and pages.
120 """
121
122 lastCommand = None
123
124 addstate = ("INSERT INTO alert_state "
125 "VALUES ('%s', '%s', '%s', NULL) "
126 "ON DUPLICATE KEY UPDATE lastSent = now()")
127
128
129 clearstate = ("DELETE FROM alert_state "
130 " WHERE evid='%s' "
131 " AND userid='%s' "
132 " AND rule='%s'")
133
134
135
136
137
138
139 newsel = ("SELECT %s, evid FROM status WHERE "
140 "%s AND evid NOT IN "
141 " (SELECT evid FROM alert_state "
142 " WHERE userid='%s' AND rule='%s' %s)")
143
144 clearsel = ("SELECT %s, h.evid FROM history h, alert_state a "
145 " WHERE h.evid=a.evid AND a.userid='%s' AND a.rule='%s'")
146
147 clearEventSelect = ("SELECT %s "
148 " FROM history clear, history event "
149 " WHERE clear.evid = event.clearid "
150 " AND event.evid = '%s'")
151
152
166
168 """Load the ActionRules into the system.
169 """
170 self.actions = []
171 for ar in self.dmd.ZenUsers.getAllActionRules():
172 if not ar.enabled: continue
173 userid = ar.getUser().id
174 self.actions.append(ar)
175 self.log.debug("action:%s for:%s loaded", ar.getId(), userid)
176
177
189
190
203
204
206 return '%s/zport/dmd/Events/eventFields?evid=%s' % (
207 self.options.zopeurl, evid)
208
209
213
214
216 return '%s/zport/dmd/Events/manage_ackEvents?evids=%s&zenScreenName=viewEvents' % (self.options.zopeurl, evid)
217
218
220 return '%s/zport/dmd/Events/manage_deleteEvents' % self.options.zopeurl + \
221 '?evids=%s&zenScreenName=viewHistoryEvents' % evid
222
223
225 return '%s/zport/dmd/Events/manage_undeleteEvents' % self.options.zopeurl + \
226 '?evid=%s&zenScreenName=viewEvents' % evid
227
228
230 """Run through all rules matching them against events.
231 """
232 for ar in self.actions:
233 try:
234 self.lastCommand = None
235
236 actfunc = getattr(self, "send"+ar.action.title())
237 self.processEvent(zem, ar, actfunc)
238 except (SystemExit, KeyboardInterrupt, OperationalError, POSError):
239 raise
240 except:
241 if self.lastCommand:
242 self.log.warning(self.lastCommand)
243 self.log.exception("action:%s",ar.getId())
244
246 self.updateCheck.check(self.dmd, zem)
247 import transaction
248 transaction.commit()
249
251 fields = context.getEventFields()
252 userid = context.getUserid()
253
254 nwhere = context.where.strip() or '1 = 1'
255 if context.delay > 0:
256 nwhere += " and firstTime + %s < UNIX_TIMESTAMP()" % context.delay
257 awhere = ''
258 if context.repeatTime:
259 awhere += ' and DATE_ADD(lastSent, INTERVAL %d SECOND) > now() ' % (
260 context.repeatTime,)
261 q = self.newsel % (",".join(fields), nwhere, userid, context.getId(),
262 awhere)
263 for result in self.query(q):
264 evid = result[-1]
265 data = dict(zip(fields, map(zem.convert, fields, result[:-1])))
266 data['eventUrl'] = self.getUrl(evid)
267 device = self.dmd.Devices.findDevice(data.get('device', None))
268 if device:
269 data['eventsUrl'] = self.getEventsUrl(device)
270 else:
271 data['eventsUrl'] = 'n/a'
272 data['device'] = data.get('device', None) or ''
273 data['ackUrl'] = self.getAckUrl(evid)
274 data['deleteUrl'] = self.getDeleteUrl(evid)
275 severity = data.get('severity', -1)
276 data['severityString'] = zem.getSeverityString(severity)
277 if action(context, data, False):
278 addcmd = self.addstate % (evid, userid, context.getId())
279 self.execute(addcmd)
280
281
282 historyFields = [("h.%s" % f) for f in fields]
283 historyFields = ','.join(historyFields)
284 q = self.clearsel % (historyFields, userid, context.getId())
285 for result in self.query(q):
286 evid = result[-1]
287 data = dict(zip(fields, map(zem.convert, fields, result[:-1])))
288
289
290 cfields = [('clear.%s' % x) for x in fields]
291 q = self.clearEventSelect % (",".join(cfields), evid)
292
293
294 cfields = [('clear%s' % _capitalize(x)) for x in fields]
295
296
297 data.update({}.fromkeys(cfields, ""))
298
299
300 for values in self.query(q):
301 values = map(zem.convert, fields, values)
302 data.update(dict(zip(cfields, values)))
303
304 data['clearOrEventSummary'] = (
305 data['clearSummary'] or data['summary'])
306
307
308 data['eventUrl'] = self.getUrl(evid)
309 severity = data.get('severity', -1)
310 data['severityString'] = zem.getSeverityString(severity)
311 delcmd = self.clearstate % (evid, userid, context.getId())
312 if getattr(context, 'sendClear', True):
313 if action(context, data, True):
314 self.execute(delcmd)
315 else:
316 self.execute(delcmd)
317
318
def maintenance(self, zem):
    """Run the stored procedure that maintains the events database.

    Builds a ``call age_events(...)`` statement from the event manager's
    ``eventAgingHours`` and ``eventAgingSeverity`` settings and executes it.

    zem -- event manager object supplying the two aging settings.

    A ProgrammingError raised while executing the statement is logged with
    its traceback and not re-raised; any other exception propagates.
    """
    agingArgs = (zem.eventAgingHours, zem.eventAgingSeverity)
    procCall = 'call age_events(%s, %s);' % agingArgs
    try:
        self.execute(procCall)
    except ProgrammingError:
        # Log the failing statement and return normally to the caller.
        message = "problem with proc: '%s'" % procCall
        self.log.exception(message)
328
329
331 """
332 Once per day delete events from history table.
333 If force then run the deletion statement regardless of when it was
334 last run (the deletion will still not run if the historyMaxAgeDays
335 setting in the event manager is not greater than zero.)
336 If deferred then we are running in a twisted reactor. Run the
337 deletion script in a non-blocking manner (if it is to be run) and
338 return a deferred (if the deletion script is run.)
339 In all cases return None if the deletion script is not run.
340 """
341 import datetime
342 import os
343 import twisted.internet.utils
344 import Products.ZenUtils.Utils as Utils
345 import transaction
346 import subprocess
347
348 def onSuccess(unused, startTime):
349 self.log.info('Done deleting historical events in %.2f seconds' %
350 (time.time() - startTime))
351 return None
352 def onError(error, startTime):
353 self.log.error('Error deleting historical events after '
354 '%s seconds: %s' % (time.time()-startTime,
355 error))
356 return None
357
358
359
360 d = None
361
362
363
364
365 try:
366 maxDays = int(self.dmd.ZenEventManager.historyMaxAgeDays)
367 except ValueError:
368 maxDays = 0
369 if maxDays > 0:
370
371
372 lastRun = getattr(self.dmd,
373 'lastDeleteHistoricalEvents_datetime', None)
374
375
376
377
378 lastAge = getattr(self.dmd,
379 'lastDeleteHistoricalEvents_days', None)
380 now = datetime.datetime.now()
381 if not lastRun \
382 or now - lastRun > datetime.timedelta(1) \
383 or lastAge != maxDays \
384 or force:
385 self.log.info('Deleting historical events older than %s days' %
386 maxDays)
387 startTime = time.time()
388 cmd = Utils.zenPath('Products', 'ZenUtils',
389 'ZenDeleteHistory.py')
390 args = ['--numDays=%s' % maxDays]
391 if deferred:
392
393 d = twisted.internet.utils.getProcessOutput(
394 cmd, args, os.environ, errortoo=True)
395 d.addCallback(onSuccess, startTime)
396 d.addErrback(onError, startTime)
397 else:
398
399 proc = subprocess.Popen(
400 [cmd]+args, stdout=subprocess.PIPE,
401 stderr=subprocess.STDOUT, env=os.environ)
402
403
404 output, _ = proc.communicate()
405 code = proc.wait()
406 if code:
407 onError(output, startTime)
408 else:
409 onSuccess(output, startTime)
410
411 self.dmd.lastDeleteHistoricalEvents_datetime = now
412 self.dmd.lastDeleteHistoricalEvents_days = maxDays
413 transaction.commit()
414 return d
415
416
418 """Create events for failed heartbeats.
419 """
420
421 q = ("SELECT device, component "
422 "FROM status WHERE eventClass = '%s'" % Status_Heartbeat)
423 heartbeatState = Set(self.query(q))
424
425
426 sel = "SELECT device, component FROM heartbeat "
427 sel += "WHERE DATE_ADD(lastTime, INTERVAL timeout SECOND) <= NOW();"
428 for device, comp in self.query(sel):
429 self.sendEvent(
430 Event.Event(device=device, component=comp,
431 eventClass=Status_Heartbeat,
432 summary="%s %s heartbeat failure" % (device, comp),
433 severity=Event.Error))
434 heartbeatState.discard((device, comp))
435
436
437 for device, comp in heartbeatState:
438 self.sendEvent(
439 Event.Event(device=device, component=comp,
440 eventClass=Status_Heartbeat,
441 summary="%s %s heartbeat clear" % (device, comp),
442 severity=Event.Clear))
443
445 try:
446 command = cmd.command
447 if clear:
448 command = cmd.clearCommand
449 device = self.dmd.Devices.findDevice(data.get('device', ''))
450 component = None
451 if device:
452 componentName = data.get('component')
453 for c in device.getMonitoredComponents():
454 if c.id == componentName:
455 component = c
456 break
457 compiled = talesCompile('string:' + command)
458 environ = {'dev':device, 'component':component, 'evt':data }
459 res = compiled(getEngine().getContext(environ))
460 if isinstance(res, Exception):
461 raise res
462 prot = EventCommandProtocol(cmd, self)
463 self.log.info('Running %s' % res)
464 reactor.spawnProcess(prot, '/bin/sh',
465 ('/bin/sh', '-c', res),
466 env=None)
467 except Exception:
468 self.log.exception('Error running command %s', cmd.id)
469 return True
470
471
480
481
def mainbody(self):
    """Run one pass of the actions main loop.

    The steps run strictly in this order: reap child processes, reload the
    action rules, run event commands, process the rules, check the version,
    run database maintenance, delete historical events (deferred when the
    ``cycle`` option is set) and finally raise or clear heartbeat events.
    """
    # Function-scope import, as in the original; Twisted helper that reaps
    # child processes.
    from twisted.internet.process import reapAllProcesses
    reapAllProcesses()

    eventManager = self.dmd.ZenEventManager

    # Rules are reloaded on every pass before anything acts on them.
    self.loadActionRules()
    self.eventCommands(eventManager)
    self.processRules(eventManager)
    self.checkVersion(eventManager)
    self.maintenance(eventManager)

    # Only run the history deletion asynchronously when cycling.
    runDeferred = self.options.cycle
    self.deleteHistoricalEvents(deferred=runDeferred)
    self.heartbeatEvents()
495
496
508
509
518
519
521 """Send event to the system.
522 """
523 self.dmd.ZenEventManager.sendEvent(evt)
524
525
533
534
542
558
570
def sendPage(self, action, data, clear = None):
    """Send an event to a pager.

    action -- the rule being applied; supplies the recipient addresses,
              the owning user and the rule id.
    data   -- event field dictionary handed to self.format().
    clear  -- passed through to self.format() unchanged.

    Returns a true value if at least one recipient was paged.  Also returns
    True when the rule has no addresses at all (a warning is logged in that
    case).  Returns False when every attempt failed.
    """
    # Only the first element of the formatted pair is used for pages.
    subject, _unusedBody = self.format(action, data, clear)
    addresses = action.getAddresses()
    if not addresses:
        self.log.warning('failed to page %s on rule %s: %s',
                         action.getUser().id, action.id,
                         'Unspecified address.')
        return True

    anySent = False
    for address in addresses:
        sent, problem = Utils.sendPage(address, subject,
                                       self.dmd.pageCommand)
        if not sent:
            self.log.info('failed to send page to %s: %s %s',
                          address, subject, problem)
            continue
        self.log.info('sent page to %s: %s', address, subject)
        # A single successful page is enough to report success.
        if not anySent:
            anySent = sent
    return anySent
598
599
600
def sendEmail(self, action, data, clear = None):
    """Send an event to an email address.

    action -- the rule being applied; supplies the recipient addresses,
              the owning user, the rule id and the ``plainText`` flag.
    data   -- event field dictionary handed to self.format().
    clear  -- passed through to self.format() unchanged.

    Returns True if we think the email was sent, False otherwise.  Also
    returns True when the rule has no addresses at all (a warning is logged
    in that case).
    """
    # The capitalized module names (email.MIMEText, ...) were removed in
    # Python 3; prefer the email.mime.* names and fall back to the legacy
    # ones only for interpreters whose email package predates them.
    try:
        from email.mime.text import MIMEText
        from email.mime.multipart import MIMEMultipart
    except ImportError:
        from email.MIMEText import MIMEText
        from email.MIMEMultipart import MIMEMultipart

    addr = action.getAddresses()
    if not addr:
        self.log.warning('failed to email %s on rule %s: %s',
            action.getUser().id, action.id, 'Unspecified address.')
        return True

    fmt, htmlbody = self.format(action, data, clear)
    # Preserve line breaks in the HTML part; the plain-text part is the
    # same body with the markup stripped.
    htmlbody = htmlbody.replace('\n','<br/>\n')
    body = self.stripTags(htmlbody)
    plaintext = MIMEText(body)

    if action.plainText:
        emsg = plaintext
    else:
        # multipart/related wrapping a multipart/alternative that holds the
        # plain-text part followed by the HTML part.
        emsg = MIMEMultipart('related')
        emsgAlternative = MIMEMultipart('alternative')
        emsg.attach(emsgAlternative)
        html = MIMEText(htmlbody)
        html.set_type('text/html')
        emsgAlternative.attach(plaintext)
        emsgAlternative.attach(html)

    emsg['Subject'] = fmt
    emsg['From'] = self.dmd.getEmailFrom()
    emsg['To'] = ', '.join(addr)
    emsg['Date'] = formatdate(None, True)
    result, errorMsg = Utils.sendEmail(emsg, self.dmd.smtpHost,
        self.dmd.smtpPort, self.dmd.smtpUseTLS, self.dmd.smtpUser,
        self.dmd.smtpPass)
    if result:
        self.log.info("rule '%s' sent email:%s to:%s",
            action.id, fmt, addr)
    else:
        self.log.info("rule '%s' failed to send email to %s: %s %s",
            action.id, ','.join(addr), fmt, errorMsg)
    return result
644
645
647 ZCmdBase.buildOptions(self)
648 self.parser.add_option('--cycletime',
649 dest='cycletime', default=60, type="int",
650 help="check events every cycletime seconds")
651 self.parser.add_option(
652 '--zopeurl', dest='zopeurl',
653 default='http://%s:%d' % (socket.getfqdn(), 8080),
654 help="http path to the root of the zope server")
655 self.parser.add_option("--monitor", dest="monitor",
656 default=DEFAULT_MONITOR,
657 help="Name of monitor instance to use for heartbeat "
658 " events. Default is %s." % DEFAULT_MONITOR)
659
660
def sigTerm(self, signum=None, frame=None):
    """Controlled shutdown of the main loop on interrupt.

    Delegates to ZCmdBase.sigTerm() with the same arguments.  If that call
    raises SystemExit, the exception is not propagated; the Twisted reactor
    is stopped instead.

    signum -- signal number, passed through to the base class.
    frame  -- stack frame, passed through to the base class.
    """
    try:
        ZCmdBase.sigTerm(self, signum, frame)
    except SystemExit:
        # Stop the reactor rather than letting SystemExit propagate.
        reactor.stop()
667
if __name__ == "__main__":
    # Script entry point: build the daemon, set the 'zen.Events' logger to
    # INFO (numeric level 20), then hand control to run().
    daemon = ZenActions()
    import logging
    eventsLog = logging.getLogger('zen.Events')
    eventsLog.setLevel(logging.INFO)
    daemon.run()
673