1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 __doc__=''' ZenPing
16
17 Determines the availability of an IP address using ping.
18
19 $Id$'''
20
21 from socket import gethostbyname, getfqdn, gaierror
22
23 import time
24
25 import Globals
26
27 from Products.ZenStatus.AsyncPing import Ping
28 from Products.ZenStatus.TestPing import Ping as TestPing
29 from Products.ZenStatus import pingtree
30 from Products.ZenUtils.Utils import unused
31 unused(pingtree)
32
33 from Products.ZenEvents.ZenEventClasses import Status_Ping, Clear
34 from Products.ZenHub.PBDaemon import FakeRemote, PBDaemon
35 from Products.ZenUtils.DaemonStats import DaemonStats
36 from Products.ZenUtils.Driver import drive, driveLater
37
38 from twisted.internet import reactor
39 from twisted.python import failure
40
42
# Class-level defaults; the rest of this file reads them through ``self``.

# Daemon identity: ``name`` is handed to rrdStats.config() and ``agent`` is
# stamped on every ping event.
name = agent = "zenping"
# Event group recorded on every ping event.
eventGroup = "Ping"
# Hub services requested on top of PBDaemon's own list.
initialServices = PBDaemon.initialServices + ['PingConfig']

# Tunables.  Each of the six values below is overwritten with the monitor's
# 'propertyItems' when configuration is loaded.
pingTimeOut = 1.5           # presumably seconds -- TODO confirm
pingTries = 2
pingChunk = 75
pingCycleInterval = 60      # presumably seconds -- TODO confirm
# Used as the delay before the next configuration load.  The value received
# from the monitor is multiplied by 60 after it is copied in, so the remote
# figure is presumably minutes while this attribute holds seconds.
configCycleInterval = 20*60
maxPingFailures = 2

# Runtime state.
pinger = None
pingTreeIter = None
startTime = None
jobs = 0                    # job count reported in the end-of-cycle log line
reconfigured = True         # set when properties are copied in, cleared at end of cycle
loadingConfig = None        # time.time() when a config load began; None when idle
lastConfig = None           # time.time() when the last config load completed
61
62
78
79
82
83
88
89
95
96
"Send an event based on a ping job to the event backend."
# Map the ping job's fields onto the event.  The event class is always
# Status_Ping and the component is left empty.
evt = dict(device=pj.hostname,
           ipAddress=pj.ipaddr,
           summary=pj.message,
           severity=pj.severity,
           eventClass=Status_Ping,
           eventGroup=self.eventGroup,
           agent=self.agent,
           component='',
           manager=self.options.monitor)
# eventState is optional on the job: forward it only when it is present
# and not None.
evstate = getattr(pj, 'eventState', None)
if evstate is not None:
    evt['eventState'] = evstate
self.sendEvent(evt)
112
def loadConfig(self, driver):
    """Get the configuration for zenping.

    This is a generator meant to be run through ``drive``/``driveLater``:
    every ``yield`` hands over one remote call and the following
    ``driver.next()`` reads that call's result.

    Nothing is fetched when a load is already in progress, or when the
    previous load completed less than ``options.minconfigwait`` seconds ago
    (in that case ``remote_updateConfig`` is scheduled for the remaining
    time).  Otherwise the steps are, in order:

    1. monitor properties, applied with ``copyItems``;
    2. schedule the next run after ``configCycleInterval``;
    3. default RRD create command, threshold classes and collector
       thresholds, passed to ``rrdStats``;
    4. the ping tree, after which ``clearDeletedDevices`` is given the
       previous tree.

    Exceptions are logged and never propagated.  A failed load also clears
    the in-progress marker so that the next attempt is not rejected as
    "still loading".
    """
    # True once *this* invocation has set the in-progress marker, so the
    # error handler never clears a marker owned by another running load.
    owns_load = False
    try:
        if self.loadingConfig:
            self.log.warning("Configuration still loading. Started at %s" %
                             time.asctime(time.localtime(self.loadingConfig)))
            return

        if self.lastConfig:
            configwait = time.time() - self.lastConfig
            delay = self.options.minconfigwait - configwait
            if delay > 0:
                reactor.callLater(delay, self.remote_updateConfig)
                self.log.debug("Config recently updated: not fetching")
                return

        self.loadingConfig = time.time()
        owns_load = True

        self.log.info('fetching monitor properties')
        yield self.config().callRemote('propertyItems')
        self.copyItems(driver.next())

        # Scheduled right after the properties arrive so the interval just
        # copied in is the one used.
        driveLater(self.configCycleInterval, self.loadConfig)

        self.log.info("fetching default RRDCreateCommand")
        yield self.config().callRemote('getDefaultRRDCreateCommand')
        createCommand = driver.next()

        self.log.info("getting threshold classes")
        yield self.config().callRemote('getThresholdClasses')
        self.remote_updateThresholdClasses(driver.next())

        self.log.info("getting collector thresholds")
        yield self.config().callRemote('getCollectorThresholds')
        self.rrdStats.config(self.options.monitor,
                             self.name,
                             driver.next(),
                             createCommand)

        self.log.info("getting ping tree")
        yield self.config().callRemote('getPingTree',
                                       self.options.name,
                                       findIp())
        oldtree, self.pingtree = self.pingtree, driver.next()
        self.clearDeletedDevices(oldtree)

        self.rrdStats.gauge('configTime',
                            self.configCycleInterval,
                            time.time() - self.loadingConfig)
        self.loadingConfig = None
        self.lastConfig = time.time()
    except Exception:
        # The active exception is read through sys.exc_info() so the clause
        # needs no version-specific binding syntax.
        import sys
        self.log.exception(sys.exc_info()[1])
        if owns_load:
            # Without this reset a single failed fetch would leave the
            # marker set forever and block every later reload.
            self.loadingConfig = None
166
167
def buildOptions(self):
    """Add zenping's command-line options to those defined by PBDaemon.

    Options added: ``--name``, ``--test``, ``--useFileDescriptor`` and
    ``--minConfigWait``.
    """
    PBDaemon.buildOptions(self)
    # getfqdn() may consult the resolver, so look it up once and use the
    # same value for both the default and the help text.
    fqdn = getfqdn()
    self.parser.add_option('--name',
                           dest='name',
                           default=fqdn,
                           help=("host that roots the ping dependency "
                                 "tree: typically the collecting hosts' "
                                 "name; defaults to our fully qualified "
                                 "domain name (%s)" % fqdn))
    self.parser.add_option('--test',
                           dest='test',
                           default=False,
                           action="store_true",
                           help="Run in test mode: doesn't really ping,"
                                " but reads the list of IP Addresses that "
                                "are up from /tmp/testping")
    # No type is given, so the descriptor arrives as a string.
    self.parser.add_option('--useFileDescriptor',
                           dest='useFileDescriptor',
                           default=None,
                           help="use the given (privileged) file descriptor")
    self.parser.add_option('--minConfigWait',
                           dest='minconfigwait',
                           default=300,
                           type='int',
                           help="the minimal time, in seconds, "
                                "between refreshes of the config")
196
197
209
210
224
225 - def ping(self, pj):
231
238
239
"Note the end of the ping list with a successful status message"
# NOTE(review): this reads self.start, while the class-level default visible
# in this file is named startTime -- confirm self.start is assigned before
# a cycle can finish.
runtime = time.time() - self.start
self.log.info("Finished pinging %d jobs in %.2f seconds",
              self.jobs, runtime)
# The cycle that just finished ran with the current settings.
self.reconfigured = False
# Without the cycle option this is a one-shot run: stop the reactor.
# Otherwise send a heartbeat.
if not self.options.cycle:
    reactor.stop()
else:
    self.heartbeat()
250
261
271
# Delegate to doPingFailed and make sure nothing it raises escapes: any
# exception is written to the log with its traceback instead.
try:
    self.doPingFailed(err)
except Exception:
    import traceback
    # format_exc() returns the active traceback as a string.  The previous
    # print_exc(ex, out) call passed the exception object in the ``limit``
    # position.
    self.log.error("Exception: %s", traceback.format_exc())
281
314
315
320
321
# Start a configuration load now, outside the regular schedule.
self.log.debug("Asynch update config")
d = drive(self.loadConfig)
def logResults(v):
    # Only a Failure is reported; any other result is ignored.
    if isinstance(v, failure.Failure):
        self.log.error("Unable to reload config for async update")
# addBoth: logResults is attached for both the success and the error outcome.
d.addBoth(logResults)
329
330
def copyItems(self, items):
    """Apply monitor properties to this daemon's ping settings.

    ``items`` is anything ``dict()`` accepts (a mapping or an iterable of
    ``(name, value)`` pairs).  Only the known ping settings are copied;
    settings missing from ``items`` keep their current value.  A supplied
    ``configCycleInterval`` is multiplied by 60 before it is stored.
    ``reconfigured`` is always set to True.
    """
    items = dict(items)
    for att in ("pingTimeOut",
                "pingTries",
                "pingChunk",
                "pingCycleInterval",
                "configCycleInterval",
                "maxPingFailures",
                ):
        if att in items:
            setattr(self, att, items[att])
    # Convert only a freshly supplied value.  Converting unconditionally
    # would multiply the stored value by 60 again on every call that omits
    # the key.
    if "configCycleInterval" in items:
        self.configCycleInterval *= 60
    self.reconfigured = True
345
346
352
353
362
363
368
369
def findIp():
    """Return an IPv4 address for this host as a dotted-quad string.

    The fully qualified host name is resolved first.  If that lookup raises
    ``gaierror``, the output of ``ifconfig -a`` is scanned and the first
    address that is not ``127.0.0.1`` is returned.  ``'127.0.0.1'`` is the
    last resort when no ifconfig binary is installed or it reports no other
    address.
    """
    try:
        return gethostbyname(getfqdn())
    except gaierror:
        # Imported here because only this fallback path needs them.
        import os
        import re

        loopback = '127.0.0.1'

        ifconfig = None
        for candidate in ('/sbin/ifconfig',
                          '/usr/sbin/ifconfig',
                          '/usr/bin/ifconfig',
                          '/bin/ifconfig'):
            if os.path.exists(candidate):
                ifconfig = candidate
                break
        if ifconfig is None:
            # Hosts without an ifconfig binary fall back to loopback
            # instead of failing with IndexError.
            return loopback

        fp = os.popen(ifconfig + ' -a')
        try:
            # Stanzas in the output are separated by blank lines.
            stanzas = fp.read().split('\n\n')
        finally:
            fp.close()

        digits = r'[0-9]{1,3}'
        pat = r'(addr:|inet) *(%s\.%s\.%s\.%s)[^0-9]' % ((digits,)*4)
        parse = re.compile(pat)
        for stanza in stanzas:
            # Only the first address of each stanza is considered.
            found = parse.search(stanza)
            if found and found.group(2) != loopback:
                return found.group(2)
        return loopback
400
if __name__ == '__main__':
    # Script entry point: build the daemon, set the 'zen.Events' logger to
    # INFO (numeric level 20), then run.
    import logging
    daemon = ZenPing()
    logging.getLogger('zen.Events').setLevel(logging.INFO)
    daemon.run()
406