Trees | Indices | Help |
|
---|
|
1 ########################################################################### 2 # 3 # This program is part of Zenoss Core, an open source monitoring platform. 4 # Copyright (C) 2007, Zenoss Inc. 5 # 6 # This program is free software; you can redistribute it and/or modify it 7 # under the terms of the GNU General Public License version 2 as published by 8 # the Free Software Foundation. 9 # 10 # For complete information please visit: http://www.zenoss.com/oss/ 11 # 12 ########################################################################### 13 14 import time 15 16 from twisted.internet import reactor, defer 17 18 import Globals # make zope imports work 19 from Products.ZenHub.PBDaemon import FakeRemote, PBDaemon 20 from Products.ZenUtils.Driver import drive, driveLater 21 from Products.ZenStatus.ZenTcpClient import ZenTcpClient 22 from Products.ZenEvents.ZenEventClasses import Heartbeat 23 24 # required for pb.setUnjellyableForClass 25 from Products.ZenHub.services import StatusConfig 26 if 0: 27 StatusConfig = None # pyflakes 28 29 from sets import Set 3032 _running = 0 33 _fail = 0 34 _success = 0 35 _start = 0 36 _stop = 0 37 _defer = None 38 4191 9243 self._success = 0 44 self._stop = 0 45 self._fail = 0 46 self._running = 0 47 self._remaining = jobs 48 self._start = time.time() 49 self._defer = defer.Deferred() 50 if not self._remaining: 51 self._stop = time.time() 52 self._defer.callback(self) 53 return self._defer5456 j = self._remaining.pop() 57 d = j.start() 58 d.addCallbacks(self.success, self.failure) 59 self._running += 1 60 return d6163 self._running -= 1 64 if self.done(): 65 self._stop = time.time() 66 self._defer, d = None, self._defer 67 d.callback(self) 68 return result69 73 7779 return self._running == 0 and not self._remaining80 8694 95 name = agent = "zenstatus" 96 initialServices = ['EventService', 'StatusConfig'] 97 statusCycleInterval = 300 98 configCycleInterval = 20 99 properties = ('statusCycleInterval', 'configCycleInterval') 100 reconfigureTimeout = None 101290 291 292 if __name__=='__main__': 293 pm = ZenStatus() 294 pm.run() 295103 PBDaemon.__init__(self, keeproot=True) 104 self.clients = {} 105 self.counts = {} 106 self.status = Status()107 110112 d = drive(self.scanCycle) 113 if not self.options.cycle: 114 d.addBoth(lambda unused: self.stop())115 119 123125 self.log.debug("Notification of config change from zenhub") 126 if self.reconfigureTimeout and not self.reconfigureTimeout.called: 127 self.reconfigureTimeout.cancel() 128 self.reconfigureTimeout = reactor.callLater(5, drive, self.reconfigure)129 133135 'extract configuration elements used by this server' 136 table = dict(items) 137 for name in self.properties: 138 value = table.get(name, None) 139 if value is not None: 140 if getattr(self, name) != value: 141 self.log.debug('Updated %s config to %s' % (name, value)) 142 setattr(self, name, value)143 146148 now = time.time() 149 self.log.info("fetching property items") 150 yield self.configService().callRemote('propertyItems') 151 self.setPropertyItems(driver.next()) 152 153 self.log.info("fetching default RRDCreateCommand") 154 yield self.configService().callRemote('getDefaultRRDCreateCommand') 155 createCommand = driver.next() 156 157 self.log.info("getting threshold classes") 158 yield self.configService().callRemote('getThresholdClasses') 159 self.remote_updateThresholdClasses(driver.next()) 160 161 self.log.info("getting collector thresholds") 162 yield self.configService().callRemote('getCollectorThresholds') 163 self.rrdStats.config(self.options.monitor, self.name, driver.next(), 164 createCommand) 165 166 d = driveLater(self.configCycleInterval * 60, self.configCycle) 167 d.addErrback(self.error) 168 169 yield drive(self.reconfigure) 170 driver.next() 171 172 self.rrdStats.gauge('configTime', 173 self.configCycleInterval * 60, 174 time.time() - now)175177 self.log.debug("Getting service status") 178 yield self.configService().callRemote('serviceStatus') 179 self.counts = {} 180 for (device, component), count in driver.next(): 181 self.counts[device, component] = count 182 183 self.log.debug("Getting services") 184 yield self.configService().callRemote('services', 185 self.options.configpath) 186 self.ipservices = [] 187 for s in driver.next(): 188 count = self.counts.get((s.device, s.component), 0) 189 self.ipservices.append(ZenTcpClient(s, count)) 190 self.log.debug("ZenStatus configured")191 192 195197 d = driveLater(self.statusCycleInterval, self.scanCycle) 198 d.addErrback(self.error) 199 200 if not self.status.done(): 201 duration = self.status.duration() 202 self.log.warning("Scan cycle not complete in %.2f seconds", 203 duration) 204 if duration < self.statusCycleInterval * 2: 205 self.log.warning("Waiting for the cycle to complete") 206 return 207 self.log.warning("Ditching this cycle") 208 209 self.log.debug("Getting down devices") 210 yield self.eventService().callRemote('getDevicePingIssues') 211 ignored = Set([s[0] for s in driver.next()]) 212 213 self.log.debug("Starting scan") 214 d = self.status.start([i for i in self.ipservices 215 if i.cfg.device not in ignored]) 216 self.log.debug("Running jobs") 217 self.runSomeJobs() 218 yield d 219 driver.next() 220 self.log.debug("Scan complete") 221 self.heartbeat()222224 _, _, success, fail = self.status.stats() 225 self.log.info("Finished %d jobs (%d good, %d bad) in %.2f seconds", 226 (success + fail), success, fail, self.status.duration()) 227 if not self.options.cycle: 228 self.stop() 229 return 230 heartbeatevt = dict(eventClass=Heartbeat, 231 component='zenstatus', 232 device=self.options.monitor) 233 self.sendEvent(heartbeatevt, timeout=self.statusCycleInterval*3) 234 self.niceDoggie(self.statusCycleInterval) 235 for ev in (self.rrdStats.gauge('cycleTime', 236 self.statusCycleInterval, 237 self.status.duration()) + 238 self.rrdStats.gauge('success', 239 self.statusCycleInterval, 240 success) + 241 self.rrdStats.gauge('failed', 242 self.statusCycleInterval, 243 fail)): 244 self.sendEvent(ev)245 246248 while 1: 249 left, running, good, bad = self.status.stats() 250 self.log.debug("Status: left %d running %d good %d bad %d", 251 left, running, good, bad) 252 if not left or running >= self.options.parallel: 253 break 254 d = self.status.next() 255 d.addCallbacks(self.processTest, self.processError) 256 self.log.debug("Started job")257259 self.runSomeJobs() 260 key = job.cfg.device, job.cfg.component 261 evt = job.getEvent() 262 if evt: 263 self.sendEvent(evt) 264 self.counts.setdefault(key, 0) 265 self.counts[key] += 1 266 else: 267 if key in self.counts: 268 del self.counts[key]269 272274 PBDaemon.buildOptions(self) 275 p = self.parser 276 p.add_option('--configpath', 277 dest='configpath', 278 default="/Devices/Server", 279 help="path to our monitor config ie: /Devices/Server") 280 p.add_option('--parallel', 281 dest='parallel', 282 type='int', 283 default=50, 284 help="number of devices to collect at one time") 285 p.add_option('--cycletime', 286 dest='cycletime', 287 type="int", 288 default=60, 289 help="check events every cycletime seconds")
Trees | Indices | Help |
|
---|
Generated by Epydoc 3.0beta1 on Thu May 7 11:46:27 2009 | http://epydoc.sourceforge.net |