Package Products :: Package ZenEvents :: Module Availability
[hide private]
[frames | no frames]

Source Code for Module Products.ZenEvents.Availability

  1  ############################################################################## 
  2  #  
  3  # Copyright (C) Zenoss, Inc. 2007, all rights reserved. 
  4  #  
  5  # This content is made available according to terms specified in 
  6  # License.zenoss under the directory where your Zenoss product is installed. 
  7  #  
  8  ############################################################################## 
  9   
 10   
 11  import time 
 12  from collections import defaultdict 
 13  from itertools import takewhile, chain 
 14   
 15  from Globals import InitializeClass 
 16  from Products.ZenModel.DeviceClass import DeviceClass 
 17  from Products.ZenModel.DeviceGroup import DeviceGroup 
 18  from Products.ZenModel.Location import Location 
 19  from Products.ZenModel.System import System 
 20  from Products.ZenUtils import Map 
 21  from Products.ZenEvents.ZenEventClasses import Status_Ping, Status_Snmp 
 22  from Products.ZenEvents.ZenEventClasses import Status_OSProcess 
 23  from Products.Zuul import getFacade 
 24  from Products.Zuul.interfaces import ICatalogTool 
 25  from Products.AdvancedQuery import Eq, Or, Not 
 26  from zenoss.protocols.protobufs.zep_pb2 import (SEVERITY_CRITICAL, SEVERITY_ERROR, 
 27                                                  SEVERITY_WARNING, SEVERITY_INFO, 
 28                                                  SEVERITY_DEBUG, SEVERITY_CLEAR) 
 29  from zenoss.protocols.protobufs.zep_pb2 import (STATUS_NEW, STATUS_ACKNOWLEDGED, 
 30                                                  STATUS_SUPPRESSED, STATUS_CLOSED, 
 31                                                  STATUS_CLEARED, STATUS_DROPPED, 
 32                                                  STATUS_AGED) 
 33   
 34  ALL_EVENT_STATUSES = set([STATUS_NEW, STATUS_ACKNOWLEDGED, 
 35                          STATUS_SUPPRESSED, STATUS_CLOSED, 
 36                          STATUS_CLEARED, STATUS_DROPPED, 
 37                          STATUS_AGED]) 
 38  CLOSED_EVENT_STATUSES = set([STATUS_CLOSED, STATUS_CLEARED, 
 39                               STATUS_DROPPED, STATUS_AGED]) 
 40  OPEN_EVENT_STATUSES = ALL_EVENT_STATUSES - CLOSED_EVENT_STATUSES 
def _severityGreaterThanOrEqual(sev):
    """Return the list of severities ranked at or above ``sev``.

    The ranking is spelled out explicitly (most severe first) instead of
    relying on the numeric values of the severity constants.  The given
    severity is always the last element of the returned list; if it does
    not appear in the ranking, every ranked severity precedes it.
    """
    ranked = (
        SEVERITY_CRITICAL,
        SEVERITY_ERROR,
        SEVERITY_WARNING,
        SEVERITY_INFO,
        SEVERITY_DEBUG,
        SEVERITY_CLEAR,
    )
    at_or_above = []
    for level in ranked:
        if level == sev:
            break
        at_or_above.append(level)
    at_or_above.append(sev)
    return at_or_above
53
def _lookupUuid(catalog, cls, identifier):
    """Look up the uuid of an object by id or name.

    Searches ``catalog`` for at most one object of type ``cls`` whose
    ``id`` or ``name`` equals ``identifier``.  Returns the uuid of the
    first hit, or None when the search finds nothing.
    """
    tool = ICatalogTool(catalog)
    id_or_name = Or(Eq('id', identifier), Eq('name', identifier))
    found = tool.search(cls, query=id_or_name, limit=1)
    if not found.total:
        return None
    return found.results.next().uuid
from AccessControl import ClassSecurityInfo

# Granularity used both for the report cache and for rounding report dates
# (see _round); report dates are compared against time.time(), so this is
# in seconds.
CACHE_TIME = 60.0

# Shared store of report results, keyed by Report.tuple().
# NOTE(review): presumably entries expire after CACHE_TIME and access is
# lock-protected -- confirm against Products.ZenUtils.Map.
_cache = Map.Locked(Map.Timed(dict(), CACHE_TIME))
def _round(value):
    """Snap ``value`` down to a whole multiple of CACHE_TIME.

    None is passed through unchanged so "not specified" stays
    distinguishable from a real timestamp.
    """
    if value is not None:
        return (value // CACHE_TIME) * CACHE_TIME
    return None
72
def _findComponent(device, name):
    """Return the first monitored component of ``device`` named ``name``.

    Each component's ``name()`` is compared for equality with ``name``;
    None is returned when no component matches.
    """
    matches = (comp for comp in device.getMonitoredComponents()
               if comp.name() == name)
    return next(matches, None)
78
class Availability(object):
    """Simple record for holding availability information.

    ``availability`` is a fraction between 0 and 1 derived from the
    downtime accumulated inside a report window.  ``device`` is the
    identifier handed to ``dmd.Devices.findDevice`` and ``component`` is
    the component name matched against ``name()`` of the device's
    monitored components.
    """
    security = ClassSecurityInfo()
    security.setDefaultAccess('allow')

    @staticmethod
    def getDefaultAvailabilityStart(dmd):
        """Return the default report start: now minus the configured
        ``dmd.ZenEventManager.defaultAvailabilityDays`` (epoch seconds)."""
        return time.time() - dmd.ZenEventManager.defaultAvailabilityDays*24*60*60

    @staticmethod
    def getDefaultAvailabilityEnd():
        """Return the default report end: the current time (epoch seconds)."""
        return time.time()

    def __init__(self, device, component, downtime, total, systems=''):
        """Build a record from raw downtime figures.

        device    -- device identifier
        component -- component name ('' when not component specific)
        downtime  -- accumulated downtime, in the same units as ``total``
        total     -- length of the report window
        systems   -- systems string to carry along with the record
        """
        self.device = device
        self.systems = systems
        self.component = component

        # Guard against endDate being equal to or less than startDate.
        if total <= 0:
            self.availability = 0 if downtime else 1
        else:
            # Downtime may exceed the window (overlapping events), so
            # clamp the result at zero.
            self.availability = max(0, 1 - (float(downtime) / total))

    def floatStr(self):
        """Return the availability formatted as a percentage string."""
        return '%2.3f%%' % (self.availability * 100)

    def __str__(self):
        return self.floatStr()

    def __repr__(self):
        return '[%s %s %s]' % (self.device, self.component, self.floatStr())

    def __float__(self):
        return float(self.availability)

    def __int__(self):
        """Return the availability as a whole-number percentage."""
        return int(self.availability * 100)

    def __cmp__(self, other):
        """Order records by availability, then device, then component."""
        # ``component`` is a plain attribute, not a method; calling it
        # (as the code previously did) raised TypeError on any comparison.
        return cmp((self.availability, self.device, self.component),
                   (other.availability, other.device, other.component))

    def getDevice(self, dmd):
        """Return the result of looking this record's device up in ``dmd``."""
        return dmd.Devices.findDevice(self.device)

    def getComponent(self, dmd):
        """Return the component object for this record, or None.

        None is returned when the record has no device or component, when
        the device cannot be found, or when no monitored component of the
        device carries the recorded name.
        """
        if self.device and self.component:
            device = self.getDevice(dmd)
            if device:
                return _findComponent(device, self.component)
        return None

    # NOTE(review): original source lines 132-136 are absent from this
    # listing; nothing has been reconstructed for them.

InitializeClass(Availability)
class Report(object):
    """Determine availability by counting the amount of time down."""

    def __init__(self,
                 startDate=None,
                 endDate=None,
                 eventClass=Status_Ping,
                 severity=5,
                 device=None,
                 component='',
                 prodState=1000,
                 manager=None,
                 agent=None,
                 DeviceClass=None,
                 Location=None,
                 System=None,
                 DeviceGroup=None,
                 DevicePriority=None,
                 monitor=None):
        """Store the report criteria.

        Dates are rounded down with _round(); severity, prodState and
        DevicePriority are coerced to int unless they are None.
        """
        def optional_int(raw):
            # None means "not specified" and must survive untouched.
            if raw is None:
                return None
            return int(raw)

        self.startDate = _round(startDate)
        self.endDate = _round(endDate)
        self.eventClass = eventClass
        self.severity = optional_int(severity)
        self.device = device
        self.component = component
        self.prodState = optional_int(prodState)
        self.manager = manager
        self.agent = agent
        self.DeviceClass = DeviceClass
        self.Location = Location
        self.System = System
        self.DeviceGroup = DeviceGroup
        self.DevicePriority = optional_int(DevicePriority)
        self.monitor = monitor

    def tuple(self):
        """Return all report criteria as one tuple (used for hashing,
        comparison and as the cache key in query())."""
        return (
            self.startDate,
            self.endDate,
            self.eventClass,
            self.severity,
            self.device,
            self.component,
            self.prodState,
            self.manager,
            self.agent,
            self.DeviceClass,
            self.Location,
            self.System,
            self.DeviceGroup,
            self.DevicePriority,
            self.monitor,
        )

    def __hash__(self):
        criteria = self.tuple()
        return hash(criteria)

    def __cmp__(self, other):
        mine = self.tuple()
        theirs = other.tuple()
        return cmp(mine, theirs)

    def run(self, dmd):
        """Run the report, returning an Availability object for each device"""
        # Note: we don't handle overlapping "down" events, so down
        # time could get double-counted.
        __pychecker__ = 'no-local'
        now = time.time()
        zep = getFacade("zep", dmd)
        end_secs = min(self.endDate or Availability.getDefaultAvailabilityEnd(),
                       now)
        start_secs = (self.startDate or
                      Availability.getDefaultAvailabilityStart(dmd))

        # Filters are defined in integer milliseconds.
        start_ms = int(start_secs * 1000)
        end_ms = int(end_secs * 1000)
        total_report_window = end_ms - start_ms
        now_ms = int(now * 1000)

        filter_args = {
            'operator': zep.AND,
            'severity': _severityGreaterThanOrEqual(self.severity),
        }
        # The event class filter always carries a trailing slash.
        event_class = self.eventClass
        if not event_class.endswith('/'):
            event_class = event_class + '/'
        filter_args['event_class'] = event_class

        if self.device:
            filter_args['element_identifier'] = '"%s"' % self.device
        if self.component:
            filter_args['element_sub_identifier'] = '"%s"' % self.component
        if self.agent:
            filter_args['agent'] = self.agent
        if self.monitor is not None:
            filter_args['monitor'] = self.monitor

        # Filters on event details.
        detail_filters = {}
        if self.DevicePriority is not None:
            detail_filters['zenoss.device.priority'] = "%d:" % self.DevicePriority
        if self.prodState:
            detail_filters['zenoss.device.production_state'] = "%d:" % self.prodState
        if detail_filters:
            filter_args['details'] = detail_filters

        # Filters on tagged values (organizer uuids).
        candidate_uuids = []
        if self.DeviceClass:
            candidate_uuids.append(
                _lookupUuid(dmd.Devices, DeviceClass, self.DeviceClass))
        if self.Location:
            candidate_uuids.append(
                _lookupUuid(dmd.Locations, Location, self.Location))
        if self.System is not None:
            candidate_uuids.append(
                _lookupUuid(dmd.Systems, System, self.System))
        if self.DeviceGroup is not None:
            candidate_uuids.append(
                _lookupUuid(dmd.Groups, DeviceGroup, self.DeviceGroup))
        # Failed lookups come back as None; drop them.
        tag_uuids = [uuid for uuid in candidate_uuids if uuid]
        if tag_uuids:
            filter_args['tags'] = tag_uuids

        # Query zep for matching event summaries:
        #  1. all open events with first_seen < end of window
        #     (only active events need checking)
        #  2. all closed events with first_seen < end of window and
        #     status_change > start of window
        #     (both active and archived events are needed)

        # 1. open events
        filter_args['first_seen'] = (0, end_ms)
        filter_args['status'] = OPEN_EVENT_STATUSES
        open_filter = zep.createEventFilter(**filter_args)
        open_events = zep.getEventSummariesGenerator(open_filter)

        # 2. closed events, live and archived
        filter_args['status_change'] = (start_ms + 1,)
        filter_args['status'] = CLOSED_EVENT_STATUSES
        closed_filter = zep.createEventFilter(**filter_args)
        closed_events = zep.getEventSummariesGenerator(closed_filter)
        archived_events = zep.getEventSummariesGenerator(closed_filter,
                                                         archive=True)

        # Walk the events, tallying downtime per (device, component).
        accumulator = defaultdict(int)
        for summary in chain(open_events, closed_events, archived_events):
            began = summary['first_seen_time']
            if summary['status'] in CLOSED_EVENT_STATUSES:
                ended = summary['status_change_time']
            else:
                # Still open: downtime persists until the window closes.
                ended = end_ms

            # Events with no elapsed time contribute nothing.
            if began == ended:
                continue

            # Clip the event to the report window.
            began = max(began, start_ms)
            ended = min(ended, end_ms)

            actor = summary['occurrence'][0]['actor']
            evt_device = actor.get('element_identifier')
            evt_component = actor.get('element_sub_identifier')

            # Components are only kept apart when a component filter
            # was specified.
            if self.component:
                key = (evt_device, evt_component)
            else:
                key = (evt_device, '')
            accumulator[key] += (ended - began)

        if self.device:
            deviceList = []
            found = dmd.Devices.findDevice(self.device)
            if found:
                deviceList = [found]
                # Make sure the requested device shows up even without events.
                accumulator[(self.device, self.component)] += 0
        else:
            deviceList = []
            no_organizer_filter = not (self.DeviceClass or self.Location or
                                       self.System or self.DeviceGroup)
            if no_organizer_filter:
                deviceList = dmd.Devices.getSubDevices()
            else:
                allDevices = dict((d.id, d) for d in dmd.Devices.getSubDevices())
                every_id = set(allDevices.keys())

                def member_ids(root, organizer_id):
                    # No organizer given, or unknown organizer: no restriction.
                    if not organizer_id:
                        return every_id
                    try:
                        organizer = root.getOrganizer(organizer_id)
                    except KeyError:
                        return every_id
                    return set(d.id for d in organizer.getSubDevices())

                in_class = member_ids(dmd.Devices, self.DeviceClass)
                in_location = member_ids(dmd.Locations, self.Location)
                in_system = member_ids(dmd.Systems, self.System)
                in_group = member_ids(dmd.Groups, self.DeviceGroup)

                # Intersect all of the organizers.
                selected = in_class & in_location & in_system & in_group
                deviceList.extend(allDevices[dev_id] for dev_id in selected)

            if not self.component:
                # NOTE(review): this seeds a zero-downtime row for every
                # device, not only those in deviceList -- confirm intended.
                for known in dmd.Devices.getSubDevices():
                    accumulator[(known.id, '')] += 0

        # Walk the accumulator and build the report rows.
        deviceLookup = dict((d.id, d) for d in deviceList)
        result = []
        previous_id = None
        sysname = ''
        for (devid, compid), downtime in sorted(accumulator.items()):
            # Rows arrive sorted, so the systems string only needs to be
            # recomputed when the device id changes.
            if devid != previous_id:
                listed = deviceLookup.get(devid, None)
                sysname = listed.getSystemNamesString() if listed else ''
                previous_id = devid
            result.append(Availability(devid, compid, downtime,
                                       total_report_window, sysname))

        # Add in the devices that have the component, but no events -
        # assume this means no downtime.
        if self.component:
            downtime = 0
            for listed in deviceList:
                sysname = listed.getSystemNamesString()
                for comp in listed.getMonitoredComponents():
                    if self.component not in comp.name():
                        continue
                    result.append(Availability(listed.id, comp.name(), downtime,
                                               total_report_window, sysname))
        return result
363
def query(dmd, *args, **kwargs):
    """Return the availability report for the given criteria, using the
    module cache.

    ``args`` and ``kwargs`` are passed straight to Report().  A cached
    result for identical criteria is returned as-is; otherwise the report
    is run against ``dmd``, stored in the cache and returned.
    """
    report = Report(*args, **kwargs)
    key = report.tuple()
    try:
        return _cache[key]
    except KeyError:
        pass
    result = report.run(dmd)
    _cache[key] = result
    return result
if __name__ == '__main__':
    # Ad-hoc smoke test against a live Zenoss instance.
    import pprint

    # Thirty days, in seconds.
    month = 60*60*24*30
    report = Report(time.time() - month)
    start = time.time() - month
    # (set report.component = 'snmp' to restrict the first run)
    report.component = None
    report.eventClass = Status_Snmp
    report.severity = 3

    from Products.ZenUtils.ZCmdBase import ZCmdBase
    cmd = ZCmdBase()
    pprint.pprint(report.run(cmd.dmd))

    # Two identical queries must return the very same cached object.
    gate_ping = dict(device='gate.zenoss.loc', eventClass=Status_Ping)
    first = query(cmd.dmd, start, **gate_ping)
    assert 0 <= float(first[0]) <= 1.
    second = query(cmd.dmd, start, **gate_ping)
    assert first == second
    assert first is second

    pprint.pprint(report.run(cmd.dmd))

    report.component = 'httpd'
    report.eventClass = Status_OSProcess
    report.severity = 4
    pprint.pprint(report.run(cmd.dmd))

    report.device = 'gate.zenoss.loc'
    report.component = ''
    report.eventClass = Status_Ping
    report.severity = 4
    pprint.pprint(report.run(cmd.dmd))