add downtime calculation on alerts
- add timestamp on alert embeds
- make Alert.post receive the worker instead of service info
This commit is contained in:
parent
227087284b
commit
9f5d42c89c
3 changed files with 49 additions and 5 deletions
|
@ -114,7 +114,7 @@ class HttpAdapter(Adapter):
|
|||
|
||||
if not succ:
|
||||
status_phrase = cls.get_phrase(resp.status)
|
||||
err_str = f'HTTP Status - {resp.status} - {status_phrase}'
|
||||
err_str = f'http status {resp.status} - {status_phrase}'
|
||||
return cls._construct(succ, latency, err_str)
|
||||
|
||||
return cls._construct(succ, latency if succ else 0)
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
import datetime
|
||||
import logging
|
||||
|
||||
from aiohttp import ClientSession
|
||||
|
@ -12,20 +13,61 @@ class DiscordAlert:
|
|||
self.url = alert['url']
|
||||
self.session = ClientSession()
|
||||
|
||||
def _make_payload(self, service, status):
|
||||
def _make_payload(self, worker, status: dict):
|
||||
service = worker.service
|
||||
|
||||
serv_name = service['name']
|
||||
is_up = status['status']
|
||||
|
||||
err = status.get('error', 'No error provided')
|
||||
color = 0x00ff00 if is_up else 0xff0000
|
||||
|
||||
ts = status['timestamp'] / 1000
|
||||
dt_iso = datetime.datetime.utcfromtimestamp(ts).isoformat()
|
||||
embed = {
|
||||
'title': serv_name,
|
||||
'color': color
|
||||
'color': color,
|
||||
'timestamp': dt_iso
|
||||
}
|
||||
|
||||
if not is_up:
|
||||
embed['description'] = err
|
||||
embed['description'] = f'error: {err}'
|
||||
else:
|
||||
# calculate downtime here
|
||||
|
||||
# first we need to find the work that had a success
|
||||
# before the current one
|
||||
conn = worker.manager.conn
|
||||
cur = conn.cursor()
|
||||
|
||||
# find the last work that had a success
|
||||
cur.execute(f"""
|
||||
SELECT timestamp FROM {worker.name}
|
||||
WHERE timestamp < ? AND status = true
|
||||
ORDER BY timestamp DESC
|
||||
LIMIT 1
|
||||
""", (status['timestamp'], ))
|
||||
|
||||
row = cur.fetchone()
|
||||
ts_success = row[0]
|
||||
|
||||
# now we fetch all the downtime after that ts_success
|
||||
cur.execute(f"""
|
||||
SELECT COUNT(*) FROM {worker.name}
|
||||
WHERE timestamp > ? AND status = false
|
||||
""", (ts_success,))
|
||||
|
||||
row = cur.fetchone()
|
||||
count = row[0]
|
||||
|
||||
downtime_msec = count * worker.service['poll'] * 1000
|
||||
downtime_sec = downtime_msec / 1000
|
||||
downtime_min = round(downtime_sec / 60, 3)
|
||||
|
||||
embed['footer'] = {
|
||||
'text': f'down for {downtime_min} minutes '
|
||||
f'({downtime_sec} seconds)'
|
||||
}
|
||||
|
||||
return {
|
||||
'content': '',
|
||||
|
|
|
@ -50,6 +50,8 @@ class ServiceWorker:
|
|||
conn.commit()
|
||||
|
||||
await self._dispatch_work(columns, timestamp, result)
|
||||
|
||||
result['timestamp'] = timestamp
|
||||
await self._check_alert(result)
|
||||
|
||||
async def _dispatch_work(self, columns, timestamp: int, result: tuple):
|
||||
|
@ -96,7 +98,7 @@ class ServiceWorker:
|
|||
self.log.error(f'alert not found: {alert!r}')
|
||||
continue
|
||||
|
||||
await alert_obj.post(self.service, work)
|
||||
await alert_obj.post(self, work)
|
||||
|
||||
async def _work_loop(self):
|
||||
try:
|
||||
|
|
Loading…
Reference in a new issue