#!/usr/bin/env python

'''
Messages collector checker (via POP3)

Script will log in to a specified server/box and check messages. If there
 are any errors, email is sent to the address defined in optz and to any
 address defined in the 'report_to' field of the keepalive. If email cannot
 be sent, errors will be logged.
Checks only messages with specified signature included in subject field.
Each such message should contain a JSON-encoded data (on any line,
 but only closest to the message end is used):
 {
  report_to: admin@example.host,
  probe: probe.example.host,
  interval: 3600,
  server: example.host,
  time: 1235036082
 }
This data is used to start keepalive sequence with this probe-server pair.
If another message from this pair isn't received in interval * wait_limit,
 error message is sent, and keepalive is deactivated. If interval wasn't
 specified, no error will be sent for that pair.
If multiple servers are specified, they'll be treated as a single box.

Entries older than the cleanup time will be removed from box and db.

Should be started with some time interval, preferably shorter than that of
 the complementary smtp scripts (keepalive interval).
Can share single db with imap-based checker.

Sections of this script may look ugly, because some things are
 handled by custom wrapper libs in the original, but there's no
 point to drag them along just for one script.

Mike Kazantsev <mk.fraggod@gmail.com>
'''

# Script configuration, edit the values below to match your setup
optz = {
	# Boxes to poll: arbitrary label -> host to connect to
	'srv_list': {'core_mailer': 'example.host'},

	# Human-friendly name of this probing server
	'server': 'probe.example.host',

	# Mailbox login (address checked for delivered msgs) and its password
	'collector': 'box@example.host',
	'password': 'somepass',

	# Signature that probe messages carry in their subject
	'probe': 'Mail_KeepAlive',

	# Where error reports go, bare address only
	'report_to': 'admin@example.host',

	# Persistent storage for info about checks
	'db': '/var/spool/mail_test_db',

	# Number of intervals a message is allowed to travel to the box
	'wait_limit': 2,

	# Cleanup deadline in seconds (one day)
	'cleanup': 24 * 3600,

	# Fallback log file, used when mail can't be sent
	'log': '/var/log/mail_test.log',
}

#######

from email.MIMEMultipart import MIMEMultipart
from email.MIMEText import MIMEText
from email.Utils import formatdate
import poplib, smtplib

try: import json # py2.6+
except ImportError: import simplejson as json

import os, sys, re
from string import whitespace as spaces
from cStringIO import StringIO as sio
from time import time, strftime, localtime

import logging
# Script-wide logger, its records are appended to the file set in optz['log']
log = logging.getLogger('core')
log_stream = open(optz['log'], 'a+')
log_format = logging.Formatter(
	'%(asctime)s %(levelname)s %(module)s.%(funcName)s: %(message)s',
	'(%d.%m.%y %H:%M:%S)' )
handler = logging.StreamHandler(log_stream)
handler.setFormatter(log_format)
log.addHandler(handler)
# Root logger setup: INFO threshold for everything, including 'core' logger
logging.basicConfig(level=logging.INFO)

import shelve, fcntl
# Serialize concurrent runs with an exclusive advisory lock.
# fcntl.flock() needs a file descriptor (or an object with fileno()), not a
#  path, so a dedicated lock file is opened next to the db. The db file itself
#  isn't used for that, because its actual name depends on the dbm backend.
# Any other script sharing this db has to lock the same file.
# Lock is held until the process exits, db_lock reference keeps the file open.
try:
	db_lock = open(optz['db'] + '.lock', 'a')
	fcntl.flock(db_lock, fcntl.LOCK_EX)
except (IOError, OSError): db_lock = None # best-effort: run unlocked if lock file is unusable
db = shelve.open(optz['db'], writeback=True) # DB: server -> probe -> data

def htime(x):
	'''Return unix timestamp x as a local-time "dd.mm.yy HH:MM:SS" string.'''
	return strftime('%d.%m.%y %H:%M:%S', localtime(x))
# Server/probe pairs which were already handled during this run
passed = set()
# Sender address of every report, built from the name of this probing server
addr_from = 'mail_test@%s' % (optz['server'],)


for srv,link in optz['srv_list'].iteritems():

	## Base protocol (POP) operation check
	try:
		dump = sys.stdout = sio()
		link = poplib.POP3(link)
		link.set_debuglevel(2)
		link.user(optz['collector'])
		link.pass_(optz['password'])
		qty = len(link.list()[1])
	except Exception, err:
		link.quit()
		sys.stdout = sys.__stdout__
		dump.seek(0)
		debug = dump.read()
		try:
			msg = MIMEMultipart()
			msg['From'] = addr_from
			msg['To'] = 'Postmaster <%s>'%optz['report_to']
			msg['Date'] = formatdate(localtime=True)
			msg['Subject'] = 'POP3 connection failure on %s'%srv
			msg.attach( MIMEText('Error: %s\nProtocol exchange:\n%s'%(err, debug)) )
			dump = smtplib.stderr = sio()
			smtp = smtplib.SMTP('localhost')
			smtp.set_debuglevel(1)
			smtp.sendmail(addr_from, optz['report_to'], msg.as_string())
			smtp.quit()
		except Exception, err_fatal:
			dump.seek(0)
			debug_fatal = dump.read()
			log.error('========== POP3 connection failure on %s: %s'%(srv, err))
			log.error('Protocol exchange:\n%s'%debug)
			log.fatal('===== Local SMTP failure (%s): %s'%(optz['server'], err_fatal))
			log.fatal('Protocol exchange:\n%s\n\n'%debug_fatal)
	link.set_debuglevel(0)
	sys.stdout = sys.__stdout__


	## Check messges against db
	probe_filter = re.compile('^Subject:.*%s.*$'%re.escape(optz['probe']))
	for id in xrange(qty):
		try: response, lines, octets = link.retr(id+1)
		except: continue # imap can kill the message by now
		for line in lines:
			if probe_filter.match(line): break
		else: continue
		while lines:
			try:
				line = lines.pop().strip(spaces)
				if line:
					data = json.loads(line)
					break
			except ValueError: pass
		else: # no json data found
			log.warn('===== Unrecognized probe format:\n%s'%data[0][1])
			continue
		srv, probe = data.pop('server'), data.pop('probe')
		if srv+probe not in passed:
			try: stored = db[srv][probe]
			except KeyError:
				passed.add(srv+probe)
				data['active'] = True
				try: db[srv][probe] = data
				except KeyError: db[srv] = {probe: data}
			else:
				try: act = stored['active']
				except KeyError: act = False
				if data['time'] > stored['time']:
					if not act: log.info('Resuming keepalive with sever %s, last data: %s'%(srv, stored['time']))
					passed.add(srv+probe)
					data['active'] = True
					db[srv][probe] = data

				# Cleanup
				elif optz['cleanup'] and data['time'] < ( time() - optz['cleanup'] ):
					try: link.dele(id) # only marks for deletion, which actually takes place on disconnect
					except: pass # there are weird protocol errz sometimes

	link.quit()


	## Check db entries, throw errors on outdated keepalives
	for srv,probes in db.items():
		if srv.startswith('_'): continue # unrelated data
		for probe,data in probes.items():
			if (
				srv+probe not in passed and
				data['active'] and
				data['interval'] and
				( data['time'] < (time() - data['interval'] * optz['wait_limit']) )
			):
				passed.add(srv+probe)
				try:
					msg = MIMEMultipart()
					msg['From'] = addr_from
					msg['Date'] = formatdate(localtime=True)
					msg['Subject'] = 'Mail delivery failure on %s'%srv
					msg.attach( MIMEText(
						'\n'.join(
							'Got last keepalive: %s'%htime(data['time']),
							'Expected keepalive: %s'%htime(data['time'] + data['interval']),
							'Time: %s'%htime(time()),
							'Keepalive tracking deactivated: %s -> %s -> %s'%( probe, srv, optz['collector'] )
						)
					) )
					dump = smtplib.stderr = sio()
					link = smtplib.SMTP('localhost')
					link.set_debuglevel(1)
					for addr_to in set([optz['report_to'], data['report_to']]):
						msg['To'] = 'Postmaster <%s>'%addr_to
						link.sendmail(addr_from, addr_to, msg.as_string())
					link.quit()
				except Exception, err_fatal:
					dump.seek(0)
					debug_fatal = dump.read()
					log.fatal('===== Local SMTP failure (%s): %s'%(optz['server'], err_fatal))
					log.fatal('Protocol exchange:\n%s\n\n'%debug_fatal)
				data['active'] = False
				db[srv][probe] = data

			# Cleanup
			elif optz['cleanup'] and data['time'] < ( time() - optz['cleanup'] ):
				del db[srv][probe]
				if not db[srv]: del db[srv]

			# Warn about dead keepalives
			if not data['active']:
				log.error(
					'Dead keepalive (%s): %s -> %s -> %s'
					%( htime(data['time']), probe, srv, optz.collector )
				)

