#!/usr/bin/env python

'''
Sentinel - script for processing logfiles and storing their contents in a database,
	for easier access from anywhere else.

Uses SQLAlchemy as a high-level interface to any db backend

by Mike Kazantsev <mk_dot_fraggod_at_gmail_com>
'''

# The logging module is imported under the alias "log"; the root logger is
# configured right away so that messages of INFO level and above are emitted.
import logging as log
log.basicConfig(level=log.INFO)

import os, sys

from optparse import OptionParser

# Command-line interface: one LOGFILE argument, or the --create-tables switch
parser = OptionParser(usage='%prog [options] LOGFILE\n  or: %prog --create-tables')
parser.add_option(
	'--db', dest='db', metavar='URL', action='store', type='str',
	default='postgres://log_parser:password@server/log_db',
	help='db connection string in form of dialect://user:password@host/dbname[?key=value..]' )
parser.add_option(
	'-c', '--cleanup', dest='cleanup', metavar='NUM', action='store', type='int', default=None,
	help='flush entries older than specified number of days, unset by default' )
parser.add_option(
	'--create-tables', dest='create_tables', action='store_true',
	help='do not process anything, just create initial tables in database' )

optz, argz = parser.parse_args()

# A single logfile path is mandatory in every mode except --create-tables
if not optz.create_tables and len(argz) != 1:
	log.error('You need to specify one logfile as an argument')
	sys.exit(1)

from sqlalchemy import Table, Column, Integer, String, Text, ForeignKey
from sqlalchemy import create_engine, desc
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import mapper, sessionmaker, relation, backref
from sqlalchemy.orm.exc import NoResultFound

# Engine for the backend named by --db
engine = create_engine(optz.db)
# Declarative base class that the mapped table classes derive from
Base = declarative_base()
# Session factory bound to that engine, with autocommit enabled
Session = sessionmaker(autocommit=True, bind=engine)

class Event(Base):
	'''Single parsed logfile entry, mapped to one row of the "syslog" table.'''
	__tablename__ = 'syslog'

	# Integer primary key
	id = Column(Integer, primary_key=True)
	# Compared against time()-derived values by the cleanup code in this script,
	# 	so presumably unix time in seconds - TODO confirm against the log format
	timestamp = Column(Integer, nullable=False)
	# The only nullable column: the log line parser treats the source part as optional
	source = Column(String(32), nullable=True)
	# The parser fills these two from the "word.word" pair of a log line
	# 	(looks like syslog facility.level - verify against the actual log format)
	channel = Column(String(16), nullable=False)
	type = Column(String(255), nullable=False)
	# Remainder of the log line, declared with a length of 2000
	message = Column(String(2000), nullable=False)

# --create-tables mode: create every table declared on Base, then exit
# 	without touching any logfile
if optz.create_tables:
	Base.metadata.create_all(engine)
	sys.exit()

try: log = open(argz[0])
except IOError, err:
	log.error('Error opening given logfile: %s'%err)
	sys.exit(1)

db = Session()

import re
from time import time
from string import whitespace as spaces

# old db entries cleanup
if optz.cleanup != None:
	deadline = time() - 3600*24*optz.cleanup
	db.query(Event).filter(Event.timestamp.between(0, deadline)).delete()

try: last_commit = db.query(Event.timestamp).order_by(desc(Event.timestamp)).first()[0]
except TypeError: last_commit = 0

for line in log:
	try:
		line = re.match('(\d+)\s+\(.*\) (\w+)\.(\w+)\s+(([^\[\] ]+)\s*(\[.*\])?:)?(.*)', line.strip(spaces)).groups()
		if int(line[0]) > last_commit:
			db.add(Event(
				timestamp=line[0],
				channel=line[1],
				type=line[2],
				source=line[4],
				message=line[6]
			))
	except AttributeError: continue

db.flush()
