#!/usr/bin/env ppy
# -*- coding: utf-8 -*-

## Dynamic configuration
from optparse import OptionParser

# Command line: a few switches plus exactly one positional HOST[:PORT]
parser = OptionParser(
	description='Grab given host rootfs as a backup',
	usage='%prog [options] HOST[:PORT]' )
# (flags, add_option keywords), kept in the order they show up in --help
for opt_flags, opt_spec in [
		( ['--debug'], dict( dest='debug', action='store_true',
			help='print lots of debug info' ) ),
		( ['-n', '--nofork'], dict( dest='fork', action='store_false', default=True,
			help="don't fork out after connection" ) ),
		( ['-d', '--delay'], dict( dest='delay', action='store',
			type='int', metavar='SECS', help='wait before starting rsync' ) ),
		( ['-c'], dict( dest='shell', action='store_true',
			help="read ssh private key from stdin, don't fork" ) ) ]:
	parser.add_option(*opt_flags, **opt_spec)
optz, argz = parser.parse_args()
if len(argz) != 1: parser.error('Need exactly one argument')


# Activate posix caps, if necessary
import os, sys
bak_user = os.getuid()
if bak_user != 0:
	# Not running as uid 0: export the user's HOME to the process environment
	#  and activate the capabilities reported for this process
	import pwd
	from fgc.caps import Caps
	home_dir = pwd.getpwuid(bak_user).pw_dir
	os.putenv('HOME', home_dir)
	proc_caps = Caps.from_process()
	proc_caps.activate().apply()


## Configuration
import itertools as it, operator as op, functools as ft
from string import whitespace as spaces
from fgc import dta, log, sh
from time import strftime, time
import re

bak_src = argz[0]
# Argument is HOST[:PORT]; a port, when present, is handed to ssh as "-p PORT"
try: bak_src, bak_src_ext = bak_src.split(':')
except ValueError: bak_src_ext = tuple() # no (or more than one) colon: no extra ssh options
else: bak_src_ext = '-p', bak_src_ext

bak_root = '/mnt/bakfs'
bak_sub = ft.partial(sh.join, bak_root) # builds paths with bak_root as the first component
bak_host = bak_src.split('.', 1)[0] # host name up to the first dot

if optz.shell:
	# stdin carries the ssh private key, three newlines, then the exclusion list
	bak_key = bak_sub('.key_{0}'.format(bak_host))
	password, reply = it.imap(
		op.methodcaller('strip', spaces), sys.stdin.read().split('\n\n\n', 1) )
	# Close the file explicitly, so the key is on disk before chmod / ssh touch it
	# NOTE(review): the file is created with default permissions and only then
	#  restricted -- consider creating it with a restrictive mode right away
	with open(bak_key, 'w') as key_file: key_file.write(password)
	sh.chmod(bak_key, 0o400)
	bak_src_ext = bak_src_ext\
		+ ('-i', os.path.realpath(bak_key)) # realpath _only_!
else: password_lock = None

password = None # drops the key text read above; later holds the cached ssh password

G = 1024*1024*1024
bak_path = bak_sub('{0}.{1}.{2:.0f}'.format(bak_host, strftime('%Y-%m-%d'), time()))
bak_lock_global = bak_sub('.locks', bak_host)
bak_lock_local = sh.join(bak_path, '.lock')
bak_lock_wait = 300
bak_exclude_client = '/etc/bak_exclude'
bak_exclude_server = bak_sub('.exclude_{0}'.format(bak_host))
bak_user = 'root'
min_keep = 2 # min number of older bakz
min_free_avg = ft.partial(op.mul, 3) # free > min_free_avg( occupied / bak_count ), can be none/false/0
min_free_abs = 8 # free > min_free_abs GB, can be 0
sync_optz = [ '-HaAXz',
	# Suffix list is slash-separated, so every fragment but the last ends with "/"
	#  (without it "iso" and "jpg" would fuse into a single bogus "isojpg" suffix)
	('--skip-compress='
		r'gz/bz2/t\[gb\]z/tbz2/lzma/7z/zip/rar/'
		r'rpm/deb/iso/'
		r'jpg/gif/png/mov/avi/ogg/mp\[34g\]/flv/pdf'),
	'--super',
	'--exclude-from={0}'.format(bak_exclude_server),
	'--rsync-path=ionice -c3 rsync',
	'--rsh=ssh {0}'.format(' '.join(bak_src_ext)) ]
sync_filter = map(re.compile, ( # filter for rsync output (lines)
	r"^adding `.*'$",
	r"^Password:\s*$" ))
cmd = dict(ssh='ssh', rsync='rsync')


## Init
from collections import deque
from getpass import getpass
import pexpect

# Debug-level logging only with --debug, warnings and up otherwise
log_level = log.DEBUG if optz.debug else log.WARNING
log.cfg(level=log_level)
# Work from the backup root
os.chdir(bak_root)
# user@host form of the backup source
src = '{0}@{1}'.format(bak_user, bak_src)

def ssh_pass():
	'''Return the ssh password, prompting for it unless password_lock is set.
		A freshly entered password is stored in the global "password",
			which is what gets returned once password_lock is set.'''
	global password
	if password_lock: return password
	password = getpass('SSH password: ')
	return password
def _ssh_fatal(message):
	'''Build a no-argument handler that reports message via log.fatal with crash=2'''
	return ft.partial(log.fatal, message, crash=2)

# A password prompt is answered interactively, unless the key came from stdin (-c),
#  in which case being asked for a password means the key was not accepted
if optz.shell: _ssh_on_password = _ssh_fatal('Pubkey auth failure')
else: _ssh_on_password = lambda: ssh.sendline(ssh_pass())

# (pattern, handler) pairs; the index returned by expect() selects the handler.
# EOF is first, so index 0 is falsy and is handled by the callers themselves.
ssh_replies = [
	(pexpect.EOF, False),
	('Are you sure you want to continue connecting',
		_ssh_fatal('Host key cannot be verified')),
	('connect to host', _ssh_fatal('SSH connection failed')),
	('Could not resolve', _ssh_fatal('Host resolution failed')),
	('Password:', _ssh_on_password) ]
ssh_query = [entry[0] for entry in ssh_replies]


## Check connection / receive exclusion list
if not optz.shell: # reply is preset otherwise
	# The remote command is a bare input redirection of the client-side exclusion list
	# NOTE(review): whether "< file" alone prints the file depends on the remote
	#  login shell -- confirm this holds for every backed-up host
	ssh = pexpect.spawn(cmd['ssh'], list(dta.chain(
		# '-oUserKnownHostsFile={0}'.format(
		# 	sh.join(os.getenv('HOME'), '.ssh/known_hosts') ),
		bak_src_ext, src, '< {0}'.format(bak_exclude_client) )))
	while True:
		reply = ssh.expect(ssh_query)
		if not reply: # EOF
			# pexpect only guarantees exitstatus / signalstatus after close()
			ssh.close()
			if ssh.exitstatus or ssh.signalstatus:
				status = ssh.exitstatus if ssh.signalstatus is None\
					else 'signal {0}'.format(ssh.signalstatus)
				log.fatal('Remote error {0}:{1}'.format(status, ssh.before), crash=2)
			else:
				# Everything ssh printed before EOF is the exclusion list
				reply = ssh.before.strip(spaces).splitlines()
				break
		else:
			# Index into ssh_replies picks the handler (password prompt or fatal error)
			try: ssh_replies[reply][1]()
			except KeyboardInterrupt: log.fatal('User break, aborting', crash=2)
	del ssh

	reply = '\n'.join(reply)
	if 'Connection closed' in reply:
		log.fatal('SSH returned weird thing: "{0}"'.format(reply), crash=2) # beats me why

# From here on ssh_pass() returns the stored password instead of prompting
password_lock = True
# Written via "with", so the list is flushed and closed before rsync reads it
with open(bak_exclude_server, 'w') as exclude_file: exclude_file.write(reply)


## Lock
# Per-host lock on bak_lock_global; acquire() is passed bak_lock_wait (300)
# NOTE(review): presumably a timeout in seconds -- confirm against fgc.sh.flock
lock_global = sh.flock(bak_lock_global, make=True).acquire(bak_lock_wait)


## Fork away!
# Skipped with -n/--nofork and in -c mode
if not optz.shell and optz.fork:
	import signal
	signal.signal(signal.SIGHUP, signal.SIG_IGN) # so forking or backup won't be interrupted
	# NOTE(review): gc_unlock looks like a flag controlling whether the lock is
	#  released when the lock object goes away; it is switched off across fork(),
	#  presumably so the exiting parent does not drop the lock -- confirm against fgc.sh.flock
	lock_global.gc_unlock = False
	# Parent (non-zero fork() result) exits here, only the child carries on
	if os.fork(): sys.exit()
	lock_global.gc_unlock = True


## Available backups
# Backup dir names are "<host>.<Y-m-d>.<unix time>"; group 1 is the host part.
# The dot before the timestamp is escaped, so it matches a literal dot only.
bakz_re = re.compile(r'^([^.].*)\.\d+-\d+-\d+\.\d+$')
bakz = list(bak for bak in os.listdir(bak_root) if bakz_re.match(bak))
# Backups of this very host, newest first. Host part is compared exactly:
#  a plain prefix test would also pick up e.g. "db2.*" dirs for host "db".
bakz_host = sorted(
	(bak for bak in bakz if bakz_re.match(bak).group(1) == bak_host), reverse=True )


## Free disk space check / cleanup
# NOTE(review): sh.df presumably returns (total, free) in bytes -- confirm against fgc.sh
ds, df = sh.df(bak_root)
# Required free space: the larger of min_free_avg(average used space per backup)
#  and the absolute floor of min_free_abs GiB; just the floor if there are no backups
min_free = ( max(min_free_avg( (ds-df) / len(bakz)), min_free_abs*G)
	if min_free_avg and bakz else min_free_abs*G )

def bakz_rmq():
	'''Generator yielding backup names in the order they should be removed.
		Each step takes the last (lowest-sorting) name from the host group
			that currently holds the most backups.
		Stops as soon as that largest group has min_keep entries or fewer.'''
	host_of = lambda name: bakz_re.match(name).group(1)
	per_host = [list(names) for _, names in it.groupby(sorted(bakz), key=host_of)]
	while per_host:
		per_host.sort(key=len)
		largest = per_host[-1]
		largest.sort(reverse=True) # descending, so pop() takes the lowest-sorting name
		if len(largest) <= min_keep: break
		yield largest.pop()

# Remove old backups one at a time until there is enough free space
if df < min_free:
	for bak in bakz_rmq():
		log.info('Removing backup: {0}'.format(bak))
		sh.rr(bak, onerror=False)
		# Re-measure after every removal
		ds, df = sh.df(bak_root)
		if df >= min_free: break
	else:
		# Nothing (more) can be removed and the space is still short
		log.fatal(
			'Not enough space on media: {0:.1f}G (need {1:.1f}G, {2} backups min)'.format(
				op.truediv(df, G), op.truediv(min_free, G), min_keep ), crash=2 )


## Wait, if requested
if optz.delay:
	# -d/--delay SECS: hold off before touching the destination
	from time import sleep
	sleep(optz.delay)

## Last preps / locks
# Destination dir for this run, then a lock file inside it
sh.mkdir(bak_path)
lock_local = sh.flock(bak_lock_local, make=True)
lock_local = lock_local.acquire(bak_lock_wait)

## Change process name
# Script file name without its directory part becomes the process name
from fgc import psctl
proc_name = os.path.split(sys.argv[0])[1]
psctl.name_set(proc_name)

## Rsync
# Remote root: user@host:/
src = '{0}:/'.format(src)
# Source and destination always go last; if an older backup of this host exists,
#  the newest one is passed as --link-dest. Only the link-dest path belongs
#  inside format() -- src and bak_path are separate rsync arguments.
if bakz_host:
	sync_optz = list(dta.chain( sync_optz,
		'--link-dest={0}'.format(os.path.realpath(bakz_host[0])), src, bak_path ))
else: sync_optz = list(dta.chain(sync_optz, src, bak_path))

# Filter noisy rsyncd crap
sync_filter = list(dta.chain(sync_filter))
class Filter(object):
	'''File-like object that passes written text on to sys.stdout line by line,
		dropping blank lines and lines matched by any pattern in sync_filter.'''

	def write(self, *argz, **kwz):
		'''Split the first positional argument into lines and print the ones worth keeping'''
		for line in argz[0].splitlines():
			if not line.strip(spaces): continue
			if any(pattern.match(line) for pattern in sync_filter): continue
			sys.stdout.write(line+'\n')

	def flush(self, *argz, **kwz):
		'''Flush sys.stdout, passing all arguments through'''
		sys.stdout.flush(*argz, **kwz)

# With TIMEOUT among the patterns, expect() returns its index instead of raising
ssh_query.append(pexpect.TIMEOUT) # suppress timeout
ssh = pexpect.spawn(cmd['rsync'], sync_optz)
ssh.logfile_read = Filter() # rsync output goes to stdout through the line filter
while True:
	try: reply = ssh.expect(ssh_query)
	except KeyboardInterrupt: log.fatal('User break, aborting', crash=2)
	if not reply: # EOF
		# pexpect only guarantees exitstatus / signalstatus after close()
		ssh.close()
		if ssh.exitstatus:
			log.fatal( 'Rsync exited with error status {0}'\
				.format(ssh.exitstatus), crash=ssh.exitstatus )
		elif ssh.signalstatus:
			# Killed by a signal: exitstatus is None, but the run did not succeed
			log.fatal( 'Rsync was killed by signal {0}'\
				.format(ssh.signalstatus), crash=2 )
		else: break
	# Indexes within ssh_replies have a handler; the only one beyond it
	#  is the TIMEOUT entry appended above, which just means "keep waiting"
	elif reply < len(ssh_replies): ssh_replies[reply][1]()
del ssh


## Unlock / cleanup
# The key file exists only in -c mode
if optz.shell:
	sh.rm(bak_key, onerror=False)
# Lock file goes first, then both locks are released: local one, then the global one
sh.rm(bak_lock_local, onerror=False)
for held_lock in (lock_local, lock_global): held_lock.release()
log.info('Backup completed successfully')
