#!/usr/bin/env python
from __future__ import unicode_literals

# Documentation on formats used here:
#  http://fossil-scm.org/index.html/doc/trunk/www/fileformat.wiki
#   (fossil string-escaping algorithm is also documented there)
#  http://www.kernel.org/pub/software/scm/git/docs/git-fast-import.html
#   (man git-fast-import)

import argparse

# Command-line interface: two positional repository locations plus sync options.
parser = argparse.ArgumentParser(
	description=(
		'Tool to keep fossil and git repositories in sync.\n'
		' Monitors fossil_root for changes in *.fossil files (which are treated as source'
		' fossil repositories) and pushes them to corresponding (according to basename)'
		' git repositories.\n'
		' Also has --oneshot mode to do a one-time sync between specified repos.' ) )

# Positional arguments - source (fossil) and destination (git) paths.
parser.add_argument('fossil_root', nargs=1, help='Path to fossil repos.')
parser.add_argument('git_root', nargs=1, help='Path to git repos.')

# Mode-of-operation switches.
parser.add_argument(
	'-1', '--oneshot',
	action='store_true',
	help='Treat fossil_root and git_root as repository paths and try to sync them at once.' )
parser.add_argument(
	'-s', '--initial-sync',
	action='store_true',
	help='Do an initial sync for every *.fossil repository found in fossil_root at start.' )
parser.add_argument(
	'-c', '--create',
	action='store_true',
	help='Dynamically create missing git repositories (bare) inside git-root.' )
parser.add_argument(
	'-b', '--branch',
	default='trunk',
	help='Branch to sync (must exist on both sides, default: %(default)s).' )
parser.add_argument(
	'--dry-run',
	action='store_true',
	help=(
		'Dump git updates (fast-import format) to stdout,'
		' instead of feeding them to git. Cancels --create.' ) )

# Repository filtering.
parser.add_argument(
	'-x', '--exclude',
	action='append', default=[],
	help=(
		'Repository names to exclude from syncing'
		' (w/o .fossil or .git suffix, can be specified multiple times).' ) )

# Change-detection tuning.
parser.add_argument(
	'-t', '--stat-interval',
	type=int, default=300,
	help=(
		'Interval between polling source repositories for changes,'
		" if there's no inotify/kevent support (default: %(default)ss)." ) )

parser.add_argument('--debug', action='store_true', help='Verbose operation mode.')
argz = parser.parse_args()

# nargs=1 yields one-element lists - flatten these into plain path strings.
argz.fossil_root = argz.fossil_root[0]
argz.git_root = argz.git_root[0]
# Exclusions are only ever tested for membership.
argz.exclude = set(argz.exclude)

import logging
logging.basicConfig(level=logging.INFO if not argz.debug else logging.DEBUG)
log = logging.getLogger()


import itertools as it, operator as op, functools as ft
from subprocess import Popen, PIPE
from time import strptime
from calendar import timegm
from contextlib import contextmanager
import os, sys


def fossil_decode(string):
	r'''Decode fossil-escaped string, as used for card arguments in manifests.

	Handled escapes: "\s" -> space, "\n" -> newline, "\\" -> backslash.
	Returns decoded string, empty string is returned as-is.'''
	# String is cut on escaped backslashes first and these are restored only
	#  when joining the chunks back, so that a decoded backslash is never
	#  mistaken for a start of the next escape (e.g. "\\s" is a backslash
	#  followed by "s", not a backslash followed by space).
	return '\\'.join(
		chunk.replace(r'\s', ' ').replace(r'\n', '\n')
		for chunk in string.split(r'\\') )

@contextmanager
def fossil_artifact(repo, aid):
	'''Context manager, yielding stdout pipe of "fossil artifact" for a given artifact.

	repo - path to fossil repository (passed via -R).
	aid - artifact name to dump.

	On exit, the rest of the output is drained,
	 pipe is closed and child process is waited for.
	Exit code of the process is not checked.'''
	proc = Popen(['fossil', 'artifact', '-R', repo, aid], stdout=PIPE)
	try: yield proc.stdout
	finally:
		# Unread output is consumed, so that fossil won't block
		#  on a full pipe and wait() below can't hang because of that.
		try: proc.stdout.read()
		finally:
			# Pipe is closed explicitly, not left for garbage collector,
			#  and process is reaped even if draining the pipe has failed.
			proc.stdout.close()
			proc.wait()

# In case of a merge several revisions are listed (whitespace-separated),
#  only the first one of them (i.e. the first timeline) is followed.
def fossil_merge_filter(rev):
	return rev.split()[0]

def fossil_log_parse(repo, tree=argz.branch):
	'''Iterate over commits (manifests) of a fossil repository, newest first.

	repo - path to fossil repository.
	tree - name to start from, only its first whitespace-separated part is used.

	Each manifest is fetched via fossil_artifact and the first revision of its
	 P card is followed, until a manifest without P card is reached.

	Yields dicts with these keys (aside from rev and files,
	 key is set only if corresponding card is present):
	  rev - name the manifest was requested by,
	  files - decoded file path -> artifact id (F cards),
	  comment, user - decoded arguments of C and U cards,
	  parent - first revision listed in P card,
	  ts - D card timestamp as unix time (calendar.timegm, i.e. read as UTC).

	Raises ValueError for F card without artifact id.'''
	tree = fossil_merge_filter(tree)
	while tree:
		commit = dict(rev=tree, files=dict())
		with fossil_artifact(repo, tree) as artifact:
			for line in artifact:
				# Only "<type> <arguments>" lines are of interest here,
				#  anything without arguments (e.g. blank line) is skipped.
				card = line.strip().split(' ', 1)
				if len(card) != 2: continue
				fid, content = card
				if fid == 'C': commit['comment'] = fossil_decode(content)
				elif fid == 'U': commit['user'] = fossil_decode(content)
				elif fid == 'F':
					# First two fields are path and artifact id,
					#  whatever follows them on the line is not used.
					path, aid = content.split(' ')[:2]
					commit['files'][fossil_decode(path)] = aid
				elif fid == 'P':
					commit['parent'] = fossil_merge_filter(content)
				elif fid == 'D': # 2011-05-01T03:28:44.716
					commit['ts'] = timegm(strptime(
						content.rsplit('.', 1)[0], '%Y-%m-%dT%H:%M:%S' ))
		yield commit
		# Without P card there's nowhere to go from here - iteration stops,
		#  instead of parsing and yielding the very same commit over and over.
		tree = commit.get('parent')


def git_log_parse(repo, tree=argz.branch, bs=5):
	'''Iterate over commits of a git repository, as listed by "git log", newest first.

	repo - path, passed to git as GIT_DIR.
	tree - revision to start the log from.
	bs - number of commits to request from git at once.

	Yields dicts with these keys:
	  rev - commit hash,
	  parent - hash of the first parent or None, if commit has no parents,
	  ts - committer timestamp (unix time, int).
	Iteration stops when git has no more commits to list.

	Raises RuntimeError if git exits with non-zero code.'''
	skip = 0
	while True:
		proc = Popen(
			[ 'git', 'log', '-{}'.format(bs), '--skip={}'.format(skip),
				'--pretty=tformat:%H %ct %P', tree ],
			env={'GIT_DIR': repo}, stdout=PIPE )
		count, rev = 0, None
		try:
			for line in proc.stdout:
				# Line format: <hash> <timestamp> [<parent-hash> ...]
				fields = line.split()
				rev, ts, parents = fields[0], fields[1], fields[2:]
				count += 1
				yield dict( rev=rev, ts=int(ts),
					parent=parents[0] if parents else None )
		finally:
			# Also runs when generator is closed before being exhausted,
			#  so that pipe and child process don't linger.
			proc.stdout.close()
			status = proc.wait()
		if status: raise RuntimeError('git process exited with error')
		# Incomplete batch means that the end of history was reached.
		if not count or count < bs: break
		# Next batch continues from the last commit seen, not listing it again.
		tree, skip = rev, 1


def writelines(dst, *lines):
	'''Write each of the lines to dst (object with "write" method),
	 adding a newline after every line that doesn't end with one already.
	Empty line results in a lone newline.'''
	for line in lines:
		dst.write(line)
		# endswith, unlike indexing, works for an empty line as well.
		if not line.endswith('\n'): dst.write('\n')

def update(git, fossil, git_base, commit_list, ref='refs/heads/{}'.format(argz.branch)):
	'''Push fossil commits into git repository as a git-fast-import stream.

	git - path, passed to git-fast-import as GIT_DIR.
	fossil - path to fossil repository, file contents are fetched from.
	git_base - revision to use as a parent ("from") of the first commit.
	commit_list - iterable of commit dicts with rev, files, user, ts and
	 comment keys, plus parent for all but the first one, which must be
	 the rev of some commit that came earlier in the list.
	ref - git ref to put commits onto.

	With --dry-run, stream is fed to "cat" (i.e. dumped to stdout) instead of git.
	Importer's stdin is closed and process is waited for even if stream generation fails.

	Raises RuntimeError if importer process exits with non-zero code.'''
	git_import = Popen(['git', 'fast-import', '--quiet'], env={'GIT_DIR': git}, stdin=PIPE)\
		if not argz.dry_run else Popen(['cat'], stdin=PIPE)
	git_import_lines = ft.partial(writelines, git_import.stdin)
	# Same mark sequence is used for both blobs (keyed by artifact id) and commits (keyed by rev).
	marks, mark_iter = dict(), it.count(1)
	try:
		for n, commit in enumerate(commit_list):
			# Contents of the files, not seen in this stream before, go first.
			for aid in commit['files'].values():
				if aid in marks: continue
				with fossil_artifact(fossil, aid) as src: artifact = src.read()
				mark = marks[aid] = next(mark_iter)
				git_import_lines( 'blob',
					'mark :{}'.format(mark),
					'data {}'.format(len(artifact)) )
				git_import.stdin.write(artifact)
				git_import.stdin.write('\n')
			mark = marks[commit['rev']] = next(mark_iter)
			# NOTE(review): "data" command takes length in bytes, so len() below
			#  assumes one byte per character - confirm for non-ascii comments.
			git_import_lines(
				'commit {}'.format(ref),
				'mark :{}'.format(mark),
				'committer <{}> {} +0000'.format(commit['user'], commit['ts']),
				'data {}'.format(len(commit['comment'])) )
			git_import.stdin.write(commit['comment'])
			git_import.stdin.write('\n')
			# First commit is attached to the existing git history,
			#  the rest - to commits, imported earlier within this stream.
			git_import_lines( 'from {}'.format(git_base)
				if n == 0 else 'from :{}'.format(marks[commit['parent']]) )
			# Tree of each commit is rebuilt from scratch, using commit['files'] as a full list.
			git_import_lines('deleteall')
			for path, aid in commit['files'].items():
				git_import_lines('M 100644 :{} {}'.format(marks[aid], path))
			git_import.stdin.write('\n')
	finally:
		# Pipe is closed and importer is reaped regardless of errors above,
		#  so that failed update won't leave a stuck child process behind.
		try: git_import.stdin.close()
		finally: status = git_import.wait()
	if status: raise RuntimeError('git-fast-import process exited with error')


def sync(git, fossil):
	'''Bring git repository up to date with the fossil one.

	git - path to git repository, "<git>.git" is used if it's not a directory.
	fossil - path to fossil repository file.

	If git repository is missing and --create is set (without --dry-run),
	 it is created as a bare one and filled from "fossil export --git".
	Otherwise fossil commits are collected from the newest one down to the
	 commit with the same timestamp as git head, and are passed to update().

	Raises NotImplementedError if git head is newer than the fossil one,
	 RuntimeError if some subprocess fails or no fossil commit matches git head.'''
	log.debug('Syncing: {} - {}'.format(git, fossil))
	git_bare = '{}.git'.format(git)
	if not os.path.isdir(git): git = git_bare

	if argz.create and not argz.dry_run and not os.path.isdir(git):
		log.debug('Bootstrapping git repo: {}'.format(git))
		os.mkdir(git)
		git_env = {'GIT_DIR': git}
		if Popen(['git', 'init', '--bare', '--quiet'], env=git_env).wait():
			raise RuntimeError('git-init process exited with error')
		fossil_export = Popen(['fossil', 'export', '--git', fossil], stdout=PIPE)
		git_import = Popen( ['git', 'fast-import', '--quiet'],
			env=git_env, stdin=fossil_export.stdout )
		# Read end of the pipe should be held by git only, otherwise fossil
		#  will block on write forever, if git exits without reading everything.
		fossil_export.stdout.close()
		export_err, import_err = fossil_export.wait(), git_import.wait()
		if export_err or import_err:
			raise RuntimeError( 'Bootstrapping failed - exit codes:'
				' fossil-export={}, git-fast-import={}'.format(export_err, import_err) )
		return # already bootstrapped from fossil-export

	git_log, fossil_log = git_log_parse(git), fossil_log_parse(fossil)
	git_head, fossil_head = next(git_log), next(fossil_log)
	if 'ts' not in fossil_head:
		raise RuntimeError( 'Failed to get timestamp'
			' of fossil commit: {}'.format(fossil_head['rev']) )

	if fossil_head['ts'] < git_head['ts']:
		raise NotImplementedError( 'Git HEAD is newer'
			' than fossil - git:{}, fossil:{}, diff:{}s'.format(
				git_head['ts'], fossil_head['ts'], git_head['ts'] - fossil_head['ts'] ) )

	# Commits are matched between repositories by their timestamps.
	fossil_updates = list()
	while fossil_head['ts'] != git_head['ts']:
		fossil_updates.append(fossil_head)
		fossil_prev, fossil_head = fossil_head, next(fossil_log, None)
		# End of fossil history is either exhausted log or the same commit
		#  coming out of it again, there's nothing to match beyond that point.
		if fossil_head is None\
				or fossil_head['rev'] == fossil_prev['rev'] or 'ts' not in fossil_head:
			raise RuntimeError( 'Failed to find fossil commit, matching git HEAD'
				' (ts: {}), commits checked: {}'.format(git_head['ts'], len(fossil_updates)) )

	if fossil_updates:
		log.debug('fossil->git: {} update(s)'.format(len(fossil_updates)))
		update(git, fossil, git_head['rev'], reversed(fossil_updates))


# Entry point: either a single sync of the two given paths, or an event loop,
#  which watches fossil_root and syncs repositories there as they change.
if argz.oneshot:
	sync(argz.git_root, argz.fossil_root)

else:
	# pyev is imported here, so --oneshot mode works without it.
	import pyev, types, signal
	from glob import iglob

	# Active stat-watchers, keyed by the path of fossil repository file.
	watchers = dict()
	loop = pyev.default_loop()

	# Basename of the path with everything from the last occurrence of suffix dropped.
	_scm_basename = lambda suff, name:\
		os.path.basename(name).rsplit(suff, 1)[0]
	fossil_name = ft.partial(_scm_basename, '.fossil')
	# Not used anywhere in the visible code.
	git_name = ft.partial(_scm_basename, '.git')

	# Watcher callback, syncing a single fossil repository to git repository
	#  with the same name under git_root, unless that name is in --exclude list.
	# First argument can be either a watcher (its "path" attribute is used) or a plain path string.
	def sync_repo(watcher, revent=0):
		fossil = watcher.path\
			if not isinstance(watcher, types.StringTypes) else watcher
		repo = fossil_name(fossil)
		if repo in argz.exclude:
			log.debug('Skipped sync for excluded repo: {}'.format(repo))
			return
		sync(os.path.join(argz.git_root, repo), fossil)

	# Watcher callback, bringing the set of per-repository watchers in line
	#  with *.fossil files currently present in fossil_root.
	# With initial=True, every newly-found repository is synced before being watched.
	def init_watchers(watcher=None, revent=0, initial=False):
		repo_list = set(iglob(os.path.join(argz.fossil_root, '*.fossil')))
		# Watchers of the repositories that are gone are stopped and forgotten.
		for k in set(watchers).difference(repo_list): watchers.pop(k).stop()
		# Repositories without a watcher get one.
		for repo in repo_list.difference(watchers):
			if initial:
				log.debug('Initial sync for repo: {}'.format(repo))
				sync_repo(repo)
			watchers[repo] = loop.stat(repo, argz.stat_interval, sync_repo)
			watchers[repo].start()

	init_watchers(initial=argz.initial_sync)
	# fossil_root itself is watched as well, to pick up added and removed repositories.
	# NOTE(review): neither this watcher nor the signal ones below are stored
	#  anywhere - presumably pyev keeps started watchers alive, verify that.
	loop.stat(argz.fossil_root, argz.stat_interval, init_watchers).start()

	# Any of these signals stops the loop, so that loop.start() below returns.
	loop_break = lambda watcher,revent: loop.stop(pyev.EVBREAK_ALL)
	for sig in signal.SIGINT, signal.SIGTERM, signal.SIGHUP:
		loop.signal(sig, loop_break).start()

	log.debug('Starting event monitoring loop')
	loop.start()
