#!/usr/bin/env python2
from __future__ import unicode_literals, print_function

import argparse
parser = argparse.ArgumentParser(description='Video stream harvesting tool.')
parser.add_argument('urls', action='store', type=str, nargs='+', help='Stream URLs to handle.')
parser.add_argument('-s', '--min-size', action='store', type=int,
	default=5, help='Minimum dump size (in MiB) to keep it (default: %(default)s).')
parser.add_argument('-t', '--min-time', action='store', type=int,
	default=60, help='Minimum dump length (in seconds) to keep it (default: %(default)s).')
parser.add_argument('--debug', action='store_true', help='Verbose operation mode.')
argz = parser.parse_args()
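# min-size is given in MiB on the command line, convert to bytes here
# so it can be compared directly against os.stat() results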
argz.min_size *= 1024**2


import logging
logging.basicConfig(level=logging.DEBUG if argz.debug else logging.INFO)
log = logging.getLogger()


import itertools as it, operator as op, functools as ft
import os, sys, signal

from time import time, strftime, localtime
from collections import defaultdict
from urlparse import urlparse

from twisted.internet import reactor, defer
from twisted.python import failure
from twisted.internet import protocol


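# ProcessProtocol that fires the "status" deferred with a timestamp when
# the child process exits and (optionally) feeds its line-buffered
# stdout/stderr output to the pipe_cb callback.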
class StreamFeedback(protocol.ProcessProtocol):

	def __init__(self, status=None, pipe_cb=None):
		self._buffer = defaultdict(unicode)
		self.status, self.pipe = status, pipe_cb

	def connectionMade(self):
		self.transport.closeStdin()
	def processEnded(self, reason):
		if self.status: self.status.callback(time())

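	# Buffer process output per-stream (buff=0 for stdout, buff=1 for
	# stderr), passing only complete lines to self.pipe and keeping the
	# incomplete tail around until the next chunk arrives.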
	def _dataReceived(self, data, buff):
		if not self.pipe: return # don't care about the process output
		data = unicode(data, encoding='utf-8', errors='replace')
		if '\n' in data:
			lines = map(op.methodcaller('strip', '\r'), data.split('\n'))
			data = lines.pop() # incomplete last line, buffered below
			lines[0] = self._buffer[buff] + lines[0]
			self._buffer[buff] = '' # flush the consumed buffer
			self.pipe(lines)
		if data: self._buffer[buff] += data
	outReceived = lambda s,data: s._dataReceived(data, buff=0)
	errReceived = lambda s,data: s._dataReceived(data, buff=1)


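# Spawns one mplayer dumper per URL and respawns each one as it exits,
# renaming dumps that are large/long enough and dropping the rest.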
class StreamPool(object):
	_sig_handlers = signal.SIGINT, signal.SIGTERM
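	# Dumps are written under the "_live" name and renamed to the final one
	# on success; both templates go through strftime() with the start time.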
	_src_tpl = '%Y%m%d_%H:%M_stream_{}_live.dump'
	_dst_tpl = '%Y%m%d_%H:%M_stream_{}.dump'

	def __init__(self, urls):
		self.processes, self.spawn_lock = dict(), False
		url_ids = list()
		for url in urls:
			url_id = urlparse(url).path.strip('/').rsplit('/', 1)[-1] # try to use basename part of the path
			if len(url_id) < 10 and url_id.isalnum(): url_ids.append((url_id, url))
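			# basename doesn't look like a short alnum id:
			# fall back to plain numbering for all urls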
			else:
				url_ids = enumerate(urls)
				break
		for url_id,url in url_ids:
			log.debug('Starting harvesting url (id={}): {}'.format(url_id, url))
			self.spawn(url, url_id=url_id)
		for sig in self._sig_handlers: signal.signal(sig, self.close)

	def output(self, lines, stream_id='core'):
		for line in lines: log.debug('{}: {}'.format(stream_id, line))

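	# Keep the dump only if it's both large and long enough; IOError is
	# raised (or comes from os.stat() on a missing file) to jump straight
	# to the cleanup branch.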
	def rotate(self, src, dst, ts_start, ts_end):
		try:
			if os.stat(src).st_size < argz.min_size:
				log.debug('Recording size is too small (< {} B)'.format(argz.min_size))
				raise IOError
			if ts_end - ts_start < argz.min_time:
				log.debug('Recording is too short (< {} s)'.format(argz.min_time))
				raise IOError
		except (OSError, IOError):
			if os.path.isfile(src):
				log.debug('Dropping dump: {}'.format(src))
				os.remove(src)
		else:
			os.rename(src, dst)
			log.debug('Dump successfully recorded: {}'.format(dst))

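	# (Re)start an mplayer dump for the url. The deferred fires on process
	# exit: rotate() decides the dump's fate, then spawn() runs again, so
	# each stream keeps recording in a loop until close() sets spawn_lock.
	# The mplayer binary is assumed to be installed and resolvable.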
	def spawn(self, url, url_id=None, result=None):
		ts_start = time()
		src, dst = (strftime( tpl.format(url_id),
			localtime(ts_start) ) for tpl in (self._src_tpl, self._dst_tpl))
		if self.spawn_lock:
			del self.processes[url]
			if not self.processes: reactor.stop()
		else:
			status = defer.Deferred()\
				.addCallback(ft.partial(self.rotate, src, dst, ts_start))\
				.addCallback(ft.partial(self.spawn, url, url_id))
			self.processes[url] = reactor.spawnProcess(
				StreamFeedback( status=status,
					pipe_cb=ft.partial(self.output, stream_id='stream_{0}'.format(url_id)) ),
				'mplayer', [ 'mplayer', '-dumpstream', '-nocache',
					'-prefer-ipv4', '-dumpfile', src, url ] )

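	# Graceful shutdown: stop respawning, ask every mplayer to exit (its
	# current dump still gets rotated), and restore default handlers so a
	# second signal kills the script outright.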
	def close(self, sig, frm):
		self.spawn_lock = True
		for proc in self.processes.viewvalues(): proc.signalProcess(signal.SIGINT)
		for sig_num in self._sig_handlers: signal.signal(sig_num, signal.SIG_DFL)


pool = StreamPool(argz.urls)
reactor.run()
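
# Example invocation (script name and URL are illustrative placeholders):
#   ./stream-harvest.py --min-size 10 --min-time 120 rtmp://example.com/live/feed1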
