# -*- coding: utf-8 -*-

'''Decoder module for xChat.

Decodes non-ascii ISO8859-1 chars encoded in UTF8 sent by others into cp1251/koi8-r:
	No more accented crap instead of damn cyrillic! ;)

Feel free to tweak it any way you like.'''


# Plugin metadata: name, version, description and authors of this script.
# NOTE(review): these dunder names are presumably read by the xchat python
# plugin loader when the script is loaded - confirm against the xchat
# python interface docs before renaming or removing any of them.
__module_name__ = 'xcp_recoder'
__module_version__ = '0.5'
# Adjacent string literals below are concatenated into a single string.
__module_description__ = ( 'Decodes non-ascii ISO8859-1'
	' chars encoded in UTF8 sent by others into russian (cp1251/koi8-r).'
	' Also tries to fix "garbled" messages, most likely caused by'
	' incorrect split plus xchat mangling.' )
__module_author__ = ( 'Gustavo Niemeyer <gustavo@niemeyer.net>,'
	' Mike Kazantsev <mk.fraggod@gmail.com>' )


import itertools as it, operator as op, functools as ft
import xchat


# Turns a byte string into unicode text by treating it as latin1, i.e. every
# byte becomes the code point with the same numeric value.
dec = op.methodcaller('decode', 'latin1')

# Flat lookup table, consumed two items at a time by garbled_recode():
#   even index - mangled two-char unicode sequence found in garbled messages,
#   odd index  - the cyrillic letter it stands for, as a plain string literal.
# NOTE(review): the second char of each mangled sequence looks like the
# cp1252 reading of the letter's second utf-8 byte - confirm.
garbled_pairs = (
	u'\xd1\u20ac', 'р',
	u'\xd1\u201a', 'т',
	u'\xd1\u2039', 'ы',
	u'\xd1\u017d', 'ю',
	u'\xd1\u0152', 'ь',
	u'\xd1\u2030', 'щ',
	u'\xd1\u2021', 'ч',
	u'\xd1\u2026', 'х',
	u'\xd1\u201e', 'ф',
	u'\xd1\u2020', 'ц',
	u'\xd0\u203a', 'Л',
	u'\xd1\u02c6', 'ш',
	u'\xd0\u02dc', 'И',
	u'\xd0\u2022', 'Е',
	u'\xd1\u0192', 'у',
)

def garbled_recode(src):
	'''Try to turn a "garbled" message back into a plain byte string.

	Works in two passes:
	 - every mangled sequence listed in garbled_pairs is replaced with the
	   latin1-decoded form of the letter it stands for;
	 - the text is then converted char by char: code points up to 0xff become
	   the byte with the same value, while anything above 0xff removes the
	   previously emitted byte and puts a single underscore instead.
	The very last emitted byte is always dropped.

	NOTE(review): dropping the last byte presumably gets rid of the dangling
	half of a character cut off by a message split - confirm.

	Arguments:
		src: message text to fix.

	Returns the resulting byte string, with "_" emitted in place of
	code points that don't fit into a single byte.'''
	# Pass 1: undo known manglings (table holds alternating mangled/intended items)
	for mangled, intended in it.izip(garbled_pairs[0::2], garbled_pairs[1::2]):
		src = src.replace(mangled, dec(intended))

	# Pass 2: squeeze the text back into single bytes
	out = list()
	for char in src:
		code = ord(char)
		if code <= 0xff:
			out.append(chr(code))
			continue
		# Doesn't fit into a byte: previous byte goes away, placeholder is added
		if out: out.pop()
		out.append('_')

	# Last byte is discarded unconditionally
	if out: out.pop()
	return ''.join(out)


def recode(words, word_eol, userdata):
	'''xchat print-event hook: guess the real encoding of the hooked event
	arguments, recode them and re-emit the event with fixed text.

	Arguments:
		words: list of event arguments; items at checked positions
			are replaced in place.
		word_eol: not used.
		userdata: (event name, position) pair, where position is either an
			index into words or a tuple of such indexes (see recode_events).

	Returns xchat.EAT_NONE if the event is left alone (already processed,
	nothing to recode, or text looks like proper utf-8), and xchat.EAT_XCHAT
	after the recoded event was emitted instead of the original one.'''
	# Mandatory check to avoid recursion on xchat.emit_print
	if not words or '//rec' in words[0]: return xchat.EAT_NONE

	event, pos = userdata
	if isinstance(pos, int): pos = (pos,)

	# Iteration 1: count lowercase chars in koi8r/cp1251 to determine encoding
	enc = dict(it.izip(('koi8-r', 'cp1251', 'utf-8', 'garbled'), it.repeat(0)))
	for i in pos:
		try: word = words[i] = words[i].decode('utf-8')
		except (UnicodeError, IndexError): continue
		for c in it.imap(ord, word):
			if c in (0xd0, 0xd1): enc['garbled'] += 1
			elif c >= 0xc0 and c <= 0xdf: enc['koi8-r'] += 1
			elif c >= 0xe0 and c <= 0xff: enc['cp1251'] += 1
			elif c > 0xff: enc['utf-8'] += 1
	# Pick the guess with the highest score
	enc, rate = max(enc.iteritems(), key=op.itemgetter(1))
	if not rate or enc == 'utf-8': return xchat.EAT_NONE # no recoding is necessary

	# Iteration 2: actually recode words
	if enc != 'garbled':
		for i in pos:
			try: words[i] = words[i].encode('iso8859-1').decode(enc).encode('utf-8')
			except (UnicodeError, IndexError): pass
	else:
		from time import strftime
		for i in pos:
			# Result goes into its own name, so that "enc" keeps the encoding
			#  label used in the tag below. Missing positions and undecodable
			#  words are skipped, same as in the other branch.
			try: fixed = garbled_recode(words[i])
			except (UnicodeError, IndexError): continue
			if '_' in fixed: # some symbols could've been replaced by underscore
				# Debug log is best-effort: failure to write it must not
				#  prevent the message from being displayed
				try:
					with open('/tmp/xcp_recoder_debug.log', 'a') as log:
						log.write(
							'{0} Garbled message:\n\tbefore: {1!r}\n\tafter: {2!r}\n'\
								.format(strftime('%Y-%m-%d %H:%M'), words[i], fixed) )
				except EnvironmentError: pass
			words[i] = fixed

	# Emit event back to xchat
	words[0] = '//rec %s: '%enc + words[0] # mandatory to avoid recursion
	xchat.emit_print(event, *words)
	return xchat.EAT_XCHAT # suppress original event


recode_events = [
	('Channel Action', 1),
	('Channel Action Hilight', 1),
	('Channel Message', 1),
	('Channel Msg Hilight', 1),
	('Channel Notice', 2),
	('Generic Message', (0, 1)),
	('Kick', 3),
	('Killed', 1),
	('Motd', 0),
	('Notice', 1),
	('Part with Reason', 3),
	('Private Message', 1),
	('Private Message to Dialog', 1),
	('Quit', 1),
	('Receive Wallops', 1),
	('Server Notice', 0),
	('Server Text', 0),
	('Topic', 1),
	('Topic Change', 1) ]

for event in recode_events: xchat.hook_print(event[0], recode, event)
print 'xcp_recoder initiated'