# coding: UTF-8
#jtalk/translate.py
#A part of NonVisual Desktop Access (NVDA)
#Copyright (C) 2010-2012 Masataka.Shinke, Takuya Nishimoto
#This file is covered by the GNU General Public License.
#See the file COPYING for more details.

from mecab import *
import tenji

# Logging callback used throughout this module. It is called with a single
# message string; call sites also guard against it being falsy.
_logwrite = None

try:
	# Prefer the logHandler logger when that module can be imported.
	from logHandler import log
	_logwrite = log.info
except Exception:
	# logHandler could not be imported (presumably running outside NVDA):
	# fall back to printing. Exception (rather than a bare except) keeps the
	# best-effort fallback without swallowing KeyboardInterrupt/SystemExit.
	def __print(s):
		# Parenthesised form prints the same for a single argument and is
		# also valid syntax on Python 3.
		print(s)
	_logwrite = __print

def split(feature, size, CODE_='utf-8'):
	"""Build a space-separated string from the first `size` feature records.

	Each record feature[i] is read with string_at(), decoded with CODE_
	(undecodable bytes are ignored) and split on commas. Per record:

	* field 2 equal to u'空白' (Japanese for "blank"): a single space;
	* field 0 already in NFKC form: field 9 followed by a space
	  (field 9 is presumably the reading/pronunciation -- TODO confirm
	  against the feature layout produced by the mecab module);
	* field 0 changed by NFKC normalization: field 0 itself, no space;
	* record has too few fields: field 0 followed by a space.

	Empty records are skipped. The joined text is stripped, NFKC-normalized,
	and the space before u'。' / u'、' is removed.

	Returns None if feature or size is None, otherwise a unicode string.
	"""
	if feature is None or size is None:
		return None
	parts = []
	for i in range(size):
		s = string_at(feature[i]).decode(CODE_, 'ignore')
		if not s:
			continue
		ar = s.split(",")
		try:
			if ar[2] == u'空白':
				parts.append(u' ')
			elif ar[0] == unicodedata.normalize('NFKC', ar[0]):
				parts.append(ar[9] + u' ')
			else:
				parts.append(ar[0])
		except IndexError:
			# Short record (fewer fields than expected): fall back to the
			# first field, which always exists after split().
			parts.append(ar[0] + u' ')
	s = unicodedata.normalize('NFKC', u''.join(parts).strip())
	# NOTE: this is a single pass, so each pair of spaces becomes one; a run
	# of three or more spaces is shortened but not fully collapsed.
	s = re.sub("  ", " ", s)
	s = re.sub(u" 。", u"。", s)
	s = re.sub(u" 、", u"、", s)
	return s

def initialize():
	"""Initialize MeCab with the module logger and log completion."""
	Mecab_initialize(_logwrite)
	# The logger may be unset; skip the message in that case.
	if not _logwrite:
		return
	_logwrite("initialize() done.")

def terminate():
	"""Log that termination is done; this function does nothing else."""
	# The logger may be unset; skip the message in that case.
	if not _logwrite:
		return
	_logwrite("terminate() done.")

def separate(msg1):
	"""Run MeCab analysis on msg1 and return the string built by split().

	msg1 is NFKC-normalized first; Mecab_initialize() is called on every
	invocation before the text is converted and analysed.
	"""
	normalized = unicodedata.normalize('NFKC', msg1)
	Mecab_initialize(_logwrite)
	mecab_text = Mecab_text2mecab(normalized, CODE_='utf-8')
	features = MecabFeatures()
	Mecab_analysis(mecab_text, features)
	Mecab_correctFeatures(features, CODE_='utf-8')
	return split(features.feature, features.size, CODE_='utf-8')

# returns u'\u2801\u2802\u2803\u2804\u2805\u2806\u2807'
def japaneseToUnicodeBraille(text):
	"""Convert text to braille via separate() and tenji.tenji().

	Every u'□' character in the tenji output is removed from the result.
	"""
	reading = separate(text)
	braille = tenji.tenji(reading)
	return braille.replace(u'□', '')

# for brailleViewer
def getReadingAndBraille(text):
	"""Return a (reading, braille) tuple for text.

	reading is the output of separate(text); braille is tenji.tenji(reading)
	with every u'□' character removed.
	"""
	reading = separate(text)
	braille = tenji.tenji(reading).replace(u'□', '')
	return (reading, braille)

if __name__ == "__main__":
	# Manual smoke test: print the separated form of a sample sentence.
	sample = u"ウィンドウをタスク バーに移動します。"
	print(separate(sample))
