Saturday, November 3, 2012

KIKAMBA TEXT TO SPEECH SYSTEM - The Source Code

This article was first published in April 2010 on my previous blogging site

It is written in PyS60, a Python version for Series 60 phones. If you want to study and reuse it, do exactly that. If you want to get a feel for it on your S60 phone, you will need the resources (sound files, free of charge with some conditions), which you can get by contacting me.


import appuifw,audio,e32,messaging,inbox
class Analyser:
    """Normalise input text into words the Speech class can read aloud.

    The analyser tokenises the input (words, digit runs, single spaces),
    turns digit runs into Kikamba number words, spells out words that do not
    break into valid Kikamba syllables, and accumulates the result in
    ``self.analysed``.  When ``process`` is called with ``play`` true, the
    normalised text is finally handed to ``self.speech.read``.
    """

    # Characters treated as decimal digits by isNum.
    _DIGITS = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

    # Characters that close a syllable.
    _VOWELS = ("a", "e", "i", "o", "u", u'\xfa', u'\xda', u'\xed', u'\xcd',
               u'\xd9', u'\xf9', u'\xec', u'\xcc')

    # Magnitude words that corrector() collapses to a single occurrence,
    # in the order they are processed.
    _SCALE_WORDS = (u"t\xfalilioni ", u"milioni ", u"ngili ")

    # Spoken name of each character that can be spelled out.
    _LETTER_NAMES = {
        '@': u"at\xed",
        '.': u"ndono",
        ',': u"koma",
        '+': u"kwongela",
        '-': u"kumya",
        '*': u"k\xfand\xfa",
        '=': u"sy\xfakaa",
        '$': u"va\xfandi",
        '#': u"akyi",
        u'\xa3': u"ndola",
        'a': u"aa",
        'b': u"mbii",
        'c': u"sii",
        'd': u"ndii",
        'e': u"ee",
        'f': u"efu",
        'g': u"ngyii",
        'h': u"ekyi",
        'i': u"ii",
        'j': u"ngya\xed",
        'k': u"kei",
        'l': u"elo",
        'm': u"emu",
        'n': u"eni",
        'o': u"oo",
        'p': u"pii",  # what happens, how do we differentiate with v
        'q': u"ki\xfa",
        'r': u"alla",
        's': u"esi",
        't': u"tiii",
        'u': u"yuu",
        'v': u"vii",
        'w': u"ndavili\xfa",
        'x': u"ekisi",
        'y': u"wae",
        'z': u"nzeti",
        u'\xed': u"\xed\xed",
        # Was u"\xfa\fa" (a form feed followed by 'a'); the doubled vowel
        # mirrors the entry for u'\xed' above.
        u'\xfa': u"\xfa\xfa",
    }

    # Words that storedWord() reports as known whole words.
    _STORED_WORDS = (
        u"\xedmwe", u"noti", u"el\xed", u"il\xed",
        u"atat\xfa", u"itat\xfa", u"ana", u"ena", u"inya",
        u"ataano", u"itaano", u"thanthat\xfa", u"moonza",
        u"nyaanya", u"keenda", u"\xedkumi", u"m\xedongo", u"\xedana",
        u"maana", u"ngili", u"milioni", u"t\xfalilioni", u"namba",
        u"\xedtasomeka", u"at\xed", u"ndono", u"koma", u"aa",
        u"mbii", u"sii", u"ndii", u"ee", u"efu", u"ngyii",
        u"ekyi", u"ii", u"ngya\xed", u"kei", u"elo",
        u"emu", u"eni", u"oo", u"pii", u"ki\xfa", u"alla",
        u"esi", u"tiii", u"yuu", u"vii", u"ndavili\xfa",
        u"ekisi", u"wae", u"nzeti", u"\xed\xed", u"\xfa\xfa",
        u"dr.", u"mr.", u"mrs.", u"prof.", u"pst.",
        u"kwongela", u"kumya", u"k\xfand\xfa", u"sy\xfakaa",
        u"va\xfandi", u"akyi", u"ndola",
    )

    # Consonant clusters accepted in front of a vowel.
    _CONSONANTS = (
        "k", "kw", "ky", "l", "ly", "m", "mb", "mw", "n", "nd", "ndy",
        "ng", "ny", "nz", "s", "sy", "t", "th", "thy", "tw", "v", "w",
        "ngw", "nzy", "nthy", "y", "nthw", "thw", "vw", "ndw", "vy",
        "ng'", "sw", "mby", "mbw", "w'", "ngy", "lw", "lwy", "nth",
    )

    # Characters reported as punctuation by isPunctuation.
    _PUNCTUATION = (
        ',', '.', ':', ';', '?', '\'', '\"', '\\', '}', '{', ')', '(',
        '/', '~', '_', '^', '!', '|', '#', '+', '=', '*', '-', '$',
        u'\xa3',
    )

    # Characters left untouched inside a word by process().
    _PASS_THROUGH = (' ', ',', '.', ':', ';')

    def __init__(self):
        self.counter = 0
        self.t_pos = 0  # read cursor used by getToken
        self.speech = Speech(self)
        self.enoughMemory = True
        self.analysed = ""  # normalised text built up by process()

    def isSpecial(self, c):
        """Return true if c ends a word token (a digit or a space)."""
        return self.isNum(c) or c == " "

    def getToken(self, s):
        """Return the next token of s starting at self.t_pos and advance it.

        A token is a single space, a maximal run of digits, or a maximal run
        of characters that are neither digits nor spaces.  Returns "" once
        the cursor has reached the end of s.
        """
        end = len(s)
        start = self.t_pos
        if start >= end:
            return ""
        if s[start] == ' ':
            self.t_pos = start + 1
            return " "
        if self.isNum(s[start]):
            while self.t_pos < end and self.isNum(s[self.t_pos]):
                self.t_pos = self.t_pos + 1
        else:
            while self.t_pos < end and not self.isSpecial(s[self.t_pos]):
                self.t_pos = self.t_pos + 1
        return s[start:self.t_pos]

    def isNum(self, k):
        """Return true if k is one of the characters '0'..'9'."""
        return k in self._DIGITS

    def getDigitName(self, num, loc, plu):
        """Return the word(s) for digit num at decimal position loc.

        loc is the power of ten of the digit (0 = units, 1 = tens, ...);
        positions above 11 yield an "unreadable number" message.  plu picks
        an alternate form of the unit words for 2-5 (1 and 2 select the
        variants used after the hundreds and tens words respectively).
        """
        if loc == 0:
            if num == 0:
                return u"noti"
            if num == 1:
                return u"\xedmwe"
            if num == 2:
                if plu == 1:
                    return u"el\xed"
                return u"il\xed"
            if num == 3:
                if plu == 1:
                    return u"atat\xfa"
                return u"itat\xfa"
            if num == 4:
                if plu == 1:
                    return u"ana"
                if plu == 2:
                    return u"ena"
                return u"inya"
            if num == 5:
                if plu == 1:
                    return u"ataano"
                return u"itaano"
            if num == 6:
                return u"thanthat\xfa"
            if num == 7:
                return u"moonza"
            if num == 8:
                return u"nyaanya"
            if num == 9:
                return u"keenda"
            return ""
        if loc == 1:
            if num == 1:
                return u"\xedkumi"
            return u"m\xedongo " + self.getDigitName(num, 0, 2)
        if loc == 2:
            if num == 1:
                return u"\xedana " + self.getDigitName(num, 0, 0)
            return u"maana " + self.getDigitName(num, 0, 1)
        if loc in (3, 4, 5):
            return u"ngili " + self.getDigitName(num, loc - 3, 0)
        if loc in (6, 7, 8):
            return u"milioni " + self.getDigitName(num, loc - 6, 0)
        if loc in (9, 10, 11):
            return u"t\xfalilioni " + self.getDigitName(num, loc - 9, 0)
        return u"namba \xedtasomeka"

    def numToStr(self, num, phone):
        """Convert the digit string num to words.

        With phone true every digit is named on its own, each followed by a
        space.  Otherwise the number is read as a figure; figures longer
        than 12 digits fall back to digit-by-digit reading.
        """
        if phone:
            # Read digit per digit, not as a figure.
            return "".join([self.getDigitName(int(d), 0, 0) + " "
                            for d in num])
        last = len(num) - 1
        if last > 11:
            return self.numToStr(num, True)
        fin = ""
        for y in range(0, last + 1):
            digit = int(num[y])
            following = 0
            if y < last:
                following = int(num[y + 1])
            if digit != 0:
                # Zero digits are silent.
                fin += self.getDigitName(digit, last - y, 0)
            if y < last and following > 0:
                # Join with 'na' whenever the next digit is spoken.
                fin += u" na "
        return self.corrector(fin)

    def isVowel(self, a):
        """Return true if a is one of the recognised vowel characters."""
        return a in self._VOWELS

    def corrector(self, s):
        """Keep only the first occurrence of each magnitude word in s.

        Every later repetition of the same magnitude word is removed, so the
        word is spoken once for its whole group of digits.
        """
        for scale in self._SCALE_WORDS:
            pos = s.find(scale)
            if pos >= 0:
                stripped = s.replace(scale, "")
                s = stripped[:pos] + scale + stripped[pos:]
        return s

    def getLetterName(self, let):
        """Return the spoken name of character let, or "" if it has none."""
        return self._LETTER_NAMES.get(let, "")

    def expandWord(self, sente):
        """Spell sente out: the name of each character followed by a space."""
        return "".join([self.getLetterName(c) + " " for c in sente])

    def storedWord(self, sente):
        """Return true if sente is one of the known whole words."""
        return sente in self._STORED_WORDS

    def validConsonant(self, sente):
        """Return true if sente is an accepted consonant cluster."""
        return sente in self._CONSONANTS

    def isValidKikamba(self, sente):
        """Return true if sente is a vowel optionally preceded by an accepted
        consonant cluster.  sente must not be empty."""
        if not self.isVowel(sente[-1]):
            return False
        onset = sente[0:-1]
        return len(onset) == 0 or self.validConsonant(onset)

    def append(self, str):
        """Append str to the normalised output."""
        self.analysed = self.analysed + str

    def isPunctuation(self, str):
        """Return true if str is one of the recognised punctuation marks."""
        return str in self._PUNCTUATION

    def insert(self, str, ins, index):
        """Return str with ins inserted before position index."""
        return str[:index] + ins + str[index:]

    def _processNested(self, text):
        """Run process(text, False) from position 0, then restore t_pos.

        process() reads through the shared cursor self.t_pos, so the cursor
        of the outer text has to be saved around the nested call.
        """
        saved = self.t_pos
        self.t_pos = 0
        self.process(text, False)
        self.t_pos = saved

    def process(self, input, play):
        """Normalise input into self.analysed and optionally speak it.

        Tokens are read from the current self.t_pos.  Digit runs become
        number words (digit by digit when they start with '0').  Stored
        words are copied as they are.  Other words are checked syllable by
        syllable; when play is true, a word with an invalid syllable or with
        trailing consonants is spelled out letter by letter instead.  When
        play is true the result is printed and passed to self.speech.read.
        Nested calls use play false, so they neither validate nor speak.
        """
        s = ""
        while True:
            word = self.getToken(input)
            readable = True
            if len(word) < 1:
                break
            if self.isNum(word[0]):
                # A leading zero means "read as a phone number".
                self._processNested(self.numToStr(word, word[0] == "0"))
                continue
            # Fold grave-accented vowels onto the acute forms used elsewhere.
            word = word.replace(u'\xf9', u'\xfa')
            word = word.replace(u'\xec', u'\xed')
            if self.storedWord(word):
                self.append(word)
                continue
            # The index range is fixed up front while word may grow inside
            # the loop, so later indices can land in the inserted text.
            for k in range(0, len(word)):
                ch = word[k]
                if self.isVowel(ch):
                    s = s + ch
                    if not self.isValidKikamba(s) and play:
                        # Invalid syllable at top level: spell the word out.
                        readable = False
                        self._processNested(self.expandWord(word))
                        s = ""
                        break
                    s = ""
                elif ch in self._PASS_THROUGH:
                    pass
                elif self.isPunctuation(ch):
                    # Put a space before the mark, then replace every
                    # occurrence of the mark with its spoken name.
                    word = self.insert(word, ' ', k)
                    word = word.replace(ch, self.expandWord(ch))
                else:
                    s = s + ch
            if len(s) > 0 and play:
                # Valid so far but does not end with a vowel: spell it out.
                self._processNested(self.expandWord(word))
                s = ""
                continue
            if readable:
                self.append(word)
        if play:
            print("Normalised:\n" + self.analysed)
            self.speech.read(self.analysed)

class Speech:
    """Build a linked list of sound clips from normalised text and play it.

    master is the Analyser whose tokeniser and word tables are reused; the
    clips are Phone nodes chained through their next attribute, with
    self.syl as the head of the list.
    """

    def __init__(self, master):
        self.folder = "c:\\res\\"  # directory holding the .wav resources
        self.master = master
        self.syl = None            # head of the queued Phone list
        self.playing = False       # true while playQueue is running

    def read(self, text):
        """Queue one clip per stored word or syllable of text, then play.

        Stored words map to a single clip.  Other words are cut after every
        vowel.  A space, comma or semicolon queues one pause clip; a full
        stop or colon queues two.  Queuing stops as soon as addSyl reports a
        failure, and playQueue is told whether everything was queued.
        """
        self.enoughMemory = True
        self.master.t_pos = 0
        pause = u"" + self.folder + "spc.wav"
        while self.enoughMemory:
            word = self.master.getToken(text)
            if len(word) == 0:
                break
            if self.master.storedWord(word):
                # The word's sound is stored as a whole.
                self.addSyl(u"" + self.folder + word + ".wav")
                continue
            pending = ""
            for ch in word:
                if not self.enoughMemory:
                    break
                if self.master.isVowel(ch):
                    self.addSyl(u"" + self.folder + pending + ch + ".wav")
                    pending = ""
                elif ch in (' ', ',', ';'):
                    self.addSyl(pause)
                elif ch in ('.', ':'):
                    self.addSyl(pause)
                    self.addSyl(pause)
                else:
                    pending = pending + ch
        self.playQueue(self.enoughMemory)

    def addSyl(self, h):
        """Append a Phone for file path h to the end of the queue.

        A SymbianError while creating the Phone clears self.enoughMemory
        instead of propagating.
        """
        try:
            node = Phone(h)
            if self.syl is None:
                self.syl = node
            else:
                tail = self.syl
                while tail.next is not None:
                    tail = tail.next
                tail.next = node
        except SymbianError:
            self.enoughMemory = False

    def playQueue(self, okay):
        """Play every queued clip in order, report the outcome, then reset.

        Does nothing if a playback is already in progress.  okay is the
        caller's view of whether queuing succeeded; it is also cleared when
        a clip raises SymbianError during playback.
        """
        if self.playing:
            return
        self.playing = True
        node = self.syl
        print("\n" * 3)
        print("SPEECH:")
        while node is not None:
            try:
                node.phone.play(times=1)
                e32.ao_sleep(node.length)
                node.phone.close()
            except AttributeError:
                # Pause nodes have phone set to None: just wait.
                e32.ao_sleep(node.length)
            except SymbianError:
                okay = False
            node = node.next
        if okay:
            appuifw.note(u"\xdayumbe N\xedwathela", "conf")
        else:
            appuifw.note(u"\xdayumbe Nd\xfasomeka Wonthe", "error")
        self.playing = False
        self.refresh()

    def refresh(self):
        """Drop the queue, clearing the fields of every node but the last."""
        node = self.syl
        if node is None:
            return
        while node.next is not None:
            node.phone = None
            node.syl = None
            node.length = None
            node.no = None
            node = node.next
        self.syl.next = None
        self.syl = None

class Phone:
    """One node of the playback queue: a sound clip or a short pause.

    y is the clip's file path; the syllable name is the part of the path
    between "res\\" and ".wav".
    """

    def __init__(self, y):
        name_start = y.index("res\\") + 4
        name_end = y.index(".wav")
        self.syl = y[name_start:name_end]
        if self.syl != 'spc':
            self.phone = audio.Sound.open(y)
            # presumably duration() is in microseconds and length in
            # seconds (it is passed to e32.ao_sleep) - TODO confirm
            self.length = float(self.phone.duration()) / 1000000
        else:
            # 'spc' is the pause marker: no sound object, fixed length.
            self.length = 0.1
            self.phone = None
        self.next = None


class KikambaTTS:
    """Application shell: main menu, text editor, SMS import/send and help.

    Constructing the object sets the application title and runs the
    main-menu loop until the user exits.
    """

    def __init__(self):
        self.prev = ""  # text handled by the previous run() call
        self.curr = ""  # text handled by the current run() call
        self.reader = Analyser()
        appuifw.app.title = u"K\xcdKAMBA TTS"
        choice = [u"And\xedka(Create Text)", u"Nd\xfam\xed\xedtwe(Inbox)",
                  u"Nd\xfam\xedte(Sent)", u"\xdatethyo(Help)",
                  u"Vinga(Exit)"]
        while True:
            menu = appuifw.selection_list(choice)
            if menu is None and appuifw.query(u"Vinga? (Exit?)", "query"):
                break
            elif menu == 0:
                self.input()
            elif menu == 1 or menu == 2:
                self.folder(menu)
            elif menu == 3:
                self.help()
            elif menu == 4:
                print("System Exit!")
                break

    def quit(self):
        """Exit-key handler: release the lock the current screen waits on."""
        self.app_lock.signal()

    def sender(self):
        """Menu callback: send the editor text as an SMS if it is not empty."""
        text = appuifw.app.body.get()
        if len(text) > 0:
            self.send(text)
        else:
            appuifw.note(u"vathei!", "error")

    def runner(self):
        """Menu callback: read the editor text aloud if it is not empty."""
        text = appuifw.app.body.get()
        if len(text) > 0:
            self.run(text)
        else:
            appuifw.note(u"vathei!", "error")

    def input(self):
        """Open an empty text editor with the read/send menu."""
        appuifw.app.body = appuifw.Text()
        self.textMenus()

    def folder(self, menu):
        """Load a message picked from folder code menu into the editor."""
        text = self.getFromFolder(menu)
        if text is not None:
            appuifw.app.body = appuifw.Text(text)
            self.textMenus()

    def textMenus(self):
        """Install the editor menu and block until the exit key is pressed."""
        self.app_lock = e32.Ao_lock()
        appuifw.app.exit_key_handler = self.quit
        appuifw.app.menu = [(u"Soma", self.runner), (u"T\xfama", self.sender)]
        self.app_lock.wait()

    def send(self, str):
        """Ask for a number and send str to it as an SMS, reporting success
        or failure in a note.  Does nothing if the query is cancelled."""
        number = appuifw.query(u"Namba", "text")
        if number is not None:
            try:
                messaging.sms_send(number, str)
                appuifw.note(u"\xdayumbe N\xedwat\xfamwa", "conf")
            except SymbianError:
                appuifw.note(u"\xdayumbe nd\xfanat\xfamwa", "error")

    def run(self, te):
        """Lower-case te and speak it.

        If te equals the text of the previous call, the already normalised
        text is replayed; otherwise it is analysed from scratch.
        """
        te = te.lower()
        self.curr = te
        print("\n" * 20)
        print("INPUT:\n'" + te + "'")
        print("\n" * 2)
        print("ANALYSING.......")
        self.reader.t_pos = 0
        # NOTE(review): this sets an attribute on the Analyser; the playback
        # queue lives in self.reader.speech.syl - confirm what was intended.
        self.reader.syl = None
        if self.curr == self.prev:
            self.reader.speech.read(self.reader.analysed)
        else:
            self.reader.analysed = ""
            self.reader.process(te, True)
        self.prev = self.curr

    def getMessage(self, str):
        """Prompt for text, pre-filled with str; returns None on cancel."""
        return appuifw.query(u"And\xedka Vaa", u"text", str)

    def getFromFolder(self, ch):
        """Let the user pick an SMS and return its content.

        ch selects the folder: 1 for the inbox, 2 for sent messages.  Returns
        None for any other value or when no message is chosen.
        """
        if ch == 1:
            mail = inbox.Inbox(inbox.EInbox)
        elif ch == 2:
            mail = inbox.Inbox(inbox.ESent)
        else:
            # Unknown folder code: previously failed on the unbound 'mail'.
            return None
        msg_ids = mail.sms_messages()
        address = [mail.address(msg_id) for msg_id in msg_ids]
        index = appuifw.popup_menu(address, u"Sak\xfaa \xdayumbe")
        if index is not None and index >= 0:
            return mail.content(msg_ids[index])
        return None

    def help(self):
        """Show the help text in Kikamba or English until 'back' is chosen."""
        eng = u'''INTRODUCTION:
The Kikamba Text-To-Speech System Converts input text into speech and also allows you to send the text as an sms.

INPUTTING TEXT:
There are two ways to input Text:
(i). Create new Text: on the main menu, select 'And\xedka'. This opens a text box where you enter the Text.
(ii). Importing text: To import text from the 'inbox' folder or 'sent' folder of your phone, select on the main menu 'Nd\xfam\xed\xedtwe' or 'Nd\xfam\xedte' respectively. This displays a list of message addresses for the messages in the corresponding folder, select any address to import its text into the text box


GENERATING SPEECH:
From the 'options' menu, select 'soma'.

SENDING TEXT:
From the 'options' menu, select 'send'. This opens an input dialog. Enter the recipients address and click ok to send.

EXITTING:
Click 'Vinga' on the main menu.'''

        kik = u'''MWAMB\xcd\xedO:
M\xfatambo \xfa\xfa n\xed wa k\xfav\xednd\xfaa ndeto sya \xfayumbe wa k\xedkamba ikatw\xedka \xfaneeni na \xednd\xed k\xfat\xfama \xfayumbe \xfasu ta sms

KW\xcdK\xcdA \xdaYUMBE M\xdaTAMBON\xcd:
Ve nz\xeda il\xed sya kw\xedk\xeda \xfayumbe:
(i). K\xfaand\xedka mbene: sak\xfaa 'And\xedka' kuma k\xedyoon\xed kya mbee. k\xedyoo k\xedng\xed kya k\xfaand\xedk\xeda n\xedk\xed\xfaving\xfaka na \xednd\xed \xfayand\xedka vo.
(ii). Kwosa \xfayumbe sim\xfan\xed: Wenda kwosa \xfayumbe \xfat\xfam\xed\xedtwe sak\xfaa 'Nd\xfam\xed\xedtwe' kuma k\xedyoon\xed kya mbee, wenda kwosa \xfayumbe \xfat\xfam\xedte sak\xfaa 'Nd\xfam\xedte'. Weeka ou w\xedonw'a masy\xedtwa kana namba sya and\xfa ala \xfamat\xfam\xed\xedte kana mak\xfat\xfam\xed\xedte ma\xfayumbe. Vau sak\xfaa \xfamwe n\xedkana \xfayumbe wake woswe w\xedk\xedwe k\xedyoon\xed k\xedla kya k\xfaand\xedk\xeda

KW\xcdTHUK\xcd\xcdSYA \xdaNEENI:
W\xed k\xedyoon\xed k\xedu kya k\xfaand\xedk\xeda, sak\xfaa 'options' na \xfaitina 'soma'

K\xdaT\xdaMA \xdaYUMBE:
W\xed k\xedyoon\xed k\xedu kya k\xfaand\xedk\xeda, sak\xfaa 'options' na \xfaitina 't\xfama'. Ve um\xedla k\xedyoo k\xedng\xed vala \xfak\xfaand\xedka namba ya sim\xfa \xedla \xfak\xfat\xfam\xeda \xfayumbe \xfasu na \xednd\xed \xfaisak\xfaa 'ok' n\xedkana \xfayumbe \xfat\xfamwe.

K\xdaVINGA M\xdaTAMBO:
Sak\xfaa 'Vinga' kuma k\xedyoon\xed kya mbee'''

        choice = [u'K\xedkamba', u'English', u'Syoka(Back)']
        while True:
            t = appuifw.Text()
            ch = appuifw.selection_list(choice)
            # Compare by value: identity tests on ints are not reliable.
            if ch == 0:
                t.set(kik)
            elif ch == 1:
                t.set(eng)
            else:
                break
            t.set_pos(0)
            appuifw.app.body = t
            self.app_lock = e32.Ao_lock()
            appuifw.app.exit_key_handler = self.quit
            self.app_lock.wait()

# Program entry point: constructing KikambaTTS sets the application title and
# runs the main-menu loop inside __init__, returning once the user exits.
run=KikambaTTS()




Have fun, developers.

Kiswahili is Deficient in representing Numbers

This article was first published in March 2010 on my previous blogging site

Kiswahili can't distinguish 11,000 from 10,001: both are written as 'ELFU KUMI NA MOJA'. The same is true of 12,000 and 10,002, and the like.

This means we can not reliably translate this text back to numbers using a computer.

A possible solution is to introduce a comma, so that 10,001 becomes 'ELFU KUMI, NA MOJA', but this will only work if we are converting text assumed to represent numbers only. Otherwise 'ELFU KUMI, NA MOJA' could mean both '10,001' and '10,000, and 1'. This now calls for the computer to analyse the context of the text and classify it as one of the two, which is not easy, if you know what I mean. There goes a PhD research topic idea for those who are already there. And for Prof. Ali Mazrui and other custodians of the Kiswahili language: be careful next time you work on a language, it must be computer friendly.


Kiswahili is not my first language; if I am wrong somewhere, let me know.


Check out a system that converts numbers to text for Kiswahili and 2 other languages at http://numtotext.appspot.com/. This is the inverse of what we are talking about above.


This text also appeared on my blog at http://cymox1.blogspot.com/2010/08/kiswahili-is-deficient-in-representing.html

Sunday, June 5, 2011

Success today!

Success today, unlike yesterday, does not come from being more powerful than others, taking advantage of others or being better than others. In a global world like the one we are living in, we compete on level ground. We all have equal access to knowledge resources, the most important of all resources.

So how does success come? Only by empowering others are you going to rise. The more you give, the more useful and necessary you are, and the higher you are going to go. Freely sharing your resourceful information and other knowledge resources is going to bring home your dreams. Empowering individual souls will make you a god(dess) of opportunities, and that is what we call power in the 21st century.

Tuesday, April 5, 2011

Tuwakenya Arrives with a Bang!

Finally, Kenyans have an open discussion space where they can share ideas and crack jokes, as well as discuss and critique the current political drama.

http://tuwakenya.appspot.com is a simplified blog-like site that allows visitors to quickly browse articles posted by others. The articles are presented in two lists, one for Most Recently Posted articles and another for the Most Read Articles.

To make it all easy, you don't need to register to start using Tuwakenya, provided you are a Kenyan (actually we don't have a way to know if you really are Kenyan). But you must be authenticated by Google before you can post content on Tuwakenya (e.g. by logging in with your Gmail account).

You don't have to log in to read or comment on the contents of Tuwakenya.

Again, be among the first 100 to post content on Tuwakenya. Share your views about the Hague and the Ocampo 6, about the world bank grant to improve slums or even about your village and let Kenyans read and comment on it.

Saturday, March 26, 2011

Aligning Text At the Top of a Table Cell

Well, I am back again on blogspot after a 9-month break. That was quite refreshing and eye-opening.

Quick to the point, I really had some trouble trying to align text at the top of a table cell, which is understandable, given my little experience in front end development.

Any HTML/CSS developer has heard of the 'text-align' CSS property that can be assigned to virtually all HTML elements. It controls the horizontal alignment not only of text but of all other HTML objects, including divs and tables. It only takes 3 values, which set the alignment to be right, left or center. Notice that you cannot set vertical alignment using this property. So what do you do if you have a table row in which some cells have text spanning multiple lines while others have single lines that are displayed at the center (vertically), which is the default for most browsers?

Here is the solution: use the CSS property 'vertical-align', which takes 'bottom' and 'top' values for bottom alignment and top alignment respectively.

Here is sample CSS for setting all table data cells to be vertically aligned to the top.

td{
vertical-align:top;
}

Well, have a good time coding in HTML.