;; ===================================================================================
;; Italian FESTIVAL Modules(IFM)
;; Copyright (C) 2001-2005 by the IFM Development Team
;; at "ISTC-SPFD CNR" and at "ITC-Irst".
;; ===================================================================================
;;	ISTC-SPFD CNR
;;		Istituto di Scienze e Tecnologie della Cognizione
;;		Sezione di Padova "Fonetica e Dialettologia"
;;		Consiglio Nazionale delle Ricerche
;;		Via G. Anghinoni, 10 - 35121 Padova
;;		tel (+39) 049 8274418 - fax (+39) 049 8274416
;;		e-mail: segreteria@pd.istc.cnr.it 
;; 
;;	ITC-irst
;;		Istituto Trentino di Cultura
;;		Centro per la ricerca scientifica e tecnologica 
;;		Via Santa Croce 77 - 38100 Trento ITALIA
;;		tel (+39) 0461-210111 - fax (+39) 0461-980436
;;		e-mail: info@itc.it 
;; ===================================================================================
;; This file is part of IFM.
;; 
;; This program is free software; you can redistribute it and/or
;; modify it under the terms of the GNU General Public License
;; as published by the Free Software Foundation; either version 2
;; of the License, or (at your option) any later version.
;; 
;; This program is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;; GNU General Public License for more details.
;; 
;; You should have received a copy of the GNU General Public License
;; along with this program; if not, write to the Free Software
;; Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
;; ===================================================================================
;; Authors:	Piero COSI, ISTC-SPFD CNR, (cosi@pd.istc.cnr.it)
;; 		Carlo DRIOLI
;; 		Graziano TISATO
;; 		Roberto GRETTER, ITC-irst (SSI/MPA), (gretter@itc.it) 
;; 		Fabio TESSER
;; ===================================================================================
;;		WEB:  http://www.pd.istc.cnr.it/TTS/ItalianFESTIVAL
;; ===================================================================================
;;
;; Definition of ITALIAN LEXICON
;; and of some pre-tokenization rules.
;;
;; ===================================================================================
;;
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Pre-lexicon: maps every input character to the expected one;
;; in addition it handles words containing an apostrophe, joining their syllabification while keeping the transcription of the individual words.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Create the "otherlex" lexicon, give it the "italian" phone set and
;; register pre_lex_function (defined below) as its letter-to-sound method.
;; No entries are added to "otherlex" in this file, so lookups in it are
;; expected to end up in pre_lex_function.
(lex.create "otherlex")
(lex.set.phoneset "italian")
(lex.set.lts.method "pre_lex_function")

(define (pre_lex_syllable_has_vowel phones)
"(pre_lex_syllable_has_vowel PHONES)
Helper for pre_lex_function.  Returns t when at least one phone name in
the list PHONES has the phone feature vc equal to +, nil otherwise."
  (let ((found nil))
    (while (and phones (not found))
      (if (string-equal (phone_feature (car phones) 'vc) "+")
          (set! found t))
      (set! phones (cdr phones)))
    found))

(define (pre_lex_merge_syllables syls)
"(pre_lex_merge_syllables SYLS)
Helper for pre_lex_function.  SYLS is a list of (PHONES STRESS) syllables.
A syllable that contains no vowel phone is joined with the syllable that
follows it; the joined syllable keeps the stress of the following one.
A vowel-less syllable in last position is kept unchanged.  Returns the
new syllable list (nil when SYLS is nil)."
  (let ((merged nil) (cur nil) (nxt nil))
    (while syls
      (set! cur (car syls))
      (if (and (not (pre_lex_syllable_has_vowel (car cur)))
               (cdr syls))
          (begin
            (set! nxt (car (cdr syls)))
            (set! merged
                  (cons (list (append (car cur) (car nxt))
                              (car (cdr nxt)))
                        merged))
            ;; the following syllable was consumed by the merge: skip it
            (set! syls (cdr syls)))
          (set! merged (cons cur merged)))
      (set! syls (cdr syls)))
    ;; syllables were accumulated front-first
    (reverse merged)))

(define (pre_lex_function word feats)
"(pre_lex_function WORD FEATS)
Pre-lexicon lookup function; returns a lexical entry (NAME POS SYLLABLES).
If WORD carries an explicit transcription after the marker -T, the text
before the marker becomes the entry name, the POS is N and the text after
the marker is read as the syllable list.  Otherwise WORD is mapped through
the italian_downcase rule set and looked up, with FEATS, in the italian
lexicon.  A word with an inner apostrophe is looked up as two parts (the
part up to and including the apostrophe, and the rest); their syllable
lists are concatenated, vowel-less syllables are joined to the following
syllable, and the POS of the second part is used.  The previously
selected lexicon is selected again before returning."
  ;; All working state is local: earlier versions leaked every temporary
  ;; (entry, T, nT, ...) into the global environment and could return a
  ;; stale nT when both parts had no syllables.
  (let ((entry nil))
    (if (string-matches word ".* -T.*")
        ;; Explicit transcription supplied with the word.
        (let ((true_word_str (string-before word " -T"))
              (trasc_word_str (string-after word " -T ")))
          ;; wrap the transcription in parentheses so it reads as one list
          (set! entry
                (list true_word_str
                      'N
                      (read-from-string
                       (string-append "(" trasc_word_str ")")))))
        ;; Regular lookup in the italian lexicon; lex.select returns the
        ;; name of the lexicon that was selected before, kept in me.
        (let ((me (lex.select "italian")))
          (let ((down_word (apply string-append
                                  (lts.apply word 'italian_downcase))))
            (if (string-matches down_word ".+'.+") ; word with an apostrophe
                ;; feats is now really passed to lex.lookup; it used to be
                ;; a stray third argument of set! and was ignored.
                (let ((e1 (lex.lookup
                           (string-append (string-before down_word "'") "'")
                           feats))
                      (e2 (lex.lookup (string-after down_word "'") feats)))
                  (set! entry
                        (list down_word
                              (car (cdr e2))
                              (pre_lex_merge_syllables
                               (append (car (cdr (cdr e1)))
                                       (car (cdr (cdr e2))))))))
                (set! entry (lex.lookup down_word feats))))
          (lex.select me)))
    entry))

;; TODO (original authors): check whether this is feasible or what would be
;; better; what is still left to do here?
;;
;; Part-of-speech map installed on the "italian" lexicon with lex.set.pos.map.
;; Each row has the form ((TAG ...) TARGET).
;; NOTE(review): most tags look like English Penn-Treebank tags, presumably
;; inherited from an English voice; the rows (( A-MS ... ) A) and (( A ) A-MS)
;; point at each other -- confirm that this is intended.
(setq italian_pos_map
      '(
	(( V-P3_IP vb vbn vbz vbp vbg ) V)
	(( A-MS nn nnp nns nnps fw sym ls ) A)
	(( A ) A-MS)
	(( punc fpunc ) punc)
	(( in ) in)
	(( jj jjr jjs 1 2 ) j)
	(( prp ) prp)
	(( D-NNFab ) D-NN)
	(( rb rp rbr rbs ) r)
	(( cc ) cc)
	(( of ) of)
	(( to ) to)
	(( cd ) cd)
	(( md ) md)
	(( pos ) pos)
	(( wdt ) wdt)
	(( wp ) wp)
	(( wrb ) wrb)
	(( ex ) ex)
	(( uh ) uh)
	(( pdt ) pdt)
	))



;;; Lexicon
;; Create the main "italian" lexicon: italian phone set, the POS map defined
;; above, the compiled lexicon file <lexdir>/ifd/lex.out, and "italian_lts"
;; as letter-to-sound method.  lexdir and italian_lts are not defined in
;; this file.
(lex.create "italian")
(lex.set.phoneset "italian")
(lex.set.pos.map italian_pos_map)
(defvar ifdlexdir (path-append lexdir "ifd"))
(lex.set.compile.file (path-append ifdlexdir "lex.out"))
(lex.set.lts.method "italian_lts") 

;(lex.set.lts.ruleset "italian")

;Disable the post-lexical rules:
(set! postlex_rules_hooks nil)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; LEXICON ADDENDA (a few terms)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; convention
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ACCENTED VOWELS, ANSI encoding
;;
;; NOTE(review): in this copy of the file every headword below is the empty
;; string.  Presumably each one was a single accented vowel that was lost in
;; a character-encoding conversion -- restore them from an original ANSI
;; copy of the file before relying on these entries.

(lex.add.entry '("" nn (((a1 ) 1))))
(lex.add.entry '("" nn (((a1 ) 1))))
(lex.add.entry '("" nn (((e1 ) 1))))
(lex.add.entry '("" nn (((E1 ) 1))))
(lex.add.entry '("" nn (((i1 ) 1))))
(lex.add.entry '("" nn (((i1 ) 1))))
(lex.add.entry '("" nn (((o1 ) 1))))
(lex.add.entry '("" nn (((O1 ) 1))))
(lex.add.entry '("" nn (((u1 ) 1))))
(lex.add.entry '("" nn (((u1 ) 1))))

;; ACCENTED VOWELS, OEM encoding (for keyboard input).
;; OEM is the character set usually found in DOS-generated text files, as opposed to ANSI (Windows).
;; These entries should NOT be needed, because the characters are first of all
;; mapped by italian_downcase, so at this point they are already
;; in ANSI encoding.

;(lex.add.entry '("" nn (((a1 ) 1))))
;(lex.add.entry '("" nn (((E1 ) 1))))
;(lex.add.entry '("" nn (((e1 ) 1))))
;(lex.add.entry '("" nn (((O1 ) 1))))
;(lex.add.entry '("" nn (((u1 ) 1))))
;(lex.add.entry '("" nn (((i1 ) 1))))
;;
;; Spoken names of the single letters a-z.
;; Each entry is (HEADWORD POS SYLLABLES); a syllable is (PHONES STRESS).
;; g, t and v used to carry syllable stress 0 although their only vowel is
;; the stressed phone (i1/u1); they now use stress 1 like b, c, d, p and q.
(lex.add.entry '("a" nn (((a1) 1))))
(lex.add.entry '("b" nn (((b i1) 1))))
(lex.add.entry '("c" nn (((tS i1) 1))))
(lex.add.entry '("d" nn (((d i1) 1))))
(lex.add.entry '("e" nn (((e1) 1))))     ;;; NOTE: probably better to keep e1 (this way the conjunction is transcribed correctly)
(lex.add.entry '("f" nn (((E1 f) 1) ((f e) 0))))
(lex.add.entry '("g" nn (((dZ i1) 1))))
(lex.add.entry '("h" nn (((a1 k) 1) ((k a) 0))))
(lex.add.entry '("i" nn (((i1) 1))))
(lex.add.entry '("j" nn (((i) 0) ((l u1 n) 1) ((g a) 0))))
(lex.add.entry '("k" nn (((k a1 p) 1) ((p a) 0))))
(lex.add.entry '("l" nn (((E1 l) 1) ((l e) 0))))
(lex.add.entry '("m" nn (((E1 m) 1) ((m e) 0))))
(lex.add.entry '("n" nn (((E1 n) 1) ((n e) 0))))
(lex.add.entry '("o" nn (((o1) 1))))     ;;; NOTE: probably better to keep o1 (this way the conjunction is transcribed correctly)
(lex.add.entry '("p" nn (((p i1) 1))))
(lex.add.entry '("q" nn (((k u1) 1))))
(lex.add.entry '("r" nn (((E1 r) 1) ((r e) 0))))
(lex.add.entry '("s" nn (((E1 s) 1) ((s e) 0))))
(lex.add.entry '("t" nn (((t i1) 1))))
(lex.add.entry '("u" nn (((u1) 1))))
(lex.add.entry '("v" nn (((v u1) 1))))
(lex.add.entry '("w" nn (((v u1) 0) ((d o1 p) 1) ((p i a) 0))))   ;; or "vu doppia"?
(lex.add.entry '("x" nn (((i1) 1) ((k s) 0))))
(lex.add.entry '("y" nn (((i1) 1) ((p s i) 0) ((l o n) 0))))   ;; probably the more common name
(lex.add.entry '("z" nn (((dz e1) 1) ((t a) 0))))
 
;;; OTHER ATOMS
;; Elided forms ending in an apostrophe.  pre_lex_function looks up the part
;; of a word up to and including the apostrophe as a headword of this shape.
(lex.add.entry '("c'" nn (((tS ) 1))));; handles c' as in c'era etc.
(lex.add.entry '("diec'" N (((d j E1) 1) ((tS) 0))))

;; OTHER ENTRIES
;; Hand-written transcriptions for individual words, added to the addenda
;; of the current lexicon.
(lex.add.entry '("si" B (((s i1) 1))))
(lex.add.entry '("no" B (((n O1) 1))))
(lex.add.entry '("posta" S (((p O1) 1) ((s t a) 0))))
(lex.add.entry '("celsius" S (((tS E l) 0) ((s j u1 s) 1))))
(lex.add.entry '("uomini" nil (((w o1) 1) ((m i) 0) ((n i) 0))))
(lex.add.entry '("riempite" nil (((r j E m) 0) ((p i1) 1) ((t e) 0))))
(lex.add.entry '("conoscerla" nil (((k o1) 1) ((n o S) 0) ((S e r) 0) ((l a) 0))))
(lex.add.entry '("tesser" CGN (((t e s) 0) ((s E1 r) 1))))

;; MPIRO LEXICON
;; Names and foreign terms with hand-written transcriptions.  The second
;; field holds GR, EN, FR or IT here -- apparently a language tag stored in
;; the part-of-speech slot (TODO confirm).
;; NOTE(review): "period" is entered twice with identical content, and
;; "hadra" twice with different tags (EN and GR); "j" is also defined among
;; the letter names with tag nn.
(lex.add.entry '("anavyssos" GR (((a) 0) ((n a) 0) ((v i s) 0) ((s o) 0) ((s) 0))))
(lex.add.entry '("archaia" GR (((a r) 0) ((k a) 0) ((j a) 0))))
(lex.add.entry '("archaic" EN (((a r) 0) ((k a) 0) ((i k) 0))))
(lex.add.entry '("athenian" EN (((a) 0) ((t e) 0) ((n j a n) 0))))
(lex.add.entry '("athens" EN (((a) 0) ((t e n) 0) ((s) 0))))
(lex.add.entry '("athinon" EN (((a) 0) ((t i) 0) ((n o n) 0))))
(lex.add.entry '("boardman" EN (((b o r) 0) ((d m e n) 0))))
(lex.add.entry '("coins" EN (((k o) 0) ((i1 n) 1) ((s) 0))))
(lex.add.entry '("ekdotiki" GR (((e k d o) 0) ((t i) 0) ((k i) 0))))
(lex.add.entry '("elliniki" GR (((e l) 0) ((l i) 0) ((n i) 0) ((k i) 0))))
(lex.add.entry '("greek" EN (((g r i k) 0))))
(lex.add.entry '("hadra" EN (((a) 0) ((d r a) 0))))
(lex.add.entry '("hudson" EN (((j u d) 0) ((s o n) 0))))
(lex.add.entry '("iliou" EN (((i) 0) ((l j o1) 1) ((u) 0))))
(lex.add.entry '("j" EN (((dZ e) 0) ((i) 0))))
(lex.add.entry '("kouroi" GR (((k o) 0) ((u) 0) ((r o1) 1) ((i) 0))))
(lex.add.entry '("kouros" GR (((k o) 0) ((u) 0) ((r o1) 1) ((s) 0))))
(lex.add.entry '("kroissos" GR (((k r o) 0) ((i s) 0) ((s o) 0) ((s) 0))))
(lex.add.entry '("oikonomidis" GR (((o) 0) ((i) 0) ((k o) 0) ((n o) 0) ((m i) 0) ((d i) 0) ((s) 0))))
(lex.add.entry '("period" EN (((p i) 0) ((r i) 0) ((o d) 0))))
(lex.add.entry '("rhodes" EN (((r o) 0) ((d e) 0) ((s) 0))))
(lex.add.entry '("school" EN (((s k u l) 0))))
(lex.add.entry '("sculpture" EN (((s k a l) 0) ((p t u r) 0))))
(lex.add.entry '("techni" EN (((t e k) 0) ((n i) 0))))
(lex.add.entry '("thames" EN (((t e) 0) ((i m) 0) ((s) 0))))
(lex.add.entry '("thassos" GR (((t a s) 0) ((s o) 0) ((s) 0))))
(lex.add.entry '("wagner" EN (((v a g n e r) 0))))
(lex.add.entry '("wurzburg" EN (((v u r) 0) ((z b u r) 0) ((g) 0))))
(lex.add.entry '("york" EN (((j o r) 0) ((k) 0))))
(lex.add.entry '("hadra" GR (((a) 0) ((d r a) 0))))
(lex.add.entry '("kalpis" GR (((k a l) 0) ((p i) 0) ((s) 0))))
(lex.add.entry '("kylix" GR (((k i) 0) ((l i k) 0) ((s) 0))))
(lex.add.entry '("metic" EN (((m e) 0) ((t i1 k) 1))))
(lex.add.entry '("nymphides" GR (((n i m) 0) ((f i) 0) ((d e) 0) ((s) 0))))
(lex.add.entry '("oikos" GR (((o) 0) ((i) 0) ((k o) 0) ((s) 0))))
(lex.add.entry '("painterly" EN (((p e) 0) ((i n) 0) ((t e r) 0) ((l i) 0))))
(lex.add.entry '("period" EN (((p i) 0) ((r i) 0) ((o d) 0))))
(lex.add.entry '("red" EN (((r e1 d) 1))))
(lex.add.entry '("rhyton" GR (((r i) 0) ((t o n) 0))))
(lex.add.entry '("splancnoscopia" IT (((s p l a n) 0) ((k n o) 0) ((s k o) 0) ((p i1) 1) ((a) 0))))
(lex.add.entry '("tetradracma" IT (((t e) 0) ((t r a) 0) ((d r a1 k m a) 1))))
(lex.add.entry '("actium" EN (((a1 k t j u m) 1))))
(lex.add.entry '("aetolia" IT (((e) 0) ((t O1) 1) ((l j a) 0))))
(lex.add.entry '("amasis" GR (((a) 0) ((m a1) 1) ((s i) 0) ((s) 0))))
(lex.add.entry '("ares" GR (((a1) 1) ((r e) 0) ((s) 0))))
(lex.add.entry '("atene" IT (((a) 0) ((t e1) 1) ((n e) 0))))
(lex.add.entry '("classical" EN (((k l a s) 0) ((s i) 0) ((k a l) 0))))
(lex.add.entry '("cleofrade" IT (((k l e) 0) ((o) 0) ((f r a1) 1) ((d e) 0))))
(lex.add.entry '("diadumeno" IT (((d j a) 0) ((d u) 0) ((m e1) 1) ((n o) 0))))
(lex.add.entry '("eutimide" IT (((e) 0) ((u) 0) ((t i) 0) ((m i1) 1) ((d e) 0))))
(lex.add.entry '("fidia" IT (((f i1) 1) ((d j a) 0))))
(lex.add.entry '("martin" EN (((m a1 r) 1) ((t i n) 0))))
(lex.add.entry '("meidias" GR (((m e) 0) ((i1) 1) ((d j a) 0) ((s) 0))))
(lex.add.entry '("metropolitan" EN (((m e) 0) ((t r o) 0) ((p O1) 1) ((l i) 0) ((t a n) 0))))
(lex.add.entry '("musee" EN (((m u) 0) ((z e1) 1))))
(lex.add.entry '("necropolis" GR (((n e) 0) ((k r O1) 1) ((p o) 0) ((l i) 0) ((s) 0))))
(lex.add.entry '("nomismata" GR (((n o) 0) ((m i) 0) ((z m a1) 1) ((t a) 0))))
(lex.add.entry '("palais" FR (((p a) 0) ((l e1) 1))))
(lex.add.entry '("persis" FR (((p E1 r) 1) ((s i) 0) ((s) 0))))
(lex.add.entry '("petit" EN (((p e) 0) ((t i1) 1))))
(lex.add.entry '("policleto" EN (((p o) 0) ((l i) 0) ((k l e1) 1) ((t o) 0))))
(lex.add.entry '("princeton" EN (((p r i n) 0) ((s t o n) 0))))
(lex.add.entry '("roman" EN (((r o1) 1) ((m a n) 0))))
(lex.add.entry '("sotades" GR (((s o) 0) ((t a) 0) ((d e) 0) ((s) 0))))
(lex.add.entry '("studies" EN (((s t a) 0) ((d i) 0) ((s) 0))))
(lex.add.entry '("aetoliani" EN (((e) 0) ((t o) 0) ((l j a1) 1) ((n i) 0))))
(lex.add.entry '("at" EN (((e t) 0))))
(lex.add.entry '("du" EN (((d u1) 1))))
(lex.add.entry '("stater" EN (((s t a1) 1) ((t e r) 0))))
(lex.add.entry '("vases" EN (((v a1) 1) ((z e) 0) ((s) 0))))


;; ACRONYMS and abbreviations
;; Each abbreviation (tag ACR) is given the phones of a full spoken form
;; rather than of its letters.
(lex.add.entry 
 '("ac" ACR (((a) 0) ((v a1 n) 1) ((t i) 0) ((k r i1) 1) ((s t o) 0))))
(lex.add.entry 
 '("dc" ACR (((d o) 0) ((p o) 0) ((k r i1) 1) ((s t o) 0))))
(lex.add.entry 
 '("fig" ACR (((f i) 0) ((g u1) 1) ((r a) 0))))
(lex.add.entry 
 '("ill" ACR (((i l) 0) ((l u) 0) ((s t r a ts) 0) ((ts j o1) 1) ((n e) 0))))
(lex.add.entry 
 '("pp" ACR (((p a1) 1) ((dZ i) 0) ((n e) 0))))
(lex.add.entry 
 '("ecc" ACR (((e tS) 0) ((tS E1) 1) ((t e) 0) ((r a) 0))))
 
;; TODO: review the transcription rules for words ending in q;
;; until then "iraq" is transcribed by hand.
(lex.add.entry 
'("iraq" nil (((i) 0) ((r a1 k) 1))))
 
;; SYMBOLS and ISOLATED PUNCTUATION
;; Spoken forms (tag SB) for single symbol, punctuation and whitespace
;; characters.  Each entry is (HEADWORD SB SYLLABLES); a syllable is
;; (PHONES STRESS).
;; The backslash and vertical-bar entries used to mark their second,
;; unstressed syllable (r a) with stress 1; it is now 0, as for every other
;; syllable without a stressed vowel phone in this table.
(lex.add.entry 
 '("\!" SB (((p u1 n) 1) ((t o) 0) ((e) 0) ((s k l a) 0) ((m a) 0) ((t i1) 1) ((v o) 0))))
(lex.add.entry 
 '("\"" SB (((v i r) 0) ((g o) 0) ((l e1 t) 1) ((t e) 0))))
(lex.add.entry 
 '("\#" SB (((k a n) 0) ((tS e l) 0) ((l e1 t) 1) ((t o) 0))))
(lex.add.entry 
 '("%" SB (((p e1 r) 0) ((tS e1 n) 1) ((t o) 0))))
(lex.add.entry 
 '("&" SB (((E1 n) 1) ((d) 0))))
(lex.add.entry 
 '("\'" SB (((a1) 1) ((p i) 0) ((tS e) 0))))
(lex.add.entry 
 '("\(" SB (((a) 0) ((p E1 r) 1) ((t a) 0) ((t o1 n) 1) ((d a) 0))))
(lex.add.entry 
 '("\)" SB (((k j u1) 1) ((s a) 0) ((t o1 n) 1) ((d a) 0))))
(lex.add.entry 
 '("\*" SB (((a s) 0) ((t e) 0) ((r i1 s) 1)  ((k o) 0))))
(lex.add.entry 
 '("\+" SB (((p j u1) 1))))
(lex.add.entry 
 '("\," SB (((v i1 r) 1) ((g o) 0) ((l a) 0)))) 
(lex.add.entry 
 '("\-" SB (((m e1) 1) ((n o) 0))))
(lex.add.entry 
 '("\." SB (((p u1 n) 1) ((t o) 0))))
(lex.add.entry 
 '("\/" SB (((d i) 0) ((v i1) 1) ((z o) 0))))
(lex.add.entry 
 '("\:" SB (((d u1) 1) ((e) 0) ((p u1 n) 1) ((t i) 0))))
(lex.add.entry 
 '("\;" SB (((p u1 n) 1) ((t o) 0) ((e) 0) ((v i1 r) 1) ((g o) 0) ((l a) 0))))
(lex.add.entry 
 '("\<" SB (((m i) 0) ((n o1) 1) ((r e) 0))))
(lex.add.entry 
 '("\=" SB (((u) 0) ((g w a1) 1) ((l e) 0))))
(lex.add.entry 
 '(">" SB (((m a dZ) 0) ((dZ o1) 1) ((r e) 0))))
(lex.add.entry 
 '("\?" SB (((p u1 n) 1) ((t o) 0) ((d i) 0) ((d o) 0) ((m a1 n) 1) ((d a) 0))))
(lex.add.entry 
 '("\@" SB (((k j O1 tS) 1) ((tS o) 0) ((l a) 0))))
(lex.add.entry 
 '("\[" SB (((a) 0) ((p E1 r) 1) ((t a) 0) ((k w a1) 1) ((d r a) 0))))
(lex.add.entry 
 '("\\" SB (((b a1 r) 1) ((r a) 0))))
(lex.add.entry 
 '("\]" SB (((k j u1) 1) ((s a) 0) ((k w a1) 1) ((d r a) 0))))   
(lex.add.entry 
 '("\^" SB (((k a p) 0) ((p E1 l) 1) ((l o) 0)) ))
(lex.add.entry 
 '("_" SB (((s o t) 0) ((t o) 0) ((l i) 0) ((n e) 0) ((a) 0) ((t u1) 1) ((r a) 0))))
(lex.add.entry 
 '("\`" SB (((a1) 1) ((p i) 0) ((tS e) 0))))
(lex.add.entry 
 '("\{" SB (((a) 0) ((p E1 r) 1) ((t a) 0) ((g r a1) 1) ((f a) 0))))
(lex.add.entry 
 '("\|" SB (((b a1 r) 1) ((r a) 0))))
(lex.add.entry 
 '("\}" SB (((k j u1) 1) ((s a) 0) ((g r a1) 1) ((f a) 0))))
(lex.add.entry 
 '("~" SB (((t i1 l) 1) ((d e) 0)) ))
;; NOTE(review): in this copy of the file the next headword is a lone
;; backslash directly before the closing quote, which would escape that
;; quote.  The transcription (g r a1 d i) suggests the headword was a degree
;; sign lost in a character-encoding conversion -- restore it from an
;; original ANSI copy of the file.
(lex.add.entry 
 '("\" SB (((g r a1) 1) ((d i) 0))))
(lex.add.entry 
 '(" " SB (((s p a1 ts) 1) ((ts j o) 0))))
(lex.add.entry 
 '("\t" SB (((t a1 b) 1))))
(lex.add.entry 
 '("\n" SB (((n w o1) 1) ((v a) 0) ((l i1) 1) ((n e) 0) ((a) 0))))


;; Register the feature name italian_lexicon for this file.
(provide 'italian_lexicon)
