GalateaTalk で IPADIC を使うには

         2003/12/03 nishi@hil.t.u-tokyo.ac.jp 

GalateaTalk のテキスト解析処理では，
形態素解析エンジン chasen を利用しています。

chasen は ipadic とよばれる辞書を利用していますが，
Galatea プロジェクトでは登録単語の韻律情報を含む
辞書 UniDic を開発しました。
GalateaTalk 標準では chasen + UniDic を
利用しています。

ߤ UniDic Ͽñ ipadic ⾯ʤᡤ
ѤΥƥȲΤμȤƤ
ŬȤʤΤǤ

ʲμ GalateaTalk ˤ
UniDic ǤϤʤ ipadic ȤäǤޤ
Ͽñ¿ʤΤɤ߾夲ʤʸϤϸޤ
Ⱦ󤬰ڤʤΤǹʼ㲼ޤ
Ū˱ڤؤƤѤ

(1) www.chasen.aist-nara.ac.jp から ipadic-2.6.3 を
ダウンロードし，galatea-v3.0/morph の下に展開し，
make を実行する。

(2) ssm-ipadic.conf ファイルの作成

 $ cd galatea-v3.0/SSM

galatea-v3.0/SSM/ssm.conf を別名でコピーする。

 $ cp ssm.conf ssm-ipadic.conf

ssm-ipadic.conf の CHASEN-RC を変更する。

 変更前  CHASEN-RC ./chasenrc
 変更後  CHASEN-RC ./chasenrc.ipadic

(3) chasenrc ファイルの追加

chasenrc.ipadic (本ドキュメントの末尾にある) を SSM の下に
コピーする。
以下の箇所が設定されていることを確認する。

 (GRAMMAR  ../morph/ipadic-2.6.3)

(4) 実行方法

 $ ./gtalk -C ssm-ipadic.conf

あるいは，RUN スクリプトの先頭の $conf を

 $conf = "./ssm-ipadic.conf";

に変更して RUN を実行。

 $ ./RUN

(5) 必要に応じて，ソースファイルの修正

ʻηϤۤʤˤϡʤɤνŬڤ
Ԥʤˡʻ켭Υơ֥ɬפ롥

具体的には IPADIC に対応するために，
morph.c の hinshiTable を define している部分に
    { H_TOUTEN, "記号-読点" },
を追加する。

*** morph.ORG.c	2003-12-03 12:08:30.000000000 +0900
--- morph.c	2003-12-03 12:08:55.000000000 +0900
***************
*** 112,117 ****
--- 112,118 ----
  	{ H_KAKKO_HIRAKU,          "¾--̳" },	/* unidic-0 */
  	{ H_KAKKO_TOJIRU,          "¾--" },	/* unidic-0 */
  	{ H_SONOTA,                "¾" },
+ 	{ H_TOUTEN,                "記号-読点" },	/* ipadic */
  	{ H_MICHIGO,               "未知語" }
  
  };

変更後，コンパイルを実行する。

 $ make

[chasenrc.ipadic]
;;
;;  chasenrc for ipadic-2.6.3
;;
;; ܸ٥ȱѸ٥ɤ餫ꤹɤ
;;;
;;;  grammar.cha/ctypes.cha/cforms.cha location /文法ファイル
;;;
;(文法ファイル  /usr/local/lib/chasen/dic/ipadic)
(GRAMMAR  ../morph/ipadic-2.6.3)

;;;
;;;  dictionary /辞書
;;;
(DADIC        chadic)

;;;
;;;  POS for Unknown words /未知語品詞
;;;
;(未知語品詞 (名詞 サ変接続))
(UNKNOWN_POS (名詞 サ変接続))

;;;
;;;  output format /出力フォーマット
;;;
;(出力フォーマット "%m\t%y\t%M\t%U(%P-)\t%T \t%F \n")     ; default(-f)
;(OUTPUT_FORMAT "%m\t%y\t%M\t%U(%P-)\t%T \t%F \n")     ; default(-f)
;(OUTPUT_FORMAT "%m\t%y\t%M\t%P-\t%T \t%F \n")         ; POS name
;(OUTPUT_FORMAT "%m\t%?U/UNKNOWN/%y/\t%M\t%h/%t/%f\n") ; POS code
;(OUTPUT_FORMAT "%M\t%P-\n")
(OUTPUT_FORMAT "<W1 orth=\"%m\" pron=\"%?U/%m/%a/\" pos=\"%U(%P-)\"%?T/ cType=\"%T \"//%?F/ cForm=\"%F \"//%?I/ %i///>\n")

;;;
;;;  output format for compound words /複合語出力
;;;
;(複合語出力 "COMPOUND")      ; default
;(OUTPUT_COMPOUND "COMPOUND")      ; default
;(OUTPUT_COMPOUND "SEG")

;;;
;;;  BOS string /BOS文字列
;;;
;(BOS文字列 "BOS")      
;(BOS_STRING "BOS")      
(BOS_STRING "<S>\n")      

;;;
;;;  EOS string /EOS文字列
;;;
;(EOS文字列 "EOS")      
;(EOS_STRING "EOS")      
(EOS_STRING "</S>\n")

;;;
;;;  Connection cost for Undefined contexts /未定義連接コスト
;;;
;(未定義連接コスト 10000)
(DEF_CONN_COST 10000)

;;;
;;;  POS Weight /品詞コスト
;;;
;(品詞コスト
;	((*) 		1)
;	((UNKNOWN)    30000)
;)
(POS_COST
	((*) 		1)
	((UNKNOWN)    30000)
)

;;;
;;;  connection rules Weight /連接コスト重み
;;;
;(連接コスト重み 1)
(CONN_WEIGHT 1)

;;;
;;;  morphemes Weight /形態素コスト重み
;;;
;(形態素コスト重み 1)
(MORPH_WEIGHT 1)

;;;
;;;  cost width /コスト幅
;;;
;(コスト幅 0)
(COST_WIDTH	  0)

;;;
;;;  composite POSs /連結品詞
;;;
;(連結品詞 ((名詞 数))
;          ((記号 アルファベット)))
;(COMPOSIT_POS ((名詞 数))
;          ((記号 アルファベット)))

;;;
;;;  POSs as white space /空白品詞
;;;
;(空白品詞 (記号 空白))
;(SPACE_POS (記号 空白))

;;;
;;;  annotation POSs /注釈
;;;
;(注釈 (("<" ">") (記号 一般)))
;(ANNOTATION (("<" ">") (記号 一般)))
(ANNOTATION (("<" ">") "%m\n"))

;;;
;;;  delimiter for option -j /区切り文字
;;;
;(区切り文字 "。．、，！？.,!? ")
;(DELIMITER "。．、，！？.,!? ")

[end of chasenrc.ipadic]
