48 #include "EST_cmd_line.h"
// Forward declaration of the file-local training driver, so that main() can
// call it ahead of its definition further down.
55 static int scfg_train_main(
int argc,
char **argv);
// Program entry point: passes the command-line arguments straight on to
// scfg_train_main().
// NOTE(review): the braces and any statements following the call are not
// visible in this listing — confirm against the full file.
58 int main(
int argc,
char **argv)
// The int returned by scfg_train_main() is not used in this call.
61 scfg_train_main(argc,argv);
// Command-line driver for SCFG training. From the visible statements it:
// reads the output file name and heap size from the options, loads a grammar
// and a corpus, runs train_inout() when -method is "inout", and saves the
// grammar to the output file.
// NOTE(review): this listing omits lines (braces, declarations and the
// conditions guarding several statements below are not visible) — confirm
// against the full file before relying on these notes.
67 static int scfg_train_main(
int argc,
char **argv)
// Usage/help text, assembled by concatenating one string per option.
77 "Summary: Train a stochastic context free grammar from a (bracketed) corpus\n"+
78 "-grammar <ifile> Grammar file, one rule per line.\n"+
79 "-corpus <ifile> Corpus file, one bracketed sentence per line.\n"+
80 "-method <string> {inout}\n"+
81 " Method for training: inout.\n"+
82 "-passes <int> {50}\n"+
83 " Number of training passes.\n"+
84 "-startpass <int> {0}\n"+
85 " Starting at pass N.\n"+
86 "-spread <int> Spread training data over N passes.\n"+
87 "-checkpoint <int> Save grammar every N passes\n"+
88 "-heap <int> {210000}\n"+
89 " Set size of Lisp heap, needed for large corpora\n"+
90 "-o <ofile> Output file for trained grammar.\n",
// Output file name is taken from the -o option.
94 outfile = al.
val(
"-o");
// Initialise SIOD with the integer value of -heap (described in the help
// text above as the Lisp heap size).
98 siod_init(al.
ival(
"-heap"));
// Load the grammar from the file named by -grammar.
104 grammar.
load(al.
val(
"-grammar"));
// Error report for a missing grammar (guarding condition not visible here).
108 cerr <<
"scfg_train: no grammar specified" << endl;
// Load the training corpus from the file named by -corpus.
114 grammar.load_corpus(al.
val(
"-corpus"));
// Error report for a missing corpus (guarding condition not visible here).
118 cerr <<
"scfg_train: no corpus specified" << endl;
// Integer value of -spread; handed to train_inout() below.
123 spread = al.
ival(
"-spread");
// "inout" is the only training method this listing shows being handled.
127 if (al.
val(
"-method") ==
"inout")
// Integer value of -checkpoint; handed to train_inout() below.
131 checkpoint = al.
ival(
"-checkpoint");
// Run training with the pass count, starting pass, checkpoint interval,
// spread and output file name.
133 grammar.train_inout(al.
ival(
"-passes"),
134 al.
ival(
"-startpass"),
135 checkpoint,spread,outfile);
// Error report that echoes the unrecognised -method value.
139 cerr <<
"scfg_train: unknown training method \"" <<
140 al.
val(
"-method") <<
"\"" << endl;
// Save the grammar to the output file; any status other than write_ok is
// reported as a failure.
144 if (grammar.save(outfile) != write_ok)
146 cerr <<
"scfg_train: failed to write grammar to \"" <<
147 outfile <<
"\"" << endl;
EST_read_status load(const EST_String &filename)
Load grammar from named file.
A class used to train (and test) SCFGs; it is an extension of EST_SCFG.
int ival(const EST_String &rkey, int m=1) const
const int present(const K &rkey) const
Returns true if key is present.
const V & val(const K &rkey, bool m=0) const
Returns the value associated with the given key (const version).