Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
EST_lattice.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1995,1996 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Simon King */
34 /* Date : November 1996 */
35 /*-----------------------------------------------------------------------*/
36 /* Lattice/Finite State Network */
37 /* */
38 /*=======================================================================*/
39 
40 #include "EST_lattice.h"
41 #include <fstream>
42 #include <cstdlib>
43 
44 Lattice::Lattice()
45 {
46  tf=NULL;
47 }
48 
49 
50 Lattice::~Lattice()
51 {
52 
53 }
54 
56 Lattice::start_node()
57 {
58  if(nodes.head() != NULL )
59  return nodes(nodes.head());
60  else{
61  cerr << "LAttice has no nodes !" << endl;
62  return NULL;
63  }
64 
65 }
66 
67 #if 0
68 bool Lattice::build_qmap(Bigram &g, float error_margin)
69 {
70 
71  // very crude VQ (not vectors though)
72  // works best on log bigrams ?
73 
74  // to do : automatically determine error_margin
75 
76  int i,j;
77  EST_Litem *l_ptr;
78  bool flag;
79 
80  // first, build a list, then transfer to array
81  Tlist<float> list_qmap;
82 
83  qmap_error_margin = error_margin;
84 
85  for (i = 0; i < g.size(); ++i){
86  for (j = 0; j < g.size(); ++j){
87 
88  flag = false;
89  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
90  if(fabs(list_qmap(l_ptr) - g.p(i,j)) <= error_margin){
91  flag = true;
92  break;
93  }
94 
95  if(!flag)
96  list_qmap.append(g.p(i,j));
97 
98  }
99  }
100 
101  // special zero (within error_margin) entry, if not already there
102  flag = false;
103  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
104  if(fabs(list_qmap(l_ptr)) <= error_margin){
105  flag = true;
106  break;
107  }
108 
109  if(!flag)
110  list_qmap.append(0);
111 
112  qsort(list_qmap);
113 
114  i=0;
115  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
116  i++;
117 
118  // transfer to array
119  qmap.resize(i);
120  i=0;
121  for(l_ptr=list_qmap.head();l_ptr != 0; l_ptr=l_ptr->next())
122  qmap(i++) = list_qmap(l_ptr);
123 
124  list_qmap.clear();
125 
126 
127  cerr << "Built qmap with " << i << " entries" << endl;
128 
129  return true;
130 }
131 
132 
133 bool
134 Lattice::build_nmap(Bigram &g)
135 {
136 
137 
138  int j;
139  //name_map_entry_t entry;
140  EST_Litem *l_ptr;
141 
142  // first, build a list, then transfer to array
143  Tlist<EST_String> list_nmap;
144 
145  // wordlist must not have !ENTER of !EXIT in it
146  for (j = 0; j < g.size() - 2; ++j){
147  if(g.words(j) == "!ENTER"){
148  cerr << "Wordlist contains special word !ENTER" << endl;
149  return false;
150  }
151  if(g.words(j) == "!EXIT"){
152  cerr << "Wordlist contains special word !EXIT" << endl;
153  return false;
154  }
155  }
156 
157 
158 
159 
160  // add all words
161  for (j = 0; j < g.size() - 2; ++j)
162  list_nmap.append(g.words(j));
163 
164  // special enter and exit
165  list_nmap.append("!ENTER");
166  list_nmap.append("!EXIT");
167 
168  qsort(list_nmap);
169 
170  cerr << list_nmap << endl;
171 
172  j=0;
173  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
174  j++;
175 
176  // transfer to array
177  nmap.resize(j);
178  j=0;
179  for(l_ptr=list_nmap.head();l_ptr != 0; l_ptr=l_ptr->next())
180  nmap(j++) = list_nmap(l_ptr);
181 
182  list_nmap.clear();
183  cerr << "Built nmap with " << j << " entries" << endl;
184 
185  return true;
186 }
187 
188 
189 bool
190 Lattice::construct_alphabet(Bigram &g)
191 {
192 
193  int i,j,enteri,exiti,aindex,qindex,nindex,count;
194  symbol_t *sym;
195  EST_Litem *a_ptr;
196  bool flag;
197 
198  if(!build_qmap(g,1.0e-02) || !build_nmap(g)) // to do : fix
199  return false;
200 
201 
202  // temporary list
203  Tlist<symbol_t*> list_alphabet;
204 
205  // alphabet consists of all occurring combinations of
206  // nmap and qmap entries
207 
208  enteri = g.size() - 2;
209  exiti = g.size() - 1;
210 
211  aindex=0;
212 
213  // order is this way for speed
214  for (j = 0; j < g.size(); ++j){
215 
216  cerr << "constructing alphabet " << (int)((float)j*100/(float)g.size()) << "% \r";
217 
218  if(j == enteri)
219  nindex = nmap_name_to_index("!ENTER");
220  else if(j == exiti)
221  nindex = nmap_name_to_index("!EXIT");
222  else
223  nindex = nmap_name_to_index(g.words(j)); // check !!
224 
225  for (i = 0; i < g.size(); ++i){
226 
227  qindex = qmap_value_to_index(g.p(i,j));
228 
229  // have we got sym already ?
230  flag=false;
231  for(a_ptr=list_alphabet.tail();a_ptr!=NULL;a_ptr=a_ptr->prev()){
232  if( (list_alphabet(a_ptr)->nmap_index == nindex) &&
233  (list_alphabet(a_ptr)->qmap_index == qindex) ){
234  flag=true;
235  break;
236  }
237 
238  // since words are added in order, stop
239  // when we get to previous word
240  if(list_alphabet(a_ptr)->nmap_index != nindex)
241  break;
242  }
243 
244 
245  if(!flag){
246  sym = new symbol_t;
247  sym->qmap_index=qindex;
248  sym->nmap_index=nindex;
249  list_alphabet.append(sym);
250  }
251  }
252  }
253 
254  // special ENTER with prob of 1 symbol
255  nindex = nmap_name_to_index("!ENTER");
256  qindex = qmap_value_to_index(0); // log prob
257  flag=false;
258  for(a_ptr=list_alphabet.tail();a_ptr!=NULL;a_ptr=a_ptr->prev())
259  if( (list_alphabet(a_ptr)->nmap_index == nindex) &&
260  (list_alphabet(a_ptr)->qmap_index == qindex) ){
261  flag=true;
262  break;
263  }
264 
265  if(!flag){
266  sym = new symbol_t;
267  sym->qmap_index=qindex;
268  sym->nmap_index=nindex;
269  list_alphabet.append(sym);
270  }
271 
272  // and special no-label label (e-move)
273  sym = new symbol_t;
274  sym->qmap_index=-1;
275  sym->nmap_index=-1;
276  list_alphabet.append(sym);
277 
278  ptr_qsort(list_alphabet);
279 
280  count=0;
281  for(a_ptr=list_alphabet.head();a_ptr != NULL; a_ptr=a_ptr->next())
282  count++;
283 
284 
285  alphabet.resize(count);
286  count=0;
287  for(a_ptr=list_alphabet.head();a_ptr != NULL; a_ptr=a_ptr->next())
288  alphabet(count++) = *(list_alphabet(a_ptr));
289 
290  // .. to do - delete syms
291  list_alphabet.clear();
292 
293  e_move_symbol_index = alphabet_index_lookup(-1,-1);
294 
295  cerr << "Alphabet has " << count << " symbols " << endl;
296 
297  return true;
298 }
299 
300 
301 bool
302 Lattice::construct(Bigram &g)
303 {
304 
305  // every word becomes a node, every non-zero transition becomes an arc
306  // eliminate any transitions into ENTER and out of EXIT
307  int i,j,k;
308  EST_Litem *n_ptr,*a_ptr;
309  Node *n;
310  Arc *a;
311  Node *from,*to;
312  int symbol;
313  // unfortunate, but fixed in Bigram class
314  int enteri = g.size() - 2;
315  int exiti = g.size() - 1;
316  int from_name,to_name;
317 
318  // single entry node, no name since no arcs in
319  // single arc out to node called "!ENTER"
320 
321  Node *enter_node = new Node;
322  enter_node->name.append(-1); // no name !
323  nodes.append(enter_node);
324 
325  // make nodes - one for every entry in nmap
326  for(i=0; i<nmap.n(); i++){
327 
328  n = new Node;
329  n->name.append(i); // index into nmap
330  nodes.append(n);
331 
332  if(nmap(i) == "!ENTER"){ // temporary hack
333 
334  symbol = alphabet_index_lookup(i,qmap_value_to_index(0)); // for log probs !!!
335  a = new Arc;
336  a->label = symbol;
337  a->to = n;
338  enter_node->arcs_out.append(a);
339 
340  }
341 
342 
343  // keep note of final node
344  if(nmap(i) == "!EXIT") // temporary hack
345  final_nodes.append(n);
346 
347  }
348 
349  // make arcs
350  k=0;
351  for (i = 0; i < g.size(); ++i){
352  cerr << "building lattice " << (int)((float)i*100/(float)g.size()) << "% \r";
353  for (j = 0; j < g.size(); ++j){
354 
355  // ignore any transitions into ENTER or out of EXIT
356  //if((g.p(i, j) > 0) && - for probs only, can't do for log probs
357  if((j != enteri) && (i != exiti)){
358 
359  // find from and to nodes
360  from=NULL;
361  to=NULL;
362 
363  if(i == enteri)
364  from_name = nmap_name_to_index("!ENTER");
365  else
366  from_name = nmap_name_to_index(g.words(i));
367 
368  if(j == exiti)
369  to_name = nmap_name_to_index("!EXIT");
370  else
371  to_name= nmap_name_to_index(g.words(j));
372 
373  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
374  int name = nodes(n_ptr)->name(nodes(n_ptr)->name.head());
375  if(name == from_name)
376  from = nodes(n_ptr);
377  if(name == to_name)
378  to = nodes(n_ptr);
379  }
380 
381  if(from==NULL){
382  cerr << "Couldn't find 'from' node " << nmap_index_to_name(from_name) << endl;
383  return false;
384  }
385 
386  if(to==NULL){
387  cerr << "Couldn't find 'to' node " << nmap_index_to_name(to_name) << endl;
388  return false;
389  }
390 
391  // get arc symbol - speed up this fn !
392  symbol = alphabet_index_lookup(to_name,qmap_value_to_index(g.p(i,j)));
393  if(symbol < 0){
394  cerr << "Couldn't lookup symbol in alphabet !" << endl;
395  return false;
396  }
397 
398  a = new Arc;
399  a->label = symbol;
400  a->to = to;
401 
402  from->arcs_out.append(a);
403 
404 
405  }
406  }
407  }
408  cerr << " \r";
409 
410  int c1=0,c2=0,c3=0;
411  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
412  c1++;
413  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
414  c2++;
415  }
416 
417  for (n_ptr = final_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
418  c3++;
419 
420  cerr << "NFA has " << c1
421  << " nodes (" << c3 << " final)"
422  << " and " << c2 << " arcs" << endl;
423 
424  return true;
425 }
426 #endif
427 
428 bool Lattice::determinise()
429 {
430  // very simple, but potentially time/memory consuming
431 
432  // make a new lattice whose nodes are all permutations
433  // of the nodes in the old lattice
434  // BUT : only make the nodes which are actually required
435  // (otherwise number of new nodes = 2^(number of old nodes)
436 
437  // we will call the old lattice NFA and the new one DFA
438 
439  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*l_ptr;
440  Node *new_node;
441  EST_TList<Node*> new_nodes;
442  EST_TList<Node*> new_final_nodes;
443  EST_TList<Arc*> new_arcs;
444  EST_TList<Arc*> arc_list;
445  EST_TList<int> new_name;
446  int c,count,current_label,new_arcs_made;
447  bool is_final;
448 
449  // first, sort nodes' lists of arcs by label to make
450  // it easier
451  cerr << "sorting arc lists" << endl;
452  sort_arc_lists();
453 
454  cerr << "making new nodes" << endl;
455 
456  // - for every node in the NFA, look at the outgoing arcs with
457  // the same label
458  // - make a DFA node which is a combination of the NFA nodes
459  // which are the destinations of those arcs
460 
461  // there are more DFA nodes made later
462 
463  // enter node is special case, it has no in-going arcs
464  // so will not be made in the following loop
465 
466  // for now, assume one enter node at head of list ... hmmmmm
467  new_node = new Node;
468  if(new_node == NULL){
469  cerr << "Could not allocate any more memory";
470  return false;
471  }
472  new_node->name = nodes(nodes.head())->name;
473  new_nodes.append(new_node);
474 
475 
476 
477  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
478 
479  for (a_ptr = nodes(n_ptr)->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
480 
481  current_label = nodes(n_ptr)->arcs_out(a_ptr)->label;
482 
483  // make list of arc destinations along arcs with this label
484  is_final=false;
485  new_name.clear();
486  new_name = nodes(n_ptr)->arcs_out(a_ptr)->to->name;
487  if(final(nodes(n_ptr)->arcs_out(a_ptr)->to) )
488  is_final=true;
489  while((a_ptr != NULL) &&
490  (a_ptr->next() != NULL) &&
491  (nodes(n_ptr)->arcs_out(a_ptr->next())->label == current_label)){
492  merge_sort_unique(new_name,nodes(n_ptr)->arcs_out(a_ptr->next())->to->name);
493 
494  if( !is_final && final(nodes(n_ptr)->arcs_out(a_ptr->next())->to) )
495  is_final=true;
496 
497  a_ptr=a_ptr->next();
498 
499  }
500 
501  // make new node with this name, unless we already have one
502  bool flag=false;
503  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
504  if( new_nodes(n2_ptr)->name == new_name ){
505  flag=true;
506  break;
507  }
508 
509  if(!flag){ // need to make new node
510 
511  new_node = new Node;
512  if(new_node == NULL){
513  cerr << "Could not allocate any more memory";
514  count=0;
515  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
516  count++;
517  cerr << " after making " << count << " nodes" << endl;
518  return false;
519  }
520 
521  new_node->name = new_name;
522  new_nodes.append(new_node);
523 
524  if(is_final)
525  new_final_nodes.append(new_node);
526  }
527 
528  } // loop round outgoing arcs for this node
529 
530  } // loop round NFA nodes
531 
532 
533  // now, for every node in the DFA, its 'transition function' (i.e. outgoing arcs)
534  // is the sum of the transition functions of its constituent nodes from
535  // the NFA
536 
537  c=0;
538  new_arcs_made=0;
539  for (n_ptr = new_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
540 
541  c++;
542  cerr << "Processing node " << c
543  << " arcs=" << new_arcs_made << " \r";
544 
545  // find constituent nodes in NFA (only works if NFA names are single ints !)
546  arc_list.clear();
547  for(l_ptr=new_nodes(n_ptr)->name.head(); l_ptr != 0; l_ptr = l_ptr->next()){
548 
549  for(n2_ptr = nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next()){
550 
551  if(nodes(n2_ptr)->name(nodes(n2_ptr)->name.head()) ==
552  new_nodes(n_ptr)->name(l_ptr))
553  arc_list += nodes(n2_ptr)->arcs_out;
554 
555 
556  }
557  }
558 
559  // now we have a list of all the NFA nodes we can 'get to'
560  // from this DFA node, sort by label and find the
561  // DFA node for each label
562 
563  // or rather, take the arc list from above, and collapse
564  // all arcs with the same label into one arc to the DFA
565  // node which is the equivalent of the NFA nodes at the
566  // end of those arcs
567 
568  sort_by_label(arc_list);
569 
570  for(a_ptr=arc_list.head();a_ptr!=NULL;a_ptr=a_ptr->next()){
571 
572  current_label=arc_list(a_ptr)->label;
573  //cerr << " label " << current_label;
574 
575  is_final=false;
576  EST_TList<int> new_name2;
577  new_name2 = arc_list(a_ptr)->to->name;
578  if(final(arc_list(a_ptr)->to) )
579  is_final=true;
580  while((a_ptr != NULL) &&
581  (a_ptr->next() != NULL) &&
582  (arc_list(a_ptr->next())->label == current_label)){
583  merge_sort_unique(new_name2,arc_list(a_ptr)->to->name);
584  if( !is_final && final(arc_list(a_ptr)->to) )
585  is_final=true;
586  a_ptr=a_ptr->next();
587  }
588 
589  //cerr << " -> " << new_name2;
590 
591  // find DFA node with that name
592  bool flag=false;
593  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
594  if( new_nodes(n2_ptr)->name == new_name2 ){
595  flag=true;
596  break;
597  }
598 
599  if(!flag){ // failed to make node previously, do it now
600  new_node = new Node;
601  if(new_node == NULL){
602  cerr << "Could not allocate any more memory";
603  count=0;
604  for (n2_ptr = new_nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next())
605  count++;
606  cerr << " after making " << count << " nodes" << endl;
607  return false;
608  }
609 
610  new_node->name = new_name2;
611  new_nodes.append(new_node);
612  link(new_nodes(n_ptr),new_node,current_label);
613 
614  if(is_final)
615  new_final_nodes.append(new_node);
616 
617  }else{
618 
619  link(new_nodes(n_ptr),new_nodes(n2_ptr),current_label);
620 
621  }
622 
623  new_arcs_made++;
624  }
625 
626 
627  } // loop round DFA nodes
628 
629  cerr << endl;
630 
631  // delete old nodes, and replace with new nodes
632  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
633  delete nodes(n_ptr);
634  nodes.clear();
635  nodes=new_nodes;
636  new_nodes.clear();
637 
638  final_nodes.clear();
639  final_nodes=new_final_nodes;
640  new_final_nodes.clear();
641 
642  int c1=0,c2=0,c3=0;
643  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
644  c1++;
645  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
646  c2++;
647  }
648 
649  for (n_ptr = final_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
650  c3++;
651 
652  cerr << "DFA has " << c1
653  << " nodes (" << c3 << " final)"
654  << " and " << c2 << " arcs" << endl;
655 
656 
657  return true;
658 }
659 
660 bool
661 Lattice::link(Node *n1, Node *n2, int label)
662 {
663 
664  //if(l==NULL)
665  //l=&arcs;
666 
667  // create arc between two nodes
668  // in direction n1,n2
669  Arc *new_arc;
670 
671  if( (n1==NULL) || (n2==NULL) ){
672  cerr << "Can't link null nodes" << endl;
673  return false;
674  }
675 
676 
677 
678  new_arc = new Arc;
679  //new_arc->from = n1;
680  new_arc->to = n2;
681  new_arc->label = label;
682 
683  n1->arcs_out.append(new_arc);
684  //n2->arcs_in.append(new_arc);
685  //l->append(new_arc);
686 
687  return true;
688 }
689 
690 void
691 Lattice::sort_arc_lists()
692 {
693  EST_Litem *n_ptr;
694 
695  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
696 
697  // can't sort nodes(n_ptr)->arcs_out directly
698  // since it is a list of pointers
699 
700  sort_by_label(nodes(n_ptr)->arcs_out);
701 
702  }
703 
704 }
705 
706 void
707 sort_by_label(EST_TList<Lattice::Arc*> &l){
708  ptr_qsort(l);
709 }
710 
711 
712 bool
713 Lattice::build_distinguished_state_table(bool ** &dst)
714 {
715 
716  int i,j;
717  EST_Litem *n_ptr,*n2_ptr;
718 
719  int num_nodes = nodes.length();
720  // not every element will be used
721  dst = new bool*[num_nodes];
722  if(dst==NULL){
723  cerr << "Not enough memory" << endl;
724  return false;
725  }
726  for(i=0;i<num_nodes;i++){
727  dst[i] = new bool[num_nodes];
728  if(dst[i]==NULL){
729  cerr << "Not enough memory" << endl;
730  return false;
731  }
732  for(j=0;j<num_nodes;j++)
733  dst[i][j] = false;
734 
735  }
736 
737  cerr << "final/non-final scan";
738  // any final/non-final (or non-final/final) pairs are distinguished immediately
739  for(i=0,n_ptr=nodes.head();n_ptr->next()!=NULL;n_ptr=n_ptr->next(),i++){
740  for(j=i+1,n2_ptr=n_ptr->next();n2_ptr!=NULL;n2_ptr=n2_ptr->next(),j++){
741 
742  if(final(nodes(n_ptr)) && !final(nodes(n2_ptr)))
743  dst[i][j] = true;
744  else if(!final(nodes(n_ptr)) && final(nodes(n2_ptr)))
745  dst[i][j] = true;
746 
747  }
748  }
749  cerr << "\r \r";
750 
751  // two possible methods, depending on network representation
752  // for sparse networks, use actual nodes and their lists of arcs
753  //build_distinguished_state_table_direct(dst);
754 
755  // for dense networks, use a transition function matrix
756  // which will be faster but more memory consuming
757 
758  if(!build_transition_function()){
759  cerr << "Couldn't build transition function" << endl;
760  return false;
761  }
762 
763  if(!build_distinguished_state_table_from_transition_function(dst)){
764  cerr << "Couldn't build dst from transition function" << endl;
765  return false;
766  }
767 
768  // delete tf, since it will be wrong for minimised net
769  for(i=0;i<num_nodes;i++)
770  delete [] tf[i];
771  delete [] tf;
772  tf=NULL;
773 
774  return true;
775 }
776 
777 bool
778 Lattice::build_distinguished_state_table_direct(bool ** &dst)
779 {
780 
781  int i,j,i1,j1,scan_count,this_symbol;
782  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*s_ptr;
783  bool flag,flag2;
784 
785  // carry out repeated scans of the distinguished state table until no more
786  // cells are set true
787 
788  flag=true;
789  scan_count=0;
790  while(flag){
791  flag=false;
792 
793  scan_count++;
794 
795  for(i=0,n_ptr=nodes.head();n_ptr->next()!=NULL;n_ptr=n_ptr->next(),i++){
796 
797  for(j=i+1,n2_ptr=n_ptr->next();n2_ptr!=NULL;n2_ptr=n2_ptr->next(),j++){
798 
799  cerr << "scan " << scan_count << " : " << i << "," << j << " \r";
800 
801  if(!dst[i][j]){
802 
803  // go through alphabet, or rather just those symbols
804  // occurring on the arcs out of this pair of nodes
805 
806  flag2=true;
807  s_ptr=nodes(n_ptr)->arcs_out.head();
808  while(s_ptr != NULL){
809 
810 
811  if(flag2){ // we are going through symbols on arcs out of 'nodes(n_ptr)'
812  this_symbol=nodes(n_ptr)->arcs_out(s_ptr)->label;
813  i1 = node_index(nodes(n_ptr)->arcs_out(s_ptr)->to);
814 
815  j1=-1;
816  for(a_ptr=nodes(n2_ptr)->arcs_out.head();
817  a_ptr!=NULL; a_ptr=a_ptr->next())
818  if(nodes(n2_ptr)->arcs_out(a_ptr)->label == this_symbol)
819  j1 = node_index(nodes(n2_ptr)->arcs_out(a_ptr)->to);
820 
821  }else{ // we are going through symbols on arcs out of 'nodes(n2_ptr)'
822  this_symbol=nodes(n2_ptr)->arcs_out(s_ptr)->label;
823  j1 = node_index(nodes(n2_ptr)->arcs_out(s_ptr)->to);
824 
825  i1=-1;
826  for(a_ptr=nodes(n_ptr)->arcs_out.head();
827  a_ptr!=NULL; a_ptr=a_ptr->next())
828  if(nodes(n_ptr)->arcs_out(a_ptr)->label == this_symbol)
829  i1 = node_index(nodes(n_ptr)->arcs_out(a_ptr)->to);
830  }
831 
832 
833  // States (nodes) are distinguished if, for any symbol, they
834  // have transitions (arcs) to a pair of distinguished states.
835  // This includes the case where, for any symbol, one state
836  // has a transition and the other does not
837 
838  if( ( (i1>=0) && (j1>=0) && dst[i1][j1]) ||
839  ( (i1>=0) && (j1<0) ) ||
840  ( (j1>=0) && (i1<0) ) )
841  {
842  dst[i][j] = true;
843  flag=true;
844  break; // out of s_ptr loop
845 
846  }
847 
848  s_ptr=s_ptr->next();
849  if( (s_ptr==NULL) && (flag2) ){ // now go down second list
850  s_ptr=nodes(n2_ptr)->arcs_out.head();
851  flag2=false;
852  }
853 
854  }
855  }
856  }
857  }
858  }
859 
860  return true;
861 }
862 
863 bool
864 Lattice::build_distinguished_state_table_from_transition_function(bool ** &dst)
865 {
866  int i,j,i2,j2,k,scan_count;
867  int num_nodes = nodes.length();
868  int num_symbols = alphabet.n();
869  bool flag;
870 
871  flag=true;
872  scan_count=0;
873  while(flag){
874  flag=false;
875 
876  scan_count++;
877 
878  for(i=0;i<num_nodes-1;i++){
879 
880  cerr << "scan " << scan_count << " : row "
881  << i << " \r";
882 
883  for(j=i+1;j<num_nodes;j++){
884 
885  if( !dst[i][j] ){
886 
887  for(k=0;k<num_symbols;k++){
888 
889  i2 = tf[i][k];
890  j2 = tf[j][k];
891 
892  if((i2<0) && (j2>=0)){
893  dst[i][j] = true;
894  flag=true;
895  break;
896 
897  }else if((j2<0) && (i2>=0)){
898  dst[i][j] = true;
899  flag=true;
900  break;
901 
902  }else if( (i2>0) && (j2>0) && dst[i2][j2] ){
903  dst[i][j] = true;
904  flag=true;
905  break;
906  }
907 
908  }
909  }
910  }
911  }
912  }
913 
914  return true;
915 }
916 
917 bool
918 Lattice::minimise()
919 {
920 
921  // method, make distinguished state table
922  // scan all pairs of states (a.k.a. nodes)
923 
924 
925  int i,j;
926  EST_Litem *n_ptr,*n2_ptr,*n3_ptr,*a_ptr;
927  int num_nodes = nodes.length();
928  bool **dst = NULL; // distinguished state table
929  bool flag;
930 
931  if(!build_distinguished_state_table(dst)){
932  cerr << "Couldn't build distinguished state table" << endl;
933  return false;
934  }
935 
936  int count=0,count2=0;
937  for(i=0;i<num_nodes-1;i++)
938  for(j=i+1;j<num_nodes;j++)
939  if(!dst[i][j])
940  count++;
941  else
942  count2++;
943 
944  cerr << "There are " << count << " undistinguished pairs of nodes and "
945  << count2 << " distinguished pairs" << endl;
946 
947 
948  // make lists of nodes to be merged
949  EST_TList<Node *> merge_list;
950 
951 
952 
953  flag=true;
954  while(flag){
955  flag=false;
956 
957  merge_list.clear();
958  for(i=0,n_ptr=nodes.head();n_ptr->next()!=NULL;n_ptr=n_ptr->next(),i++){
959 
960  cerr << "merge, processing row " << i << " \r";
961 
962  for(j=i+1,n2_ptr=n_ptr->next();n2_ptr!=NULL;n2_ptr=n2_ptr->next(),j++){
963 
964  if(!dst[i][j]){
965 
966  // is the merge list empty ?
967  if(merge_list.head() == NULL){
968  // put this pair of nodes on merge list
969  merge_list.append(nodes(n_ptr));
970  merge_list.append(nodes(n2_ptr));
971  dst[i][j] = true;
972 
973  }else{ // merge list already has some items on it
974 
975  // see if either of this pair of nodes is on the merge list,
976  // and if so add the other node in the pair
977  bool add1=false,add2=false;
978  for(n3_ptr=merge_list.head();n3_ptr!=NULL;n3_ptr=n3_ptr->next()){
979  if(merge_list(n3_ptr) == nodes(n_ptr))
980  add2=true;
981  if(merge_list(n3_ptr) == nodes(n2_ptr))
982  add1=true;
983  }
984 
985 
986  if(add1 && !add2){
987  merge_list.append(nodes(n_ptr));
988  dst[i][j] = true;
989  }else if(add2 && !add1){
990  merge_list.append(nodes(n2_ptr));
991  dst[i][j] = true;
992  }
993 
994 
995  }
996  }
997  }
998  }
999 
1000  // anything on the merge list ?
1001  if(merge_list.head() != NULL){
1002 
1003  // so merge them
1004  count=0;
1005  for(n_ptr=merge_list.head();n_ptr!=NULL;n_ptr=n_ptr->next())
1006  count++;
1007  cerr << "merging " << count << " nodes out of ";
1008 
1009  count=0;
1010  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next())
1011  count++;
1012  cerr << count;
1013 
1014  merge_nodes(merge_list);
1015  flag=true;
1016 
1017  count=0;
1018  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next())
1019  count++;
1020  cerr << " leaving " << count << endl;
1021 
1022 
1023  }
1024 
1025 
1026  }
1027 
1028  int c1=0,c2=0;
1029  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1030  c1++;
1031  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1032  c2++;
1033  }
1034 
1035  cerr << "Minimum state DFA has " << c1
1036  << " nodes and " << c2 << " arcs " << endl;
1037 
1038  merge_arcs();
1039 
1040  c1=0,c2=0;
1041  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1042  c1++;
1043  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1044  c2++;
1045  }
1046 
1047  cerr << "Pruned minimum state DFA has " << c1
1048  << " nodes and " << c2 << " arcs" << endl;
1049 
1050  for(i=0;i<num_nodes;i++)
1051  delete [] dst[i];
1052  delete [] dst;
1053 
1054  return true;
1055 }
1056 
1057 
1058 void
1059 Lattice::merge_nodes(EST_TList<Node*> &l)
1060 {
1061 
1062  if(l.head() == NULL)
1063  return;
1064 
1065  // make a new node with all node labels of old nodes
1066  // and all arcs of old nodes
1067 
1068  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*a2_ptr;
1069  Node *new_node;
1070  new_node = new Node;
1071 
1072 
1073  // to do .. deal with final_nodes list too ....
1074 
1075 
1076 
1077  for(n_ptr=l.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1078 
1079  // get arcs
1080  for(a_ptr=l(n_ptr)->arcs_out.head();a_ptr!=NULL;a_ptr=a_ptr->next())
1081  new_node->arcs_out.append(l(n_ptr)->arcs_out(a_ptr));
1082 
1083  // get name
1084  merge_sort_unique(new_node->name,l(n_ptr)->name);
1085 
1086  // find arcs into old nodes and make them go into new node
1087  for(n2_ptr=nodes.head();n2_ptr!=NULL;n2_ptr=n2_ptr->next()){
1088  for(a2_ptr=nodes(n2_ptr)->arcs_out.head();a2_ptr!=NULL;a2_ptr=a2_ptr->next()){
1089  if(nodes(n2_ptr)->arcs_out(a2_ptr)->to == l(n_ptr))
1090  nodes(n2_ptr)->arcs_out(a2_ptr)->to = new_node;
1091  }
1092  }
1093 
1094  }
1095 
1096 
1097  // delete old nodes, but not arcs
1098  for(n_ptr=l.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1099  for(n2_ptr=nodes.head();n2_ptr!=NULL;n2_ptr=n2_ptr->next())
1100  if(nodes(n2_ptr) == l(n_ptr)){
1101  nodes(n2_ptr)->name.clear();
1102  nodes(n2_ptr)->arcs_out.clear();
1103  delete nodes(n2_ptr);
1104  nodes.remove(n2_ptr);
1105  }
1106  }
1107 
1108  nodes.append(new_node);
1109 
1110 }
1111 
1112 void
1113 Lattice::merge_arcs()
1114 {
1115 
1116  EST_Litem *n_ptr,*a_ptr,*a2_ptr;
1117  EST_TList<Arc*> merge_list;
1118  int count=0,count2;
1119 
1120  // find repeated arcs with the same label between two nodes
1121  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1122 
1123  count2=0;
1124  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1125  count2++;
1126 
1127  cerr << "merging arcs from node " << ++count
1128  << ", before:" << count2;
1129 
1130  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr->next()!=NULL; a_ptr=a_ptr->next()){
1131 
1132  merge_list.clear();
1133  for(a2_ptr=a_ptr->next();a2_ptr!=NULL; a2_ptr=a2_ptr->next())
1134 
1135  if((nodes(n_ptr)->arcs_out(a_ptr)->label ==
1136  nodes(n_ptr)->arcs_out(a2_ptr)->label) &&
1137 
1138  (nodes(n_ptr)->arcs_out(a_ptr)->to ==
1139  nodes(n_ptr)->arcs_out(a2_ptr)->to) ){
1140 
1141  delete nodes(n_ptr)->arcs_out(a2_ptr);
1142  a2_ptr=nodes(n_ptr)->arcs_out.remove(a2_ptr);
1143 
1144  }
1145 
1146  }
1147 
1148  count2=0;
1149  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1150  count2++;
1151 
1152  cerr<< ", after:" << count2 << endl;
1153 
1154  }
1155 
1156  cerr << " \r" << endl;
1157 
1158 }
1159 
1160 
1161 void
1162 Lattice::prune_arcs(Node *node, EST_TList<Arc*> arcs)
1163 {
1164 
1165  int count=0;
1166  EST_Litem *a_ptr;
1167  for(a_ptr=arcs.head(); a_ptr != 0; a_ptr=a_ptr->next() ){
1168  prune_arc(node, arcs(a_ptr));
1169  count++;
1170  }
1171  //cerr << "pruned " << count << " arcs" << endl;
1172 }
1173 
1174 
1175 void
1176 Lattice::prune_arc(Node *node, Arc *arc)
1177 {
1178  remove_arc_from_nodes_out_list(node,arc);
1179  delete arc;
1180 }
1181 
1182 /*
1183 void
1184 Lattice::remove_arc_from_nodes_in_list(Node *n, Arc *a)
1185 {
1186  EST_Litem *a_ptr;
1187  for (a_ptr = n->arcs_in.head(); a_ptr != 0; a_ptr = a_ptr->next())
1188  if(n->arcs_in(a_ptr) == a)
1189  n->arcs_in.remove(a_ptr);
1190 }
1191 */
1192 
1193 void
1194 Lattice::remove_arc_from_nodes_out_list(Node *n, Arc *a)
1195 {
1196  EST_Litem *a_ptr;
1197  for (a_ptr = n->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next())
1198  if(n->arcs_out(a_ptr) == a)
1199  n->arcs_out.remove(a_ptr);
1200 }
1201 
1202 /* not used
1203 bool
1204 Lattice::is_enter_node(Node *n)
1205 {
1206  // contains "!ENTER" in its list of names
1207  EST_Litem *l_ptr;
1208  for(l_ptr=n->name.head(); l_ptr != 0; l_ptr=l_ptr->next())
1209  if(n->name(l_ptr) == nmap_name_to_index("!ENTER")) // temporary !!
1210  return true;
1211  return false;
1212 }
1213 */
1214 
1215 /* Superseded now we have list of final nodes
1216 bool
1217 Lattice::is_exit_node(Node *n)
1218 {
1219  EST_Litem *l_ptr;
1220  for(l_ptr=n->name.head(); l_ptr != 0; l_ptr=l_ptr->next())
1221  if(n->name(l_ptr) == nmap_name_to_index("!EXIT")) // temporary !!
1222  return true;
1223  return false;
1224 }
1225 */
1226 
1227 EST_String
1228 Lattice::nmap_index_to_name(int index)
1229 {
1230  if(index < nmap.n())
1231  return nmap(index);
1232  else{
1233  cerr << "Warning : nmap index " << index << " out of range" << endl;
1234  return EST_String("!error!");
1235  }
1236 
1237 }
1238 
1239 int
1240 Lattice::nmap_name_to_index(const EST_String &name)
1241 {
1242  int i,j,mid;
1243 
1244  // binary search
1245  i=0;
1246  j=nmap.n()-1;
1247 
1248  while(true){
1249 
1250  mid = (i+j)/2;
1251 
1252  if(name > nmap(mid))
1253  i = mid;
1254 
1255  else if(name < nmap(mid))
1256  j = mid;
1257 
1258  else{ // name == nmap(mid)
1259  return mid; // lucky strike
1260  }
1261 
1262  if(i==j){
1263  if(name == nmap(i))
1264  return i;
1265  else{
1266  cerr << "Lattice::nmap_name_to_index failed for '"
1267  << name << "'" << endl;
1268  return -1;
1269  }
1270 
1271  }else if(j==i+1){
1272 
1273  if(name == nmap(i))
1274  return i;
1275  else if(name == nmap(j))
1276  return j;
1277  else{
1278  cerr << "Lattice::nmap_name_to_index failed for '"
1279  << name << "'" << endl;
1280  return -1;
1281  }
1282 
1283  }
1284 
1285  }
1286 
1287  return -1;
1288 }
1289 
1290 
1291 float
1292 Lattice::qmap_index_to_value(int index)
1293 {
1294  if(index < qmap.n())
1295  return qmap(index);
1296  else{
1297  cerr << "Warning : qmap index " << index << " out of range" << endl;
1298  return 1;
1299  }
1300 }
1301 
1302 
1303 int
1304 Lattice::qmap_value_to_index(float value)
1305 {
1306  int i,j,mid;
1307 
1308  // binary search
1309  i=0;
1310  j=qmap.n()-1;
1311 
1312  while(true){
1313 
1314  mid = (i+j)/2;
1315 
1316  if(value > qmap(mid))
1317  i = mid;
1318 
1319  else if(value < qmap(mid))
1320  j = mid;
1321 
1322  else
1323  return mid; // lucky strike
1324 
1325  if(i==j)
1326  return i;
1327 
1328  else if(j==i+1){
1329 
1330  if( fabs(qmap(i) - value) < fabs(qmap(j) - value) )
1331  return i;
1332  else
1333  return j;
1334  }
1335 
1336  }
1337  return 0;
1338 }
1339 
1340 int
1341 Lattice::node_index(Node *n)
1342 {
1343  EST_Litem *n_ptr;
1344  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
1345  if(nodes(n_ptr) == n)
1346  return nodes.index(n_ptr);
1347 
1348  }
1349 
1350  //cerr << "Lattice::node_index(Node *n) couldn't find index of node " << n->name;
1351 
1352  return -1;
1353 }
1354 
1355 
1356 
1357 
1358 bool
1359 Lattice::expand()
1360 {
1361 
1362  // keep HTK happy - it can't handle multiple arcs into a node
1363  // with different word labels
1364 
1365 
1366  EST_Litem *n_ptr,*n2_ptr,*a_ptr,*w_ptr;
1367  int word;
1368  EST_TList<int> word_list;
1369  Node *new_node;
1370  Arc *new_arc;
1371  for (n_ptr = nodes.head(); n_ptr != 0; n_ptr = n_ptr->next()){
1372 
1373  // find all arcs into this node
1374  word_list.clear();
1375  for (n2_ptr = nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next()){
1376 
1377  for (a_ptr = nodes(n2_ptr)->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next())
1378  if(nodes(n2_ptr)->arcs_out(a_ptr)->to == nodes(n_ptr)){
1379 
1380  if(nodes(n2_ptr)->arcs_out(a_ptr)->label != e_move_symbol_index){
1381  word = alphabet_index_to_symbol(nodes(n2_ptr)->arcs_out(a_ptr)->label)->nmap_index;
1382  word_list.append(word);
1383  sort_unique(word_list);
1384 
1385  }
1386  }
1387  }
1388 
1389 
1390  // if word_list.head() is NULL we should be worried
1391  if((word_list.head() != NULL) && word_list.head()->next() != NULL){
1392 
1393  // for each word make a null node, change all offending arcs to point
1394  // to it, and make another link from null node to nodes(n_ptr)
1395 
1396  for(w_ptr=word_list.head();w_ptr!=NULL;w_ptr=w_ptr->next()){
1397 
1398  // make null node
1399  new_node = new Node;
1400  new_arc = new Arc;
1401  new_arc->label = e_move_symbol_index; // no label
1402  new_arc->to = nodes(n_ptr);
1403  new_node->arcs_out.append(new_arc);
1404 
1405  // for every arc into nodes(n_ptr) with this word label, change its arcs
1406  for (n2_ptr = nodes.head(); n2_ptr != 0; n2_ptr = n2_ptr->next()){
1407 
1408  for (a_ptr = nodes(n2_ptr)->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
1409  if(nodes(n2_ptr)->arcs_out(a_ptr)->to == nodes(n_ptr)){
1410 
1411  word = alphabet_index_to_symbol(nodes(n2_ptr)->arcs_out(a_ptr)->label)->nmap_index;
1412  if(word == word_list(w_ptr)){
1413 
1414  // change the arc
1415  nodes(n2_ptr)->arcs_out(a_ptr)->to = new_node;
1416  }
1417 
1418  }
1419  }
1420  }
1421  nodes.append(new_node);
1422  }
1423  }
1424  }
1425 
1426  // need to make sure ENTER node has no arcs in - if so add a dummy ENTER node
1427 
1428  //Node *enter_node = nodes(nodes.head());
1429  bool flag=false;
1430  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1431  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next()){
1432  flag=true;
1433  break;
1434  }
1435  }
1436 
1437 /*
1438  if(flag){
1439  cerr << " fixing ENTER node" << endl;
1440  new_node = new Node;
1441  new_arc = new Arc;
1442  new_arc->label = enter_node->label;
1443  new_arc->to = enter_node;
1444  new_node->arcs_out.append(new_arc);
1445  nodes.append(new_node);
1446  }
1447  */
1448 
1449  // also need to make sure there is only one EXIT node
1450  if(final_nodes.length() > 1){
1451  cerr << " making single EXIT node" << endl;
1452 
1453  // make null node
1454  new_node = new Node;
1455 
1456  for(n_ptr=final_nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1457 
1458  new_arc = new Arc;
1459  new_arc->label = e_move_symbol_index; // no label
1460  new_arc->to = final_nodes(n_ptr);
1461  final_nodes(n_ptr)->arcs_out.append(new_arc);
1462 
1463 
1464  }
1465 
1466  // empty final node list (nodes themselves remain on nodes list)
1467  final_nodes.clear();
1468 
1469  // now a single final node
1470  nodes.append(new_node);
1471  final_nodes.append(new_node);
1472  }
1473 
1474 
1475  int c1=0,c2=0;
1476  for(n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next()){
1477  c1++;
1478  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL; a_ptr=a_ptr->next())
1479  c2++;
1480  }
1481 
1482  cerr << "HTKified DFA has " << c1
1483  << " nodes and " << c2 << " arcs" << endl;
1484 
1485  return true;
1486 }
1487 
1488 bool
1489 Lattice::final(Node *n)
1490 {
1491  EST_Litem *n_ptr;
1492  for (n_ptr = final_nodes.head(); n_ptr != 0; n_ptr = n_ptr->next())
1493  if(final_nodes(n_ptr) == n)
1494  return true;
1495 
1496  return false;
1497 }
1498 
1499 
1500 
1501 EST_String Lattice::name_as_string(EST_IList &l)
1502 {
1503  EST_String name;
1504  EST_Litem *l_ptr;
1505  for(l_ptr=l.head();l_ptr!=NULL;l_ptr=l_ptr->next())
1506  name+=nmap_index_to_name(l(l_ptr)) + ",";
1507 
1508  return name;
1509 }
1510 
1511 
1512 
1513 int
1514 Lattice::alphabet_index_lookup(int nmap_index, int qmap_index)
1515 {
1516  symbol_t sym;
1517  sym.nmap_index = nmap_index;
1518  sym.qmap_index = qmap_index;
1519 
1520  return alphabet_symbol_to_index(&sym);
1521 }
1522 
1523 
1525 Lattice::alphabet_index_to_symbol(int index)
1526 {
1527  if(index < alphabet.n())
1528  return &(alphabet[index]);
1529  else{
1530  cerr << "Warning : alphabet index " << index << " out of range" << endl;
1531  return NULL;
1532  }
1533 
1534 }
1535 
1536 int
1537 Lattice::alphabet_symbol_to_index(Lattice::symbol_t *sym)
1538 {
1539 
1540  int i,j,mid;
1541 
1542  // binary search
1543  i=0;
1544  j=alphabet.n()-1;
1545 
1546  while(true){
1547 
1548  mid = (i+j)/2;
1549 
1550  if(*sym > alphabet(mid))
1551  i = mid;
1552 
1553  else if(*sym < alphabet(mid))
1554  j = mid;
1555 
1556  else // *sym == alphabet(mid)
1557  return mid; // lucky strike
1558 
1559  if(i==j){
1560  if(*sym == alphabet(i))
1561  return i;
1562  else{
1563  cerr << "Lattice::alphabet_symbol_to_index failed for '"
1564  << *sym << "' 1" << endl;
1565  return -1;
1566  }
1567 
1568  }else if(j==i+1){
1569 
1570  if(*sym == alphabet(i))
1571  return i;
1572  else if(*sym == alphabet(j))
1573  return j;
1574  else{
1575  cerr << "Lattice::alphabet_symbol_to_index failed for '"
1576  << *sym << "' 2" << endl;
1577 
1578  cerr << i << " " << alphabet(i) << endl;
1579  cerr << j << " " << alphabet(j) << endl;
1580 
1581  return -1;
1582  }
1583 
1584  }
1585 
1586  }
1587 
1588  return -1;
1589 
1590 }
1591 
1592 
1593 bool
1594 Lattice::build_transition_function()
1595 {
1596 
1597  // different representation of the network , currently only for DFAs
1598  // since for a NFA each tf cell would be a list
1599 
1600  int i,j;
1601  EST_Litem *n_ptr,*a_ptr;
1602  int num_nodes = nodes.length();
1603  int num_symbols = alphabet.n();
1604 
1605  if(tf != NULL)
1606  cerr << "Warning : discarding existing transition function" << endl;
1607 
1608 
1609  tf = new int*[num_nodes];
1610  for(i=0;i<num_nodes;i++)
1611  tf[i] = new int[num_symbols];
1612 
1613  if(tf == NULL){
1614  cerr << "Not enough memory to build transition function"
1615  << "(needed " << num_nodes*num_symbols*sizeof(int) << " bytes)" << endl;
1616  return false;
1617  }
1618 
1619  for(i=0,n_ptr=nodes.head();n_ptr!=NULL;n_ptr=n_ptr->next(),i++){
1620 
1621  cerr << "building transition function " << (int)((float)(i+1)*100/(float)num_nodes) << "% \r";
1622 
1623  for(j=0;j<alphabet.n();j++){
1624 
1625  tf[i][j]=-1; // means no transition
1626  for(a_ptr=nodes(n_ptr)->arcs_out.head();a_ptr!=NULL;a_ptr=a_ptr->next()){
1627 
1628  if(j == nodes(n_ptr)->arcs_out(a_ptr)->label){
1629  tf[i][j] = node_index(nodes(n_ptr)->arcs_out(a_ptr)->to);
1630  break;
1631  }
1632  }
1633  }
1634  }
1635 
1636  cerr << endl;
1637  return true;
1638 }
1639 
1640 
1641 
1642 bool
1643 Lattice::accepts(EST_TList<symbol_t*> &string)
1644 {
1645  (void) string;
1646  return false;
1647 }
1648 
1649 
1650 float
1651 Lattice::viterbi_transduce(EST_TList<EST_String> &input,
1652  EST_TList<Arc*> &path,
1653  EST_Litem *current_symbol,
1654  Node *start_node)
1655 {
1656 
1657  // finds maximum sum-of-probs path
1658  EST_Litem *a_ptr,*best;
1659 
1660  if(start_node == NULL){
1661  start_node = nodes(nodes.head());
1662  current_symbol = input.head();
1663  path.clear();
1664  }
1665 
1666  if(current_symbol == NULL){ // consumed all input
1667  if( final(start_node) )
1668  return 0; // log prob 1
1669 
1670  else
1671  return -10000000; // hack for now
1672 
1673  }
1674 
1675  best=NULL;
1676  float max=-10000000; // hack for now
1677  for (a_ptr = start_node->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
1678 
1679  if( alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->nmap_index
1680  == nmap_name_to_index(input(current_symbol)) ){
1681 
1682  float x = viterbi_transduce(input,
1683  path,
1684  current_symbol->next(),
1685  start_node->arcs_out(a_ptr)->to)
1686  + qmap_index_to_value(alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->qmap_index);
1687 
1688  if(x > max){
1689  max = x;
1690  best = a_ptr;
1691  }
1692  }
1693  }
1694 
1695  if(best != NULL)
1696  path.append(start_node->arcs_out(best));
1697 
1698  return max;
1699 
1700 }
1701 
1702 
1703 float Lattice::viterbi_transduce(EST_Track &observations,
1704  EST_TList<Arc*> &path,
1705  float &score,
1706  int current_frame,
1707  Node *start_node)
1708 {
1709  // finds maximum sum-of-probs path
1710  EST_Litem *a_ptr,*best;
1711 
1712  if(start_node == NULL){
1713  start_node = nodes(nodes.head());
1714  current_frame = 0;
1715  path.clear();
1716  score = 0.0;
1717  }
1718 
1719  if(current_frame == observations.num_frames()){ // consumed all input
1720  if( final(start_node) )
1721  return 0; // log prob 1
1722 
1723  else
1724  return -10000000; // hack for now
1725 
1726  }
1727 
1728 
1729  if(score < -100000){ // hack !
1730  return -10000000;
1731  }
1732 
1733  best=NULL;
1734  float max=-10000000; // hack for now
1735  for (a_ptr = start_node->arcs_out.head(); a_ptr != 0; a_ptr = a_ptr->next()){
1736 
1737  int observation_element =
1738  alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->nmap_index
1739  - 2; // HACK !!@!!!! to skip ENTER/EXIT
1740 
1741  float this_observation = observations.a(current_frame,observation_element);
1742 
1743  //cerr << "frame " << current_frame << "," << observation_element
1744  //<< " : " << this_observation << endl;
1745 
1746  float x = viterbi_transduce(observations,
1747  path,
1748  score,
1749  current_frame+1,
1750  start_node->arcs_out(a_ptr)->to)
1751 
1752  + qmap_index_to_value(alphabet_index_to_symbol(start_node->arcs_out(a_ptr)->label)->qmap_index)
1753 
1754  + this_observation;
1755 
1756  if(x > max){
1757  max = x;
1758  best = a_ptr;
1759  }
1760 
1761  }
1762 
1763  if(best != NULL){
1764  path.append(start_node->arcs_out(best));
1765 
1766  int observation_element =
1767  alphabet_index_to_symbol(start_node->arcs_out(best)->label)->nmap_index;
1768 
1769  float this_observation = observations.a(current_frame,observation_element);
1770 
1771  score += qmap_index_to_value(alphabet_index_to_symbol(start_node->arcs_out(best)->label)->qmap_index) + this_observation;
1772 ;
1773 
1774  }
1775 
1776  cerr << max << endl;
1777 
1778  return max;
1779 
1780 }
1781 
1782 // hack for now (required for SHARED=2)
1783 bool Lattice::symbol_t::operator!=(Lattice::symbol_t const &) const {
1784 return false;
1785 }
1786 
1787 
1788 
1789 
float & a(int i, int c=0)
Definition: EST_Track.cc:1022
void resize(int n, int set=1)
Definition: EST_TVector.cc:196
INLINE int n() const
number of items in vector.
Definition: EST_TVector.h:252
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:198
int num_frames() const
return number of frames in track
Definition: EST_Track.h:650
void clear(void)
remove all items in list
Definition: EST_TList.h:246
void resize(int n, int set=1)
resize vector