Edinburgh Speech Tools  2.1-release
 All Classes Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ols_main.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1996,1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : January 1998 */
35 /*-----------------------------------------------------------------------*/
36 /* Ordinary least squares */
37 /* */
38 /*=======================================================================*/
39 #include <cstdlib>
40 #include <iostream>
41 #include <fstream>
42 #include <cstring>
43 #include "EST_Wagon.h"
44 #include "EST_multistats.h"
45 #include "EST_cmd_line.h"
46 
47 static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d);
48 static int ols_main(int argc, char **argv);
49 
50 int main(int argc, char **argv)
51 {
52  return ols_main(argc,argv);
53 }
54 
55 static int ols_main(int argc, char **argv)
56 {
57  // Top level function loads in sample data and finds coefficients
58  EST_Option al;
59  EST_StrList files;
60  EST_String ofile = "-";
61  WDataSet dataset,test_dataset;
62  EST_FMatrix coeffs;
63  EST_FMatrix X,Y,Xtest,Ytest;
64  LISP ignores = NIL;
65 
66  parse_command_line
67  (argc, argv,
68  EST_String("[options]\n")+
69  "Summary: Linear Regression by ordinary least squares (defaults in {})\n"+
70  "-desc <ifile> Field description file\n"+
71  "-data <ifile> Datafile, one vector per line\n"+
72  "-test <ifile> Datafile, for testing\n"+
73  "-robust Robust, may take longer\n"+
74  "-stepwise Order the features by contribution,\n"+
75  " implies robust.\n"+
76  "-swlimit <float> {0.0}\n"+
77  " Percentage necessary improvement for stepwise\n"+
78  "-quiet No summary\n"+
79  "-o <ofile> \n"+
80  "-output <ofile> Output file for coefficients\n"+
81  "-ignore <string> Filename or bracket list of fields to ignore\n",
82  files, al);
83 
84 
85  if (al.present("-output"))
86  ofile = al.val("-output");
87  if (al.present("-o"))
88  ofile = al.val("-o");
89 
90  siod_init();
91 
92  if (al.present("-ignore"))
93  {
94  EST_String ig = al.val("-ignore");
95  if (ig[0] == '(')
96  ignores = read_from_string(ig);
97  else
98  ignores = vload(ig,1);
99  }
100 
101  // Load in the data
102  if (!al.present("-desc"))
103  {
104  cerr << "ols: no description file specified\n";
105  return -1;
106  }
107  else
108  {
109  dataset.load_description(al.val("-desc"),ignores);
110  dataset.ignore_non_numbers();
111  }
112  if (!al.present("-data"))
113  {
114  cerr << "ols: no data file specified\n";
115  return -1;
116  }
117  else
118  wgn_load_dataset(dataset,al.val("-data"));
119  if (al.present("-test"))
120  {
121  test_dataset.load_description(al.val("-desc"),ignores);
122  test_dataset.ignore_non_numbers();
123  wgn_load_dataset(test_dataset,al.val("-test"));
124  load_ols_data(Xtest,Ytest,test_dataset);
125  }
126  else
127  // No test data specified so use training data
128  load_ols_data(Xtest,Ytest,dataset);
129 
130  load_ols_data(X,Y,dataset);
131 
132  if (al.present("-stepwise"))
133  {
134  EST_StrList names;
135  float swlimit = al.fval("-swlimit");
136  EST_IVector included;
137  int i;
138 
139  names.append("Intercept");
140  for (i=1; i < dataset.width(); i++)
141  names.append(dataset.feat_name(i));
142 
143  included.resize(X.num_columns());
144  included[0] = TRUE; // always guarantee interceptor
145  for (i=1; i<included.length(); i++)
146  {
147  if (dataset.ignore(i) == TRUE)
148  included.a_no_check(i) = OLS_IGNORE;
149  else
150  included.a_no_check(i) = FALSE;
151  }
152 
153  if (!stepwise_ols(X,Y,names,swlimit,coeffs,Xtest,Ytest,included))
154  {
155  cerr << "OLS: failed stepwise ols" << endl;
156  return -1;
157  }
158  }
159  else if (al.present("-robust"))
160  {
161  EST_IVector included;
162  int i;
163 
164  included.resize(X.num_columns());
165  included[0] = TRUE; // always guarantee interceptor
166  for (i=1; i<included.length(); i++)
167  {
168  if (dataset.ignore(i) == TRUE)
169  included.a_no_check(i) = OLS_IGNORE;
170  else
171  included.a_no_check(i) = TRUE;
172  }
173 
174  if (!robust_ols(X,Y,included,coeffs))
175  {
176  cerr << "OLS: failed robust ols" << endl;
177  return -1;
178  }
179  }
180  else if (!ols(X,Y,coeffs))
181  {
182  cerr << "OLS: failed no pseudo_inverse" << endl;
183  return -1;
184  }
185 
186  if (coeffs.save(ofile) != write_ok)
187  {
188  cerr << "OLS: failed to save coefficients in \"" << ofile << "\""
189  << endl;
190  return -1;
191  }
192 
193  if (!al.present("-quiet"))
194  {
195  EST_FMatrix pred;
196  float cor,rmse;
197 
198  ols_apply(Xtest,coeffs,pred);
199  ols_test(Ytest,pred,cor,rmse);
200 
201  printf(";; RMSE %f Correlation is %f\n",rmse,cor);
202  }
203 
204  return 0;
205 }
206 
207 static void load_ols_data(EST_FMatrix &X, EST_FMatrix &Y, WDataSet &d)
208 {
209  EST_Litem *p;
210  int n,m;
211 
212  X.resize(d.length(),d.width());
213  Y.resize(d.length(),1);
214 
215  for (n=0,p=d.head(); p != 0; p=p->next(),n++)
216  {
217  Y.a_no_check(n,0) = d(p)->get_flt_val(0);
218  X.a_no_check(n,0) = 1;
219  for (m=1; m < d.width(); m++)
220  {
221  if (d.ignore(m))
222  {
223  X.a_no_check(n,m) = 0;
224  }
225  else
226  X.a_no_check(n,m) = d(p)->get_flt_val(m);
227  }
228  }
229 
230 }
INLINE const T & a_no_check(int row, int col) const
const access with no bounds check, care recommended
Definition: EST_TMatrix.h:183
int num_columns() const
return number of columns
Definition: EST_TMatrix.h:180
float fval(const EST_String &rkey, int m=1) const
Definition: EST_Option.cc:98
EST_write_status save(const EST_String &filename, const EST_String &type=EST_FMatrix::default_file_type)
Save in file (ascii or binary)
Definition: EST_FMatrix.cc:340
INLINE int length() const
number of items in vector.
Definition: EST_TVector.h:250
const int present(const K &rkey) const
Returns true if key is present.
Definition: EST_TKVL.cc:222
const V & val(const K &rkey, bool m=0) const
return value according to key (const)
Definition: EST_TKVL.cc:145
INLINE const T & a_no_check(int n) const
read-only const access operator: without bounds checking
Definition: EST_TVector.h:255
void append(const T &item)
add item onto end of list
Definition: EST_TList.h:198
void resize(int rows, int cols, int set=1)
resize matrix
void resize(int n, int set=1)
resize vector