[ VIGRA Homepage | Function Index | Class Index | Namespaces | File List | Main Page ]

rf_ridge_split.hxx
1 //
2 // C++ Interface: rf_ridge_split
3 //
4 // Description:
5 //
6 //
7 // Author: Nico Splitthoff <splitthoff@zg00103>, (C) 2009
8 //
9 // Copyright: See COPYING file that comes with this distribution
10 //
11 //
12 #ifndef VIGRA_RANDOM_FOREST_RIDGE_SPLIT_H
13 #define VIGRA_RANDOM_FOREST_RIDGE_SPLIT_H
14 //#include "rf_sampling.hxx"
15 #include "../sampling.hxx"
16 #include "rf_split.hxx"
17 #include "rf_nodeproxy.hxx"
18 #include "../regression.hxx"
19 
20 #define outm(v) std::cout << (#v) << ": " << (v) << std::endl;
21 #define outm2(v) std::cout << (#v) << ": " << (v) << ", ";
22 
23 namespace vigra
24 {
25 
26 /*template<>
27 class Node<i_RegrNode>
28 : public NodeBase
29 {
30 public:
31  typedef NodeBase BT;
32 
33 
34  Node( BT::T_Container_type & topology,
35  BT::P_Container_type & param,
36  int nNumCols)
37  : BT(5+nNumCols,2+nNumCols,topology, param)
38  {
39  BT::typeID() = i_RegrNode;
40  }
41 
42  Node( BT::T_Container_type & topology,
43  BT::P_Container_type & param,
44  INT n )
45  : BT(5,2,topology, param, n)
46  {}
47 
48  Node( BT & node_)
49  : BT(5, 2, node_)
50  {}
51 
52  double& threshold()
53  {
54  return BT::parameters_begin()[1];
55  }
56 
57  BT::INT& column()
58  {
59  return BT::column_data()[0];
60  }
61 
62  template<class U, class C>
63  BT::INT& next(MultiArrayView<2,U,C> const & feature)
64  {
65  return (feature(0, column()) < threshold())? child(0):child(1);
66  }
67 };*/
68 
69 
70 template<class ColumnDecisionFunctor, class Tag = ClassificationTag>
71 class RidgeSplit: public SplitBase<Tag>
72 {
73  public:
74 
75 
76  typedef SplitBase<Tag> SB;
77 
78  ArrayVector<Int32> splitColumns;
79  ColumnDecisionFunctor bgfunc;
80 
81  double region_gini_;
82  ArrayVector<double> min_gini_;
83  ArrayVector<std::ptrdiff_t> min_indices_;
84  ArrayVector<double> min_thresholds_;
85 
86  int bestSplitIndex;
87 
88  //dns
89  bool m_bDoScalingInTraining;
90  bool m_bDoBestLambdaBasedOnGini;
91 
92  RidgeSplit()
93  :m_bDoScalingInTraining(true),
94  m_bDoBestLambdaBasedOnGini(true)
95  {
96  }
97 
98  double minGini() const
99  {
100  return min_gini_[bestSplitIndex];
101  }
102 
103  int bestSplitColumn() const
104  {
105  return splitColumns[bestSplitIndex];
106  }
107 
108  bool& doScalingInTraining()
109  { return m_bDoScalingInTraining; }
110 
111  bool& doBestLambdaBasedOnGini()
112  { return m_bDoBestLambdaBasedOnGini; }
113 
114  template<class T>
115  void set_external_parameters(ProblemSpec<T> const & in)
116  {
118  bgfunc.set_external_parameters(in);
119  int featureCount_ = in.column_count_;
120  splitColumns.resize(featureCount_);
121  for(int k=0; k<featureCount_; ++k)
122  splitColumns[k] = k;
123  min_gini_.resize(featureCount_);
124  min_indices_.resize(featureCount_);
125  min_thresholds_.resize(featureCount_);
126  }
127 
128 
129  template<class T, class C, class T2, class C2, class Region, class Random>
130  int findBestSplit(MultiArrayView<2, T, C> features,
131  MultiArrayView<2, T2, C2> multiClassLabels,
132  Region & region,
133  ArrayVector<Region>& childRegions,
134  Random & randint)
135  {
136 
137  //std::cerr << "Split called" << std::endl;
138  typedef typename Region::IndexIterator IndexIterator;
139  typedef typename MultiArrayView <2, T, C>::difference_type fShape;
140  typedef typename MultiArrayView <2, T2, C2>::difference_type lShape;
141  typedef typename MultiArrayView <2, double>::difference_type dShape;
142 
143  // calculate things that haven't been calculated yet.
144 // std::cout << "start" << std::endl;
145  if(std::accumulate(region.classCounts().begin(),
146  region.classCounts().end(), 0) != region.size())
147  {
148  RandomForestClassCounter< MultiArrayView<2,T2, C2>,
149  ArrayVector<double> >
150  counter(multiClassLabels, region.classCounts());
151  std::for_each( region.begin(), region.end(), counter);
152  region.classCountsIsValid = true;
153  }
154 
155 
156  // Is the region pure already?
157  region_gini_ = GiniCriterion::impurity(region.classCounts(),
158  region.size());
159  if(region_gini_ == 0 || region.size() < SB::ext_param_.actual_mtry_ || region.oob_size() < 2)
160  return SB::makeTerminalNode(features, multiClassLabels, region, randint);
161 
162  // select columns to be tried.
163  for(int ii = 0; ii < SB::ext_param_.actual_mtry_; ++ii)
164  std::swap(splitColumns[ii],
165  splitColumns[ii+ randint(features.shape(1) - ii)]);
166 
167  //do implicit binary case
168  MultiArray<2, T2> labels(lShape(multiClassLabels.shape(0),1));
169  //number of classes should be >1, otherwise makeTerminalNode would have been called
170  int nNumClasses=0;
171  for(int n=0; n<(int)region.classCounts().size(); n++)
172  nNumClasses+=((region.classCounts()[n]>0) ? 1:0);
173 
174  //convert to binary case
175  if(nNumClasses>2)
176  {
177  int nMaxClass=0;
178  int nMaxClassCounts=0;
179  for(int n=0; n<(int)region.classCounts().size(); n++)
180  {
181  //this should occur in any case:
182  //we had more than two non-zero classes in order to get here
183  if(region.classCounts()[n]>nMaxClassCounts)
184  {
185  nMaxClassCounts=region.classCounts()[n];
186  nMaxClass=n;
187  }
188  }
189 
190  //create binary labels
191  for(int n=0; n<multiClassLabels.shape(0); n++)
192  labels(n,0)=((multiClassLabels(n,0)==nMaxClass) ? 1:0);
193  }
194  else
195  labels=multiClassLabels;
196 
197  //_do implicit binary case
198 
199  //uncomment this for some debugging
200 /* int nNumCases=features.shape(0);
201 
202  typedef typename MultiArrayView <2, int>::difference_type nShape;
203  MultiArray<2, int> elementCounterArray(nShape(nNumCases,1),(int)0);
204  int nUniqueElements=0;
205  for(int n=0; n<region.size(); n++)
206  elementCounterArray[region[n]]++;
207 
208  for(int n=0; n<nNumCases; n++)
209  nUniqueElements+=((elementCounterArray[n]>0) ? 1:0);
210 
211  outm(nUniqueElements);
212  nUniqueElements=0;
213  MultiArray<2, int> elementCounterArray_oob(nShape(nNumCases,1),(int)0);
214  for(int n=0; n<region.oob_size(); n++)
215  elementCounterArray_oob[region.oob_begin()[n]]++;
216  for(int n=0; n<nNumCases; n++)
217  nUniqueElements+=((elementCounterArray_oob[n]>0) ? 1:0);
218  outm(nUniqueElements);
219 
220  int notUniqueElements=0;
221  for(int n=0; n<nNumCases; n++)
222  notUniqueElements+=(((elementCounterArray_oob[n]>0) && (elementCounterArray[n]>0)) ? 1:0);
223  outm(notUniqueElements);*/
224 
225  //outm(SB::ext_param_.actual_mtry_);
226 
227 
228 //select submatrix of features for regression calculation
229  MultiArrayView<2, T, C> cVector;
230  MultiArray<2, T> xtrain(fShape(region.size(),SB::ext_param_.actual_mtry_));
231  //we only want -1 and 1 for this
232  MultiArray<2, double> regrLabels(dShape(region.size(),1));
233 
234  //copy data into a vigra data structure and centre and scale while doing so
235  MultiArray<2, double> meanMatrix(dShape(SB::ext_param_.actual_mtry_,1));
236  MultiArray<2, double> stdMatrix(dShape(SB::ext_param_.actual_mtry_,1));
237  for(int m=0; m<SB::ext_param_.actual_mtry_; m++)
238  {
239  cVector=columnVector(features, splitColumns[m]);
240 
241  //centre and scale the data
242  double dCurrFeatureColumnMean=0.0;
243  double dCurrFeatureColumnStd=1.0; //default value
244 
245  //calc mean on bootstrap data
246  for(int n=0; n<region.size(); n++)
247  dCurrFeatureColumnMean+=cVector[region[n]];
248  dCurrFeatureColumnMean/=region.size();
249  //calc scaling
250  if(m_bDoScalingInTraining)
251  {
252  for(int n=0; n<region.size(); n++)
253  {
254  dCurrFeatureColumnStd+=
255  (cVector[region[n]]-dCurrFeatureColumnMean)*(cVector[region[n]]-dCurrFeatureColumnMean);
256  }
257  //unbiased std estimator:
258  dCurrFeatureColumnStd=sqrt(dCurrFeatureColumnStd/(region.size()-1));
259  }
260  //dCurrFeatureColumnStd is still 1.0 if we didn't want scaling
261  stdMatrix(m,0)=dCurrFeatureColumnStd;
262 
263  meanMatrix(m,0)=dCurrFeatureColumnMean;
264 
265  //get feature matrix, i.e. A (note that weighting is done automatically
266  //since rows can occur multiple times -> bagging)
267  for(int n=0; n<region.size(); n++)
268  xtrain(n,m)=(cVector[region[n]]-dCurrFeatureColumnMean)/dCurrFeatureColumnStd;
269  }
270 
271 // std::cout << "middle" << std::endl;
272  //get label vector (i.e. b)
273  for(int n=0; n<region.size(); n++)
274  {
275  //we checked for/built binary case further up.
276  //class labels should thus be either 0 or 1
277  //-> convert to -1 and 1 for regression
278  regrLabels(n,0)=((labels[region[n]]==0) ? -1:1);
279  }
280 
281  MultiArray<2, double> dLambdas(dShape(11,1));
282  int nCounter=0;
283  for(int nLambda=-5; nLambda<=5; nLambda++)
284  dLambdas[nCounter++]=pow(10.0,nLambda);
285  //destination vector for regression coefficients; use same type as for xtrain
286  MultiArray<2, double> regrCoef(dShape(SB::ext_param_.actual_mtry_,11));
287  ridgeRegressionSeries(xtrain,regrLabels,regrCoef,dLambdas);
288 
289  double dMaxRidgeSum=NumericTraits<double>::min();
290  double dCurrRidgeSum;
291  int nMaxRidgeSumAtLambdaInd=0;
292 
293  for(int nLambdaInd=0; nLambdaInd<11; nLambdaInd++)
294  {
295  //just sum up the correct answers
296  //(correct means >=intercept for class 1, <intercept for class 0)
297  //(intercept=0 or intercept=threshold based on gini)
298  dCurrRidgeSum=0.0;
299 
300  //assemble projection vector
301  MultiArray<2, double> dDistanceFromHyperplane(dShape(features.shape(0),1));
302 
303  for(int n=0; n<region.oob_size(); n++)
304  {
305  dDistanceFromHyperplane(region.oob_begin()[n],0)=0.0;
306  for (int m=0; m<SB::ext_param_.actual_mtry_; m++)
307  {
308  dDistanceFromHyperplane(region.oob_begin()[n],0)+=
309  features(region.oob_begin()[n],splitColumns[m])*regrCoef(m,nLambdaInd);
310  }
311  }
312 
313  double dCurrIntercept=0.0;
314  if(m_bDoBestLambdaBasedOnGini)
315  {
316  //calculate gini index
317  bgfunc(dDistanceFromHyperplane,
318  labels,
319  region.oob_begin(), region.oob_end(),
320  region.classCounts());
321  dCurrIntercept=bgfunc.min_threshold_;
322  }
323  else
324  {
325  for (int m=0; m<SB::ext_param_.actual_mtry_; m++)
326  dCurrIntercept+=meanMatrix(m,0)*regrCoef(m,nLambdaInd);
327  }
328 
329  for(int n=0; n<region.oob_size(); n++)
330  {
331  //check what lambda performs best on oob data
332  int nClassPrediction=((dDistanceFromHyperplane(region.oob_begin()[n],0) >=dCurrIntercept) ? 1:0);
333  dCurrRidgeSum+=((nClassPrediction == labels(region.oob_begin()[n],0)) ? 1:0);
334  }
335  if(dCurrRidgeSum>dMaxRidgeSum)
336  {
337  dMaxRidgeSum=dCurrRidgeSum;
338  nMaxRidgeSumAtLambdaInd=nLambdaInd;
339  }
340  }
341 
342 // std::cout << "middle2" << std::endl;
343  //create a Node for output
344  Node<i_HyperplaneNode> node(SB::ext_param_.actual_mtry_, SB::t_data, SB::p_data);
345 
346  //normalise coeffs
347  //data was scaled (by 1.0 or by std) -> take into account
348  MultiArray<2, double> dCoeffVector(dShape(SB::ext_param_.actual_mtry_,1));
349  for(int n=0; n<SB::ext_param_.actual_mtry_; n++)
350  dCoeffVector(n,0)=regrCoef(n,nMaxRidgeSumAtLambdaInd)*stdMatrix(n,0);
351 
352  //calc norm
353  double dVnorm=columnVector(regrCoef,nMaxRidgeSumAtLambdaInd).norm();
354 
355  for(int n=0; n<SB::ext_param_.actual_mtry_; n++)
356  node.weights()[n]=dCoeffVector(n,0)/dVnorm;
357  //_normalise coeffs
358 
359  //save the columns
360  node.column_data()[0]=SB::ext_param_.actual_mtry_;
361  for(int n=0; n<SB::ext_param_.actual_mtry_; n++)
362  node.column_data()[n+1]=splitColumns[n];
363 
364  //assemble projection vector
365  //careful here: "region" is a pointer to indices...
366  //all the indices in "region" need to have valid data
367  //convert from "region" space to original "feature" space
368  MultiArray<2, double> dDistanceFromHyperplane(dShape(features.shape(0),1));
369 
370  for(int n=0; n<region.size(); n++)
371  {
372  dDistanceFromHyperplane(region[n],0)=0.0;
373  for (int m=0; m<SB::ext_param_.actual_mtry_; m++)
374  {
375  dDistanceFromHyperplane(region[n],0)+=
376  features(region[n],m)*node.weights()[m];
377  }
378  }
379  for(int n=0; n<region.oob_size(); n++)
380  {
381  dDistanceFromHyperplane(region.oob_begin()[n],0)=0.0;
382  for (int m=0; m<SB::ext_param_.actual_mtry_; m++)
383  {
384  dDistanceFromHyperplane(region.oob_begin()[n],0)+=
385  features(region.oob_begin()[n],m)*node.weights()[m];
386  }
387  }
388 
389  //calculate gini index
390  bgfunc(dDistanceFromHyperplane,
391  labels,
392  region.begin(), region.end(),
393  region.classCounts());
394 
395  // did not find any suitable split
396  if(closeAtTolerance(bgfunc.min_gini_, NumericTraits<double>::max()))
397  return SB::makeTerminalNode(features, multiClassLabels, region, randint);
398 
399  //take gini threshold here due to scaling, normalisation, etc. of the coefficients
400  node.intercept() = bgfunc.min_threshold_;
401  SB::node_ = node;
402 
403  childRegions[0].classCounts() = bgfunc.bestCurrentCounts[0];
404  childRegions[1].classCounts() = bgfunc.bestCurrentCounts[1];
405  childRegions[0].classCountsIsValid = true;
406  childRegions[1].classCountsIsValid = true;
407 
408  // Save the ranges of the child stack entries.
409  childRegions[0].setRange( region.begin() , region.begin() + bgfunc.min_index_ );
410  childRegions[0].rule = region.rule;
411  childRegions[0].rule.push_back(std::make_pair(1, 1.0));
412  childRegions[1].setRange( region.begin() + bgfunc.min_index_ , region.end() );
413  childRegions[1].rule = region.rule;
414  childRegions[1].rule.push_back(std::make_pair(1, 1.0));
415 
416  //adjust oob ranges
417 // std::cout << "adjust oob" << std::endl;
418  //sort the oobs
419  std::sort(region.oob_begin(), region.oob_end(),
420  SortSamplesByDimensions< MultiArray<2, double> > (dDistanceFromHyperplane, 0));
421 
422  //find split index
423  int nOOBindx;
424  for(nOOBindx=0; nOOBindx<region.oob_size(); nOOBindx++)
425  {
426  if(dDistanceFromHyperplane(region.oob_begin()[nOOBindx],0)>=node.intercept())
427  break;
428  }
429 
430  childRegions[0].set_oob_range( region.oob_begin() , region.oob_begin() + nOOBindx );
431  childRegions[1].set_oob_range( region.oob_begin() + nOOBindx , region.oob_end() );
432 
433 // std::cout << "end" << std::endl;
434 // outm2(region.oob_begin());outm2(nOOBindx);outm(region.oob_begin() + nOOBindx);
435  //_adjust oob ranges
436 
437  return i_HyperplaneNode;
438  }
439 };
440 
441 /** Standard ridge regression split
442  */
443 typedef RidgeSplit<BestGiniOfColumn<GiniCriterion> > GiniRidgeSplit;
444 
445 
446 } //namespace vigra
447 #endif // VIGRA_RANDOM_FOREST_RIDGE_SPLIT_H

© Ullrich Köthe (ullrich.koethe@iwr.uni-heidelberg.de)
Heidelberg Collaboratory for Image Processing, University of Heidelberg, Germany

html generated using doxygen and Python
vigra 1.9.0 (Sat Oct 5 2013)