BALL  1.4.1
featureSelection.h
Go to the documentation of this file.
00001 /* featureSelection.h
00002  * 
00003  * Copyright (C) 2009 Marcel Schumann
00004  * 
00005  * This file is part of QuEasy -- A Toolbox for Automated QSAR Model
00006  * Construction and Validation.
00007  * QuEasy is free software; you can redistribute it and/or modify
00008  * it under the terms of the GNU General Public License as published by
00009  * the Free Software Foundation; either version 3 of the License, or (at
00010  * your option) any later version.
00011  * 
00012  * QuEasy is distributed in the hope that it will be useful, but
00013  * WITHOUT ANY WARRANTY; without even the implied warranty of
00014  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015  * General Public License for more details.
00016  * 
00017  * You should have received a copy of the GNU General Public License
00018  * along with this program; if not, see <http://www.gnu.org/licenses/>.
00019  */
00020 
00021 // -*- Mode: C++; tab-width: 2; -*-
00022 // vi: set ts=2:
00023 //
00024 // 
00025 
00026 #ifndef FEATURESEL
00027 #define FEATURESEL
00028 
00029 #ifndef MODEL
00030 #include <BALL/QSAR/Model.h>
00031 #endif
00032 
00033 #ifndef KMODEL
00034 #include <BALL/QSAR/kernelModel.h>
00035 #endif
00036 
00037 #ifndef LMODEL
00038 #include <BALL/QSAR/linearModel.h>
00039 #endif
00040 
00041 #include <set>
00042 
00043 namespace BALL 
00044 {
00045   namespace QSAR
00046   {
00047 
00048     class BALL_EXPORT FeatureSelection
00049     {
          // Feature-selection interface for QSAR models. As declared here, an
          // instance stores a raw pointer to a Model (model_), a raw pointer to a
          // Vector<double> (weights_) and a double cutoff
          // (quality_increase_cutoff_).
          //
          // NOTE(review): the numeric column at the start of each line is a listing
          // index from the rendered page, not C++; gaps in it presumably mark
          // lines (most likely the original doc comments) dropped by the renderer.
          // NOTE(review): semantics described below as "presumably" are inferred
          // from names only -- confirm against the implementation file.
00050       public:
              // Builds a selector for the given model, taken by non-const reference.
              // The constructor is not marked explicit, so it also acts as an
              // implicit conversion from Model&.
00054         FeatureSelection(Model& m);
00055         
              // Overload taking a KernelModel by non-const reference; likewise not
              // marked explicit.
              // NOTE(review): presumably KernelModel derives from Model -- confirm in
              // kernelModel.h.
00056         FeatureSelection(KernelModel& m);
00057 
              // User-declared destructor. No copy constructor or copy assignment is
              // declared in this class, so compiler-generated copies would duplicate
              // the raw pointers model_ and weights_.
              // NOTE(review): confirm what, if anything, the destructor releases.
00058         ~FeatureSelection();
00060         
00061         
              // Presumably rebinds this object to another model -- confirm.
00066         void setModel(Model& m);
00067         
              // KernelModel overload of setModel().
00068         void setModel(KernelModel& km);
00069   
              // Selection entry point; k defaults to 4 and optPar to 0 (false).
              // NOTE(review): presumably k is the cross-validation fold count and
              // optPar switches on model-parameter optimisation -- TODO confirm.
00074         void forwardSelection(int k=4, bool optPar=0);
00075         
              // Same parameters and defaults as forwardSelection(); presumably
              // eliminates features starting from the full set -- confirm.
00080         void backwardSelection(int k=4, bool optPar=0);
00081         
              // Same parameters and defaults as forwardSelection().
              // NOTE(review): presumably implemented via forward(true, ...) -- confirm.
00082         void stepwiseSelection(int k=4, bool optPar=0);
00083         
              // Unlike the three selection methods above, k has no default here and
              // must be supplied by the caller; optPar still defaults to 0 (false).
00088         void twinScan(int k, bool optPar=0);
00089         
              // Takes a LinearModel by non-const reference; act defaults to 1 and
              // d defaults to 1.
              // NOTE(review): the meaning of act and d is not visible in this header
              // -- confirm before relying on the defaults.
00094         void implicitSelection(LinearModel& lm, int act=1, double d=1);
00095         
              // The threshold is taken by non-const reference, so callers must pass
              // an lvalue (a literal will not bind). Whether the argument is modified
              // is not visible from this declaration.
00098         void removeHighlyCorrelatedFeatures(double& cor_threshold);
00099         
00100         
              // The minimum correlation is taken by non-const reference, so callers
              // must pass an lvalue; whether it is modified is not visible here.
00102         void removeLowResponseCorrelation(double& min_correlation);
00103         
00104         
              // No parameters, no return value.
              // NOTE(review): what counts as an "empty" descriptor is defined in the
              // implementation -- confirm.
00106         void removeEmptyDescriptors();
00107         
              // Takes an integer selector s.
              // NOTE(review): the valid values of s are not visible in this header.
00108         void selectStat(int s);
00109         
              // Takes the cutoff by non-const reference (lvalue required);
              // presumably stores it in quality_increase_cutoff_ -- confirm.
00112         void setQualityIncreaseCutoff(double& d);
00114   
00115   
00116       private:
00117         
              // Helper taking two descriptor-ID multisets and a weight vector, all by
              // non-const reference.
              // NOTE(review): presumably maps oldWeights from the old ID set onto the
              // new one -- confirm.
00121         void updateWeights(std::multiset<unsigned int>& oldDescIDs, std::multiset<unsigned int>& newDescIDs, Vector<double>& oldWeights);
00123         
00124         
              // Returns a raw pointer to a multiset of unsigned IDs.
              // NOTE(review): ownership of the returned object is not visible here
              // -- confirm whether the caller must delete it.
00130         std::multiset<unsigned int>* findIrrelevantDescriptors();
00131         
              // Raw pointer to the model this object works on; ownership is not
              // visible from this header.
00133         Model* model_;
00134         
              // Raw pointer to a weight vector; ownership is not visible from this
              // header.
00136         Vector<double>* weights_;
00137         
              // Private worker taking a stepwise flag plus the same k/optPar pair as
              // the public selection methods.
              // NOTE(review): presumably the shared implementation behind
              // forwardSelection() and stepwiseSelection() -- confirm.
00139         void forward(bool stepwise, int k, bool optPar);
00140         
              // Cutoff value; presumably assigned by setQualityIncreaseCutoff().
00142         double quality_increase_cutoff_;
00144     };
00145   }
00146 }
00147 
00148 
00149 #endif // FEATURESEL
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines