// BALL 1.4.1
00001 /* Model.h 00002 * 00003 * Copyright (C) 2009 Marcel Schumann 00004 * 00005 * This file is part of QuEasy -- A Toolbox for Automated QSAR Model 00006 * Construction and Validation. 00007 * QuEasy is free software; you can redistribute it and/or modify 00008 * it under the terms of the GNU General Public License as published by 00009 * the Free Software Foundation; either version 3 of the License, or (at 00010 * your option) any later version. 00011 * 00012 * QuEasy is distributed in the hope that it will be useful, but 00013 * WITHOUT ANY WARRANTY; without even the implied warranty of 00014 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00015 * General Public License for more details. 00016 * 00017 * You should have received a copy of the GNU General Public License 00018 * along with this program; if not, see <http://www.gnu.org/licenses/>. 00019 */ 00020 00021 // -*- Mode: C++; tab-width: 2; -*- 00022 // vi: set ts=2: 00023 // 00024 // 00025 #ifndef MODEL 00026 #define MODEL 00027 00028 #include <vector> 00029 #include <set> 00030 00031 #include <BALL/MATHS/LINALG/matrix.h> 00032 00033 #ifndef VALIDATION 00034 #include <BALL/QSAR/validation.h> 00035 #endif 00036 00037 #ifndef QSARH 00038 #include <BALL/QSAR/QSARData.h> 00039 #endif 00040 00041 #ifndef QSAR_EXCEPTION 00042 #include <BALL/QSAR/exception.h> 00043 #endif 00044 00045 #include <BALL/MATHS/parsedFunction.h> 00046 00047 namespace BALL 00048 { 00049 00050 namespace QSAR 00051 { 00052 class BALL_EXPORT Model 00053 { 00054 public: 00060 Model(const QSARData& q); 00061 00062 virtual ~Model(); 00063 00066 virtual void operator=(const Model& m); 00068 00069 00074 void copyData(const Model& m); 00075 00077 void copyDescriptorIDs(const Model& m); 00078 00079 00083 void readTrainingData(); 00084 00085 00092 virtual Vector<double> predict(const vector<double>& substance, bool transform) =0; 00093 00095 void deleteDescriptorIDs(); 00096 00098 virtual void train() =0; 00099 00100 00105 
virtual bool optimizeParameters(int /*k*/, int /*no_steps*/){return 0;}; 00106 00107 bool optimizeParameters(int k); 00108 00109 virtual double calculateStdErr() 00110 {return -1.0;}; 00111 00113 virtual void setParameters(vector<double>& /*v*/){}; 00114 00115 virtual vector<double> getParameters() const; 00116 00118 std::multiset<unsigned int>* getDescriptorIDs(); 00119 00120 void setDataSource(const QSARData* q); 00121 00123 virtual void saveToFile(string filename) = 0; 00124 00126 virtual void readFromFile(string filename) = 0; 00127 00129 const Matrix<double>* getDescriptorMatrix(); 00130 00132 const vector<string>* getSubstanceNames(); 00133 00135 const vector<string>* getDescriptorNames(); 00136 00138 const Matrix<double>* getY(); 00139 00141 void setDescriptorIDs(const std::multiset<unsigned int>& sl); 00142 00144 const string* getType(); 00145 00147 void getUnnormalizedFeatureValue(int compound, int feature, double& return_value); 00148 00150 void getUnnormalizedResponseValue(int compound, int response, double& return_value); 00152 00153 00158 const QSARData* data; 00159 00161 Validation* model_val; 00163 00164 00165 protected: 00166 00170 int default_no_opt_steps_; 00171 00177 Vector<double> getSubstanceVector(const vector<double>& substance, bool transform); 00178 00179 Vector<double> getSubstanceVector(const Vector<double>& substance, bool transform); 00180 00182 void backTransformPrediction(Vector<double>& pred); 00183 00185 void addLambda(Matrix<double>& matrix, double& lambda); 00186 00188 void readDescriptorInformation(); 00190 00191 00196 void readMatrix(Matrix<double>& mat, std::ifstream& in, uint lines, uint col); 00197 00198 void readVector(Vector<double>& vec, std::ifstream& in, uint no_cells, bool column_vector); 00199 00200 void readModelParametersFromFile(std::ifstream& in); 00201 void saveModelParametersToFile(std::ofstream& out); 00202 00203 00205 virtual void saveDescriptorInformationToFile(std::ofstream& out); 00206 virtual void 
readDescriptorInformationFromFile(std::ifstream& in, int no_descriptors, bool transformation); 00207 00208 void readResponseTransformationFromFile(std::ifstream& in, int no_y); 00209 void saveResponseTransformationToFile(std::ofstream& out); 00211 00212 00217 Matrix<double> descriptor_matrix_; 00218 00220 vector<string> substance_names_; 00221 00223 vector<string> descriptor_names_; 00224 00227 Matrix<double> descriptor_transformations_; 00228 00231 Matrix<double> y_transformations_; 00232 00235 Matrix<double> Y_; 00236 00238 String type_; 00239 00243 std::multiset<unsigned int> descriptor_IDs_; 00245 00246 friend class Validation; 00247 friend class RegressionValidation; 00248 friend class ClassificationValidation; 00249 #ifdef BALL_HAS_LAPACK 00250 friend class PCRModel; 00251 friend class KPCRModel; 00252 friend class FeatureSelection; 00253 #endif //BALL_HAS_LAPACK 00254 }; 00255 00257 Model* createNewModelFromFile(String model_file, const QSARData& q); 00258 } 00259 } 00260 00261 #endif // MODEL