// BALL 1.4.1
00001 // -*- Mode: C++; tab-width: 2; -*- 00002 // vi: set ts=2: 00003 00004 #ifndef BALL_MOLMEC_AMBER_GAFFCESPARSER_H 00005 #define BALL_MOLMEC_AMBER_GAFFCESPARSER_H 00006 00007 #ifndef BALL_KERNEL_ATOM_H 00008 #include <BALL/KERNEL/atom.h> 00009 #endif 00010 00011 #ifndef BALL_DATATYPE_STRING_H 00012 #include <BALL/DATATYPE/string.h> 00013 #endif 00014 00015 #include <set> 00016 #include <map> 00017 #include <vector> 00018 00019 namespace BALL 00020 { 00021 00022 class GAFFCESParser 00023 { 00024 public: 00025 00026 //atomic property string 00027 class APSMatcher 00028 { 00029 public: 00030 //encode Ringatomtypes 00031 enum APSType 00032 { 00033 IS_RING_ATOM, 00034 IS_NON_RING_ATOM, 00035 IS_PLANAR, 00036 IS_PLANAR_WITH_DB_TO_NR, 00037 IS_PURELY_AROMATIC, 00038 IS_PURELY_ALIPHATIC, 00039 IS_OTHER_RING, 00040 IS_3_RING_ATOM, 00041 IS_4_RING_ATOM, 00042 IS_5_RING_ATOM, 00043 IS_6_RING_ATOM, 00044 IS_7_RING_ATOM, 00045 IS_8_RING_ATOM, 00046 IS_9_RING_ATOM, 00047 PURE_SINGLE_BOND, 00048 PURE_SINGLE_BOND_TO_PARENT, 00049 NO_PURE_SINGLE_BOND_TO_PARENT, 00050 SINGLE_BOND, 00051 SINGLE_BOND_TO_PARENT, 00052 NO_SINGLE_BOND_TO_PARENT, 00053 PURE_DOUBLE_BOND, 00054 PURE_DOUBLE_BOND_TO_PARENT, 00055 NO_PURE_DOUBLE_BOND_TO_PARENT, 00056 DOUBLE_BOND, 00057 DOUBLE_BOND_TO_PARENT, 00058 NO_DOUBLE_BOND_TO_PARENT, 00059 TRIPLE_BOND, 00060 TRIPLE_BOND_TO_PARENT, 00061 NO_TRIPLE_BOND_TO_PARENT, 00062 DELOCALIZED_BOND, 00063 DELOCALIZED_BOND_TO_PARENT, 00064 NO_DELOCALIZED_BOND_TO_PARENT, 00065 AROMATIC_BOND, 00066 AROMATIC_BOND_TO_PARENT, 00067 NO_AROMATIC_BOND_TO_PARENT, 00068 APS_TRUE 00069 }; 00070 00071 class APSTerm 00072 { 00073 public: 00074 APSTerm(APSType new_type, int new_feature_number) 00075 : type(new_type), 00076 feature_number(new_feature_number) 00077 {} 00078 00079 APSType type; 00080 //contain the number of occurence of a given feature 00081 int feature_number; 00082 }; 00083 00084 //check if current atom is in a ring 00085 bool isRingAtom(Atom& atom); 00086 
//check if the current atom is in a ring with size n 00087 //and return the number of occurence 00088 int isNRingAtom(Size size, Atom& atom); 00089 //check if the current atom is not a ringatom or 00090 //in a ten-membered or larger ring 00091 bool isNonRingAtom(Atom& atom); 00092 00093 bool checkGAFFProperties(Atom& atom, Atom& predecessor, APSTerm aps); 00094 00095 00096 APSMatcher(); 00097 ~APSMatcher(); 00098 00099 String printAPS(); 00100 00101 // check if atom matches atomic property string 00102 bool operator() (Atom& atom, Atom& predecessor); 00103 00104 //store atomic property string 00105 //external vector: all AND (",") types 00106 //internal vector: all OR (".")types 00107 std::vector < std::vector< APSTerm> > aps_terms; 00108 00109 protected: 00110 bool hasBond_(Atom* atom, Atom* predecessor, int bond_type, int feature_number); 00111 }; 00112 00113 struct State 00114 { 00115 //not "thread-safe" 00116 GAFFCESParser* current_parser; 00117 00118 APSMatcher::APSType current_aps_type; 00119 int feature_number; 00120 }; 00121 00122 00123 00124 //chemical environment string 00125 class CESPredicate 00126 { 00127 public: 00128 //encode wildcard elements 00129 enum CESwildcards 00130 { 00131 XA, XB, XC, XD, XX 00132 }; 00133 00134 CESPredicate(GAFFCESParser* parser) 00135 : parent(0), 00136 parser_(parser) 00137 { 00138 initStringToWildcard(); 00139 }; 00140 00141 virtual ~CESPredicate(); 00142 00143 //initialize stringToWildcard map 00144 void initStringToWildcard(); 00145 std::map<String, CESwildcards> getStringToWildcard(); 00146 00147 //add a CESwildcardsConnectionPredicate to "predicate tree" 00148 void addCESwildcardsConnectionPredicate(String wildcard, Size partners); 00149 //add a CESwilddcardsPredicate to "predicate tree" 00150 void addCESwildcardsPredicate(String wildcard); 00151 //add an CESelementPredicate to "predicate tree" 00152 void addCESelementPredicate(String name); 00153 //add an CESelementConnectionPredicate to "predicate tree" 00154 void 
addCESelementConnectionPredicate(Size partners, String name); 00155 // add a TruePredicate to "predicate tree" 00156 void addTruePredicate() const; 00157 00158 //check if atom and its environment match predicates 00159 virtual bool operator () (Atom& atom); 00160 //check if atom matches "predicates in predicate-tree" 00161 virtual bool match(Atom&){return false;}; 00162 //delete children 00163 void clear(); 00164 00165 // check whether this atom is contained on a path to the root 00166 bool alreadySeenThisAtom(Atom* atom); 00167 00168 //to expand aps_term in aps_matcher object 00169 void addNewAND(); 00170 void addNewOR(APSMatcher::APSType aps, int feature_number); 00171 00172 //store existing atomic property string 00173 APSMatcher aps_matcher; 00174 00175 //all CESPredicates for current_predicate ->children of current_predicate 00176 std::vector<CESPredicate*> children; 00177 //CESPredicate, that has current_predicate in his children vector 00178 CESPredicate* parent; 00179 00180 //The atom we are trying to match to this predicate 00181 Atom* atom_to_test; 00182 protected: 00183 //map to convert String into wildcard-element 00184 std::map<String, CESwildcards > stringToWildcard_; 00185 GAFFCESParser* parser_; 00186 }; 00187 00188 //element-name of the partnerAtom and number of its connected atoms 00189 class CESelementConnectionPredicate : public CESPredicate 00190 { 00191 public: 00192 CESelementConnectionPredicate(GAFFCESParser* parser) 00193 : CESPredicate(parser), 00194 numberOfPartners_(0), 00195 elementName_("NoName") 00196 {}; 00197 00198 ~CESelementConnectionPredicate(); 00199 00200 void setNumberOfPartners(Size number); 00201 void setElementName(String name); 00202 Size getNumberOfPartners(); 00203 String getElementName(); 00204 //check if atom matches predicate 00205 bool match(Atom& atom); 00206 protected: 00207 Size numberOfPartners_; 00208 String elementName_; 00209 }; 00210 00211 //element-name of the partnerAtom 00212 class CESelementPredicate : 
public CESPredicate 00213 { 00214 public: 00215 CESelementPredicate(GAFFCESParser* parser) 00216 : CESPredicate(parser), 00217 elementName_("NoName") 00218 { 00219 }; 00220 00221 ~CESelementPredicate(); 00222 00223 void setElementName(String name); 00224 String getElementName(); 00225 //check if atom matches predicate 00226 bool match(Atom& atom); 00227 protected: 00228 String elementName_; 00229 }; 00230 00231 //XA,XB,XC,XD,XX 00232 class CESwildcardsPredicate : public CESPredicate 00233 { 00234 public: 00235 CESwildcardsPredicate(GAFFCESParser* parser) 00236 : CESPredicate(parser) 00237 {}; 00238 00239 ~CESwildcardsPredicate(); 00240 00241 void setWildcards(String new_wildcard); 00242 CESwildcards getWildcards(); 00243 //checks if atom matches the given wildcard-element (XA, XB, XC, XD, XX) 00244 bool matchWildcards(Atom& atom); 00245 //check if atom matches predicate 00246 bool match(Atom& atom); 00247 protected: 00248 CESwildcards wildcards_; 00249 }; 00250 00251 //XA,XB,XC,XD,XX and number of partnerAtoms 00252 class CESwildcardsConnectionPredicate : public CESPredicate 00253 { 00254 public: 00255 CESwildcardsConnectionPredicate(GAFFCESParser* parser) 00256 : CESPredicate(parser), 00257 numberOfPartners_(0) 00258 {}; 00259 ~CESwildcardsConnectionPredicate(); 00260 00261 void setNumberOfPartners(Size number); 00262 void setWildcards(String new_wildcard); 00263 Size getNumberOfPartners(); 00264 CESwildcards getWildcards(); 00265 //checks if atom matches the given wildcard-element (XA, XB, XC, XD, XX) 00266 bool matchWildcards(Atom& atom); 00267 //check if atom matches predicate 00268 bool match(Atom& atom); 00269 protected: 00270 CESwildcards wildcards_; 00271 Size numberOfPartners_; 00272 }; 00273 00274 //string is "*" which means always true 00275 class TruePredicate : public CESPredicate 00276 { 00277 public: 00278 TruePredicate(GAFFCESParser* parser) 00279 : CESPredicate(parser) 00280 {}; 00281 ~TruePredicate() {}; 00282 //check if atom matches predicate 
(always true!) 00283 bool match(Atom&) { return true; } 00284 }; 00285 00286 //Parser-match-Function checking if atom's environment matches the "predicate tree" 00287 bool match(Atom& atom) const; 00288 00289 //initialize Set of Elementsymbols 00290 void initElementSymbols(); 00291 const std::set<String>& getElementSymbols(); 00292 00293 GAFFCESParser(); 00294 GAFFCESParser(const String& cesstring); 00295 ~GAFFCESParser(); 00296 00297 //for lexer/parser 00298 Size read(char* buf, Size max_size); 00299 00300 static State state; 00301 00303 TruePredicate root; 00304 //fixed root of the "predicate tree" 00305 CESPredicate* root_predicate; 00306 //parent-predicate of current_predicate 00307 CESPredicate* current_root_predicate; 00308 //"predicate-node" in the "predicate tree" we actually considering 00309 CESPredicate* current_predicate; 00310 00311 00312 //parse chemical environment string 00313 bool parse(const String& cesstring); 00314 //check if any atom matches parsed ces_string 00315 bool GAFFCESatomMatcher(Atom& atom, const String& cesstring); 00316 //start filling the children vector for a current predicate 00317 void startChildPredicates(); 00318 //end up filling the children vector for a current predicate 00319 void endChildPredicates(); 00320 00321 protected: 00322 //current chemical environment string 00323 String cesstring_; 00324 //set with all valid element symbols 00325 std::set<String> element_symbols_; 00326 //for Parser/Lexer function YYINPUT 00327 Position read_start_; 00328 }; 00329 00330 } 00331 00332 #endif