BALL
1.4.1
|
00001 // -*- Mode: C++; tab-width: 2; -*- 00002 // vi: set ts=2: 00003 // 00004 00005 #ifndef BALL_STRUCTURE_SMARTES_PARSER_H 00006 #define BALL_STRUCTURE_SMARTES_PARSER_H 00007 00008 #ifndef BALL_COMMON_H 00009 # include <BALL/common.h> 00010 #endif 00011 00012 #include <map> 00013 #include <set> 00014 00015 // needed for MSVC: 00016 #undef CW_DEFAULT 00017 00018 namespace BALL 00019 { 00020 // forward declarations 00021 class Bond; 00022 class Atom; 00023 class Element; 00024 00035 class BALL_EXPORT SmartsParser 00036 { 00037 public: 00038 00039 enum ZEIsomerType 00040 { 00041 ANY_ZE = 1, 00042 NONE, 00043 Z, 00044 E 00045 }; 00046 00048 enum ChiralClass 00049 { 00050 CHIRAL_CLASS_UNSPECIFIED = 1, 00051 NONCHIRAL, 00052 NONCHIRAL_OR_UNSPECIFIED, 00053 CW_DEFAULT, // TH 00054 CW_DEFAULT_OR_UNSPECIFIED, 00055 CCW_DEFAULT, // TH 00056 CCW_DEFAULT_OR_UNSPECIFIED, 00057 CW_TH, // tetrahdral 00058 CW_TH_OR_UNSPECIFIED, 00059 CCW_TH, 00060 CCW_TH_OR_UNSPECIFIED, 00061 CW_AL, // allene-like 00062 CW_AL_OR_UNSPECIFIED, 00063 CCW_AL, 00064 CCW_AL_OR_UNSPECIFIED, 00065 CW_SP, // square planar 00066 CW_SP_OR_UNSPECIFIED, 00067 CCW_SP, 00068 CCW_SP_OR_UNSPECIFIED, 00069 CW_TB, //trigonal bipyramidal 00070 CW_TB_OR_UNSPECIFIED, 00071 CCW_TB, 00072 CCW_TB_OR_UNSPECIFIED, 00073 CW_OH, // octahedral 00074 CW_OH_OR_UNSPECIFIED, 00075 CCW_OH, 00076 CCW_OH_OR_UNSPECIFIED 00077 }; 00078 00086 enum LogicalOperator 00087 { 00088 AND, 00089 OR, 00090 AND_LOW, 00091 NOOP 00092 }; 00093 00094 00096 class SPAtom; 00097 00105 class BALL_EXPORT SPBond 00106 { 00107 public: 00108 00110 enum SPBondOrder 00111 { 00112 SINGLE = 1, 00113 SINGLE_UP, 00114 SINGLE_UP_OR_ANY, 00115 SINGLE_DOWN, 00116 SINGLE_DOWN_OR_ANY, 00117 SINGLE_OR_AROMATIC, 00118 AROMATIC, 00119 DOUBLE, 00120 TRIPLE, 00121 NOT_NECESSARILY_CONNECTED, 00122 IN_RING, 00123 ANY 00124 }; 00125 00129 00130 SPBond(); 00131 00133 SPBond(SPBondOrder bond_order); 00134 00136 SPBond(SPAtom* first, SPAtom* second, SPBondOrder bond_order); 00137 00139 virtual ~SPBond() ; 00141 00145 00146 ZEIsomerType getZEType() const { return ze_type_; } 00147 00149 void setZEType(ZEIsomerType type) { ze_type_ = type; } 00150 00152 void setBondOrder(SPBondOrder bond_order); 00153 00155 SPBondOrder getBondOrder() const { return bond_order_; } 00156 00158 bool isNot() const { return not_; } 00159 00161 void setNot(bool is_not) { not_ = is_not; } 00162 00163 // returns true if the SPBond matches the given bond 00164 bool equals(const Bond* bond) const; 00166 00167 protected: 00168 00170 ZEIsomerType ze_type_; 00171 00173 SPBondOrder bond_order_; 00174 00176 bool not_; 00177 }; 00178 00188 class BALL_EXPORT SPAtom 00189 { 00190 public: 00191 00193 enum PropertyType 00194 { 00195 ISOTOPE = 1, 00196 CHARGE, 00197 AROMATIC, 00198 ALIPHATIC, 00199 IN_NUM_RINGS, 00200 IN_RING_SIZE, 00201 IN_BRACKETS, 00202 CONNECTED, 00203 EXPLICIT_HYDROGENS, 00204 VALENCE, 00205 IMPLICIT_HYDROGENS, 00206 DEGREE, 00207 RING_CONNECTED, 00208 CHIRALITY, 00209 SYMBOL 00210 }; 00211 00213 union PropertyValue 00214 { 00215 int int_value; 00216 bool bool_value; 00217 const Element* element_value; 00218 ChiralClass chiral_class_value; 00219 }; 00220 00222 struct Property 00223 { 00224 public: 00225 00229 00230 Property(PropertyType type, int value); 00231 00233 Property(PropertyType type, bool value); 00234 00236 Property(PropertyType type, const Element* value); 00237 00239 Property(PropertyType type, ChiralClass value); 00240 00242 virtual ~Property(); 00244 00246 void operator = (const Property& rhs); 00247 00251 00252 PropertyType getType() const { return type_; } 00253 00255 PropertyValue getValue() const { return value_; } 00257 00258 private: 00259 00261 Property(); 00262 00264 PropertyType type_; 00265 00267 PropertyValue value_; 00268 }; 00269 00307 00308 SPAtom(); 00309 00311 SPAtom(const String& symbol); 00312 00314 virtual ~SPAtom() ; 00316 00317 00321 00322 void setProperty(PropertyType type, int int_value); 00323 00325 void setProperty(PropertyType type, bool flag); 00326 00328 void setProperty(PropertyType type, const Element* element); 00329 00331 void setProperty(PropertyType type, ChiralClass chirality); 00332 00334 void setProperty(Property property); 00335 00337 void addPropertiesFromSPAtom(SPAtom* sp_atom); 00338 00340 void setNotProperty(PropertyType type); 00341 00343 bool hasProperty(PropertyType type) const; 00344 00346 PropertyValue getProperty(PropertyType type); 00347 00349 Size countProperties() const; 00350 00352 Size getDefaultValence(const Atom* atom) const; 00353 00355 Size countRealValences(const Atom* atom) const; 00356 00358 Size getNumberOfImplicitHydrogens(const Atom* atom) const; 00360 00364 00365 bool equals(const Atom* atom) const; 00367 00368 protected: 00369 00371 Atom* atom_; 00372 00374 std::map<PropertyType, PropertyValue> properties_; 00375 00377 std::set<PropertyType> not_properties_; 00378 }; 00379 00381 class SPNode; 00382 00385 class BALL_EXPORT SPEdge 00386 { 00387 public: 00388 00392 00393 SPEdge(); 00394 00396 SPEdge(const SPEdge& sp_edge); 00397 00399 virtual ~SPEdge(); 00401 00405 00406 bool isInternal() const { return internal_; } 00407 00409 void setInternal(bool internal) { internal_ = internal; } 00410 00412 void setSPBond(SPBond* sp_bond) { bond_ = sp_bond; } 00413 00415 SPBond* getSPBond() const { return bond_; } 00416 00418 void setFirstSPNode(SPNode* first) { first_ = first; } 00419 00421 SPNode* getFirstSPNode() const { return first_; } 00422 00424 void setSecondSPNode(SPNode* second) { second_ = second; } 00425 00427 SPNode* getSecondSPNode() const { return second_; } 00428 00430 SPNode* getPartnerSPNode(SPNode* node) { return node == first_ ? second_ : first_; } 00431 00433 bool isNot() const { return is_not_; } 00434 00436 void setNot(bool is_not) { is_not_ = is_not; } 00437 00439 void setFirstSPEdge(SPEdge* first) { first_edge_ = first; } 00440 00442 SPEdge* getFirstSPEdge() const { return first_edge_; } 00443 00445 void setSecondSPEdge(SPEdge* second) { second_edge_ = second; } 00446 00448 SPEdge* getSecondSPEdge() const { return second_edge_; } 00449 00451 void setLogicalOperator(LogicalOperator log_op) { log_op_ = log_op; } 00452 00454 LogicalOperator getLogicalOperator() const { return log_op_; } 00456 00457 protected: 00458 00460 bool internal_; 00461 00463 bool is_not_; 00464 00466 SPNode* first_; 00467 00469 SPNode* second_; 00470 00472 SPBond* bond_; 00473 00475 SPEdge* first_edge_; 00476 00478 SPEdge* second_edge_; 00479 00481 LogicalOperator log_op_; 00482 }; 00483 00486 class BALL_EXPORT SPNode 00487 { 00488 public: 00489 00493 00494 typedef std::vector<SPEdge*>::iterator EdgeIterator; 00495 00497 typedef std::vector<SPEdge*>::const_iterator EdgeConstIterator; 00499 00500 00504 00505 SPNode(); 00506 00508 SPNode(SPAtom* atom); 00509 00511 SPNode(SPNode* first, LogicalOperator log_op, SPNode* second); 00512 00514 SPNode(const SPNode& sp_node); 00515 00517 virtual ~SPNode(); 00519 00520 00524 00525 bool isInternal() const { return internal_; } 00526 00528 void setInternal(bool internal) { internal_ = internal; } 00529 00531 bool isRecursive() const { return recursive_; } 00532 00534 void setRecursive(bool recursive); 00535 00537 void setComponentNumber(int no) { component_no_ = no; } 00538 00540 Size getComponentNumber() const { return component_no_; } 00541 00543 SPAtom* getSPAtom() const { return sp_atom_; } 00544 00546 void setSPAtom(SPAtom* sp_atom) { sp_atom_ = sp_atom; } 00547 00549 SPEdge* getFirstEdge() const { return first_edge_; } 00550 00552 void setFirstEdge(SPEdge* first) { first_edge_ = first; } 00553 00555 SPEdge* getSecondEdge() const { return second_edge_; } 00556 00558 void setSecondEdge(SPEdge* second) { second_edge_ = second; } 00559 00561 bool getNot() const { return is_not_; } 00562 00564 void setNot(bool is_not) { is_not_ = is_not; } 00565 00566 00568 //void setInBrackets() { in_brackets_ = true; } 00569 00571 void addSPEdge(SPEdge* sp_edge) { edges_.push_back(sp_edge); } 00572 00574 void setLogicalOperator(LogicalOperator log_op) { log_op_ = log_op; } 00575 00577 LogicalOperator getLogicalOperator() const { return log_op_; } 00578 00580 Size countEdges() const { return edges_.size(); } 00582 00586 00587 EdgeIterator begin() { return edges_.begin(); } 00588 00590 EdgeIterator end() { return edges_.end(); } 00591 00593 EdgeConstIterator begin() const { return edges_.begin(); } 00594 00596 EdgeConstIterator end() const { return edges_.end(); } 00598 00599 protected: 00600 00602 bool internal_; 00603 00605 bool is_not_; 00606 00608 bool recursive_; 00609 00611 //bool in_brackets_; 00612 00614 LogicalOperator log_op_; 00615 00617 std::vector<SPEdge*> edges_; 00618 00620 SPEdge* first_edge_; 00621 00623 SPEdge* second_edge_; 00624 00626 SPAtom* sp_atom_; 00627 00629 int component_no_; 00630 }; 00631 00632 00636 00637 SmartsParser(); 00638 00640 SmartsParser(const SmartsParser& parser); 00641 00643 virtual ~SmartsParser(); 00645 00651 void parse(const String& s) 00652 throw(Exception::ParseError); 00653 00657 00658 SPAtom* createAtom(const String& symbol, bool in_bracket = false); 00659 00661 void setRoot(SPNode* root) { root_ = root; } 00662 00664 SPNode* getRoot() const { return root_; } 00665 00667 void dumpTree(); 00668 00670 void clear(); 00671 00673 void addRingConnection(SPNode* spnode, Size index); 00674 00676 std::map<Size, std::vector<SPNode*> > getRingConnections() const; 00677 00679 void setSSSR(const std::vector<std::vector<Atom*> >& sssr); 00680 00682 void setNeedsSSSR(bool needs_sssr) { needs_SSSR_ = needs_sssr; } 00683 00685 bool getNeedsSSSR() const { return needs_SSSR_; } 00686 00688 void setRecursive(bool recursive) { recursive_ = recursive; } 00689 00691 bool isRecursive() const { return recursive_; } 00692 00694 void setComponentGrouping(bool component_grouping) { component_grouping_ = component_grouping; } 00695 00697 bool hasComponentGrouping() const { return component_grouping_; } 00698 00700 struct State 00701 { 00702 Size char_count; 00703 SmartsParser* current_parser; 00704 const char* buffer; 00705 }; 00706 00708 static State state; 00709 00711 const std::set<SPNode*>& getNodes() const { return nodes_; } 00712 00714 const std::set<SPEdge*>& getEdges() const { return edges_; } 00715 00717 void addEdge(SPEdge* edge) { edges_.insert(edge); } 00718 00720 void addNode(SPNode* node) { nodes_.insert(node); } 00721 00723 bool hasRecursiveEdge(SPEdge* edge) const { return rec_edges_.find(edge) != rec_edges_.end(); } 00724 00726 void addRecursiveEdge(SPEdge* edge) { rec_edges_.insert(edge); } 00727 00729 void setNextComponentNumberToSubTree(SPNode* spnode); 00731 00732 protected: 00733 00735 bool needs_SSSR_; 00736 00738 bool recursive_; 00739 00741 bool component_grouping_; 00742 00744 static vector<std::set<const Atom*> >* sssr_; 00745 00747 void dumpTreeRecursive_(SPNode* node, Size depth); 00748 00750 void dumpTreeRecursive_(SPEdge* edge, Size depth); 00751 00753 std::map<Size, std::vector<SPNode*> > ring_connections_; 00754 00756 static SmartsParser* current_parser_; 00757 00759 std::set<SPEdge*> edges_; 00760 00762 std::set<SPNode*> nodes_; 00763 00765 std::set<SPEdge*> rec_edges_; 00766 00768 SPNode* root_; 00769 00771 int component_no_; 00772 }; 00773 00774 } // namespace BALL 00775 00776 #endif // BALL_STRUCTURE_SMARTS_PARSER_H 00777