BALL  1.4.1
smartsParser.h
Go to the documentation of this file.
00001 // -*- Mode: C++; tab-width: 2; -*-
00002 // vi: set ts=2:
00003 //
00004 
00005 #ifndef BALL_STRUCTURE_SMARTES_PARSER_H
00006 #define BALL_STRUCTURE_SMARTES_PARSER_H
00007 
00008 #ifndef BALL_COMMON_H
00009   # include <BALL/common.h>
00010 #endif
00011 
00012 #include <map>
00013 #include <set>
00014 
00015 // needed for MSVC:
00016 #undef CW_DEFAULT
00017 
00018 namespace BALL 
00019 {
00020   // forward declarations
00021   class Bond;
00022   class Atom;
00023   class Element;
00024 
00035   class BALL_EXPORT SmartsParser
00036   {
00037     public:
00038 
00039     enum ZEIsomerType
00040     {
            // Z/E isomer classification; an SPBond keeps one of these in
            // SPBond::ze_type_ (see getZEType()/setZEType()).
            // Only the first enumerator has an explicit value, so the four
            // enumerators are numbered 1..4 in declaration order.
00041       ANY_ZE = 1,
00042       NONE,
00043       Z,
00044       E
00045     };
00046 
00048     enum ChiralClass
00049     {
            // Chirality classes that can be stored as
            // SPAtom::PropertyValue::chiral_class_value.
            // Numbering starts at 1. From CW_DEFAULT on, every value X is
            // directly followed by a companion X_OR_UNSPECIFIED.
            // CW_/CCW_ presumably stand for clockwise / counter-clockwise --
            // TODO confirm against the implementation.
            // NOTE: this header #undefs CW_DEFAULT before the namespace opens
            // (marked "needed for MSVC"), presumably because a macro of that
            // name would otherwise replace the enumerator below.
00050       CHIRAL_CLASS_UNSPECIFIED = 1,
00051       NONCHIRAL,
00052       NONCHIRAL_OR_UNSPECIFIED,
00053       CW_DEFAULT, // TH
00054       CW_DEFAULT_OR_UNSPECIFIED,
00055       CCW_DEFAULT, // TH
00056       CCW_DEFAULT_OR_UNSPECIFIED,
00057       CW_TH, // tetrahedral
00058       CW_TH_OR_UNSPECIFIED,
00059       CCW_TH,
00060       CCW_TH_OR_UNSPECIFIED,
00061       CW_AL, // allene-like
00062       CW_AL_OR_UNSPECIFIED,
00063       CCW_AL, 
00064       CCW_AL_OR_UNSPECIFIED,
00065       CW_SP, // square planar
00066       CW_SP_OR_UNSPECIFIED,
00067       CCW_SP,
00068       CCW_SP_OR_UNSPECIFIED,
00069       CW_TB, //trigonal bipyramidal
00070       CW_TB_OR_UNSPECIFIED,
00071       CCW_TB,
00072       CCW_TB_OR_UNSPECIFIED,
00073       CW_OH, // octahedral
00074       CW_OH_OR_UNSPECIFIED,
00075       CCW_OH,
00076       CCW_OH_OR_UNSPECIFIED
00077     };
00078 
00086     enum LogicalOperator
00087     {
            // Operator tag stored on SPNode and SPEdge objects (their log_op_
            // members, set through setLogicalOperator()).
            // Unlike the other enums in this class no explicit value is given,
            // so numbering starts at 0 (AND == 0 ... NOOP == 3).
            // AND_LOW is presumably a lower-precedence AND -- TODO confirm.
00088       AND,
00089       OR,
00090       AND_LOW,
00091       NOOP
00092     };
00093 
00094 
00096     class SPAtom;
00097 
00105     class BALL_EXPORT SPBond 
00106     {
              // Bond description of a parsed pattern. It stores a bond order,
              // a Z/E isomer type and a negation flag; equals() tests it
              // against a Bond.
00107       public:
00108 
              // Bond orders an SPBond can carry. Numbering starts at 1
              // (only SINGLE has an explicit value).
00110         enum SPBondOrder
00111         {
00112           SINGLE = 1,
00113           SINGLE_UP,
00114           SINGLE_UP_OR_ANY,
00115           SINGLE_DOWN,
00116           SINGLE_DOWN_OR_ANY,
00117           SINGLE_OR_AROMATIC,
00118           AROMATIC,
00119           DOUBLE,
00120           TRIPLE,
00121           NOT_NECESSARILY_CONNECTED,
00122           IN_RING,
00123           ANY
00124         };
00125     
00129 
              // Default constructor; the initial member values are set in the
              // out-of-line definition (not visible in this header).
00130         SPBond();
00131 
              // Constructs a bond from a bond order. Not marked explicit, so
              // an SPBondOrder converts implicitly to an SPBond.
00133         SPBond(SPBondOrder bond_order);
00134 
              // Constructs a bond from two atoms and a bond order.
              // NOTE(review): the class declares no member that could retain
              // `first`/`second`; check the definition for what is done with
              // them.
00136         SPBond(SPAtom* first, SPAtom* second, SPBondOrder bond_order);
00137 
              // Virtual destructor (defined out of line).
00139         virtual ~SPBond() ;
00141 
00145 
              // Returns the stored Z/E isomer type.
00146         ZEIsomerType getZEType() const { return ze_type_; }
00147 
              // Replaces the stored Z/E isomer type.
00149         void setZEType(ZEIsomerType type) { ze_type_ = type; }
00150 
              // Sets the bond order. Defined out of line, unlike the other
              // setters of this class.
00152         void setBondOrder(SPBondOrder bond_order);
00153 
              // Returns the stored bond order.
00155         SPBondOrder getBondOrder() const { return bond_order_; }
00156 
              // Returns the negation flag.
00158         bool isNot() const { return not_; }
00159 
              // Sets the negation flag.
00161         void setNot(bool is_not) { not_ = is_not; }
00162 
00163         // returns true if the SPBond matches the given bond
00164         bool equals(const Bond* bond) const;
00166 
00167       protected:
00168 
              // Z/E isomer type, see getZEType()/setZEType().
00170         ZEIsomerType  ze_type_;
00171 
              // Bond order, see getBondOrder()/setBondOrder().
00173         SPBondOrder bond_order_;
00174 
              // Negation flag, see isNot()/setNot().
00176         bool not_;
00177     };
00178     
00188     class BALL_EXPORT SPAtom
00189     {
              // Atom description of a parsed pattern. It holds a map from
              // property type to property value (properties_), a set of
              // property types registered through setNotProperty()
              // (not_properties_) and an Atom pointer (atom_).
00190       public:
00191 
              // Keys identifying the kind of property stored for an atom.
              // Numbering starts at 1 (only ISOTOPE has an explicit value).
00193         enum PropertyType
00194         {
00195           ISOTOPE = 1,
00196           CHARGE,
00197           AROMATIC,
00198           ALIPHATIC,
00199           IN_NUM_RINGS,
00200           IN_RING_SIZE,
00201           IN_BRACKETS,
00202           CONNECTED,
00203           EXPLICIT_HYDROGENS,
00204           VALENCE,
00205           IMPLICIT_HYDROGENS,
00206           DEGREE,
00207           RING_CONNECTED,
00208           CHIRALITY,
00209           SYMBOL
00210         };
00211 
              // Untagged storage for a property value. The union does not
              // record which member is active; which member belongs to which
              // PropertyType is decided by the implementation (not visible
              // here -- TODO confirm the mapping before reading a member).
00213         union PropertyValue
00214         {
00215           int int_value;
00216           bool bool_value;
00217           const Element* element_value;
00218           ChiralClass chiral_class_value;
00219         };
00220 
              // A (type, value) pair. One constructor exists per member of
              // PropertyValue.
00222         struct Property
00223         {
00224           public:
00225         
00229 
                  // Property carrying an int value.
00230             Property(PropertyType type, int value);
00231             
                  // Property carrying a bool value.
00233             Property(PropertyType type, bool value);
00234             
                  // Property carrying an Element pointer.
00236             Property(PropertyType type, const Element* value);
00237             
                  // Property carrying a chirality class.
00239             Property(PropertyType type, ChiralClass value);
00240 
                  // Virtual destructor (defined out of line).
00242             virtual ~Property();
00244 
                  // Assignment. NOTE(review): returns void, so chained
                  // assignment (a = b = c) does not compile.
00246             void operator = (const Property& rhs);
00247 
00251 
                  // Returns the property type.
00252             PropertyType getType() const { return type_; }
00253 
                  // Returns a copy of the stored value union.
00255             PropertyValue getValue() const { return value_; }
00257 
00258           private:
00259             
                  // Private default constructor: outside code can only create
                  // a Property through one of the (type, value) constructors.
00261             Property();
00262             
                  // Kind of property.
00264             PropertyType type_;
00265 
                  // Value belonging to type_.
00267             PropertyValue value_;
00268         };
00269         
00307 
              // Default constructor (defined out of line).
00308         SPAtom();
00309 
              // Constructs the atom from a symbol string. Not marked explicit,
              // so a String converts implicitly to an SPAtom.
00311         SPAtom(const String& symbol);
00312 
              // Virtual destructor (defined out of line).
00314         virtual ~SPAtom() ;
00316 
00317 
00321 
              // Overloads storing a property of the given type, one per value
              // kind of PropertyValue, plus one taking a ready-made Property
              // (passed by value).
00322         void setProperty(PropertyType type, int int_value);
00323         
00325         void setProperty(PropertyType type, bool  flag);
00326 
00328         void setProperty(PropertyType type, const Element* element);
00329 
00331         void setProperty(PropertyType type, ChiralClass chirality);
00332 
00334         void setProperty(Property property);
00335 
              // Adds the properties of another SPAtom to this one. How clashes
              // with existing entries are handled is defined out of line.
00337         void addPropertiesFromSPAtom(SPAtom* sp_atom);
00338 
              // Registers `type` as a "not" property (presumably recorded in
              // not_properties_ -- confirm in the definition).
00340         void setNotProperty(PropertyType type);
00341 
              // Reports whether a property of the given type is present.
00343         bool hasProperty(PropertyType type) const;
00344 
              // Returns the value stored for `type`. Declared non-const, unlike
              // hasProperty(); behaviour for an absent type is not visible
              // here.
00346         PropertyValue getProperty(PropertyType type);
00347 
              // Returns the number of properties.
00349         Size countProperties() const;
00350 
              // Helpers computing valence-related numbers for a concrete Atom;
              // their rules live in the out-of-line definitions.
00352         Size getDefaultValence(const Atom* atom) const;
00353         
00355         Size countRealValences(const Atom* atom) const;
00356         
00358         Size getNumberOfImplicitHydrogens(const Atom* atom) const;
00360 
00364 
              // Compares this description against a concrete Atom (matching
              // rules are defined out of line).
00365         bool equals(const Atom* atom) const;
00367         
00368       protected:
00369 
              // Atom pointer; ownership and purpose are not visible in this
              // header.
00371         Atom* atom_;
00372 
              // Property values keyed by their type (at most one per type,
              // since this is a std::map).
00374         std::map<PropertyType, PropertyValue> properties_;
00375 
              // Set of property types; see setNotProperty().
00377         std::set<PropertyType> not_properties_;
00378     };
00379 
00381     class SPNode;
00382 
00385     class BALL_EXPORT SPEdge
00386     {
              // Edge of the parse tree. It references two SPNode end points
              // and either an SPBond or up to two child edges combined by a
              // LogicalOperator. All pointers are raw; ownership is not
              // expressed in this header.
00387       public:
00388 
00392 
              // Default constructor (defined out of line).
00393         SPEdge();
00394 
              // Copy constructor (defined out of line; whether pointees are
              // copied or shared is not visible here).
00396         SPEdge(const SPEdge& sp_edge);
00397 
              // Virtual destructor (defined out of line).
00399         virtual ~SPEdge();
00401         
00405 
              // Returns the "internal" flag.
00406         bool isInternal() const { return internal_; }
00407 
              // Sets the "internal" flag.
00409         void setInternal(bool internal) { internal_ = internal; }
00410         
              // Stores the bond pointer (no ownership transfer is visible).
00412         void setSPBond(SPBond* sp_bond) { bond_ = sp_bond; }
00413 
              // Returns the stored bond pointer.
00415         SPBond* getSPBond() const { return bond_; }
00416         
              // Sets the first end point.
00418         void setFirstSPNode(SPNode* first) { first_ = first; }
00419 
              // Returns the first end point.
00421         SPNode* getFirstSPNode() const { return first_; } 
00422         
              // Sets the second end point.
00424         void setSecondSPNode(SPNode* second) { second_ = second; }
00425 
              // Returns the second end point.
00427         SPNode* getSecondSPNode() const { return second_; }
00428         
              // Returns the end point opposite to `node`: second_ when `node`
              // is first_, otherwise first_. A node that is neither end point
              // therefore also yields first_.
              // Const-qualified like the other getters: it only reads members,
              // so it can be called through a const SPEdge.
00430         SPNode* getPartnerSPNode(SPNode* node) const { return node == first_ ? second_ : first_; }
00431         
              // Returns the negation flag.
00433         bool isNot() const { return is_not_; }
00434 
              // Sets the negation flag.
00436         void setNot(bool is_not) { is_not_ = is_not; }
00437       
              // Sets the first child edge.
00439         void setFirstSPEdge(SPEdge* first) { first_edge_ = first; }
00440 
              // Returns the first child edge.
00442         SPEdge* getFirstSPEdge() const { return first_edge_; }
00443 
              // Sets the second child edge.
00445         void setSecondSPEdge(SPEdge* second) { second_edge_ = second; }
00446 
              // Returns the second child edge.
00448         SPEdge* getSecondSPEdge() const { return second_edge_; }
00449     
              // Sets the logical operator.
00451         void setLogicalOperator(LogicalOperator log_op) { log_op_ = log_op; }
00452 
              // Returns the logical operator.
00454         LogicalOperator getLogicalOperator() const { return log_op_; }
00456       
00457       protected:
00458 
              // "Internal" flag, see isInternal()/setInternal().
00460         bool internal_;
00461 
              // Negation flag, see isNot()/setNot().
00463         bool is_not_;
00464 
              // First end point.
00466         SPNode* first_;
00467 
              // Second end point.
00469         SPNode* second_;
00470 
              // Bond attached to this edge.
00472         SPBond* bond_;
00473 
              // First child edge.
00475         SPEdge* first_edge_;
00476 
              // Second child edge.
00478         SPEdge* second_edge_;
00479 
              // Logical operator, see setLogicalOperator().
00481         LogicalOperator log_op_;
00482     };
00483 
00486     class BALL_EXPORT SPNode
00487     {
              // Node of the parse tree. It references an SPAtom, a list of
              // attached edges (edges_) that can be iterated with
              // begin()/end(), two separately stored edge pointers and a
              // LogicalOperator. All pointers are raw; ownership is not
              // expressed in this header.
00488       public:
00489     
00493 
              // Mutable iterator over the attached edges.
00494         typedef std::vector<SPEdge*>::iterator EdgeIterator;
00495 
              // Read-only iterator over the attached edges.
00497         typedef std::vector<SPEdge*>::const_iterator EdgeConstIterator;
00499   
00500 
00504 
              // Default constructor (defined out of line).
00505         SPNode();
00506 
              // Constructs a node for the given atom. Not marked explicit, so
              // an SPAtom* converts implicitly to an SPNode.
00508         SPNode(SPAtom* atom);
00509 
              // Constructs a node from two nodes and a logical operator (how
              // the operands are attached is defined out of line).
00511         SPNode(SPNode* first, LogicalOperator log_op, SPNode* second);
00512 
              // Copy constructor (defined out of line).
00514         SPNode(const SPNode& sp_node);
00515 
              // Virtual destructor (defined out of line).
00517         virtual ~SPNode();
00519         
00520 
00524 
              // Returns the "internal" flag.
00525         bool isInternal() const { return internal_; }
00526 
              // Sets the "internal" flag.
00528         void setInternal(bool internal) { internal_ = internal; }
00529         
              // Returns the "recursive" flag.
00531         bool isRecursive() const { return recursive_; }
00532 
              // Sets the "recursive" flag. Defined out of line, unlike the
              // other setters, so it may do more than assign the member.
00534         void setRecursive(bool recursive); 
00535       
              // Stores the component number.
00537         void setComponentNumber(int no) { component_no_ = no; }
00538 
              // Returns the component number.
              // NOTE(review): component_no_ is an int but is returned as Size,
              // so the value is converted implicitly; if Size is unsigned
              // (declared elsewhere), a negative number would wrap.
00540         Size getComponentNumber() const { return component_no_; }
00541       
              // Returns the atom pointer.
00543         SPAtom* getSPAtom() const { return sp_atom_; }
00544 
              // Replaces the atom pointer.
00546         void setSPAtom(SPAtom* sp_atom) { sp_atom_ = sp_atom; }
00547     
              // Returns the first edge pointer (stored separately from
              // edges_).
00549         SPEdge* getFirstEdge() const { return first_edge_; }
00550 
              // Sets the first edge pointer.
00552         void setFirstEdge(SPEdge* first) { first_edge_ = first; }
00553 
              // Returns the second edge pointer (stored separately from
              // edges_).
00555         SPEdge* getSecondEdge() const { return second_edge_; }
00556 
              // Sets the second edge pointer.
00558         void setSecondEdge(SPEdge* second) { second_edge_ = second; }
00559     
              // Returns the negation flag.
00561         bool getNot() const { return is_not_; }
00562 
              // Sets the negation flag.
00564         void setNot(bool is_not) { is_not_ = is_not; }
00565   
00566 
              // Disabled together with the in_brackets_ member below.
00568         //void setInBrackets() { in_brackets_ = true; }
00569 
              // Appends an edge to edges_; duplicates are not filtered.
00571         void addSPEdge(SPEdge* sp_edge) { edges_.push_back(sp_edge); }
00572 
              // Sets the logical operator.
00574         void setLogicalOperator(LogicalOperator log_op) { log_op_ = log_op; }
00575 
              // Returns the logical operator.
00577         LogicalOperator getLogicalOperator() const { return log_op_; }  
00578 
              // Returns the number of edges in edges_ (size() converted to
              // Size).
00580         Size countEdges() const { return edges_.size(); }
00582 
00586 
              // Iteration over edges_ only; first_edge_/second_edge_ are not
              // part of this range.
00587         EdgeIterator begin() { return edges_.begin(); }
00588 
00590         EdgeIterator end() { return edges_.end(); }
00591 
00593         EdgeConstIterator begin() const { return edges_.begin(); }
00594 
00596         EdgeConstIterator end() const { return edges_.end(); }
00598 
00599       protected:
00600         
              // "Internal" flag, see isInternal()/setInternal().
00602         bool internal_;
00603 
              // Negation flag, see getNot()/setNot().
00605         bool is_not_;
00606 
              // "Recursive" flag, see isRecursive()/setRecursive().
00608         bool recursive_;
00609 
              // Disabled member belonging to the commented-out
              // setInBrackets().
00611         //bool in_brackets_;
00612 
              // Logical operator, see setLogicalOperator().
00614         LogicalOperator log_op_;
00615 
              // Edges added through addSPEdge(), in insertion order.
00617         std::vector<SPEdge*> edges_;
00618 
              // First edge pointer, see getFirstEdge()/setFirstEdge().
00620         SPEdge* first_edge_;
00621 
              // Second edge pointer, see getSecondEdge()/setSecondEdge().
00623         SPEdge* second_edge_;
00624 
              // Atom attached to this node.
00626         SPAtom* sp_atom_;
00627 
              // Component number, see setComponentNumber().
00629         int component_no_;
00630     };
00631 
00632   
00636 
00637     SmartsParser();
          // ^ Default constructor (defined out of line).
00638       
          // Copy constructor (defined out of line; whether the node/edge
          // pointers are deep-copied is not visible here).
00640     SmartsParser(const SmartsParser& parser);
00641 
          // Virtual destructor (defined out of line).
00643     virtual ~SmartsParser();
00645     
          // Parses the string `s`.
          // Declared to throw only Exception::ParseError.
          // NOTE(review): dynamic exception specifications are deprecated
          // since C++11 and ill-formed since C++17. Removing this one requires
          // changing the out-of-line definition in the same step.
00651     void parse(const String& s)
00652       throw(Exception::ParseError);
00653 
00657 
          // Creates an SPAtom for `symbol`; `in_bracket` defaults to false.
          // Who owns the returned pointer is not visible in this header.
00658     SPAtom* createAtom(const String& symbol, bool in_bracket = false);
00659 
          // Sets the root node of the tree.
00661     void setRoot(SPNode* root) { root_ = root; }
00662 
          // Returns the root node of the tree.
00664     SPNode* getRoot() const { return root_; }
00665 
          // Dumps the tree (output target is defined out of line; see also the
          // protected dumpTreeRecursive_ helpers).
00667     void dumpTree();
00668     
          // Clears the parser (exact scope is defined out of line).
00670     void clear();
00671     
          // Registers `spnode` under ring-connection index `index`.
00673     void addRingConnection(SPNode* spnode, Size index);
00674     
          // Returns the ring connections by value, i.e. as a copy of the map.
00676     std::map<Size, std::vector<SPNode*> > getRingConnections() const;
00677   
          // Supplies ring data as a list of atom lists.
          // NOTE(review): the parameter holds Atom* while the static sssr_
          // member stores std::set<const Atom*>, so the definition presumably
          // converts between the two -- confirm.
00679     void setSSSR(const std::vector<std::vector<Atom*> >& sssr);
00680 
          // Sets the needs_SSSR_ flag.
00682     void setNeedsSSSR(bool needs_sssr) { needs_SSSR_ = needs_sssr; }
00683 
          // Returns the needs_SSSR_ flag.
00685     bool getNeedsSSSR() const { return needs_SSSR_; }
00686 
          // Sets the recursive_ flag.
00688     void setRecursive(bool recursive) { recursive_ = recursive; }
00689 
          // Returns the recursive_ flag.
00691     bool isRecursive() const { return recursive_; }
00692 
          // Sets the component_grouping_ flag.
00694     void setComponentGrouping(bool component_grouping) { component_grouping_ = component_grouping; }
00695 
          // Returns the component_grouping_ flag.
00697     bool hasComponentGrouping() const { return component_grouping_; }
00698 
          // Bundle of a character count, a parser pointer and a character
          // buffer pointer. Presumably the hand-over point to the generated
          // lexer/parser -- TODO confirm in the implementation.
00700     struct State
00701     {
00702       Size          char_count;
00703       SmartsParser* current_parser;
00704       const char*   buffer;
00705     };
00706     
          // Single State object shared by all SmartsParser instances (static,
          // public). No synchronization is visible in this header.
00708     static State state;
00709 
          // Read-only access to the set of registered nodes.
00711     const std::set<SPNode*>& getNodes() const { return nodes_; }
00712 
          // Read-only access to the set of registered edges.
00714     const std::set<SPEdge*>& getEdges() const { return edges_; }
00715 
          // Registers an edge; inserting the same pointer twice has no effect
          // because edges_ is a std::set.
00717     void addEdge(SPEdge* edge) { edges_.insert(edge); }
00718 
          // Registers a node; inserting the same pointer twice has no effect
          // because nodes_ is a std::set.
00720     void addNode(SPNode* node) { nodes_.insert(node); }
00721 
          // True if `edge` was registered through addRecursiveEdge().
00723     bool hasRecursiveEdge(SPEdge* edge) const { return rec_edges_.find(edge) != rec_edges_.end(); }
00724 
          // Registers `edge` in rec_edges_.
00726     void addRecursiveEdge(SPEdge* edge) { rec_edges_.insert(edge); }
00727 
          // Assigns the next component number to the subtree rooted at
          // `spnode` (traversal and numbering are defined out of line).
00729     void setNextComponentNumberToSubTree(SPNode* spnode);
00731 
00732     protected:
00733 
            // Flag behind setNeedsSSSR()/getNeedsSSSR().
00735       bool needs_SSSR_;
00736 
            // Flag behind setRecursive()/isRecursive().
00738       bool recursive_;
00739 
            // Flag behind setComponentGrouping()/hasComponentGrouping().
00741       bool component_grouping_;
00742 
            // Ring data shared by all parser instances (static pointer; who
            // allocates and frees it is defined out of line).
            // Spelled std::vector like every other vector in this header, so
            // the declaration no longer relies on a using-declaration pulled
            // in by some other header.
00744       static std::vector<std::set<const Atom*> >* sssr_;
00745 
            // Recursive helper of dumpTree() for a node at nesting `depth`.
00747       void dumpTreeRecursive_(SPNode* node, Size depth);
00748 
            // Recursive helper of dumpTree() for an edge at nesting `depth`.
00750       void dumpTreeRecursive_(SPEdge* edge, Size depth);
00751       
            // Nodes registered per ring-connection index, see
            // addRingConnection()/getRingConnections().
00753       std::map<Size, std::vector<SPNode*> > ring_connections_;
00754     
            // Static parser pointer shared by all instances. The public
            // State::current_parser holds a pointer of the same type; how the
            // two relate is not visible in this header.
00756       static SmartsParser* current_parser_;
00757       
            // Edges registered through addEdge().
00759       std::set<SPEdge*> edges_;
00760       
            // Nodes registered through addNode().
00762       std::set<SPNode*> nodes_;
00763 
            // Edges registered through addRecursiveEdge().
00765       std::set<SPEdge*> rec_edges_;
00766 
            // Root node, see setRoot()/getRoot().
00768       SPNode* root_;
00769 
            // Component counter; presumably advanced by
            // setNextComponentNumberToSubTree() -- confirm in the definition.
00771       int component_no_;
00772   };
00773   
00774 } // namespace BALL
00775 
00776 #endif // BALL_STRUCTURE_SMARTES_PARSER_H
00777 
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Defines