OpenWalnut  1.3.1
WStructuredTextParser.h
1 //---------------------------------------------------------------------------
2 //
3 // Project: OpenWalnut ( http://www.openwalnut.org )
4 //
5 // Copyright 2009 OpenWalnut Community, BSV@Uni-Leipzig and CNCF@MPI-CBS
6 // For more information see http://www.openwalnut.org/copying
7 //
8 // This file is part of OpenWalnut.
9 //
10 // OpenWalnut is free software: you can redistribute it and/or modify
11 // it under the terms of the GNU Lesser General Public License as published by
12 // the Free Software Foundation, either version 3 of the License, or
13 // (at your option) any later version.
14 //
15 // OpenWalnut is distributed in the hope that it will be useful,
16 // but WITHOUT ANY WARRANTY; without even the implied warranty of
17 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 // GNU Lesser General Public License for more details.
19 //
20 // You should have received a copy of the GNU Lesser General Public License
21 // along with OpenWalnut. If not, see <http://www.gnu.org/licenses/>.
22 //
23 //---------------------------------------------------------------------------
24 
25 #ifndef WSTRUCTUREDTEXTPARSER_H
26 #define WSTRUCTUREDTEXTPARSER_H
27 
28 #include <algorithm>
29 #include <iostream>
30 #include <map>
31 #include <ostream>
32 #include <string>
33 #include <vector>
34 
35 #include <boost/config/warning_disable.hpp>
36 #include <boost/spirit/include/qi.hpp>
37 #include <boost/spirit/include/phoenix_core.hpp>
38 #include <boost/spirit/include/phoenix_operator.hpp>
39 #include <boost/spirit/include/phoenix_fusion.hpp>
40 #include <boost/spirit/include/phoenix_stl.hpp>
41 #include <boost/spirit/include/phoenix_object.hpp>
42 #include <boost/fusion/include/adapt_struct.hpp>
43 #include <boost/variant/recursive_variant.hpp>
44 #include <boost/fusion/include/io.hpp>
45 #include <boost/filesystem/path.hpp>
46 
47 #include "WStringUtils.h"
48 #include "exceptions/WTypeMismatch.h"
49 #include "exceptions/WNotFound.h"
50 
51 /**
52  * This namespace contains the WStructuredTextParser data types and the parser. It builds up the abstract syntax tree (AST)
53  * for the given input which later can be traversed.
54  */
55 namespace WStructuredTextParser
56 {
57  //! we use these quite often, so define some short alias for them:
58  namespace qi = boost::spirit::qi;
59 
60  //! we use these quite often, so define some short alias for them:
61  namespace fusion = boost::fusion;
62 
63  //! we use these quite often, so define some short alias for them:
64  namespace ascii = boost::spirit::ascii;
65 
66  //! we use these quite often, so define some short alias for them:
67  namespace phoenix = boost::phoenix;
68 
69  //! we use these quite often, so define some short alias for them:
70  namespace spirit = boost::spirit;
71 
72  /**
73  * The type used for keys
74  */
75  typedef std::string KeyType;
76 
77  /**
78  * The type used for values
79  */
80  typedef std::string ValueType;
81 
82  /**
83  * The type used for comments
84  */
85  typedef std::string CommentType;
86 
87  /**
88  * Forward declare the object type.
89  */
90  struct ObjectType;
91 
92  /**
93  * KeyValueType - a tuple containing name and value
94  */
95  struct KeyValueType
96  {
97  /**
98  * Name string.
99  */
100  std::string m_name;
101  /**
102  * Value string.
103  */
104  std::string m_value;
105  };
106 
107  /**
108  * A node inside the AST is either another object or a key-value pair.
109  */
110  typedef
111  boost::variant<
112  boost::recursive_wrapper< ObjectType >,
113  KeyValueType,
115  >
117 
118  /**
119  * An object is always a name and contains several further nodes
120  */
121  struct ObjectType
122  {
123  /**
124  * Name of the object
125  */
126  std::string m_name;
127 
128  /**
129  * Object's members
130  */
131  std::vector< MemberType > m_nodes;
132  };
133 
134  /**
135  * An object representing all objects and comments on file level.
136  */
137  typedef std::vector< MemberType > FileType;
138 }
139 
140 
141 // Doxygen has problems with the following
142 // \cond Suppress_Doxygen
143 /**
144  * Tell boost::fusion about our types.
145  */
146 BOOST_FUSION_ADAPT_STRUCT(
148  ( std::string, m_name )
149  ( std::vector< WStructuredTextParser::MemberType >, m_nodes )
150  )
151 
152 /**
153  * Tell boost::fusion about our types.
154  */
155 BOOST_FUSION_ADAPT_STRUCT(
156  WStructuredTextParser::KeyValueType,
157  ( std::string, m_name )
158  ( std::string, m_value )
159  )
160 // \endcond
161 
162 namespace WStructuredTextParser
163 {
164  /**
165  * The grammar describing the structured format. It uses the boost::spirit features to parse the input. There are several rules to comply to
166  * successfully parse a file:
167  * <ul>
168  * <li>Key: identifier, needs to be a-z,A-Z,0-9,_
169  * <li>Object: defined as key + { ... }
170  * <li> ";" is optional after objects
171  * <li>Key-Value Pair: is a member of an object and is defines as key="value".
172  * <li>Comments begin with //
173  * </ul>
174  * For more details please see the test fixture file in core/common/test/fixtures/WStrutcuredTextParser_test.txt.
175  *
176  * \tparam Iterator the iterator, used to get the input stream
177  */
178  template <typename Iterator>
179  struct Grammar: qi::grammar<Iterator, FileType(), ascii::space_type >
180  {
181  /**
182  * Constructor and grammar description. It contains the EBNF (Extended Backus Naur Form) of the format we can parse.
183  *
184  * \param error Will contain error message if any occurs during functions execution
185  */
186  explicit Grammar( std::ostream& error ): Grammar::base_type( file, "WStructuredTextParser::Grammar" ) // NOLINT - non-const ref
187  {
188  // a key begins with a letter
189  key %= qi::char_( "a-zA-Z_" ) >> *qi::char_( "a-zA-Z_0-9" );
190  // a value is a quoted string. Multi-line strings possible
191  value %= '"' >> *( ~qi::char_( "\"" ) | qi::char_( " " ) ) >> '"';
192 
193  // a pair is: key = value
194  kvpair %= key >> '=' >> value >> ';';
195  // a comment is // + arbitrary symbols
196  comment %= qi::lexeme[ qi::char_( "/" ) >> qi::char_( "/" ) >> *qi::char_( "a-zA-Z_0-9!\"#$%&'()*,:;<>?@\\^`{|}~/ .@=[]ยง!+-" ) ];
197  // a object is a name, and a set of nested objects or key-value pairs
198  object %= ( key | value ) >> '{' >> *( object | kvpair | comment ) >> '}' >> *qi::char_( ";" );
199  // a file is basically an object without name.
200  file %= *( object | kvpair | comment );
201 
202  // provide names for these objects for better readability of parse errors
203  object.name( "object" );
204  kvpair.name( "key-value pair" );
205  key.name( "key" );
206  value.name( "value" );
207  file.name( "file" );
208  comment.name( "comment" );
209 
210  // provide error handlers
211  // XXX: can someone tell me how to get them work? According to the boost::spirit doc, this is everything needed but it doesn't work.
212  qi::on_error< qi::fail >( object, error << phoenix::val( "Error: " ) << qi::_4 );
213  qi::on_error< qi::fail >( kvpair, error << phoenix::val( "Error: " ) << qi::_4 );
214  qi::on_error< qi::fail >( key, error << phoenix::val( "Error: " ) << qi::_4 );
215  qi::on_error< qi::fail >( value, error << phoenix::val( "Error: " ) << qi::_4 );
216  qi::on_error< qi::fail >( comment, error << phoenix::val( "Error: " ) << qi::_4 );
217  qi::on_error< qi::fail >( file, error << phoenix::val( "Error: " ) << qi::_4 );
218  }
219 
220  // Rules we use
221 
222  /**
223  * Rule for objects. Attribute is ObjectType and is the start rule of the grammar. See constructor for exact definition.
224  */
225  qi::rule< Iterator, ObjectType(), ascii::space_type > object;
226 
227  /**
228  * Rule for files. Basically the same as an object but without name
229  */
230  qi::rule< Iterator, FileType(), ascii::space_type > file;
231 
232  /**
233  * Rule for comments. Ignored.
234  */
235  qi::rule< Iterator, CommentType(), ascii::space_type > comment;
236 
237  /**
238  * Key-value pair rule. See constructor for exact definition.
239  */
240  qi::rule< Iterator, KeyValueType(), ascii::space_type > kvpair;
241 
242  /**
243  * Key rule. See constructor for exact definition.
244  */
245  qi::rule< Iterator, KeyType() > key;
246 
247  /**
248  * Value rule. See constructor for exact definition.
249  */
250  qi::rule< Iterator, ValueType() > value;
251  };
252 
253  /**
254  * This simplifies working with a tree in a \ref FileType instance. It provides easy query and check methods. It does not
255  * provide any semantic options. So check validity of the contents and structure of the tree is the job of the using class/derived class. As
256  * the tree does not know anything about the semantics of your structure, it is also untyped. For every key you query, you need to specify
257  * the type.
258  *
259  * This tree uses the types in the \ref WStructuredTextParser namespace. To avoid unnecessary copy operations, this class is not recursive
260  * itself. When querying, you always need to specify the full path. This class can be seen as accessor to the \ref ObjectType tree.
261  *
262  * \note The syntax of the parsed files is defined by the parser itself. See \ref Grammar for details.
263  * \note This also stores the comments of the parsed file. This allows them to be written again if OW loads a file, modifies it and re-writes
264  * it.
265  */
267  {
268  friend class WStructuredTextParserTest;
269  public:
270  /**
271  * This char is used as separator for identifying values in the tree. NEVER change this value.
272  */
273  static const std::string Separator;
274 
275  /**
276  * Construct the instance given the original parsing structure.
277  *
278  * \param file the parsing result structure (the root node).
279  */
280  explicit StructuredValueTree( const FileType& file );
281 
282  /**
283  * Construct the instance given a text as string.
284  *
285  * \param toParse the text to parse
286  */
287  explicit StructuredValueTree( const std::string& toParse );
288 
289  /**
290  * Construct the instance given a path to a file to load.
291  *
292  * \param file the path to a file to load.
293  */
294  explicit StructuredValueTree( const boost::filesystem::path& file );
295 
296  /**
297  * Creates an empty tree. It will contain no information at all.
298  */
300 
301  /**
302  * Cleanup.
303  */
304  virtual ~StructuredValueTree();
305 
306  /**
307  * Checks whether the given value or object exists. If you want to know only if a value with the given name exists, set valuesOnly to
308  * true.
309  *
310  * \param key path to the value
311  * \param valuesOnly if true, it checks only if a value with the name exists. If false, also objects with this name cause this function
312  * to return true.
313  *
314  * \return true if existing.
315  */
316  bool exists( std::string key, bool valuesOnly = false ) const;
317 
318  /**
319  * It is possible that there are multiple values matching a key. This method counts them.
320  *
321  * \param key path to the values to count
322  * \param valuesOnly if true, it only counts values matching the given name.
323  *
324  * \return the number of found values.
325  */
326  size_t count( std::string key, bool valuesOnly = false ) const;
327 
328  /**
329  * Queries the value with the given name. If it is not found, the default value will be returned.
330  *
331  * \param key path to the value. Paths to whole objects are invalid.
332  * \param defaultValue the default if no value was found
333  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
334  *
335  * \throw WTypeMismatch if the value cannot be cast to the specified target type
336  *
337  * \return the value
338  *
339  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
340  */
341  template< typename T >
342  T getValue( std::string key, const T& defaultValue ) const;
343 
344  /**
345  * Queries the list of values matching the given path. If it is not found, the default value will be returned.
346  *
347  * \param key path to the value. Paths to whole objects are invalid.
348  * \param defaults the defaults if no value was found
349  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
350  *
351  * \throw WTypeMismatch if the value cannot be cast to the specified target type
352  *
353  * \return the value
354  *
355  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
356  */
357  template< typename T >
358  std::vector< T > getValues( std::string key, const std::vector< T >& defaults ) const;
359 
360  /**
361  * Queries the list of values matching the given path. If it is not found, an empty results vector is returned.
362  *
363  * \param key path to the value. Paths to whole objects are invalid.
364  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
365  *
366  * \throw WTypeMismatch if the value cannot be cast to the specified target type
367  *
368  * \return the value vector. Might be empty if no elements where found.
369  *
370  * \note this does not return a reference as the default value might be returned. It returns a copy of the value.
371  */
372  template< typename T >
373  std::vector< T > getValues( std::string key ) const;
374 
375  /**
376  * Queries the value with the given name. If it is not found, an exception is thrown. If multiple entries with this path exist, the first
377  * one is returned. Use \ref getValues in this case. Query the count of a key:value pair using \ref count
378  *
379  * \param key path to the value. Paths to whole objects are invalid.
380  * \tparam T the return type. This method tries to cast to this type. If it fails, an exception is thrown. Type std::string is always valid.
381  * \throw WTypeMismatch if the value cannot be cast to the specified target type
382  * \throw WNotFound if the key:value pair does not exist
383  *
384  * \return the value as copy to avoid any const_cast which would allow modification.
385  */
386  template< typename T >
387  T operator[]( std::string key ) const;
388 
389  /**
390  * Gets a subtree. The ValueTree returned contains the node you have searched. It only contains the first match. If all matches are
391  * needed, use \ref getSubTrees instead. If the key is not valid/nothing matches the key, an empty value tree is returned. If they key
392  * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
393  *
394  * \param key key to search.
395  *
396  * \return the structured value tree.
397  */
398  StructuredValueTree getSubTree( std::string key ) const;
399 
400  /**
401  * Gets all matching subtrees. The subtrees returned contains the node you have searched. If multiple objects match the key, a list of
402  * subtrees is returned. If nothing matches, the returned list is empty. If they key
403  * matches a key-value pair, nothing is returned. This means, this method is only useful for objects.
404  *
405  * \param key key to search.
406  *
407  * \return the structured value trees.
408  */
409  std::vector< StructuredValueTree > getSubTrees( std::string key ) const;
410 
411  protected:
412  private:
413  /**
414  * The named values.
415  */
417 
418  /**
419  * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
420  * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
421  *
422  * \param current current element to check and recursively traverse
423  * \param keyIter the current path element
424  * \param keyEnd the end iter. Just used to stop iteration if the key as not further elements
425  * \param resultObjects all matching instances of type \ref ObjectType
426  * \param resultValues all matching instances of type \ref KeyValueType
427  */
428  void traverse( MemberType current, std::vector< std::string >::const_iterator keyIter,
429  std::vector< std::string >::const_iterator keyEnd,
430  std::vector< ObjectType >& resultObjects,
431  std::vector< KeyValueType >& resultValues ) const;
432 
433  /**
434  * Recursively fills a result vector using a given path iterator. It checks whether the current element matches the current key. If yes,
435  * it traverses or adds the value to the result vector. This uses depth-first search and allows multiple matches for one key.
436  *
437  * \param current current element to check and recursively traverse
438  * \param key the path
439  * \param resultObjects all matching instances of type \ref ObjectType
440  * \param resultValues all matching instances of type \ref KeyValueType
441  */
442  void traverse( FileType current, std::string key,
443  std::vector< ObjectType >& resultObjects,
444  std::vector< KeyValueType >& resultValues ) const;
445  };
446 
447  /**
448  * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
449  *
450  * \param input the input to parse.
451  *
452  * \return the syntax tree in plain format. You should use WStructuredValueTree to use this.
453  *
454  * \throw WParseError on parse error
455  */
456  FileType parseFromString( std::string input );
457 
458  /**
459  * Parse the given input and return the syntax tree. Throws an exception WParseError on error.
460  *
461  * \param path the file to parse
462  *
463  * \return the syntax tree in plain format. You should use WStructuredValueTree to use this.
464  *
465  * \throw WParseError on parse error
466  * \throw WFileNotFOund in case the specified file could not be opened
467  */
468  FileType parseFromFile( boost::filesystem::path path );
469 
470  template< typename T >
471  T StructuredValueTree::getValue( std::string key, const T& defaultValue ) const
472  {
473  // NOTE: getValues ensures that always something is returned (the default value). So the returned vector has a valid begin iterator
474  return *getValues< T >( key, std::vector< T >( 1, defaultValue ) ).begin();
475  }
476 
477  template< typename T >
478  std::vector< T > StructuredValueTree::getValues( std::string key, const std::vector< T >& defaults ) const
479  {
480  std::vector< T > r = getValues< T >( key );
481  if( r.size() )
482  {
483  return r;
484  }
485  else
486  {
487  return defaults;
488  }
489  }
490 
491  template< typename T >
492  T StructuredValueTree::operator[]( std::string key ) const
493  {
494  std::vector< T > r = getValues< T >( key );
495  if( r.size() )
496  {
497  return *r.begin();
498  }
499  else
500  {
501  throw WNotFound( "The key \"" + key + "\" was not found." );
502  }
503  }
504 
505  /**
506  * Visitor to identify whether the given variant of type \ref MemberType is a object or key-value pair.
507  */
508  class IsLeafVisitor: public boost::static_visitor< bool >
509  {
510  public:
511  /**
512  * Returns always true as it is only called for key-value pairs.
513  *
514  * \return always true since it identified an key-value pair
515  */
516  bool operator()( const KeyValueType& /* element */ ) const
517  {
518  return true;
519  }
520 
521  /**
522  * Returns always false as it is only called for objects.
523  *
524  * \tparam T the type. Should be \ref ObjectType or \ref CommentType
525  * \return always false since it identified an Object/comment
526  */
527  template< typename T >
528  bool operator()( const T& /* element */ ) const
529  {
530  return false;
531  }
532  };
533 
534  /**
535  * Visitor to identify whether the given variant of type \ref MemberType is a comment.
536  */
537  class IsCommentVisitor: public boost::static_visitor< bool >
538  {
539  public:
540  /**
541  * Returns always true as it is only called for comments.
542  *
543  * \return always true
544  */
545  bool operator()( const CommentType& /* element */ ) const
546  {
547  return true;
548  }
549 
550  /**
551  * Returns always false as it is only called for objects and key-value pairs.
552  *
553  * \tparam T the type. Should be \ref ObjectType or \ref KeyValueType
554  * \return always false since it identified an Object/KeyValueType
555  */
556  template< typename T >
557  bool operator()( const T& /* element */ ) const
558  {
559  return false;
560  }
561  };
562 
563  /**
564  * Visitor to query the m_name member of \ref ObjectType and \ref KeyValueType.
565  */
566  class NameQueryVisitor: public boost::static_visitor< std::string >
567  {
568  public:
569  /**
570  * Comments have no name.
571  *
572  * \return empty string.
573  */
574  std::string operator()( const CommentType& /* element */ ) const
575  {
576  return "";
577  }
578 
579  /**
580  * Returns the m_name member of the specified object or key-valuev pair.
581  *
582  * \param element Specified object.
583  *
584  * \tparam T one of the types of the \ref MemberType variant
585  * \return always true since it identified an key-value pair
586  */
587  template< typename T >
588  std::string operator()( const T& element ) const
589  {
590  return element.m_name;
591  }
592  };
593 
594  template< typename T >
595  std::vector< T > StructuredValueTree::getValues( std::string key ) const
596  {
597  // traverse the tree
598  std::vector< ObjectType > rObj;
599  std::vector< KeyValueType > rKV;
600 
601  // traverse
602  traverse( m_file, key, rObj, rKV );
603 
604  // copy to result vector and cast
605  std::vector< T > r;
606  for( std::vector< KeyValueType >::const_iterator i = rKV.begin(); i != rKV.end(); ++i )
607  {
608  try
609  {
610  r.push_back( string_utils::fromString< T >( ( *i ).m_value ) );
611  }
612  catch( ... )
613  {
614  // convert the standard exception (if cannot convert) to a WTypeMismnatch.
615  throw WTypeMismatch( "Cannot convert element \"" + key + "\" to desired type." );
616  }
617  }
618 
619  // done
620  return r;
621  }
622 }
623 
624 #endif // WSTRUCTUREDTEXTPARSER_H
625