libstdc++
regex_scanner.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2015 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_scanner.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37  /**
38  * @addtogroup regex-detail
39  * @{
40  */
41 
/**
 * @brief Character-type-independent part of the regex scanner.
 *
 * Holds the token and state enumerations, the grammar flags, and the
 * escape / special-character tables together with the helpers that
 * query them.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_unknown
  };

protected:
  /// Bitmask type carrying the grammar options.
  typedef std::regex_constants::syntax_option_type _FlagT;

  /// Scanner states; the constructor starts in _S_state_normal.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Stores the flags and selects the tables matching them.
   * @param __flags Grammar options.
   *
   * The escape table is the ECMAScript one when the ECMAScript flag is
   * set and the awk one otherwise.  The special-character set is the
   * ECMAScript one, else the basic one when _M_is_basic() holds, else
   * the extended one.  _M_flags is declared before _M_escape_tbl and
   * _M_spec_char, so the _M_is_*() calls below read an initialized
   * value.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    _M_escape_tbl(_M_is_ecma()
                  ? _M_ecma_escape_tbl
                  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
                 ? _M_ecma_spec_char
                 : _M_is_basic()
                 ? _M_basic_spec_char
                 : _M_extended_spec_char),
    _M_at_bracket_start(false)
  { }

protected:
  /**
   * @brief Looks up @p __c in the selected escape table.
   * @return Pointer to the mapped character, or nullptr when no entry
   *         before the '\0' sentinel matches.
   */
  const char*
  _M_find_escape(char __c)
  {
    auto __it = _M_escape_tbl;
    for (; __it->first != '\0'; ++__it)
      if (__it->first == __c)
        return &__it->second;
    return nullptr;
  }

  /// True when the ECMAScript flag is set.
  bool
  _M_is_ecma() const
  { return _M_flags & std::regex_constants::ECMAScript; }

  /// True when the basic or the grep flag is set.
  bool
  _M_is_basic() const
  {
    return _M_flags & (std::regex_constants::basic
                       | std::regex_constants::grep);
  }

  /// True when the extended, the egrep or the awk flag is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (std::regex_constants::extended
                       | std::regex_constants::egrep
                       | std::regex_constants::awk);
  }

  /// True when the grep or the egrep flag is set.
  bool
  _M_is_grep() const
  {
    return _M_flags & (std::regex_constants::grep
                       | std::regex_constants::egrep);
  }

  /// True when the awk flag is set.
  bool
  _M_is_awk() const
  { return _M_flags & std::regex_constants::awk; }

protected:
  // Single-character operators and their tokens.  The last entry has a
  // '\0' key; presumably it acts as the end marker -- confirm against
  // the code that walks this table.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };
  // Escape letter -> character, used when the ECMAScript flag is set.
  // The entry whose key is '\0' ends the search in _M_find_escape().
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };
  // Escape letter -> character, used for every non-ECMAScript grammar.
  // The entry whose key is '\0' ends the search in _M_find_escape().
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };
  // Special-character sets, one per grammar family.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT                       _M_state;
  _FlagT                        _M_flags;
  _TokenT                       _M_token;      // not set by this constructor
  const std::pair<char, char>*  _M_escape_tbl; // table chosen in the ctor
  const char*                   _M_spec_char;  // set chosen in the ctor
  bool                          _M_at_bracket_start;
};
188 
189  /**
190  * @brief Scans an input range for regex tokens.
191  *
192  * The %_Scanner class interprets the regular expression pattern in
193  * the input range passed to its constructor as a sequence of parse
194  * tokens passed to the regular expression compiler. The sequence
195  * of tokens provided depends on the flag settings passed to the
196  * constructor: different regular expression grammars will interpret
197  * the same input pattern in syntactically different ways.
198  */
199  template<typename _CharT>
200  class _Scanner
201  : public _ScannerBase
202  {
203  public:
204  typedef const _CharT* _IterT;
207  typedef const std::ctype<_CharT> _CtypeT;
208 
209  _Scanner(_IterT __begin, _IterT __end,
210  _FlagT __flags, std::locale __loc);
211 
212  void
213  _M_advance();
214 
215  _TokenT
216  _M_get_token() const
217  { return _M_token; }
218 
219  const _StringT&
220  _M_get_value() const
221  { return _M_value; }
222 
223 #ifdef _GLIBCXX_DEBUG
224  std::ostream&
225  _M_print(std::ostream&);
226 #endif
227 
228  private:
229  void
230  _M_scan_normal();
231 
232  void
233  _M_scan_in_bracket();
234 
235  void
236  _M_scan_in_brace();
237 
238  void
239  _M_eat_escape_ecma();
240 
241  void
242  _M_eat_escape_posix();
243 
244  void
245  _M_eat_escape_awk();
246 
247  void
248  _M_eat_class(char);
249 
250  _IterT _M_current;
251  _IterT _M_end;
252  _CtypeT& _M_ctype;
253  _StringT _M_value;
254  void (_Scanner::* _M_eat_escape)();
255  };
256 
257  //@} regex-detail
258 _GLIBCXX_END_NAMESPACE_VERSION
259 } // namespace __detail
260 } // namespace std
261 
262 #include <bits/regex_scanner.tcc>
Container class for localization functionality. The locale class is first a class wrapper for C librar...
constexpr syntax_option_type grep
Primary class template ctype facet. This template class defines classification and conversion function...
_TokenT
Token types returned from the scanner.
Definition: regex_scanner.h:46
Scans an input range for regex tokens.
syntax_option_type
This is a bitmask type indicating how to interpret the regex.
constexpr syntax_option_type awk
constexpr syntax_option_type ECMAScript
constexpr syntax_option_type extended
ISO C++ entities toplevel namespace is std.
constexpr syntax_option_type basic
constexpr syntax_option_type egrep