/src/boost/boost/xpressive/regex_primitives.hpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////////////// |
2 | | /// \file regex_primitives.hpp |
3 | | /// Contains the syntax elements for writing static regular expressions. |
4 | | // |
5 | | // Copyright 2008 Eric Niebler. Distributed under the Boost |
6 | | // Software License, Version 1.0. (See accompanying file |
7 | | // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) |
8 | | |
9 | | #ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 |
10 | | #define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005 |
11 | | |
12 | | #include <vector> |
13 | | #include <climits> |
14 | | #include <boost/config.hpp> |
15 | | #include <boost/assert.hpp> |
16 | | #include <boost/mpl/if.hpp> |
17 | | #include <boost/mpl/and.hpp> |
18 | | #include <boost/mpl/assert.hpp> |
19 | | #include <boost/detail/workaround.hpp> |
20 | | #include <boost/preprocessor/cat.hpp> |
21 | | #include <boost/xpressive/detail/detail_fwd.hpp> |
22 | | #include <boost/xpressive/detail/core/matchers.hpp> |
23 | | #include <boost/xpressive/detail/core/regex_domain.hpp> |
24 | | #include <boost/xpressive/detail/utility/ignore_unused.hpp> |
25 | | |
26 | | // Doxygen can't handle proto :-( |
27 | | #ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED |
28 | | # include <boost/proto/core.hpp> |
29 | | # include <boost/proto/transform/arg.hpp> |
30 | | # include <boost/proto/transform/when.hpp> |
31 | | # include <boost/xpressive/detail/core/icase.hpp> |
32 | | # include <boost/xpressive/detail/static/compile.hpp> |
33 | | # include <boost/xpressive/detail/static/modifier.hpp> |
34 | | #endif |
35 | | |
36 | | namespace boost { namespace xpressive { namespace detail |
37 | | { |
38 | | |
39 | | typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary; |
40 | | typedef assert_word_placeholder<word_begin> assert_word_begin; |
41 | | typedef assert_word_placeholder<word_end> assert_word_end; |
42 | | |
43 | | // workaround msvc-7.1 bug with function pointer types |
44 | | // within function types: |
45 | | #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) |
46 | | #define mark_number(x) proto::call<mark_number(x)> |
47 | | #define minus_one() proto::make<minus_one()> |
48 | | #endif |
49 | | |
50 | | struct push_back : proto::callable |
51 | | { |
52 | | typedef int result_type; |
53 | | /* Callable: appends i to subs with subs.push_back(i) and returns i unchanged. */ |
54 | | template<typename Subs> |
55 | | int operator ()(Subs &subs, int i) const |
56 | | { |
57 | | subs.push_back(i); |
58 | | return i; |
59 | | } |
60 | | }; |
61 | | |
62 | | struct mark_number : proto::callable |
63 | | { |
64 | | typedef int result_type; |
65 | | /* Callable: returns the mark_number_ member of the object it is given. */ |
66 | | template<typename Expr> |
67 | | int operator ()(Expr const &expr) const |
68 | | { |
69 | | return expr.mark_number_; |
70 | | } |
71 | | }; |
72 | | |
73 | | typedef mpl::int_<-1> minus_one; |
74 | | |
75 | | // s1 or -s1: a plain mark tag pushes its mark number onto the data parameter; a negated mark tag pushes -1 (minus_one) instead |
76 | | struct SubMatch |
77 | | : proto::or_< |
78 | | proto::when<basic_mark_tag, push_back(proto::_data, mark_number(proto::_value)) > |
79 | | , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one()) > |
80 | | > |
81 | | {}; |
82 | | |
83 | | struct SubMatchList /* a single SubMatch, or a comma-separated sequence of SubMatch expressions (left-recursive) */ |
84 | | : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> > |
85 | | {}; |
86 | | |
87 | | template<typename Subs> /* participates in overload resolution only when Subs is a proto expression matching SubMatchList */ |
88 | | typename enable_if< |
89 | | mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> > |
90 | | , std::vector<int> |
91 | | >::type |
92 | | to_vector(Subs const &subs) |
93 | | { |
94 | | std::vector<int> subs_; /* receives one int per sub-match in the list */ |
95 | | SubMatchList()(subs, 0, subs_); /* run the grammar as a transform; subs_ is the data argument that push_back appends to */ |
96 | | return subs_; |
97 | | } |
98 | | |
99 | | #if BOOST_WORKAROUND(BOOST_MSVC, == 1310) |
100 | | #undef mark_number |
101 | | #undef minus_one |
102 | | #endif |
103 | | |
104 | | // Primitive transform: replace "Expr" with "keep(*State) >> Expr"; the data parameter is accepted but not used |
105 | | struct skip_primitives : proto::transform<skip_primitives> |
106 | | { |
107 | | template<typename Expr, typename State, typename Data> |
108 | | struct impl : proto::transform_impl<Expr, State, Data> |
109 | | { |
110 | | typedef |
111 | | typename proto::shift_right< |
112 | | typename proto::unary_expr< |
113 | | keeper_tag |
114 | | , typename proto::dereference<State>::type |
115 | | >::type |
116 | | , Expr |
117 | | >::type |
118 | | result_type; |
119 | | /* Aggregate-initializes the shift_right node: left child is the keeper node wrapping a dereference of state, right child is expr. */ |
120 | | result_type operator ()( |
121 | | typename impl::expr_param expr |
122 | | , typename impl::state_param state |
123 | | , typename impl::data_param |
124 | | ) const |
125 | | { |
126 | | result_type that = {{{state}}, expr}; |
127 | | return that; |
128 | | } |
129 | | }; |
130 | | }; |
131 | | |
132 | | struct Primitives /* grammar of expression forms treated as single units: terminals, comma lists, set_initializer subscripts/assignments, attribute assignments, and complements of these */ |
133 | | : proto::or_< |
134 | | proto::terminal<proto::_> |
135 | | , proto::comma<proto::_, proto::_> |
136 | | , proto::subscript<proto::terminal<set_initializer>, proto::_> |
137 | | , proto::assign<proto::terminal<set_initializer>, proto::_> |
138 | | , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_> |
139 | | , proto::complement<Primitives> |
140 | | > |
141 | | {}; |
142 | | |
143 | | struct SkipGrammar /* grammar + transform: applies skip_primitives to every sub-expression that matches Primitives; alternatives are tried in the order listed */ |
144 | | : proto::or_< |
145 | | proto::when<Primitives, skip_primitives> |
146 | | , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar> // don't "skip" mark tags |
147 | | , proto::subscript<SkipGrammar, proto::_> // don't put skips in actions |
148 | | , proto::binary_expr<modifier_tag, proto::_, SkipGrammar> // don't skip modifiers |
149 | | , proto::unary_expr<lookbehind_tag, proto::_> // don't skip lookbehinds |
150 | | , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> > // everything else is fair game! |
151 | | > |
152 | | {}; |
153 | | |
154 | | template<typename Skip> |
155 | | struct skip_directive /* function object that rewrites an expression with SkipGrammar, using a stored skip expression as the transform state */ |
156 | | { |
157 | | typedef typename proto::result_of::as_expr<Skip>::type skip_type; |
158 | | /* The skip argument is converted with proto::as_expr and stored in skip_. */ |
159 | | skip_directive(Skip const &skip) |
160 | | : skip_(proto::as_expr(skip)) |
161 | | {} |
162 | | |
163 | | template<typename Sig> |
164 | | struct result {}; |
165 | | /* Return-type computation for operator(): SkipGrammar's result for Expr, followed by >> and a dereference of the skip expression. */ |
166 | | template<typename This, typename Expr> |
167 | | struct result<This(Expr)> |
168 | | { |
169 | | typedef |
170 | | SkipGrammar::impl< |
171 | | typename proto::result_of::as_expr<Expr>::type |
172 | | , skip_type const & |
173 | | , mpl::void_ & |
174 | | > |
175 | | skip_transform; |
176 | | |
177 | | typedef |
178 | | typename proto::shift_right< |
179 | | typename skip_transform::result_type |
180 | | , typename proto::dereference<skip_type>::type |
181 | | >::type |
182 | | type; |
183 | | }; |
184 | | /* Runs the skip transform on expr (state = skip_, data = a dummy mpl::void_) and appends one trailing dereference-of-skip_ node. */ |
185 | | template<typename Expr> |
186 | | typename result<skip_directive(Expr)>::type |
187 | | operator ()(Expr const &expr) const |
188 | | { |
189 | | mpl::void_ ignore; |
190 | | typedef result<skip_directive(Expr)> result_fun; |
191 | | typename result_fun::type that = { |
192 | | typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore) |
193 | | , {skip_} |
194 | | }; |
195 | | return that; |
196 | | } |
197 | | |
198 | | private: |
199 | | skip_type skip_; |
200 | | }; |
201 | | |
202 | | /* |
203 | | /////////////////////////////////////////////////////////////////////////////// |
204 | | /// INTERNAL ONLY |
205 | | // BOOST_XPRESSIVE_GLOBAL |
206 | | // for defining globals that neither violate the One Definition Rule nor |
207 | | // lead to undefined behavior due to global object initialization order. |
208 | | //#define BOOST_XPRESSIVE_GLOBAL(type, name, init) \ |
209 | | // namespace detail \ |
210 | | // { \ |
211 | | // template<int Dummy> \ |
212 | | // struct BOOST_PP_CAT(global_pod_, name) \ |
213 | | // { \ |
214 | | // static type const value; \ |
215 | | // private: \ |
216 | | // union type_must_be_pod \ |
217 | | // { \ |
218 | | // type t; \ |
219 | | // char ch; \ |
220 | | // } u; \ |
221 | | // }; \ |
222 | | // template<int Dummy> \ |
223 | | // type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init; \ |
224 | | // } \ |
225 | | // type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value |
226 | | */ |
227 | | |
228 | | |
229 | | } // namespace detail |
230 | | |
231 | | /// INTERNAL ONLY (for backwards compatibility) |
232 | | unsigned int const repeat_max = UINT_MAX-1; |
233 | | |
234 | | /////////////////////////////////////////////////////////////////////////////// |
235 | | /// \brief For infinite repetition of a sub-expression. |
236 | | /// |
237 | | /// Magic value used with the repeat\<\>() function template |
238 | | /// to specify an unbounded repeat. Use as: repeat<17, inf>('a'). |
239 | | /// The equivalent in perl is /a{17,}/. |
240 | | unsigned int const inf = UINT_MAX-1; |
241 | | |
242 | | /// INTERNAL ONLY (for backwards compatibility) |
243 | | proto::terminal<detail::epsilon_matcher>::type const epsilon = {{}}; |
244 | | |
245 | | /////////////////////////////////////////////////////////////////////////////// |
246 | | /// \brief Successfully matches nothing. |
247 | | /// |
248 | | /// Successfully matches a zero-width sequence. nil always succeeds and |
249 | | /// never consumes any characters. |
250 | | proto::terminal<detail::epsilon_matcher>::type const nil = {{}}; |
251 | | |
252 | | /////////////////////////////////////////////////////////////////////////////// |
253 | | /// \brief Matches an alpha-numeric character. |
254 | | /// |
255 | | /// The regex traits are used to determine which characters are alpha-numeric. |
256 | | /// To match any character that is not alpha-numeric, use ~alnum. |
257 | | /// |
258 | | /// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent |
259 | | /// to /[[:^alnum:]]/ in perl. |
260 | | proto::terminal<detail::posix_charset_placeholder>::type const alnum = {{"alnum", false}}; |
261 | | |
262 | | /////////////////////////////////////////////////////////////////////////////// |
263 | | /// \brief Matches an alphabetic character. |
264 | | /// |
265 | | /// The regex traits are used to determine which characters are alphabetic. |
266 | | /// To match any character that is not alphabetic, use ~alpha. |
267 | | /// |
268 | | /// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent |
269 | | /// to /[[:^alpha:]]/ in perl. |
270 | | proto::terminal<detail::posix_charset_placeholder>::type const alpha = {{"alpha", false}}; |
271 | | |
272 | | /////////////////////////////////////////////////////////////////////////////// |
273 | | /// \brief Matches a blank (horizontal white-space) character. |
274 | | /// |
275 | | /// The regex traits are used to determine which characters are blank characters. |
276 | | /// To match any character that is not blank, use ~blank. |
277 | | /// |
278 | | /// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent |
279 | | /// to /[[:^blank:]]/ in perl. |
280 | | proto::terminal<detail::posix_charset_placeholder>::type const blank = {{"blank", false}}; |
281 | | |
282 | | /////////////////////////////////////////////////////////////////////////////// |
283 | | /// \brief Matches a control character. |
284 | | /// |
285 | | /// The regex traits are used to determine which characters are control characters. |
286 | | /// To match any character that is not a control character, use ~cntrl. |
287 | | /// |
288 | | /// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent |
289 | | /// to /[[:^cntrl:]]/ in perl. |
290 | | proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {{"cntrl", false}}; |
291 | | |
292 | | /////////////////////////////////////////////////////////////////////////////// |
293 | | /// \brief Matches a digit character. |
294 | | /// |
295 | | /// The regex traits are used to determine which characters are digits. |
296 | | /// To match any character that is not a digit, use ~digit. |
297 | | /// |
298 | | /// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent |
299 | | /// to /[[:^digit:]]/ in perl. |
300 | | proto::terminal<detail::posix_charset_placeholder>::type const digit = {{"digit", false}}; |
301 | | |
302 | | /////////////////////////////////////////////////////////////////////////////// |
303 | | /// \brief Matches a graph character. |
304 | | /// |
305 | | /// The regex traits are used to determine which characters are graphable. |
306 | | /// To match any character that is not graphable, use ~graph. |
307 | | /// |
308 | | /// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent |
309 | | /// to /[[:^graph:]]/ in perl. |
310 | | proto::terminal<detail::posix_charset_placeholder>::type const graph = {{"graph", false}}; |
311 | | |
312 | | /////////////////////////////////////////////////////////////////////////////// |
313 | | /// \brief Matches a lower-case character. |
314 | | /// |
315 | | /// The regex traits are used to determine which characters are lower-case. |
316 | | /// To match any character that is not a lower-case character, use ~lower. |
317 | | /// |
318 | | /// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent |
319 | | /// to /[[:^lower:]]/ in perl. |
320 | | proto::terminal<detail::posix_charset_placeholder>::type const lower = {{"lower", false}}; |
321 | | |
322 | | /////////////////////////////////////////////////////////////////////////////// |
323 | | /// \brief Matches a printable character. |
324 | | /// |
325 | | /// The regex traits are used to determine which characters are printable. |
326 | | /// To match any character that is not printable, use ~print. |
327 | | /// |
328 | | /// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent |
329 | | /// to /[[:^print:]]/ in perl. |
330 | | proto::terminal<detail::posix_charset_placeholder>::type const print = {{"print", false}}; |
331 | | |
332 | | /////////////////////////////////////////////////////////////////////////////// |
333 | | /// \brief Matches a punctuation character. |
334 | | /// |
335 | | /// The regex traits are used to determine which characters are punctuation. |
336 | | /// To match any character that is not punctuation, use ~punct. |
337 | | /// |
338 | | /// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent |
339 | | /// to /[[:^punct:]]/ in perl. |
340 | | proto::terminal<detail::posix_charset_placeholder>::type const punct = {{"punct", false}}; |
341 | | |
342 | | /////////////////////////////////////////////////////////////////////////////// |
343 | | /// \brief Matches a space character. |
344 | | /// |
345 | | /// The regex traits are used to determine which characters are space characters. |
346 | | /// To match any character that is not white-space, use ~space. |
347 | | /// |
348 | | /// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent |
349 | | /// to /[[:^space:]]/ in perl. |
350 | | proto::terminal<detail::posix_charset_placeholder>::type const space = {{"space", false}}; |
351 | | |
352 | | /////////////////////////////////////////////////////////////////////////////// |
353 | | /// \brief Matches an upper-case character. |
354 | | /// |
355 | | /// The regex traits are used to determine which characters are upper-case. |
356 | | /// To match any character that is not upper-case, use ~upper. |
357 | | /// |
358 | | /// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent |
359 | | /// to /[[:^upper:]]/ in perl. |
360 | | proto::terminal<detail::posix_charset_placeholder>::type const upper = {{"upper", false}}; |
361 | | |
362 | | /////////////////////////////////////////////////////////////////////////////// |
363 | | /// \brief Matches a hexadecimal digit character. |
364 | | /// |
365 | | /// The regex traits are used to determine which characters are hex digits. |
366 | | /// To match any character that is not a hex digit, use ~xdigit. |
367 | | /// |
368 | | /// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent |
369 | | /// to /[[:^xdigit:]]/ in perl. |
370 | | proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {{"xdigit", false}}; |
371 | | |
372 | | /////////////////////////////////////////////////////////////////////////////// |
373 | | /// \brief Beginning of sequence assertion. |
374 | | /// |
375 | | /// For the character sequence [begin, end), 'bos' matches the |
376 | | /// zero-width sub-sequence [begin, begin). |
377 | | proto::terminal<detail::assert_bos_matcher>::type const bos = {{}}; |
378 | | |
379 | | /////////////////////////////////////////////////////////////////////////////// |
380 | | /// \brief End of sequence assertion. |
381 | | /// |
382 | | /// For the character sequence [begin, end), |
383 | | /// 'eos' matches the zero-width sub-sequence [end, end). |
384 | | /// |
385 | | /// \attention Unlike the perl end of sequence assertion \$, 'eos' will |
386 | | /// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To |
387 | | /// get that behavior, use (!_n >> eos). |
388 | | proto::terminal<detail::assert_eos_matcher>::type const eos = {{}}; |
389 | | |
390 | | /////////////////////////////////////////////////////////////////////////////// |
391 | | /// \brief Beginning of line assertion. |
392 | | /// |
393 | | /// 'bol' matches the zero-width sub-sequence |
394 | | /// immediately following a logical newline sequence. The regex traits |
395 | | /// are used to determine what constitutes a logical newline sequence. |
396 | | proto::terminal<detail::assert_bol_placeholder>::type const bol = {{}}; |
397 | | |
398 | | /////////////////////////////////////////////////////////////////////////////// |
399 | | /// \brief End of line assertion. |
400 | | /// |
401 | | /// 'eol' matches the zero-width sub-sequence |
402 | | /// immediately preceding a logical newline sequence. The regex traits |
403 | | /// are used to determine what constitutes a logical newline sequence. |
404 | | proto::terminal<detail::assert_eol_placeholder>::type const eol = {{}}; |
405 | | |
406 | | /////////////////////////////////////////////////////////////////////////////// |
407 | | /// \brief Beginning of word assertion. |
408 | | /// |
409 | | /// 'bow' matches the zero-width sub-sequence |
410 | | /// immediately following a non-word character and preceding a word character. |
411 | | /// The regex traits are used to determine what constitutes a word character. |
412 | | proto::terminal<detail::assert_word_begin>::type const bow = {{}}; |
413 | | |
414 | | /////////////////////////////////////////////////////////////////////////////// |
415 | | /// \brief End of word assertion. |
416 | | /// |
417 | | /// 'eow' matches the zero-width sub-sequence |
418 | | /// immediately following a word character and preceding a non-word character. |
419 | | /// The regex traits are used to determine what constitutes a word character. |
420 | | proto::terminal<detail::assert_word_end>::type const eow = {{}}; |
421 | | |
422 | | /////////////////////////////////////////////////////////////////////////////// |
423 | | /// \brief Word boundary assertion. |
424 | | /// |
425 | | /// '_b' matches the zero-width sub-sequence at the beginning or the end of a word. |
426 | | /// It is equivalent to (bow | eow). The regex traits are used to determine what |
427 | | /// constitutes a word character. To match a non-word boundary, use ~_b. |
428 | | /// |
429 | | /// \attention _b is like \\b in perl. ~_b is like \\B in perl. |
430 | | proto::terminal<detail::assert_word_boundary>::type const _b = {{}}; |
431 | | |
432 | | /////////////////////////////////////////////////////////////////////////////// |
433 | | /// \brief Matches a word character. |
434 | | /// |
435 | | /// '_w' matches a single word character. The regex traits are used to determine which |
436 | | /// characters are word characters. Use ~_w to match a character that is not a word |
437 | | /// character. |
438 | | /// |
439 | | /// \attention _w is like \\w in perl. ~_w is like \\W in perl. |
440 | | proto::terminal<detail::posix_charset_placeholder>::type const _w = {{"w", false}}; |
441 | | |
442 | | /////////////////////////////////////////////////////////////////////////////// |
443 | | /// \brief Matches a digit character. |
444 | | /// |
445 | | /// '_d' matches a single digit character. The regex traits are used to determine which |
446 | | /// characters are digits. Use ~_d to match a character that is not a digit |
447 | | /// character. |
448 | | /// |
449 | | /// \attention _d is like \\d in perl. ~_d is like \\D in perl. |
450 | | proto::terminal<detail::posix_charset_placeholder>::type const _d = {{"d", false}}; |
451 | | |
452 | | /////////////////////////////////////////////////////////////////////////////// |
453 | | /// \brief Matches a space character. |
454 | | /// |
455 | | /// '_s' matches a single space character. The regex traits are used to determine which |
456 | | /// characters are space characters. Use ~_s to match a character that is not a space |
457 | | /// character. |
458 | | /// |
459 | | /// \attention _s is like \\s in perl. ~_s is like \\S in perl. |
460 | | proto::terminal<detail::posix_charset_placeholder>::type const _s = {{"s", false}}; |
461 | | |
462 | | /////////////////////////////////////////////////////////////////////////////// |
463 | | /// \brief Matches a literal newline character, '\\n'. |
464 | | /// |
465 | | /// '_n' matches a single newline character, '\\n'. Use ~_n to match a character |
466 | | /// that is not a newline. |
467 | | /// |
468 | | /// \attention ~_n is like '.' in perl without the /s modifier. |
469 | | proto::terminal<char>::type const _n = {'\n'}; |
470 | | |
471 | | /////////////////////////////////////////////////////////////////////////////// |
472 | | /// \brief Matches a logical newline sequence. |
473 | | /// |
474 | | /// '_ln' matches a logical newline sequence. This can be any character in the |
475 | | /// line separator class, as determined by the regex traits, or the '\\r\\n' sequence. |
476 | | /// For the purpose of back-tracking, '\\r\\n' is treated as a unit. |
477 | | /// To match any one character that is not a logical newline, use ~_ln. |
478 | | detail::logical_newline_xpression const _ln = {{}}; |
479 | | |
480 | | /////////////////////////////////////////////////////////////////////////////// |
481 | | /// \brief Matches any one character. |
482 | | /// |
483 | | /// Match any character, similar to '.' in perl syntax with the /s modifier. |
484 | | /// '_' matches any one character, including the newline. |
485 | | /// |
486 | | /// \attention To match any character except the newline, use ~_n |
487 | | proto::terminal<detail::any_matcher>::type const _ = {{}}; |
488 | | |
489 | | /////////////////////////////////////////////////////////////////////////////// |
490 | | /// \brief Reference to the current regex object |
491 | | /// |
492 | | /// Useful when constructing recursive regular expression objects. The 'self' |
493 | | /// identifier is a short-hand for the current regex object. For instance, |
494 | | /// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that |
495 | | /// matches balanced parens such as "((()))". |
496 | | proto::terminal<detail::self_placeholder>::type const self = {{}}; |
497 | | |
498 | | /////////////////////////////////////////////////////////////////////////////// |
499 | | /// \brief Used to create character sets. |
500 | | /// |
501 | | /// There are two ways to create character sets with the 'set' identifier. The |
502 | | /// easiest is to create a comma-separated list of the characters in the set, |
503 | | /// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other |
504 | | /// way is to define the set as an argument to the set subscript operator. |
505 | | /// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b', |
506 | | /// 'c' or a digit character. |
507 | | /// |
508 | | /// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c') |
509 | | /// will match any character that is not an 'a', 'b', or 'c'. |
510 | | /// |
511 | | /// Sets can be composed of other, possibly complemented, sets. For instance, |
512 | | /// set[ ~digit | ~(set= 'a','b','c') ]. |
513 | | detail::set_initializer_type const set = {{}}; |
514 | | |
515 | | /////////////////////////////////////////////////////////////////////////////// |
516 | | /// \brief Sub-match placeholder type, used to create named captures in |
517 | | /// static regexes. |
518 | | /// |
519 | | /// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc.. You |
520 | | /// can use the \c mark_tag type to create your own sub-match placeholders with |
521 | | /// more meaningful names. This is roughly equivalent to the "named capture" |
522 | | /// feature of dynamic regular expressions. |
523 | | /// |
524 | | /// To create a named sub-match placeholder, initialize it with a unique integer. |
525 | | /// The integer must only be unique within the regex in which the placeholder |
526 | | /// is used. Then you can use it within static regexes to create sub-matches |
527 | | /// by assigning a sub-expression to it, or to refer back to already created |
528 | | /// sub-matches. |
529 | | /// |
530 | | /// \code |
531 | | /// mark_tag number(1); // "number" is now equivalent to "s1" |
532 | | /// // Match a number, followed by a space and the same number again |
533 | | /// sregex rx = (number = +_d) >> ' ' >> number; |
534 | | /// \endcode |
535 | | /// |
536 | | /// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder |
537 | | /// can be used to index into the <tt>match_results\<\></tt> object to retrieve the |
538 | | /// corresponding sub-match. |
539 | | struct mark_tag |
540 | | : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> |
541 | | { |
542 | | private: |
543 | | typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type; |
544 | | /* Helper: aggregate-initializes the underlying basic_mark_tag expression from mark_nbr. */ |
545 | | static detail::basic_mark_tag make_tag(int mark_nbr) |
546 | 0 | { |
547 | 0 | detail::basic_mark_tag mark = {{mark_nbr}}; |
548 | 0 | return mark; |
549 | 0 | } |
550 | | |
551 | | public: |
552 | | /// \brief Initialize a mark_tag placeholder |
553 | | /// \param mark_nbr An integer that uniquely identifies this \c mark_tag |
554 | | /// within the static regexes in which this \c mark_tag will be used. |
555 | | /// \pre <tt>mark_nbr \> 0</tt> |
556 | | mark_tag(int mark_nbr) |
557 | | : base_type(mark_tag::make_tag(mark_nbr)) |
558 | 0 | { |
559 | 0 | // Mark numbers must be integers greater than 0. |
560 | 0 | BOOST_ASSERT(mark_nbr > 0); |
561 | 0 | } |
562 | | |
563 | | /// INTERNAL ONLY: implicit conversion to the wrapped basic_mark_tag expression (returns proto_base()) |
564 | | operator detail::basic_mark_tag const &() const |
565 | 0 | { |
566 | 0 | return this->proto_base(); |
567 | 0 | } |
568 | | |
569 | | BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag) |
570 | | }; |
571 | | |
572 | | // This macro is used when declaring mark_tags that are global because |
573 | | // it guarantees that they are statically initialized. That avoids |
574 | | // order-of-initialization bugs. In user code, the simpler: mark_tag s0(0); |
575 | | // would be preferable. |
576 | | /// INTERNAL ONLY |
577 | | #define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE) \ |
578 | | boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}} \ |
579 | | /**/ |
580 | | |
581 | | /////////////////////////////////////////////////////////////////////////////// |
582 | | /// \brief Sub-match placeholder, like $& in Perl |
583 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0); |
584 | | |
585 | | /////////////////////////////////////////////////////////////////////////////// |
586 | | /// \brief Sub-match placeholder, like $1 in perl. |
587 | | /// |
588 | | /// To create a sub-match, assign a sub-expression to the sub-match placeholder. |
589 | | /// For instance, (s1= _) will match any one character and remember which |
590 | | /// character was matched in the 1st sub-match. Later in the pattern, you can |
591 | | /// refer back to the sub-match. For instance, (s1= _) >> s1 will match any |
592 | | /// character, and then match the same character again. |
593 | | /// |
594 | | /// After a successful regex_match() or regex_search(), the sub-match placeholders |
595 | | /// can be used to index into the match_results\<\> object to retrieve the Nth |
596 | | /// sub-match. |
597 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1); |
598 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2); |
599 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3); |
600 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4); |
601 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5); |
602 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6); |
603 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7); |
604 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8); |
605 | | BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9); |
606 | | |
607 | | // NOTE: For the purpose of xpressive's documentation, make icase() look like an |
608 | | // ordinary function. In reality, it is a function object defined in detail/icase.hpp |
609 | | // so that it can serve double-duty as regex_constants::icase, the syntax_option_type. |
610 | | #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED |
611 | | /////////////////////////////////////////////////////////////////////////////// |
612 | | /// \brief Makes a sub-expression case-insensitive. |
613 | | /// |
614 | | /// Use icase() to make a sub-expression case-insensitive. For instance, |
615 | | /// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by |
616 | | /// "bar" irrespective of case. |
617 | | template<typename Expr> detail::unspecified icase(Expr const &expr) { return 0; } |
618 | | #endif |
619 | | |
620 | | /////////////////////////////////////////////////////////////////////////////// |
621 | | /// \brief Makes a literal into a regular expression. |
622 | | /// |
623 | | /// Use as_xpr() to turn a literal into a regular expression. For instance, |
624 | | /// "foo" >> "bar" will not compile because both operands to the right-shift |
625 | | /// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar" |
626 | | /// instead. |
627 | | /// |
628 | | /// You can use as_xpr() with character literals in addition to string literals. |
629 | | /// For instance, as_xpr('a') will match an 'a'. You can also complement a |
630 | | /// character literal, as with ~as_xpr('a'). This will match any one character |
631 | | /// that is not an 'a'. |
632 | | #ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED |
633 | | template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; } |
634 | | #else |
635 | | proto::functional::as_expr<> const as_xpr = {}; |
636 | | #endif |
637 | | |
638 | | /////////////////////////////////////////////////////////////////////////////// |
639 | | /// \brief Embed a regex object by reference. |
640 | | /// |
641 | | /// \param rex The basic_regex object to embed by reference. |
642 | | template<typename BidiIter> |
643 | | inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const |
644 | | by_ref(basic_regex<BidiIter> const &rex) |
645 | | { |
646 | | reference_wrapper<basic_regex<BidiIter> const> ref(rex); /* wraps a reference to rex; the regex itself is not copied */ |
647 | | return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref); /* terminal whose value is the reference wrapper */ |
648 | | } |
649 | | |
650 | | /////////////////////////////////////////////////////////////////////////////// |
651 | | /// \brief Match a range of characters. |
652 | | /// |
653 | | /// Match any character in the range [ch_min, ch_max]. |
654 | | /// |
655 | | /// \param ch_min The lower end of the range to match. |
656 | | /// \param ch_max The upper end of the range to match. |
657 | | template<typename Char> |
658 | | inline typename proto::terminal<detail::range_placeholder<Char> >::type const |
659 | | range(Char ch_min, Char ch_max) |
660 | | { |
661 | | detail::range_placeholder<Char> that = {ch_min, ch_max, false}; /* NOTE(review): third field is presumably the "negated" flag -- confirm against range_placeholder */ |
662 | | return proto::terminal<detail::range_placeholder<Char> >::type::make(that); /* wrap the placeholder in a proto terminal */ |
663 | | } |
664 | | |
665 | | /////////////////////////////////////////////////////////////////////////////// |
666 | | /// \brief Mark a sub-expression as optional; the same as writing !as_xpr(expr). |
667 | | /// |
668 | | /// \param expr The sub-expression that may or may not be present. |
669 | | template<typename Expr> |
670 | | typename proto::result_of::make_expr< |
671 | | proto::tag::logical_not |
672 | | , proto::default_domain |
673 | | , Expr const & |
674 | | >::type const |
675 | | optional(Expr const &expr) |
676 | | { |
677 | | // boost::ref() makes Proto hold expr by reference, which is what the |
678 | | // "Expr const &" child in the declared return type describes. |
679 | | typedef proto::tag::logical_not optional_tag; |
680 | | return proto::make_expr<optional_tag, proto::default_domain>(boost::ref(expr)); |
681 | | } |
682 | | |
683 | | /////////////////////////////////////////////////////////////////////////////// |
684 | | /// \brief Match a sub-expression a fixed or bounded number of times. |
685 | | /// |
686 | | /// The repeat\<\>() function template comes in two forms. Use |
687 | | /// repeat\<N\>(expr) to match a sub-expression N times, and |
688 | | /// repeat\<M,N\>(expr) to match it from M to N times. |
689 | | /// |
690 | | /// The quantifier built by repeat\<\>() is greedy. Applying the unary minus |
691 | | /// operator, as in -repeat\<M,N\>(expr), makes it non-greedy. |
692 | | /// |
693 | | /// \param expr The sub-expression to repeat. |
694 | | template<unsigned int Min, unsigned int Max, typename Expr> |
695 | | typename proto::result_of::make_expr< |
696 | | detail::generic_quant_tag<Min, Max> |
697 | | , proto::default_domain |
698 | | , Expr const & |
699 | | >::type const |
700 | | repeat(Expr const &expr) |
701 | | { |
702 | | // boost::ref() makes Proto hold expr by reference, which is what the |
703 | | // "Expr const &" child in the declared return type describes. |
704 | | typedef detail::generic_quant_tag<Min, Max> quant_tag; |
705 | | return proto::make_expr<quant_tag, proto::default_domain>(boost::ref(expr)); |
706 | | } |
707 | | |
708 | | /// \overload |
709 | | /// |
710 | | /// \param expr2 The sub-expression to match Count times (both quantifier bounds are Count). |
711 | | template<unsigned int Count, typename Expr2> |
712 | | typename proto::result_of::make_expr< |
713 | | detail::generic_quant_tag<Count, Count> |
714 | | , proto::default_domain |
715 | | , Expr2 const & |
716 | | >::type const |
717 | | repeat(Expr2 const &expr2) |
718 | | { |
719 | | // boost::ref() makes Proto hold expr2 by reference, as the declared return type describes. |
720 | | typedef detail::generic_quant_tag<Count, Count> quant_tag; |
721 | | return proto::make_expr<quant_tag, proto::default_domain>(boost::ref(expr2)); |
722 | | } |
723 | | |
724 | | /////////////////////////////////////////////////////////////////////////////// |
725 | | /// \brief Turn a sub-expression into an independent sub-expression. |
726 | | /// |
727 | | /// Back-tracking into the sub-expression is switched off: every branch or |
728 | | /// repeat inside it matches in only one way, and none of the other |
729 | | /// alternatives are tried. |
730 | | /// |
731 | | /// \attention keep(expr) corresponds to the perl (?>...) extension. |
732 | | /// |
733 | | /// \param expr The sub-expression whose back-tracking is turned off. |
734 | | template<typename Expr> |
735 | | typename proto::result_of::make_expr< |
736 | | detail::keeper_tag |
737 | | , proto::default_domain |
738 | | , Expr const & |
739 | | >::type const |
740 | | keep(Expr const &expr) |
741 | | { |
742 | | // boost::ref() makes Proto hold expr by reference, which is what the |
743 | | // "Expr const &" child in the declared return type describes. |
744 | | typedef detail::keeper_tag node_tag; |
745 | | return proto::make_expr<node_tag, proto::default_domain>(boost::ref(expr)); |
746 | | } |
747 | | |
748 | | /////////////////////////////////////////////////////////////////////////////// |
749 | | /// \brief Assert that a sub-expression matches ahead of the current position. |
750 | | /// |
751 | | /// before(expr) succeeds when expr would match starting at the current position |
752 | | /// in the sequence; what expr matches does not become part of the overall |
753 | | /// match. For example, before("foo") succeeds when we are before a "foo". Apply |
754 | | /// the bit-complement operator (~) to negate a look-ahead assertion. |
755 | | /// |
756 | | /// \attention before(expr) corresponds to the perl (?=...) extension, and |
757 | | /// ~before(expr), the negative look-ahead assertion, corresponds to the |
758 | | /// perl (?!...) extension. |
759 | | /// |
760 | | /// \param expr The sub-expression to test in the look-ahead assertion. |
761 | | template<typename Expr> |
762 | | typename proto::result_of::make_expr< |
763 | | detail::lookahead_tag |
764 | | , proto::default_domain |
765 | | , Expr const & |
766 | | >::type const |
767 | | before(Expr const &expr) |
768 | | { |
769 | | // boost::ref() makes Proto hold expr by reference, which is what the |
770 | | // "Expr const &" child in the declared return type describes. |
771 | | typedef detail::lookahead_tag node_tag; |
772 | | return proto::make_expr<node_tag, proto::default_domain>(boost::ref(expr)); |
773 | | } |
774 | | |
775 | | /////////////////////////////////////////////////////////////////////////////// |
776 | | /// \brief Assert that a sub-expression matches just behind the current position. |
777 | | /// |
778 | | /// after(expr) succeeds when expr would match at the current position minus N |
779 | | /// in the sequence, N being the width of expr; what expr matches does not |
780 | | /// become part of the overall match. For example, after("foo") succeeds when we |
781 | | /// are after a "foo". Apply the bit-complement operator (~) to negate a look-behind assertion. |
782 | | /// |
783 | | /// \attention after(expr) corresponds to the perl (?<=...) extension, and |
784 | | /// ~after(expr), the negative look-behind assertion, corresponds to the |
785 | | /// perl (?<!...) extension. |
786 | | /// |
787 | | /// \param expr The sub-expression to test in the look-behind assertion. |
788 | | /// |
789 | | /// \pre expr cannot match a variable number of characters. |
790 | | template<typename Expr> |
791 | | typename proto::result_of::make_expr< |
792 | | detail::lookbehind_tag |
793 | | , proto::default_domain |
794 | | , Expr const & |
795 | | >::type const |
796 | | after(Expr const &expr) |
797 | | { |
798 | | // boost::ref() makes Proto hold expr by reference, which is what the |
799 | | // "Expr const &" child in the declared return type describes. |
800 | | typedef detail::lookbehind_tag node_tag; |
801 | | return proto::make_expr<node_tag, proto::default_domain>(boost::ref(expr)); |
802 | | } |
803 | | |
804 | | /////////////////////////////////////////////////////////////////////////////// |
805 | | /// \brief Select the regex traits or std::locale that a regex uses. |
806 | | /// |
807 | | /// imbue() tells the regex engine which traits or locale to use while matching |
808 | | /// the regex. One traits/locale must apply to the entire expression. For |
809 | | /// example, this gives a regex a locale: |
810 | | /// std::locale loc; |
811 | | /// sregex rx = imbue(loc)(+digit); |
812 | | /// |
813 | | /// \param loc The std::locale or regex traits object to use. |
814 | | template<typename Locale> |
815 | | inline detail::modifier_op<detail::locale_modifier<Locale> > const |
816 | | imbue(Locale const &loc) |
817 | | { |
818 | | typedef detail::locale_modifier<Locale> modifier_type; |
819 | | typedef detail::modifier_op<modifier_type> result_type; |
820 | | |
821 | | // Brace-initialise the modifier_op from the locale modifier and the ECMAScript syntax flag. |
822 | | result_type imbued = {modifier_type(loc), regex_constants::ECMAScript}; |
823 | | return imbued; |
824 | | } |
825 | | |
826 | | proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {{}}; /**< Proto terminal for attribute placeholder 1. */ |
827 | | proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {{}}; /**< Proto terminal for attribute placeholder 2. */ |
828 | | proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {{}}; /**< Proto terminal for attribute placeholder 3. */ |
829 | | proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {{}}; /**< Proto terminal for attribute placeholder 4. */ |
830 | | proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {{}}; /**< Proto terminal for attribute placeholder 5. */ |
831 | | proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {{}}; /**< Proto terminal for attribute placeholder 6. */ |
832 | | proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {{}}; /**< Proto terminal for attribute placeholder 7. */ |
833 | | proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}}; /**< Proto terminal for attribute placeholder 8. */ |
834 | | proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}}; /**< Proto terminal for attribute placeholder 9. */ |
835 | | |
836 | | /////////////////////////////////////////////////////////////////////////////// |
837 | | /// \brief Name the characters to be skipped while a regex is matched. |
838 | | /// |
839 | | /// <tt>skip()</tt> tells the regex engine to pass over certain characters during |
840 | | /// matching, which is most useful for regexes that should ignore whitespace. For |
841 | | /// example, this regex skips both whitespace and punctuation: |
842 | | /// |
843 | | /// \code |
844 | | /// // A sentence is one or more words separated by whitespace |
845 | | /// // and punctuation. |
846 | | /// sregex word = +alpha; |
847 | | /// sregex sentence = skip(set[_s | punct])( +word ); |
848 | | /// \endcode |
849 | | /// |
850 | | /// In the example above, skipping works by inserting |
851 | | /// <tt>keep(*set[_s | punct])</tt> in front of every primitive in the regex, |
852 | | /// where a "primitive" includes terminals such as strings, character sets and |
853 | | /// nested regexes, and by appending a final <tt>*set[_s | punct]</tt> to the |
854 | | /// regex. The <tt>sentence</tt> regex above is therefore equivalent to: |
855 | | /// |
856 | | /// \code |
857 | | /// sregex sentence = +( keep(*set[_s | punct]) >> word ) |
858 | | /// >> *set[_s | punct]; |
859 | | /// \endcode |
860 | | /// |
861 | | /// \attention Nested regexes are treated atomically, so skipping does not change |
862 | | /// how they are handled. String literals are treated atomically as well: no |
863 | | /// skipping takes place inside a string literal. Consequently |
864 | | /// <tt>skip(_s)("this that")</tt> differs from |
865 | | /// <tt>skip(_s)("this" >> as_xpr("that"))</tt>: the former matches only when |
866 | | /// there is only one space between "this" and "that", while the latter skips |
867 | | /// any and all whitespace between "this" and "that". |
868 | | /// |
869 | | /// \param skip A regex specifying the characters to skip. |
870 | | /// \return A detail::skip_directive\<Skip\> constructed from \c skip. |
871 | | template<typename Skip> |
872 | | detail::skip_directive<Skip> skip(Skip const &skip) |
873 | | { |
874 | | typedef detail::skip_directive<Skip> directive_type; |
875 | | return directive_type(skip); |
876 | | } |
877 | | |
878 | | namespace detail |
879 | | { |
880 | | inline void ignore_unused_regex_primitives() /* Hands each namespace-scope primitive object listed below (for example a1..a9 and as_xpr) to detail::ignore_unused(); presumably this keeps compilers from warning that the objects are unused -- TODO confirm against ignore_unused.hpp. Takes no arguments and returns nothing. */ |
881 | 0 | { |
882 | 0 | detail::ignore_unused(repeat_max); |
883 | 0 | detail::ignore_unused(inf); |
884 | 0 | detail::ignore_unused(epsilon); |
885 | 0 | detail::ignore_unused(nil); |
886 | 0 | detail::ignore_unused(alnum); |
887 | 0 | detail::ignore_unused(bos); |
888 | 0 | detail::ignore_unused(eos); |
889 | 0 | detail::ignore_unused(bol); |
890 | 0 | detail::ignore_unused(eol); |
891 | 0 | detail::ignore_unused(bow); |
892 | 0 | detail::ignore_unused(eow); |
893 | 0 | detail::ignore_unused(_b); |
894 | 0 | detail::ignore_unused(_w); |
895 | 0 | detail::ignore_unused(_d); |
896 | 0 | detail::ignore_unused(_s); |
897 | 0 | detail::ignore_unused(_n); |
898 | 0 | detail::ignore_unused(_ln); |
899 | 0 | detail::ignore_unused(_); |
900 | 0 | detail::ignore_unused(self); |
901 | 0 | detail::ignore_unused(set); |
902 | 0 | detail::ignore_unused(s0); |
903 | 0 | detail::ignore_unused(s1); |
904 | 0 | detail::ignore_unused(s2); |
905 | 0 | detail::ignore_unused(s3); |
906 | 0 | detail::ignore_unused(s4); |
907 | 0 | detail::ignore_unused(s5); |
908 | 0 | detail::ignore_unused(s6); |
909 | 0 | detail::ignore_unused(s7); |
910 | 0 | detail::ignore_unused(s8); |
911 | 0 | detail::ignore_unused(s9); |
912 | 0 | detail::ignore_unused(a1); |
913 | 0 | detail::ignore_unused(a2); |
914 | 0 | detail::ignore_unused(a3); |
915 | 0 | detail::ignore_unused(a4); |
916 | 0 | detail::ignore_unused(a5); |
917 | 0 | detail::ignore_unused(a6); |
918 | 0 | detail::ignore_unused(a7); |
919 | 0 | detail::ignore_unused(a8); |
920 | 0 | detail::ignore_unused(a9); |
921 | 0 | detail::ignore_unused(as_xpr); |
922 | 0 | } |
923 | | } /* namespace detail */ |
924 | | |
925 | | }} // namespace boost::xpressive |
926 | | |
927 | | #endif |