Coverage Report

Created: 2026-01-15 06:31

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/boost/boost/xpressive/regex_primitives.hpp
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////////////
2
/// \file regex_primitives.hpp
3
/// Contains the syntax elements for writing static regular expressions.
4
//
5
//  Copyright 2008 Eric Niebler. Distributed under the Boost
6
//  Software License, Version 1.0. (See accompanying file
7
//  LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
8
9
#ifndef BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
10
#define BOOST_XPRESSIVE_REGEX_PRIMITIVES_HPP_EAN_10_04_2005
11
12
#include <vector>
13
#include <climits>
14
#include <boost/config.hpp>
15
#include <boost/assert.hpp>
16
#include <boost/mpl/if.hpp>
17
#include <boost/mpl/and.hpp>
18
#include <boost/mpl/assert.hpp>
19
#include <boost/detail/workaround.hpp>
20
#include <boost/preprocessor/cat.hpp>
21
#include <boost/xpressive/detail/detail_fwd.hpp>
22
#include <boost/xpressive/detail/core/matchers.hpp>
23
#include <boost/xpressive/detail/core/regex_domain.hpp>
24
#include <boost/xpressive/detail/utility/ignore_unused.hpp>
25
26
// Doxygen can't handle proto :-(
27
#ifndef BOOST_XPRESSIVE_DOXYGEN_INVOKED
28
# include <boost/proto/core.hpp>
29
# include <boost/proto/transform/arg.hpp>
30
# include <boost/proto/transform/when.hpp>
31
# include <boost/xpressive/detail/core/icase.hpp>
32
# include <boost/xpressive/detail/static/compile.hpp>
33
# include <boost/xpressive/detail/static/modifier.hpp>
34
#endif
35
36
namespace boost { namespace xpressive { namespace detail
37
{
38
39
    typedef assert_word_placeholder<word_boundary<mpl::true_> > assert_word_boundary;
40
    typedef assert_word_placeholder<word_begin> assert_word_begin;
41
    typedef assert_word_placeholder<word_end> assert_word_end;
42
43
    // workaround msvc-7.1 bug with function pointer types
44
    // within function types:
45
    #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
46
    #define mark_number(x) proto::call<mark_number(x)>
47
    #define minus_one() proto::make<minus_one()>
48
    #endif
49
50
    struct push_back : proto::callable // Proto callable: appends an int to a container and returns that int
51
    {
52
        typedef int result_type; // result type advertised to Proto
53
54
        template<typename Subs>
55
        int operator ()(Subs &subs, int i) const // subs: any container with push_back(int); i: value to record
56
        {
57
            subs.push_back(i); // mutates the caller-supplied container
58
            return i; // the appended value doubles as the result
59
        }
60
    };
61
62
    struct mark_number : proto::callable // Proto callable: reads the mark_number_ member of its argument
63
    {
64
        typedef int result_type; // result type advertised to Proto
65
66
        template<typename Expr>
67
        int operator ()(Expr const &expr) const // expr: any object exposing a mark_number_ member
68
        {
69
            return expr.mark_number_; // returned as int
70
        }
71
    };
72
73
    typedef mpl::int_<-1> minus_one;
74
75
    // s1 or -s1
76
    struct SubMatch // grammar + transform for a single sub-match placeholder, plain or negated
77
      : proto::or_<
78
            proto::when<basic_mark_tag,                push_back(proto::_data, mark_number(proto::_value))   > // plain mark: push its mark number onto the data parameter
79
          , proto::when<proto::negate<basic_mark_tag>, push_back(proto::_data, minus_one())                  > // negated mark: push minus_one (mpl::int_<-1>) instead
80
        >
81
    {};
82
83
    struct SubMatchList // one SubMatch, or a comma expression whose left side is itself a SubMatchList
84
      : proto::or_<SubMatch, proto::comma<SubMatchList, SubMatch> > // left-recursive: (list , SubMatch)
85
    {};
86
87
    template<typename Subs>
88
    typename enable_if< // only participates when Subs is a Proto expression matching SubMatchList
89
        mpl::and_<proto::is_expr<Subs>, proto::matches<Subs, SubMatchList> >
90
      , std::vector<int>
91
    >::type
92
    to_vector(Subs const &subs) // returns the ints pushed by the SubMatchList transform for subs
93
    {
94
        std::vector<int> subs_; // receives one entry per SubMatch visited
95
        SubMatchList()(subs, 0, subs_); // state (0) is not referenced by the SubMatch transforms; subs_ is the data parameter
96
        return subs_;
97
    }
98
99
    #if BOOST_WORKAROUND(BOOST_MSVC, == 1310)
100
    #undef mark_number
101
    #undef minus_one
102
    #endif
103
104
    // replace "Expr" with "keep(*State) >> Expr"
105
    struct skip_primitives : proto::transform<skip_primitives> // primitive transform: wraps Expr as keeper(*State) >> Expr
106
    {
107
        template<typename Expr, typename State, typename Data>
108
        struct impl : proto::transform_impl<Expr, State, Data>
109
        {
110
            typedef
111
                typename proto::shift_right< // resulting node: left >> right
112
                    typename proto::unary_expr< // left operand: keeper_tag node ...
113
                        keeper_tag
114
                      , typename proto::dereference<State>::type // ... whose child is a dereference node over State
115
                    >::type
116
                  , Expr // right operand: the original expression
117
                >::type
118
            result_type;
119
120
            result_type operator ()(
121
                typename impl::expr_param expr // expression being wrapped
122
              , typename impl::state_param state // expression placed under the dereference/keeper nodes
123
              , typename impl::data_param // data parameter is unused
124
            ) const
125
            {
126
                result_type that = {{{state}}, expr}; // aggregate-init: shift_right{ keeper{ dereference{state} }, expr }
127
                return that;
128
            }
129
        };
130
    };
131
132
    struct Primitives // expression shapes that SkipGrammar hands to skip_primitives as a single unit
133
      : proto::or_<
134
            proto::terminal<proto::_> // any terminal
135
          , proto::comma<proto::_, proto::_> // any comma expression
136
          , proto::subscript<proto::terminal<set_initializer>, proto::_> // set_initializer terminal subscripted with anything
137
          , proto::assign<proto::terminal<set_initializer>, proto::_> // set_initializer terminal assigned anything
138
          , proto::assign<proto::terminal<attribute_placeholder<proto::_> >, proto::_> // attribute_placeholder terminal assigned anything
139
          , proto::complement<Primitives> // ~ applied to any of the above (recursive)
140
        >
141
    {};
142
143
    struct SkipGrammar // grammar/transform that applies skip_primitives to the Primitives found in an expression
144
      : proto::or_<
145
            proto::when<Primitives, skip_primitives> // primitives are rewritten by skip_primitives
146
          , proto::assign<proto::terminal<mark_placeholder>, SkipGrammar>   // don't "skip" mark tags
147
          , proto::subscript<SkipGrammar, proto::_>                         // don't put skips in actions
148
          , proto::binary_expr<modifier_tag, proto::_, SkipGrammar>         // don't skip modifiers
149
          , proto::unary_expr<lookbehind_tag, proto::_>                     // don't skip lookbehinds
150
          , proto::nary_expr<proto::_, proto::vararg<SkipGrammar> >         // everything else is fair game!
151
        >
152
    {};
153
154
    template<typename Skip>
155
    struct skip_directive // function object: applies SkipGrammar to an expression and appends *skip after it
156
    {
157
        typedef typename proto::result_of::as_expr<Skip>::type skip_type; // Skip converted to a Proto expression type
158
159
        skip_directive(Skip const &skip) // skip: the expression to be skipped; stored as a Proto expression
160
          : skip_(proto::as_expr(skip))
161
        {}
162
163
        template<typename Sig>
164
        struct result {}; // primary template is empty; only the This(Expr) form below is defined
165
166
        template<typename This, typename Expr>
167
        struct result<This(Expr)> // computes the return type of operator()(Expr)
168
        {
169
            typedef
170
                SkipGrammar::impl< // SkipGrammar instantiated for: as_expr(Expr), skip as state, mpl::void_ as data
171
                    typename proto::result_of::as_expr<Expr>::type
172
                  , skip_type const &
173
                  , mpl::void_ &
174
                >
175
            skip_transform;
176
177
            typedef
178
                typename proto::shift_right< // final node: transformed-expr >> *skip
179
                    typename skip_transform::result_type
180
                  , typename proto::dereference<skip_type>::type
181
                >::type
182
            type;
183
        };
184
185
        template<typename Expr>
186
        typename result<skip_directive(Expr)>::type
187
        operator ()(Expr const &expr) const // expr: the regex expression to rewrite
188
        {
189
            mpl::void_ ignore; // dummy object bound to the transform's data parameter
190
            typedef result<skip_directive(Expr)> result_fun;
191
            typename result_fun::type that = { // aggregate-init of the shift_right node
192
                typename result_fun::skip_transform()(proto::as_expr(expr), this->skip_, ignore) // left: expr rewritten by SkipGrammar with skip_ as state
193
              , {skip_} // right: dereference node over the stored skip expression
194
            };
195
            return that;
196
        }
197
198
    private:
199
        skip_type skip_; // the skip expression captured at construction
200
    };
201
202
/*
203
///////////////////////////////////////////////////////////////////////////////
204
/// INTERNAL ONLY
205
// BOOST_XPRESSIVE_GLOBAL
206
//  for defining globals that neither violate the One Definition Rule nor
207
//  lead to undefined behavior due to global object initialization order.
208
//#define BOOST_XPRESSIVE_GLOBAL(type, name, init)                                        \
209
//    namespace detail                                                                    \
210
//    {                                                                                   \
211
//        template<int Dummy>                                                             \
212
//        struct BOOST_PP_CAT(global_pod_, name)                                          \
213
//        {                                                                               \
214
//            static type const value;                                                    \
215
//        private:                                                                        \
216
//            union type_must_be_pod                                                      \
217
//            {                                                                           \
218
//                type t;                                                                 \
219
//                char ch;                                                                \
220
//            } u;                                                                        \
221
//        };                                                                              \
222
//        template<int Dummy>                                                             \
223
//        type const BOOST_PP_CAT(global_pod_, name)<Dummy>::value = init;                \
224
//    }                                                                                   \
225
//    type const &name = detail::BOOST_PP_CAT(global_pod_, name)<0>::value
226
*/
227
228
229
} // namespace detail
230
231
/// INTERNAL ONLY (for backwards compatibility)
232
unsigned int const repeat_max = UINT_MAX-1;
233
234
///////////////////////////////////////////////////////////////////////////////
235
/// \brief For infinite repetition of a sub-expression.
236
///
237
/// Magic value used with the repeat\<\>() function template
238
/// to specify an unbounded repeat. Use as: repeat<17, inf>('a').
239
/// The equivalent in perl is /a{17,}/.
240
unsigned int const inf = UINT_MAX-1;
241
242
/// INTERNAL ONLY (for backwards compatibility)
243
proto::terminal<detail::epsilon_matcher>::type const epsilon = {{}};
244
245
///////////////////////////////////////////////////////////////////////////////
246
/// \brief Successfully matches nothing.
247
///
248
/// Successfully matches a zero-width sequence. nil always succeeds and
249
/// never consumes any characters.
250
proto::terminal<detail::epsilon_matcher>::type const nil = {{}};
251
252
///////////////////////////////////////////////////////////////////////////////
253
/// \brief Matches an alpha-numeric character.
254
///
255
/// The regex traits are used to determine which characters are alpha-numeric.
256
/// To match any character that is not alpha-numeric, use ~alnum.
257
///
258
/// \attention alnum is equivalent to /[[:alnum:]]/ in perl. ~alnum is equivalent
259
/// to /[[:^alnum:]]/ in perl.
260
proto::terminal<detail::posix_charset_placeholder>::type const alnum = {{"alnum", false}};
261
262
///////////////////////////////////////////////////////////////////////////////
263
/// \brief Matches an alphabetic character.
264
///
265
/// The regex traits are used to determine which characters are alphabetic.
266
/// To match any character that is not alphabetic, use ~alpha.
267
///
268
/// \attention alpha is equivalent to /[[:alpha:]]/ in perl. ~alpha is equivalent
269
/// to /[[:^alpha:]]/ in perl.
270
proto::terminal<detail::posix_charset_placeholder>::type const alpha = {{"alpha", false}};
271
272
///////////////////////////////////////////////////////////////////////////////
273
/// \brief Matches a blank (horizontal white-space) character.
274
///
275
/// The regex traits are used to determine which characters are blank characters.
276
/// To match any character that is not blank, use ~blank.
277
///
278
/// \attention blank is equivalent to /[[:blank:]]/ in perl. ~blank is equivalent
279
/// to /[[:^blank:]]/ in perl.
280
proto::terminal<detail::posix_charset_placeholder>::type const blank = {{"blank", false}};
281
282
///////////////////////////////////////////////////////////////////////////////
283
/// \brief Matches a control character.
284
///
285
/// The regex traits are used to determine which characters are control characters.
286
/// To match any character that is not a control character, use ~cntrl.
287
///
288
/// \attention cntrl is equivalent to /[[:cntrl:]]/ in perl. ~cntrl is equivalent
289
/// to /[[:^cntrl:]]/ in perl.
290
proto::terminal<detail::posix_charset_placeholder>::type const cntrl = {{"cntrl", false}};
291
292
///////////////////////////////////////////////////////////////////////////////
293
/// \brief Matches a digit character.
294
///
295
/// The regex traits are used to determine which characters are digits.
296
/// To match any character that is not a digit, use ~digit.
297
///
298
/// \attention digit is equivalent to /[[:digit:]]/ in perl. ~digit is equivalent
299
/// to /[[:^digit:]]/ in perl.
300
proto::terminal<detail::posix_charset_placeholder>::type const digit = {{"digit", false}};
301
302
///////////////////////////////////////////////////////////////////////////////
303
/// \brief Matches a graph character.
304
///
305
/// The regex traits are used to determine which characters are graphable.
306
/// To match any character that is not graphable, use ~graph.
307
///
308
/// \attention graph is equivalent to /[[:graph:]]/ in perl. ~graph is equivalent
309
/// to /[[:^graph:]]/ in perl.
310
proto::terminal<detail::posix_charset_placeholder>::type const graph = {{"graph", false}};
311
312
///////////////////////////////////////////////////////////////////////////////
313
/// \brief Matches a lower-case character.
314
///
315
/// The regex traits are used to determine which characters are lower-case.
316
/// To match any character that is not a lower-case character, use ~lower.
317
///
318
/// \attention lower is equivalent to /[[:lower:]]/ in perl. ~lower is equivalent
319
/// to /[[:^lower:]]/ in perl.
320
proto::terminal<detail::posix_charset_placeholder>::type const lower = {{"lower", false}};
321
322
///////////////////////////////////////////////////////////////////////////////
323
/// \brief Matches a printable character.
324
///
325
/// The regex traits are used to determine which characters are printable.
326
/// To match any character that is not printable, use ~print.
327
///
328
/// \attention print is equivalent to /[[:print:]]/ in perl. ~print is equivalent
329
/// to /[[:^print:]]/ in perl.
330
proto::terminal<detail::posix_charset_placeholder>::type const print = {{"print", false}};
331
332
///////////////////////////////////////////////////////////////////////////////
333
/// \brief Matches a punctuation character.
334
///
335
/// The regex traits are used to determine which characters are punctuation.
336
/// To match any character that is not punctuation, use ~punct.
337
///
338
/// \attention punct is equivalent to /[[:punct:]]/ in perl. ~punct is equivalent
339
/// to /[[:^punct:]]/ in perl.
340
proto::terminal<detail::posix_charset_placeholder>::type const punct = {{"punct", false}};
341
342
///////////////////////////////////////////////////////////////////////////////
343
/// \brief Matches a space character.
344
///
345
/// The regex traits are used to determine which characters are space characters.
346
/// To match any character that is not white-space, use ~space.
347
///
348
/// \attention space is equivalent to /[[:space:]]/ in perl. ~space is equivalent
349
/// to /[[:^space:]]/ in perl.
350
proto::terminal<detail::posix_charset_placeholder>::type const space = {{"space", false}};
351
352
///////////////////////////////////////////////////////////////////////////////
353
/// \brief Matches an upper-case character.
354
///
355
/// The regex traits are used to determine which characters are upper-case.
356
/// To match any character that is not upper-case, use ~upper.
357
///
358
/// \attention upper is equivalent to /[[:upper:]]/ in perl. ~upper is equivalent
359
/// to /[[:^upper:]]/ in perl.
360
proto::terminal<detail::posix_charset_placeholder>::type const upper = {{"upper", false}};
361
362
///////////////////////////////////////////////////////////////////////////////
363
/// \brief Matches a hexadecimal digit character.
364
///
365
/// The regex traits are used to determine which characters are hex digits.
366
/// To match any character that is not a hex digit, use ~xdigit.
367
///
368
/// \attention xdigit is equivalent to /[[:xdigit:]]/ in perl. ~xdigit is equivalent
369
/// to /[[:^xdigit:]]/ in perl.
370
proto::terminal<detail::posix_charset_placeholder>::type const xdigit = {{"xdigit", false}};
371
372
///////////////////////////////////////////////////////////////////////////////
373
/// \brief Beginning of sequence assertion.
374
///
375
/// For the character sequence [begin, end), 'bos' matches the
376
/// zero-width sub-sequence [begin, begin).
377
proto::terminal<detail::assert_bos_matcher>::type const bos = {{}};
378
379
///////////////////////////////////////////////////////////////////////////////
380
/// \brief End of sequence assertion.
381
///
382
/// For the character sequence [begin, end),
383
/// 'eos' matches the zero-width sub-sequence [end, end).
384
///
385
/// \attention Unlike the perl end of sequence assertion \$, 'eos' will
386
/// not match at the position [end-1, end-1) if *(end-1) is '\\n'. To
387
/// get that behavior, use (!_n >> eos).
388
proto::terminal<detail::assert_eos_matcher>::type const eos = {{}};
389
390
///////////////////////////////////////////////////////////////////////////////
391
/// \brief Beginning of line assertion.
392
///
393
/// 'bol' matches the zero-width sub-sequence
394
/// immediately following a logical newline sequence. The regex traits
395
/// are used to determine what constitutes a logical newline sequence.
396
proto::terminal<detail::assert_bol_placeholder>::type const bol = {{}};
397
398
///////////////////////////////////////////////////////////////////////////////
399
/// \brief End of line assertion.
400
///
401
/// 'eol' matches the zero-width sub-sequence
402
/// immediately preceding a logical newline sequence. The regex traits
403
/// are used to determine what constitutes a logical newline sequence.
404
proto::terminal<detail::assert_eol_placeholder>::type const eol = {{}};
405
406
///////////////////////////////////////////////////////////////////////////////
407
/// \brief Beginning of word assertion.
408
///
409
/// 'bow' matches the zero-width sub-sequence
410
/// immediately following a non-word character and preceding a word character.
411
/// The regex traits are used to determine what constitutes a word character.
412
proto::terminal<detail::assert_word_begin>::type const bow = {{}};
413
414
///////////////////////////////////////////////////////////////////////////////
415
/// \brief End of word assertion.
416
///
417
/// 'eow' matches the zero-width sub-sequence
418
/// immediately following a word character and preceding a non-word character.
419
/// The regex traits are used to determine what constitutes a word character.
420
proto::terminal<detail::assert_word_end>::type const eow = {{}};
421
422
///////////////////////////////////////////////////////////////////////////////
423
/// \brief Word boundary assertion.
424
///
425
/// '_b' matches the zero-width sub-sequence at the beginning or the end of a word.
426
/// It is equivalent to (bow | eow). The regex traits are used to determine what
427
/// constitutes a word character. To match a non-word boundary, use ~_b.
428
///
429
/// \attention _b is like \\b in perl. ~_b is like \\B in perl.
430
proto::terminal<detail::assert_word_boundary>::type const _b = {{}};
431
432
///////////////////////////////////////////////////////////////////////////////
433
/// \brief Matches a word character.
434
///
435
/// '_w' matches a single word character. The regex traits are used to determine which
436
/// characters are word characters. Use ~_w to match a character that is not a word
437
/// character.
438
///
439
/// \attention _w is like \\w in perl. ~_w is like \\W in perl.
440
proto::terminal<detail::posix_charset_placeholder>::type const _w = {{"w", false}};
441
442
///////////////////////////////////////////////////////////////////////////////
443
/// \brief Matches a digit character.
444
///
445
/// '_d' matches a single digit character. The regex traits are used to determine which
446
/// characters are digits. Use ~_d to match a character that is not a digit
447
/// character.
448
///
449
/// \attention _d is like \\d in perl. ~_d is like \\D in perl.
450
proto::terminal<detail::posix_charset_placeholder>::type const _d = {{"d", false}};
451
452
///////////////////////////////////////////////////////////////////////////////
453
/// \brief Matches a space character.
454
///
455
/// '_s' matches a single space character. The regex traits are used to determine which
456
/// characters are space characters. Use ~_s to match a character that is not a space
457
/// character.
458
///
459
/// \attention _s is like \\s in perl. ~_s is like \\S in perl.
460
proto::terminal<detail::posix_charset_placeholder>::type const _s = {{"s", false}};
461
462
///////////////////////////////////////////////////////////////////////////////
463
/// \brief Matches a literal newline character, '\\n'.
464
///
465
/// '_n' matches a single newline character, '\\n'. Use ~_n to match a character
466
/// that is not a newline.
467
///
468
/// \attention ~_n is like '.' in perl without the /s modifier.
469
proto::terminal<char>::type const _n = {'\n'};
470
471
///////////////////////////////////////////////////////////////////////////////
472
/// \brief Matches a logical newline sequence.
473
///
474
/// '_ln' matches a logical newline sequence. This can be any character in the
475
/// line separator class, as determined by the regex traits, or the '\\r\\n' sequence.
476
/// For the purpose of back-tracking, '\\r\\n' is treated as a unit.
477
/// To match any one character that is not a logical newline, use ~_ln.
478
detail::logical_newline_xpression const _ln = {{}};
479
480
///////////////////////////////////////////////////////////////////////////////
481
/// \brief Matches any one character.
482
///
483
/// Match any character, similar to '.' in perl syntax with the /s modifier.
484
/// '_' matches any one character, including the newline.
485
///
486
/// \attention To match any character except the newline, use ~_n
487
proto::terminal<detail::any_matcher>::type const _ = {{}};
488
489
///////////////////////////////////////////////////////////////////////////////
490
/// \brief Reference to the current regex object
491
///
492
/// Useful when constructing recursive regular expression objects. The 'self'
493
/// identifier is a short-hand for the current regex object. For instance,
494
/// sregex rx = '(' >> (self | nil) >> ')'; will create a regex object that
495
/// matches balanced parens such as "((()))".
496
proto::terminal<detail::self_placeholder>::type const self = {{}};
497
498
///////////////////////////////////////////////////////////////////////////////
499
/// \brief Used to create character sets.
500
///
501
/// There are two ways to create character sets with the 'set' identifier. The
502
/// easiest is to create a comma-separated list of the characters in the set,
503
/// as in (set= 'a','b','c'). This set will match 'a', 'b', or 'c'. The other
504
/// way is to define the set as an argument to the set subscript operator.
505
/// For instance, set[ 'a' | range('b','c') | digit ] will match an 'a', 'b',
506
/// 'c' or a digit character.
507
///
508
/// To complement a set, apply the '~' operator. For instance, ~(set= 'a','b','c')
509
/// will match any character that is not an 'a', 'b', or 'c'.
510
///
511
/// Sets can be composed of other, possibly complemented, sets. For instance,
512
/// set[ ~digit | ~(set= 'a','b','c') ].
513
detail::set_initializer_type const set = {{}};
514
515
///////////////////////////////////////////////////////////////////////////////
516
/// \brief Sub-match placeholder type, used to create named captures in
517
/// static regexes.
518
///
519
/// \c mark_tag is the type of the global sub-match placeholders \c s0, \c s1, etc. You
520
/// can use the \c mark_tag type to create your own sub-match placeholders with
521
/// more meaningful names. This is roughly equivalent to the "named capture"
522
/// feature of dynamic regular expressions.
523
///
524
/// To create a named sub-match placeholder, initialize it with a unique integer.
525
/// The integer must only be unique within the regex in which the placeholder
526
/// is used. Then you can use it within static regexes to create sub-matches
527
/// by assigning a sub-expression to it, or to refer back to already created
528
/// sub-matches.
529
/// 
530
/// \code
531
/// mark_tag number(1); // "number" is now equivalent to "s1"
532
/// // Match a number, followed by a space and the same number again
533
/// sregex rx = (number = +_d) >> ' ' >> number;
534
/// \endcode
535
///
536
/// After a successful \c regex_match() or \c regex_search(), the sub-match placeholder
537
/// can be used to index into the <tt>match_results\<\></tt> object to retrieve the
538
/// corresponding sub-match.
539
struct mark_tag
540
  : proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> // wraps a basic_mark_tag expression in regex_domain
541
{
542
private:
543
    typedef proto::extends<detail::basic_mark_tag, mark_tag, detail::regex_domain> base_type; // shorthand for the base class
544
545
    static detail::basic_mark_tag make_tag(int mark_nbr) // builds the wrapped basic_mark_tag from a mark number
546
0
    {
547
0
        detail::basic_mark_tag mark = {{mark_nbr}}; // aggregate-init: the nested braces carry mark_nbr into the tag
548
0
        return mark;
549
0
    }
550
551
public:
552
    /// \brief Initialize a mark_tag placeholder
553
    /// \param mark_nbr An integer that uniquely identifies this \c mark_tag
554
    /// within the static regexes in which this \c mark_tag will be used.
555
    /// \pre <tt>mark_nbr \> 0</tt>
556
    mark_tag(int mark_nbr)
557
      : base_type(mark_tag::make_tag(mark_nbr))
558
0
    {
559
0
        // Mark numbers must be integers greater than 0.
560
0
        BOOST_ASSERT(mark_nbr > 0);
561
0
    }
562
563
    /// INTERNAL ONLY
564
    operator detail::basic_mark_tag const &() const // implicit conversion to the wrapped basic_mark_tag
565
0
    {
566
0
        return this->proto_base(); // reference to the wrapped base expression
567
0
    }
568
569
    BOOST_PROTO_EXTENDS_USING_ASSIGN_NON_DEPENDENT(mark_tag) // Proto macro; presumably brings in the base's operator= so (tag = expr) builds an expression -- confirm in Proto docs
570
};
571
572
// This macro is used when declaring mark_tags that are global because
573
// it guarantees that they are statically initialized. That avoids
574
// order-of-initialization bugs. In user code, the simpler: mark_tag s0(0);
575
// would be preferable.
576
/// INTERNAL ONLY
577
#define BOOST_XPRESSIVE_GLOBAL_MARK_TAG(NAME, VALUE)                            \
578
    boost::xpressive::mark_tag::proto_base_expr const NAME = {{VALUE}}          \
579
    /**/
580
581
///////////////////////////////////////////////////////////////////////////////
582
/// \brief Sub-match placeholder, like $& in Perl
583
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s0, 0);
584
585
///////////////////////////////////////////////////////////////////////////////
586
/// \brief Sub-match placeholder, like $1 in perl.
587
///
588
/// To create a sub-match, assign a sub-expression to the sub-match placeholder.
589
/// For instance, (s1= _) will match any one character and remember which
590
/// character was matched in the 1st sub-match. Later in the pattern, you can
591
/// refer back to the sub-match. For instance,  (s1= _) >> s1  will match any
592
/// character, and then match the same character again.
593
///
594
/// After a successful regex_match() or regex_search(), the sub-match placeholders
595
/// can be used to index into the match_results\<\> object to retrieve the Nth
596
/// sub-match.
597
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s1, 1);
598
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s2, 2);
599
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s3, 3);
600
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s4, 4);
601
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s5, 5);
602
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s6, 6);
603
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s7, 7);
604
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s8, 8);
605
BOOST_XPRESSIVE_GLOBAL_MARK_TAG(s9, 9);
606
607
// NOTE: For the purpose of xpressive's documentation, make icase() look like an
608
// ordinary function. In reality, it is a function object defined in detail/icase.hpp
609
// so that it can serve double-duty as regex_constants::icase, the syntax_option_type.
610
#ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
611
///////////////////////////////////////////////////////////////////////////////
612
/// \brief Makes a sub-expression case-insensitive.
613
///
614
/// Use icase() to make a sub-expression case-insensitive. For instance,
615
/// "foo" >> icase(set['b'] >> "ar") will match "foo" exactly followed by
616
/// "bar" irrespective of case.
617
template<typename Expr> detail::unspecified icase(Expr const &expr) { return 0; }
618
#endif
619
620
///////////////////////////////////////////////////////////////////////////////
621
/// \brief Makes a literal into a regular expression.
622
///
623
/// Use as_xpr() to turn a literal into a regular expression. For instance,
624
/// "foo" >> "bar" will not compile because both operands to the right-shift
625
/// operator are const char*, and no such operator exists. Use as_xpr("foo") >> "bar"
626
/// instead.
627
///
628
/// You can use as_xpr() with character literals in addition to string literals.
629
/// For instance, as_xpr('a') will match an 'a'. You can also complement a
630
/// character literal, as with ~as_xpr('a'). This will match any one character
631
/// that is not an 'a'.
632
#ifdef BOOST_XPRESSIVE_DOXYGEN_INVOKED
633
template<typename Literal> detail::unspecified as_xpr(Literal const &literal) { return 0; }
634
#else
635
proto::functional::as_expr<> const as_xpr = {};
636
#endif
637
638
///////////////////////////////////////////////////////////////////////////////
639
/// \brief Embed a regex object by reference.
640
///
641
/// \param rex The basic_regex object to embed by reference.
642
template<typename BidiIter>
643
inline typename proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type const // terminal holding a reference_wrapper to a const basic_regex
644
by_ref(basic_regex<BidiIter> const &rex)
645
{
646
    reference_wrapper<basic_regex<BidiIter> const> ref(rex); // wrap rex in a reference_wrapper
647
    return proto::terminal<reference_wrapper<basic_regex<BidiIter> const> >::type::make(ref); // build the terminal from the wrapper
648
}
649
650
///////////////////////////////////////////////////////////////////////////////
651
/// \brief Match a range of characters.
652
///
653
/// Match any character in the range [ch_min, ch_max].
654
///
655
/// \param ch_min The lower end of the range to match.
656
/// \param ch_max The upper end of the range to match.
657
template<typename Char>
658
inline typename proto::terminal<detail::range_placeholder<Char> >::type const // terminal holding a range_placeholder<Char>
659
range(Char ch_min, Char ch_max)
660
{
661
    detail::range_placeholder<Char> that = {ch_min, ch_max, false}; // third field presumably the "complemented" flag -- confirm against range_placeholder
662
    return proto::terminal<detail::range_placeholder<Char> >::type::make(that); // build the terminal from the placeholder
663
}
664
665
///////////////////////////////////////////////////////////////////////////////
666
/// \brief Make a sub-expression optional. Equivalent to !as_xpr(expr).
667
///
668
/// \param expr The sub-expression to make optional.
669
template<typename Expr>
670
typename proto::result_of::make_expr< // logical_not node in default_domain over Expr const &
671
    proto::tag::logical_not
672
  , proto::default_domain
673
  , Expr const &
674
>::type const
675
optional(Expr const &expr)
676
{
677
    return proto::make_expr< // same tag and domain as the declared return type
678
        proto::tag::logical_not
679
      , proto::default_domain
680
    >(boost::ref(expr)); // expr is passed by reference wrapper, matching the Expr const & child type
681
}
682
683
///////////////////////////////////////////////////////////////////////////////
684
/// \brief Repeat a sub-expression multiple times.
685
///
686
/// There are two forms of the repeat\<\>() function template. To match a
687
/// sub-expression N times, use repeat\<N\>(expr). To match a sub-expression
688
/// from M to N times, use repeat\<M,N\>(expr).
689
///
690
/// The repeat\<\>() function creates a greedy quantifier. To make the quantifier
691
/// non-greedy, apply the unary minus operator, as in -repeat\<M,N\>(expr).
692
///
693
/// \param expr The sub-expression to repeat.
694
// Two-bound form: Min and Max are compile-time unsigned template arguments
// and must be given explicitly by the caller; Expr is deduced from expr.
template<unsigned int Min, unsigned int Max, typename Expr>
695
typename proto::result_of::make_expr<
696
    detail::generic_quant_tag<Min, Max>
697
  , proto::default_domain
698
  , Expr const &
699
>::type const
700
repeat(Expr const &expr)
701
{
702
    // Build a node tagged generic_quant_tag<Min, Max> in proto's default
    // domain. expr is handed over through boost::ref, matching the
    // Expr const & child in the declared return type.
    return proto::make_expr<
703
        detail::generic_quant_tag<Min, Max>
704
      , proto::default_domain
705
    >(boost::ref(expr));
706
}
707
708
/// \overload
709
///
710
// Single-count form: Count is used for both arguments of generic_quant_tag,
// so the lower and upper bounds are the same value.
template<unsigned int Count, typename Expr2>
711
typename proto::result_of::make_expr<
712
    detail::generic_quant_tag<Count, Count>
713
  , proto::default_domain
714
  , Expr2 const &
715
>::type const
716
repeat(Expr2 const &expr2)
717
{
718
    // Same construction as the two-bound overload, with the tag
    // generic_quant_tag<Count, Count>; expr2 is handed over through boost::ref.
    return proto::make_expr<
719
        detail::generic_quant_tag<Count, Count>
720
      , proto::default_domain
721
    >(boost::ref(expr2));
722
}
723
724
///////////////////////////////////////////////////////////////////////////////
725
/// \brief Create an independent sub-expression.
726
///
727
/// Turn off back-tracking for a sub-expression. Any branches or repeats within
728
/// the sub-expression will match only one way, and no other alternatives are
729
/// tried.
730
///
731
/// \attention keep(expr) is equivalent to the perl (?>...) extension.
732
///
733
/// \param expr The sub-expression to modify.
734
template<typename Expr>
735
// Return type: the expression type proto computes for a keeper_tag node in
// the default domain whose single child is taken as Expr const &.
typename proto::result_of::make_expr<
736
    detail::keeper_tag
737
  , proto::default_domain
738
  , Expr const &
739
>::type const
740
keep(Expr const &expr)
741
{
742
    // Build the keeper_tag node named by the return type; expr is handed over
    // through boost::ref, matching the Expr const & child declared above.
    return proto::make_expr<
743
        detail::keeper_tag
744
      , proto::default_domain
745
    >(boost::ref(expr));
746
}
747
748
///////////////////////////////////////////////////////////////////////////////
749
/// \brief Look-ahead assertion.
750
///
751
/// before(expr) succeeds if the expr sub-expression would match at the current
752
/// position in the sequence, but expr is not included in the match. For instance,
753
/// before("foo") succeeds if we are before a "foo". Look-ahead assertions can be
754
/// negated with the bit-complement operator.
755
///
756
/// \attention before(expr) is equivalent to the perl (?=...) extension.
757
/// ~before(expr) is a negative look-ahead assertion, equivalent to the
758
/// perl (?!...) extension.
759
///
760
/// \param expr The sub-expression to put in the look-ahead assertion.
761
template<typename Expr>
762
// Return type: the expression type proto computes for a lookahead_tag node in
// the default domain whose single child is taken as Expr const &.
typename proto::result_of::make_expr<
763
    detail::lookahead_tag
764
  , proto::default_domain
765
  , Expr const &
766
>::type const
767
before(Expr const &expr)
768
{
769
    // Build the lookahead_tag node named by the return type; expr is handed
    // over through boost::ref, matching the Expr const & child declared above.
    return proto::make_expr<
770
        detail::lookahead_tag
771
      , proto::default_domain
772
    >(boost::ref(expr));
773
}
774
775
///////////////////////////////////////////////////////////////////////////////
776
/// \brief Look-behind assertion.
777
///
778
/// after(expr) succeeds if the expr sub-expression would match at the current
779
/// position minus N in the sequence, where N is the width of expr. expr is not included in
780
/// the match. For instance, after("foo") succeeds if we are after a "foo". Look-behind
781
/// assertions can be negated with the bit-complement operator.
782
///
783
/// \attention after(expr) is equivalent to the perl (?<=...) extension.
784
/// ~after(expr) is a negative look-behind assertion, equivalent to the
785
/// perl (?<!...) extension.
786
///
787
/// \param expr The sub-expression to put in the look-behind assertion.
788
///
789
/// \pre expr cannot match a variable number of characters.
790
template<typename Expr>
791
// Return type: the expression type proto computes for a lookbehind_tag node
// in the default domain whose single child is taken as Expr const &.
typename proto::result_of::make_expr<
792
    detail::lookbehind_tag
793
  , proto::default_domain
794
  , Expr const &
795
>::type const
796
after(Expr const &expr)
797
{
798
    // Build the lookbehind_tag node named by the return type; expr is handed
    // over through boost::ref, matching the Expr const & child declared above.
    return proto::make_expr<
799
        detail::lookbehind_tag
800
      , proto::default_domain
801
    >(boost::ref(expr));
802
}
803
804
///////////////////////////////////////////////////////////////////////////////
805
/// \brief Specify a regex traits or a std::locale.
806
///
807
/// imbue() instructs the regex engine to use the specified traits or locale
808
/// when matching the regex. The entire expression must use the same traits/locale.
809
/// For instance, the following specifies a locale for use with a regex:
810
///   std::locale loc;
811
///   sregex rx = imbue(loc)(+digit);
812
///
813
/// \param loc The std::locale or regex traits object.
814
template<typename Locale>
815
inline detail::modifier_op<detail::locale_modifier<Locale> > const
816
imbue(Locale const &loc)
817
{
818
    // Aggregate-initialize the modifier_op with two members, in order:
    //   1. a locale_modifier<Locale> constructed from loc;
    //   2. regex_constants::ECMAScript.
    // NOTE(review): the second member is presumably the syntax-option flags
    // field of modifier_op -- confirm against its definition.
    detail::modifier_op<detail::locale_modifier<Locale> > mod =
819
    {
820
        detail::locale_modifier<Locale>(loc)
821
      , regex_constants::ECMAScript
822
    };
823
    // Returned by value.
    return mod;
824
}
825
826
// Attribute placeholder terminals a1 through a9. Each is a proto terminal over
// detail::attribute_placeholder, distinguished only by the mpl::int_<N> tag
// (N = 1..9), and is brace-initialized with empty braces.
// They are const objects defined at namespace scope in a header, so every
// translation unit that includes this file gets its own copy (const
// namespace-scope variables have internal linkage in C++).
proto::terminal<detail::attribute_placeholder<mpl::int_<1> > >::type const a1 = {{}};
827
proto::terminal<detail::attribute_placeholder<mpl::int_<2> > >::type const a2 = {{}};
828
proto::terminal<detail::attribute_placeholder<mpl::int_<3> > >::type const a3 = {{}};
829
proto::terminal<detail::attribute_placeholder<mpl::int_<4> > >::type const a4 = {{}};
830
proto::terminal<detail::attribute_placeholder<mpl::int_<5> > >::type const a5 = {{}};
831
proto::terminal<detail::attribute_placeholder<mpl::int_<6> > >::type const a6 = {{}};
832
proto::terminal<detail::attribute_placeholder<mpl::int_<7> > >::type const a7 = {{}};
833
proto::terminal<detail::attribute_placeholder<mpl::int_<8> > >::type const a8 = {{}};
834
proto::terminal<detail::attribute_placeholder<mpl::int_<9> > >::type const a9 = {{}};
835
836
///////////////////////////////////////////////////////////////////////////////
837
/// \brief Specify which characters to skip when matching a regex.
838
///
839
/// <tt>skip()</tt> instructs the regex engine to skip certain characters when matching
840
/// a regex. It is most useful for writing regexes that ignore whitespace.
841
/// For instance, the following specifies a regex that skips whitespace and
842
/// punctuation:
843
///
844
/// \code
845
/// // A sentence is one or more words separated by whitespace
846
/// // and punctuation.
847
/// sregex word = +alpha;
848
/// sregex sentence = skip(set[_s | punct])( +word );
849
/// \endcode
850
///
851
/// The way it works in the above example is to insert
852
/// <tt>keep(*set[_s | punct])</tt> before each primitive within the regex.
853
/// A "primitive" includes terminals like strings, character sets and nested
854
/// regexes. A final <tt>*set[_s | punct]</tt> is added to the end of the
855
/// regex. The regex <tt>sentence</tt> specified above is equivalent to
856
/// the following:
857
///
858
/// \code
859
/// sregex sentence = +( keep(*set[_s | punct]) >> word )
860
///                        >> *set[_s | punct];
861
/// \endcode
862
///
863
/// \attention Skipping does not affect how nested regexes are handled because
864
/// they are treated atomically. String literals are also treated
865
/// atomically; that is, no skipping is done within a string literal. So
866
/// <tt>skip(_s)("this that")</tt> is not the same as
867
/// <tt>skip(_s)("this" >> as_xpr("that"))</tt>. The first will only match
868
/// when there is only one space between "this" and "that". The second will
869
/// skip any and all whitespace between "this" and "that".
870
///
871
/// \param skip A regex that specifies which characters to skip.
872
template<typename Skip>
873
// Note: the parameter is also named skip, so inside the body the name refers
// to the parameter, not to this function template.
detail::skip_directive<Skip> skip(Skip const &skip)
874
{
875
    // Construct a detail::skip_directive<Skip> from the skip expression and
    // return it by value.
    return detail::skip_directive<Skip>(skip);
876
}
877
878
namespace detail
879
{
880
    // Passes every namespace-scope primitive object (and the as_xpr name) to
    // detail::ignore_unused, one call per object, so that each one is
    // referenced at least once in this header.
    // NOTE(review): presumably this exists only to silence "unused variable"
    // warnings for the header-defined objects -- confirm against
    // boost/xpressive/detail/utility/ignore_unused.hpp.
    inline void ignore_unused_regex_primitives()
881
0
    {
882
0
        detail::ignore_unused(repeat_max);
883
0
        detail::ignore_unused(inf);
884
0
        detail::ignore_unused(epsilon);
885
0
        detail::ignore_unused(nil);
886
0
        detail::ignore_unused(alnum);
887
0
        detail::ignore_unused(bos);
888
0
        detail::ignore_unused(eos);
889
0
        detail::ignore_unused(bol);
890
0
        detail::ignore_unused(eol);
891
0
        detail::ignore_unused(bow);
892
0
        detail::ignore_unused(eow);
893
0
        detail::ignore_unused(_b);
894
0
        detail::ignore_unused(_w);
895
0
        detail::ignore_unused(_d);
896
0
        detail::ignore_unused(_s);
897
0
        detail::ignore_unused(_n);
898
0
        detail::ignore_unused(_ln);
899
0
        detail::ignore_unused(_);
900
0
        detail::ignore_unused(self);
901
0
        detail::ignore_unused(set);
902
0
        // Sub-match placeholders s0..s9.
        detail::ignore_unused(s0);
903
0
        detail::ignore_unused(s1);
904
0
        detail::ignore_unused(s2);
905
0
        detail::ignore_unused(s3);
906
0
        detail::ignore_unused(s4);
907
0
        detail::ignore_unused(s5);
908
0
        detail::ignore_unused(s6);
909
0
        detail::ignore_unused(s7);
910
0
        detail::ignore_unused(s8);
911
0
        detail::ignore_unused(s9);
912
0
        // Attribute placeholders a1..a9.
        detail::ignore_unused(a1);
913
0
        detail::ignore_unused(a2);
914
0
        detail::ignore_unused(a3);
915
0
        detail::ignore_unused(a4);
916
0
        detail::ignore_unused(a5);
917
0
        detail::ignore_unused(a6);
918
0
        detail::ignore_unused(a7);
919
0
        detail::ignore_unused(a8);
920
0
        detail::ignore_unused(a9);
921
0
        detail::ignore_unused(as_xpr);
922
0
    }
923
}
924
925
}} // namespace boost::xpressive
926
927
#endif