regex_syntax/ast/
parse.rs

1/*!
2This module provides a regular expression parser.
3*/
4
5use std::borrow::Borrow;
6use std::cell::{Cell, RefCell};
7use std::mem;
8use std::result;
9
10use crate::ast::{self, Ast, Position, Span};
11use crate::either::Either;
12
13use crate::is_meta_character;
14
15type Result<T> = result::Result<T, ast::Error>;
16
/// A primitive is an expression with no sub-expressions. This includes
/// literals, assertions and non-set character classes. This representation
/// is used as intermediate state in the parser.
///
/// This does not include ASCII character classes, since they can only appear
/// within a set character class.
///
/// Every variant carries a span (directly or via its payload), and each
/// variant converts to a corresponding `Ast` node via `into_ast`. Only
/// `Literal`, `Perl` and `Unicode` are accepted as character class items,
/// and only `Literal` is accepted as a range endpoint.
#[derive(Clone, Debug, Eq, PartialEq)]
enum Primitive {
    /// A literal; the payload holds its own span.
    Literal(ast::Literal),
    /// An assertion; rejected inside character classes.
    Assertion(ast::Assertion),
    /// A dot, for which only the span is recorded; rejected inside
    /// character classes.
    Dot(Span),
    /// A Perl character class (`ast::ClassPerl`).
    Perl(ast::ClassPerl),
    /// A Unicode character class (`ast::ClassUnicode`).
    Unicode(ast::ClassUnicode),
}
31
32impl Primitive {
33    /// Return the span of this primitive.
34    fn span(&self) -> &Span {
35        match *self {
36            Primitive::Literal(ref x) => &x.span,
37            Primitive::Assertion(ref x) => &x.span,
38            Primitive::Dot(ref span) => span,
39            Primitive::Perl(ref x) => &x.span,
40            Primitive::Unicode(ref x) => &x.span,
41        }
42    }
43
44    /// Convert this primitive into a proper AST.
45    fn into_ast(self) -> Ast {
46        match self {
47            Primitive::Literal(lit) => Ast::Literal(lit),
48            Primitive::Assertion(assert) => Ast::Assertion(assert),
49            Primitive::Dot(span) => Ast::Dot(span),
50            Primitive::Perl(cls) => Ast::Class(ast::Class::Perl(cls)),
51            Primitive::Unicode(cls) => Ast::Class(ast::Class::Unicode(cls)),
52        }
53    }
54
55    /// Convert this primitive into an item in a character class.
56    ///
57    /// If this primitive is not a legal item (i.e., an assertion or a dot),
58    /// then return an error.
59    fn into_class_set_item<P: Borrow<Parser>>(
60        self,
61        p: &ParserI<'_, P>,
62    ) -> Result<ast::ClassSetItem> {
63        use self::Primitive::*;
64        use crate::ast::ClassSetItem;
65
66        match self {
67            Literal(lit) => Ok(ClassSetItem::Literal(lit)),
68            Perl(cls) => Ok(ClassSetItem::Perl(cls)),
69            Unicode(cls) => Ok(ClassSetItem::Unicode(cls)),
70            x => Err(p.error(*x.span(), ast::ErrorKind::ClassEscapeInvalid)),
71        }
72    }
73
74    /// Convert this primitive into a literal in a character class. In
75    /// particular, literals are the only valid items that can appear in
76    /// ranges.
77    ///
78    /// If this primitive is not a legal item (i.e., a class, assertion or a
79    /// dot), then return an error.
80    fn into_class_literal<P: Borrow<Parser>>(
81        self,
82        p: &ParserI<'_, P>,
83    ) -> Result<ast::Literal> {
84        use self::Primitive::*;
85
86        match self {
87            Literal(lit) => Ok(lit),
88            x => Err(p.error(*x.span(), ast::ErrorKind::ClassRangeLiteral)),
89        }
90    }
91}
92
/// Reports whether `c` is an ASCII hexadecimal digit, i.e. one of `0-9`,
/// `a-f` or `A-F`.
fn is_hex(c: char) -> bool {
    match c {
        '0'..='9' | 'a'..='f' | 'A'..='F' => true,
        _ => false,
    }
}
97
/// Reports whether `c` is valid in a capture group name.
///
/// ASCII letters and `_` are always accepted. ASCII digits, `.`, `[` and `]`
/// are accepted only when `first` is false, i.e. when `c` is not the first
/// character of the group name.
fn is_capture_char(c: char, first: bool) -> bool {
    match c {
        '_' | 'A'..='Z' | 'a'..='z' => true,
        '0'..='9' | '.' | '[' | ']' => !first,
        _ => false,
    }
}
109
/// A builder for a regular expression parser.
///
/// This builder permits modifying configuration options for the parser.
/// Each field is copied into the `Parser` produced by `build`.
#[derive(Clone, Debug)]
pub struct ParserBuilder {
    /// Whether the parser starts out in verbose (`x` flag) mode.
    /// `ParserBuilder::new` sets this to `false`.
    ignore_whitespace: bool,
    /// The nesting limit handed to the parser. `ParserBuilder::new` sets
    /// this to `250`.
    nest_limit: u32,
    /// Whether octal syntax is supported. `ParserBuilder::new` sets this to
    /// `false`.
    octal: bool,
}
119
120impl Default for ParserBuilder {
121    fn default() -> ParserBuilder {
122        ParserBuilder::new()
123    }
124}
125
126impl ParserBuilder {
127    /// Create a new parser builder with a default configuration.
128    pub fn new() -> ParserBuilder {
129        ParserBuilder {
130            ignore_whitespace: false,
131            nest_limit: 250,
132            octal: false,
133        }
134    }
135
136    /// Build a parser from this configuration with the given pattern.
137    pub fn build(&self) -> Parser {
138        Parser {
139            pos: Cell::new(Position { offset: 0, line: 1, column: 1 }),
140            capture_index: Cell::new(0),
141            nest_limit: self.nest_limit,
142            octal: self.octal,
143            initial_ignore_whitespace: self.ignore_whitespace,
144            ignore_whitespace: Cell::new(self.ignore_whitespace),
145            comments: RefCell::new(vec![]),
146            stack_group: RefCell::new(vec![]),
147            stack_class: RefCell::new(vec![]),
148            capture_names: RefCell::new(vec![]),
149            scratch: RefCell::new(String::new()),
150        }
151    }
152
153    /// Set the nesting limit for this parser.
154    ///
155    /// The nesting limit controls how deep the abstract syntax tree is allowed
156    /// to be. If the AST exceeds the given limit (e.g., with too many nested
157    /// groups), then an error is returned by the parser.
158    ///
159    /// The purpose of this limit is to act as a heuristic to prevent stack
160    /// overflow for consumers that do structural induction on an `Ast` using
161    /// explicit recursion. While this crate never does this (instead using
162    /// constant stack space and moving the call stack to the heap), other
163    /// crates may.
164    ///
165    /// This limit is not checked until the entire Ast is parsed. Therefore,
166    /// if callers want to put a limit on the amount of heap space used, then
167    /// they should impose a limit on the length, in bytes, of the concrete
168    /// pattern string. In particular, this is viable since this parser
169    /// implementation will limit itself to heap space proportional to the
170    /// length of the pattern string.
171    ///
172    /// Note that a nest limit of `0` will return a nest limit error for most
173    /// patterns but not all. For example, a nest limit of `0` permits `a` but
174    /// not `ab`, since `ab` requires a concatenation, which results in a nest
175    /// depth of `1`. In general, a nest limit is not something that manifests
176    /// in an obvious way in the concrete syntax, therefore, it should not be
177    /// used in a granular way.
178    pub fn nest_limit(&mut self, limit: u32) -> &mut ParserBuilder {
179        self.nest_limit = limit;
180        self
181    }
182
183    /// Whether to support octal syntax or not.
184    ///
185    /// Octal syntax is a little-known way of uttering Unicode codepoints in
186    /// a regular expression. For example, `a`, `\x61`, `\u0061` and
187    /// `\141` are all equivalent regular expressions, where the last example
188    /// shows octal syntax.
189    ///
190    /// While supporting octal syntax isn't in and of itself a problem, it does
191    /// make good error messages harder. That is, in PCRE based regex engines,
192    /// syntax like `\0` invokes a backreference, which is explicitly
193    /// unsupported in Rust's regex engine. However, many users expect it to
194    /// be supported. Therefore, when octal support is disabled, the error
195    /// message will explicitly mention that backreferences aren't supported.
196    ///
197    /// Octal syntax is disabled by default.
198    pub fn octal(&mut self, yes: bool) -> &mut ParserBuilder {
199        self.octal = yes;
200        self
201    }
202
203    /// Enable verbose mode in the regular expression.
204    ///
205    /// When enabled, verbose mode permits insignificant whitespace in many
206    /// places in the regular expression, as well as comments. Comments are
207    /// started using `#` and continue until the end of the line.
208    ///
209    /// By default, this is disabled. It may be selectively enabled in the
210    /// regular expression by using the `x` flag regardless of this setting.
211    pub fn ignore_whitespace(&mut self, yes: bool) -> &mut ParserBuilder {
212        self.ignore_whitespace = yes;
213        self
214    }
215}
216
/// A regular expression parser.
///
/// This parses a string representation of a regular expression into an
/// abstract syntax tree. The size of the tree is proportional to the length
/// of the regular expression pattern.
///
/// A `Parser` can be configured in more detail via a
/// [`ParserBuilder`](struct.ParserBuilder.html).
///
/// All state that changes while parsing is stored in `Cell`/`RefCell` so
/// that the internal parsing routines can update it through a shared
/// reference.
#[derive(Clone, Debug)]
pub struct Parser {
    /// The current position of the parser (offset, line and column).
    pos: Cell<Position>,
    /// The current capture index. This is the index most recently handed
    /// out; it starts at `0` and is incremented before each use.
    capture_index: Cell<u32>,
    /// The maximum number of open parens/brackets allowed. If the parser
    /// exceeds this number, then an error is returned.
    // NOTE(review): `ParserBuilder::nest_limit` documents this as a limit on
    // AST nesting depth checked after parsing; the enforcement code is not
    // visible here, so confirm which description is accurate.
    nest_limit: u32,
    /// Whether to support octal syntax or not. When `false`, the parser will
    /// return an error helpfully pointing out that backreferences are not
    /// supported.
    octal: bool,
    /// The initial setting for `ignore_whitespace` as provided by
    /// `ParserBuilder`. It is used when resetting the parser's state.
    initial_ignore_whitespace: bool,
    /// Whether whitespace should be ignored. When enabled, comments are
    /// also permitted. This changes during a parse as `x` flags are seen
    /// and as groups are entered and left.
    ignore_whitespace: Cell<bool>,
    /// A list of comments, in order of appearance.
    comments: RefCell<Vec<ast::Comment>>,
    /// A stack of grouped sub-expressions, including alternations.
    stack_group: RefCell<Vec<GroupState>>,
    /// A stack of nested character classes. This is only non-empty when
    /// parsing a class.
    stack_class: RefCell<Vec<ClassState>>,
    /// A sequence of capture names kept sorted by name. This is used to
    /// detect duplicate capture names (via binary search) and report an
    /// error if one is detected.
    capture_names: RefCell<Vec<ast::CaptureName>>,
    /// A scratch buffer used in various places. Mostly this is used to
    /// accumulate relevant characters from parts of a pattern.
    scratch: RefCell<String>,
}
258
/// ParserI is the internal parser implementation.
///
/// We use this separate type so that we can carry the provided pattern string
/// along with us. In particular, a `Parser`'s internal state is not tied to
/// any one pattern, but `ParserI` is.
///
/// This type also lets us use `ParserI<&Parser>` in production code while
/// retaining the convenience of `ParserI<Parser>` for tests, which sometimes
/// work against the internal interface of the parser.
///
/// The parsing methods require `P: Borrow<Parser>` and only ever need shared
/// access to the underlying `Parser`.
#[derive(Clone, Debug)]
struct ParserI<'s, P> {
    /// The parser state/configuration.
    parser: P,
    /// The full regular expression provided by the user. Parser offsets are
    /// byte offsets into this string.
    pattern: &'s str,
}
275
/// GroupState represents a single stack frame while parsing nested groups
/// and alternations. Each frame records the state up to an opening
/// parenthesis or an alternation bar `|`.
#[derive(Clone, Debug)]
enum GroupState {
    /// This state is pushed whenever an opening group is found.
    Group {
        /// The concatenation immediately preceding the opening group.
        concat: ast::Concat,
        /// The group that has been opened. Its sub-AST is always empty
        /// while on the stack; it is filled in when the group is popped.
        group: ast::Group,
        /// The `x` flag setting that was in effect just before this group
        /// was opened. It is restored when the group is popped.
        ignore_whitespace: bool,
    },
    /// This state is pushed whenever a new alternation branch is found. If
    /// an alternation branch is found and this state is at the top of the
    /// stack, then this state should be modified to include the new
    /// alternation. Consequently two `Alternation` frames are never adjacent
    /// on the stack.
    Alternation(ast::Alternation),
}
296
/// ClassState represents a single stack frame while parsing character classes.
/// Each frame records the state up to an intersection, difference, symmetric
/// difference or nested class.
///
/// Note that a parser's character class stack is only non-empty when parsing
/// a character class. In all other cases, it is empty.
#[derive(Clone, Debug)]
enum ClassState {
    /// This state is pushed whenever an opening bracket is found.
    Open {
        /// The union of class items immediately preceding this class.
        union: ast::ClassSetUnion,
        /// The class that has been opened. Typically this just corresponds
        /// to the `[`, but it can also include `[^` since `^` indicates
        /// negation of the class.
        set: ast::ClassBracketed,
    },
    /// This state is pushed when an operator is seen. When popped, the
    /// stored set becomes the left hand side of the operator.
    Op {
        /// The type of the operation, i.e., &&, -- or ~~.
        kind: ast::ClassSetBinaryOpKind,
        /// The left-hand side of the operator.
        lhs: ast::ClassSet,
    },
}
323
324impl Parser {
325    /// Create a new parser with a default configuration.
326    ///
327    /// The parser can be run with either the `parse` or `parse_with_comments`
328    /// methods. The parse methods return an abstract syntax tree.
329    ///
330    /// To set configuration options on the parser, use
331    /// [`ParserBuilder`](struct.ParserBuilder.html).
332    pub fn new() -> Parser {
333        ParserBuilder::new().build()
334    }
335
336    /// Parse the regular expression into an abstract syntax tree.
337    pub fn parse(&mut self, pattern: &str) -> Result<Ast> {
338        ParserI::new(self, pattern).parse()
339    }
340
341    /// Parse the regular expression and return an abstract syntax tree with
342    /// all of the comments found in the pattern.
343    pub fn parse_with_comments(
344        &mut self,
345        pattern: &str,
346    ) -> Result<ast::WithComments> {
347        ParserI::new(self, pattern).parse_with_comments()
348    }
349
350    /// Reset the internal state of a parser.
351    ///
352    /// This is called at the beginning of every parse. This prevents the
353    /// parser from running with inconsistent state (say, if a previous
354    /// invocation returned an error and the parser is reused).
355    fn reset(&self) {
356        // These settings should be in line with the construction
357        // in `ParserBuilder::build`.
358        self.pos.set(Position { offset: 0, line: 1, column: 1 });
359        self.ignore_whitespace.set(self.initial_ignore_whitespace);
360        self.comments.borrow_mut().clear();
361        self.stack_group.borrow_mut().clear();
362        self.stack_class.borrow_mut().clear();
363    }
364}
365
366impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
367    /// Build an internal parser from a parser configuration and a pattern.
368    fn new(parser: P, pattern: &'s str) -> ParserI<'s, P> {
369        ParserI { parser, pattern }
370    }
371
372    /// Return a reference to the parser state.
373    fn parser(&self) -> &Parser {
374        self.parser.borrow()
375    }
376
377    /// Return a reference to the pattern being parsed.
378    fn pattern(&self) -> &str {
379        self.pattern.borrow()
380    }
381
382    /// Create a new error with the given span and error type.
383    fn error(&self, span: Span, kind: ast::ErrorKind) -> ast::Error {
384        ast::Error { kind, pattern: self.pattern().to_string(), span }
385    }
386
387    /// Return the current offset of the parser.
388    ///
389    /// The offset starts at `0` from the beginning of the regular expression
390    /// pattern string.
391    fn offset(&self) -> usize {
392        self.parser().pos.get().offset
393    }
394
395    /// Return the current line number of the parser.
396    ///
397    /// The line number starts at `1`.
398    fn line(&self) -> usize {
399        self.parser().pos.get().line
400    }
401
402    /// Return the current column of the parser.
403    ///
404    /// The column number starts at `1` and is reset whenever a `\n` is seen.
405    fn column(&self) -> usize {
406        self.parser().pos.get().column
407    }
408
409    /// Return the next capturing index. Each subsequent call increments the
410    /// internal index.
411    ///
412    /// The span given should correspond to the location of the opening
413    /// parenthesis.
414    ///
415    /// If the capture limit is exceeded, then an error is returned.
416    fn next_capture_index(&self, span: Span) -> Result<u32> {
417        let current = self.parser().capture_index.get();
418        let i = current.checked_add(1).ok_or_else(|| {
419            self.error(span, ast::ErrorKind::CaptureLimitExceeded)
420        })?;
421        self.parser().capture_index.set(i);
422        Ok(i)
423    }
424
425    /// Adds the given capture name to this parser. If this capture name has
426    /// already been used, then an error is returned.
427    fn add_capture_name(&self, cap: &ast::CaptureName) -> Result<()> {
428        let mut names = self.parser().capture_names.borrow_mut();
429        match names
430            .binary_search_by_key(&cap.name.as_str(), |c| c.name.as_str())
431        {
432            Err(i) => {
433                names.insert(i, cap.clone());
434                Ok(())
435            }
436            Ok(i) => Err(self.error(
437                cap.span,
438                ast::ErrorKind::GroupNameDuplicate { original: names[i].span },
439            )),
440        }
441    }
442
443    /// Return whether the parser should ignore whitespace or not.
444    fn ignore_whitespace(&self) -> bool {
445        self.parser().ignore_whitespace.get()
446    }
447
448    /// Return the character at the current position of the parser.
449    ///
450    /// This panics if the current position does not point to a valid char.
451    fn char(&self) -> char {
452        self.char_at(self.offset())
453    }
454
455    /// Return the character at the given position.
456    ///
457    /// This panics if the given position does not point to a valid char.
458    fn char_at(&self, i: usize) -> char {
459        self.pattern()[i..]
460            .chars()
461            .next()
462            .unwrap_or_else(|| panic!("expected char at offset {}", i))
463    }
464
465    /// Bump the parser to the next Unicode scalar value.
466    ///
467    /// If the end of the input has been reached, then `false` is returned.
468    fn bump(&self) -> bool {
469        if self.is_eof() {
470            return false;
471        }
472        let Position { mut offset, mut line, mut column } = self.pos();
473        if self.char() == '\n' {
474            line = line.checked_add(1).unwrap();
475            column = 1;
476        } else {
477            column = column.checked_add(1).unwrap();
478        }
479        offset += self.char().len_utf8();
480        self.parser().pos.set(Position { offset, line, column });
481        self.pattern()[self.offset()..].chars().next().is_some()
482    }
483
484    /// If the substring starting at the current position of the parser has
485    /// the given prefix, then bump the parser to the character immediately
486    /// following the prefix and return true. Otherwise, don't bump the parser
487    /// and return false.
488    fn bump_if(&self, prefix: &str) -> bool {
489        if self.pattern()[self.offset()..].starts_with(prefix) {
490            for _ in 0..prefix.chars().count() {
491                self.bump();
492            }
493            true
494        } else {
495            false
496        }
497    }
498
499    /// Returns true if and only if the parser is positioned at a look-around
500    /// prefix. The conditions under which this returns true must always
501    /// correspond to a regular expression that would otherwise be consider
502    /// invalid.
503    ///
504    /// This should only be called immediately after parsing the opening of
505    /// a group or a set of flags.
506    fn is_lookaround_prefix(&self) -> bool {
507        self.bump_if("?=")
508            || self.bump_if("?!")
509            || self.bump_if("?<=")
510            || self.bump_if("?<!")
511    }
512
513    /// Bump the parser, and if the `x` flag is enabled, bump through any
514    /// subsequent spaces. Return true if and only if the parser is not at
515    /// EOF.
516    fn bump_and_bump_space(&self) -> bool {
517        if !self.bump() {
518            return false;
519        }
520        self.bump_space();
521        !self.is_eof()
522    }
523
524    /// If the `x` flag is enabled (i.e., whitespace insensitivity with
525    /// comments), then this will advance the parser through all whitespace
526    /// and comments to the next non-whitespace non-comment byte.
527    ///
528    /// If the `x` flag is disabled, then this is a no-op.
529    ///
530    /// This should be used selectively throughout the parser where
531    /// arbitrary whitespace is permitted when the `x` flag is enabled. For
532    /// example, `{   5  , 6}` is equivalent to `{5,6}`.
533    fn bump_space(&self) {
534        if !self.ignore_whitespace() {
535            return;
536        }
537        while !self.is_eof() {
538            if self.char().is_whitespace() {
539                self.bump();
540            } else if self.char() == '#' {
541                let start = self.pos();
542                let mut comment_text = String::new();
543                self.bump();
544                while !self.is_eof() {
545                    let c = self.char();
546                    self.bump();
547                    if c == '\n' {
548                        break;
549                    }
550                    comment_text.push(c);
551                }
552                let comment = ast::Comment {
553                    span: Span::new(start, self.pos()),
554                    comment: comment_text,
555                };
556                self.parser().comments.borrow_mut().push(comment);
557            } else {
558                break;
559            }
560        }
561    }
562
563    /// Peek at the next character in the input without advancing the parser.
564    ///
565    /// If the input has been exhausted, then this returns `None`.
566    fn peek(&self) -> Option<char> {
567        if self.is_eof() {
568            return None;
569        }
570        self.pattern()[self.offset() + self.char().len_utf8()..].chars().next()
571    }
572
573    /// Like peek, but will ignore spaces when the parser is in whitespace
574    /// insensitive mode.
575    fn peek_space(&self) -> Option<char> {
576        if !self.ignore_whitespace() {
577            return self.peek();
578        }
579        if self.is_eof() {
580            return None;
581        }
582        let mut start = self.offset() + self.char().len_utf8();
583        let mut in_comment = false;
584        for (i, c) in self.pattern()[start..].char_indices() {
585            if c.is_whitespace() {
586                continue;
587            } else if !in_comment && c == '#' {
588                in_comment = true;
589            } else if in_comment && c == '\n' {
590                in_comment = false;
591            } else {
592                start += i;
593                break;
594            }
595        }
596        self.pattern()[start..].chars().next()
597    }
598
599    /// Returns true if the next call to `bump` would return false.
600    fn is_eof(&self) -> bool {
601        self.offset() == self.pattern().len()
602    }
603
604    /// Return the current position of the parser, which includes the offset,
605    /// line and column.
606    fn pos(&self) -> Position {
607        self.parser().pos.get()
608    }
609
610    /// Create a span at the current position of the parser. Both the start
611    /// and end of the span are set.
612    fn span(&self) -> Span {
613        Span::splat(self.pos())
614    }
615
616    /// Create a span that covers the current character.
617    fn span_char(&self) -> Span {
618        let mut next = Position {
619            offset: self.offset().checked_add(self.char().len_utf8()).unwrap(),
620            line: self.line(),
621            column: self.column().checked_add(1).unwrap(),
622        };
623        if self.char() == '\n' {
624            next.line += 1;
625            next.column = 1;
626        }
627        Span::new(self.pos(), next)
628    }
629
630    /// Parse and push a single alternation on to the parser's internal stack.
631    /// If the top of the stack already has an alternation, then add to that
632    /// instead of pushing a new one.
633    ///
634    /// The concatenation given corresponds to a single alternation branch.
635    /// The concatenation returned starts the next branch and is empty.
636    ///
637    /// This assumes the parser is currently positioned at `|` and will advance
638    /// the parser to the character following `|`.
639    #[inline(never)]
640    fn push_alternate(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
641        assert_eq!(self.char(), '|');
642        concat.span.end = self.pos();
643        self.push_or_add_alternation(concat);
644        self.bump();
645        Ok(ast::Concat { span: self.span(), asts: vec![] })
646    }
647
648    /// Pushes or adds the given branch of an alternation to the parser's
649    /// internal stack of state.
650    fn push_or_add_alternation(&self, concat: ast::Concat) {
651        use self::GroupState::*;
652
653        let mut stack = self.parser().stack_group.borrow_mut();
654        if let Some(&mut Alternation(ref mut alts)) = stack.last_mut() {
655            alts.asts.push(concat.into_ast());
656            return;
657        }
658        stack.push(Alternation(ast::Alternation {
659            span: Span::new(concat.span.start, self.pos()),
660            asts: vec![concat.into_ast()],
661        }));
662    }
663
664    /// Parse and push a group AST (and its parent concatenation) on to the
665    /// parser's internal stack. Return a fresh concatenation corresponding
666    /// to the group's sub-AST.
667    ///
668    /// If a set of flags was found (with no group), then the concatenation
669    /// is returned with that set of flags added.
670    ///
671    /// This assumes that the parser is currently positioned on the opening
672    /// parenthesis. It advances the parser to the character at the start
673    /// of the sub-expression (or adjoining expression).
674    ///
675    /// If there was a problem parsing the start of the group, then an error
676    /// is returned.
677    #[inline(never)]
678    fn push_group(&self, mut concat: ast::Concat) -> Result<ast::Concat> {
679        assert_eq!(self.char(), '(');
680        match self.parse_group()? {
681            Either::Left(set) => {
682                let ignore = set.flags.flag_state(ast::Flag::IgnoreWhitespace);
683                if let Some(v) = ignore {
684                    self.parser().ignore_whitespace.set(v);
685                }
686
687                concat.asts.push(Ast::Flags(set));
688                Ok(concat)
689            }
690            Either::Right(group) => {
691                let old_ignore_whitespace = self.ignore_whitespace();
692                let new_ignore_whitespace = group
693                    .flags()
694                    .and_then(|f| f.flag_state(ast::Flag::IgnoreWhitespace))
695                    .unwrap_or(old_ignore_whitespace);
696                self.parser().stack_group.borrow_mut().push(
697                    GroupState::Group {
698                        concat,
699                        group,
700                        ignore_whitespace: old_ignore_whitespace,
701                    },
702                );
703                self.parser().ignore_whitespace.set(new_ignore_whitespace);
704                Ok(ast::Concat { span: self.span(), asts: vec![] })
705            }
706        }
707    }
708
709    /// Pop a group AST from the parser's internal stack and set the group's
710    /// AST to the given concatenation. Return the concatenation containing
711    /// the group.
712    ///
713    /// This assumes that the parser is currently positioned on the closing
714    /// parenthesis and advances the parser to the character following the `)`.
715    ///
716    /// If no such group could be popped, then an unopened group error is
717    /// returned.
718    #[inline(never)]
719    fn pop_group(&self, mut group_concat: ast::Concat) -> Result<ast::Concat> {
720        use self::GroupState::*;
721
722        assert_eq!(self.char(), ')');
723        let mut stack = self.parser().stack_group.borrow_mut();
724        let (mut prior_concat, mut group, ignore_whitespace, alt) = match stack
725            .pop()
726        {
727            Some(Group { concat, group, ignore_whitespace }) => {
728                (concat, group, ignore_whitespace, None)
729            }
730            Some(Alternation(alt)) => match stack.pop() {
731                Some(Group { concat, group, ignore_whitespace }) => {
732                    (concat, group, ignore_whitespace, Some(alt))
733                }
734                None | Some(Alternation(_)) => {
735                    return Err(self.error(
736                        self.span_char(),
737                        ast::ErrorKind::GroupUnopened,
738                    ));
739                }
740            },
741            None => {
742                return Err(self
743                    .error(self.span_char(), ast::ErrorKind::GroupUnopened));
744            }
745        };
746        self.parser().ignore_whitespace.set(ignore_whitespace);
747        group_concat.span.end = self.pos();
748        self.bump();
749        group.span.end = self.pos();
750        match alt {
751            Some(mut alt) => {
752                alt.span.end = group_concat.span.end;
753                alt.asts.push(group_concat.into_ast());
754                group.ast = Box::new(alt.into_ast());
755            }
756            None => {
757                group.ast = Box::new(group_concat.into_ast());
758            }
759        }
760        prior_concat.asts.push(Ast::Group(group));
761        Ok(prior_concat)
762    }
763
764    /// Pop the last state from the parser's internal stack, if it exists, and
765    /// add the given concatenation to it. There either must be no state or a
766    /// single alternation item on the stack. Any other scenario produces an
767    /// error.
768    ///
769    /// This assumes that the parser has advanced to the end.
770    #[inline(never)]
771    fn pop_group_end(&self, mut concat: ast::Concat) -> Result<Ast> {
772        concat.span.end = self.pos();
773        let mut stack = self.parser().stack_group.borrow_mut();
774        let ast = match stack.pop() {
775            None => Ok(concat.into_ast()),
776            Some(GroupState::Alternation(mut alt)) => {
777                alt.span.end = self.pos();
778                alt.asts.push(concat.into_ast());
779                Ok(Ast::Alternation(alt))
780            }
781            Some(GroupState::Group { group, .. }) => {
782                return Err(
783                    self.error(group.span, ast::ErrorKind::GroupUnclosed)
784                );
785            }
786        };
787        // If we try to pop again, there should be nothing.
788        match stack.pop() {
789            None => ast,
790            Some(GroupState::Alternation(_)) => {
791                // This unreachable is unfortunate. This case can't happen
792                // because the only way we can be here is if there were two
793                // `GroupState::Alternation`s adjacent in the parser's stack,
794                // which we guarantee to never happen because we never push a
795                // `GroupState::Alternation` if one is already at the top of
796                // the stack.
797                unreachable!()
798            }
799            Some(GroupState::Group { group, .. }) => {
800                Err(self.error(group.span, ast::ErrorKind::GroupUnclosed))
801            }
802        }
803    }
804
805    /// Parse the opening of a character class and push the current class
806    /// parsing context onto the parser's stack. This assumes that the parser
807    /// is positioned at an opening `[`. The given union should correspond to
808    /// the union of set items built up before seeing the `[`.
809    ///
810    /// If there was a problem parsing the opening of the class, then an error
811    /// is returned. Otherwise, a new union of set items for the class is
812    /// returned (which may be populated with either a `]` or a `-`).
813    #[inline(never)]
814    fn push_class_open(
815        &self,
816        parent_union: ast::ClassSetUnion,
817    ) -> Result<ast::ClassSetUnion> {
818        assert_eq!(self.char(), '[');
819
820        let (nested_set, nested_union) = self.parse_set_class_open()?;
821        self.parser()
822            .stack_class
823            .borrow_mut()
824            .push(ClassState::Open { union: parent_union, set: nested_set });
825        Ok(nested_union)
826    }
827
828    /// Parse the end of a character class set and pop the character class
829    /// parser stack. The union given corresponds to the last union built
830    /// before seeing the closing `]`. The union returned corresponds to the
831    /// parent character class set with the nested class added to it.
832    ///
833    /// This assumes that the parser is positioned at a `]` and will advance
834    /// the parser to the byte immediately following the `]`.
835    ///
836    /// If the stack is empty after popping, then this returns the final
837    /// "top-level" character class AST (where a "top-level" character class
838    /// is one that is not nested inside any other character class).
839    ///
840    /// If there is no corresponding opening bracket on the parser's stack,
841    /// then an error is returned.
842    #[inline(never)]
843    fn pop_class(
844        &self,
845        nested_union: ast::ClassSetUnion,
846    ) -> Result<Either<ast::ClassSetUnion, ast::Class>> {
847        assert_eq!(self.char(), ']');
848
849        let item = ast::ClassSet::Item(nested_union.into_item());
850        let prevset = self.pop_class_op(item);
851        let mut stack = self.parser().stack_class.borrow_mut();
852        match stack.pop() {
853            None => {
854                // We can never observe an empty stack:
855                //
856                // 1) We are guaranteed to start with a non-empty stack since
857                //    the character class parser is only initiated when it sees
858                //    a `[`.
859                // 2) If we ever observe an empty stack while popping after
860                //    seeing a `]`, then we signal the character class parser
861                //    to terminate.
862                panic!("unexpected empty character class stack")
863            }
864            Some(ClassState::Op { .. }) => {
865                // This panic is unfortunate, but this case is impossible
866                // since we already popped the Op state if one exists above.
867                // Namely, every push to the class parser stack is guarded by
868                // whether an existing Op is already on the top of the stack.
869                // If it is, the existing Op is modified. That is, the stack
870                // can never have consecutive Op states.
871                panic!("unexpected ClassState::Op")
872            }
873            Some(ClassState::Open { mut union, mut set }) => {
874                self.bump();
875                set.span.end = self.pos();
876                set.kind = prevset;
877                if stack.is_empty() {
878                    Ok(Either::Right(ast::Class::Bracketed(set)))
879                } else {
880                    union.push(ast::ClassSetItem::Bracketed(Box::new(set)));
881                    Ok(Either::Left(union))
882                }
883            }
884        }
885    }
886
887    /// Return an "unclosed class" error whose span points to the most
888    /// recently opened class.
889    ///
890    /// This should only be called while parsing a character class.
891    #[inline(never)]
892    fn unclosed_class_error(&self) -> ast::Error {
893        for state in self.parser().stack_class.borrow().iter().rev() {
894            if let ClassState::Open { ref set, .. } = *state {
895                return self.error(set.span, ast::ErrorKind::ClassUnclosed);
896            }
897        }
898        // We are guaranteed to have a non-empty stack with at least
899        // one open bracket, so we should never get here.
900        panic!("no open character class found")
901    }
902
903    /// Push the current set of class items on to the class parser's stack as
904    /// the left hand side of the given operator.
905    ///
906    /// A fresh set union is returned, which should be used to build the right
907    /// hand side of this operator.
908    #[inline(never)]
909    fn push_class_op(
910        &self,
911        next_kind: ast::ClassSetBinaryOpKind,
912        next_union: ast::ClassSetUnion,
913    ) -> ast::ClassSetUnion {
914        let item = ast::ClassSet::Item(next_union.into_item());
915        let new_lhs = self.pop_class_op(item);
916        self.parser()
917            .stack_class
918            .borrow_mut()
919            .push(ClassState::Op { kind: next_kind, lhs: new_lhs });
920        ast::ClassSetUnion { span: self.span(), items: vec![] }
921    }
922
923    /// Pop a character class set from the character class parser stack. If the
924    /// top of the stack is just an item (not an operation), then return the
925    /// given set unchanged. If the top of the stack is an operation, then the
926    /// given set will be used as the rhs of the operation on the top of the
927    /// stack. In that case, the binary operation is returned as a set.
928    #[inline(never)]
929    fn pop_class_op(&self, rhs: ast::ClassSet) -> ast::ClassSet {
930        let mut stack = self.parser().stack_class.borrow_mut();
931        let (kind, lhs) = match stack.pop() {
932            Some(ClassState::Op { kind, lhs }) => (kind, lhs),
933            Some(state @ ClassState::Open { .. }) => {
934                stack.push(state);
935                return rhs;
936            }
937            None => unreachable!(),
938        };
939        let span = Span::new(lhs.span().start, rhs.span().end);
940        ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
941            span,
942            kind,
943            lhs: Box::new(lhs),
944            rhs: Box::new(rhs),
945        })
946    }
947}
948
949impl<'s, P: Borrow<Parser>> ParserI<'s, P> {
950    /// Parse the regular expression into an abstract syntax tree.
951    fn parse(&self) -> Result<Ast> {
952        self.parse_with_comments().map(|astc| astc.ast)
953    }
954
955    /// Parse the regular expression and return an abstract syntax tree with
956    /// all of the comments found in the pattern.
957    fn parse_with_comments(&self) -> Result<ast::WithComments> {
958        assert_eq!(self.offset(), 0, "parser can only be used once");
959        self.parser().reset();
960        let mut concat = ast::Concat { span: self.span(), asts: vec![] };
961        loop {
962            self.bump_space();
963            if self.is_eof() {
964                break;
965            }
966            match self.char() {
967                '(' => concat = self.push_group(concat)?,
968                ')' => concat = self.pop_group(concat)?,
969                '|' => concat = self.push_alternate(concat)?,
970                '[' => {
971                    let class = self.parse_set_class()?;
972                    concat.asts.push(Ast::Class(class));
973                }
974                '?' => {
975                    concat = self.parse_uncounted_repetition(
976                        concat,
977                        ast::RepetitionKind::ZeroOrOne,
978                    )?;
979                }
980                '*' => {
981                    concat = self.parse_uncounted_repetition(
982                        concat,
983                        ast::RepetitionKind::ZeroOrMore,
984                    )?;
985                }
986                '+' => {
987                    concat = self.parse_uncounted_repetition(
988                        concat,
989                        ast::RepetitionKind::OneOrMore,
990                    )?;
991                }
992                '{' => {
993                    concat = self.parse_counted_repetition(concat)?;
994                }
995                _ => concat.asts.push(self.parse_primitive()?.into_ast()),
996            }
997        }
998        let ast = self.pop_group_end(concat)?;
999        NestLimiter::new(self).check(&ast)?;
1000        Ok(ast::WithComments {
1001            ast,
1002            comments: mem::replace(
1003                &mut *self.parser().comments.borrow_mut(),
1004                vec![],
1005            ),
1006        })
1007    }
1008
1009    /// Parses an uncounted repetition operation. An uncounted repetition
1010    /// operator includes ?, * and +, but does not include the {m,n} syntax.
1011    /// The given `kind` should correspond to the operator observed by the
1012    /// caller.
1013    ///
1014    /// This assumes that the parser is currently positioned at the repetition
1015    /// operator and advances the parser to the first character after the
1016    /// operator. (Note that the operator may include a single additional `?`,
1017    /// which makes the operator ungreedy.)
1018    ///
1019    /// The caller should include the concatenation that is being built. The
1020    /// concatenation returned includes the repetition operator applied to the
1021    /// last expression in the given concatenation.
1022    #[inline(never)]
1023    fn parse_uncounted_repetition(
1024        &self,
1025        mut concat: ast::Concat,
1026        kind: ast::RepetitionKind,
1027    ) -> Result<ast::Concat> {
1028        assert!(
1029            self.char() == '?' || self.char() == '*' || self.char() == '+'
1030        );
1031        let op_start = self.pos();
1032        let ast = match concat.asts.pop() {
1033            Some(ast) => ast,
1034            None => {
1035                return Err(
1036                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1037                )
1038            }
1039        };
1040        match ast {
1041            Ast::Empty(_) | Ast::Flags(_) => {
1042                return Err(
1043                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1044                )
1045            }
1046            _ => {}
1047        }
1048        let mut greedy = true;
1049        if self.bump() && self.char() == '?' {
1050            greedy = false;
1051            self.bump();
1052        }
1053        concat.asts.push(Ast::Repetition(ast::Repetition {
1054            span: ast.span().with_end(self.pos()),
1055            op: ast::RepetitionOp {
1056                span: Span::new(op_start, self.pos()),
1057                kind,
1058            },
1059            greedy,
1060            ast: Box::new(ast),
1061        }));
1062        Ok(concat)
1063    }
1064
1065    /// Parses a counted repetition operation. A counted repetition operator
1066    /// corresponds to the {m,n} syntax, and does not include the ?, * or +
1067    /// operators.
1068    ///
1069    /// This assumes that the parser is currently positioned at the opening `{`
1070    /// and advances the parser to the first character after the operator.
1071    /// (Note that the operator may include a single additional `?`, which
1072    /// makes the operator ungreedy.)
1073    ///
1074    /// The caller should include the concatenation that is being built. The
1075    /// concatenation returned includes the repetition operator applied to the
1076    /// last expression in the given concatenation.
1077    #[inline(never)]
1078    fn parse_counted_repetition(
1079        &self,
1080        mut concat: ast::Concat,
1081    ) -> Result<ast::Concat> {
1082        assert!(self.char() == '{');
1083        let start = self.pos();
1084        let ast = match concat.asts.pop() {
1085            Some(ast) => ast,
1086            None => {
1087                return Err(
1088                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1089                )
1090            }
1091        };
1092        match ast {
1093            Ast::Empty(_) | Ast::Flags(_) => {
1094                return Err(
1095                    self.error(self.span(), ast::ErrorKind::RepetitionMissing)
1096                )
1097            }
1098            _ => {}
1099        }
1100        if !self.bump_and_bump_space() {
1101            return Err(self.error(
1102                Span::new(start, self.pos()),
1103                ast::ErrorKind::RepetitionCountUnclosed,
1104            ));
1105        }
1106        let count_start = specialize_err(
1107            self.parse_decimal(),
1108            ast::ErrorKind::DecimalEmpty,
1109            ast::ErrorKind::RepetitionCountDecimalEmpty,
1110        )?;
1111        let mut range = ast::RepetitionRange::Exactly(count_start);
1112        if self.is_eof() {
1113            return Err(self.error(
1114                Span::new(start, self.pos()),
1115                ast::ErrorKind::RepetitionCountUnclosed,
1116            ));
1117        }
1118        if self.char() == ',' {
1119            if !self.bump_and_bump_space() {
1120                return Err(self.error(
1121                    Span::new(start, self.pos()),
1122                    ast::ErrorKind::RepetitionCountUnclosed,
1123                ));
1124            }
1125            if self.char() != '}' {
1126                let count_end = specialize_err(
1127                    self.parse_decimal(),
1128                    ast::ErrorKind::DecimalEmpty,
1129                    ast::ErrorKind::RepetitionCountDecimalEmpty,
1130                )?;
1131                range = ast::RepetitionRange::Bounded(count_start, count_end);
1132            } else {
1133                range = ast::RepetitionRange::AtLeast(count_start);
1134            }
1135        }
1136        if self.is_eof() || self.char() != '}' {
1137            return Err(self.error(
1138                Span::new(start, self.pos()),
1139                ast::ErrorKind::RepetitionCountUnclosed,
1140            ));
1141        }
1142
1143        let mut greedy = true;
1144        if self.bump_and_bump_space() && self.char() == '?' {
1145            greedy = false;
1146            self.bump();
1147        }
1148
1149        let op_span = Span::new(start, self.pos());
1150        if !range.is_valid() {
1151            return Err(
1152                self.error(op_span, ast::ErrorKind::RepetitionCountInvalid)
1153            );
1154        }
1155        concat.asts.push(Ast::Repetition(ast::Repetition {
1156            span: ast.span().with_end(self.pos()),
1157            op: ast::RepetitionOp {
1158                span: op_span,
1159                kind: ast::RepetitionKind::Range(range),
1160            },
1161            greedy,
1162            ast: Box::new(ast),
1163        }));
1164        Ok(concat)
1165    }
1166
1167    /// Parse a group (which contains a sub-expression) or a set of flags.
1168    ///
1169    /// If a group was found, then it is returned with an empty AST. If a set
1170    /// of flags is found, then that set is returned.
1171    ///
1172    /// The parser should be positioned at the opening parenthesis.
1173    ///
1174    /// This advances the parser to the character before the start of the
1175    /// sub-expression (in the case of a group) or to the closing parenthesis
1176    /// immediately following the set of flags.
1177    ///
1178    /// # Errors
1179    ///
1180    /// If flags are given and incorrectly specified, then a corresponding
1181    /// error is returned.
1182    ///
1183    /// If a capture name is given and it is incorrectly specified, then a
1184    /// corresponding error is returned.
1185    #[inline(never)]
1186    fn parse_group(&self) -> Result<Either<ast::SetFlags, ast::Group>> {
1187        assert_eq!(self.char(), '(');
1188        let open_span = self.span_char();
1189        self.bump();
1190        self.bump_space();
1191        if self.is_lookaround_prefix() {
1192            return Err(self.error(
1193                Span::new(open_span.start, self.span().end),
1194                ast::ErrorKind::UnsupportedLookAround,
1195            ));
1196        }
1197        let inner_span = self.span();
1198        if self.bump_if("?P<") {
1199            let capture_index = self.next_capture_index(open_span)?;
1200            let cap = self.parse_capture_name(capture_index)?;
1201            Ok(Either::Right(ast::Group {
1202                span: open_span,
1203                kind: ast::GroupKind::CaptureName(cap),
1204                ast: Box::new(Ast::Empty(self.span())),
1205            }))
1206        } else if self.bump_if("?") {
1207            if self.is_eof() {
1208                return Err(
1209                    self.error(open_span, ast::ErrorKind::GroupUnclosed)
1210                );
1211            }
1212            let flags = self.parse_flags()?;
1213            let char_end = self.char();
1214            self.bump();
1215            if char_end == ')' {
1216                // We don't allow empty flags, e.g., `(?)`. We instead
1217                // interpret it as a repetition operator missing its argument.
1218                if flags.items.is_empty() {
1219                    return Err(self.error(
1220                        inner_span,
1221                        ast::ErrorKind::RepetitionMissing,
1222                    ));
1223                }
1224                Ok(Either::Left(ast::SetFlags {
1225                    span: Span { end: self.pos(), ..open_span },
1226                    flags,
1227                }))
1228            } else {
1229                assert_eq!(char_end, ':');
1230                Ok(Either::Right(ast::Group {
1231                    span: open_span,
1232                    kind: ast::GroupKind::NonCapturing(flags),
1233                    ast: Box::new(Ast::Empty(self.span())),
1234                }))
1235            }
1236        } else {
1237            let capture_index = self.next_capture_index(open_span)?;
1238            Ok(Either::Right(ast::Group {
1239                span: open_span,
1240                kind: ast::GroupKind::CaptureIndex(capture_index),
1241                ast: Box::new(Ast::Empty(self.span())),
1242            }))
1243        }
1244    }
1245
1246    /// Parses a capture group name. Assumes that the parser is positioned at
1247    /// the first character in the name following the opening `<` (and may
1248    /// possibly be EOF). This advances the parser to the first character
1249    /// following the closing `>`.
1250    ///
1251    /// The caller must provide the capture index of the group for this name.
1252    #[inline(never)]
1253    fn parse_capture_name(
1254        &self,
1255        capture_index: u32,
1256    ) -> Result<ast::CaptureName> {
1257        if self.is_eof() {
1258            return Err(self
1259                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1260        }
1261        let start = self.pos();
1262        loop {
1263            if self.char() == '>' {
1264                break;
1265            }
1266            if !is_capture_char(self.char(), self.pos() == start) {
1267                return Err(self.error(
1268                    self.span_char(),
1269                    ast::ErrorKind::GroupNameInvalid,
1270                ));
1271            }
1272            if !self.bump() {
1273                break;
1274            }
1275        }
1276        let end = self.pos();
1277        if self.is_eof() {
1278            return Err(self
1279                .error(self.span(), ast::ErrorKind::GroupNameUnexpectedEof));
1280        }
1281        assert_eq!(self.char(), '>');
1282        self.bump();
1283        let name = &self.pattern()[start.offset..end.offset];
1284        if name.is_empty() {
1285            return Err(self.error(
1286                Span::new(start, start),
1287                ast::ErrorKind::GroupNameEmpty,
1288            ));
1289        }
1290        let capname = ast::CaptureName {
1291            span: Span::new(start, end),
1292            name: name.to_string(),
1293            index: capture_index,
1294        };
1295        self.add_capture_name(&capname)?;
1296        Ok(capname)
1297    }
1298
1299    /// Parse a sequence of flags starting at the current character.
1300    ///
1301    /// This advances the parser to the character immediately following the
1302    /// flags, which is guaranteed to be either `:` or `)`.
1303    ///
1304    /// # Errors
1305    ///
1306    /// If any flags are duplicated, then an error is returned.
1307    ///
1308    /// If the negation operator is used more than once, then an error is
1309    /// returned.
1310    ///
1311    /// If no flags could be found or if the negation operation is not followed
1312    /// by any flags, then an error is returned.
1313    #[inline(never)]
1314    fn parse_flags(&self) -> Result<ast::Flags> {
1315        let mut flags = ast::Flags { span: self.span(), items: vec![] };
1316        let mut last_was_negation = None;
1317        while self.char() != ':' && self.char() != ')' {
1318            if self.char() == '-' {
1319                last_was_negation = Some(self.span_char());
1320                let item = ast::FlagsItem {
1321                    span: self.span_char(),
1322                    kind: ast::FlagsItemKind::Negation,
1323                };
1324                if let Some(i) = flags.add_item(item) {
1325                    return Err(self.error(
1326                        self.span_char(),
1327                        ast::ErrorKind::FlagRepeatedNegation {
1328                            original: flags.items[i].span,
1329                        },
1330                    ));
1331                }
1332            } else {
1333                last_was_negation = None;
1334                let item = ast::FlagsItem {
1335                    span: self.span_char(),
1336                    kind: ast::FlagsItemKind::Flag(self.parse_flag()?),
1337                };
1338                if let Some(i) = flags.add_item(item) {
1339                    return Err(self.error(
1340                        self.span_char(),
1341                        ast::ErrorKind::FlagDuplicate {
1342                            original: flags.items[i].span,
1343                        },
1344                    ));
1345                }
1346            }
1347            if !self.bump() {
1348                return Err(
1349                    self.error(self.span(), ast::ErrorKind::FlagUnexpectedEof)
1350                );
1351            }
1352        }
1353        if let Some(span) = last_was_negation {
1354            return Err(self.error(span, ast::ErrorKind::FlagDanglingNegation));
1355        }
1356        flags.span.end = self.pos();
1357        Ok(flags)
1358    }
1359
1360    /// Parse the current character as a flag. Do not advance the parser.
1361    ///
1362    /// # Errors
1363    ///
1364    /// If the flag is not recognized, then an error is returned.
1365    #[inline(never)]
1366    fn parse_flag(&self) -> Result<ast::Flag> {
1367        match self.char() {
1368            'i' => Ok(ast::Flag::CaseInsensitive),
1369            'm' => Ok(ast::Flag::MultiLine),
1370            's' => Ok(ast::Flag::DotMatchesNewLine),
1371            'U' => Ok(ast::Flag::SwapGreed),
1372            'u' => Ok(ast::Flag::Unicode),
1373            'x' => Ok(ast::Flag::IgnoreWhitespace),
1374            _ => {
1375                Err(self
1376                    .error(self.span_char(), ast::ErrorKind::FlagUnrecognized))
1377            }
1378        }
1379    }
1380
1381    /// Parse a primitive AST. e.g., A literal, non-set character class or
1382    /// assertion.
1383    ///
1384    /// This assumes that the parser expects a primitive at the current
1385    /// location. i.e., All other non-primitive cases have been handled.
1386    /// For example, if the parser's position is at `|`, then `|` will be
1387    /// treated as a literal (e.g., inside a character class).
1388    ///
1389    /// This advances the parser to the first character immediately following
1390    /// the primitive.
1391    fn parse_primitive(&self) -> Result<Primitive> {
1392        match self.char() {
1393            '\\' => self.parse_escape(),
1394            '.' => {
1395                let ast = Primitive::Dot(self.span_char());
1396                self.bump();
1397                Ok(ast)
1398            }
1399            '^' => {
1400                let ast = Primitive::Assertion(ast::Assertion {
1401                    span: self.span_char(),
1402                    kind: ast::AssertionKind::StartLine,
1403                });
1404                self.bump();
1405                Ok(ast)
1406            }
1407            '$' => {
1408                let ast = Primitive::Assertion(ast::Assertion {
1409                    span: self.span_char(),
1410                    kind: ast::AssertionKind::EndLine,
1411                });
1412                self.bump();
1413                Ok(ast)
1414            }
1415            c => {
1416                let ast = Primitive::Literal(ast::Literal {
1417                    span: self.span_char(),
1418                    kind: ast::LiteralKind::Verbatim,
1419                    c,
1420                });
1421                self.bump();
1422                Ok(ast)
1423            }
1424        }
1425    }
1426
1427    /// Parse an escape sequence as a primitive AST.
1428    ///
1429    /// This assumes the parser is positioned at the start of the escape
1430    /// sequence, i.e., `\`. It advances the parser to the first position
1431    /// immediately following the escape sequence.
1432    #[inline(never)]
1433    fn parse_escape(&self) -> Result<Primitive> {
1434        assert_eq!(self.char(), '\\');
1435        let start = self.pos();
1436        if !self.bump() {
1437            return Err(self.error(
1438                Span::new(start, self.pos()),
1439                ast::ErrorKind::EscapeUnexpectedEof,
1440            ));
1441        }
1442        let c = self.char();
1443        // Put some of the more complicated routines into helpers.
1444        match c {
1445            '0'..='7' => {
1446                if !self.parser().octal {
1447                    return Err(self.error(
1448                        Span::new(start, self.span_char().end),
1449                        ast::ErrorKind::UnsupportedBackreference,
1450                    ));
1451                }
1452                let mut lit = self.parse_octal();
1453                lit.span.start = start;
1454                return Ok(Primitive::Literal(lit));
1455            }
1456            '8'..='9' if !self.parser().octal => {
1457                return Err(self.error(
1458                    Span::new(start, self.span_char().end),
1459                    ast::ErrorKind::UnsupportedBackreference,
1460                ));
1461            }
1462            'x' | 'u' | 'U' => {
1463                let mut lit = self.parse_hex()?;
1464                lit.span.start = start;
1465                return Ok(Primitive::Literal(lit));
1466            }
1467            'p' | 'P' => {
1468                let mut cls = self.parse_unicode_class()?;
1469                cls.span.start = start;
1470                return Ok(Primitive::Unicode(cls));
1471            }
1472            'd' | 's' | 'w' | 'D' | 'S' | 'W' => {
1473                let mut cls = self.parse_perl_class();
1474                cls.span.start = start;
1475                return Ok(Primitive::Perl(cls));
1476            }
1477            _ => {}
1478        }
1479
1480        // Handle all of the one letter sequences inline.
1481        self.bump();
1482        let span = Span::new(start, self.pos());
1483        if is_meta_character(c) {
1484            return Ok(Primitive::Literal(ast::Literal {
1485                span,
1486                kind: ast::LiteralKind::Punctuation,
1487                c,
1488            }));
1489        }
1490        let special = |kind, c| {
1491            Ok(Primitive::Literal(ast::Literal {
1492                span,
1493                kind: ast::LiteralKind::Special(kind),
1494                c,
1495            }))
1496        };
1497        match c {
1498            'a' => special(ast::SpecialLiteralKind::Bell, '\x07'),
1499            'f' => special(ast::SpecialLiteralKind::FormFeed, '\x0C'),
1500            't' => special(ast::SpecialLiteralKind::Tab, '\t'),
1501            'n' => special(ast::SpecialLiteralKind::LineFeed, '\n'),
1502            'r' => special(ast::SpecialLiteralKind::CarriageReturn, '\r'),
1503            'v' => special(ast::SpecialLiteralKind::VerticalTab, '\x0B'),
1504            ' ' if self.ignore_whitespace() => {
1505                special(ast::SpecialLiteralKind::Space, ' ')
1506            }
1507            'A' => Ok(Primitive::Assertion(ast::Assertion {
1508                span,
1509                kind: ast::AssertionKind::StartText,
1510            })),
1511            'z' => Ok(Primitive::Assertion(ast::Assertion {
1512                span,
1513                kind: ast::AssertionKind::EndText,
1514            })),
1515            'b' => Ok(Primitive::Assertion(ast::Assertion {
1516                span,
1517                kind: ast::AssertionKind::WordBoundary,
1518            })),
1519            'B' => Ok(Primitive::Assertion(ast::Assertion {
1520                span,
1521                kind: ast::AssertionKind::NotWordBoundary,
1522            })),
1523            _ => Err(self.error(span, ast::ErrorKind::EscapeUnrecognized)),
1524        }
1525    }
1526
1527    /// Parse an octal representation of a Unicode codepoint up to 3 digits
1528    /// long. This expects the parser to be positioned at the first octal
1529    /// digit and advances the parser to the first character immediately
1530    /// following the octal number. This also assumes that parsing octal
1531    /// escapes is enabled.
1532    ///
1533    /// Assuming the preconditions are met, this routine can never fail.
1534    #[inline(never)]
1535    fn parse_octal(&self) -> ast::Literal {
1536        use std::char;
1537        use std::u32;
1538
1539        assert!(self.parser().octal);
1540        assert!('0' <= self.char() && self.char() <= '7');
1541        let start = self.pos();
1542        // Parse up to two more digits.
1543        while self.bump()
1544            && '0' <= self.char()
1545            && self.char() <= '7'
1546            && self.pos().offset - start.offset <= 2
1547        {}
1548        let end = self.pos();
1549        let octal = &self.pattern()[start.offset..end.offset];
1550        // Parsing the octal should never fail since the above guarantees a
1551        // valid number.
1552        let codepoint =
1553            u32::from_str_radix(octal, 8).expect("valid octal number");
1554        // The max value for 3 digit octal is 0777 = 511 and [0, 511] has no
1555        // invalid Unicode scalar values.
1556        let c = char::from_u32(codepoint).expect("Unicode scalar value");
1557        ast::Literal {
1558            span: Span::new(start, end),
1559            kind: ast::LiteralKind::Octal,
1560            c,
1561        }
1562    }
1563
1564    /// Parse a hex representation of a Unicode codepoint. This handles both
1565    /// hex notations, i.e., `\xFF` and `\x{FFFF}`. This expects the parser to
1566    /// be positioned at the `x`, `u` or `U` prefix. The parser is advanced to
1567    /// the first character immediately following the hexadecimal literal.
1568    #[inline(never)]
1569    fn parse_hex(&self) -> Result<ast::Literal> {
1570        assert!(
1571            self.char() == 'x' || self.char() == 'u' || self.char() == 'U'
1572        );
1573
1574        let hex_kind = match self.char() {
1575            'x' => ast::HexLiteralKind::X,
1576            'u' => ast::HexLiteralKind::UnicodeShort,
1577            _ => ast::HexLiteralKind::UnicodeLong,
1578        };
1579        if !self.bump_and_bump_space() {
1580            return Err(
1581                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
1582            );
1583        }
1584        if self.char() == '{' {
1585            self.parse_hex_brace(hex_kind)
1586        } else {
1587            self.parse_hex_digits(hex_kind)
1588        }
1589    }
1590
1591    /// Parse an N-digit hex representation of a Unicode codepoint. This
1592    /// expects the parser to be positioned at the first digit and will advance
1593    /// the parser to the first character immediately following the escape
1594    /// sequence.
1595    ///
1596    /// The number of digits given must be 2 (for `\xNN`), 4 (for `\uNNNN`)
1597    /// or 8 (for `\UNNNNNNNN`).
1598    #[inline(never)]
1599    fn parse_hex_digits(
1600        &self,
1601        kind: ast::HexLiteralKind,
1602    ) -> Result<ast::Literal> {
1603        use std::char;
1604        use std::u32;
1605
1606        let mut scratch = self.parser().scratch.borrow_mut();
1607        scratch.clear();
1608
1609        let start = self.pos();
1610        for i in 0..kind.digits() {
1611            if i > 0 && !self.bump_and_bump_space() {
1612                return Err(self
1613                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
1614            }
1615            if !is_hex(self.char()) {
1616                return Err(self.error(
1617                    self.span_char(),
1618                    ast::ErrorKind::EscapeHexInvalidDigit,
1619                ));
1620            }
1621            scratch.push(self.char());
1622        }
1623        // The final bump just moves the parser past the literal, which may
1624        // be EOF.
1625        self.bump_and_bump_space();
1626        let end = self.pos();
1627        let hex = scratch.as_str();
1628        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1629            None => Err(self.error(
1630                Span::new(start, end),
1631                ast::ErrorKind::EscapeHexInvalid,
1632            )),
1633            Some(c) => Ok(ast::Literal {
1634                span: Span::new(start, end),
1635                kind: ast::LiteralKind::HexFixed(kind),
1636                c,
1637            }),
1638        }
1639    }
1640
1641    /// Parse a hex representation of any Unicode scalar value. This expects
1642    /// the parser to be positioned at the opening brace `{` and will advance
1643    /// the parser to the first character following the closing brace `}`.
1644    #[inline(never)]
1645    fn parse_hex_brace(
1646        &self,
1647        kind: ast::HexLiteralKind,
1648    ) -> Result<ast::Literal> {
1649        use std::char;
1650        use std::u32;
1651
1652        let mut scratch = self.parser().scratch.borrow_mut();
1653        scratch.clear();
1654
1655        let brace_pos = self.pos();
1656        let start = self.span_char().end;
1657        while self.bump_and_bump_space() && self.char() != '}' {
1658            if !is_hex(self.char()) {
1659                return Err(self.error(
1660                    self.span_char(),
1661                    ast::ErrorKind::EscapeHexInvalidDigit,
1662                ));
1663            }
1664            scratch.push(self.char());
1665        }
1666        if self.is_eof() {
1667            return Err(self.error(
1668                Span::new(brace_pos, self.pos()),
1669                ast::ErrorKind::EscapeUnexpectedEof,
1670            ));
1671        }
1672        let end = self.pos();
1673        let hex = scratch.as_str();
1674        assert_eq!(self.char(), '}');
1675        self.bump_and_bump_space();
1676
1677        if hex.is_empty() {
1678            return Err(self.error(
1679                Span::new(brace_pos, self.pos()),
1680                ast::ErrorKind::EscapeHexEmpty,
1681            ));
1682        }
1683        match u32::from_str_radix(hex, 16).ok().and_then(char::from_u32) {
1684            None => Err(self.error(
1685                Span::new(start, end),
1686                ast::ErrorKind::EscapeHexInvalid,
1687            )),
1688            Some(c) => Ok(ast::Literal {
1689                span: Span::new(start, self.pos()),
1690                kind: ast::LiteralKind::HexBrace(kind),
1691                c,
1692            }),
1693        }
1694    }
1695
1696    /// Parse a decimal number into a u32 while trimming leading and trailing
1697    /// whitespace.
1698    ///
1699    /// This expects the parser to be positioned at the first position where
1700    /// a decimal digit could occur. This will advance the parser to the byte
1701    /// immediately following the last contiguous decimal digit.
1702    ///
1703    /// If no decimal digit could be found or if there was a problem parsing
1704    /// the complete set of digits into a u32, then an error is returned.
1705    fn parse_decimal(&self) -> Result<u32> {
1706        let mut scratch = self.parser().scratch.borrow_mut();
1707        scratch.clear();
1708
1709        while !self.is_eof() && self.char().is_whitespace() {
1710            self.bump();
1711        }
1712        let start = self.pos();
1713        while !self.is_eof() && '0' <= self.char() && self.char() <= '9' {
1714            scratch.push(self.char());
1715            self.bump_and_bump_space();
1716        }
1717        let span = Span::new(start, self.pos());
1718        while !self.is_eof() && self.char().is_whitespace() {
1719            self.bump_and_bump_space();
1720        }
1721        let digits = scratch.as_str();
1722        if digits.is_empty() {
1723            return Err(self.error(span, ast::ErrorKind::DecimalEmpty));
1724        }
1725        match u32::from_str_radix(digits, 10).ok() {
1726            Some(n) => Ok(n),
1727            None => Err(self.error(span, ast::ErrorKind::DecimalInvalid)),
1728        }
1729    }
1730
1731    /// Parse a standard character class consisting primarily of characters or
1732    /// character ranges, but can also contain nested character classes of
1733    /// any type (sans `.`).
1734    ///
1735    /// This assumes the parser is positioned at the opening `[`. If parsing
1736    /// is successful, then the parser is advanced to the position immediately
1737    /// following the closing `]`.
1738    #[inline(never)]
1739    fn parse_set_class(&self) -> Result<ast::Class> {
1740        assert_eq!(self.char(), '[');
1741
1742        let mut union =
1743            ast::ClassSetUnion { span: self.span(), items: vec![] };
1744        loop {
1745            self.bump_space();
1746            if self.is_eof() {
1747                return Err(self.unclosed_class_error());
1748            }
1749            match self.char() {
1750                '[' => {
1751                    // If we've already parsed the opening bracket, then
1752                    // attempt to treat this as the beginning of an ASCII
1753                    // class. If ASCII class parsing fails, then the parser
1754                    // backs up to `[`.
1755                    if !self.parser().stack_class.borrow().is_empty() {
1756                        if let Some(cls) = self.maybe_parse_ascii_class() {
1757                            union.push(ast::ClassSetItem::Ascii(cls));
1758                            continue;
1759                        }
1760                    }
1761                    union = self.push_class_open(union)?;
1762                }
1763                ']' => match self.pop_class(union)? {
1764                    Either::Left(nested_union) => {
1765                        union = nested_union;
1766                    }
1767                    Either::Right(class) => return Ok(class),
1768                },
1769                '&' if self.peek() == Some('&') => {
1770                    assert!(self.bump_if("&&"));
1771                    union = self.push_class_op(
1772                        ast::ClassSetBinaryOpKind::Intersection,
1773                        union,
1774                    );
1775                }
1776                '-' if self.peek() == Some('-') => {
1777                    assert!(self.bump_if("--"));
1778                    union = self.push_class_op(
1779                        ast::ClassSetBinaryOpKind::Difference,
1780                        union,
1781                    );
1782                }
1783                '~' if self.peek() == Some('~') => {
1784                    assert!(self.bump_if("~~"));
1785                    union = self.push_class_op(
1786                        ast::ClassSetBinaryOpKind::SymmetricDifference,
1787                        union,
1788                    );
1789                }
1790                _ => {
1791                    union.push(self.parse_set_class_range()?);
1792                }
1793            }
1794        }
1795    }
1796
1797    /// Parse a single primitive item in a character class set. The item to
1798    /// be parsed can either be one of a simple literal character, a range
1799    /// between two simple literal characters or a "primitive" character
1800    /// class like \w or \p{Greek}.
1801    ///
1802    /// If an invalid escape is found, or if a character class is found where
1803    /// a simple literal is expected (e.g., in a range), then an error is
1804    /// returned.
1805    #[inline(never)]
1806    fn parse_set_class_range(&self) -> Result<ast::ClassSetItem> {
1807        let prim1 = self.parse_set_class_item()?;
1808        self.bump_space();
1809        if self.is_eof() {
1810            return Err(self.unclosed_class_error());
1811        }
1812        // If the next char isn't a `-`, then we don't have a range.
1813        // There are two exceptions. If the char after a `-` is a `]`, then
1814        // `-` is interpreted as a literal `-`. Alternatively, if the char
1815        // after a `-` is a `-`, then `--` corresponds to a "difference"
1816        // operation.
1817        if self.char() != '-'
1818            || self.peek_space() == Some(']')
1819            || self.peek_space() == Some('-')
1820        {
1821            return prim1.into_class_set_item(self);
1822        }
1823        // OK, now we're parsing a range, so bump past the `-` and parse the
1824        // second half of the range.
1825        if !self.bump_and_bump_space() {
1826            return Err(self.unclosed_class_error());
1827        }
1828        let prim2 = self.parse_set_class_item()?;
1829        let range = ast::ClassSetRange {
1830            span: Span::new(prim1.span().start, prim2.span().end),
1831            start: prim1.into_class_literal(self)?,
1832            end: prim2.into_class_literal(self)?,
1833        };
1834        if !range.is_valid() {
1835            return Err(
1836                self.error(range.span, ast::ErrorKind::ClassRangeInvalid)
1837            );
1838        }
1839        Ok(ast::ClassSetItem::Range(range))
1840    }
1841
1842    /// Parse a single item in a character class as a primitive, where the
1843    /// primitive either consists of a verbatim literal or a single escape
1844    /// sequence.
1845    ///
1846    /// This assumes the parser is positioned at the beginning of a primitive,
1847    /// and advances the parser to the first position after the primitive if
1848    /// successful.
1849    ///
1850    /// Note that it is the caller's responsibility to report an error if an
1851    /// illegal primitive was parsed.
1852    #[inline(never)]
1853    fn parse_set_class_item(&self) -> Result<Primitive> {
1854        if self.char() == '\\' {
1855            self.parse_escape()
1856        } else {
1857            let x = Primitive::Literal(ast::Literal {
1858                span: self.span_char(),
1859                kind: ast::LiteralKind::Verbatim,
1860                c: self.char(),
1861            });
1862            self.bump();
1863            Ok(x)
1864        }
1865    }
1866
    /// Parses the opening of a character class set. This includes the opening
    /// bracket along with `^` if present to indicate negation. This also
    /// starts parsing the opening set of unioned items if applicable, since
    /// there are special rules applied to certain characters in the opening
    /// of a character class. For example, `[^]]` is the class of all
    /// characters not equal to `]`. (`]` would need to be escaped in any other
    /// position.) Similarly for `-`.
    ///
    /// Two values are returned: the bracketed class and the union of items
    /// collected so far (any leading `-` literals, or a single leading `]`
    /// literal).
    ///
    /// In all cases, the op inside the returned `ast::ClassBracketed` is an
    /// empty union. This empty union should be replaced with the actual item
    /// when it is popped from the parser's stack.
    ///
    /// This assumes the parser is positioned at the opening `[` (this is
    /// asserted) and advances the parser to the first non-special byte of the
    /// character class.
    ///
    /// An error (`ClassUnclosed`) is returned if EOF is found.
    #[inline(never)]
    fn parse_set_class_open(
        &self,
    ) -> Result<(ast::ClassBracketed, ast::ClassSetUnion)> {
        assert_eq!(self.char(), '[');
        let start = self.pos();
        // Move past the `[`; running out of input here means the class was
        // never closed.
        if !self.bump_and_bump_space() {
            return Err(self.error(
                Span::new(start, self.pos()),
                ast::ErrorKind::ClassUnclosed,
            ));
        }

        // A `^` directly after the bracket negates the class.
        let negated = if self.char() != '^' {
            false
        } else {
            if !self.bump_and_bump_space() {
                return Err(self.error(
                    Span::new(start, self.pos()),
                    ast::ErrorKind::ClassUnclosed,
                ));
            }
            true
        };
        // Accept any number of `-` as literal `-`.
        let mut union =
            ast::ClassSetUnion { span: self.span(), items: vec![] };
        while self.char() == '-' {
            union.push(ast::ClassSetItem::Literal(ast::Literal {
                span: self.span_char(),
                kind: ast::LiteralKind::Verbatim,
                c: '-',
            }));
            if !self.bump_and_bump_space() {
                // NOTE(review): unlike the other EOF errors in this function,
                // this one reports a zero-width span at `start` rather than
                // `start..self.pos()` -- confirm whether that is intentional.
                return Err(self.error(
                    Span::new(start, start),
                    ast::ErrorKind::ClassUnclosed,
                ));
            }
        }
        // If `]` is the *first* char in a set, then interpret it as a literal
        // `]`. That is, an empty class is impossible to write.
        if union.items.is_empty() && self.char() == ']' {
            union.push(ast::ClassSetItem::Literal(ast::Literal {
                span: self.span_char(),
                kind: ast::LiteralKind::Verbatim,
                c: ']',
            }));
            if !self.bump_and_bump_space() {
                return Err(self.error(
                    Span::new(start, self.pos()),
                    ast::ErrorKind::ClassUnclosed,
                ));
            }
        }
        // The bracketed class starts out holding an empty placeholder union
        // whose zero-width span sits at the start of the real union's span.
        let set = ast::ClassBracketed {
            span: Span::new(start, self.pos()),
            negated,
            kind: ast::ClassSet::union(ast::ClassSetUnion {
                span: Span::new(union.span.start, union.span.start),
                items: vec![],
            }),
        };
        Ok((set, union))
    }
1948
1949    /// Attempt to parse an ASCII character class, e.g., `[:alnum:]`.
1950    ///
1951    /// This assumes the parser is positioned at the opening `[`.
1952    ///
1953    /// If no valid ASCII character class could be found, then this does not
1954    /// advance the parser and `None` is returned. Otherwise, the parser is
1955    /// advanced to the first byte following the closing `]` and the
1956    /// corresponding ASCII class is returned.
1957    #[inline(never)]
1958    fn maybe_parse_ascii_class(&self) -> Option<ast::ClassAscii> {
1959        // ASCII character classes are interesting from a parsing perspective
1960        // because parsing cannot fail with any interesting error. For example,
1961        // in order to use an ASCII character class, it must be enclosed in
1962        // double brackets, e.g., `[[:alnum:]]`. Alternatively, you might think
1963        // of it as "ASCII character characters have the syntax `[:NAME:]`
1964        // which can only appear within character brackets." This means that
1965        // things like `[[:lower:]A]` are legal constructs.
1966        //
1967        // However, if one types an incorrect ASCII character class, e.g.,
1968        // `[[:loower:]]`, then we treat that as a normal nested character
1969        // class containing the characters `:elorw`. One might argue that we
1970        // should return an error instead since the repeated colons give away
1971        // the intent to write an ASCII class. But what if the user typed
1972        // `[[:lower]]` instead? How can we tell that was intended to be an
1973        // ASCII class and not just a normal nested class?
1974        //
1975        // Reasonable people can probably disagree over this, but for better
1976        // or worse, we implement semantics that never fails at the expense
1977        // of better failure modes.
1978        assert_eq!(self.char(), '[');
1979        // If parsing fails, then we back up the parser to this starting point.
1980        let start = self.pos();
1981        let mut negated = false;
1982        if !self.bump() || self.char() != ':' {
1983            self.parser().pos.set(start);
1984            return None;
1985        }
1986        if !self.bump() {
1987            self.parser().pos.set(start);
1988            return None;
1989        }
1990        if self.char() == '^' {
1991            negated = true;
1992            if !self.bump() {
1993                self.parser().pos.set(start);
1994                return None;
1995            }
1996        }
1997        let name_start = self.offset();
1998        while self.char() != ':' && self.bump() {}
1999        if self.is_eof() {
2000            self.parser().pos.set(start);
2001            return None;
2002        }
2003        let name = &self.pattern()[name_start..self.offset()];
2004        if !self.bump_if(":]") {
2005            self.parser().pos.set(start);
2006            return None;
2007        }
2008        let kind = match ast::ClassAsciiKind::from_name(name) {
2009            Some(kind) => kind,
2010            None => {
2011                self.parser().pos.set(start);
2012                return None;
2013            }
2014        };
2015        Some(ast::ClassAscii {
2016            span: Span::new(start, self.pos()),
2017            kind,
2018            negated,
2019        })
2020    }
2021
2022    /// Parse a Unicode class in either the single character notation, `\pN`
2023    /// or the multi-character bracketed notation, `\p{Greek}`. This assumes
2024    /// the parser is positioned at the `p` (or `P` for negation) and will
2025    /// advance the parser to the character immediately following the class.
2026    ///
2027    /// Note that this does not check whether the class name is valid or not.
2028    #[inline(never)]
2029    fn parse_unicode_class(&self) -> Result<ast::ClassUnicode> {
2030        assert!(self.char() == 'p' || self.char() == 'P');
2031
2032        let mut scratch = self.parser().scratch.borrow_mut();
2033        scratch.clear();
2034
2035        let negated = self.char() == 'P';
2036        if !self.bump_and_bump_space() {
2037            return Err(
2038                self.error(self.span(), ast::ErrorKind::EscapeUnexpectedEof)
2039            );
2040        }
2041        let (start, kind) = if self.char() == '{' {
2042            let start = self.span_char().end;
2043            while self.bump_and_bump_space() && self.char() != '}' {
2044                scratch.push(self.char());
2045            }
2046            if self.is_eof() {
2047                return Err(self
2048                    .error(self.span(), ast::ErrorKind::EscapeUnexpectedEof));
2049            }
2050            assert_eq!(self.char(), '}');
2051            self.bump();
2052
2053            let name = scratch.as_str();
2054            if let Some(i) = name.find("!=") {
2055                (
2056                    start,
2057                    ast::ClassUnicodeKind::NamedValue {
2058                        op: ast::ClassUnicodeOpKind::NotEqual,
2059                        name: name[..i].to_string(),
2060                        value: name[i + 2..].to_string(),
2061                    },
2062                )
2063            } else if let Some(i) = name.find(':') {
2064                (
2065                    start,
2066                    ast::ClassUnicodeKind::NamedValue {
2067                        op: ast::ClassUnicodeOpKind::Colon,
2068                        name: name[..i].to_string(),
2069                        value: name[i + 1..].to_string(),
2070                    },
2071                )
2072            } else if let Some(i) = name.find('=') {
2073                (
2074                    start,
2075                    ast::ClassUnicodeKind::NamedValue {
2076                        op: ast::ClassUnicodeOpKind::Equal,
2077                        name: name[..i].to_string(),
2078                        value: name[i + 1..].to_string(),
2079                    },
2080                )
2081            } else {
2082                (start, ast::ClassUnicodeKind::Named(name.to_string()))
2083            }
2084        } else {
2085            let start = self.pos();
2086            let c = self.char();
2087            if c == '\\' {
2088                return Err(self.error(
2089                    self.span_char(),
2090                    ast::ErrorKind::UnicodeClassInvalid,
2091                ));
2092            }
2093            self.bump_and_bump_space();
2094            let kind = ast::ClassUnicodeKind::OneLetter(c);
2095            (start, kind)
2096        };
2097        Ok(ast::ClassUnicode {
2098            span: Span::new(start, self.pos()),
2099            negated,
2100            kind,
2101        })
2102    }
2103
2104    /// Parse a Perl character class, e.g., `\d` or `\W`. This assumes the
2105    /// parser is currently at a valid character class name and will be
2106    /// advanced to the character immediately following the class.
2107    #[inline(never)]
2108    fn parse_perl_class(&self) -> ast::ClassPerl {
2109        let c = self.char();
2110        let span = self.span_char();
2111        self.bump();
2112        let (negated, kind) = match c {
2113            'd' => (false, ast::ClassPerlKind::Digit),
2114            'D' => (true, ast::ClassPerlKind::Digit),
2115            's' => (false, ast::ClassPerlKind::Space),
2116            'S' => (true, ast::ClassPerlKind::Space),
2117            'w' => (false, ast::ClassPerlKind::Word),
2118            'W' => (true, ast::ClassPerlKind::Word),
2119            c => panic!("expected valid Perl class but got '{}'", c),
2120        };
2121        ast::ClassPerl { span, kind, negated }
2122    }
2123}
2124
/// A type that traverses a fully parsed Ast and checks whether its depth
/// exceeds the specified nesting limit. If it does, then an error is returned.
///
/// The limit itself is read from the parser (`nest_limit`) each time the
/// depth is incremented.
#[derive(Debug)]
struct NestLimiter<'p, 's, P> {
    /// The parser that is checking the nest limit. It supplies the limit and
    /// is used to build the error values.
    p: &'p ParserI<'s, P>,
    /// The current depth while walking an Ast. Starts at zero.
    depth: u32,
}
2134
2135impl<'p, 's, P: Borrow<Parser>> NestLimiter<'p, 's, P> {
2136    fn new(p: &'p ParserI<'s, P>) -> NestLimiter<'p, 's, P> {
2137        NestLimiter { p, depth: 0 }
2138    }
2139
2140    #[inline(never)]
2141    fn check(self, ast: &Ast) -> Result<()> {
2142        ast::visit(ast, self)
2143    }
2144
2145    fn increment_depth(&mut self, span: &Span) -> Result<()> {
2146        let new = self.depth.checked_add(1).ok_or_else(|| {
2147            self.p.error(
2148                span.clone(),
2149                ast::ErrorKind::NestLimitExceeded(::std::u32::MAX),
2150            )
2151        })?;
2152        let limit = self.p.parser().nest_limit;
2153        if new > limit {
2154            return Err(self.p.error(
2155                span.clone(),
2156                ast::ErrorKind::NestLimitExceeded(limit),
2157            ));
2158        }
2159        self.depth = new;
2160        Ok(())
2161    }
2162
2163    fn decrement_depth(&mut self) {
2164        // Assuming the correctness of the visitor, this should never drop
2165        // below 0.
2166        self.depth = self.depth.checked_sub(1).unwrap();
2167    }
2168}
2169
2170impl<'p, 's, P: Borrow<Parser>> ast::Visitor for NestLimiter<'p, 's, P> {
2171    type Output = ();
2172    type Err = ast::Error;
2173
2174    fn finish(self) -> Result<()> {
2175        Ok(())
2176    }
2177
2178    fn visit_pre(&mut self, ast: &Ast) -> Result<()> {
2179        let span = match *ast {
2180            Ast::Empty(_)
2181            | Ast::Flags(_)
2182            | Ast::Literal(_)
2183            | Ast::Dot(_)
2184            | Ast::Assertion(_)
2185            | Ast::Class(ast::Class::Unicode(_))
2186            | Ast::Class(ast::Class::Perl(_)) => {
2187                // These are all base cases, so we don't increment depth.
2188                return Ok(());
2189            }
2190            Ast::Class(ast::Class::Bracketed(ref x)) => &x.span,
2191            Ast::Repetition(ref x) => &x.span,
2192            Ast::Group(ref x) => &x.span,
2193            Ast::Alternation(ref x) => &x.span,
2194            Ast::Concat(ref x) => &x.span,
2195        };
2196        self.increment_depth(span)
2197    }
2198
2199    fn visit_post(&mut self, ast: &Ast) -> Result<()> {
2200        match *ast {
2201            Ast::Empty(_)
2202            | Ast::Flags(_)
2203            | Ast::Literal(_)
2204            | Ast::Dot(_)
2205            | Ast::Assertion(_)
2206            | Ast::Class(ast::Class::Unicode(_))
2207            | Ast::Class(ast::Class::Perl(_)) => {
2208                // These are all base cases, so we don't decrement depth.
2209                Ok(())
2210            }
2211            Ast::Class(ast::Class::Bracketed(_))
2212            | Ast::Repetition(_)
2213            | Ast::Group(_)
2214            | Ast::Alternation(_)
2215            | Ast::Concat(_) => {
2216                self.decrement_depth();
2217                Ok(())
2218            }
2219        }
2220    }
2221
2222    fn visit_class_set_item_pre(
2223        &mut self,
2224        ast: &ast::ClassSetItem,
2225    ) -> Result<()> {
2226        let span = match *ast {
2227            ast::ClassSetItem::Empty(_)
2228            | ast::ClassSetItem::Literal(_)
2229            | ast::ClassSetItem::Range(_)
2230            | ast::ClassSetItem::Ascii(_)
2231            | ast::ClassSetItem::Unicode(_)
2232            | ast::ClassSetItem::Perl(_) => {
2233                // These are all base cases, so we don't increment depth.
2234                return Ok(());
2235            }
2236            ast::ClassSetItem::Bracketed(ref x) => &x.span,
2237            ast::ClassSetItem::Union(ref x) => &x.span,
2238        };
2239        self.increment_depth(span)
2240    }
2241
2242    fn visit_class_set_item_post(
2243        &mut self,
2244        ast: &ast::ClassSetItem,
2245    ) -> Result<()> {
2246        match *ast {
2247            ast::ClassSetItem::Empty(_)
2248            | ast::ClassSetItem::Literal(_)
2249            | ast::ClassSetItem::Range(_)
2250            | ast::ClassSetItem::Ascii(_)
2251            | ast::ClassSetItem::Unicode(_)
2252            | ast::ClassSetItem::Perl(_) => {
2253                // These are all base cases, so we don't decrement depth.
2254                Ok(())
2255            }
2256            ast::ClassSetItem::Bracketed(_) | ast::ClassSetItem::Union(_) => {
2257                self.decrement_depth();
2258                Ok(())
2259            }
2260        }
2261    }
2262
2263    fn visit_class_set_binary_op_pre(
2264        &mut self,
2265        ast: &ast::ClassSetBinaryOp,
2266    ) -> Result<()> {
2267        self.increment_depth(&ast.span)
2268    }
2269
2270    fn visit_class_set_binary_op_post(
2271        &mut self,
2272        _ast: &ast::ClassSetBinaryOp,
2273    ) -> Result<()> {
2274        self.decrement_depth();
2275        Ok(())
2276    }
2277}
2278
2279/// When the result is an error, transforms the ast::ErrorKind from the source
2280/// Result into another one. This function is used to return clearer error
2281/// messages when possible.
2282fn specialize_err<T>(
2283    result: Result<T>,
2284    from: ast::ErrorKind,
2285    to: ast::ErrorKind,
2286) -> Result<T> {
2287    if let Err(e) = result {
2288        if e.kind == from {
2289            Err(ast::Error { kind: to, pattern: e.pattern, span: e.span })
2290        } else {
2291            Err(e)
2292        }
2293    } else {
2294        result
2295    }
2296}
2297
2298#[cfg(test)]
2299mod tests {
2300    use std::ops::Range;
2301
2302    use super::{Parser, ParserBuilder, ParserI, Primitive};
2303    use crate::ast::{self, Ast, Position, Span};
2304
    // A local replacement for `assert_eq!` that is used by the tests in this
    // module. On mismatch it panics with the debug representation of the two
    // values on separate lines, which is slightly easier to read (but
    // honestly still not great).
    macro_rules! assert_eq {
        ($left:expr, $right:expr) => {{
            // Bind both operands by reference so each expression is
            // evaluated exactly once and is not moved.
            match (&$left, &$right) {
                (left_val, right_val) => {
                    if !(*left_val == *right_val) {
                        panic!(
                            "assertion failed: `(left == right)`\n\n\
                             left:  `{:?}`\nright: `{:?}`\n\n",
                            left_val, right_val
                        )
                    }
                }
            }
        }};
    }
2322
    // We create these errors to compare with real ast::Errors in the tests.
    // We define equality between TestError and ast::Error to disregard the
    // pattern string in ast::Error, which is annoying to provide in tests.
    #[derive(Clone, Debug)]
    struct TestError {
        // The span the error is expected to cover.
        span: Span,
        // The kind of error that is expected.
        kind: ast::ErrorKind,
    }
2331
2332    impl PartialEq<ast::Error> for TestError {
2333        fn eq(&self, other: &ast::Error) -> bool {
2334            self.span == other.span && self.kind == other.kind
2335        }
2336    }
2337
2338    impl PartialEq<TestError> for ast::Error {
2339        fn eq(&self, other: &TestError) -> bool {
2340            self.span == other.span && self.kind == other.kind
2341        }
2342    }
2343
2344    fn s(str: &str) -> String {
2345        str.to_string()
2346    }
2347
2348    fn parser(pattern: &str) -> ParserI<'_, Parser> {
2349        ParserI::new(Parser::new(), pattern)
2350    }
2351
2352    fn parser_octal(pattern: &str) -> ParserI<'_, Parser> {
2353        let parser = ParserBuilder::new().octal(true).build();
2354        ParserI::new(parser, pattern)
2355    }
2356
2357    fn parser_nest_limit(
2358        pattern: &str,
2359        nest_limit: u32,
2360    ) -> ParserI<'_, Parser> {
2361        let p = ParserBuilder::new().nest_limit(nest_limit).build();
2362        ParserI::new(p, pattern)
2363    }
2364
2365    fn parser_ignore_whitespace(pattern: &str) -> ParserI<'_, Parser> {
2366        let p = ParserBuilder::new().ignore_whitespace(true).build();
2367        ParserI::new(p, pattern)
2368    }
2369
2370    /// Short alias for creating a new span.
2371    fn nspan(start: Position, end: Position) -> Span {
2372        Span::new(start, end)
2373    }
2374
2375    /// Short alias for creating a new position.
2376    fn npos(offset: usize, line: usize, column: usize) -> Position {
2377        Position::new(offset, line, column)
2378    }
2379
2380    /// Create a new span from the given offset range. This assumes a single
2381    /// line and sets the columns based on the offsets. i.e., This only works
2382    /// out of the box for ASCII, which is fine for most tests.
2383    fn span(range: Range<usize>) -> Span {
2384        let start = Position::new(range.start, 1, range.start + 1);
2385        let end = Position::new(range.end, 1, range.end + 1);
2386        Span::new(start, end)
2387    }
2388
2389    /// Create a new span for the corresponding byte range in the given string.
2390    fn span_range(subject: &str, range: Range<usize>) -> Span {
2391        let start = Position {
2392            offset: range.start,
2393            line: 1 + subject[..range.start].matches('\n').count(),
2394            column: 1 + subject[..range.start]
2395                .chars()
2396                .rev()
2397                .position(|c| c == '\n')
2398                .unwrap_or(subject[..range.start].chars().count()),
2399        };
2400        let end = Position {
2401            offset: range.end,
2402            line: 1 + subject[..range.end].matches('\n').count(),
2403            column: 1 + subject[..range.end]
2404                .chars()
2405                .rev()
2406                .position(|c| c == '\n')
2407                .unwrap_or(subject[..range.end].chars().count()),
2408        };
2409        Span::new(start, end)
2410    }
2411
2412    /// Create a verbatim literal starting at the given position.
2413    fn lit(c: char, start: usize) -> Ast {
2414        lit_with(c, span(start..start + c.len_utf8()))
2415    }
2416
2417    /// Create a punctuation literal starting at the given position.
2418    fn punct_lit(c: char, span: Span) -> Ast {
2419        Ast::Literal(ast::Literal {
2420            span,
2421            kind: ast::LiteralKind::Punctuation,
2422            c,
2423        })
2424    }
2425
2426    /// Create a verbatim literal with the given span.
2427    fn lit_with(c: char, span: Span) -> Ast {
2428        Ast::Literal(ast::Literal {
2429            span,
2430            kind: ast::LiteralKind::Verbatim,
2431            c,
2432        })
2433    }
2434
2435    /// Create a concatenation with the given range.
2436    fn concat(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2437        concat_with(span(range), asts)
2438    }
2439
2440    /// Create a concatenation with the given span.
2441    fn concat_with(span: Span, asts: Vec<Ast>) -> Ast {
2442        Ast::Concat(ast::Concat { span, asts })
2443    }
2444
2445    /// Create an alternation with the given span.
2446    fn alt(range: Range<usize>, asts: Vec<Ast>) -> Ast {
2447        Ast::Alternation(ast::Alternation { span: span(range), asts })
2448    }
2449
2450    /// Create a capturing group with the given span.
2451    fn group(range: Range<usize>, index: u32, ast: Ast) -> Ast {
2452        Ast::Group(ast::Group {
2453            span: span(range),
2454            kind: ast::GroupKind::CaptureIndex(index),
2455            ast: Box::new(ast),
2456        })
2457    }
2458
2459    /// Create an ast::SetFlags.
2460    ///
2461    /// The given pattern should be the full pattern string. The range given
2462    /// should correspond to the byte offsets where the flag set occurs.
2463    ///
2464    /// If negated is true, then the set is interpreted as beginning with a
2465    /// negation.
2466    fn flag_set(
2467        pat: &str,
2468        range: Range<usize>,
2469        flag: ast::Flag,
2470        negated: bool,
2471    ) -> Ast {
2472        let mut items = vec![ast::FlagsItem {
2473            span: span_range(pat, (range.end - 2)..(range.end - 1)),
2474            kind: ast::FlagsItemKind::Flag(flag),
2475        }];
2476        if negated {
2477            items.insert(
2478                0,
2479                ast::FlagsItem {
2480                    span: span_range(pat, (range.start + 2)..(range.end - 2)),
2481                    kind: ast::FlagsItemKind::Negation,
2482                },
2483            );
2484        }
2485        Ast::Flags(ast::SetFlags {
2486            span: span_range(pat, range.clone()),
2487            flags: ast::Flags {
2488                span: span_range(pat, (range.start + 2)..(range.end - 1)),
2489                items,
2490            },
2491        })
2492    }
2493
2494    #[test]
2495    fn parse_nest_limit() {
2496        // A nest limit of 0 still allows some types of regexes.
2497        assert_eq!(
2498            parser_nest_limit("", 0).parse(),
2499            Ok(Ast::Empty(span(0..0)))
2500        );
2501        assert_eq!(parser_nest_limit("a", 0).parse(), Ok(lit('a', 0)));
2502
2503        // Test repetition operations, which require one level of nesting.
2504        assert_eq!(
2505            parser_nest_limit("a+", 0).parse().unwrap_err(),
2506            TestError {
2507                span: span(0..2),
2508                kind: ast::ErrorKind::NestLimitExceeded(0),
2509            }
2510        );
2511        assert_eq!(
2512            parser_nest_limit("a+", 1).parse(),
2513            Ok(Ast::Repetition(ast::Repetition {
2514                span: span(0..2),
2515                op: ast::RepetitionOp {
2516                    span: span(1..2),
2517                    kind: ast::RepetitionKind::OneOrMore,
2518                },
2519                greedy: true,
2520                ast: Box::new(lit('a', 0)),
2521            }))
2522        );
2523        assert_eq!(
2524            parser_nest_limit("(a)+", 1).parse().unwrap_err(),
2525            TestError {
2526                span: span(0..3),
2527                kind: ast::ErrorKind::NestLimitExceeded(1),
2528            }
2529        );
2530        assert_eq!(
2531            parser_nest_limit("a+*", 1).parse().unwrap_err(),
2532            TestError {
2533                span: span(0..2),
2534                kind: ast::ErrorKind::NestLimitExceeded(1),
2535            }
2536        );
2537        assert_eq!(
2538            parser_nest_limit("a+*", 2).parse(),
2539            Ok(Ast::Repetition(ast::Repetition {
2540                span: span(0..3),
2541                op: ast::RepetitionOp {
2542                    span: span(2..3),
2543                    kind: ast::RepetitionKind::ZeroOrMore,
2544                },
2545                greedy: true,
2546                ast: Box::new(Ast::Repetition(ast::Repetition {
2547                    span: span(0..2),
2548                    op: ast::RepetitionOp {
2549                        span: span(1..2),
2550                        kind: ast::RepetitionKind::OneOrMore,
2551                    },
2552                    greedy: true,
2553                    ast: Box::new(lit('a', 0)),
2554                })),
2555            }))
2556        );
2557
2558        // Test concatenations. A concatenation requires one level of nesting.
2559        assert_eq!(
2560            parser_nest_limit("ab", 0).parse().unwrap_err(),
2561            TestError {
2562                span: span(0..2),
2563                kind: ast::ErrorKind::NestLimitExceeded(0),
2564            }
2565        );
2566        assert_eq!(
2567            parser_nest_limit("ab", 1).parse(),
2568            Ok(concat(0..2, vec![lit('a', 0), lit('b', 1)]))
2569        );
2570        assert_eq!(
2571            parser_nest_limit("abc", 1).parse(),
2572            Ok(concat(0..3, vec![lit('a', 0), lit('b', 1), lit('c', 2)]))
2573        );
2574
2575        // Test alternations. An alternation requires one level of nesting.
2576        assert_eq!(
2577            parser_nest_limit("a|b", 0).parse().unwrap_err(),
2578            TestError {
2579                span: span(0..3),
2580                kind: ast::ErrorKind::NestLimitExceeded(0),
2581            }
2582        );
2583        assert_eq!(
2584            parser_nest_limit("a|b", 1).parse(),
2585            Ok(alt(0..3, vec![lit('a', 0), lit('b', 2)]))
2586        );
2587        assert_eq!(
2588            parser_nest_limit("a|b|c", 1).parse(),
2589            Ok(alt(0..5, vec![lit('a', 0), lit('b', 2), lit('c', 4)]))
2590        );
2591
2592        // Test character classes. Classes form their own mini-recursive
2593        // syntax!
2594        assert_eq!(
2595            parser_nest_limit("[a]", 0).parse().unwrap_err(),
2596            TestError {
2597                span: span(0..3),
2598                kind: ast::ErrorKind::NestLimitExceeded(0),
2599            }
2600        );
2601        assert_eq!(
2602            parser_nest_limit("[a]", 1).parse(),
2603            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
2604                span: span(0..3),
2605                negated: false,
2606                kind: ast::ClassSet::Item(ast::ClassSetItem::Literal(
2607                    ast::Literal {
2608                        span: span(1..2),
2609                        kind: ast::LiteralKind::Verbatim,
2610                        c: 'a',
2611                    }
2612                )),
2613            })))
2614        );
2615        assert_eq!(
2616            parser_nest_limit("[ab]", 1).parse().unwrap_err(),
2617            TestError {
2618                span: span(1..3),
2619                kind: ast::ErrorKind::NestLimitExceeded(1),
2620            }
2621        );
2622        assert_eq!(
2623            parser_nest_limit("[ab[cd]]", 2).parse().unwrap_err(),
2624            TestError {
2625                span: span(3..7),
2626                kind: ast::ErrorKind::NestLimitExceeded(2),
2627            }
2628        );
2629        assert_eq!(
2630            parser_nest_limit("[ab[cd]]", 3).parse().unwrap_err(),
2631            TestError {
2632                span: span(4..6),
2633                kind: ast::ErrorKind::NestLimitExceeded(3),
2634            }
2635        );
2636        assert_eq!(
2637            parser_nest_limit("[a--b]", 1).parse().unwrap_err(),
2638            TestError {
2639                span: span(1..5),
2640                kind: ast::ErrorKind::NestLimitExceeded(1),
2641            }
2642        );
2643        assert_eq!(
2644            parser_nest_limit("[a--bc]", 2).parse().unwrap_err(),
2645            TestError {
2646                span: span(4..6),
2647                kind: ast::ErrorKind::NestLimitExceeded(2),
2648            }
2649        );
2650    }
2651
    // Checks that, with `x` mode enabled, `#` comments are left out of the
    // AST and are reported separately by `parse_with_comments`.
    #[test]
    fn parse_comments() {
        // The continuation lines of this literal deliberately start at
        // column zero: any indentation would become part of the pattern and
        // shift every byte offset asserted below.
        let pat = "(?x)
# This is comment 1.
foo # This is comment 2.
  # This is comment 3.
bar
# This is comment 4.";
        let astc = parser(pat).parse_with_comments().unwrap();
        // Only the flag group and the letters of `foo` and `bar` show up in
        // the AST.
        assert_eq!(
            astc.ast,
            concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    lit_with('f', span_range(pat, 26..27)),
                    lit_with('o', span_range(pat, 27..28)),
                    lit_with('o', span_range(pat, 28..29)),
                    lit_with('b', span_range(pat, 74..75)),
                    lit_with('a', span_range(pat, 75..76)),
                    lit_with('r', span_range(pat, 76..77)),
                ]
            )
        );
        // Each comment span starts at its `#`. The first three also cover
        // the line feed that ends the comment; the last one runs to the end
        // of the pattern. The comment text itself excludes both the `#` and
        // the line feed.
        assert_eq!(
            astc.comments,
            vec![
                ast::Comment {
                    span: span_range(pat, 5..26),
                    comment: s(" This is comment 1."),
                },
                ast::Comment {
                    span: span_range(pat, 30..51),
                    comment: s(" This is comment 2."),
                },
                ast::Comment {
                    span: span_range(pat, 53..74),
                    comment: s(" This is comment 3."),
                },
                ast::Comment {
                    span: span_range(pat, 78..98),
                    comment: s(" This is comment 4."),
                },
            ]
        );
    }
2698
2699    #[test]
2700    fn parse_holistic() {
2701        assert_eq!(parser("]").parse(), Ok(lit(']', 0)));
2702        assert_eq!(
2703            parser(r"\\\.\+\*\?\(\)\|\[\]\{\}\^\$\#\&\-\~").parse(),
2704            Ok(concat(
2705                0..36,
2706                vec![
2707                    punct_lit('\\', span(0..2)),
2708                    punct_lit('.', span(2..4)),
2709                    punct_lit('+', span(4..6)),
2710                    punct_lit('*', span(6..8)),
2711                    punct_lit('?', span(8..10)),
2712                    punct_lit('(', span(10..12)),
2713                    punct_lit(')', span(12..14)),
2714                    punct_lit('|', span(14..16)),
2715                    punct_lit('[', span(16..18)),
2716                    punct_lit(']', span(18..20)),
2717                    punct_lit('{', span(20..22)),
2718                    punct_lit('}', span(22..24)),
2719                    punct_lit('^', span(24..26)),
2720                    punct_lit('$', span(26..28)),
2721                    punct_lit('#', span(28..30)),
2722                    punct_lit('&', span(30..32)),
2723                    punct_lit('-', span(32..34)),
2724                    punct_lit('~', span(34..36)),
2725                ]
2726            ))
2727        );
2728    }
2729
    // Exercises the `x` (ignore whitespace) flag: toggling it inline,
    // scoping it to a group, and its effect on whitespace inside groups and
    // escapes.
    #[test]
    fn parse_ignore_whitespace() {
        // Test that basic whitespace insensitivity works.
        let pat = "(?x)a b";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                nspan(npos(0, 1, 1), npos(7, 1, 8)),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                ]
            ))
        );

        // Test that we can toggle whitespace insensitivity.
        let pat = "(?x)a b(?-x)a b";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                nspan(npos(0, 1, 1), npos(15, 1, 16)),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                    lit_with('b', nspan(npos(6, 1, 7), npos(7, 1, 8))),
                    flag_set(pat, 7..12, ast::Flag::IgnoreWhitespace, true),
                    lit_with('a', nspan(npos(12, 1, 13), npos(13, 1, 14))),
                    lit_with(' ', nspan(npos(13, 1, 14), npos(14, 1, 15))),
                    lit_with('b', nspan(npos(14, 1, 15), npos(15, 1, 16))),
                ]
            ))
        );

        // Test that nesting whitespace insensitive flags works.
        // The flag is scoped to the group: the space before `)` is dropped,
        // while the spaces outside the group remain literals.
        let pat = "a (?x:a )a ";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..11),
                vec![
                    lit_with('a', span_range(pat, 0..1)),
                    lit_with(' ', span_range(pat, 1..2)),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 2..9),
                        kind: ast::GroupKind::NonCapturing(ast::Flags {
                            span: span_range(pat, 4..5),
                            items: vec![ast::FlagsItem {
                                span: span_range(pat, 4..5),
                                kind: ast::FlagsItemKind::Flag(
                                    ast::Flag::IgnoreWhitespace
                                ),
                            },],
                        }),
                        ast: Box::new(lit_with('a', span_range(pat, 6..7))),
                    }),
                    lit_with('a', span_range(pat, 9..10)),
                    lit_with(' ', span_range(pat, 10..11)),
                ]
            ))
        );

        // Test that whitespace after an opening paren is insignificant.
        let pat = "(?x)( ?P<foo> a )";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 4..pat.len()),
                        kind: ast::GroupKind::CaptureName(ast::CaptureName {
                            span: span_range(pat, 9..12),
                            name: s("foo"),
                            index: 1,
                        }),
                        ast: Box::new(lit_with('a', span_range(pat, 14..15))),
                    }),
                ]
            ))
        );
        let pat = "(?x)(  a )";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 4..pat.len()),
                        kind: ast::GroupKind::CaptureIndex(1),
                        ast: Box::new(lit_with('a', span_range(pat, 7..8))),
                    }),
                ]
            ))
        );
        // With nothing between `?` and `:`, the group's flag set has no
        // items and a zero-width span at the offset of the `:`.
        let pat = "(?x)(  ?:  a )";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Group(ast::Group {
                        span: span_range(pat, 4..pat.len()),
                        kind: ast::GroupKind::NonCapturing(ast::Flags {
                            span: span_range(pat, 8..8),
                            items: vec![],
                        }),
                        ast: Box::new(lit_with('a', span_range(pat, 11..12))),
                    }),
                ]
            ))
        );
        // Whitespace around and inside the braces of a hex escape is skipped
        // as well; the literal spans the whole escape and 0x53 is 'S'.
        let pat = r"(?x)\x { 53 }";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Literal(ast::Literal {
                        span: span(4..13),
                        kind: ast::LiteralKind::HexBrace(
                            ast::HexLiteralKind::X
                        ),
                        c: 'S',
                    }),
                ]
            ))
        );

        // Test that whitespace after an escape is OK.
        let pat = r"(?x)\ ";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    flag_set(pat, 0..4, ast::Flag::IgnoreWhitespace, false),
                    Ast::Literal(ast::Literal {
                        span: span_range(pat, 4..6),
                        kind: ast::LiteralKind::Special(
                            ast::SpecialLiteralKind::Space
                        ),
                        c: ' ',
                    }),
                ]
            ))
        );
        // ... but only when `x` mode is enabled.
        let pat = r"\ ";
        assert_eq!(
            parser(pat).parse().unwrap_err(),
            TestError {
                span: span_range(pat, 0..2),
                kind: ast::ErrorKind::EscapeUnrecognized,
            }
        );
    }
2891
    // Checks that line feeds in a pattern are parsed as ordinary literals
    // and that positions track line and column numbers across them.
    #[test]
    fn parse_newlines() {
        let pat = ".\n.";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..3),
                vec![
                    Ast::Dot(span_range(pat, 0..1)),
                    lit_with('\n', span_range(pat, 1..2)),
                    Ast::Dot(span_range(pat, 2..3)),
                ]
            ))
        );

        // The positions below are spelled out by hand. A line feed literal
        // starts on the line it terminates and ends at column 1 of the next
        // line.
        let pat = "foobar\nbaz\nquux\n";
        assert_eq!(
            parser(pat).parse(),
            Ok(concat_with(
                span_range(pat, 0..pat.len()),
                vec![
                    lit_with('f', nspan(npos(0, 1, 1), npos(1, 1, 2))),
                    lit_with('o', nspan(npos(1, 1, 2), npos(2, 1, 3))),
                    lit_with('o', nspan(npos(2, 1, 3), npos(3, 1, 4))),
                    lit_with('b', nspan(npos(3, 1, 4), npos(4, 1, 5))),
                    lit_with('a', nspan(npos(4, 1, 5), npos(5, 1, 6))),
                    lit_with('r', nspan(npos(5, 1, 6), npos(6, 1, 7))),
                    lit_with('\n', nspan(npos(6, 1, 7), npos(7, 2, 1))),
                    lit_with('b', nspan(npos(7, 2, 1), npos(8, 2, 2))),
                    lit_with('a', nspan(npos(8, 2, 2), npos(9, 2, 3))),
                    lit_with('z', nspan(npos(9, 2, 3), npos(10, 2, 4))),
                    lit_with('\n', nspan(npos(10, 2, 4), npos(11, 3, 1))),
                    lit_with('q', nspan(npos(11, 3, 1), npos(12, 3, 2))),
                    lit_with('u', nspan(npos(12, 3, 2), npos(13, 3, 3))),
                    lit_with('u', nspan(npos(13, 3, 3), npos(14, 3, 4))),
                    lit_with('x', nspan(npos(14, 3, 4), npos(15, 3, 5))),
                    lit_with('\n', nspan(npos(15, 3, 5), npos(16, 4, 1))),
                ]
            ))
        );
    }
2933
2934    #[test]
2935    fn parse_uncounted_repetition() {
2936        assert_eq!(
2937            parser(r"a*").parse(),
2938            Ok(Ast::Repetition(ast::Repetition {
2939                span: span(0..2),
2940                op: ast::RepetitionOp {
2941                    span: span(1..2),
2942                    kind: ast::RepetitionKind::ZeroOrMore,
2943                },
2944                greedy: true,
2945                ast: Box::new(lit('a', 0)),
2946            }))
2947        );
2948        assert_eq!(
2949            parser(r"a+").parse(),
2950            Ok(Ast::Repetition(ast::Repetition {
2951                span: span(0..2),
2952                op: ast::RepetitionOp {
2953                    span: span(1..2),
2954                    kind: ast::RepetitionKind::OneOrMore,
2955                },
2956                greedy: true,
2957                ast: Box::new(lit('a', 0)),
2958            }))
2959        );
2960
2961        assert_eq!(
2962            parser(r"a?").parse(),
2963            Ok(Ast::Repetition(ast::Repetition {
2964                span: span(0..2),
2965                op: ast::RepetitionOp {
2966                    span: span(1..2),
2967                    kind: ast::RepetitionKind::ZeroOrOne,
2968                },
2969                greedy: true,
2970                ast: Box::new(lit('a', 0)),
2971            }))
2972        );
2973        assert_eq!(
2974            parser(r"a??").parse(),
2975            Ok(Ast::Repetition(ast::Repetition {
2976                span: span(0..3),
2977                op: ast::RepetitionOp {
2978                    span: span(1..3),
2979                    kind: ast::RepetitionKind::ZeroOrOne,
2980                },
2981                greedy: false,
2982                ast: Box::new(lit('a', 0)),
2983            }))
2984        );
2985        assert_eq!(
2986            parser(r"a?").parse(),
2987            Ok(Ast::Repetition(ast::Repetition {
2988                span: span(0..2),
2989                op: ast::RepetitionOp {
2990                    span: span(1..2),
2991                    kind: ast::RepetitionKind::ZeroOrOne,
2992                },
2993                greedy: true,
2994                ast: Box::new(lit('a', 0)),
2995            }))
2996        );
2997        assert_eq!(
2998            parser(r"a?b").parse(),
2999            Ok(concat(
3000                0..3,
3001                vec![
3002                    Ast::Repetition(ast::Repetition {
3003                        span: span(0..2),
3004                        op: ast::RepetitionOp {
3005                            span: span(1..2),
3006                            kind: ast::RepetitionKind::ZeroOrOne,
3007                        },
3008                        greedy: true,
3009                        ast: Box::new(lit('a', 0)),
3010                    }),
3011                    lit('b', 2),
3012                ]
3013            ))
3014        );
3015        assert_eq!(
3016            parser(r"a??b").parse(),
3017            Ok(concat(
3018                0..4,
3019                vec![
3020                    Ast::Repetition(ast::Repetition {
3021                        span: span(0..3),
3022                        op: ast::RepetitionOp {
3023                            span: span(1..3),
3024                            kind: ast::RepetitionKind::ZeroOrOne,
3025                        },
3026                        greedy: false,
3027                        ast: Box::new(lit('a', 0)),
3028                    }),
3029                    lit('b', 3),
3030                ]
3031            ))
3032        );
3033        assert_eq!(
3034            parser(r"ab?").parse(),
3035            Ok(concat(
3036                0..3,
3037                vec![
3038                    lit('a', 0),
3039                    Ast::Repetition(ast::Repetition {
3040                        span: span(1..3),
3041                        op: ast::RepetitionOp {
3042                            span: span(2..3),
3043                            kind: ast::RepetitionKind::ZeroOrOne,
3044                        },
3045                        greedy: true,
3046                        ast: Box::new(lit('b', 1)),
3047                    }),
3048                ]
3049            ))
3050        );
3051        assert_eq!(
3052            parser(r"(ab)?").parse(),
3053            Ok(Ast::Repetition(ast::Repetition {
3054                span: span(0..5),
3055                op: ast::RepetitionOp {
3056                    span: span(4..5),
3057                    kind: ast::RepetitionKind::ZeroOrOne,
3058                },
3059                greedy: true,
3060                ast: Box::new(group(
3061                    0..4,
3062                    1,
3063                    concat(1..3, vec![lit('a', 1), lit('b', 2),])
3064                )),
3065            }))
3066        );
3067        assert_eq!(
3068            parser(r"|a?").parse(),
3069            Ok(alt(
3070                0..3,
3071                vec![
3072                    Ast::Empty(span(0..0)),
3073                    Ast::Repetition(ast::Repetition {
3074                        span: span(1..3),
3075                        op: ast::RepetitionOp {
3076                            span: span(2..3),
3077                            kind: ast::RepetitionKind::ZeroOrOne,
3078                        },
3079                        greedy: true,
3080                        ast: Box::new(lit('a', 1)),
3081                    }),
3082                ]
3083            ))
3084        );
3085
3086        assert_eq!(
3087            parser(r"*").parse().unwrap_err(),
3088            TestError {
3089                span: span(0..0),
3090                kind: ast::ErrorKind::RepetitionMissing,
3091            }
3092        );
3093        assert_eq!(
3094            parser(r"(?i)*").parse().unwrap_err(),
3095            TestError {
3096                span: span(4..4),
3097                kind: ast::ErrorKind::RepetitionMissing,
3098            }
3099        );
3100        assert_eq!(
3101            parser(r"(*)").parse().unwrap_err(),
3102            TestError {
3103                span: span(1..1),
3104                kind: ast::ErrorKind::RepetitionMissing,
3105            }
3106        );
3107        assert_eq!(
3108            parser(r"(?:?)").parse().unwrap_err(),
3109            TestError {
3110                span: span(3..3),
3111                kind: ast::ErrorKind::RepetitionMissing,
3112            }
3113        );
3114        assert_eq!(
3115            parser(r"+").parse().unwrap_err(),
3116            TestError {
3117                span: span(0..0),
3118                kind: ast::ErrorKind::RepetitionMissing,
3119            }
3120        );
3121        assert_eq!(
3122            parser(r"?").parse().unwrap_err(),
3123            TestError {
3124                span: span(0..0),
3125                kind: ast::ErrorKind::RepetitionMissing,
3126            }
3127        );
3128        assert_eq!(
3129            parser(r"(?)").parse().unwrap_err(),
3130            TestError {
3131                span: span(1..1),
3132                kind: ast::ErrorKind::RepetitionMissing,
3133            }
3134        );
3135        assert_eq!(
3136            parser(r"|*").parse().unwrap_err(),
3137            TestError {
3138                span: span(1..1),
3139                kind: ast::ErrorKind::RepetitionMissing,
3140            }
3141        );
3142        assert_eq!(
3143            parser(r"|+").parse().unwrap_err(),
3144            TestError {
3145                span: span(1..1),
3146                kind: ast::ErrorKind::RepetitionMissing,
3147            }
3148        );
3149        assert_eq!(
3150            parser(r"|?").parse().unwrap_err(),
3151            TestError {
3152                span: span(1..1),
3153                kind: ast::ErrorKind::RepetitionMissing,
3154            }
3155        );
3156    }
3157
3158    #[test]
3159    fn parse_counted_repetition() {
3160        assert_eq!(
3161            parser(r"a{5}").parse(),
3162            Ok(Ast::Repetition(ast::Repetition {
3163                span: span(0..4),
3164                op: ast::RepetitionOp {
3165                    span: span(1..4),
3166                    kind: ast::RepetitionKind::Range(
3167                        ast::RepetitionRange::Exactly(5)
3168                    ),
3169                },
3170                greedy: true,
3171                ast: Box::new(lit('a', 0)),
3172            }))
3173        );
3174        assert_eq!(
3175            parser(r"a{5,}").parse(),
3176            Ok(Ast::Repetition(ast::Repetition {
3177                span: span(0..5),
3178                op: ast::RepetitionOp {
3179                    span: span(1..5),
3180                    kind: ast::RepetitionKind::Range(
3181                        ast::RepetitionRange::AtLeast(5)
3182                    ),
3183                },
3184                greedy: true,
3185                ast: Box::new(lit('a', 0)),
3186            }))
3187        );
3188        assert_eq!(
3189            parser(r"a{5,9}").parse(),
3190            Ok(Ast::Repetition(ast::Repetition {
3191                span: span(0..6),
3192                op: ast::RepetitionOp {
3193                    span: span(1..6),
3194                    kind: ast::RepetitionKind::Range(
3195                        ast::RepetitionRange::Bounded(5, 9)
3196                    ),
3197                },
3198                greedy: true,
3199                ast: Box::new(lit('a', 0)),
3200            }))
3201        );
3202        assert_eq!(
3203            parser(r"a{5}?").parse(),
3204            Ok(Ast::Repetition(ast::Repetition {
3205                span: span(0..5),
3206                op: ast::RepetitionOp {
3207                    span: span(1..5),
3208                    kind: ast::RepetitionKind::Range(
3209                        ast::RepetitionRange::Exactly(5)
3210                    ),
3211                },
3212                greedy: false,
3213                ast: Box::new(lit('a', 0)),
3214            }))
3215        );
3216        assert_eq!(
3217            parser(r"ab{5}").parse(),
3218            Ok(concat(
3219                0..5,
3220                vec![
3221                    lit('a', 0),
3222                    Ast::Repetition(ast::Repetition {
3223                        span: span(1..5),
3224                        op: ast::RepetitionOp {
3225                            span: span(2..5),
3226                            kind: ast::RepetitionKind::Range(
3227                                ast::RepetitionRange::Exactly(5)
3228                            ),
3229                        },
3230                        greedy: true,
3231                        ast: Box::new(lit('b', 1)),
3232                    }),
3233                ]
3234            ))
3235        );
3236        assert_eq!(
3237            parser(r"ab{5}c").parse(),
3238            Ok(concat(
3239                0..6,
3240                vec![
3241                    lit('a', 0),
3242                    Ast::Repetition(ast::Repetition {
3243                        span: span(1..5),
3244                        op: ast::RepetitionOp {
3245                            span: span(2..5),
3246                            kind: ast::RepetitionKind::Range(
3247                                ast::RepetitionRange::Exactly(5)
3248                            ),
3249                        },
3250                        greedy: true,
3251                        ast: Box::new(lit('b', 1)),
3252                    }),
3253                    lit('c', 5),
3254                ]
3255            ))
3256        );
3257
3258        assert_eq!(
3259            parser(r"a{ 5 }").parse(),
3260            Ok(Ast::Repetition(ast::Repetition {
3261                span: span(0..6),
3262                op: ast::RepetitionOp {
3263                    span: span(1..6),
3264                    kind: ast::RepetitionKind::Range(
3265                        ast::RepetitionRange::Exactly(5)
3266                    ),
3267                },
3268                greedy: true,
3269                ast: Box::new(lit('a', 0)),
3270            }))
3271        );
3272        assert_eq!(
3273            parser(r"a{ 5 , 9 }").parse(),
3274            Ok(Ast::Repetition(ast::Repetition {
3275                span: span(0..10),
3276                op: ast::RepetitionOp {
3277                    span: span(1..10),
3278                    kind: ast::RepetitionKind::Range(
3279                        ast::RepetitionRange::Bounded(5, 9)
3280                    ),
3281                },
3282                greedy: true,
3283                ast: Box::new(lit('a', 0)),
3284            }))
3285        );
3286        assert_eq!(
3287            parser_ignore_whitespace(r"a{5,9} ?").parse(),
3288            Ok(Ast::Repetition(ast::Repetition {
3289                span: span(0..8),
3290                op: ast::RepetitionOp {
3291                    span: span(1..8),
3292                    kind: ast::RepetitionKind::Range(
3293                        ast::RepetitionRange::Bounded(5, 9)
3294                    ),
3295                },
3296                greedy: false,
3297                ast: Box::new(lit('a', 0)),
3298            }))
3299        );
3300
3301        assert_eq!(
3302            parser(r"(?i){0}").parse().unwrap_err(),
3303            TestError {
3304                span: span(4..4),
3305                kind: ast::ErrorKind::RepetitionMissing,
3306            }
3307        );
3308        assert_eq!(
3309            parser(r"(?m){1,1}").parse().unwrap_err(),
3310            TestError {
3311                span: span(4..4),
3312                kind: ast::ErrorKind::RepetitionMissing,
3313            }
3314        );
3315        assert_eq!(
3316            parser(r"a{]}").parse().unwrap_err(),
3317            TestError {
3318                span: span(2..2),
3319                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3320            }
3321        );
3322        assert_eq!(
3323            parser(r"a{1,]}").parse().unwrap_err(),
3324            TestError {
3325                span: span(4..4),
3326                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3327            }
3328        );
3329        assert_eq!(
3330            parser(r"a{").parse().unwrap_err(),
3331            TestError {
3332                span: span(1..2),
3333                kind: ast::ErrorKind::RepetitionCountUnclosed,
3334            }
3335        );
3336        assert_eq!(
3337            parser(r"a{}").parse().unwrap_err(),
3338            TestError {
3339                span: span(2..2),
3340                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3341            }
3342        );
3343        assert_eq!(
3344            parser(r"a{a").parse().unwrap_err(),
3345            TestError {
3346                span: span(2..2),
3347                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3348            }
3349        );
3350        assert_eq!(
3351            parser(r"a{9999999999}").parse().unwrap_err(),
3352            TestError {
3353                span: span(2..12),
3354                kind: ast::ErrorKind::DecimalInvalid,
3355            }
3356        );
3357        assert_eq!(
3358            parser(r"a{9").parse().unwrap_err(),
3359            TestError {
3360                span: span(1..3),
3361                kind: ast::ErrorKind::RepetitionCountUnclosed,
3362            }
3363        );
3364        assert_eq!(
3365            parser(r"a{9,a").parse().unwrap_err(),
3366            TestError {
3367                span: span(4..4),
3368                kind: ast::ErrorKind::RepetitionCountDecimalEmpty,
3369            }
3370        );
3371        assert_eq!(
3372            parser(r"a{9,9999999999}").parse().unwrap_err(),
3373            TestError {
3374                span: span(4..14),
3375                kind: ast::ErrorKind::DecimalInvalid,
3376            }
3377        );
3378        assert_eq!(
3379            parser(r"a{9,").parse().unwrap_err(),
3380            TestError {
3381                span: span(1..4),
3382                kind: ast::ErrorKind::RepetitionCountUnclosed,
3383            }
3384        );
3385        assert_eq!(
3386            parser(r"a{9,11").parse().unwrap_err(),
3387            TestError {
3388                span: span(1..6),
3389                kind: ast::ErrorKind::RepetitionCountUnclosed,
3390            }
3391        );
3392        assert_eq!(
3393            parser(r"a{2,1}").parse().unwrap_err(),
3394            TestError {
3395                span: span(1..6),
3396                kind: ast::ErrorKind::RepetitionCountInvalid,
3397            }
3398        );
3399        assert_eq!(
3400            parser(r"{5}").parse().unwrap_err(),
3401            TestError {
3402                span: span(0..0),
3403                kind: ast::ErrorKind::RepetitionMissing,
3404            }
3405        );
3406        assert_eq!(
3407            parser(r"|{5}").parse().unwrap_err(),
3408            TestError {
3409                span: span(1..1),
3410                kind: ast::ErrorKind::RepetitionMissing,
3411            }
3412        );
3413    }
3414
3415    #[test]
3416    fn parse_alternate() {
3417        assert_eq!(
3418            parser(r"a|b").parse(),
3419            Ok(Ast::Alternation(ast::Alternation {
3420                span: span(0..3),
3421                asts: vec![lit('a', 0), lit('b', 2)],
3422            }))
3423        );
3424        assert_eq!(
3425            parser(r"(a|b)").parse(),
3426            Ok(group(
3427                0..5,
3428                1,
3429                Ast::Alternation(ast::Alternation {
3430                    span: span(1..4),
3431                    asts: vec![lit('a', 1), lit('b', 3)],
3432                })
3433            ))
3434        );
3435
3436        assert_eq!(
3437            parser(r"a|b|c").parse(),
3438            Ok(Ast::Alternation(ast::Alternation {
3439                span: span(0..5),
3440                asts: vec![lit('a', 0), lit('b', 2), lit('c', 4)],
3441            }))
3442        );
3443        assert_eq!(
3444            parser(r"ax|by|cz").parse(),
3445            Ok(Ast::Alternation(ast::Alternation {
3446                span: span(0..8),
3447                asts: vec![
3448                    concat(0..2, vec![lit('a', 0), lit('x', 1)]),
3449                    concat(3..5, vec![lit('b', 3), lit('y', 4)]),
3450                    concat(6..8, vec![lit('c', 6), lit('z', 7)]),
3451                ],
3452            }))
3453        );
3454        assert_eq!(
3455            parser(r"(ax|by|cz)").parse(),
3456            Ok(group(
3457                0..10,
3458                1,
3459                Ast::Alternation(ast::Alternation {
3460                    span: span(1..9),
3461                    asts: vec![
3462                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3463                        concat(4..6, vec![lit('b', 4), lit('y', 5)]),
3464                        concat(7..9, vec![lit('c', 7), lit('z', 8)]),
3465                    ],
3466                })
3467            ))
3468        );
3469        assert_eq!(
3470            parser(r"(ax|(by|(cz)))").parse(),
3471            Ok(group(
3472                0..14,
3473                1,
3474                alt(
3475                    1..13,
3476                    vec![
3477                        concat(1..3, vec![lit('a', 1), lit('x', 2)]),
3478                        group(
3479                            4..13,
3480                            2,
3481                            alt(
3482                                5..12,
3483                                vec![
3484                                    concat(
3485                                        5..7,
3486                                        vec![lit('b', 5), lit('y', 6)]
3487                                    ),
3488                                    group(
3489                                        8..12,
3490                                        3,
3491                                        concat(
3492                                            9..11,
3493                                            vec![lit('c', 9), lit('z', 10),]
3494                                        )
3495                                    ),
3496                                ]
3497                            )
3498                        ),
3499                    ]
3500                )
3501            ))
3502        );
3503
3504        assert_eq!(
3505            parser(r"|").parse(),
3506            Ok(alt(
3507                0..1,
3508                vec![Ast::Empty(span(0..0)), Ast::Empty(span(1..1)),]
3509            ))
3510        );
3511        assert_eq!(
3512            parser(r"||").parse(),
3513            Ok(alt(
3514                0..2,
3515                vec![
3516                    Ast::Empty(span(0..0)),
3517                    Ast::Empty(span(1..1)),
3518                    Ast::Empty(span(2..2)),
3519                ]
3520            ))
3521        );
3522        assert_eq!(
3523            parser(r"a|").parse(),
3524            Ok(alt(0..2, vec![lit('a', 0), Ast::Empty(span(2..2)),]))
3525        );
3526        assert_eq!(
3527            parser(r"|a").parse(),
3528            Ok(alt(0..2, vec![Ast::Empty(span(0..0)), lit('a', 1),]))
3529        );
3530
3531        assert_eq!(
3532            parser(r"(|)").parse(),
3533            Ok(group(
3534                0..3,
3535                1,
3536                alt(
3537                    1..2,
3538                    vec![Ast::Empty(span(1..1)), Ast::Empty(span(2..2)),]
3539                )
3540            ))
3541        );
3542        assert_eq!(
3543            parser(r"(a|)").parse(),
3544            Ok(group(
3545                0..4,
3546                1,
3547                alt(1..3, vec![lit('a', 1), Ast::Empty(span(3..3)),])
3548            ))
3549        );
3550        assert_eq!(
3551            parser(r"(|a)").parse(),
3552            Ok(group(
3553                0..4,
3554                1,
3555                alt(1..3, vec![Ast::Empty(span(1..1)), lit('a', 2),])
3556            ))
3557        );
3558
3559        assert_eq!(
3560            parser(r"a|b)").parse().unwrap_err(),
3561            TestError {
3562                span: span(3..4),
3563                kind: ast::ErrorKind::GroupUnopened,
3564            }
3565        );
3566        assert_eq!(
3567            parser(r"(a|b").parse().unwrap_err(),
3568            TestError {
3569                span: span(0..1),
3570                kind: ast::ErrorKind::GroupUnclosed,
3571            }
3572        );
3573    }
3574
3575    #[test]
3576    fn parse_unsupported_lookaround() {
3577        assert_eq!(
3578            parser(r"(?=a)").parse().unwrap_err(),
3579            TestError {
3580                span: span(0..3),
3581                kind: ast::ErrorKind::UnsupportedLookAround,
3582            }
3583        );
3584        assert_eq!(
3585            parser(r"(?!a)").parse().unwrap_err(),
3586            TestError {
3587                span: span(0..3),
3588                kind: ast::ErrorKind::UnsupportedLookAround,
3589            }
3590        );
3591        assert_eq!(
3592            parser(r"(?<=a)").parse().unwrap_err(),
3593            TestError {
3594                span: span(0..4),
3595                kind: ast::ErrorKind::UnsupportedLookAround,
3596            }
3597        );
3598        assert_eq!(
3599            parser(r"(?<!a)").parse().unwrap_err(),
3600            TestError {
3601                span: span(0..4),
3602                kind: ast::ErrorKind::UnsupportedLookAround,
3603            }
3604        );
3605    }
3606
3607    #[test]
3608    fn parse_group() {
3609        assert_eq!(
3610            parser("(?i)").parse(),
3611            Ok(Ast::Flags(ast::SetFlags {
3612                span: span(0..4),
3613                flags: ast::Flags {
3614                    span: span(2..3),
3615                    items: vec![ast::FlagsItem {
3616                        span: span(2..3),
3617                        kind: ast::FlagsItemKind::Flag(
3618                            ast::Flag::CaseInsensitive
3619                        ),
3620                    }],
3621                },
3622            }))
3623        );
3624        assert_eq!(
3625            parser("(?iU)").parse(),
3626            Ok(Ast::Flags(ast::SetFlags {
3627                span: span(0..5),
3628                flags: ast::Flags {
3629                    span: span(2..4),
3630                    items: vec![
3631                        ast::FlagsItem {
3632                            span: span(2..3),
3633                            kind: ast::FlagsItemKind::Flag(
3634                                ast::Flag::CaseInsensitive
3635                            ),
3636                        },
3637                        ast::FlagsItem {
3638                            span: span(3..4),
3639                            kind: ast::FlagsItemKind::Flag(
3640                                ast::Flag::SwapGreed
3641                            ),
3642                        },
3643                    ],
3644                },
3645            }))
3646        );
3647        assert_eq!(
3648            parser("(?i-U)").parse(),
3649            Ok(Ast::Flags(ast::SetFlags {
3650                span: span(0..6),
3651                flags: ast::Flags {
3652                    span: span(2..5),
3653                    items: vec![
3654                        ast::FlagsItem {
3655                            span: span(2..3),
3656                            kind: ast::FlagsItemKind::Flag(
3657                                ast::Flag::CaseInsensitive
3658                            ),
3659                        },
3660                        ast::FlagsItem {
3661                            span: span(3..4),
3662                            kind: ast::FlagsItemKind::Negation,
3663                        },
3664                        ast::FlagsItem {
3665                            span: span(4..5),
3666                            kind: ast::FlagsItemKind::Flag(
3667                                ast::Flag::SwapGreed
3668                            ),
3669                        },
3670                    ],
3671                },
3672            }))
3673        );
3674
3675        assert_eq!(
3676            parser("()").parse(),
3677            Ok(Ast::Group(ast::Group {
3678                span: span(0..2),
3679                kind: ast::GroupKind::CaptureIndex(1),
3680                ast: Box::new(Ast::Empty(span(1..1))),
3681            }))
3682        );
3683        assert_eq!(
3684            parser("(a)").parse(),
3685            Ok(Ast::Group(ast::Group {
3686                span: span(0..3),
3687                kind: ast::GroupKind::CaptureIndex(1),
3688                ast: Box::new(lit('a', 1)),
3689            }))
3690        );
3691        assert_eq!(
3692            parser("(())").parse(),
3693            Ok(Ast::Group(ast::Group {
3694                span: span(0..4),
3695                kind: ast::GroupKind::CaptureIndex(1),
3696                ast: Box::new(Ast::Group(ast::Group {
3697                    span: span(1..3),
3698                    kind: ast::GroupKind::CaptureIndex(2),
3699                    ast: Box::new(Ast::Empty(span(2..2))),
3700                })),
3701            }))
3702        );
3703
3704        assert_eq!(
3705            parser("(?:a)").parse(),
3706            Ok(Ast::Group(ast::Group {
3707                span: span(0..5),
3708                kind: ast::GroupKind::NonCapturing(ast::Flags {
3709                    span: span(2..2),
3710                    items: vec![],
3711                }),
3712                ast: Box::new(lit('a', 3)),
3713            }))
3714        );
3715
3716        assert_eq!(
3717            parser("(?i:a)").parse(),
3718            Ok(Ast::Group(ast::Group {
3719                span: span(0..6),
3720                kind: ast::GroupKind::NonCapturing(ast::Flags {
3721                    span: span(2..3),
3722                    items: vec![ast::FlagsItem {
3723                        span: span(2..3),
3724                        kind: ast::FlagsItemKind::Flag(
3725                            ast::Flag::CaseInsensitive
3726                        ),
3727                    },],
3728                }),
3729                ast: Box::new(lit('a', 4)),
3730            }))
3731        );
3732        assert_eq!(
3733            parser("(?i-U:a)").parse(),
3734            Ok(Ast::Group(ast::Group {
3735                span: span(0..8),
3736                kind: ast::GroupKind::NonCapturing(ast::Flags {
3737                    span: span(2..5),
3738                    items: vec![
3739                        ast::FlagsItem {
3740                            span: span(2..3),
3741                            kind: ast::FlagsItemKind::Flag(
3742                                ast::Flag::CaseInsensitive
3743                            ),
3744                        },
3745                        ast::FlagsItem {
3746                            span: span(3..4),
3747                            kind: ast::FlagsItemKind::Negation,
3748                        },
3749                        ast::FlagsItem {
3750                            span: span(4..5),
3751                            kind: ast::FlagsItemKind::Flag(
3752                                ast::Flag::SwapGreed
3753                            ),
3754                        },
3755                    ],
3756                }),
3757                ast: Box::new(lit('a', 6)),
3758            }))
3759        );
3760
3761        assert_eq!(
3762            parser("(").parse().unwrap_err(),
3763            TestError {
3764                span: span(0..1),
3765                kind: ast::ErrorKind::GroupUnclosed,
3766            }
3767        );
3768        assert_eq!(
3769            parser("(?").parse().unwrap_err(),
3770            TestError {
3771                span: span(0..1),
3772                kind: ast::ErrorKind::GroupUnclosed,
3773            }
3774        );
3775        assert_eq!(
3776            parser("(?P").parse().unwrap_err(),
3777            TestError {
3778                span: span(2..3),
3779                kind: ast::ErrorKind::FlagUnrecognized,
3780            }
3781        );
3782        assert_eq!(
3783            parser("(?P<").parse().unwrap_err(),
3784            TestError {
3785                span: span(4..4),
3786                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3787            }
3788        );
3789        assert_eq!(
3790            parser("(a").parse().unwrap_err(),
3791            TestError {
3792                span: span(0..1),
3793                kind: ast::ErrorKind::GroupUnclosed,
3794            }
3795        );
3796        assert_eq!(
3797            parser("(()").parse().unwrap_err(),
3798            TestError {
3799                span: span(0..1),
3800                kind: ast::ErrorKind::GroupUnclosed,
3801            }
3802        );
3803        assert_eq!(
3804            parser(")").parse().unwrap_err(),
3805            TestError {
3806                span: span(0..1),
3807                kind: ast::ErrorKind::GroupUnopened,
3808            }
3809        );
3810        assert_eq!(
3811            parser("a)").parse().unwrap_err(),
3812            TestError {
3813                span: span(1..2),
3814                kind: ast::ErrorKind::GroupUnopened,
3815            }
3816        );
3817    }
3818
3819    #[test]
3820    fn parse_capture_name() {
3821        assert_eq!(
3822            parser("(?P<a>z)").parse(),
3823            Ok(Ast::Group(ast::Group {
3824                span: span(0..8),
3825                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3826                    span: span(4..5),
3827                    name: s("a"),
3828                    index: 1,
3829                }),
3830                ast: Box::new(lit('z', 6)),
3831            }))
3832        );
3833        assert_eq!(
3834            parser("(?P<abc>z)").parse(),
3835            Ok(Ast::Group(ast::Group {
3836                span: span(0..10),
3837                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3838                    span: span(4..7),
3839                    name: s("abc"),
3840                    index: 1,
3841                }),
3842                ast: Box::new(lit('z', 8)),
3843            }))
3844        );
3845
3846        assert_eq!(
3847            parser("(?P<a_1>z)").parse(),
3848            Ok(Ast::Group(ast::Group {
3849                span: span(0..10),
3850                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3851                    span: span(4..7),
3852                    name: s("a_1"),
3853                    index: 1,
3854                }),
3855                ast: Box::new(lit('z', 8)),
3856            }))
3857        );
3858
3859        assert_eq!(
3860            parser("(?P<a.1>z)").parse(),
3861            Ok(Ast::Group(ast::Group {
3862                span: span(0..10),
3863                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3864                    span: span(4..7),
3865                    name: s("a.1"),
3866                    index: 1,
3867                }),
3868                ast: Box::new(lit('z', 8)),
3869            }))
3870        );
3871
3872        assert_eq!(
3873            parser("(?P<a[1]>z)").parse(),
3874            Ok(Ast::Group(ast::Group {
3875                span: span(0..11),
3876                kind: ast::GroupKind::CaptureName(ast::CaptureName {
3877                    span: span(4..8),
3878                    name: s("a[1]"),
3879                    index: 1,
3880                }),
3881                ast: Box::new(lit('z', 9)),
3882            }))
3883        );
3884
3885        assert_eq!(
3886            parser("(?P<").parse().unwrap_err(),
3887            TestError {
3888                span: span(4..4),
3889                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3890            }
3891        );
3892        assert_eq!(
3893            parser("(?P<>z)").parse().unwrap_err(),
3894            TestError {
3895                span: span(4..4),
3896                kind: ast::ErrorKind::GroupNameEmpty,
3897            }
3898        );
3899        assert_eq!(
3900            parser("(?P<a").parse().unwrap_err(),
3901            TestError {
3902                span: span(5..5),
3903                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3904            }
3905        );
3906        assert_eq!(
3907            parser("(?P<ab").parse().unwrap_err(),
3908            TestError {
3909                span: span(6..6),
3910                kind: ast::ErrorKind::GroupNameUnexpectedEof,
3911            }
3912        );
3913        assert_eq!(
3914            parser("(?P<0a").parse().unwrap_err(),
3915            TestError {
3916                span: span(4..5),
3917                kind: ast::ErrorKind::GroupNameInvalid,
3918            }
3919        );
3920        assert_eq!(
3921            parser("(?P<~").parse().unwrap_err(),
3922            TestError {
3923                span: span(4..5),
3924                kind: ast::ErrorKind::GroupNameInvalid,
3925            }
3926        );
3927        assert_eq!(
3928            parser("(?P<abc~").parse().unwrap_err(),
3929            TestError {
3930                span: span(7..8),
3931                kind: ast::ErrorKind::GroupNameInvalid,
3932            }
3933        );
3934        assert_eq!(
3935            parser("(?P<a>y)(?P<a>z)").parse().unwrap_err(),
3936            TestError {
3937                span: span(12..13),
3938                kind: ast::ErrorKind::GroupNameDuplicate {
3939                    original: span(4..5),
3940                },
3941            }
3942        );
3943    }
3944
3945    #[test]
3946    fn parse_flags() {
3947        assert_eq!(
3948            parser("i:").parse_flags(),
3949            Ok(ast::Flags {
3950                span: span(0..1),
3951                items: vec![ast::FlagsItem {
3952                    span: span(0..1),
3953                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3954                }],
3955            })
3956        );
3957        assert_eq!(
3958            parser("i)").parse_flags(),
3959            Ok(ast::Flags {
3960                span: span(0..1),
3961                items: vec![ast::FlagsItem {
3962                    span: span(0..1),
3963                    kind: ast::FlagsItemKind::Flag(ast::Flag::CaseInsensitive),
3964                }],
3965            })
3966        );
3967
3968        assert_eq!(
3969            parser("isU:").parse_flags(),
3970            Ok(ast::Flags {
3971                span: span(0..3),
3972                items: vec![
3973                    ast::FlagsItem {
3974                        span: span(0..1),
3975                        kind: ast::FlagsItemKind::Flag(
3976                            ast::Flag::CaseInsensitive
3977                        ),
3978                    },
3979                    ast::FlagsItem {
3980                        span: span(1..2),
3981                        kind: ast::FlagsItemKind::Flag(
3982                            ast::Flag::DotMatchesNewLine
3983                        ),
3984                    },
3985                    ast::FlagsItem {
3986                        span: span(2..3),
3987                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
3988                    },
3989                ],
3990            })
3991        );
3992
3993        assert_eq!(
3994            parser("-isU:").parse_flags(),
3995            Ok(ast::Flags {
3996                span: span(0..4),
3997                items: vec![
3998                    ast::FlagsItem {
3999                        span: span(0..1),
4000                        kind: ast::FlagsItemKind::Negation,
4001                    },
4002                    ast::FlagsItem {
4003                        span: span(1..2),
4004                        kind: ast::FlagsItemKind::Flag(
4005                            ast::Flag::CaseInsensitive
4006                        ),
4007                    },
4008                    ast::FlagsItem {
4009                        span: span(2..3),
4010                        kind: ast::FlagsItemKind::Flag(
4011                            ast::Flag::DotMatchesNewLine
4012                        ),
4013                    },
4014                    ast::FlagsItem {
4015                        span: span(3..4),
4016                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4017                    },
4018                ],
4019            })
4020        );
4021        assert_eq!(
4022            parser("i-sU:").parse_flags(),
4023            Ok(ast::Flags {
4024                span: span(0..4),
4025                items: vec![
4026                    ast::FlagsItem {
4027                        span: span(0..1),
4028                        kind: ast::FlagsItemKind::Flag(
4029                            ast::Flag::CaseInsensitive
4030                        ),
4031                    },
4032                    ast::FlagsItem {
4033                        span: span(1..2),
4034                        kind: ast::FlagsItemKind::Negation,
4035                    },
4036                    ast::FlagsItem {
4037                        span: span(2..3),
4038                        kind: ast::FlagsItemKind::Flag(
4039                            ast::Flag::DotMatchesNewLine
4040                        ),
4041                    },
4042                    ast::FlagsItem {
4043                        span: span(3..4),
4044                        kind: ast::FlagsItemKind::Flag(ast::Flag::SwapGreed),
4045                    },
4046                ],
4047            })
4048        );
4049
4050        assert_eq!(
4051            parser("isU").parse_flags().unwrap_err(),
4052            TestError {
4053                span: span(3..3),
4054                kind: ast::ErrorKind::FlagUnexpectedEof,
4055            }
4056        );
4057        assert_eq!(
4058            parser("isUa:").parse_flags().unwrap_err(),
4059            TestError {
4060                span: span(3..4),
4061                kind: ast::ErrorKind::FlagUnrecognized,
4062            }
4063        );
4064        assert_eq!(
4065            parser("isUi:").parse_flags().unwrap_err(),
4066            TestError {
4067                span: span(3..4),
4068                kind: ast::ErrorKind::FlagDuplicate { original: span(0..1) },
4069            }
4070        );
4071        assert_eq!(
4072            parser("i-sU-i:").parse_flags().unwrap_err(),
4073            TestError {
4074                span: span(4..5),
4075                kind: ast::ErrorKind::FlagRepeatedNegation {
4076                    original: span(1..2),
4077                },
4078            }
4079        );
4080        assert_eq!(
4081            parser("-)").parse_flags().unwrap_err(),
4082            TestError {
4083                span: span(0..1),
4084                kind: ast::ErrorKind::FlagDanglingNegation,
4085            }
4086        );
4087        assert_eq!(
4088            parser("i-)").parse_flags().unwrap_err(),
4089            TestError {
4090                span: span(1..2),
4091                kind: ast::ErrorKind::FlagDanglingNegation,
4092            }
4093        );
4094        assert_eq!(
4095            parser("iU-)").parse_flags().unwrap_err(),
4096            TestError {
4097                span: span(2..3),
4098                kind: ast::ErrorKind::FlagDanglingNegation,
4099            }
4100        );
4101    }
4102
// Checks that each single-letter flag maps to its `ast::Flag` variant and
// that any other character is reported as `FlagUnrecognized`.
#[test]
fn parse_flag() {
    // Flag letters exercised here, paired with the variant each must yield.
    let recognized = vec![
        ("i", ast::Flag::CaseInsensitive),
        ("m", ast::Flag::MultiLine),
        ("s", ast::Flag::DotMatchesNewLine),
        ("U", ast::Flag::SwapGreed),
        ("u", ast::Flag::Unicode),
        ("x", ast::Flag::IgnoreWhitespace),
    ];
    for (letter, flag) in recognized {
        assert_eq!(parser(letter).parse_flag(), Ok(flag));
    }

    // Unknown letters fail, and the error span covers the whole offending
    // character (three bytes in the case of the snowman).
    let rejected = vec![("a", span(0..1)), ("☃", span_range("☃", 0..3))];
    for (pattern, bad_span) in rejected {
        assert_eq!(
            parser(pattern).parse_flag().unwrap_err(),
            TestError {
                span: bad_span,
                kind: ast::ErrorKind::FlagUnrecognized,
            }
        );
    }
}
4127
// Checks the primitives that need no backslash: the dot, the line anchors
// and plain literal characters.
#[test]
fn parse_primitive_non_escape() {
    // A one-character assertion located at the very start of the pattern.
    fn anchor(kind: ast::AssertionKind) -> Primitive {
        Primitive::Assertion(ast::Assertion { span: span(0..1), kind })
    }

    // A literal that appears in the pattern exactly as written.
    fn verbatim(span: Span, c: char) -> Primitive {
        Primitive::Literal(ast::Literal {
            span,
            kind: ast::LiteralKind::Verbatim,
            c,
        })
    }

    let cases = vec![
        (r".", Primitive::Dot(span(0..1))),
        (r"^", anchor(ast::AssertionKind::StartLine)),
        (r"$", anchor(ast::AssertionKind::EndLine)),
        (r"a", verbatim(span(0..1), 'a')),
        // `parse_primitive` hands back a lone `|` as a plain literal.
        (r"|", verbatim(span(0..1), '|')),
        // A multi-byte character yields a span covering all of its bytes.
        (r"☃", verbatim(span_range("☃", 0..3), '☃')),
    ];
    for (pattern, expected) in cases {
        assert_eq!(parser(pattern).parse_primitive(), Ok(expected));
    }
}
4174
// Checks backslash escapes: escaped punctuation, the special control
// character escapes, the assertion escapes, and two failure modes.
#[test]
fn parse_escape() {
    // A two-byte assertion escape (backslash plus one letter) at offset 0.
    fn assertion(kind: ast::AssertionKind) -> Primitive {
        Primitive::Assertion(ast::Assertion { span: span(0..2), kind })
    }

    // A two-byte literal escape at offset 0.
    fn literal(kind: ast::LiteralKind, c: char) -> Primitive {
        Primitive::Literal(ast::Literal { span: span(0..2), kind, c })
    }

    assert_eq!(
        parser(r"\|").parse_primitive(),
        Ok(literal(ast::LiteralKind::Punctuation, '|'))
    );

    // Escapes that stand for a specific control character.
    let specials = vec![
        (r"\a", '\x07', ast::SpecialLiteralKind::Bell),
        (r"\f", '\x0C', ast::SpecialLiteralKind::FormFeed),
        (r"\t", '\t', ast::SpecialLiteralKind::Tab),
        (r"\n", '\n', ast::SpecialLiteralKind::LineFeed),
        (r"\r", '\r', ast::SpecialLiteralKind::CarriageReturn),
        (r"\v", '\x0B', ast::SpecialLiteralKind::VerticalTab),
    ];
    for (pattern, c, special) in specials {
        assert_eq!(
            parser(pattern).parse_primitive(),
            Ok(literal(ast::LiteralKind::Special(special), c))
        );
    }

    // Escapes that produce zero-width assertions.
    let assertions = vec![
        (r"\A", ast::AssertionKind::StartText),
        (r"\z", ast::AssertionKind::EndText),
        (r"\b", ast::AssertionKind::WordBoundary),
        (r"\B", ast::AssertionKind::NotWordBoundary),
    ];
    for (pattern, kind) in assertions {
        assert_eq!(parser(pattern).parse_primitive(), Ok(assertion(kind)));
    }

    // A lone backslash and an unknown escape letter are both errors.
    let failures = vec![
        (r"\", span(0..1), ast::ErrorKind::EscapeUnexpectedEof),
        (r"\y", span(0..2), ast::ErrorKind::EscapeUnrecognized),
    ];
    for (pattern, at, kind) in failures {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: at, kind }
        );
    }
}
4247
// Checks that digit escapes are rejected as unsupported backreferences by
// the parser returned from `parser`, using the lowest and highest digit.
#[test]
fn parse_unsupported_backreference() {
    for &pattern in &[r"\0", r"\9"] {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError {
                // The error covers the backslash and the digit.
                span: span(0..2),
                kind: ast::ErrorKind::UnsupportedBackreference,
            }
        );
    }
}
4265
// Exercises octal escapes using the parser returned by `parser_octal`.
#[test]
fn parse_octal() {
    // Builds the octal literal expected at `span` for the character `c`.
    fn octal(span: Span, c: char) -> ast::Literal {
        ast::Literal { span, kind: ast::LiteralKind::Octal, c }
    }

    // Every escape of one to three octal digits must decode to the code
    // point with that value. The range is inclusive so that the largest
    // such escape, `\777`, is part of the sweep as well.
    for i in 0..=0o777 {
        let pat = format!(r"\{:o}", i);
        assert_eq!(
            parser_octal(&pat).parse_escape(),
            Ok(Primitive::Literal(octal(
                span(0..pat.len()),
                ::std::char::from_u32(i).unwrap(),
            )))
        );
    }

    // `8` is not an octal digit, so only `\77` is consumed.
    assert_eq!(
        parser_octal(r"\778").parse_escape(),
        Ok(Primitive::Literal(octal(span(0..3), '?')))
    );
    // Only the first three digits are consumed; the fourth `7` is left over.
    assert_eq!(
        parser_octal(r"\7777").parse_escape(),
        Ok(Primitive::Literal(octal(span(0..4), '\u{01FF}')))
    );

    // In a full parse, the leftover character becomes a verbatim literal
    // concatenated after the octal one.
    assert_eq!(
        parser_octal(r"\778").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..4),
            asts: vec![
                Ast::Literal(octal(span(0..3), '?')),
                Ast::Literal(ast::Literal {
                    span: span(3..4),
                    kind: ast::LiteralKind::Verbatim,
                    c: '8',
                }),
            ],
        }))
    );
    assert_eq!(
        parser_octal(r"\7777").parse(),
        Ok(Ast::Concat(ast::Concat {
            span: span(0..5),
            asts: vec![
                Ast::Literal(octal(span(0..4), '\u{01FF}')),
                Ast::Literal(ast::Literal {
                    span: span(4..5),
                    kind: ast::LiteralKind::Verbatim,
                    c: '7',
                }),
            ],
        }))
    );

    // A non-octal digit right after the backslash is an unrecognized escape.
    assert_eq!(
        parser_octal(r"\8").parse_escape().unwrap_err(),
        TestError {
            span: span(0..2),
            kind: ast::ErrorKind::EscapeUnrecognized,
        }
    );
}
4340
// Exercises the two-digit `\xNN` escape form.
#[test]
fn parse_hex_two() {
    // Every value expressible in two hex digits must round-trip.
    for value in 0..256 {
        let pattern = format!(r"\x{:02x}", value);
        let expected = Primitive::Literal(ast::Literal {
            span: span(0..pattern.len()),
            kind: ast::LiteralKind::HexFixed(ast::HexLiteralKind::X),
            c: ::std::char::from_u32(value).unwrap(),
        });
        assert_eq!(parser(&pattern).parse_escape(), Ok(expected));
    }

    // Too few digits, or a non-hex character in either digit position.
    let failures = vec![
        (r"\xF", span(3..3), ast::ErrorKind::EscapeUnexpectedEof),
        (r"\xG", span(2..3), ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\xFG", span(3..4), ast::ErrorKind::EscapeHexInvalidDigit),
    ];
    for (pattern, at, kind) in failures {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: at, kind }
        );
    }
}
4377
// Exercises the four-digit `\uNNNN` escape form.
#[test]
fn parse_hex_four() {
    // Sweep every value below 0x10000. Values for which `char::from_u32`
    // returns `None` are skipped.
    for i in 0..65536 {
        if let Some(c) = ::std::char::from_u32(i) {
            let pat = format!(r"\u{:04x}", i);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeShort,
                ),
                c,
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }
    }

    // Truncated input, a bad digit in each of the four positions, and a
    // well-formed escape whose value is rejected.
    let failures = vec![
        (r"\uF", span(3..3), ast::ErrorKind::EscapeUnexpectedEof),
        (r"\uG", span(2..3), ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFG", span(3..4), ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFG", span(4..5), ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uFFFG", span(5..6), ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\uD800", span(2..6), ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pattern, at, kind) in failures {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: at, kind }
        );
    }
}
4441
// Exercises the eight-digit `\UNNNNNNNN` escape form.
#[test]
fn parse_hex_eight() {
    // Sweep every value below 0x10000, zero-padded to eight digits. Values
    // for which `char::from_u32` returns `None` are skipped.
    for i in 0..65536 {
        if let Some(c) = ::std::char::from_u32(i) {
            let pat = format!(r"\U{:08x}", i);
            let expected = Primitive::Literal(ast::Literal {
                span: span(0..pat.len()),
                kind: ast::LiteralKind::HexFixed(
                    ast::HexLiteralKind::UnicodeLong,
                ),
                c,
            });
            assert_eq!(parser(&pat).parse_escape(), Ok(expected));
        }
    }

    // Running out of input before all digits were read.
    assert_eq!(
        parser(r"\UF").parse_escape().unwrap_err(),
        TestError {
            span: span(3..3),
            kind: ast::ErrorKind::EscapeUnexpectedEof,
        }
    );

    // A non-hex character in each of the eight digit positions; the error
    // span is the single offending character.
    let bad_digits = vec![
        (r"\UG", span(2..3)),
        (r"\UFG", span(3..4)),
        (r"\UFFG", span(4..5)),
        (r"\UFFFG", span(5..6)),
        (r"\UFFFFG", span(6..7)),
        (r"\UFFFFFG", span(7..8)),
        (r"\UFFFFFFG", span(8..9)),
        (r"\UFFFFFFFG", span(9..10)),
    ];
    for (pattern, at) in bad_digits {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError {
                span: at,
                kind: ast::ErrorKind::EscapeHexInvalidDigit,
            }
        );
    }
}
4526
// Exercises the brace-delimited hex escape forms `\x{...}`, `\u{...}` and
// `\U{...}`.
#[test]
fn parse_hex_brace() {
    // Each prefix letter is recorded in the literal kind, and hex digits
    // are accepted in either case.
    let accepted = vec![
        (r"\u{26c4}", 8, ast::HexLiteralKind::UnicodeShort, '⛄'),
        (r"\U{26c4}", 8, ast::HexLiteralKind::UnicodeLong, '⛄'),
        (r"\x{26c4}", 8, ast::HexLiteralKind::X, '⛄'),
        (r"\x{26C4}", 8, ast::HexLiteralKind::X, '⛄'),
        (r"\x{10fFfF}", 10, ast::HexLiteralKind::X, '\u{10FFFF}'),
    ];
    for (pattern, end, hex_kind, c) in accepted {
        assert_eq!(
            parser(pattern).parse_escape(),
            Ok(Primitive::Literal(ast::Literal {
                span: span(0..end),
                kind: ast::LiteralKind::HexBrace(hex_kind),
                c,
            }))
        );
    }

    // Failure modes: input ending early, empty braces, a non-hex digit,
    // and digit strings whose value is rejected.
    let failures = vec![
        (r"\x", span(2..2), ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{", span(2..3), ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{FF", span(2..5), ast::ErrorKind::EscapeUnexpectedEof),
        (r"\x{}", span(2..4), ast::ErrorKind::EscapeHexEmpty),
        (r"\x{FGF}", span(4..5), ast::ErrorKind::EscapeHexInvalidDigit),
        (r"\x{FFFFFF}", span(3..9), ast::ErrorKind::EscapeHexInvalid),
        (r"\x{D800}", span(3..7), ast::ErrorKind::EscapeHexInvalid),
        (r"\x{FFFFFFFFF}", span(3..12), ast::ErrorKind::EscapeHexInvalid),
    ];
    for (pattern, at, kind) in failures {
        assert_eq!(
            parser(pattern).parse_escape().unwrap_err(),
            TestError { span: at, kind }
        );
    }
}
4631
// Checks decimal number parsing: valid digit strings, input without any
// leading digit, and a number that is rejected as invalid.
#[test]
fn parse_decimal() {
    // A leading zero is accepted and does not change the value.
    for (digits, value) in vec![("123", 123), ("0", 0), ("01", 1)] {
        assert_eq!(parser(digits).parse_decimal(), Ok(value));
    }

    let failures = vec![
        // No digit at the current position: a sign is not part of a decimal.
        ("-1", span(0..0), ast::ErrorKind::DecimalEmpty),
        ("", span(0..0), ast::ErrorKind::DecimalEmpty),
        // NOTE(review): presumably too large for the parser's integer type;
        // confirm against `parse_decimal`.
        ("9999999999", span(0..10), ast::ErrorKind::DecimalInvalid),
    ];
    for (input, at, kind) in failures {
        assert_eq!(
            parser(input).parse_decimal().unwrap_err(),
            TestError { span: at, kind }
        );
    }
}
4654
4655    #[test]
4656    fn parse_set_class() {
4657        fn union(span: Span, items: Vec<ast::ClassSetItem>) -> ast::ClassSet {
4658            ast::ClassSet::union(ast::ClassSetUnion { span, items })
4659        }
4660
4661        fn intersection(
4662            span: Span,
4663            lhs: ast::ClassSet,
4664            rhs: ast::ClassSet,
4665        ) -> ast::ClassSet {
4666            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4667                span,
4668                kind: ast::ClassSetBinaryOpKind::Intersection,
4669                lhs: Box::new(lhs),
4670                rhs: Box::new(rhs),
4671            })
4672        }
4673
4674        fn difference(
4675            span: Span,
4676            lhs: ast::ClassSet,
4677            rhs: ast::ClassSet,
4678        ) -> ast::ClassSet {
4679            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4680                span,
4681                kind: ast::ClassSetBinaryOpKind::Difference,
4682                lhs: Box::new(lhs),
4683                rhs: Box::new(rhs),
4684            })
4685        }
4686
4687        fn symdifference(
4688            span: Span,
4689            lhs: ast::ClassSet,
4690            rhs: ast::ClassSet,
4691        ) -> ast::ClassSet {
4692            ast::ClassSet::BinaryOp(ast::ClassSetBinaryOp {
4693                span,
4694                kind: ast::ClassSetBinaryOpKind::SymmetricDifference,
4695                lhs: Box::new(lhs),
4696                rhs: Box::new(rhs),
4697            })
4698        }
4699
4700        fn itemset(item: ast::ClassSetItem) -> ast::ClassSet {
4701            ast::ClassSet::Item(item)
4702        }
4703
4704        fn item_ascii(cls: ast::ClassAscii) -> ast::ClassSetItem {
4705            ast::ClassSetItem::Ascii(cls)
4706        }
4707
4708        fn item_unicode(cls: ast::ClassUnicode) -> ast::ClassSetItem {
4709            ast::ClassSetItem::Unicode(cls)
4710        }
4711
4712        fn item_perl(cls: ast::ClassPerl) -> ast::ClassSetItem {
4713            ast::ClassSetItem::Perl(cls)
4714        }
4715
4716        fn item_bracket(cls: ast::ClassBracketed) -> ast::ClassSetItem {
4717            ast::ClassSetItem::Bracketed(Box::new(cls))
4718        }
4719
4720        fn lit(span: Span, c: char) -> ast::ClassSetItem {
4721            ast::ClassSetItem::Literal(ast::Literal {
4722                span,
4723                kind: ast::LiteralKind::Verbatim,
4724                c,
4725            })
4726        }
4727
4728        fn empty(span: Span) -> ast::ClassSetItem {
4729            ast::ClassSetItem::Empty(span)
4730        }
4731
4732        fn range(span: Span, start: char, end: char) -> ast::ClassSetItem {
4733            let pos1 = Position {
4734                offset: span.start.offset + start.len_utf8(),
4735                column: span.start.column + 1,
4736                ..span.start
4737            };
4738            let pos2 = Position {
4739                offset: span.end.offset - end.len_utf8(),
4740                column: span.end.column - 1,
4741                ..span.end
4742            };
4743            ast::ClassSetItem::Range(ast::ClassSetRange {
4744                span,
4745                start: ast::Literal {
4746                    span: Span { end: pos1, ..span },
4747                    kind: ast::LiteralKind::Verbatim,
4748                    c: start,
4749                },
4750                end: ast::Literal {
4751                    span: Span { start: pos2, ..span },
4752                    kind: ast::LiteralKind::Verbatim,
4753                    c: end,
4754                },
4755            })
4756        }
4757
4758        fn alnum(span: Span, negated: bool) -> ast::ClassAscii {
4759            ast::ClassAscii { span, kind: ast::ClassAsciiKind::Alnum, negated }
4760        }
4761
4762        fn lower(span: Span, negated: bool) -> ast::ClassAscii {
4763            ast::ClassAscii { span, kind: ast::ClassAsciiKind::Lower, negated }
4764        }
4765
4766        assert_eq!(
4767            parser("[[:alnum:]]").parse(),
4768            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4769                span: span(0..11),
4770                negated: false,
4771                kind: itemset(item_ascii(alnum(span(1..10), false))),
4772            })))
4773        );
4774        assert_eq!(
4775            parser("[[[:alnum:]]]").parse(),
4776            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4777                span: span(0..13),
4778                negated: false,
4779                kind: itemset(item_bracket(ast::ClassBracketed {
4780                    span: span(1..12),
4781                    negated: false,
4782                    kind: itemset(item_ascii(alnum(span(2..11), false))),
4783                })),
4784            })))
4785        );
4786        assert_eq!(
4787            parser("[[:alnum:]&&[:lower:]]").parse(),
4788            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4789                span: span(0..22),
4790                negated: false,
4791                kind: intersection(
4792                    span(1..21),
4793                    itemset(item_ascii(alnum(span(1..10), false))),
4794                    itemset(item_ascii(lower(span(12..21), false))),
4795                ),
4796            })))
4797        );
4798        assert_eq!(
4799            parser("[[:alnum:]--[:lower:]]").parse(),
4800            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4801                span: span(0..22),
4802                negated: false,
4803                kind: difference(
4804                    span(1..21),
4805                    itemset(item_ascii(alnum(span(1..10), false))),
4806                    itemset(item_ascii(lower(span(12..21), false))),
4807                ),
4808            })))
4809        );
4810        assert_eq!(
4811            parser("[[:alnum:]~~[:lower:]]").parse(),
4812            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4813                span: span(0..22),
4814                negated: false,
4815                kind: symdifference(
4816                    span(1..21),
4817                    itemset(item_ascii(alnum(span(1..10), false))),
4818                    itemset(item_ascii(lower(span(12..21), false))),
4819                ),
4820            })))
4821        );
4822
4823        assert_eq!(
4824            parser("[a]").parse(),
4825            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4826                span: span(0..3),
4827                negated: false,
4828                kind: itemset(lit(span(1..2), 'a')),
4829            })))
4830        );
4831        assert_eq!(
4832            parser(r"[a\]]").parse(),
4833            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4834                span: span(0..5),
4835                negated: false,
4836                kind: union(
4837                    span(1..4),
4838                    vec![
4839                        lit(span(1..2), 'a'),
4840                        ast::ClassSetItem::Literal(ast::Literal {
4841                            span: span(2..4),
4842                            kind: ast::LiteralKind::Punctuation,
4843                            c: ']',
4844                        }),
4845                    ]
4846                ),
4847            })))
4848        );
4849        assert_eq!(
4850            parser(r"[a\-z]").parse(),
4851            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4852                span: span(0..6),
4853                negated: false,
4854                kind: union(
4855                    span(1..5),
4856                    vec![
4857                        lit(span(1..2), 'a'),
4858                        ast::ClassSetItem::Literal(ast::Literal {
4859                            span: span(2..4),
4860                            kind: ast::LiteralKind::Punctuation,
4861                            c: '-',
4862                        }),
4863                        lit(span(4..5), 'z'),
4864                    ]
4865                ),
4866            })))
4867        );
4868        assert_eq!(
4869            parser("[ab]").parse(),
4870            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4871                span: span(0..4),
4872                negated: false,
4873                kind: union(
4874                    span(1..3),
4875                    vec![lit(span(1..2), 'a'), lit(span(2..3), 'b'),]
4876                ),
4877            })))
4878        );
4879        assert_eq!(
4880            parser("[a-]").parse(),
4881            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4882                span: span(0..4),
4883                negated: false,
4884                kind: union(
4885                    span(1..3),
4886                    vec![lit(span(1..2), 'a'), lit(span(2..3), '-'),]
4887                ),
4888            })))
4889        );
4890        assert_eq!(
4891            parser("[-a]").parse(),
4892            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4893                span: span(0..4),
4894                negated: false,
4895                kind: union(
4896                    span(1..3),
4897                    vec![lit(span(1..2), '-'), lit(span(2..3), 'a'),]
4898                ),
4899            })))
4900        );
4901        assert_eq!(
4902            parser(r"[\pL]").parse(),
4903            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4904                span: span(0..5),
4905                negated: false,
4906                kind: itemset(item_unicode(ast::ClassUnicode {
4907                    span: span(1..4),
4908                    negated: false,
4909                    kind: ast::ClassUnicodeKind::OneLetter('L'),
4910                })),
4911            })))
4912        );
4913        assert_eq!(
4914            parser(r"[\w]").parse(),
4915            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4916                span: span(0..4),
4917                negated: false,
4918                kind: itemset(item_perl(ast::ClassPerl {
4919                    span: span(1..3),
4920                    kind: ast::ClassPerlKind::Word,
4921                    negated: false,
4922                })),
4923            })))
4924        );
4925        assert_eq!(
4926            parser(r"[a\wz]").parse(),
4927            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4928                span: span(0..6),
4929                negated: false,
4930                kind: union(
4931                    span(1..5),
4932                    vec![
4933                        lit(span(1..2), 'a'),
4934                        item_perl(ast::ClassPerl {
4935                            span: span(2..4),
4936                            kind: ast::ClassPerlKind::Word,
4937                            negated: false,
4938                        }),
4939                        lit(span(4..5), 'z'),
4940                    ]
4941                ),
4942            })))
4943        );
4944
4945        assert_eq!(
4946            parser("[a-z]").parse(),
4947            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4948                span: span(0..5),
4949                negated: false,
4950                kind: itemset(range(span(1..4), 'a', 'z')),
4951            })))
4952        );
4953        assert_eq!(
4954            parser("[a-cx-z]").parse(),
4955            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4956                span: span(0..8),
4957                negated: false,
4958                kind: union(
4959                    span(1..7),
4960                    vec![
4961                        range(span(1..4), 'a', 'c'),
4962                        range(span(4..7), 'x', 'z'),
4963                    ]
4964                ),
4965            })))
4966        );
4967        assert_eq!(
4968            parser(r"[\w&&a-cx-z]").parse(),
4969            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4970                span: span(0..12),
4971                negated: false,
4972                kind: intersection(
4973                    span(1..11),
4974                    itemset(item_perl(ast::ClassPerl {
4975                        span: span(1..3),
4976                        kind: ast::ClassPerlKind::Word,
4977                        negated: false,
4978                    })),
4979                    union(
4980                        span(5..11),
4981                        vec![
4982                            range(span(5..8), 'a', 'c'),
4983                            range(span(8..11), 'x', 'z'),
4984                        ]
4985                    ),
4986                ),
4987            })))
4988        );
4989        assert_eq!(
4990            parser(r"[a-cx-z&&\w]").parse(),
4991            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
4992                span: span(0..12),
4993                negated: false,
4994                kind: intersection(
4995                    span(1..11),
4996                    union(
4997                        span(1..7),
4998                        vec![
4999                            range(span(1..4), 'a', 'c'),
5000                            range(span(4..7), 'x', 'z'),
5001                        ]
5002                    ),
5003                    itemset(item_perl(ast::ClassPerl {
5004                        span: span(9..11),
5005                        kind: ast::ClassPerlKind::Word,
5006                        negated: false,
5007                    })),
5008                ),
5009            })))
5010        );
5011        assert_eq!(
5012            parser(r"[a--b--c]").parse(),
5013            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5014                span: span(0..9),
5015                negated: false,
5016                kind: difference(
5017                    span(1..8),
5018                    difference(
5019                        span(1..5),
5020                        itemset(lit(span(1..2), 'a')),
5021                        itemset(lit(span(4..5), 'b')),
5022                    ),
5023                    itemset(lit(span(7..8), 'c')),
5024                ),
5025            })))
5026        );
5027        assert_eq!(
5028            parser(r"[a~~b~~c]").parse(),
5029            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5030                span: span(0..9),
5031                negated: false,
5032                kind: symdifference(
5033                    span(1..8),
5034                    symdifference(
5035                        span(1..5),
5036                        itemset(lit(span(1..2), 'a')),
5037                        itemset(lit(span(4..5), 'b')),
5038                    ),
5039                    itemset(lit(span(7..8), 'c')),
5040                ),
5041            })))
5042        );
5043        assert_eq!(
5044            parser(r"[\^&&^]").parse(),
5045            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5046                span: span(0..7),
5047                negated: false,
5048                kind: intersection(
5049                    span(1..6),
5050                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5051                        span: span(1..3),
5052                        kind: ast::LiteralKind::Punctuation,
5053                        c: '^',
5054                    })),
5055                    itemset(lit(span(5..6), '^')),
5056                ),
5057            })))
5058        );
5059        assert_eq!(
5060            parser(r"[\&&&&]").parse(),
5061            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5062                span: span(0..7),
5063                negated: false,
5064                kind: intersection(
5065                    span(1..6),
5066                    itemset(ast::ClassSetItem::Literal(ast::Literal {
5067                        span: span(1..3),
5068                        kind: ast::LiteralKind::Punctuation,
5069                        c: '&',
5070                    })),
5071                    itemset(lit(span(5..6), '&')),
5072                ),
5073            })))
5074        );
5075        assert_eq!(
5076            parser(r"[&&&&]").parse(),
5077            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5078                span: span(0..6),
5079                negated: false,
5080                kind: intersection(
5081                    span(1..5),
5082                    intersection(
5083                        span(1..3),
5084                        itemset(empty(span(1..1))),
5085                        itemset(empty(span(3..3))),
5086                    ),
5087                    itemset(empty(span(5..5))),
5088                ),
5089            })))
5090        );
5091
5092        let pat = "[☃-⛄]";
5093        assert_eq!(
5094            parser(pat).parse(),
5095            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5096                span: span_range(pat, 0..9),
5097                negated: false,
5098                kind: itemset(ast::ClassSetItem::Range(ast::ClassSetRange {
5099                    span: span_range(pat, 1..8),
5100                    start: ast::Literal {
5101                        span: span_range(pat, 1..4),
5102                        kind: ast::LiteralKind::Verbatim,
5103                        c: '☃',
5104                    },
5105                    end: ast::Literal {
5106                        span: span_range(pat, 5..8),
5107                        kind: ast::LiteralKind::Verbatim,
5108                        c: '⛄',
5109                    },
5110                })),
5111            })))
5112        );
5113
5114        assert_eq!(
5115            parser(r"[]]").parse(),
5116            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5117                span: span(0..3),
5118                negated: false,
5119                kind: itemset(lit(span(1..2), ']')),
5120            })))
5121        );
5122        assert_eq!(
5123            parser(r"[]\[]").parse(),
5124            Ok(Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5125                span: span(0..5),
5126                negated: false,
5127                kind: union(
5128                    span(1..4),
5129                    vec![
5130                        lit(span(1..2), ']'),
5131                        ast::ClassSetItem::Literal(ast::Literal {
5132                            span: span(2..4),
5133                            kind: ast::LiteralKind::Punctuation,
5134                            c: '[',
5135                        }),
5136                    ]
5137                ),
5138            })))
5139        );
5140        assert_eq!(
5141            parser(r"[\[]]").parse(),
5142            Ok(concat(
5143                0..5,
5144                vec![
5145                    Ast::Class(ast::Class::Bracketed(ast::ClassBracketed {
5146                        span: span(0..4),
5147                        negated: false,
5148                        kind: itemset(ast::ClassSetItem::Literal(
5149                            ast::Literal {
5150                                span: span(1..3),
5151                                kind: ast::LiteralKind::Punctuation,
5152                                c: '[',
5153                            }
5154                        )),
5155                    })),
5156                    Ast::Literal(ast::Literal {
5157                        span: span(4..5),
5158                        kind: ast::LiteralKind::Verbatim,
5159                        c: ']',
5160                    }),
5161                ]
5162            ))
5163        );
5164
5165        assert_eq!(
5166            parser("[").parse().unwrap_err(),
5167            TestError {
5168                span: span(0..1),
5169                kind: ast::ErrorKind::ClassUnclosed,
5170            }
5171        );
5172        assert_eq!(
5173            parser("[[").parse().unwrap_err(),
5174            TestError {
5175                span: span(1..2),
5176                kind: ast::ErrorKind::ClassUnclosed,
5177            }
5178        );
5179        assert_eq!(
5180            parser("[[-]").parse().unwrap_err(),
5181            TestError {
5182                span: span(0..1),
5183                kind: ast::ErrorKind::ClassUnclosed,
5184            }
5185        );
5186        assert_eq!(
5187            parser("[[[:alnum:]").parse().unwrap_err(),
5188            TestError {
5189                span: span(1..2),
5190                kind: ast::ErrorKind::ClassUnclosed,
5191            }
5192        );
5193        assert_eq!(
5194            parser(r"[\b]").parse().unwrap_err(),
5195            TestError {
5196                span: span(1..3),
5197                kind: ast::ErrorKind::ClassEscapeInvalid,
5198            }
5199        );
5200        assert_eq!(
5201            parser(r"[\w-a]").parse().unwrap_err(),
5202            TestError {
5203                span: span(1..3),
5204                kind: ast::ErrorKind::ClassRangeLiteral,
5205            }
5206        );
5207        assert_eq!(
5208            parser(r"[a-\w]").parse().unwrap_err(),
5209            TestError {
5210                span: span(3..5),
5211                kind: ast::ErrorKind::ClassRangeLiteral,
5212            }
5213        );
5214        assert_eq!(
5215            parser(r"[z-a]").parse().unwrap_err(),
5216            TestError {
5217                span: span(1..4),
5218                kind: ast::ErrorKind::ClassRangeInvalid,
5219            }
5220        );
5221
5222        assert_eq!(
5223            parser_ignore_whitespace("[a ").parse().unwrap_err(),
5224            TestError {
5225                span: span(0..1),
5226                kind: ast::ErrorKind::ClassUnclosed,
5227            }
5228        );
5229        assert_eq!(
5230            parser_ignore_whitespace("[a- ").parse().unwrap_err(),
5231            TestError {
5232                span: span(0..1),
5233                kind: ast::ErrorKind::ClassUnclosed,
5234            }
5235        );
5236    }
5237
5238    #[test]
5239    fn parse_set_class_open() {
5240        assert_eq!(parser("[a]").parse_set_class_open(), {
5241            let set = ast::ClassBracketed {
5242                span: span(0..1),
5243                negated: false,
5244                kind: ast::ClassSet::union(ast::ClassSetUnion {
5245                    span: span(1..1),
5246                    items: vec![],
5247                }),
5248            };
5249            let union = ast::ClassSetUnion { span: span(1..1), items: vec![] };
5250            Ok((set, union))
5251        });
5252        assert_eq!(
5253            parser_ignore_whitespace("[   a]").parse_set_class_open(),
5254            {
5255                let set = ast::ClassBracketed {
5256                    span: span(0..4),
5257                    negated: false,
5258                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5259                        span: span(4..4),
5260                        items: vec![],
5261                    }),
5262                };
5263                let union =
5264                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5265                Ok((set, union))
5266            }
5267        );
5268        assert_eq!(parser("[^a]").parse_set_class_open(), {
5269            let set = ast::ClassBracketed {
5270                span: span(0..2),
5271                negated: true,
5272                kind: ast::ClassSet::union(ast::ClassSetUnion {
5273                    span: span(2..2),
5274                    items: vec![],
5275                }),
5276            };
5277            let union = ast::ClassSetUnion { span: span(2..2), items: vec![] };
5278            Ok((set, union))
5279        });
5280        assert_eq!(
5281            parser_ignore_whitespace("[ ^ a]").parse_set_class_open(),
5282            {
5283                let set = ast::ClassBracketed {
5284                    span: span(0..4),
5285                    negated: true,
5286                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5287                        span: span(4..4),
5288                        items: vec![],
5289                    }),
5290                };
5291                let union =
5292                    ast::ClassSetUnion { span: span(4..4), items: vec![] };
5293                Ok((set, union))
5294            }
5295        );
5296        assert_eq!(parser("[-a]").parse_set_class_open(), {
5297            let set = ast::ClassBracketed {
5298                span: span(0..2),
5299                negated: false,
5300                kind: ast::ClassSet::union(ast::ClassSetUnion {
5301                    span: span(1..1),
5302                    items: vec![],
5303                }),
5304            };
5305            let union = ast::ClassSetUnion {
5306                span: span(1..2),
5307                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5308                    span: span(1..2),
5309                    kind: ast::LiteralKind::Verbatim,
5310                    c: '-',
5311                })],
5312            };
5313            Ok((set, union))
5314        });
5315        assert_eq!(
5316            parser_ignore_whitespace("[ - a]").parse_set_class_open(),
5317            {
5318                let set = ast::ClassBracketed {
5319                    span: span(0..4),
5320                    negated: false,
5321                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5322                        span: span(2..2),
5323                        items: vec![],
5324                    }),
5325                };
5326                let union = ast::ClassSetUnion {
5327                    span: span(2..3),
5328                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5329                        span: span(2..3),
5330                        kind: ast::LiteralKind::Verbatim,
5331                        c: '-',
5332                    })],
5333                };
5334                Ok((set, union))
5335            }
5336        );
5337        assert_eq!(parser("[^-a]").parse_set_class_open(), {
5338            let set = ast::ClassBracketed {
5339                span: span(0..3),
5340                negated: true,
5341                kind: ast::ClassSet::union(ast::ClassSetUnion {
5342                    span: span(2..2),
5343                    items: vec![],
5344                }),
5345            };
5346            let union = ast::ClassSetUnion {
5347                span: span(2..3),
5348                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5349                    span: span(2..3),
5350                    kind: ast::LiteralKind::Verbatim,
5351                    c: '-',
5352                })],
5353            };
5354            Ok((set, union))
5355        });
5356        assert_eq!(parser("[--a]").parse_set_class_open(), {
5357            let set = ast::ClassBracketed {
5358                span: span(0..3),
5359                negated: false,
5360                kind: ast::ClassSet::union(ast::ClassSetUnion {
5361                    span: span(1..1),
5362                    items: vec![],
5363                }),
5364            };
5365            let union = ast::ClassSetUnion {
5366                span: span(1..3),
5367                items: vec![
5368                    ast::ClassSetItem::Literal(ast::Literal {
5369                        span: span(1..2),
5370                        kind: ast::LiteralKind::Verbatim,
5371                        c: '-',
5372                    }),
5373                    ast::ClassSetItem::Literal(ast::Literal {
5374                        span: span(2..3),
5375                        kind: ast::LiteralKind::Verbatim,
5376                        c: '-',
5377                    }),
5378                ],
5379            };
5380            Ok((set, union))
5381        });
5382        assert_eq!(parser("[]a]").parse_set_class_open(), {
5383            let set = ast::ClassBracketed {
5384                span: span(0..2),
5385                negated: false,
5386                kind: ast::ClassSet::union(ast::ClassSetUnion {
5387                    span: span(1..1),
5388                    items: vec![],
5389                }),
5390            };
5391            let union = ast::ClassSetUnion {
5392                span: span(1..2),
5393                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5394                    span: span(1..2),
5395                    kind: ast::LiteralKind::Verbatim,
5396                    c: ']',
5397                })],
5398            };
5399            Ok((set, union))
5400        });
5401        assert_eq!(
5402            parser_ignore_whitespace("[ ] a]").parse_set_class_open(),
5403            {
5404                let set = ast::ClassBracketed {
5405                    span: span(0..4),
5406                    negated: false,
5407                    kind: ast::ClassSet::union(ast::ClassSetUnion {
5408                        span: span(2..2),
5409                        items: vec![],
5410                    }),
5411                };
5412                let union = ast::ClassSetUnion {
5413                    span: span(2..3),
5414                    items: vec![ast::ClassSetItem::Literal(ast::Literal {
5415                        span: span(2..3),
5416                        kind: ast::LiteralKind::Verbatim,
5417                        c: ']',
5418                    })],
5419                };
5420                Ok((set, union))
5421            }
5422        );
5423        assert_eq!(parser("[^]a]").parse_set_class_open(), {
5424            let set = ast::ClassBracketed {
5425                span: span(0..3),
5426                negated: true,
5427                kind: ast::ClassSet::union(ast::ClassSetUnion {
5428                    span: span(2..2),
5429                    items: vec![],
5430                }),
5431            };
5432            let union = ast::ClassSetUnion {
5433                span: span(2..3),
5434                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5435                    span: span(2..3),
5436                    kind: ast::LiteralKind::Verbatim,
5437                    c: ']',
5438                })],
5439            };
5440            Ok((set, union))
5441        });
5442        assert_eq!(parser("[-]a]").parse_set_class_open(), {
5443            let set = ast::ClassBracketed {
5444                span: span(0..2),
5445                negated: false,
5446                kind: ast::ClassSet::union(ast::ClassSetUnion {
5447                    span: span(1..1),
5448                    items: vec![],
5449                }),
5450            };
5451            let union = ast::ClassSetUnion {
5452                span: span(1..2),
5453                items: vec![ast::ClassSetItem::Literal(ast::Literal {
5454                    span: span(1..2),
5455                    kind: ast::LiteralKind::Verbatim,
5456                    c: '-',
5457                })],
5458            };
5459            Ok((set, union))
5460        });
5461
5462        assert_eq!(
5463            parser("[").parse_set_class_open().unwrap_err(),
5464            TestError {
5465                span: span(0..1),
5466                kind: ast::ErrorKind::ClassUnclosed,
5467            }
5468        );
5469        assert_eq!(
5470            parser_ignore_whitespace("[    ")
5471                .parse_set_class_open()
5472                .unwrap_err(),
5473            TestError {
5474                span: span(0..5),
5475                kind: ast::ErrorKind::ClassUnclosed,
5476            }
5477        );
5478        assert_eq!(
5479            parser("[^").parse_set_class_open().unwrap_err(),
5480            TestError {
5481                span: span(0..2),
5482                kind: ast::ErrorKind::ClassUnclosed,
5483            }
5484        );
5485        assert_eq!(
5486            parser("[]").parse_set_class_open().unwrap_err(),
5487            TestError {
5488                span: span(0..2),
5489                kind: ast::ErrorKind::ClassUnclosed,
5490            }
5491        );
5492        assert_eq!(
5493            parser("[-").parse_set_class_open().unwrap_err(),
5494            TestError {
5495                span: span(0..0),
5496                kind: ast::ErrorKind::ClassUnclosed,
5497            }
5498        );
5499        assert_eq!(
5500            parser("[--").parse_set_class_open().unwrap_err(),
5501            TestError {
5502                span: span(0..0),
5503                kind: ast::ErrorKind::ClassUnclosed,
5504            }
5505        );
5506
5507        // See: https://github.com/rust-lang/regex/issues/792
5508        assert_eq!(
5509            parser("(?x)[-#]").parse_with_comments().unwrap_err(),
5510            TestError {
5511                span: span(4..4),
5512                kind: ast::ErrorKind::ClassUnclosed,
5513            }
5514        );
5515    }
5516
5517    #[test]
5518    fn maybe_parse_ascii_class() {
5519        assert_eq!(
5520            parser(r"[:alnum:]").maybe_parse_ascii_class(),
5521            Some(ast::ClassAscii {
5522                span: span(0..9),
5523                kind: ast::ClassAsciiKind::Alnum,
5524                negated: false,
5525            })
5526        );
5527        assert_eq!(
5528            parser(r"[:alnum:]A").maybe_parse_ascii_class(),
5529            Some(ast::ClassAscii {
5530                span: span(0..9),
5531                kind: ast::ClassAsciiKind::Alnum,
5532                negated: false,
5533            })
5534        );
5535        assert_eq!(
5536            parser(r"[:^alnum:]").maybe_parse_ascii_class(),
5537            Some(ast::ClassAscii {
5538                span: span(0..10),
5539                kind: ast::ClassAsciiKind::Alnum,
5540                negated: true,
5541            })
5542        );
5543
5544        let p = parser(r"[:");
5545        assert_eq!(p.maybe_parse_ascii_class(), None);
5546        assert_eq!(p.offset(), 0);
5547
5548        let p = parser(r"[:^");
5549        assert_eq!(p.maybe_parse_ascii_class(), None);
5550        assert_eq!(p.offset(), 0);
5551
5552        let p = parser(r"[^:alnum:]");
5553        assert_eq!(p.maybe_parse_ascii_class(), None);
5554        assert_eq!(p.offset(), 0);
5555
5556        let p = parser(r"[:alnnum:]");
5557        assert_eq!(p.maybe_parse_ascii_class(), None);
5558        assert_eq!(p.offset(), 0);
5559
5560        let p = parser(r"[:alnum]");
5561        assert_eq!(p.maybe_parse_ascii_class(), None);
5562        assert_eq!(p.offset(), 0);
5563
5564        let p = parser(r"[:alnum:");
5565        assert_eq!(p.maybe_parse_ascii_class(), None);
5566        assert_eq!(p.offset(), 0);
5567    }
5568
5569    #[test]
5570    fn parse_unicode_class() {
5571        assert_eq!(
5572            parser(r"\pN").parse_escape(),
5573            Ok(Primitive::Unicode(ast::ClassUnicode {
5574                span: span(0..3),
5575                negated: false,
5576                kind: ast::ClassUnicodeKind::OneLetter('N'),
5577            }))
5578        );
5579        assert_eq!(
5580            parser(r"\PN").parse_escape(),
5581            Ok(Primitive::Unicode(ast::ClassUnicode {
5582                span: span(0..3),
5583                negated: true,
5584                kind: ast::ClassUnicodeKind::OneLetter('N'),
5585            }))
5586        );
5587        assert_eq!(
5588            parser(r"\p{N}").parse_escape(),
5589            Ok(Primitive::Unicode(ast::ClassUnicode {
5590                span: span(0..5),
5591                negated: false,
5592                kind: ast::ClassUnicodeKind::Named(s("N")),
5593            }))
5594        );
5595        assert_eq!(
5596            parser(r"\P{N}").parse_escape(),
5597            Ok(Primitive::Unicode(ast::ClassUnicode {
5598                span: span(0..5),
5599                negated: true,
5600                kind: ast::ClassUnicodeKind::Named(s("N")),
5601            }))
5602        );
5603        assert_eq!(
5604            parser(r"\p{Greek}").parse_escape(),
5605            Ok(Primitive::Unicode(ast::ClassUnicode {
5606                span: span(0..9),
5607                negated: false,
5608                kind: ast::ClassUnicodeKind::Named(s("Greek")),
5609            }))
5610        );
5611
5612        assert_eq!(
5613            parser(r"\p{scx:Katakana}").parse_escape(),
5614            Ok(Primitive::Unicode(ast::ClassUnicode {
5615                span: span(0..16),
5616                negated: false,
5617                kind: ast::ClassUnicodeKind::NamedValue {
5618                    op: ast::ClassUnicodeOpKind::Colon,
5619                    name: s("scx"),
5620                    value: s("Katakana"),
5621                },
5622            }))
5623        );
5624        assert_eq!(
5625            parser(r"\p{scx=Katakana}").parse_escape(),
5626            Ok(Primitive::Unicode(ast::ClassUnicode {
5627                span: span(0..16),
5628                negated: false,
5629                kind: ast::ClassUnicodeKind::NamedValue {
5630                    op: ast::ClassUnicodeOpKind::Equal,
5631                    name: s("scx"),
5632                    value: s("Katakana"),
5633                },
5634            }))
5635        );
5636        assert_eq!(
5637            parser(r"\p{scx!=Katakana}").parse_escape(),
5638            Ok(Primitive::Unicode(ast::ClassUnicode {
5639                span: span(0..17),
5640                negated: false,
5641                kind: ast::ClassUnicodeKind::NamedValue {
5642                    op: ast::ClassUnicodeOpKind::NotEqual,
5643                    name: s("scx"),
5644                    value: s("Katakana"),
5645                },
5646            }))
5647        );
5648
5649        assert_eq!(
5650            parser(r"\p{:}").parse_escape(),
5651            Ok(Primitive::Unicode(ast::ClassUnicode {
5652                span: span(0..5),
5653                negated: false,
5654                kind: ast::ClassUnicodeKind::NamedValue {
5655                    op: ast::ClassUnicodeOpKind::Colon,
5656                    name: s(""),
5657                    value: s(""),
5658                },
5659            }))
5660        );
5661        assert_eq!(
5662            parser(r"\p{=}").parse_escape(),
5663            Ok(Primitive::Unicode(ast::ClassUnicode {
5664                span: span(0..5),
5665                negated: false,
5666                kind: ast::ClassUnicodeKind::NamedValue {
5667                    op: ast::ClassUnicodeOpKind::Equal,
5668                    name: s(""),
5669                    value: s(""),
5670                },
5671            }))
5672        );
5673        assert_eq!(
5674            parser(r"\p{!=}").parse_escape(),
5675            Ok(Primitive::Unicode(ast::ClassUnicode {
5676                span: span(0..6),
5677                negated: false,
5678                kind: ast::ClassUnicodeKind::NamedValue {
5679                    op: ast::ClassUnicodeOpKind::NotEqual,
5680                    name: s(""),
5681                    value: s(""),
5682                },
5683            }))
5684        );
5685
5686        assert_eq!(
5687            parser(r"\p").parse_escape().unwrap_err(),
5688            TestError {
5689                span: span(2..2),
5690                kind: ast::ErrorKind::EscapeUnexpectedEof,
5691            }
5692        );
5693        assert_eq!(
5694            parser(r"\p{").parse_escape().unwrap_err(),
5695            TestError {
5696                span: span(3..3),
5697                kind: ast::ErrorKind::EscapeUnexpectedEof,
5698            }
5699        );
5700        assert_eq!(
5701            parser(r"\p{N").parse_escape().unwrap_err(),
5702            TestError {
5703                span: span(4..4),
5704                kind: ast::ErrorKind::EscapeUnexpectedEof,
5705            }
5706        );
5707        assert_eq!(
5708            parser(r"\p{Greek").parse_escape().unwrap_err(),
5709            TestError {
5710                span: span(8..8),
5711                kind: ast::ErrorKind::EscapeUnexpectedEof,
5712            }
5713        );
5714
5715        assert_eq!(
5716            parser(r"\pNz").parse(),
5717            Ok(Ast::Concat(ast::Concat {
5718                span: span(0..4),
5719                asts: vec![
5720                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5721                        span: span(0..3),
5722                        negated: false,
5723                        kind: ast::ClassUnicodeKind::OneLetter('N'),
5724                    })),
5725                    Ast::Literal(ast::Literal {
5726                        span: span(3..4),
5727                        kind: ast::LiteralKind::Verbatim,
5728                        c: 'z',
5729                    }),
5730                ],
5731            }))
5732        );
5733        assert_eq!(
5734            parser(r"\p{Greek}z").parse(),
5735            Ok(Ast::Concat(ast::Concat {
5736                span: span(0..10),
5737                asts: vec![
5738                    Ast::Class(ast::Class::Unicode(ast::ClassUnicode {
5739                        span: span(0..9),
5740                        negated: false,
5741                        kind: ast::ClassUnicodeKind::Named(s("Greek")),
5742                    })),
5743                    Ast::Literal(ast::Literal {
5744                        span: span(9..10),
5745                        kind: ast::LiteralKind::Verbatim,
5746                        c: 'z',
5747                    }),
5748                ],
5749            }))
5750        );
5751        assert_eq!(
5752            parser(r"\p\{").parse().unwrap_err(),
5753            TestError {
5754                span: span(2..3),
5755                kind: ast::ErrorKind::UnicodeClassInvalid,
5756            }
5757        );
5758        assert_eq!(
5759            parser(r"\P\{").parse().unwrap_err(),
5760            TestError {
5761                span: span(2..3),
5762                kind: ast::ErrorKind::UnicodeClassInvalid,
5763            }
5764        );
5765    }
5766
5767    #[test]
5768    fn parse_perl_class() {
5769        assert_eq!(
5770            parser(r"\d").parse_escape(),
5771            Ok(Primitive::Perl(ast::ClassPerl {
5772                span: span(0..2),
5773                kind: ast::ClassPerlKind::Digit,
5774                negated: false,
5775            }))
5776        );
5777        assert_eq!(
5778            parser(r"\D").parse_escape(),
5779            Ok(Primitive::Perl(ast::ClassPerl {
5780                span: span(0..2),
5781                kind: ast::ClassPerlKind::Digit,
5782                negated: true,
5783            }))
5784        );
5785        assert_eq!(
5786            parser(r"\s").parse_escape(),
5787            Ok(Primitive::Perl(ast::ClassPerl {
5788                span: span(0..2),
5789                kind: ast::ClassPerlKind::Space,
5790                negated: false,
5791            }))
5792        );
5793        assert_eq!(
5794            parser(r"\S").parse_escape(),
5795            Ok(Primitive::Perl(ast::ClassPerl {
5796                span: span(0..2),
5797                kind: ast::ClassPerlKind::Space,
5798                negated: true,
5799            }))
5800        );
5801        assert_eq!(
5802            parser(r"\w").parse_escape(),
5803            Ok(Primitive::Perl(ast::ClassPerl {
5804                span: span(0..2),
5805                kind: ast::ClassPerlKind::Word,
5806                negated: false,
5807            }))
5808        );
5809        assert_eq!(
5810            parser(r"\W").parse_escape(),
5811            Ok(Primitive::Perl(ast::ClassPerl {
5812                span: span(0..2),
5813                kind: ast::ClassPerlKind::Word,
5814                negated: true,
5815            }))
5816        );
5817
5818        assert_eq!(
5819            parser(r"\d").parse(),
5820            Ok(Ast::Class(ast::Class::Perl(ast::ClassPerl {
5821                span: span(0..2),
5822                kind: ast::ClassPerlKind::Digit,
5823                negated: false,
5824            })))
5825        );
5826        assert_eq!(
5827            parser(r"\dz").parse(),
5828            Ok(Ast::Concat(ast::Concat {
5829                span: span(0..3),
5830                asts: vec![
5831                    Ast::Class(ast::Class::Perl(ast::ClassPerl {
5832                        span: span(0..2),
5833                        kind: ast::ClassPerlKind::Digit,
5834                        negated: false,
5835                    })),
5836                    Ast::Literal(ast::Literal {
5837                        span: span(2..3),
5838                        kind: ast::LiteralKind::Verbatim,
5839                        c: 'z',
5840                    }),
5841                ],
5842            }))
5843        );
5844    }
5845
5846    // This tests a bug fix where the nest limit checker wasn't decrementing
5847    // its depth during post-traversal, which causes long regexes to trip
5848    // the default limit too aggressively.
5849    #[test]
5850    fn regression_454_nest_too_big() {
5851        let pattern = r#"
5852        2(?:
5853          [45]\d{3}|
5854          7(?:
5855            1[0-267]|
5856            2[0-289]|
5857            3[0-29]|
5858            4[01]|
5859            5[1-3]|
5860            6[013]|
5861            7[0178]|
5862            91
5863          )|
5864          8(?:
5865            0[125]|
5866            [139][1-6]|
5867            2[0157-9]|
5868            41|
5869            6[1-35]|
5870            7[1-5]|
5871            8[1-8]|
5872            90
5873          )|
5874          9(?:
5875            0[0-2]|
5876            1[0-4]|
5877            2[568]|
5878            3[3-6]|
5879            5[5-7]|
5880            6[0167]|
5881            7[15]|
5882            8[0146-9]
5883          )
5884        )\d{4}
5885        "#;
5886        assert!(parser_nest_limit(pattern, 50).parse().is_ok());
5887    }
5888
5889    // This tests that we treat a trailing `-` in a character class as a
5890    // literal `-` even when whitespace mode is enabled and there is whitespace
5891    // after the trailing `-`.
5892    #[test]
5893    fn regression_455_trailing_dash_ignore_whitespace() {
5894        assert!(parser("(?x)[ / - ]").parse().is_ok());
5895        assert!(parser("(?x)[ a - ]").parse().is_ok());
5896        assert!(parser(
5897            "(?x)[
5898            a
5899            - ]
5900        "
5901        )
5902        .parse()
5903        .is_ok());
5904        assert!(parser(
5905            "(?x)[
5906            a # wat
5907            - ]
5908        "
5909        )
5910        .parse()
5911        .is_ok());
5912
5913        assert!(parser("(?x)[ / -").parse().is_err());
5914        assert!(parser("(?x)[ / - ").parse().is_err());
5915        assert!(parser(
5916            "(?x)[
5917            / -
5918        "
5919        )
5920        .parse()
5921        .is_err());
5922        assert!(parser(
5923            "(?x)[
5924            / - # wat
5925        "
5926        )
5927        .parse()
5928        .is_err());
5929    }
5930}