regex_syntax/ast/
print.rs

1/*!
2This module provides a regular expression printer for `Ast`.
3*/
4
5use std::fmt;
6
7use crate::ast::visitor::{self, Visitor};
8use crate::ast::{self, Ast};
9
/// Builder used to construct a `Printer`.
///
/// A printer currently has no configuration options at all, which is why
/// this type is kept private to the module instead of being exported.
#[derive(Debug, Clone)]
struct PrinterBuilder {
    _priv: (),
}
18
19impl Default for PrinterBuilder {
20    fn default() -> PrinterBuilder {
21        PrinterBuilder::new()
22    }
23}
24
25impl PrinterBuilder {
26    fn new() -> PrinterBuilder {
27        PrinterBuilder { _priv: () }
28    }
29
30    fn build(&self) -> Printer {
31        Printer { _priv: () }
32    }
33}
34
/// Turns a regular expression abstract syntax tree (AST) back into a
/// pattern string.
///
/// The printer runs in constant stack space and uses heap space
/// proportional to the size of the AST.
///
/// The output is not guaranteed to reproduce the formatting of the pattern
/// the AST was parsed from. For example, all whitespace and comments are
/// ignored.
#[derive(Debug)]
pub struct Printer {
    _priv: (),
}
48
49impl Printer {
50    /// Create a new printer.
51    pub fn new() -> Printer {
52        PrinterBuilder::new().build()
53    }
54
55    /// Print the given `Ast` to the given writer. The writer must implement
56    /// `fmt::Write`. Typical implementations of `fmt::Write` that can be used
57    /// here are a `fmt::Formatter` (which is available in `fmt::Display`
58    /// implementations) or a `&mut String`.
59    pub fn print<W: fmt::Write>(&mut self, ast: &Ast, wtr: W) -> fmt::Result {
60        visitor::visit(ast, Writer { wtr })
61    }
62}
63
/// Visitor state for one print run: it only carries the destination that
/// the pattern text is written to.
#[derive(Debug)]
struct Writer<W> {
    /// Destination of the printed pattern.
    wtr: W,
}
68
69impl<W: fmt::Write> Visitor for Writer<W> {
70    type Output = ();
71    type Err = fmt::Error;
72
73    fn finish(self) -> fmt::Result {
74        Ok(())
75    }
76
77    fn visit_pre(&mut self, ast: &Ast) -> fmt::Result {
78        match *ast {
79            Ast::Group(ref x) => self.fmt_group_pre(x),
80            Ast::Class(ast::Class::Bracketed(ref x)) => {
81                self.fmt_class_bracketed_pre(x)
82            }
83            _ => Ok(()),
84        }
85    }
86
87    fn visit_post(&mut self, ast: &Ast) -> fmt::Result {
88        use crate::ast::Class;
89
90        match *ast {
91            Ast::Empty(_) => Ok(()),
92            Ast::Flags(ref x) => self.fmt_set_flags(x),
93            Ast::Literal(ref x) => self.fmt_literal(x),
94            Ast::Dot(_) => self.wtr.write_str("."),
95            Ast::Assertion(ref x) => self.fmt_assertion(x),
96            Ast::Class(Class::Perl(ref x)) => self.fmt_class_perl(x),
97            Ast::Class(Class::Unicode(ref x)) => self.fmt_class_unicode(x),
98            Ast::Class(Class::Bracketed(ref x)) => {
99                self.fmt_class_bracketed_post(x)
100            }
101            Ast::Repetition(ref x) => self.fmt_repetition(x),
102            Ast::Group(ref x) => self.fmt_group_post(x),
103            Ast::Alternation(_) => Ok(()),
104            Ast::Concat(_) => Ok(()),
105        }
106    }
107
108    fn visit_alternation_in(&mut self) -> fmt::Result {
109        self.wtr.write_str("|")
110    }
111
112    fn visit_class_set_item_pre(
113        &mut self,
114        ast: &ast::ClassSetItem,
115    ) -> Result<(), Self::Err> {
116        match *ast {
117            ast::ClassSetItem::Bracketed(ref x) => {
118                self.fmt_class_bracketed_pre(x)
119            }
120            _ => Ok(()),
121        }
122    }
123
124    fn visit_class_set_item_post(
125        &mut self,
126        ast: &ast::ClassSetItem,
127    ) -> Result<(), Self::Err> {
128        use crate::ast::ClassSetItem::*;
129
130        match *ast {
131            Empty(_) => Ok(()),
132            Literal(ref x) => self.fmt_literal(x),
133            Range(ref x) => {
134                self.fmt_literal(&x.start)?;
135                self.wtr.write_str("-")?;
136                self.fmt_literal(&x.end)?;
137                Ok(())
138            }
139            Ascii(ref x) => self.fmt_class_ascii(x),
140            Unicode(ref x) => self.fmt_class_unicode(x),
141            Perl(ref x) => self.fmt_class_perl(x),
142            Bracketed(ref x) => self.fmt_class_bracketed_post(x),
143            Union(_) => Ok(()),
144        }
145    }
146
147    fn visit_class_set_binary_op_in(
148        &mut self,
149        ast: &ast::ClassSetBinaryOp,
150    ) -> Result<(), Self::Err> {
151        self.fmt_class_set_binary_op_kind(&ast.kind)
152    }
153}
154
155impl<W: fmt::Write> Writer<W> {
156    fn fmt_group_pre(&mut self, ast: &ast::Group) -> fmt::Result {
157        use crate::ast::GroupKind::*;
158        match ast.kind {
159            CaptureIndex(_) => self.wtr.write_str("("),
160            CaptureName(ref x) => {
161                self.wtr.write_str("(?P<")?;
162                self.wtr.write_str(&x.name)?;
163                self.wtr.write_str(">")?;
164                Ok(())
165            }
166            NonCapturing(ref flags) => {
167                self.wtr.write_str("(?")?;
168                self.fmt_flags(flags)?;
169                self.wtr.write_str(":")?;
170                Ok(())
171            }
172        }
173    }
174
175    fn fmt_group_post(&mut self, _ast: &ast::Group) -> fmt::Result {
176        self.wtr.write_str(")")
177    }
178
179    fn fmt_repetition(&mut self, ast: &ast::Repetition) -> fmt::Result {
180        use crate::ast::RepetitionKind::*;
181        match ast.op.kind {
182            ZeroOrOne if ast.greedy => self.wtr.write_str("?"),
183            ZeroOrOne => self.wtr.write_str("??"),
184            ZeroOrMore if ast.greedy => self.wtr.write_str("*"),
185            ZeroOrMore => self.wtr.write_str("*?"),
186            OneOrMore if ast.greedy => self.wtr.write_str("+"),
187            OneOrMore => self.wtr.write_str("+?"),
188            Range(ref x) => {
189                self.fmt_repetition_range(x)?;
190                if !ast.greedy {
191                    self.wtr.write_str("?")?;
192                }
193                Ok(())
194            }
195        }
196    }
197
198    fn fmt_repetition_range(
199        &mut self,
200        ast: &ast::RepetitionRange,
201    ) -> fmt::Result {
202        use crate::ast::RepetitionRange::*;
203        match *ast {
204            Exactly(x) => write!(self.wtr, "{{{}}}", x),
205            AtLeast(x) => write!(self.wtr, "{{{},}}", x),
206            Bounded(x, y) => write!(self.wtr, "{{{},{}}}", x, y),
207        }
208    }
209
210    fn fmt_literal(&mut self, ast: &ast::Literal) -> fmt::Result {
211        use crate::ast::LiteralKind::*;
212
213        match ast.kind {
214            Verbatim => self.wtr.write_char(ast.c),
215            Punctuation => write!(self.wtr, r"\{}", ast.c),
216            Octal => write!(self.wtr, r"\{:o}", ast.c as u32),
217            HexFixed(ast::HexLiteralKind::X) => {
218                write!(self.wtr, r"\x{:02X}", ast.c as u32)
219            }
220            HexFixed(ast::HexLiteralKind::UnicodeShort) => {
221                write!(self.wtr, r"\u{:04X}", ast.c as u32)
222            }
223            HexFixed(ast::HexLiteralKind::UnicodeLong) => {
224                write!(self.wtr, r"\U{:08X}", ast.c as u32)
225            }
226            HexBrace(ast::HexLiteralKind::X) => {
227                write!(self.wtr, r"\x{{{:X}}}", ast.c as u32)
228            }
229            HexBrace(ast::HexLiteralKind::UnicodeShort) => {
230                write!(self.wtr, r"\u{{{:X}}}", ast.c as u32)
231            }
232            HexBrace(ast::HexLiteralKind::UnicodeLong) => {
233                write!(self.wtr, r"\U{{{:X}}}", ast.c as u32)
234            }
235            Special(ast::SpecialLiteralKind::Bell) => {
236                self.wtr.write_str(r"\a")
237            }
238            Special(ast::SpecialLiteralKind::FormFeed) => {
239                self.wtr.write_str(r"\f")
240            }
241            Special(ast::SpecialLiteralKind::Tab) => self.wtr.write_str(r"\t"),
242            Special(ast::SpecialLiteralKind::LineFeed) => {
243                self.wtr.write_str(r"\n")
244            }
245            Special(ast::SpecialLiteralKind::CarriageReturn) => {
246                self.wtr.write_str(r"\r")
247            }
248            Special(ast::SpecialLiteralKind::VerticalTab) => {
249                self.wtr.write_str(r"\v")
250            }
251            Special(ast::SpecialLiteralKind::Space) => {
252                self.wtr.write_str(r"\ ")
253            }
254        }
255    }
256
257    fn fmt_assertion(&mut self, ast: &ast::Assertion) -> fmt::Result {
258        use crate::ast::AssertionKind::*;
259        match ast.kind {
260            StartLine => self.wtr.write_str("^"),
261            EndLine => self.wtr.write_str("$"),
262            StartText => self.wtr.write_str(r"\A"),
263            EndText => self.wtr.write_str(r"\z"),
264            WordBoundary => self.wtr.write_str(r"\b"),
265            NotWordBoundary => self.wtr.write_str(r"\B"),
266        }
267    }
268
269    fn fmt_set_flags(&mut self, ast: &ast::SetFlags) -> fmt::Result {
270        self.wtr.write_str("(?")?;
271        self.fmt_flags(&ast.flags)?;
272        self.wtr.write_str(")")?;
273        Ok(())
274    }
275
276    fn fmt_flags(&mut self, ast: &ast::Flags) -> fmt::Result {
277        use crate::ast::{Flag, FlagsItemKind};
278
279        for item in &ast.items {
280            match item.kind {
281                FlagsItemKind::Negation => self.wtr.write_str("-"),
282                FlagsItemKind::Flag(ref flag) => match *flag {
283                    Flag::CaseInsensitive => self.wtr.write_str("i"),
284                    Flag::MultiLine => self.wtr.write_str("m"),
285                    Flag::DotMatchesNewLine => self.wtr.write_str("s"),
286                    Flag::SwapGreed => self.wtr.write_str("U"),
287                    Flag::Unicode => self.wtr.write_str("u"),
288                    Flag::IgnoreWhitespace => self.wtr.write_str("x"),
289                },
290            }?;
291        }
292        Ok(())
293    }
294
295    fn fmt_class_bracketed_pre(
296        &mut self,
297        ast: &ast::ClassBracketed,
298    ) -> fmt::Result {
299        if ast.negated {
300            self.wtr.write_str("[^")
301        } else {
302            self.wtr.write_str("[")
303        }
304    }
305
306    fn fmt_class_bracketed_post(
307        &mut self,
308        _ast: &ast::ClassBracketed,
309    ) -> fmt::Result {
310        self.wtr.write_str("]")
311    }
312
313    fn fmt_class_set_binary_op_kind(
314        &mut self,
315        ast: &ast::ClassSetBinaryOpKind,
316    ) -> fmt::Result {
317        use crate::ast::ClassSetBinaryOpKind::*;
318        match *ast {
319            Intersection => self.wtr.write_str("&&"),
320            Difference => self.wtr.write_str("--"),
321            SymmetricDifference => self.wtr.write_str("~~"),
322        }
323    }
324
325    fn fmt_class_perl(&mut self, ast: &ast::ClassPerl) -> fmt::Result {
326        use crate::ast::ClassPerlKind::*;
327        match ast.kind {
328            Digit if ast.negated => self.wtr.write_str(r"\D"),
329            Digit => self.wtr.write_str(r"\d"),
330            Space if ast.negated => self.wtr.write_str(r"\S"),
331            Space => self.wtr.write_str(r"\s"),
332            Word if ast.negated => self.wtr.write_str(r"\W"),
333            Word => self.wtr.write_str(r"\w"),
334        }
335    }
336
337    fn fmt_class_ascii(&mut self, ast: &ast::ClassAscii) -> fmt::Result {
338        use crate::ast::ClassAsciiKind::*;
339        match ast.kind {
340            Alnum if ast.negated => self.wtr.write_str("[:^alnum:]"),
341            Alnum => self.wtr.write_str("[:alnum:]"),
342            Alpha if ast.negated => self.wtr.write_str("[:^alpha:]"),
343            Alpha => self.wtr.write_str("[:alpha:]"),
344            Ascii if ast.negated => self.wtr.write_str("[:^ascii:]"),
345            Ascii => self.wtr.write_str("[:ascii:]"),
346            Blank if ast.negated => self.wtr.write_str("[:^blank:]"),
347            Blank => self.wtr.write_str("[:blank:]"),
348            Cntrl if ast.negated => self.wtr.write_str("[:^cntrl:]"),
349            Cntrl => self.wtr.write_str("[:cntrl:]"),
350            Digit if ast.negated => self.wtr.write_str("[:^digit:]"),
351            Digit => self.wtr.write_str("[:digit:]"),
352            Graph if ast.negated => self.wtr.write_str("[:^graph:]"),
353            Graph => self.wtr.write_str("[:graph:]"),
354            Lower if ast.negated => self.wtr.write_str("[:^lower:]"),
355            Lower => self.wtr.write_str("[:lower:]"),
356            Print if ast.negated => self.wtr.write_str("[:^print:]"),
357            Print => self.wtr.write_str("[:print:]"),
358            Punct if ast.negated => self.wtr.write_str("[:^punct:]"),
359            Punct => self.wtr.write_str("[:punct:]"),
360            Space if ast.negated => self.wtr.write_str("[:^space:]"),
361            Space => self.wtr.write_str("[:space:]"),
362            Upper if ast.negated => self.wtr.write_str("[:^upper:]"),
363            Upper => self.wtr.write_str("[:upper:]"),
364            Word if ast.negated => self.wtr.write_str("[:^word:]"),
365            Word => self.wtr.write_str("[:word:]"),
366            Xdigit if ast.negated => self.wtr.write_str("[:^xdigit:]"),
367            Xdigit => self.wtr.write_str("[:xdigit:]"),
368        }
369    }
370
371    fn fmt_class_unicode(&mut self, ast: &ast::ClassUnicode) -> fmt::Result {
372        use crate::ast::ClassUnicodeKind::*;
373        use crate::ast::ClassUnicodeOpKind::*;
374
375        if ast.negated {
376            self.wtr.write_str(r"\P")?;
377        } else {
378            self.wtr.write_str(r"\p")?;
379        }
380        match ast.kind {
381            OneLetter(c) => self.wtr.write_char(c),
382            Named(ref x) => write!(self.wtr, "{{{}}}", x),
383            NamedValue { op: Equal, ref name, ref value } => {
384                write!(self.wtr, "{{{}={}}}", name, value)
385            }
386            NamedValue { op: Colon, ref name, ref value } => {
387                write!(self.wtr, "{{{}:{}}}", name, value)
388            }
389            NamedValue { op: NotEqual, ref name, ref value } => {
390                write!(self.wtr, "{{{}!={}}}", name, value)
391            }
392        }
393    }
394}
395
#[cfg(test)]
mod tests {
    use super::Printer;
    use crate::ast::parse::ParserBuilder;

    /// Parses `given` with a default parser, prints the resulting AST and
    /// asserts that the printed text equals `given`.
    fn roundtrip(given: &str) {
        roundtrip_with(|builder| builder, given);
    }

    /// Runs `roundtrip` on every pattern, in order.
    fn roundtrip_all(patterns: &[&str]) {
        for &pattern in patterns {
            roundtrip(pattern);
        }
    }

    /// Same as `roundtrip`, but lets `f` configure the parser builder
    /// before the pattern is parsed.
    fn roundtrip_with<F>(mut f: F, given: &str)
    where
        F: FnMut(&mut ParserBuilder) -> &mut ParserBuilder,
    {
        let mut builder = ParserBuilder::new();
        f(&mut builder);
        let ast = builder.build().parse(given).unwrap();

        let mut dst = String::new();
        Printer::new().print(&ast, &mut dst).unwrap();
        assert_eq!(given, dst);
    }

    #[test]
    fn print_literal() {
        roundtrip_all(&["a", r"\["]);
        roundtrip_with(|b| b.octal(true), r"\141");
        roundtrip_all(&[
            r"\x61",
            r"\x7F",
            r"\u0061",
            r"\U00000061",
            r"\x{61}",
            r"\x{7F}",
            r"\u{61}",
            r"\U{61}",
        ]);

        roundtrip_all(&[
            r"\a",
            r"\f",
            r"\t",
            r"\n",
            r"\r",
            r"\v",
            r"(?x)\ ",
        ]);
    }

    #[test]
    fn print_dot() {
        roundtrip(".");
    }

    #[test]
    fn print_concat() {
        roundtrip_all(&["ab", "abcde", "a(bcd)ef"]);
    }

    #[test]
    fn print_alternation() {
        roundtrip_all(&[
            "a|b",
            "a|b|c|d|e",
            "|a|b|c|d|e",
            "|a|b|c|d|e|",
            "a(b|c|d)|e|f",
        ]);
    }

    #[test]
    fn print_assertion() {
        roundtrip_all(&[r"^", r"$", r"\A", r"\z", r"\b", r"\B"]);
    }

    #[test]
    fn print_repetition() {
        roundtrip_all(&[
            "a?",
            "a??",
            "a*",
            "a*?",
            "a+",
            "a+?",
            "a{5}",
            "a{5}?",
            "a{5,}",
            "a{5,}?",
            "a{5,10}",
            "a{5,10}?",
        ]);
    }

    #[test]
    fn print_flags() {
        roundtrip_all(&[
            "(?i)",
            "(?-i)",
            "(?s-i)",
            "(?-si)",
            "(?siUmux)",
        ]);
    }

    #[test]
    fn print_group() {
        roundtrip_all(&["(?i:a)", "(?P<foo>a)", "(a)"]);
    }

    #[test]
    fn print_class() {
        // Bracketed classes, including set operations and nesting.
        roundtrip_all(&[
            r"[abc]",
            r"[a-z]",
            r"[^a-z]",
            r"[a-z0-9]",
            r"[-a-z0-9]",
            r"[-a-z0-9]",
            r"[a-z0-9---]",
            r"[a-z&&m-n]",
            r"[[a-z&&m-n]]",
            r"[a-z--m-n]",
            r"[a-z~~m-n]",
            r"[a-z[0-9]]",
            r"[a-z[^0-9]]",
        ]);

        // Perl classes.
        roundtrip_all(&[r"\d", r"\D", r"\s", r"\S", r"\w", r"\W"]);

        // ASCII classes, plain and negated.
        roundtrip_all(&[
            r"[[:alnum:]]",
            r"[[:^alnum:]]",
            r"[[:alpha:]]",
            r"[[:^alpha:]]",
            r"[[:ascii:]]",
            r"[[:^ascii:]]",
            r"[[:blank:]]",
            r"[[:^blank:]]",
            r"[[:cntrl:]]",
            r"[[:^cntrl:]]",
            r"[[:digit:]]",
            r"[[:^digit:]]",
            r"[[:graph:]]",
            r"[[:^graph:]]",
            r"[[:lower:]]",
            r"[[:^lower:]]",
            r"[[:print:]]",
            r"[[:^print:]]",
            r"[[:punct:]]",
            r"[[:^punct:]]",
            r"[[:space:]]",
            r"[[:^space:]]",
            r"[[:upper:]]",
            r"[[:^upper:]]",
            r"[[:word:]]",
            r"[[:^word:]]",
            r"[[:xdigit:]]",
            r"[[:^xdigit:]]",
        ]);

        // Unicode classes.
        roundtrip_all(&[
            r"\pL",
            r"\PL",
            r"\p{L}",
            r"\P{L}",
            r"\p{X=Y}",
            r"\P{X=Y}",
            r"\p{X:Y}",
            r"\P{X:Y}",
            r"\p{X!=Y}",
            r"\P{X!=Y}",
        ]);
    }
}