LLVM API Documentation

Regex.h
Go to the documentation of this file.
00001 //===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file implements a POSIX regular expression matcher.  Both Basic and
00011 // Extended POSIX regular expressions (ERE) are supported.  EREs were extended
00012 // to support backreferences in matches.
00013 // This implementation also supports matching strings with embedded NUL chars.
00014 //
00015 //===----------------------------------------------------------------------===//
00016 
00017 #ifndef LLVM_SUPPORT_REGEX_H
00018 #define LLVM_SUPPORT_REGEX_H
00019 
00020 #include "llvm/Support/Compiler.h"
00021 #include <string>
00022 
00023 struct llvm_regex; // Forward declaration only; no definition appears in this header.
00024 
00025 namespace llvm {
00026   class StringRef; // Forward-declared; only named in member declarations below.
00027   template<typename T> class SmallVectorImpl; // Forward-declared; used via pointer in match().
00028 
00029   class Regex { // Non-copyable, movable regex handle (see constructors below).
00030   public:
00031     enum { // Flag values accepted by the constructor's Flags parameter.
00032       NoFlags=0,
00033       /// Compile for matching that ignores upper/lower case distinctions.
00034       IgnoreCase=1,
00035       /// Compile for newline-sensitive matching. With this flag '[^' bracket
00036       /// expressions and '.' never match newline. A ^ anchor matches the
00037       /// null string after any newline in the string in addition to its normal
00038       /// function, and the $ anchor matches the null string before any
00039       /// newline in the string in addition to its normal function.
00040       Newline=2,
00041       /// By default, the POSIX extended regular expression (ERE) syntax is
00042       /// assumed. Pass this flag to turn on basic regular expressions (BRE)
00043       /// instead.
00044       BasicRegex=4
00045     };
00046 
00047     /// Compiles the given regular expression \p Regex.
00048     Regex(StringRef Regex, unsigned Flags = NoFlags);
00049     Regex(const Regex &) LLVM_DELETED_FUNCTION; // Copy construction is disabled.
00050     Regex &operator=(Regex regex) { // By-value parameter: assignment is done by swapping.
00051       std::swap(preg, regex.preg);
00052       std::swap(error, regex.error);
00053       return *this; // regex now holds this object's previous preg/error.
00054     }
00055     Regex(Regex &&regex) { // Move construction: takes over regex's preg and error.
00056       preg = regex.preg;
00057       error = regex.error;
00058       regex.preg = nullptr; // Detach source from the moved preg (presumably so ~Regex() skips it).
00059     }
00060     ~Regex();
00061 
00062     /// isValid - Judging by the name and signature, returns whether the regex
00063     /// is valid and reports any compilation/matching error through \p Error.
00064     bool isValid(std::string &Error);
00065 
00066     /// getNumMatches - In a valid regex, return the number of parenthesized
00067     /// matches it contains.  The number filled in by match will include this
00068     /// many entries plus one for the whole regex (as element 0).
00069     unsigned getNumMatches() const;
00070 
00071     /// match - Match the regex against a given \p String.
00072     ///
00073     /// \param Matches - If given, on a successful match this will be filled in
00074     /// with references to the matched group expressions (inside \p String),
00075     /// the first group is always the entire pattern.
00076     ///
00077     /// This returns true on a successful match.
00078     bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr);
00079 
00080     /// sub - Return the result of replacing the first match of the regex in
00081     /// \p String with the \p Repl string. Backreferences like "\0" in the
00082     /// replacement string are replaced with the appropriate match substring.
00083     ///
00084     /// Note that the replacement string has backslash escaping performed on
00085     /// it. Invalid backreferences are ignored (replaced by empty strings).
00086     ///
00087     /// \param Error If non-null, any errors in the substitution (invalid
00088     /// backreferences, trailing backslashes) will be recorded as a non-empty
00089     /// string.
00090     std::string sub(StringRef Repl, StringRef String,
00091                     std::string *Error = nullptr);
00092 
00093     /// \brief If this function returns true, ^Str$ is an extended regular
00094     /// expression that matches Str and only Str.
00095     static bool isLiteralERE(StringRef Str);
00096 
00097     /// \brief Turn String into a regex by escaping its special characters.
00098     static std::string escape(StringRef String);
00099 
00100   private:
00101     struct llvm_regex *preg; // Opaque regex state; set to nullptr in a moved-from object.
00102     int error; // Error state; presumably what isValid() reports — confirm in the definition.
00103   };
00104 }
00105 
00106 #endif // LLVM_SUPPORT_REGEX_H