clang API Documentation

FormatString.h
Go to the documentation of this file.
00001 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This file defines APIs for analyzing the format strings of printf, fscanf,
00011 // and friends.
00012 //
00013 // The structure of format strings for fprintf is described in C99 7.19.6.1.
00014 //
00015 // The structure of format strings for fscanf is described in C99 7.19.6.2.
00016 //
00017 //===----------------------------------------------------------------------===//
00018 
00019 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
00020 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H
00021 
00022 #include "clang/AST/CanonicalType.h"
00023 
00024 namespace clang {
00025 
00026 class TargetInfo;
00027 
00028 //===----------------------------------------------------------------------===//
00029 /// Common components of both fprintf and fscanf format strings.
00030 namespace analyze_format_string {
00031 
/// A boolean flag from a format specifier that additionally remembers its
/// textual representation and the location at which it was seen.
class OptionalFlag {
public:
  /// Creates a cleared flag spelled \p Representation.
  ///
  /// The position starts out null so that getPosition() reliably trips its
  /// assertion when no position has been recorded, instead of reading an
  /// indeterminate pointer.
  OptionalFlag(const char *Representation)
      : representation(Representation), position(nullptr), flag(false) {}

  /// Returns true when the flag is currently set. Const-qualified so it can
  /// be queried through a const reference.
  bool isSet() const { return flag; }
  void set() { flag = true; }
  void clear() { flag = false; }

  /// Records where the flag appears; \p position must be non-null.
  void setPosition(const char *position) {
    assert(position);
    this->position = position;
  }

  /// Returns the recorded position; asserts that one has been recorded.
  const char *getPosition() const {
    assert(position);
    return position;
  }

  /// Returns the spelling passed to the constructor.
  const char *toString() const { return representation; }

  // Overloaded operators for bool like qualities
  LLVM_EXPLICIT operator bool() const { return flag; }
  OptionalFlag& operator=(const bool &rhs) {
    flag = rhs;
    return *this;  // Return a reference to myself.
  }
private:
  const char *representation; // Spelling of the flag.
  const char *position;       // Recorded location, or null if none yet.
  bool flag;                  // Whether the flag is set.
};
00062 
/// The length modifier of a printf/scanf conversion (for example the "ll" in
/// "%lld"), stored as a kind plus a pointer to where it starts.
class LengthModifier {
public:
  enum Kind {
    None,
    AsChar,       // 'hh'
    AsShort,      // 'h'
    AsLong,       // 'l'
    AsLongLong,   // 'll'
    AsQuad,       // 'q' (BSD, deprecated, for 64-bit integer types)
    AsIntMax,     // 'j'
    AsSizeT,      // 'z'
    AsPtrDiff,    // 't'
    AsInt32,      // 'I32' (MSVCRT, like __int32)
    AsInt3264,    // 'I'   (MSVCRT, like __int3264 from MIDL)
    AsInt64,      // 'I64' (MSVCRT, like __int64)
    AsLongDouble, // 'L'
    AsAllocate,   // for '%as', GNU extension to C90 scanf
    AsMAllocate,  // for '%ms', GNU extension to scanf
    AsWide,       // 'w' (MSVCRT, like l but only for c, C, s, S, or Z)
    AsWideChar = AsLong // for '%ls', only makes sense for printf
  };

  /// Builds an empty modifier: kind None and a null start position.
  LengthModifier() : Position(nullptr), kind(None) {}

  /// Builds a modifier of kind \p k that starts at \p pos.
  LengthModifier(const char *pos, Kind k) : Position(pos), kind(k) {}

  const char *getStart() const { return Position; }

  /// Number of characters the modifier occupies: 0 for None, 2 for
  /// 'hh'/'ll', 3 for 'I32'/'I64', and 1 for every other kind.
  unsigned getLength() const {
    if (kind == None)
      return 0;
    if (kind == AsChar || kind == AsLongLong)
      return 2;
    if (kind == AsInt32 || kind == AsInt64)
      return 3;
    return 1;
  }

  Kind getKind() const { return kind; }
  void setKind(Kind k) { kind = k; }

  const char *toString() const;

private:
  const char *Position; // Start of the modifier text; null when empty.
  Kind kind;
};
00119 
00120 class ConversionSpecifier {
00121 public:
00122   enum Kind {
00123     InvalidSpecifier = 0,
00124       // C99 conversion specifiers.
00125     cArg,
00126     dArg,
00127     DArg, // Apple extension
00128     iArg,
00129     IntArgBeg = dArg, IntArgEnd = iArg,
00130 
00131     oArg,
00132     OArg, // Apple extension
00133     uArg,
00134     UArg, // Apple extension
00135     xArg,
00136     XArg,
00137     UIntArgBeg = oArg, UIntArgEnd = XArg,
00138 
00139     fArg,
00140     FArg,
00141     eArg,
00142     EArg,
00143     gArg,
00144     GArg,
00145     aArg,
00146     AArg,
00147     DoubleArgBeg = fArg, DoubleArgEnd = AArg,
00148 
00149     sArg,
00150     pArg,
00151     nArg,
00152     PercentArg,
00153     CArg,
00154     SArg,
00155 
00156     // ** Printf-specific **
00157 
00158     ZArg, // MS extension
00159 
00160     // Objective-C specific specifiers.
00161     ObjCObjArg,  // '@'
00162     ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg,
00163 
00164     // GlibC specific specifiers.
00165     PrintErrno,   // 'm'
00166 
00167     PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno,
00168 
00169     // ** Scanf-specific **
00170     ScanListArg, // '['
00171     ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg
00172   };
00173 
00174   ConversionSpecifier(bool isPrintf = true)
00175     : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr),
00176       kind(InvalidSpecifier) {}
00177 
00178   ConversionSpecifier(bool isPrintf, const char *pos, Kind k)
00179     : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {}
00180 
00181   const char *getStart() const {
00182     return Position;
00183   }
00184 
00185   StringRef getCharacters() const {
00186     return StringRef(getStart(), getLength());
00187   }
00188 
00189   bool consumesDataArgument() const {
00190     switch (kind) {
00191       case PrintErrno:
00192         assert(IsPrintf);
00193         return false;
00194       case PercentArg:
00195         return false;
00196       default:
00197         return true;
00198     }
00199   }
00200 
00201   Kind getKind() const { return kind; }
00202   void setKind(Kind k) { kind = k; }
00203   unsigned getLength() const {
00204     return EndScanList ? EndScanList - Position : 1;
00205   }
00206 
00207   bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; }
00208   bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; }
00209   bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; }
00210   const char *toString() const;
00211 
00212   bool isPrintfKind() const { return IsPrintf; }
00213   
00214   Optional<ConversionSpecifier> getStandardSpecifier() const;
00215 
00216 protected:
00217   bool IsPrintf;
00218   const char *Position;
00219   const char *EndScanList;
00220   Kind kind;
00221 };
00222 
00223 class ArgType {
00224 public:
00225   enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy,
00226               AnyCharTy, CStrTy, WCStrTy, WIntTy };
00227 private:
00228   const Kind K;
00229   QualType T;
00230   const char *Name;
00231   bool Ptr;
00232 public:
00233   ArgType(Kind k = UnknownTy, const char *n = nullptr)
00234       : K(k), Name(n), Ptr(false) {}
00235   ArgType(QualType t, const char *n = nullptr)
00236       : K(SpecificTy), T(t), Name(n), Ptr(false) {}
00237   ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {}
00238 
00239   static ArgType Invalid() { return ArgType(InvalidTy); }
00240   bool isValid() const { return K != InvalidTy; }
00241 
00242   /// Create an ArgType which corresponds to the type pointer to A.
00243   static ArgType PtrTo(const ArgType& A) {
00244     assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown");
00245     ArgType Res = A;
00246     Res.Ptr = true;
00247     return Res;
00248   }
00249 
00250   bool matchesType(ASTContext &C, QualType argTy) const;
00251 
00252   QualType getRepresentativeType(ASTContext &C) const;
00253 
00254   std::string getRepresentativeTypeName(ASTContext &C) const;
00255 };
00256 
/// An optional numeric amount in a format specifier. It records how the
/// amount was specified, its value (or argument index), and where its text
/// starts.
class OptionalAmount {
public:
  enum HowSpecified { NotSpecified, Constant, Arg, Invalid };

  /// Builds a fully described amount; the dot-prefix marker starts cleared.
  OptionalAmount(HowSpecified howSpecified,
                 unsigned amount,
                 const char *amountStart,
                 unsigned amountLength,
                 bool usesPositionalArg)
    : start(amountStart), length(amountLength), hs(howSpecified),
      amt(amount), UsesPositionalArg(usesPositionalArg),
      UsesDotPrefix(false) {}

  /// Builds an empty amount: NotSpecified when \p valid, Invalid otherwise.
  OptionalAmount(bool valid = true)
    : start(nullptr), length(0), hs(valid ? NotSpecified : Invalid), amt(0),
      UsesPositionalArg(false), UsesDotPrefix(false) {}

  bool isInvalid() const { return hs == Invalid; }

  HowSpecified getHowSpecified() const { return hs; }
  void setHowSpecified(HowSpecified h) { hs = h; }

  bool hasDataArgument() const { return hs == Arg; }

  /// Stored argument index; only meaningful when hasDataArgument().
  unsigned getArgIndex() const {
    assert(hasDataArgument());
    return amt;
  }

  /// Stored value; only meaningful for a Constant amount.
  unsigned getConstantAmount() const {
    assert(hs == Constant);
    return amt;
  }

  /// Start of the amount text, stepping back one character to cover the '.'
  /// when the dot prefix is in use.
  const char *getStart() const {
    return UsesDotPrefix ? start - 1 : start;
  }

  /// Length of a Constant amount, counting the '.' when it is in use.
  unsigned getConstantLength() const {
    assert(hs == Constant);
    return UsesDotPrefix ? length + 1 : length;
  }

  ArgType getArgType(ASTContext &Ctx) const;

  void toString(raw_ostream &os) const;

  bool usesPositionalArg() const { return UsesPositionalArg; }

  /// Stored argument index plus one; only meaningful when hasDataArgument().
  unsigned getPositionalArgIndex() const {
    assert(hasDataArgument());
    return amt + 1;
  }

  bool usesDotPrefix() const { return UsesDotPrefix; }
  void setUsesDotPrefix() { UsesDotPrefix = true; }

private:
  const char *start;
  unsigned length;
  HowSpecified hs;
  unsigned amt;
  bool UsesPositionalArg : 1;
  bool UsesDotPrefix;
};
00323 
00324 
00325 class FormatSpecifier {
00326 protected:
00327   LengthModifier LM;
00328   OptionalAmount FieldWidth;
00329   ConversionSpecifier CS;
00330   /// Positional arguments, an IEEE extension:
00331   ///  IEEE Std 1003.1, 2004 Edition
00332   ///  http://www.opengroup.org/onlinepubs/009695399/functions/printf.html
00333   bool UsesPositionalArg;
00334   unsigned argIndex;
00335 public:
00336   FormatSpecifier(bool isPrintf)
00337     : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {}
00338 
00339   void setLengthModifier(LengthModifier lm) {
00340     LM = lm;
00341   }
00342 
00343   void setUsesPositionalArg() { UsesPositionalArg = true; }
00344 
00345   void setArgIndex(unsigned i) {
00346     argIndex = i;
00347   }
00348 
00349   unsigned getArgIndex() const {
00350     return argIndex;
00351   }
00352 
00353   unsigned getPositionalArgIndex() const {
00354     return argIndex + 1;
00355   }
00356 
00357   const LengthModifier &getLengthModifier() const {
00358     return LM;
00359   }
00360 
00361   const OptionalAmount &getFieldWidth() const {
00362     return FieldWidth;
00363   }
00364 
00365   void setFieldWidth(const OptionalAmount &Amt) {
00366     FieldWidth = Amt;
00367   }
00368 
00369   bool usesPositionalArg() const { return UsesPositionalArg; }
00370 
00371   bool hasValidLengthModifier(const TargetInfo &Target) const;
00372 
00373   bool hasStandardLengthModifier() const;
00374 
00375   Optional<LengthModifier> getCorrectedLengthModifier() const;
00376 
00377   bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const;
00378 
00379   bool hasStandardLengthConversionCombination() const;
00380 
00381   /// For a TypedefType QT, if it is a named integer type such as size_t,
00382   /// assign the appropriate value to LM and return true.
00383   static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM);
00384 };
00385 
00386 } // end analyze_format_string namespace
00387 
00388 //===----------------------------------------------------------------------===//
00389 /// Pieces specific to fprintf format strings.
00390 
00391 namespace analyze_printf {
00392 
00393 class PrintfConversionSpecifier :
00394   public analyze_format_string::ConversionSpecifier  {
00395 public:
00396   PrintfConversionSpecifier()
00397     : ConversionSpecifier(true, nullptr, InvalidSpecifier) {}
00398 
00399   PrintfConversionSpecifier(const char *pos, Kind k)
00400     : ConversionSpecifier(true, pos, k) {}
00401 
00402   bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; }
00403   bool isDoubleArg() const { return kind >= DoubleArgBeg &&
00404                                     kind <= DoubleArgEnd; }
00405   unsigned getLength() const {
00406       // Conversion specifiers currently only are represented by
00407       // single characters, but we be flexible.
00408     return 1;
00409   }
00410 
00411   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
00412     return CS->isPrintfKind();
00413   }
00414 };
00415 
00416 using analyze_format_string::ArgType;
00417 using analyze_format_string::LengthModifier;
00418 using analyze_format_string::OptionalAmount;
00419 using analyze_format_string::OptionalFlag;
00420 
00421 class PrintfSpecifier : public analyze_format_string::FormatSpecifier {
00422   OptionalFlag HasThousandsGrouping; // ''', POSIX extension.
00423   OptionalFlag IsLeftJustified; // '-'
00424   OptionalFlag HasPlusPrefix; // '+'
00425   OptionalFlag HasSpacePrefix; // ' '
00426   OptionalFlag HasAlternativeForm; // '#'
00427   OptionalFlag HasLeadingZeroes; // '0'
00428   OptionalAmount Precision;
00429 public:
00430   PrintfSpecifier() :
00431     FormatSpecifier(/* isPrintf = */ true),
00432     HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"),
00433     HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {}
00434 
00435   static PrintfSpecifier Parse(const char *beg, const char *end);
00436 
00437     // Methods for incrementally constructing the PrintfSpecifier.
00438   void setConversionSpecifier(const PrintfConversionSpecifier &cs) {
00439     CS = cs;
00440   }
00441   void setHasThousandsGrouping(const char *position) {
00442     HasThousandsGrouping = true;
00443     HasThousandsGrouping.setPosition(position);
00444   }
00445   void setIsLeftJustified(const char *position) {
00446     IsLeftJustified = true;
00447     IsLeftJustified.setPosition(position);
00448   }
00449   void setHasPlusPrefix(const char *position) {
00450     HasPlusPrefix = true;
00451     HasPlusPrefix.setPosition(position);
00452   }
00453   void setHasSpacePrefix(const char *position) {
00454     HasSpacePrefix = true;
00455     HasSpacePrefix.setPosition(position);
00456   }
00457   void setHasAlternativeForm(const char *position) {
00458     HasAlternativeForm = true;
00459     HasAlternativeForm.setPosition(position);
00460   }
00461   void setHasLeadingZeros(const char *position) {
00462     HasLeadingZeroes = true;
00463     HasLeadingZeroes.setPosition(position);
00464   }
00465   void setUsesPositionalArg() { UsesPositionalArg = true; }
00466 
00467     // Methods for querying the format specifier.
00468 
00469   const PrintfConversionSpecifier &getConversionSpecifier() const {
00470     return cast<PrintfConversionSpecifier>(CS);
00471   }
00472 
00473   void setPrecision(const OptionalAmount &Amt) {
00474     Precision = Amt;
00475     Precision.setUsesDotPrefix();
00476   }
00477 
00478   const OptionalAmount &getPrecision() const {
00479     return Precision;
00480   }
00481 
00482   bool consumesDataArgument() const {
00483     return getConversionSpecifier().consumesDataArgument();
00484   }
00485 
00486   /// \brief Returns the builtin type that a data argument
00487   /// paired with this format specifier should have.  This method
00488   /// will return null if the format specifier does not have
00489   /// a matching data argument or the matching argument matches
00490   /// more than one type.
00491   ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const;
00492 
00493   const OptionalFlag &hasThousandsGrouping() const {
00494       return HasThousandsGrouping;
00495   }
00496   const OptionalFlag &isLeftJustified() const { return IsLeftJustified; }
00497   const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; }
00498   const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; }
00499   const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; }
00500   const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; }
00501   bool usesPositionalArg() const { return UsesPositionalArg; }
00502 
00503   /// Changes the specifier and length according to a QualType, retaining any
00504   /// flags or options. Returns true on success, or false when a conversion
00505   /// was not successful.
00506   bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx,
00507                bool IsObjCLiteral);
00508 
00509   void toString(raw_ostream &os) const;
00510 
00511   // Validation methods - to check if any element results in undefined behavior
00512   bool hasValidPlusPrefix() const;
00513   bool hasValidAlternativeForm() const;
00514   bool hasValidLeadingZeros() const;
00515   bool hasValidSpacePrefix() const;
00516   bool hasValidLeftJustified() const;
00517   bool hasValidThousandsGroupingPrefix() const;
00518 
00519   bool hasValidPrecision() const;
00520   bool hasValidFieldWidth() const;
00521 };
00522 }  // end analyze_printf namespace
00523 
00524 //===----------------------------------------------------------------------===//
00525 /// Pieces specific to fscanf format strings.
00526 
00527 namespace analyze_scanf {
00528 
00529 class ScanfConversionSpecifier :
00530     public analyze_format_string::ConversionSpecifier  {
00531 public:
00532   ScanfConversionSpecifier()
00533     : ConversionSpecifier(false, nullptr, InvalidSpecifier) {}
00534 
00535   ScanfConversionSpecifier(const char *pos, Kind k)
00536     : ConversionSpecifier(false, pos, k) {}
00537 
00538   void setEndScanList(const char *pos) { EndScanList = pos; }
00539 
00540   static bool classof(const analyze_format_string::ConversionSpecifier *CS) {
00541     return !CS->isPrintfKind();
00542   }
00543 };
00544 
00545 using analyze_format_string::ArgType;
00546 using analyze_format_string::LengthModifier;
00547 using analyze_format_string::OptionalAmount;
00548 using analyze_format_string::OptionalFlag;
00549 
00550 class ScanfSpecifier : public analyze_format_string::FormatSpecifier {
00551   OptionalFlag SuppressAssignment; // '*'
00552 public:
00553   ScanfSpecifier() :
00554     FormatSpecifier(/* isPrintf = */ false),
00555     SuppressAssignment("*") {}
00556 
00557   void setSuppressAssignment(const char *position) {
00558     SuppressAssignment = true;
00559     SuppressAssignment.setPosition(position);
00560   }
00561 
00562   const OptionalFlag &getSuppressAssignment() const {
00563     return SuppressAssignment;
00564   }
00565 
00566   void setConversionSpecifier(const ScanfConversionSpecifier &cs) {
00567     CS = cs;
00568   }
00569 
00570   const ScanfConversionSpecifier &getConversionSpecifier() const {
00571     return cast<ScanfConversionSpecifier>(CS);
00572   }
00573 
00574   bool consumesDataArgument() const {
00575     return CS.consumesDataArgument() && !SuppressAssignment;
00576   }
00577 
00578   ArgType getArgType(ASTContext &Ctx) const;
00579 
00580   bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt,
00581                ASTContext &Ctx);
00582 
00583   void toString(raw_ostream &os) const;
00584 
00585   static ScanfSpecifier Parse(const char *beg, const char *end);
00586 };
00587 
00588 } // end analyze_scanf namespace
00589 
00590 //===----------------------------------------------------------------------===//
00591 // Parsing and processing of format strings (both fprintf and fscanf).
00592 
00593 namespace analyze_format_string {
00594 
00595 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 };
00596 
00597 class FormatStringHandler {
00598 public:
00599   FormatStringHandler() {}
00600   virtual ~FormatStringHandler();
00601 
00602   virtual void HandleNullChar(const char *nullCharacter) {}
00603 
00604   virtual void HandlePosition(const char *startPos, unsigned posLen) {}
00605 
00606   virtual void HandleInvalidPosition(const char *startPos, unsigned posLen,
00607                                      PositionContext p) {}
00608 
00609   virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {}
00610 
00611   virtual void HandleIncompleteSpecifier(const char *startSpecifier,
00612                                          unsigned specifierLen) {}
00613 
00614   // Printf-specific handlers.
00615 
00616   virtual bool HandleInvalidPrintfConversionSpecifier(
00617                                       const analyze_printf::PrintfSpecifier &FS,
00618                                       const char *startSpecifier,
00619                                       unsigned specifierLen) {
00620     return true;
00621   }
00622 
00623   virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS,
00624                                      const char *startSpecifier,
00625                                      unsigned specifierLen) {
00626     return true;
00627   }
00628 
00629     // Scanf-specific handlers.
00630 
00631   virtual bool HandleInvalidScanfConversionSpecifier(
00632                                         const analyze_scanf::ScanfSpecifier &FS,
00633                                         const char *startSpecifier,
00634                                         unsigned specifierLen) {
00635     return true;
00636   }
00637 
00638   virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS,
00639                                     const char *startSpecifier,
00640                                     unsigned specifierLen) {
00641     return true;
00642   }
00643 
00644   virtual void HandleIncompleteScanList(const char *start, const char *end) {}
00645 };
00646 
00647 bool ParsePrintfString(FormatStringHandler &H,
00648                        const char *beg, const char *end, const LangOptions &LO,
00649                        const TargetInfo &Target);
00650   
00651 bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO,
00652                               const TargetInfo &Target);
00653 
00654 bool ParseScanfString(FormatStringHandler &H,
00655                       const char *beg, const char *end, const LangOptions &LO,
00656                       const TargetInfo &Target);
00657 
00658 } // end analyze_format_string namespace
00659 } // end clang namespace
00660 #endif