clang API Documentation
00001 //= FormatString.h - Analysis of printf/fprintf format strings --*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This file defines APIs for analyzing the format strings of printf, fscanf, 00011 // and friends. 00012 // 00013 // The structure of format strings for fprintf are described in C99 7.19.6.1. 00014 // 00015 // The structure of format strings for fscanf are described in C99 7.19.6.2. 00016 // 00017 //===----------------------------------------------------------------------===// 00018 00019 #ifndef LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H 00020 #define LLVM_CLANG_ANALYSIS_ANALYSES_FORMATSTRING_H 00021 00022 #include "clang/AST/CanonicalType.h" 00023 00024 namespace clang { 00025 00026 class TargetInfo; 00027 00028 //===----------------------------------------------------------------------===// 00029 /// Common components of both fprintf and fscanf format strings. 00030 namespace analyze_format_string { 00031 00032 /// Class representing optional flags with location and representation 00033 /// information. 00034 class OptionalFlag { 00035 public: 00036 OptionalFlag(const char *Representation) 00037 : representation(Representation), flag(false) {} 00038 bool isSet() { return flag; } 00039 void set() { flag = true; } 00040 void clear() { flag = false; } 00041 void setPosition(const char *position) { 00042 assert(position); 00043 this->position = position; 00044 } 00045 const char *getPosition() const { 00046 assert(position); 00047 return position; 00048 } 00049 const char *toString() const { return representation; } 00050 00051 // Overloaded operators for bool like qualities 00052 LLVM_EXPLICIT operator bool() const { return flag; } 00053 OptionalFlag& operator=(const bool &rhs) { 00054 flag = rhs; 00055 return *this; // Return a reference to myself. 00056 } 00057 private: 00058 const char *representation; 00059 const char *position; 00060 bool flag; 00061 }; 00062 00063 /// Represents the length modifier in a format string in scanf/printf. 00064 class LengthModifier { 00065 public: 00066 enum Kind { 00067 None, 00068 AsChar, // 'hh' 00069 AsShort, // 'h' 00070 AsLong, // 'l' 00071 AsLongLong, // 'll' 00072 AsQuad, // 'q' (BSD, deprecated, for 64-bit integer types) 00073 AsIntMax, // 'j' 00074 AsSizeT, // 'z' 00075 AsPtrDiff, // 't' 00076 AsInt32, // 'I32' (MSVCRT, like __int32) 00077 AsInt3264, // 'I' (MSVCRT, like __int3264 from MIDL) 00078 AsInt64, // 'I64' (MSVCRT, like __int64) 00079 AsLongDouble, // 'L' 00080 AsAllocate, // for '%as', GNU extension to C90 scanf 00081 AsMAllocate, // for '%ms', GNU extension to scanf 00082 AsWide, // 'w' (MSVCRT, like l but only for c, C, s, S, or Z 00083 AsWideChar = AsLong // for '%ls', only makes sense for printf 00084 }; 00085 00086 LengthModifier() 00087 : Position(nullptr), kind(None) {} 00088 LengthModifier(const char *pos, Kind k) 00089 : Position(pos), kind(k) {} 00090 00091 const char *getStart() const { 00092 return Position; 00093 } 00094 00095 unsigned getLength() const { 00096 switch (kind) { 00097 default: 00098 return 1; 00099 case AsLongLong: 00100 case AsChar: 00101 return 2; 00102 case AsInt32: 00103 case AsInt64: 00104 return 3; 00105 case None: 00106 return 0; 00107 } 00108 } 00109 00110 Kind getKind() const { return kind; } 00111 void setKind(Kind k) { kind = k; } 00112 00113 const char *toString() const; 00114 00115 private: 00116 const char *Position; 00117 Kind kind; 00118 }; 00119 00120 class ConversionSpecifier { 00121 public: 00122 enum Kind { 00123 InvalidSpecifier = 0, 00124 // C99 conversion specifiers. 00125 cArg, 00126 dArg, 00127 DArg, // Apple extension 00128 iArg, 00129 IntArgBeg = dArg, IntArgEnd = iArg, 00130 00131 oArg, 00132 OArg, // Apple extension 00133 uArg, 00134 UArg, // Apple extension 00135 xArg, 00136 XArg, 00137 UIntArgBeg = oArg, UIntArgEnd = XArg, 00138 00139 fArg, 00140 FArg, 00141 eArg, 00142 EArg, 00143 gArg, 00144 GArg, 00145 aArg, 00146 AArg, 00147 DoubleArgBeg = fArg, DoubleArgEnd = AArg, 00148 00149 sArg, 00150 pArg, 00151 nArg, 00152 PercentArg, 00153 CArg, 00154 SArg, 00155 00156 // ** Printf-specific ** 00157 00158 ZArg, // MS extension 00159 00160 // Objective-C specific specifiers. 00161 ObjCObjArg, // '@' 00162 ObjCBeg = ObjCObjArg, ObjCEnd = ObjCObjArg, 00163 00164 // GlibC specific specifiers. 00165 PrintErrno, // 'm' 00166 00167 PrintfConvBeg = ObjCObjArg, PrintfConvEnd = PrintErrno, 00168 00169 // ** Scanf-specific ** 00170 ScanListArg, // '[' 00171 ScanfConvBeg = ScanListArg, ScanfConvEnd = ScanListArg 00172 }; 00173 00174 ConversionSpecifier(bool isPrintf = true) 00175 : IsPrintf(isPrintf), Position(nullptr), EndScanList(nullptr), 00176 kind(InvalidSpecifier) {} 00177 00178 ConversionSpecifier(bool isPrintf, const char *pos, Kind k) 00179 : IsPrintf(isPrintf), Position(pos), EndScanList(nullptr), kind(k) {} 00180 00181 const char *getStart() const { 00182 return Position; 00183 } 00184 00185 StringRef getCharacters() const { 00186 return StringRef(getStart(), getLength()); 00187 } 00188 00189 bool consumesDataArgument() const { 00190 switch (kind) { 00191 case PrintErrno: 00192 assert(IsPrintf); 00193 return false; 00194 case PercentArg: 00195 return false; 00196 default: 00197 return true; 00198 } 00199 } 00200 00201 Kind getKind() const { return kind; } 00202 void setKind(Kind k) { kind = k; } 00203 unsigned getLength() const { 00204 return EndScanList ? EndScanList - Position : 1; 00205 } 00206 00207 bool isIntArg() const { return kind >= IntArgBeg && kind <= IntArgEnd; } 00208 bool isUIntArg() const { return kind >= UIntArgBeg && kind <= UIntArgEnd; } 00209 bool isAnyIntArg() const { return kind >= IntArgBeg && kind <= UIntArgEnd; } 00210 const char *toString() const; 00211 00212 bool isPrintfKind() const { return IsPrintf; } 00213 00214 Optional<ConversionSpecifier> getStandardSpecifier() const; 00215 00216 protected: 00217 bool IsPrintf; 00218 const char *Position; 00219 const char *EndScanList; 00220 Kind kind; 00221 }; 00222 00223 class ArgType { 00224 public: 00225 enum Kind { UnknownTy, InvalidTy, SpecificTy, ObjCPointerTy, CPointerTy, 00226 AnyCharTy, CStrTy, WCStrTy, WIntTy }; 00227 private: 00228 const Kind K; 00229 QualType T; 00230 const char *Name; 00231 bool Ptr; 00232 public: 00233 ArgType(Kind k = UnknownTy, const char *n = nullptr) 00234 : K(k), Name(n), Ptr(false) {} 00235 ArgType(QualType t, const char *n = nullptr) 00236 : K(SpecificTy), T(t), Name(n), Ptr(false) {} 00237 ArgType(CanQualType t) : K(SpecificTy), T(t), Name(nullptr), Ptr(false) {} 00238 00239 static ArgType Invalid() { return ArgType(InvalidTy); } 00240 bool isValid() const { return K != InvalidTy; } 00241 00242 /// Create an ArgType which corresponds to the type pointer to A. 00243 static ArgType PtrTo(const ArgType& A) { 00244 assert(A.K >= InvalidTy && "ArgType cannot be pointer to invalid/unknown"); 00245 ArgType Res = A; 00246 Res.Ptr = true; 00247 return Res; 00248 } 00249 00250 bool matchesType(ASTContext &C, QualType argTy) const; 00251 00252 QualType getRepresentativeType(ASTContext &C) const; 00253 00254 std::string getRepresentativeTypeName(ASTContext &C) const; 00255 }; 00256 00257 class OptionalAmount { 00258 public: 00259 enum HowSpecified { NotSpecified, Constant, Arg, Invalid }; 00260 00261 OptionalAmount(HowSpecified howSpecified, 00262 unsigned amount, 00263 const char *amountStart, 00264 unsigned amountLength, 00265 bool usesPositionalArg) 00266 : start(amountStart), length(amountLength), hs(howSpecified), amt(amount), 00267 UsesPositionalArg(usesPositionalArg), UsesDotPrefix(0) {} 00268 00269 OptionalAmount(bool valid = true) 00270 : start(nullptr),length(0), hs(valid ? NotSpecified : Invalid), amt(0), 00271 UsesPositionalArg(0), UsesDotPrefix(0) {} 00272 00273 bool isInvalid() const { 00274 return hs == Invalid; 00275 } 00276 00277 HowSpecified getHowSpecified() const { return hs; } 00278 void setHowSpecified(HowSpecified h) { hs = h; } 00279 00280 bool hasDataArgument() const { return hs == Arg; } 00281 00282 unsigned getArgIndex() const { 00283 assert(hasDataArgument()); 00284 return amt; 00285 } 00286 00287 unsigned getConstantAmount() const { 00288 assert(hs == Constant); 00289 return amt; 00290 } 00291 00292 const char *getStart() const { 00293 // We include the . character if it is given. 00294 return start - UsesDotPrefix; 00295 } 00296 00297 unsigned getConstantLength() const { 00298 assert(hs == Constant); 00299 return length + UsesDotPrefix; 00300 } 00301 00302 ArgType getArgType(ASTContext &Ctx) const; 00303 00304 void toString(raw_ostream &os) const; 00305 00306 bool usesPositionalArg() const { return (bool) UsesPositionalArg; } 00307 unsigned getPositionalArgIndex() const { 00308 assert(hasDataArgument()); 00309 return amt + 1; 00310 } 00311 00312 bool usesDotPrefix() const { return UsesDotPrefix; } 00313 void setUsesDotPrefix() { UsesDotPrefix = true; } 00314 00315 private: 00316 const char *start; 00317 unsigned length; 00318 HowSpecified hs; 00319 unsigned amt; 00320 bool UsesPositionalArg : 1; 00321 bool UsesDotPrefix; 00322 }; 00323 00324 00325 class FormatSpecifier { 00326 protected: 00327 LengthModifier LM; 00328 OptionalAmount FieldWidth; 00329 ConversionSpecifier CS; 00330 /// Positional arguments, an IEEE extension: 00331 /// IEEE Std 1003.1, 2004 Edition 00332 /// http://www.opengroup.org/onlinepubs/009695399/functions/printf.html 00333 bool UsesPositionalArg; 00334 unsigned argIndex; 00335 public: 00336 FormatSpecifier(bool isPrintf) 00337 : CS(isPrintf), UsesPositionalArg(false), argIndex(0) {} 00338 00339 void setLengthModifier(LengthModifier lm) { 00340 LM = lm; 00341 } 00342 00343 void setUsesPositionalArg() { UsesPositionalArg = true; } 00344 00345 void setArgIndex(unsigned i) { 00346 argIndex = i; 00347 } 00348 00349 unsigned getArgIndex() const { 00350 return argIndex; 00351 } 00352 00353 unsigned getPositionalArgIndex() const { 00354 return argIndex + 1; 00355 } 00356 00357 const LengthModifier &getLengthModifier() const { 00358 return LM; 00359 } 00360 00361 const OptionalAmount &getFieldWidth() const { 00362 return FieldWidth; 00363 } 00364 00365 void setFieldWidth(const OptionalAmount &Amt) { 00366 FieldWidth = Amt; 00367 } 00368 00369 bool usesPositionalArg() const { return UsesPositionalArg; } 00370 00371 bool hasValidLengthModifier(const TargetInfo &Target) const; 00372 00373 bool hasStandardLengthModifier() const; 00374 00375 Optional<LengthModifier> getCorrectedLengthModifier() const; 00376 00377 bool hasStandardConversionSpecifier(const LangOptions &LangOpt) const; 00378 00379 bool hasStandardLengthConversionCombination() const; 00380 00381 /// For a TypedefType QT, if it is a named integer type such as size_t, 00382 /// assign the appropriate value to LM and return true. 00383 static bool namedTypeToLengthModifier(QualType QT, LengthModifier &LM); 00384 }; 00385 00386 } // end analyze_format_string namespace 00387 00388 //===----------------------------------------------------------------------===// 00389 /// Pieces specific to fprintf format strings. 00390 00391 namespace analyze_printf { 00392 00393 class PrintfConversionSpecifier : 00394 public analyze_format_string::ConversionSpecifier { 00395 public: 00396 PrintfConversionSpecifier() 00397 : ConversionSpecifier(true, nullptr, InvalidSpecifier) {} 00398 00399 PrintfConversionSpecifier(const char *pos, Kind k) 00400 : ConversionSpecifier(true, pos, k) {} 00401 00402 bool isObjCArg() const { return kind >= ObjCBeg && kind <= ObjCEnd; } 00403 bool isDoubleArg() const { return kind >= DoubleArgBeg && 00404 kind <= DoubleArgEnd; } 00405 unsigned getLength() const { 00406 // Conversion specifiers currently only are represented by 00407 // single characters, but we be flexible. 00408 return 1; 00409 } 00410 00411 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 00412 return CS->isPrintfKind(); 00413 } 00414 }; 00415 00416 using analyze_format_string::ArgType; 00417 using analyze_format_string::LengthModifier; 00418 using analyze_format_string::OptionalAmount; 00419 using analyze_format_string::OptionalFlag; 00420 00421 class PrintfSpecifier : public analyze_format_string::FormatSpecifier { 00422 OptionalFlag HasThousandsGrouping; // ''', POSIX extension. 00423 OptionalFlag IsLeftJustified; // '-' 00424 OptionalFlag HasPlusPrefix; // '+' 00425 OptionalFlag HasSpacePrefix; // ' ' 00426 OptionalFlag HasAlternativeForm; // '#' 00427 OptionalFlag HasLeadingZeroes; // '0' 00428 OptionalAmount Precision; 00429 public: 00430 PrintfSpecifier() : 00431 FormatSpecifier(/* isPrintf = */ true), 00432 HasThousandsGrouping("'"), IsLeftJustified("-"), HasPlusPrefix("+"), 00433 HasSpacePrefix(" "), HasAlternativeForm("#"), HasLeadingZeroes("0") {} 00434 00435 static PrintfSpecifier Parse(const char *beg, const char *end); 00436 00437 // Methods for incrementally constructing the PrintfSpecifier. 00438 void setConversionSpecifier(const PrintfConversionSpecifier &cs) { 00439 CS = cs; 00440 } 00441 void setHasThousandsGrouping(const char *position) { 00442 HasThousandsGrouping = true; 00443 HasThousandsGrouping.setPosition(position); 00444 } 00445 void setIsLeftJustified(const char *position) { 00446 IsLeftJustified = true; 00447 IsLeftJustified.setPosition(position); 00448 } 00449 void setHasPlusPrefix(const char *position) { 00450 HasPlusPrefix = true; 00451 HasPlusPrefix.setPosition(position); 00452 } 00453 void setHasSpacePrefix(const char *position) { 00454 HasSpacePrefix = true; 00455 HasSpacePrefix.setPosition(position); 00456 } 00457 void setHasAlternativeForm(const char *position) { 00458 HasAlternativeForm = true; 00459 HasAlternativeForm.setPosition(position); 00460 } 00461 void setHasLeadingZeros(const char *position) { 00462 HasLeadingZeroes = true; 00463 HasLeadingZeroes.setPosition(position); 00464 } 00465 void setUsesPositionalArg() { UsesPositionalArg = true; } 00466 00467 // Methods for querying the format specifier. 00468 00469 const PrintfConversionSpecifier &getConversionSpecifier() const { 00470 return cast<PrintfConversionSpecifier>(CS); 00471 } 00472 00473 void setPrecision(const OptionalAmount &Amt) { 00474 Precision = Amt; 00475 Precision.setUsesDotPrefix(); 00476 } 00477 00478 const OptionalAmount &getPrecision() const { 00479 return Precision; 00480 } 00481 00482 bool consumesDataArgument() const { 00483 return getConversionSpecifier().consumesDataArgument(); 00484 } 00485 00486 /// \brief Returns the builtin type that a data argument 00487 /// paired with this format specifier should have. This method 00488 /// will return null if the format specifier does not have 00489 /// a matching data argument or the matching argument matches 00490 /// more than one type. 00491 ArgType getArgType(ASTContext &Ctx, bool IsObjCLiteral) const; 00492 00493 const OptionalFlag &hasThousandsGrouping() const { 00494 return HasThousandsGrouping; 00495 } 00496 const OptionalFlag &isLeftJustified() const { return IsLeftJustified; } 00497 const OptionalFlag &hasPlusPrefix() const { return HasPlusPrefix; } 00498 const OptionalFlag &hasAlternativeForm() const { return HasAlternativeForm; } 00499 const OptionalFlag &hasLeadingZeros() const { return HasLeadingZeroes; } 00500 const OptionalFlag &hasSpacePrefix() const { return HasSpacePrefix; } 00501 bool usesPositionalArg() const { return UsesPositionalArg; } 00502 00503 /// Changes the specifier and length according to a QualType, retaining any 00504 /// flags or options. Returns true on success, or false when a conversion 00505 /// was not successful. 00506 bool fixType(QualType QT, const LangOptions &LangOpt, ASTContext &Ctx, 00507 bool IsObjCLiteral); 00508 00509 void toString(raw_ostream &os) const; 00510 00511 // Validation methods - to check if any element results in undefined behavior 00512 bool hasValidPlusPrefix() const; 00513 bool hasValidAlternativeForm() const; 00514 bool hasValidLeadingZeros() const; 00515 bool hasValidSpacePrefix() const; 00516 bool hasValidLeftJustified() const; 00517 bool hasValidThousandsGroupingPrefix() const; 00518 00519 bool hasValidPrecision() const; 00520 bool hasValidFieldWidth() const; 00521 }; 00522 } // end analyze_printf namespace 00523 00524 //===----------------------------------------------------------------------===// 00525 /// Pieces specific to fscanf format strings. 00526 00527 namespace analyze_scanf { 00528 00529 class ScanfConversionSpecifier : 00530 public analyze_format_string::ConversionSpecifier { 00531 public: 00532 ScanfConversionSpecifier() 00533 : ConversionSpecifier(false, nullptr, InvalidSpecifier) {} 00534 00535 ScanfConversionSpecifier(const char *pos, Kind k) 00536 : ConversionSpecifier(false, pos, k) {} 00537 00538 void setEndScanList(const char *pos) { EndScanList = pos; } 00539 00540 static bool classof(const analyze_format_string::ConversionSpecifier *CS) { 00541 return !CS->isPrintfKind(); 00542 } 00543 }; 00544 00545 using analyze_format_string::ArgType; 00546 using analyze_format_string::LengthModifier; 00547 using analyze_format_string::OptionalAmount; 00548 using analyze_format_string::OptionalFlag; 00549 00550 class ScanfSpecifier : public analyze_format_string::FormatSpecifier { 00551 OptionalFlag SuppressAssignment; // '*' 00552 public: 00553 ScanfSpecifier() : 00554 FormatSpecifier(/* isPrintf = */ false), 00555 SuppressAssignment("*") {} 00556 00557 void setSuppressAssignment(const char *position) { 00558 SuppressAssignment = true; 00559 SuppressAssignment.setPosition(position); 00560 } 00561 00562 const OptionalFlag &getSuppressAssignment() const { 00563 return SuppressAssignment; 00564 } 00565 00566 void setConversionSpecifier(const ScanfConversionSpecifier &cs) { 00567 CS = cs; 00568 } 00569 00570 const ScanfConversionSpecifier &getConversionSpecifier() const { 00571 return cast<ScanfConversionSpecifier>(CS); 00572 } 00573 00574 bool consumesDataArgument() const { 00575 return CS.consumesDataArgument() && !SuppressAssignment; 00576 } 00577 00578 ArgType getArgType(ASTContext &Ctx) const; 00579 00580 bool fixType(QualType QT, QualType RawQT, const LangOptions &LangOpt, 00581 ASTContext &Ctx); 00582 00583 void toString(raw_ostream &os) const; 00584 00585 static ScanfSpecifier Parse(const char *beg, const char *end); 00586 }; 00587 00588 } // end analyze_scanf namespace 00589 00590 //===----------------------------------------------------------------------===// 00591 // Parsing and processing of format strings (both fprintf and fscanf). 00592 00593 namespace analyze_format_string { 00594 00595 enum PositionContext { FieldWidthPos = 0, PrecisionPos = 1 }; 00596 00597 class FormatStringHandler { 00598 public: 00599 FormatStringHandler() {} 00600 virtual ~FormatStringHandler(); 00601 00602 virtual void HandleNullChar(const char *nullCharacter) {} 00603 00604 virtual void HandlePosition(const char *startPos, unsigned posLen) {} 00605 00606 virtual void HandleInvalidPosition(const char *startPos, unsigned posLen, 00607 PositionContext p) {} 00608 00609 virtual void HandleZeroPosition(const char *startPos, unsigned posLen) {} 00610 00611 virtual void HandleIncompleteSpecifier(const char *startSpecifier, 00612 unsigned specifierLen) {} 00613 00614 // Printf-specific handlers. 00615 00616 virtual bool HandleInvalidPrintfConversionSpecifier( 00617 const analyze_printf::PrintfSpecifier &FS, 00618 const char *startSpecifier, 00619 unsigned specifierLen) { 00620 return true; 00621 } 00622 00623 virtual bool HandlePrintfSpecifier(const analyze_printf::PrintfSpecifier &FS, 00624 const char *startSpecifier, 00625 unsigned specifierLen) { 00626 return true; 00627 } 00628 00629 // Scanf-specific handlers. 00630 00631 virtual bool HandleInvalidScanfConversionSpecifier( 00632 const analyze_scanf::ScanfSpecifier &FS, 00633 const char *startSpecifier, 00634 unsigned specifierLen) { 00635 return true; 00636 } 00637 00638 virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, 00639 const char *startSpecifier, 00640 unsigned specifierLen) { 00641 return true; 00642 } 00643 00644 virtual void HandleIncompleteScanList(const char *start, const char *end) {} 00645 }; 00646 00647 bool ParsePrintfString(FormatStringHandler &H, 00648 const char *beg, const char *end, const LangOptions &LO, 00649 const TargetInfo &Target); 00650 00651 bool ParseFormatStringHasSArg(const char *beg, const char *end, const LangOptions &LO, 00652 const TargetInfo &Target); 00653 00654 bool ParseScanfString(FormatStringHandler &H, 00655 const char *beg, const char *end, const LangOptions &LO, 00656 const TargetInfo &Target); 00657 00658 } // end analyze_format_string namespace 00659 } // end clang namespace 00660 #endif