clang API Documentation
00001 //= ScanfFormatString.cpp - Analysis of printf format strings --*- C++ -*-===// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // Handling of format string in scanf and friends. The structure of format 00011 // strings for fscanf() are described in C99 7.19.6.2. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "clang/Analysis/Analyses/FormatString.h" 00016 #include "FormatStringParsing.h" 00017 #include "clang/Basic/TargetInfo.h" 00018 00019 using clang::analyze_format_string::ArgType; 00020 using clang::analyze_format_string::FormatStringHandler; 00021 using clang::analyze_format_string::LengthModifier; 00022 using clang::analyze_format_string::OptionalAmount; 00023 using clang::analyze_format_string::ConversionSpecifier; 00024 using clang::analyze_scanf::ScanfConversionSpecifier; 00025 using clang::analyze_scanf::ScanfSpecifier; 00026 using clang::UpdateOnReturn; 00027 using namespace clang; 00028 00029 typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier> 00030 ScanfSpecifierResult; 00031 00032 static bool ParseScanList(FormatStringHandler &H, 00033 ScanfConversionSpecifier &CS, 00034 const char *&Beg, const char *E) { 00035 const char *I = Beg; 00036 const char *start = I - 1; 00037 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 00038 00039 // No more characters? 00040 if (I == E) { 00041 H.HandleIncompleteScanList(start, I); 00042 return true; 00043 } 00044 00045 // Special case: ']' is the first character. 00046 if (*I == ']') { 00047 if (++I == E) { 00048 H.HandleIncompleteScanList(start, I - 1); 00049 return true; 00050 } 00051 } 00052 00053 // Special case: "^]" are the first characters. 00054 if (I + 1 != E && I[0] == '^' && I[1] == ']') { 00055 I += 2; 00056 if (I == E) { 00057 H.HandleIncompleteScanList(start, I - 1); 00058 return true; 00059 } 00060 } 00061 00062 // Look for a ']' character which denotes the end of the scan list. 00063 while (*I != ']') { 00064 if (++I == E) { 00065 H.HandleIncompleteScanList(start, I - 1); 00066 return true; 00067 } 00068 } 00069 00070 CS.setEndScanList(I); 00071 return false; 00072 } 00073 00074 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier. 00075 // We can possibly refactor. 00076 static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, 00077 const char *&Beg, 00078 const char *E, 00079 unsigned &argIndex, 00080 const LangOptions &LO, 00081 const TargetInfo &Target) { 00082 00083 using namespace clang::analyze_scanf; 00084 const char *I = Beg; 00085 const char *Start = nullptr; 00086 UpdateOnReturn <const char*> UpdateBeg(Beg, I); 00087 00088 // Look for a '%' character that indicates the start of a format specifier. 00089 for ( ; I != E ; ++I) { 00090 char c = *I; 00091 if (c == '\0') { 00092 // Detect spurious null characters, which are likely errors. 00093 H.HandleNullChar(I); 00094 return true; 00095 } 00096 if (c == '%') { 00097 Start = I++; // Record the start of the format specifier. 00098 break; 00099 } 00100 } 00101 00102 // No format specifier found? 00103 if (!Start) 00104 return false; 00105 00106 if (I == E) { 00107 // No more characters left? 00108 H.HandleIncompleteSpecifier(Start, E - Start); 00109 return true; 00110 } 00111 00112 ScanfSpecifier FS; 00113 if (ParseArgPosition(H, FS, Start, I, E)) 00114 return true; 00115 00116 if (I == E) { 00117 // No more characters left? 00118 H.HandleIncompleteSpecifier(Start, E - Start); 00119 return true; 00120 } 00121 00122 // Look for '*' flag if it is present. 00123 if (*I == '*') { 00124 FS.setSuppressAssignment(I); 00125 if (++I == E) { 00126 H.HandleIncompleteSpecifier(Start, E - Start); 00127 return true; 00128 } 00129 } 00130 00131 // Look for the field width (if any). Unlike printf, this is either 00132 // a fixed integer or isn't present. 00133 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(I, E); 00134 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) { 00135 assert(Amt.getHowSpecified() == OptionalAmount::Constant); 00136 FS.setFieldWidth(Amt); 00137 00138 if (I == E) { 00139 // No more characters left? 00140 H.HandleIncompleteSpecifier(Start, E - Start); 00141 return true; 00142 } 00143 } 00144 00145 // Look for the length modifier. 00146 if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) { 00147 // No more characters left? 00148 H.HandleIncompleteSpecifier(Start, E - Start); 00149 return true; 00150 } 00151 00152 // Detect spurious null characters, which are likely errors. 00153 if (*I == '\0') { 00154 H.HandleNullChar(I); 00155 return true; 00156 } 00157 00158 // Finally, look for the conversion specifier. 00159 const char *conversionPosition = I++; 00160 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier; 00161 switch (*conversionPosition) { 00162 default: 00163 break; 00164 case '%': k = ConversionSpecifier::PercentArg; break; 00165 case 'A': k = ConversionSpecifier::AArg; break; 00166 case 'E': k = ConversionSpecifier::EArg; break; 00167 case 'F': k = ConversionSpecifier::FArg; break; 00168 case 'G': k = ConversionSpecifier::GArg; break; 00169 case 'X': k = ConversionSpecifier::XArg; break; 00170 case 'a': k = ConversionSpecifier::aArg; break; 00171 case 'd': k = ConversionSpecifier::dArg; break; 00172 case 'e': k = ConversionSpecifier::eArg; break; 00173 case 'f': k = ConversionSpecifier::fArg; break; 00174 case 'g': k = ConversionSpecifier::gArg; break; 00175 case 'i': k = ConversionSpecifier::iArg; break; 00176 case 'n': k = ConversionSpecifier::nArg; break; 00177 case 'c': k = ConversionSpecifier::cArg; break; 00178 case 'C': k = ConversionSpecifier::CArg; break; 00179 case 'S': k = ConversionSpecifier::SArg; break; 00180 case '[': k = ConversionSpecifier::ScanListArg; break; 00181 case 'u': k = ConversionSpecifier::uArg; break; 00182 case 'x': k = ConversionSpecifier::xArg; break; 00183 case 'o': k = ConversionSpecifier::oArg; break; 00184 case 's': k = ConversionSpecifier::sArg; break; 00185 case 'p': k = ConversionSpecifier::pArg; break; 00186 // Apple extensions 00187 // Apple-specific 00188 case 'D': 00189 if (Target.getTriple().isOSDarwin()) 00190 k = ConversionSpecifier::DArg; 00191 break; 00192 case 'O': 00193 if (Target.getTriple().isOSDarwin()) 00194 k = ConversionSpecifier::OArg; 00195 break; 00196 case 'U': 00197 if (Target.getTriple().isOSDarwin()) 00198 k = ConversionSpecifier::UArg; 00199 break; 00200 } 00201 ScanfConversionSpecifier CS(conversionPosition, k); 00202 if (k == ScanfConversionSpecifier::ScanListArg) { 00203 if (ParseScanList(H, CS, I, E)) 00204 return true; 00205 } 00206 FS.setConversionSpecifier(CS); 00207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() 00208 && !FS.usesPositionalArg()) 00209 FS.setArgIndex(argIndex++); 00210 00211 // FIXME: '%' and '*' doesn't make sense. Issue a warning. 00212 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense. 00213 00214 if (k == ScanfConversionSpecifier::InvalidSpecifier) { 00215 // Assume the conversion takes one argument. 00216 return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, I - Beg); 00217 } 00218 return ScanfSpecifierResult(Start, FS); 00219 } 00220 00221 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const { 00222 const ScanfConversionSpecifier &CS = getConversionSpecifier(); 00223 00224 if (!CS.consumesDataArgument()) 00225 return ArgType::Invalid(); 00226 00227 switch(CS.getKind()) { 00228 // Signed int. 00229 case ConversionSpecifier::dArg: 00230 case ConversionSpecifier::DArg: 00231 case ConversionSpecifier::iArg: 00232 switch (LM.getKind()) { 00233 case LengthModifier::None: 00234 return ArgType::PtrTo(Ctx.IntTy); 00235 case LengthModifier::AsChar: 00236 return ArgType::PtrTo(ArgType::AnyCharTy); 00237 case LengthModifier::AsShort: 00238 return ArgType::PtrTo(Ctx.ShortTy); 00239 case LengthModifier::AsLong: 00240 return ArgType::PtrTo(Ctx.LongTy); 00241 case LengthModifier::AsLongLong: 00242 case LengthModifier::AsQuad: 00243 return ArgType::PtrTo(Ctx.LongLongTy); 00244 case LengthModifier::AsInt64: 00245 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 00246 case LengthModifier::AsIntMax: 00247 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 00248 case LengthModifier::AsSizeT: 00249 // FIXME: ssize_t. 00250 return ArgType(); 00251 case LengthModifier::AsPtrDiff: 00252 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 00253 case LengthModifier::AsLongDouble: 00254 // GNU extension. 00255 return ArgType::PtrTo(Ctx.LongLongTy); 00256 case LengthModifier::AsAllocate: 00257 case LengthModifier::AsMAllocate: 00258 case LengthModifier::AsInt32: 00259 case LengthModifier::AsInt3264: 00260 case LengthModifier::AsWide: 00261 return ArgType::Invalid(); 00262 } 00263 00264 // Unsigned int. 00265 case ConversionSpecifier::oArg: 00266 case ConversionSpecifier::OArg: 00267 case ConversionSpecifier::uArg: 00268 case ConversionSpecifier::UArg: 00269 case ConversionSpecifier::xArg: 00270 case ConversionSpecifier::XArg: 00271 switch (LM.getKind()) { 00272 case LengthModifier::None: 00273 return ArgType::PtrTo(Ctx.UnsignedIntTy); 00274 case LengthModifier::AsChar: 00275 return ArgType::PtrTo(Ctx.UnsignedCharTy); 00276 case LengthModifier::AsShort: 00277 return ArgType::PtrTo(Ctx.UnsignedShortTy); 00278 case LengthModifier::AsLong: 00279 return ArgType::PtrTo(Ctx.UnsignedLongTy); 00280 case LengthModifier::AsLongLong: 00281 case LengthModifier::AsQuad: 00282 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 00283 case LengthModifier::AsInt64: 00284 return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64")); 00285 case LengthModifier::AsIntMax: 00286 return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t")); 00287 case LengthModifier::AsSizeT: 00288 return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t")); 00289 case LengthModifier::AsPtrDiff: 00290 // FIXME: Unsigned version of ptrdiff_t? 00291 return ArgType(); 00292 case LengthModifier::AsLongDouble: 00293 // GNU extension. 00294 return ArgType::PtrTo(Ctx.UnsignedLongLongTy); 00295 case LengthModifier::AsAllocate: 00296 case LengthModifier::AsMAllocate: 00297 case LengthModifier::AsInt32: 00298 case LengthModifier::AsInt3264: 00299 case LengthModifier::AsWide: 00300 return ArgType::Invalid(); 00301 } 00302 00303 // Float. 00304 case ConversionSpecifier::aArg: 00305 case ConversionSpecifier::AArg: 00306 case ConversionSpecifier::eArg: 00307 case ConversionSpecifier::EArg: 00308 case ConversionSpecifier::fArg: 00309 case ConversionSpecifier::FArg: 00310 case ConversionSpecifier::gArg: 00311 case ConversionSpecifier::GArg: 00312 switch (LM.getKind()) { 00313 case LengthModifier::None: 00314 return ArgType::PtrTo(Ctx.FloatTy); 00315 case LengthModifier::AsLong: 00316 return ArgType::PtrTo(Ctx.DoubleTy); 00317 case LengthModifier::AsLongDouble: 00318 return ArgType::PtrTo(Ctx.LongDoubleTy); 00319 default: 00320 return ArgType::Invalid(); 00321 } 00322 00323 // Char, string and scanlist. 00324 case ConversionSpecifier::cArg: 00325 case ConversionSpecifier::sArg: 00326 case ConversionSpecifier::ScanListArg: 00327 switch (LM.getKind()) { 00328 case LengthModifier::None: 00329 return ArgType::PtrTo(ArgType::AnyCharTy); 00330 case LengthModifier::AsLong: 00331 case LengthModifier::AsWide: 00332 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 00333 case LengthModifier::AsAllocate: 00334 case LengthModifier::AsMAllocate: 00335 return ArgType::PtrTo(ArgType::CStrTy); 00336 case LengthModifier::AsShort: 00337 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 00338 return ArgType::PtrTo(ArgType::AnyCharTy); 00339 default: 00340 return ArgType::Invalid(); 00341 } 00342 case ConversionSpecifier::CArg: 00343 case ConversionSpecifier::SArg: 00344 // FIXME: Mac OS X specific? 00345 switch (LM.getKind()) { 00346 case LengthModifier::None: 00347 case LengthModifier::AsWide: 00348 return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t")); 00349 case LengthModifier::AsAllocate: 00350 case LengthModifier::AsMAllocate: 00351 return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *")); 00352 case LengthModifier::AsShort: 00353 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT()) 00354 return ArgType::PtrTo(ArgType::AnyCharTy); 00355 default: 00356 return ArgType::Invalid(); 00357 } 00358 00359 // Pointer. 00360 case ConversionSpecifier::pArg: 00361 return ArgType::PtrTo(ArgType::CPointerTy); 00362 00363 // Write-back. 00364 case ConversionSpecifier::nArg: 00365 switch (LM.getKind()) { 00366 case LengthModifier::None: 00367 return ArgType::PtrTo(Ctx.IntTy); 00368 case LengthModifier::AsChar: 00369 return ArgType::PtrTo(Ctx.SignedCharTy); 00370 case LengthModifier::AsShort: 00371 return ArgType::PtrTo(Ctx.ShortTy); 00372 case LengthModifier::AsLong: 00373 return ArgType::PtrTo(Ctx.LongTy); 00374 case LengthModifier::AsLongLong: 00375 case LengthModifier::AsQuad: 00376 return ArgType::PtrTo(Ctx.LongLongTy); 00377 case LengthModifier::AsInt64: 00378 return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64")); 00379 case LengthModifier::AsIntMax: 00380 return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t")); 00381 case LengthModifier::AsSizeT: 00382 return ArgType(); // FIXME: ssize_t 00383 case LengthModifier::AsPtrDiff: 00384 return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")); 00385 case LengthModifier::AsLongDouble: 00386 return ArgType(); // FIXME: Is this a known extension? 00387 case LengthModifier::AsAllocate: 00388 case LengthModifier::AsMAllocate: 00389 case LengthModifier::AsInt32: 00390 case LengthModifier::AsInt3264: 00391 case LengthModifier::AsWide: 00392 return ArgType::Invalid(); 00393 } 00394 00395 default: 00396 break; 00397 } 00398 00399 return ArgType(); 00400 } 00401 00402 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT, 00403 const LangOptions &LangOpt, 00404 ASTContext &Ctx) { 00405 00406 // %n is different from other conversion specifiers; don't try to fix it. 00407 if (CS.getKind() == ConversionSpecifier::nArg) 00408 return false; 00409 00410 if (!QT->isPointerType()) 00411 return false; 00412 00413 QualType PT = QT->getPointeeType(); 00414 00415 // If it's an enum, get its underlying type. 00416 if (const EnumType *ETy = PT->getAs<EnumType>()) 00417 PT = ETy->getDecl()->getIntegerType(); 00418 00419 const BuiltinType *BT = PT->getAs<BuiltinType>(); 00420 if (!BT) 00421 return false; 00422 00423 // Pointer to a character. 00424 if (PT->isAnyCharacterType()) { 00425 CS.setKind(ConversionSpecifier::sArg); 00426 if (PT->isWideCharType()) 00427 LM.setKind(LengthModifier::AsWideChar); 00428 else 00429 LM.setKind(LengthModifier::None); 00430 00431 // If we know the target array length, we can use it as a field width. 00432 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) { 00433 if (CAT->getSizeModifier() == ArrayType::Normal) 00434 FieldWidth = OptionalAmount(OptionalAmount::Constant, 00435 CAT->getSize().getZExtValue() - 1, 00436 "", 0, false); 00437 00438 } 00439 return true; 00440 } 00441 00442 // Figure out the length modifier. 00443 switch (BT->getKind()) { 00444 // no modifier 00445 case BuiltinType::UInt: 00446 case BuiltinType::Int: 00447 case BuiltinType::Float: 00448 LM.setKind(LengthModifier::None); 00449 break; 00450 00451 // hh 00452 case BuiltinType::Char_U: 00453 case BuiltinType::UChar: 00454 case BuiltinType::Char_S: 00455 case BuiltinType::SChar: 00456 LM.setKind(LengthModifier::AsChar); 00457 break; 00458 00459 // h 00460 case BuiltinType::Short: 00461 case BuiltinType::UShort: 00462 LM.setKind(LengthModifier::AsShort); 00463 break; 00464 00465 // l 00466 case BuiltinType::Long: 00467 case BuiltinType::ULong: 00468 case BuiltinType::Double: 00469 LM.setKind(LengthModifier::AsLong); 00470 break; 00471 00472 // ll 00473 case BuiltinType::LongLong: 00474 case BuiltinType::ULongLong: 00475 LM.setKind(LengthModifier::AsLongLong); 00476 break; 00477 00478 // L 00479 case BuiltinType::LongDouble: 00480 LM.setKind(LengthModifier::AsLongDouble); 00481 break; 00482 00483 // Don't know. 00484 default: 00485 return false; 00486 } 00487 00488 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99. 00489 if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11)) 00490 namedTypeToLengthModifier(PT, LM); 00491 00492 // If fixing the length modifier was enough, we are done. 00493 if (hasValidLengthModifier(Ctx.getTargetInfo())) { 00494 const analyze_scanf::ArgType &AT = getArgType(Ctx); 00495 if (AT.isValid() && AT.matchesType(Ctx, QT)) 00496 return true; 00497 } 00498 00499 // Figure out the conversion specifier. 00500 if (PT->isRealFloatingType()) 00501 CS.setKind(ConversionSpecifier::fArg); 00502 else if (PT->isSignedIntegerType()) 00503 CS.setKind(ConversionSpecifier::dArg); 00504 else if (PT->isUnsignedIntegerType()) 00505 CS.setKind(ConversionSpecifier::uArg); 00506 else 00507 llvm_unreachable("Unexpected type"); 00508 00509 return true; 00510 } 00511 00512 void ScanfSpecifier::toString(raw_ostream &os) const { 00513 os << "%"; 00514 00515 if (usesPositionalArg()) 00516 os << getPositionalArgIndex() << "$"; 00517 if (SuppressAssignment) 00518 os << "*"; 00519 00520 FieldWidth.toString(os); 00521 os << LM.toString(); 00522 os << CS.toString(); 00523 } 00524 00525 bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H, 00526 const char *I, 00527 const char *E, 00528 const LangOptions &LO, 00529 const TargetInfo &Target) { 00530 00531 unsigned argIndex = 0; 00532 00533 // Keep looking for a format specifier until we have exhausted the string. 00534 while (I != E) { 00535 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex, 00536 LO, Target); 00537 // Did a fail-stop error of any kind occur when parsing the specifier? 00538 // If so, don't do any more processing. 00539 if (FSR.shouldStop()) 00540 return true; 00541 // Did we exhaust the string or encounter an error that 00542 // we can recover from? 00543 if (!FSR.hasValue()) 00544 continue; 00545 // We have a format specifier. Pass it to the callback. 00546 if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(), 00547 I - FSR.getStart())) { 00548 return true; 00549 } 00550 } 00551 assert(I == E && "Format string not exhausted"); 00552 return false; 00553 }