clang API Documentation
00001 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This checker defines the attack surface for generic taint propagation. 00011 // 00012 // The taint information produced by it might be useful to other checkers. For 00013 // example, checkers should report errors which involve tainted data more 00014 // aggressively, even if the involved symbols are under constrained. 00015 // 00016 //===----------------------------------------------------------------------===// 00017 #include "ClangSACheckers.h" 00018 #include "clang/AST/Attr.h" 00019 #include "clang/Basic/Builtins.h" 00020 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 00021 #include "clang/StaticAnalyzer/Core/Checker.h" 00022 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 00023 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 00024 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 00025 #include <climits> 00026 00027 using namespace clang; 00028 using namespace ento; 00029 00030 namespace { 00031 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>, 00032 check::PreStmt<CallExpr> > { 00033 public: 00034 static void *getTag() { static int Tag; return &Tag; } 00035 00036 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const; 00037 00038 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const; 00039 00040 private: 00041 static const unsigned InvalidArgIndex = UINT_MAX; 00042 /// Denotes the return vale. 00043 static const unsigned ReturnValueIndex = UINT_MAX - 1; 00044 00045 mutable std::unique_ptr<BugType> BT; 00046 inline void initBugType() const { 00047 if (!BT) 00048 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data")); 00049 } 00050 00051 /// \brief Catch taint related bugs. Check if tainted data is passed to a 00052 /// system call etc. 00053 bool checkPre(const CallExpr *CE, CheckerContext &C) const; 00054 00055 /// \brief Add taint sources on a pre-visit. 00056 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const; 00057 00058 /// \brief Propagate taint generated at pre-visit. 00059 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const; 00060 00061 /// \brief Add taint sources on a post visit. 00062 void addSourcesPost(const CallExpr *CE, CheckerContext &C) const; 00063 00064 /// Check if the region the expression evaluates to is the standard input, 00065 /// and thus, is tainted. 00066 static bool isStdin(const Expr *E, CheckerContext &C); 00067 00068 /// \brief Given a pointer argument, get the symbol of the value it contains 00069 /// (points to). 00070 static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg); 00071 00072 /// Functions defining the attack surface. 00073 typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, 00074 CheckerContext &C) const; 00075 ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; 00076 ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; 00077 ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; 00078 00079 /// Taint the scanned input if the file is tainted. 00080 ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const; 00081 00082 /// Check for CWE-134: Uncontrolled Format String. 00083 static const char MsgUncontrolledFormatString[]; 00084 bool checkUncontrolledFormatString(const CallExpr *CE, 00085 CheckerContext &C) const; 00086 00087 /// Check for: 00088 /// CERT/STR02-C. "Sanitize data passed to complex subsystems" 00089 /// CWE-78, "Failure to Sanitize Data into an OS Command" 00090 static const char MsgSanitizeSystemArgs[]; 00091 bool checkSystemCall(const CallExpr *CE, StringRef Name, 00092 CheckerContext &C) const; 00093 00094 /// Check if tainted data is used as a buffer size ins strn.. functions, 00095 /// and allocators. 00096 static const char MsgTaintedBufferSize[]; 00097 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl, 00098 CheckerContext &C) const; 00099 00100 /// Generate a report if the expression is tainted or points to tainted data. 00101 bool generateReportIfTainted(const Expr *E, const char Msg[], 00102 CheckerContext &C) const; 00103 00104 00105 typedef SmallVector<unsigned, 2> ArgVector; 00106 00107 /// \brief A struct used to specify taint propagation rules for a function. 00108 /// 00109 /// If any of the possible taint source arguments is tainted, all of the 00110 /// destination arguments should also be tainted. Use InvalidArgIndex in the 00111 /// src list to specify that all of the arguments can introduce taint. Use 00112 /// InvalidArgIndex in the dst arguments to signify that all the non-const 00113 /// pointer and reference arguments might be tainted on return. If 00114 /// ReturnValueIndex is added to the dst list, the return value will be 00115 /// tainted. 00116 struct TaintPropagationRule { 00117 /// List of arguments which can be taint sources and should be checked. 00118 ArgVector SrcArgs; 00119 /// List of arguments which should be tainted on function return. 00120 ArgVector DstArgs; 00121 // TODO: Check if using other data structures would be more optimal. 00122 00123 TaintPropagationRule() {} 00124 00125 TaintPropagationRule(unsigned SArg, 00126 unsigned DArg, bool TaintRet = false) { 00127 SrcArgs.push_back(SArg); 00128 DstArgs.push_back(DArg); 00129 if (TaintRet) 00130 DstArgs.push_back(ReturnValueIndex); 00131 } 00132 00133 TaintPropagationRule(unsigned SArg1, unsigned SArg2, 00134 unsigned DArg, bool TaintRet = false) { 00135 SrcArgs.push_back(SArg1); 00136 SrcArgs.push_back(SArg2); 00137 DstArgs.push_back(DArg); 00138 if (TaintRet) 00139 DstArgs.push_back(ReturnValueIndex); 00140 } 00141 00142 /// Get the propagation rule for a given function. 00143 static TaintPropagationRule 00144 getTaintPropagationRule(const FunctionDecl *FDecl, 00145 StringRef Name, 00146 CheckerContext &C); 00147 00148 inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } 00149 inline void addDstArg(unsigned A) { DstArgs.push_back(A); } 00150 00151 inline bool isNull() const { return SrcArgs.empty(); } 00152 00153 inline bool isDestinationArgument(unsigned ArgNum) const { 00154 return (std::find(DstArgs.begin(), 00155 DstArgs.end(), ArgNum) != DstArgs.end()); 00156 } 00157 00158 static inline bool isTaintedOrPointsToTainted(const Expr *E, 00159 ProgramStateRef State, 00160 CheckerContext &C) { 00161 return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) || 00162 (E->getType().getTypePtr()->isPointerType() && 00163 State->isTainted(getPointedToSymbol(C, E)))); 00164 } 00165 00166 /// \brief Pre-process a function which propagates taint according to the 00167 /// taint rule. 00168 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; 00169 00170 }; 00171 }; 00172 00173 const unsigned GenericTaintChecker::ReturnValueIndex; 00174 const unsigned GenericTaintChecker::InvalidArgIndex; 00175 00176 const char GenericTaintChecker::MsgUncontrolledFormatString[] = 00177 "Untrusted data is used as a format string " 00178 "(CWE-134: Uncontrolled Format String)"; 00179 00180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] = 00181 "Untrusted data is passed to a system call " 00182 "(CERT/STR02-C. Sanitize data passed to complex subsystems)"; 00183 00184 const char GenericTaintChecker::MsgTaintedBufferSize[] = 00185 "Untrusted data is used to specify the buffer size " 00186 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for " 00187 "character data and the null terminator)"; 00188 00189 } // end of anonymous namespace 00190 00191 /// A set which is used to pass information from call pre-visit instruction 00192 /// to the call post-visit. The values are unsigned integers, which are either 00193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which 00194 /// points to data, which should be tainted on return. 00195 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned) 00196 00197 GenericTaintChecker::TaintPropagationRule 00198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule( 00199 const FunctionDecl *FDecl, 00200 StringRef Name, 00201 CheckerContext &C) { 00202 // TODO: Currently, we might loose precision here: we always mark a return 00203 // value as tainted even if it's just a pointer, pointing to tainted data. 00204 00205 // Check for exact name match for functions without builtin substitutes. 00206 TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) 00207 .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) 00208 .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) 00209 .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) 00210 .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) 00211 .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) 00212 .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) 00213 .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) 00214 .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) 00215 .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) 00216 .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) 00217 .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) 00218 .Case("read", TaintPropagationRule(0, 2, 1, true)) 00219 .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) 00220 .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) 00221 .Case("fgets", TaintPropagationRule(2, 0, true)) 00222 .Case("getline", TaintPropagationRule(2, 0)) 00223 .Case("getdelim", TaintPropagationRule(3, 0)) 00224 .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) 00225 .Default(TaintPropagationRule()); 00226 00227 if (!Rule.isNull()) 00228 return Rule; 00229 00230 // Check if it's one of the memory setting/copying functions. 00231 // This check is specialized but faster then calling isCLibraryFunction. 00232 unsigned BId = 0; 00233 if ( (BId = FDecl->getMemoryFunctionKind()) ) 00234 switch(BId) { 00235 case Builtin::BImemcpy: 00236 case Builtin::BImemmove: 00237 case Builtin::BIstrncpy: 00238 case Builtin::BIstrncat: 00239 return TaintPropagationRule(1, 2, 0, true); 00240 case Builtin::BIstrlcpy: 00241 case Builtin::BIstrlcat: 00242 return TaintPropagationRule(1, 2, 0, false); 00243 case Builtin::BIstrndup: 00244 return TaintPropagationRule(0, 1, ReturnValueIndex); 00245 00246 default: 00247 break; 00248 }; 00249 00250 // Process all other functions which could be defined as builtins. 00251 if (Rule.isNull()) { 00252 if (C.isCLibraryFunction(FDecl, "snprintf") || 00253 C.isCLibraryFunction(FDecl, "sprintf")) 00254 return TaintPropagationRule(InvalidArgIndex, 0, true); 00255 else if (C.isCLibraryFunction(FDecl, "strcpy") || 00256 C.isCLibraryFunction(FDecl, "stpcpy") || 00257 C.isCLibraryFunction(FDecl, "strcat")) 00258 return TaintPropagationRule(1, 0, true); 00259 else if (C.isCLibraryFunction(FDecl, "bcopy")) 00260 return TaintPropagationRule(0, 2, 1, false); 00261 else if (C.isCLibraryFunction(FDecl, "strdup") || 00262 C.isCLibraryFunction(FDecl, "strdupa")) 00263 return TaintPropagationRule(0, ReturnValueIndex); 00264 else if (C.isCLibraryFunction(FDecl, "wcsdup")) 00265 return TaintPropagationRule(0, ReturnValueIndex); 00266 } 00267 00268 // Skipping the following functions, since they might be used for cleansing 00269 // or smart memory copy: 00270 // - memccpy - copying until hitting a special character. 00271 00272 return TaintPropagationRule(); 00273 } 00274 00275 void GenericTaintChecker::checkPreStmt(const CallExpr *CE, 00276 CheckerContext &C) const { 00277 // Check for errors first. 00278 if (checkPre(CE, C)) 00279 return; 00280 00281 // Add taint second. 00282 addSourcesPre(CE, C); 00283 } 00284 00285 void GenericTaintChecker::checkPostStmt(const CallExpr *CE, 00286 CheckerContext &C) const { 00287 if (propagateFromPre(CE, C)) 00288 return; 00289 addSourcesPost(CE, C); 00290 } 00291 00292 void GenericTaintChecker::addSourcesPre(const CallExpr *CE, 00293 CheckerContext &C) const { 00294 ProgramStateRef State = nullptr; 00295 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00296 if (!FDecl || FDecl->getKind() != Decl::Function) 00297 return; 00298 00299 StringRef Name = C.getCalleeName(FDecl); 00300 if (Name.empty()) 00301 return; 00302 00303 // First, try generating a propagation rule for this function. 00304 TaintPropagationRule Rule = 00305 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C); 00306 if (!Rule.isNull()) { 00307 State = Rule.process(CE, C); 00308 if (!State) 00309 return; 00310 C.addTransition(State); 00311 return; 00312 } 00313 00314 // Otherwise, check if we have custom pre-processing implemented. 00315 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 00316 .Case("fscanf", &GenericTaintChecker::preFscanf) 00317 .Default(nullptr); 00318 // Check and evaluate the call. 00319 if (evalFunction) 00320 State = (this->*evalFunction)(CE, C); 00321 if (!State) 00322 return; 00323 C.addTransition(State); 00324 00325 } 00326 00327 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, 00328 CheckerContext &C) const { 00329 ProgramStateRef State = C.getState(); 00330 00331 // Depending on what was tainted at pre-visit, we determined a set of 00332 // arguments which should be tainted after the function returns. These are 00333 // stored in the state as TaintArgsOnPostVisit set. 00334 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>(); 00335 if (TaintArgs.isEmpty()) 00336 return false; 00337 00338 for (llvm::ImmutableSet<unsigned>::iterator 00339 I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { 00340 unsigned ArgNum = *I; 00341 00342 // Special handling for the tainted return value. 00343 if (ArgNum == ReturnValueIndex) { 00344 State = State->addTaint(CE, C.getLocationContext()); 00345 continue; 00346 } 00347 00348 // The arguments are pointer arguments. The data they are pointing at is 00349 // tainted after the call. 00350 if (CE->getNumArgs() < (ArgNum + 1)) 00351 return false; 00352 const Expr* Arg = CE->getArg(ArgNum); 00353 SymbolRef Sym = getPointedToSymbol(C, Arg); 00354 if (Sym) 00355 State = State->addTaint(Sym); 00356 } 00357 00358 // Clear up the taint info from the state. 00359 State = State->remove<TaintArgsOnPostVisit>(); 00360 00361 if (State != C.getState()) { 00362 C.addTransition(State); 00363 return true; 00364 } 00365 return false; 00366 } 00367 00368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE, 00369 CheckerContext &C) const { 00370 // Define the attack surface. 00371 // Set the evaluation function by switching on the callee name. 00372 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00373 if (!FDecl || FDecl->getKind() != Decl::Function) 00374 return; 00375 00376 StringRef Name = C.getCalleeName(FDecl); 00377 if (Name.empty()) 00378 return; 00379 FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name) 00380 .Case("scanf", &GenericTaintChecker::postScanf) 00381 // TODO: Add support for vfscanf & family. 00382 .Case("getchar", &GenericTaintChecker::postRetTaint) 00383 .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint) 00384 .Case("getenv", &GenericTaintChecker::postRetTaint) 00385 .Case("fopen", &GenericTaintChecker::postRetTaint) 00386 .Case("fdopen", &GenericTaintChecker::postRetTaint) 00387 .Case("freopen", &GenericTaintChecker::postRetTaint) 00388 .Case("getch", &GenericTaintChecker::postRetTaint) 00389 .Case("wgetch", &GenericTaintChecker::postRetTaint) 00390 .Case("socket", &GenericTaintChecker::postSocket) 00391 .Default(nullptr); 00392 00393 // If the callee isn't defined, it is not of security concern. 00394 // Check and evaluate the call. 00395 ProgramStateRef State = nullptr; 00396 if (evalFunction) 00397 State = (this->*evalFunction)(CE, C); 00398 if (!State) 00399 return; 00400 00401 C.addTransition(State); 00402 } 00403 00404 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ 00405 00406 if (checkUncontrolledFormatString(CE, C)) 00407 return true; 00408 00409 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00410 if (!FDecl || FDecl->getKind() != Decl::Function) 00411 return false; 00412 00413 StringRef Name = C.getCalleeName(FDecl); 00414 if (Name.empty()) 00415 return false; 00416 00417 if (checkSystemCall(CE, Name, C)) 00418 return true; 00419 00420 if (checkTaintedBufferSize(CE, FDecl, C)) 00421 return true; 00422 00423 return false; 00424 } 00425 00426 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C, 00427 const Expr* Arg) { 00428 ProgramStateRef State = C.getState(); 00429 SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext()); 00430 if (AddrVal.isUnknownOrUndef()) 00431 return nullptr; 00432 00433 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>(); 00434 if (!AddrLoc) 00435 return nullptr; 00436 00437 const PointerType *ArgTy = 00438 dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr()); 00439 SVal Val = State->getSVal(*AddrLoc, 00440 ArgTy ? ArgTy->getPointeeType(): QualType()); 00441 return Val.getAsSymbol(); 00442 } 00443 00444 ProgramStateRef 00445 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE, 00446 CheckerContext &C) const { 00447 ProgramStateRef State = C.getState(); 00448 00449 // Check for taint in arguments. 00450 bool IsTainted = false; 00451 for (ArgVector::const_iterator I = SrcArgs.begin(), 00452 E = SrcArgs.end(); I != E; ++I) { 00453 unsigned ArgNum = *I; 00454 00455 if (ArgNum == InvalidArgIndex) { 00456 // Check if any of the arguments is tainted, but skip the 00457 // destination arguments. 00458 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 00459 if (isDestinationArgument(i)) 00460 continue; 00461 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) 00462 break; 00463 } 00464 break; 00465 } 00466 00467 if (CE->getNumArgs() < (ArgNum + 1)) 00468 return State; 00469 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) 00470 break; 00471 } 00472 if (!IsTainted) 00473 return State; 00474 00475 // Mark the arguments which should be tainted after the function returns. 00476 for (ArgVector::const_iterator I = DstArgs.begin(), 00477 E = DstArgs.end(); I != E; ++I) { 00478 unsigned ArgNum = *I; 00479 00480 // Should we mark all arguments as tainted? 00481 if (ArgNum == InvalidArgIndex) { 00482 // For all pointer and references that were passed in: 00483 // If they are not pointing to const data, mark data as tainted. 00484 // TODO: So far we are just going one level down; ideally we'd need to 00485 // recurse here. 00486 for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { 00487 const Expr *Arg = CE->getArg(i); 00488 // Process pointer argument. 00489 const Type *ArgTy = Arg->getType().getTypePtr(); 00490 QualType PType = ArgTy->getPointeeType(); 00491 if ((!PType.isNull() && !PType.isConstQualified()) 00492 || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) 00493 State = State->add<TaintArgsOnPostVisit>(i); 00494 } 00495 continue; 00496 } 00497 00498 // Should mark the return value? 00499 if (ArgNum == ReturnValueIndex) { 00500 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); 00501 continue; 00502 } 00503 00504 // Mark the given argument. 00505 assert(ArgNum < CE->getNumArgs()); 00506 State = State->add<TaintArgsOnPostVisit>(ArgNum); 00507 } 00508 00509 return State; 00510 } 00511 00512 00513 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 00514 // and arg 1 should get taint. 00515 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, 00516 CheckerContext &C) const { 00517 assert(CE->getNumArgs() >= 2); 00518 ProgramStateRef State = C.getState(); 00519 00520 // Check is the file descriptor is tainted. 00521 if (State->isTainted(CE->getArg(0), C.getLocationContext()) || 00522 isStdin(CE->getArg(0), C)) { 00523 // All arguments except for the first two should get taint. 00524 for (unsigned int i = 2; i < CE->getNumArgs(); ++i) 00525 State = State->add<TaintArgsOnPostVisit>(i); 00526 return State; 00527 } 00528 00529 return nullptr; 00530 } 00531 00532 00533 // If argument 0(protocol domain) is network, the return value should get taint. 00534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, 00535 CheckerContext &C) const { 00536 ProgramStateRef State = C.getState(); 00537 if (CE->getNumArgs() < 3) 00538 return State; 00539 00540 SourceLocation DomLoc = CE->getArg(0)->getExprLoc(); 00541 StringRef DomName = C.getMacroNameOrSpelling(DomLoc); 00542 // White list the internal communication protocols. 00543 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") || 00544 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36")) 00545 return State; 00546 State = State->addTaint(CE, C.getLocationContext()); 00547 return State; 00548 } 00549 00550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, 00551 CheckerContext &C) const { 00552 ProgramStateRef State = C.getState(); 00553 if (CE->getNumArgs() < 2) 00554 return State; 00555 00556 // All arguments except for the very first one should get taint. 00557 for (unsigned int i = 1; i < CE->getNumArgs(); ++i) { 00558 // The arguments are pointer arguments. The data they are pointing at is 00559 // tainted after the call. 00560 const Expr* Arg = CE->getArg(i); 00561 SymbolRef Sym = getPointedToSymbol(C, Arg); 00562 if (Sym) 00563 State = State->addTaint(Sym); 00564 } 00565 return State; 00566 } 00567 00568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE, 00569 CheckerContext &C) const { 00570 return C.getState()->addTaint(CE, C.getLocationContext()); 00571 } 00572 00573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) { 00574 ProgramStateRef State = C.getState(); 00575 SVal Val = State->getSVal(E, C.getLocationContext()); 00576 00577 // stdin is a pointer, so it would be a region. 00578 const MemRegion *MemReg = Val.getAsRegion(); 00579 00580 // The region should be symbolic, we do not know it's value. 00581 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg); 00582 if (!SymReg) 00583 return false; 00584 00585 // Get it's symbol and find the declaration region it's pointing to. 00586 const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol()); 00587 if (!Sm) 00588 return false; 00589 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion()); 00590 if (!DeclReg) 00591 return false; 00592 00593 // This region corresponds to a declaration, find out if it's a global/extern 00594 // variable named stdin with the proper type. 00595 if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { 00596 D = D->getCanonicalDecl(); 00597 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) 00598 if (const PointerType * PtrTy = 00599 dyn_cast<PointerType>(D->getType().getTypePtr())) 00600 if (PtrTy->getPointeeType() == C.getASTContext().getFILEType()) 00601 return true; 00602 } 00603 return false; 00604 } 00605 00606 static bool getPrintfFormatArgumentNum(const CallExpr *CE, 00607 const CheckerContext &C, 00608 unsigned int &ArgNum) { 00609 // Find if the function contains a format string argument. 00610 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf, 00611 // vsnprintf, syslog, custom annotated functions. 00612 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 00613 if (!FDecl) 00614 return false; 00615 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) { 00616 ArgNum = Format->getFormatIdx() - 1; 00617 if ((Format->getType()->getName() == "printf") && 00618 CE->getNumArgs() > ArgNum) 00619 return true; 00620 } 00621 00622 // Or if a function is named setproctitle (this is a heuristic). 00623 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) { 00624 ArgNum = 0; 00625 return true; 00626 } 00627 00628 return false; 00629 } 00630 00631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, 00632 const char Msg[], 00633 CheckerContext &C) const { 00634 assert(E); 00635 00636 // Check for taint. 00637 ProgramStateRef State = C.getState(); 00638 if (!State->isTainted(getPointedToSymbol(C, E)) && 00639 !State->isTainted(E, C.getLocationContext())) 00640 return false; 00641 00642 // Generate diagnostic. 00643 if (ExplodedNode *N = C.addTransition()) { 00644 initBugType(); 00645 BugReport *report = new BugReport(*BT, Msg, N); 00646 report->addRange(E->getSourceRange()); 00647 C.emitReport(report); 00648 return true; 00649 } 00650 return false; 00651 } 00652 00653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, 00654 CheckerContext &C) const{ 00655 // Check if the function contains a format string argument. 00656 unsigned int ArgNum = 0; 00657 if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) 00658 return false; 00659 00660 // If either the format string content or the pointer itself are tainted, warn. 00661 if (generateReportIfTainted(CE->getArg(ArgNum), 00662 MsgUncontrolledFormatString, C)) 00663 return true; 00664 return false; 00665 } 00666 00667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, 00668 StringRef Name, 00669 CheckerContext &C) const { 00670 // TODO: It might make sense to run this check on demand. In some cases, 00671 // we should check if the environment has been cleansed here. We also might 00672 // need to know if the user was reset before these calls(seteuid). 00673 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name) 00674 .Case("system", 0) 00675 .Case("popen", 0) 00676 .Case("execl", 0) 00677 .Case("execle", 0) 00678 .Case("execlp", 0) 00679 .Case("execv", 0) 00680 .Case("execvp", 0) 00681 .Case("execvP", 0) 00682 .Case("execve", 0) 00683 .Case("dlopen", 0) 00684 .Default(UINT_MAX); 00685 00686 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1)) 00687 return false; 00688 00689 if (generateReportIfTainted(CE->getArg(ArgNum), 00690 MsgSanitizeSystemArgs, C)) 00691 return true; 00692 00693 return false; 00694 } 00695 00696 // TODO: Should this check be a part of the CString checker? 00697 // If yes, should taint be a global setting? 00698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE, 00699 const FunctionDecl *FDecl, 00700 CheckerContext &C) const { 00701 // If the function has a buffer size argument, set ArgNum. 00702 unsigned ArgNum = InvalidArgIndex; 00703 unsigned BId = 0; 00704 if ( (BId = FDecl->getMemoryFunctionKind()) ) 00705 switch(BId) { 00706 case Builtin::BImemcpy: 00707 case Builtin::BImemmove: 00708 case Builtin::BIstrncpy: 00709 ArgNum = 2; 00710 break; 00711 case Builtin::BIstrndup: 00712 ArgNum = 1; 00713 break; 00714 default: 00715 break; 00716 }; 00717 00718 if (ArgNum == InvalidArgIndex) { 00719 if (C.isCLibraryFunction(FDecl, "malloc") || 00720 C.isCLibraryFunction(FDecl, "calloc") || 00721 C.isCLibraryFunction(FDecl, "alloca")) 00722 ArgNum = 0; 00723 else if (C.isCLibraryFunction(FDecl, "memccpy")) 00724 ArgNum = 3; 00725 else if (C.isCLibraryFunction(FDecl, "realloc")) 00726 ArgNum = 1; 00727 else if (C.isCLibraryFunction(FDecl, "bcopy")) 00728 ArgNum = 2; 00729 } 00730 00731 if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum && 00732 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C)) 00733 return true; 00734 00735 return false; 00736 } 00737 00738 void ento::registerGenericTaintChecker(CheckerManager &mgr) { 00739 mgr.registerChecker<GenericTaintChecker>(); 00740 }