clang API Documentation

GenericTaintChecker.cpp
Go to the documentation of this file.
00001 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This checker defines the attack surface for generic taint propagation.
00011 //
00012 // The taint information produced by it might be useful to other checkers. For
00013 // example, checkers should report errors which involve tainted data more
00014 // aggressively, even if the involved symbols are under constrained.
00015 //
00016 //===----------------------------------------------------------------------===//
00017 #include "ClangSACheckers.h"
00018 #include "clang/AST/Attr.h"
00019 #include "clang/Basic/Builtins.h"
00020 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
00021 #include "clang/StaticAnalyzer/Core/Checker.h"
00022 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
00023 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
00024 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
00025 #include <climits>
00026 
00027 using namespace clang;
00028 using namespace ento;
00029 
00030 namespace {
00031 class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
00032                                             check::PreStmt<CallExpr> > {
00033 public:
00034   static void *getTag() { static int Tag; return &Tag; }
00035 
00036   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
00037 
00038   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
00039 
00040 private:
00041   static const unsigned InvalidArgIndex = UINT_MAX;
00042   /// Denotes the return vale.
00043   static const unsigned ReturnValueIndex = UINT_MAX - 1;
00044 
00045   mutable std::unique_ptr<BugType> BT;
00046   inline void initBugType() const {
00047     if (!BT)
00048       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
00049   }
00050 
00051   /// \brief Catch taint related bugs. Check if tainted data is passed to a
00052   /// system call etc.
00053   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
00054 
00055   /// \brief Add taint sources on a pre-visit.
00056   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
00057 
00058   /// \brief Propagate taint generated at pre-visit.
00059   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
00060 
00061   /// \brief Add taint sources on a post visit.
00062   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
00063 
00064   /// Check if the region the expression evaluates to is the standard input,
00065   /// and thus, is tainted.
00066   static bool isStdin(const Expr *E, CheckerContext &C);
00067 
00068   /// \brief Given a pointer argument, get the symbol of the value it contains
00069   /// (points to).
00070   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
00071 
00072   /// Functions defining the attack surface.
00073   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
00074                                                        CheckerContext &C) const;
00075   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
00076   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
00077   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
00078 
00079   /// Taint the scanned input if the file is tainted.
00080   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
00081 
00082   /// Check for CWE-134: Uncontrolled Format String.
00083   static const char MsgUncontrolledFormatString[];
00084   bool checkUncontrolledFormatString(const CallExpr *CE,
00085                                      CheckerContext &C) const;
00086 
00087   /// Check for:
00088   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
00089   /// CWE-78, "Failure to Sanitize Data into an OS Command"
00090   static const char MsgSanitizeSystemArgs[];
00091   bool checkSystemCall(const CallExpr *CE, StringRef Name,
00092                        CheckerContext &C) const;
00093 
00094   /// Check if tainted data is used as a buffer size ins strn.. functions,
00095   /// and allocators.
00096   static const char MsgTaintedBufferSize[];
00097   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
00098                               CheckerContext &C) const;
00099 
00100   /// Generate a report if the expression is tainted or points to tainted data.
00101   bool generateReportIfTainted(const Expr *E, const char Msg[],
00102                                CheckerContext &C) const;
00103                                
00104   
00105   typedef SmallVector<unsigned, 2> ArgVector;
00106 
00107   /// \brief A struct used to specify taint propagation rules for a function.
00108   ///
00109   /// If any of the possible taint source arguments is tainted, all of the
00110   /// destination arguments should also be tainted. Use InvalidArgIndex in the
00111   /// src list to specify that all of the arguments can introduce taint. Use
00112   /// InvalidArgIndex in the dst arguments to signify that all the non-const
00113   /// pointer and reference arguments might be tainted on return. If
00114   /// ReturnValueIndex is added to the dst list, the return value will be
00115   /// tainted.
00116   struct TaintPropagationRule {
00117     /// List of arguments which can be taint sources and should be checked.
00118     ArgVector SrcArgs;
00119     /// List of arguments which should be tainted on function return.
00120     ArgVector DstArgs;
00121     // TODO: Check if using other data structures would be more optimal.
00122 
00123     TaintPropagationRule() {}
00124 
00125     TaintPropagationRule(unsigned SArg,
00126                          unsigned DArg, bool TaintRet = false) {
00127       SrcArgs.push_back(SArg);
00128       DstArgs.push_back(DArg);
00129       if (TaintRet)
00130         DstArgs.push_back(ReturnValueIndex);
00131     }
00132 
00133     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
00134                          unsigned DArg, bool TaintRet = false) {
00135       SrcArgs.push_back(SArg1);
00136       SrcArgs.push_back(SArg2);
00137       DstArgs.push_back(DArg);
00138       if (TaintRet)
00139         DstArgs.push_back(ReturnValueIndex);
00140     }
00141 
00142     /// Get the propagation rule for a given function.
00143     static TaintPropagationRule
00144       getTaintPropagationRule(const FunctionDecl *FDecl,
00145                               StringRef Name,
00146                               CheckerContext &C);
00147 
00148     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
00149     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
00150 
00151     inline bool isNull() const { return SrcArgs.empty(); }
00152 
00153     inline bool isDestinationArgument(unsigned ArgNum) const {
00154       return (std::find(DstArgs.begin(),
00155                         DstArgs.end(), ArgNum) != DstArgs.end());
00156     }
00157 
00158     static inline bool isTaintedOrPointsToTainted(const Expr *E,
00159                                                   ProgramStateRef State,
00160                                                   CheckerContext &C) {
00161       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
00162               (E->getType().getTypePtr()->isPointerType() &&
00163                State->isTainted(getPointedToSymbol(C, E))));
00164     }
00165 
00166     /// \brief Pre-process a function which propagates taint according to the
00167     /// taint rule.
00168     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
00169 
00170   };
00171 };
00172 
00173 const unsigned GenericTaintChecker::ReturnValueIndex;
00174 const unsigned GenericTaintChecker::InvalidArgIndex;
00175 
00176 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
00177   "Untrusted data is used as a format string "
00178   "(CWE-134: Uncontrolled Format String)";
00179 
00180 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
00181   "Untrusted data is passed to a system call "
00182   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
00183 
00184 const char GenericTaintChecker::MsgTaintedBufferSize[] =
00185   "Untrusted data is used to specify the buffer size "
00186   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
00187   "character data and the null terminator)";
00188 
00189 } // end of anonymous namespace
00190 
00191 /// A set which is used to pass information from call pre-visit instruction
00192 /// to the call post-visit. The values are unsigned integers, which are either
00193 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
00194 /// points to data, which should be tainted on return.
00195 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
00196 
00197 GenericTaintChecker::TaintPropagationRule
00198 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
00199                                                      const FunctionDecl *FDecl,
00200                                                      StringRef Name,
00201                                                      CheckerContext &C) {
00202   // TODO: Currently, we might loose precision here: we always mark a return
00203   // value as tainted even if it's just a pointer, pointing to tainted data.
00204 
00205   // Check for exact name match for functions without builtin substitutes.
00206   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
00207     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
00208     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
00209     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
00210     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
00211     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
00212     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
00213     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
00214     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
00215     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
00216     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
00217     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
00218     .Case("read", TaintPropagationRule(0, 2, 1, true))
00219     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
00220     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
00221     .Case("fgets", TaintPropagationRule(2, 0, true))
00222     .Case("getline", TaintPropagationRule(2, 0))
00223     .Case("getdelim", TaintPropagationRule(3, 0))
00224     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
00225     .Default(TaintPropagationRule());
00226 
00227   if (!Rule.isNull())
00228     return Rule;
00229 
00230   // Check if it's one of the memory setting/copying functions.
00231   // This check is specialized but faster then calling isCLibraryFunction.
00232   unsigned BId = 0;
00233   if ( (BId = FDecl->getMemoryFunctionKind()) )
00234     switch(BId) {
00235     case Builtin::BImemcpy:
00236     case Builtin::BImemmove:
00237     case Builtin::BIstrncpy:
00238     case Builtin::BIstrncat:
00239       return TaintPropagationRule(1, 2, 0, true);
00240     case Builtin::BIstrlcpy:
00241     case Builtin::BIstrlcat:
00242       return TaintPropagationRule(1, 2, 0, false);
00243     case Builtin::BIstrndup:
00244       return TaintPropagationRule(0, 1, ReturnValueIndex);
00245 
00246     default:
00247       break;
00248     };
00249 
00250   // Process all other functions which could be defined as builtins.
00251   if (Rule.isNull()) {
00252     if (C.isCLibraryFunction(FDecl, "snprintf") ||
00253         C.isCLibraryFunction(FDecl, "sprintf"))
00254       return TaintPropagationRule(InvalidArgIndex, 0, true);
00255     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
00256              C.isCLibraryFunction(FDecl, "stpcpy") ||
00257              C.isCLibraryFunction(FDecl, "strcat"))
00258       return TaintPropagationRule(1, 0, true);
00259     else if (C.isCLibraryFunction(FDecl, "bcopy"))
00260       return TaintPropagationRule(0, 2, 1, false);
00261     else if (C.isCLibraryFunction(FDecl, "strdup") ||
00262              C.isCLibraryFunction(FDecl, "strdupa"))
00263       return TaintPropagationRule(0, ReturnValueIndex);
00264     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
00265       return TaintPropagationRule(0, ReturnValueIndex);
00266   }
00267 
00268   // Skipping the following functions, since they might be used for cleansing
00269   // or smart memory copy:
00270   // - memccpy - copying until hitting a special character.
00271 
00272   return TaintPropagationRule();
00273 }
00274 
00275 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
00276                                        CheckerContext &C) const {
00277   // Check for errors first.
00278   if (checkPre(CE, C))
00279     return;
00280 
00281   // Add taint second.
00282   addSourcesPre(CE, C);
00283 }
00284 
00285 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
00286                                         CheckerContext &C) const {
00287   if (propagateFromPre(CE, C))
00288     return;
00289   addSourcesPost(CE, C);
00290 }
00291 
00292 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
00293                                         CheckerContext &C) const {
00294   ProgramStateRef State = nullptr;
00295   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00296   if (!FDecl || FDecl->getKind() != Decl::Function)
00297     return;
00298 
00299   StringRef Name = C.getCalleeName(FDecl);
00300   if (Name.empty())
00301     return;
00302 
00303   // First, try generating a propagation rule for this function.
00304   TaintPropagationRule Rule =
00305     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
00306   if (!Rule.isNull()) {
00307     State = Rule.process(CE, C);
00308     if (!State)
00309       return;
00310     C.addTransition(State);
00311     return;
00312   }
00313 
00314   // Otherwise, check if we have custom pre-processing implemented.
00315   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
00316     .Case("fscanf", &GenericTaintChecker::preFscanf)
00317     .Default(nullptr);
00318   // Check and evaluate the call.
00319   if (evalFunction)
00320     State = (this->*evalFunction)(CE, C);
00321   if (!State)
00322     return;
00323   C.addTransition(State);
00324 
00325 }
00326 
00327 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
00328                                            CheckerContext &C) const {
00329   ProgramStateRef State = C.getState();
00330 
00331   // Depending on what was tainted at pre-visit, we determined a set of
00332   // arguments which should be tainted after the function returns. These are
00333   // stored in the state as TaintArgsOnPostVisit set.
00334   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
00335   if (TaintArgs.isEmpty())
00336     return false;
00337 
00338   for (llvm::ImmutableSet<unsigned>::iterator
00339          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
00340     unsigned ArgNum  = *I;
00341 
00342     // Special handling for the tainted return value.
00343     if (ArgNum == ReturnValueIndex) {
00344       State = State->addTaint(CE, C.getLocationContext());
00345       continue;
00346     }
00347 
00348     // The arguments are pointer arguments. The data they are pointing at is
00349     // tainted after the call.
00350     if (CE->getNumArgs() < (ArgNum + 1))
00351       return false;
00352     const Expr* Arg = CE->getArg(ArgNum);
00353     SymbolRef Sym = getPointedToSymbol(C, Arg);
00354     if (Sym)
00355       State = State->addTaint(Sym);
00356   }
00357 
00358   // Clear up the taint info from the state.
00359   State = State->remove<TaintArgsOnPostVisit>();
00360 
00361   if (State != C.getState()) {
00362     C.addTransition(State);
00363     return true;
00364   }
00365   return false;
00366 }
00367 
00368 void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
00369                                          CheckerContext &C) const {
00370   // Define the attack surface.
00371   // Set the evaluation function by switching on the callee name.
00372   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00373   if (!FDecl || FDecl->getKind() != Decl::Function)
00374     return;
00375 
00376   StringRef Name = C.getCalleeName(FDecl);
00377   if (Name.empty())
00378     return;
00379   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
00380     .Case("scanf", &GenericTaintChecker::postScanf)
00381     // TODO: Add support for vfscanf & family.
00382     .Case("getchar", &GenericTaintChecker::postRetTaint)
00383     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
00384     .Case("getenv", &GenericTaintChecker::postRetTaint)
00385     .Case("fopen", &GenericTaintChecker::postRetTaint)
00386     .Case("fdopen", &GenericTaintChecker::postRetTaint)
00387     .Case("freopen", &GenericTaintChecker::postRetTaint)
00388     .Case("getch", &GenericTaintChecker::postRetTaint)
00389     .Case("wgetch", &GenericTaintChecker::postRetTaint)
00390     .Case("socket", &GenericTaintChecker::postSocket)
00391     .Default(nullptr);
00392 
00393   // If the callee isn't defined, it is not of security concern.
00394   // Check and evaluate the call.
00395   ProgramStateRef State = nullptr;
00396   if (evalFunction)
00397     State = (this->*evalFunction)(CE, C);
00398   if (!State)
00399     return;
00400 
00401   C.addTransition(State);
00402 }
00403 
00404 bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
00405 
00406   if (checkUncontrolledFormatString(CE, C))
00407     return true;
00408 
00409   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00410   if (!FDecl || FDecl->getKind() != Decl::Function)
00411     return false;
00412 
00413   StringRef Name = C.getCalleeName(FDecl);
00414   if (Name.empty())
00415     return false;
00416 
00417   if (checkSystemCall(CE, Name, C))
00418     return true;
00419 
00420   if (checkTaintedBufferSize(CE, FDecl, C))
00421     return true;
00422 
00423   return false;
00424 }
00425 
00426 SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
00427                                                   const Expr* Arg) {
00428   ProgramStateRef State = C.getState();
00429   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
00430   if (AddrVal.isUnknownOrUndef())
00431     return nullptr;
00432 
00433   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
00434   if (!AddrLoc)
00435     return nullptr;
00436 
00437   const PointerType *ArgTy =
00438     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
00439   SVal Val = State->getSVal(*AddrLoc,
00440                             ArgTy ? ArgTy->getPointeeType(): QualType());
00441   return Val.getAsSymbol();
00442 }
00443 
00444 ProgramStateRef 
00445 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
00446                                                    CheckerContext &C) const {
00447   ProgramStateRef State = C.getState();
00448 
00449   // Check for taint in arguments.
00450   bool IsTainted = false;
00451   for (ArgVector::const_iterator I = SrcArgs.begin(),
00452                                  E = SrcArgs.end(); I != E; ++I) {
00453     unsigned ArgNum = *I;
00454 
00455     if (ArgNum == InvalidArgIndex) {
00456       // Check if any of the arguments is tainted, but skip the
00457       // destination arguments.
00458       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
00459         if (isDestinationArgument(i))
00460           continue;
00461         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
00462           break;
00463       }
00464       break;
00465     }
00466 
00467     if (CE->getNumArgs() < (ArgNum + 1))
00468       return State;
00469     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
00470       break;
00471   }
00472   if (!IsTainted)
00473     return State;
00474 
00475   // Mark the arguments which should be tainted after the function returns.
00476   for (ArgVector::const_iterator I = DstArgs.begin(),
00477                                  E = DstArgs.end(); I != E; ++I) {
00478     unsigned ArgNum = *I;
00479 
00480     // Should we mark all arguments as tainted?
00481     if (ArgNum == InvalidArgIndex) {
00482       // For all pointer and references that were passed in:
00483       //   If they are not pointing to const data, mark data as tainted.
00484       //   TODO: So far we are just going one level down; ideally we'd need to
00485       //         recurse here.
00486       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
00487         const Expr *Arg = CE->getArg(i);
00488         // Process pointer argument.
00489         const Type *ArgTy = Arg->getType().getTypePtr();
00490         QualType PType = ArgTy->getPointeeType();
00491         if ((!PType.isNull() && !PType.isConstQualified())
00492             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
00493           State = State->add<TaintArgsOnPostVisit>(i);
00494       }
00495       continue;
00496     }
00497 
00498     // Should mark the return value?
00499     if (ArgNum == ReturnValueIndex) {
00500       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
00501       continue;
00502     }
00503 
00504     // Mark the given argument.
00505     assert(ArgNum < CE->getNumArgs());
00506     State = State->add<TaintArgsOnPostVisit>(ArgNum);
00507   }
00508 
00509   return State;
00510 }
00511 
00512 
00513 // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
00514 // and arg 1 should get taint.
00515 ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
00516                                                    CheckerContext &C) const {
00517   assert(CE->getNumArgs() >= 2);
00518   ProgramStateRef State = C.getState();
00519 
00520   // Check is the file descriptor is tainted.
00521   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
00522       isStdin(CE->getArg(0), C)) {
00523     // All arguments except for the first two should get taint.
00524     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
00525         State = State->add<TaintArgsOnPostVisit>(i);
00526     return State;
00527   }
00528 
00529   return nullptr;
00530 }
00531 
00532 
00533 // If argument 0(protocol domain) is network, the return value should get taint.
00534 ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
00535                                                 CheckerContext &C) const {
00536   ProgramStateRef State = C.getState();
00537   if (CE->getNumArgs() < 3)
00538     return State;
00539 
00540   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
00541   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
00542   // White list the internal communication protocols.
00543   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
00544       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
00545     return State;
00546   State = State->addTaint(CE, C.getLocationContext());
00547   return State;
00548 }
00549 
00550 ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
00551                                                    CheckerContext &C) const {
00552   ProgramStateRef State = C.getState();
00553   if (CE->getNumArgs() < 2)
00554     return State;
00555 
00556   // All arguments except for the very first one should get taint.
00557   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
00558     // The arguments are pointer arguments. The data they are pointing at is
00559     // tainted after the call.
00560     const Expr* Arg = CE->getArg(i);
00561         SymbolRef Sym = getPointedToSymbol(C, Arg);
00562     if (Sym)
00563       State = State->addTaint(Sym);
00564   }
00565   return State;
00566 }
00567 
00568 ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
00569                                                   CheckerContext &C) const {
00570   return C.getState()->addTaint(CE, C.getLocationContext());
00571 }
00572 
00573 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
00574   ProgramStateRef State = C.getState();
00575   SVal Val = State->getSVal(E, C.getLocationContext());
00576 
00577   // stdin is a pointer, so it would be a region.
00578   const MemRegion *MemReg = Val.getAsRegion();
00579 
00580   // The region should be symbolic, we do not know it's value.
00581   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
00582   if (!SymReg)
00583     return false;
00584 
00585   // Get it's symbol and find the declaration region it's pointing to.
00586   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
00587   if (!Sm)
00588     return false;
00589   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
00590   if (!DeclReg)
00591     return false;
00592 
00593   // This region corresponds to a declaration, find out if it's a global/extern
00594   // variable named stdin with the proper type.
00595   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
00596     D = D->getCanonicalDecl();
00597     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
00598         if (const PointerType * PtrTy =
00599               dyn_cast<PointerType>(D->getType().getTypePtr()))
00600           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
00601             return true;
00602   }
00603   return false;
00604 }
00605 
00606 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
00607                                        const CheckerContext &C,
00608                                        unsigned int &ArgNum) {
00609   // Find if the function contains a format string argument.
00610   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
00611   // vsnprintf, syslog, custom annotated functions.
00612   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
00613   if (!FDecl)
00614     return false;
00615   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
00616     ArgNum = Format->getFormatIdx() - 1;
00617     if ((Format->getType()->getName() == "printf") &&
00618          CE->getNumArgs() > ArgNum)
00619       return true;
00620   }
00621 
00622   // Or if a function is named setproctitle (this is a heuristic).
00623   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
00624     ArgNum = 0;
00625     return true;
00626   }
00627 
00628   return false;
00629 }
00630 
00631 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
00632                                                   const char Msg[],
00633                                                   CheckerContext &C) const {
00634   assert(E);
00635 
00636   // Check for taint.
00637   ProgramStateRef State = C.getState();
00638   if (!State->isTainted(getPointedToSymbol(C, E)) &&
00639       !State->isTainted(E, C.getLocationContext()))
00640     return false;
00641 
00642   // Generate diagnostic.
00643   if (ExplodedNode *N = C.addTransition()) {
00644     initBugType();
00645     BugReport *report = new BugReport(*BT, Msg, N);
00646     report->addRange(E->getSourceRange());
00647     C.emitReport(report);
00648     return true;
00649   }
00650   return false;
00651 }
00652 
00653 bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
00654                                                         CheckerContext &C) const{
00655   // Check if the function contains a format string argument.
00656   unsigned int ArgNum = 0;
00657   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
00658     return false;
00659 
00660   // If either the format string content or the pointer itself are tainted, warn.
00661   if (generateReportIfTainted(CE->getArg(ArgNum),
00662                               MsgUncontrolledFormatString, C))
00663     return true;
00664   return false;
00665 }
00666 
00667 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
00668                                           StringRef Name,
00669                                           CheckerContext &C) const {
00670   // TODO: It might make sense to run this check on demand. In some cases, 
00671   // we should check if the environment has been cleansed here. We also might 
00672   // need to know if the user was reset before these calls(seteuid).
00673   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
00674     .Case("system", 0)
00675     .Case("popen", 0)
00676     .Case("execl", 0)
00677     .Case("execle", 0)
00678     .Case("execlp", 0)
00679     .Case("execv", 0)
00680     .Case("execvp", 0)
00681     .Case("execvP", 0)
00682     .Case("execve", 0)
00683     .Case("dlopen", 0)
00684     .Default(UINT_MAX);
00685 
00686   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
00687     return false;
00688 
00689   if (generateReportIfTainted(CE->getArg(ArgNum),
00690                               MsgSanitizeSystemArgs, C))
00691     return true;
00692 
00693   return false;
00694 }
00695 
00696 // TODO: Should this check be a part of the CString checker?
00697 // If yes, should taint be a global setting?
00698 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
00699                                                  const FunctionDecl *FDecl,
00700                                                  CheckerContext &C) const {
00701   // If the function has a buffer size argument, set ArgNum.
00702   unsigned ArgNum = InvalidArgIndex;
00703   unsigned BId = 0;
00704   if ( (BId = FDecl->getMemoryFunctionKind()) )
00705     switch(BId) {
00706     case Builtin::BImemcpy:
00707     case Builtin::BImemmove:
00708     case Builtin::BIstrncpy:
00709       ArgNum = 2;
00710       break;
00711     case Builtin::BIstrndup:
00712       ArgNum = 1;
00713       break;
00714     default:
00715       break;
00716     };
00717 
00718   if (ArgNum == InvalidArgIndex) {
00719     if (C.isCLibraryFunction(FDecl, "malloc") ||
00720         C.isCLibraryFunction(FDecl, "calloc") ||
00721         C.isCLibraryFunction(FDecl, "alloca"))
00722       ArgNum = 0;
00723     else if (C.isCLibraryFunction(FDecl, "memccpy"))
00724       ArgNum = 3;
00725     else if (C.isCLibraryFunction(FDecl, "realloc"))
00726       ArgNum = 1;
00727     else if (C.isCLibraryFunction(FDecl, "bcopy"))
00728       ArgNum = 2;
00729   }
00730 
00731   if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
00732       generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
00733     return true;
00734 
00735   return false;
00736 }
00737 
00738 void ento::registerGenericTaintChecker(CheckerManager &mgr) {
00739   mgr.registerChecker<GenericTaintChecker>();
00740 }