clang API Documentation

CStringChecker.cpp
Go to the documentation of this file.
00001 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
00002 //
00003 //                     The LLVM Compiler Infrastructure
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 //
00010 // This defines CStringChecker, which is an assortment of checks on calls
00011 // to functions in <string.h>.
00012 //
00013 //===----------------------------------------------------------------------===//
00014 
00015 #include "ClangSACheckers.h"
00016 #include "InterCheckerAPI.h"
00017 #include "clang/Basic/CharInfo.h"
00018 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
00019 #include "clang/StaticAnalyzer/Core/Checker.h"
00020 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
00021 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
00022 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
00023 #include "llvm/ADT/STLExtras.h"
00024 #include "llvm/ADT/SmallString.h"
00025 #include "llvm/ADT/StringSwitch.h"
00026 #include "llvm/Support/raw_ostream.h"
00027 
00028 using namespace clang;
00029 using namespace ento;
00030 
00031 namespace {
00032 class CStringChecker : public Checker< eval::Call,
00033                                          check::PreStmt<DeclStmt>,
00034                                          check::LiveSymbols,
00035                                          check::DeadSymbols,
00036                                          check::RegionChanges
00037                                          > {
00038   mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
00039       BT_NotCString, BT_AdditionOverflow;
00040 
00041   mutable const char *CurrentFunctionDescription;
00042 
00043 public:
00044   /// The filter is used to filter out the diagnostics which are not enabled by
00045   /// the user.
00046   struct CStringChecksFilter {
00047     DefaultBool CheckCStringNullArg;
00048     DefaultBool CheckCStringOutOfBounds;
00049     DefaultBool CheckCStringBufferOverlap;
00050     DefaultBool CheckCStringNotNullTerm;
00051 
00052     CheckName CheckNameCStringNullArg;
00053     CheckName CheckNameCStringOutOfBounds;
00054     CheckName CheckNameCStringBufferOverlap;
00055     CheckName CheckNameCStringNotNullTerm;
00056   };
00057 
00058   CStringChecksFilter Filter;
00059 
00060   static void *getTag() { static int tag; return &tag; }
00061 
00062   bool evalCall(const CallExpr *CE, CheckerContext &C) const;
00063   void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
00064   void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
00065   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
00066   bool wantsRegionChangeUpdate(ProgramStateRef state) const;
00067 
00068   ProgramStateRef 
00069     checkRegionChanges(ProgramStateRef state,
00070                        const InvalidatedSymbols *,
00071                        ArrayRef<const MemRegion *> ExplicitRegions,
00072                        ArrayRef<const MemRegion *> Regions,
00073                        const CallEvent *Call) const;
00074 
00075   typedef void (CStringChecker::*FnCheck)(CheckerContext &,
00076                                           const CallExpr *) const;
00077 
00078   void evalMemcpy(CheckerContext &C, const CallExpr *CE) const;
00079   void evalMempcpy(CheckerContext &C, const CallExpr *CE) const;
00080   void evalMemmove(CheckerContext &C, const CallExpr *CE) const;
00081   void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
00082   void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
00083                       ProgramStateRef state,
00084                       const Expr *Size,
00085                       const Expr *Source,
00086                       const Expr *Dest,
00087                       bool Restricted = false,
00088                       bool IsMempcpy = false) const;
00089 
00090   void evalMemcmp(CheckerContext &C, const CallExpr *CE) const;
00091 
00092   void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
00093   void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
00094   void evalstrLengthCommon(CheckerContext &C,
00095                            const CallExpr *CE, 
00096                            bool IsStrnlen = false) const;
00097 
00098   void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
00099   void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
00100   void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
00101   void evalStrcpyCommon(CheckerContext &C,
00102                         const CallExpr *CE,
00103                         bool returnEnd,
00104                         bool isBounded,
00105                         bool isAppending) const;
00106 
00107   void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
00108   void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
00109 
00110   void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
00111   void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
00112   void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
00113   void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
00114   void evalStrcmpCommon(CheckerContext &C,
00115                         const CallExpr *CE,
00116                         bool isBounded = false,
00117                         bool ignoreCase = false) const;
00118 
00119   void evalStrsep(CheckerContext &C, const CallExpr *CE) const;
00120 
00121   // Utility methods
00122   std::pair<ProgramStateRef , ProgramStateRef >
00123   static assumeZero(CheckerContext &C,
00124                     ProgramStateRef state, SVal V, QualType Ty);
00125 
00126   static ProgramStateRef setCStringLength(ProgramStateRef state,
00127                                               const MemRegion *MR,
00128                                               SVal strLength);
00129   static SVal getCStringLengthForRegion(CheckerContext &C,
00130                                         ProgramStateRef &state,
00131                                         const Expr *Ex,
00132                                         const MemRegion *MR,
00133                                         bool hypothetical);
00134   SVal getCStringLength(CheckerContext &C,
00135                         ProgramStateRef &state,
00136                         const Expr *Ex,
00137                         SVal Buf,
00138                         bool hypothetical = false) const;
00139 
00140   const StringLiteral *getCStringLiteral(CheckerContext &C, 
00141                                          ProgramStateRef &state,
00142                                          const Expr *expr,  
00143                                          SVal val) const;
00144 
00145   static ProgramStateRef InvalidateBuffer(CheckerContext &C,
00146                                           ProgramStateRef state,
00147                                           const Expr *Ex, SVal V,
00148                                           bool IsSourceBuffer);
00149 
00150   static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
00151                               const MemRegion *MR);
00152 
00153   // Re-usable checks
00154   ProgramStateRef checkNonNull(CheckerContext &C,
00155                                    ProgramStateRef state,
00156                                    const Expr *S,
00157                                    SVal l) const;
00158   ProgramStateRef CheckLocation(CheckerContext &C,
00159                                     ProgramStateRef state,
00160                                     const Expr *S,
00161                                     SVal l,
00162                                     const char *message = nullptr) const;
00163   ProgramStateRef CheckBufferAccess(CheckerContext &C,
00164                                         ProgramStateRef state,
00165                                         const Expr *Size,
00166                                         const Expr *FirstBuf,
00167                                         const Expr *SecondBuf,
00168                                         const char *firstMessage = nullptr,
00169                                         const char *secondMessage = nullptr,
00170                                         bool WarnAboutSize = false) const;
00171 
00172   ProgramStateRef CheckBufferAccess(CheckerContext &C,
00173                                         ProgramStateRef state,
00174                                         const Expr *Size,
00175                                         const Expr *Buf,
00176                                         const char *message = nullptr,
00177                                         bool WarnAboutSize = false) const {
00178     // This is a convenience override.
00179     return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr,
00180                              WarnAboutSize);
00181   }
00182   ProgramStateRef CheckOverlap(CheckerContext &C,
00183                                    ProgramStateRef state,
00184                                    const Expr *Size,
00185                                    const Expr *First,
00186                                    const Expr *Second) const;
00187   void emitOverlapBug(CheckerContext &C,
00188                       ProgramStateRef state,
00189                       const Stmt *First,
00190                       const Stmt *Second) const;
00191 
00192   ProgramStateRef checkAdditionOverflow(CheckerContext &C,
00193                                             ProgramStateRef state,
00194                                             NonLoc left,
00195                                             NonLoc right) const;
00196 };
00197 
00198 } //end anonymous namespace
00199 
// Program-state map from a memory region to the SVal recorded as the length
// of the C string stored in it.  Entries are written by setCStringLength()
// and getCStringLengthForRegion().
00200 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
00201 
00202 //===----------------------------------------------------------------------===//
00203 // Individual checks and utility methods.
00204 //===----------------------------------------------------------------------===//
00205 
00206 std::pair<ProgramStateRef , ProgramStateRef >
00207 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
00208                            QualType Ty) {
00209   Optional<DefinedSVal> val = V.getAs<DefinedSVal>();
00210   if (!val)
00211     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
00212 
00213   SValBuilder &svalBuilder = C.getSValBuilder();
00214   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
00215   return state->assume(svalBuilder.evalEQ(state, *val, zero));
00216 }
00217 
00218 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
00219                                             ProgramStateRef state,
00220                                             const Expr *S, SVal l) const {
00221   // If a previous check has failed, propagate the failure.
00222   if (!state)
00223     return nullptr;
00224 
00225   ProgramStateRef stateNull, stateNonNull;
00226   std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType());
00227 
00228   if (stateNull && !stateNonNull) {
00229     if (!Filter.CheckCStringNullArg)
00230       return nullptr;
00231 
00232     ExplodedNode *N = C.generateSink(stateNull);
00233     if (!N)
00234       return nullptr;
00235 
00236     if (!BT_Null)
00237       BT_Null.reset(new BuiltinBug(
00238           Filter.CheckNameCStringNullArg, categories::UnixAPI,
00239           "Null pointer argument in call to byte string function"));
00240 
00241     SmallString<80> buf;
00242     llvm::raw_svector_ostream os(buf);
00243     assert(CurrentFunctionDescription);
00244     os << "Null pointer argument in call to " << CurrentFunctionDescription;
00245 
00246     // Generate a report for this bug.
00247     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get());
00248     BugReport *report = new BugReport(*BT, os.str(), N);
00249 
00250     report->addRange(S->getSourceRange());
00251     bugreporter::trackNullOrUndefValue(N, S, *report);
00252     C.emitReport(report);
00253     return nullptr;
00254   }
00255 
00256   // From here on, assume that the value is non-null.
00257   assert(stateNonNull);
00258   return stateNonNull;
00259 }
00260 
00261 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
00262 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
00263                                              ProgramStateRef state,
00264                                              const Expr *S, SVal l,
00265                                              const char *warningMsg) const {
00266   // If a previous check has failed, propagate the failure.
00267   if (!state)
00268     return nullptr;
00269 
00270   // Check for out of bound array element access.
00271   const MemRegion *R = l.getAsRegion();
00272   if (!R)
00273     return state;
00274 
00275   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
00276   if (!ER)
00277     return state;
00278 
00279   assert(ER->getValueType() == C.getASTContext().CharTy &&
00280     "CheckLocation should only be called with char* ElementRegions");
00281 
00282   // Get the size of the array.
00283   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
00284   SValBuilder &svalBuilder = C.getSValBuilder();
00285   SVal Extent = 
00286     svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder));
00287   DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>();
00288 
00289   // Get the index of the accessed element.
00290   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
00291 
00292   ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true);
00293   ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false);
00294   if (StOutBound && !StInBound) {
00295     ExplodedNode *N = C.generateSink(StOutBound);
00296     if (!N)
00297       return nullptr;
00298 
00299     if (!BT_Bounds) {
00300       BT_Bounds.reset(new BuiltinBug(
00301           Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access",
00302           "Byte string function accesses out-of-bound array element"));
00303     }
00304     BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get());
00305 
00306     // Generate a report for this bug.
00307     BugReport *report;
00308     if (warningMsg) {
00309       report = new BugReport(*BT, warningMsg, N);
00310     } else {
00311       assert(CurrentFunctionDescription);
00312       assert(CurrentFunctionDescription[0] != '\0');
00313 
00314       SmallString<80> buf;
00315       llvm::raw_svector_ostream os(buf);
00316       os << toUppercase(CurrentFunctionDescription[0])
00317          << &CurrentFunctionDescription[1]
00318          << " accesses out-of-bound array element";
00319       report = new BugReport(*BT, os.str(), N);      
00320     }
00321 
00322     // FIXME: It would be nice to eventually make this diagnostic more clear,
00323     // e.g., by referencing the original declaration or by saying *why* this
00324     // reference is outside the range.
00325 
00326     report->addRange(S->getSourceRange());
00327     C.emitReport(report);
00328     return nullptr;
00329   }
00330   
00331   // Array bound check succeeded.  From this point forward the array bound
00332   // should always succeed.
00333   return StInBound;
00334 }
00335 
00336 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C,
00337                                                  ProgramStateRef state,
00338                                                  const Expr *Size,
00339                                                  const Expr *FirstBuf,
00340                                                  const Expr *SecondBuf,
00341                                                  const char *firstMessage,
00342                                                  const char *secondMessage,
00343                                                  bool WarnAboutSize) const {
00344   // If a previous check has failed, propagate the failure.
00345   if (!state)
00346     return nullptr;
00347 
00348   SValBuilder &svalBuilder = C.getSValBuilder();
00349   ASTContext &Ctx = svalBuilder.getContext();
00350   const LocationContext *LCtx = C.getLocationContext();
00351 
00352   QualType sizeTy = Size->getType();
00353   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
00354 
00355   // Check that the first buffer is non-null.
00356   SVal BufVal = state->getSVal(FirstBuf, LCtx);
00357   state = checkNonNull(C, state, FirstBuf, BufVal);
00358   if (!state)
00359     return nullptr;
00360 
00361   // If out-of-bounds checking is turned off, skip the rest.
00362   if (!Filter.CheckCStringOutOfBounds)
00363     return state;
00364 
00365   // Get the access length and make sure it is known.
00366   // FIXME: This assumes the caller has already checked that the access length
00367   // is positive. And that it's unsigned.
00368   SVal LengthVal = state->getSVal(Size, LCtx);
00369   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
00370   if (!Length)
00371     return state;
00372 
00373   // Compute the offset of the last element to be accessed: size-1.
00374   NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
00375   NonLoc LastOffset = svalBuilder
00376       .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>();
00377 
00378   // Check that the first buffer is sufficiently long.
00379   SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType());
00380   if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
00381     const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf);
00382 
00383     SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
00384                                           LastOffset, PtrTy);
00385     state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage);
00386 
00387     // If the buffer isn't large enough, abort.
00388     if (!state)
00389       return nullptr;
00390   }
00391 
00392   // If there's a second buffer, check it as well.
00393   if (SecondBuf) {
00394     BufVal = state->getSVal(SecondBuf, LCtx);
00395     state = checkNonNull(C, state, SecondBuf, BufVal);
00396     if (!state)
00397       return nullptr;
00398 
00399     BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType());
00400     if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
00401       const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf);
00402 
00403       SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc,
00404                                             LastOffset, PtrTy);
00405       state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage);
00406     }
00407   }
00408 
00409   // Large enough or not, return this state!
00410   return state;
00411 }
00412 
00413 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
00414                                             ProgramStateRef state,
00415                                             const Expr *Size,
00416                                             const Expr *First,
00417                                             const Expr *Second) const {
00418   if (!Filter.CheckCStringBufferOverlap)
00419     return state;
00420 
00421   // Do a simple check for overlap: if the two arguments are from the same
00422   // buffer, see if the end of the first is greater than the start of the second
00423   // or vice versa.
00424 
00425   // If a previous check has failed, propagate the failure.
00426   if (!state)
00427     return nullptr;
00428 
00429   ProgramStateRef stateTrue, stateFalse;
00430 
00431   // Get the buffer values and make sure they're known locations.
00432   const LocationContext *LCtx = C.getLocationContext();
00433   SVal firstVal = state->getSVal(First, LCtx);
00434   SVal secondVal = state->getSVal(Second, LCtx);
00435 
00436   Optional<Loc> firstLoc = firstVal.getAs<Loc>();
00437   if (!firstLoc)
00438     return state;
00439 
00440   Optional<Loc> secondLoc = secondVal.getAs<Loc>();
00441   if (!secondLoc)
00442     return state;
00443 
00444   // Are the two values the same?
00445   SValBuilder &svalBuilder = C.getSValBuilder();  
00446   std::tie(stateTrue, stateFalse) =
00447     state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
00448 
00449   if (stateTrue && !stateFalse) {
00450     // If the values are known to be equal, that's automatically an overlap.
00451     emitOverlapBug(C, stateTrue, First, Second);
00452     return nullptr;
00453   }
00454 
00455   // assume the two expressions are not equal.
00456   assert(stateFalse);
00457   state = stateFalse;
00458 
00459   // Which value comes first?
00460   QualType cmpTy = svalBuilder.getConditionType();
00461   SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT,
00462                                          *firstLoc, *secondLoc, cmpTy);
00463   Optional<DefinedOrUnknownSVal> reverseTest =
00464       reverse.getAs<DefinedOrUnknownSVal>();
00465   if (!reverseTest)
00466     return state;
00467 
00468   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
00469   if (stateTrue) {
00470     if (stateFalse) {
00471       // If we don't know which one comes first, we can't perform this test.
00472       return state;
00473     } else {
00474       // Switch the values so that firstVal is before secondVal.
00475       std::swap(firstLoc, secondLoc);
00476 
00477       // Switch the Exprs as well, so that they still correspond.
00478       std::swap(First, Second);
00479     }
00480   }
00481 
00482   // Get the length, and make sure it too is known.
00483   SVal LengthVal = state->getSVal(Size, LCtx);
00484   Optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
00485   if (!Length)
00486     return state;
00487 
00488   // Convert the first buffer's start address to char*.
00489   // Bail out if the cast fails.
00490   ASTContext &Ctx = svalBuilder.getContext();
00491   QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
00492   SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 
00493                                          First->getType());
00494   Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
00495   if (!FirstStartLoc)
00496     return state;
00497 
00498   // Compute the end of the first buffer. Bail out if THAT fails.
00499   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add,
00500                                  *FirstStartLoc, *Length, CharPtrTy);
00501   Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
00502   if (!FirstEndLoc)
00503     return state;
00504 
00505   // Is the end of the first buffer past the start of the second buffer?
00506   SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT,
00507                                 *FirstEndLoc, *secondLoc, cmpTy);
00508   Optional<DefinedOrUnknownSVal> OverlapTest =
00509       Overlap.getAs<DefinedOrUnknownSVal>();
00510   if (!OverlapTest)
00511     return state;
00512 
00513   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
00514 
00515   if (stateTrue && !stateFalse) {
00516     // Overlap!
00517     emitOverlapBug(C, stateTrue, First, Second);
00518     return nullptr;
00519   }
00520 
00521   // assume the two expressions don't overlap.
00522   assert(stateFalse);
00523   return stateFalse;
00524 }
00525 
00526 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
00527                                   const Stmt *First, const Stmt *Second) const {
00528   ExplodedNode *N = C.generateSink(state);
00529   if (!N)
00530     return;
00531 
00532   if (!BT_Overlap)
00533     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
00534                                  categories::UnixAPI, "Improper arguments"));
00535 
00536   // Generate a report for this bug.
00537   BugReport *report = 
00538     new BugReport(*BT_Overlap,
00539       "Arguments must not be overlapping buffers", N);
00540   report->addRange(First->getSourceRange());
00541   report->addRange(Second->getSourceRange());
00542 
00543   C.emitReport(report);
00544 }
00545 
00546 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
00547                                                      ProgramStateRef state,
00548                                                      NonLoc left,
00549                                                      NonLoc right) const {
00550   // If out-of-bounds checking is turned off, skip the rest.
00551   if (!Filter.CheckCStringOutOfBounds)
00552     return state;
00553 
00554   // If a previous check has failed, propagate the failure.
00555   if (!state)
00556     return nullptr;
00557 
00558   SValBuilder &svalBuilder = C.getSValBuilder();
00559   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
00560 
00561   QualType sizeTy = svalBuilder.getContext().getSizeType();
00562   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
00563   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
00564 
00565   SVal maxMinusRight;
00566   if (right.getAs<nonloc::ConcreteInt>()) {
00567     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
00568                                                  sizeTy);
00569   } else {
00570     // Try switching the operands. (The order of these two assignments is
00571     // important!)
00572     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 
00573                                             sizeTy);
00574     left = right;
00575   }
00576 
00577   if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
00578     QualType cmpTy = svalBuilder.getConditionType();
00579     // If left > max - right, we have an overflow.
00580     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
00581                                                 *maxMinusRightNL, cmpTy);
00582 
00583     ProgramStateRef stateOverflow, stateOkay;
00584     std::tie(stateOverflow, stateOkay) =
00585       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
00586 
00587     if (stateOverflow && !stateOkay) {
00588       // We have an overflow. Emit a bug report.
00589       ExplodedNode *N = C.generateSink(stateOverflow);
00590       if (!N)
00591         return nullptr;
00592 
00593       if (!BT_AdditionOverflow)
00594         BT_AdditionOverflow.reset(
00595             new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
00596                            "Sum of expressions causes overflow"));
00597 
00598       // This isn't a great error message, but this should never occur in real
00599       // code anyway -- you'd have to create a buffer longer than a size_t can
00600       // represent, which is sort of a contradiction.
00601       const char *warning =
00602         "This expression will create a string whose length is too big to "
00603         "be represented as a size_t";
00604 
00605       // Generate a report for this bug.
00606       BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N);
00607       C.emitReport(report);        
00608 
00609       return nullptr;
00610     }
00611 
00612     // From now on, assume an overflow didn't occur.
00613     assert(stateOkay);
00614     state = stateOkay;
00615   }
00616 
00617   return state;
00618 }
00619 
00620 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
00621                                                 const MemRegion *MR,
00622                                                 SVal strLength) {
00623   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
00624 
00625   MR = MR->StripCasts();
00626 
00627   switch (MR->getKind()) {
00628   case MemRegion::StringRegionKind:
00629     // FIXME: This can happen if we strcpy() into a string region. This is
00630     // undefined [C99 6.4.5p6], but we should still warn about it.
00631     return state;
00632 
00633   case MemRegion::SymbolicRegionKind:
00634   case MemRegion::AllocaRegionKind:
00635   case MemRegion::VarRegionKind:
00636   case MemRegion::FieldRegionKind:
00637   case MemRegion::ObjCIvarRegionKind:
00638     // These are the types we can currently track string lengths for.
00639     break;
00640 
00641   case MemRegion::ElementRegionKind:
00642     // FIXME: Handle element regions by upper-bounding the parent region's
00643     // string length.
00644     return state;
00645 
00646   default:
00647     // Other regions (mostly non-data) can't have a reliable C string length.
00648     // For now, just ignore the change.
00649     // FIXME: These are rare but not impossible. We should output some kind of
00650     // warning for things like strcpy((char[]){'a', 0}, "b");
00651     return state;
00652   }
00653 
00654   if (strLength.isUnknown())
00655     return state->remove<CStringLength>(MR);
00656 
00657   return state->set<CStringLength>(MR, strLength);
00658 }
00659 
00660 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
00661                                                ProgramStateRef &state,
00662                                                const Expr *Ex,
00663                                                const MemRegion *MR,
00664                                                bool hypothetical) {
00665   if (!hypothetical) {
00666     // If there's a recorded length, go ahead and return it.
00667     const SVal *Recorded = state->get<CStringLength>(MR);
00668     if (Recorded)
00669       return *Recorded;
00670   }
00671 
00672   // Otherwise, get a new symbol and update the state.
00673   SValBuilder &svalBuilder = C.getSValBuilder();
00674   QualType sizeTy = svalBuilder.getContext().getSizeType();
00675   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
00676                                                     MR, Ex, sizeTy,
00677                                                     C.blockCount());
00678 
00679   if (!hypothetical) {
00680     if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
00681       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
00682       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
00683       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
00684       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
00685       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
00686                                                         fourInt);
00687       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
00688       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
00689                                                 maxLength, sizeTy);
00690       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
00691     }
00692     state = state->set<CStringLength>(MR, strLength);
00693   }
00694 
00695   return strLength;
00696 }
00697 
00698 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
00699                                       const Expr *Ex, SVal Buf,
00700                                       bool hypothetical) const {
00701   const MemRegion *MR = Buf.getAsRegion();
00702   if (!MR) {
00703     // If we can't get a region, see if it's something we /know/ isn't a
00704     // C string. In the context of locations, the only time we can issue such
00705     // a warning is for labels.
00706     if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
00707       if (!Filter.CheckCStringNotNullTerm)
00708         return UndefinedVal();
00709 
00710       if (ExplodedNode *N = C.addTransition(state)) {
00711         if (!BT_NotCString)
00712           BT_NotCString.reset(new BuiltinBug(
00713               Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
00714               "Argument is not a null-terminated string."));
00715 
00716         SmallString<120> buf;
00717         llvm::raw_svector_ostream os(buf);
00718         assert(CurrentFunctionDescription);
00719         os << "Argument to " << CurrentFunctionDescription
00720            << " is the address of the label '" << Label->getLabel()->getName()
00721            << "', which is not a null-terminated string";
00722 
00723         // Generate a report for this bug.
00724         BugReport *report = new BugReport(*BT_NotCString, os.str(), N);
00725 
00726         report->addRange(Ex->getSourceRange());
00727         C.emitReport(report);        
00728       }
00729       return UndefinedVal();
00730 
00731     }
00732 
00733     // If it's not a region and not a label, give up.
00734     return UnknownVal();
00735   }
00736 
00737   // If we have a region, strip casts from it and see if we can figure out
00738   // its length. For anything we can't figure out, just return UnknownVal.
00739   MR = MR->StripCasts();
00740 
00741   switch (MR->getKind()) {
00742   case MemRegion::StringRegionKind: {
00743     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
00744     // so we can assume that the byte length is the correct C string length.
00745     SValBuilder &svalBuilder = C.getSValBuilder();
00746     QualType sizeTy = svalBuilder.getContext().getSizeType();
00747     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
00748     return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy);
00749   }
00750   case MemRegion::SymbolicRegionKind:
00751   case MemRegion::AllocaRegionKind:
00752   case MemRegion::VarRegionKind:
00753   case MemRegion::FieldRegionKind:
00754   case MemRegion::ObjCIvarRegionKind:
00755     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
00756   case MemRegion::CompoundLiteralRegionKind:
00757     // FIXME: Can we track this? Is it necessary?
00758     return UnknownVal();
00759   case MemRegion::ElementRegionKind:
00760     // FIXME: How can we handle this? It's not good enough to subtract the
00761     // offset from the base string length; consider "123\x00567" and &a[5].
00762     return UnknownVal();
00763   default:
00764     // Other regions (mostly non-data) can't have a reliable C string length.
00765     // In this case, an error is emitted and UndefinedVal is returned.
00766     // The caller should always be prepared to handle this case.
00767     if (!Filter.CheckCStringNotNullTerm)
00768       return UndefinedVal();
00769 
00770     if (ExplodedNode *N = C.addTransition(state)) {
00771       if (!BT_NotCString)
00772         BT_NotCString.reset(new BuiltinBug(
00773             Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
00774             "Argument is not a null-terminated string."));
00775 
00776       SmallString<120> buf;
00777       llvm::raw_svector_ostream os(buf);
00778 
00779       assert(CurrentFunctionDescription);
00780       os << "Argument to " << CurrentFunctionDescription << " is ";
00781 
00782       if (SummarizeRegion(os, C.getASTContext(), MR))
00783         os << ", which is not a null-terminated string";
00784       else
00785         os << "not a null-terminated string";
00786 
00787       // Generate a report for this bug.
00788       BugReport *report = new BugReport(*BT_NotCString,
00789                                                         os.str(), N);
00790 
00791       report->addRange(Ex->getSourceRange());
00792       C.emitReport(report);        
00793     }
00794 
00795     return UndefinedVal();
00796   }
00797 }
00798 
00799 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
00800   ProgramStateRef &state, const Expr *expr, SVal val) const {
00801 
00802   // Get the memory region pointed to by the val.
00803   const MemRegion *bufRegion = val.getAsRegion();
00804   if (!bufRegion)
00805     return nullptr;
00806 
00807   // Strip casts off the memory region.
00808   bufRegion = bufRegion->StripCasts();
00809 
00810   // Cast the memory region to a string region.
00811   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
00812   if (!strRegion)
00813     return nullptr;
00814 
00815   // Return the actual string in the string region.
00816   return strRegion->getStringLiteral();
00817 }
00818 
00819 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C,
00820                                                  ProgramStateRef state,
00821                                                  const Expr *E, SVal V,
00822                                                  bool IsSourceBuffer) {
00823   Optional<Loc> L = V.getAs<Loc>();
00824   if (!L)
00825     return state;
00826 
00827   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
00828   // some assumptions about the value that CFRefCount can't. Even so, it should
00829   // probably be refactored.
00830   if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
00831     const MemRegion *R = MR->getRegion()->StripCasts();
00832 
00833     // Are we dealing with an ElementRegion?  If so, we should be invalidating
00834     // the super-region.
00835     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
00836       R = ER->getSuperRegion();
00837       // FIXME: What about layers of ElementRegions?
00838     }
00839 
00840     // Invalidate this region.
00841     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
00842 
00843     bool CausesPointerEscape = false;
00844     RegionAndSymbolInvalidationTraits ITraits;
00845     // Invalidate and escape only indirect regions accessible through the source
00846     // buffer.
00847     if (IsSourceBuffer) {
00848       ITraits.setTrait(R, 
00849                        RegionAndSymbolInvalidationTraits::TK_PreserveContents);
00850       ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
00851       CausesPointerEscape = true;
00852     }
00853 
00854     return state->invalidateRegions(R, E, C.blockCount(), LCtx, 
00855                                     CausesPointerEscape, nullptr, nullptr,
00856                                     &ITraits);
00857   }
00858 
00859   // If we have a non-region value by chance, just remove the binding.
00860   // FIXME: is this necessary or correct? This handles the non-Region
00861   //  cases.  Is it ever valid to store to these?
00862   return state->killBinding(*L);
00863 }
00864 
00865 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
00866                                      const MemRegion *MR) {
00867   const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR);
00868 
00869   switch (MR->getKind()) {
00870   case MemRegion::FunctionTextRegionKind: {
00871     const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl();
00872     if (FD)
00873       os << "the address of the function '" << *FD << '\'';
00874     else
00875       os << "the address of a function";
00876     return true;
00877   }
00878   case MemRegion::BlockTextRegionKind:
00879     os << "block text";
00880     return true;
00881   case MemRegion::BlockDataRegionKind:
00882     os << "a block";
00883     return true;
00884   case MemRegion::CXXThisRegionKind:
00885   case MemRegion::CXXTempObjectRegionKind:
00886     os << "a C++ temp object of type " << TVR->getValueType().getAsString();
00887     return true;
00888   case MemRegion::VarRegionKind:
00889     os << "a variable of type" << TVR->getValueType().getAsString();
00890     return true;
00891   case MemRegion::FieldRegionKind:
00892     os << "a field of type " << TVR->getValueType().getAsString();
00893     return true;
00894   case MemRegion::ObjCIvarRegionKind:
00895     os << "an instance variable of type " << TVR->getValueType().getAsString();
00896     return true;
00897   default:
00898     return false;
00899   }
00900 }
00901 
00902 //===----------------------------------------------------------------------===//
00903 // evaluation of individual function calls.
00904 //===----------------------------------------------------------------------===//
00905 
00906 void CStringChecker::evalCopyCommon(CheckerContext &C, 
00907                                     const CallExpr *CE,
00908                                     ProgramStateRef state,
00909                                     const Expr *Size, const Expr *Dest,
00910                                     const Expr *Source, bool Restricted,
00911                                     bool IsMempcpy) const {
00912   CurrentFunctionDescription = "memory copy function";
00913 
00914   // See if the size argument is zero.
00915   const LocationContext *LCtx = C.getLocationContext();
00916   SVal sizeVal = state->getSVal(Size, LCtx);
00917   QualType sizeTy = Size->getType();
00918 
00919   ProgramStateRef stateZeroSize, stateNonZeroSize;
00920   std::tie(stateZeroSize, stateNonZeroSize) =
00921     assumeZero(C, state, sizeVal, sizeTy);
00922 
00923   // Get the value of the Dest.
00924   SVal destVal = state->getSVal(Dest, LCtx);
00925 
00926   // If the size is zero, there won't be any actual memory access, so
00927   // just bind the return value to the destination buffer and return.
00928   if (stateZeroSize && !stateNonZeroSize) {
00929     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
00930     C.addTransition(stateZeroSize);
00931     return;
00932   }
00933 
00934   // If the size can be nonzero, we have to check the other arguments.
00935   if (stateNonZeroSize) {
00936     state = stateNonZeroSize;
00937 
00938     // Ensure the destination is not null. If it is NULL there will be a
00939     // NULL pointer dereference.
00940     state = checkNonNull(C, state, Dest, destVal);
00941     if (!state)
00942       return;
00943 
00944     // Get the value of the Src.
00945     SVal srcVal = state->getSVal(Source, LCtx);
00946     
00947     // Ensure the source is not null. If it is NULL there will be a
00948     // NULL pointer dereference.
00949     state = checkNonNull(C, state, Source, srcVal);
00950     if (!state)
00951       return;
00952 
00953     // Ensure the accesses are valid and that the buffers do not overlap.
00954     const char * const writeWarning =
00955       "Memory copy function overflows destination buffer";
00956     state = CheckBufferAccess(C, state, Size, Dest, Source,
00957                               writeWarning, /* sourceWarning = */ nullptr);
00958     if (Restricted)
00959       state = CheckOverlap(C, state, Size, Dest, Source);
00960 
00961     if (!state)
00962       return;
00963 
00964     // If this is mempcpy, get the byte after the last byte copied and 
00965     // bind the expr.
00966     if (IsMempcpy) {
00967       loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>();
00968       
00969       // Get the length to copy.
00970       if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) {
00971         // Get the byte after the last byte copied.
00972         SValBuilder &SvalBuilder = C.getSValBuilder();
00973         ASTContext &Ctx = SvalBuilder.getContext();
00974         QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy);
00975         loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal,
00976           CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>();
00977         SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 
00978                                                           DestRegCharVal,
00979                                                           *lenValNonLoc, 
00980                                                           Dest->getType());
00981       
00982         // The byte after the last byte copied is the return value.
00983         state = state->BindExpr(CE, LCtx, lastElement);
00984       } else {
00985         // If we don't know how much we copied, we can at least
00986         // conjure a return value for later.
00987         SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
00988                                                           C.blockCount());
00989         state = state->BindExpr(CE, LCtx, result);
00990       }
00991 
00992     } else {
00993       // All other copies return the destination buffer.
00994       // (Well, bcopy() has a void return type, but this won't hurt.)
00995       state = state->BindExpr(CE, LCtx, destVal);
00996     }
00997 
00998     // Invalidate the destination (regular invalidation without pointer-escaping
00999     // the address of the top-level region).
01000     // FIXME: Even if we can't perfectly model the copy, we should see if we
01001     // can use LazyCompoundVals to copy the source values into the destination.
01002     // This would probably remove any existing bindings past the end of the
01003     // copied region, but that's still an improvement over blank invalidation.
01004     state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest), 
01005                              /*IsSourceBuffer*/false);
01006 
01007     // Invalidate the source (const-invalidation without const-pointer-escaping
01008     // the address of the top-level region).
01009     state = InvalidateBuffer(C, state, Source, C.getSVal(Source), 
01010                              /*IsSourceBuffer*/true);
01011 
01012     C.addTransition(state);
01013   }
01014 }
01015 
01016 
01017 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const {
01018   if (CE->getNumArgs() < 3)
01019     return;
01020 
01021   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
01022   // The return value is the address of the destination buffer.
01023   const Expr *Dest = CE->getArg(0);
01024   ProgramStateRef state = C.getState();
01025 
01026   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true);
01027 }
01028 
01029 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const {
01030   if (CE->getNumArgs() < 3)
01031     return;
01032 
01033   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
01034   // The return value is a pointer to the byte following the last written byte.
01035   const Expr *Dest = CE->getArg(0);
01036   ProgramStateRef state = C.getState();
01037   
01038   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true);
01039 }
01040 
01041 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const {
01042   if (CE->getNumArgs() < 3)
01043     return;
01044 
01045   // void *memmove(void *dst, const void *src, size_t n);
01046   // The return value is the address of the destination buffer.
01047   const Expr *Dest = CE->getArg(0);
01048   ProgramStateRef state = C.getState();
01049 
01050   evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1));
01051 }
01052 
01053 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
01054   if (CE->getNumArgs() < 3)
01055     return;
01056 
01057   // void bcopy(const void *src, void *dst, size_t n);
01058   evalCopyCommon(C, CE, C.getState(), 
01059                  CE->getArg(2), CE->getArg(1), CE->getArg(0));
01060 }
01061 
01062 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const {
01063   if (CE->getNumArgs() < 3)
01064     return;
01065 
01066   // int memcmp(const void *s1, const void *s2, size_t n);
01067   CurrentFunctionDescription = "memory comparison function";
01068 
01069   const Expr *Left = CE->getArg(0);
01070   const Expr *Right = CE->getArg(1);
01071   const Expr *Size = CE->getArg(2);
01072 
01073   ProgramStateRef state = C.getState();
01074   SValBuilder &svalBuilder = C.getSValBuilder();
01075 
01076   // See if the size argument is zero.
01077   const LocationContext *LCtx = C.getLocationContext();
01078   SVal sizeVal = state->getSVal(Size, LCtx);
01079   QualType sizeTy = Size->getType();
01080 
01081   ProgramStateRef stateZeroSize, stateNonZeroSize;
01082   std::tie(stateZeroSize, stateNonZeroSize) =
01083     assumeZero(C, state, sizeVal, sizeTy);
01084 
01085   // If the size can be zero, the result will be 0 in that case, and we don't
01086   // have to check either of the buffers.
01087   if (stateZeroSize) {
01088     state = stateZeroSize;
01089     state = state->BindExpr(CE, LCtx,
01090                             svalBuilder.makeZeroVal(CE->getType()));
01091     C.addTransition(state);
01092   }
01093 
01094   // If the size can be nonzero, we have to check the other arguments.
01095   if (stateNonZeroSize) {
01096     state = stateNonZeroSize;
01097     // If we know the two buffers are the same, we know the result is 0.
01098     // First, get the two buffers' addresses. Another checker will have already
01099     // made sure they're not undefined.
01100     DefinedOrUnknownSVal LV =
01101         state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>();
01102     DefinedOrUnknownSVal RV =
01103         state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>();
01104 
01105     // See if they are the same.
01106     DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
01107     ProgramStateRef StSameBuf, StNotSameBuf;
01108     std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
01109 
01110     // If the two arguments might be the same buffer, we know the result is 0,
01111     // and we only need to check one size.
01112     if (StSameBuf) {
01113       state = StSameBuf;
01114       state = CheckBufferAccess(C, state, Size, Left);
01115       if (state) {
01116         state = StSameBuf->BindExpr(CE, LCtx,
01117                                     svalBuilder.makeZeroVal(CE->getType()));
01118         C.addTransition(state);
01119       }
01120     }
01121 
01122     // If the two arguments might be different buffers, we have to check the
01123     // size of both of them.
01124     if (StNotSameBuf) {
01125       state = StNotSameBuf;
01126       state = CheckBufferAccess(C, state, Size, Left, Right);
01127       if (state) {
01128         // The return value is the comparison result, which we don't know.
01129         SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
01130                                                  C.blockCount());
01131         state = state->BindExpr(CE, LCtx, CmpV);
01132         C.addTransition(state);
01133       }
01134     }
01135   }
01136 }
01137 
01138 void CStringChecker::evalstrLength(CheckerContext &C,
01139                                    const CallExpr *CE) const {
01140   if (CE->getNumArgs() < 1)
01141     return;
01142 
01143   // size_t strlen(const char *s);
01144   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
01145 }
01146 
01147 void CStringChecker::evalstrnLength(CheckerContext &C,
01148                                     const CallExpr *CE) const {
01149   if (CE->getNumArgs() < 2)
01150     return;
01151 
01152   // size_t strnlen(const char *s, size_t maxlen);
01153   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
01154 }
01155 
01156 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
01157                                          bool IsStrnlen) const {
01158   CurrentFunctionDescription = "string length function";
01159   ProgramStateRef state = C.getState();
01160   const LocationContext *LCtx = C.getLocationContext();
01161 
01162   if (IsStrnlen) {
01163     const Expr *maxlenExpr = CE->getArg(1);
01164     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
01165 
01166     ProgramStateRef stateZeroSize, stateNonZeroSize;
01167     std::tie(stateZeroSize, stateNonZeroSize) =
01168       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
01169 
01170     // If the size can be zero, the result will be 0 in that case, and we don't
01171     // have to check the string itself.
01172     if (stateZeroSize) {
01173       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
01174       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
01175       C.addTransition(stateZeroSize);
01176     }
01177 
01178     // If the size is GUARANTEED to be zero, we're done!
01179     if (!stateNonZeroSize)
01180       return;
01181 
01182     // Otherwise, record the assumption that the size is nonzero.
01183     state = stateNonZeroSize;
01184   }
01185 
01186   // Check that the string argument is non-null.
01187   const Expr *Arg = CE->getArg(0);
01188   SVal ArgVal = state->getSVal(Arg, LCtx);
01189 
01190   state = checkNonNull(C, state, Arg, ArgVal);
01191 
01192   if (!state)
01193     return;
01194 
01195   SVal strLength = getCStringLength(C, state, Arg, ArgVal);
01196 
01197   // If the argument isn't a valid C string, there's no valid state to
01198   // transition to.
01199   if (strLength.isUndef())
01200     return;
01201 
01202   DefinedOrUnknownSVal result = UnknownVal();
01203 
01204   // If the check is for strnlen() then bind the return value to no more than
01205   // the maxlen value.
01206   if (IsStrnlen) {
01207     QualType cmpTy = C.getSValBuilder().getConditionType();
01208 
01209     // It's a little unfortunate to be getting this again,
01210     // but it's not that expensive...
01211     const Expr *maxlenExpr = CE->getArg(1);
01212     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
01213 
01214     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
01215     Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
01216 
01217     if (strLengthNL && maxlenValNL) {
01218       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
01219 
01220       // Check if the strLength is greater than the maxlen.
01221       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
01222           C.getSValBuilder()
01223               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
01224               .castAs<DefinedOrUnknownSVal>());
01225 
01226       if (stateStringTooLong && !stateStringNotTooLong) {
01227         // If the string is longer than maxlen, return maxlen.
01228         result = *maxlenValNL;
01229       } else if (stateStringNotTooLong && !stateStringTooLong) {
01230         // If the string is shorter than maxlen, return its length.
01231         result = *strLengthNL;
01232       }
01233     }
01234 
01235     if (result.isUnknown()) {
01236       // If we don't have enough information for a comparison, there's
01237       // no guarantee the full string length will actually be returned.
01238       // All we know is the return value is the min of the string length
01239       // and the limit. This is better than nothing.
01240       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
01241                                                    C.blockCount());
01242       NonLoc resultNL = result.castAs<NonLoc>();
01243 
01244       if (strLengthNL) {
01245         state = state->assume(C.getSValBuilder().evalBinOpNN(
01246                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
01247                                   .castAs<DefinedOrUnknownSVal>(), true);
01248       }
01249       
01250       if (maxlenValNL) {
01251         state = state->assume(C.getSValBuilder().evalBinOpNN(
01252                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
01253                                   .castAs<DefinedOrUnknownSVal>(), true);
01254       }
01255     }
01256 
01257   } else {
01258     // This is a plain strlen(), not strnlen().
01259     result = strLength.castAs<DefinedOrUnknownSVal>();
01260 
01261     // If we don't know the length of the string, conjure a return
01262     // value, so it can be used in constraints, at least.
01263     if (result.isUnknown()) {
01264       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
01265                                                    C.blockCount());
01266     }
01267   }
01268 
01269   // Bind the return value.
01270   assert(!result.isUnknown() && "Should have conjured a value by now");
01271   state = state->BindExpr(CE, LCtx, result);
01272   C.addTransition(state);
01273 }
01274 
01275 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
01276   if (CE->getNumArgs() < 2)
01277     return;
01278 
01279   // char *strcpy(char *restrict dst, const char *restrict src);
01280   evalStrcpyCommon(C, CE, 
01281                    /* returnEnd = */ false, 
01282                    /* isBounded = */ false,
01283                    /* isAppending = */ false);
01284 }
01285 
01286 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
01287   if (CE->getNumArgs() < 3)
01288     return;
01289 
01290   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
01291   evalStrcpyCommon(C, CE, 
01292                    /* returnEnd = */ false, 
01293                    /* isBounded = */ true,
01294                    /* isAppending = */ false);
01295 }
01296 
01297 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
01298   if (CE->getNumArgs() < 2)
01299     return;
01300 
01301   // char *stpcpy(char *restrict dst, const char *restrict src);
01302   evalStrcpyCommon(C, CE, 
01303                    /* returnEnd = */ true, 
01304                    /* isBounded = */ false,
01305                    /* isAppending = */ false);
01306 }
01307 
01308 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
01309   if (CE->getNumArgs() < 2)
01310     return;
01311 
01312   //char *strcat(char *restrict s1, const char *restrict s2);
01313   evalStrcpyCommon(C, CE, 
01314                    /* returnEnd = */ false, 
01315                    /* isBounded = */ false,
01316                    /* isAppending = */ true);
01317 }
01318 
01319 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
01320   if (CE->getNumArgs() < 3)
01321     return;
01322 
01323   //char *strncat(char *restrict s1, const char *restrict s2, size_t n);
01324   evalStrcpyCommon(C, CE, 
01325                    /* returnEnd = */ false, 
01326                    /* isBounded = */ true,
01327                    /* isAppending = */ true);
01328 }
01329 
01330 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
01331                                       bool returnEnd, bool isBounded,
01332                                       bool isAppending) const {
01333   CurrentFunctionDescription = "string copy function";
01334   ProgramStateRef state = C.getState();
01335   const LocationContext *LCtx = C.getLocationContext();
01336 
01337   // Check that the destination is non-null.
01338   const Expr *Dst = CE->getArg(0);
01339   SVal DstVal = state->getSVal(Dst, LCtx);
01340 
01341   state = checkNonNull(C, state, Dst, DstVal);
01342   if (!state)
01343     return;
01344 
01345   // Check that the source is non-null.
01346   const Expr *srcExpr = CE->getArg(1);
01347   SVal srcVal = state->getSVal(srcExpr, LCtx);
01348   state = checkNonNull(C, state, srcExpr, srcVal);
01349   if (!state)
01350     return;
01351 
01352   // Get the string length of the source.
01353   SVal strLength = getCStringLength(C, state, srcExpr, srcVal);
01354 
01355   // If the source isn't a valid C string, give up.
01356   if (strLength.isUndef())
01357     return;
01358 
01359   SValBuilder &svalBuilder = C.getSValBuilder();
01360   QualType cmpTy = svalBuilder.getConditionType();
01361   QualType sizeTy = svalBuilder.getContext().getSizeType();
01362 
01363   // These two values allow checking two kinds of errors:
01364   // - actual overflows caused by a source that doesn't fit in the destination
01365   // - potential overflows caused by a bound that could exceed the destination
01366   SVal amountCopied = UnknownVal();
01367   SVal maxLastElementIndex = UnknownVal();
01368   const char *boundWarning = nullptr;
01369 
01370   // If the function is strncpy, strncat, etc... it is bounded.
01371   if (isBounded) {
01372     // Get the max number of characters to copy.
01373     const Expr *lenExpr = CE->getArg(2);
01374     SVal lenVal = state->getSVal(lenExpr, LCtx);
01375 
01376     // Protect against misdeclared strncpy().
01377     lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType());
01378 
01379     Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
01380     Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
01381 
01382     // If we know both values, we might be able to figure out how much
01383     // we're copying.
01384     if (strLengthNL && lenValNL) {
01385       ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
01386 
01387       // Check if the max number to copy is less than the length of the src.
01388       // If the bound is equal to the source length, strncpy won't null-
01389       // terminate the result!
01390       std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
01391           svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
01392               .castAs<DefinedOrUnknownSVal>());
01393 
01394       if (stateSourceTooLong && !stateSourceNotTooLong) {
01395         // Max number to copy is less than the length of the src, so the actual
01396         // strLength copied is the max number arg.
01397         state = stateSourceTooLong;
01398         amountCopied = lenVal;
01399 
01400       } else if (!stateSourceTooLong && stateSourceNotTooLong) {
01401         // The source buffer entirely fits in the bound.
01402         state = stateSourceNotTooLong;
01403         amountCopied = strLength;
01404       }
01405     }
01406 
01407     // We still want to know if the bound is known to be too large.
01408     if (lenValNL) {
01409       if (isAppending) {
01410         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
01411 
01412         // Get the string length of the destination. If the destination is
01413         // memory that can't have a string length, we shouldn't be copying
01414         // into it anyway.
01415         SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
01416         if (dstStrLength.isUndef())
01417           return;
01418 
01419         if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) {
01420           maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add,
01421                                                         *lenValNL,
01422                                                         *dstStrLengthNL,
01423                                                         sizeTy);
01424           boundWarning = "Size argument is greater than the free space in the "
01425                          "destination buffer";
01426         }
01427 
01428       } else {
01429         // For strncpy, this is just checking that lenVal <= sizeof(dst)
01430         // (Yes, strncpy and strncat differ in how they treat termination.
01431         // strncat ALWAYS terminates, but strncpy doesn't.)
01432 
01433         // We need a special case for when the copy size is zero, in which
01434         // case strncpy will do no work at all. Our bounds check uses n-1
01435         // as the last element accessed, so n == 0 is problematic.
01436         ProgramStateRef StateZeroSize, StateNonZeroSize;
01437         std::tie(StateZeroSize, StateNonZeroSize) =
01438           assumeZero(C, state, *lenValNL, sizeTy);
01439 
01440         // If the size is known to be zero, we're done.
01441         if (StateZeroSize && !StateNonZeroSize) {
01442           StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
01443           C.addTransition(StateZeroSize);
01444           return;
01445         }
01446 
01447         // Otherwise, go ahead and figure out the last element we'll touch.
01448         // We don't record the non-zero assumption here because we can't
01449         // be sure. We won't warn on a possible zero.
01450         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
01451         maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
01452                                                       one, sizeTy);
01453         boundWarning = "Size argument is greater than the length of the "
01454                        "destination buffer";
01455       }
01456     }
01457 
01458     // If we couldn't pin down the copy length, at least bound it.
01459     // FIXME: We should actually run this code path for append as well, but
01460     // right now it creates problems with constraints (since we can end up
01461     // trying to pass constraints from symbol to symbol).
01462     if (amountCopied.isUnknown() && !isAppending) {
01463       // Try to get a "hypothetical" string length symbol, which we can later
01464       // set as a real value if that turns out to be the case.
01465       amountCopied = getCStringLength(C, state, lenExpr, srcVal, true);
01466       assert(!amountCopied.isUndef());
01467 
01468       if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) {
01469         if (lenValNL) {
01470           // amountCopied <= lenVal
01471           SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE,
01472                                                              *amountCopiedNL,
01473                                                              *lenValNL,
01474                                                              cmpTy);
01475           state = state->assume(
01476               copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true);
01477           if (!state)
01478             return;
01479         }
01480 
01481         if (strLengthNL) {
01482           // amountCopied <= strlen(source)
01483           SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE,
01484                                                            *amountCopiedNL,
01485                                                            *strLengthNL,
01486                                                            cmpTy);
01487           state = state->assume(
01488               copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true);
01489           if (!state)
01490             return;
01491         }
01492       }
01493     }
01494 
01495   } else {
01496     // The function isn't bounded. The amount copied should match the length
01497     // of the source buffer.
01498     amountCopied = strLength;
01499   }
01500 
01501   assert(state);
01502 
01503   // This represents the number of characters copied into the destination
01504   // buffer. (It may not actually be the strlen if the destination buffer
01505   // is not terminated.)
01506   SVal finalStrLength = UnknownVal();
01507 
01508   // If this is an appending function (strcat, strncat...) then set the
01509   // string length to strlen(src) + strlen(dst) since the buffer will
01510   // ultimately contain both.
01511   if (isAppending) {
01512     // Get the string length of the destination. If the destination is memory
01513     // that can't have a string length, we shouldn't be copying into it anyway.
01514     SVal dstStrLength = getCStringLength(C, state, Dst, DstVal);
01515     if (dstStrLength.isUndef())
01516       return;
01517 
01518     Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>();
01519     Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
01520     
01521     // If we know both string lengths, we might know the final string length.
01522     if (srcStrLengthNL && dstStrLengthNL) {
01523       // Make sure the two lengths together don't overflow a size_t.
01524       state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL);
01525       if (!state)
01526         return;
01527 
01528       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 
01529                                                *dstStrLengthNL, sizeTy);
01530     }
01531 
01532     // If we couldn't get a single value for the final string length,
01533     // we can at least bound it by the individual lengths.
01534     if (finalStrLength.isUnknown()) {
01535       // Try to get a "hypothetical" string length symbol, which we can later
01536       // set as a real value if that turns out to be the case.
01537       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
01538       assert(!finalStrLength.isUndef());
01539 
01540       if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) {
01541         if (srcStrLengthNL) {
01542           // finalStrLength >= srcStrLength
01543           SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE,
01544                                                         *finalStrLengthNL,
01545                                                         *srcStrLengthNL,
01546                                                         cmpTy);
01547           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
01548                                 true);
01549           if (!state)
01550             return;
01551         }
01552 
01553         if (dstStrLengthNL) {
01554           // finalStrLength >= dstStrLength
01555           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
01556                                                       *finalStrLengthNL,
01557                                                       *dstStrLengthNL,
01558                                                       cmpTy);
01559           state =
01560               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
01561           if (!state)
01562             return;
01563         }
01564       }
01565     }
01566 
01567   } else {
01568     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
01569     // the final string length will match the input string length.
01570     finalStrLength = amountCopied;
01571   }
01572 
01573   // The final result of the function will either be a pointer past the last
01574   // copied element, or a pointer to the start of the destination buffer.
01575   SVal Result = (returnEnd ? UnknownVal() : DstVal);
01576 
01577   assert(state);
01578 
01579   // If the destination is a MemRegion, try to check for a buffer overflow and
01580   // record the new string length.
01581   if (Optional<loc::MemRegionVal> dstRegVal =
01582           DstVal.getAs<loc::MemRegionVal>()) {
01583     QualType ptrTy = Dst->getType();
01584 
01585     // If we have an exact value on a bounded copy, use that to check for
01586     // overflows, rather than our estimate about how much is actually copied.
01587     if (boundWarning) {
01588       if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
01589         SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
01590                                                       *maxLastNL, ptrTy);
01591         state = CheckLocation(C, state, CE->getArg(2), maxLastElement, 
01592                               boundWarning);
01593         if (!state)
01594           return;
01595       }
01596     }
01597 
01598     // Then, if the final length is known...
01599     if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
01600       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
01601                                                  *knownStrLength, ptrTy);
01602 
01603       // ...and we haven't checked the bound, we'll check the actual copy.
01604       if (!boundWarning) {
01605         const char * const warningMsg =
01606           "String copy function overflows destination buffer";
01607         state = CheckLocation(C, state, Dst, lastElement, warningMsg);
01608         if (!state)
01609           return;
01610       }
01611 
01612       // If this is a stpcpy-style copy, the last element is the return value.
01613       if (returnEnd)
01614         Result = lastElement;
01615     }
01616 
01617     // Invalidate the destination (regular invalidation without pointer-escaping
01618     // the address of the top-level region). This must happen before we set the
01619     // C string length because invalidation will clear the length.
01620     // FIXME: Even if we can't perfectly model the copy, we should see if we
01621     // can use LazyCompoundVals to copy the source values into the destination.
01622     // This would probably remove any existing bindings past the end of the
01623     // string, but that's still an improvement over blank invalidation.
01624     state = InvalidateBuffer(C, state, Dst, *dstRegVal,
01625                              /*IsSourceBuffer*/false);
01626 
01627     // Invalidate the source (const-invalidation without const-pointer-escaping
01628     // the address of the top-level region).
01629     state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true);
01630 
01631     // Set the C string length of the destination, if we know it.
01632     if (isBounded && !isAppending) {
01633       // strncpy is annoying in that it doesn't guarantee to null-terminate
01634       // the result string. If the original string didn't fit entirely inside
01635       // the bound (including the null-terminator), we don't know how long the
01636       // result is.
01637       if (amountCopied != strLength)
01638         finalStrLength = UnknownVal();
01639     }
01640     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
01641   }
01642 
01643   assert(state);
01644 
01645   // If this is a stpcpy-style copy, but we were unable to check for a buffer
01646   // overflow, we still need a result. Conjure a return value.
01647   if (returnEnd && Result.isUnknown()) {
01648     Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
01649   }
01650 
01651   // Set the return value.
01652   state = state->BindExpr(CE, LCtx, Result);
01653   C.addTransition(state);
01654 }
01655 
01656 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
01657   if (CE->getNumArgs() < 2)
01658     return;
01659 
01660   //int strcmp(const char *s1, const char *s2);
01661   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false);
01662 }
01663 
01664 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
01665   if (CE->getNumArgs() < 3)
01666     return;
01667 
01668   //int strncmp(const char *s1, const char *s2, size_t n);
01669   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false);
01670 }
01671 
01672 void CStringChecker::evalStrcasecmp(CheckerContext &C, 
01673                                     const CallExpr *CE) const {
01674   if (CE->getNumArgs() < 2)
01675     return;
01676 
01677   //int strcasecmp(const char *s1, const char *s2);
01678   evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true);
01679 }
01680 
01681 void CStringChecker::evalStrncasecmp(CheckerContext &C, 
01682                                      const CallExpr *CE) const {
01683   if (CE->getNumArgs() < 3)
01684     return;
01685 
01686   //int strncasecmp(const char *s1, const char *s2, size_t n);
01687   evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true);
01688 }
01689 
01690 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
01691                                       bool isBounded, bool ignoreCase) const {
01692   CurrentFunctionDescription = "string comparison function";
01693   ProgramStateRef state = C.getState();
01694   const LocationContext *LCtx = C.getLocationContext();
01695 
01696   // Check that the first string is non-null
01697   const Expr *s1 = CE->getArg(0);
01698   SVal s1Val = state->getSVal(s1, LCtx);
01699   state = checkNonNull(C, state, s1, s1Val);
01700   if (!state)
01701     return;
01702 
01703   // Check that the second string is non-null.
01704   const Expr *s2 = CE->getArg(1);
01705   SVal s2Val = state->getSVal(s2, LCtx);
01706   state = checkNonNull(C, state, s2, s2Val);
01707   if (!state)
01708     return;
01709 
01710   // Get the string length of the first string or give up.
01711   SVal s1Length = getCStringLength(C, state, s1, s1Val);
01712   if (s1Length.isUndef())
01713     return;
01714 
01715   // Get the string length of the second string or give up.
01716   SVal s2Length = getCStringLength(C, state, s2, s2Val);
01717   if (s2Length.isUndef())
01718     return;
01719 
01720   // If we know the two buffers are the same, we know the result is 0.
01721   // First, get the two buffers' addresses. Another checker will have already
01722   // made sure they're not undefined.
01723   DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>();
01724   DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>();
01725 
01726   // See if they are the same.
01727   SValBuilder &svalBuilder = C.getSValBuilder();
01728   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
01729   ProgramStateRef StSameBuf, StNotSameBuf;
01730   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
01731 
01732   // If the two arguments might be the same buffer, we know the result is 0,
01733   // and we only need to check one size.
01734   if (StSameBuf) {
01735     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
01736                                     svalBuilder.makeZeroVal(CE->getType()));
01737     C.addTransition(StSameBuf);
01738 
01739     // If the two arguments are GUARANTEED to be the same, we're done!
01740     if (!StNotSameBuf)
01741       return;
01742   }
01743 
01744   assert(StNotSameBuf);
01745   state = StNotSameBuf;
01746 
01747   // At this point we can go about comparing the two buffers.
01748   // For now, we only do this if they're both known string literals.
01749 
01750   // Attempt to extract string literals from both expressions.
01751   const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val);
01752   const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val);
01753   bool canComputeResult = false;
01754 
01755   if (s1StrLiteral && s2StrLiteral) {
01756     StringRef s1StrRef = s1StrLiteral->getString();
01757     StringRef s2StrRef = s2StrLiteral->getString();
01758 
01759     if (isBounded) {
01760       // Get the max number of characters to compare.
01761       const Expr *lenExpr = CE->getArg(2);
01762       SVal lenVal = state->getSVal(lenExpr, LCtx);
01763 
01764       // If the length is known, we can get the right substrings.
01765       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
01766         // Create substrings of each to compare the prefix.
01767         s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue());
01768         s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue());
01769         canComputeResult = true;
01770       }
01771     } else {
01772       // This is a normal, unbounded strcmp.
01773       canComputeResult = true;
01774     }
01775 
01776     if (canComputeResult) {
01777       // Real strcmp stops at null characters.
01778       size_t s1Term = s1StrRef.find('\0');
01779       if (s1Term != StringRef::npos)
01780         s1StrRef = s1StrRef.substr(0, s1Term);
01781 
01782       size_t s2Term = s2StrRef.find('\0');
01783       if (s2Term != StringRef::npos)
01784         s2StrRef = s2StrRef.substr(0, s2Term);
01785 
01786       // Use StringRef's comparison methods to compute the actual result.
01787       int result;
01788 
01789       if (ignoreCase) {
01790         // Compare string 1 to string 2 the same way strcasecmp() does.
01791         result = s1StrRef.compare_lower(s2StrRef);
01792       } else {
01793         // Compare string 1 to string 2 the same way strcmp() does.
01794         result = s1StrRef.compare(s2StrRef);
01795       }
01796 
01797       // Build the SVal of the comparison and bind the return value.
01798       SVal resultVal = svalBuilder.makeIntVal(result, CE->getType());
01799       state = state->BindExpr(CE, LCtx, resultVal);
01800     }
01801   }
01802 
01803   if (!canComputeResult) {
01804     // Conjure a symbolic value. It's the best we can do.
01805     SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
01806                                                   C.blockCount());
01807     state = state->BindExpr(CE, LCtx, resultVal);
01808   }
01809 
01810   // Record this as a possible path.
01811   C.addTransition(state);
01812 }
01813 
01814 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
01815   //char *strsep(char **stringp, const char *delim);
01816   if (CE->getNumArgs() < 2)
01817     return;
01818 
01819   // Sanity: does the search string parameter match the return type?
01820   const Expr *SearchStrPtr = CE->getArg(0);
01821   QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType();
01822   if (CharPtrTy.isNull() ||
01823       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
01824     return;
01825 
01826   CurrentFunctionDescription = "strsep()";
01827   ProgramStateRef State = C.getState();
01828   const LocationContext *LCtx = C.getLocationContext();
01829 
01830   // Check that the search string pointer is non-null (though it may point to
01831   // a null string).
01832   SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx);
01833   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
01834   if (!State)
01835     return;
01836 
01837   // Check that the delimiter string is non-null.
01838   const Expr *DelimStr = CE->getArg(1);
01839   SVal DelimStrVal = State->getSVal(DelimStr, LCtx);
01840   State = checkNonNull(C, State, DelimStr, DelimStrVal);
01841   if (!State)
01842     return;
01843 
01844   SValBuilder &SVB = C.getSValBuilder();
01845   SVal Result;
01846   if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
01847     // Get the current value of the search string pointer, as a char*.
01848     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
01849 
01850     // Invalidate the search string, representing the change of one delimiter
01851     // character to NUL.
01852     State = InvalidateBuffer(C, State, SearchStrPtr, Result,
01853                              /*IsSourceBuffer*/false);
01854 
01855     // Overwrite the search string pointer. The new value is either an address
01856     // further along in the same string, or NULL if there are no more tokens.
01857     State = State->bindLoc(*SearchStrLoc,
01858                            SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy,
01859                                                 C.blockCount()));
01860   } else {
01861     assert(SearchStrVal.isUnknown());
01862     // Conjure a symbolic value. It's the best we can do.
01863     Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
01864   }
01865 
01866   // Set the return value, and finish.
01867   State = State->BindExpr(CE, LCtx, Result);
01868   C.addTransition(State);
01869 }
01870 
01871 
01872 //===----------------------------------------------------------------------===//
01873 // The driver method, and other Checker callbacks.
01874 //===----------------------------------------------------------------------===//
01875 
01876 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const {
01877   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
01878 
01879   if (!FDecl)
01880     return false;
01881 
01882   // FIXME: Poorly-factored string switches are slow.
01883   FnCheck evalFunction = nullptr;
01884   if (C.isCLibraryFunction(FDecl, "memcpy"))
01885     evalFunction =  &CStringChecker::evalMemcpy;
01886   else if (C.isCLibraryFunction(FDecl, "mempcpy"))
01887     evalFunction =  &CStringChecker::evalMempcpy;
01888   else if (C.isCLibraryFunction(FDecl, "memcmp"))
01889     evalFunction =  &CStringChecker::evalMemcmp;
01890   else if (C.isCLibraryFunction(FDecl, "memmove"))
01891     evalFunction =  &CStringChecker::evalMemmove;
01892   else if (C.isCLibraryFunction(FDecl, "strcpy"))
01893     evalFunction =  &CStringChecker::evalStrcpy;
01894   else if (C.isCLibraryFunction(FDecl, "strncpy"))
01895     evalFunction =  &CStringChecker::evalStrncpy;
01896   else if (C.isCLibraryFunction(FDecl, "stpcpy"))
01897     evalFunction =  &CStringChecker::evalStpcpy;
01898   else if (C.isCLibraryFunction(FDecl, "strcat"))
01899     evalFunction =  &CStringChecker::evalStrcat;
01900   else if (C.isCLibraryFunction(FDecl, "strncat"))
01901     evalFunction =  &CStringChecker::evalStrncat;
01902   else if (C.isCLibraryFunction(FDecl, "strlen"))
01903     evalFunction =  &CStringChecker::evalstrLength;
01904   else if (C.isCLibraryFunction(FDecl, "strnlen"))
01905     evalFunction =  &CStringChecker::evalstrnLength;
01906   else if (C.isCLibraryFunction(FDecl, "strcmp"))
01907     evalFunction =  &CStringChecker::evalStrcmp;
01908   else if (C.isCLibraryFunction(FDecl, "strncmp"))
01909     evalFunction =  &CStringChecker::evalStrncmp;
01910   else if (C.isCLibraryFunction(FDecl, "strcasecmp"))
01911     evalFunction =  &CStringChecker::evalStrcasecmp;
01912   else if (C.isCLibraryFunction(FDecl, "strncasecmp"))
01913     evalFunction =  &CStringChecker::evalStrncasecmp;
01914   else if (C.isCLibraryFunction(FDecl, "strsep"))
01915     evalFunction =  &CStringChecker::evalStrsep;
01916   else if (C.isCLibraryFunction(FDecl, "bcopy"))
01917     evalFunction =  &CStringChecker::evalBcopy;
01918   else if (C.isCLibraryFunction(FDecl, "bcmp"))
01919     evalFunction =  &CStringChecker::evalMemcmp;
01920   
01921   // If the callee isn't a string function, let another checker handle it.
01922   if (!evalFunction)
01923     return false;
01924 
01925   // Make sure each function sets its own description.
01926   // (But don't bother in a release build.)
01927   assert(!(CurrentFunctionDescription = nullptr));
01928 
01929   // Check and evaluate the call.
01930   (this->*evalFunction)(C, CE);
01931 
01932   // If the evaluate call resulted in no change, chain to the next eval call
01933   // handler.
01934   // Note, the custom CString evaluation calls assume that basic safety
01935   // properties are held. However, if the user chooses to turn off some of these
01936   // checks, we ignore the issues and leave the call evaluation to a generic
01937   // handler.
01938   if (!C.isDifferent())
01939     return false;
01940 
01941   return true;
01942 }
01943 
01944 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
01945   // Record string length for char a[] = "abc";
01946   ProgramStateRef state = C.getState();
01947 
01948   for (const auto *I : DS->decls()) {
01949     const VarDecl *D = dyn_cast<VarDecl>(I);
01950     if (!D)
01951       continue;
01952 
01953     // FIXME: Handle array fields of structs.
01954     if (!D->getType()->isArrayType())
01955       continue;
01956 
01957     const Expr *Init = D->getInit();
01958     if (!Init)
01959       continue;
01960     if (!isa<StringLiteral>(Init))
01961       continue;
01962 
01963     Loc VarLoc = state->getLValue(D, C.getLocationContext());
01964     const MemRegion *MR = VarLoc.getAsRegion();
01965     if (!MR)
01966       continue;
01967 
01968     SVal StrVal = state->getSVal(Init, C.getLocationContext());
01969     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
01970     DefinedOrUnknownSVal strLength =
01971         getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
01972 
01973     state = state->set<CStringLength>(MR, strLength);
01974   }
01975 
01976   C.addTransition(state);
01977 }
01978 
01979 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const {
01980   CStringLengthTy Entries = state->get<CStringLength>();
01981   return !Entries.isEmpty();
01982 }
01983 
01984 ProgramStateRef 
01985 CStringChecker::checkRegionChanges(ProgramStateRef state,
01986                                    const InvalidatedSymbols *,
01987                                    ArrayRef<const MemRegion *> ExplicitRegions,
01988                                    ArrayRef<const MemRegion *> Regions,
01989                                    const CallEvent *Call) const {
01990   CStringLengthTy Entries = state->get<CStringLength>();
01991   if (Entries.isEmpty())
01992     return state;
01993 
01994   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
01995   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
01996 
01997   // First build sets for the changed regions and their super-regions.
01998   for (ArrayRef<const MemRegion *>::iterator
01999        I = Regions.begin(), E = Regions.end(); I != E; ++I) {
02000     const MemRegion *MR = *I;
02001     Invalidated.insert(MR);
02002 
02003     SuperRegions.insert(MR);
02004     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
02005       MR = SR->getSuperRegion();
02006       SuperRegions.insert(MR);
02007     }
02008   }
02009 
02010   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
02011 
02012   // Then loop over the entries in the current state.
02013   for (CStringLengthTy::iterator I = Entries.begin(),
02014        E = Entries.end(); I != E; ++I) {
02015     const MemRegion *MR = I.getKey();
02016 
02017     // Is this entry for a super-region of a changed region?
02018     if (SuperRegions.count(MR)) {
02019       Entries = F.remove(Entries, MR);
02020       continue;
02021     }
02022 
02023     // Is this entry for a sub-region of a changed region?
02024     const MemRegion *Super = MR;
02025     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
02026       Super = SR->getSuperRegion();
02027       if (Invalidated.count(Super)) {
02028         Entries = F.remove(Entries, MR);
02029         break;
02030       }
02031     }
02032   }
02033 
02034   return state->set<CStringLength>(Entries);
02035 }
02036 
02037 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
02038                                       SymbolReaper &SR) const {
02039   // Mark all symbols in our string length map as valid.
02040   CStringLengthTy Entries = state->get<CStringLength>();
02041 
02042   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
02043        I != E; ++I) {
02044     SVal Len = I.getData();
02045 
02046     for (SymExpr::symbol_iterator si = Len.symbol_begin(),
02047                                   se = Len.symbol_end(); si != se; ++si)
02048       SR.markInUse(*si);
02049   }
02050 }
02051 
02052 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
02053                                       CheckerContext &C) const {
02054   if (!SR.hasDeadSymbols())
02055     return;
02056 
02057   ProgramStateRef state = C.getState();
02058   CStringLengthTy Entries = state->get<CStringLength>();
02059   if (Entries.isEmpty())
02060     return;
02061 
02062   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
02063   for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end();
02064        I != E; ++I) {
02065     SVal Len = I.getData();
02066     if (SymbolRef Sym = Len.getAsSymbol()) {
02067       if (SR.isDead(Sym))
02068         Entries = F.remove(Entries, I.getKey());
02069     }
02070   }
02071 
02072   state = state->set<CStringLength>(Entries);
02073   C.addTransition(state);
02074 }
02075 
02076 #define REGISTER_CHECKER(name)                                                 \
02077   void ento::register##name(CheckerManager &mgr) {                             \
02078     CStringChecker *checker = mgr.registerChecker<CStringChecker>();           \
02079     checker->Filter.Check##name = true;                                        \
02080     checker->Filter.CheckName##name = mgr.getCurrentCheckName();               \
02081   }
02082 
02083 REGISTER_CHECKER(CStringNullArg)
02084 REGISTER_CHECKER(CStringOutOfBounds)
02085 REGISTER_CHECKER(CStringBufferOverlap)
02086 REGISTER_CHECKER(CStringNotNullTerm)
02087 
02088 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) {
02089   registerCStringNullArg(Mgr);
02090 }