clang API Documentation
00001 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 00002 // 00003 // The LLVM Compiler Infrastructure 00004 // 00005 // This file is distributed under the University of Illinois Open Source 00006 // License. See LICENSE.TXT for details. 00007 // 00008 //===----------------------------------------------------------------------===// 00009 // 00010 // This defines CStringChecker, which is an assortment of checks on calls 00011 // to functions in <string.h>. 00012 // 00013 //===----------------------------------------------------------------------===// 00014 00015 #include "ClangSACheckers.h" 00016 #include "InterCheckerAPI.h" 00017 #include "clang/Basic/CharInfo.h" 00018 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 00019 #include "clang/StaticAnalyzer/Core/Checker.h" 00020 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 00021 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 00022 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 00023 #include "llvm/ADT/STLExtras.h" 00024 #include "llvm/ADT/SmallString.h" 00025 #include "llvm/ADT/StringSwitch.h" 00026 #include "llvm/Support/raw_ostream.h" 00027 00028 using namespace clang; 00029 using namespace ento; 00030 00031 namespace { 00032 class CStringChecker : public Checker< eval::Call, 00033 check::PreStmt<DeclStmt>, 00034 check::LiveSymbols, 00035 check::DeadSymbols, 00036 check::RegionChanges 00037 > { 00038 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 00039 BT_NotCString, BT_AdditionOverflow; 00040 00041 mutable const char *CurrentFunctionDescription; 00042 00043 public: 00044 /// The filter is used to filter out the diagnostics which are not enabled by 00045 /// the user. 00046 struct CStringChecksFilter { 00047 DefaultBool CheckCStringNullArg; 00048 DefaultBool CheckCStringOutOfBounds; 00049 DefaultBool CheckCStringBufferOverlap; 00050 DefaultBool CheckCStringNotNullTerm; 00051 00052 CheckName CheckNameCStringNullArg; 00053 CheckName CheckNameCStringOutOfBounds; 00054 CheckName CheckNameCStringBufferOverlap; 00055 CheckName CheckNameCStringNotNullTerm; 00056 }; 00057 00058 CStringChecksFilter Filter; 00059 00060 static void *getTag() { static int tag; return &tag; } 00061 00062 bool evalCall(const CallExpr *CE, CheckerContext &C) const; 00063 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 00064 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 00065 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 00066 bool wantsRegionChangeUpdate(ProgramStateRef state) const; 00067 00068 ProgramStateRef 00069 checkRegionChanges(ProgramStateRef state, 00070 const InvalidatedSymbols *, 00071 ArrayRef<const MemRegion *> ExplicitRegions, 00072 ArrayRef<const MemRegion *> Regions, 00073 const CallEvent *Call) const; 00074 00075 typedef void (CStringChecker::*FnCheck)(CheckerContext &, 00076 const CallExpr *) const; 00077 00078 void evalMemcpy(CheckerContext &C, const CallExpr *CE) const; 00079 void evalMempcpy(CheckerContext &C, const CallExpr *CE) const; 00080 void evalMemmove(CheckerContext &C, const CallExpr *CE) const; 00081 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 00082 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 00083 ProgramStateRef state, 00084 const Expr *Size, 00085 const Expr *Source, 00086 const Expr *Dest, 00087 bool Restricted = false, 00088 bool IsMempcpy = false) const; 00089 00090 void evalMemcmp(CheckerContext &C, const CallExpr *CE) const; 00091 00092 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 00093 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 00094 void evalstrLengthCommon(CheckerContext &C, 00095 const CallExpr *CE, 00096 bool IsStrnlen = false) const; 00097 00098 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 00099 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 00100 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 00101 void evalStrcpyCommon(CheckerContext &C, 00102 const CallExpr *CE, 00103 bool returnEnd, 00104 bool isBounded, 00105 bool isAppending) const; 00106 00107 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 00108 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 00109 00110 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 00111 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 00112 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 00113 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 00114 void evalStrcmpCommon(CheckerContext &C, 00115 const CallExpr *CE, 00116 bool isBounded = false, 00117 bool ignoreCase = false) const; 00118 00119 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 00120 00121 // Utility methods 00122 std::pair<ProgramStateRef , ProgramStateRef > 00123 static assumeZero(CheckerContext &C, 00124 ProgramStateRef state, SVal V, QualType Ty); 00125 00126 static ProgramStateRef setCStringLength(ProgramStateRef state, 00127 const MemRegion *MR, 00128 SVal strLength); 00129 static SVal getCStringLengthForRegion(CheckerContext &C, 00130 ProgramStateRef &state, 00131 const Expr *Ex, 00132 const MemRegion *MR, 00133 bool hypothetical); 00134 SVal getCStringLength(CheckerContext &C, 00135 ProgramStateRef &state, 00136 const Expr *Ex, 00137 SVal Buf, 00138 bool hypothetical = false) const; 00139 00140 const StringLiteral *getCStringLiteral(CheckerContext &C, 00141 ProgramStateRef &state, 00142 const Expr *expr, 00143 SVal val) const; 00144 00145 static ProgramStateRef InvalidateBuffer(CheckerContext &C, 00146 ProgramStateRef state, 00147 const Expr *Ex, SVal V, 00148 bool IsSourceBuffer); 00149 00150 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 00151 const MemRegion *MR); 00152 00153 // Re-usable checks 00154 ProgramStateRef checkNonNull(CheckerContext &C, 00155 ProgramStateRef state, 00156 const Expr *S, 00157 SVal l) const; 00158 ProgramStateRef CheckLocation(CheckerContext &C, 00159 ProgramStateRef state, 00160 const Expr *S, 00161 SVal l, 00162 const char *message = nullptr) const; 00163 ProgramStateRef CheckBufferAccess(CheckerContext &C, 00164 ProgramStateRef state, 00165 const Expr *Size, 00166 const Expr *FirstBuf, 00167 const Expr *SecondBuf, 00168 const char *firstMessage = nullptr, 00169 const char *secondMessage = nullptr, 00170 bool WarnAboutSize = false) const; 00171 00172 ProgramStateRef CheckBufferAccess(CheckerContext &C, 00173 ProgramStateRef state, 00174 const Expr *Size, 00175 const Expr *Buf, 00176 const char *message = nullptr, 00177 bool WarnAboutSize = false) const { 00178 // This is a convenience override. 00179 return CheckBufferAccess(C, state, Size, Buf, nullptr, message, nullptr, 00180 WarnAboutSize); 00181 } 00182 ProgramStateRef CheckOverlap(CheckerContext &C, 00183 ProgramStateRef state, 00184 const Expr *Size, 00185 const Expr *First, 00186 const Expr *Second) const; 00187 void emitOverlapBug(CheckerContext &C, 00188 ProgramStateRef state, 00189 const Stmt *First, 00190 const Stmt *Second) const; 00191 00192 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 00193 ProgramStateRef state, 00194 NonLoc left, 00195 NonLoc right) const; 00196 }; 00197 00198 } //end anonymous namespace 00199 00200 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 00201 00202 //===----------------------------------------------------------------------===// 00203 // Individual checks and utility methods. 00204 //===----------------------------------------------------------------------===// 00205 00206 std::pair<ProgramStateRef , ProgramStateRef > 00207 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 00208 QualType Ty) { 00209 Optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 00210 if (!val) 00211 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 00212 00213 SValBuilder &svalBuilder = C.getSValBuilder(); 00214 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 00215 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 00216 } 00217 00218 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 00219 ProgramStateRef state, 00220 const Expr *S, SVal l) const { 00221 // If a previous check has failed, propagate the failure. 00222 if (!state) 00223 return nullptr; 00224 00225 ProgramStateRef stateNull, stateNonNull; 00226 std::tie(stateNull, stateNonNull) = assumeZero(C, state, l, S->getType()); 00227 00228 if (stateNull && !stateNonNull) { 00229 if (!Filter.CheckCStringNullArg) 00230 return nullptr; 00231 00232 ExplodedNode *N = C.generateSink(stateNull); 00233 if (!N) 00234 return nullptr; 00235 00236 if (!BT_Null) 00237 BT_Null.reset(new BuiltinBug( 00238 Filter.CheckNameCStringNullArg, categories::UnixAPI, 00239 "Null pointer argument in call to byte string function")); 00240 00241 SmallString<80> buf; 00242 llvm::raw_svector_ostream os(buf); 00243 assert(CurrentFunctionDescription); 00244 os << "Null pointer argument in call to " << CurrentFunctionDescription; 00245 00246 // Generate a report for this bug. 00247 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Null.get()); 00248 BugReport *report = new BugReport(*BT, os.str(), N); 00249 00250 report->addRange(S->getSourceRange()); 00251 bugreporter::trackNullOrUndefValue(N, S, *report); 00252 C.emitReport(report); 00253 return nullptr; 00254 } 00255 00256 // From here on, assume that the value is non-null. 00257 assert(stateNonNull); 00258 return stateNonNull; 00259 } 00260 00261 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 00262 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 00263 ProgramStateRef state, 00264 const Expr *S, SVal l, 00265 const char *warningMsg) const { 00266 // If a previous check has failed, propagate the failure. 00267 if (!state) 00268 return nullptr; 00269 00270 // Check for out of bound array element access. 00271 const MemRegion *R = l.getAsRegion(); 00272 if (!R) 00273 return state; 00274 00275 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 00276 if (!ER) 00277 return state; 00278 00279 assert(ER->getValueType() == C.getASTContext().CharTy && 00280 "CheckLocation should only be called with char* ElementRegions"); 00281 00282 // Get the size of the array. 00283 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 00284 SValBuilder &svalBuilder = C.getSValBuilder(); 00285 SVal Extent = 00286 svalBuilder.convertToArrayIndex(superReg->getExtent(svalBuilder)); 00287 DefinedOrUnknownSVal Size = Extent.castAs<DefinedOrUnknownSVal>(); 00288 00289 // Get the index of the accessed element. 00290 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 00291 00292 ProgramStateRef StInBound = state->assumeInBound(Idx, Size, true); 00293 ProgramStateRef StOutBound = state->assumeInBound(Idx, Size, false); 00294 if (StOutBound && !StInBound) { 00295 ExplodedNode *N = C.generateSink(StOutBound); 00296 if (!N) 00297 return nullptr; 00298 00299 if (!BT_Bounds) { 00300 BT_Bounds.reset(new BuiltinBug( 00301 Filter.CheckNameCStringOutOfBounds, "Out-of-bound array access", 00302 "Byte string function accesses out-of-bound array element")); 00303 } 00304 BuiltinBug *BT = static_cast<BuiltinBug*>(BT_Bounds.get()); 00305 00306 // Generate a report for this bug. 00307 BugReport *report; 00308 if (warningMsg) { 00309 report = new BugReport(*BT, warningMsg, N); 00310 } else { 00311 assert(CurrentFunctionDescription); 00312 assert(CurrentFunctionDescription[0] != '\0'); 00313 00314 SmallString<80> buf; 00315 llvm::raw_svector_ostream os(buf); 00316 os << toUppercase(CurrentFunctionDescription[0]) 00317 << &CurrentFunctionDescription[1] 00318 << " accesses out-of-bound array element"; 00319 report = new BugReport(*BT, os.str(), N); 00320 } 00321 00322 // FIXME: It would be nice to eventually make this diagnostic more clear, 00323 // e.g., by referencing the original declaration or by saying *why* this 00324 // reference is outside the range. 00325 00326 report->addRange(S->getSourceRange()); 00327 C.emitReport(report); 00328 return nullptr; 00329 } 00330 00331 // Array bound check succeeded. From this point forward the array bound 00332 // should always succeed. 00333 return StInBound; 00334 } 00335 00336 ProgramStateRef CStringChecker::CheckBufferAccess(CheckerContext &C, 00337 ProgramStateRef state, 00338 const Expr *Size, 00339 const Expr *FirstBuf, 00340 const Expr *SecondBuf, 00341 const char *firstMessage, 00342 const char *secondMessage, 00343 bool WarnAboutSize) const { 00344 // If a previous check has failed, propagate the failure. 00345 if (!state) 00346 return nullptr; 00347 00348 SValBuilder &svalBuilder = C.getSValBuilder(); 00349 ASTContext &Ctx = svalBuilder.getContext(); 00350 const LocationContext *LCtx = C.getLocationContext(); 00351 00352 QualType sizeTy = Size->getType(); 00353 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 00354 00355 // Check that the first buffer is non-null. 00356 SVal BufVal = state->getSVal(FirstBuf, LCtx); 00357 state = checkNonNull(C, state, FirstBuf, BufVal); 00358 if (!state) 00359 return nullptr; 00360 00361 // If out-of-bounds checking is turned off, skip the rest. 00362 if (!Filter.CheckCStringOutOfBounds) 00363 return state; 00364 00365 // Get the access length and make sure it is known. 00366 // FIXME: This assumes the caller has already checked that the access length 00367 // is positive. And that it's unsigned. 00368 SVal LengthVal = state->getSVal(Size, LCtx); 00369 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 00370 if (!Length) 00371 return state; 00372 00373 // Compute the offset of the last element to be accessed: size-1. 00374 NonLoc One = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 00375 NonLoc LastOffset = svalBuilder 00376 .evalBinOpNN(state, BO_Sub, *Length, One, sizeTy).castAs<NonLoc>(); 00377 00378 // Check that the first buffer is sufficiently long. 00379 SVal BufStart = svalBuilder.evalCast(BufVal, PtrTy, FirstBuf->getType()); 00380 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 00381 const Expr *warningExpr = (WarnAboutSize ? Size : FirstBuf); 00382 00383 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 00384 LastOffset, PtrTy); 00385 state = CheckLocation(C, state, warningExpr, BufEnd, firstMessage); 00386 00387 // If the buffer isn't large enough, abort. 00388 if (!state) 00389 return nullptr; 00390 } 00391 00392 // If there's a second buffer, check it as well. 00393 if (SecondBuf) { 00394 BufVal = state->getSVal(SecondBuf, LCtx); 00395 state = checkNonNull(C, state, SecondBuf, BufVal); 00396 if (!state) 00397 return nullptr; 00398 00399 BufStart = svalBuilder.evalCast(BufVal, PtrTy, SecondBuf->getType()); 00400 if (Optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 00401 const Expr *warningExpr = (WarnAboutSize ? Size : SecondBuf); 00402 00403 SVal BufEnd = svalBuilder.evalBinOpLN(state, BO_Add, *BufLoc, 00404 LastOffset, PtrTy); 00405 state = CheckLocation(C, state, warningExpr, BufEnd, secondMessage); 00406 } 00407 } 00408 00409 // Large enough or not, return this state! 00410 return state; 00411 } 00412 00413 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 00414 ProgramStateRef state, 00415 const Expr *Size, 00416 const Expr *First, 00417 const Expr *Second) const { 00418 if (!Filter.CheckCStringBufferOverlap) 00419 return state; 00420 00421 // Do a simple check for overlap: if the two arguments are from the same 00422 // buffer, see if the end of the first is greater than the start of the second 00423 // or vice versa. 00424 00425 // If a previous check has failed, propagate the failure. 00426 if (!state) 00427 return nullptr; 00428 00429 ProgramStateRef stateTrue, stateFalse; 00430 00431 // Get the buffer values and make sure they're known locations. 00432 const LocationContext *LCtx = C.getLocationContext(); 00433 SVal firstVal = state->getSVal(First, LCtx); 00434 SVal secondVal = state->getSVal(Second, LCtx); 00435 00436 Optional<Loc> firstLoc = firstVal.getAs<Loc>(); 00437 if (!firstLoc) 00438 return state; 00439 00440 Optional<Loc> secondLoc = secondVal.getAs<Loc>(); 00441 if (!secondLoc) 00442 return state; 00443 00444 // Are the two values the same? 00445 SValBuilder &svalBuilder = C.getSValBuilder(); 00446 std::tie(stateTrue, stateFalse) = 00447 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 00448 00449 if (stateTrue && !stateFalse) { 00450 // If the values are known to be equal, that's automatically an overlap. 00451 emitOverlapBug(C, stateTrue, First, Second); 00452 return nullptr; 00453 } 00454 00455 // assume the two expressions are not equal. 00456 assert(stateFalse); 00457 state = stateFalse; 00458 00459 // Which value comes first? 00460 QualType cmpTy = svalBuilder.getConditionType(); 00461 SVal reverse = svalBuilder.evalBinOpLL(state, BO_GT, 00462 *firstLoc, *secondLoc, cmpTy); 00463 Optional<DefinedOrUnknownSVal> reverseTest = 00464 reverse.getAs<DefinedOrUnknownSVal>(); 00465 if (!reverseTest) 00466 return state; 00467 00468 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 00469 if (stateTrue) { 00470 if (stateFalse) { 00471 // If we don't know which one comes first, we can't perform this test. 00472 return state; 00473 } else { 00474 // Switch the values so that firstVal is before secondVal. 00475 std::swap(firstLoc, secondLoc); 00476 00477 // Switch the Exprs as well, so that they still correspond. 00478 std::swap(First, Second); 00479 } 00480 } 00481 00482 // Get the length, and make sure it too is known. 00483 SVal LengthVal = state->getSVal(Size, LCtx); 00484 Optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 00485 if (!Length) 00486 return state; 00487 00488 // Convert the first buffer's start address to char*. 00489 // Bail out if the cast fails. 00490 ASTContext &Ctx = svalBuilder.getContext(); 00491 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 00492 SVal FirstStart = svalBuilder.evalCast(*firstLoc, CharPtrTy, 00493 First->getType()); 00494 Optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 00495 if (!FirstStartLoc) 00496 return state; 00497 00498 // Compute the end of the first buffer. Bail out if THAT fails. 00499 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, 00500 *FirstStartLoc, *Length, CharPtrTy); 00501 Optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 00502 if (!FirstEndLoc) 00503 return state; 00504 00505 // Is the end of the first buffer past the start of the second buffer? 00506 SVal Overlap = svalBuilder.evalBinOpLL(state, BO_GT, 00507 *FirstEndLoc, *secondLoc, cmpTy); 00508 Optional<DefinedOrUnknownSVal> OverlapTest = 00509 Overlap.getAs<DefinedOrUnknownSVal>(); 00510 if (!OverlapTest) 00511 return state; 00512 00513 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 00514 00515 if (stateTrue && !stateFalse) { 00516 // Overlap! 00517 emitOverlapBug(C, stateTrue, First, Second); 00518 return nullptr; 00519 } 00520 00521 // assume the two expressions don't overlap. 00522 assert(stateFalse); 00523 return stateFalse; 00524 } 00525 00526 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 00527 const Stmt *First, const Stmt *Second) const { 00528 ExplodedNode *N = C.generateSink(state); 00529 if (!N) 00530 return; 00531 00532 if (!BT_Overlap) 00533 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 00534 categories::UnixAPI, "Improper arguments")); 00535 00536 // Generate a report for this bug. 00537 BugReport *report = 00538 new BugReport(*BT_Overlap, 00539 "Arguments must not be overlapping buffers", N); 00540 report->addRange(First->getSourceRange()); 00541 report->addRange(Second->getSourceRange()); 00542 00543 C.emitReport(report); 00544 } 00545 00546 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 00547 ProgramStateRef state, 00548 NonLoc left, 00549 NonLoc right) const { 00550 // If out-of-bounds checking is turned off, skip the rest. 00551 if (!Filter.CheckCStringOutOfBounds) 00552 return state; 00553 00554 // If a previous check has failed, propagate the failure. 00555 if (!state) 00556 return nullptr; 00557 00558 SValBuilder &svalBuilder = C.getSValBuilder(); 00559 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 00560 00561 QualType sizeTy = svalBuilder.getContext().getSizeType(); 00562 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 00563 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 00564 00565 SVal maxMinusRight; 00566 if (right.getAs<nonloc::ConcreteInt>()) { 00567 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 00568 sizeTy); 00569 } else { 00570 // Try switching the operands. (The order of these two assignments is 00571 // important!) 00572 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 00573 sizeTy); 00574 left = right; 00575 } 00576 00577 if (Optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 00578 QualType cmpTy = svalBuilder.getConditionType(); 00579 // If left > max - right, we have an overflow. 00580 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 00581 *maxMinusRightNL, cmpTy); 00582 00583 ProgramStateRef stateOverflow, stateOkay; 00584 std::tie(stateOverflow, stateOkay) = 00585 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 00586 00587 if (stateOverflow && !stateOkay) { 00588 // We have an overflow. Emit a bug report. 00589 ExplodedNode *N = C.generateSink(stateOverflow); 00590 if (!N) 00591 return nullptr; 00592 00593 if (!BT_AdditionOverflow) 00594 BT_AdditionOverflow.reset( 00595 new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API", 00596 "Sum of expressions causes overflow")); 00597 00598 // This isn't a great error message, but this should never occur in real 00599 // code anyway -- you'd have to create a buffer longer than a size_t can 00600 // represent, which is sort of a contradiction. 00601 const char *warning = 00602 "This expression will create a string whose length is too big to " 00603 "be represented as a size_t"; 00604 00605 // Generate a report for this bug. 00606 BugReport *report = new BugReport(*BT_AdditionOverflow, warning, N); 00607 C.emitReport(report); 00608 00609 return nullptr; 00610 } 00611 00612 // From now on, assume an overflow didn't occur. 00613 assert(stateOkay); 00614 state = stateOkay; 00615 } 00616 00617 return state; 00618 } 00619 00620 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 00621 const MemRegion *MR, 00622 SVal strLength) { 00623 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 00624 00625 MR = MR->StripCasts(); 00626 00627 switch (MR->getKind()) { 00628 case MemRegion::StringRegionKind: 00629 // FIXME: This can happen if we strcpy() into a string region. This is 00630 // undefined [C99 6.4.5p6], but we should still warn about it. 00631 return state; 00632 00633 case MemRegion::SymbolicRegionKind: 00634 case MemRegion::AllocaRegionKind: 00635 case MemRegion::VarRegionKind: 00636 case MemRegion::FieldRegionKind: 00637 case MemRegion::ObjCIvarRegionKind: 00638 // These are the types we can currently track string lengths for. 00639 break; 00640 00641 case MemRegion::ElementRegionKind: 00642 // FIXME: Handle element regions by upper-bounding the parent region's 00643 // string length. 00644 return state; 00645 00646 default: 00647 // Other regions (mostly non-data) can't have a reliable C string length. 00648 // For now, just ignore the change. 00649 // FIXME: These are rare but not impossible. We should output some kind of 00650 // warning for things like strcpy((char[]){'a', 0}, "b"); 00651 return state; 00652 } 00653 00654 if (strLength.isUnknown()) 00655 return state->remove<CStringLength>(MR); 00656 00657 return state->set<CStringLength>(MR, strLength); 00658 } 00659 00660 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 00661 ProgramStateRef &state, 00662 const Expr *Ex, 00663 const MemRegion *MR, 00664 bool hypothetical) { 00665 if (!hypothetical) { 00666 // If there's a recorded length, go ahead and return it. 00667 const SVal *Recorded = state->get<CStringLength>(MR); 00668 if (Recorded) 00669 return *Recorded; 00670 } 00671 00672 // Otherwise, get a new symbol and update the state. 00673 SValBuilder &svalBuilder = C.getSValBuilder(); 00674 QualType sizeTy = svalBuilder.getContext().getSizeType(); 00675 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 00676 MR, Ex, sizeTy, 00677 C.blockCount()); 00678 00679 if (!hypothetical) { 00680 if (Optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 00681 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 00682 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 00683 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 00684 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 00685 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 00686 fourInt); 00687 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 00688 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, 00689 maxLength, sizeTy); 00690 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 00691 } 00692 state = state->set<CStringLength>(MR, strLength); 00693 } 00694 00695 return strLength; 00696 } 00697 00698 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 00699 const Expr *Ex, SVal Buf, 00700 bool hypothetical) const { 00701 const MemRegion *MR = Buf.getAsRegion(); 00702 if (!MR) { 00703 // If we can't get a region, see if it's something we /know/ isn't a 00704 // C string. In the context of locations, the only time we can issue such 00705 // a warning is for labels. 00706 if (Optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 00707 if (!Filter.CheckCStringNotNullTerm) 00708 return UndefinedVal(); 00709 00710 if (ExplodedNode *N = C.addTransition(state)) { 00711 if (!BT_NotCString) 00712 BT_NotCString.reset(new BuiltinBug( 00713 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 00714 "Argument is not a null-terminated string.")); 00715 00716 SmallString<120> buf; 00717 llvm::raw_svector_ostream os(buf); 00718 assert(CurrentFunctionDescription); 00719 os << "Argument to " << CurrentFunctionDescription 00720 << " is the address of the label '" << Label->getLabel()->getName() 00721 << "', which is not a null-terminated string"; 00722 00723 // Generate a report for this bug. 00724 BugReport *report = new BugReport(*BT_NotCString, os.str(), N); 00725 00726 report->addRange(Ex->getSourceRange()); 00727 C.emitReport(report); 00728 } 00729 return UndefinedVal(); 00730 00731 } 00732 00733 // If it's not a region and not a label, give up. 00734 return UnknownVal(); 00735 } 00736 00737 // If we have a region, strip casts from it and see if we can figure out 00738 // its length. For anything we can't figure out, just return UnknownVal. 00739 MR = MR->StripCasts(); 00740 00741 switch (MR->getKind()) { 00742 case MemRegion::StringRegionKind: { 00743 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 00744 // so we can assume that the byte length is the correct C string length. 00745 SValBuilder &svalBuilder = C.getSValBuilder(); 00746 QualType sizeTy = svalBuilder.getContext().getSizeType(); 00747 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 00748 return svalBuilder.makeIntVal(strLit->getByteLength(), sizeTy); 00749 } 00750 case MemRegion::SymbolicRegionKind: 00751 case MemRegion::AllocaRegionKind: 00752 case MemRegion::VarRegionKind: 00753 case MemRegion::FieldRegionKind: 00754 case MemRegion::ObjCIvarRegionKind: 00755 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 00756 case MemRegion::CompoundLiteralRegionKind: 00757 // FIXME: Can we track this? Is it necessary? 00758 return UnknownVal(); 00759 case MemRegion::ElementRegionKind: 00760 // FIXME: How can we handle this? It's not good enough to subtract the 00761 // offset from the base string length; consider "123\x00567" and &a[5]. 00762 return UnknownVal(); 00763 default: 00764 // Other regions (mostly non-data) can't have a reliable C string length. 00765 // In this case, an error is emitted and UndefinedVal is returned. 00766 // The caller should always be prepared to handle this case. 00767 if (!Filter.CheckCStringNotNullTerm) 00768 return UndefinedVal(); 00769 00770 if (ExplodedNode *N = C.addTransition(state)) { 00771 if (!BT_NotCString) 00772 BT_NotCString.reset(new BuiltinBug( 00773 Filter.CheckNameCStringNotNullTerm, categories::UnixAPI, 00774 "Argument is not a null-terminated string.")); 00775 00776 SmallString<120> buf; 00777 llvm::raw_svector_ostream os(buf); 00778 00779 assert(CurrentFunctionDescription); 00780 os << "Argument to " << CurrentFunctionDescription << " is "; 00781 00782 if (SummarizeRegion(os, C.getASTContext(), MR)) 00783 os << ", which is not a null-terminated string"; 00784 else 00785 os << "not a null-terminated string"; 00786 00787 // Generate a report for this bug. 00788 BugReport *report = new BugReport(*BT_NotCString, 00789 os.str(), N); 00790 00791 report->addRange(Ex->getSourceRange()); 00792 C.emitReport(report); 00793 } 00794 00795 return UndefinedVal(); 00796 } 00797 } 00798 00799 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 00800 ProgramStateRef &state, const Expr *expr, SVal val) const { 00801 00802 // Get the memory region pointed to by the val. 00803 const MemRegion *bufRegion = val.getAsRegion(); 00804 if (!bufRegion) 00805 return nullptr; 00806 00807 // Strip casts off the memory region. 00808 bufRegion = bufRegion->StripCasts(); 00809 00810 // Cast the memory region to a string region. 00811 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 00812 if (!strRegion) 00813 return nullptr; 00814 00815 // Return the actual string in the string region. 00816 return strRegion->getStringLiteral(); 00817 } 00818 00819 ProgramStateRef CStringChecker::InvalidateBuffer(CheckerContext &C, 00820 ProgramStateRef state, 00821 const Expr *E, SVal V, 00822 bool IsSourceBuffer) { 00823 Optional<Loc> L = V.getAs<Loc>(); 00824 if (!L) 00825 return state; 00826 00827 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 00828 // some assumptions about the value that CFRefCount can't. Even so, it should 00829 // probably be refactored. 00830 if (Optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 00831 const MemRegion *R = MR->getRegion()->StripCasts(); 00832 00833 // Are we dealing with an ElementRegion? If so, we should be invalidating 00834 // the super-region. 00835 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 00836 R = ER->getSuperRegion(); 00837 // FIXME: What about layers of ElementRegions? 00838 } 00839 00840 // Invalidate this region. 00841 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 00842 00843 bool CausesPointerEscape = false; 00844 RegionAndSymbolInvalidationTraits ITraits; 00845 // Invalidate and escape only indirect regions accessible through the source 00846 // buffer. 00847 if (IsSourceBuffer) { 00848 ITraits.setTrait(R, 00849 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 00850 ITraits.setTrait(R, RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 00851 CausesPointerEscape = true; 00852 } 00853 00854 return state->invalidateRegions(R, E, C.blockCount(), LCtx, 00855 CausesPointerEscape, nullptr, nullptr, 00856 &ITraits); 00857 } 00858 00859 // If we have a non-region value by chance, just remove the binding. 00860 // FIXME: is this necessary or correct? This handles the non-Region 00861 // cases. Is it ever valid to store to these? 00862 return state->killBinding(*L); 00863 } 00864 00865 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 00866 const MemRegion *MR) { 00867 const TypedValueRegion *TVR = dyn_cast<TypedValueRegion>(MR); 00868 00869 switch (MR->getKind()) { 00870 case MemRegion::FunctionTextRegionKind: { 00871 const NamedDecl *FD = cast<FunctionTextRegion>(MR)->getDecl(); 00872 if (FD) 00873 os << "the address of the function '" << *FD << '\''; 00874 else 00875 os << "the address of a function"; 00876 return true; 00877 } 00878 case MemRegion::BlockTextRegionKind: 00879 os << "block text"; 00880 return true; 00881 case MemRegion::BlockDataRegionKind: 00882 os << "a block"; 00883 return true; 00884 case MemRegion::CXXThisRegionKind: 00885 case MemRegion::CXXTempObjectRegionKind: 00886 os << "a C++ temp object of type " << TVR->getValueType().getAsString(); 00887 return true; 00888 case MemRegion::VarRegionKind: 00889 os << "a variable of type" << TVR->getValueType().getAsString(); 00890 return true; 00891 case MemRegion::FieldRegionKind: 00892 os << "a field of type " << TVR->getValueType().getAsString(); 00893 return true; 00894 case MemRegion::ObjCIvarRegionKind: 00895 os << "an instance variable of type " << TVR->getValueType().getAsString(); 00896 return true; 00897 default: 00898 return false; 00899 } 00900 } 00901 00902 //===----------------------------------------------------------------------===// 00903 // evaluation of individual function calls. 00904 //===----------------------------------------------------------------------===// 00905 00906 void CStringChecker::evalCopyCommon(CheckerContext &C, 00907 const CallExpr *CE, 00908 ProgramStateRef state, 00909 const Expr *Size, const Expr *Dest, 00910 const Expr *Source, bool Restricted, 00911 bool IsMempcpy) const { 00912 CurrentFunctionDescription = "memory copy function"; 00913 00914 // See if the size argument is zero. 00915 const LocationContext *LCtx = C.getLocationContext(); 00916 SVal sizeVal = state->getSVal(Size, LCtx); 00917 QualType sizeTy = Size->getType(); 00918 00919 ProgramStateRef stateZeroSize, stateNonZeroSize; 00920 std::tie(stateZeroSize, stateNonZeroSize) = 00921 assumeZero(C, state, sizeVal, sizeTy); 00922 00923 // Get the value of the Dest. 00924 SVal destVal = state->getSVal(Dest, LCtx); 00925 00926 // If the size is zero, there won't be any actual memory access, so 00927 // just bind the return value to the destination buffer and return. 00928 if (stateZeroSize && !stateNonZeroSize) { 00929 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 00930 C.addTransition(stateZeroSize); 00931 return; 00932 } 00933 00934 // If the size can be nonzero, we have to check the other arguments. 00935 if (stateNonZeroSize) { 00936 state = stateNonZeroSize; 00937 00938 // Ensure the destination is not null. If it is NULL there will be a 00939 // NULL pointer dereference. 00940 state = checkNonNull(C, state, Dest, destVal); 00941 if (!state) 00942 return; 00943 00944 // Get the value of the Src. 00945 SVal srcVal = state->getSVal(Source, LCtx); 00946 00947 // Ensure the source is not null. If it is NULL there will be a 00948 // NULL pointer dereference. 00949 state = checkNonNull(C, state, Source, srcVal); 00950 if (!state) 00951 return; 00952 00953 // Ensure the accesses are valid and that the buffers do not overlap. 00954 const char * const writeWarning = 00955 "Memory copy function overflows destination buffer"; 00956 state = CheckBufferAccess(C, state, Size, Dest, Source, 00957 writeWarning, /* sourceWarning = */ nullptr); 00958 if (Restricted) 00959 state = CheckOverlap(C, state, Size, Dest, Source); 00960 00961 if (!state) 00962 return; 00963 00964 // If this is mempcpy, get the byte after the last byte copied and 00965 // bind the expr. 00966 if (IsMempcpy) { 00967 loc::MemRegionVal destRegVal = destVal.castAs<loc::MemRegionVal>(); 00968 00969 // Get the length to copy. 00970 if (Optional<NonLoc> lenValNonLoc = sizeVal.getAs<NonLoc>()) { 00971 // Get the byte after the last byte copied. 00972 SValBuilder &SvalBuilder = C.getSValBuilder(); 00973 ASTContext &Ctx = SvalBuilder.getContext(); 00974 QualType CharPtrTy = Ctx.getPointerType(Ctx.CharTy); 00975 loc::MemRegionVal DestRegCharVal = SvalBuilder.evalCast(destRegVal, 00976 CharPtrTy, Dest->getType()).castAs<loc::MemRegionVal>(); 00977 SVal lastElement = C.getSValBuilder().evalBinOpLN(state, BO_Add, 00978 DestRegCharVal, 00979 *lenValNonLoc, 00980 Dest->getType()); 00981 00982 // The byte after the last byte copied is the return value. 00983 state = state->BindExpr(CE, LCtx, lastElement); 00984 } else { 00985 // If we don't know how much we copied, we can at least 00986 // conjure a return value for later. 00987 SVal result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 00988 C.blockCount()); 00989 state = state->BindExpr(CE, LCtx, result); 00990 } 00991 00992 } else { 00993 // All other copies return the destination buffer. 00994 // (Well, bcopy() has a void return type, but this won't hurt.) 00995 state = state->BindExpr(CE, LCtx, destVal); 00996 } 00997 00998 // Invalidate the destination (regular invalidation without pointer-escaping 00999 // the address of the top-level region). 01000 // FIXME: Even if we can't perfectly model the copy, we should see if we 01001 // can use LazyCompoundVals to copy the source values into the destination. 01002 // This would probably remove any existing bindings past the end of the 01003 // copied region, but that's still an improvement over blank invalidation. 01004 state = InvalidateBuffer(C, state, Dest, C.getSVal(Dest), 01005 /*IsSourceBuffer*/false); 01006 01007 // Invalidate the source (const-invalidation without const-pointer-escaping 01008 // the address of the top-level region). 01009 state = InvalidateBuffer(C, state, Source, C.getSVal(Source), 01010 /*IsSourceBuffer*/true); 01011 01012 C.addTransition(state); 01013 } 01014 } 01015 01016 01017 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE) const { 01018 if (CE->getNumArgs() < 3) 01019 return; 01020 01021 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 01022 // The return value is the address of the destination buffer. 01023 const Expr *Dest = CE->getArg(0); 01024 ProgramStateRef state = C.getState(); 01025 01026 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true); 01027 } 01028 01029 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE) const { 01030 if (CE->getNumArgs() < 3) 01031 return; 01032 01033 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 01034 // The return value is a pointer to the byte following the last written byte. 01035 const Expr *Dest = CE->getArg(0); 01036 ProgramStateRef state = C.getState(); 01037 01038 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1), true, true); 01039 } 01040 01041 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE) const { 01042 if (CE->getNumArgs() < 3) 01043 return; 01044 01045 // void *memmove(void *dst, const void *src, size_t n); 01046 // The return value is the address of the destination buffer. 01047 const Expr *Dest = CE->getArg(0); 01048 ProgramStateRef state = C.getState(); 01049 01050 evalCopyCommon(C, CE, state, CE->getArg(2), Dest, CE->getArg(1)); 01051 } 01052 01053 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 01054 if (CE->getNumArgs() < 3) 01055 return; 01056 01057 // void bcopy(const void *src, void *dst, size_t n); 01058 evalCopyCommon(C, CE, C.getState(), 01059 CE->getArg(2), CE->getArg(1), CE->getArg(0)); 01060 } 01061 01062 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE) const { 01063 if (CE->getNumArgs() < 3) 01064 return; 01065 01066 // int memcmp(const void *s1, const void *s2, size_t n); 01067 CurrentFunctionDescription = "memory comparison function"; 01068 01069 const Expr *Left = CE->getArg(0); 01070 const Expr *Right = CE->getArg(1); 01071 const Expr *Size = CE->getArg(2); 01072 01073 ProgramStateRef state = C.getState(); 01074 SValBuilder &svalBuilder = C.getSValBuilder(); 01075 01076 // See if the size argument is zero. 01077 const LocationContext *LCtx = C.getLocationContext(); 01078 SVal sizeVal = state->getSVal(Size, LCtx); 01079 QualType sizeTy = Size->getType(); 01080 01081 ProgramStateRef stateZeroSize, stateNonZeroSize; 01082 std::tie(stateZeroSize, stateNonZeroSize) = 01083 assumeZero(C, state, sizeVal, sizeTy); 01084 01085 // If the size can be zero, the result will be 0 in that case, and we don't 01086 // have to check either of the buffers. 01087 if (stateZeroSize) { 01088 state = stateZeroSize; 01089 state = state->BindExpr(CE, LCtx, 01090 svalBuilder.makeZeroVal(CE->getType())); 01091 C.addTransition(state); 01092 } 01093 01094 // If the size can be nonzero, we have to check the other arguments. 01095 if (stateNonZeroSize) { 01096 state = stateNonZeroSize; 01097 // If we know the two buffers are the same, we know the result is 0. 01098 // First, get the two buffers' addresses. Another checker will have already 01099 // made sure they're not undefined. 01100 DefinedOrUnknownSVal LV = 01101 state->getSVal(Left, LCtx).castAs<DefinedOrUnknownSVal>(); 01102 DefinedOrUnknownSVal RV = 01103 state->getSVal(Right, LCtx).castAs<DefinedOrUnknownSVal>(); 01104 01105 // See if they are the same. 01106 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 01107 ProgramStateRef StSameBuf, StNotSameBuf; 01108 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 01109 01110 // If the two arguments might be the same buffer, we know the result is 0, 01111 // and we only need to check one size. 01112 if (StSameBuf) { 01113 state = StSameBuf; 01114 state = CheckBufferAccess(C, state, Size, Left); 01115 if (state) { 01116 state = StSameBuf->BindExpr(CE, LCtx, 01117 svalBuilder.makeZeroVal(CE->getType())); 01118 C.addTransition(state); 01119 } 01120 } 01121 01122 // If the two arguments might be different buffers, we have to check the 01123 // size of both of them. 01124 if (StNotSameBuf) { 01125 state = StNotSameBuf; 01126 state = CheckBufferAccess(C, state, Size, Left, Right); 01127 if (state) { 01128 // The return value is the comparison result, which we don't know. 01129 SVal CmpV = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, 01130 C.blockCount()); 01131 state = state->BindExpr(CE, LCtx, CmpV); 01132 C.addTransition(state); 01133 } 01134 } 01135 } 01136 } 01137 01138 void CStringChecker::evalstrLength(CheckerContext &C, 01139 const CallExpr *CE) const { 01140 if (CE->getNumArgs() < 1) 01141 return; 01142 01143 // size_t strlen(const char *s); 01144 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 01145 } 01146 01147 void CStringChecker::evalstrnLength(CheckerContext &C, 01148 const CallExpr *CE) const { 01149 if (CE->getNumArgs() < 2) 01150 return; 01151 01152 // size_t strnlen(const char *s, size_t maxlen); 01153 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 01154 } 01155 01156 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 01157 bool IsStrnlen) const { 01158 CurrentFunctionDescription = "string length function"; 01159 ProgramStateRef state = C.getState(); 01160 const LocationContext *LCtx = C.getLocationContext(); 01161 01162 if (IsStrnlen) { 01163 const Expr *maxlenExpr = CE->getArg(1); 01164 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 01165 01166 ProgramStateRef stateZeroSize, stateNonZeroSize; 01167 std::tie(stateZeroSize, stateNonZeroSize) = 01168 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 01169 01170 // If the size can be zero, the result will be 0 in that case, and we don't 01171 // have to check the string itself. 01172 if (stateZeroSize) { 01173 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 01174 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 01175 C.addTransition(stateZeroSize); 01176 } 01177 01178 // If the size is GUARANTEED to be zero, we're done! 01179 if (!stateNonZeroSize) 01180 return; 01181 01182 // Otherwise, record the assumption that the size is nonzero. 01183 state = stateNonZeroSize; 01184 } 01185 01186 // Check that the string argument is non-null. 01187 const Expr *Arg = CE->getArg(0); 01188 SVal ArgVal = state->getSVal(Arg, LCtx); 01189 01190 state = checkNonNull(C, state, Arg, ArgVal); 01191 01192 if (!state) 01193 return; 01194 01195 SVal strLength = getCStringLength(C, state, Arg, ArgVal); 01196 01197 // If the argument isn't a valid C string, there's no valid state to 01198 // transition to. 01199 if (strLength.isUndef()) 01200 return; 01201 01202 DefinedOrUnknownSVal result = UnknownVal(); 01203 01204 // If the check is for strnlen() then bind the return value to no more than 01205 // the maxlen value. 01206 if (IsStrnlen) { 01207 QualType cmpTy = C.getSValBuilder().getConditionType(); 01208 01209 // It's a little unfortunate to be getting this again, 01210 // but it's not that expensive... 01211 const Expr *maxlenExpr = CE->getArg(1); 01212 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 01213 01214 Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 01215 Optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); 01216 01217 if (strLengthNL && maxlenValNL) { 01218 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 01219 01220 // Check if the strLength is greater than the maxlen. 01221 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( 01222 C.getSValBuilder() 01223 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) 01224 .castAs<DefinedOrUnknownSVal>()); 01225 01226 if (stateStringTooLong && !stateStringNotTooLong) { 01227 // If the string is longer than maxlen, return maxlen. 01228 result = *maxlenValNL; 01229 } else if (stateStringNotTooLong && !stateStringTooLong) { 01230 // If the string is shorter than maxlen, return its length. 01231 result = *strLengthNL; 01232 } 01233 } 01234 01235 if (result.isUnknown()) { 01236 // If we don't have enough information for a comparison, there's 01237 // no guarantee the full string length will actually be returned. 01238 // All we know is the return value is the min of the string length 01239 // and the limit. This is better than nothing. 01240 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 01241 C.blockCount()); 01242 NonLoc resultNL = result.castAs<NonLoc>(); 01243 01244 if (strLengthNL) { 01245 state = state->assume(C.getSValBuilder().evalBinOpNN( 01246 state, BO_LE, resultNL, *strLengthNL, cmpTy) 01247 .castAs<DefinedOrUnknownSVal>(), true); 01248 } 01249 01250 if (maxlenValNL) { 01251 state = state->assume(C.getSValBuilder().evalBinOpNN( 01252 state, BO_LE, resultNL, *maxlenValNL, cmpTy) 01253 .castAs<DefinedOrUnknownSVal>(), true); 01254 } 01255 } 01256 01257 } else { 01258 // This is a plain strlen(), not strnlen(). 01259 result = strLength.castAs<DefinedOrUnknownSVal>(); 01260 01261 // If we don't know the length of the string, conjure a return 01262 // value, so it can be used in constraints, at least. 01263 if (result.isUnknown()) { 01264 result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 01265 C.blockCount()); 01266 } 01267 } 01268 01269 // Bind the return value. 01270 assert(!result.isUnknown() && "Should have conjured a value by now"); 01271 state = state->BindExpr(CE, LCtx, result); 01272 C.addTransition(state); 01273 } 01274 01275 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const { 01276 if (CE->getNumArgs() < 2) 01277 return; 01278 01279 // char *strcpy(char *restrict dst, const char *restrict src); 01280 evalStrcpyCommon(C, CE, 01281 /* returnEnd = */ false, 01282 /* isBounded = */ false, 01283 /* isAppending = */ false); 01284 } 01285 01286 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const { 01287 if (CE->getNumArgs() < 3) 01288 return; 01289 01290 // char *strncpy(char *restrict dst, const char *restrict src, size_t n); 01291 evalStrcpyCommon(C, CE, 01292 /* returnEnd = */ false, 01293 /* isBounded = */ true, 01294 /* isAppending = */ false); 01295 } 01296 01297 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const { 01298 if (CE->getNumArgs() < 2) 01299 return; 01300 01301 // char *stpcpy(char *restrict dst, const char *restrict src); 01302 evalStrcpyCommon(C, CE, 01303 /* returnEnd = */ true, 01304 /* isBounded = */ false, 01305 /* isAppending = */ false); 01306 } 01307 01308 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const { 01309 if (CE->getNumArgs() < 2) 01310 return; 01311 01312 //char *strcat(char *restrict s1, const char *restrict s2); 01313 evalStrcpyCommon(C, CE, 01314 /* returnEnd = */ false, 01315 /* isBounded = */ false, 01316 /* isAppending = */ true); 01317 } 01318 01319 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const { 01320 if (CE->getNumArgs() < 3) 01321 return; 01322 01323 //char *strncat(char *restrict s1, const char *restrict s2, size_t n); 01324 evalStrcpyCommon(C, CE, 01325 /* returnEnd = */ false, 01326 /* isBounded = */ true, 01327 /* isAppending = */ true); 01328 } 01329 01330 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, 01331 bool returnEnd, bool isBounded, 01332 bool isAppending) const { 01333 CurrentFunctionDescription = "string copy function"; 01334 ProgramStateRef state = C.getState(); 01335 const LocationContext *LCtx = C.getLocationContext(); 01336 01337 // Check that the destination is non-null. 01338 const Expr *Dst = CE->getArg(0); 01339 SVal DstVal = state->getSVal(Dst, LCtx); 01340 01341 state = checkNonNull(C, state, Dst, DstVal); 01342 if (!state) 01343 return; 01344 01345 // Check that the source is non-null. 01346 const Expr *srcExpr = CE->getArg(1); 01347 SVal srcVal = state->getSVal(srcExpr, LCtx); 01348 state = checkNonNull(C, state, srcExpr, srcVal); 01349 if (!state) 01350 return; 01351 01352 // Get the string length of the source. 01353 SVal strLength = getCStringLength(C, state, srcExpr, srcVal); 01354 01355 // If the source isn't a valid C string, give up. 01356 if (strLength.isUndef()) 01357 return; 01358 01359 SValBuilder &svalBuilder = C.getSValBuilder(); 01360 QualType cmpTy = svalBuilder.getConditionType(); 01361 QualType sizeTy = svalBuilder.getContext().getSizeType(); 01362 01363 // These two values allow checking two kinds of errors: 01364 // - actual overflows caused by a source that doesn't fit in the destination 01365 // - potential overflows caused by a bound that could exceed the destination 01366 SVal amountCopied = UnknownVal(); 01367 SVal maxLastElementIndex = UnknownVal(); 01368 const char *boundWarning = nullptr; 01369 01370 // If the function is strncpy, strncat, etc... it is bounded. 01371 if (isBounded) { 01372 // Get the max number of characters to copy. 01373 const Expr *lenExpr = CE->getArg(2); 01374 SVal lenVal = state->getSVal(lenExpr, LCtx); 01375 01376 // Protect against misdeclared strncpy(). 01377 lenVal = svalBuilder.evalCast(lenVal, sizeTy, lenExpr->getType()); 01378 01379 Optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 01380 Optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>(); 01381 01382 // If we know both values, we might be able to figure out how much 01383 // we're copying. 01384 if (strLengthNL && lenValNL) { 01385 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong; 01386 01387 // Check if the max number to copy is less than the length of the src. 01388 // If the bound is equal to the source length, strncpy won't null- 01389 // terminate the result! 01390 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume( 01391 svalBuilder.evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy) 01392 .castAs<DefinedOrUnknownSVal>()); 01393 01394 if (stateSourceTooLong && !stateSourceNotTooLong) { 01395 // Max number to copy is less than the length of the src, so the actual 01396 // strLength copied is the max number arg. 01397 state = stateSourceTooLong; 01398 amountCopied = lenVal; 01399 01400 } else if (!stateSourceTooLong && stateSourceNotTooLong) { 01401 // The source buffer entirely fits in the bound. 01402 state = stateSourceNotTooLong; 01403 amountCopied = strLength; 01404 } 01405 } 01406 01407 // We still want to know if the bound is known to be too large. 01408 if (lenValNL) { 01409 if (isAppending) { 01410 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst) 01411 01412 // Get the string length of the destination. If the destination is 01413 // memory that can't have a string length, we shouldn't be copying 01414 // into it anyway. 01415 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 01416 if (dstStrLength.isUndef()) 01417 return; 01418 01419 if (Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>()) { 01420 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Add, 01421 *lenValNL, 01422 *dstStrLengthNL, 01423 sizeTy); 01424 boundWarning = "Size argument is greater than the free space in the " 01425 "destination buffer"; 01426 } 01427 01428 } else { 01429 // For strncpy, this is just checking that lenVal <= sizeof(dst) 01430 // (Yes, strncpy and strncat differ in how they treat termination. 01431 // strncat ALWAYS terminates, but strncpy doesn't.) 01432 01433 // We need a special case for when the copy size is zero, in which 01434 // case strncpy will do no work at all. Our bounds check uses n-1 01435 // as the last element accessed, so n == 0 is problematic. 01436 ProgramStateRef StateZeroSize, StateNonZeroSize; 01437 std::tie(StateZeroSize, StateNonZeroSize) = 01438 assumeZero(C, state, *lenValNL, sizeTy); 01439 01440 // If the size is known to be zero, we're done. 01441 if (StateZeroSize && !StateNonZeroSize) { 01442 StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal); 01443 C.addTransition(StateZeroSize); 01444 return; 01445 } 01446 01447 // Otherwise, go ahead and figure out the last element we'll touch. 01448 // We don't record the non-zero assumption here because we can't 01449 // be sure. We won't warn on a possible zero. 01450 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>(); 01451 maxLastElementIndex = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, 01452 one, sizeTy); 01453 boundWarning = "Size argument is greater than the length of the " 01454 "destination buffer"; 01455 } 01456 } 01457 01458 // If we couldn't pin down the copy length, at least bound it. 01459 // FIXME: We should actually run this code path for append as well, but 01460 // right now it creates problems with constraints (since we can end up 01461 // trying to pass constraints from symbol to symbol). 01462 if (amountCopied.isUnknown() && !isAppending) { 01463 // Try to get a "hypothetical" string length symbol, which we can later 01464 // set as a real value if that turns out to be the case. 01465 amountCopied = getCStringLength(C, state, lenExpr, srcVal, true); 01466 assert(!amountCopied.isUndef()); 01467 01468 if (Optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>()) { 01469 if (lenValNL) { 01470 // amountCopied <= lenVal 01471 SVal copiedLessThanBound = svalBuilder.evalBinOpNN(state, BO_LE, 01472 *amountCopiedNL, 01473 *lenValNL, 01474 cmpTy); 01475 state = state->assume( 01476 copiedLessThanBound.castAs<DefinedOrUnknownSVal>(), true); 01477 if (!state) 01478 return; 01479 } 01480 01481 if (strLengthNL) { 01482 // amountCopied <= strlen(source) 01483 SVal copiedLessThanSrc = svalBuilder.evalBinOpNN(state, BO_LE, 01484 *amountCopiedNL, 01485 *strLengthNL, 01486 cmpTy); 01487 state = state->assume( 01488 copiedLessThanSrc.castAs<DefinedOrUnknownSVal>(), true); 01489 if (!state) 01490 return; 01491 } 01492 } 01493 } 01494 01495 } else { 01496 // The function isn't bounded. The amount copied should match the length 01497 // of the source buffer. 01498 amountCopied = strLength; 01499 } 01500 01501 assert(state); 01502 01503 // This represents the number of characters copied into the destination 01504 // buffer. (It may not actually be the strlen if the destination buffer 01505 // is not terminated.) 01506 SVal finalStrLength = UnknownVal(); 01507 01508 // If this is an appending function (strcat, strncat...) then set the 01509 // string length to strlen(src) + strlen(dst) since the buffer will 01510 // ultimately contain both. 01511 if (isAppending) { 01512 // Get the string length of the destination. If the destination is memory 01513 // that can't have a string length, we shouldn't be copying into it anyway. 01514 SVal dstStrLength = getCStringLength(C, state, Dst, DstVal); 01515 if (dstStrLength.isUndef()) 01516 return; 01517 01518 Optional<NonLoc> srcStrLengthNL = amountCopied.getAs<NonLoc>(); 01519 Optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>(); 01520 01521 // If we know both string lengths, we might know the final string length. 01522 if (srcStrLengthNL && dstStrLengthNL) { 01523 // Make sure the two lengths together don't overflow a size_t. 01524 state = checkAdditionOverflow(C, state, *srcStrLengthNL, *dstStrLengthNL); 01525 if (!state) 01526 return; 01527 01528 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *srcStrLengthNL, 01529 *dstStrLengthNL, sizeTy); 01530 } 01531 01532 // If we couldn't get a single value for the final string length, 01533 // we can at least bound it by the individual lengths. 01534 if (finalStrLength.isUnknown()) { 01535 // Try to get a "hypothetical" string length symbol, which we can later 01536 // set as a real value if that turns out to be the case. 01537 finalStrLength = getCStringLength(C, state, CE, DstVal, true); 01538 assert(!finalStrLength.isUndef()); 01539 01540 if (Optional<NonLoc> finalStrLengthNL = finalStrLength.getAs<NonLoc>()) { 01541 if (srcStrLengthNL) { 01542 // finalStrLength >= srcStrLength 01543 SVal sourceInResult = svalBuilder.evalBinOpNN(state, BO_GE, 01544 *finalStrLengthNL, 01545 *srcStrLengthNL, 01546 cmpTy); 01547 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(), 01548 true); 01549 if (!state) 01550 return; 01551 } 01552 01553 if (dstStrLengthNL) { 01554 // finalStrLength >= dstStrLength 01555 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE, 01556 *finalStrLengthNL, 01557 *dstStrLengthNL, 01558 cmpTy); 01559 state = 01560 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true); 01561 if (!state) 01562 return; 01563 } 01564 } 01565 } 01566 01567 } else { 01568 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and 01569 // the final string length will match the input string length. 01570 finalStrLength = amountCopied; 01571 } 01572 01573 // The final result of the function will either be a pointer past the last 01574 // copied element, or a pointer to the start of the destination buffer. 01575 SVal Result = (returnEnd ? UnknownVal() : DstVal); 01576 01577 assert(state); 01578 01579 // If the destination is a MemRegion, try to check for a buffer overflow and 01580 // record the new string length. 01581 if (Optional<loc::MemRegionVal> dstRegVal = 01582 DstVal.getAs<loc::MemRegionVal>()) { 01583 QualType ptrTy = Dst->getType(); 01584 01585 // If we have an exact value on a bounded copy, use that to check for 01586 // overflows, rather than our estimate about how much is actually copied. 01587 if (boundWarning) { 01588 if (Optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) { 01589 SVal maxLastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 01590 *maxLastNL, ptrTy); 01591 state = CheckLocation(C, state, CE->getArg(2), maxLastElement, 01592 boundWarning); 01593 if (!state) 01594 return; 01595 } 01596 } 01597 01598 // Then, if the final length is known... 01599 if (Optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) { 01600 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, 01601 *knownStrLength, ptrTy); 01602 01603 // ...and we haven't checked the bound, we'll check the actual copy. 01604 if (!boundWarning) { 01605 const char * const warningMsg = 01606 "String copy function overflows destination buffer"; 01607 state = CheckLocation(C, state, Dst, lastElement, warningMsg); 01608 if (!state) 01609 return; 01610 } 01611 01612 // If this is a stpcpy-style copy, the last element is the return value. 01613 if (returnEnd) 01614 Result = lastElement; 01615 } 01616 01617 // Invalidate the destination (regular invalidation without pointer-escaping 01618 // the address of the top-level region). This must happen before we set the 01619 // C string length because invalidation will clear the length. 01620 // FIXME: Even if we can't perfectly model the copy, we should see if we 01621 // can use LazyCompoundVals to copy the source values into the destination. 01622 // This would probably remove any existing bindings past the end of the 01623 // string, but that's still an improvement over blank invalidation. 01624 state = InvalidateBuffer(C, state, Dst, *dstRegVal, 01625 /*IsSourceBuffer*/false); 01626 01627 // Invalidate the source (const-invalidation without const-pointer-escaping 01628 // the address of the top-level region). 01629 state = InvalidateBuffer(C, state, srcExpr, srcVal, /*IsSourceBuffer*/true); 01630 01631 // Set the C string length of the destination, if we know it. 01632 if (isBounded && !isAppending) { 01633 // strncpy is annoying in that it doesn't guarantee to null-terminate 01634 // the result string. If the original string didn't fit entirely inside 01635 // the bound (including the null-terminator), we don't know how long the 01636 // result is. 01637 if (amountCopied != strLength) 01638 finalStrLength = UnknownVal(); 01639 } 01640 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength); 01641 } 01642 01643 assert(state); 01644 01645 // If this is a stpcpy-style copy, but we were unable to check for a buffer 01646 // overflow, we still need a result. Conjure a return value. 01647 if (returnEnd && Result.isUnknown()) { 01648 Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 01649 } 01650 01651 // Set the return value. 01652 state = state->BindExpr(CE, LCtx, Result); 01653 C.addTransition(state); 01654 } 01655 01656 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const { 01657 if (CE->getNumArgs() < 2) 01658 return; 01659 01660 //int strcmp(const char *s1, const char *s2); 01661 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ false); 01662 } 01663 01664 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const { 01665 if (CE->getNumArgs() < 3) 01666 return; 01667 01668 //int strncmp(const char *s1, const char *s2, size_t n); 01669 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ false); 01670 } 01671 01672 void CStringChecker::evalStrcasecmp(CheckerContext &C, 01673 const CallExpr *CE) const { 01674 if (CE->getNumArgs() < 2) 01675 return; 01676 01677 //int strcasecmp(const char *s1, const char *s2); 01678 evalStrcmpCommon(C, CE, /* isBounded = */ false, /* ignoreCase = */ true); 01679 } 01680 01681 void CStringChecker::evalStrncasecmp(CheckerContext &C, 01682 const CallExpr *CE) const { 01683 if (CE->getNumArgs() < 3) 01684 return; 01685 01686 //int strncasecmp(const char *s1, const char *s2, size_t n); 01687 evalStrcmpCommon(C, CE, /* isBounded = */ true, /* ignoreCase = */ true); 01688 } 01689 01690 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, 01691 bool isBounded, bool ignoreCase) const { 01692 CurrentFunctionDescription = "string comparison function"; 01693 ProgramStateRef state = C.getState(); 01694 const LocationContext *LCtx = C.getLocationContext(); 01695 01696 // Check that the first string is non-null 01697 const Expr *s1 = CE->getArg(0); 01698 SVal s1Val = state->getSVal(s1, LCtx); 01699 state = checkNonNull(C, state, s1, s1Val); 01700 if (!state) 01701 return; 01702 01703 // Check that the second string is non-null. 01704 const Expr *s2 = CE->getArg(1); 01705 SVal s2Val = state->getSVal(s2, LCtx); 01706 state = checkNonNull(C, state, s2, s2Val); 01707 if (!state) 01708 return; 01709 01710 // Get the string length of the first string or give up. 01711 SVal s1Length = getCStringLength(C, state, s1, s1Val); 01712 if (s1Length.isUndef()) 01713 return; 01714 01715 // Get the string length of the second string or give up. 01716 SVal s2Length = getCStringLength(C, state, s2, s2Val); 01717 if (s2Length.isUndef()) 01718 return; 01719 01720 // If we know the two buffers are the same, we know the result is 0. 01721 // First, get the two buffers' addresses. Another checker will have already 01722 // made sure they're not undefined. 01723 DefinedOrUnknownSVal LV = s1Val.castAs<DefinedOrUnknownSVal>(); 01724 DefinedOrUnknownSVal RV = s2Val.castAs<DefinedOrUnknownSVal>(); 01725 01726 // See if they are the same. 01727 SValBuilder &svalBuilder = C.getSValBuilder(); 01728 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV); 01729 ProgramStateRef StSameBuf, StNotSameBuf; 01730 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf); 01731 01732 // If the two arguments might be the same buffer, we know the result is 0, 01733 // and we only need to check one size. 01734 if (StSameBuf) { 01735 StSameBuf = StSameBuf->BindExpr(CE, LCtx, 01736 svalBuilder.makeZeroVal(CE->getType())); 01737 C.addTransition(StSameBuf); 01738 01739 // If the two arguments are GUARANTEED to be the same, we're done! 01740 if (!StNotSameBuf) 01741 return; 01742 } 01743 01744 assert(StNotSameBuf); 01745 state = StNotSameBuf; 01746 01747 // At this point we can go about comparing the two buffers. 01748 // For now, we only do this if they're both known string literals. 01749 01750 // Attempt to extract string literals from both expressions. 01751 const StringLiteral *s1StrLiteral = getCStringLiteral(C, state, s1, s1Val); 01752 const StringLiteral *s2StrLiteral = getCStringLiteral(C, state, s2, s2Val); 01753 bool canComputeResult = false; 01754 01755 if (s1StrLiteral && s2StrLiteral) { 01756 StringRef s1StrRef = s1StrLiteral->getString(); 01757 StringRef s2StrRef = s2StrLiteral->getString(); 01758 01759 if (isBounded) { 01760 // Get the max number of characters to compare. 01761 const Expr *lenExpr = CE->getArg(2); 01762 SVal lenVal = state->getSVal(lenExpr, LCtx); 01763 01764 // If the length is known, we can get the right substrings. 01765 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) { 01766 // Create substrings of each to compare the prefix. 01767 s1StrRef = s1StrRef.substr(0, (size_t)len->getZExtValue()); 01768 s2StrRef = s2StrRef.substr(0, (size_t)len->getZExtValue()); 01769 canComputeResult = true; 01770 } 01771 } else { 01772 // This is a normal, unbounded strcmp. 01773 canComputeResult = true; 01774 } 01775 01776 if (canComputeResult) { 01777 // Real strcmp stops at null characters. 01778 size_t s1Term = s1StrRef.find('\0'); 01779 if (s1Term != StringRef::npos) 01780 s1StrRef = s1StrRef.substr(0, s1Term); 01781 01782 size_t s2Term = s2StrRef.find('\0'); 01783 if (s2Term != StringRef::npos) 01784 s2StrRef = s2StrRef.substr(0, s2Term); 01785 01786 // Use StringRef's comparison methods to compute the actual result. 01787 int result; 01788 01789 if (ignoreCase) { 01790 // Compare string 1 to string 2 the same way strcasecmp() does. 01791 result = s1StrRef.compare_lower(s2StrRef); 01792 } else { 01793 // Compare string 1 to string 2 the same way strcmp() does. 01794 result = s1StrRef.compare(s2StrRef); 01795 } 01796 01797 // Build the SVal of the comparison and bind the return value. 01798 SVal resultVal = svalBuilder.makeIntVal(result, CE->getType()); 01799 state = state->BindExpr(CE, LCtx, resultVal); 01800 } 01801 } 01802 01803 if (!canComputeResult) { 01804 // Conjure a symbolic value. It's the best we can do. 01805 SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, 01806 C.blockCount()); 01807 state = state->BindExpr(CE, LCtx, resultVal); 01808 } 01809 01810 // Record this as a possible path. 01811 C.addTransition(state); 01812 } 01813 01814 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const { 01815 //char *strsep(char **stringp, const char *delim); 01816 if (CE->getNumArgs() < 2) 01817 return; 01818 01819 // Sanity: does the search string parameter match the return type? 01820 const Expr *SearchStrPtr = CE->getArg(0); 01821 QualType CharPtrTy = SearchStrPtr->getType()->getPointeeType(); 01822 if (CharPtrTy.isNull() || 01823 CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType()) 01824 return; 01825 01826 CurrentFunctionDescription = "strsep()"; 01827 ProgramStateRef State = C.getState(); 01828 const LocationContext *LCtx = C.getLocationContext(); 01829 01830 // Check that the search string pointer is non-null (though it may point to 01831 // a null string). 01832 SVal SearchStrVal = State->getSVal(SearchStrPtr, LCtx); 01833 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal); 01834 if (!State) 01835 return; 01836 01837 // Check that the delimiter string is non-null. 01838 const Expr *DelimStr = CE->getArg(1); 01839 SVal DelimStrVal = State->getSVal(DelimStr, LCtx); 01840 State = checkNonNull(C, State, DelimStr, DelimStrVal); 01841 if (!State) 01842 return; 01843 01844 SValBuilder &SVB = C.getSValBuilder(); 01845 SVal Result; 01846 if (Optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) { 01847 // Get the current value of the search string pointer, as a char*. 01848 Result = State->getSVal(*SearchStrLoc, CharPtrTy); 01849 01850 // Invalidate the search string, representing the change of one delimiter 01851 // character to NUL. 01852 State = InvalidateBuffer(C, State, SearchStrPtr, Result, 01853 /*IsSourceBuffer*/false); 01854 01855 // Overwrite the search string pointer. The new value is either an address 01856 // further along in the same string, or NULL if there are no more tokens. 01857 State = State->bindLoc(*SearchStrLoc, 01858 SVB.conjureSymbolVal(getTag(), CE, LCtx, CharPtrTy, 01859 C.blockCount())); 01860 } else { 01861 assert(SearchStrVal.isUnknown()); 01862 // Conjure a symbolic value. It's the best we can do. 01863 Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 01864 } 01865 01866 // Set the return value, and finish. 01867 State = State->BindExpr(CE, LCtx, Result); 01868 C.addTransition(State); 01869 } 01870 01871 01872 //===----------------------------------------------------------------------===// 01873 // The driver method, and other Checker callbacks. 01874 //===----------------------------------------------------------------------===// 01875 01876 bool CStringChecker::evalCall(const CallExpr *CE, CheckerContext &C) const { 01877 const FunctionDecl *FDecl = C.getCalleeDecl(CE); 01878 01879 if (!FDecl) 01880 return false; 01881 01882 // FIXME: Poorly-factored string switches are slow. 01883 FnCheck evalFunction = nullptr; 01884 if (C.isCLibraryFunction(FDecl, "memcpy")) 01885 evalFunction = &CStringChecker::evalMemcpy; 01886 else if (C.isCLibraryFunction(FDecl, "mempcpy")) 01887 evalFunction = &CStringChecker::evalMempcpy; 01888 else if (C.isCLibraryFunction(FDecl, "memcmp")) 01889 evalFunction = &CStringChecker::evalMemcmp; 01890 else if (C.isCLibraryFunction(FDecl, "memmove")) 01891 evalFunction = &CStringChecker::evalMemmove; 01892 else if (C.isCLibraryFunction(FDecl, "strcpy")) 01893 evalFunction = &CStringChecker::evalStrcpy; 01894 else if (C.isCLibraryFunction(FDecl, "strncpy")) 01895 evalFunction = &CStringChecker::evalStrncpy; 01896 else if (C.isCLibraryFunction(FDecl, "stpcpy")) 01897 evalFunction = &CStringChecker::evalStpcpy; 01898 else if (C.isCLibraryFunction(FDecl, "strcat")) 01899 evalFunction = &CStringChecker::evalStrcat; 01900 else if (C.isCLibraryFunction(FDecl, "strncat")) 01901 evalFunction = &CStringChecker::evalStrncat; 01902 else if (C.isCLibraryFunction(FDecl, "strlen")) 01903 evalFunction = &CStringChecker::evalstrLength; 01904 else if (C.isCLibraryFunction(FDecl, "strnlen")) 01905 evalFunction = &CStringChecker::evalstrnLength; 01906 else if (C.isCLibraryFunction(FDecl, "strcmp")) 01907 evalFunction = &CStringChecker::evalStrcmp; 01908 else if (C.isCLibraryFunction(FDecl, "strncmp")) 01909 evalFunction = &CStringChecker::evalStrncmp; 01910 else if (C.isCLibraryFunction(FDecl, "strcasecmp")) 01911 evalFunction = &CStringChecker::evalStrcasecmp; 01912 else if (C.isCLibraryFunction(FDecl, "strncasecmp")) 01913 evalFunction = &CStringChecker::evalStrncasecmp; 01914 else if (C.isCLibraryFunction(FDecl, "strsep")) 01915 evalFunction = &CStringChecker::evalStrsep; 01916 else if (C.isCLibraryFunction(FDecl, "bcopy")) 01917 evalFunction = &CStringChecker::evalBcopy; 01918 else if (C.isCLibraryFunction(FDecl, "bcmp")) 01919 evalFunction = &CStringChecker::evalMemcmp; 01920 01921 // If the callee isn't a string function, let another checker handle it. 01922 if (!evalFunction) 01923 return false; 01924 01925 // Make sure each function sets its own description. 01926 // (But don't bother in a release build.) 01927 assert(!(CurrentFunctionDescription = nullptr)); 01928 01929 // Check and evaluate the call. 01930 (this->*evalFunction)(C, CE); 01931 01932 // If the evaluate call resulted in no change, chain to the next eval call 01933 // handler. 01934 // Note, the custom CString evaluation calls assume that basic safety 01935 // properties are held. However, if the user chooses to turn off some of these 01936 // checks, we ignore the issues and leave the call evaluation to a generic 01937 // handler. 01938 if (!C.isDifferent()) 01939 return false; 01940 01941 return true; 01942 } 01943 01944 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 01945 // Record string length for char a[] = "abc"; 01946 ProgramStateRef state = C.getState(); 01947 01948 for (const auto *I : DS->decls()) { 01949 const VarDecl *D = dyn_cast<VarDecl>(I); 01950 if (!D) 01951 continue; 01952 01953 // FIXME: Handle array fields of structs. 01954 if (!D->getType()->isArrayType()) 01955 continue; 01956 01957 const Expr *Init = D->getInit(); 01958 if (!Init) 01959 continue; 01960 if (!isa<StringLiteral>(Init)) 01961 continue; 01962 01963 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 01964 const MemRegion *MR = VarLoc.getAsRegion(); 01965 if (!MR) 01966 continue; 01967 01968 SVal StrVal = state->getSVal(Init, C.getLocationContext()); 01969 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 01970 DefinedOrUnknownSVal strLength = 01971 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 01972 01973 state = state->set<CStringLength>(MR, strLength); 01974 } 01975 01976 C.addTransition(state); 01977 } 01978 01979 bool CStringChecker::wantsRegionChangeUpdate(ProgramStateRef state) const { 01980 CStringLengthTy Entries = state->get<CStringLength>(); 01981 return !Entries.isEmpty(); 01982 } 01983 01984 ProgramStateRef 01985 CStringChecker::checkRegionChanges(ProgramStateRef state, 01986 const InvalidatedSymbols *, 01987 ArrayRef<const MemRegion *> ExplicitRegions, 01988 ArrayRef<const MemRegion *> Regions, 01989 const CallEvent *Call) const { 01990 CStringLengthTy Entries = state->get<CStringLength>(); 01991 if (Entries.isEmpty()) 01992 return state; 01993 01994 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 01995 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 01996 01997 // First build sets for the changed regions and their super-regions. 01998 for (ArrayRef<const MemRegion *>::iterator 01999 I = Regions.begin(), E = Regions.end(); I != E; ++I) { 02000 const MemRegion *MR = *I; 02001 Invalidated.insert(MR); 02002 02003 SuperRegions.insert(MR); 02004 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 02005 MR = SR->getSuperRegion(); 02006 SuperRegions.insert(MR); 02007 } 02008 } 02009 02010 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 02011 02012 // Then loop over the entries in the current state. 02013 for (CStringLengthTy::iterator I = Entries.begin(), 02014 E = Entries.end(); I != E; ++I) { 02015 const MemRegion *MR = I.getKey(); 02016 02017 // Is this entry for a super-region of a changed region? 02018 if (SuperRegions.count(MR)) { 02019 Entries = F.remove(Entries, MR); 02020 continue; 02021 } 02022 02023 // Is this entry for a sub-region of a changed region? 02024 const MemRegion *Super = MR; 02025 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 02026 Super = SR->getSuperRegion(); 02027 if (Invalidated.count(Super)) { 02028 Entries = F.remove(Entries, MR); 02029 break; 02030 } 02031 } 02032 } 02033 02034 return state->set<CStringLength>(Entries); 02035 } 02036 02037 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 02038 SymbolReaper &SR) const { 02039 // Mark all symbols in our string length map as valid. 02040 CStringLengthTy Entries = state->get<CStringLength>(); 02041 02042 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 02043 I != E; ++I) { 02044 SVal Len = I.getData(); 02045 02046 for (SymExpr::symbol_iterator si = Len.symbol_begin(), 02047 se = Len.symbol_end(); si != se; ++si) 02048 SR.markInUse(*si); 02049 } 02050 } 02051 02052 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 02053 CheckerContext &C) const { 02054 if (!SR.hasDeadSymbols()) 02055 return; 02056 02057 ProgramStateRef state = C.getState(); 02058 CStringLengthTy Entries = state->get<CStringLength>(); 02059 if (Entries.isEmpty()) 02060 return; 02061 02062 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 02063 for (CStringLengthTy::iterator I = Entries.begin(), E = Entries.end(); 02064 I != E; ++I) { 02065 SVal Len = I.getData(); 02066 if (SymbolRef Sym = Len.getAsSymbol()) { 02067 if (SR.isDead(Sym)) 02068 Entries = F.remove(Entries, I.getKey()); 02069 } 02070 } 02071 02072 state = state->set<CStringLength>(Entries); 02073 C.addTransition(state); 02074 } 02075 02076 #define REGISTER_CHECKER(name) \ 02077 void ento::register##name(CheckerManager &mgr) { \ 02078 CStringChecker *checker = mgr.registerChecker<CStringChecker>(); \ 02079 checker->Filter.Check##name = true; \ 02080 checker->Filter.CheckName##name = mgr.getCurrentCheckName(); \ 02081 } 02082 02083 REGISTER_CHECKER(CStringNullArg) 02084 REGISTER_CHECKER(CStringOutOfBounds) 02085 REGISTER_CHECKER(CStringBufferOverlap) 02086 REGISTER_CHECKER(CStringNotNullTerm) 02087 02088 void ento::registerCStringCheckerBasic(CheckerManager &Mgr) { 02089 registerCStringNullArg(Mgr); 02090 }