clang 22.0.0git
CStringChecker.cpp
Go to the documentation of this file.
1//= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This defines CStringChecker, which is an assortment of checks on calls
10// to functions in <string.h>.
11//
12//===----------------------------------------------------------------------===//
13
14#include "InterCheckerAPI.h"
29#include "llvm/ADT/APSInt.h"
30#include "llvm/ADT/STLExtras.h"
31#include "llvm/ADT/StringExtras.h"
32#include "llvm/Support/raw_ostream.h"
33#include <functional>
34#include <optional>
35
36using namespace clang;
37using namespace ento;
38using namespace std::placeholders;
39
40namespace {
/// An argument of the call being checked: the argument expression together
/// with its zero-based position in the call (diagnostics print the position
/// as ArgumentIndex + 1).
41struct AnyArgExpr {
42 const Expr *Expression;
43 unsigned ArgumentIndex;
44};
// Distinct wrapper types so that source, destination and size arguments
// cannot be mixed up silently in the signatures of the checking helpers.
45struct SourceArgExpr : AnyArgExpr {};
46struct DestinationArgExpr : AnyArgExpr {};
47struct SizeArgExpr : AnyArgExpr {};
48
// Fixed-capacity string used to build out-of-bounds diagnostics.
49using ErrorMessage = SmallString<128>;
// Whether a checked buffer access writes to or reads from the buffer.
50enum class AccessKind { write, read };
51
52static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
53 AccessKind Access) {
54 ErrorMessage Message;
55 llvm::raw_svector_ostream Os(Message);
56
57 // Function classification like: Memory copy function
58 Os << toUppercase(FunctionDescription.front())
59 << &FunctionDescription.data()[1];
60
61 if (Access == AccessKind::write) {
62 Os << " overflows the destination buffer";
63 } else { // read access
64 Os << " accesses out-of-bound array element";
65 }
66
67 return Message;
68}
69
// Selects which concatenating function (if any) is being modeled; passed as
// the `appendK` argument of evalStrcpyCommon.
70enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };
71
// Character width of the modeled function: regular (char) or wide (wchar_t)
// variants, see getCharPtrType.
72enum class CharKind { Regular = 0, Wide };
// Short aliases used when binding callbacks in the call-description table.
73constexpr CharKind CK_Regular = CharKind::Regular;
74constexpr CharKind CK_Wide = CharKind::Wide;
75
76static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
77 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
78 : Ctx.WideCharTy);
79}
80
81class CStringChecker
82 : public CheckerFamily<eval::Call, check::PreStmt<DeclStmt>,
83 check::LiveSymbols, check::DeadSymbols,
84 check::RegionChanges> {
85 mutable const char *CurrentFunctionDescription = nullptr;
86
87public:
88 // FIXME: The bug types emitted by this checker family have confused garbage
89 // in their Description and Category fields (e.g. `categories::UnixAPI` is
90 // passed as the description in several cases and `uninitialized` is mistyped
91 // as `unitialized`). This should be cleaned up.
92 CheckerFrontendWithBugType NullArg{categories::UnixAPI};
93 CheckerFrontendWithBugType OutOfBounds{"Out-of-bound array access"};
94 CheckerFrontendWithBugType BufferOverlap{categories::UnixAPI,
95 "Improper arguments"};
96 CheckerFrontendWithBugType NotNullTerm{categories::UnixAPI};
97 CheckerFrontendWithBugType UninitializedRead{
98 "Accessing unitialized/garbage values"};
99
100 StringRef getDebugTag() const override { return "MallocChecker"; }
101
102 static void *getTag() { static int tag; return &tag; }
103
104 bool evalCall(const CallEvent &Call, CheckerContext &C) const;
105 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
106 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
107 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
108
110 checkRegionChanges(ProgramStateRef state,
111 const InvalidatedSymbols *,
112 ArrayRef<const MemRegion *> ExplicitRegions,
113 ArrayRef<const MemRegion *> Regions,
114 const LocationContext *LCtx,
115 const CallEvent *Call) const;
116
117 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
118 const CallEvent &)>;
119
120 CallDescriptionMap<FnCheck> Callbacks = {
121 {{CDM::CLibraryMaybeHardened, {"memcpy"}, 3},
122 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
123 {{CDM::CLibraryMaybeHardened, {"wmemcpy"}, 3},
124 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
125 {{CDM::CLibraryMaybeHardened, {"mempcpy"}, 3},
126 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
127 {{CDM::CLibraryMaybeHardened, {"wmempcpy"}, 3},
128 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
129 {{CDM::CLibrary, {"memcmp"}, 3},
130 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
131 {{CDM::CLibrary, {"wmemcmp"}, 3},
132 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
133 {{CDM::CLibraryMaybeHardened, {"memmove"}, 3},
134 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
135 {{CDM::CLibraryMaybeHardened, {"wmemmove"}, 3},
136 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
137 {{CDM::CLibraryMaybeHardened, {"memset"}, 3},
138 &CStringChecker::evalMemset},
139 {{CDM::CLibrary, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
140 // FIXME: C23 introduces 'memset_explicit', maybe also model that
141 {{CDM::CLibraryMaybeHardened, {"strcpy"}, 2},
142 &CStringChecker::evalStrcpy},
143 {{CDM::CLibraryMaybeHardened, {"strncpy"}, 3},
144 &CStringChecker::evalStrncpy},
145 {{CDM::CLibraryMaybeHardened, {"stpcpy"}, 2},
146 &CStringChecker::evalStpcpy},
147 {{CDM::CLibraryMaybeHardened, {"strlcpy"}, 3},
148 &CStringChecker::evalStrlcpy},
149 {{CDM::CLibraryMaybeHardened, {"strcat"}, 2},
150 &CStringChecker::evalStrcat},
151 {{CDM::CLibraryMaybeHardened, {"strncat"}, 3},
152 &CStringChecker::evalStrncat},
153 {{CDM::CLibraryMaybeHardened, {"strlcat"}, 3},
154 &CStringChecker::evalStrlcat},
155 {{CDM::CLibraryMaybeHardened, {"strlen"}, 1},
156 &CStringChecker::evalstrLength},
157 {{CDM::CLibrary, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
158 {{CDM::CLibraryMaybeHardened, {"strnlen"}, 2},
159 &CStringChecker::evalstrnLength},
160 {{CDM::CLibrary, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
161 {{CDM::CLibrary, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
162 {{CDM::CLibrary, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
163 {{CDM::CLibrary, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
164 {{CDM::CLibrary, {"strncasecmp"}, 3}, &CStringChecker::evalStrncasecmp},
165 {{CDM::CLibrary, {"strsep"}, 2}, &CStringChecker::evalStrsep},
166 {{CDM::CLibrary, {"strxfrm"}, 3}, &CStringChecker::evalStrxfrm},
167 {{CDM::CLibrary, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
168 {{CDM::CLibrary, {"bcmp"}, 3},
169 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
170 {{CDM::CLibrary, {"bzero"}, 2}, &CStringChecker::evalBzero},
171 {{CDM::CLibraryMaybeHardened, {"explicit_bzero"}, 2},
172 &CStringChecker::evalBzero},
173
174 // When recognizing calls to the following variadic functions, we accept
175 // any number of arguments in the call (std::nullopt = accept any
176 // number), but check that in the declaration there are 2 and 3
177 // parameters respectively. (Note that the parameter count does not
178 // include the "...". Calls where the number of arguments is too small
179 // will be discarded by the callback.)
180 {{CDM::CLibraryMaybeHardened, {"sprintf"}, std::nullopt, 2},
181 &CStringChecker::evalSprintf},
182 {{CDM::CLibraryMaybeHardened, {"snprintf"}, std::nullopt, 3},
183 &CStringChecker::evalSnprintf},
184 };
185
186 // These require a bit of special handling.
187 CallDescription StdCopy{CDM::SimpleFunc, {"std", "copy"}, 3},
188 StdCopyBackward{CDM::SimpleFunc, {"std", "copy_backward"}, 3};
189
190 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
191 void evalMemcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
192 void evalMempcpy(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
193 void evalMemmove(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
194 void evalBcopy(CheckerContext &C, const CallEvent &Call) const;
195 void evalCopyCommon(CheckerContext &C, const CallEvent &Call,
196 ProgramStateRef state, SizeArgExpr Size,
197 DestinationArgExpr Dest, SourceArgExpr Source,
198 bool Restricted, bool IsMempcpy, CharKind CK) const;
199
200 void evalMemcmp(CheckerContext &C, const CallEvent &Call, CharKind CK) const;
201
202 void evalstrLength(CheckerContext &C, const CallEvent &Call) const;
203 void evalstrnLength(CheckerContext &C, const CallEvent &Call) const;
204 void evalstrLengthCommon(CheckerContext &C, const CallEvent &Call,
205 bool IsStrnlen = false) const;
206
207 void evalStrcpy(CheckerContext &C, const CallEvent &Call) const;
208 void evalStrncpy(CheckerContext &C, const CallEvent &Call) const;
209 void evalStpcpy(CheckerContext &C, const CallEvent &Call) const;
210 void evalStrlcpy(CheckerContext &C, const CallEvent &Call) const;
211 void evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
212 bool ReturnEnd, bool IsBounded, ConcatFnKind appendK,
213 bool returnPtr = true) const;
214
215 void evalStrxfrm(CheckerContext &C, const CallEvent &Call) const;
216
217 void evalStrcat(CheckerContext &C, const CallEvent &Call) const;
218 void evalStrncat(CheckerContext &C, const CallEvent &Call) const;
219 void evalStrlcat(CheckerContext &C, const CallEvent &Call) const;
220
221 void evalStrcmp(CheckerContext &C, const CallEvent &Call) const;
222 void evalStrncmp(CheckerContext &C, const CallEvent &Call) const;
223 void evalStrcasecmp(CheckerContext &C, const CallEvent &Call) const;
224 void evalStrncasecmp(CheckerContext &C, const CallEvent &Call) const;
225 void evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
226 bool IsBounded = false, bool IgnoreCase = false) const;
227
228 void evalStrsep(CheckerContext &C, const CallEvent &Call) const;
229
230 void evalStdCopy(CheckerContext &C, const CallEvent &Call) const;
231 void evalStdCopyBackward(CheckerContext &C, const CallEvent &Call) const;
232 void evalStdCopyCommon(CheckerContext &C, const CallEvent &Call) const;
233 void evalMemset(CheckerContext &C, const CallEvent &Call) const;
234 void evalBzero(CheckerContext &C, const CallEvent &Call) const;
235
236 void evalSprintf(CheckerContext &C, const CallEvent &Call) const;
237 void evalSnprintf(CheckerContext &C, const CallEvent &Call) const;
238 void evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
239 bool IsBounded) const;
240
241 // Utility methods
242 std::pair<ProgramStateRef , ProgramStateRef >
243 static assumeZero(CheckerContext &C,
244 ProgramStateRef state, SVal V, QualType Ty);
245
246 static ProgramStateRef setCStringLength(ProgramStateRef state,
247 const MemRegion *MR,
248 SVal strLength);
249 static SVal getCStringLengthForRegion(CheckerContext &C,
250 ProgramStateRef &state,
251 const Expr *Ex,
252 const MemRegion *MR,
253 bool hypothetical);
254 SVal getCStringLength(CheckerContext &C,
255 ProgramStateRef &state,
256 const Expr *Ex,
257 SVal Buf,
258 bool hypothetical = false) const;
259
260 const StringLiteral *getCStringLiteral(CheckerContext &C,
261 ProgramStateRef &state,
262 const Expr *expr,
263 SVal val) const;
264
265 /// Invalidate the destination buffer determined by characters copied.
266 static ProgramStateRef
267 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
268 const Expr *BufE, ConstCFGElementRef Elem,
269 SVal BufV, SVal SizeV, QualType SizeTy);
270
271 /// Operation never overflows, do not invalidate the super region.
272 static ProgramStateRef invalidateDestinationBufferNeverOverflows(
273 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
274
275 /// We do not know whether the operation can overflow (e.g. size is unknown),
276 /// invalidate the super region and escape related pointers.
277 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
278 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV);
279
280 /// Invalidate the source buffer for escaping pointers.
281 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
284 SVal BufV);
285
286 /// @param InvalidationTraitOperations Determine how to invlidate the
287 /// MemRegion by setting the invalidation traits. Return true to cause pointer
288 /// escape, or false otherwise.
289 static ProgramStateRef invalidateBufferAux(
290 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
291 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
292 const MemRegion *)>
293 InvalidationTraitOperations);
294
295 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
296 const MemRegion *MR);
297
298 static bool memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
299 SVal CharE, const Expr *Size, CheckerContext &C,
300 ProgramStateRef &State);
301
302 // Re-usable checks
303 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
304 AnyArgExpr Arg, SVal l) const;
305 // Check whether the origin region behind \p Element (like the actual array
306 // region \p Element is from) is initialized.
307 ProgramStateRef checkInit(CheckerContext &C, ProgramStateRef state,
308 AnyArgExpr Buffer, SVal Element, SVal Size) const;
309 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
310 AnyArgExpr Buffer, SVal Element,
311 AccessKind Access,
312 CharKind CK = CharKind::Regular) const;
313 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
314 AnyArgExpr Buffer, SizeArgExpr Size,
315 AccessKind Access,
316 CharKind CK = CharKind::Regular) const;
317 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
318 SizeArgExpr Size, AnyArgExpr First,
319 AnyArgExpr Second,
320 CharKind CK = CharKind::Regular) const;
321 void emitOverlapBug(CheckerContext &C,
322 ProgramStateRef state,
323 const Stmt *First,
324 const Stmt *Second) const;
325
326 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
327 StringRef WarningMsg) const;
328 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
329 const Stmt *S, StringRef WarningMsg) const;
330 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
331 const Stmt *S, StringRef WarningMsg) const;
332 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
333 const Expr *E, const MemRegion *R,
334 StringRef Msg) const;
335 ProgramStateRef checkAdditionOverflow(CheckerContext &C,
336 ProgramStateRef state,
337 NonLoc left,
338 NonLoc right) const;
339
340 // Return true if the destination buffer of the copy function may be in bound.
341 // Expects SVal of Size to be positive and unsigned.
342 // Expects SVal of FirstBuf to be a FieldRegion.
343 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
344 SVal BufVal, QualType BufTy, SVal LengthVal,
345 QualType LengthTy);
346};
347
348} //end anonymous namespace
349
// Program-state map named CStringLength, keyed by memory region with an SVal
// as the mapped value.
350REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
351
352//===----------------------------------------------------------------------===//
353// Individual checks and utility methods.
354//===----------------------------------------------------------------------===//
355
356std::pair<ProgramStateRef, ProgramStateRef>
357CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef State, SVal V,
358 QualType Ty) {
359 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
360 if (!val)
361 return std::pair<ProgramStateRef, ProgramStateRef>(State, State);
362
363 SValBuilder &svalBuilder = C.getSValBuilder();
364 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
365 return State->assume(svalBuilder.evalEQ(State, *val, zero));
366}
367
368ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
369 ProgramStateRef State,
370 AnyArgExpr Arg, SVal l) const {
371 // If a previous check has failed, propagate the failure.
372 if (!State)
373 return nullptr;
374
375 ProgramStateRef stateNull, stateNonNull;
376 std::tie(stateNull, stateNonNull) =
377 assumeZero(C, State, l, Arg.Expression->getType());
378
379 if (stateNull && !stateNonNull) {
380 if (NullArg.isEnabled()) {
381 SmallString<80> buf;
382 llvm::raw_svector_ostream OS(buf);
383 assert(CurrentFunctionDescription);
384 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
385 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
386 << CurrentFunctionDescription;
387
388 emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
389 }
390 return nullptr;
391 }
392
393 // From here on, assume that the value is non-null.
394 assert(stateNonNull);
395 return stateNonNull;
396}
397
398static std::optional<NonLoc> getIndex(ProgramStateRef State,
399 const ElementRegion *ER, CharKind CK) {
401 ASTContext &Ctx = SVB.getContext();
402
403 if (CK == CharKind::Regular) {
404 if (ER->getValueType() != Ctx.CharTy)
405 return {};
406 return ER->getIndex();
407 }
408
409 if (ER->getValueType() != Ctx.WideCharTy)
410 return {};
411
412 QualType SizeTy = Ctx.getSizeType();
413 NonLoc WideSize =
415 SizeTy)
416 .castAs<NonLoc>();
417 SVal Offset =
418 SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
419 if (Offset.isUnknown())
420 return {};
421 return Offset.castAs<NonLoc>();
422}
423
424// Basically 1 -> 1st, 12 -> 12th, etc.
425static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
426 Os << Idx << llvm::getOrdinalSuffix(Idx);
427}
428
429ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
430 ProgramStateRef State,
431 AnyArgExpr Buffer, SVal Element,
432 SVal Size) const {
433
434 // If a previous check has failed, propagate the failure.
435 if (!State)
436 return nullptr;
437
438 const MemRegion *R = Element.getAsRegion();
439 const auto *ER = dyn_cast_or_null<ElementRegion>(R);
440 if (!ER)
441 return State;
442
443 const auto *SuperR = ER->getSuperRegion()->getAs<TypedValueRegion>();
444 if (!SuperR)
445 return State;
446
447 // FIXME: We ought to able to check objects as well. Maybe
448 // UninitializedObjectChecker could help?
449 if (!SuperR->getValueType()->isArrayType())
450 return State;
451
452 SValBuilder &SVB = C.getSValBuilder();
453 ASTContext &Ctx = SVB.getContext();
454
455 const QualType ElemTy = Ctx.getBaseElementType(SuperR->getValueType());
456 const NonLoc Zero = SVB.makeZeroArrayIndex();
457
458 std::optional<Loc> FirstElementVal =
459 State->getLValue(ElemTy, Zero, loc::MemRegionVal(SuperR)).getAs<Loc>();
460 if (!FirstElementVal)
461 return State;
462
463 // Ensure that we wouldn't read uninitialized value.
464 if (UninitializedRead.isEnabled() &&
465 State->getSVal(*FirstElementVal).isUndef()) {
466 llvm::SmallString<258> Buf;
467 llvm::raw_svector_ostream OS(Buf);
468 OS << "The first element of the ";
469 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
470 OS << " argument is undefined";
471 emitUninitializedReadBug(C, State, Buffer.Expression,
472 FirstElementVal->getAsRegion(), OS.str());
473 return nullptr;
474 }
475
476 // We won't check whether the entire region is fully initialized -- lets just
477 // check that the first and the last element is. So, onto checking the last
478 // element:
479 const QualType IdxTy = SVB.getArrayIndexType();
480
481 NonLoc ElemSize =
482 SVB.makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
483 .castAs<NonLoc>();
484
485 // FIXME: Check that the size arg to the cstring function is divisible by
486 // size of the actual element type?
487
488 // The type of the argument to the cstring function is either char or wchar,
489 // but thats not the type of the original array (or memory region).
490 // Suppose the following:
491 // int t[5];
492 // memcpy(dst, t, sizeof(t) / sizeof(t[0]));
493 // When checking whether t is fully initialized, we see it as char array of
494 // size sizeof(int)*5. If we check the last element as a character, we read
495 // the last byte of an integer, which will be undefined. But just because
496 // that value is undefined, it doesn't mean that the element is uninitialized!
497 // For this reason, we need to retrieve the actual last element with the
498 // correct type.
499
500 // Divide the size argument to the cstring function by the actual element
501 // type. This value will be size of the array, or the index to the
502 // past-the-end element.
503 std::optional<NonLoc> Offset =
504 SVB.evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
505 IdxTy)
506 .getAs<NonLoc>();
507
508 // Retrieve the index of the last element.
509 const NonLoc One = SVB.makeIntVal(1, IdxTy).castAs<NonLoc>();
510 SVal LastIdx = SVB.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
511
512 if (!Offset)
513 return State;
514
515 SVal LastElementVal =
516 State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(SuperR));
517 if (!isa<Loc>(LastElementVal))
518 return State;
519
520 if (UninitializedRead.isEnabled() &&
521 State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
522 const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
523 // If we can't get emit a sensible last element index, just bail out --
524 // prefer to emit nothing in favour of emitting garbage quality reports.
525 if (!IdxInt) {
526 C.addSink();
527 return nullptr;
528 }
529 llvm::SmallString<258> Buf;
530 llvm::raw_svector_ostream OS(Buf);
531 OS << "The last accessed element (at index ";
532 OS << IdxInt->getExtValue();
533 OS << ") in the ";
534 printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
535 OS << " argument is undefined";
536 emitUninitializedReadBug(C, State, Buffer.Expression,
537 LastElementVal.getAsRegion(), OS.str());
538 return nullptr;
539 }
540 return State;
541}
542// FIXME: The root of this logic was copied from the old checker
543// alpha.security.ArrayBound (which is removed within this commit).
544// It should be refactored to use the different, more sophisticated bounds
545// checking logic used by the new checker ``security.ArrayBound``.
546ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
547 ProgramStateRef state,
548 AnyArgExpr Buffer, SVal Element,
549 AccessKind Access,
550 CharKind CK) const {
551
552 // If a previous check has failed, propagate the failure.
553 if (!state)
554 return nullptr;
555
556 // Check for out of bound array element access.
557 const MemRegion *R = Element.getAsRegion();
558 if (!R)
559 return state;
560
561 const auto *ER = dyn_cast<ElementRegion>(R);
562 if (!ER)
563 return state;
564
565 // Get the index of the accessed element.
566 std::optional<NonLoc> Idx = getIndex(state, ER, CK);
567 if (!Idx)
568 return state;
569
570 // Get the size of the array.
571 const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
572 DefinedOrUnknownSVal Size =
573 getDynamicExtent(state, superReg, C.getSValBuilder());
574
575 auto [StInBound, StOutBound] = state->assumeInBoundDual(*Idx, Size);
576 if (StOutBound && !StInBound) {
577 if (!OutOfBounds.isEnabled())
578 return nullptr;
579
580 ErrorMessage Message =
581 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
582 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
583 return nullptr;
584 }
585
586 // Array bound check succeeded. From this point forward the array bound
587 // should always succeed.
588 return StInBound;
589}
590
592CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
593 AnyArgExpr Buffer, SizeArgExpr Size,
594 AccessKind Access, CharKind CK) const {
595 // If a previous check has failed, propagate the failure.
596 if (!State)
597 return nullptr;
598
599 SValBuilder &svalBuilder = C.getSValBuilder();
600 ASTContext &Ctx = svalBuilder.getContext();
601
602 QualType SizeTy = Size.Expression->getType();
603 QualType PtrTy = getCharPtrType(Ctx, CK);
604
605 // Check that the first buffer is non-null.
606 SVal BufVal = C.getSVal(Buffer.Expression);
607 State = checkNonNull(C, State, Buffer, BufVal);
608 if (!State)
609 return nullptr;
610
611 // If out-of-bounds checking is turned off, skip the rest.
612 if (!OutOfBounds.isEnabled())
613 return State;
614
615 SVal BufStart =
616 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
617
618 // Check if the first byte of the buffer is accessible.
619 State = CheckLocation(C, State, Buffer, BufStart, Access, CK);
620
621 if (!State)
622 return nullptr;
623
624 // Get the access length and make sure it is known.
625 // FIXME: This assumes the caller has already checked that the access length
626 // is positive. And that it's unsigned.
627 SVal LengthVal = C.getSVal(Size.Expression);
628 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
629 if (!Length)
630 return State;
631
632 // Compute the offset of the last element to be accessed: size-1.
633 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
634 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
635 if (Offset.isUnknown())
636 return nullptr;
637 NonLoc LastOffset = Offset.castAs<NonLoc>();
638
639 // Check that the first buffer is sufficiently long.
640 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
641
642 SVal BufEnd =
643 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
644 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
645 if (Access == AccessKind::read)
646 State = checkInit(C, State, Buffer, BufEnd, *Length);
647
648 // If the buffer isn't large enough, abort.
649 if (!State)
650 return nullptr;
651 }
652
653 // Large enough or not, return this state!
654 return State;
655}
656
657ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
658 ProgramStateRef state,
659 SizeArgExpr Size, AnyArgExpr First,
660 AnyArgExpr Second,
661 CharKind CK) const {
662 if (!BufferOverlap.isEnabled())
663 return state;
664
665 // Do a simple check for overlap: if the two arguments are from the same
666 // buffer, see if the end of the first is greater than the start of the second
667 // or vice versa.
668
669 // If a previous check has failed, propagate the failure.
670 if (!state)
671 return nullptr;
672
673 ProgramStateRef stateTrue, stateFalse;
674
675 // Assume different address spaces cannot overlap.
676 if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
677 Second.Expression->getType()->getPointeeType().getAddressSpace())
678 return state;
679
680 // Get the buffer values and make sure they're known locations.
681 const LocationContext *LCtx = C.getLocationContext();
682 SVal firstVal = state->getSVal(First.Expression, LCtx);
683 SVal secondVal = state->getSVal(Second.Expression, LCtx);
684
685 std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
686 if (!firstLoc)
687 return state;
688
689 std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
690 if (!secondLoc)
691 return state;
692
693 // Are the two values the same?
694 SValBuilder &svalBuilder = C.getSValBuilder();
695 std::tie(stateTrue, stateFalse) =
696 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
697
698 if (stateTrue && !stateFalse) {
699 // If the values are known to be equal, that's automatically an overlap.
700 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
701 return nullptr;
702 }
703
704 // assume the two expressions are not equal.
705 assert(stateFalse);
706 state = stateFalse;
707
708 // Which value comes first?
709 QualType cmpTy = svalBuilder.getConditionType();
710 SVal reverse =
711 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
712 std::optional<DefinedOrUnknownSVal> reverseTest =
713 reverse.getAs<DefinedOrUnknownSVal>();
714 if (!reverseTest)
715 return state;
716
717 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
718 if (stateTrue) {
719 if (stateFalse) {
720 // If we don't know which one comes first, we can't perform this test.
721 return state;
722 } else {
723 // Switch the values so that firstVal is before secondVal.
724 std::swap(firstLoc, secondLoc);
725
726 // Switch the Exprs as well, so that they still correspond.
727 std::swap(First, Second);
728 }
729 }
730
731 // Get the length, and make sure it too is known.
732 SVal LengthVal = state->getSVal(Size.Expression, LCtx);
733 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
734 if (!Length)
735 return state;
736
737 // Convert the first buffer's start address to char*.
738 // Bail out if the cast fails.
739 ASTContext &Ctx = svalBuilder.getContext();
740 QualType CharPtrTy = getCharPtrType(Ctx, CK);
741 SVal FirstStart =
742 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
743 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
744 if (!FirstStartLoc)
745 return state;
746
747 // Compute the end of the first buffer. Bail out if THAT fails.
748 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
749 *Length, CharPtrTy);
750 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
751 if (!FirstEndLoc)
752 return state;
753
754 // Is the end of the first buffer past the start of the second buffer?
755 SVal Overlap =
756 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
757 std::optional<DefinedOrUnknownSVal> OverlapTest =
758 Overlap.getAs<DefinedOrUnknownSVal>();
759 if (!OverlapTest)
760 return state;
761
762 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
763
764 if (stateTrue && !stateFalse) {
765 // Overlap!
766 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
767 return nullptr;
768 }
769
770 // assume the two expressions don't overlap.
771 assert(stateFalse);
772 return stateFalse;
773}
774
775void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
776 const Stmt *First, const Stmt *Second) const {
777 ExplodedNode *N = C.generateErrorNode(state);
778 if (!N)
779 return;
780
781 // Generate a report for this bug.
782 auto report = std::make_unique<PathSensitiveBugReport>(
783 BufferOverlap, "Arguments must not be overlapping buffers", N);
784 report->addRange(First->getSourceRange());
785 report->addRange(Second->getSourceRange());
786
787 C.emitReport(std::move(report));
788}
789
790void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
791 const Stmt *S, StringRef WarningMsg) const {
792 if (ExplodedNode *N = C.generateErrorNode(State)) {
793 auto Report =
794 std::make_unique<PathSensitiveBugReport>(NullArg, WarningMsg, N);
795 Report->addRange(S->getSourceRange());
796 if (const auto *Ex = dyn_cast<Expr>(S))
798 C.emitReport(std::move(Report));
799 }
800}
801
802void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
803 ProgramStateRef State,
804 const Expr *E, const MemRegion *R,
805 StringRef Msg) const {
806 if (ExplodedNode *N = C.generateErrorNode(State)) {
807 auto Report =
808 std::make_unique<PathSensitiveBugReport>(UninitializedRead, Msg, N);
809 Report->addNote("Other elements might also be undefined",
810 Report->getLocation());
811 Report->addRange(E->getSourceRange());
813 Report->addVisitor<NoStoreFuncVisitor>(R->castAs<SubRegion>());
814 C.emitReport(std::move(Report));
815 }
816}
817
818void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
819 ProgramStateRef State, const Stmt *S,
820 StringRef WarningMsg) const {
821 if (ExplodedNode *N = C.generateErrorNode(State)) {
822 // FIXME: It would be nice to eventually make this diagnostic more clear,
823 // e.g., by referencing the original declaration or by saying *why* this
824 // reference is outside the range.
825 auto Report =
826 std::make_unique<PathSensitiveBugReport>(OutOfBounds, WarningMsg, N);
827 Report->addRange(S->getSourceRange());
828 C.emitReport(std::move(Report));
829 }
830}
831
832void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
833 const Stmt *S,
834 StringRef WarningMsg) const {
835 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
836 auto Report =
837 std::make_unique<PathSensitiveBugReport>(NotNullTerm, WarningMsg, N);
838
839 Report->addRange(S->getSourceRange());
840 C.emitReport(std::move(Report));
841 }
842}
843
844ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
845 ProgramStateRef state,
846 NonLoc left,
847 NonLoc right) const {
848 // If out-of-bounds checking is turned off, skip the rest.
849 if (!OutOfBounds.isEnabled())
850 return state;
851
852 // If a previous check has failed, propagate the failure.
853 if (!state)
854 return nullptr;
855
856 SValBuilder &svalBuilder = C.getSValBuilder();
857 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
858
859 QualType sizeTy = svalBuilder.getContext().getSizeType();
860 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
861 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
862
863 SVal maxMinusRight;
864 if (isa<nonloc::ConcreteInt>(right)) {
865 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
866 sizeTy);
867 } else {
868 // Try switching the operands. (The order of these two assignments is
869 // important!)
870 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
871 sizeTy);
872 left = right;
873 }
874
875 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
876 QualType cmpTy = svalBuilder.getConditionType();
877 // If left > max - right, we have an overflow.
878 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
879 *maxMinusRightNL, cmpTy);
880
881 auto [StateOverflow, StateOkay] =
882 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
883
884 if (StateOverflow && !StateOkay) {
885 // On this path the analyzer is convinced that the addition of these two
886 // values would overflow `size_t` which must be caused by the inaccuracy
887 // of our modeling because this method is called in situations where the
888 // summands are size/length values which are much less than SIZE_MAX. To
889 // avoid false positives let's just sink this invalid path.
890 C.addSink(StateOverflow);
891 return nullptr;
892 }
893
894 // From now on, assume an overflow didn't occur.
895 assert(StateOkay);
896 state = StateOkay;
897 }
898
899 return state;
900}
901
902ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
903 const MemRegion *MR,
904 SVal strLength) {
905 assert(!strLength.isUndef() && "Attempt to set an undefined string length");
906
907 MR = MR->StripCasts();
908
909 switch (MR->getKind()) {
910 case MemRegion::StringRegionKind:
911 // FIXME: This can happen if we strcpy() into a string region. This is
912 // undefined [C99 6.4.5p6], but we should still warn about it.
913 return state;
914
915 case MemRegion::SymbolicRegionKind:
916 case MemRegion::AllocaRegionKind:
917 case MemRegion::NonParamVarRegionKind:
918 case MemRegion::ParamVarRegionKind:
919 case MemRegion::FieldRegionKind:
920 case MemRegion::ObjCIvarRegionKind:
921 // These are the types we can currently track string lengths for.
922 break;
923
924 case MemRegion::ElementRegionKind:
925 // FIXME: Handle element regions by upper-bounding the parent region's
926 // string length.
927 return state;
928
929 default:
930 // Other regions (mostly non-data) can't have a reliable C string length.
931 // For now, just ignore the change.
932 // FIXME: These are rare but not impossible. We should output some kind of
933 // warning for things like strcpy((char[]){'a', 0}, "b");
934 return state;
935 }
936
937 if (strLength.isUnknown())
938 return state->remove<CStringLength>(MR);
939
940 return state->set<CStringLength>(MR, strLength);
941}
942
943SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
944 ProgramStateRef &state,
945 const Expr *Ex,
946 const MemRegion *MR,
947 bool hypothetical) {
948 if (!hypothetical) {
949 // If there's a recorded length, go ahead and return it.
950 const SVal *Recorded = state->get<CStringLength>(MR);
951 if (Recorded)
952 return *Recorded;
953 }
954
955 // Otherwise, get a new symbol and update the state.
956 SValBuilder &svalBuilder = C.getSValBuilder();
957 QualType sizeTy = svalBuilder.getContext().getSizeType();
958 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
959 MR, Ex, sizeTy,
960 C.getLocationContext(),
961 C.blockCount());
962
963 if (!hypothetical) {
964 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
965 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
966 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
967 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
968 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
969 std::optional<APSIntPtr> maxLengthInt =
970 BVF.evalAPSInt(BO_Div, maxValInt, fourInt);
971 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
972 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength,
973 svalBuilder.getConditionType());
974 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
975 }
976 state = state->set<CStringLength>(MR, strLength);
977 }
978
979 return strLength;
980}
981
982SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
983 const Expr *Ex, SVal Buf,
984 bool hypothetical) const {
985 const MemRegion *MR = Buf.getAsRegion();
986 if (!MR) {
987 // If we can't get a region, see if it's something we /know/ isn't a
988 // C string. In the context of locations, the only time we can issue such
989 // a warning is for labels.
990 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
991 if (NotNullTerm.isEnabled()) {
992 SmallString<120> buf;
993 llvm::raw_svector_ostream os(buf);
994 assert(CurrentFunctionDescription);
995 os << "Argument to " << CurrentFunctionDescription
996 << " is the address of the label '" << Label->getLabel()->getName()
997 << "', which is not a null-terminated string";
998
999 emitNotCStringBug(C, state, Ex, os.str());
1000 }
1001 return UndefinedVal();
1002 }
1003
1004 // If it's not a region and not a label, give up.
1005 return UnknownVal();
1006 }
1007
1008 // If we have a region, strip casts from it and see if we can figure out
1009 // its length. For anything we can't figure out, just return UnknownVal.
1010 MR = MR->StripCasts();
1011
1012 switch (MR->getKind()) {
1013 case MemRegion::StringRegionKind: {
1014 // Modifying the contents of string regions is undefined [C99 6.4.5p6],
1015 // so we can assume that the byte length is the correct C string length.
1016 SValBuilder &svalBuilder = C.getSValBuilder();
1017 QualType sizeTy = svalBuilder.getContext().getSizeType();
1018 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
1019 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
1020 }
1021 case MemRegion::NonParamVarRegionKind: {
1022 // If we have a global constant with a string literal initializer,
1023 // compute the initializer's length.
1024 const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl();
1025 if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) {
1026 if (const Expr *Init = Decl->getInit()) {
1027 if (auto *StrLit = dyn_cast<StringLiteral>(Init)) {
1028 SValBuilder &SvalBuilder = C.getSValBuilder();
1029 QualType SizeTy = SvalBuilder.getContext().getSizeType();
1030 return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy);
1031 }
1032 }
1033 }
1034 [[fallthrough]];
1035 }
1036 case MemRegion::SymbolicRegionKind:
1037 case MemRegion::AllocaRegionKind:
1038 case MemRegion::ParamVarRegionKind:
1039 case MemRegion::FieldRegionKind:
1040 case MemRegion::ObjCIvarRegionKind:
1041 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
1042 case MemRegion::CompoundLiteralRegionKind:
1043 // FIXME: Can we track this? Is it necessary?
1044 return UnknownVal();
1045 case MemRegion::ElementRegionKind:
1046 // FIXME: How can we handle this? It's not good enough to subtract the
1047 // offset from the base string length; consider "123\x00567" and &a[5].
1048 return UnknownVal();
1049 default:
1050 // Other regions (mostly non-data) can't have a reliable C string length.
1051 // In this case, an error is emitted and UndefinedVal is returned.
1052 // The caller should always be prepared to handle this case.
1053 if (NotNullTerm.isEnabled()) {
1054 SmallString<120> buf;
1055 llvm::raw_svector_ostream os(buf);
1056
1057 assert(CurrentFunctionDescription);
1058 os << "Argument to " << CurrentFunctionDescription << " is ";
1059
1060 if (SummarizeRegion(os, C.getASTContext(), MR))
1061 os << ", which is not a null-terminated string";
1062 else
1063 os << "not a null-terminated string";
1064
1065 emitNotCStringBug(C, state, Ex, os.str());
1066 }
1067 return UndefinedVal();
1068 }
1069}
1070
1071const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
1072 ProgramStateRef &state, const Expr *expr, SVal val) const {
1073
1074 // Get the memory region pointed to by the val.
1075 const MemRegion *bufRegion = val.getAsRegion();
1076 if (!bufRegion)
1077 return nullptr;
1078
1079 // Strip casts off the memory region.
1080 bufRegion = bufRegion->StripCasts();
1081
1082 // Cast the memory region to a string region.
1083 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
1084 if (!strRegion)
1085 return nullptr;
1086
1087 // Return the actual string in the string region.
1088 return strRegion->getStringLiteral();
1089}
1090
1091bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
1092 SVal BufVal, QualType BufTy,
1093 SVal LengthVal, QualType LengthTy) {
1094 // If we do not know that the buffer is long enough we return 'true'.
1095 // Otherwise the parent region of this field region would also get
1096 // invalidated, which would lead to warnings based on an unknown state.
1097
1098 if (LengthVal.isUnknown())
1099 return false;
1100
1101 // Originally copied from CheckBufferAccess and CheckLocation.
1102 SValBuilder &SB = C.getSValBuilder();
1103 ASTContext &Ctx = C.getASTContext();
1104
1105 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
1106
1107 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
1108 if (!Length)
1109 return true; // cf top comment.
1110
1111 // Compute the offset of the last element to be accessed: size-1.
1112 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1113 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1114 if (Offset.isUnknown())
1115 return true; // cf top comment
1116 NonLoc LastOffset = Offset.castAs<NonLoc>();
1117
1118 // Check that the first buffer is sufficiently long.
1119 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1120 std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1121 if (!BufLoc)
1122 return true; // cf top comment.
1123
1124 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1125
1126 // Check for out of bound array element access.
1127 const MemRegion *R = BufEnd.getAsRegion();
1128 if (!R)
1129 return true; // cf top comment.
1130
1131 const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1132 if (!ER)
1133 return true; // cf top comment.
1134
1135 // Support library functions defined with non-default address spaces
1136 assert(ER->getValueType()->getCanonicalTypeUnqualified() ==
1137 C.getASTContext().CharTy &&
1138 "isFirstBufInBound should only be called with char* ElementRegions");
1139
1140 // Get the size of the array.
1141 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1142 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1143
1144 // Get the index of the accessed element.
1145 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1146
1147 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1148
1149 return static_cast<bool>(StInBound);
1150}
1151
1152ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1153 CheckerContext &C, ProgramStateRef S, const Expr *BufE,
1154 ConstCFGElementRef Elem, SVal BufV, SVal SizeV, QualType SizeTy) {
1155 auto InvalidationTraitOperations =
1156 [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1157 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1158 // If destination buffer is a field region and access is in bound, do
1159 // not invalidate its super region.
1160 if (MemRegion::FieldRegionKind == R->getKind() &&
1161 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1162 ITraits.setTrait(
1163 R,
1165 }
1166 return false;
1167 };
1168
1169 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1170}
1171
1173CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1174 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1175 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1176 const MemRegion *R) {
1177 return isa<FieldRegion>(R);
1178 };
1179
1180 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1181}
1182
1183ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1184 CheckerContext &C, ProgramStateRef S, ConstCFGElementRef Elem, SVal BufV) {
1185 auto InvalidationTraitOperations =
1186 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1187 if (MemRegion::FieldRegionKind == R->getKind())
1188 ITraits.setTrait(
1189 R,
1191 return false;
1192 };
1193
1194 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1195}
1196
1197ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1199 ConstCFGElementRef Elem,
1200 SVal BufV) {
1201 auto InvalidationTraitOperations =
1202 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1203 ITraits.setTrait(
1204 R->getBaseRegion(),
1206 ITraits.setTrait(R,
1208 return true;
1209 };
1210
1211 return invalidateBufferAux(C, S, Elem, BufV, InvalidationTraitOperations);
1212}
1213
1214ProgramStateRef CStringChecker::invalidateBufferAux(
1215 CheckerContext &C, ProgramStateRef State, ConstCFGElementRef Elem, SVal V,
1216 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1217 const MemRegion *)>
1218 InvalidationTraitOperations) {
1219 std::optional<Loc> L = V.getAs<Loc>();
1220 if (!L)
1221 return State;
1222
1223 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1224 // some assumptions about the value that CFRefCount can't. Even so, it should
1225 // probably be refactored.
1226 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1227 const MemRegion *R = MR->getRegion()->StripCasts();
1228
1229 // Are we dealing with an ElementRegion? If so, we should be invalidating
1230 // the super-region.
1231 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1232 R = ER->getSuperRegion();
1233 // FIXME: What about layers of ElementRegions?
1234 }
1235
1236 // Invalidate this region.
1237 const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1238 RegionAndSymbolInvalidationTraits ITraits;
1239 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1240
1241 return State->invalidateRegions(R, Elem, C.blockCount(), LCtx,
1242 CausesPointerEscape, nullptr, nullptr,
1243 &ITraits);
1244 }
1245
1246 // If we have a non-region value by chance, just remove the binding.
1247 // FIXME: is this necessary or correct? This handles the non-Region
1248 // cases. Is it ever valid to store to these?
1249 return State->killBinding(*L);
1250}
1251
1252bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1253 const MemRegion *MR) {
1254 switch (MR->getKind()) {
1255 case MemRegion::FunctionCodeRegionKind: {
1256 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1257 os << "the address of the function '" << *FD << '\'';
1258 else
1259 os << "the address of a function";
1260 return true;
1261 }
1262 case MemRegion::BlockCodeRegionKind:
1263 os << "block text";
1264 return true;
1265 case MemRegion::BlockDataRegionKind:
1266 os << "a block";
1267 return true;
1268 case MemRegion::CXXThisRegionKind:
1269 case MemRegion::CXXTempObjectRegionKind:
1270 os << "a C++ temp object of type "
1271 << cast<TypedValueRegion>(MR)->getValueType();
1272 return true;
1273 case MemRegion::NonParamVarRegionKind:
1274 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1275 return true;
1276 case MemRegion::ParamVarRegionKind:
1277 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1278 return true;
1279 case MemRegion::FieldRegionKind:
1280 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1281 return true;
1282 case MemRegion::ObjCIvarRegionKind:
1283 os << "an instance variable of type "
1284 << cast<TypedValueRegion>(MR)->getValueType();
1285 return true;
1286 default:
1287 return false;
1288 }
1289}
1290
1291bool CStringChecker::memsetAux(const Expr *DstBuffer, ConstCFGElementRef Elem,
1292 SVal CharVal, const Expr *Size,
1293 CheckerContext &C, ProgramStateRef &State) {
1294 SVal MemVal = C.getSVal(DstBuffer);
1295 SVal SizeVal = C.getSVal(Size);
1296 const MemRegion *MR = MemVal.getAsRegion();
1297 if (!MR)
1298 return false;
1299
1300 // We're about to model memset by producing a "default binding" in the Store.
1301 // Our current implementation - RegionStore - doesn't support default bindings
1302 // that don't cover the whole base region. So we should first get the offset
1303 // and the base region to figure out whether the offset of buffer is 0.
1304 RegionOffset Offset = MR->getAsOffset();
1305 const MemRegion *BR = Offset.getRegion();
1306
1307 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1308 if (!SizeNL)
1309 return false;
1310
1311 SValBuilder &svalBuilder = C.getSValBuilder();
1312 ASTContext &Ctx = C.getASTContext();
1313
1314 // void *memset(void *dest, int ch, size_t count);
1315 // For now we can only handle the case of offset is 0 and concrete char value.
1316 if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1317 Offset.getOffset() == 0) {
1318 // Get the base region's size.
1319 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1320
1321 ProgramStateRef StateWholeReg, StateNotWholeReg;
1322 std::tie(StateWholeReg, StateNotWholeReg) =
1323 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1324
1325 // With the semantic of 'memset()', we should convert the CharVal to
1326 // unsigned char.
1327 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1328
1329 ProgramStateRef StateNullChar, StateNonNullChar;
1330 std::tie(StateNullChar, StateNonNullChar) =
1331 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1332
1333 if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1334 !StateNonNullChar) {
1335 // If the 'memset()' acts on the whole region of destination buffer and
1336 // the value of the second argument of 'memset()' is zero, bind the second
1337 // argument's value to the destination buffer with 'default binding'.
1338 // FIXME: Since there is no perfect way to bind the non-zero character, we
1339 // can only deal with zero value here. In the future, we need to deal with
1340 // the binding of non-zero value in the case of whole region.
1341 State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1342 C.getLocationContext());
1343 } else {
1344 // If the destination buffer's extent is not equal to the value of
1345 // third argument, just invalidate buffer.
1346 State = invalidateDestinationBufferBySize(
1347 C, State, DstBuffer, Elem, MemVal, SizeVal, Size->getType());
1348 }
1349
1350 if (StateNullChar && !StateNonNullChar) {
1351 // If the value of the second argument of 'memset()' is zero, set the
1352 // string length of destination buffer to 0 directly.
1353 State = setCStringLength(State, MR,
1354 svalBuilder.makeZeroVal(Ctx.getSizeType()));
1355 } else if (!StateNullChar && StateNonNullChar) {
1356 SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1357 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1358 C.getLocationContext(), C.blockCount());
1359
1360 // If the value of second argument is not zero, then the string length
1361 // is at least the size argument.
1362 SVal NewStrLenGESize = svalBuilder.evalBinOp(
1363 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1364
1365 State = setCStringLength(
1366 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1367 MR, NewStrLen);
1368 }
1369 } else {
1370 // If the offset is not zero and char value is not concrete, we can do
1371 // nothing but invalidate the buffer.
1372 State = invalidateDestinationBufferBySize(C, State, DstBuffer, Elem, MemVal,
1373 SizeVal, Size->getType());
1374 }
1375 return true;
1376}
1377
1378//===----------------------------------------------------------------------===//
1379// evaluation of individual function calls.
1380//===----------------------------------------------------------------------===//
1381
1382void CStringChecker::evalCopyCommon(CheckerContext &C, const CallEvent &Call,
1383 ProgramStateRef state, SizeArgExpr Size,
1384 DestinationArgExpr Dest,
1385 SourceArgExpr Source, bool Restricted,
1386 bool IsMempcpy, CharKind CK) const {
1387 CurrentFunctionDescription = "memory copy function";
1388
1389 // See if the size argument is zero.
1390 const LocationContext *LCtx = C.getLocationContext();
1391 SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1392 QualType sizeTy = Size.Expression->getType();
1393
1394 ProgramStateRef stateZeroSize, stateNonZeroSize;
1395 std::tie(stateZeroSize, stateNonZeroSize) =
1396 assumeZero(C, state, sizeVal, sizeTy);
1397
1398 // Get the value of the Dest.
1399 SVal destVal = state->getSVal(Dest.Expression, LCtx);
1400
1401 // If the size is zero, there won't be any actual memory access, so
1402 // just bind the return value to the destination buffer and return.
1403 if (stateZeroSize && !stateNonZeroSize) {
1404 stateZeroSize =
1405 stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1406 C.addTransition(stateZeroSize);
1407 return;
1408 }
1409
1410 // If the size can be nonzero, we have to check the other arguments.
1411 if (stateNonZeroSize) {
1412 // TODO: If Size is tainted and we cannot prove that it is smaller or equal
1413 // to the size of the destination buffer, then emit a warning
1414 // that an attacker may provoke a buffer overflow error.
1415 state = stateNonZeroSize;
1416
1417 // Ensure the destination is not null. If it is NULL there will be a
1418 // NULL pointer dereference.
1419 state = checkNonNull(C, state, Dest, destVal);
1420 if (!state)
1421 return;
1422
1423 // Get the value of the Src.
1424 SVal srcVal = state->getSVal(Source.Expression, LCtx);
1425
1426 // Ensure the source is not null. If it is NULL there will be a
1427 // NULL pointer dereference.
1428 state = checkNonNull(C, state, Source, srcVal);
1429 if (!state)
1430 return;
1431
1432 // Ensure the accesses are valid and that the buffers do not overlap.
1433 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1434 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1435
1436 if (Restricted)
1437 state = CheckOverlap(C, state, Size, Dest, Source, CK);
1438
1439 if (!state)
1440 return;
1441
1442 // If this is mempcpy, get the byte after the last byte copied and
1443 // bind the expr.
1444 if (IsMempcpy) {
1445 // Get the byte after the last byte copied.
1446 SValBuilder &SvalBuilder = C.getSValBuilder();
1447 ASTContext &Ctx = SvalBuilder.getContext();
1448 QualType CharPtrTy = getCharPtrType(Ctx, CK);
1449 SVal DestRegCharVal =
1450 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1451 SVal lastElement = C.getSValBuilder().evalBinOp(
1452 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1453 // If we don't know how much we copied, we can at least
1454 // conjure a return value for later.
1455 if (lastElement.isUnknown())
1456 lastElement = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1457
1458 // The byte after the last byte copied is the return value.
1459 state = state->BindExpr(Call.getOriginExpr(), LCtx, lastElement);
1460 } else {
1461 // All other copies return the destination buffer.
1462 // (Well, bcopy() has a void return type, but this won't hurt.)
1463 state = state->BindExpr(Call.getOriginExpr(), LCtx, destVal);
1464 }
1465
1466 // Invalidate the destination (regular invalidation without pointer-escaping
1467 // the address of the top-level region).
1468 // FIXME: Even if we can't perfectly model the copy, we should see if we
1469 // can use LazyCompoundVals to copy the source values into the destination.
1470 // This would probably remove any existing bindings past the end of the
1471 // copied region, but that's still an improvement over blank invalidation.
1472 state = invalidateDestinationBufferBySize(
1473 C, state, Dest.Expression, Call.getCFGElementRef(),
1474 C.getSVal(Dest.Expression), sizeVal, Size.Expression->getType());
1475
1476 // Invalidate the source (const-invalidation without const-pointer-escaping
1477 // the address of the top-level region).
1478 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(),
1479 C.getSVal(Source.Expression));
1480
1481 C.addTransition(state);
1482 }
1483}
1484
1485void CStringChecker::evalMemcpy(CheckerContext &C, const CallEvent &Call,
1486 CharKind CK) const {
1487 // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1488 // The return value is the address of the destination buffer.
1489 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1490 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1491 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1492
1493 ProgramStateRef State = C.getState();
1494
1495 constexpr bool IsRestricted = true;
1496 constexpr bool IsMempcpy = false;
1497 evalCopyCommon(C, Call, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1498}
1499
1500void CStringChecker::evalMempcpy(CheckerContext &C, const CallEvent &Call,
1501 CharKind CK) const {
1502 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1503 // The return value is a pointer to the byte following the last written byte.
1504 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1505 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1506 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1507
1508 constexpr bool IsRestricted = true;
1509 constexpr bool IsMempcpy = true;
1510 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1511 IsMempcpy, CK);
1512}
1513
1514void CStringChecker::evalMemmove(CheckerContext &C, const CallEvent &Call,
1515 CharKind CK) const {
1516 // void *memmove(void *dst, const void *src, size_t n);
1517 // The return value is the address of the destination buffer.
1518 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
1519 SourceArgExpr Src = {{Call.getArgExpr(1), 1}};
1520 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1521
1522 constexpr bool IsRestricted = false;
1523 constexpr bool IsMempcpy = false;
1524 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1525 IsMempcpy, CK);
1526}
1527
1528void CStringChecker::evalBcopy(CheckerContext &C, const CallEvent &Call) const {
1529 // void bcopy(const void *src, void *dst, size_t n);
1530 SourceArgExpr Src{{Call.getArgExpr(0), 0}};
1531 DestinationArgExpr Dest = {{Call.getArgExpr(1), 1}};
1532 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1533
1534 constexpr bool IsRestricted = false;
1535 constexpr bool IsMempcpy = false;
1536 evalCopyCommon(C, Call, C.getState(), Size, Dest, Src, IsRestricted,
1537 IsMempcpy, CharKind::Regular);
1538}
1539
1540void CStringChecker::evalMemcmp(CheckerContext &C, const CallEvent &Call,
1541 CharKind CK) const {
1542 // int memcmp(const void *s1, const void *s2, size_t n);
1543 CurrentFunctionDescription = "memory comparison function";
1544
1545 AnyArgExpr Left = {Call.getArgExpr(0), 0};
1546 AnyArgExpr Right = {Call.getArgExpr(1), 1};
1547 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
1548
1549 ProgramStateRef State = C.getState();
1550 SValBuilder &Builder = C.getSValBuilder();
1551 const LocationContext *LCtx = C.getLocationContext();
1552
1553 // See if the size argument is zero.
1554 SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1555 QualType sizeTy = Size.Expression->getType();
1556
1557 ProgramStateRef stateZeroSize, stateNonZeroSize;
1558 std::tie(stateZeroSize, stateNonZeroSize) =
1559 assumeZero(C, State, sizeVal, sizeTy);
1560
1561 // If the size can be zero, the result will be 0 in that case, and we don't
1562 // have to check either of the buffers.
1563 if (stateZeroSize) {
1564 State = stateZeroSize;
1565 State = State->BindExpr(Call.getOriginExpr(), LCtx,
1566 Builder.makeZeroVal(Call.getResultType()));
1567 C.addTransition(State);
1568 }
1569
1570 // If the size can be nonzero, we have to check the other arguments.
1571 if (stateNonZeroSize) {
1572 State = stateNonZeroSize;
1573 // If we know the two buffers are the same, we know the result is 0.
1574 // First, get the two buffers' addresses. Another checker will have already
1575 // made sure they're not undefined.
1576 DefinedOrUnknownSVal LV =
1577 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1578 DefinedOrUnknownSVal RV =
1579 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1580
1581 // See if they are the same.
1582 ProgramStateRef SameBuffer, NotSameBuffer;
1583 std::tie(SameBuffer, NotSameBuffer) =
1584 State->assume(Builder.evalEQ(State, LV, RV));
1585
1586 // If the two arguments are the same buffer, we know the result is 0,
1587 // and we only need to check one size.
1588 if (SameBuffer && !NotSameBuffer) {
1589 State = SameBuffer;
1590 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1591 if (State) {
1592 State = SameBuffer->BindExpr(Call.getOriginExpr(), LCtx,
1593 Builder.makeZeroVal(Call.getResultType()));
1594 C.addTransition(State);
1595 }
1596 return;
1597 }
1598
1599 // If the two arguments might be different buffers, we have to check
1600 // the size of both of them.
1601 assert(NotSameBuffer);
1602 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1603 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1604 if (State) {
1605 // The return value is the comparison result, which we don't know.
1606 SVal CmpV = Builder.conjureSymbolVal(Call, C.blockCount());
1607 State = State->BindExpr(Call.getOriginExpr(), LCtx, CmpV);
1608 C.addTransition(State);
1609 }
1610 }
1611}
1612
1613void CStringChecker::evalstrLength(CheckerContext &C,
1614 const CallEvent &Call) const {
1615 // size_t strlen(const char *s);
1616 evalstrLengthCommon(C, Call, /* IsStrnlen = */ false);
1617}
1618
1619void CStringChecker::evalstrnLength(CheckerContext &C,
1620 const CallEvent &Call) const {
1621 // size_t strnlen(const char *s, size_t maxlen);
1622 evalstrLengthCommon(C, Call, /* IsStrnlen = */ true);
1623}
1624
1625void CStringChecker::evalstrLengthCommon(CheckerContext &C,
1626 const CallEvent &Call,
1627 bool IsStrnlen) const {
1628 CurrentFunctionDescription = "string length function";
1629 ProgramStateRef state = C.getState();
1630 const LocationContext *LCtx = C.getLocationContext();
1631
1632 if (IsStrnlen) {
1633 const Expr *maxlenExpr = Call.getArgExpr(1);
1634 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1635
1636 ProgramStateRef stateZeroSize, stateNonZeroSize;
1637 std::tie(stateZeroSize, stateNonZeroSize) =
1638 assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1639
1640 // If the size can be zero, the result will be 0 in that case, and we don't
1641 // have to check the string itself.
1642 if (stateZeroSize) {
1643 SVal zero = C.getSValBuilder().makeZeroVal(Call.getResultType());
1644 stateZeroSize = stateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, zero);
1645 C.addTransition(stateZeroSize);
1646 }
1647
1648 // If the size is GUARANTEED to be zero, we're done!
1649 if (!stateNonZeroSize)
1650 return;
1651
1652 // Otherwise, record the assumption that the size is nonzero.
1653 state = stateNonZeroSize;
1654 }
1655
1656 // Check that the string argument is non-null.
1657 AnyArgExpr Arg = {Call.getArgExpr(0), 0};
1658 SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1659 state = checkNonNull(C, state, Arg, ArgVal);
1660
1661 if (!state)
1662 return;
1663
1664 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1665
1666 // If the argument isn't a valid C string, there's no valid state to
1667 // transition to.
1668 if (strLength.isUndef())
1669 return;
1670
1671 DefinedOrUnknownSVal result = UnknownVal();
1672
1673 // If the check is for strnlen() then bind the return value to no more than
1674 // the maxlen value.
1675 if (IsStrnlen) {
1676 QualType cmpTy = C.getSValBuilder().getConditionType();
1677
1678 // It's a little unfortunate to be getting this again,
1679 // but it's not that expensive...
1680 const Expr *maxlenExpr = Call.getArgExpr(1);
1681 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1682
1683 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1684 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1685
1686 if (strLengthNL && maxlenValNL) {
1687 ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1688
1689 // Check if the strLength is greater than the maxlen.
1690 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1691 C.getSValBuilder()
1692 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1693 .castAs<DefinedOrUnknownSVal>());
1694
1695 if (stateStringTooLong && !stateStringNotTooLong) {
1696 // If the string is longer than maxlen, return maxlen.
1697 result = *maxlenValNL;
1698 } else if (stateStringNotTooLong && !stateStringTooLong) {
1699 // If the string is shorter than maxlen, return its length.
1700 result = *strLengthNL;
1701 }
1702 }
1703
1704 if (result.isUnknown()) {
1705 // If we don't have enough information for a comparison, there's
1706 // no guarantee the full string length will actually be returned.
1707 // All we know is the return value is the min of the string length
1708 // and the limit. This is better than nothing.
1709 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1710 NonLoc resultNL = result.castAs<NonLoc>();
1711
1712 if (strLengthNL) {
1713 state = state->assume(C.getSValBuilder().evalBinOpNN(
1714 state, BO_LE, resultNL, *strLengthNL, cmpTy)
1715 .castAs<DefinedOrUnknownSVal>(), true);
1716 }
1717
1718 if (maxlenValNL) {
1719 state = state->assume(C.getSValBuilder().evalBinOpNN(
1720 state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1721 .castAs<DefinedOrUnknownSVal>(), true);
1722 }
1723 }
1724
1725 } else {
1726 // This is a plain strlen(), not strnlen().
1727 result = strLength.castAs<DefinedOrUnknownSVal>();
1728
1729 // If we don't know the length of the string, conjure a return
1730 // value, so it can be used in constraints, at least.
1731 if (result.isUnknown()) {
1732 result = C.getSValBuilder().conjureSymbolVal(Call, C.blockCount());
1733 }
1734 }
1735
1736 // Bind the return value.
1737 assert(!result.isUnknown() && "Should have conjured a value by now");
1738 state = state->BindExpr(Call.getOriginExpr(), LCtx, result);
1739 C.addTransition(state);
1740}
1741
1742void CStringChecker::evalStrcpy(CheckerContext &C,
1743 const CallEvent &Call) const {
1744 // char *strcpy(char *restrict dst, const char *restrict src);
1745 evalStrcpyCommon(C, Call,
1746 /* ReturnEnd = */ false,
1747 /* IsBounded = */ false,
1748 /* appendK = */ ConcatFnKind::none);
1749}
1750
1751void CStringChecker::evalStrncpy(CheckerContext &C,
1752 const CallEvent &Call) const {
1753 // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1754 evalStrcpyCommon(C, Call,
1755 /* ReturnEnd = */ false,
1756 /* IsBounded = */ true,
1757 /* appendK = */ ConcatFnKind::none);
1758}
1759
1760void CStringChecker::evalStpcpy(CheckerContext &C,
1761 const CallEvent &Call) const {
1762 // char *stpcpy(char *restrict dst, const char *restrict src);
1763 evalStrcpyCommon(C, Call,
1764 /* ReturnEnd = */ true,
1765 /* IsBounded = */ false,
1766 /* appendK = */ ConcatFnKind::none);
1767}
1768
1769void CStringChecker::evalStrlcpy(CheckerContext &C,
1770 const CallEvent &Call) const {
1771 // size_t strlcpy(char *dest, const char *src, size_t size);
1772 evalStrcpyCommon(C, Call,
1773 /* ReturnEnd = */ true,
1774 /* IsBounded = */ true,
1775 /* appendK = */ ConcatFnKind::none,
1776 /* returnPtr = */ false);
1777}
1778
1779void CStringChecker::evalStrcat(CheckerContext &C,
1780 const CallEvent &Call) const {
1781 // char *strcat(char *restrict s1, const char *restrict s2);
1782 evalStrcpyCommon(C, Call,
1783 /* ReturnEnd = */ false,
1784 /* IsBounded = */ false,
1785 /* appendK = */ ConcatFnKind::strcat);
1786}
1787
1788void CStringChecker::evalStrncat(CheckerContext &C,
1789 const CallEvent &Call) const {
1790 // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1791 evalStrcpyCommon(C, Call,
1792 /* ReturnEnd = */ false,
1793 /* IsBounded = */ true,
1794 /* appendK = */ ConcatFnKind::strcat);
1795}
1796
1797void CStringChecker::evalStrlcat(CheckerContext &C,
1798 const CallEvent &Call) const {
1799 // size_t strlcat(char *dst, const char *src, size_t size);
1800 // It will append at most size - strlen(dst) - 1 bytes,
1801 // NULL-terminating the result.
1802 evalStrcpyCommon(C, Call,
1803 /* ReturnEnd = */ false,
1804 /* IsBounded = */ true,
1805 /* appendK = */ ConcatFnKind::strlcat,
1806 /* returnPtr = */ false);
1807}
1808
/// Shared model for the string copy and concatenation functions handled by
/// this checker: strcpy, strncpy, stpcpy, strlcpy, strcat, strncat, strlcat.
///
/// \param C          Checker context used to query and extend the exploded
///                   graph.
/// \param Call       The call being modelled. Argument 0 is the destination,
///                   argument 1 the source and, for bounded variants,
///                   argument 2 the size.
/// \param ReturnEnd  Only consulted when \p returnPtr is true: the result is
///                   then destination + final string length (when that is
///                   known) instead of the destination pointer itself.
/// \param IsBounded  True when the call carries a size argument.
/// \param appendK    Concatenation flavour; ConcatFnKind::none means the
///                   destination is overwritten rather than appended to.
/// \param returnPtr  True when the call returns a pointer; false when it
///                   returns a size (the strlcpy / strlcat wrappers).
///
/// The function checks both pointers for null, checks for overlap, works out
/// how much is copied and the largest index a bounded call may touch, checks
/// the destination for overflow, invalidates destination and source, records
/// the destination's new string length and finally binds the return value.
/// Every early `return` below leaves without adding a transition, except the
/// known-zero-size case, which adds its own.
1809void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallEvent &Call,
1810 bool ReturnEnd, bool IsBounded,
1811 ConcatFnKind appendK,
1812 bool returnPtr) const {
1813 if (appendK == ConcatFnKind::none)
1814 CurrentFunctionDescription = "string copy function";
1815 else
1816 CurrentFunctionDescription = "string concatenation function";
1817
1818 ProgramStateRef state = C.getState();
1819 const LocationContext *LCtx = C.getLocationContext();
1820
1821 // Check that the destination is non-null.
1822 DestinationArgExpr Dst = {{Call.getArgExpr(0), 0}};
1823 SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1824 state = checkNonNull(C, state, Dst, DstVal);
1825 if (!state)
1826 return;
1827
1828 // Check that the source is non-null.
1829 SourceArgExpr srcExpr = {{Call.getArgExpr(1), 1}};
1830 SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1831 state = checkNonNull(C, state, srcExpr, srcVal);
1832 if (!state)
1833 return;
1834
1835 // Get the string length of the source.
1836 SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1837 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1838
1839 // Get the string length of the destination buffer.
1840 SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1841 std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1842
1843 // If the source isn't a valid C string, give up.
1844 if (strLength.isUndef())
1845 return;
1846
1847 SValBuilder &svalBuilder = C.getSValBuilder();
1848 QualType cmpTy = svalBuilder.getConditionType();
1849 QualType sizeTy = svalBuilder.getContext().getSizeType();
1850
1851 // These two values allow checking two kinds of errors:
1852 // - actual overflows caused by a source that doesn't fit in the destination
1853 // - potential overflows caused by a bound that could exceed the destination
1854 SVal amountCopied = UnknownVal();
1855 SVal maxLastElementIndex = UnknownVal();
  // Non-null once a bound-based index has been computed; it then suppresses
  // the check based on the final string length further down.
1856 const char *boundWarning = nullptr;
1857
1858 // FIXME: Why do we choose the srcExpr if the access has no size?
1859 // Note that the 3rd argument of the call would be the size parameter.
1860 SizeArgExpr SrcExprAsSizeDummy = {
1861 {srcExpr.Expression, srcExpr.ArgumentIndex}};
1862 state = CheckOverlap(
1863 C, state,
1864 (IsBounded ? SizeArgExpr{{Call.getArgExpr(2), 2}} : SrcExprAsSizeDummy),
1865 Dst, srcExpr);
1866
1867 if (!state)
1868 return;
1869
1870 // If the function is strncpy, strncat, etc... it is bounded.
1871 if (IsBounded) {
1872 // Get the max number of characters to copy.
1873 SizeArgExpr lenExpr = {{Call.getArgExpr(2), 2}};
1874 SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1875
1876 // Protect against misdeclared strncpy().
1877 lenVal =
1878 svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1879
1880 std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1881
1882 // If we know both values, we might be able to figure out how much
1883 // we're copying.
1884 if (strLengthNL && lenValNL) {
1885 switch (appendK) {
1886 case ConcatFnKind::none:
1887 case ConcatFnKind::strcat: {
1888 ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1889 // Check if the max number to copy is less than the length of the src.
1890 // If the bound is equal to the source length, strncpy won't null-
1891 // terminate the result!
1892 std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1893 svalBuilder
1894 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1895 .castAs<DefinedOrUnknownSVal>());
1896
1897 if (stateSourceTooLong && !stateSourceNotTooLong) {
1898 // Max number to copy is less than the length of the src, so the
1899 // actual strLength copied is the max number arg.
1900 state = stateSourceTooLong;
1901 amountCopied = lenVal;
1902
1903 } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1904 // The source buffer entirely fits in the bound.
1905 state = stateSourceNotTooLong;
1906 amountCopied = strLength;
1907 }
        // If both outcomes are feasible, amountCopied stays unknown and the
        // state is left unsplit.
1908 break;
1909 }
1910 case ConcatFnKind::strlcat:
        // The free space depends on the destination length; without it, give
        // up on this call.
1911 if (!dstStrLengthNL)
1912 return;
1913
1914 // amountCopied = min (size - dstLen - 1 , srcLen)
1915 SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1916 *dstStrLengthNL, sizeTy);
1917 if (!isa<NonLoc>(freeSpace))
1918 return;
1919 freeSpace =
1920 svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1921 svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1922 std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1923
1924 // While unlikely, it is possible that the subtraction is
1925 // too complex to compute, let's check whether it succeeded.
1926 if (!freeSpaceNL)
1927 return;
1928 SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1929 state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1930
1931 ProgramStateRef TrueState, FalseState;
1932 std::tie(TrueState, FalseState) =
1933 state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1934
1935 // srcStrLength <= size - dstStrLength -1
1936 if (TrueState && !FalseState) {
1937 amountCopied = strLength;
1938 }
1939
1940 // srcStrLength > size - dstStrLength -1
1941 if (!TrueState && FalseState) {
1942 amountCopied = freeSpace;
1943 }
1944
        // Both outcomes are feasible: the copied amount is not known.
1945 if (TrueState && FalseState)
1946 amountCopied = UnknownVal();
1947 break;
1948 }
1949 }
1950 // We still want to know if the bound is known to be too large.
1951 if (lenValNL) {
1952 switch (appendK) {
1953 case ConcatFnKind::strcat:
1954 // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1955
1956 // Get the string length of the destination. If the destination is
1957 // memory that can't have a string length, we shouldn't be copying
1958 // into it anyway.
1959 if (dstStrLength.isUndef())
1960 return;
1961
1962 if (dstStrLengthNL) {
1963 maxLastElementIndex = svalBuilder.evalBinOpNN(
1964 state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
1965
1966 boundWarning = "Size argument is greater than the free space in the "
1967 "destination buffer";
1968 }
1969 break;
1970 case ConcatFnKind::none:
1971 case ConcatFnKind::strlcat:
1972 // For strncpy, strlcpy and strlcat, this is just checking
1973 // that lenVal <= sizeof(dst).
1974 // (Yes, strncpy and strncat differ in how they treat termination.
1975 // strncat ALWAYS terminates, but strncpy doesn't.)
1976
1977 // We need a special case for when the copy size is zero, in which
1978 // case strncpy will do no work at all. Our bounds check uses n-1
1979 // as the last element accessed, so n == 0 is problematic.
1980 ProgramStateRef StateZeroSize, StateNonZeroSize;
1981 std::tie(StateZeroSize, StateNonZeroSize) =
1982 assumeZero(C, state, *lenValNL, sizeTy);
1983
1984 // If the size is known to be zero, we're done.
1985 if (StateZeroSize && !StateNonZeroSize) {
          // Nothing is written; only the return value has to be bound.
1986 if (returnPtr) {
1987 StateZeroSize =
1988 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, DstVal);
1989 } else {
1990 if (appendK == ConcatFnKind::none) {
1991 // strlcpy returns strlen(src)
1992 StateZeroSize = StateZeroSize->BindExpr(Call.getOriginExpr(),
1993 LCtx, strLength);
1994 } else {
1995 // strlcat returns strlen(src) + strlen(dst)
1996 SVal retSize = svalBuilder.evalBinOp(
1997 state, BO_Add, strLength, dstStrLength, sizeTy);
1998 StateZeroSize =
1999 StateZeroSize->BindExpr(Call.getOriginExpr(), LCtx, retSize);
2000 }
2001 }
2002 C.addTransition(StateZeroSize);
2003 return;
2004 }
2005
2006 // Otherwise, go ahead and figure out the last element we'll touch.
2007 // We don't record the non-zero assumption here because we can't
2008 // be sure. We won't warn on a possible zero.
2009 NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
2010 maxLastElementIndex =
2011 svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
2012 boundWarning = "Size argument is greater than the length of the "
2013 "destination buffer";
2014 break;
2015 }
2016 }
2017 } else {
2018 // The function isn't bounded. The amount copied should match the length
2019 // of the source buffer.
2020 amountCopied = strLength;
2021 }
2022
2023 assert(state);
2024
2025 // This represents the number of characters copied into the destination
2026 // buffer. (It may not actually be the strlen if the destination buffer
2027 // is not terminated.)
2028 SVal finalStrLength = UnknownVal();
  // Return value for the size-returning variants (returnPtr == false).
2029 SVal strlRetVal = UnknownVal();
2030
2031 if (appendK == ConcatFnKind::none && !returnPtr) {
2032 // strlcpy returns strlen(src), the length of the source string.
2033 strlRetVal = strLength;
2034 }
2035
2036 // If this is an appending function (strcat, strncat...) then set the
2037 // string length to strlen(src) + strlen(dst) since the buffer will
2038 // ultimately contain both.
2039 if (appendK != ConcatFnKind::none) {
2040 // Get the string length of the destination. If the destination is memory
2041 // that can't have a string length, we shouldn't be copying into it anyway.
2042 if (dstStrLength.isUndef())
2043 return;
2044
    // strlcat returns strlen(src) + strlen(dst) when both are known.
2045 if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
2046 strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
2047 *dstStrLengthNL, sizeTy);
2048 }
2049
2050 std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
2051
2052 // If we know both string lengths, we might know the final string length.
2053 if (amountCopiedNL && dstStrLengthNL) {
2054 // Make sure the two lengths together don't overflow a size_t.
2055 state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
2056 if (!state)
2057 return;
2058
2059 finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
2060 *dstStrLengthNL, sizeTy);
2061 }
2062
2063 // If we couldn't get a single value for the final string length,
2064 // we can at least bound it by the individual lengths.
2065 if (finalStrLength.isUnknown()) {
2066 // Try to get a "hypothetical" string length symbol, which we can later
2067 // set as a real value if that turns out to be the case.
2068 finalStrLength =
2069 getCStringLength(C, state, Call.getOriginExpr(), DstVal, true);
2070 assert(!finalStrLength.isUndef());
2071
2072 if (std::optional<NonLoc> finalStrLengthNL =
2073 finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this whole block is only entered when
        // appendK != ConcatFnKind::none, so the condition below looks like it
        // can never hold here -- confirm before relying on it.
2074 if (amountCopiedNL && appendK == ConcatFnKind::none) {
2075 // we overwrite dst string with the src
2076 // finalStrLength >= srcStrLength
2077 SVal sourceInResult = svalBuilder.evalBinOpNN(
2078 state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
2079 state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
2080 true);
2081 if (!state)
2082 return;
2083 }
2084
2085 if (dstStrLengthNL && appendK != ConcatFnKind::none) {
2086 // we extend the dst string with the src
2087 // finalStrLength >= dstStrLength
2088 SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
2089 *finalStrLengthNL,
2090 *dstStrLengthNL,
2091 cmpTy);
2092 state =
2093 state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
2094 if (!state)
2095 return;
2096 }
2097 }
2098 }
2099
2100 } else {
2101 // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
2102 // the final string length will match the input string length.
2103 finalStrLength = amountCopied;
2104 }
2105
2106 SVal Result;
2107
2108 if (returnPtr) {
2109 // The final result of the function will either be a pointer past the last
2110 // copied element, or a pointer to the start of the destination buffer.
2111 Result = (ReturnEnd ? UnknownVal() : DstVal);
2112 } else {
2113 if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
2114 //strlcpy, strlcat
2115 Result = strlRetVal;
2116 else
2117 Result = finalStrLength;
2118 }
2119
2120 assert(state);
2121
2122 // If the destination is a MemRegion, try to check for a buffer overflow and
2123 // record the new string length.
2124 if (std::optional<loc::MemRegionVal> dstRegVal =
2125 DstVal.getAs<loc::MemRegionVal>()) {
2126 QualType ptrTy = Dst.Expression->getType();
2127
2128 // If we have an exact value on a bounded copy, use that to check for
2129 // overflows, rather than our estimate about how much is actually copied.
2130 if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2131 SVal maxLastElement =
2132 svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2133
2134 // Check if the first byte of the destination is writable.
2135 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2136 if (!state)
2137 return;
2138 // Check if the last byte of the destination is writable.
2139 state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2140 if (!state)
2141 return;
2142 }
2143
2144 // Then, if the final length is known...
2145 if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2146 SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2147 *knownStrLength, ptrTy);
2148
2149 // ...and we haven't checked the bound, we'll check the actual copy.
2150 if (!boundWarning) {
2151 // Check if the first byte of the destination is writable.
2152 state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
2153 if (!state)
2154 return;
2155 // Check if the last byte of the destination is writable.
2156 state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2157 if (!state)
2158 return;
2159 }
2160
2161 // If this is a stpcpy-style copy, the last element is the return value.
2162 if (returnPtr && ReturnEnd)
2163 Result = lastElement;
2164 }
2165
2166 // For bounded functions, amountCopied is the minimum of two values.
2167 // For ConcatFnKind::strlcat:
2168 //   amountCopied = min (size - dstLen - 1 , srcLen)
2169 // For the others:
2170 //   amountCopied = min (srcLen, size)
2171 // So even if amountCopied itself is unknown, as long as one of the two
2172 // values cannot cause an out-of-bound access, the call as a whole cannot
2173 // either. Detecting that avoids invalidating the super region of a data
2174 // member in that situation.
2175 bool CouldAccessOutOfBound = true;
2176 if (IsBounded && amountCopied.isUnknown()) {
      // Returns true unless the first Val elements of the destination are
      // known to be in bounds; a missing value counts as "could overflow".
2177 auto CouldAccessOutOfBoundForSVal =
2178 [&](std::optional<NonLoc> Val) -> bool {
2179 if (!Val)
2180 return true;
2181 return !isFirstBufInBound(C, state, C.getSVal(Dst.Expression),
2182 Dst.Expression->getType(), *Val,
2183 C.getASTContext().getSizeType());
2184 };
2185
      // Try the source length first, then fall back to the size argument.
2186 CouldAccessOutOfBound = CouldAccessOutOfBoundForSVal(strLengthNL);
2187
2188 if (CouldAccessOutOfBound) {
2189 // Get the max number of characters to copy.
2190 const Expr *LenExpr = Call.getArgExpr(2);
2191 SVal LenVal = state->getSVal(LenExpr, LCtx);
2192
2193 // Protect against misdeclared strncpy().
2194 LenVal = svalBuilder.evalCast(LenVal, sizeTy, LenExpr->getType());
2195
2196 // Because analyzer doesn't handle expressions like `size -
2197 // dstLen - 1` very well, we roughly use `size` for
2198 // ConcatFnKind::strlcat here, same with other concat kinds.
2199 CouldAccessOutOfBound =
2200 CouldAccessOutOfBoundForSVal(LenVal.getAs<NonLoc>());
2201 }
2202 }
2203
2204 // Invalidate the destination (regular invalidation without pointer-escaping
2205 // the address of the top-level region). This must happen before we set the
2206 // C string length because invalidation will clear the length.
2207 // FIXME: Even if we can't perfectly model the copy, we should see if we
2208 // can use LazyCompoundVals to copy the source values into the destination.
2209 // This would probably remove any existing bindings past the end of the
2210 // string, but that's still an improvement over blank invalidation.
2211 if (CouldAccessOutOfBound)
2212 state = invalidateDestinationBufferBySize(
2213 C, state, Dst.Expression, Call.getCFGElementRef(), *dstRegVal,
2214 amountCopied, C.getASTContext().getSizeType());
2215 else
2216 state = invalidateDestinationBufferNeverOverflows(
2217 C, state, Call.getCFGElementRef(), *dstRegVal);
2218
2219 // Invalidate the source (const-invalidation without const-pointer-escaping
2220 // the address of the top-level region).
2221 state = invalidateSourceBuffer(C, state, Call.getCFGElementRef(), srcVal);
2222
2223 // Set the C string length of the destination, if we know it.
2224 if (IsBounded && (appendK == ConcatFnKind::none)) {
2225 // strncpy is annoying in that it doesn't guarantee to null-terminate
2226 // the result string. If the original string didn't fit entirely inside
2227 // the bound (including the null-terminator), we don't know how long the
2228 // result is.
2229 if (amountCopied != strLength)
2230 finalStrLength = UnknownVal();
2231 }
2232 state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2233 }
2234
2235 assert(state);
2236
2237 if (returnPtr) {
2238 // If this is a stpcpy-style copy, but we were unable to check for a buffer
2239 // overflow, we still need a result. Conjure a return value.
2240 if (ReturnEnd && Result.isUnknown()) {
2241 Result = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2242 }
2243 }
2244 // Set the return value.
2245 state = state->BindExpr(Call.getOriginExpr(), LCtx, Result);
2246 C.addTransition(state);
2247}
2248
2249void CStringChecker::evalStrxfrm(CheckerContext &C,
2250 const CallEvent &Call) const {
2251 // size_t strxfrm(char *dest, const char *src, size_t n);
2252 CurrentFunctionDescription = "locale transformation function";
2253
2254 ProgramStateRef State = C.getState();
2255 const LocationContext *LCtx = C.getLocationContext();
2256 SValBuilder &SVB = C.getSValBuilder();
2257
2258 // Get arguments
2259 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2260 SourceArgExpr Source = {{Call.getArgExpr(1), 1}};
2261 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2262
2263 // `src` can never be null
2264 SVal SrcVal = State->getSVal(Source.Expression, LCtx);
2265 State = checkNonNull(C, State, Source, SrcVal);
2266 if (!State)
2267 return;
2268
2269 // Buffer must not overlap
2270 State = CheckOverlap(C, State, Size, Dest, Source, CK_Regular);
2271 if (!State)
2272 return;
2273
2274 // The function returns an implementation-defined length needed for
2275 // transformation
2276 SVal RetVal = SVB.conjureSymbolVal(Call, C.blockCount());
2277
2278 auto BindReturnAndTransition = [&RetVal, &Call, LCtx,
2279 &C](ProgramStateRef State) {
2280 if (State) {
2281 State = State->BindExpr(Call.getOriginExpr(), LCtx, RetVal);
2282 C.addTransition(State);
2283 }
2284 };
2285
2286 // Check if size is zero
2287 SVal SizeVal = State->getSVal(Size.Expression, LCtx);
2288 QualType SizeTy = Size.Expression->getType();
2289
2290 auto [StateZeroSize, StateSizeNonZero] =
2291 assumeZero(C, State, SizeVal, SizeTy);
2292
2293 // We can't assume anything about size, just bind the return value and be done
2294 if (!StateZeroSize && !StateSizeNonZero)
2295 return BindReturnAndTransition(State);
2296
2297 // If `n` is 0, we just return the implementation defined length
2298 if (StateZeroSize && !StateSizeNonZero)
2299 return BindReturnAndTransition(StateZeroSize);
2300
2301 // If `n` is not 0, `dest` can not be null.
2302 SVal DestVal = StateSizeNonZero->getSVal(Dest.Expression, LCtx);
2303 StateSizeNonZero = checkNonNull(C, StateSizeNonZero, Dest, DestVal);
2304 if (!StateSizeNonZero)
2305 return;
2306
2307 // Check that we can write to the destination buffer
2308 StateSizeNonZero = CheckBufferAccess(C, StateSizeNonZero, Dest, Size,
2309 AccessKind::write, CK_Regular);
2310 if (!StateSizeNonZero)
2311 return;
2312
2313 // Success: return value < `n`
2314 // Failure: return value >= `n`
2315 auto ComparisonVal = SVB.evalBinOp(StateSizeNonZero, BO_LT, RetVal, SizeVal,
2316 SVB.getConditionType())
2317 .getAs<DefinedOrUnknownSVal>();
2318 if (!ComparisonVal) {
2319 // Fallback: invalidate the buffer.
2320 StateSizeNonZero = invalidateDestinationBufferBySize(
2321 C, StateSizeNonZero, Dest.Expression, Call.getCFGElementRef(), DestVal,
2322 SizeVal, Size.Expression->getType());
2323 return BindReturnAndTransition(StateSizeNonZero);
2324 }
2325
2326 auto [StateSuccess, StateFailure] = StateSizeNonZero->assume(*ComparisonVal);
2327
2328 if (StateSuccess) {
2329 // The transformation invalidated the buffer.
2330 StateSuccess = invalidateDestinationBufferBySize(
2331 C, StateSuccess, Dest.Expression, Call.getCFGElementRef(), DestVal,
2332 SizeVal, Size.Expression->getType());
2333 BindReturnAndTransition(StateSuccess);
2334 // Fallthrough: We also want to add a transition to the failure state below.
2335 }
2336
2337 if (StateFailure) {
2338 // `dest` buffer content is undefined
2339 if (auto DestLoc = DestVal.getAs<loc::MemRegionVal>()) {
2340 StateFailure = StateFailure->killBinding(*DestLoc);
2341 StateFailure =
2342 StateFailure->bindDefaultInitial(*DestLoc, UndefinedVal{}, LCtx);
2343 }
2344
2345 BindReturnAndTransition(StateFailure);
2346 }
2347}
2348
2349void CStringChecker::evalStrcmp(CheckerContext &C,
2350 const CallEvent &Call) const {
2351 //int strcmp(const char *s1, const char *s2);
2352 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ false);
2353}
2354
2355void CStringChecker::evalStrncmp(CheckerContext &C,
2356 const CallEvent &Call) const {
2357 //int strncmp(const char *s1, const char *s2, size_t n);
2358 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ false);
2359}
2360
2361void CStringChecker::evalStrcasecmp(CheckerContext &C,
2362 const CallEvent &Call) const {
2363 //int strcasecmp(const char *s1, const char *s2);
2364 evalStrcmpCommon(C, Call, /* IsBounded = */ false, /* IgnoreCase = */ true);
2365}
2366
2367void CStringChecker::evalStrncasecmp(CheckerContext &C,
2368 const CallEvent &Call) const {
2369 //int strncasecmp(const char *s1, const char *s2, size_t n);
2370 evalStrcmpCommon(C, Call, /* IsBounded = */ true, /* IgnoreCase = */ true);
2371}
2372
2373void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallEvent &Call,
2374 bool IsBounded, bool IgnoreCase) const {
2375 CurrentFunctionDescription = "string comparison function";
2376 ProgramStateRef state = C.getState();
2377 const LocationContext *LCtx = C.getLocationContext();
2378
2379 // Check that the first string is non-null
2380 AnyArgExpr Left = {Call.getArgExpr(0), 0};
2381 SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2382 state = checkNonNull(C, state, Left, LeftVal);
2383 if (!state)
2384 return;
2385
2386 // Check that the second string is non-null.
2387 AnyArgExpr Right = {Call.getArgExpr(1), 1};
2388 SVal RightVal = state->getSVal(Right.Expression, LCtx);
2389 state = checkNonNull(C, state, Right, RightVal);
2390 if (!state)
2391 return;
2392
2393 // Get the string length of the first string or give up.
2394 SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2395 if (LeftLength.isUndef())
2396 return;
2397
2398 // Get the string length of the second string or give up.
2399 SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2400 if (RightLength.isUndef())
2401 return;
2402
2403 // If we know the two buffers are the same, we know the result is 0.
2404 // First, get the two buffers' addresses. Another checker will have already
2405 // made sure they're not undefined.
2406 DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2407 DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2408
2409 // See if they are the same.
2410 SValBuilder &svalBuilder = C.getSValBuilder();
2411 DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2412 ProgramStateRef StSameBuf, StNotSameBuf;
2413 std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2414
2415 // If the two arguments might be the same buffer, we know the result is 0,
2416 // and we only need to check one size.
2417 if (StSameBuf) {
2418 StSameBuf =
2419 StSameBuf->BindExpr(Call.getOriginExpr(), LCtx,
2420 svalBuilder.makeZeroVal(Call.getResultType()));
2421 C.addTransition(StSameBuf);
2422
2423 // If the two arguments are GUARANTEED to be the same, we're done!
2424 if (!StNotSameBuf)
2425 return;
2426 }
2427
2428 assert(StNotSameBuf);
2429 state = StNotSameBuf;
2430
2431 // At this point we can go about comparing the two buffers.
2432 // For now, we only do this if they're both known string literals.
2433
2434 // Attempt to extract string literals from both expressions.
2435 const StringLiteral *LeftStrLiteral =
2436 getCStringLiteral(C, state, Left.Expression, LeftVal);
2437 const StringLiteral *RightStrLiteral =
2438 getCStringLiteral(C, state, Right.Expression, RightVal);
2439 bool canComputeResult = false;
2440 SVal resultVal = svalBuilder.conjureSymbolVal(Call, C.blockCount());
2441
2442 if (LeftStrLiteral && RightStrLiteral) {
2443 StringRef LeftStrRef = LeftStrLiteral->getString();
2444 StringRef RightStrRef = RightStrLiteral->getString();
2445
2446 if (IsBounded) {
2447 // Get the max number of characters to compare.
2448 const Expr *lenExpr = Call.getArgExpr(2);
2449 SVal lenVal = state->getSVal(lenExpr, LCtx);
2450
2451 // If the length is known, we can get the right substrings.
2452 if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2453 // Create substrings of each to compare the prefix.
2454 LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2455 RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2456 canComputeResult = true;
2457 }
2458 } else {
2459 // This is a normal, unbounded strcmp.
2460 canComputeResult = true;
2461 }
2462
2463 if (canComputeResult) {
2464 // Real strcmp stops at null characters.
2465 size_t s1Term = LeftStrRef.find('\0');
2466 if (s1Term != StringRef::npos)
2467 LeftStrRef = LeftStrRef.substr(0, s1Term);
2468
2469 size_t s2Term = RightStrRef.find('\0');
2470 if (s2Term != StringRef::npos)
2471 RightStrRef = RightStrRef.substr(0, s2Term);
2472
2473 // Use StringRef's comparison methods to compute the actual result.
2474 int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2475 : LeftStrRef.compare(RightStrRef);
2476
2477 // The strcmp function returns an integer greater than, equal to, or less
2478 // than zero, [c11, p7.24.4.2].
2479 if (compareRes == 0) {
2480 resultVal = svalBuilder.makeIntVal(compareRes, Call.getResultType());
2481 }
2482 else {
2483 DefinedSVal zeroVal = svalBuilder.makeIntVal(0, Call.getResultType());
2484 // Constrain strcmp's result range based on the result of StringRef's
2485 // comparison methods.
2486 BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2487 SVal compareWithZero =
2488 svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2489 svalBuilder.getConditionType());
2490 DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2491 state = state->assume(compareWithZeroVal, true);
2492 }
2493 }
2494 }
2495
2496 state = state->BindExpr(Call.getOriginExpr(), LCtx, resultVal);
2497
2498 // Record this as a possible path.
2499 C.addTransition(state);
2500}
2501
2502void CStringChecker::evalStrsep(CheckerContext &C,
2503 const CallEvent &Call) const {
2504 // char *strsep(char **stringp, const char *delim);
2505 // Verify whether the search string parameter matches the return type.
2506 SourceArgExpr SearchStrPtr = {{Call.getArgExpr(0), 0}};
2507
2508 QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2509 if (CharPtrTy.isNull() || Call.getResultType().getUnqualifiedType() !=
2510 CharPtrTy.getUnqualifiedType())
2511 return;
2512
2513 CurrentFunctionDescription = "strsep()";
2514 ProgramStateRef State = C.getState();
2515 const LocationContext *LCtx = C.getLocationContext();
2516
2517 // Check that the search string pointer is non-null (though it may point to
2518 // a null string).
2519 SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2520 State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2521 if (!State)
2522 return;
2523
2524 // Check that the delimiter string is non-null.
2525 AnyArgExpr DelimStr = {Call.getArgExpr(1), 1};
2526 SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2527 State = checkNonNull(C, State, DelimStr, DelimStrVal);
2528 if (!State)
2529 return;
2530
2531 SValBuilder &SVB = C.getSValBuilder();
2532 SVal Result;
2533 if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2534 // Get the current value of the search string pointer, as a char*.
2535 Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2536
2537 // Invalidate the search string, representing the change of one delimiter
2538 // character to NUL.
2539 // As the replacement never overflows, do not invalidate its super region.
2540 State = invalidateDestinationBufferNeverOverflows(
2541 C, State, Call.getCFGElementRef(), Result);
2542
2543 // Overwrite the search string pointer. The new value is either an address
2544 // further along in the same string, or NULL if there are no more tokens.
2545 State = State->bindLoc(*SearchStrLoc,
2546 SVB.conjureSymbolVal(Call, C.blockCount(), getTag()),
2547 LCtx);
2548 } else {
2549 assert(SearchStrVal.isUnknown());
2550 // Conjure a symbolic value. It's the best we can do.
2551 Result = SVB.conjureSymbolVal(Call, C.blockCount());
2552 }
2553
2554 // Set the return value, and finish.
2555 State = State->BindExpr(Call.getOriginExpr(), LCtx, Result);
2556 C.addTransition(State);
2557}
2558
2559// These should probably be moved into a C++ standard library checker.
2560void CStringChecker::evalStdCopy(CheckerContext &C,
2561 const CallEvent &Call) const {
2562 evalStdCopyCommon(C, Call);
2563}
2564
2565void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2566 const CallEvent &Call) const {
2567 evalStdCopyCommon(C, Call);
2568}
2569
2570void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2571 const CallEvent &Call) const {
2572 if (!Call.getArgExpr(2)->getType()->isPointerType())
2573 return;
2574
2575 ProgramStateRef State = C.getState();
2576
2577 const LocationContext *LCtx = C.getLocationContext();
2578
2579 // template <class _InputIterator, class _OutputIterator>
2580 // _OutputIterator
2581 // copy(_InputIterator __first, _InputIterator __last,
2582 // _OutputIterator __result)
2583
2584 // Invalidate the destination buffer
2585 const Expr *Dst = Call.getArgExpr(2);
2586 SVal DstVal = State->getSVal(Dst, LCtx);
2587 // FIXME: As we do not know how many items are copied, we also invalidate the
2588 // super region containing the target location.
2589 State = invalidateDestinationBufferAlwaysEscapeSuperRegion(
2590 C, State, Call.getCFGElementRef(), DstVal);
2591
2592 SValBuilder &SVB = C.getSValBuilder();
2593
2594 SVal ResultVal = SVB.conjureSymbolVal(Call, C.blockCount());
2595 State = State->BindExpr(Call.getOriginExpr(), LCtx, ResultVal);
2596
2597 C.addTransition(State);
2598}
2599
2600void CStringChecker::evalMemset(CheckerContext &C,
2601 const CallEvent &Call) const {
2602 // void *memset(void *s, int c, size_t n);
2603 CurrentFunctionDescription = "memory set function";
2604
2605 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2606 AnyArgExpr CharE = {Call.getArgExpr(1), 1};
2607 SizeArgExpr Size = {{Call.getArgExpr(2), 2}};
2608
2609 ProgramStateRef State = C.getState();
2610
2611 // See if the size argument is zero.
2612 const LocationContext *LCtx = C.getLocationContext();
2613 SVal SizeVal = C.getSVal(Size.Expression);
2614 QualType SizeTy = Size.Expression->getType();
2615
2616 ProgramStateRef ZeroSize, NonZeroSize;
2617 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2618
2619 // Get the value of the memory area.
2620 SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2621
2622 // If the size is zero, there won't be any actual memory access, so
2623 // just bind the return value to the buffer and return.
2624 if (ZeroSize && !NonZeroSize) {
2625 ZeroSize = ZeroSize->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2626 C.addTransition(ZeroSize);
2627 return;
2628 }
2629
2630 // Ensure the memory area is not null.
2631 // If it is NULL there will be a NULL pointer dereference.
2632 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2633 if (!State)
2634 return;
2635
2636 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2637 if (!State)
2638 return;
2639
2640 // According to the values of the arguments, bind the value of the second
2641 // argument to the destination buffer and set string length, or just
2642 // invalidate the destination buffer.
2643 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(),
2644 C.getSVal(CharE.Expression), Size.Expression, C, State))
2645 return;
2646
2647 State = State->BindExpr(Call.getOriginExpr(), LCtx, BufferPtrVal);
2648 C.addTransition(State);
2649}
2650
2651void CStringChecker::evalBzero(CheckerContext &C, const CallEvent &Call) const {
2652 CurrentFunctionDescription = "memory clearance function";
2653
2654 DestinationArgExpr Buffer = {{Call.getArgExpr(0), 0}};
2655 SizeArgExpr Size = {{Call.getArgExpr(1), 1}};
2656 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2657
2658 ProgramStateRef State = C.getState();
2659
2660 // See if the size argument is zero.
2661 SVal SizeVal = C.getSVal(Size.Expression);
2662 QualType SizeTy = Size.Expression->getType();
2663
2664 ProgramStateRef StateZeroSize, StateNonZeroSize;
2665 std::tie(StateZeroSize, StateNonZeroSize) =
2666 assumeZero(C, State, SizeVal, SizeTy);
2667
2668 // If the size is zero, there won't be any actual memory access,
2669 // In this case we just return.
2670 if (StateZeroSize && !StateNonZeroSize) {
2671 C.addTransition(StateZeroSize);
2672 return;
2673 }
2674
2675 // Get the value of the memory area.
2676 SVal MemVal = C.getSVal(Buffer.Expression);
2677
2678 // Ensure the memory area is not null.
2679 // If it is NULL there will be a NULL pointer dereference.
2680 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2681 if (!State)
2682 return;
2683
2684 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2685 if (!State)
2686 return;
2687
2688 if (!memsetAux(Buffer.Expression, Call.getCFGElementRef(), Zero,
2689 Size.Expression, C, State))
2690 return;
2691
2692 C.addTransition(State);
2693}
2694
2695void CStringChecker::evalSprintf(CheckerContext &C,
2696 const CallEvent &Call) const {
2697 CurrentFunctionDescription = "'sprintf'";
2698 evalSprintfCommon(C, Call, /* IsBounded = */ false);
2699}
2700
2701void CStringChecker::evalSnprintf(CheckerContext &C,
2702 const CallEvent &Call) const {
2703 CurrentFunctionDescription = "'snprintf'";
2704 evalSprintfCommon(C, Call, /* IsBounded = */ true);
2705}
2706
2707void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallEvent &Call,
2708 bool IsBounded) const {
2709 ProgramStateRef State = C.getState();
2710 const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2711 DestinationArgExpr Dest = {{Call.getArgExpr(0), 0}};
2712
2713 const auto NumParams = Call.parameters().size();
2714 if (CE->getNumArgs() < NumParams) {
2715 // This is an invalid call, let's just ignore it.
2716 return;
2717 }
2718
2719 const auto AllArguments =
2720 llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2721 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2722
2723 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2724 // We consider only string buffers
2725 if (const QualType type = ArgExpr->getType();
2726 !type->isAnyPointerType() ||
2727 !type->getPointeeType()->isAnyCharacterType())
2728 continue;
2729 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2730
2731 // Ensure the buffers do not overlap.
2732 SizeArgExpr SrcExprAsSizeDummy = {
2733 {Source.Expression, Source.ArgumentIndex}};
2734 State = CheckOverlap(
2735 C, State,
2736 (IsBounded ? SizeArgExpr{{Call.getArgExpr(1), 1}} : SrcExprAsSizeDummy),
2737 Dest, Source);
2738 if (!State)
2739 return;
2740 }
2741
2742 C.addTransition(State);
2743}
2744
2745//===----------------------------------------------------------------------===//
2746// The driver method, and other Checker callbacks.
2747//===----------------------------------------------------------------------===//
2748
2749CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2750 CheckerContext &C) const {
2751 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2752 if (!CE)
2753 return nullptr;
2754
2755 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2756 if (!FD)
2757 return nullptr;
2758
2759 if (StdCopy.matches(Call))
2760 return &CStringChecker::evalStdCopy;
2761 if (StdCopyBackward.matches(Call))
2762 return &CStringChecker::evalStdCopyBackward;
2763
2764 // Pro-actively check that argument types are safe to do arithmetic upon.
2765 // We do not want to crash if someone accidentally passes a structure
2766 // into, say, a C++ overload of any of these functions. We could not check
2767 // that for std::copy because they may have arguments of other types.
2768 for (auto I : CE->arguments()) {
2769 QualType T = I->getType();
2771 return nullptr;
2772 }
2773
2774 const FnCheck *Callback = Callbacks.lookup(Call);
2775 if (Callback)
2776 return *Callback;
2777
2778 return nullptr;
2779}
2780
2781bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2782 FnCheck Callback = identifyCall(Call, C);
2783
2784 // If the callee isn't a string function, let another checker handle it.
2785 if (!Callback)
2786 return false;
2787
2788 // Check and evaluate the call.
2789 assert(isa<CallExpr>(Call.getOriginExpr()));
2790 Callback(this, C, Call);
2791
2792 // If the evaluate call resulted in no change, chain to the next eval call
2793 // handler.
2794 // Note, the custom CString evaluation calls assume that basic safety
2795 // properties are held. However, if the user chooses to turn off some of these
2796 // checks, we ignore the issues and leave the call evaluation to a generic
2797 // handler.
2798 return C.isDifferent();
2799}
2800
2801void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2802 // Record string length for char a[] = "abc";
2803 ProgramStateRef state = C.getState();
2804
2805 for (const auto *I : DS->decls()) {
2806 const VarDecl *D = dyn_cast<VarDecl>(I);
2807 if (!D)
2808 continue;
2809
2810 // FIXME: Handle array fields of structs.
2811 if (!D->getType()->isArrayType())
2812 continue;
2813
2814 const Expr *Init = D->getInit();
2815 if (!Init)
2816 continue;
2818 continue;
2819
2820 Loc VarLoc = state->getLValue(D, C.getLocationContext());
2821 const MemRegion *MR = VarLoc.getAsRegion();
2822 if (!MR)
2823 continue;
2824
2825 SVal StrVal = C.getSVal(Init);
2826 assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2827 DefinedOrUnknownSVal strLength =
2828 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2829
2830 state = state->set<CStringLength>(MR, strLength);
2831 }
2832
2833 C.addTransition(state);
2834}
2835
2837CStringChecker::checkRegionChanges(ProgramStateRef state,
2838 const InvalidatedSymbols *,
2839 ArrayRef<const MemRegion *> ExplicitRegions,
2840 ArrayRef<const MemRegion *> Regions,
2841 const LocationContext *LCtx,
2842 const CallEvent *Call) const {
2843 CStringLengthTy Entries = state->get<CStringLength>();
2844 if (Entries.isEmpty())
2845 return state;
2846
2847 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2848 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2849
2850 // First build sets for the changed regions and their super-regions.
2851 for (const MemRegion *MR : Regions) {
2852 Invalidated.insert(MR);
2853
2854 SuperRegions.insert(MR);
2855 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2856 MR = SR->getSuperRegion();
2857 SuperRegions.insert(MR);
2858 }
2859 }
2860
2861 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2862
2863 // Then loop over the entries in the current state.
2864 for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2865 // Is this entry for a super-region of a changed region?
2866 if (SuperRegions.count(MR)) {
2867 Entries = F.remove(Entries, MR);
2868 continue;
2869 }
2870
2871 // Is this entry for a sub-region of a changed region?
2872 const MemRegion *Super = MR;
2873 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2874 Super = SR->getSuperRegion();
2875 if (Invalidated.count(Super)) {
2876 Entries = F.remove(Entries, MR);
2877 break;
2878 }
2879 }
2880 }
2881
2882 return state->set<CStringLength>(Entries);
2883}
2884
2885void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2886 SymbolReaper &SR) const {
2887 // Mark all symbols in our string length map as valid.
2888 CStringLengthTy Entries = state->get<CStringLength>();
2889
2890 for (SVal Len : llvm::make_second_range(Entries)) {
2891 for (SymbolRef Sym : Len.symbols())
2892 SR.markInUse(Sym);
2893 }
2894}
2895
2896void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2897 CheckerContext &C) const {
2898 ProgramStateRef state = C.getState();
2899 CStringLengthTy Entries = state->get<CStringLength>();
2900 if (Entries.isEmpty())
2901 return;
2902
2903 CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2904 for (auto [Reg, Len] : Entries) {
2905 if (SymbolRef Sym = Len.getAsSymbol()) {
2906 if (SR.isDead(Sym))
2907 Entries = F.remove(Entries, Reg);
2908 }
2909 }
2910
2911 state = state->set<CStringLength>(Entries);
2912 C.addTransition(state);
2913}
2914
2915void ento::registerCStringModeling(CheckerManager &Mgr) {
2916 // Other checker relies on the modeling implemented in this checker family,
2917 // so this "modeling checker" can register the 'CStringChecker' backend for
2918 // its callbacks without enabling any of its frontends.
2919 Mgr.getChecker<CStringChecker>();
2920}
2921
2922bool ento::shouldRegisterCStringModeling(const CheckerManager &) {
2923 return true;
2924}
2925
2926#define REGISTER_CHECKER(NAME) \
2927 void ento::registerCString##NAME(CheckerManager &Mgr) { \
2928 Mgr.getChecker<CStringChecker>()->NAME.enable(Mgr); \
2929 } \
2930 \
2931 bool ento::shouldRegisterCString##NAME(const CheckerManager &) { \
2932 return true; \
2933 }
2934
2935REGISTER_CHECKER(NullArg)
2936REGISTER_CHECKER(OutOfBounds)
2937REGISTER_CHECKER(BufferOverlap)
2938REGISTER_CHECKER(NotNullTerm)
2939REGISTER_CHECKER(UninitializedRead)
#define V(N, I)
static std::optional< NonLoc > getIndex(ProgramStateRef State, const ElementRegion *ER, CharKind CK)
static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx)
#define REGISTER_CHECKER(name)
#define REGISTER_MAP_WITH_PROGRAMSTATE(Name, Key, Value)
Declares an immutable map of type NameTy, suitable for placement into the ProgramState.
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition ASTContext.h:188
QualType getPointerType(QualType T) const
Return the uniqued reference to the type for a pointer to the specified type.
CanQualType WideCharTy
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
CanQualType CharTy
CanQualType IntTy
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
CanQualType UnsignedCharTy
QualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition CharUnits.h:185
decl_range decls()
Definition Stmt.h:1659
QualType getType() const
Definition Expr.h:144
A (possibly-)qualified type.
Definition TypeBase.h:937
bool isNull() const
Return true if this QualType doesn't point to a type yet.
Definition TypeBase.h:1004
LangAS getAddressSpace() const
Return the address space of this type.
Definition TypeBase.h:8411
QualType getUnqualifiedType() const
Retrieve the unqualified variant of the given type, removing as little sugar as possible.
Definition TypeBase.h:8379
SourceRange getSourceRange() const LLVM_READONLY
SourceLocation tokens are not useful in isolation - they are low level value objects created/interpre...
Definition Stmt.cpp:334
unsigned getLength() const
Definition Expr.h:1911
StringRef getString() const
Definition Expr.h:1869
bool isArrayType() const
Definition TypeBase.h:8621
bool isPointerType() const
Definition TypeBase.h:8522
CanQualType getCanonicalTypeUnqualified() const
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee.
Definition Type.cpp:752
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition TypeBase.h:8996
QualType getType() const
Definition Decl.h:722
const Expr * getInit() const
Definition Decl.h:1367
APSIntPtr getMaxValue(const llvm::APSInt &v)
std::optional< APSIntPtr > evalAPSInt(BinaryOperator::Opcode Op, const llvm::APSInt &V1, const llvm::APSInt &V2)
bool matches(const CallEvent &Call) const
Returns true if the CallEvent is a call to a function that matches the CallDescription.
Checker families (where a single backend class implements multiple related frontends) should derive f...
Definition Checker.h:584
CHECKER * getChecker(AT &&...Args)
If the singleton instance of a checker class is not yet constructed, then construct it (with the ...
ElementRegion is used to represent both array elements and casts.
Definition MemRegion.h:1227
QualType getValueType() const override
Definition MemRegion.h:1249
MemRegion - The root abstract class for all memory regions.
Definition MemRegion.h:98
LLVM_ATTRIBUTE_RETURNS_NONNULL const RegionTy * castAs() const
Definition MemRegion.h:1424
RegionOffset getAsOffset() const
Compute the offset within the top level memory object.
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * StripCasts(bool StripBaseAndDerivedCasts=true) const
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getBaseRegion() const
Kind getKind() const
Definition MemRegion.h:203
@ TK_PreserveContents
Tells that a region's contents is not changed.
Definition MemRegion.h:1672
@ TK_SuppressEscape
Suppress pointer-escaping of a region.
Definition MemRegion.h:1675
void setTrait(SymbolRef Sym, InvalidationKinds IK)
bool hasSymbolicOffset() const
Definition MemRegion.h:83
const MemRegion * getRegion() const
It might return null.
Definition MemRegion.h:81
int64_t getOffset() const
Definition MemRegion.h:85
DefinedOrUnknownSVal makeZeroVal(QualType type)
Construct an SVal representing '0' for the specified type.
BasicValueFactory & getBasicValueFactory()
virtual SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with a memory location and non-location opera...
DefinedSVal getMetadataSymbolVal(const void *symbolTag, const MemRegion *region, const Expr *expr, QualType type, const LocationContext *LCtx, unsigned count)
ProgramStateManager & getStateManager()
virtual SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, Loc lhs, Loc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two memory location operands.
ASTContext & getContext()
nonloc::ConcreteInt makeIntVal(const IntegerLiteral *integer)
QualType getArrayIndexType() const
loc::MemRegionVal makeLoc(SymbolRef sym)
virtual SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, NonLoc lhs, NonLoc rhs, QualType resultTy)=0
Create a new value which represents a binary expression with two non- location operands.
SVal evalCast(SVal V, QualType CastTy, QualType OriginalTy)
Cast a given SVal to another SVal using given QualType's.
QualType getConditionType() const
SVal evalEQ(ProgramStateRef state, SVal lhs, SVal rhs)
DefinedOrUnknownSVal conjureSymbolVal(const void *symbolTag, ConstCFGElementRef elem, const LocationContext *LCtx, unsigned count)
Create a new symbol with a unique 'name'.
SVal evalBinOp(ProgramStateRef state, BinaryOperator::Opcode op, SVal lhs, SVal rhs, QualType type)
SVal - This represents a symbolic expression, which can be either an L-value or an R-value.
Definition SVals.h:56
bool isUndef() const
Definition SVals.h:107
std::optional< T > getAs() const
Convert to the specified SVal type, returning std::nullopt if this SVal is not of the desired type.
Definition SVals.h:87
const MemRegion * getAsRegion() const
Definition SVals.cpp:119
bool isValid() const
Definition SVals.h:111
T castAs() const
Convert to the specified SVal type, asserting that this SVal is of the desired type.
Definition SVals.h:83
bool isUnknown() const
Definition SVals.h:105
LLVM_ATTRIBUTE_RETURNS_NONNULL const StringLiteral * getStringLiteral() const
Definition MemRegion.h:873
LLVM_ATTRIBUTE_RETURNS_NONNULL const MemRegion * getSuperRegion() const
Definition MemRegion.h:487
llvm::iterator_range< symbol_iterator > symbols() const
Definition SymExpr.h:107
bool isDead(SymbolRef sym)
Returns whether or not a symbol has been confirmed dead.
void markInUse(SymbolRef sym)
Marks a symbol as important to a checker.
__inline void unsigned int _2
const internal::VariadicAllOfMatcher< Type > type
Matches Types in the clang AST.
const internal::VariadicDynCastAllOfMatcher< Stmt, Expr > expr
Matches expressions.
bool trackExpressionValue(const ExplodedNode *N, const Expr *E, PathSensitiveBugReport &R, TrackingOptions Opts={})
Attempts to add visitors to track expression value back to its point of origin.
llvm::DenseSet< SymbolRef > InvalidatedSymbols
Definition Store.h:51
IntrusiveRefCntPtr< const ProgramState > ProgramStateRef
const SymExpr * SymbolRef
Definition SymExpr.h:133
DefinedOrUnknownSVal getDynamicExtent(ProgramStateRef State, const MemRegion *MR, SValBuilder &SVB)
@ OS
Indicates that the tracking object is a descendant of a referenced-counted OSObject,...
std::variant< struct RequiresDecl, struct HeaderDecl, struct UmbrellaDirDecl, struct ModuleDecl, struct ExcludeDecl, struct ExportDecl, struct ExportAsDecl, struct ExternModuleDecl, struct UseDecl, struct LinkDecl, struct ConfigMacrosDecl, struct ConflictDecl > Decl
All declarations that can appear in a module declaration.
The JSON file list parser is used to communicate input to InstallAPI.
bool isa(CodeGen::Address addr)
Definition Address.h:330
CFGBlock::ConstCFGElementRef ConstCFGElementRef
Definition CFG.h:1199
@ Result
The result type of a method or function.
Definition TypeBase.h:905
const FunctionProtoType * T
LLVM_READONLY char toUppercase(char c)
Converts the given ASCII character to its uppercase equivalent.
Definition CharInfo.h:233
U cast(CodeGen::Address addr)
Definition Address.h:327
int const char * function
Definition c++config.h:31