clang 22.0.0git
ModuleDepCollector.h
Go to the documentation of this file.
1//===- ModuleDepCollector.h - Callbacks to collect deps ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#ifndef LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
10#define LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
11
12#include "clang/Basic/LLVM.h"
13#include "clang/Basic/Module.h"
21#include "llvm/ADT/DenseMap.h"
22#include "llvm/ADT/Hashing.h"
23#include "llvm/ADT/StringSet.h"
24#include "llvm/Support/raw_ostream.h"
25#include <optional>
26#include <string>
27#include <unordered_map>
28#include <variant>
29
30namespace clang {
31namespace tooling {
32namespace dependencies {
33
37
38/// Modular dependency that has already been built prior to the dependency scan.
40 std::string ModuleName;
41 std::string PCMFile;
42 std::string ModuleMapFile;
43
44 explicit PrebuiltModuleDep(const Module *M)
45 : ModuleName(M->getTopLevelModuleName()),
46 PCMFile(M->getASTFile()->getName()),
47 ModuleMapFile(M->PresumedModuleMapFile) {}
48};
49
50/// Attributes loaded from AST files of prebuilt modules collected prior to
51/// ModuleDepCollector creation.
52using PrebuiltModulesAttrsMap = llvm::StringMap<PrebuiltModuleASTAttrs>;
54public:
55 /// When a module is discovered to not be in stable directories, traverse &
56 /// update all modules that depend on it.
57 void
59
60 /// Read-only access to whether the module is made up of dependencies in
61 /// stable directories.
62 bool isInStableDir() const { return IsInStableDirs; }
63
64 /// Read-only access to vfs map files.
65 const llvm::StringSet<> &getVFS() const { return VFSMap; }
66
67 /// Update the VFSMap to the one discovered from serializing the AST file.
68 void setVFS(llvm::StringSet<> &&VFS) { VFSMap = std::move(VFS); }
69
70 /// Add a direct dependent module file, so it can be updated if the current
71 /// module is from stable directories.
72 void addDependent(StringRef ModuleFile) {
73 ModuleFileDependents.insert(ModuleFile);
74 }
75
76 /// Update whether the prebuilt module resolves entirely in stable
77 /// directories.
78 void setInStableDir(bool V = false) {
79 // Cannot reset attribute once it's false.
80 if (!IsInStableDirs)
81 return;
82 IsInStableDirs = V;
83 }
84
85private:
86 llvm::StringSet<> VFSMap;
87 bool IsInStableDirs = true;
88 std::set<StringRef> ModuleFileDependents;
89};
90
/// Key that uniquely identifies one module: its name paired with the hash of
/// the context it is built in.
struct ModuleID {
  /// Name of the module. For C++20 module partitions the name may contain a
  /// `:`, and for C++20 header units it is a header-name.
  std::string ModuleName;

  /// Hash of the compiler options that affect the resulting command-line
  /// invocation.
  ///
  /// Two modules that share a name and a ContextHash but are built with
  /// different invocations could lead to non-deterministic build results.
  ///
  /// Modules that share a name but differ in \c ContextHash should be treated
  /// as distinct modules for the purpose of a build.
  std::string ContextHash;

  /// Two IDs are equal when both the name and the context hash match.
  bool operator==(const ModuleID &Other) const {
    return ModuleName == Other.ModuleName && ContextHash == Other.ContextHash;
  }

  /// Lexicographic ordering: the module name decides first, and the context
  /// hash breaks ties between equal names.
  bool operator<(const ModuleID &Other) const {
    if (const int NameOrder = ModuleName.compare(Other.ModuleName))
      return NameOrder < 0;
    return ContextHash < Other.ContextHash;
  }
};
117
118/// P1689ModuleInfo - Represents the needed information of standard C++20
119/// modules for P1689 format.
121 /// The name of the module. This may include `:` for partitions.
122 std::string ModuleName;
123
124 /// Optional. The source path to the module.
125 std::string SourcePath;
126
127 /// If this module is a standard C++ interface unit.
129
130 enum class ModuleType {
132 // To be supported
133 // AngleHeaderUnit,
134 // QuoteHeaderUnit
135 };
137};
138
139/// An output from a module compilation, such as the path of the module file.
141 /// The module file (.pcm). Required.
143 /// The path of the dependency file (.d), if any.
145 /// The null-separated list of names to use as the targets in the dependency
146 /// file, if any. Defaults to the value of \c ModuleFile, as in the driver.
148 /// The path of the serialized diagnostic file (.dia), if any.
150};
151
153 /// The identifier of the module.
155
156 /// Whether this is a "system" module.
158
159 /// Whether this module is fully composed of file & module inputs from
160 /// locations likely to stay the same across the active development and build
161 /// cycle. For example, when all those input paths only resolve in Sysroot.
162 ///
163 /// External paths, as opposed to virtual file paths, are always used
164 /// for computing this value.
166
167 /// The path to the modulemap file which defines this module.
168 ///
169 /// This can be used to explicitly build this module. This file will
170 /// additionally appear in \c FileDeps as a dependency.
172
173 /// A collection of absolute paths to module map files that this module needs
174 /// to know about. The ordering is significant.
175 std::vector<std::string> ModuleMapFileDeps;
176
177 /// A collection of prebuilt modular dependencies this module directly depends
178 /// on, not including transitive dependencies.
179 std::vector<PrebuiltModuleDep> PrebuiltModuleDeps;
180
181 /// A list of module identifiers this module directly depends on, not
182 /// including transitive dependencies.
183 ///
184 /// This may include modules with a different context hash when it can be
185 /// determined that the differences are benign for this compilation.
186 std::vector<ModuleID> ClangModuleDeps;
187
188 /// The set of libraries or frameworks to link against when
189 /// an entity from this module is used.
191
192 /// Invokes \c Cb for all file dependencies of this module. Each provided
193 /// \c StringRef is only valid within the individual callback invocation.
194 void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
195
196 /// Get (or compute) the compiler invocation that can be used to build this
197 /// module. Does not include argv[0].
198 const std::vector<std::string> &getBuildArguments() const;
199
200private:
201 friend class ModuleDepCollector;
203
204 /// The base directory for relative paths in \c FileDeps.
205 std::string FileDepsBaseDir;
206
207 /// A collection of paths to files that this module directly depends on, not
208 /// including transitive dependencies.
209 std::vector<std::string> FileDeps;
210
211 mutable std::variant<std::monostate, CowCompilerInvocation,
212 std::vector<std::string>>
213 BuildInfo;
214};
215
217
218/// Callback that records textual includes and direct modular includes/imports
219/// during preprocessing. At the end of the main file, it also collects
220/// transitive modular dependencies and passes everything to the
221/// \c DependencyConsumer of the parent \c ModuleDepCollector.
222class ModuleDepCollectorPP final : public PPCallbacks {
223public:
225
228 SourceLocation Loc) override;
229 void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
230 StringRef FileName, bool IsAngled,
231 CharSourceRange FilenameRange,
232 OptionalFileEntryRef File, StringRef SearchPath,
233 StringRef RelativePath, const Module *SuggestedModule,
234 bool ModuleImported,
236 void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path,
237 const Module *Imported) override;
238
239 void EndOfMainFile() override;
240
241private:
242 /// The parent dependency collector.
244
245 void handleImport(const Module *Imported);
246
247 /// Adds direct modular dependencies that have already been built to the
248 /// ModuleDeps instance.
249 void
250 addAllSubmodulePrebuiltDeps(const Module *M, ModuleDeps &MD,
251 llvm::DenseSet<const Module *> &SeenSubmodules);
252 void addModulePrebuiltDeps(const Module *M, ModuleDeps &MD,
253 llvm::DenseSet<const Module *> &SeenSubmodules);
254
255 /// Traverses the previously collected direct modular dependencies to discover
256 /// transitive modular dependencies and fills the parent \c ModuleDepCollector
257 /// with both.
258 /// Returns the ID or nothing if the dependency is spurious and is ignored.
259 std::optional<ModuleID> handleTopLevelModule(const Module *M);
260 void addAllSubmoduleDeps(const Module *M, ModuleDeps &MD,
261 llvm::DenseSet<const Module *> &AddedModules);
262 void addModuleDep(const Module *M, ModuleDeps &MD,
263 llvm::DenseSet<const Module *> &AddedModules);
264
265 /// Traverses the affecting modules and updates \c MD with references to the
266 /// parent \c ModuleDepCollector info.
267 void addAllAffectingClangModules(const Module *M, ModuleDeps &MD,
268 llvm::DenseSet<const Module *> &AddedModules);
269 void addAffectingClangModule(const Module *M, ModuleDeps &MD,
270 llvm::DenseSet<const Module *> &AddedModules);
271
272 /// Add discovered module dependency for the given module.
273 void addOneModuleDep(const Module *M, const ModuleID ID, ModuleDeps &MD);
274};
275
276/// Collects modular and non-modular dependencies of the main file by attaching
277/// \c ModuleDepCollectorPP to the preprocessor.
279public:
281 std::unique_ptr<DependencyOutputOptions> Opts,
282 CompilerInstance &ScanInstance, DependencyConsumer &C,
283 DependencyActionController &Controller,
284 CompilerInvocation OriginalCI,
285 const PrebuiltModulesAttrsMap PrebuiltModulesASTMap,
286 const ArrayRef<StringRef> StableDirs);
287
288 void attachToPreprocessor(Preprocessor &PP) override;
289 void attachToASTReader(ASTReader &R) override;
290
291 /// Apply any changes implied by the discovered dependencies to the given
292 /// invocation, (e.g. disable implicit modules, add explicit module paths).
294
295private:
296 friend ModuleDepCollectorPP;
297
298 /// The parent dependency scanning service.
300 /// The compiler instance for scanning the current translation unit.
301 CompilerInstance &ScanInstance;
302 /// The consumer of collected dependency information.
303 DependencyConsumer &Consumer;
304 /// Callbacks for computing dependency information.
305 DependencyActionController &Controller;
306 /// Mapping from prebuilt AST filepaths to their attributes referenced during
307 /// dependency collecting.
308 const PrebuiltModulesAttrsMap PrebuiltModulesASTMap;
309 /// Directory paths known to be stable through an active development and build
310 /// cycle.
311 const ArrayRef<StringRef> StableDirs;
312 /// Path to the main source file.
313 std::string MainFile;
314 /// Non-modular file dependencies. This includes the main source file and
315 /// textually included header files.
316 std::vector<std::string> FileDeps;
317 /// Direct and transitive modular dependencies of the main source file.
318 llvm::MapVector<const Module *, std::unique_ptr<ModuleDeps>> ModularDeps;
319 /// Secondary mapping for \c ModularDeps allowing lookup by ModuleID without
320 /// a preprocessor. Storage owned by \c ModularDeps.
321 llvm::DenseMap<ModuleID, ModuleDeps *> ModuleDepsByID;
322 /// Direct modular dependencies that have already been built.
323 llvm::MapVector<const Module *, PrebuiltModuleDep> DirectPrebuiltModularDeps;
324 /// Working set of direct modular dependencies.
325 llvm::SetVector<const Module *> DirectModularDeps;
326 /// Working set of direct modular dependencies, as they were imported.
328 /// All direct and transitive visible modules.
329 llvm::StringSet<> VisibleModules;
330
331 /// Options that control the dependency output generation.
332 std::unique_ptr<DependencyOutputOptions> Opts;
333 /// A Clang invocation that's based on the original TU invocation and that has
334 /// been partially transformed into one that can perform explicit build of
335 /// a discovered modular dependency. Note that this still needs to be adjusted
336 /// for each individual module.
337 CowCompilerInvocation CommonInvocation;
338
339 std::optional<P1689ModuleInfo> ProvidedStdCXXModule;
340 std::vector<P1689ModuleInfo> RequiredStdCXXModules;
341
342 /// Checks whether the module is known as being prebuilt.
343 bool isPrebuiltModule(const Module *M);
344
345 /// Computes all visible modules resolved from direct imports.
346 void addVisibleModules();
347
348 /// Adds \p Path to \c FileDeps, making it absolute if necessary.
349 void addFileDep(StringRef Path);
350 /// Adds \p Path to \c MD.FileDeps, making it absolute if necessary.
351 void addFileDep(ModuleDeps &MD, StringRef Path);
352
353 /// Get a Clang invocation adjusted to build the given modular dependency.
354 /// This excludes paths that are yet-to-be-provided by the build system.
355 CowCompilerInvocation getInvocationAdjustedForModuleBuildWithoutOutputs(
356 const ModuleDeps &Deps,
357 llvm::function_ref<void(CowCompilerInvocation &)> Optimize) const;
358
359 /// Collect module map files for given modules.
360 llvm::DenseSet<const FileEntry *>
361 collectModuleMapFiles(ArrayRef<ModuleID> ClangModuleDeps) const;
362
363 /// Add module map files to the invocation, if needed.
364 void addModuleMapFiles(CompilerInvocation &CI,
365 ArrayRef<ModuleID> ClangModuleDeps) const;
366 /// Add module files (pcm) to the invocation, if needed.
367 void addModuleFiles(CompilerInvocation &CI,
368 ArrayRef<ModuleID> ClangModuleDeps) const;
369 void addModuleFiles(CowCompilerInvocation &CI,
370 ArrayRef<ModuleID> ClangModuleDeps) const;
371
372 /// Add paths that require looking up outputs to the given dependencies.
373 void addOutputPaths(CowCompilerInvocation &CI, ModuleDeps &Deps);
374
375 /// Compute the context hash for \p Deps, and create the mapping
376 /// \c ModuleDepsByID[Deps.ID] = &Deps.
377 void associateWithContextHash(const CowCompilerInvocation &CI, bool IgnoreCWD,
378 ModuleDeps &Deps);
379};
380
381/// Resets codegen options that don't affect modules/PCH.
383 const LangOptions &LangOpts,
384 CodeGenOptions &CGOpts);
385
386/// Determine if \c Input can be resolved within a stable directory.
387///
388/// \param Directories Paths known to be in a stable location. e.g. Sysroot.
389/// \param Input Path to evaluate.
390bool isPathInStableDir(const ArrayRef<StringRef> Directories,
391 const StringRef Input);
392
393/// Determine if options collected from a module's
394/// compilation can safely be considered as stable.
395///
396/// \param Directories Paths known to be in a stable location. e.g. Sysroot.
397/// \param HSOpts Header search options derived from the compiler invocation.
398bool areOptionsInStableDir(const ArrayRef<StringRef> Directories,
399 const HeaderSearchOptions &HSOpts);
400
401} // end namespace dependencies
402} // end namespace tooling
403} // end namespace clang
404
405namespace llvm {
407 return hash_combine(ID.ModuleName, ID.ContextHash);
408}
409
410template <> struct DenseMapInfo<clang::tooling::dependencies::ModuleID> {
412 static inline ModuleID getEmptyKey() { return ModuleID{"", ""}; }
413 static inline ModuleID getTombstoneKey() {
414 return ModuleID{"~", "~"}; // ~ is not a valid module name or context hash
415 }
416 static unsigned getHashValue(const ModuleID &ID) { return hash_value(ID); }
417 static bool isEqual(const ModuleID &LHS, const ModuleID &RHS) {
418 return LHS == RHS;
419 }
420};
421} // namespace llvm
422
423#endif // LLVM_CLANG_TOOLING_DEPENDENCYSCANNING_MODULEDEPCOLLECTOR_H
#define V(N, I)
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified.
llvm::MachO::FileType FileType
Definition MachO.h:46
Defines the clang::Module class, which describes a module in the source code.
Defines the PPCallbacks interface.
Defines the SourceManager interface.
Reads an AST files chain containing the contents of a translation unit.
Definition ASTReader.h:430
Represents a character-granular source range.
CodeGenOptions - Track various options which control how the code is optimized and passed to the back...
CompilerInstance - Helper class for managing a single instance of the Clang compiler.
Helper class for holding the data necessary to invoke the compiler.
Same as CompilerInvocation, but with copy-on-write optimization.
An interface for collecting the dependencies of a compilation.
Definition Utils.h:63
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
HeaderSearchOptions - Helper class for storing options related to the initialization of the HeaderSea...
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Describes a module or submodule.
Definition Module.h:144
This interface provides a way to observe the actions of the preprocessor as it does its thing.
Definition PPCallbacks.h:37
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Encodes a location in the source.
Token - This structure provides full information about a lexed token.
Definition Token.h:36
Dependency scanner callbacks that are used during scanning to influence the behaviour of the scan - f...
The dependency scanning service contains shared configuration and state that is used by the individua...
void EndOfMainFile() override
Callback invoked when the end of the main file is reached.
void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok, StringRef FileName, bool IsAngled, CharSourceRange FilenameRange, OptionalFileEntryRef File, StringRef SearchPath, StringRef RelativePath, const Module *SuggestedModule, bool ModuleImported, SrcMgr::CharacteristicKind FileType) override
Callback invoked whenever an inclusion directive of any kind (#include, #import, etc....
void LexedFileChanged(FileID FID, LexedFileChangeReason Reason, SrcMgr::CharacteristicKind FileType, FileID PrevFID, SourceLocation Loc) override
Callback invoked whenever the Lexer moves to a different file for lexing.
void moduleImport(SourceLocation ImportLoc, ModuleIdPath Path, const Module *Imported) override
Callback invoked whenever there was an explicit module-import syntax.
Collects modular and non-modular dependencies of the main file by attaching ModuleDepCollectorPP to t...
ModuleDepCollector(DependencyScanningService &Service, std::unique_ptr< DependencyOutputOptions > Opts, CompilerInstance &ScanInstance, DependencyConsumer &C, DependencyActionController &Controller, CompilerInvocation OriginalCI, const PrebuiltModulesAttrsMap PrebuiltModulesASTMap, const ArrayRef< StringRef > StableDirs)
void applyDiscoveredDependencies(CompilerInvocation &CI)
Apply any changes implied by the discovered dependencies to the given invocation, (e....
void updateDependentsNotInStableDirs(PrebuiltModulesAttrsMap &PrebuiltModulesMap)
When a module is discovered to not be in stable directories, traverse & update all modules that depen...
void setVFS(llvm::StringSet<> &&VFS)
Update the VFSMap to the one discovered from serializing the AST file.
void addDependent(StringRef ModuleFile)
Add a direct dependent module file, so it can be updated if the current module is from stable directo...
void setInStableDir(bool V=false)
Update whether the prebuilt module resolves entirely in a stable directories.
const llvm::StringSet & getVFS() const
Read-only access to vfs map files.
bool isInStableDir() const
Read-only access to whether the module is made up of dependencies in stable directories.
CharacteristicKind
Indicates whether a file or directory holds normal user code, system code, or system code which is im...
ModuleOutputKind
An output from a module compilation, such as the path of the module file.
@ DiagnosticSerializationFile
The path of the serialized diagnostic file (.dia), if any.
@ DependencyFile
The path of the dependency file (.d), if any.
@ DependencyTargets
The null-separated list of names to use as the targets in the dependency file, if any.
@ ModuleFile
The module file (.pcm). Required.
void resetBenignCodeGenOptions(frontend::ActionKind ProgramAction, const LangOptions &LangOpts, CodeGenOptions &CGOpts)
Resets codegen options that don't affect modules/PCH.
@ IgnoreCWD
Ignore the compiler's working directory if it is safe.
bool areOptionsInStableDir(const ArrayRef< StringRef > Directories, const HeaderSearchOptions &HSOpts)
Determine if options collected from a module's compilation can safely be considered as stable.
bool isPathInStableDir(const ArrayRef< StringRef > Directories, const StringRef Input)
Determine if Input can be resolved within a stable directory.
llvm::StringMap< PrebuiltModuleASTAttrs > PrebuiltModulesAttrsMap
Attributes loaded from AST files of prebuilt modules collected prior to ModuleDepCollector creation.
The JSON file list parser is used to communicate input to InstallAPI.
CustomizableOptional< FileEntryRef > OptionalFileEntryRef
Definition FileEntry.h:208
ArrayRef< IdentifierLoc > ModuleIdPath
A sequence of identifier/location pairs used to describe a particular module or submodule,...
@ Other
Other implicit parameter.
Definition Decl.h:1745
Diagnostic wrappers for TextAPI types for error reporting.
Definition Dominators.h:30
hash_code hash_value(const clang::tooling::dependencies::ModuleID &ID)
std::string ClangModuleMapFile
The path to the modulemap file which defines this module.
std::vector< std::string > ModuleMapFileDeps
A collection of absolute paths to module map files that this module needs to know about.
llvm::SmallVector< Module::LinkLibrary, 2 > LinkLibraries
The set of libraries or frameworks to link against when an entity from this module is used.
bool IsInStableDirectories
Whether this module is fully composed of file & module inputs from locations likely to stay the same ...
void forEachFileDep(llvm::function_ref< void(StringRef)> Cb) const
Invokes Cb for all file dependencies of this module.
std::vector< PrebuiltModuleDep > PrebuiltModuleDeps
A collection of prebuilt modular dependencies this module directly depends on, not including transiti...
std::vector< ModuleID > ClangModuleDeps
A list of module identifiers this module directly depends on, not including transitive dependencies.
ModuleID ID
The identifier of the module.
const std::vector< std::string > & getBuildArguments() const
Get (or compute) the compiler invocation that can be used to build this module.
bool IsSystem
Whether this is a "system" module.
This is used to identify a specific module.
std::string ContextHash
The context hash of a module represents the compiler options that affect the resulting command-line i...
std::string ModuleName
The name of the module.
bool operator==(const ModuleID &Other) const
bool operator<(const ModuleID &Other) const
P1689ModuleInfo - Represents the needed information of standard C++20 modules for P1689 format.
std::string SourcePath
Optional. The source path to the module.
std::string ModuleName
The name of the module. This may include : for partitions.
bool IsStdCXXModuleInterface
If this module is a standard c++ interface unit.
static bool isEqual(const ModuleID &LHS, const ModuleID &RHS)