LLVM 22.0.0git
PDBStringTableBuilder.cpp
Go to the documentation of this file.
1//===- PDBStringTableBuilder.cpp - PDB String Table -------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10
11#include "llvm/ADT/ArrayRef.h"
15#include "llvm/Support/Endian.h"
17
18#include <map>
19
20using namespace llvm;
21using namespace llvm::msf;
22using namespace llvm::support;
23using namespace llvm::support::endian;
24using namespace llvm::pdb;
25
28
30 // The reference implementation doesn't include code for /src/headerblock
31 // handling, but it can only read natvis entries in lld's PDB files if
32 // this hash function truncates the hash to 16 bits.
33 // PDB/include/misc.h in the reference implementation has a hashSz() function
34 // that returns an unsigned short, which seems to be what is used for
35 // /src/headerblock.
36 return static_cast<uint16_t>(Table->getIdForString(S));
37}
38
42
46
48 return Strings.insert(S);
49}
50
52 return Strings.getIdForString(S);
53}
54
56 return Strings.getStringForId(Id);
57}
58
60 // This is a precomputed list of Buckets given the specified number of
61 // strings. Matching the reference algorithm exactly is not strictly
62 // necessary for correctness, but it helps when comparing LLD's PDBs with
63 // Microsoft's PDBs so as to eliminate superfluous differences.
64 // The reference implementation does (in nmt.h, NMT::grow()):
65 // unsigned StringCount = 0;
66 // unsigned BucketCount = 1;
67 // fn insert() {
68 // ++StringCount;
69 // if (BucketCount * 3 / 4 < StringCount)
70 // BucketCount = BucketCount * 3 / 2 + 1;
71 // }
72 // This list contains all StringCount, BucketCount pairs where BucketCount was
73 // just incremented. It ends before the first BucketCount entry where
74 // BucketCount * 3 would overflow a 32-bit unsigned int.
75 static const std::pair<uint32_t, uint32_t> StringsToBuckets[] = {
76 {0, 1},
77 {1, 2},
78 {2, 4},
79 {4, 7},
80 {6, 11},
81 {9, 17},
82 {13, 26},
83 {20, 40},
84 {31, 61},
85 {46, 92},
86 {70, 139},
87 {105, 209},
88 {157, 314},
89 {236, 472},
90 {355, 709},
91 {532, 1064},
92 {799, 1597},
93 {1198, 2396},
94 {1798, 3595},
95 {2697, 5393},
96 {4045, 8090},
97 {6068, 12136},
98 {9103, 18205},
99 {13654, 27308},
100 {20482, 40963},
101 {30723, 61445},
102 {46084, 92168},
103 {69127, 138253},
104 {103690, 207380},
105 {155536, 311071},
106 {233304, 466607},
107 {349956, 699911},
108 {524934, 1049867},
109 {787401, 1574801},
110 {1181101, 2362202},
111 {1771652, 3543304},
112 {2657479, 5314957},
113 {3986218, 7972436},
114 {5979328, 11958655},
115 {8968992, 17937983},
116 {13453488, 26906975},
117 {20180232, 40360463},
118 {30270348, 60540695},
119 {45405522, 90811043},
120 {68108283, 136216565},
121 {102162424, 204324848},
122 {153243637, 306487273},
123 {229865455, 459730910},
124 {344798183, 689596366},
125 {517197275, 1034394550},
126 {775795913, 1551591826},
127 {1163693870, 2327387740}};
128 const auto *Entry = llvm::lower_bound(
129 StringsToBuckets, std::make_pair(NumStrings, 0U), llvm::less_first());
130 assert(Entry != std::end(StringsToBuckets));
131 return Entry->second;
132}
133
134uint32_t PDBStringTableBuilder::calculateHashTableSize() const {
135 uint32_t Size = sizeof(uint32_t); // Hash table begins with 4-byte size field.
136 Size += sizeof(uint32_t) * computeBucketCount(Strings.size());
137
138 return Size;
139}
140
142 uint32_t Size = 0;
143 Size += sizeof(PDBStringTableHeader);
144 Size += Strings.calculateSerializedSize();
145 Size += calculateHashTableSize();
146 Size += sizeof(uint32_t); // The /names stream ends with the string count.
147 return Size;
148}
149
152 this->Strings = Strings;
153}
154
155Error PDBStringTableBuilder::writeHeader(BinaryStreamWriter &Writer) const {
156 // Write a header
158 H.Signature = PDBStringTableSignature;
159 H.HashVersion = 1;
160 H.ByteSize = Strings.calculateSerializedSize();
161 if (auto EC = Writer.writeObject(H))
162 return EC;
163 assert(Writer.bytesRemaining() == 0);
164 return Error::success();
165}
166
167Error PDBStringTableBuilder::writeStrings(BinaryStreamWriter &Writer) const {
168 if (auto EC = Strings.commit(Writer))
169 return EC;
170
171 assert(Writer.bytesRemaining() == 0);
172 return Error::success();
173}
174
175Error PDBStringTableBuilder::writeHashTable(BinaryStreamWriter &Writer) const {
176 // Write a hash table.
177 uint32_t BucketCount = computeBucketCount(Strings.size());
178 if (auto EC = Writer.writeInteger(BucketCount))
179 return EC;
180 std::vector<ulittle32_t> Buckets(BucketCount);
181
182 for (const auto &Pair : Strings) {
183 StringRef S = Pair.getKey();
184 uint32_t Offset = Pair.getValue();
185 uint32_t Hash = hashStringV1(S);
186
187 for (uint32_t I = 0; I != BucketCount; ++I) {
188 uint32_t Slot = (Hash + I) % BucketCount;
189 if (Buckets[Slot] != 0)
190 continue;
191 Buckets[Slot] = Offset;
192 break;
193 }
194 }
195
196 if (auto EC = Writer.writeArray(ArrayRef<ulittle32_t>(Buckets)))
197 return EC;
198
199 assert(Writer.bytesRemaining() == 0);
200 return Error::success();
201}
202
203Error PDBStringTableBuilder::writeEpilogue(BinaryStreamWriter &Writer) const {
204 if (auto EC = Writer.writeInteger<uint32_t>(Strings.size()))
205 return EC;
206 assert(Writer.bytesRemaining() == 0);
207 return Error::success();
208}
209
211 llvm::TimeTraceScope timeScope("Commit strings table");
212 BinaryStreamWriter SectionWriter;
213
214 std::tie(SectionWriter, Writer) = Writer.split(sizeof(PDBStringTableHeader));
215 if (auto EC = writeHeader(SectionWriter))
216 return EC;
217
218 std::tie(SectionWriter, Writer) =
219 Writer.split(Strings.calculateSerializedSize());
220 if (auto EC = writeStrings(SectionWriter))
221 return EC;
222
223 std::tie(SectionWriter, Writer) = Writer.split(calculateHashTableSize());
224 if (auto EC = writeHashTable(SectionWriter))
225 return EC;
226
227 std::tie(SectionWriter, Writer) = Writer.split(sizeof(uint32_t));
228 if (auto EC = writeEpilogue(SectionWriter))
229 return EC;
230
231 return Error::success();
232}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define I(x, y, z)
Definition MD5.cpp:58
#define H(x, y, z)
Definition MD5.cpp:57
static uint32_t computeBucketCount(uint32_t NumStrings)
Provides write only access to a subclass of WritableBinaryStream.
Error writeArray(ArrayRef< T > Array)
Writes an array of objects of type T to the underlying stream, as if by using memcpy.
Error writeInteger(T Value)
Write the integer Value to the underlying stream in the specified endianness.
Lightweight error class with error context and mandatory checking.
Definition Error.h:159
static ErrorSuccess success()
Create a success value.
Definition Error.h:336
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Represents a read-write view of a CodeView string table.
Error commit(BinaryStreamWriter &Writer) const override
LLVM_ABI uint32_t insert(StringRef S)
LLVM_ABI uint32_t calculateSerializedSize() const
LLVM_ABI void setStrings(const codeview::DebugStringTableSubsection &Strings)
LLVM_ABI Error commit(BinaryStreamWriter &Writer) const
LLVM_ABI StringRef getStringForId(uint32_t Id) const
LLVM_ABI uint32_t getIdForString(StringRef S) const
LLVM_ABI uint32_t hashStringV1(StringRef Str)
Definition Hash.cpp:20
const uint32_t PDBStringTableSignature
Definition RawTypes.h:318
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
Definition STLExtras.h:1976
ArrayRef(const T &OneElt) -> ArrayRef< T >
Function object to check whether the first component of a container supported by std::get (like std::...
Definition STLExtras.h:1435
The header preceding the /names stream.
Definition RawTypes.h:312
LLVM_ABI uint32_t hashLookupKey(StringRef S) const
LLVM_ABI StringRef storageKeyToLookupKey(uint32_t Offset) const
LLVM_ABI StringTableHashTraits(PDBStringTableBuilder &Table)
LLVM_ABI uint32_t lookupKeyToStorageKey(StringRef S)
Adapter to write values to a stream in a particular byte order.