1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
//===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains data definitions and a reader and builder for a symbol
// table for LLVM IR. Its purpose is to allow linkers and other consumers of
// bitcode files to efficiently read the symbol table for symbol resolution
// purposes without needing to construct a module in memory.
//
// As with most object files the symbol table has two parts: the symbol table
// itself and a string table which is referenced by the symbol table.
//
// A symbol table corresponds to a single bitcode file, which may consist of
// multiple modules, so symbol tables may likewise contain symbols for multiple
// modules.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_OBJECT_IRSYMTAB_H
#define LLVM_OBJECT_IRSYMTAB_H
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include <cassert>
#include <cstdint>
#include <vector>
namespace llvm {
struct BitcodeFileContents;
class StringTableBuilder;
namespace irsymtab {
namespace storage {
// The data structures in this namespace define the low-level serialization
// format. Clients that just want to read a symbol table should use the
// irsymtab::Reader class.
using Word = support::ulittle32_t;
/// Locates a string inside the string table by its starting offset and its
/// length.
struct Str {
  Word Offset;
  Word Size;

  /// Resolves this reference against Strtab: the result covers the Size
  /// characters that begin Offset characters into the table. No bounds
  /// checking is performed here.
  StringRef get(StringRef Strtab) const {
    const char *First = Strtab.data() + Offset;
    return StringRef(First, Size);
  }
};
/// Locates a contiguous array of T objects inside the symbol table.
template <typename T> struct Range {
  Word Offset;
  Word Size;

  /// Resolves this reference against Symtab: the result views Size objects of
  /// type T, the first of which lies Offset bytes from the start of the table.
  /// No bounds checking is performed here.
  ArrayRef<T> get(StringRef Symtab) const {
    const char *Bytes = Symtab.data() + Offset;
    return ArrayRef<T>(reinterpret_cast<const T *>(Bytes), Size);
  }
};
/// Records which slice of the symbol table belongs to one particular module.
struct Module {
  /// Index of the module's first symbol, counted from the first symbol of the
  /// file.
  Word Begin;

  /// Index one past the module's last symbol.
  Word End;

  /// Index of the first Uncommon belonging to this Module.
  Word UncBegin;
};
/// This is equivalent to an IR comdat.
///
/// Entries of this type make up the Header::Comdats array, which is what
/// storage::Symbol::ComdatIndex indexes into.
struct Comdat {
/// String-table reference to the comdat's name; Reader::getComdatTable()
/// resolves it to a StringRef.
Str Name;
};
/// Contains the information needed by linkers for symbol resolution, as well as
/// by the LTO implementation itself.
///
/// This is the serialized form of a symbol; Reader::SymbolRef copies these
/// fields into an irsymtab::Symbol while the table is being iterated.
struct Symbol {
/// The mangled symbol name.
Str Name;
/// The unmangled symbol name, or the empty string if this is not an IR
/// symbol.
Str IRName;
/// The index into Header::Comdats, or -1 if not a comdat member.
Word ComdatIndex;
/// Bit field; the position of each piece of information within it is given
/// by FlagBits.
Word Flags;
/// Bit positions within Flags. Enumerators are numbered consecutively, except
/// that FB_visibility spans two bits, so FB_has_uncommon starts at bit 2.
enum FlagBits {
// Bits 0-1: visibility, decoded by irsymtab::Symbol::getVisibility().
FB_visibility, // 2 bits
// Bit 2: set when the symbol has an accompanying storage::Uncommon record
// (Reader::SymbolRef reads, and then steps past, one Uncommon per such
// symbol).
FB_has_uncommon = FB_visibility + 2,
// Bits 3-13: single-bit flags, each tested by a boolean accessor on
// irsymtab::Symbol (e.g. isUndefined(), isWeak(),
// canBeOmittedFromSymbolTable()).
FB_undefined,
FB_weak,
FB_common,
FB_indirect,
FB_used,
FB_tls,
FB_may_omit,
FB_global,
FB_format_specific,
FB_unnamed_addr,
FB_executable,
};
};
/// Holds the symbol fields that are rarely needed. A Symbol refers to one of
/// these only optionally.
struct Uncommon {
  Word CommonSize;
  Word CommonAlign;

  /// COFF-specific: name of the symbol that a weak external resolves to when
  /// it is not defined.
  Str COFFWeakExternFallbackName;

  /// The section name that was specified, if there is one.
  Str SectionName;
};
/// The record found at the very start of the symbol table; the Reader locates
/// every other table through the references stored here.
struct Header {
  /// Format version of the symtab. Increment it whenever the serialized
  /// format changes; it need not be incremented merely because a change to
  /// LLVM would cause a different symbol table to be created.
  Word Version;
  enum { kCurrentVersion = 2 };

  /// Version string of the producer (LLVM_VERSION_STRING " " LLVM_REVISION).
  /// A consumer whose own version does not match should rebuild the symbol
  /// table from IR, because the symbol table format, the symbol enumeration
  /// order and so on may differ.
  Str Producer;

  Range<Module> Modules;
  Range<Comdat> Comdats;
  Range<Symbol> Symbols;
  Range<Uncommon> Uncommons;

  Str TargetTriple;
  Str SourceFileName;

  /// COFF-specific: linker directives.
  Str COFFLinkerOpts;

  /// Dependent library specifiers.
  Range<Str> DependentLibraries;
};
} // end namespace storage
/// Fills in Symtab and StrtabBuilder with a valid symbol and string table for
/// Mods.
///
/// Only the declaration is visible in this header; the definition lives
/// elsewhere.
///
/// \param Mods the modules for which the tables are produced.
/// \param Symtab output buffer that receives the symbol table.
/// \param StrtabBuilder string table builder that receives the strings the
/// symbol table refers to.
/// \param Alloc allocator supplied by the caller. NOTE(review): presumably
/// backs storage needed while building -- confirm against the definition.
/// \returns an Error describing the failure, if any.
Error build(ArrayRef<Module *> Mods, SmallVector<char, 0> &Symtab,
StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc);
/// The in-memory form of one symbol, assembled from a storage::Symbol and,
/// where one exists, its storage::Uncommon record.
struct Symbol {
  // Fields mirrored from storage::Symbol.
  StringRef Name;
  StringRef IRName;
  int ComdatIndex;
  uint32_t Flags;

  // Fields mirrored from storage::Uncommon.
  uint32_t CommonSize;
  uint32_t CommonAlign;
  StringRef COFFWeakExternFallbackName;
  StringRef SectionName;

  /// The mangled name of the symbol.
  StringRef getName() const { return Name; }

  /// The unmangled name of the symbol; empty when this is not an IR symbol.
  StringRef getIRName() const { return IRName; }

  /// The symbol's index into the comdat table (see Reader::getComdatTable()),
  /// or -1 when the symbol is not a member of any comdat.
  int getComdatIndex() const { return ComdatIndex; }

  using S = storage::Symbol;

  /// Decodes the two visibility bits of Flags.
  GlobalValue::VisibilityTypes getVisibility() const {
    const uint32_t Bits = (Flags >> S::FB_visibility) & 3;
    return static_cast<GlobalValue::VisibilityTypes>(Bits);
  }

  // Each predicate below tests the single bit of Flags named by the
  // corresponding storage::Symbol::FlagBits enumerator.
  bool isUndefined() const { return (Flags & (1u << S::FB_undefined)) != 0; }
  bool isWeak() const { return (Flags & (1u << S::FB_weak)) != 0; }
  bool isCommon() const { return (Flags & (1u << S::FB_common)) != 0; }
  bool isIndirect() const { return (Flags & (1u << S::FB_indirect)) != 0; }
  bool isUsed() const { return (Flags & (1u << S::FB_used)) != 0; }
  bool isTLS() const { return (Flags & (1u << S::FB_tls)) != 0; }
  bool canBeOmittedFromSymbolTable() const {
    return (Flags & (1u << S::FB_may_omit)) != 0;
  }
  bool isGlobal() const { return (Flags & (1u << S::FB_global)) != 0; }
  bool isFormatSpecific() const {
    return (Flags & (1u << S::FB_format_specific)) != 0;
  }
  bool isUnnamedAddr() const {
    return (Flags & (1u << S::FB_unnamed_addr)) != 0;
  }
  bool isExecutable() const {
    return (Flags & (1u << S::FB_executable)) != 0;
  }

  /// Size of a common symbol; asserts that the symbol is common.
  uint64_t getCommonSize() const {
    assert(isCommon());
    return CommonSize;
  }

  /// Alignment of a common symbol; asserts that the symbol is common.
  uint32_t getCommonAlignment() const {
    assert(isCommon());
    return CommonAlign;
  }

  /// COFF-specific: for a weak external, the name of the symbol used as a
  /// fallback if the weak external remains undefined. Asserts that the symbol
  /// is both weak and indirect.
  StringRef getCOFFWeakExternalFallback() const {
    assert(isWeak() && isIndirect());
    return COFFWeakExternFallbackName;
  }

  /// The symbol's section name; may be queried on any symbol.
  StringRef getSectionName() const { return SectionName; }
};
/// This class can be used to read a Symtab and Strtab produced by
/// irsymtab::build.
class Reader {
StringRef Symtab, Strtab;
ArrayRef<storage::Module> Modules;
ArrayRef<storage::Comdat> Comdats;
ArrayRef<storage::Symbol> Symbols;
ArrayRef<storage::Uncommon> Uncommons;
ArrayRef<storage::Str> DependentLibraries;
StringRef str(storage::Str S) const { return S.get(Strtab); }
template <typename T> ArrayRef<T> range(storage::Range<T> R) const {
return R.get(Symtab);
}
const storage::Header &header() const {
return *reinterpret_cast<const storage::Header *>(Symtab.data());
}
public:
class SymbolRef;
Reader() = default;
Reader(StringRef Symtab, StringRef Strtab) : Symtab(Symtab), Strtab(Strtab) {
Modules = range(header().Modules);
Comdats = range(header().Comdats);
Symbols = range(header().Symbols);
Uncommons = range(header().Uncommons);
DependentLibraries = range(header().DependentLibraries);
}
using symbol_range = iterator_range<object::content_iterator<SymbolRef>>;
/// Returns the symbol table for the entire bitcode file.
/// The symbols enumerated by this method are ephemeral, but they can be
/// copied into an irsymtab::Symbol object.
symbol_range symbols() const;
size_t getNumModules() const { return Modules.size(); }
/// Returns a slice of the symbol table for the I'th module in the file.
/// The symbols enumerated by this method are ephemeral, but they can be
/// copied into an irsymtab::Symbol object.
symbol_range module_symbols(unsigned I) const;
StringRef getTargetTriple() const { return str(header().TargetTriple); }
/// Returns the source file path specified at compile time.
StringRef getSourceFileName() const { return str(header().SourceFileName); }
/// Returns a table with all the comdats used by this file.
std::vector<StringRef> getComdatTable() const {
std::vector<StringRef> ComdatTable;
ComdatTable.reserve(Comdats.size());
for (auto C : Comdats)
ComdatTable.push_back(str(C.Name));
return ComdatTable;
}
/// COFF-specific: returns linker options specified in the input file.
StringRef getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts); }
/// Returns dependent library specifiers
std::vector<StringRef> getDependentLibraries() const {
std::vector<StringRef> Specifiers;
Specifiers.reserve(DependentLibraries.size());
for (auto S : DependentLibraries) {
Specifiers.push_back(str(S));
}
return Specifiers;
}
};
/// The transient symbol type yielded while iterating Reader::symbols() and
/// Reader::module_symbols(). It acts as a cursor over the stored symbols and
/// exposes the current one through its Symbol base.
class Reader::SymbolRef : public Symbol {
  const storage::Symbol *SymI;
  const storage::Symbol *SymE;
  const storage::Uncommon *UncI;
  const Reader *R;

  /// Tells whether the symbol most recently loaded into Flags is accompanied
  /// by a storage::Uncommon record.
  bool hasUncommon() const {
    return (Flags >> storage::Symbol::FB_has_uncommon) & 1;
  }

  /// Loads the symbol under the cursor into the Symbol base fields.
  void read() {
    // Nothing to load once the cursor has reached the end of its range.
    if (SymI == SymE)
      return;

    const storage::Symbol &Sym = *SymI;
    Name = R->str(Sym.Name);
    IRName = R->str(Sym.IRName);
    ComdatIndex = Sym.ComdatIndex;
    Flags = Sym.Flags;

    if (!hasUncommon()) {
      // Clear this field so that it may be queried unconditionally on every
      // symbol.
      SectionName = "";
      return;
    }

    const storage::Uncommon &Unc = *UncI;
    CommonSize = Unc.CommonSize;
    CommonAlign = Unc.CommonAlign;
    COFFWeakExternFallbackName = R->str(Unc.COFFWeakExternFallbackName);
    SectionName = R->str(Unc.SectionName);
  }

public:
  SymbolRef(const storage::Symbol *SymI, const storage::Symbol *SymE,
            const storage::Uncommon *UncI, const Reader *R)
      : SymI(SymI), SymE(SymE), UncI(UncI), R(R) {
    read();
  }

  /// Advances the cursor to the next symbol and loads it.
  void moveNext() {
    ++SymI;
    // Flags still describes the symbol being left behind; if that symbol had
    // an Uncommon record, step past the record as well.
    if (hasUncommon())
      ++UncI;
    read();
  }

  /// Two cursors are equal when they point at the same stored symbol.
  bool operator==(const SymbolRef &Other) const { return SymI == Other.SymI; }
};
/// Spans every symbol of the file: from the first stored symbol, paired with
/// the first Uncommon record, up to the end of the symbol array.
inline Reader::symbol_range Reader::symbols() const {
  const storage::Symbol *SymBegin = Symbols.begin();
  const storage::Symbol *SymEnd = Symbols.end();
  return {SymbolRef(SymBegin, SymEnd, Uncommons.begin(), this),
          SymbolRef(SymEnd, SymEnd, nullptr, this)};
}
/// Spans only the symbols of module I, using the Begin/End symbol indices and
/// the UncBegin Uncommon index recorded for that module.
inline Reader::symbol_range Reader::module_symbols(unsigned I) const {
  const storage::Module &Mod = Modules[I];
  const storage::Symbol *const AllSyms = Symbols.begin();
  const storage::Symbol *First = AllSyms + Mod.Begin;
  const storage::Symbol *Last = AllSyms + Mod.End;
  const storage::Uncommon *FirstUnc = Uncommons.begin() + Mod.UncBegin;
  return {SymbolRef(First, Last, FirstUnc, this),
          SymbolRef(Last, Last, nullptr, this)};
}
/// Bundles the irsymtab of a bitcode file with a reader for it. Symtab and
/// Strtab own any underlying data for the irsymtab.
struct FileContents {
  SmallVector<char, 0> Symtab;
  SmallVector<char, 0> Strtab;
  Reader TheReader;
};
/// Reads the contents of a bitcode file, creating its irsymtab if necessary.
///
/// Only the declaration is visible in this header; the definition lives
/// elsewhere.
///
/// \param BFC the bitcode file contents to read.
/// \returns the irsymtab as a FileContents (its Symtab/Strtab buffers plus a
/// Reader), or an error. NOTE(review): presumably TheReader views the returned
/// Symtab/Strtab buffers -- confirm against the definition.
Expected<FileContents> readBitcode(const BitcodeFileContents &BFC);
} // end namespace irsymtab
} // end namespace llvm
#endif // LLVM_OBJECT_IRSYMTAB_H