1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
| //===--- AArch64Subtarget.h - Define Subtarget for the AArch64 -*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file declares the AArch64 specific subclass of TargetSubtarget.
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#define LLVM_LIB_TARGET_AARCH64_AARCH64SUBTARGET_H
#include "AArch64FrameLowering.h"
#include "AArch64ISelLowering.h"
#include "AArch64InstrInfo.h"
#include "AArch64RegisterInfo.h"
#include "AArch64SelectionDAGInfo.h"
#include "llvm/CodeGen/GlobalISel/CallLowering.h"
#include "llvm/CodeGen/GlobalISel/InstructionSelector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DataLayout.h"
#include <string>
#define GET_SUBTARGETINFO_HEADER
#include "AArch64GenSubtargetInfo.inc"
namespace llvm {
class GlobalValue;
class StringRef;
class Triple;
class AArch64Subtarget final : public AArch64GenSubtargetInfo {
public:
enum ARMProcFamilyEnum : uint8_t {
Others,
CortexA35,
CortexA53,
CortexA55,
CortexA57,
CortexA65,
CortexA72,
CortexA73,
CortexA75,
CortexA76,
Cyclone,
ExynosM1,
ExynosM3,
Falkor,
Kryo,
NeoverseE1,
NeoverseN1,
Saphira,
ThunderX2T99,
ThunderX,
ThunderXT81,
ThunderXT83,
ThunderXT88,
TSV110
};
protected:
/// ARMProcFamily - ARM processor family: Cortex-A53, Cortex-A57, and others.
ARMProcFamilyEnum ARMProcFamily = Others;
bool HasV8_1aOps = false;
bool HasV8_2aOps = false;
bool HasV8_3aOps = false;
bool HasV8_4aOps = false;
bool HasV8_5aOps = false;
bool HasFPARMv8 = false;
bool HasNEON = false;
bool HasCrypto = false;
bool HasDotProd = false;
bool HasCRC = false;
bool HasLSE = false;
bool HasRAS = false;
bool HasRDM = false;
bool HasPerfMon = false;
bool HasFullFP16 = false;
bool HasFP16FML = false;
bool HasSPE = false;
// ARMv8.1 extensions
bool HasVH = false;
bool HasPAN = false;
bool HasLOR = false;
// ARMv8.2 extensions
bool HasPsUAO = false;
bool HasPAN_RWV = false;
bool HasCCPP = false;
// Armv8.2 Crypto extensions
bool HasSM4 = false;
bool HasSHA3 = false;
bool HasSHA2 = false;
bool HasAES = false;
// ARMv8.3 extensions
bool HasPA = false;
bool HasJS = false;
bool HasCCIDX = false;
bool HasComplxNum = false;
// ARMv8.4 extensions
bool HasNV = false;
bool HasRASv8_4 = false;
bool HasMPAM = false;
bool HasDIT = false;
bool HasTRACEV8_4 = false;
bool HasAM = false;
bool HasSEL2 = false;
bool HasPMU = false;
bool HasTLB_RMI = false;
bool HasFMI = false;
bool HasRCPC_IMMO = false;
bool HasLSLFast = false;
bool HasSVE = false;
bool HasSVE2 = false;
bool HasRCPC = false;
bool HasAggressiveFMA = false;
// Armv8.5-A Extensions
bool HasAlternativeNZCV = false;
bool HasFRInt3264 = false;
bool HasSpecRestrict = false;
bool HasSSBS = false;
bool HasSB = false;
bool HasPredRes = false;
bool HasCCDP = false;
bool HasBTI = false;
bool HasRandGen = false;
bool HasMTE = false;
bool HasTME = false;
// Arm SVE2 extensions
bool HasSVE2AES = false;
bool HasSVE2SM4 = false;
bool HasSVE2SHA3 = false;
bool HasSVE2BitPerm = false;
// Future architecture extensions.
bool HasETE = false;
bool HasTRBE = false;
// HasZeroCycleRegMove - Has zero-cycle register mov instructions.
bool HasZeroCycleRegMove = false;
// HasZeroCycleZeroing - Has zero-cycle zeroing instructions.
bool HasZeroCycleZeroing = false;
bool HasZeroCycleZeroingGP = false;
bool HasZeroCycleZeroingFP = false;
bool HasZeroCycleZeroingFPWorkaround = false;
// StrictAlign - Disallow unaligned memory accesses.
bool StrictAlign = false;
// NegativeImmediates - transform instructions with negative immediates
bool NegativeImmediates = true;
// Enable 64-bit vectorization in SLP.
unsigned MinVectorRegisterBitWidth = 64;
bool UseAA = false;
bool PredictableSelectIsExpensive = false;
bool BalanceFPOps = false;
bool CustomAsCheapAsMove = false;
bool ExynosAsCheapAsMove = false;
bool UsePostRAScheduler = false;
bool Misaligned128StoreIsSlow = false;
bool Paired128IsSlow = false;
bool STRQroIsSlow = false;
bool UseAlternateSExtLoadCVTF32Pattern = false;
bool HasArithmeticBccFusion = false;
bool HasArithmeticCbzFusion = false;
bool HasFuseAddress = false;
bool HasFuseAES = false;
bool HasFuseArithmeticLogic = false;
bool HasFuseCCSelect = false;
bool HasFuseCryptoEOR = false;
bool HasFuseLiterals = false;
bool DisableLatencySchedHeuristic = false;
bool UseRSqrt = false;
bool Force32BitJumpTables = false;
bool UseEL1ForTP = false;
bool UseEL2ForTP = false;
bool UseEL3ForTP = false;
bool AllowTaggedGlobals = false;
uint8_t MaxInterleaveFactor = 2;
uint8_t VectorInsertExtractBaseCost = 3;
uint16_t CacheLineSize = 0;
uint16_t PrefetchDistance = 0;
uint16_t MinPrefetchStride = 1;
unsigned MaxPrefetchIterationsAhead = UINT_MAX;
unsigned PrefFunctionLogAlignment = 0;
unsigned PrefLoopLogAlignment = 0;
unsigned MaxJumpTableSize = 0;
unsigned WideningBaseCost = 0;
// ReserveXRegister[i] - X#i is not available as a general purpose register.
BitVector ReserveXRegister;
// CustomCallUsedXRegister[i] - X#i call saved.
BitVector CustomCallSavedXRegs;
bool IsLittle;
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
AArch64TargetLowering TLInfo;
/// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
std::unique_ptr<InstructionSelector> InstSelector;
std::unique_ptr<LegalizerInfo> Legalizer;
std::unique_ptr<RegisterBankInfo> RegBankInfo;
private:
/// initializeSubtargetDependencies - Initializes using CPUString and the
/// passed in feature string so that we can use initializer lists for
/// subtarget initialization.
AArch64Subtarget &initializeSubtargetDependencies(StringRef FS,
StringRef CPUString);
/// Initialize properties based on the selected processor family.
void initializeProperties();
public:
/// This constructor initializes the data members to match that
/// of the specified triple.
AArch64Subtarget(const Triple &TT, const std::string &CPU,
const std::string &FS, const TargetMachine &TM,
bool LittleEndian);
const AArch64SelectionDAGInfo *getSelectionDAGInfo() const override {
return &TSInfo;
}
const AArch64FrameLowering *getFrameLowering() const override {
return &FrameLowering;
}
const AArch64TargetLowering *getTargetLowering() const override {
return &TLInfo;
}
const AArch64InstrInfo *getInstrInfo() const override { return &InstrInfo; }
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
const CallLowering *getCallLowering() const override;
InstructionSelector *getInstructionSelector() const override;
const LegalizerInfo *getLegalizerInfo() const override;
const RegisterBankInfo *getRegBankInfo() const override;
const Triple &getTargetTriple() const { return TargetTriple; }
bool enableMachineScheduler() const override { return true; }
bool enablePostRAScheduler() const override {
return UsePostRAScheduler;
}
/// Returns ARM processor family.
/// Avoid this function! CPU specifics should be kept local to this class
/// and preferably modeled with SubtargetFeatures or properties in
/// initializeProperties().
ARMProcFamilyEnum getProcFamily() const {
return ARMProcFamily;
}
bool hasV8_1aOps() const { return HasV8_1aOps; }
bool hasV8_2aOps() const { return HasV8_2aOps; }
bool hasV8_3aOps() const { return HasV8_3aOps; }
bool hasV8_4aOps() const { return HasV8_4aOps; }
bool hasV8_5aOps() const { return HasV8_5aOps; }
bool hasZeroCycleRegMove() const { return HasZeroCycleRegMove; }
bool hasZeroCycleZeroingGP() const { return HasZeroCycleZeroingGP; }
bool hasZeroCycleZeroingFP() const { return HasZeroCycleZeroingFP; }
bool hasZeroCycleZeroingFPWorkaround() const {
return HasZeroCycleZeroingFPWorkaround;
}
bool requiresStrictAlign() const { return StrictAlign; }
bool isXRaySupported() const override { return true; }
unsigned getMinVectorRegisterBitWidth() const {
return MinVectorRegisterBitWidth;
}
bool isXRegisterReserved(size_t i) const { return ReserveXRegister[i]; }
unsigned getNumXRegisterReserved() const { return ReserveXRegister.count(); }
bool isXRegCustomCalleeSaved(size_t i) const {
return CustomCallSavedXRegs[i];
}
bool hasCustomCallingConv() const { return CustomCallSavedXRegs.any(); }
bool hasFPARMv8() const { return HasFPARMv8; }
bool hasNEON() const { return HasNEON; }
bool hasCrypto() const { return HasCrypto; }
bool hasDotProd() const { return HasDotProd; }
bool hasCRC() const { return HasCRC; }
bool hasLSE() const { return HasLSE; }
bool hasRAS() const { return HasRAS; }
bool hasRDM() const { return HasRDM; }
bool hasSM4() const { return HasSM4; }
bool hasSHA3() const { return HasSHA3; }
bool hasSHA2() const { return HasSHA2; }
bool hasAES() const { return HasAES; }
bool balanceFPOps() const { return BalanceFPOps; }
bool predictableSelectIsExpensive() const {
return PredictableSelectIsExpensive;
}
bool hasCustomCheapAsMoveHandling() const { return CustomAsCheapAsMove; }
bool hasExynosCheapAsMoveHandling() const { return ExynosAsCheapAsMove; }
bool isMisaligned128StoreSlow() const { return Misaligned128StoreIsSlow; }
bool isPaired128Slow() const { return Paired128IsSlow; }
bool isSTRQroSlow() const { return STRQroIsSlow; }
bool useAlternateSExtLoadCVTF32Pattern() const {
return UseAlternateSExtLoadCVTF32Pattern;
}
bool hasArithmeticBccFusion() const { return HasArithmeticBccFusion; }
bool hasArithmeticCbzFusion() const { return HasArithmeticCbzFusion; }
bool hasFuseAddress() const { return HasFuseAddress; }
bool hasFuseAES() const { return HasFuseAES; }
bool hasFuseArithmeticLogic() const { return HasFuseArithmeticLogic; }
bool hasFuseCCSelect() const { return HasFuseCCSelect; }
bool hasFuseCryptoEOR() const { return HasFuseCryptoEOR; }
bool hasFuseLiterals() const { return HasFuseLiterals; }
/// Return true if the CPU supports any kind of instruction fusion.
bool hasFusion() const {
return hasArithmeticBccFusion() || hasArithmeticCbzFusion() ||
hasFuseAES() || hasFuseArithmeticLogic() ||
hasFuseCCSelect() || hasFuseLiterals();
}
bool useEL1ForTP() const { return UseEL1ForTP; }
bool useEL2ForTP() const { return UseEL2ForTP; }
bool useEL3ForTP() const { return UseEL3ForTP; }
bool useRSqrt() const { return UseRSqrt; }
bool force32BitJumpTables() const { return Force32BitJumpTables; }
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
unsigned getVectorInsertExtractBaseCost() const {
return VectorInsertExtractBaseCost;
}
unsigned getCacheLineSize() const override { return CacheLineSize; }
unsigned getPrefetchDistance() const override { return PrefetchDistance; }
unsigned getMinPrefetchStride() const override { return MinPrefetchStride; }
unsigned getMaxPrefetchIterationsAhead() const override {
return MaxPrefetchIterationsAhead;
}
unsigned getPrefFunctionLogAlignment() const {
return PrefFunctionLogAlignment;
}
unsigned getPrefLoopLogAlignment() const { return PrefLoopLogAlignment; }
unsigned getMaximumJumpTableSize() const { return MaxJumpTableSize; }
unsigned getWideningBaseCost() const { return WideningBaseCost; }
/// CPU has TBI (top byte of addresses is ignored during HW address
/// translation) and OS enables it.
bool supportsAddressTopByteIgnored() const;
bool hasPerfMon() const { return HasPerfMon; }
bool hasFullFP16() const { return HasFullFP16; }
bool hasFP16FML() const { return HasFP16FML; }
bool hasSPE() const { return HasSPE; }
bool hasLSLFast() const { return HasLSLFast; }
bool hasSVE() const { return HasSVE; }
bool hasSVE2() const { return HasSVE2; }
bool hasRCPC() const { return HasRCPC; }
bool hasAggressiveFMA() const { return HasAggressiveFMA; }
bool hasAlternativeNZCV() const { return HasAlternativeNZCV; }
bool hasFRInt3264() const { return HasFRInt3264; }
bool hasSpecRestrict() const { return HasSpecRestrict; }
bool hasSSBS() const { return HasSSBS; }
bool hasSB() const { return HasSB; }
bool hasPredRes() const { return HasPredRes; }
bool hasCCDP() const { return HasCCDP; }
bool hasBTI() const { return HasBTI; }
bool hasRandGen() const { return HasRandGen; }
bool hasMTE() const { return HasMTE; }
bool hasTME() const { return HasTME; }
// Arm SVE2 extensions
bool hasSVE2AES() const { return HasSVE2AES; }
bool hasSVE2SM4() const { return HasSVE2SM4; }
bool hasSVE2SHA3() const { return HasSVE2SHA3; }
bool hasSVE2BitPerm() const { return HasSVE2BitPerm; }
bool isLittleEndian() const { return IsLittle; }
bool isTargetDarwin() const { return TargetTriple.isOSDarwin(); }
bool isTargetIOS() const { return TargetTriple.isiOS(); }
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isTargetWindows() const { return TargetTriple.isOSWindows(); }
bool isTargetAndroid() const { return TargetTriple.isAndroid(); }
bool isTargetFuchsia() const { return TargetTriple.isOSFuchsia(); }
bool isTargetCOFF() const { return TargetTriple.isOSBinFormatCOFF(); }
bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); }
bool isTargetMachO() const { return TargetTriple.isOSBinFormatMachO(); }
bool isTargetILP32() const { return TargetTriple.isArch32Bit(); }
bool useAA() const override { return UseAA; }
bool hasVH() const { return HasVH; }
bool hasPAN() const { return HasPAN; }
bool hasLOR() const { return HasLOR; }
bool hasPsUAO() const { return HasPsUAO; }
bool hasPAN_RWV() const { return HasPAN_RWV; }
bool hasCCPP() const { return HasCCPP; }
bool hasPA() const { return HasPA; }
bool hasJS() const { return HasJS; }
bool hasCCIDX() const { return HasCCIDX; }
bool hasComplxNum() const { return HasComplxNum; }
bool hasNV() const { return HasNV; }
bool hasRASv8_4() const { return HasRASv8_4; }
bool hasMPAM() const { return HasMPAM; }
bool hasDIT() const { return HasDIT; }
bool hasTRACEV8_4() const { return HasTRACEV8_4; }
bool hasAM() const { return HasAM; }
bool hasSEL2() const { return HasSEL2; }
bool hasPMU() const { return HasPMU; }
bool hasTLB_RMI() const { return HasTLB_RMI; }
bool hasFMI() const { return HasFMI; }
bool hasRCPC_IMMO() const { return HasRCPC_IMMO; }
bool addrSinkUsingGEPs() const override {
// Keeping GEPs inbounds is important for exploiting AArch64
// addressing-modes in ILP32 mode.
return useAA() || isTargetILP32();
}
bool useSmallAddressing() const {
switch (TLInfo.getTargetMachine().getCodeModel()) {
case CodeModel::Kernel:
// Kernel is currently allowed only for Fuchsia targets,
// where it is the same as Small for almost all purposes.
case CodeModel::Small:
return true;
default:
return false;
}
}
/// ParseSubtargetFeatures - Parses features string setting specified
/// subtarget options. Definition of function is auto generated by tblgen.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
/// ClassifyGlobalReference - Find the target operand flags that describe
/// how a global value should be referenced for the current subtarget.
unsigned ClassifyGlobalReference(const GlobalValue *GV,
const TargetMachine &TM) const;
unsigned classifyGlobalFunctionReference(const GlobalValue *GV,
const TargetMachine &TM) const;
void overrideSchedPolicy(MachineSchedPolicy &Policy,
unsigned NumRegionInstrs) const override;
bool enableEarlyIfConversion() const override;
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
bool isCallingConvWin64(CallingConv::ID CC) const {
switch (CC) {
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Swift:
return isTargetWindows();
case CallingConv::Win64:
return true;
default:
return false;
}
}
void mirFileLoaded(MachineFunction &MF) const override;
};
} // End llvm namespace
#endif
|