1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
//==- llvm/CodeGen/GlobalISel/RegBankSelect.cpp - RegBankSelect --*- C++ -*-==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file implements the RegBankSelect class.
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
#include "llvm/CodeGen/GlobalISel/RegisterBank.h"
#include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
#include "llvm/CodeGen/GlobalISel/Utils.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/BlockFrequency.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <memory>
#include <utility>
#define DEBUG_TYPE "regbankselect"
using namespace llvm;
// Hidden, optional command-line option selecting the running mode of the
// pass. When it is given on the command line, the RegBankSelect constructor
// uses its value instead of the mode requested by the code creating the pass.
static cl::opt<RegBankSelect::Mode> RegBankSelectMode(
cl::desc("Mode of the RegBankSelect pass"), cl::Hidden, cl::Optional,
cl::values(clEnumValN(RegBankSelect::Mode::Fast, "regbankselect-fast",
"Run the Fast mode (default mapping)"),
clEnumValN(RegBankSelect::Mode::Greedy, "regbankselect-greedy",
"Use the Greedy mode (best local mapping)")));
// Pass identifier; the constructor hands it to the MachineFunctionPass base.
char RegBankSelect::ID = 0;
// Pass registration, listing the analyses the pass may depend on: block
// frequency, branch probability and the target pass configuration.
INITIALIZE_PASS_BEGIN(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers",
false, false);
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfo)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE,
"Assign register bank of generic virtual registers", false,
false)
/// Create the pass running in \p RunningMode.
///
/// If the mode option was given on the command line, its value replaces
/// \p RunningMode; a debug message is emitted when the two differ.
RegBankSelect::RegBankSelect(Mode RunningMode)
    : MachineFunctionPass(ID), OptMode(RunningMode) {
  // Without an explicit command-line request, keep the caller's choice.
  const bool OptionGiven = RegBankSelectMode.getNumOccurrences() != 0;
  if (!OptionGiven)
    return;

  OptMode = RegBankSelectMode;
  if (RegBankSelectMode != RunningMode)
    LLVM_DEBUG(dbgs() << "RegBankSelect mode overrided by command line\n");
}
/// Cache the per-function state used while assigning register banks in \p MF.
///
/// Fetches the register bank info, the register info and the target pass
/// configuration, queries the block frequency / branch probability analyses
/// only when the pass is not in Fast mode (they are left null otherwise), and
/// points the instruction builder and the remark emitter at \p MF.
void RegBankSelect::init(MachineFunction &MF) {
  const auto &STI = MF.getSubtarget();
  RBI = STI.getRegBankInfo();
  assert(RBI && "Cannot work without RegisterBankInfo");
  MRI = &MF.getRegInfo();
  TRI = STI.getRegisterInfo();
  TPC = &getAnalysis<TargetPassConfig>();

  // The frequency analyses are only required outside of Fast mode.
  const bool UseFrequencies = OptMode != Mode::Fast;
  MBFI = UseFrequencies ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
  MBPI =
      UseFrequencies ? &getAnalysis<MachineBranchProbabilityInfo>() : nullptr;

  MIRBuilder.setMF(MF);
  MORE = std::make_unique<MachineOptimizationRemarkEmitter>(MF, MBFI);
}
/// Declare the analyses this pass needs.
///
/// The block frequency and branch probability analyses are only required
/// when the pass is not in Fast mode; the target pass configuration is always
/// required.
void RegBankSelect::getAnalysisUsage(AnalysisUsage &AU) const {
  const bool UseFrequencies = OptMode != Mode::Fast;
  if (UseFrequencies) {
    // We could preserve the information from these two analyses but
    // the APIs do not allow us to do so yet.
    AU.addRequired<MachineBlockFrequencyInfo>();
    AU.addRequired<MachineBranchProbabilityInfo>();
  }

  AU.addRequired<TargetPassConfig>();
  getSelectionDAGFallbackAnalysisUsage(AU);
  MachineFunctionPass::getAnalysisUsage(AU);
}
/// Check whether \p Reg already lives on the bank requested by \p ValMapping.
///
/// \p OnlyAssign is set to true when the mapping is a single piece and
/// \p Reg has no register bank yet, i.e. when assigning the bank is all that
/// is needed; it is set to false otherwise.
///
/// \return true only when the mapping is a single piece and the current bank
/// of \p Reg is the requested one.
bool RegBankSelect::assignmentMatch(
    Register Reg, const RegisterBankInfo::ValueMapping &ValMapping,
    bool &OnlyAssign) const {
  // Each part of a break down needs to end up in a different register, so a
  // multi-piece mapping can neither match nor be fixed by a mere assignment.
  const bool IsSinglePiece = ValMapping.NumBreakDowns == 1;
  if (!IsSinglePiece) {
    OnlyAssign = false;
    return false;
  }

  const RegisterBank *Current = RBI->getRegBank(Reg, *MRI, *TRI);
  const RegisterBank *Wanted = ValMapping.BreakDown[0].RegBank;
  // A register free of assignment only needs to be given the wanted bank.
  OnlyAssign = !Current;
  LLVM_DEBUG(dbgs() << "Does assignment already match: ";
             if (Current) dbgs() << *Current; else dbgs() << "none";
             dbgs() << " against ";
             assert(Wanted && "The mapping must be valid");
             dbgs() << *Wanted << '\n';);
  return Current == Wanted;
}
/// Materialize the repairing code for \p MO according to \p ValMapping.
///
/// A single-piece mapping is repaired with a COPY between the register of
/// \p MO and the first register of \p NewVRegs (the direction depends on
/// whether \p MO is a definition). A multi-piece mapping is repaired with
/// G_BUILD_VECTOR, G_CONCAT_VECTORS or G_MERGE_VALUES when \p MO is a
/// definition, and with G_UNMERGE_VALUES when it is a use. The instruction is
/// then inserted at the insertion point(s) of \p RepairPt; having more than
/// one insertion point is currently a fatal error.
///
/// \return true; unsupported situations end in an assertion or fatal error.
bool RegBankSelect::repairReg(
    MachineOperand &MO, const RegisterBankInfo::ValueMapping &ValMapping,
    RegBankSelect::RepairingPlacement &RepairPt,
    const iterator_range<SmallVectorImpl<Register>::const_iterator> &NewVRegs) {
  assert(ValMapping.NumBreakDowns == (unsigned)size(NewVRegs) &&
         "need new vreg for each breakdown");
  // An empty range of new register means no repairing.
  assert(!NewVRegs.empty() && "We should not have to repair");

  MachineInstr *MI = nullptr;
  const bool IsSinglePiece = ValMapping.NumBreakDowns == 1;
  if (IsSinglePiece) {
    // Repairing a use copies the original register into the new one;
    // repairing a definition copies the new register into the original one.
    const Register Original = MO.getReg();
    const Register Fresh = *NewVRegs.begin();
    const bool RepairsDef = MO.isDef();
    const Register Src = RepairsDef ? Fresh : Original;
    const Register Dst = RepairsDef ? Original : Fresh;

    assert((RepairPt.getNumInsertPoints() == 1 ||
            Register::isPhysicalRegister(Dst)) &&
           "We are about to create several defs for Dst");

    // Build the instruction used to repair, then clone it at the right
    // places. Avoiding buildCopy bypasses the check that Src and Dst have the
    // same types because the type is a placeholder when this function is
    // called.
    MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY)
             .addDef(Dst)
             .addUse(Src);
    LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst)
                      << '\n');
  } else {
    // TODO: Support with G_IMPLICIT_DEF + G_INSERT sequence or G_EXTRACT
    // sequence.
    assert(ValMapping.partsAllUniform() && "irregular breakdowns not supported");

    LLT RegTy = MRI->getType(MO.getReg());
    if (!MO.isDef()) {
      // Use: split the original value into the new registers.
      MachineInstrBuilder UnMergeBuilder =
          MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES);
      for (Register DefReg : NewVRegs)
        UnMergeBuilder.addDef(DefReg);
      UnMergeBuilder.addUse(MO.getReg());
      MI = UnMergeBuilder;
    } else {
      // Def: rebuild the original value out of the new registers. Scalars
      // are merged; vectors are built element-wise when there is one piece
      // per element and concatenated otherwise.
      unsigned MergeOp = TargetOpcode::G_MERGE_VALUES;
      if (RegTy.isVector()) {
        MergeOp = TargetOpcode::G_BUILD_VECTOR;
        if (ValMapping.NumBreakDowns != RegTy.getNumElements()) {
          assert(
              (ValMapping.BreakDown[0].Length * ValMapping.NumBreakDowns ==
               RegTy.getSizeInBits()) &&
              (ValMapping.BreakDown[0].Length % RegTy.getScalarSizeInBits() ==
               0) &&
              "don't understand this value breakdown");
          MergeOp = TargetOpcode::G_CONCAT_VECTORS;
        }
      }

      auto MergeBuilder =
          MIRBuilder.buildInstrNoInsert(MergeOp).addDef(MO.getReg());
      for (Register SrcReg : NewVRegs)
        MergeBuilder.addUse(SrcReg);
      MI = MergeBuilder;
    }
  }

  if (RepairPt.getNumInsertPoints() != 1)
    report_fatal_error("need testcase to support multiple insertion points");

  // TODO:
  // Check if MI is legal. if not, we need to legalize all the
  // instructions we are going to insert.
  std::unique_ptr<MachineInstr *[]> NewInstrs(
      new MachineInstr *[RepairPt.getNumInsertPoints()]);
  unsigned NumInserted = 0;
  for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
    // The first insertion point gets MI itself, any further one a clone.
    MachineInstr *CurMI = NumInserted == 0
                              ? MI
                              : MIRBuilder.getMF().CloneMachineInstr(MI);
    InsertPt->insert(*CurMI);
    NewInstrs[NumInserted++] = CurMI;
  }
  // TODO:
  // Legalize NewInstrs if need be.
  return true;
}
/// Compute the cost of repairing register operand \p MO so that it agrees
/// with \p ValMapping.
///
/// A multi-piece mapping costs whatever the register bank info reports as
/// break down cost. A single-piece mapping costs one cross-bank copy, whose
/// direction depends on whether \p MO is a use or a definition.
///
/// \return the cost, or std::numeric_limits<unsigned>::max() when the copy
/// cost reported by the register bank info is that (impossible) value.
uint64_t RegBankSelect::getRepairCost(
    const MachineOperand &MO,
    const RegisterBankInfo::ValueMapping &ValMapping) const {
  assert(MO.isReg() && "We should only repair register operand");
  assert(ValMapping.NumBreakDowns && "Nothing to map??");

  const RegisterBank *CurBank = RBI->getRegBank(MO.getReg(), *MRI, *TRI);
  // If MO does not have a register bank, we should have just been
  // able to set one unless we have to break the value down.
  assert(CurBank || MO.isDef());

  // Def: Val <- NewDefs
  //     Same number of values: copy
  //     Different number: Val = build_sequence Defs1, Defs2, ...
  // Use: NewSources <- Val.
  //     Same number of values: copy.
  //     Different number: Src1, Src2, ... =
  //           extract_value Val, Src1Begin, Src1Len, Src2Begin, Src2Len, ...
  // We should remember that this value is available somewhere else to
  // coalesce the value.
  if (ValMapping.NumBreakDowns != 1)
    return RBI->getBreakDownCost(ValMapping, CurBank);

  // From here on the value stays in a single register: the repair is a copy.
  // A use is copied from its current bank to the wanted one; a definition is
  // copied the other way around.
  const RegisterBank *WantedBank = ValMapping.BreakDown[0].RegBank;
  const bool RepairsDef = MO.isDef();
  const RegisterBank *SrcBank = RepairsDef ? WantedBank : CurBank;
  const RegisterBank *DstBank = RepairsDef ? CurBank : WantedBank;

  // TODO: It may be possible to actually avoid the copy.
  // If we repair something where the source is defined by a copy
  // and the source of that copy is on the right bank, we can reuse
  // it for free.
  // E.g.,
  // RegToRepair<BankA> = copy AlternativeSrc<BankB>
  // = op RegToRepair<BankA>
  // We can simply propagate AlternativeSrc instead of copying RegToRepair
  // into a new virtual register.
  // We would also need to propagate this information in the
  // repairing placement.
  const unsigned CopyCost = RBI->copyCost(
      *DstBank, *SrcBank, RBI->getSizeInBits(MO.getReg(), *MRI, *TRI));

  // TODO: use a dedicated constant for ImpossibleCost.
  // The impossible value, std::numeric_limits<unsigned>::max(), is passed
  // through unchanged; a legalization cost could be substituted here later.
  return CopyCost;
}
/// Pick the cheapest mapping for \p MI among \p PossibleMappings.
///
/// Every candidate is evaluated with computeMapping; the repairing placements
/// of the cheapest one are moved into \p RepairPts.
///
/// When no candidate is possible:
/// - if GlobalISel abort is enabled, a fatal error is reported (previously
///   this was only an assertion, so builds without assertions went on to
///   dereference a null mapping);
/// - otherwise the first candidate is returned together with an Impossible
///   repairing placement, so that applying it fails and the caller can report
///   the failure.
///
/// \pre \p PossibleMappings is not empty.
const RegisterBankInfo::InstructionMapping &RegBankSelect::findBestMapping(
    MachineInstr &MI, RegisterBankInfo::InstructionMappings &PossibleMappings,
    SmallVectorImpl<RepairingPlacement> &RepairPts) {
  assert(!PossibleMappings.empty() &&
         "Do not know how to map this instruction");

  const RegisterBankInfo::InstructionMapping *BestMapping = nullptr;
  MappingCost Cost = MappingCost::ImpossibleCost();
  SmallVector<RepairingPlacement, 4> LocalRepairPts;
  for (const RegisterBankInfo::InstructionMapping *CurMapping :
       PossibleMappings) {
    // Passing the best cost so far lets computeMapping give up early on
    // candidates that are already more expensive.
    MappingCost CurCost =
        computeMapping(MI, *CurMapping, LocalRepairPts, &Cost);
    if (CurCost < Cost) {
      LLVM_DEBUG(dbgs() << "New best: " << CurCost << '\n');
      Cost = CurCost;
      BestMapping = CurMapping;
      RepairPts.clear();
      for (RepairingPlacement &RepairPt : LocalRepairPts)
        RepairPts.emplace_back(std::move(RepairPt));
    }
  }

  if (BestMapping)
    return *BestMapping;

  // None of the mappings worked, which means they are all impossible.
  if (TPC->isGlobalISelAbortEnabled())
    report_fatal_error("No suitable mapping for instruction");

  // Pick the first one and set an impossible repairing point.
  // It will trigger the failed isel mode.
  BestMapping = *PossibleMappings.begin();
  RepairPts.emplace_back(
      RepairingPlacement(MI, 0, *TRI, *this, RepairingPlacement::Impossible));
  return *BestMapping;
}
/// Adjust \p RepairPt, a placement that currently requires a split, for the
/// repair of operand \p MO with \p ValMapping.
///
/// - Use of a PHI mapped to a single piece: the placement is switched to
///   Reassign, since no split is needed.
/// - Definition by a terminator of a virtual register that is broken down
///   into several pieces: the placement is switched to Impossible.
/// - Every other case leaves \p RepairPt unchanged; the remaining checks are
///   assertions only.
void RegBankSelect::tryAvoidingSplit(
RegBankSelect::RepairingPlacement &RepairPt, const MachineOperand &MO,
const RegisterBankInfo::ValueMapping &ValMapping) const {
const MachineInstr &MI = *MO.getParent();
assert(RepairPt.hasSplit() && "We should not have to adjust for split");
// Splitting should only occur for PHIs or between terminators,
// because we only do local repairing.
assert((MI.isPHI() || MI.isTerminator()) && "Why do we split?");
assert(&MI.getOperand(RepairPt.getOpIdx()) == &MO &&
"Repairing placement does not match operand");
// If we need splitting for phis, that means it is because we
// could not find an insertion point before the terminators of
// the predecessor block for this argument. In other words,
// the input value is defined by one of the terminators.
assert((!MI.isPHI() || !MO.isDef()) && "Need split for phi def?");
// We split to repair the use of a phi or a terminator.
if (!MO.isDef()) {
if (MI.isTerminator()) {
assert(&MI != &(*MI.getParent()->getFirstTerminator()) &&
"Need to split for the first terminator?!");
} else {
// For the PHI case, the split may not be actually required.
// In the copy case, a phi is already a copy on the incoming edge,
// therefore there is no need to split.
if (ValMapping.NumBreakDowns == 1)
// This is already a copy, there is nothing to do.
RepairPt.switchTo(RepairingPlacement::RepairingKind::Reassign);
}
return;
}
// At this point, we need to repair a definition of a terminator.
// Technically we need to fix the def of MI on all outgoing
// edges of MI to keep the repairing local. In other words, we
// will create several definitions of the same register. This
// does not work for SSA unless that definition is a physical
// register.
// However, there are other cases where we can get away with
// that while still keeping the repairing local.
assert(MI.isTerminator() && MO.isDef() &&
"This code is for the def of a terminator");
// Since we use RPO traversal, if we need to repair a definition
// this means this definition could be:
// 1. Used by PHIs (i.e., this VReg has been visited as part of the
// uses of a phi.), or
// 2. Part of a target specific instruction (i.e., the target applied
// some register class constraints when creating the instruction.)
// If the constraints come for #2, the target said that another mapping
// is supported so we may just drop them. Indeed, if we do not change
// the number of registers holding that value, the uses will get fixed
// when we get to them.
// Uses in PHIs may have already been processed though.
// If the constraints come for #1, then, those are weak constraints and
// no actual uses may rely on them. However, the problem remains mainly
// the same as for #2. If the value stays in one register, we could
// just switch the register bank of the definition, but we would need to
// account for a repairing cost for each phi we silently change.
//
// In any case, if the value needs to be broken down into several
// registers, the repairing is not local anymore as we need to patch
// every uses to rebuild the value in just one register.
//
// To summarize:
// - If the value is in a physical register, we can do the split and
// fix locally.
// Otherwise if the value is in a virtual register:
// - If the value remains in one register, we do not have to split
// just switching the register bank would do, but we need to account
// in the repairing cost all the phi we changed.
// - If the value spans several registers, then we cannot do a local
// repairing.
// Check if this is a physical or virtual register.
Register Reg = MO.getReg();
if (Register::isPhysicalRegister(Reg)) {
// We are going to split every outgoing edges.
// Check that this is possible.
// FIXME: The machine representation is currently broken
// since it also allows several terminators in one basic block.
// Because of that we would technically need a way to get
// the targets of just one terminator to know which edges
// we have to split.
// Assert that we do not hit the ill-formed representation.
// If there are other terminators before that one, some of
// the outgoing edges may not be dominated by this definition.
assert(&MI == &(*MI.getParent()->getFirstTerminator()) &&
"Do not know which outgoing edges are relevant");
const MachineInstr *Next = MI.getNextNode();
assert((!Next || Next->isUnconditionalBranch()) &&
"Do not know where each terminator ends up");
if (Next)
// If the next terminator uses Reg, this means we have
// to split right after MI and thus we need a way to ask
// which outgoing edges are affected.
assert(!Next->readsRegister(Reg) && "Need to split between terminators");
// We will split all the edges and repair there.
} else {
// This is a virtual register defined by a terminator.
if (ValMapping.NumBreakDowns == 1) {
// There is nothing to repair, but we may actually lie on
// the repairing cost because of the PHIs already processed
// as already stated.
// Though the code will be correct.
assert(false && "Repairing cost may not be accurate");
} else {
// We need to do non-local repairing. Basically, patch all
// the uses (i.e., phis) that we already processed.
// For now, just say this mapping is not possible.
RepairPt.switchTo(RepairingPlacement::RepairingKind::Impossible);
}
}
}
/// Compute the cost of mapping \p MI with \p InstrMapping and record in
/// \p RepairPts the repairing placements needed by its register operands.
///
/// \p RepairPts is cleared first, except when \p InstrMapping is invalid, in
/// which case the impossible cost is returned immediately.
///
/// When \p BestCost is null, repairing costs are not accumulated: only the
/// placements are gathered. When it is non-null, the evaluation stops and
/// returns the cost accumulated so far as soon as that cost exceeds
/// \p *BestCost; block frequency and branch probability information must be
/// available in that case.
///
/// \return the cost of the mapping, or the impossible cost when a repairing
/// cannot be materialized or its cost is std::numeric_limits<unsigned>::max().
RegBankSelect::MappingCost RegBankSelect::computeMapping(
MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
SmallVectorImpl<RepairingPlacement> &RepairPts,
const RegBankSelect::MappingCost *BestCost) {
assert((MBFI || !BestCost) && "Costs comparison require MBFI");
if (!InstrMapping.isValid())
return MappingCost::ImpossibleCost();
// If mapped with InstrMapping, MI will have the recorded cost.
MappingCost Cost(MBFI ? MBFI->getBlockFreq(MI.getParent()) : 1);
bool Saturated = Cost.addLocalCost(InstrMapping.getCost());
assert(!Saturated && "Possible mapping saturated the cost");
LLVM_DEBUG(dbgs() << "Evaluating mapping cost for: " << MI);
LLVM_DEBUG(dbgs() << "With: " << InstrMapping << '\n');
RepairPts.clear();
if (BestCost && Cost > *BestCost) {
LLVM_DEBUG(dbgs() << "Mapping is too expensive from the start\n");
return Cost;
}
// Moreover, to realize this mapping, the register bank of each operand must
// match this mapping. In other words, we may need to locally reassign the
// register banks. Account for that repairing cost as well.
// In this context, local means in the surrounding of MI.
for (unsigned OpIdx = 0, EndOpIdx = InstrMapping.getNumOperands();
OpIdx != EndOpIdx; ++OpIdx) {
const MachineOperand &MO = MI.getOperand(OpIdx);
// Only register operands that actually name a register need a bank.
if (!MO.isReg())
continue;
Register Reg = MO.getReg();
if (!Reg)
continue;
LLVM_DEBUG(dbgs() << "Opd" << OpIdx << '\n');
const RegisterBankInfo::ValueMapping &ValMapping =
InstrMapping.getOperandMapping(OpIdx);
// If Reg is already properly mapped, this is free.
bool Assign;
if (assignmentMatch(Reg, ValMapping, Assign)) {
LLVM_DEBUG(dbgs() << "=> is free (match).\n");
continue;
}
if (Assign) {
LLVM_DEBUG(dbgs() << "=> is free (simple assignment).\n");
RepairPts.emplace_back(RepairingPlacement(MI, OpIdx, *TRI, *this,
RepairingPlacement::Reassign));
continue;
}
// Find the insertion point for the repairing code.
RepairPts.emplace_back(
RepairingPlacement(MI, OpIdx, *TRI, *this, RepairingPlacement::Insert));
RepairingPlacement &RepairPt = RepairPts.back();
// If we need to split a basic block to materialize this insertion point,
// we may give a higher cost to this mapping.
// Nevertheless, we may get away with the split, so try that first.
if (RepairPt.hasSplit())
tryAvoidingSplit(RepairPt, MO, ValMapping);
// Check that the materialization of the repairing is possible.
if (!RepairPt.canMaterialize()) {
LLVM_DEBUG(dbgs() << "Mapping involves impossible repairing\n");
return MappingCost::ImpossibleCost();
}
// Account for the split cost and repair cost.
// Unless the cost is already saturated or we do not care about the cost.
if (!BestCost || Saturated)
continue;
// To get accurate information we need MBFI and MBPI.
// Thus, if we end up here this information should be here.
assert(MBFI && MBPI && "Cost computation requires MBFI and MBPI");
// FIXME: We will have to rework the repairing cost model.
// The repairing cost depends on the register bank that MO has.
// However, when we break down the value into different values,
// MO may not have a register bank while still needing repairing.
// For the fast mode, we don't compute the cost so that is fine,
// but still for the repairing code, we will have to make a choice.
// For the greedy mode, we should choose greedily what is the best
// choice based on the next use of MO.
// Sums up the repairing cost of MO at each insertion point.
uint64_t RepairCost = getRepairCost(MO, ValMapping);
// This is an impossible to repair cost.
if (RepairCost == std::numeric_limits<unsigned>::max())
return MappingCost::ImpossibleCost();
// Bias used for splitting: 5%.
const uint64_t PercentageForBias = 5;
// The "+ 99" rounds the 5% bias up to the next integer.
uint64_t Bias = (RepairCost * PercentageForBias + 99) / 100;
// We should not need more than a couple of instructions to repair
// an assignment. In other words, the computation should not
// overflow because the repairing cost is free of basic block
// frequency.
assert(((RepairCost < RepairCost * PercentageForBias) &&
(RepairCost * PercentageForBias <
RepairCost * PercentageForBias + 99)) &&
"Repairing involves more than a billion of instructions?!");
for (const std::unique_ptr<InsertPoint> &InsertPt : RepairPt) {
assert(InsertPt->canMaterialize() && "We should not have made it here");
// We will apply some basic block frequency and those use uint64_t.
if (!InsertPt->isSplit())
Saturated = Cost.addLocalCost(RepairCost);
else {
uint64_t CostForInsertPt = RepairCost;
// Again we shouldn't overflow here given that
// CostForInsertPt is frequency free at this point.
assert(CostForInsertPt + Bias > CostForInsertPt &&
"Repairing + split bias overflows");
CostForInsertPt += Bias;
uint64_t PtCost = InsertPt->frequency(*this) * CostForInsertPt;
// Check if we just overflowed.
if ((Saturated = PtCost < CostForInsertPt))
Cost.saturate();
else
Saturated = Cost.addNonLocalCost(PtCost);
}
// Stop looking into what it takes to repair, this is already
// too expensive.
if (BestCost && Cost > *BestCost) {
LLVM_DEBUG(dbgs() << "Mapping is too expensive, stop processing\n");
return Cost;
}
// No need to accumulate more cost information.
// We need to still gather the repairing information though.
if (Saturated)
break;
}
}
LLVM_DEBUG(dbgs() << "Total cost is: " << Cost << "\n");
return Cost;
}
/// Apply \p InstrMapping to \p MI using the placements in \p RepairPts.
///
/// Each placement is first realized: a Reassign placement sets the register
/// bank of the operand's register, an Insert placement creates new virtual
/// registers and emits the repairing code. The instruction is then rewritten
/// through the register bank info.
///
/// \return false if a placement cannot be materialized, is Impossible, or its
/// repairing fails; true otherwise.
bool RegBankSelect::applyMapping(
    MachineInstr &MI, const RegisterBankInfo::InstructionMapping &InstrMapping,
    SmallVectorImpl<RegBankSelect::RepairingPlacement> &RepairPts) {
  // OpdMapper will hold all the information needed for the rewriting.
  RegisterBankInfo::OperandsMapper OpdMapper(MI, InstrMapping, *MRI);

  // First, place the repairing code.
  for (RepairingPlacement &Placement : RepairPts) {
    const bool Unrepairable =
        !Placement.canMaterialize() ||
        Placement.getKind() == RepairingPlacement::Impossible;
    if (Unrepairable)
      return false;
    assert(Placement.getKind() != RepairingPlacement::None &&
           "This should not make its way in the list");

    const unsigned OpIdx = Placement.getOpIdx();
    MachineOperand &MO = MI.getOperand(OpIdx);
    const RegisterBankInfo::ValueMapping &ValMapping =
        InstrMapping.getOperandMapping(OpIdx);
    Register Reg = MO.getReg();

    if (Placement.getKind() == RepairingPlacement::Reassign) {
      // Nothing to insert: giving the register its bank is enough.
      assert(ValMapping.NumBreakDowns == 1 &&
             "Reassignment should only be for simple mapping");
      MRI->setRegBank(Reg, *ValMapping.BreakDown[0].RegBank);
      continue;
    }

    if (Placement.getKind() != RepairingPlacement::Insert)
      llvm_unreachable("Other kind should not happen");

    OpdMapper.createVRegs(OpIdx);
    if (!repairReg(MO, ValMapping, Placement, OpdMapper.getVRegs(OpIdx)))
      return false;
  }

  // Second, rewrite the instruction.
  LLVM_DEBUG(dbgs() << "Actual mapping of the operands: " << OpdMapper << '\n');
  RBI->applyMapping(OpdMapper);
  return true;
}
/// Choose a mapping for \p MI and apply it.
///
/// In Fast mode the default mapping reported by the register bank info is
/// used; otherwise the best of the possible mappings is selected.
///
/// \return false when no usable mapping exists or applying it fails.
bool RegBankSelect::assignInstr(MachineInstr &MI) {
  LLVM_DEBUG(dbgs() << "Assign: " << MI);

  // Remember the repairing placement for all the operands.
  SmallVector<RepairingPlacement, 4> RepairPts;
  const RegisterBankInfo::InstructionMapping *BestMapping = nullptr;

  if (OptMode != RegBankSelect::Mode::Fast) {
    RegisterBankInfo::InstructionMappings Candidates =
        RBI->getInstrPossibleMappings(MI);
    if (Candidates.empty())
      return false;
    BestMapping = &findBestMapping(MI, Candidates, RepairPts);
  } else {
    BestMapping = &RBI->getInstrMapping(MI);
    // No cost comparison in Fast mode; this only gathers the placements and
    // detects an impossible mapping.
    MappingCost FastCost = computeMapping(MI, *BestMapping, RepairPts);
    if (FastCost == MappingCost::ImpossibleCost())
      return false;
  }

  // Make sure the mapping is valid for MI.
  assert(BestMapping->verify(MI) && "Invalid instruction mapping");
  LLVM_DEBUG(dbgs() << "Best Mapping: " << *BestMapping << '\n');

  // After this call, MI may not be valid anymore.
  // Do not use it.
  return applyMapping(MI, *BestMapping, RepairPts);
}
/// Assign a register bank to the operands of every instruction of \p MF.
///
/// Functions on which instruction selection already failed are skipped.
/// Functions marked optnone are processed in Fast mode; the configured mode
/// is restored on every path out of this function, including the failure
/// paths (which previously left the pass stuck in Fast mode for the
/// functions processed afterwards).
///
/// \return always false.
bool RegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  // If the ISel pipeline failed, do not bother running that pass.
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;

  LLVM_DEBUG(dbgs() << "Assign register banks for: " << MF.getName() << '\n');
  const Function &F = MF.getFunction();
  const Mode SaveOptMode = OptMode;
  if (F.hasOptNone())
    OptMode = Mode::Fast;
  init(MF);

#ifndef NDEBUG
  // Check that our input is fully legal: we require the function to have the
  // Legalized property, so it should be.
  // FIXME: This should be in the MachineVerifier.
  if (!DisableGISelLegalityCheck)
    if (const MachineInstr *MI = machineFunctionIsIllegal(MF)) {
      reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
                         "instruction is not legal", *MI);
      // Do not leak the optnone override into the next function.
      OptMode = SaveOptMode;
      return false;
    }
#endif

  // Walk the function and assign register banks to all operands.
  // Use a RPOT to make sure all registers are assigned before we choose
  // the best mapping of the current instruction.
  ReversePostOrderTraversal<MachineFunction*> RPOT(&MF);
  for (MachineBasicBlock *MBB : RPOT) {
    // Set a sensible insertion point for subsequent calls to MIRBuilder.
    MIRBuilder.setMBB(*MBB);
    for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end();
         MII != End;) {
      // MI might be invalidated by the assignment, so move the
      // iterator before hand.
      MachineInstr &MI = *MII++;

      // Ignore target-specific post-isel instructions: they should use proper
      // regclasses.
      if (isTargetSpecificOpcode(MI.getOpcode()) && !MI.isPreISelOpcode())
        continue;

      if (!assignInstr(MI)) {
        reportGISelFailure(MF, *TPC, *MORE, "gisel-regbankselect",
                           "unable to map instruction", MI);
        // Do not leak the optnone override into the next function.
        OptMode = SaveOptMode;
        return false;
      }

      // It's possible the mapping changed control flow, and moved the following
      // instruction to a new block, so figure out the new parent.
      if (MII != End) {
        MachineBasicBlock *NextInstBB = MII->getParent();
        if (NextInstBB != MBB) {
          LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n");
          MBB = NextInstBB;
          MIRBuilder.setMBB(*MBB);
          End = MBB->end();
        }
      }
    }
  }

  OptMode = SaveOptMode;
  return false;
}
//------------------------------------------------------------------------------
// Helper Classes Implementation
//------------------------------------------------------------------------------
/// Build the repairing placement for operand \p OpIdx of \p MI.
///
/// Insertion points are only computed when \p Kind is Insert:
/// - for an instruction that is neither a PHI nor a terminator, the point is
///   right before \p MI for a use and right after it for a definition;
/// - for a PHI definition, the point is past the last PHI of the block;
/// - for a PHI use, the point is in the predecessor block, before its
///   terminators, unless one of the scanned terminators modifies the
///   register, in which case the incoming edge is used;
/// - for a terminator use, the point is before the terminators of the block;
/// - for a terminator definition, one point is added per outgoing edge.
RegBankSelect::RepairingPlacement::RepairingPlacement(
    MachineInstr &MI, unsigned OpIdx, const TargetRegisterInfo &TRI, Pass &P,
    RepairingPlacement::RepairingKind Kind)
    // Default is, we are going to insert code to repair OpIdx.
    : Kind(Kind), OpIdx(OpIdx),
      CanMaterialize(Kind != RepairingKind::Impossible), P(P) {
  const MachineOperand &MO = MI.getOperand(OpIdx);
  assert(MO.isReg() && "Trying to repair a non-reg operand");

  if (Kind != RepairingKind::Insert)
    return;

  // Repairings for definitions happen after MI, uses happen before.
  bool Before = !MO.isDef();

  // Check if we are done with MI.
  if (!MI.isPHI() && !MI.isTerminator()) {
    addInsertPoint(MI, Before);
    // We are done with the initialization.
    return;
  }

  // Now, look for the special cases.
  if (MI.isPHI()) {
    // - PHI must be the first instructions:
    //   * Before, we have to split the related incoming edge.
    //   * After, move the insertion point past the last phi.
    if (!Before) {
      MachineBasicBlock::iterator It = MI.getParent()->getFirstNonPHI();
      if (It != MI.getParent()->end())
        addInsertPoint(*It, /*Before*/ true);
      else
        // The block only holds PHIs: insert after the last one.
        addInsertPoint(*(--It), /*Before*/ false);
      return;
    }

    // We repair a use of a phi, we may need to split the related edge.
    // The block operand of a PHI use directly follows the register operand.
    MachineBasicBlock &Pred = *MI.getOperand(OpIdx + 1).getMBB();
    // Check if we can move the insertion point prior to the
    // terminators of the predecessor.
    Register Reg = MO.getReg();
    MachineBasicBlock::iterator It = Pred.getLastNonDebugInstr();
    for (auto Begin = Pred.begin(); It != Begin && It->isTerminator(); --It) {
      if (It->modifiesRegister(Reg, &TRI)) {
        // We cannot hoist the repairing code in the predecessor.
        // Split the edge.
        addInsertPoint(Pred, *MI.getParent());
        return;
      }
    }

    // At this point, we can insert in Pred.
    // - If It is invalid, Pred is empty and we can insert in Pred
    //   wherever we want.
    // - If It is valid, It is the first non-terminator, insert after It.
    if (It == Pred.end())
      addInsertPoint(Pred, /*Beginning*/ false);
    else
      addInsertPoint(*It, /*Before*/ false);
    return;
  }

  // - Terminators must be the last instructions:
  //   * Before, move the insert point before the first terminator.
  //   * After, we have to split the outcoming edges.
  if (Before) {
    // Check whether Reg is defined by any terminator.
    MachineBasicBlock::reverse_iterator It = MI;
    auto REnd = MI.getParent()->rend();

    for (; It != REnd && It->isTerminator(); ++It) {
      assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
             "copy insertion in middle of terminators not handled");
    }

    if (It == REnd) {
      // The block only holds terminators: insert at its very beginning.
      addInsertPoint(*MI.getParent()->begin(), true);
      return;
    }

    // We are sure to be right before the first terminator.
    addInsertPoint(*It, /*Before*/ false);
    return;
  }

  // Make sure Reg is not redefined by other terminators, otherwise
  // we do not know how to split.
  for (MachineBasicBlock::iterator It = MI, End = MI.getParent()->end();
       ++It != End;) {
    // The machine verifier should reject this kind of code.
    // The check is negated so that it matches the intent stated above: a
    // later terminator redefining Reg is the unsupported situation.
    assert(!It->modifiesRegister(MO.getReg(), &TRI) &&
           "Do not know where to split");
  }

  // Split each outcoming edges.
  // The successor must be dereferenced: handing the pointer itself over
  // converts it to bool and selects the (block, Beginning) overload, which
  // inserts at the beginning of Src instead of on the edge.
  MachineBasicBlock &Src = *MI.getParent();
  for (MachineBasicBlock *Succ : Src.successors())
    addInsertPoint(Src, *Succ);
}
/// Record an insertion point next to \p MI: before it when \p Before is
/// true, after it otherwise.
void RegBankSelect::RepairingPlacement::addInsertPoint(MachineInstr &MI,
                                                       bool Before) {
  // The new point is registered through the InsertPoint& overload.
  auto *InstrPoint = new InstrInsertPoint(MI, Before);
  addInsertPoint(*InstrPoint);
}
/// Record an insertion point attached to the block \p MBB; \p Beginning is
/// forwarded as is to the MBBInsertPoint constructor.
void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &MBB,
                                                       bool Beginning) {
  // The new point is registered through the InsertPoint& overload.
  auto *BlockPoint = new MBBInsertPoint(MBB, Beginning);
  addInsertPoint(*BlockPoint);
}
/// Record an insertion point on the edge going from \p Src to \p Dst.
/// The pass P held by this placement is handed to the edge point.
void RegBankSelect::RepairingPlacement::addInsertPoint(MachineBasicBlock &Src,
                                                       MachineBasicBlock &Dst) {
  // The new point is registered through the InsertPoint& overload.
  auto *EdgePoint = new EdgeInsertPoint(Src, Dst, P);
  addInsertPoint(*EdgePoint);
}
/// Register \p Point with this placement and fold its properties into the
/// summary flags: the placement stays materializable only while every point
/// is, and it involves a split as soon as one point does.
void RegBankSelect::RepairingPlacement::addInsertPoint(
    RegBankSelect::InsertPoint &Point) {
  const bool PointCanMaterialize = Point.canMaterialize();
  const bool PointIsSplit = Point.isSplit();
  CanMaterialize &= PointCanMaterialize;
  HasSplit |= PointIsSplit;
  // The address of the point is what gets stored in the list.
  InsertPoints.emplace_back(&Point);
}
/// Create an insertion point located before (\p Before is true) or after
/// (\p Before is false) the instruction \p Instr.
RegBankSelect::InstrInsertPoint::InstrInsertPoint(MachineInstr &Instr,
                                                  bool Before)
    : InsertPoint(), Instr(Instr), Before(Before) {
  // Splitting is not supported for this kind of point, hence there is no
  // liveness or similar information to update and no pass is needed.
  // Inserting in front of a PHI is rejected...
  assert(!(Before && Instr.isPHI()) &&
         "Splitting before phis requires more points");
  // ... and so is an insertion requested with Before set on an instruction
  // that is immediately followed by a PHI.
  assert(!(Before && Instr.getNextNode() && Instr.getNextNode()->isPHI()) &&
         "Splitting between phis does not make sense");
}
/// Make the insertion point usable. Nothing has to be done unless the point
/// requires a split, which is not implemented.
void RegBankSelect::InstrInsertPoint::materialize() {
  if (!isSplit()) {
    // The insertion point is simply the current or the next instruction,
    // depending on Before: there is nothing to create.
    return;
  }

  // We would have to slice the block and return the beginning of the new
  // one.
  // Splitting between the terminators theoretically requires knowing where
  // the first and second sets of terminators end, to update the successors
  // properly.
  // In practice, we should have at most 2 branch instructions, one
  // conditional and one unconditional, so the successors could be updated
  // by looking at the target of the unconditional branch.
  // Such a split would also have to update the liveness information and
  // such, i.e., it would need access to the pass here.
  // The machine verifier should actually make sure such cases cannot
  // happen.
  llvm_unreachable("Not yet implemented");
}
/// Tell whether inserting at this point lands after a terminator, i.e.,
/// whether a split would be required.
bool RegBankSelect::InstrInsertPoint::isSplit() const {
  if (Before) {
    // Inserting before an instruction that itself follows a terminator
    // still means inserting after a terminator.
    const auto *Prev = Instr.getPrevNode();
    return Prev && Prev->isTerminator();
  }
  // Inserting after Instr: a split is needed when Instr is a terminator.
  return Instr.isTerminator();
}
uint64_t RegBankSelect::InstrInsertPoint::frequency(const Pass &P) const {
// Even if we need to split, because we insert between terminators,
// this split has actually the same frequency as the instruction.
const MachineBlockFrequencyInfo *MBFI =
P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
if (!MBFI)
return 1;
return MBFI->getBlockFreq(Instr.getParent()).getFrequency();
}
uint64_t RegBankSelect::MBBInsertPoint::frequency(const Pass &P) const {
const MachineBlockFrequencyInfo *MBFI =
P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
if (!MBFI)
return 1;
return MBFI->getBlockFreq(&MBB).getFrequency();
}
/// Split the edge Src -> DstOrSplit and remember the block created on it.
void RegBankSelect::EdgeInsertPoint::materialize() {
  // Repairing twice at the same place before the insertion point gets
  // materialized may make us believe the edge must be split twice.
  // A factory handing out a single instance for identical insert points
  // would avoid that.
  assert(Src.isSuccessor(DstOrSplit) && DstOrSplit->isPredecessor(&Src) &&
         "This point has already been split");
  // The destination field is reused to hold the newly created block.
  DstOrSplit = Src.SplitCriticalEdge(DstOrSplit, P);
  assert(DstOrSplit && "Invalid call to materialize");
}
uint64_t RegBankSelect::EdgeInsertPoint::frequency(const Pass &P) const {
const MachineBlockFrequencyInfo *MBFI =
P.getAnalysisIfAvailable<MachineBlockFrequencyInfo>();
if (!MBFI)
return 1;
if (WasMaterialized)
return MBFI->getBlockFreq(DstOrSplit).getFrequency();
const MachineBranchProbabilityInfo *MBPI =
P.getAnalysisIfAvailable<MachineBranchProbabilityInfo>();
if (!MBPI)
return 1;
// The basic block will be on the edge.
return (MBFI->getBlockFreq(&Src) * MBPI->getEdgeProbability(&Src, DstOrSplit))
.getFrequency();
}
/// Tell whether the edge Src -> DstOrSplit can be split.
bool RegBankSelect::EdgeInsertPoint::canMaterialize() const {
  // An edge insert point is only expected on a critical edge; otherwise the
  // successor or the predecessor block should have been used instead.
  assert(Src.succ_size() > 1 && "Edge is not critical");
  assert(DstOrSplit->pred_size() > 1 && "Edge is not critical");
  const bool Splittable = Src.canSplitCriticalEdge(DstOrSplit);
  return Splittable;
}
/// Create a cost whose local frequency is the raw value of \p LocalFreq.
RegBankSelect::MappingCost::MappingCost(const BlockFrequency &LocalFreq)
    : LocalFreq(LocalFreq.getFrequency()) {
  // Only the frequency is set here; the cost fields are not written by this
  // constructor.
}
/// Add \p Cost to the local cost.
/// \return true when the cost is saturated after the update, which includes
/// the case where the addition overflowed.
bool RegBankSelect::MappingCost::addLocalCost(uint64_t Cost) {
  const uint64_t Sum = LocalCost + Cost;
  // An unsigned sum smaller than one of its terms means it wrapped around.
  if (Sum >= LocalCost) {
    LocalCost = Sum;
    return isSaturated();
  }
  saturate();
  return true;
}
/// Add \p Cost to the non-local cost.
/// \return true when the cost is saturated after the update, which includes
/// the case where the addition overflowed.
bool RegBankSelect::MappingCost::addNonLocalCost(uint64_t Cost) {
  const uint64_t Sum = NonLocalCost + Cost;
  // An unsigned sum smaller than one of its terms means it wrapped around.
  if (Sum >= NonLocalCost) {
    NonLocalCost = Sum;
    return isSaturated();
  }
  saturate();
  return true;
}
/// Tell whether this cost holds the saturated encoding: every field at
/// UINT64_MAX except the local cost, which is one below.
bool RegBankSelect::MappingCost::isSaturated() const {
  if (LocalCost != UINT64_MAX - 1)
    return false;
  if (NonLocalCost != UINT64_MAX)
    return false;
  return LocalFreq == UINT64_MAX;
}
void RegBankSelect::MappingCost::saturate() {
*this = ImpossibleCost();
--LocalCost;
}
/// Return the cost standing for a mapping that cannot be realized: all three
/// constructor arguments are UINT64_MAX.
RegBankSelect::MappingCost RegBankSelect::MappingCost::ImpossibleCost() {
  constexpr uint64_t Max = UINT64_MAX;
  return MappingCost(Max, Max, Max);
}
/// Strict "is cheaper than" comparison.
///
/// A cost is evaluated as LocalCost * LocalFreq + NonLocalCost. The
/// impossible cost is more expensive than anything else, and the saturated
/// cost is more expensive than any regular cost. When the evaluation of both
/// sides overflows 64 bits, the costs cannot be ordered and false is
/// returned.
///
/// \param Cost The cost to compare against.
/// \return true when this cost is strictly cheaper than \p Cost.
bool RegBankSelect::MappingCost::operator<(const MappingCost &Cost) const {
  // Sort out the easy cases.
  if (*this == Cost)
    return false;
  // If one is impossible to realize the other is cheaper unless it is
  // impossible as well.
  if ((*this == ImpossibleCost()) || (Cost == ImpossibleCost()))
    return (*this == ImpossibleCost()) < (Cost == ImpossibleCost());
  // If one is saturated the other is cheaper, unless it is saturated
  // as well.
  if (isSaturated() || Cost.isSaturated())
    return isSaturated() < Cost.isSaturated();
  // At this point we know both costs hold sensible values.

  // If both values have a different base frequency, there is not much
  // we can do but to scale everything.
  // However, if they have the same base frequency we can avoid making
  // complicated computation.
  uint64_t ThisLocalAdjust;
  uint64_t OtherLocalAdjust;
  if (LLVM_LIKELY(LocalFreq == Cost.LocalFreq)) {

    // At this point, we know the local costs are comparable.
    // Do the case that does not involve potential overflow first.
    if (NonLocalCost == Cost.NonLocalCost)
      // Since the non-local costs do not discriminate on the result,
      // just compare the local costs.
      return LocalCost < Cost.LocalCost;

    // The base costs are comparable so we may only keep the relative
    // value to increase our chances of avoiding overflows.
    ThisLocalAdjust = 0;
    OtherLocalAdjust = 0;
    if (LocalCost < Cost.LocalCost)
      OtherLocalAdjust = Cost.LocalCost - LocalCost;
    else
      ThisLocalAdjust = LocalCost - Cost.LocalCost;
  } else {
    ThisLocalAdjust = LocalCost;
    OtherLocalAdjust = Cost.LocalCost;
  }

  // The non-local costs are comparable, just keep the relative value.
  uint64_t ThisNonLocalAdjust = 0;
  uint64_t OtherNonLocalAdjust = 0;
  if (NonLocalCost < Cost.NonLocalCost)
    OtherNonLocalAdjust = Cost.NonLocalCost - NonLocalCost;
  else
    ThisNonLocalAdjust = NonLocalCost - Cost.NonLocalCost;

  // Scale everything to make them comparable.
  // An unsigned product wraps silently, and comparing the wrapped value with
  // its operands neither catches every overflow nor copes with a zero
  // frequency. Adjust * Freq exceeds 64 bits exactly when
  // Freq > UINT64_MAX / Adjust, for a non-zero Adjust.
  auto MulOverflows = [](uint64_t Adjust, uint64_t Freq) {
    return Adjust != 0 && Freq > UINT64_MAX / Adjust;
  };
  uint64_t ThisScaledCost = ThisLocalAdjust * LocalFreq;
  bool ThisOverflows = MulOverflows(ThisLocalAdjust, LocalFreq);
  uint64_t OtherScaledCost = OtherLocalAdjust * Cost.LocalFreq;
  bool OtherOverflows = MulOverflows(OtherLocalAdjust, Cost.LocalFreq);

  // Add the non-local costs.
  // An unsigned sum smaller than one of its terms means it wrapped around.
  ThisOverflows |= ThisNonLocalAdjust &&
                   ThisScaledCost + ThisNonLocalAdjust < ThisNonLocalAdjust;
  ThisScaledCost += ThisNonLocalAdjust;
  OtherOverflows |= OtherNonLocalAdjust &&
                    OtherScaledCost + OtherNonLocalAdjust < OtherNonLocalAdjust;
  OtherScaledCost += OtherNonLocalAdjust;
  // If both overflow, we cannot compare without additional
  // precision, e.g., APInt. Just give up on that case.
  if (ThisOverflows && OtherOverflows)
    return false;
  // If one overflows but not the other, we can still compare: the one that
  // does not overflow is the cheaper one.
  if (ThisOverflows || OtherOverflows)
    return ThisOverflows < OtherOverflows;
  // Otherwise, just compare the values.
  return ThisScaledCost < OtherScaledCost;
}
/// Two costs are equal when their local cost, non-local cost and local
/// frequency all match.
bool RegBankSelect::MappingCost::operator==(const MappingCost &Cost) const {
  if (LocalCost != Cost.LocalCost)
    return false;
  if (NonLocalCost != Cost.NonLocalCost)
    return false;
  return LocalFreq == Cost.LocalFreq;
}
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print this cost on the debug stream, followed by a newline.
LLVM_DUMP_METHOD void RegBankSelect::MappingCost::dump() const {
  raw_ostream &DebugOS = dbgs();
  print(DebugOS);
  DebugOS << '\n';
}
#endif
/// Write a textual form of this cost to \p OS: "impossible", "saturated", or
/// the expression "LocalFreq * LocalCost + NonLocalCost" with actual values.
void RegBankSelect::MappingCost::print(raw_ostream &OS) const {
  // The impossible cost is tested first, then the saturated one.
  if (*this == ImpossibleCost())
    OS << "impossible";
  else if (isSaturated())
    OS << "saturated";
  else
    OS << LocalFreq << " * " << LocalCost << " + " << NonLocalCost;
}
|