add general backend performance optimizations
This commit is contained in:
@@ -1,5 +1,6 @@
|
|||||||
#include "ir/PassManager.h"
|
#include "ir/PassManager.h"
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
#include <tuple>
|
#include <tuple>
|
||||||
|
|
||||||
@@ -56,10 +57,30 @@ bool RunCSE(Function* func) {
|
|||||||
|
|
||||||
for (const auto& bbPtr : func->GetBlocks()) {
|
for (const auto& bbPtr : func->GetBlocks()) {
|
||||||
std::vector<Instruction*> seen_instructions;
|
std::vector<Instruction*> seen_instructions;
|
||||||
|
std::unordered_map<Value*, Instruction*> available_loads;
|
||||||
std::vector<Instruction*> to_erase;
|
std::vector<Instruction*> to_erase;
|
||||||
|
|
||||||
for (const auto& instPtr : bbPtr->GetInstructions()) {
|
for (const auto& instPtr : bbPtr->GetInstructions()) {
|
||||||
auto* inst = instPtr.get();
|
auto* inst = instPtr.get();
|
||||||
|
|
||||||
|
if (inst->GetOpcode() == Opcode::Load) {
|
||||||
|
auto* load = static_cast<LoadInst*>(inst);
|
||||||
|
auto it = available_loads.find(load->GetPtr());
|
||||||
|
if (it != available_loads.end()) {
|
||||||
|
inst->ReplaceAllUsesWith(it->second);
|
||||||
|
to_erase.push_back(inst);
|
||||||
|
changed = true;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
available_loads[load->GetPtr()] = inst;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inst->GetOpcode() == Opcode::Store ||
|
||||||
|
inst->GetOpcode() == Opcode::Call) {
|
||||||
|
available_loads.clear();
|
||||||
|
}
|
||||||
|
|
||||||
Instruction* match = nullptr;
|
Instruction* match = nullptr;
|
||||||
for (auto* seen : seen_instructions) {
|
for (auto* seen : seen_instructions) {
|
||||||
if (IsEquivalent(inst, seen)) {
|
if (IsEquivalent(inst, seen)) {
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ bool IsFloatReg(PhysReg reg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void PrintStackAccess(std::ostream& os, const char* mnemonic, PhysReg reg,
|
void PrintStackAccess(std::ostream& os, const char* mnemonic, PhysReg reg,
|
||||||
int offset) {
|
int offset, int frame_size) {
|
||||||
bool is_float = IsFloatReg(reg);
|
bool is_float = IsFloatReg(reg);
|
||||||
const char* ldr_cmd = is_float ? "ldr" : "ldr";
|
const char* ldr_cmd = is_float ? "ldr" : "ldr";
|
||||||
const char* str_cmd = is_float ? "str" : "str";
|
const char* str_cmd = is_float ? "str" : "str";
|
||||||
@@ -37,10 +37,24 @@ void PrintStackAccess(std::ostream& os, const char* mnemonic, PhysReg reg,
|
|||||||
} else {
|
} else {
|
||||||
os << " " << mnemonic << " " << PhysRegName(reg) << ", [x29, #" << offset << "]\n";
|
os << " " << mnemonic << " " << PhysRegName(reg) << ", [x29, #" << offset << "]\n";
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
int sp_offset = frame_size + offset;
|
||||||
|
int access_size = 4;
|
||||||
|
if ((reg >= PhysReg::X0 && reg <= PhysReg::X28) ||
|
||||||
|
reg == PhysReg::X29 || reg == PhysReg::X30 ||
|
||||||
|
reg == PhysReg::SP) {
|
||||||
|
access_size = 8;
|
||||||
|
}
|
||||||
|
int max_offset = access_size == 8 ? 32760 : 16380;
|
||||||
|
if (sp_offset >= 0 && sp_offset <= max_offset &&
|
||||||
|
sp_offset % access_size == 0) {
|
||||||
|
os << " " << base_mnemonic << " " << PhysRegName(reg)
|
||||||
|
<< ", [sp, #" << sp_offset << "]\n";
|
||||||
} else {
|
} else {
|
||||||
os << " ldr x10, =" << offset << "\n";
|
os << " ldr x10, =" << offset << "\n";
|
||||||
os << " " << base_mnemonic << " " << PhysRegName(reg) << ", [x29, x10]\n";
|
os << " " << base_mnemonic << " " << PhysRegName(reg) << ", [x29, x10]\n";
|
||||||
}
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string GetBlockLabel(const std::string& func_name, const std::string& block_name) {
|
std::string GetBlockLabel(const std::string& func_name, const std::string& block_name) {
|
||||||
@@ -125,12 +139,14 @@ void PrintAsm(const MachineFunction& function, std::ostream& os) {
|
|||||||
}
|
}
|
||||||
case Opcode::LoadStack: {
|
case Opcode::LoadStack: {
|
||||||
const auto& slot = GetFrameSlot(function, ops.at(1));
|
const auto& slot = GetFrameSlot(function, ops.at(1));
|
||||||
PrintStackAccess(os, "ldur", ops.at(0).GetReg(), slot.offset);
|
PrintStackAccess(os, "ldur", ops.at(0).GetReg(), slot.offset,
|
||||||
|
function.GetFrameSize());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Opcode::StoreStack: {
|
case Opcode::StoreStack: {
|
||||||
const auto& slot = GetFrameSlot(function, ops.at(1));
|
const auto& slot = GetFrameSlot(function, ops.at(1));
|
||||||
PrintStackAccess(os, "stur", ops.at(0).GetReg(), slot.offset);
|
PrintStackAccess(os, "stur", ops.at(0).GetReg(), slot.offset,
|
||||||
|
function.GetFrameSize());
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Opcode::AddRR:
|
case Opcode::AddRR:
|
||||||
|
|||||||
@@ -7,6 +7,10 @@ namespace mir {
|
|||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
// Rounds `value` up (toward +infinity) to the nearest multiple of `align`.
//
// value: the quantity to align; may be negative.
// align: the alignment granule. Values <= 1 impose no constraint, so `value`
//        is returned unchanged (this also avoids a division by zero).
// Returns the smallest multiple of `align` that is >= `value`.
int AlignTo(int value, int align) {
  if (align <= 1) {
    return value;
  }
  // In C++ the sign of `%` follows the dividend, so the remainder is negative
  // for negative `value`; the two cases need different corrections.
  const int remainder = value % align;
  if (remainder == 0) {
    return value;
  }
  if (remainder > 0) {
    // Positive remainder: step forward to the next multiple. Written this way
    // (instead of value + align - 1) so already-aligned inputs never overflow.
    return value + (align - remainder);
  }
  // Negative remainder: dropping it moves toward zero, i.e. upward.
  return value - remainder;
}
|
||||||
|
|
||||||
PhysReg NormalizeReg(PhysReg reg) {
|
PhysReg NormalizeReg(PhysReg reg) {
|
||||||
int r = static_cast<int>(reg);
|
int r = static_cast<int>(reg);
|
||||||
// Map 64-bit X0-X28 registers to 32-bit W0-W28 registers to handle aliasing
|
// Map 64-bit X0-X28 registers to 32-bit W0-W28 registers to handle aliasing
|
||||||
@@ -96,6 +100,29 @@ std::vector<MachineInstr> SimplifyCompareToBranch(
|
|||||||
return simplified;
|
return simplified;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Repacks the frame so that only slots still named by some operand take up
// space, then stores the resulting frame size (rounded up to 16 bytes).
//
// Slots that no operand references are skipped and keep whatever offset they
// already had. Referenced slots receive fresh negative offsets, assigned in
// the order GetFrameSlots() yields them, each one placed directly below the
// previous one.
// NOTE(review): packing uses slot.size only; no per-slot alignment is applied
// here -- confirm that consumers of `offset` tolerate unaligned offsets.
void CompactFrameSlots(MachineFunction& function) {
  // Pass 1: gather the index of every frame slot referenced by an operand.
  std::unordered_set<int> live_indices;
  for (const auto& mbb : function.GetBlocks()) {
    for (const auto& mi : mbb.GetInstructions()) {
      for (const auto& operand : mi.GetOperands()) {
        if (operand.GetKind() != Operand::Kind::FrameIndex) {
          continue;
        }
        live_indices.insert(operand.GetFrameIndex());
      }
    }
  }

  // Pass 2: lay the referenced slots out back to back. `bytes_used` is the
  // running total, and each slot's offset is the negated total after adding
  // that slot's size.
  int bytes_used = 0;
  for (const auto& slot : function.GetFrameSlots()) {
    const bool is_live = live_indices.count(slot.index) != 0;
    if (is_live) {
      bytes_used += slot.size;
      function.GetFrameSlot(slot.index).offset = -bytes_used;
    }
  }

  function.SetFrameSize(AlignTo(bytes_used, 16));
}
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
||||||
void RunPeephole(MachineFunction& function) {
|
void RunPeephole(MachineFunction& function) {
|
||||||
@@ -285,6 +312,8 @@ void RunPeephole(MachineFunction& function) {
|
|||||||
}
|
}
|
||||||
insts = std::move(optimized);
|
insts = std::move(optimized);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
CompactFrameSlots(function);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace mir
|
} // namespace mir
|
||||||
|
|||||||
Reference in New Issue
Block a user