strength reduce power-of-two GEP offsets

This commit is contained in:
2026-06-30 00:25:29 +08:00
parent cd46ff6fdd
commit 108f3d9e4b
4 changed files with 29 additions and 2 deletions

View File

@@ -55,6 +55,7 @@ enum class Opcode {
MovReg,
Adrp,
AddRegImm,
LslImm,
LdrRegReg,
StrRegReg,
SIToFP,

View File

@@ -250,6 +250,11 @@ void PrintAsm(const MachineFunction& function, std::ostream& os) {
}
break;
}
case Opcode::LslImm:
os << " lsl " << PhysRegName(ops.at(0).GetReg()) << ", "
<< PhysRegName(ops.at(1).GetReg()) << ", #"
<< ops.at(2).GetImm() << "\n";
break;
case Opcode::LdrRegReg: {
PhysReg reg = ops.at(0).GetReg();
const char* ldr_cmd = IsFloatReg(reg) ? "ldr" : "ldr";

View File

@@ -112,6 +112,19 @@ uint32_t GetAllocaSize(const ir::Instruction& inst, const std::unordered_set<con
return GetTypeSize(type.get());
}
// Reports whether `value` has exactly one bit set, i.e. equals 2^k for some
// k in [0, 31]. Zero is explicitly not considered a power of two.
bool IsPowerOfTwo(uint32_t value) {
  if (value == 0) {
    return false;
  }
  // Subtracting one flips the lowest set bit and everything below it, so the
  // AND clears that bit; the result is zero only if it was the sole set bit.
  const uint32_t without_lowest_bit = value & (value - 1);
  return without_lowest_bit == 0;
}
// Returns the index of the highest set bit of `value`, i.e. floor(log2(value)).
// For an exact power of two this is the shift amount equivalent to multiplying
// by `value`. Inputs 0 and 1 both yield 0.
int Log2(uint32_t value) {
  int highest_bit = 0;
  // Each bit that remains after dropping the lowest one contributes one
  // position to the index of the most significant set bit.
  for (uint32_t remaining = value >> 1; remaining != 0; remaining >>= 1) {
    ++highest_bit;
  }
  return highest_bit;
}
bool LooksLikeConstantArrayModuloSumLoop(const ir::Function& function,
int* per_iteration_sum,
int* modulo) {
@@ -626,13 +639,20 @@ void LowerInstruction(const ir::Instruction& inst, MachineFunction& function,
}
EmitValueToReg(idx, PhysReg::W9, slots, block);
if (stride > 1) {
bool shifted = false;
if (stride > 1 && IsPowerOfTwo(stride)) {
block.Append(Opcode::ZExt, {Operand::Reg(PhysReg::X9), Operand::Reg(PhysReg::W9)});
block.Append(Opcode::LslImm, {Operand::Reg(PhysReg::X9), Operand::Reg(PhysReg::X9), Operand::Imm(Log2(stride))});
shifted = true;
} else if (stride > 1) {
block.Append(Opcode::MovImm, {Operand::Reg(PhysReg::W10), Operand::Imm(stride)});
block.Append(Opcode::MulRR, {Operand::Reg(PhysReg::W9), Operand::Reg(PhysReg::W9), Operand::Reg(PhysReg::W10)});
}
// Extend W9 to X9 and add to base address X8
block.Append(Opcode::ZExt, {Operand::Reg(PhysReg::X9), Operand::Reg(PhysReg::W9)});
if (!shifted) {
block.Append(Opcode::ZExt, {Operand::Reg(PhysReg::X9), Operand::Reg(PhysReg::W9)});
}
block.Append(Opcode::AddRR, {Operand::Reg(PhysReg::X8), Operand::Reg(PhysReg::X8), Operand::Reg(PhysReg::X9)});
}

View File

@@ -150,6 +150,7 @@ void RunPeephole(MachineFunction& function) {
case Opcode::MovReg:
case Opcode::Adrp:
case Opcode::AddRegImm:
case Opcode::LslImm:
case Opcode::LdrRegReg:
case Opcode::SIToFP:
case Opcode::FPToSI: