[midend-IVE]参考libdivide库,实现了魔数的正确求解,如果后续出错直接用API或者不要除法强度削弱了
This commit is contained in:
@@ -7,6 +7,8 @@
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <unordered_map>
|
||||
#include <climits>
|
||||
|
||||
// 使用全局调试开关
|
||||
extern int DEBUG;
|
||||
@@ -104,65 +106,188 @@ bool StrengthReductionContext::analyzeInductionVariableRange(
|
||||
return hasNegativePotential;
|
||||
}
|
||||
|
||||
std::pair<int64_t, int> StrengthReductionContext::computeMulhMagicNumbers(int divisor) const {
|
||||
// 计算用于除法的魔数 (magic number) 和移位量
|
||||
// 基于 "Division by Invariant Integers using Multiplication" 算法
|
||||
//该实现参考了libdivide的算法
|
||||
std::pair<int, int> StrengthReductionContext::computeMulhMagicNumbers(int divisor) const {
|
||||
|
||||
int64_t magic = 0;
|
||||
int shift = 0;
|
||||
bool isPowerOfTwo = (divisor & (divisor - 1)) == 0;
|
||||
|
||||
if (isPowerOfTwo) {
|
||||
// 对于2的幂,不需要魔数,直接使用移位
|
||||
magic = 1;
|
||||
shift = __builtin_ctz(divisor); // 计算尾随零的个数
|
||||
return {magic, shift};
|
||||
if (DEBUG) {
|
||||
std::cout << "\n[SR] ===== Computing magic numbers for divisor " << divisor << " (libdivide algorithm) =====" << std::endl;
|
||||
}
|
||||
|
||||
// 对于非2的幂的正数除数,计算魔数
|
||||
// 使用32位有符号整数范围
|
||||
const int bitWidth = 32;
|
||||
const int64_t maxMagic = (1LL << (bitWidth - 1)) - 1;
|
||||
if (divisor == 0) {
|
||||
if (DEBUG) std::cout << "[SR] Error: divisor must be != 0" << std::endl;
|
||||
return {-1, -1};
|
||||
}
|
||||
|
||||
// libdivide 常数
|
||||
const uint8_t LIBDIVIDE_ADD_MARKER = 0x40;
|
||||
const uint8_t LIBDIVIDE_NEGATIVE_DIVISOR = 0x80;
|
||||
|
||||
int64_t d = divisor;
|
||||
int64_t nc = (1LL << (bitWidth - 1)) - (1LL << (bitWidth - 1)) % d;
|
||||
int64_t delta = d - (1LL << (bitWidth - 1)) % d;
|
||||
// 辅助函数:计算前导零个数
|
||||
auto count_leading_zeros32 = [](uint32_t val) -> uint32_t {
|
||||
if (val == 0) return 32;
|
||||
return __builtin_clz(val);
|
||||
};
|
||||
|
||||
shift = bitWidth - 1;
|
||||
// 辅助函数:64位除法返回32位商和余数
|
||||
auto div_64_32 = [](uint32_t high, uint32_t low, uint32_t divisor, uint32_t* rem) -> uint32_t {
|
||||
uint64_t dividend = ((uint64_t)high << 32) | low;
|
||||
uint32_t quotient = dividend / divisor;
|
||||
*rem = dividend % divisor;
|
||||
return quotient;
|
||||
};
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Input divisor: " << divisor << std::endl;
|
||||
}
|
||||
|
||||
// libdivide_internal_s32_gen 算法实现
|
||||
int32_t d = divisor;
|
||||
uint32_t ud = (uint32_t)d;
|
||||
uint32_t absD = (d < 0) ? -ud : ud;
|
||||
|
||||
// 找到合适的魔数和移位量
|
||||
while (shift < bitWidth + 30) { // 避免无限循环
|
||||
int64_t q1 = (1LL << shift) / nc;
|
||||
int64_t r1 = (1LL << shift) - q1 * nc;
|
||||
int64_t q2 = (1LL << shift) / delta;
|
||||
int64_t r2 = (1LL << shift) - q2 * delta;
|
||||
|
||||
if (q1 < q2 || (q1 == q2 && r1 < r2)) {
|
||||
magic = q2 + 1;
|
||||
if (magic <= maxMagic) {
|
||||
break;
|
||||
}
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] absD = " << absD << std::endl;
|
||||
}
|
||||
|
||||
uint32_t floor_log_2_d = 31 - count_leading_zeros32(absD);
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] floor_log_2_d = " << floor_log_2_d << std::endl;
|
||||
}
|
||||
|
||||
// 检查 absD 是否为2的幂
|
||||
if ((absD & (absD - 1)) == 0) {
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] " << absD << " 是2的幂,使用移位方法" << std::endl;
|
||||
}
|
||||
|
||||
shift++;
|
||||
nc = 2 * nc;
|
||||
delta = 2 * delta;
|
||||
// 对于2的幂,我们只使用移位,不需要魔数
|
||||
int shift = floor_log_2_d;
|
||||
if (d < 0) shift |= 0x80; // 标记负数
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Power of 2 result: magic=0, shift=" << shift << std::endl;
|
||||
std::cout << "[SR] ===== End magic computation =====" << std::endl;
|
||||
}
|
||||
|
||||
// 对于我们的目的,我们将在IR生成中以不同方式处理2的幂
|
||||
// 返回特殊标记
|
||||
return {0, shift};
|
||||
}
|
||||
|
||||
if (magic > maxMagic) {
|
||||
// 回退到简单的魔数
|
||||
magic = (1LL << bitWidth) / d + 1;
|
||||
shift = bitWidth;
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] " << absD << " is not a power of 2, computing magic number" << std::endl;
|
||||
}
|
||||
|
||||
// 调整移位量以移除多余的2的幂因子
|
||||
shift = shift - bitWidth;
|
||||
if (shift < 0) shift = 0;
|
||||
// 非2的幂除数的魔数计算
|
||||
uint8_t more;
|
||||
uint32_t rem, proposed_m;
|
||||
|
||||
return {magic, shift};
|
||||
// 计算 proposed_m = floor(2^(floor_log_2_d + 31) / absD)
|
||||
proposed_m = div_64_32((uint32_t)1 << (floor_log_2_d - 1), 0, absD, &rem);
|
||||
const uint32_t e = absD - rem;
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] proposed_m = " << proposed_m << ", rem = " << rem << ", e = " << e << std::endl;
|
||||
}
|
||||
|
||||
// 确定是否需要"加法"版本
|
||||
const bool branchfree = false; // 使用分支版本
|
||||
|
||||
if (!branchfree && e < ((uint32_t)1 << floor_log_2_d)) {
|
||||
// 这个幂次有效
|
||||
more = (uint8_t)(floor_log_2_d - 1);
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Using basic algorithm, shift = " << (int)more << std::endl;
|
||||
}
|
||||
} else {
|
||||
// 我们需要上升一个等级
|
||||
proposed_m += proposed_m;
|
||||
const uint32_t twice_rem = rem + rem;
|
||||
if (twice_rem >= absD || twice_rem < rem) {
|
||||
proposed_m += 1;
|
||||
}
|
||||
more = (uint8_t)(floor_log_2_d | LIBDIVIDE_ADD_MARKER);
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Using add algorithm, proposed_m = " << proposed_m << ", more = " << (int)more << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
proposed_m += 1;
|
||||
int32_t magic = (int32_t)proposed_m;
|
||||
|
||||
// 处理负除数
|
||||
if (d < 0) {
|
||||
more |= LIBDIVIDE_NEGATIVE_DIVISOR;
|
||||
if (!branchfree) {
|
||||
magic = -magic;
|
||||
}
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Negative divisor, magic = " << magic << ", more = " << (int)more << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// 为我们的IR生成提取移位量和标志
|
||||
int shift = more & 0x3F; // 移除标志,保留移位量(位0-5)
|
||||
bool need_add = (more & LIBDIVIDE_ADD_MARKER) != 0;
|
||||
bool is_negative = (more & LIBDIVIDE_NEGATIVE_DIVISOR) != 0;
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Final result: magic = " << magic << ", more = " << (int)more
|
||||
<< " (0x" << std::hex << (int)more << std::dec << ")" << std::endl;
|
||||
std::cout << "[SR] Shift = " << shift << ", need_add = " << need_add
|
||||
<< ", is_negative = " << is_negative << std::endl;
|
||||
|
||||
// Test the magic number using the correct libdivide algorithm
|
||||
std::cout << "[SR] Testing magic number (libdivide algorithm):" << std::endl;
|
||||
int test_values[] = {1, 7, 37, 100, 999, -1, -7, -37, -100};
|
||||
|
||||
for (int test_val : test_values) {
|
||||
int64_t quotient;
|
||||
|
||||
// 实现正确的libdivide算法
|
||||
int64_t product = (int64_t)test_val * magic;
|
||||
int64_t high_bits = product >> 32;
|
||||
|
||||
if (need_add) {
|
||||
// ADD_MARKER情况:移位前加上被除数
|
||||
// 这是libdivide的关键洞察!
|
||||
high_bits += test_val;
|
||||
quotient = high_bits >> shift;
|
||||
} else {
|
||||
// 正常情况:只是移位
|
||||
quotient = high_bits >> shift;
|
||||
}
|
||||
|
||||
// 符号修正:这是libdivide有符号除法的关键部分!
|
||||
// 如果被除数为负,商需要加1来匹配C语言的截断除法语义
|
||||
if (test_val < 0) {
|
||||
quotient += 1;
|
||||
}
|
||||
|
||||
int expected = test_val / divisor;
|
||||
|
||||
bool correct = (quotient == expected);
|
||||
std::cout << "[SR] " << test_val << " / " << divisor << " = " << quotient
|
||||
<< " (expected " << expected << ") " << (correct ? "✓" : "✗") << std::endl;
|
||||
}
|
||||
|
||||
std::cout << "[SR] ===== End magic computation =====" << std::endl;
|
||||
}
|
||||
|
||||
// 返回魔数、移位量,并在移位中编码ADD_MARKER标志
|
||||
// 我们将使用移位的第6位表示ADD_MARKER,第7位表示负数(如果需要)
|
||||
int encoded_shift = shift;
|
||||
if (need_add) {
|
||||
encoded_shift |= 0x40; // 设置第6位表示ADD_MARKER
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Encoding ADD_MARKER in shift: " << encoded_shift << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
return {magic, encoded_shift};
|
||||
}
|
||||
|
||||
|
||||
bool LoopStrengthReduction::runOnFunction(Function* F, AnalysisManager& AM) {
|
||||
if (F->getBasicBlocks().empty()) {
|
||||
return false; // 空函数
|
||||
@@ -651,7 +776,7 @@ bool StrengthReductionContext::createNewInductionVariable(StrengthReductionCandi
|
||||
// 2. 在循环头创建新的 phi 指令
|
||||
builder->setPosition(header, header->begin());
|
||||
candidate->newPhi = builder->createPhiInst(originalPhi->getType());
|
||||
candidate->newPhi->setName(originalPhi->getName() + "_sr");
|
||||
candidate->newPhi->setName("sr_" + originalPhi->getName());
|
||||
|
||||
// 3. 计算新归纳变量的初始值和步长
|
||||
// 新IV的初始值 = 原IV初始值 * multiplier
|
||||
@@ -895,14 +1020,35 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
|
||||
// 使用mulh指令优化任意常数除法
|
||||
auto [magic, shift] = computeMulhMagicNumbers(candidate->multiplier);
|
||||
|
||||
if (magic == 1 && shift > 0) {
|
||||
// 特殊情况:可以直接使用移位
|
||||
Value* shiftConstant = ConstantInteger::get(shift);
|
||||
// 检查是否无法优化(magic == -1, shift == -1 表示失败)
|
||||
if (magic == -1 && shift == -1) {
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Cannot optimize division by " << candidate->multiplier
|
||||
<< ", keeping original division" << std::endl;
|
||||
}
|
||||
// 返回 nullptr 表示无法优化,调用方应该保持原始除法
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// 2的幂次方除法可以用移位优化(但这不是魔数法的情况)这种情况应该不会被分类到这里但是还是做一个保护措施
|
||||
if ((candidate->multiplier & (candidate->multiplier - 1)) == 0 && candidate->multiplier > 0) {
|
||||
// 是2的幂次方,可以用移位
|
||||
int shift_amount = 0;
|
||||
int temp = candidate->multiplier;
|
||||
while (temp > 1) {
|
||||
temp >>= 1;
|
||||
shift_amount++;
|
||||
}
|
||||
|
||||
Value* shiftConstant = ConstantInteger::get(shift_amount);
|
||||
if (candidate->hasNegativeValues) {
|
||||
// 对于有符号除法,需要先加上除数-1然后再移位(为了正确处理负数舍入)
|
||||
Value* divisor_minus_1 = ConstantInteger::get(candidate->multiplier - 1);
|
||||
Value* adjusted = builder->createAddInst(candidate->inductionVar, divisor_minus_1);
|
||||
return builder->createBinaryInst(
|
||||
Instruction::Kind::kSra, // 算术右移
|
||||
candidate->inductionVar->getType(),
|
||||
candidate->inductionVar,
|
||||
adjusted,
|
||||
shiftConstant
|
||||
);
|
||||
} else {
|
||||
@@ -916,8 +1062,25 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
|
||||
}
|
||||
|
||||
// 创建魔数常量
|
||||
// 检查魔数是否能放入32位,如果不能,则不进行优化
|
||||
if (magic > INT32_MAX || magic < INT32_MIN) {
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Magic number " << magic << " exceeds 32-bit range, skipping optimization" << std::endl;
|
||||
}
|
||||
return nullptr; // 无法优化,保持原始除法
|
||||
}
|
||||
|
||||
Value* magicConstant = ConstantInteger::get((int32_t)magic);
|
||||
|
||||
// 检查是否需要ADD_MARKER处理(加法调整)
|
||||
bool needAdd = (shift & 0x40) != 0;
|
||||
int actualShift = shift & 0x3F; // 提取真实的移位量
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] IR Generation: magic=" << magic << ", needAdd=" << needAdd
|
||||
<< ", actualShift=" << actualShift << std::endl;
|
||||
}
|
||||
|
||||
// 执行高位乘法:mulh(x, magic)
|
||||
Value* mulhResult = builder->createBinaryInst(
|
||||
Instruction::Kind::kMulh, // 高位乘法
|
||||
@@ -926,9 +1089,18 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
|
||||
magicConstant
|
||||
);
|
||||
|
||||
if (shift > 0) {
|
||||
if (needAdd) {
|
||||
// ADD_MARKER 情况:需要在移位前加上被除数
|
||||
// 这对应于 libdivide 的加法调整算法
|
||||
if (DEBUG) {
|
||||
std::cout << "[SR] Applying ADD_MARKER: adding dividend before shift" << std::endl;
|
||||
}
|
||||
mulhResult = builder->createAddInst(mulhResult, candidate->inductionVar);
|
||||
}
|
||||
|
||||
if (actualShift > 0) {
|
||||
// 如果需要额外移位
|
||||
Value* shiftConstant = ConstantInteger::get(shift);
|
||||
Value* shiftConstant = ConstantInteger::get(actualShift);
|
||||
mulhResult = builder->createBinaryInst(
|
||||
Instruction::Kind::kSra, // 算术右移
|
||||
candidate->inductionVar->getType(),
|
||||
@@ -937,14 +1109,11 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
|
||||
);
|
||||
}
|
||||
|
||||
// 处理负数校正 - 简化版本
|
||||
if (candidate->hasNegativeValues) {
|
||||
// 简化处理:添加一个常数偏移来处理负数情况
|
||||
// 这是一个简化的实现,实际的负数校正会更复杂
|
||||
Value* zero = ConstantInteger::get(0);
|
||||
Value* isNegative = builder->createICmpLTInst(candidate->inductionVar, zero);
|
||||
// 这里应该有条件逻辑,但为了简化实现,暂时直接返回mulhResult
|
||||
}
|
||||
// 标准的有符号除法符号修正:如果被除数为负,商需要加1
|
||||
// 这对所有有符号除法都需要,不管是否可能有负数
|
||||
Value* isNegative = builder->createICmpLTInst(candidate->inductionVar, ConstantInteger::get(0));
|
||||
// 将i1转换为i32:负数时为1,非负数时为0 ICmpLTInst的结果会默认转化为32位
|
||||
mulhResult = builder->createAddInst(mulhResult, isNegative);
|
||||
|
||||
return mulhResult;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user