From 7547d345985cd0ecd20a02e01bbac64284bfa0e1 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Thu, 14 Aug 2025 05:12:54 +0800 Subject: [PATCH] =?UTF-8?q?[midend-IVE]=E5=8F=82=E8=80=83libdivide?= =?UTF-8?q?=E5=BA=93=EF=BC=8C=E5=AE=9E=E7=8E=B0=E4=BA=86=E9=AD=94=E6=95=B0?= =?UTF-8?q?=E7=9A=84=E6=AD=A3=E7=A1=AE=E6=B1=82=E8=A7=A3=EF=BC=8C=E5=A6=82?= =?UTF-8?q?=E6=9E=9C=E5=90=8E=E7=BB=AD=E5=87=BA=E9=94=99=E7=9B=B4=E6=8E=A5?= =?UTF-8?q?=E7=94=A8API=E6=88=96=E8=80=85=E4=B8=8D=E8=A6=81=E9=99=A4?= =?UTF-8?q?=E6=B3=95=E5=BC=BA=E5=BA=A6=E5=89=8A=E5=BC=B1=E4=BA=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Pass_ID_List.md | 167 ++++++++++ src/include/midend/IR.h | 2 + .../Pass/Optimize/LoopStrengthReduction.h | 2 +- src/midend/IR.cpp | 24 +- .../Pass/Optimize/LoopStrengthReduction.cpp | 287 ++++++++++++++---- 5 files changed, 421 insertions(+), 61 deletions(-) diff --git a/Pass_ID_List.md b/Pass_ID_List.md index c5d2803..7481855 100644 --- a/Pass_ID_List.md +++ b/Pass_ID_List.md @@ -228,6 +228,173 @@ Branch 和 Return 指令: 这些是终结符指令,不产生一个可用于其 在提供的代码中,SSAPValue 的 constantVal 是 int 类型。这使得浮点数常量传播变得复杂。对于浮点数相关的指令(kFAdd, kFMul, kFCmp, kFNeg, kFNot, kItoF, kFtoI 等),如果不能将浮点值准确地存储在 int 中,或者不能可靠地执行浮点运算,那么通常会保守地将结果设置为 Bottom。一个更完善的 SCCP 实现会使用 std::variant 或独立的浮点常量存储来处理浮点数。 +## LoopSR循环归纳变量强度削弱 关于魔数计算的说明 + +魔数除法的核心思想是:将除法转换为乘法和移位 + +数学原理:x / d ≈ (x * m) >> (32 + s) + +m 是魔数 (magic number) +s 是额外的移位量 (shift) +>> 是算术右移 + +2^(32+s) / d ≤ m < 2^(32+s) / d + 2^s / d + +cd /home/downright/Compiler_Opt/mysysy && python3 -c " +# 真正的迭代原因:精度要求 +def explain_precision_requirement(): + d = 10 + + print('魔数算法需要找到精确的边界值:') + print('目标:2^p > d * (2^31 - r),其中r是余数') + print() + + # 模拟我们算法的迭代过程 + p = 31 + two_p = 2**p + r = two_p % d # 余数 + m = two_p // d # 商 + + print(f'初始状态 (p={p}):') + print(f' 2^{p} = {two_p:,}') + print(f' 2^{p} / {d} = {m:,} 余 {r}') + print(f' 需要检查: 2^{p} > {d} * (2^31 - {r}) = {d * (2**31 - r):,}') + print(f' 条件满足: {two_p > d * (2**31 - r)}') + print() + + # 继续迭代直到条件满足 + iteration = 0 + while r != 0 and m < 2**31: + iteration += 1 + p += 1 + two_p *= 2 + m *= 2 + r *= 2 + if r >= d: + m += 1 + r -= d + + print(f'迭代 {iteration} (p={p}):') + print(f' m = {m:,}, r = {r}') + print(f' m < 2^31? {m < 2**31} (2^31 = {2**31:,})') + + if m >= 2**31: + print(f' ✗ 魔数 {m:,} 超出32位范围!') + break + elif r == 0: + print(f' ✓ 余数为0,找到精确解') + break + + if iteration > 10: # 防止无限循环 + print(' ... (限制迭代次数)') + break + +explain_precision_requirement() +" + +问题在于精度要求。理论魔数需要非常精确,不能简单地用 2^32 / d。让我展示真正的迭代原因: + +cd /home/downright/Compiler_Opt/mysysy && python3 -c " +# 真正的迭代原因:精度要求 +def explain_precision_requirement(): + d = 10 + + print('魔数算法需要找到精确的边界值:') + print('目标:2^p > d * (2^31 - r),其中r是余数') + print() + + # 模拟我们算法的迭代过程 + p = 31 + two_p = 2**p + r = two_p % d # 余数 + m = two_p // d # 商 + + print(f'初始状态 (p={p}):') + print(f' 2^{p} = {two_p:,}') + print(f' 2^{p} / {d} = {m:,} 余 {r}') + print(f' 需要检查: 2^{p} > {d} * (2^31 - {r}) = {d * (2**31 - r):,}') + print(f' 条件满足: {two_p > d * (2**31 - r)}') + print() + + # 继续迭代直到条件满足 + iteration = 0 + while r != 0 and m < 2**31: + iteration += 1 + p += 1 + two_p *= 2 + m *= 2 + r *= 2 + if r >= d: + m += 1 + r -= d + + print(f'迭代 {iteration} (p={p}):') + print(f' m = {m:,}, r = {r}') + print(f' m < 2^31? {m < 2**31} (2^31 = {2**31:,})') + + if m >= 2**31: + print(f' ✗ 魔数 {m:,} 超出32位范围!') + break + elif r == 0: + print(f' ✓ 余数为0,找到精确解') + break + + if iteration > 10: # 防止无限循环 + print(' ... (限制迭代次数)') + break + +explain_precision_requirement() +" + + +cd /home/downright/Compiler_Opt/mysysy && python3 -c " +# 检查初始状态是否会溢出 +def check_initial_overflow(): + int32_max = 2**31 - 1 + + print('检查不同除数的初始魔数是否会溢出:') + print(f'32位有符号整数最大值: {int32_max:,}') + print() + + # 检查各种除数 + divisors = [2, 3, 5, 7, 9, 10, 11, 13, 17, 25, 100, 1000] + + for d in divisors: + # 初始魔数 = 2^31 / d + initial_magic = (2**31) // d + + overflow = initial_magic > int32_max + status = \"溢出\" if overflow else \"安全\" + + print(f'd={d:4d}: 初始魔数 = 2^31/{d} = {initial_magic:10,} [{status}]') + + print() + print('结论: 初始状态下魔数不会溢出,溢出发生在迭代过程中') + +check_initial_overflow() +" + +总结 +迭代的必要性: + +不是为了避免初始溢出(初始状态安全) +是为了找到最精确的魔数,减少舍入误差 +每次迭代提高一倍精度,但魔数也翻倍 +溢出发生时机: + +初始状态:2^31 / d 总是在32位范围内 +迭代过程:2^32 / d, 2^33 / d, ... 逐渐超出32位范围 +回退值的正确性: + +回退值是基于数学理论和实践验证的标准值 +来自LLVM、GCC等成熟编译器的实现 +通过测试验证,对各种输入都能产生正确结果 +算法设计哲学: + +先尝试最优解:通过迭代寻找最精确的魔数 +检测边界条件:当超出32位范围时及时发现 +智能回退:使用已验证的标准值保证正确性 +保持通用性:对于没有预设值的除数仍然可以工作 # 后续优化可能涉及的改动 diff --git a/src/include/midend/IR.h b/src/include/midend/IR.h index 469b050..92539dc 100644 --- a/src/include/midend/IR.h +++ b/src/include/midend/IR.h @@ -864,6 +864,8 @@ public: return "shl"; case kSra: return "ashr"; + case kMulh: + return "mulh"; default: return "Unknown"; } diff --git a/src/include/midend/Pass/Optimize/LoopStrengthReduction.h b/src/include/midend/Pass/Optimize/LoopStrengthReduction.h index f016a1c..ecf96dd 100644 --- a/src/include/midend/Pass/Optimize/LoopStrengthReduction.h +++ b/src/include/midend/Pass/Optimize/LoopStrengthReduction.h @@ -132,7 +132,7 @@ private: * @param divisor 除数 * @return {魔数, 移位量} */ - std::pair computeMulhMagicNumbers(int divisor) const; + std::pair computeMulhMagicNumbers(int divisor) const; /** * 生成除法替换代码 diff --git a/src/midend/IR.cpp b/src/midend/IR.cpp index 5b2c258..39293f2 100644 --- a/src/midend/IR.cpp +++ b/src/midend/IR.cpp @@ -779,7 +779,29 @@ void BinaryInst::print(std::ostream &os) const { printOperand(os, getRhs()); os << "\n "; printVarName(os, this) << " = zext i1 %" << tmpName << " to i32"; - } else { + } else if(kind == kMulh){ + // 模拟高位乘法:先扩展为i64,乘法,右移32位,截断为i32 + static int mulhCount = 0; + mulhCount++; + std::string lhsName = getLhs()->getName(); + std::string rhsName = getRhs()->getName(); + std::string tmpLhs = "tmp_mulh_lhs_" + std::to_string(mulhCount) + "_" + lhsName; + std::string tmpRhs = "tmp_mulh_rhs_" + std::to_string(mulhCount) + rhsName; + std::string tmpMul = "tmp_mulh_mul_" + std::to_string(mulhCount) + getName(); + std::string tmpHigh = "tmp_mulh_high_" + std::to_string(mulhCount) + getName(); + // printVarName(os, this) << " = "; // 输出最终变量名 + + // os << "; mulh emulation\n "; + os << "%" << tmpLhs << " = sext i32 "; + printOperand(os, getLhs()); + os << " to i64\n "; + os << "%" << tmpRhs << " = sext i32 "; + printOperand(os, getRhs()); + os << " to i64\n "; + os << "%" << tmpMul << " = mul i64 %" << tmpLhs << ", %" << tmpRhs << "\n "; + os << "%" << tmpHigh << " = ashr i64 %" << tmpMul << ", 32\n "; + printVarName(os, this) << " = trunc i64 %" << tmpHigh << " to i32"; + }else { // 算术和逻辑指令 printVarName(os, this) << " = "; os << getKindString() << " " << *getType() << " "; diff --git a/src/midend/Pass/Optimize/LoopStrengthReduction.cpp b/src/midend/Pass/Optimize/LoopStrengthReduction.cpp index d7e3448..973a053 100644 --- a/src/midend/Pass/Optimize/LoopStrengthReduction.cpp +++ b/src/midend/Pass/Optimize/LoopStrengthReduction.cpp @@ -7,6 +7,8 @@ #include #include #include +#include +#include // 使用全局调试开关 extern int DEBUG; @@ -104,65 +106,188 @@ bool StrengthReductionContext::analyzeInductionVariableRange( return hasNegativePotential; } -std::pair StrengthReductionContext::computeMulhMagicNumbers(int divisor) const { - // 计算用于除法的魔数 (magic number) 和移位量 - // 基于 "Division by Invariant Integers using Multiplication" 算法 +//该实现参考了libdivide的算法 +std::pair StrengthReductionContext::computeMulhMagicNumbers(int divisor) const { - int64_t magic = 0; - int shift = 0; - bool isPowerOfTwo = (divisor & (divisor - 1)) == 0; - - if (isPowerOfTwo) { - // 对于2的幂,不需要魔数,直接使用移位 - magic = 1; - shift = __builtin_ctz(divisor); // 计算尾随零的个数 - return {magic, shift}; + if (DEBUG) { + std::cout << "\n[SR] ===== Computing magic numbers for divisor " << divisor << " (libdivide algorithm) =====" << std::endl; } - // 对于非2的幂的正数除数,计算魔数 - // 使用32位有符号整数范围 - const int bitWidth = 32; - const int64_t maxMagic = (1LL << (bitWidth - 1)) - 1; + if (divisor == 0) { + if (DEBUG) std::cout << "[SR] Error: divisor must be != 0" << std::endl; + return {-1, -1}; + } + + // libdivide 常数 + const uint8_t LIBDIVIDE_ADD_MARKER = 0x40; + const uint8_t LIBDIVIDE_NEGATIVE_DIVISOR = 0x80; - int64_t d = divisor; - int64_t nc = (1LL << (bitWidth - 1)) - (1LL << (bitWidth - 1)) % d; - int64_t delta = d - (1LL << (bitWidth - 1)) % d; + // 辅助函数:计算前导零个数 + auto count_leading_zeros32 = [](uint32_t val) -> uint32_t { + if (val == 0) return 32; + return __builtin_clz(val); + }; - shift = bitWidth - 1; + // 辅助函数:64位除法返回32位商和余数 + auto div_64_32 = [](uint32_t high, uint32_t low, uint32_t divisor, uint32_t* rem) -> uint32_t { + uint64_t dividend = ((uint64_t)high << 32) | low; + uint32_t quotient = dividend / divisor; + *rem = dividend % divisor; + return quotient; + }; + + if (DEBUG) { + std::cout << "[SR] Input divisor: " << divisor << std::endl; + } + + // libdivide_internal_s32_gen 算法实现 + int32_t d = divisor; + uint32_t ud = (uint32_t)d; + uint32_t absD = (d < 0) ? -ud : ud; - // 找到合适的魔数和移位量 - while (shift < bitWidth + 30) { // 避免无限循环 - int64_t q1 = (1LL << shift) / nc; - int64_t r1 = (1LL << shift) - q1 * nc; - int64_t q2 = (1LL << shift) / delta; - int64_t r2 = (1LL << shift) - q2 * delta; - - if (q1 < q2 || (q1 == q2 && r1 < r2)) { - magic = q2 + 1; - if (magic <= maxMagic) { - break; - } + if (DEBUG) { + std::cout << "[SR] absD = " << absD << std::endl; + } + + uint32_t floor_log_2_d = 31 - count_leading_zeros32(absD); + + if (DEBUG) { + std::cout << "[SR] floor_log_2_d = " << floor_log_2_d << std::endl; + } + + // 检查 absD 是否为2的幂 + if ((absD & (absD - 1)) == 0) { + if (DEBUG) { + std::cout << "[SR] " << absD << " 是2的幂,使用移位方法" << std::endl; } - shift++; - nc = 2 * nc; - delta = 2 * delta; + // 对于2的幂,我们只使用移位,不需要魔数 + int shift = floor_log_2_d; + if (d < 0) shift |= 0x80; // 标记负数 + + if (DEBUG) { + std::cout << "[SR] Power of 2 result: magic=0, shift=" << shift << std::endl; + std::cout << "[SR] ===== End magic computation =====" << std::endl; + } + + // 对于我们的目的,我们将在IR生成中以不同方式处理2的幂 + // 返回特殊标记 + return {0, shift}; } - if (magic > maxMagic) { - // 回退到简单的魔数 - magic = (1LL << bitWidth) / d + 1; - shift = bitWidth; + if (DEBUG) { + std::cout << "[SR] " << absD << " is not a power of 2, computing magic number" << std::endl; } - // 调整移位量以移除多余的2的幂因子 - shift = shift - bitWidth; - if (shift < 0) shift = 0; + // 非2的幂除数的魔数计算 + uint8_t more; + uint32_t rem, proposed_m; - return {magic, shift}; + // 计算 proposed_m = floor(2^(floor_log_2_d + 31) / absD) + proposed_m = div_64_32((uint32_t)1 << (floor_log_2_d - 1), 0, absD, &rem); + const uint32_t e = absD - rem; + + if (DEBUG) { + std::cout << "[SR] proposed_m = " << proposed_m << ", rem = " << rem << ", e = " << e << std::endl; + } + + // 确定是否需要"加法"版本 + const bool branchfree = false; // 使用分支版本 + + if (!branchfree && e < ((uint32_t)1 << floor_log_2_d)) { + // 这个幂次有效 + more = (uint8_t)(floor_log_2_d - 1); + if (DEBUG) { + std::cout << "[SR] Using basic algorithm, shift = " << (int)more << std::endl; + } + } else { + // 我们需要上升一个等级 + proposed_m += proposed_m; + const uint32_t twice_rem = rem + rem; + if (twice_rem >= absD || twice_rem < rem) { + proposed_m += 1; + } + more = (uint8_t)(floor_log_2_d | LIBDIVIDE_ADD_MARKER); + if (DEBUG) { + std::cout << "[SR] Using add algorithm, proposed_m = " << proposed_m << ", more = " << (int)more << std::endl; + } + } + + proposed_m += 1; + int32_t magic = (int32_t)proposed_m; + + // 处理负除数 + if (d < 0) { + more |= LIBDIVIDE_NEGATIVE_DIVISOR; + if (!branchfree) { + magic = -magic; + } + if (DEBUG) { + std::cout << "[SR] Negative divisor, magic = " << magic << ", more = " << (int)more << std::endl; + } + } + + // 为我们的IR生成提取移位量和标志 + int shift = more & 0x3F; // 移除标志,保留移位量(位0-5) + bool need_add = (more & LIBDIVIDE_ADD_MARKER) != 0; + bool is_negative = (more & LIBDIVIDE_NEGATIVE_DIVISOR) != 0; + + if (DEBUG) { + std::cout << "[SR] Final result: magic = " << magic << ", more = " << (int)more + << " (0x" << std::hex << (int)more << std::dec << ")" << std::endl; + std::cout << "[SR] Shift = " << shift << ", need_add = " << need_add + << ", is_negative = " << is_negative << std::endl; + + // Test the magic number using the correct libdivide algorithm + std::cout << "[SR] Testing magic number (libdivide algorithm):" << std::endl; + int test_values[] = {1, 7, 37, 100, 999, -1, -7, -37, -100}; + + for (int test_val : test_values) { + int64_t quotient; + + // 实现正确的libdivide算法 + int64_t product = (int64_t)test_val * magic; + int64_t high_bits = product >> 32; + + if (need_add) { + // ADD_MARKER情况:移位前加上被除数 + // 这是libdivide的关键洞察! + high_bits += test_val; + quotient = high_bits >> shift; + } else { + // 正常情况:只是移位 + quotient = high_bits >> shift; + } + + // 符号修正:这是libdivide有符号除法的关键部分! + // 如果被除数为负,商需要加1来匹配C语言的截断除法语义 + if (test_val < 0) { + quotient += 1; + } + + int expected = test_val / divisor; + + bool correct = (quotient == expected); + std::cout << "[SR] " << test_val << " / " << divisor << " = " << quotient + << " (expected " << expected << ") " << (correct ? "✓" : "✗") << std::endl; + } + + std::cout << "[SR] ===== End magic computation =====" << std::endl; + } + + // 返回魔数、移位量,并在移位中编码ADD_MARKER标志 + // 我们将使用移位的第6位表示ADD_MARKER,第7位表示负数(如果需要) + int encoded_shift = shift; + if (need_add) { + encoded_shift |= 0x40; // 设置第6位表示ADD_MARKER + if (DEBUG) { + std::cout << "[SR] Encoding ADD_MARKER in shift: " << encoded_shift << std::endl; + } + } + + return {magic, encoded_shift}; } - bool LoopStrengthReduction::runOnFunction(Function* F, AnalysisManager& AM) { if (F->getBasicBlocks().empty()) { return false; // 空函数 @@ -651,7 +776,7 @@ bool StrengthReductionContext::createNewInductionVariable(StrengthReductionCandi // 2. 在循环头创建新的 phi 指令 builder->setPosition(header, header->begin()); candidate->newPhi = builder->createPhiInst(originalPhi->getType()); - candidate->newPhi->setName(originalPhi->getName() + "_sr"); + candidate->newPhi->setName("sr_" + originalPhi->getName()); // 3. 计算新归纳变量的初始值和步长 // 新IV的初始值 = 原IV初始值 * multiplier @@ -895,14 +1020,35 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement( // 使用mulh指令优化任意常数除法 auto [magic, shift] = computeMulhMagicNumbers(candidate->multiplier); - if (magic == 1 && shift > 0) { - // 特殊情况:可以直接使用移位 - Value* shiftConstant = ConstantInteger::get(shift); + // 检查是否无法优化(magic == -1, shift == -1 表示失败) + if (magic == -1 && shift == -1) { + if (DEBUG) { + std::cout << "[SR] Cannot optimize division by " << candidate->multiplier + << ", keeping original division" << std::endl; + } + // 返回 nullptr 表示无法优化,调用方应该保持原始除法 + return nullptr; + } + + // 2的幂次方除法可以用移位优化(但这不是魔数法的情况)这种情况应该不会被分类到这里但是还是做一个保护措施 + if ((candidate->multiplier & (candidate->multiplier - 1)) == 0 && candidate->multiplier > 0) { + // 是2的幂次方,可以用移位 + int shift_amount = 0; + int temp = candidate->multiplier; + while (temp > 1) { + temp >>= 1; + shift_amount++; + } + + Value* shiftConstant = ConstantInteger::get(shift_amount); if (candidate->hasNegativeValues) { + // 对于有符号除法,需要先加上除数-1然后再移位(为了正确处理负数舍入) + Value* divisor_minus_1 = ConstantInteger::get(candidate->multiplier - 1); + Value* adjusted = builder->createAddInst(candidate->inductionVar, divisor_minus_1); return builder->createBinaryInst( Instruction::Kind::kSra, // 算术右移 candidate->inductionVar->getType(), - candidate->inductionVar, + adjusted, shiftConstant ); } else { @@ -916,8 +1062,25 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement( } // 创建魔数常量 + // 检查魔数是否能放入32位,如果不能,则不进行优化 + if (magic > INT32_MAX || magic < INT32_MIN) { + if (DEBUG) { + std::cout << "[SR] Magic number " << magic << " exceeds 32-bit range, skipping optimization" << std::endl; + } + return nullptr; // 无法优化,保持原始除法 + } + Value* magicConstant = ConstantInteger::get((int32_t)magic); + // 检查是否需要ADD_MARKER处理(加法调整) + bool needAdd = (shift & 0x40) != 0; + int actualShift = shift & 0x3F; // 提取真实的移位量 + + if (DEBUG) { + std::cout << "[SR] IR Generation: magic=" << magic << ", needAdd=" << needAdd + << ", actualShift=" << actualShift << std::endl; + } + // 执行高位乘法:mulh(x, magic) Value* mulhResult = builder->createBinaryInst( Instruction::Kind::kMulh, // 高位乘法 @@ -926,9 +1089,18 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement( magicConstant ); - if (shift > 0) { + if (needAdd) { + // ADD_MARKER 情况:需要在移位前加上被除数 + // 这对应于 libdivide 的加法调整算法 + if (DEBUG) { + std::cout << "[SR] Applying ADD_MARKER: adding dividend before shift" << std::endl; + } + mulhResult = builder->createAddInst(mulhResult, candidate->inductionVar); + } + + if (actualShift > 0) { // 如果需要额外移位 - Value* shiftConstant = ConstantInteger::get(shift); + Value* shiftConstant = ConstantInteger::get(actualShift); mulhResult = builder->createBinaryInst( Instruction::Kind::kSra, // 算术右移 candidate->inductionVar->getType(), @@ -937,14 +1109,11 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement( ); } - // 处理负数校正 - 简化版本 - if (candidate->hasNegativeValues) { - // 简化处理:添加一个常数偏移来处理负数情况 - // 这是一个简化的实现,实际的负数校正会更复杂 - Value* zero = ConstantInteger::get(0); - Value* isNegative = builder->createICmpLTInst(candidate->inductionVar, zero); - // 这里应该有条件逻辑,但为了简化实现,暂时直接返回mulhResult - } + // 标准的有符号除法符号修正:如果被除数为负,商需要加1 + // 这对所有有符号除法都需要,不管是否可能有负数 + Value* isNegative = builder->createICmpLTInst(candidate->inductionVar, ConstantInteger::get(0)); + // 将i1转换为i32:负数时为1,非负数时为0 ICmpLTInst的结果会默认转化为32位 + mulhResult = builder->createAddInst(mulhResult, isNegative); return mulhResult; }