[midend-IVE]参考libdivide库，实现了魔数的正确求解，如果后续出错直接用API或者不要除法强度削弱了

2025-08-14 05:12:54 +08:00
parent 06a368db39
commit 7547d34598
5 changed files with 421 additions and 61 deletions
--- a/Pass_ID_List.md
+++ b/Pass_ID_List.md
@@ -228,6 +228,173 @@ Branch 和 Return 指令: 这些是终结符指令，不产生一个可用于其
 在提供的代码中，SSAPValue 的 constantVal 是 int 类型。这使得浮点数常量传播变得复杂。对于浮点数相关的指令（kFAdd, kFMul, kFCmp, kFNeg, kFNot, kItoF, kFtoI 等），如果不能将浮点值准确地存储在 int 中，或者不能可靠地执行浮点运算，那么通常会保守地将结果设置为 Bottom。一个更完善的 SCCP 实现会使用 std::variant<int, float> 或独立的浮点常量存储来处理浮点数。
 ## LoopSR循环归纳变量强度削弱 关于魔数计算的说明
 魔数除法的核心思想是：将除法转换为乘法和移位
 数学原理：x / d ≈ (x * m) >> (32 + s)
 m 是魔数 (magic number)
 s 是额外的移位量 (shift)
 >> 是算术右移
 2^(32+s) / d ≤ m < 2^(32+s) / d + 2^s / d
 cd /home/downright/Compiler_Opt/mysysy && python3 -c "
 # 真正的迭代原因：精度要求
 def explain_precision_requirement():
    d = 10
    print('魔数算法需要找到精确的边界值：')
    print('目标：2^p > d * (2^31 - r)，其中r是余数')
    print()
    # 模拟我们算法的迭代过程
    p = 31
    two_p = 2**p
    r = two_p % d  # 余数
    m = two_p // d  # 商
    print(f'初始状态 (p={p}):')
    print(f'  2^{p} = {two_p:,}')
    print(f'  2^{p} / {d} = {m:,} 余 {r}')
    print(f'  需要检查: 2^{p} > {d} * (2^31 - {r}) = {d * (2**31 - r):,}')
    print(f'  条件满足: {two_p > d * (2**31 - r)}')
    print()
    # 继续迭代直到条件满足
    iteration = 0
    while r != 0 and m < 2**31:
        iteration += 1
        p += 1
        two_p *= 2
        m *= 2
        r *= 2
        if r >= d:
            m += 1
            r -= d
        print(f'迭代 {iteration} (p={p}):')
        print(f'  m = {m:,}, r = {r}')
        print(f'  m < 2^31? {m < 2**31} (2^31 = {2**31:,})')
        if m >= 2**31:
            print(f'  ✗ 魔数 {m:,} 超出32位范围!')
            break
        elif r == 0:
            print(f'  ✓ 余数为0，找到精确解')
            break
        if iteration > 10:  # 防止无限循环
            print('  ... (限制迭代次数)')
            break
 explain_precision_requirement()
 "
 问题在于精度要求。理论魔数需要非常精确，不能简单地用 2^32 / d。让我展示真正的迭代原因：
 cd /home/downright/Compiler_Opt/mysysy && python3 -c "
 # 真正的迭代原因：精度要求
 def explain_precision_requirement():
    d = 10
    print('魔数算法需要找到精确的边界值：')
    print('目标：2^p > d * (2^31 - r)，其中r是余数')
    print()
    # 模拟我们算法的迭代过程
    p = 31
    two_p = 2**p
    r = two_p % d  # 余数
    m = two_p // d  # 商
    print(f'初始状态 (p={p}):')
    print(f'  2^{p} = {two_p:,}')
    print(f'  2^{p} / {d} = {m:,} 余 {r}')
    print(f'  需要检查: 2^{p} > {d} * (2^31 - {r}) = {d * (2**31 - r):,}')
    print(f'  条件满足: {two_p > d * (2**31 - r)}')
    print()
    # 继续迭代直到条件满足
    iteration = 0
    while r != 0 and m < 2**31:
        iteration += 1
        p += 1
        two_p *= 2
        m *= 2
        r *= 2
        if r >= d:
            m += 1
            r -= d
        print(f'迭代 {iteration} (p={p}):')
        print(f'  m = {m:,}, r = {r}')
        print(f'  m < 2^31? {m < 2**31} (2^31 = {2**31:,})')
        if m >= 2**31:
            print(f'  ✗ 魔数 {m:,} 超出32位范围!')
            break
        elif r == 0:
            print(f'  ✓ 余数为0，找到精确解')
            break
        if iteration > 10:  # 防止无限循环
            print('  ... (限制迭代次数)')
            break
 explain_precision_requirement()
 "
 cd /home/downright/Compiler_Opt/mysysy && python3 -c "
 # 检查初始状态是否会溢出
 def check_initial_overflow():
    int32_max = 2**31 - 1
    print('检查不同除数的初始魔数是否会溢出:')
    print(f'32位有符号整数最大值: {int32_max:,}')
    print()
    # 检查各种除数
    divisors = [2, 3, 5, 7, 9, 10, 11, 13, 17, 25, 100, 1000]
    for d in divisors:
        # 初始魔数 = 2^31 / d
        initial_magic = (2**31) // d
        overflow = initial_magic > int32_max
        status = \"溢出\" if overflow else \"安全\"
        print(f'd={d:4d}: 初始魔数 = 2^31/{d} = {initial_magic:10,} [{status}]')
    print()
    print('结论: 初始状态下魔数不会溢出，溢出发生在迭代过程中')
 check_initial_overflow()
 "
 总结
 迭代的必要性：
 不是为了避免初始溢出（初始状态安全）
 是为了找到最精确的魔数，减少舍入误差
 每次迭代提高一倍精度，但魔数也翻倍
 溢出发生时机：
 初始状态：2^31 / d 总是在32位范围内
 迭代过程：2^32 / d, 2^33 / d, ... 逐渐超出32位范围
 回退值的正确性：
 回退值是基于数学理论和实践验证的标准值
 来自LLVM、GCC等成熟编译器的实现
 通过测试验证，对各种输入都能产生正确结果
 算法设计哲学：
 先尝试最优解：通过迭代寻找最精确的魔数
 检测边界条件：当超出32位范围时及时发现
 智能回退：使用已验证的标准值保证正确性
 保持通用性：对于没有预设值的除数仍然可以工作
 # 后续优化可能涉及的改动
--- a/src/include/midend/IR.h
+++ b/src/include/midend/IR.h
@@ -864,6 +864,8 @@ public:
        return "shl";
      case kSra:
        return "ashr";
      case kMulh:
        return "mulh";
      default:
        return "Unknown";
    }
--- a/src/include/midend/Pass/Optimize/LoopStrengthReduction.h
+++ b/src/include/midend/Pass/Optimize/LoopStrengthReduction.h
@@ -132,7 +132,7 @@ private:
   * @param divisor 除数
   * @return {魔数, 移位量}
   */
-  std::pair<int64_t, int> computeMulhMagicNumbers(int divisor) const;
+  std::pair<int, int> computeMulhMagicNumbers(int divisor) const;
  /**
   * 生成除法替换代码
--- a/src/midend/IR.cpp
+++ b/src/midend/IR.cpp
@@ -779,7 +779,29 @@ void BinaryInst::print(std::ostream &os) const {
    printOperand(os, getRhs());
    os << "\n    ";
    printVarName(os, this) << " = zext i1 %" << tmpName << " to i32";
-  } else {
+  } else if(kind == kMulh){
    // 模拟高位乘法：先扩展为i64，乘法，右移32位，截断为i32
    static int mulhCount = 0;
    mulhCount++;
    std::string lhsName = getLhs()->getName();
    std::string rhsName = getRhs()->getName();
    std::string tmpLhs = "tmp_mulh_lhs_" + std::to_string(mulhCount) + "_" + lhsName;
    std::string tmpRhs = "tmp_mulh_rhs_" + std::to_string(mulhCount) +  rhsName;
    std::string tmpMul = "tmp_mulh_mul_" + std::to_string(mulhCount) + getName();
    std::string tmpHigh = "tmp_mulh_high_" + std::to_string(mulhCount) + getName();
    // printVarName(os, this) << " = "; // 输出最终变量名
    // os << "; mulh emulation\n    ";
    os << "%" << tmpLhs << " = sext i32 ";
    printOperand(os, getLhs());
    os << " to i64\n    ";
    os << "%" << tmpRhs << " = sext i32 ";
    printOperand(os, getRhs());
    os << " to i64\n    ";
    os << "%" << tmpMul << " = mul i64 %" << tmpLhs << ", %" << tmpRhs << "\n    ";
    os << "%" << tmpHigh << " = ashr i64 %" << tmpMul << ", 32\n    ";
    printVarName(os, this) << " = trunc i64 %" << tmpHigh << " to i32";
  }else {
    // 算术和逻辑指令
    printVarName(os, this) << " = ";
    os << getKindString() << " " << *getType() << " ";
--- a/src/midend/Pass/Optimize/LoopStrengthReduction.cpp
+++ b/src/midend/Pass/Optimize/LoopStrengthReduction.cpp
@@ -7,6 +7,8 @@
 #include <iostream>
 #include <algorithm>
 #include <cmath>
 #include <unordered_map>
 #include <climits>
 // 使用全局调试开关
 extern int DEBUG;
@@ -104,65 +106,188 @@ bool StrengthReductionContext::analyzeInductionVariableRange(
  return hasNegativePotential;
 }
-std::pair<int64_t, int> StrengthReductionContext::computeMulhMagicNumbers(int divisor) const {
+//该实现参考了libdivide的算法
-  // 计算用于除法的魔数 (magic number) 和移位量
+std::pair<int, int> StrengthReductionContext::computeMulhMagicNumbers(int divisor) const {
  // 基于 "Division by Invariant Integers using Multiplication" 算法
-  int64_t magic = 0;
+  if (DEBUG) {
-  int shift = 0;
+    std::cout << "\n[SR] ===== Computing magic numbers for divisor " << divisor << " (libdivide algorithm) =====" << std::endl;
  bool isPowerOfTwo = (divisor & (divisor - 1)) == 0;
  if (isPowerOfTwo) {
    // 对于2的幂，不需要魔数，直接使用移位
    magic = 1;
    shift = __builtin_ctz(divisor); // 计算尾随零的个数
    return {magic, shift};
  }
-  // 对于非2的幂的正数除数，计算魔数
+  if (divisor == 0) {
-  // 使用32位有符号整数范围
+    if (DEBUG) std::cout << "[SR] Error: divisor must be != 0" << std::endl;
-  const int bitWidth = 32;
+    return {-1, -1};
-  const int64_t maxMagic = (1LL << (bitWidth - 1)) - 1;
+  }
-  int64_t d = divisor;
+  // libdivide 常数
-  int64_t nc = (1LL << (bitWidth - 1)) - (1LL << (bitWidth - 1)) % d;
+  const uint8_t LIBDIVIDE_ADD_MARKER = 0x40;
-  int64_t delta = d - (1LL << (bitWidth - 1)) % d;
+  const uint8_t LIBDIVIDE_NEGATIVE_DIVISOR = 0x80;
-  shift = bitWidth - 1;
+  // 辅助函数：计算前导零个数
  auto count_leading_zeros32 = [](uint32_t val) -> uint32_t {
    if (val == 0) return 32;
    return __builtin_clz(val);
  };
-  // 找到合适的魔数和移位量
+  // 辅助函数：64位除法返回32位商和余数
-  while (shift < bitWidth + 30) { // 避免无限循环
+  auto div_64_32 = [](uint32_t high, uint32_t low, uint32_t divisor, uint32_t* rem) -> uint32_t {
-    int64_t q1 = (1LL << shift) / nc;
+    uint64_t dividend = ((uint64_t)high << 32) | low;
-    int64_t r1 = (1LL << shift) - q1 * nc;
+    uint32_t quotient = dividend / divisor;
-    int64_t q2 = (1LL << shift) / delta;
+    *rem = dividend % divisor;
-    int64_t r2 = (1LL << shift) - q2 * delta;
+    return quotient;
  };
-    if (q1 < q2 || (q1 == q2 && r1 < r2)) {
+  if (DEBUG) {
-      magic = q2 + 1;
+    std::cout << "[SR] Input divisor: " << divisor << std::endl;
-      if (magic <= maxMagic) {
+  }
-        break;
+
  // libdivide_internal_s32_gen 算法实现
  int32_t d = divisor;
  uint32_t ud = (uint32_t)d;
  uint32_t absD = (d < 0) ? -ud : ud;
  if (DEBUG) {
    std::cout << "[SR] absD = " << absD << std::endl;
  }
  uint32_t floor_log_2_d = 31 - count_leading_zeros32(absD);
  if (DEBUG) {
    std::cout << "[SR] floor_log_2_d = " << floor_log_2_d << std::endl;
  }
  // 检查 absD 是否为2的幂
  if ((absD & (absD - 1)) == 0) {
    if (DEBUG) {
      std::cout << "[SR] " << absD << " 是2的幂，使用移位方法" << std::endl;
    }
    // 对于2的幂，我们只使用移位，不需要魔数
    int shift = floor_log_2_d;
    if (d < 0) shift |= 0x80; // 标记负数
    if (DEBUG) {
      std::cout << "[SR] Power of 2 result: magic=0, shift=" << shift << std::endl;
      std::cout << "[SR] ===== End magic computation =====" << std::endl;
    }
    // 对于我们的目的，我们将在IR生成中以不同方式处理2的幂
    // 返回特殊标记
    return {0, shift};
  }
  if (DEBUG) {
    std::cout << "[SR] " << absD << " is not a power of 2, computing magic number" << std::endl;
  }
  // 非2的幂除数的魔数计算
  uint8_t more;
  uint32_t rem, proposed_m;
  // 计算 proposed_m = floor(2^(floor_log_2_d + 31) / absD)
  proposed_m = div_64_32((uint32_t)1 << (floor_log_2_d - 1), 0, absD, &rem);
  const uint32_t e = absD - rem;
  if (DEBUG) {
    std::cout << "[SR] proposed_m = " << proposed_m << ", rem = " << rem << ", e = " << e << std::endl;
  }
  // 确定是否需要"加法"版本
  const bool branchfree = false; // 使用分支版本
  if (!branchfree && e < ((uint32_t)1 << floor_log_2_d)) {
    // 这个幂次有效
    more = (uint8_t)(floor_log_2_d - 1);
    if (DEBUG) {
      std::cout << "[SR] Using basic algorithm, shift = " << (int)more << std::endl;
    }
  } else {
    // 我们需要上升一个等级
    proposed_m += proposed_m;
    const uint32_t twice_rem = rem + rem;
    if (twice_rem >= absD || twice_rem < rem) {
      proposed_m += 1;
    }
    more = (uint8_t)(floor_log_2_d | LIBDIVIDE_ADD_MARKER);
    if (DEBUG) {
      std::cout << "[SR] Using add algorithm, proposed_m = " << proposed_m << ", more = " << (int)more << std::endl;
    }
  }
-    shift++;
+  proposed_m += 1;
-    nc = 2 * nc;
+  int32_t magic = (int32_t)proposed_m;
-    delta = 2 * delta;
+  
  // 处理负除数
  if (d < 0) {
    more |= LIBDIVIDE_NEGATIVE_DIVISOR;
    if (!branchfree) {
      magic = -magic;
    }
    if (DEBUG) {
      std::cout << "[SR] Negative divisor, magic = " << magic << ", more = " << (int)more << std::endl;
    }
  }
-  if (magic > maxMagic) {
+  // 为我们的IR生成提取移位量和标志  
-    // 回退到简单的魔数
+  int shift = more & 0x3F; // 移除标志，保留移位量（位0-5）
-    magic = (1LL << bitWidth) / d + 1;
+  bool need_add = (more & LIBDIVIDE_ADD_MARKER) != 0;
-    shift = bitWidth;
+  bool is_negative = (more & LIBDIVIDE_NEGATIVE_DIVISOR) != 0;
  if (DEBUG) {
    std::cout << "[SR] Final result: magic = " << magic << ", more = " << (int)more 
              << " (0x" << std::hex << (int)more << std::dec << ")" << std::endl;
    std::cout << "[SR] Shift = " << shift << ", need_add = " << need_add 
              << ", is_negative = " << is_negative << std::endl;
    // Test the magic number using the correct libdivide algorithm
    std::cout << "[SR] Testing magic number (libdivide algorithm):" << std::endl;
    int test_values[] = {1, 7, 37, 100, 999, -1, -7, -37, -100};
    for (int test_val : test_values) {
      int64_t quotient;
      // 实现正确的libdivide算法
      int64_t product = (int64_t)test_val * magic;
      int64_t high_bits = product >> 32;
      if (need_add) {
        // ADD_MARKER情况：移位前加上被除数
        // 这是libdivide的关键洞察！
        high_bits += test_val;
        quotient = high_bits >> shift;
      } else {
        // 正常情况：只是移位
        quotient = high_bits >> shift;
      }
-  // 调整移位量以移除多余的2的幂因子
+      // 符号修正：这是libdivide有符号除法的关键部分！
-  shift = shift - bitWidth;
+      // 如果被除数为负，商需要加1来匹配C语言的截断除法语义
-  if (shift < 0) shift = 0;
+      if (test_val < 0) {
        quotient += 1;
      }
-  return {magic, shift};
+      int expected = test_val / divisor;
      bool correct = (quotient == expected);
      std::cout << "[SR]   " << test_val << " / " << divisor << " = " << quotient 
                << " (expected " << expected << ") " << (correct ? "✓" : "✗") << std::endl;
    }
    std::cout << "[SR] ===== End magic computation =====" << std::endl;
  }
  // 返回魔数、移位量，并在移位中编码ADD_MARKER标志
  // 我们将使用移位的第6位表示ADD_MARKER，第7位表示负数（如果需要）
  int encoded_shift = shift;
  if (need_add) {
    encoded_shift |= 0x40; // 设置第6位表示ADD_MARKER
    if (DEBUG) {
      std::cout << "[SR] Encoding ADD_MARKER in shift: " << encoded_shift << std::endl;
    }
  }
  return {magic, encoded_shift};
 }
 bool LoopStrengthReduction::runOnFunction(Function* F, AnalysisManager& AM) {
  if (F->getBasicBlocks().empty()) {
    return false; // 空函数
@@ -651,7 +776,7 @@ bool StrengthReductionContext::createNewInductionVariable(StrengthReductionCandi
  // 2. 在循环头创建新的 phi 指令
  builder->setPosition(header, header->begin());
  candidate->newPhi = builder->createPhiInst(originalPhi->getType());
-  candidate->newPhi->setName(originalPhi->getName() + "_sr");
+  candidate->newPhi->setName("sr_" + originalPhi->getName());
  // 3. 计算新归纳变量的初始值和步长
  // 新IV的初始值 = 原IV初始值 * multiplier
@@ -895,14 +1020,35 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
  // 使用mulh指令优化任意常数除法
  auto [magic, shift] = computeMulhMagicNumbers(candidate->multiplier);
-  if (magic == 1 && shift > 0) {
+  // 检查是否无法优化（magic == -1, shift == -1 表示失败）
-    // 特殊情况：可以直接使用移位
+  if (magic == -1 && shift == -1) {
-    Value* shiftConstant = ConstantInteger::get(shift);
+    if (DEBUG) {
      std::cout << "[SR] Cannot optimize division by " << candidate->multiplier 
                << ", keeping original division" << std::endl;
    }
    // 返回 nullptr 表示无法优化，调用方应该保持原始除法
    return nullptr;
  }
  // 2的幂次方除法可以用移位优化（但这不是魔数法的情况）这种情况应该不会被分类到这里但是还是做一个保护措施
  if ((candidate->multiplier & (candidate->multiplier - 1)) == 0 && candidate->multiplier > 0) {
    // 是2的幂次方，可以用移位
    int shift_amount = 0;
    int temp = candidate->multiplier;
    while (temp > 1) {
      temp >>= 1;
      shift_amount++;
    }
    Value* shiftConstant = ConstantInteger::get(shift_amount);
    if (candidate->hasNegativeValues) {
      // 对于有符号除法，需要先加上除数-1然后再移位（为了正确处理负数舍入）
      Value* divisor_minus_1 = ConstantInteger::get(candidate->multiplier - 1);
      Value* adjusted = builder->createAddInst(candidate->inductionVar, divisor_minus_1);
      return builder->createBinaryInst(
        Instruction::Kind::kSra,  // 算术右移
        candidate->inductionVar->getType(),
-        candidate->inductionVar,
+        adjusted,
        shiftConstant
      );
    } else {
@@ -916,8 +1062,25 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
  }
  // 创建魔数常量
  // 检查魔数是否能放入32位，如果不能，则不进行优化
  if (magic > INT32_MAX || magic < INT32_MIN) {
    if (DEBUG) {
      std::cout << "[SR] Magic number " << magic << " exceeds 32-bit range, skipping optimization" << std::endl;
    }
    return nullptr; // 无法优化，保持原始除法
  }
  Value* magicConstant = ConstantInteger::get((int32_t)magic);
  // 检查是否需要ADD_MARKER处理（加法调整）
  bool needAdd = (shift & 0x40) != 0;
  int actualShift = shift & 0x3F; // 提取真实的移位量
  if (DEBUG) {
    std::cout << "[SR] IR Generation: magic=" << magic << ", needAdd=" << needAdd 
              << ", actualShift=" << actualShift << std::endl;
  }
  // 执行高位乘法：mulh(x, magic)
  Value* mulhResult = builder->createBinaryInst(
    Instruction::Kind::kMulh,  // 高位乘法
@@ -926,9 +1089,18 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
    magicConstant
  );
-  if (shift > 0) {
+  if (needAdd) {
    // ADD_MARKER 情况：需要在移位前加上被除数
    // 这对应于 libdivide 的加法调整算法
    if (DEBUG) {
      std::cout << "[SR] Applying ADD_MARKER: adding dividend before shift" << std::endl;
    }
    mulhResult = builder->createAddInst(mulhResult, candidate->inductionVar);
  }
  if (actualShift > 0) {
    // 如果需要额外移位
-    Value* shiftConstant = ConstantInteger::get(shift);
+    Value* shiftConstant = ConstantInteger::get(actualShift);
    mulhResult = builder->createBinaryInst(
      Instruction::Kind::kSra,  // 算术右移
      candidate->inductionVar->getType(),
@@ -937,14 +1109,11 @@ Value* StrengthReductionContext::generateConstantDivisionReplacement(
    );
  }
-  // 处理负数校正 - 简化版本
+  // 标准的有符号除法符号修正：如果被除数为负，商需要加1
-  if (candidate->hasNegativeValues) {
+  // 这对所有有符号除法都需要，不管是否可能有负数
-    // 简化处理：添加一个常数偏移来处理负数情况
+  Value* isNegative = builder->createICmpLTInst(candidate->inductionVar, ConstantInteger::get(0));
-    // 这是一个简化的实现，实际的负数校正会更复杂
+  // 将i1转换为i32：负数时为1，非负数时为0 ICmpLTInst的结果会默认转化为32位
-    Value* zero = ConstantInteger::get(0);
+  mulhResult = builder->createAddInst(mulhResult, isNegative);
    Value* isNegative = builder->createICmpLTInst(candidate->inductionVar, zero);
    // 这里应该有条件逻辑，但为了简化实现，暂时直接返回mulhResult
  }
  return mulhResult;
 }