[midend]修复函数参数为数组指针的退化问题，能够正确区分局部变量和函数参数并生成正确的GEP指令

2025-07-25 01:53:49 +08:00
parent 18dc8dbfee
commit 96c6b0ab6e
3 changed files with 200 additions and 60 deletions
--- a/src/include/IRBuilder.h
+++ b/src/include/IRBuilder.h
@@ -347,22 +347,69 @@ class IRBuilder {

  static Type *getIndexedType(Type *pointerType, const std::vector<Value *> &indices) {
    assert(pointerType->isPointer() && "base must be a pointer type!");
-    Type *CurrentType = pointerType;
-    // 遍历所有索引来深入类型层次结构Sysy只支持数组
+    // Type *CurrentType = pointerType->as<PointerType>()->getBaseType();
+    // Computes the type a GEP yields: inference starts from the type the base pointer points to.
+    // For example:
+    // - If pointerType is `[20 x [10 x i32]]*` (a pointer to a 2-D array),
+    //   `currentWalkType` starts as `[20 x [10 x i32]]` (the 2-D array type).
+    // - If pointerType is `i32*` (a pointer to an integer),
+    //   `currentWalkType` starts as `i32` (the integer type).
+    Type *currentWalkType = pointerType->as<PointerType>()->getBaseType();
+
+    // Walk every index to descend through the type hierarchy.
+    // NOTE(review): per the original author, `indices` usually already holds the leading "step-over" index (typically 0) added by `getGEPAddressInst` - not visible here, confirm.
+    // Every index, `indices[0]` included, is applied to the pointee type: an array pointee loses one dimension per index.
    for (int i = 0; i < indices.size(); ++i) {
-      if(i == 0) {
-        // 第一个索引是指针类型的元素类型
-        CurrentType = pointerType->as<PointerType>()->getBaseType();
-      } else
-      if (CurrentType->isArray()) {
-        CurrentType = CurrentType->as<ArrayType>()->getElementType();
-      }
-      else {
-        // 如果类型不是聚合类型但仍有索引，说明索引过多，这是错误的
-        CurrentType = nullptr;
-      }
+        if (currentWalkType->isArray()) {
+            // Case 1: the current type is an array (e.g. `[20 x [10 x i32]]` or `[10 x i32]`).
+            // The current index selects one element (or sub-array) of it,
+            // so `currentWalkType` becomes the array's element type.
+            // E.g. `[20 x [10 x i32]]` becomes `[10 x i32]` after one index.
+            currentWalkType = currentWalkType->as<ArrayType>()->getElementType();
+        // } else if (currentWalkType->isStruct()) {
+        //     // Case 2 (disabled): the current type is a struct.
+        //     // The index must then be a constant integer selecting a specific member.
+        //     // SysY normally only supports arrays; implement this if the IR gains struct types.
+        //     ConstantInteger* structIdx = dynamic_cast<ConstantInteger*>(indices[i]);
+        //     assert(structIdx && "Struct index must be a constant integer!");
+        //     assert(structIdx->getInt() >= 0 && "Struct index cannot be negative!");
+        //     // Requires `StructType` to provide `getNumMembers()` and `getMemberType()`.
+        //     // Adjust to the actual Type system if those methods do not exist.
+        //     assert(structIdx->getInt() < currentWalkType->as<StructType>()->getNumMembers() && "Struct index out of bounds!");
+        //     currentWalkType = currentWalkType->as<StructType>()->getMemberType(structIdx->getInt());
+        } else {
+            // Case 3: the current type is not an array (struct handling above is disabled), e.g. a scalar such as `i32` or `float`.
+            //
+            // A non-array type cannot be indexed into any further. Whether that is an error
+            // depends on whether more indices follow the current one (`i < indices.size() - 1`).
+            // Example: `int* ptr; ptr[0][0];`
+            // - `ptr` has type `int*`,
+            // - so `currentWalkType` is initialised to `int`.
+            // - At `i = 0`, `currentWalkType` is `int` and `indices.size()` is 2 (`[0, 0]`),
+            //   so `i < indices.size() - 1` is `0 < 1`, which is true:
+            //   an index still remains after a non-array type,
+            //   so the assertion below fires (with asserts compiled out, nullptr is returned instead).
+            //
+            // Counter-example: a single index on `int*` (`indices.size()` is 1). At `i = 0` the
+            // check is `0 < 0`, which is false, so nothing fires and the result type
+            // stays `int`.
+            //
+            // NOTE(review): the original author's stated intent is to mirror LLVM GEP - a scalar current type
+            // is accepted only at the last index, and the result is that scalar type; otherwise it is an error. Confirm against LLVM.
+            
+            // The check:
+            // if the current type is non-array and index `i` is not the last entry of `indices`,
+            // an extra structural index is being applied to a non-aggregate type, which is an error.
+            if (i < indices.size() - 1) { 
+                assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate type with further indices.");
+                return nullptr; // signals that type inference failed (reached only when assert is compiled out)
+            }
+            // When `currentWalkType` is non-array and this is the last index, the type is left unchanged.
+            // This is the accepted case, e.g. `getelementptr i32, i32* %ptr, i64 5`:
+            // `currentWalkType` is `i32`, and the loop ends after this iteration.
+        }
    }
-    return CurrentType;
+    return currentWalkType;
  }
 };

--- a/src/include/SysYIRGenerator.h
+++ b/src/include/SysYIRGenerator.h
@@ -139,6 +139,8 @@ public:
  // 构建数组类型
  Type* buildArrayType(Type* baseType, const std::vector<Value*>& dims);

+  unsigned countArrayDimensions(Type* type);
+
 }; // class SysYIRGenerator

 } // namespace sysy