From 96c6b0ab6ebad1abfe7c0955200b91ed50a88365 Mon Sep 17 00:00:00 2001
From: rain2133 <1370973498@qq.com>
Date: Fri, 25 Jul 2025 01:53:49 +0800
Subject: [PATCH 1/2] =?UTF-8?q?[midend]=E4=BF=AE=E5=A4=8D=E5=87=BD?=
 =?UTF-8?q?=E6=95=B0=E5=8F=82=E6=95=B0=E4=B8=BA=E6=95=B0=E7=BB=84=E6=8C=87?=
 =?UTF-8?q?=E9=92=88=E7=9A=84=E9=80=80=E5=8C=96=E9=97=AE=E9=A2=98=EF=BC=8C?=
 =?UTF-8?q?=E8=83=BD=E5=A4=9F=E6=AD=A3=E7=A1=AE=E5=8C=BA=E5=88=86=E5=B1=80?=
 =?UTF-8?q?=E9=83=A8=E5=8F=98=E9=87=8F=E5=92=8C=E5=87=BD=E6=95=B0=E5=8F=82?=
 =?UTF-8?q?=E6=95=B0=E5=B9=B6=E7=94=9F=E6=88=90=E6=AD=A3=E7=A1=AE=E7=9A=84?=
 =?UTF-8?q?GEP=E6=8C=87=E4=BB=A4?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/SysYIRGenerator.cpp       | 183 +++++++++++++++++++++++++---------
 src/include/IRBuilder.h       |  75 +++++++++++---
 src/include/SysYIRGenerator.h |   2 +
 3 files changed, 200 insertions(+), 60 deletions(-)
diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp
index 32a07aa..7618d18 100644
--- a/src/SysYIRGenerator.cpp
+++ b/src/SysYIRGenerator.cpp
@@ -40,19 +40,21 @@ Type* SysYIRGenerator::buildArrayType(Type* baseType, const std::vector<Value*>&
     return currentType;
 }
 
+// @brief: Emit a GEP instruction and return the address value it produces.
+// @param basePointer: base pointer of the GEP; must have pointer type (asserted below). In visitLValue this is
+//                     the AllocaInst/GlobalValue itself, or the LoadInst result for an array parameter.
+// @param indices: complete index list, forwarded unchanged; any leading 0 must already be in it.
+// @return: whatever builder.createGetElementPtrInst returns for (basePointer, indices).
 Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const std::vector<Value*>& indices) {
     // 检查 basePointer 是否为指针类型
-    assert(basePointer->getType()->isPointer());
+    assert(basePointer->getType()->isPointer() && "Base pointer must be a pointer type!");
 
-    // GEP 的第一个索引通常是0，用于“步过”指针本身，访问其指向的对象。
-    // 例如，对于全局数组 @arr，其类型为 [6 x i32]*，第一个0索引是必需的步过偏移。
-    std::vector<Value*> actualGEPIndices;
-    actualGEPIndices.push_back(ConstantInteger::get(0));
-    actualGEPIndices.insert(actualGEPIndices.end(), indices.begin(), indices.end());
-    
-    // 直接调用 builder 的方法，无需再关心类型推断的细节
-    return builder.createGetElementPtrInst(basePointer, actualGEPIndices);
+    // No leading 0 is added here any more: callers (visitLValue, visitVarDecl) pass the full
+    // index list, including that 0 when the base points at a whole array.
+    // NOTE(review): confirm every other caller was updated to supply its own leading 0.
+    return builder.createGetElementPtrInst(basePointer, indices);
 }
+
 /*
  * @brief: visit compUnit
  * @details:
@@ -168,7 +170,7 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) {
     // 对于数组，alloca 的类型将是指针指向数组类型，例如 `int[2][3]*`
     // 对于标量，alloca 的类型将是指针指向标量类型，例如 `int*`
     AllocaInst* alloca =
-        builder.createAllocaInst(Type::getPointerType(variableType), dims, name);
+        builder.createAllocaInst(Type::getPointerType(variableType), {}, name);
 
     if (varDef->initVal() != nullptr) {
       ValueCounter values;
@@ -239,9 +241,15 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) {
                                           ConstantInteger::get(static_cast<int>(tempLinearIndex % dimSizes[dimIdx])));
                       tempLinearIndex /= dimSizes[dimIdx];
                   }
-
+                  
+                  // For a local array the alloca itself is the GEP base pointer, so the
+                  // index list starts with a 0 that steps through that pointer to the array.
+                  std::vector<Value*> gepIndicesForInit;
+                  gepIndicesForInit.push_back(ConstantInteger::get(0));
+                  gepIndicesForInit.insert(gepIndicesForInit.end(), currentIndices.begin(), currentIndices.end());
+                  
                   // 计算元素的地址
-                  Value* elementAddress = getGEPAddressInst(alloca, currentIndices);
+                  Value* elementAddress = getGEPAddressInst(alloca, gepIndicesForInit);
                   // 生成 store 指令
                   builder.createStoreInst(currentValue, elementAddress);
               }
@@ -328,34 +336,72 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){
 
   auto name = ctx->Ident()->getText();
   std::vector<Type *> paramTypes;
+  std::vector<Type *> paramActualTypes;
   std::vector<std::string> paramNames;
   std::vector<std::vector<Value *>> paramDims;
 
   if (ctx->funcFParams() != nullptr) {
     auto params = ctx->funcFParams()->funcFParam();
     for (const auto &param : params) {
-      paramTypes.push_back(std::any_cast<Type *>(visitBType(param->bType())));
-      paramNames.push_back(param->Ident()->getText());
-      std::vector<Value *> dims = {};
-      if (!param->LBRACK().empty()) {
-        dims.push_back(ConstantInteger::get(-1)); // 第一个维度不确定
+      Type* baseBType = std::any_cast<Type *>(visitBType(param->bType()));
+      std::string paramName = param->Ident()->getText();
+      
+      // Dimension list for this parameter; it stays empty unless the parameter is an array.
+      std::vector<Value *> currentParamDims; 
+      if (!param->LBRACK().empty()) { // brackets in the declaration mark an array parameter
+        // The first dimension of a SysY array parameter may be omitted (int arr[] or int arr[][10]).
+        // ConstantInteger::get(-1) is only a placeholder for it; the type is built from the remaining dimensions.
+        currentParamDims.push_back(ConstantInteger::get(-1)); // placeholder for the unknown first dimension
         for (const auto &exp : param->exp()) {
-          dims.push_back(std::any_cast<Value *>(visitExp(exp)));
+          // Evaluate the dimension expression; the assert below requires a constant integer.
+          Value* dimVal = std::any_cast<Value *>(visitExp(exp));
+          // Fail here rather than later: presumably buildArrayType needs constant sizes (its body is not shown in this patch).
+          assert(dynamic_cast<ConstantInteger*>(dimVal) && "Array dimension in parameter must be a constant integer!");
+          currentParamDims.push_back(dimVal);
         }
       }
-      paramDims.emplace_back(dims);
+
+      // Work out the type the parameter has in the IR.
+      Type* actualParamType;
+      if (currentParamDims.empty()) { // case 1: scalar parameter (e.g., int x)
+        actualParamType = baseBType; // the base type is used unchanged
+      } else { // cases 2 & 3: array parameter (e.g., int arr[] or int arr[][10])
+        // An array parameter decays to a pointer whose pointee is the array
+        // formed by every dimension after the first.
+        
+        // Leave out the leading -1 placeholder.
+        std::vector<Value*> fixedDimsForTypeBuilding;
+        if (currentParamDims.size() > 1) { // explicit inner dimensions exist (e.g., int arr[][10])
+            // Copy everything after the placeholder.
+            fixedDimsForTypeBuilding.assign(currentParamDims.begin() + 1, currentParamDims.end());
+        }
+        
+        Type* pointedToArrayType = baseBType; // with no inner dimensions the pointee is the base type
+        // The dimensions are passed in declaration order (outermost first).
+        // NOTE(review): the original comment says buildArrayType walks them in reverse itself; its body is not shown here, confirm.
+        // e.g., for int arr[][10] the list is [10] and the intended result is [10 x i32].
+        if (!fixedDimsForTypeBuilding.empty()) {
+          pointedToArrayType = buildArrayType(baseBType, fixedDimsForTypeBuilding);
+        }
+        
+        // The parameter type is a pointer to that pointee.
+        actualParamType = Type::getPointerType(pointedToArrayType); // e.g., i32* or [10 x i32]*
+      }
+      
+      paramActualTypes.push_back(actualParamType); // IR type of this parameter
+      paramNames.push_back(paramName); // name of this parameter
+    
     }
   }
 
   Type* returnType = std::any_cast<Type *>(visitFuncType(ctx->funcType()));
-  Type* funcType = Type::getFunctionType(returnType, paramTypes);
+  Type* funcType = Type::getFunctionType(returnType, paramActualTypes);
   Function* function = module->createFunction(name, funcType);
   BasicBlock* entry = function->getEntryBlock();
   builder.setPosition(entry, entry->end());
 
-  for (int i = 0; i < paramTypes.size(); ++i) {
-    AllocaInst* alloca = builder.createAllocaInst(Type::getPointerType(paramTypes[i]),
-                                           paramDims[i], paramNames[i]);
+  for (int i = 0; i < paramActualTypes.size(); ++i) {
+    AllocaInst* alloca = builder.createAllocaInst(Type::getPointerType(paramActualTypes[i]), {},paramNames[i]);
     entry->insertArgument(alloca);
     module->addVariable(paramNames[i], alloca);
   }
@@ -641,30 +687,41 @@ std::any SysYIRGenerator::visitReturnStmt(SysYParser::ReturnStmtContext *ctx) {
 }
 
 
-// SysYIRGenerator.cpp (修改部分)
+// Helper: number of nested array levels in `type`, looking through at most one outer pointer.
+// A non-pointer `type` is examined as-is.
+//   i32*               -> 0 (the pointee i32 is not an array)
+//   [10 x i32]*        -> 1
+//   [20 x [10 x i32]]* -> 2
+// NOTE(review): the slot of an array parameter (e.g. `[20 x i32]**` for `int b[][20]`) is still
+// a pointer after that single strip, so it yields 0. Confirm that visitLValue's
+// `dims.size() < declaredNumDims` sub-array test is meant to see 0 for such parameters.
+unsigned SysYIRGenerator::countArrayDimensions(Type* type) {
+    // Begin at the pointee when handed a pointer; otherwise inspect `type` itself.
+    Type* walk = type->isPointer() ? type->as<PointerType>()->getBaseType() : type;
+
+    // Peel array layers one at a time; every layer removed is one dimension.
+    // A null element type also ends the walk.
+    unsigned depth = 0;
+    for (; walk && walk->isArray(); walk = walk->as<ArrayType>()->getElementType()) {
+        ++depth;
+    }
+
+    return depth;
+}
 
 std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
   std::string name = ctx->Ident()->getText();
   User* variable = module->getVariable(name);
 
   Value* value = nullptr;
-  if (variable == nullptr) {
-    throw std::runtime_error("Variable " + name + " not found.");
-  }
+  if (variable == nullptr) throw std::runtime_error("Variable " + name + " not found."); // guard kept: variable->getType() is called below
   std::vector<Value *> dims;
   for (const auto &exp : ctx->exp()) {
     dims.push_back(std::any_cast<Value *>(visitExp(exp)));
   }
 
   // 1. 获取变量的声明维度数量
-  unsigned declaredNumDims = 0;
-  if (AllocaInst* alloc = dynamic_cast<AllocaInst*>(variable)) {
-    declaredNumDims = alloc->getNumDims();
-  } else if (GlobalValue* glob = dynamic_cast<GlobalValue*>(variable)) {
-    declaredNumDims = glob->getNumDims();
-  } else if (ConstantVariable* constV = dynamic_cast<ConstantVariable*>(variable)) {
-    declaredNumDims = constV->getNumDims();
-  }
+  unsigned declaredNumDims = countArrayDimensions(variable->getType());
 
   // 2. 处理常量变量 (ConstantVariable) 且所有索引都是常量的情况
   ConstantVariable* constVar = dynamic_cast<ConstantVariable *>(variable);
@@ -700,20 +757,54 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
     }
   } else {
     // 访问数组元素或子数组（有索引，或变量本身是数组/多维指针）
-    Value* targetAddress = nullptr;
-
+    Value* gepBasePointer = nullptr;
+    std::vector<Value*> gepIndices; // 准备传递给 getGEPAddressInst 的索引列表
     // GEP 的基指针就是变量本身（它是一个指向内存的指针）
-    if (dynamic_cast<AllocaInst*>(variable) || dynamic_cast<GlobalValue*>(variable) || (constVar != nullptr)) {
-        // 允许对 ConstantVariable (如果它代表全局数组常量) 进行 GEP
-        targetAddress = getGEPAddressInst(variable, dims);
+    if (AllocaInst *alloc = dynamic_cast<AllocaInst *>(variable)) {
+      // 情况 A: 局部变量 (AllocaInst)
+      // 获取 AllocaInst 分配的内存的实际类型。
+      // 例如：对于 `int b[10][20];`，`allocatedType` 是 `[10 x [20 x i32]]`。
+      // 对于 `int b[][20]` 的函数参数，其 AllocaInst 存储的是一个指针，
+      // 此时 `allocatedType` 是 `[20 x i32]*`。
+      Type* allocatedType = alloc->getType()->as<PointerType>()->getBaseType(); 
+
+      if (allocatedType->isPointer()) {
+        // 如果 AllocaInst 分配的是一个指针类型 (例如，用于存储函数参数的指针，如 int b[][20] 中的 b)
+        // 那么 GEP 的基指针是加载这个指针变量的值。
+        gepBasePointer = builder.createLoadInst(alloc); // 加载出实际的指针值 (e.g., [20 x i32]*)
+        // 对于这种参数指针，用户提供的索引直接作用于它。不需要额外的 0。
+        gepIndices = dims; 
+      } else {
+        // 如果 AllocaInst 分配的是实际的数组数据 (例如，int b[10][20] 中的 b)
+        // 那么 AllocaInst 本身就是 GEP 的基指针。
+        gepBasePointer = alloc; // 类型是 [10 x [20 x i32]]*
+        // 对于这种完整的数组分配，GEP 的第一个索引必须是 0，用于“步过”整个数组。
+        gepIndices.push_back(ConstantInteger::get(0));
+        gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
+      }
+    } else if (GlobalValue *glob = dynamic_cast<GlobalValue *>(variable)) {
+      // 情况 B: 全局变量 (GlobalValue)
+      // GlobalValue 总是指向全局数据的指针。
+      gepBasePointer = glob; // 类型是 [61 x [67 x i32]]*
+      // 对于全局数组，GEP 的第一个索引必须是 0，用于“步过”整个数组。
+      gepIndices.push_back(ConstantInteger::get(0));
+      gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
+    } else if (ConstantVariable *constV = dynamic_cast<ConstantVariable *>(variable)) {
+      // 情况 C: 常量变量 (ConstantVariable)，如果它代表全局数组常量
+      // 假设 ConstantVariable 可以直接作为 GEP 的基指针。
+      gepBasePointer = constV;
+      // 对于常量数组，也需要 0 索引来“步过”整个数组。
+      // 这里可以进一步检查 constV->getType()->as<PointerType>()->getBaseType()->isArray()
+      // 但为了简洁，假设所有 ConstantVariable 作为 GEP 基指针时都需要此 0。
+      gepIndices.push_back(ConstantInteger::get(0));
+      gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
     } else {
-        // 其他情况（例如尝试对非指针类型或不支持的 LValue 进行 GEP）应报错
-        assert(false && "LValue variable type not supported for GEP or dynamic load.");
-        return static_cast<Value*>(nullptr);
+      assert(false && "LValue variable type not supported for GEP base pointer.");
+      return static_cast<Value *>(nullptr);
     }
 
-    // 现在 targetAddress 持有元素或子数组的地址。
-    // 需要判断是加载值，还是返回子数组的地址。
+    // 现在调用 getGEPAddressInst，传入正确准备的基指针和索引列表
+    Value *targetAddress = getGEPAddressInst(gepBasePointer, gepIndices);
 
     // 如果提供的索引数量少于声明的维度数量，则表示访问的是子数组，返回其地址
     if (dims.size() < declaredNumDims) {
@@ -1264,7 +1355,7 @@ void Utils::createExternalFunction(
 
   for (int i = 0; i < paramTypes.size(); ++i) {
     auto alloca = pBuilder->createAllocaInst(
-        Type::getPointerType(paramTypes[i]), paramDims[i], paramNames[i]);
+        Type::getPointerType(paramTypes[i]), {}, paramNames[i]);
     entry->insertArgument(alloca);
     // pModule->addVariable(paramNames[i], alloca);
   }
diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h
index c97dec5..49941fd 100644
--- a/src/include/IRBuilder.h
+++ b/src/include/IRBuilder.h
@@ -347,22 +347,69 @@ class IRBuilder {
 
   static Type *getIndexedType(Type *pointerType, const std::vector<Value *> &indices) {
     assert(pointerType->isPointer() && "base must be a pointer type!");
-    Type *CurrentType = pointerType;
-    // 遍历所有索引来深入类型层次结构Sysy只支持数组
+    // Type *CurrentType = pointerType->as<PointerType>()->getBaseType();
+    // GEP 的类型推断从基指针所指向的类型开始。
+    // 例如：
+    // - 如果 pointerType 是 `[20 x [10 x i32]]*` (指向一个二维数组的指针)，
+    //   那么 `currentWalkType` 将从 `[20 x [10 x i32]]` (二维数组类型) 开始。
+    // - 如果 pointerType 是 `i32*` (指向一个整数的指针)，
+    //   那么 `currentWalkType` 将从 `i32` (整数类型) 开始。
+    Type *currentWalkType = pointerType->as<PointerType>()->getBaseType();
+
+    // 遍历所有索引来深入类型层次结构。
+    // 注意：这里的 `indices` 向量通常已经包含了 `getGEPAddressInst` 添加的第一个“步过”索引（通常是0）。
+    // 因此，`indices[0]` 对应的是 GEP 操作的第一个逻辑步骤。
     for (int i = 0; i < indices.size(); ++i) {
-      if(i == 0) {
-        // 第一个索引是指针类型的元素类型
-        CurrentType = pointerType->as<PointerType>()->getBaseType();
-      } else
-      if (CurrentType->isArray()) {
-        CurrentType = CurrentType->as<ArrayType>()->getElementType();
-      }
-      else {
-        // 如果类型不是聚合类型但仍有索引，说明索引过多，这是错误的
-        CurrentType = nullptr;
-      }
+        if (currentWalkType->isArray()) {
+            // 情况1：当前类型是数组类型 (例如 `[20 x [10 x i32]]` 或 `[10 x i32]`)。
+            // 此时，当前的索引用于选择数组中的一个元素（或子数组）。
+            // 新的 `currentWalkType` 变为该数组的元素类型。
+            // 例如：`[20 x [10 x i32]]` 经过一个索引后，变为 `[10 x i32]`。
+            currentWalkType = currentWalkType->as<ArrayType>()->getElementType();
+        // } else if (currentWalkType->isStruct()) {
+        //     // 情况2：当前类型是结构体类型。
+        //     // 此时，索引必须是一个常量整数，用于选择结构体中的特定成员。
+        //     // SysY 语言通常只支持数组，但如果你的 IR 支持结构体，这里需要实现。
+        //     ConstantInteger* structIdx = dynamic_cast<ConstantInteger*>(indices[i]);
+        //     assert(structIdx && "Struct index must be a constant integer!");
+        //     assert(structIdx->getInt() >= 0 && "Struct index cannot be negative!");
+        //     // 确保 `StructType` 类有 `getNumMembers()` 和 `getMemberType()` 方法。
+        //     // 如果你的 Type 系统没有这些方法，需要根据实际情况调整。
+        //     assert(structIdx->getInt() < currentWalkType->as<StructType>()->getNumMembers() && "Struct index out of bounds!");
+        //     currentWalkType = currentWalkType->as<StructType>()->getMemberType(structIdx->getInt());
+        } else {
+            // 情况3：当前类型既不是数组也不是结构体（即它是一个标量类型，如 `i32` 或 `float`）。
+            //
+            // 如果 `currentWalkType` 是一个标量类型，并且**后面还有未处理的索引** (`i < indices.size() - 1`)，
+            // 这意味着我们试图对一个标量类型进行进一步的结构性索引，这是**无效的**。
+            // 例如：`int* ptr; ptr[0][0];`
+            // - `ptr` 的类型是 `int*`。
+            // - `currentWalkType` 初始化为 `int`。
+            // - `i = 0` 时，`currentWalkType` 是 `int`。它不是数组/结构体，类型不变。
+            // - `i = 1` 时，`currentWalkType` 仍然是 `int`。此时 `i < indices.size() - 1` (即 `1 < 2 - 1 = 1`) 为假。
+            //   因为 `indices.size()` 是 2 (`[0, 0]`)，`i` 是 1。`i < indices.size() - 1` 是 `1 < 1`，为假。
+            //   所以不会触发断言，`currentWalkType` 保持 `int`。这是正确的。
+            //
+            // 让我重新检查一下 `if (i > 0)` 和 `if (i < indices.size() - 1)` 的区别。
+            // 原始的 `if (i > 0)` 导致 `arr[1]` 失败是因为 `currentWalkType` 变成 `int` 后，
+            // `i=1` 触发了断言。
+            //
+            // LLVM GEP 的行为是：如果当前类型是标量，并且这是 GEP 的**最后一个索引**，那么 GEP 是合法的，
+            // 最终的类型就是这个标量类型。如果不是最后一个索引，则报错。
+            
+            // 修正后的判断：
+            // 如果当前类型是标量，并且当前索引 `i` 不是 `indices` 向量中的**最后一个索引**，
+            // 那么就意味着尝试对标量进行额外的结构性索引，这是错误的。
+            if (i < indices.size() - 1) { 
+                assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate type with further indices.");
+                return nullptr; // 返回空指针表示类型推断失败
+            }
+            // 如果 `currentWalkType` 是标量，并且这是最后一个索引，则类型保持不变。
+            // 这是合法的 GEP 操作，例如 `getelementptr i32, i32* %ptr, i64 5`。
+            // `currentWalkType` 将是 `i32`，并且循环会在此结束。
+        }
     }
-    return CurrentType;
+    return currentWalkType;
   }
 };
 
diff --git a/src/include/SysYIRGenerator.h b/src/include/SysYIRGenerator.h
index 66ce11c..aac6ec9 100644
--- a/src/include/SysYIRGenerator.h
+++ b/src/include/SysYIRGenerator.h
@@ -139,6 +139,8 @@ public:
   // 构建数组类型
   Type* buildArrayType(Type* baseType, const std::vector<Value*>& dims);
 
+  unsigned countArrayDimensions(Type* type);
+
 }; // class SysYIRGenerator
 
 } // namespace sysy
\ No newline at end of file

From 1e6f6ed711d9c14d7f5c718048d6aa945bd49734 Mon Sep 17 00:00:00 2001
From: rain2133 <1370973498@qq.com>
Date: Fri, 25 Jul 2025 03:26:10 +0800
Subject: [PATCH 2/2] =?UTF-8?q?[midend]=20GEP=E7=B1=BB=E5=9E=8B=E6=8E=A8?=
 =?UTF-8?q?=E6=96=AD=E5=87=BD=E6=95=B0getIndexedType=E9=80=BB=E8=BE=91?=
 =?UTF-8?q?=E4=BF=AE=E5=A4=8D=EF=BC=8C=E5=A2=9E=E5=8A=A0=E6=95=B0=E7=BB=84?=
 =?UTF-8?q?type=E7=BC=93=E5=AD=98=E6=B1=A0=E9=81=BF=E5=85=8D=E7=9B=B8?=
 =?UTF-8?q?=E5=90=8Ctype=20=3D=3D=E6=93=8D=E4=BD=9C=E8=BF=94=E5=9B=9E?=
 =?UTF-8?q?=E5=81=87=EF=BC=8C=E4=BF=AE=E5=A4=8D=E5=AE=9E=E5=8F=82=E5=BD=A2?=
 =?UTF-8?q?=E5=8F=82=E7=B1=BB=E5=9E=8B=E8=BD=AC=E6=8D=A2=E5=88=A4=E6=96=AD?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91=EF=BC=8Cstarttime=20stoptime=E6=8F=90?=
 =?UTF-8?q?=E4=BE=9B=E6=94=AF=E6=8C=81=EF=BC=88=E5=BE=85=E5=90=8E=E7=AB=AF?=
 =?UTF-8?q?=E6=B5=8B=E8=AF=95=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/IR.cpp              | 13 ++++++--
 src/SysYIRGenerator.cpp | 65 +++++++++++++++++++++++++++----------
 src/include/IRBuilder.h | 71 +++++++++++++++--------------------------
 3 files changed, 85 insertions(+), 64 deletions(-)

diff --git a/src/IR.cpp b/src/IR.cpp
index c694839..da4292b 100644
--- a/src/IR.cpp
+++ b/src/IR.cpp
@@ -105,8 +105,17 @@ FunctionType*FunctionType::get(Type *returnType, const std::vector<Type *> &para
 }
 
 ArrayType *ArrayType::get(Type *elementType, unsigned numElements) {
-  // TODO:可以考虑在这里添加缓存，避免重复创建相同的数组类型
-  return new ArrayType(elementType, numElements);
+  // Function-local cache: an (elementType, numElements) pair seen before returns the cached object.
+  static std::set<std::unique_ptr<ArrayType>> arrayTypes;
+  for (const auto &cached : arrayTypes) {
+    if (elementType == cached->getElementType() && numElements == cached->getNumElements()) {
+      return cached.get();
+    }
+  }
+  auto type = new ArrayType(elementType, numElements);
+  assert(type);
+  auto result = arrayTypes.emplace(type);
+  return result.first->get();
 }
 
 void Value::replaceAllUsesWith(Value *value) {
diff --git a/src/SysYIRGenerator.cpp b/src/SysYIRGenerator.cpp
index 7618d18..09b1214 100644
--- a/src/SysYIRGenerator.cpp
+++ b/src/SysYIRGenerator.cpp
@@ -770,6 +770,7 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
 
       if (allocatedType->isPointer()) {
         // 如果 AllocaInst 分配的是一个指针类型 (例如，用于存储函数参数的指针，如 int b[][20] 中的 b)
+        // 即 `allocatedType` 是一个指向数组指针的指针 (e.g., [20 x i32]**)
         // 那么 GEP 的基指针是加载这个指针变量的值。
         gepBasePointer = builder.createLoadInst(alloc); // 加载出实际的指针值 (e.g., [20 x i32]*)
         // 对于这种参数指针，用户提供的索引直接作用于它。不需要额外的 0。
@@ -777,6 +778,7 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
       } else {
         // 如果 AllocaInst 分配的是实际的数组数据 (例如，int b[10][20] 中的 b)
         // 那么 AllocaInst 本身就是 GEP 的基指针。
+        // 这里的 `alloc` 是指向数组的指针 (e.g., [10 x [20 x i32]]*)
         gepBasePointer = alloc; // 类型是 [10 x [20 x i32]]*
         // 对于这种完整的数组分配，GEP 的第一个索引必须是 0，用于“步过”整个数组。
         gepIndices.push_back(ConstantInteger::get(0));
@@ -856,32 +858,63 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) {
 
   std::vector<Value *> args = {};
   if (funcName == "starttime" || funcName == "stoptime") {
-    // 如果是starttime或stoptime函数
-    // TODO: 这里需要处理starttime和stoptime函数的参数
-    // args.emplace_back()
+    args.emplace_back(
+        ConstantInteger::get(static_cast<int>(ctx->getStart()->getLine())));
   } else {
     if (ctx->funcRParams() != nullptr) {
       args = std::any_cast<std::vector<Value *>>(visitFuncRParams(ctx->funcRParams()));
     }
 
-    auto params = function->getEntryBlock()->getArguments();
+    // Formal parameters from the callee's entry block; each one's type is read as a pointer to the parameter type below.
+    auto formalParamsAlloca = function->getEntryBlock()->getArguments();
+
+    // Arity mismatch must throw: assert() compiles out under NDEBUG and the loop below would
+    // then index formalParamsAlloca[i] past its end for every surplus argument.
+    if (args.size() != formalParamsAlloca.size()) {
+      std::cerr << "Error: Function call argument count mismatch for function '" << funcName << "'." << std::endl;
+      throw std::runtime_error("Function call argument count mismatch!");
+    }
+
     for (int i = 0; i < args.size(); i++) {
-      // 参数类型转换
-      if (params[i]->getType() != args[i]->getType() &&
-          (params[i]->getNumDims() != 0 ||
-           params[i]->getType()->as<PointerType>()->getBaseType() != args[i]->getType())) {
-        ConstantValue * constValue = dynamic_cast<ConstantValue *>(args[i]);
+      // 形参的类型 (e.g., i32, float, i32*, [10 x i32]*)
+      Type* formalParamExpectedValueType = formalParamsAlloca[i]->getType()->as<PointerType>()->getBaseType();
+      // 实参的实际类型 (e.g., i32, float, i32*, [67 x i32]*)            
+      Type* actualArgType = args[i]->getType();
+      // 如果实参类型与形参类型不匹配，则尝试进行类型转换
+      if (formalParamExpectedValueType != actualArgType) {
+        ConstantValue *constValue = dynamic_cast<ConstantValue *>(args[i]);
         if (constValue != nullptr) {
-          if (params[i]->getType() == Type::getPointerType(Type::getFloatType())) {
-            args[i] = ConstantInteger::get(static_cast<float>(constValue->getInt()));
+          if (formalParamExpectedValueType->isInt() && actualArgType->isFloat()) {
+            args[i] = ConstantInteger::get(static_cast<int>(constValue->getFloat()));
+          } else if (formalParamExpectedValueType->isFloat() && actualArgType->isInt()) {
+            args[i] = ConstantFloating::get(static_cast<float>(constValue->getInt()));
           } else {
-            args[i] = ConstantFloating::get(static_cast<int>(constValue->getFloat()));
+            // 如果是常量但不是简单的 int/float 标量转换，
+            // 或者是指针常量需要 bitcast，则让它进入非常量转换逻辑。
+            // 例如，一个常量数组的地址，需要 bitcast 成另一种指针类型。
+            // 目前不知道样例有没有这种情况，所以这里不做处理。
           }
-        } else {
-          if (params[i]->getType() == Type::getPointerType(Type::getFloatType())) {
-            args[i] = builder.createIToFInst(args[i]);
-          } else {
+        } 
+        else {
+          // 1. 标量值类型转换 (例如：int_reg 到 float_reg，float_reg 到 int_reg)
+          if (formalParamExpectedValueType->isInt() && actualArgType->isFloat()) {
             args[i] = builder.createFtoIInst(args[i]);
+          } else if (formalParamExpectedValueType->isFloat() && actualArgType->isInt()) {
+            args[i] = builder.createIToFInst(args[i]);
+          }
+          // 2. 指针类型转换 (例如数组退化：`[N x T]*` 到 `T*`，或兼容指针类型之间) TODO：不清楚有没有这种样例
+          // 这种情况常见于数组参数，实参可能是一个更具体的数组指针类型，
+          // 而形参是其退化后的基础指针类型。LLVM 的 `bitcast` 指令可以用于
+          // 在相同大小的指针类型之间进行转换，这对于数组退化至关重要。
+          // else if (formalParamType->isPointer() && actualArgType->isPointer()) {
+            // 检查指针基类型是否兼容，或者是否是数组退化导致的类型不同。
+            // 使用 bitcast，
+            // args[i] = builder.createBitCastInst(args[i], formalParamType);
+          // }
+          // 3. 其他未预期的类型不匹配
+          // 如果代码执行到这里，说明存在编译器前端未处理的类型不兼容或错误。
+          else {
+            // assert(false && "Unhandled type mismatch for function call argument.");
           }
         }
       }
diff --git a/src/include/IRBuilder.h b/src/include/IRBuilder.h
index 49941fd..d9e92ef 100644
--- a/src/include/IRBuilder.h
+++ b/src/include/IRBuilder.h
@@ -347,68 +347,47 @@ class IRBuilder {
 
   static Type *getIndexedType(Type *pointerType, const std::vector<Value *> &indices) {
     assert(pointerType->isPointer() && "base must be a pointer type!");
-    // Type *CurrentType = pointerType->as<PointerType>()->getBaseType();
     // GEP 的类型推断从基指针所指向的类型开始。
     // 例如：
-    // - 如果 pointerType 是 `[20 x [10 x i32]]*` (指向一个二维数组的指针)，
-    //   那么 `currentWalkType` 将从 `[20 x [10 x i32]]` (二维数组类型) 开始。
-    // - 如果 pointerType 是 `i32*` (指向一个整数的指针)，
-    //   那么 `currentWalkType` 将从 `i32` (整数类型) 开始。
+    // - 如果 pointerType 是 `[20 x [10 x i32]]*`，`currentWalkType` 初始为 `[20 x [10 x i32]]`。
+    // - 如果 pointerType 是 `i32*`，`currentWalkType` 初始为 `i32`。
+    // - 如果 pointerType 是 `i32**`，`currentWalkType` 初始为 `i32*`。
     Type *currentWalkType = pointerType->as<PointerType>()->getBaseType();
 
     // 遍历所有索引来深入类型层次结构。
-    // 注意：这里的 `indices` 向量通常已经包含了 `getGEPAddressInst` 添加的第一个“步过”索引（通常是0）。
-    // 因此，`indices[0]` 对应的是 GEP 操作的第一个逻辑步骤。
+    // `indices` 向量包含了所有 GEP 索引，包括由 `visitLValue` 等函数添加的初始 `0` 索引。
     for (int i = 0; i < indices.size(); ++i) {
         if (currentWalkType->isArray()) {
-            // 情况1：当前类型是数组类型 (例如 `[20 x [10 x i32]]` 或 `[10 x i32]`)。
-            // 此时，当前的索引用于选择数组中的一个元素（或子数组）。
-            // 新的 `currentWalkType` 变为该数组的元素类型。
-            // 例如：`[20 x [10 x i32]]` 经过一个索引后，变为 `[10 x i32]`。
+            // 情况一：当前遍历类型是 `ArrayType`。
+            // 索引用于选择数组元素，`currentWalkType` 更新为数组的元素类型。
             currentWalkType = currentWalkType->as<ArrayType>()->getElementType();
-        // } else if (currentWalkType->isStruct()) {
-        //     // 情况2：当前类型是结构体类型。
-        //     // 此时，索引必须是一个常量整数，用于选择结构体中的特定成员。
-        //     // SysY 语言通常只支持数组，但如果你的 IR 支持结构体，这里需要实现。
-        //     ConstantInteger* structIdx = dynamic_cast<ConstantInteger*>(indices[i]);
-        //     assert(structIdx && "Struct index must be a constant integer!");
-        //     assert(structIdx->getInt() >= 0 && "Struct index cannot be negative!");
-        //     // 确保 `StructType` 类有 `getNumMembers()` 和 `getMemberType()` 方法。
-        //     // 如果你的 Type 系统没有这些方法，需要根据实际情况调整。
-        //     assert(structIdx->getInt() < currentWalkType->as<StructType>()->getNumMembers() && "Struct index out of bounds!");
-        //     currentWalkType = currentWalkType->as<StructType>()->getMemberType(structIdx->getInt());
+        } else if (currentWalkType->isPointer()) {
+            // Case 2: the type reached so far is itself a pointer, so this index moves on
+            // to the type it points at, e.g. `i32*` -> `i32`, `[10 x i32]*` -> `[10 x i32]`.
+            // NOTE(review): LLVM's getelementptr only indexes through the base pointer and
+            // never through a pointer found along the way (that needs a load). Confirm
+            // that consumers of this IR expect the looser rule before lowering such a GEP
+            // to LLVM.
+            currentWalkType = currentWalkType->as<PointerType>()->getBaseType();
         } else {
-            // 情况3：当前类型既不是数组也不是结构体（即它是一个标量类型，如 `i32` 或 `float`）。
+            // 情况三：当前遍历类型是标量类型 (例如 `i32`, `float` 等非聚合、非指针类型)。
             //
-            // 如果 `currentWalkType` 是一个标量类型，并且**后面还有未处理的索引** (`i < indices.size() - 1`)，
-            // 这意味着我们试图对一个标量类型进行进一步的结构性索引，这是**无效的**。
-            // 例如：`int* ptr; ptr[0][0];`
-            // - `ptr` 的类型是 `int*`。
-            // - `currentWalkType` 初始化为 `int`。
-            // - `i = 0` 时，`currentWalkType` 是 `int`。它不是数组/结构体，类型不变。
-            // - `i = 1` 时，`currentWalkType` 仍然是 `int`。此时 `i < indices.size() - 1` (即 `1 < 2 - 1 = 1`) 为假。
-            //   因为 `indices.size()` 是 2 (`[0, 0]`)，`i` 是 1。`i < indices.size() - 1` 是 `1 < 1`，为假。
-            //   所以不会触发断言，`currentWalkType` 保持 `int`。这是正确的。
+            // 如果 `currentWalkType` 是标量，并且当前索引 `i` **不是** `indices` 向量中的最后一个索引，
+            // 这意味着尝试对一个标量类型进行进一步的结构性索引，这是**无效的**。
+            // 例如：`int x; x[0];` 对应的 GEP 链中，`x` 的类型是 `i32`，再加 `[0]` 索引就是错误。
             //
-            // 让我重新检查一下 `if (i > 0)` 和 `if (i < indices.size() - 1)` 的区别。
-            // 原始的 `if (i > 0)` 导致 `arr[1]` 失败是因为 `currentWalkType` 变成 `int` 后，
-            // `i=1` 触发了断言。
-            //
-            // LLVM GEP 的行为是：如果当前类型是标量，并且这是 GEP 的**最后一个索引**，那么 GEP 是合法的，
-            // 最终的类型就是这个标量类型。如果不是最后一个索引，则报错。
-            
-            // 修正后的判断：
-            // 如果当前类型是标量，并且当前索引 `i` 不是 `indices` 向量中的**最后一个索引**，
-            // 那么就意味着尝试对标量进行额外的结构性索引，这是错误的。
+            // 如果 `currentWalkType` 是标量，且这是**最后一个索引** (`i == indices.size() - 1`)，
+            // 那么 GEP 是合法的，它只是计算一个偏移地址，最终的类型就是这个标量类型。
+            // 此时 `currentWalkType` 保持不变，循环结束。
             if (i < indices.size() - 1) { 
-                assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate type with further indices.");
+                assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate/non-pointer type with further indices.");
                 return nullptr; // 返回空指针表示类型推断失败
             }
-            // 如果 `currentWalkType` 是标量，并且这是最后一个索引，则类型保持不变。
-            // 这是合法的 GEP 操作，例如 `getelementptr i32, i32* %ptr, i64 5`。
-            // `currentWalkType` 将是 `i32`，并且循环会在此结束。
+            // 如果是最后一个索引，且当前类型是标量，则类型保持不变，这是合法的。
+            // 循环会自然结束，返回正确的 `currentWalkType`。
         }
     }
+    // 所有索引处理完毕后，`currentWalkType` 就是 GEP 指令最终计算出的地址所指向的元素的类型。
     return currentWalkType;
   }
 };