#include "Mem2Reg.h"
#include "SysYIRPrinter.h"
#include <algorithm>
#include <cassert>
#include <memory>
#include <queue>
#include <stack>
#include <unordered_map>
#include <unordered_set>
#include <vector>

namespace sysy {

// --- Private member function implementations ---

/// Computes the iterated dominance frontier of a set of blocks.
///
/// Starting from `blocks`, the dominance-frontier sets reported by
/// `controlFlowAnalysis` are followed transitively until no new block appears.
///
/// @param blocks Seed blocks (for mem2reg: the blocks that define a variable).
/// @return Every block reached through at least one dominance-frontier step.
///         A seed block is part of the result only if it also lies in the
///         frontier of some visited block.
std::unordered_set<BasicBlock*> Mem2Reg::computeIteratedDomFrontiers(const std::unordered_set<BasicBlock*>& blocks) {
  std::unordered_set<BasicBlock*> frontier;
  std::queue<BasicBlock*> pending;

  for (BasicBlock* seed : blocks) {
    pending.push(seed);
  }

  while (!pending.empty()) {
    BasicBlock* current = pending.front();
    pending.pop();

    auto* info = controlFlowAnalysis->getBlockAnalysisInfo(current);
    if (!info) {
      continue;  // No analysis record for this block; nothing to follow.
    }

    for (auto* candidate : info->getDomFrontiers()) {
      // insert() reports whether the block is new; only new blocks are expanded further.
      if (frontier.insert(candidate).second) {
        pending.push(candidate);
      }
    }
  }

  return frontier;
}

/// Records, for one alloca, which blocks store to it and which blocks load from it.
///
/// The previous contents of `allocaDefsBlock[alloca]` and `allocaUsesBlock[alloca]`
/// are discarded. Users that are not instructions, or that are neither a store
/// through the alloca nor a load from it, are ignored.
///
/// @param alloca The stack slot whose users are inspected.
void Mem2Reg::allocaAnalysis(AllocaInst* alloca) {
    auto& definingBlocks = allocaDefsBlock[alloca];
    auto& loadingBlocks = allocaUsesBlock[alloca];
    definingBlocks.clear();
    loadingBlocks.clear();

    for (const auto& use : alloca->getUses()) {
        auto* user = dynamic_cast<Instruction*>(use->getUser());
        if (!user) {
            continue;
        }

        if (auto* store = dynamic_cast<StoreInst*>(user)) {
            // Operand 1 of a store is the destination pointer.
            if (store->getOperand(1) == alloca) {
                definingBlocks.insert(store->getParent());
            }
            continue;
        }

        if (auto* load = dynamic_cast<LoadInst*>(user)) {
            // Operand 0 of a load is the source pointer.
            if (load->getOperand(0) == alloca) {
                loadingBlocks.insert(load->getParent());
            }
        }
    }
}

/// Decides whether an alloca can be promoted to an SSA register.
///
/// An alloca is promotable only when it is a scalar slot (neither an array nor
/// a global) and its address never escapes, i.e. every user is either
///   - a load whose result type equals the slot's element type, or
///   - a store that writes a value of the element type *through* the alloca
///     (the alloca must be the pointer operand, not the stored value).
/// Any other user (sub-array access, call argument, non-instruction user, or
/// an instruction kind this pass does not model) is conservatively treated as
/// an escape.
///
/// @param alloca The stack slot to examine.
/// @return true if mem2reg may replace the slot with SSA values.
bool Mem2Reg::is_promoted(AllocaInst* alloca) {
  // Only scalars qualify. The previous condition was inverted and rejected
  // exactly the scalar allocas it was meant to accept.
  // NOTE(review): assumes isArr/isGlobal return true for arrays/globals — confirm.
  if (SysYIROptUtils::isArr(alloca) || SysYIROptUtils::isGlobal(alloca)) {
    return false;
  }

  // The alloca's own type is a pointer; the pointee is the slot's element type.
  auto* ptrType = dynamic_cast<PointerType*>(alloca->getType());
  if (!ptrType) {
    return false;
  }
  Type* const slotType = ptrType->getBaseType();

  for (const auto& use : alloca->getUses()) {
    auto* userInst = dynamic_cast<Instruction*>(use->getUser());
    if (!userInst) {
      return false;  // A non-instruction user: assume the address escapes.
    }

    if (auto* load = dynamic_cast<LoadInst*>(userInst)) {
      if (load->getType() != slotType) {
        return false;  // Type-punned read; cannot be modelled by one register.
      }
      continue;
    }

    if (auto* store = dynamic_cast<StoreInst*>(userInst)) {
      // Operand 1 is the destination pointer, operand 0 the stored value.
      // Storing the alloca itself somewhere else leaks its address.
      if (store->getOperand(1) != alloca || store->getOperand(0)->getType() != slotType) {
        return false;
      }
      continue;
    }

    // Sub-array access, call argument, cast, or any other instruction: the
    // address is observable outside plain load/store, so do not promote.
    return false;
  }

  return true;
}

/// Inserts empty phi instructions for every promotable alloca.
///
/// For each alloca in `currentFunctionAllocas`, a worklist seeded with the
/// alloca's defining blocks (`allocaDefsBlock`) walks the dominance frontiers.
/// Each frontier block receives one phi, created at the block's beginning with
/// no incoming edges; the edges are added later by `renameBlock`. Every new phi
/// is appended to `allPhiInstructions` and registered in `phiMap` under its block.
///
/// @param func The function being processed (not referenced by this routine).
void Mem2Reg::insertPhiNodes(Function* func) {
  // Start from a clean slate: results of an earlier run must not leak in.
  phiMap.clear();
  allPhiInstructions.clear();

  std::queue<BasicBlock*> pending;
  std::unordered_set<BasicBlock*> blocksWithPhi;  // Reset for every alloca.

  for (AllocaInst* alloca : currentFunctionAllocas) {
    blocksWithPhi.clear();

    auto& definingBlocks = allocaDefsBlock[alloca];
    for (BasicBlock* seed : definingBlocks) {
      pending.push(seed);
    }

    while (!pending.empty()) {
      BasicBlock* current = pending.front();
      pending.pop();

      auto* info = controlFlowAnalysis->getBlockAnalysisInfo(current);
      if (!info) {
        continue;
      }

      for (BasicBlock* frontierBB : info->getDomFrontiers()) {
        if (blocksWithPhi.count(frontierBB) != 0) {
          continue;  // This alloca already has a phi in that block.
        }

        // The phi yields a value of the slot's element type.
        Type* phiType = dynamic_cast<PointerType*>(alloca->getType())->getBaseType();

        pBuilder->setPosition(frontierBB->begin());
        PhiInst* phi = pBuilder->createPhiInst(phiType, {}, {});

        allPhiInstructions.push_back(phi);
        blocksWithPhi.insert(frontierBB);
        phiMap[frontierBB][phi] = alloca;

        // A phi is itself a definition, so its block's frontier needs phis too.
        // Defining blocks were queued as seeds and need not be queued again.
        if (definingBlocks.count(frontierBB) == 0) {
          pending.push(frontierBB);
        }
      }
    }
  }
}

/// Returns the position of `pred` in `succ`'s predecessor list.
///
/// @param pred The candidate predecessor block.
/// @param succ The block whose predecessor list is searched.
/// @return Zero-based index of the first match, or -1 if `pred` is not listed.
int Mem2Reg::getPredIndex(BasicBlock* pred, BasicBlock* succ) {
  const auto& predecessors = succ->getPredecessors();

  int position = 0;
  for (auto it = predecessors.begin(); it != predecessors.end(); ++it, ++position) {
    if (*it == pred) {
      return position;
    }
  }

  // Not expected for a consistent CFG; callers treat -1 as "skip this edge".
  return -1;
}

/// Renames one basic block and, recursively, the blocks it dominates.
///
/// Steps performed for `block`:
///   1. Walk its instructions: loads from promotable allocas are replaced by
///      the reaching value and erased, stores to them update the reaching value
///      and are erased, and phis inserted by this pass become the new reaching
///      value of their alloca.
///   2. For every successor, append (reaching value, `block`) to each leading
///      phi that this pass inserted there.
///   3. Recurse into the blocks returned by `getSdoms()`.
///      NOTE(review): assumed to be the dominator-tree children — confirm.
///   4. Erase the promotable allocas that live in `block`.
///
/// Two correctness points differ from a naive implementation:
///   - `currentIncomings` is a reference, so each child receives its own
///     *copy*. Otherwise definitions made in one child subtree would be
///     visible in its siblings, which that subtree does not dominate.
///   - Allocas are erased only in step 4. The instruction list owns its
///     elements through smart pointers, and steps 1–3 still inspect the alloca
///     (operand casts, type queries), so erasing it earlier would leave those
///     steps with a dangling pointer.
///
/// @param block            Block to process; ignored if already visited.
/// @param currentIncomings Reaching value per alloca on entry to `block`;
///                         updated in place with the definitions made in `block`.
/// @param visitedBlocks    Blocks already renamed; `block` is added to it.
void Mem2Reg::renameBlock(BasicBlock* block,
                          std::unordered_map<AllocaInst*, Value*>& currentIncomings,
                          std::unordered_set<BasicBlock*>& visitedBlocks) {
  // Each block is renamed exactly once.
  if (!visitedBlocks.insert(block).second) {
    return;
  }

  // True if `candidate` is one of the allocas selected for promotion.
  const auto isPromotable = [this](AllocaInst* candidate) {
    return candidate != nullptr &&
           std::find(currentFunctionAllocas.begin(), currentFunctionAllocas.end(), candidate) !=
               currentFunctionAllocas.end();
  };

  // Undefined value of the slot's element type, used when no definition reaches.
  const auto undefFor = [](AllocaInst* slot) -> Value* {
    Type* valueType = slot->getType();
    if (valueType->isPointer()) {
      valueType = dynamic_cast<PointerType*>(valueType)->getBaseType();
    }
    return UndefinedValue::get(valueType);
  };

  // Alloca represented by `phi` in `bb`, or nullptr if this pass did not insert it.
  // Uses find() so that lookups never create empty phiMap entries.
  const auto slotForPhi = [this](BasicBlock* bb, PhiInst* phi) -> AllocaInst* {
    const auto blockEntry = phiMap.find(bb);
    if (blockEntry == phiMap.end()) {
      return nullptr;
    }
    const auto phiEntry = blockEntry->second.find(phi);
    return phiEntry == blockEntry->second.end() ? nullptr : phiEntry->second;
  };

  // Current reaching value of `slot`, falling back to undef.
  const auto reachingValue = [&currentIncomings, &undefFor](AllocaInst* slot) -> Value* {
    const auto known = currentIncomings.find(slot);
    return known != currentIncomings.end() ? known->second : undefFor(slot);
  };

  auto& instructions = block->getInstructions();

  // --- 1. Rewrite loads/stores/phis of this block ---
  for (auto it = instructions.begin(); it != instructions.end();) {
    Instruction* inst = it->get();

    if (auto* load = dynamic_cast<LoadInst*>(inst)) {
      auto* slot = dynamic_cast<AllocaInst*>(load->getOperand(0));  // operand 0: pointer
      if (isPromotable(slot)) {
        load->replaceAllUsesWith(reachingValue(slot));
        SysYIROptUtils::usedelete(inst);
        it = instructions.erase(it);
        continue;
      }
    } else if (auto* store = dynamic_cast<StoreInst*>(inst)) {
      auto* slot = dynamic_cast<AllocaInst*>(store->getOperand(1));  // operand 1: pointer
      if (isPromotable(slot)) {
        currentIncomings[slot] = store->getOperand(0);  // operand 0: stored value
        SysYIROptUtils::usedelete(inst);
        it = instructions.erase(it);
        continue;
      }
    } else if (auto* phi = dynamic_cast<PhiInst*>(inst)) {
      if (AllocaInst* slot = slotForPhi(block, phi)) {
        currentIncomings[slot] = phi;  // The phi is the new definition of the slot.
      }
    }
    // Promotable allocas are intentionally left in place until step 4.
    ++it;
  }

  // --- 2. Supply this block's outgoing values to the successors' phis ---
  for (BasicBlock* successorBB : block->getSuccessors()) {
    if (getPredIndex(block, successorBB) == -1) {
      continue;  // Inconsistent edge: the successor does not list this block.
    }

    for (auto& instPtr : successorBB->getInstructions()) {
      auto* phi = dynamic_cast<PhiInst*>(instPtr.get());
      if (!phi) {
        break;  // Phis are grouped at the block start; the first non-phi ends them.
      }
      AllocaInst* slot = slotForPhi(successorBB, phi);
      if (!slot) {
        continue;  // Not a phi created by this pass.
      }
      if (Value* incoming = reachingValue(slot)) {
        phi->addIncoming(incoming, block);
      }
    }
  }

  // --- 3. Recurse with an isolated copy of the reaching definitions ---
  if (auto* blockInfo = controlFlowAnalysis->getBlockAnalysisInfo(block)) {
    for (BasicBlock* dominatedChildBB : blockInfo->getSdoms()) {
      std::unordered_map<AllocaInst*, Value*> childIncomings = currentIncomings;
      renameBlock(dominatedChildBB, childIncomings, visitedBlocks);
    }
  }

  // --- 4. Now that the dominated subtree is rewritten, drop this block's promoted allocas ---
  for (auto it = instructions.begin(); it != instructions.end();) {
    auto* slot = dynamic_cast<AllocaInst*>(it->get());
    if (isPromotable(slot)) {
      SysYIROptUtils::usedelete(slot);
      it = instructions.erase(it);
    } else {
      ++it;
    }
  }
}

/// Removes a phi that is trivially redundant.
///
/// - A phi without operands is replaced by an undefined value of its type and erased.
/// - A phi whose incoming values are all the same (and not the phi itself) is
///   replaced by that value and erased.
/// - If a self-reference is met before any differing value, or the incoming
///   values differ, the phi is left untouched (self-referencing phis are left
///   to later loop optimisations).
///
/// @param phi The phi to inspect; nothing happens if it has no parent block.
void Mem2Reg::simplifyphi(PhiInst* phi) {
  BasicBlock* owner = phi->getParent();
  if (owner == nullptr) {
    return;  // Not attached to a block (possibly already removed).
  }

  // Locates the phi in its block, releases its uses, then erases it.
  // TODO: the linear search could be avoided; phis sit at the block start.
  const auto removeFromOwner = [phi, owner]() {
    auto position = std::find_if(owner->getInstructions().begin(), owner->getInstructions().end(),
                                 [phi](const auto &instr) { return instr.get() == phi; });
    SysYIROptUtils::usedelete(phi);
    owner->getInstructions().erase(position);
  };

  if (phi->getNumOperands() == 0) {
    // An empty phi carries no value at all.
    phi->replaceAllUsesWith(UndefinedValue::get(phi->getType()));
    removeFromOwner();
    return;
  }

  // Operands alternate (value, block), so incoming value i is operand 2*i.
  Value* sharedValue = nullptr;
  const size_t incomingCount = phi->getNumIncomingValues();
  for (size_t idx = 0; idx < incomingCount; ++idx) {
    Value* candidate = phi->getOperand(2 * idx);

    if (candidate == phi) {
      return;  // Self-reference (loop-carried value): do not simplify here.
    }
    if (sharedValue == nullptr) {
      sharedValue = candidate;
      continue;
    }
    if (sharedValue != candidate) {
      return;  // At least two distinct incoming values: the phi is needed.
    }
  }

  if (sharedValue == nullptr) {
    return;
  }

  // Every edge carries the same value, so the phi is just an alias for it.
  phi->replaceAllUsesWith(sharedValue);
  removeFromOwner();
}

/// Runs memory-to-register promotion on a single function.
///
/// Pipeline:
///   1. Collect the promotable allocas of the entry block and analyse their
///      defining/using blocks.
///   2. Erase allocas that have no load or store at all.
///   3. Insert empty phis on the iterated dominance frontiers.
///   4. Rename from the entry block, which rewrites loads/stores and fills the phis.
///   5. Simplify trivial phis, drop phis without uses, and prune `phiMap`.
///
/// Once `simplifyphi` has erased a phi from its block, the pointer must not be
/// dereferenced again, because the instruction list owns its elements through
/// smart pointers. Whether a phi survived is therefore decided by searching its
/// former parent block for the pointer, never by querying the phi.
///
/// @param func The function to transform.
/// @return true if the IR of `func` was modified.
bool Mem2Reg::promoteMemoryToRegisters(Function* func) {
  bool changed = false;

  // Reset all per-function state.
  currentFunctionAllocas.clear();
  allocaDefsBlock.clear();
  allocaUsesBlock.clear();
  phiMap.clear();
  allPhiInstructions.clear();

  BasicBlock* entryBB = func->getEntryBlock();
  if (!entryBB) return false;

  // Pointer-only test: is `inst` still an element of `parent`'s instruction list?
  const auto isInBlock = [](BasicBlock* parent, const Instruction* inst) {
    const auto& insts = parent->getInstructions();
    return std::any_of(insts.begin(), insts.end(),
                       [inst](const auto& owned) { return owned.get() == inst; });
  };

  // Releases `inst`'s uses and erases it from `parent`; a no-op (returning
  // false) if `inst` is not in that block, so erase() never receives end().
  const auto eraseFromBlock = [](BasicBlock* parent, Instruction* inst) {
    auto& insts = parent->getInstructions();
    const auto pos = std::find_if(insts.begin(), insts.end(),
                                  [inst](const auto& owned) { return owned.get() == inst; });
    if (pos == insts.end()) {
      return false;
    }
    SysYIROptUtils::usedelete(inst);
    insts.erase(pos);
    return true;
  };

  // --- 1. Collect promotable allocas (entry block only, in program order) ---
  for (const auto& inst : entryBB->getInstructions()) {
    if (auto* alloca = dynamic_cast<AllocaInst*>(inst.get())) {
      if (is_promoted(alloca)) {
        currentFunctionAllocas.push_back(alloca);
      }
    }
  }
  for (AllocaInst* alloca : currentFunctionAllocas) {
    allocaAnalysis(alloca);
  }

  // --- 2. Remove allocas that are never loaded from or stored to ---
  for (auto it = currentFunctionAllocas.begin(); it != currentFunctionAllocas.end();) {
    AllocaInst* alloca = *it;

    const auto& uses = alloca->getUses();
    const bool hasMemoryUser = std::any_of(uses.begin(), uses.end(), [](const auto& use) {
      auto* user = dynamic_cast<Instruction*>(use->getUser());
      return user != nullptr &&
             (dynamic_cast<LoadInst*>(user) != nullptr || dynamic_cast<StoreInst*>(user) != nullptr);
    });

    const bool isDead = !hasMemoryUser && allocaDefsBlock[alloca].empty() && allocaUsesBlock[alloca].empty();
    if (!isDead) {
      ++it;
      continue;
    }

    if (BasicBlock* parent = alloca->getParent()) {
      eraseFromBlock(parent, alloca);
    }
    it = currentFunctionAllocas.erase(it);
    changed = true;
  }

  if (currentFunctionAllocas.empty()) {
    return changed;
  }

  // --- 3. Phi placement ---
  insertPhiNodes(func);
  if (!allPhiInstructions.empty()) changed = true;

  // --- 4. Renaming ---
  std::unordered_map<AllocaInst*, Value*> initialIncomings;
  std::unordered_set<BasicBlock*> visitedBlocks;
  for (AllocaInst* alloca : currentFunctionAllocas) {
    // Before any store, a slot holds an undefined value of its element type.
    initialIncomings[alloca] = UndefinedValue::get(dynamic_cast<PointerType*>(alloca->getType())->getBaseType());
  }
  renameBlock(entryBB, initialIncomings, visitedBlocks);
  // Renaming erases the promoted allocas and their loads/stores, so the IR has
  // changed even when no phi was needed (e.g. straight-line code).
  changed = true;

  // --- 5a. Simplify trivial phis, remembering which ones are still in the IR ---
  std::vector<PhiInst*> survivingPhis;
  survivingPhis.reserve(allPhiInstructions.size());
  for (PhiInst* phi : allPhiInstructions) {
    // Each phi is alive here: only its own simplifyphi() call can erase it.
    BasicBlock* parent = phi->getParent();
    if (!parent) {
      continue;
    }
    Instruction* const asInstruction = phi;  // Taken before the phi may be destroyed.
    simplifyphi(phi);
    if (isInBlock(parent, asInstruction)) {
      survivingPhis.push_back(phi);
    }
  }

  // --- 5b. Drop surviving phis that nobody uses ---
  std::vector<PhiInst*> remainingPhis;
  remainingPhis.reserve(survivingPhis.size());
  for (PhiInst* phi : survivingPhis) {
    if (!phi->getUses().empty()) {
      remainingPhis.push_back(phi);
      continue;
    }
    if (BasicBlock* parent = phi->getParent()) {
      eraseFromBlock(parent, phi);
      changed = true;
    }
  }
  allPhiInstructions = remainingPhis;

  // --- 5c. Prune phiMap entries of erased phis (pointer comparison only) ---
  const std::unordered_set<PhiInst*> keptPhis(remainingPhis.begin(), remainingPhis.end());
  for (auto& blockEntry : phiMap) {
    auto& phisInBlock = blockEntry.second;
    for (auto it = phisInBlock.begin(); it != phisInBlock.end();) {
      if (keptPhis.count(it->first) == 0) {
        it = phisInBlock.erase(it);
      } else {
        ++it;
      }
    }
  }

  return changed;
}

// --- run函数实现 ---
void Mem2Reg::run() {
  // 每次运行整个 Mem2Reg Pass 时，重新进行分析
  controlFlowAnalysis->clear();
  controlFlowAnalysis->runControlFlowAnalysis();
  activeVarAnalysis->clear();
  // 假设 dataFlowAnalysisUtils 可以管理和运行各个分析器
  dataFlowAnalysisUtils.addBackwardAnalyzer(activeVarAnalysis); 
  dataFlowAnalysisUtils.backwardAnalyze(pModule); // 运行活跃变量分析

  bool globalChanged = false;
  // 循环直到没有更多的 alloca 可以被提升
  // 每次 promoteMemoryToRegisters 会尝试在一个函数内完成所有 Mem2Reg 优化
  do {
    globalChanged = false;
    for (const auto& [_, func] : pModule->getFunctions()) {
      // 对每个函数执行 Mem2Reg
      if (promoteMemoryToRegisters(func.get())) {
        globalChanged = true;
        // 如果一个函数发生改变，可能影响其他函数或需要重新分析
        // 因此需要重新运行控制流和活跃变量分析，以备下一次循环
        controlFlowAnalysis->clear();
        controlFlowAnalysis->runControlFlowAnalysis();
        activeVarAnalysis->clear();
        dataFlowAnalysisUtils.backwardAnalyze(pModule); // 重新分析活跃变量
      }
    }
  } while (globalChanged); // 如果全局有任何函数发生改变，则继续迭代

  // 最终清理和重新分析
  controlFlowAnalysis->clear();
  controlFlowAnalysis->runControlFlowAnalysis();
  activeVarAnalysis->clear();
  dataFlowAnalysisUtils.backwardAnalyze(pModule);
}

} // namespace sysy