Merge branch 'midend' into midend-LoopAnalysis

This commit is contained in:
rain2133
2025-08-11 21:20:34 +08:00
41 changed files with 5604 additions and 2382 deletions

View File

@@ -18,6 +18,8 @@ add_library(midend_lib STATIC
Pass/Optimize/SysYIRCFGOpt.cpp
Pass/Optimize/SCCP.cpp
Pass/Optimize/LoopNormalization.cpp
Pass/Optimize/BuildCFG.cpp
Pass/Optimize/LargeArrayToGlobal.cpp
)
# 包含中端模块所需的头文件路径

File diff suppressed because it is too large Load Diff

View File

@@ -1,21 +1,30 @@
#include "Dom.h"
#include <algorithm> // for std::set_intersection, std::set_difference, std::set_union
#include <algorithm> // for std::set_intersection, std::reverse
#include <iostream> // for debug output
#include <limits> // for std::numeric_limits
#include <queue>
#include <functional> // for std::function
#include <map>
#include <vector>
#include <set>
namespace sysy {
// 初始化 支配树静态 ID
// ==============================================================
// DominatorTreeAnalysisPass 的静态ID
// ==============================================================
void *DominatorTreeAnalysisPass::ID = (void *)&DominatorTreeAnalysisPass::ID;
// ==============================================================
// DominatorTree 结果类的实现
// ==============================================================
// 构造函数:初始化关联函数,但不进行计算
DominatorTree::DominatorTree(Function *F) : AssociatedFunction(F) {
// 构造时可以不计算,在分析遍运行里计算并填充
// 构造时不需要计算,在分析遍运行里计算并填充
}
// Getter 方法 (保持不变)
const std::set<BasicBlock *> *DominatorTree::getDominators(BasicBlock *BB) const {
auto it = Dominators.find(BB);
if (it != Dominators.end()) {
@@ -48,7 +57,7 @@ const std::set<BasicBlock *> *DominatorTree::getDominatorTreeChildren(BasicBlock
return nullptr;
}
// 辅助函数:打印 BasicBlock 集合
// 辅助函数:打印 BasicBlock 集合 (保持不变)
void printBBSet(const std::string &prefix, const std::set<BasicBlock *> &s) {
if (!DEBUG)
return;
@@ -63,24 +72,52 @@ void printBBSet(const std::string &prefix, const std::set<BasicBlock *> &s) {
std::cout << "}" << std::endl;
}
// 辅助函数:计算逆后序遍历 (RPO) - 保持不变
// Returns the basic blocks reachable from F's entry block, listed in reverse
// post-order (RPO) of a depth-first walk along successor edges.
std::vector<BasicBlock*> DominatorTree::computeReversePostOrder(Function* F) {
  std::set<BasicBlock*> seen;
  std::vector<BasicBlock*> order;  // built in post-order, reversed before returning

  // Recursive DFS: a block is appended only after every successor that had
  // not been seen yet has been explored completely.
  std::function<void(BasicBlock*)> walk = [&](BasicBlock* block) {
    seen.insert(block);
    for (BasicBlock* next : block->getSuccessors()) {
      if (seen.count(next) != 0) {
        continue;
      }
      walk(next);
    }
    order.push_back(block);
  };

  walk(F->getEntryBlock());
  std::reverse(order.begin(), order.end());

  if (DEBUG) {
    std::cout << "--- Computed RPO: ";
    for (BasicBlock* block : order) {
      std::cout << block->getName() << " ";
    }
    std::cout << "---" << std::endl;
  }

  return order;
}
// computeDominators (unchanged, because it is independent of the IDom algorithm)
void DominatorTree::computeDominators(Function *F) {
if (DEBUG)
std::cout << "--- Computing Dominators ---" << std::endl;
BasicBlock *entryBlock = F->getEntryBlock();
std::vector<BasicBlock *> bbs_in_order; // 用于确定遍历顺序,如果需要的话
std::vector<BasicBlock*> bbs_rpo = computeReversePostOrder(F);
// 初始化:入口块只被自己支配,其他块被所有块支配
for (const auto &bb_ptr : F->getBasicBlocks()) {
BasicBlock *bb = bb_ptr.get();
bbs_in_order.push_back(bb); // 收集所有块
for (BasicBlock *bb : bbs_rpo) {
if (bb == entryBlock) {
Dominators[bb].clear();
Dominators[bb].insert(bb);
if (DEBUG)
std::cout << "Init Dominators[" << bb->getName() << "]: {" << bb->getName() << "}" << std::endl;
if (DEBUG) std::cout << "Init Dominators[" << bb->getName() << "]: {" << bb->getName() << "}" << std::endl;
} else {
for (const auto &all_bb_ptr : F->getBasicBlocks()) {
Dominators[bb].insert(all_bb_ptr.get());
Dominators[bb].clear();
for (BasicBlock *all_bb : bbs_rpo) {
Dominators[bb].insert(all_bb);
}
if (DEBUG) {
std::cout << "Init Dominators[" << bb->getName() << "]: ";
@@ -94,35 +131,29 @@ void DominatorTree::computeDominators(Function *F) {
while (changed) {
changed = false;
iteration++;
if (DEBUG)
std::cout << "Iteration " << iteration << std::endl;
if (DEBUG) std::cout << "Iteration " << iteration << std::endl;
// 确保遍历顺序一致性例如可以按照DFS或BFS顺序或者简单的迭代器顺序
// 如果Function::getBasicBlocks()返回的迭代器顺序稳定则无需bbs_in_order
for (const auto &bb_ptr : F->getBasicBlocks()) { // 假设这个迭代器顺序稳定
BasicBlock *bb = bb_ptr.get();
if (bb == entryBlock)
continue;
for (BasicBlock *bb : bbs_rpo) {
if (bb == entryBlock) continue;
// 计算所有前驱的支配者集合的交集
std::set<BasicBlock *> newDom;
bool firstPredProcessed = false;
for (BasicBlock *pred : bb->getPredecessors()) {
// 确保前驱的支配者集合已经计算过
if (Dominators.count(pred)) {
if (!firstPredProcessed) {
newDom = Dominators[pred];
firstPredProcessed = true;
} else {
std::set<BasicBlock *> intersection;
std::set_intersection(newDom.begin(), newDom.end(), Dominators[pred].begin(), Dominators[pred].end(),
std::inserter(intersection, intersection.begin()));
newDom = intersection;
}
if(DEBUG){
std::cout << " Processing predecessor: " << pred->getName() << std::endl;
}
if (!firstPredProcessed) {
newDom = Dominators[pred];
firstPredProcessed = true;
} else {
std::set<BasicBlock *> intersection;
std::set_intersection(newDom.begin(), newDom.end(), Dominators[pred].begin(), Dominators[pred].end(),
std::inserter(intersection, intersection.begin()));
newDom = intersection;
}
}
newDom.insert(bb); // BB 永远支配自己
newDom.insert(bb);
if (newDom != Dominators[bb]) {
if (DEBUG) {
@@ -140,78 +171,242 @@ void DominatorTree::computeDominators(Function *F) {
std::cout << "--- Dominators Computation Finished ---" << std::endl;
}
void DominatorTree::computeIDoms(Function *F) {
if (DEBUG)
std::cout << "--- Computing Immediate Dominators (IDoms) ---" << std::endl;
// ==============================================================
// Lengauer-Tarjan 算法辅助数据结构和函数 (私有成员)
// ==============================================================
BasicBlock *entryBlock = F->getEntryBlock();
IDoms[entryBlock] = nullptr; // 入口块没有即时支配者
// 遍历所有非入口块
for (const auto &bb_ptr : F->getBasicBlocks()) {
BasicBlock *bb = bb_ptr.get();
if (bb == entryBlock)
continue;
BasicBlock *currentIDom = nullptr;
const std::set<BasicBlock *> *domsOfBB = getDominators(bb);
if (!domsOfBB) {
if (DEBUG)
std::cerr << "Warning: Dominators for " << bb->getName() << " not found!" << std::endl;
continue;
// DFS 遍历,填充 dfnum_map, vertex_vec, parent_map
// 对应用户代码的 dfs 函数
void DominatorTree::dfs_lt_helper(BasicBlock* u) {
dfnum_map[u] = df_counter;
if (df_counter >= vertex_vec.size()) { // 动态调整大小
vertex_vec.resize(df_counter + 1);
}
vertex_vec[df_counter] = u;
if (DEBUG) std::cout << " DFS: Visiting " << u->getName() << ", dfnum = " << df_counter << std::endl;
df_counter++;
// 遍历bb的所有严格支配者 D (即 bb 的支配者中除了 bb 自身)
for (BasicBlock *D_candidate : *domsOfBB) {
if (D_candidate == bb)
continue; // 跳过bb自身
bool D_candidate_is_IDom = true;
// 检查是否存在另一个块 X使得 D_candidate 严格支配 X 且 X 严格支配 bb
// 或者更直接的,检查 D_candidate 是否被 bb 的所有其他严格支配者所支配
for (BasicBlock *X_other_dom : *domsOfBB) {
if (X_other_dom == bb || X_other_dom == D_candidate)
continue; // 跳过bb自身和D_candidate
// 如果 X_other_dom 严格支配 bb (它在 domsOfBB 中且不是bb自身)
// 并且 X_other_dom 不被 D_candidate 支配,那么 D_candidate 就不是 IDom
const std::set<BasicBlock *> *domsOfX_other_dom = getDominators(X_other_dom);
if (domsOfX_other_dom && domsOfX_other_dom->count(D_candidate)) { // X_other_dom 支配 D_candidate
// D_candidate 被另一个支配者 X_other_dom 支配
// 这说明 D_candidate 位于 X_other_dom 的“下方”X_other_dom 更接近 bb
// 因此 D_candidate 不是 IDom
D_candidate_is_IDom = false;
break;
for (BasicBlock* v : u->getSuccessors()) {
if (dfnum_map.find(v) == dfnum_map.end()) { // 如果 v 未访问过
parent_map[v] = u;
if (DEBUG) std::cout << " DFS: Setting parent[" << v->getName() << "] = " << u->getName() << std::endl;
dfs_lt_helper(v);
}
}
if (D_candidate_is_IDom) {
currentIDom = D_candidate;
break; // 找到即时支配者,可以退出循环,因为它是唯一的
}
}
IDoms[bb] = currentIDom;
if (DEBUG) {
std::cout << " IDom[" << bb->getName() << "] = " << (currentIDom ? currentIDom->getName() : "nullptr")
<< std::endl;
}
}
if (DEBUG)
std::cout << "--- Immediate Dominators Computation Finished ---" << std::endl;
}
/*
for each node n in a postorder traversal of the dominator tree:
df[n] = empty set
// compute DF_local(n)
for each child y of n in the CFG:
if idom[y] != n:
df[n] = df[n] U {y}
// compute DF_up(n)
for each child c of n in the dominator tree:
for each element w in df[c]:
if idom[w] != n:
df[n] = df[n] U {w}
*/
// Union-find "eval" with path shortening for the Lengauer-Tarjan forest.
// For a node i that has been linked (ancestor_map[i] is non-null) this updates
// label_map[i] by comparing semidominator DFS numbers along the ancestor chain
// and returns label_map[i]. For a node with no ancestor entry (or a null one)
// it returns i itself.
// Corresponds to the `find` function of the reference code and also folds in
// the `eval` logic.
BasicBlock* DominatorTree::evalAndCompress_lt_helper(BasicBlock* i) {
if (DEBUG) std::cout << " Eval: Processing " << i->getName() << std::endl;
// i is a forest root when it has no ancestor entry or the entry is nullptr.
if (ancestor_map.find(i) == ancestor_map.end() || ancestor_map[i] == nullptr) {
if (DEBUG) std::cout << " Eval: " << i->getName() << " is root, returning itself." << std::endl;
return i; // a root has no ancestors, so it is returned unchanged
}
// Recurse on i's current ancestor first so that the ancestor's label is up to date.
// NOTE(review): despite its name, root_ancestor is whatever the recursive call
// returns - the ancestor itself when that ancestor is a root, otherwise
// label_map of the ancestor - so it is not necessarily the root of the set.
BasicBlock* root_ancestor = evalAndCompress_lt_helper(ancestor_map[i]);
// ancestor_map[i] has not been rewritten yet at this point, so the comparison
// below is between the (freshly updated) label of i's old ancestor and i's own
// label. If the ancestor's label has a semidominator with a smaller DFS number,
// i adopts that label.
// The two count() checks skip the comparison when either label has no
// sdom_map entry.
if (sdom_map.count(label_map[ancestor_map[i]]) && // label of the ancestor has an sdom entry
sdom_map.count(label_map[i]) && // label of i has an sdom entry
dfnum_map[sdom_map[label_map[ancestor_map[i]]]] < dfnum_map[sdom_map[label_map[i]]]) {
if (DEBUG) std::cout << " Eval: Updating label for " << i->getName() << " from "
<< label_map[i]->getName() << " to " << label_map[ancestor_map[i]]->getName() << std::endl;
label_map[i] = label_map[ancestor_map[i]];
}
ancestor_map[i] = root_ancestor; // shorten the path: i now points at the node returned by the recursive call
if (DEBUG) std::cout << " Eval: Path compression for " << i->getName() << ", new ancestor = "
<< (root_ancestor ? root_ancestor->getName() : "nullptr") << std::endl;
return label_map[i]; // deliberately returns the label of i, not the ancestor
}
// Link step of the union-find forest used by Lengauer-Tarjan: records
// u_parent as the forest ancestor of v_child and resets v_child's label to
// v_child itself.
// Plays the role of `fa[u] = fth[u];` in the reference code.
void DominatorTree::link_lt_helper(BasicBlock* u_parent, BasicBlock* v_child) {
  label_map[v_child] = v_child;      // a freshly linked node is its own label
  ancestor_map[v_child] = u_parent;  // union-find parent pointer

  if (DEBUG) {
    std::cout << " Link: " << v_child->getName() << " linked to " << u_parent->getName() << std::endl;
  }
}
// ==============================================================
// computeIDoms: immediate dominators via the Lengauer-Tarjan algorithm
// ==============================================================
// Computes the immediate dominator of every block reached by the DFS from F's
// entry block and stores the result in IDoms. The entry block maps to nullptr.
// Blocks that the DFS never reaches get no entry in the result.
void DominatorTree::computeIDoms(Function *F) {
if (DEBUG) std::cout << "--- Computing Immediate Dominators (IDoms) using Lengauer-Tarjan ---" << std::endl;
BasicBlock *entryBlock = F->getEntryBlock();
// 1. Reset every Lengauer-Tarjan working structure.
dfnum_map.clear();
vertex_vec.clear();
parent_map.clear();
sdom_map.clear();
idom_map.clear();
bucket_map.clear();
ancestor_map.clear();
label_map.clear();
df_counter = 0; // DFS numbering starts at 0
// Pre-size vertex_vec so the DFS does not have to resize it repeatedly.
vertex_vec.resize(F->getBasicBlocks().size() + 1);
// Before the DFS, give every basic block an sdom and a label entry so that
// all nodes are present in these maps when Phase 2 starts.
for (auto &bb_ptr : F->getBasicBlocks()) {
BasicBlock* bb = bb_ptr.get();
sdom_map[bb] = bb; // sdom(bb) starts out as bb itself
label_map[bb] = bb; // label(bb) starts out as bb itself (used by the union-find path compression)
}
// Make sure the entry block is initialized too, in case it is not part of
// the normal F->getBasicBlocks() iteration.
sdom_map[entryBlock] = entryBlock;
label_map[entryBlock] = entryBlock;
// Phase 1: DFS traversal and preprocessing.
// Corresponds to dfs(st) in the reference code.
dfs_lt_helper(entryBlock);
idom_map[entryBlock] = nullptr; // the entry block has no immediate dominator
if (DEBUG) std::cout << " IDom[" << entryBlock->getName() << "] = nullptr" << std::endl;
if (DEBUG) std::cout << " Sdom[" << entryBlock->getName() << "] = " << entryBlock->getName() << std::endl;
// Initialize the union-find ancestor and label of every DFS-visited block.
for (auto const& [bb_key, dfn_val] : dfnum_map) {
ancestor_map[bb_key] = nullptr; // each node starts as the root of its own set
label_map[bb_key] = bb_key; // initial label is the node itself
}
if (DEBUG) {
std::cout << " --- DFS Phase Complete ---" << std::endl;
std::cout << " dfnum_map:" << std::endl;
for (auto const& [bb, dfn] : dfnum_map) {
std::cout << " " << bb->getName() << " -> " << dfn << std::endl;
}
std::cout << " vertex_vec (by dfnum):" << std::endl;
for (size_t k = 0; k < df_counter; ++k) {
if (vertex_vec[k]) std::cout << " [" << k << "] -> " << vertex_vec[k]->getName() << std::endl;
}
std::cout << " parent_map:" << std::endl;
for (auto const& [child, parent] : parent_map) {
std::cout << " " << child->getName() << " -> " << (parent ? parent->getName() : "nullptr") << std::endl;
}
std::cout << " ------------------------" << std::endl;
}
// Phase 2: compute semidominators (sdom).
// Corresponds to the first half of the `for (int i = dfc; i >= 2; --i)` loop
// of the reference code.
// Nodes are visited in decreasing DFS number; index 0 (the entry block, which
// is the first block numbered by the DFS) is excluded.
if (DEBUG) std::cout << "--- Phase 2: Computing Semi-Dominators (sdom) ---" << std::endl;
for (int i = df_counter - 1; i >= 1; --i) { // from the largest DFS number down to 1
BasicBlock* w = vertex_vec[i]; // node currently being processed
if (DEBUG) std::cout << " Processing node w: " << w->getName() << " (dfnum=" << i << ")" << std::endl;
// Examine every predecessor v of w.
for (BasicBlock* v : w->getPredecessors()) {
if (DEBUG) std::cout << " Considering predecessor v: " << v->getName() << std::endl;
// A predecessor that the DFS never visited (absent from dfnum_map) is skipped.
if (dfnum_map.find(v) == dfnum_map.end()) {
if (DEBUG) std::cout << " Predecessor " << v->getName() << " not in DFS tree, skipping." << std::endl;
continue;
}
// evalAndCompress yields the node on v's ancestor chain whose sdom has the
// smallest DFS number (or v itself when v has not been linked yet).
BasicBlock* u_with_min_sdom_on_path = evalAndCompress_lt_helper(v);
if (DEBUG) std::cout << " Eval(" << v->getName() << ") returned "
<< u_with_min_sdom_on_path->getName() << std::endl;
if (DEBUG && sdom_map.count(u_with_min_sdom_on_path) && sdom_map.count(w)) {
std::cout << " Comparing sdom: dfnum[" << sdom_map[u_with_min_sdom_on_path]->getName() << "] (" << dfnum_map[sdom_map[u_with_min_sdom_on_path]]
<< ") vs dfnum[" << sdom_map[w]->getName() << "] (" << dfnum_map[sdom_map[w]] << ")" << std::endl;
}
// Compare sdom(u) with sdom(w) by DFS number and keep the smaller one.
if (sdom_map.count(u_with_min_sdom_on_path) && sdom_map.count(w) &&
dfnum_map[sdom_map[u_with_min_sdom_on_path]] < dfnum_map[sdom_map[w]]) {
if (DEBUG) std::cout << " Updating sdom[" << w->getName() << "] from "
<< sdom_map[w]->getName() << " to "
<< sdom_map[u_with_min_sdom_on_path]->getName() << std::endl;
sdom_map[w] = sdom_map[u_with_min_sdom_on_path]; // update sdom(w)
if (DEBUG) std::cout << " Sdom update applied. New sdom[" << w->getName() << "] = " << sdom_map[w]->getName() << std::endl;
}
}
// Put w into the bucket of its semidominator.
bucket_map[sdom_map[w]].push_back(w);
if (DEBUG) std::cout << " Adding " << w->getName() << " to bucket of sdom(" << w->getName() << "): "
<< sdom_map[w]->getName() << std::endl;
// Link w under its DFS-tree parent in the union-find forest.
if (parent_map.count(w) && parent_map[w] != nullptr) {
link_lt_helper(parent_map[w], w);
}
// Phase 3, part 1: process every node in the bucket of parent[w] and assign
// a (possibly provisional) idom to each.
if (parent_map.count(w) && parent_map[w] != nullptr) {
BasicBlock* p = parent_map[w]; // p is the DFS-tree parent of w
if (DEBUG) std::cout << " Processing bucket for parent " << p->getName() << std::endl;
// Iterate over a copy of the bucket; the original bucket is cleared after the loop.
std::vector<BasicBlock*> nodes_in_p_bucket_copy = bucket_map[p];
for (BasicBlock* y : nodes_in_p_bucket_copy) {
if (DEBUG) std::cout << " Processing node y from bucket: " << y->getName() << std::endl;
// Node on y's ancestor chain whose sdom has the smallest DFS number.
BasicBlock* u = evalAndCompress_lt_helper(y);
if (DEBUG) std::cout << " Eval(" << y->getName() << ") returned " << u->getName() << std::endl;
// Decide idom(y):
// if sdom(eval(y)) has a smaller DFS number than sdom(p), idom(y) = eval(y)
// (provisional, finalized in part 2); otherwise idom(y) = p.
// NOTE(review): this compares against sdom_map[p]; p appears not to have been
// processed by the outer loop yet, so sdom_map[p] would still be p - confirm.
if (sdom_map.count(u) && sdom_map.count(p) &&
dfnum_map[sdom_map[u]] < dfnum_map[sdom_map[p]]) {
idom_map[y] = u; // provisional idom
if (DEBUG) std::cout << " IDom[" << y->getName() << "] set to " << u->getName() << std::endl;
} else {
idom_map[y] = p; // p is the idom of y
if (DEBUG) std::cout << " IDom[" << y->getName() << "] set to " << p->getName() << std::endl;
}
}
bucket_map[p].clear(); // empty the bucket so its nodes are not processed twice
if (DEBUG) std::cout << " Cleared bucket for parent " << p->getName() << std::endl;
}
}
// Phase 3, part 2: finalize idom for the nodes whose idom differs from their sdom.
if (DEBUG) std::cout << "--- Phase 3: Finalizing Immediate Dominators (idom) ---" << std::endl;
for (int i = 1; i < df_counter; ++i) { // increasing DFS number, entry block excluded
BasicBlock* w = vertex_vec[i];
if (DEBUG) std::cout << " Finalizing node w: " << w->getName() << std::endl;
if (idom_map.count(w) && sdom_map.count(w) && idom_map[w] != sdom_map[w]) {
// The idom recorded for w is provisional; the real idom is the idom of that node.
if (DEBUG) std::cout << " idom[" << w->getName() << "] (" << idom_map[w]->getName()
<< ") != sdom[" << w->getName() << "] (" << sdom_map[w]->getName() << ")" << std::endl;
if (idom_map.count(idom_map[w])) {
idom_map[w] = idom_map[idom_map[w]];
if (DEBUG) std::cout << " Updating idom[" << w->getName() << "] to idom(idom(w)): "
<< idom_map[w]->getName() << std::endl;
} else {
if (DEBUG) std::cout << " Warning: idom(idom(" << w->getName() << ")) not found, leaving idom[" << w->getName() << "] as is." << std::endl;
}
}
if (DEBUG) {
std::cout << " Final IDom[" << w->getName() << "] = " << (idom_map[w] ? idom_map[w]->getName() : "nullptr") << std::endl;
}
}
// Publish the result: copy idom_map into the DominatorTree member IDoms.
IDoms = idom_map;
if (DEBUG) std::cout << "--- Immediate Dominators Computation Finished ---" << std::endl;
}
// ==============================================================
// computeDominanceFrontiers 和 computeDominatorTreeChildren (保持不变)
// ==============================================================
void DominatorTree::computeDominanceFrontiers(Function *F) {
if (DEBUG)
@@ -221,21 +416,17 @@ void DominatorTree::computeDominanceFrontiers(Function *F) {
BasicBlock *X = bb_ptr_X.get();
DominanceFrontiers[X].clear();
// 遍历所有可能的 Z (X支配Z或者Z就是X)
for (const auto &bb_ptr_Z : F->getBasicBlocks()) {
BasicBlock *Z = bb_ptr_Z.get();
const std::set<BasicBlock *> *domsOfZ = getDominators(Z);
// 如果 X 不支配 Z则 Z 与 DF(X) 无关
if (!domsOfZ || domsOfZ->find(X) == domsOfZ->end()) {
if (!domsOfZ || domsOfZ->find(X) == domsOfZ->end()) { // Z 不被 X 支配
continue;
}
// 遍历 Z 的所有后继 Y
for (BasicBlock *Y : Z->getSuccessors()) {
// 如果 Y 不被 X 严格支配,则 Y 在 DF(X) 中
// Y 不被 X 严格支配意味着 (Y不被X支配) 或 (Y就是X)
const std::set<BasicBlock *> *domsOfY = getDominators(Y);
// 如果 Y == X或者 Y 不被 X 严格支配 (即 Y 不被 X 支配)
if (Y == X || (domsOfY && domsOfY->find(X) == domsOfY->end())) {
DominanceFrontiers[X].insert(Y);
}
@@ -274,23 +465,21 @@ void DominatorTree::computeDominatorTreeChildren(Function *F) {
}
// ==============================================================
// DominatorTreeAnalysisPass 的实现
// DominatorTreeAnalysisPass 的实现 (保持不变)
// ==============================================================
bool DominatorTreeAnalysisPass::runOnFunction(Function *F, AnalysisManager &AM) {
// 每次运行时清空旧数据,确保重新计算
CurrentDominatorTree = std::make_unique<DominatorTree>(F);
// 不需要手动清空mapunique_ptr会创建新的DominatorTree对象其map是空的
CurrentDominatorTree->computeDominators(F);
CurrentDominatorTree->computeIDoms(F); // 修正后的IDoms算法
CurrentDominatorTree->computeIDoms(F); // 修正后的LT算法
CurrentDominatorTree->computeDominanceFrontiers(F);
CurrentDominatorTree->computeDominatorTreeChildren(F);
return false; // 分析遍通常返回 false表示不修改 IR
return false;
}
// Hands the computed DominatorTree over to the caller. Ownership is moved out
// of CurrentDominatorTree, which is left empty afterwards.
std::unique_ptr<AnalysisResultBase> DominatorTreeAnalysisPass::getResult() {
  std::unique_ptr<AnalysisResultBase> result = std::move(CurrentDominatorTree);
  return result;
}

View File

@@ -0,0 +1,79 @@
#include "BuildCFG.h"
#include "Dom.h"
#include "Liveness.h"
#include <iostream>
#include <queue>
#include <set>
namespace sysy {
void *BuildCFG::ID = (void *)&BuildCFG::ID; // 定义唯一的 Pass ID
// Declares the analyses this pass depends on and the ones it invalidates.
void BuildCFG::getAnalysisUsage(std::set<void *> &analysisDependencies, std::set<void *> &analysisInvalidations) const {
  // BuildCFG needs no analysis as input, so analysisDependencies is left
  // untouched (inserting DominatorTreeAnalysisPass::ID there would be wrong).

  // The pass rewrites predecessor/successor edges, so results that were
  // computed from the CFG must be declared invalid.
  void *const dominatorTreeId = &DominatorTreeAnalysisPass::ID;
  void *const livenessId = &LivenessAnalysisPass::ID;
  analysisInvalidations.insert(dominatorTreeId);
  analysisInvalidations.insert(livenessId);
}
// Rebuilds the predecessor/successor lists of every basic block of F from the
// blocks' terminator instructions.
//
// All existing edges are cleared first. Then, for each non-empty block whose
// terminator is a branch, one edge is recorded per branch target (a
// conditional branch records its then-target first, then its else-target).
// Blocks that are empty or end in a non-branch instruction (e.g. a return)
// contribute no outgoing edges.
//
// Returns true when at least one edge was recorded.
bool BuildCFG::runOnFunction(Function *F, AnalysisManager &AM) {
  if (DEBUG) {
    std::cout << "Running BuildCFG pass on function: " << F->getName() << std::endl;
  }

  bool changed = false;

  // 1. Drop the predecessor and successor lists of every block.
  for (auto &bb : F->getBasicBlocks()) {
    bb->clearPredecessors();
    bb->clearSuccessors();
  }

  // Records the edge from -> to on both endpoints.
  auto addEdge = [&changed](BasicBlock *from, BasicBlock *to) {
    from->addSuccessor(to);
    to->addPredecessor(from);
    changed = true;
  };

  // 2. Walk every block and rebuild the edges from its terminator.
  for (auto &bb : F->getBasicBlocks()) {
    // An empty block has no terminator; check before dereferencing the
    // iterator returned by terminator().
    if (bb->getNumInstructions() == 0) {
      continue;
    }

    Instruction *termInst = bb->terminator()->get();
    if (!termInst || !termInst->isBranch()) {
      continue; // returns and other non-branch instructions have no successors
    }

    if (termInst->isUnconditional()) {
      // Unconditional branch: a single target.
      auto *brInst = dynamic_cast<UncondBrInst *>(termInst);
      assert(brInst && "Unconditional branch must be an UncondBrInst");
      if (!brInst) {
        continue; // keep release builds from dereferencing a failed cast
      }
      BasicBlock *succ = dynamic_cast<BasicBlock *>(brInst->getBlock());
      assert(succ && "Branch instruction's target must be a BasicBlock");
      if (!succ) {
        continue;
      }
      addEdge(bb.get(), succ);
    } else if (termInst->isConditional()) {
      // Conditional branch: then-target followed by else-target.
      auto *brInst = dynamic_cast<CondBrInst *>(termInst);
      assert(brInst && "Conditional branch must be a CondBrInst");
      if (!brInst) {
        continue;
      }
      BasicBlock *trueSucc = dynamic_cast<BasicBlock *>(brInst->getThenBlock());
      BasicBlock *falseSucc = dynamic_cast<BasicBlock *>(brInst->getElseBlock());
      assert(trueSucc && falseSucc && "Branch instruction's targets must be BasicBlocks");
      if (!trueSucc || !falseSucc) {
        continue;
      }
      addEdge(bb.get(), trueSucc);
      addEdge(bb.get(), falseSucc);
    }
  }

  return changed;
}
} // namespace sysy

View File

@@ -0,0 +1,145 @@
#include "../../include/midend/Pass/Optimize/LargeArrayToGlobal.h"
#include "../../IR.h"
#include <unordered_map>
#include <sstream>
#include <string>
namespace sysy {
// Renders a Type as a short human-readable string (used for debug output).
// Array types are rendered recursively as "[N x <element>]".
static std::string typeToString(Type *type) {
  if (type == nullptr) {
    return "null";
  }

  const auto kind = type->getKind();
  if (kind == Type::kInt) {
    return "int";
  }
  if (kind == Type::kFloat) {
    return "float";
  }
  if (kind == Type::kPointer) {
    return "ptr";
  }
  if (kind == Type::kArray) {
    auto *arr = type->as<ArrayType>();
    std::string text = "[";
    text += std::to_string(arr->getNumElements());
    text += " x ";
    text += typeToString(arr->getElementType());
    text += "]";
    return text;
  }
  return "unknown";
}
void *LargeArrayToGlobalPass::ID = &LargeArrayToGlobalPass::ID;
// Scans every function of M for alloca instructions whose allocated type is
// at least 1024 bytes (as computed by calculateTypeSize) and converts each of
// them into a module-level global. Returns true when at least one alloca was
// selected for conversion.
bool LargeArrayToGlobalPass::runOnModule(Module *M, AnalysisManager &AM) {
  if (M == nullptr) {
    return false;
  }

  // Collect first, convert afterwards, so instruction lists are not modified
  // while they are being iterated.
  std::vector<std::pair<AllocaInst*, Function*>> pending;

  for (auto &entry : M->getFunctions()) {
    Function *F = entry.second.get();
    if (!F) {
      continue;
    }
    // Skip functions that have no basic blocks.
    if (F->getBasicBlocks().begin() == F->getBasicBlocks().end()) {
      continue;
    }

    for (auto &BB : F->getBasicBlocks()) {
      for (auto &inst : BB->getInstructions()) {
        auto *alloca = dynamic_cast<AllocaInst*>(inst.get());
        if (alloca == nullptr) {
          continue;
        }

        Type *allocatedType = alloca->getAllocatedType();
        const unsigned size = calculateTypeSize(allocatedType);

        if (DEBUG) {
          std::cout << "LargeArrayToGlobalPass: Found alloca with size " << size
                    << " for type " << typeToString(allocatedType) << std::endl;
        }

        // Only allocations of 1KB (1024 bytes) or more are converted.
        if (size < 1024) {
          continue;
        }
        if (DEBUG) {
          std::cout << "LargeArrayToGlobalPass: Converting array of size " << size << " to global" << std::endl;
        }
        pending.emplace_back(alloca, F);
      }
    }
  }

  // Perform the conversions that were collected above.
  for (auto &item : pending) {
    convertAllocaToGlobal(item.first, item.second, M);
  }
  return !pending.empty();
}
// Returns the size in bytes this pass assumes for a type: 4 for int and
// float, 8 for pointers, element count times element size for arrays, and 0
// for a null type or any other kind.
unsigned LargeArrayToGlobalPass::calculateTypeSize(Type *type) {
  if (!type) {
    return 0;
  }

  const auto kind = type->getKind();
  if (kind == Type::kInt || kind == Type::kFloat) {
    return 4;
  }
  if (kind == Type::kPointer) {
    return 8;
  }
  if (kind != Type::kArray) {
    return 0;
  }

  // Arrays: recurse into the element type.
  auto *arr = type->as<ArrayType>();
  return arr->getNumElements() * calculateTypeSize(arr->getElementType());
}
// Replaces one alloca instruction of function F with a newly created global
// value in module M.
//
// Steps: create a uniquely named global whose type is a pointer to the
// alloca's allocated type, redirect every use of the alloca to that global,
// then remove the alloca from the basic block that holds it. Does nothing
// when any argument is null or when the module fails to create the global.
//
// NOTE(review): a global has a single instance, whereas an alloca gets a fresh
// one per call; confirm that callers only apply this to functions for which
// that difference is harmless (e.g. non-recursive ones).
void LargeArrayToGlobalPass::convertAllocaToGlobal(AllocaInst *alloca, Function *F, Module *M) {
  if (!alloca || !F || !M) {
    return;
  }

  Type *allocatedType = alloca->getAllocatedType();

  // Create a unique name for the global variable.
  std::string globalName = generateUniqueGlobalName(alloca, F);

  // Create the global variable - GlobalValue expects a pointer type.
  Type *pointerType = Type::getPointerType(allocatedType);
  GlobalValue *globalVar = M->createGlobalValue(globalName, pointerType);
  if (!globalVar) {
    return;
  }

  // Redirect all uses of the alloca to the global variable.
  alloca->replaceAllUsesWith(globalVar);

  // Remove the alloca instruction from its basic block. Return as soon as it
  // has been erased: the instruction appears in one list only, and after the
  // erase `alloca` must not be compared against the remaining instructions.
  for (auto &BB : F->getBasicBlocks()) {
    auto &instructions = BB->getInstructions();
    for (auto it = instructions.begin(); it != instructions.end(); ++it) {
      if (it->get() == alloca) {
        instructions.erase(it);
        return;
      }
    }
  }
}
// Builds a name of the form "<function>.<alloca name>.<n>" for the global that
// replaces an alloca. An unnamed alloca uses "array" as its name part. <n> is
// a counter kept per "<function>.<alloca name>" key for the lifetime of the
// process, starting at 0.
std::string LargeArrayToGlobalPass::generateUniqueGlobalName(AllocaInst *alloca, Function *F) {
  // Counters persist across calls so repeated keys get distinct suffixes.
  static std::unordered_map<std::string, int> nameCounter;

  std::string stem = alloca->getName();
  if (stem.empty()) {
    stem = "array";
  }

  const std::string key = F->getName() + "." + stem;

  int &slot = nameCounter[key];
  const int ordinal = slot;
  ++slot;

  std::ostringstream out;
  out << key << "." << ordinal;
  return out.str();
}
} // namespace sysy

View File

@@ -148,8 +148,8 @@ void Reg2MemContext::rewritePhis(Function *func) {
// 1. 为 Phi 指令的每个入边,在前驱块的末尾插入 Store 指令
// PhiInst 假设有 getIncomingValues() 和 getIncomingBlocks()
for (unsigned i = 0; i < phiInst->getNumIncomingValues(); ++i) { // 假设 PhiInst 是通过操作数来管理入边的
Value *incomingValue = phiInst->getValue(i); // 获取入值
BasicBlock *incomingBlock = phiInst->getBlock(i); // 获取对应的入块
Value *incomingValue = phiInst->getIncomingValue(i); // 获取入值
BasicBlock *incomingBlock = phiInst->getIncomingBlock(i); // 获取对应的入块
// 在入块的跳转指令之前插入 StoreInst
// 需要找到 incomingBlock 的终结指令 (Terminator Instruction)

View File

@@ -468,6 +468,22 @@ void SCCPContext::ProcessInstruction(Instruction *inst) {
return; // 不处理不可达块中的指令的实际值
}
if(DEBUG) {
std::cout << "Processing instruction: " << inst->getName() << " in block " << inst->getParent()->getName() << std::endl;
std::cout << "Old state: ";
if (oldState.state == LatticeVal::Top) {
std::cout << "Top";
} else if (oldState.state == LatticeVal::Constant) {
if (oldState.constant_type == ValueType::Integer) {
std::cout << "Const<int>(" << std::get<int>(oldState.constantVal) << ")";
} else {
std::cout << "Const<float>(" << std::get<float>(oldState.constantVal) << ")";
}
} else {
std::cout << "Bottom";
}
}
switch (inst->getKind()) {
case Instruction::kAdd:
case Instruction::kSub:
@@ -815,19 +831,71 @@ void SCCPContext::ProcessInstruction(Instruction *inst) {
}
case Instruction::kPhi: {
PhiInst *phi = static_cast<PhiInst *>(inst);
if(DEBUG) {
std::cout << "Processing Phi node: " << phi->getName() << std::endl;
}
// 标准SCCP的phi节点处理
// 只考虑可执行前驱,但要保证单调性
SSAPValue currentPhiState = GetValueState(phi);
SSAPValue phiResult = SSAPValue(); // 初始为 Top
bool hasAnyExecutablePred = false;
for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
Value *incomingVal = phi->getIncomingValue(i);
BasicBlock *incomingBlock = phi->getIncomingBlock(i);
if (executableBlocks.count(incomingBlock)) { // 仅考虑可执行前驱
phiResult = Meet(phiResult, GetValueState(incomingVal));
if (phiResult.state == LatticeVal::Bottom)
break; // 如果已经 Bottom则提前退出
if (executableBlocks.count(incomingBlock)) {
hasAnyExecutablePred = true;
Value *incomingVal = phi->getIncomingValue(i);
SSAPValue incomingState = GetValueState(incomingVal);
if(DEBUG) {
std::cout << " Incoming from block " << incomingBlock->getName()
<< " with value " << incomingVal->getName() << " state: ";
if (incomingState.state == LatticeVal::Top)
std::cout << "Top";
else if (incomingState.state == LatticeVal::Constant) {
if (incomingState.constant_type == ValueType::Integer)
std::cout << "Const<int>(" << std::get<int>(incomingState.constantVal) << ")";
else
std::cout << "Const<float>(" << std::get<float>(incomingState.constantVal) << ")";
} else
std::cout << "Bottom";
std::cout << std::endl;
}
phiResult = Meet(phiResult, incomingState);
if (phiResult.state == LatticeVal::Bottom) {
break; // 提前退出优化
}
}
// 不可执行前驱暂时被忽略
// 这是标准SCCP的做法依赖于单调性保证正确性
}
if (!hasAnyExecutablePred) {
// 没有可执行前驱保持Top状态
newState = SSAPValue();
} else {
// 关键修复:使用严格的单调性
// 确保phi的值只能从Top -> Constant -> Bottom单向变化
if (currentPhiState.state == LatticeVal::Top) {
// 从Top状态可以变为任何计算结果
newState = phiResult;
} else if (currentPhiState.state == LatticeVal::Constant) {
// 从Constant状态只能保持相同常量或变为Bottom
if (phiResult.state == LatticeVal::Constant &&
currentPhiState.constantVal == phiResult.constantVal &&
currentPhiState.constant_type == phiResult.constant_type) {
// 保持相同的常量
newState = currentPhiState;
} else {
// 不同的值必须变为Bottom
newState = SSAPValue(LatticeVal::Bottom);
}
} else {
// 已经是Bottom保持Bottom
newState = currentPhiState;
}
}
newState = phiResult;
break;
}
case Instruction::kAlloca: // 对应 kAlloca
@@ -884,6 +952,22 @@ void SCCPContext::ProcessInstruction(Instruction *inst) {
}
}
}
if (DEBUG) {
std::cout << "New state: ";
if (newState.state == LatticeVal::Top) {
std::cout << "Top";
} else if (newState.state == LatticeVal::Constant) {
if (newState.constant_type == ValueType::Integer) {
std::cout << "Const<int>(" << std::get<int>(newState.constantVal) << ")";
} else {
std::cout << "Const<float>(" << std::get<float>(newState.constantVal) << ")";
}
} else {
std::cout << "Bottom";
}
std::cout << std::endl;
}
}
// 辅助函数:处理单条控制流边
@@ -891,14 +975,22 @@ void SCCPContext::ProcessEdge(const std::pair<BasicBlock *, BasicBlock *> &edge)
BasicBlock *fromBB = edge.first;
BasicBlock *toBB = edge.second;
// 检查目标块是否已经可执行
bool wasAlreadyExecutable = executableBlocks.count(toBB) > 0;
// 标记目标块为可执行(如果还不是的话)
MarkBlockExecutable(toBB);
// 对于目标块中的所有 Phi 指令,重新评估其值,因为可能有新的前驱被激活
for (auto &inst_ptr : toBB->getInstructions()) {
if (dynamic_cast<PhiInst *>(inst_ptr.get())) {
instWorkList.push(inst_ptr.get());
// 如果目标块之前就已经可执行那么需要重新处理其中的phi节点
// 因为现在有新的前驱变为可执行phi节点的值可能需要更新
if (wasAlreadyExecutable) {
for (auto &inst_ptr : toBB->getInstructions()) {
if (dynamic_cast<PhiInst *>(inst_ptr.get())) {
instWorkList.push(inst_ptr.get());
}
}
}
// 如果目标块是新变为可执行的MarkBlockExecutable已经添加了所有指令
}
// 阶段1: 常量传播与折叠
@@ -913,18 +1005,29 @@ bool SCCPContext::PropagateConstants(Function *func) {
}
}
// 初始化函数参数为Bottom因为它们在编译时是未知的
for (auto arg : func->getArguments()) {
valueState[arg] = SSAPValue(LatticeVal::Bottom);
if (DEBUG) {
std::cout << "Initializing function argument " << arg->getName() << " to Bottom" << std::endl;
}
}
// 标记入口块为可执行
if (!func->getBasicBlocks().empty()) {
MarkBlockExecutable(func->getEntryBlock());
}
// 主循环:处理工作列表直到不动点
// 主循环:标准的SCCP工作列表算法
// 交替处理边工作列表和指令工作列表直到不动点
while (!instWorkList.empty() || !edgeWorkList.empty()) {
// 处理所有待处理的CFG边
while (!edgeWorkList.empty()) {
ProcessEdge(edgeWorkList.front());
edgeWorkList.pop();
}
// 处理所有待处理的指令
while (!instWorkList.empty()) {
Instruction *inst = instWorkList.front();
instWorkList.pop();
@@ -1243,7 +1346,7 @@ void SCCPContext::RemovePhiIncoming(BasicBlock *phiParentBB, BasicBlock *removed
for (Instruction *inst : insts_to_check) {
if (auto phi = dynamic_cast<PhiInst *>(inst)) {
phi->delBlk(removedPred);
phi->removeIncomingBlock(removedPred);
}
}
}

View File

@@ -42,7 +42,7 @@ bool SysYCFGOptUtils::SysYDelInstAfterBr(Function *func) {
++Branchiter;
while (Branchiter != instructions.end()) {
changed = true;
Branchiter = instructions.erase(Branchiter);
Branchiter = SysYIROptUtils::usedelete(Branchiter); // 删除指令
}
if (Branch) { // 更新前驱后继关系
@@ -77,6 +77,11 @@ bool SysYCFGOptUtils::SysYBlockMerge(Function *func) {
bool changed = false;
for (auto blockiter = func->getBasicBlocks().begin(); blockiter != func->getBasicBlocks().end();) {
// Check whether the current block is the entry block
if( blockiter->get() == func->getEntryBlock() ) {
blockiter++;
continue; // 跳过入口块
}
if (blockiter->get()->getNumSuccessors() == 1) {
// 如果当前块只有一个后继块
// 且后继块只有一个前驱块
@@ -86,7 +91,7 @@ bool SysYCFGOptUtils::SysYBlockMerge(Function *func) {
BasicBlock *block = blockiter->get();
BasicBlock *nextBlock = blockiter->get()->getSuccessors()[0];
// auto nextarguments = nextBlock->getArguments();
// 删除br指令
// 删除block的br指令
if (block->getNumInstructions() != 0) {
auto thelastinstinst = block->terminator();
if (thelastinstinst->get()->isUnconditional()) {
@@ -98,14 +103,21 @@ bool SysYCFGOptUtils::SysYBlockMerge(Function *func) {
if (brinst->getThenBlock() == brinst->getElseBlock()) {
thelastinstinst = SysYIROptUtils::usedelete(thelastinstinst);
}
else{
assert(false && "SysYBlockMerge: unexpected conditional branch with different then and else blocks");
}
}
}
// 将后继块的指令移动到当前块
// 并将后继块的父指针改为当前块
for (auto institer = nextBlock->begin(); institer != nextBlock->end();) {
institer->get()->setParent(block);
block->getInstructions().emplace_back(institer->release());
institer = nextBlock->getInstructions().erase(institer);
// institer->get()->setParent(block);
// block->getInstructions().emplace_back(institer->release());
// 用usedelete删除会导致use关系被删除我只希望移动指令到当前块
// institer = SysYIROptUtils::usedelete(institer);
// institer = nextBlock->getInstructions().erase(institer);
institer = nextBlock->moveInst(institer, block->getInstructions().end(), block);
}
// 更新前驱后继关系,类似树节点操作
block->removeSuccessor(nextBlock);
@@ -189,7 +201,7 @@ bool SysYCFGOptUtils::SysYDelNoPreBLock(Function *func) {
break;
}
// 将这个 Phi 节点中来自不可达前驱unreachableBlock的输入参数删除
dynamic_cast<PhiInst *>(phiInstPtr.get())->delBlk(unreachableBlock);
dynamic_cast<PhiInst *>(phiInstPtr.get())->removeIncomingBlock(unreachableBlock);
}
}
}
@@ -288,13 +300,12 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
continue;
}
std::function<Value *(Value *, BasicBlock *)> getUltimateSourceValue = [&](Value *val,
BasicBlock *currentDefBlock) -> Value * {
// 如果值不是指令,例如常量或函数参数,则它本身就是最终来源
if (auto instr = dynamic_cast<Instruction *>(val)) { // Assuming Value* has a method to check if it's an instruction
std::function<Value *(Value *, BasicBlock *)> getUltimateSourceValue = [&](Value *val, BasicBlock *currentDefBlock) -> Value * {
if(!dynamic_cast<Instruction *>(val)) {
// 如果 val 不是指令,直接返回它
return val;
}
Instruction *inst = dynamic_cast<Instruction *>(val);
// 如果定义指令不在任何空块中,它就是最终来源
if (!emptyBlockRedirectMap.count(currentDefBlock)) {
@@ -311,7 +322,7 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
// 找到在空块链中导致 currentDefBlock 的那个前驱块
if (emptyBlockRedirectMap.count(incomingBlock) || incomingBlock == currentBlock) {
// 递归追溯该传入值
return getUltimateSourceValue(phi->getIncomingValue(incomingBlock), incomingBlock);
return getUltimateSourceValue(phi->getValfromBlk(incomingBlock), incomingBlock);
}
}
}
@@ -354,7 +365,7 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
if (actualEmptyPredecessorOfS) {
// 获取 Phi 节点原本从 actualEmptyPredecessorOfS 接收的值
Value *valueFromEmptyPredecessor = phiInst->getIncomingValue(actualEmptyPredecessorOfS);
Value *valueFromEmptyPredecessor = phiInst->getValfromBlk(actualEmptyPredecessorOfS);
// 追溯这个值,找到它在非空块中的最终来源
// currentBlock 是 P
@@ -364,12 +375,13 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
// 替换 Phi 节点的传入块和传入值
if (ultimateSourceValue) { // 确保成功追溯到有效来源
phiInst->replaceIncoming(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
// phiInst->replaceIncoming(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
phiInst->replaceIncomingBlock(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
} else {
assert(false && "[DelEmptyBlock] Unable to trace a valid source for Phi instruction");
// 无法追溯到有效来源,这可能是个错误或特殊情况
// 此时可能需要移除该 Phi 项,或者插入一个 undef 值
phiInst->removeIncoming(actualEmptyPredecessorOfS);
phiInst->getValfromBlk(actualEmptyPredecessorOfS);
}
}
} else {
@@ -421,7 +433,7 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
if (actualEmptyPredecessorOfS) {
// 获取 Phi 节点原本从 actualEmptyPredecessorOfS 接收的值
Value *valueFromEmptyPredecessor = phiInst->getIncomingValue(actualEmptyPredecessorOfS);
Value *valueFromEmptyPredecessor = phiInst->getValfromBlk(actualEmptyPredecessorOfS);
// 追溯这个值,找到它在非空块中的最终来源
// currentBlock 是 P
@@ -431,12 +443,13 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
// 替换 Phi 节点的传入块和传入值
if (ultimateSourceValue) { // 确保成功追溯到有效来源
phiInst->replaceIncoming(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
// phiInst->replaceIncoming(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
phiInst->replaceIncomingBlock(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
} else {
assert(false && "[DelEmptyBlock] Unable to trace a valid source for Phi instruction");
// 无法追溯到有效来源,这可能是个错误或特殊情况
// 此时可能需要移除该 Phi 项,或者插入一个 undef 值
phiInst->removeIncoming(actualEmptyPredecessorOfS);
phiInst->removeIncomingBlock(actualEmptyPredecessorOfS);
}
}
} else {
@@ -481,7 +494,7 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
if (actualEmptyPredecessorOfS) {
// 获取 Phi 节点原本从 actualEmptyPredecessorOfS 接收的值
Value *valueFromEmptyPredecessor = phiInst->getIncomingValue(actualEmptyPredecessorOfS);
Value *valueFromEmptyPredecessor = phiInst->getValfromBlk(actualEmptyPredecessorOfS);
// 追溯这个值,找到它在非空块中的最终来源
// currentBlock 是 P
@@ -491,12 +504,13 @@ bool SysYCFGOptUtils::SysYDelEmptyBlock(Function *func, IRBuilder *pBuilder) {
// 替换 Phi 节点的传入块和传入值
if (ultimateSourceValue) { // 确保成功追溯到有效来源
phiInst->replaceIncoming(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
// phiInst->replaceIncoming(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
phiInst->replaceIncomingBlock(actualEmptyPredecessorOfS, currentBlock, ultimateSourceValue);
} else {
assert(false && "[DelEmptyBlock] Unable to trace a valid source for Phi instruction");
// 无法追溯到有效来源,这可能是个错误或特殊情况
// 此时可能需要移除该 Phi 项,或者插入一个 undef 值
phiInst->removeIncoming(actualEmptyPredecessorOfS);
phiInst->removeIncomingBlock(actualEmptyPredecessorOfS);
}
}
} else {
@@ -647,7 +661,7 @@ bool SysYCFGOptUtils::SysYCondBr2Br(Function *func, IRBuilder *pBuilder) {
break;
}
// 使用 delBlk 方法删除 basicblock.get() 对应的传入值
dynamic_cast<PhiInst *>(phiinst.get())->removeIncoming(basicblock.get());
dynamic_cast<PhiInst *>(phiinst.get())->removeIncomingBlock(basicblock.get());
}
} else { // cond为false或0
@@ -665,7 +679,7 @@ bool SysYCFGOptUtils::SysYCondBr2Br(Function *func, IRBuilder *pBuilder) {
break;
}
// 使用 delBlk 方法删除 basicblock.get() 对应的传入值
dynamic_cast<PhiInst *>(phiinst.get())->removeIncoming(basicblock.get());
dynamic_cast<PhiInst *>(phiinst.get())->removeIncomingBlock(basicblock.get());
}
}
}

View File

@@ -11,6 +11,8 @@
#include "Mem2Reg.h"
#include "Reg2Mem.h"
#include "SCCP.h"
#include "BuildCFG.h"
#include "LargeArrayToGlobal.h"
#include "Pass.h"
#include <iostream>
#include <queue>
@@ -40,6 +42,8 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
3. 添加优化passid
*/
// 注册分析遍
registerAnalysisPass<DominatorTreeAnalysisPass>();
registerAnalysisPass<LivenessAnalysisPass>();
registerAnalysisPass<sysy::DominatorTreeAnalysisPass>();
registerAnalysisPass<sysy::LivenessAnalysisPass>();
registerAnalysisPass<SysYAliasAnalysisPass>(); // 别名分析 (优先级高)
@@ -49,6 +53,9 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
registerAnalysisPass<LoopCharacteristicsPass>(); // 循环特征分析依赖别名分析
// 注册优化遍
registerOptimizationPass<BuildCFG>();
registerOptimizationPass<LargeArrayToGlobalPass>();
registerOptimizationPass<SysYDelInstAfterBrPass>();
registerOptimizationPass<SysYDelNoPreBLockPass>();
registerOptimizationPass<SysYBlockMergePass>();
@@ -68,6 +75,16 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
if (DEBUG) std::cout << "Applying -O1 optimizations.\n";
if (DEBUG) std::cout << "--- Running custom optimization sequence ---\n";
if(DEBUG) {
std::cout << "=== IR Before CFGOpt Optimizations ===\n";
printPasses();
}
this->clearPasses();
this->addPass(&BuildCFG::ID);
this->addPass(&LargeArrayToGlobalPass::ID);
this->run();
this->clearPasses();
this->addPass(&SysYDelInstAfterBrPass::ID);
this->addPass(&SysYDelNoPreBLockPass::ID);
@@ -77,6 +94,10 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
this->addPass(&SysYAddReturnPass::ID);
this->run();
this->clearPasses();
this->addPass(&BuildCFG::ID);
this->run();
if(DEBUG) {
std::cout << "=== IR After CFGOpt Optimizations ===\n";
printPasses();
@@ -117,7 +138,9 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
std::cout << "=== IR After Reg2Mem Optimizations ===\n";
printPasses();
}
this->clearPasses();
this->addPass(&BuildCFG::ID);
this->run();
if (DEBUG) std::cout << "--- Custom optimization sequence finished ---\n";
}
@@ -132,6 +155,7 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
SysYPrinter printer(moduleIR);
printer.printIR();
}
}
void PassManager::clearPasses() {

View File

@@ -15,6 +15,139 @@
using namespace std;
namespace sysy {
/// Computes the multiplier/shift pair for replacing a signed 32-bit division
/// by the constant `d` with a multiplication followed by a shift.
///
/// With k = ceil(log2(|d|)), the result is
///   m = floor(2^(32 + k - 1) / |d|) + 1
/// returned together with k. Only the magnitude of `d` is used; the sign of
/// the divisor has to be handled by the caller.
///
/// @param d  The constant divisor. Must not be zero.
/// @return   {m, k}. For d == 1 and d == -1 the placeholder {0, 0} is returned
///           (marked as "not used by strength reduction" in the original code).
/// @throws std::runtime_error if d == 0.
std::pair<long long, int> calculate_signed_magic(int d) {
    if (d == 0) {
        throw std::runtime_error("Division by zero");
    }
    if (d == 1 || d == -1) {
        return {0, 0}; // Not used by strength reduction
    }

    // Take |d| in unsigned arithmetic: negating INT_MIN as an int overflows,
    // which is undefined behaviour, whereas unsigned negation wraps to 2^31.
    const unsigned int magnitude =
        (d > 0) ? static_cast<unsigned int>(d) : 0u - static_cast<unsigned int>(d);

    // k = number of significant bits of |d| ...
    int k = 0;
    for (unsigned int rest = magnitude; rest > 0; rest >>= 1) {
        ++k;
    }
    // ... minus one when |d| is an exact power of two, i.e. ceil(log2(|d|)).
    if ((magnitude & (magnitude - 1)) == 0) {
        --k;
    }

    // |d| <= 2^31 gives k <= 31, so the shift is at most 62 and the power
    // fits comfortably into 64 bits; no wider integer type is required.
    const unsigned long long power = 1ULL << (32 + k - 1);
    const long long m = static_cast<long long>(power / magnitude) + 1;
    return {m, k};
}
// Conservative invalidation for function calls: every expression cache
// (GEPs, loads, unary and binary expressions) is emptied, so nothing that was
// computed before the call is reused after it.
void SysYIRGenerator::invalidateExpressionsOnCall() {
    availableGEPs.clear();
    availableLoads.clear();
    availableUnaryExpressions.clear();
    availableBinaryExpressions.clear();
}
// Resets all expression caches when code generation moves into a new basic
// block, so that cached values are only ever reused inside the block that
// produced them.
void SysYIRGenerator::enterNewBasicBlock() {
    availableGEPs.clear();
    availableLoads.clear();
    availableUnaryExpressions.clear();
    availableBinaryExpressions.clear();
}
// Drops every cache entry that a store to `storedAddress` may have made stale.
//
// @param storedAddress  Destination address of the store instruction (for
//                       example an AllocaInst* or the result of a GEP).
//
// Binary, unary and GEP cache entries are removed when one of their operands
// is `storedAddress` itself or a LoadInst that reads from `storedAddress`.
//
// Cached loads are dropped completely. They are keyed by the address value,
// and two different address values can denote the same memory location (for
// instance the element addresses of a[i] and a[j] when i == j at run time).
// Erasing only the entry keyed by `storedAddress` would therefore let a later
// read reuse a value that this store has overwritten.
void SysYIRGenerator::invalidateExpressionsOnStore(Value *storedAddress) {
    // True when `operand` is the stored address itself or a load from it.
    auto dependsOnStore = [storedAddress](Value *operand) -> bool {
        if (operand == storedAddress) {
            return true;
        }
        auto *load = dynamic_cast<LoadInst *>(operand);
        return load != nullptr && load->getPointer() == storedAddress;
    };

    // Keys are collected first and erased afterwards so that no container is
    // modified while it is being iterated.
    std::vector<ExpKey> staleBinaryKeys;
    for (const auto &entry : availableBinaryExpressions) {
        if (dependsOnStore(entry.first.left) || dependsOnStore(entry.first.right)) {
            staleBinaryKeys.push_back(entry.first);
        }
    }
    for (const auto &key : staleBinaryKeys) {
        availableBinaryExpressions.erase(key);
    }

    std::vector<UnExpKey> staleUnaryKeys;
    for (const auto &entry : availableUnaryExpressions) {
        if (dependsOnStore(entry.first.operand)) {
            staleUnaryKeys.push_back(entry.first);
        }
    }
    for (const auto &key : staleUnaryKeys) {
        availableUnaryExpressions.erase(key);
    }

    // Conservative may-alias handling: forget every cached load, not just the
    // one keyed by exactly `storedAddress` (see the function comment).
    availableLoads.clear();

    std::vector<GEPKey> staleGepKeys;
    for (const auto &entry : availableGEPs) {
        // A GEP is stale when its base pointer or any of its indices depends
        // on the stored address.
        bool stale = dependsOnStore(entry.first.basePointer);
        if (!stale) {
            for (const auto &index : entry.first.indices) {
                if (dependsOnStore(index)) {
                    stale = true;
                    break;
                }
            }
        }
        if (stale) {
            staleGepKeys.push_back(entry.first);
        }
    }
    for (const auto &key : staleGepKeys) {
        availableGEPs.erase(key);
    }
}
// std::vector<Value*> BinaryValueStack; ///< 用于存储value的栈
// std::vector<int> BinaryOpStack; ///< 用于存储二元表达式的操作符栈
@@ -244,27 +377,37 @@ void SysYIRGenerator::compute() {
}
} else {
// 否则创建相应的IR指令
if (commonType == Type::getIntType()) {
switch (op) {
case BinaryOp::ADD: resultValue = builder.createAddInst(lhs, rhs); break;
case BinaryOp::SUB: resultValue = builder.createSubInst(lhs, rhs); break;
case BinaryOp::MUL: resultValue = builder.createMulInst(lhs, rhs); break;
case BinaryOp::DIV: resultValue = builder.createDivInst(lhs, rhs); break;
case BinaryOp::MOD: resultValue = builder.createRemInst(lhs, rhs); break;
}
} else if (commonType == Type::getFloatType()) {
switch (op) {
case BinaryOp::ADD: resultValue = builder.createFAddInst(lhs, rhs); break;
case BinaryOp::SUB: resultValue = builder.createFSubInst(lhs, rhs); break;
case BinaryOp::MUL: resultValue = builder.createFMulInst(lhs, rhs); break;
case BinaryOp::DIV: resultValue = builder.createFDivInst(lhs, rhs); break;
case BinaryOp::MOD:
std::cerr << "Error: Modulo operator not supported for float types." << std::endl;
ExpKey currentExpKey(static_cast<BinaryOp>(op), lhs, rhs);
auto it = availableBinaryExpressions.find(currentExpKey);
if (it != availableBinaryExpressions.end()) {
// 在缓存中找到,重用结果
resultValue = it->second;
} else {
if (commonType == Type::getIntType()) {
switch (op) {
case BinaryOp::ADD: resultValue = builder.createAddInst(lhs, rhs); break;
case BinaryOp::SUB: resultValue = builder.createSubInst(lhs, rhs); break;
case BinaryOp::MUL: resultValue = builder.createMulInst(lhs, rhs); break;
case BinaryOp::DIV: resultValue = builder.createDivInst(lhs, rhs); break;
case BinaryOp::MOD: resultValue = builder.createRemInst(lhs, rhs); break;
}
} else if (commonType == Type::getFloatType()) {
switch (op) {
case BinaryOp::ADD: resultValue = builder.createFAddInst(lhs, rhs); break;
case BinaryOp::SUB: resultValue = builder.createFSubInst(lhs, rhs); break;
case BinaryOp::MUL: resultValue = builder.createFMulInst(lhs, rhs); break;
case BinaryOp::DIV: resultValue = builder.createFDivInst(lhs, rhs); break;
case BinaryOp::MOD:
std::cerr << "Error: Modulo operator not supported for float types." << std::endl;
return;
}
} else {
std::cerr << "Error: Unsupported type for binary instruction." << std::endl;
return;
}
} else {
std::cerr << "Error: Unsupported type for binary instruction." << std::endl;
return;
// 将新创建的指令结果添加到缓存
availableBinaryExpressions[currentExpKey] = resultValue;
}
}
break;
@@ -316,36 +459,45 @@ void SysYIRGenerator::compute() {
return;
}
} else {
// 否则创建相应的IR指令
switch (op) {
case BinaryOp::PLUS:
resultValue = operand; // 一元加指令通常直接返回操作数
break;
case BinaryOp::NEG: {
if (commonType == sysy::Type::getIntType()) {
resultValue = builder.createNegInst(operand);
} else if (commonType == sysy::Type::getFloatType()) {
resultValue = builder.createFNegInst(operand);
} else {
std::cerr << "Error: Negation not supported for operand type." << std::endl;
return;
// 否则创建相应的IR指令 (在这里应用CSE)
UnExpKey currentUnExpKey(static_cast<BinaryOp>(op), operand);
auto it = availableUnaryExpressions.find(currentUnExpKey);
if (it != availableUnaryExpressions.end()) {
// 在缓存中找到,重用结果
resultValue = it->second;
} else {
switch (op) {
case BinaryOp::PLUS:
resultValue = operand; // 一元加指令通常直接返回操作数
break;
case BinaryOp::NEG: {
if (commonType == sysy::Type::getIntType()) {
resultValue = builder.createNegInst(operand);
} else if (commonType == sysy::Type::getFloatType()) {
resultValue = builder.createFNegInst(operand);
} else {
std::cerr << "Error: Negation not supported for operand type." << std::endl;
return;
}
break;
}
case BinaryOp::NOT:
// 逻辑非
if (commonType == sysy::Type::getIntType()) {
resultValue = builder.createNotInst(operand);
} else if (commonType == sysy::Type::getFloatType()) {
resultValue = builder.createFNotInst(operand);
} else {
std::cerr << "Error: Logical NOT not supported for operand type." << std::endl;
return;
}
break;
default:
std::cerr << "Error: Unknown unary operator for instructions: " << op << std::endl;
return;
}
break;
}
case BinaryOp::NOT:
// 逻辑非
if (commonType == sysy::Type::getIntType()) {
resultValue = builder.createNotInst(operand);
} else if (commonType == sysy::Type::getFloatType()) {
resultValue = builder.createFNotInst(operand);
} else {
std::cerr << "Error: Logical NOT not supported for operand type." << std::endl;
return;
}
break;
default:
std::cerr << "Error: Unknown unary operator for instructions: " << op << std::endl;
return;
// 将新创建的指令结果添加到缓存
availableUnaryExpressions[currentUnExpKey] = resultValue;
}
}
break;
@@ -487,7 +639,19 @@ Value* SysYIRGenerator::getGEPAddressInst(Value* basePointer, const std::vector<
// `indices` 向量现在由调用方(如 visitLValue, visitVarDecl, visitAssignStmt负责完整准备
// 包括是否需要添加初始的 `0` 索引。
// 所以这里直接将其传递给 `builder.createGetElementPtrInst`。
return builder.createGetElementPtrInst(basePointer, indices);
GEPKey key = {basePointer, indices};
// 尝试从缓存中查找
auto it = availableGEPs.find(key);
if (it != availableGEPs.end()) {
return it->second; // 缓存命中,返回已有的 GEPInst*
}
// 缓存未命中,创建新的 GEPInst
Value* gepInst = builder.createGetElementPtrInst(basePointer, indices); // 假设 builder 提供了 createGEPInst 方法
availableGEPs[key] = gepInst; // 将新的 GEPInst* 加入缓存
return gepInst;
}
/*
@@ -586,7 +750,13 @@ std::any SysYIRGenerator::visitConstDecl(SysYParser::ConstDeclContext *ctx) {
// 显式地为局部常量在栈上分配空间
// alloca 的类型将是指针指向常量类型,例如 `int*` 或 `int[2][3]*`
// 将alloca全部集中到entry中
auto entry = builder.getBasicBlock()->getParent()->getEntryBlock();
auto it = builder.getPosition();
auto nowblk = builder.getBasicBlock();
builder.setPosition(entry, entry->terminator());
AllocaInst *alloca = builder.createAllocaInst(Type::getPointerType(variableType), name);
builder.setPosition(nowblk, it);
ArrayValueTree *root = std::any_cast<ArrayValueTree *>(constDef->constInitVal()->accept(this));
ValueCounter values;
@@ -743,8 +913,12 @@ std::any SysYIRGenerator::visitVarDecl(SysYParser::VarDeclContext *ctx) {
// 对于数组alloca 的类型将是指针指向数组类型,例如 `int[2][3]*`
// 对于标量alloca 的类型将是指针指向标量类型,例如 `int*`
AllocaInst* alloca =
builder.createAllocaInst(Type::getPointerType(variableType), name);
auto entry = builder.getBasicBlock()->getParent()->getEntryBlock();
auto it = builder.getPosition();
auto nowblk = builder.getBasicBlock();
builder.setPosition(entry, entry->terminator());
AllocaInst *alloca = builder.createAllocaInst(Type::getPointerType(variableType), name);
builder.setPosition(nowblk, it);
if (varDef->initVal() != nullptr) {
ValueCounter values;
@@ -946,6 +1120,8 @@ std::any SysYIRGenerator::visitFuncType(SysYParser::FuncTypeContext *ctx) {
std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){
// 更新作用域
module->enterNewScope();
// 清除CSE缓存
enterNewBasicBlock();
auto name = ctx->Ident()->getText();
std::vector<Type *> paramActualTypes;
@@ -1015,15 +1191,25 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){
for(int i = 0; i < paramActualTypes.size(); ++i) {
Argument* arg = new Argument(paramActualTypes[i], function, i, paramNames[i]);
function->insertArgument(arg);
}
// 先将所有参数名字注册到符号表中确保alloca不会使用相同的名字
for (int i = 0; i < paramNames.size(); ++i) {
// 预先注册参数名字这样addVariable就会使用不同的后缀
module->registerParameterName(paramNames[i]);
}
auto funcArgs = function->getArguments();
std::vector<AllocaInst *> allocas;
for (int i = 0; i < paramActualTypes.size(); ++i) {
AllocaInst *alloca = builder.createAllocaInst(Type::getPointerType(paramActualTypes[i]), paramNames[i]);
// 使用函数特定的前缀来确保参数alloca名字唯一
std::string allocaName = name + "_param_" + paramNames[i];
AllocaInst *alloca = builder.createAllocaInst(Type::getPointerType(paramActualTypes[i]), allocaName);
// 直接设置唯一名字不依赖addVariable的命名逻辑
alloca->setName(allocaName);
allocas.push_back(alloca);
module->addVariable(paramNames[i], alloca);
// 直接添加到符号表,使用原参数名作为查找键
module->addVariableDirectly(paramNames[i], alloca);
}
for(int i = 0; i < paramActualTypes.size(); ++i) {
@@ -1037,6 +1223,7 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){
// 从 entryBB 无条件跳转到 funcBodyEntry
builder.createUncondBrInst(funcBodyEntry);
BasicBlock::conectBlocks(entry, funcBodyEntry); // 连接 entryBB 和 funcBodyEntry
builder.setPosition(funcBodyEntry,funcBodyEntry->end()); // 将插入点设置到 funcBodyEntry
for (auto item : ctx->blockStmt()->blockItem()) {
@@ -1091,6 +1278,45 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) {
if (dynamic_cast<AllocaInst*>(variable) || dynamic_cast<GlobalValue*>(variable)) {
LValue = variable;
}
// 标量变量的类型推断
Type* LType = builder.getIndexedType(variable->getType(), indices);
Value* RValue = computeExp(ctx->exp(), LType); // 右值计算
Type* RType = RValue->getType();
// TODO:computeExp处理了类型转换可以考虑删除判断逻辑
if (LType != RType) {
ConstantValue *constValue = dynamic_cast<ConstantValue *>(RValue);
if (constValue != nullptr) {
if (LType == Type::getFloatType()) {
if(dynamic_cast<ConstantInteger *>(constValue)) {
// 如果是整型常量,转换为浮点型
RValue = ConstantFloating::get(static_cast<float>(constValue->getInt()));
} else if (dynamic_cast<ConstantFloating *>(constValue)) {
// 如果是浮点型常量,直接使用
RValue = ConstantFloating::get(static_cast<float>(constValue->getFloat()));
}
} else { // 假设如果不是浮点型,就是整型
if(dynamic_cast<ConstantFloating *>(constValue)) {
// 如果是浮点型常量,转换为整型
RValue = ConstantInteger::get(static_cast<int>(constValue->getFloat()));
} else if (dynamic_cast<ConstantInteger *>(constValue)) {
// 如果是整型常量,直接使用
RValue = ConstantInteger::get(static_cast<int>(constValue->getInt()));
}
}
} else {
if (LType == Type::getFloatType() && RType != Type::getFloatType()) {
RValue = builder.createItoFInst(RValue);
} else if (LType != Type::getFloatType() && RType == Type::getFloatType()) {
RValue = builder.createFtoIInst(RValue);
}
// 如果两者都是同一类型,就不需要转换
}
}
builder.createStoreInst(RValue, LValue);
}
else {
// 对于数组或多维数组的左值处理
@@ -1100,7 +1326,16 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) {
if (AllocaInst *alloc = dynamic_cast<AllocaInst *>(variable)) {
Type* allocatedType = alloc->getType()->as<PointerType>()->getBaseType();
if (allocatedType->isPointer()) {
gepBasePointer = builder.createLoadInst(alloc);
// 尝试从缓存中获取 builder.createLoadInst(alloc) 的结果
auto it = availableLoads.find(alloc);
if (it != availableLoads.end()) {
gepBasePointer = it->second; // 缓存命中,重用
} else {
gepBasePointer = builder.createLoadInst(alloc); // 缓存未命中,创建新的 LoadInst
availableLoads[alloc] = gepBasePointer; // 将结果加入缓存
}
// --- CSE 结束 ---
// gepBasePointer = builder.createLoadInst(alloc);
gepIndices = indices;
} else {
gepBasePointer = alloc;
@@ -1119,52 +1354,48 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) {
}
// 左值为地址
LValue = getGEPAddressInst(gepBasePointer, gepIndices);
}
// 数组变量的类型推断使用gepIndices和gepBasePointer的类型
Type* LType = builder.getIndexedType(gepBasePointer->getType(), gepIndices);
Value* RValue = computeExp(ctx->exp(), LType); // 右值计算
Type* RType = RValue->getType();
// Value* RValue = std::any_cast<Value *>(visitExp(ctx->exp())); // 右值
// 先推断 LValue 的类型
// 如果 LValue 是指向数组的指针,则需要根据 indices 获取正确的类型
// 如果 LValue 是标量,则直接使用其类型
// 注意LValue 的类型可能是指向数组的指针 (e.g., int(*)[3]) 或者指向标量的指针 (e.g., int*) 也能推断
Type* LType = builder.getIndexedType(variable->getType(), indices);
Value* RValue = computeExp(ctx->exp(), LType); // 右值计算
Type* RType = RValue->getType();
// TODO:computeExp处理了类型转换可以考虑删除判断逻辑
if (LType != RType) {
ConstantValue *constValue = dynamic_cast<ConstantValue *>(RValue);
if (constValue != nullptr) {
if (LType == Type::getFloatType()) {
if(dynamic_cast<ConstantInteger *>(constValue)) {
// 如果是整型常量,转换为浮点型
RValue = ConstantFloating::get(static_cast<float>(constValue->getInt()));
} else if (dynamic_cast<ConstantFloating *>(constValue)) {
// 如果是浮点型常量,直接使用
RValue = ConstantFloating::get(static_cast<float>(constValue->getFloat()));
// TODO:computeExp处理了类型转换可以考虑删除判断逻辑
if (LType != RType) {
ConstantValue *constValue = dynamic_cast<ConstantValue *>(RValue);
if (constValue != nullptr) {
if (LType == Type::getFloatType()) {
if(dynamic_cast<ConstantInteger *>(constValue)) {
// 如果是整型常量,转换为浮点型
RValue = ConstantFloating::get(static_cast<float>(constValue->getInt()));
} else if (dynamic_cast<ConstantFloating *>(constValue)) {
// 如果是浮点型常量,直接使用
RValue = ConstantFloating::get(static_cast<float>(constValue->getFloat()));
}
} else { // 假设如果不是浮点型,就是整型
if(dynamic_cast<ConstantFloating *>(constValue)) {
// 如果是浮点型常量,转换为整型
RValue = ConstantInteger::get(static_cast<int>(constValue->getFloat()));
} else if (dynamic_cast<ConstantInteger *>(constValue)) {
// 如果是整型常量,直接使用
RValue = ConstantInteger::get(static_cast<int>(constValue->getInt()));
}
}
} else { // 假设如果不是浮点型,就是整型
if(dynamic_cast<ConstantFloating *>(constValue)) {
// 如果是浮点型常量,转换为整型
RValue = ConstantInteger::get(static_cast<int>(constValue->getFloat()));
} else if (dynamic_cast<ConstantInteger *>(constValue)) {
// 如果是整型常量,直接使用
RValue = ConstantInteger::get(static_cast<int>(constValue->getInt()));
} else {
if (LType == Type::getFloatType() && RType != Type::getFloatType()) {
RValue = builder.createItoFInst(RValue);
} else if (LType != Type::getFloatType() && RType == Type::getFloatType()) {
RValue = builder.createFtoIInst(RValue);
}
}
} else {
if (LType == Type::getFloatType()) {
RValue = builder.createItoFInst(RValue);
} else { // 假设如果不是浮点型,就是整型
RValue = builder.createFtoIInst(RValue);
// 如果两者都是同一类型,就不需要转换
}
}
builder.createStoreInst(RValue, LValue);
}
builder.createStoreInst(RValue, LValue);
invalidateExpressionsOnStore(LValue);
return std::any();
}
@@ -1201,7 +1432,9 @@ std::any SysYIRGenerator::visitIfStmt(SysYParser::IfStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(thenBlock);
builder.setPosition(thenBlock, thenBlock->end());
// CSE清除缓存
enterNewBasicBlock();
auto block = dynamic_cast<SysYParser::BlockStmtContext *>(ctx->stmt(0));
// 如果是块语句,直接访问
// 否则访问语句
@@ -1220,7 +1453,9 @@ std::any SysYIRGenerator::visitIfStmt(SysYParser::IfStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(elseBlock);
builder.setPosition(elseBlock, elseBlock->end());
// CSE清除缓存
enterNewBasicBlock();
block = dynamic_cast<SysYParser::BlockStmtContext *>(ctx->stmt(1));
if (block != nullptr) {
visitBlockStmt(block);
@@ -1237,7 +1472,9 @@ std::any SysYIRGenerator::visitIfStmt(SysYParser::IfStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(exitBlock);
builder.setPosition(exitBlock, exitBlock->end());
// CSE清除缓存
enterNewBasicBlock();
} else {
builder.pushTrueBlock(thenBlock);
builder.pushFalseBlock(exitBlock);
@@ -1250,7 +1487,9 @@ std::any SysYIRGenerator::visitIfStmt(SysYParser::IfStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(thenBlock);
builder.setPosition(thenBlock, thenBlock->end());
// CSE清除缓存
enterNewBasicBlock();
auto block = dynamic_cast<SysYParser::BlockStmtContext *>(ctx->stmt(0));
if (block != nullptr) {
visitBlockStmt(block);
@@ -1267,6 +1506,9 @@ std::any SysYIRGenerator::visitIfStmt(SysYParser::IfStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(exitBlock);
builder.setPosition(exitBlock, exitBlock->end());
// CSE清除缓存
enterNewBasicBlock();
}
return std::any();
}
@@ -1284,7 +1526,9 @@ std::any SysYIRGenerator::visitWhileStmt(SysYParser::WhileStmtContext *ctx) {
builder.createUncondBrInst(headBlock);
BasicBlock::conectBlocks(curBlock, headBlock);
builder.setPosition(headBlock, headBlock->end());
// CSE清除缓存
enterNewBasicBlock();
BasicBlock* bodyBlock = new BasicBlock(function);
BasicBlock* exitBlock = new BasicBlock(function);
@@ -1300,6 +1544,8 @@ std::any SysYIRGenerator::visitWhileStmt(SysYParser::WhileStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(bodyBlock);
builder.setPosition(bodyBlock, bodyBlock->end());
// CSE清除缓存
enterNewBasicBlock();
builder.pushBreakBlock(exitBlock);
builder.pushContinueBlock(headBlock);
@@ -1315,7 +1561,7 @@ std::any SysYIRGenerator::visitWhileStmt(SysYParser::WhileStmtContext *ctx) {
}
builder.createUncondBrInst(headBlock);
BasicBlock::conectBlocks(builder.getBasicBlock(), exitBlock);
BasicBlock::conectBlocks(builder.getBasicBlock(), headBlock);
builder.popBreakBlock();
builder.popContinueBlock();
@@ -1324,7 +1570,9 @@ std::any SysYIRGenerator::visitWhileStmt(SysYParser::WhileStmtContext *ctx) {
labelstring.str("");
function->addBasicBlock(exitBlock);
builder.setPosition(exitBlock, exitBlock->end());
// CSE清除缓存
enterNewBasicBlock();
return std::any();
}
@@ -1430,90 +1678,101 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
break;
}
}
if (allIndicesConstant) {
// 如果是常量变量且所有索引都是常量,并且不是数组名单独出现的情况
if (allIndicesConstant && !dims.empty()) {
// 如果是常量变量且所有索引都是常量,直接通过 getByIndices 获取编译时值
// 这个方法会根据索引深度返回最终的标量值或指向子数组的指针 (作为 ConstantValue/Variable)
return constVar->getByIndices(dims);
}
// 如果dims为空检查是否是常量标量
if (dims.empty() && declaredNumDims == 0) {
// 常量标量,直接返回其值
// 默认传入空索引列表,表示访问标量本身
return constVar->getByIndices(dims);
}
// 如果dims为空但不是标量数组名单独出现需要走GEP路径来实现数组到指针的退化
}
// 3. 处理可变变量 (AllocaInst/GlobalValue) 或带非常量索引的常量变量
// 这里区分标量访问和数组元素/子数组访问
Value *targetAddress = nullptr;
// 检查是否是访问标量变量本身没有索引且声明维度为0
if (dims.empty() && declaredNumDims == 0) {
// 对于标量变量,直接加载其值。
// variable 本身就是指向标量的指针 (e.g., int* %a)
if (dynamic_cast<AllocaInst*>(variable) || dynamic_cast<GlobalValue*>(variable)) {
value = builder.createLoadInst(variable);
} else {
// 如果走到这里且不是AllocaInst/GlobalValue但dims为空且declaredNumDims为0
// 且又不是ConstantVariable (前面已处理),则可能是错误情况。
targetAddress = variable;
}
else {
assert(false && "Unhandled scalar variable type in LValue access.");
return static_cast<Value*>(nullptr);
}
} else {
// 访问数组元素或子数组(有索引,或变量本身是数组/多维指针)
Value* gepBasePointer = nullptr;
std::vector<Value*> gepIndices; // 准备传递给 getGEPAddressInst 的索引列表
// GEP 的基指针就是变量本身(它是一个指向内存的指针)
std::vector<Value*> gepIndices;
if (AllocaInst *alloc = dynamic_cast<AllocaInst *>(variable)) {
// 情况 A: 局部变量 (AllocaInst)
// 获取 AllocaInst 分配的内存的实际类型。
// 例如:对于 `int b[10][20];``allocatedType` 是 `[10 x [20 x i32]]`。
// 对于 `int b[][20]` 的函数参数,其 AllocaInst 存储的是一个指针,
// 此时 `allocatedType` 是 `[20 x i32]*`。
Type* allocatedType = alloc->getType()->as<PointerType>()->getBaseType();
if (allocatedType->isPointer()) {
// 如果 AllocaInst 分配的是一个指针类型 (例如,用于存储函数参数的指针,如 int b[][20] 中的 b)
// 即 `allocatedType` 是一个指向数组指针的指针 (e.g., [20 x i32]**)
// 那么 GEP 的基指针是加载这个指针变量的值。
gepBasePointer = builder.createLoadInst(alloc); // 加载出实际的指针值 (e.g., [20 x i32]*)
// 对于这种参数指针,用户提供的索引直接作用于它。不需要额外的 0。
gepBasePointer = builder.createLoadInst(alloc);
gepIndices = dims;
} else {
// 如果 AllocaInst 分配的是实际的数组数据 (例如int b[10][20] 中的 b)
// 那么 AllocaInst 本身就是 GEP 的基指针。
// 这里的 `alloc` 是指向数组的指针 (e.g., [10 x [20 x i32]]*)
gepBasePointer = alloc; // 类型是 [10 x [20 x i32]]*
// 对于这种完整的数组分配GEP 的第一个索引必须是 0用于“步过”整个数组。
gepBasePointer = alloc;
gepIndices.push_back(ConstantInteger::get(0));
gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
if (dims.empty() && declaredNumDims > 0) {
// 数组名单独出现没有索引在SysY中多维数组名应该退化为指向第一行的指针
// 对于二维数组 T[M][N],退化为 T(*)[N]需要GEP: getelementptr T[M][N], T[M][N]* ptr, i32 0, i32 0
// 第一个i32 0: 选择数组本身第二个i32 0: 选择第0行
// 结果类型: T[N]*
gepIndices.push_back(ConstantInteger::get(0));
} else {
// 正常的数组元素访问
gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
}
}
} else if (GlobalValue *glob = dynamic_cast<GlobalValue *>(variable)) {
// 情况 B: 全局变量 (GlobalValue)
// GlobalValue 总是指向全局数据的指针。
gepBasePointer = glob; // 类型是 [61 x [67 x i32]]*
// 对于全局数组GEP 的第一个索引必须是 0用于“步过”整个数组。
gepBasePointer = glob;
gepIndices.push_back(ConstantInteger::get(0));
gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
if (dims.empty() && declaredNumDims > 0) {
// 全局数组名单独出现(没有索引):应该退化为指向第一行的指针
// 需要添加一个额外的i32 0索引
gepIndices.push_back(ConstantInteger::get(0));
} else {
// 正常的数组元素访问
gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
}
} else if (ConstantVariable *constV = dynamic_cast<ConstantVariable *>(variable)) {
// 情况 C: 常量变量 (ConstantVariable),如果它代表全局数组常量
// 假设 ConstantVariable 可以直接作为 GEP 的基指针。
gepBasePointer = constV;
// 对于常量数组,也需要 0 索引来“步过”整个数组。
// 这里可以进一步检查 constV->getType()->as<PointerType>()->getBaseType()->isArray()
// 但为了简洁,假设所有 ConstantVariable 作为 GEP 基指针时都需要此 0。
gepIndices.push_back(ConstantInteger::get(0));
gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
if (dims.empty() && declaredNumDims > 0) {
// 常量数组名单独出现(没有索引):应该退化为指向第一行的指针
// 需要添加一个额外的i32 0索引
gepIndices.push_back(ConstantInteger::get(0));
} else {
// 正常的数组元素访问
gepIndices.insert(gepIndices.end(), dims.begin(), dims.end());
}
} else {
assert(false && "LValue variable type not supported for GEP base pointer.");
return static_cast<Value *>(nullptr);
}
// 现在调用 getGEPAddressInst传入正确准备的基指针和索引列表
Value *targetAddress = getGEPAddressInst(gepBasePointer, gepIndices);
targetAddress = getGEPAddressInst(gepBasePointer, gepIndices);
// 如果提供的索引数量少于声明的维度数量,则表示访问的是子数组,返回其地址
if (dims.size() < declaredNumDims) {
value = targetAddress;
}
// 如果提供的索引数量少于声明的维度数量,则表示访问的是子数组,返回其地址 (无需加载)
if (dims.size() < declaredNumDims) {
value = targetAddress;
} else {
// value = builder.createLoadInst(targetAddress);
auto it = availableLoads.find(targetAddress);
if (it != availableLoads.end()) {
value = it->second; // 缓存命中,重用已有的 LoadInst 结果
} else {
// 否则,表示访问的是最终的标量元素,加载其值
// 假设 createLoadInst 接受 Value* pointer
value = builder.createLoadInst(targetAddress);
// 缓存未命中,创建新的 LoadInst
value = builder.createLoadInst(targetAddress);
availableLoads[targetAddress] = value; // 将新的 LoadInst 结果加入缓存
}
}
return value;
}
@@ -1571,10 +1830,10 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) {
// 获取形参列表。`getArguments()` 返回的是 `Argument*` 的集合,
// 每个 `Argument` 代表一个函数形参,其 `getType()` 就是指向形参的类型的指针类型。
auto formalParams = function->getArguments();
const auto& formalParams = function->getArguments();
// 检查实参和形参数量是否匹配。
if (args.size() != formalParams.size()) {
if (args.size() != function->getNumArguments()) {
std::cerr << "Error: Function call argument count mismatch for function '" << funcName << "'." << std::endl;
assert(false && "Function call argument count mismatch!");
}
@@ -1606,15 +1865,27 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) {
} else if (formalParamExpectedValueType->isFloat() && actualArgType->isInt()) {
args[i] = builder.createItoFInst(args[i]);
}
// 2. 指针类型转换 (例如数组退化:`[N x T]*` 到 `T*`,或兼容指针类型之间) TODO:不清楚有没有这种样例
// 2. 指针类型转换 (例如数组退化:`[N x T]*` 到 `T*`,或兼容指针类型之间)
// 这种情况常见于数组参数,实参可能是一个更具体的数组指针类型,
// 而形参是其退化后的基础指针类型。LLVM 的 `bitcast` 指令可以用于
// 在相同大小的指针类型之间进行转换,这对于数组退化至关重要。
// else if (formalParamType->isPointer() && actualArgType->isPointer()) {
// 检查指针基类型是否兼容,或者是否是数组退化导致的类型不同。
// 使用 bitcast
// args[i] = builder.createBitCastInst(args[i], formalParamType);
// }
// 而形参是其退化后的基础指针类型。
else if (formalParamExpectedValueType->isPointer() && actualArgType->isPointer()) {
// 检查是否是数组指针到元素指针的decay
// 例如:[N x T]* -> T*
auto formalPtrType = formalParamExpectedValueType->as<PointerType>();
auto actualPtrType = actualArgType->as<PointerType>();
if (formalPtrType && actualPtrType && actualPtrType->getBaseType()->isArray()) {
auto actualArrayType = actualPtrType->getBaseType()->as<ArrayType>();
if (actualArrayType &&
formalPtrType->getBaseType() == actualArrayType->getElementType()) {
// 这是数组 decay 的情况,添加 GEP 来获取数组的第一个元素
std::vector<Value*> indices;
indices.push_back(ConstantInteger::get(0)); // 第一个索引:解引用指针
indices.push_back(ConstantInteger::get(0)); // 第二个索引:获取数组第一个元素
args[i] = getGEPAddressInst(args[i], indices);
}
}
}
// 3. 其他未预期的类型不匹配
// 如果代码执行到这里,说明存在编译器前端未处理的类型不兼容或错误。
else {
@@ -1633,6 +1904,7 @@ std::any SysYIRGenerator::visitUnaryExp(SysYParser::UnaryExpContext *ctx) {
visitPrimaryExp(ctx->primaryExp());
} else if (ctx->call() != nullptr) {
BinaryExpStack.push_back(std::any_cast<Value *>(visitCall(ctx->call())));BinaryExpLenStack.back()++;
invalidateExpressionsOnCall();
} else if (ctx->unaryOp() != nullptr) {
// 遇到一元操作符,将其压入 BinaryExpStack
auto opNode = dynamic_cast<antlr4::tree::TerminalNode*>(ctx->unaryOp()->children[0]);
@@ -1997,15 +2269,23 @@ void Utils::createExternalFunction(
const std::vector<std::string> &paramNames,
const std::vector<std::vector<Value *>> &paramDims, Type *returnType,
const std::string &funcName, Module *pModule, IRBuilder *pBuilder) {
auto funcType = Type::getFunctionType(returnType, paramTypes);
// 根据paramDims调整参数类型数组参数需要转换为指针类型
std::vector<Type *> adjustedParamTypes = paramTypes;
for (int i = 0; i < paramTypes.size() && i < paramDims.size(); ++i) {
if (!paramDims[i].empty()) {
// 如果参数有维度信息,说明是数组参数,转换为指针类型
adjustedParamTypes[i] = Type::getPointerType(paramTypes[i]);
}
}
auto funcType = Type::getFunctionType(returnType, adjustedParamTypes);
auto function = pModule->createExternalFunction(funcName, funcType);
auto entry = function->getEntryBlock();
pBuilder->setPosition(entry, entry->end());
for (int i = 0; i < paramTypes.size(); ++i) {
auto arg = new Argument(paramTypes[i], function, i, paramNames[i]);
auto arg = new Argument(adjustedParamTypes[i], function, i, paramNames[i]);
auto alloca = pBuilder->createAllocaInst(
Type::getPointerType(paramTypes[i]), paramNames[i]);
Type::getPointerType(adjustedParamTypes[i]), paramNames[i]);
function->insertArgument(arg);
auto store = pBuilder->createStoreInst(arg, alloca);
pModule->addVariable(paramNames[i], alloca);

View File

@@ -240,6 +240,8 @@ void SysYPrinter::printInst(Instruction *pInst) {
case Kind::kMul:
case Kind::kDiv:
case Kind::kRem:
case Kind::kSRA:
case Kind::kMulh:
case Kind::kFAdd:
case Kind::kFSub:
case Kind::kFMul:
@@ -272,6 +274,8 @@ void SysYPrinter::printInst(Instruction *pInst) {
case Kind::kMul: std::cout << "mul"; break;
case Kind::kDiv: std::cout << "sdiv"; break;
case Kind::kRem: std::cout << "srem"; break;
case Kind::kSRA: std::cout << "ashr"; break;
case Kind::kMulh: std::cout << "mulh"; break;
case Kind::kFAdd: std::cout << "fadd"; break;
case Kind::kFSub: std::cout << "fsub"; break;
case Kind::kFMul: std::cout << "fmul"; break;
@@ -295,7 +299,12 @@ void SysYPrinter::printInst(Instruction *pInst) {
// Types and operands
std::cout << " ";
printType(binInst->getType());
// For comparison operations, print operand types instead of result type
if (pInst->getKind() >= Kind::kICmpEQ && pInst->getKind() <= Kind::kFCmpGE) {
printType(binInst->getLhs()->getType());
} else {
printType(binInst->getType());
}
std::cout << " ";
printValue(binInst->getLhs());
std::cout << ", ";
@@ -508,9 +517,9 @@ void SysYPrinter::printInst(Instruction *pInst) {
if (!firstPair) std::cout << ", ";
firstPair = false;
std::cout << "[ ";
printValue(phiInst->getValue(i));
printValue(phiInst->getIncomingValue(i));
std::cout << ", %";
printBlock(phiInst->getBlock(i));
printBlock(phiInst->getIncomingBlock(i));
std::cout << " ]";
}
std::cout << std::endl;