From ac3358d7e3baba5f0b472369feb32c8d9a722963 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Sat, 9 Aug 2025 13:53:00 +0800 Subject: [PATCH] =?UTF-8?q?[midend-LoopAnalysis]=E7=A7=BB=E9=99=A4?= =?UTF-8?q?=E5=9F=BA=E6=9C=AC=E5=BE=AA=E7=8E=AF=E7=89=B9=E5=BE=81=E5=88=86?= =?UTF-8?q?=E6=9E=90=E4=B8=AD=E7=9A=84=E5=90=91=E9=87=8F=E5=8C=96=E5=B9=B6?= =?UTF-8?q?=E8=A1=8C=E5=8C=96=E5=86=85=E5=AE=B9=EF=BC=8C=E5=A2=9E=E5=8A=A0?= =?UTF-8?q?=E5=BE=AA=E7=8E=AF=E5=90=91=E9=87=8F=E5=8C=96=E5=B9=B6=E8=A1=8C?= =?UTF-8?q?=E5=8C=96=E7=89=B9=E5=BE=81=E5=88=86=E6=9E=90=E9=81=8D=EF=BC=8C?= =?UTF-8?q?TODO=EF=BC=9A=E6=9E=84=E5=BB=BA=E5=BE=AA=E7=8E=AF=E4=BC=98?= =?UTF-8?q?=E5=8C=96=E9=81=8D=E9=AA=8C=E8=AF=81=E5=88=86=E6=9E=90=E9=81=8D?= =?UTF-8?q?=E6=AD=A3=E7=A1=AE=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/include/midend/Pass/Analysis/Loop.h | 104 ++- .../Pass/Analysis/LoopCharacteristics.h | 213 ++--- .../midend/Pass/Analysis/LoopVectorization.h | 250 ++++++ src/midend/CMakeLists.txt | 1 + src/midend/Pass/Analysis/Loop.cpp | 77 ++ .../Pass/Analysis/LoopCharacteristics.cpp | 328 ++++--- .../Pass/Analysis/LoopVectorization.cpp | 803 ++++++++++++++++++ 7 files changed, 1473 insertions(+), 303 deletions(-) create mode 100644 src/include/midend/Pass/Analysis/LoopVectorization.h create mode 100644 src/midend/Pass/Analysis/LoopVectorization.cpp diff --git a/src/include/midend/Pass/Analysis/Loop.h b/src/include/midend/Pass/Analysis/Loop.h index 06803ed..7ff968c 100644 --- a/src/include/midend/Pass/Analysis/Loop.h +++ b/src/include/midend/Pass/Analysis/Loop.h @@ -1,21 +1,23 @@ #pragma once -#include "Dom.h" // 包含 DominatorTreeAnalysisPass 的依赖 -#include "IR.h" // 包含 IR 定义 -#include "Pass.h" // 包含 Pass 框架 +#include "Dom.h" +#include "IR.h" +#include "Pass.h" #include #include #include #include #include -#include // 用于循环体块的逆向遍历 +#include #include #include namespace sysy { -// 前向声明,防止循环引用 +// 前向声明 class LoopAnalysisResult; +class AliasAnalysisResult; +class SideEffectAnalysisResult; /** * @brief 表示一个识别出的循环。 @@ -142,6 +144,24 @@ public: return false; } + /** + * 检查循环是否可能有副作用(基于副作用分析结果) + * 使用场景: 循环优化决策、并行化分析 + */ + bool mayHaveSideEffects(SideEffectAnalysisResult* sideEffectAnalysis) const; + + /** + * 检查循环是否访问全局内存(基于别名分析结果) + * 使用场景: 并行化分析、缓存优化 + */ + bool accessesGlobalMemory(AliasAnalysisResult* aliasAnalysis) const; + + /** + * 检查循环是否有可能的内存别名冲突 + * 使用场景: 向量化分析、并行化决策 + */ + bool hasMemoryAliasConflicts(AliasAnalysisResult* aliasAnalysis) const; + /** * 估算循环的"热度" (基于嵌套深度和大小) * 使用场景: 优化优先级、资源分配 @@ -191,14 +211,14 @@ public: }; private: - // ========== 高频查询缓存 (必须缓存) ========== + // ========== 高频查询缓存 ========== mutable std::optional> cachedInnermostLoops; mutable std::optional> cachedOutermostLoops; mutable std::optional cachedMaxDepth; mutable std::optional cachedLoopCount; mutable std::map> cachedLoopsByDepth; - // ========== 中频查询缓存 (选择性缓存) ========== + // ========== 中频查询缓存 ========== mutable std::map cachedInnermostContainingLoop; mutable std::map> cachedAllNestedLoops; // 递归嵌套 mutable std::map> cachedAllContainingLoops; @@ -264,7 +284,7 @@ private: } public: - // ========== 基础接口 (保持向后兼容,但增加缓存失效) ========== + // ========== 基础接口 ========== // 添加一个识别出的循环到结果中 void addLoop(std::unique_ptr loop) { @@ -276,7 +296,7 @@ public: // 获取所有识别出的循环(unique_ptr 管理内存) const std::vector> &getAllLoops() const { return AllLoops; } - // ========== 高频查询接口 (缓存优化) ========== + // ========== 高频查询接口 ========== /** * 获取所有最内层循环 - 循环优化的主要目标 @@ -366,7 +386,7 @@ public: // 检查函数是否包含循环 bool hasLoops() const { return !AllLoops.empty(); } - // ========== 中频查询接口 (选择性缓存) ========== + // ========== 中频查询接口 ========== /** * 获取包含指定基本块的最内层循环 @@ -429,11 +449,61 @@ public: return cachedAllNestedLoops[loop]; } - // ========== 低频查询接口 (按需计算,不缓存) ========== + // ========== 利用别名和副作用分析的查询接口 ========== + + /** + * 获取所有纯循环(无副作用的循环) + * 并行化、循环优化 + */ + std::vector getPureLoops(SideEffectAnalysisResult* sideEffectAnalysis) const { + std::vector result; + if (!sideEffectAnalysis) return result; + + for (const auto& loop : AllLoops) { + if (!loop->mayHaveSideEffects(sideEffectAnalysis)) { + result.push_back(loop.get()); + } + } + return result; + } + + /** + * 获取所有只访问局部内存的循环 + * 缓存优化、局部性分析 + */ + std::vector getLocalMemoryLoops(AliasAnalysisResult* aliasAnalysis) const { + std::vector result; + if (!aliasAnalysis) return result; + + for (const auto& loop : AllLoops) { + if (!loop->accessesGlobalMemory(aliasAnalysis)) { + result.push_back(loop.get()); + } + } + return result; + } + + /** + * 获取所有无内存别名冲突的循环 + * 向量化、并行化 + */ + std::vector getNoAliasConflictLoops(AliasAnalysisResult* aliasAnalysis) const { + std::vector result; + if (!aliasAnalysis) return result; + + for (const auto& loop : AllLoops) { + if (!loop->hasMemoryAliasConflicts(aliasAnalysis)) { + result.push_back(loop.get()); + } + } + return result; + } + + // ========== 低频查询接口(不缓存) ========== /** * 检查两个循环是否有嵌套关系 - * 使用场景: 循环间依赖分析 + * 循环间依赖分析 */ bool isNestedLoop(Loop* inner, Loop* outer) const { if (inner == outer) return false; @@ -448,7 +518,7 @@ public: /** * 获取两个循环的最近公共祖先循环 - * 使用场景: 循环融合分析、优化范围确定 + * 循环融合分析、优化范围确定 */ Loop* getLowestCommonAncestor(Loop* loop1, Loop* loop2) const { if (!loop1 || !loop2) return nullptr; @@ -488,14 +558,14 @@ public: // ========== 缓存管理接口 ========== /** - * 手动失效缓存 (当IR结构改变时调用) + * 手动失效缓存 (可删除) */ void invalidateQueryCache() const { invalidateCache(); } /** - * 获取缓存统计信息 (用于性能调试) + * 获取缓存统计信息 */ CacheStats getCacheStats() const { CacheStats stats = {}; @@ -510,9 +580,6 @@ public: return stats; } - // --- 保留的内部接口 --- - // 注意:由于使用了缓存机制,不再需要手动维护最外层和最内层循环列表 - // 打印分析结果 void print() const; void printBBSet(const std::string &prefix, const std::set &s) const; @@ -522,7 +589,6 @@ private: Function *AssociatedFunction; // 结果关联的函数 std::vector> AllLoops; // 所有识别出的循环 std::map LoopMap; // 循环头到 Loop* 的映射,方便查找 - // 注意: 最外层和最内层循环列表已移除,现在通过缓存机制动态计算 }; /** diff --git a/src/include/midend/Pass/Analysis/LoopCharacteristics.h b/src/include/midend/Pass/Analysis/LoopCharacteristics.h index ab7ebb5..f74f086 100644 --- a/src/include/midend/Pass/Analysis/LoopCharacteristics.h +++ b/src/include/midend/Pass/Analysis/LoopCharacteristics.h @@ -4,6 +4,8 @@ #include "Loop.h" // 循环分析依赖 #include "Liveness.h" // 活跃性分析依赖 #include "AliasAnalysis.h" // 别名分析依赖 +#include "SideEffectAnalysis.h" // 副作用分析依赖 +#include "CallGraphAnalysis.h" // 调用图分析依赖 #include "IR.h" // IR定义 #include "Pass.h" // Pass框架 #include @@ -19,71 +21,62 @@ namespace sysy { class LoopCharacteristicsResult; /** - * @brief 循环特征信息结构 - * 存储单个循环的各种特征信息 + * @brief 循环特征信息结构 - 基础循环分析阶段 + * 存储循环的基本特征信息,为后续精确分析提供基础 */ struct LoopCharacteristics { Loop* loop; // 关联的循环对象 - // ========== 归纳变量分析 ========== - std::vector basicInductionVars; // 基本归纳变量 (i = phi(init, i+step)) - std::vector derivedInductionVars; // 派生归纳变量 (j = i * scale + offset) - std::map inductionSteps; // 归纳变量的步长 - std::map inductionInits; // 归纳变量的初始值 - - // ========== 循环不变量分析 ========== - std::set loopInvariants; // 循环不变量 (循环内定义但值不变) - std::set invariantInsts; // 不变指令 (可以外提的指令) - - // ========== 循环边界分析 ========== - std::optional staticTripCount; // 静态可确定的循环次数 - Value* dynamicTripCountExpr; // 动态循环次数表达式 - bool hasKnownBounds; // 是否有已知边界 - Value* lowerBound; // 循环下界 - Value* upperBound; // 循环上界 - - // ========== 循环形式分析 ========== + // ========== 基础循环形式分析 ========== bool isCountingLoop; // 是否为计数循环 (for i=0; i basicInductionVars; // 基本归纳变量 + std::map inductionSteps; // 归纳变量的步长(简化) + + // ========== 基础循环不变量分析 ========== + std::set loopInvariants; // 循环不变量 + std::set invariantInsts; // 可提升的不变指令 + + // ========== 基础边界分析 ========== + std::optional staticTripCount; // 静态循环次数(如果可确定) + bool hasKnownBounds; // 是否有已知边界 + + // ========== 基础纯度和副作用分析 ========== + bool isPure; // 是否为纯循环(无副作用) + bool accessesOnlyLocalMemory; // 是否只访问局部内存 + bool hasNoMemoryAliasConflicts; // 是否无内存别名冲突 + + // ========== 基础内存访问模式分析 ========== struct MemoryAccessPattern { - bool isSequential; // 是否顺序访问 (a[i], a[i+1], ...) - bool isStrided; // 是否跨步访问 (a[2*i], a[3*i], ...) - int stride; // 访问步长 std::vector loadInsts; // load指令列表 std::vector storeInsts; // store指令列表 - - // 使用外部别名分析结果 - AliasType aliasType; // 别名类型(来自别名分析) bool isArrayParameter; // 是否为数组参数访问 bool isGlobalArray; // 是否为全局数组访问 bool hasConstantIndices; // 是否使用常量索引 }; std::map memoryPatterns; // 内存访问模式 - // ========== 循环优化提示 ========== - bool benefitsFromUnrolling; // 是否适合循环展开 - bool benefitsFromVectorization; // 是否适合向量化 - bool benefitsFromTiling; // 是否适合分块 - int suggestedUnrollFactor; // 建议的展开因子 - - // ========== 性能特征 ========== + // ========== 基础性能特征 ========== size_t instructionCount; // 循环体指令数 size_t memoryOperationCount; // 内存操作数 size_t arithmeticOperationCount; // 算术操作数 double computeToMemoryRatio; // 计算与内存操作比率 - // 构造函数 - LoopCharacteristics(Loop* l) : loop(l), dynamicTripCountExpr(nullptr), - hasKnownBounds(false), lowerBound(nullptr), upperBound(nullptr), + // ========== 基础优化提示 ========== + bool benefitsFromUnrolling; // 是否适合循环展开 + int suggestedUnrollFactor; // 建议的展开因子 + + // 构造函数 - 简化的基础分析初始化 + LoopCharacteristics(Loop* l) : loop(l), isCountingLoop(false), isSimpleForLoop(false), hasComplexControlFlow(false), - isInnermost(false), isParallel(false), benefitsFromUnrolling(false), - benefitsFromVectorization(false), benefitsFromTiling(false), - suggestedUnrollFactor(1), instructionCount(0), memoryOperationCount(0), + isInnermost(false), hasKnownBounds(false), isPure(false), + accessesOnlyLocalMemory(false), hasNoMemoryAliasConflicts(false), + benefitsFromUnrolling(false), suggestedUnrollFactor(1), + instructionCount(0), memoryOperationCount(0), arithmeticOperationCount(0), computeToMemoryRatio(0.0) {} }; @@ -121,7 +114,7 @@ public: return CharacteristicsMap; } - // ========== 查询接口 ========== + // ========== 核心查询接口 ========== /** * 获取所有计数循环 @@ -137,12 +130,38 @@ public: } /** - * 获取所有可向量化循环 + * 获取所有纯循环(无副作用) */ - std::vector getVectorizableLoops() const { + std::vector getPureLoops() const { std::vector result; for (const auto& [loop, chars] : CharacteristicsMap) { - if (chars->benefitsFromVectorization) { + if (chars->isPure) { + result.push_back(loop); + } + } + return result; + } + + /** + * 获取所有只访问局部内存的循环 + */ + std::vector getLocalMemoryOnlyLoops() const { + std::vector result; + for (const auto& [loop, chars] : CharacteristicsMap) { + if (chars->accessesOnlyLocalMemory) { + result.push_back(loop); + } + } + return result; + } + + /** + * 获取所有无内存别名冲突的循环 + */ + std::vector getNoAliasConflictLoops() const { + std::vector result; + for (const auto& [loop, chars] : CharacteristicsMap) { + if (chars->hasNoMemoryAliasConflicts) { result.push_back(loop); } } @@ -152,7 +171,7 @@ public: /** * 获取所有适合展开的循环 */ - std::vector getUnrollCandidateLoops() const { + std::vector getUnrollingCandidates() const { std::vector result; for (const auto& [loop, chars] : CharacteristicsMap) { if (chars->benefitsFromUnrolling) { @@ -162,32 +181,6 @@ public: return result; } - /** - * 获取所有可并行化循环 - */ - std::vector getParallelizableLoops() const { - std::vector result; - for (const auto& [loop, chars] : CharacteristicsMap) { - if (chars->isParallel) { - result.push_back(loop); - } - } - return result; - } - - /** - * 获取所有有静态已知循环次数的循环 - */ - std::vector getStaticBoundLoops() const { - std::vector result; - for (const auto& [loop, chars] : CharacteristicsMap) { - if (chars->staticTripCount.has_value()) { - result.push_back(loop); - } - } - return result; - } - /** * 根据热度排序循环 (用于优化优先级) */ @@ -207,24 +200,24 @@ public: return result; } - // ========== 统计接口 ========== + // ========== 基础统计接口 ========== /** - * 获取优化候选统计 + * 获取基础优化统计信息 */ - struct OptimizationStats { + struct BasicOptimizationStats { size_t totalLoops; size_t countingLoops; - size_t vectorizableLoops; - size_t unrollCandidates; - size_t parallelizableLoops; - size_t staticBoundLoops; + size_t unrollingCandidates; + size_t pureLoops; + size_t localMemoryOnlyLoops; + size_t noAliasConflictLoops; double avgInstructionCount; double avgComputeMemoryRatio; }; - OptimizationStats getOptimizationStats() const { - OptimizationStats stats = {}; + BasicOptimizationStats getOptimizationStats() const { + BasicOptimizationStats stats = {}; stats.totalLoops = CharacteristicsMap.size(); size_t totalInstructions = 0; @@ -232,10 +225,10 @@ public: for (const auto& [loop, chars] : CharacteristicsMap) { if (chars->isCountingLoop) stats.countingLoops++; - if (chars->benefitsFromVectorization) stats.vectorizableLoops++; - if (chars->benefitsFromUnrolling) stats.unrollCandidates++; - if (chars->isParallel) stats.parallelizableLoops++; - if (chars->staticTripCount.has_value()) stats.staticBoundLoops++; + if (chars->benefitsFromUnrolling) stats.unrollingCandidates++; + if (chars->isPure) stats.pureLoops++; + if (chars->accessesOnlyLocalMemory) stats.localMemoryOnlyLoops++; + if (chars->hasNoMemoryAliasConflicts) stats.noAliasConflictLoops++; totalInstructions += chars->instructionCount; totalComputeMemoryRatio += chars->computeToMemoryRatio; @@ -258,8 +251,8 @@ private: }; /** - * @brief 循环特征分析遍 - * 基于循环分析结果,分析每个循环的特征信息,为优化决策提供依据 + * @brief 基础循环特征分析遍 + * 在循环规范化前执行,进行基础的循环特征分析,为后续精确分析提供基础 */ class LoopCharacteristicsPass : public AnalysisPass { public: @@ -278,24 +271,42 @@ public: std::unique_ptr getResult() override { return std::move(CurrentResult); } private: - std::unique_ptr CurrentResult; // 当前函数的分析结果 + std::unique_ptr CurrentResult; - // 内部分析方法 - void analyzeLoop(Loop* loop, LoopCharacteristics* characteristics, AnalysisManager &AM); - void identifyInductionVariables(Loop* loop, LoopCharacteristics* characteristics); - void identifyLoopInvariants(Loop* loop, LoopCharacteristics* characteristics); - void analyzeLoopBounds(Loop* loop, LoopCharacteristics* characteristics); + // ========== 核心分析方法 ========== + void analyzeLoop(Loop* loop, LoopCharacteristics* characteristics, AnalysisManager &AM, + AliasAnalysisResult* aliasAnalysis, SideEffectAnalysisResult* sideEffectAnalysis); + + // 基础循环形式分析 void analyzeLoopForm(Loop* loop, LoopCharacteristics* characteristics); - void analyzeMemoryAccessPatterns(Loop* loop, LoopCharacteristics* characteristics); - void evaluateOptimizationOpportunities(Loop* loop, LoopCharacteristics* characteristics); + + // 基础性能指标计算 void computePerformanceMetrics(Loop* loop, LoopCharacteristics* characteristics); - // 辅助方法 - bool isInductionVariable(Value* val, Loop* loop); - bool isLoopInvariant(Value* val, Loop* loop); - bool hasLoopCarriedDependence(Loop* loop); - int estimateUnrollFactor(Loop* loop); - bool benefitsFromVectorization(Loop* loop); + // 基础纯度和副作用分析 + void analyzePurityAndSideEffects(Loop* loop, LoopCharacteristics* characteristics, + SideEffectAnalysisResult* sideEffectAnalysis); + + // 基础归纳变量识别 + void identifyBasicInductionVariables(Loop* loop, LoopCharacteristics* characteristics); + + // 基础循环不变量识别 + void identifyBasicLoopInvariants(Loop* loop, LoopCharacteristics* characteristics); + + // 基础边界分析 + void analyzeBasicLoopBounds(Loop* loop, LoopCharacteristics* characteristics); + + // 基础内存访问模式分析 + void analyzeBasicMemoryAccessPatterns(Loop* loop, LoopCharacteristics* characteristics, + AliasAnalysisResult* aliasAnalysis); + + // 基础优化评估 + void evaluateBasicOptimizationOpportunities(Loop* loop, LoopCharacteristics* characteristics); + + // ========== 辅助方法 ========== + bool isBasicInductionVariable(Value* val, Loop* loop); + bool isBasicLoopInvariant(Value* val, Loop* loop); + bool hasSimpleMemoryPattern(Loop* loop); // 简单的内存模式检查 }; } // namespace sysy diff --git a/src/include/midend/Pass/Analysis/LoopVectorization.h b/src/include/midend/Pass/Analysis/LoopVectorization.h new file mode 100644 index 0000000..56e0b02 --- /dev/null +++ b/src/include/midend/Pass/Analysis/LoopVectorization.h @@ -0,0 +1,250 @@ +#pragma once + +#include "Pass.h" +#include "Loop.h" +#include "LoopCharacteristics.h" +#include "AliasAnalysis.h" +#include "SideEffectAnalysis.h" +#include +#include +#include +#include +#include + +namespace sysy { + +/** + * @brief 依赖类型枚举 - 只考虑真正影响并行性的依赖 + * + * 依赖类型分析说明: + * - TRUE_DEPENDENCE (RAW): 真依赖,必须保持原始执行顺序,是最关键的依赖 + * - ANTI_DEPENDENCE (WAR): 反依赖,影响指令重排序,可通过寄存器重命名等技术缓解 + * - OUTPUT_DEPENDENCE (WAW): 输出依赖,相对较少但需要考虑,可通过变量私有化解决 + * + */ +enum class DependenceType { + TRUE_DEPENDENCE, // 真依赖 (RAW) - 读后写流依赖,最重要的依赖类型 + ANTI_DEPENDENCE, // 反依赖 (WAR) - 写后读反向依赖,影响指令重排序 + OUTPUT_DEPENDENCE // 输出依赖 (WAW) - 写后写,相对较少但需要考虑 +}; + +/** + * @brief 依赖向量 - 表示两个内存访问之间的迭代距离 + * 例如:a[i] 和 a[i+1] 之间的依赖向量是 [1] + * a[i][j] 和 a[i+1][j-2] 之间的依赖向量是 [1,-2] + */ +struct DependenceVector { + std::vector distances; // 每个循环层次的依赖距离 + bool isConstant; // 是否为常量距离 + bool isKnown; // 是否已知距离 + + DependenceVector(size_t loopDepth) : distances(loopDepth, 0), isConstant(false), isKnown(false) {} + + // 检查是否为循环无关依赖 + bool isLoopIndependent() const { + for (int dist : distances) { + if (dist != 0) return false; + } + return true; + } + + // 获取词典序方向向量 + std::vector getDirectionVector() const; + + // 检查是否可以通过向量化处理 + bool isVectorizationSafe() const; +}; + +/** + * @brief 精确依赖关系 - 包含依赖向量的详细依赖信息 + */ +struct PreciseDependence { + Instruction* source; + Instruction* sink; + DependenceType type; + DependenceVector dependenceVector; + Value* memoryLocation; + + // 并行化相关 + bool allowsParallelization; // 是否允许并行化 + bool requiresSynchronization; // 是否需要同步 + bool isReductionDependence; // 是否为归约依赖 + + PreciseDependence(size_t loopDepth) : dependenceVector(loopDepth), + allowsParallelization(true), requiresSynchronization(false), isReductionDependence(false) {} +}; + +/** + * @brief 向量化分析信息 - 暂时搁置,保留接口 + */ +struct VectorizationAnalysis { + bool isVectorizable; // 固定为false,暂不支持 + int suggestedVectorWidth; // 固定为1 + std::vector preventingFactors; // 阻止向量化的因素 + + VectorizationAnalysis() : isVectorizable(false), suggestedVectorWidth(1) { + preventingFactors.push_back("Vectorization temporarily disabled"); + } +}; + +/** + * @brief 并行化分析信息 + */ +struct ParallelizationAnalysis { + bool isParallelizable; // 是否可并行化 + int suggestedThreadCount; // 建议的线程数 + std::vector preventingFactors; // 阻止并行化的因素 + + // 并行化模式 + enum ParallelizationType { + NONE, // 不可并行化 + EMBARRASSINGLY_PARALLEL, // 完全并行 + REDUCTION_PARALLEL, // 归约并行 + PIPELINE_PARALLEL, // 流水线并行 + CONDITIONAL_PARALLEL // 条件并行 + } parallelType; + + // 负载均衡 + bool hasLoadBalance; // 是否有良好的负载均衡 + bool isDynamicLoadBalanced; // 是否需要动态负载均衡 + double workComplexity; // 工作复杂度估计 + + // 同步需求 + bool requiresReduction; // 是否需要归约操作 + bool requiresBarrier; // 是否需要屏障同步 + std::set sharedVariables; // 共享变量 + std::set reductionVariables; // 归约变量 + std::set privatizableVariables; // 可私有化变量 + + // 内存访问模式 + bool hasMemoryConflicts; // 是否有内存冲突 + bool hasReadOnlyAccess; // 是否只有只读访问 + bool hasIndependentAccess; // 是否有独立的内存访问 + + // 并行化收益评估 + double parallelizationBenefit; // 并行化收益估计 (0-1) + size_t communicationCost; // 通信开销估计 + size_t synchronizationCost; // 同步开销估计 + + ParallelizationAnalysis() : isParallelizable(false), suggestedThreadCount(1), parallelType(NONE), + hasLoadBalance(true), isDynamicLoadBalanced(false), workComplexity(0.0), requiresReduction(false), + requiresBarrier(false), hasMemoryConflicts(false), hasReadOnlyAccess(false), hasIndependentAccess(false), + parallelizationBenefit(0.0), communicationCost(0), synchronizationCost(0) {} +}; + +/** + * @brief 循环向量化/并行化分析结果 + */ +class LoopVectorizationResult : public AnalysisResultBase { +private: + Function* AssociatedFunction; + std::map VectorizationMap; + std::map ParallelizationMap; + std::map> DependenceMap; + +public: + LoopVectorizationResult(Function* F) : AssociatedFunction(F) {} + ~LoopVectorizationResult() override = default; + + // 基础接口 + void addVectorizationAnalysis(Loop* loop, VectorizationAnalysis analysis) { + VectorizationMap[loop] = std::move(analysis); + } + + void addParallelizationAnalysis(Loop* loop, ParallelizationAnalysis analysis) { + ParallelizationMap[loop] = std::move(analysis); + } + + void addDependenceAnalysis(Loop* loop, std::vector dependences) { + DependenceMap[loop] = std::move(dependences); + } + + // 查询接口 + const VectorizationAnalysis* getVectorizationAnalysis(Loop* loop) const { + auto it = VectorizationMap.find(loop); + return it != VectorizationMap.end() ? &it->second : nullptr; + } + + const ParallelizationAnalysis* getParallelizationAnalysis(Loop* loop) const { + auto it = ParallelizationMap.find(loop); + return it != ParallelizationMap.end() ? &it->second : nullptr; + } + + const std::vector* getPreciseDependences(Loop* loop) const { + auto it = DependenceMap.find(loop); + return it != DependenceMap.end() ? &it->second : nullptr; + } + + // 统计接口 + size_t getVectorizableLoopCount() const; + size_t getParallelizableLoopCount() const; + + // 优化建议 + std::vector getVectorizationCandidates() const; + std::vector getParallelizationCandidates() const; + + // 打印分析结果 + void print() const; +}; + +/** + * @brief 循环向量化/并行化分析遍 + * 在循环规范化后执行,进行精确的依赖向量分析和向量化/并行化可行性评估 + * 专注于并行化分析,向量化功能暂时搁置 + */ +class LoopVectorizationPass : public AnalysisPass { +public: + // 唯一的 Pass ID + static void *ID; + + LoopVectorizationPass() : AnalysisPass("LoopVectorization", Pass::Granularity::Function) {} + + // 实现 getPassID + void *getPassID() const override { return &ID; } + + // 核心运行方法 + bool runOnFunction(Function *F, AnalysisManager &AM) override; + + // 获取分析结果 + std::unique_ptr getResult() override { return std::move(CurrentResult); } + +private: + std::unique_ptr CurrentResult; + + // ========== 主要分析方法 ========== + void analyzeLoop(Loop* loop, LoopCharacteristics* characteristics, + AliasAnalysisResult* aliasAnalysis, SideEffectAnalysisResult* sideEffectAnalysis); + + // ========== 依赖向量分析 ========== + std::vector computeDependenceVectors(Loop* loop, AliasAnalysisResult* aliasAnalysis); + DependenceVector computeAccessDependence(Instruction* inst1, Instruction* inst2, Loop* loop); + bool areAccessesAffinelyRelated(Value* ptr1, Value* ptr2, Loop* loop); + + // ========== 向量化分析 (暂时搁置) ========== + VectorizationAnalysis analyzeVectorizability(Loop* loop, const std::vector& dependences, + LoopCharacteristics* characteristics); + + // ========== 并行化分析 ========== + ParallelizationAnalysis analyzeParallelizability(Loop* loop, const std::vector& dependences, + LoopCharacteristics* characteristics); + bool checkParallelizationLegality(Loop* loop, const std::vector& dependences); + int estimateOptimalThreadCount(Loop* loop, LoopCharacteristics* characteristics); + ParallelizationAnalysis::ParallelizationType determineParallelizationType(Loop* loop, + const std::vector& dependences); + + // ========== 并行化专用分析方法 ========== + void analyzeReductionPatterns(Loop* loop, ParallelizationAnalysis* analysis); + void analyzeMemoryAccessPatterns(Loop* loop, ParallelizationAnalysis* analysis, AliasAnalysisResult* aliasAnalysis); + void estimateParallelizationBenefit(Loop* loop, ParallelizationAnalysis* analysis, LoopCharacteristics* characteristics); + void identifyPrivatizableVariables(Loop* loop, ParallelizationAnalysis* analysis); + void analyzeSynchronizationNeeds(Loop* loop, ParallelizationAnalysis* analysis, const std::vector& dependences); + + // ========== 辅助方法 ========== + std::vector extractInductionCoefficients(Value* ptr, Loop* loop); + bool isConstantStride(Value* ptr, Loop* loop, int& stride); + bool isIndependentMemoryAccess(Value* ptr1, Value* ptr2, Loop* loop); + double estimateWorkComplexity(Loop* loop); + bool hasReductionPattern(Value* var, Loop* loop); +}; + +} // namespace sysy diff --git a/src/midend/CMakeLists.txt b/src/midend/CMakeLists.txt index b326585..251408b 100644 --- a/src/midend/CMakeLists.txt +++ b/src/midend/CMakeLists.txt @@ -8,6 +8,7 @@ add_library(midend_lib STATIC Pass/Analysis/Liveness.cpp Pass/Analysis/Loop.cpp Pass/Analysis/LoopCharacteristics.cpp + Pass/Analysis/LoopVectorization.cpp Pass/Analysis/AliasAnalysis.cpp Pass/Analysis/SideEffectAnalysis.cpp Pass/Analysis/CallGraphAnalysis.cpp diff --git a/src/midend/Pass/Analysis/Loop.cpp b/src/midend/Pass/Analysis/Loop.cpp index 4c2233f..65931c8 100644 --- a/src/midend/Pass/Analysis/Loop.cpp +++ b/src/midend/Pass/Analysis/Loop.cpp @@ -1,5 +1,7 @@ #include "Dom.h" // 确保包含 DominatorTreeAnalysisPass 的定义 #include "Loop.h" // +#include "AliasAnalysis.h" // 添加别名分析依赖 +#include "SideEffectAnalysis.h" // 添加副作用分析依赖 #include #include // 用于 BFS 遍历设置循环层级 @@ -124,6 +126,18 @@ bool LoopAnalysisPass::runOnFunction(Function *F, AnalysisManager &AM) { return false; } + // 获取别名分析结果 - 用于循环内存访问分析 + AliasAnalysisResult *aliasAnalysis = AM.getAnalysisResult(F); + if (DEBUG && aliasAnalysis) { + std::cout << "Loop Analysis: Using alias analysis results for enhanced memory pattern detection" << std::endl; + } + + // 获取副作用分析结果 - 用于循环纯度分析 + SideEffectAnalysisResult *sideEffectAnalysis = AM.getAnalysisResult(); + if (DEBUG && sideEffectAnalysis) { + std::cout << "Loop Analysis: Using side effect analysis results for loop purity detection" << std::endl; + } + CurrentResult = std::make_unique(F); bool changed = false; // 循环分析本身不修改IR,所以通常返回false @@ -335,4 +349,67 @@ bool LoopAnalysisPass::runOnFunction(Function *F, AnalysisManager &AM) { return changed; } +// ========== Loop 类的新增方法实现 ========== + +bool Loop::mayHaveSideEffects(SideEffectAnalysisResult* sideEffectAnalysis) const { + if (!sideEffectAnalysis) return true; // 保守假设 + + for (BasicBlock* bb : LoopBlocks) { + for (auto& inst : bb->getInstructions()) { + if (sideEffectAnalysis->hasSideEffect(inst.get())) { + return true; + } + } + } + return false; +} + +bool Loop::accessesGlobalMemory(AliasAnalysisResult* aliasAnalysis) const { + if (!aliasAnalysis) return true; // 保守假设 + + for (BasicBlock* bb : LoopBlocks) { + for (auto& inst : bb->getInstructions()) { + if (auto* loadInst = dynamic_cast(inst.get())) { + if (!aliasAnalysis->isLocalArray(loadInst->getPointer())) { + return true; + } + } else if (auto* storeInst = dynamic_cast(inst.get())) { + if (!aliasAnalysis->isLocalArray(storeInst->getPointer())) { + return true; + } + } + } + } + return false; +} + +bool Loop::hasMemoryAliasConflicts(AliasAnalysisResult* aliasAnalysis) const { + if (!aliasAnalysis) return true; // 保守假设 + + std::vector memoryAccesses; + + // 收集所有内存访问 + for (BasicBlock* bb : LoopBlocks) { + for (auto& inst : bb->getInstructions()) { + if (auto* loadInst = dynamic_cast(inst.get())) { + memoryAccesses.push_back(loadInst->getPointer()); + } else if (auto* storeInst = dynamic_cast(inst.get())) { + memoryAccesses.push_back(storeInst->getPointer()); + } + } + } + + // 检查两两之间是否有别名 + for (size_t i = 0; i < memoryAccesses.size(); ++i) { + for (size_t j = i + 1; j < memoryAccesses.size(); ++j) { + auto aliasType = aliasAnalysis->queryAlias(memoryAccesses[i], memoryAccesses[j]); + if (aliasType == AliasType::SELF_ALIAS || aliasType == AliasType::POSSIBLE_ALIAS) { + return true; + } + } + } + + return false; +} + } // namespace sysy \ No newline at end of file diff --git a/src/midend/Pass/Analysis/LoopCharacteristics.cpp b/src/midend/Pass/Analysis/LoopCharacteristics.cpp index 864b82a..9bfd19e 100644 --- a/src/midend/Pass/Analysis/LoopCharacteristics.cpp +++ b/src/midend/Pass/Analysis/LoopCharacteristics.cpp @@ -2,6 +2,8 @@ #include "Dom.h" #include "Loop.h" #include "Liveness.h" +#include "AliasAnalysis.h" +#include "SideEffectAnalysis.h" #include #include @@ -14,7 +16,7 @@ namespace sysy { void *LoopCharacteristicsPass::ID = (void *)&LoopCharacteristicsPass::ID; void LoopCharacteristicsResult::print() const { - if (!DEBUG) return; // 只有在 DEBUG 模式下才打印 + if (!DEBUG) return; std::cout << "\n--- Loop Characteristics Analysis Results for Function: " << AssociatedFunction->getName() << " ---" << std::endl; @@ -26,13 +28,13 @@ void LoopCharacteristicsResult::print() const { // 打印统计信息 auto stats = getOptimizationStats(); - std::cout << "\n=== Optimization Statistics ===" << std::endl; + std::cout << "\n=== Basic Loop Characteristics Statistics ===" << std::endl; std::cout << "Total Loops: " << stats.totalLoops << std::endl; std::cout << "Counting Loops: " << stats.countingLoops << std::endl; - std::cout << "Vectorizable Loops: " << stats.vectorizableLoops << std::endl; - std::cout << "Unroll Candidates: " << stats.unrollCandidates << std::endl; - std::cout << "Parallelizable Loops: " << stats.parallelizableLoops << std::endl; - std::cout << "Static Bound Loops: " << stats.staticBoundLoops << std::endl; + std::cout << "Unrolling Candidates: " << stats.unrollingCandidates << std::endl; + std::cout << "Pure Loops: " << stats.pureLoops << std::endl; + std::cout << "Local Memory Only Loops: " << stats.localMemoryOnlyLoops << std::endl; + std::cout << "No Alias Conflict Loops: " << stats.noAliasConflictLoops << std::endl; std::cout << "Avg Instructions per Loop: " << stats.avgInstructionCount << std::endl; std::cout << "Avg Compute/Memory Ratio: " << stats.avgComputeMemoryRatio << std::endl; @@ -58,6 +60,9 @@ void LoopCharacteristicsResult::print() const { if (chars->isSimpleForLoop) std::cout << "SimpleFor "; if (chars->isInnermost) std::cout << "Innermost "; if (chars->hasComplexControlFlow) std::cout << "Complex "; + if (chars->isPure) std::cout << "Pure "; + if (chars->accessesOnlyLocalMemory) std::cout << "LocalMemOnly "; + if (chars->hasNoMemoryAliasConflicts) std::cout << "NoAliasConflicts "; std::cout << std::endl; // 边界信息 @@ -72,18 +77,12 @@ void LoopCharacteristicsResult::print() const { std::cout << " Optimization Opportunities: "; if (chars->benefitsFromUnrolling) std::cout << "Unroll(factor=" << chars->suggestedUnrollFactor << ") "; - if (chars->benefitsFromVectorization) std::cout << "Vectorize "; - if (chars->benefitsFromTiling) std::cout << "Tile "; - if (chars->isParallel) std::cout << "Parallelize "; std::cout << std::endl; // 归纳变量 if (!chars->basicInductionVars.empty()) { std::cout << " Basic Induction Vars: " << chars->basicInductionVars.size() << std::endl; } - if (!chars->derivedInductionVars.empty()) { - std::cout << " Derived Induction Vars: " << chars->derivedInductionVars.size() << std::endl; - } // 循环不变量 if (!chars->loopInvariants.empty()) { @@ -106,7 +105,7 @@ bool LoopCharacteristicsPass::runOnFunction(Function *F, AnalysisManager &AM) { if (DEBUG) std::cout << "Running LoopCharacteristicsPass on function: " << F->getName() << std::endl; - // 获取循环分析结果 - 这是我们的核心依赖 + // 获取循环分析结果 auto* loopAnalysisResult = AM.getAnalysisResult(F); if (!loopAnalysisResult) { std::cerr << "Error: LoopAnalysisResult not available for function " << F->getName() << std::endl; @@ -120,6 +119,15 @@ bool LoopCharacteristicsPass::runOnFunction(Function *F, AnalysisManager &AM) { return false; } + // 获取别名分析和副作用分析结果 + auto* aliasAnalysis = AM.getAnalysisResult(F); + auto* sideEffectAnalysis = AM.getAnalysisResult(); + + if (DEBUG) { + if (aliasAnalysis) std::cout << "LoopCharacteristics: Using alias analysis results" << std::endl; + if (sideEffectAnalysis) std::cout << "LoopCharacteristics: Using side effect analysis results" << std::endl; + } + CurrentResult = std::make_unique(F); // 分析每个循环的特征 @@ -127,8 +135,8 @@ bool LoopCharacteristicsPass::runOnFunction(Function *F, AnalysisManager &AM) { Loop* loop = loop_ptr.get(); auto characteristics = std::make_unique(loop); - // 执行各种特征分析 - analyzeLoop(loop, characteristics.get(), AM); + // 执行各种特征分析,传递分析结果 + analyzeLoop(loop, characteristics.get(), AM, aliasAnalysis, sideEffectAnalysis); // 添加到结果中 CurrentResult->addLoopCharacteristics(std::move(characteristics)); @@ -138,25 +146,28 @@ bool LoopCharacteristicsPass::runOnFunction(Function *F, AnalysisManager &AM) { std::cout << "LoopCharacteristicsPass completed for function: " << F->getName() << std::endl; auto stats = CurrentResult->getOptimizationStats(); std::cout << "Analyzed " << stats.totalLoops << " loops, found " - << stats.vectorizableLoops << " vectorizable, " - << stats.unrollCandidates << " unrollable" << std::endl; + << stats.countingLoops << " counting loops, " + << stats.unrollingCandidates << " unroll candidates" << std::endl; } return false; // 特征分析不修改IR } -void LoopCharacteristicsPass::analyzeLoop(Loop* loop, LoopCharacteristics* characteristics, AnalysisManager &AM) { +void LoopCharacteristicsPass::analyzeLoop(Loop* loop, LoopCharacteristics* characteristics, + AnalysisManager &AM, AliasAnalysisResult* aliasAnalysis, + SideEffectAnalysisResult* sideEffectAnalysis) { if (DEBUG) - std::cout << " Analyzing characteristics of loop: " << loop->getName() << std::endl; + std::cout << " Analyzing basic characteristics of loop: " << loop->getName() << std::endl; - // 按顺序执行各种分析 + // 按顺序执行基础分析 computePerformanceMetrics(loop, characteristics); analyzeLoopForm(loop, characteristics); - identifyInductionVariables(loop, characteristics); - identifyLoopInvariants(loop, characteristics); - analyzeLoopBounds(loop, characteristics); - analyzeMemoryAccessPatterns(loop, characteristics); - evaluateOptimizationOpportunities(loop, characteristics); + analyzePurityAndSideEffects(loop, characteristics, sideEffectAnalysis); + identifyBasicInductionVariables(loop, characteristics); + identifyBasicLoopInvariants(loop, characteristics); + analyzeBasicLoopBounds(loop, characteristics); + analyzeBasicMemoryAccessPatterns(loop, characteristics, aliasAnalysis); + evaluateBasicOptimizationOpportunities(loop, characteristics); } void LoopCharacteristicsPass::computePerformanceMetrics(Loop* loop, LoopCharacteristics* characteristics) { @@ -210,140 +221,132 @@ void LoopCharacteristicsPass::analyzeLoopForm(Loop* loop, LoopCharacteristics* c characteristics->isCountingLoop = isSimple && loop->isInnermost() && exitingBlocks.size() == 1; } -void LoopCharacteristicsPass::identifyInductionVariables(Loop* loop, LoopCharacteristics* characteristics) { - // 寻找基本归纳变量 +void LoopCharacteristicsPass::analyzePurityAndSideEffects(Loop* loop, LoopCharacteristics* characteristics, + SideEffectAnalysisResult* sideEffectAnalysis) { + if (!sideEffectAnalysis) { + // 没有副作用分析结果,保守处理 + characteristics->isPure = false; + return; + } + + // 检查循环是否有副作用 + characteristics->isPure = !loop->mayHaveSideEffects(sideEffectAnalysis); + + if (DEBUG && characteristics->isPure) { + std::cout << " Loop " << loop->getName() << " is identified as PURE (no side effects)" << std::endl; + } +} + +void LoopCharacteristicsPass::analyzeBasicMemoryAccessPatterns(Loop* loop, LoopCharacteristics* characteristics, + AliasAnalysisResult* aliasAnalysis) { + if (!aliasAnalysis) { + // 没有别名分析结果,保守处理 + characteristics->accessesOnlyLocalMemory = false; + characteristics->hasNoMemoryAliasConflicts = false; + return; + } + + // 检查是否只访问局部内存 + characteristics->accessesOnlyLocalMemory = !loop->accessesGlobalMemory(aliasAnalysis); + + // 检查是否有内存别名冲突 + characteristics->hasNoMemoryAliasConflicts = !loop->hasMemoryAliasConflicts(aliasAnalysis); + + if (DEBUG) { + if (characteristics->accessesOnlyLocalMemory) { + std::cout << " Loop " << loop->getName() << " accesses ONLY LOCAL MEMORY" << std::endl; + } + if (characteristics->hasNoMemoryAliasConflicts) { + std::cout << " Loop " << loop->getName() << " has NO MEMORY ALIAS CONFLICTS" << std::endl; + } + } + + // 分析基础的内存访问模式 + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + if (auto* loadInst = dynamic_cast(inst.get())) { + Value* ptr = loadInst->getPointer(); + + auto& pattern = characteristics->memoryPatterns[ptr]; + pattern.loadInsts.push_back(loadInst); + pattern.isArrayParameter = aliasAnalysis->isFunctionParameter(ptr); + pattern.isGlobalArray = aliasAnalysis->isGlobalArray(ptr); + pattern.hasConstantIndices = aliasAnalysis->hasConstantAccess(ptr); + + } else if (auto* storeInst = dynamic_cast(inst.get())) { + Value* ptr = storeInst->getPointer(); + + auto& pattern = characteristics->memoryPatterns[ptr]; + pattern.storeInsts.push_back(storeInst); + pattern.isArrayParameter = aliasAnalysis->isFunctionParameter(ptr); + pattern.isGlobalArray = aliasAnalysis->isGlobalArray(ptr); + pattern.hasConstantIndices = aliasAnalysis->hasConstantAccess(ptr); + } + } + } +} + +void LoopCharacteristicsPass::identifyBasicInductionVariables(Loop* loop, LoopCharacteristics* characteristics) { + // 寻找基本归纳变量(简化版本) BasicBlock* header = loop->getHeader(); - // 遍历循环头的phi指令,寻找归纳变量模式 + // 遍历循环头的phi指令,寻找基本归纳变量模式 for (auto& inst : header->getInstructions()) { auto* phiInst = dynamic_cast(inst.get()); if (!phiInst) continue; - // 检查phi指令是否符合归纳变量模式 - if (isInductionVariable(phiInst, loop)) { + // 检查phi指令是否符合基本归纳变量模式 + if (isBasicInductionVariable(phiInst, loop)) { characteristics->basicInductionVars.push_back(phiInst); - - // 分析步长 (简化版本) - characteristics->inductionSteps[phiInst] = 1; // 默认步长为1 + characteristics->inductionSteps[phiInst] = 1; // 简化:默认步长为1 if (DEBUG) std::cout << " Found basic induction variable: " << phiInst->getName() << std::endl; } } - - // 寻找派生归纳变量 (基于基本归纳变量的线性表达式) - for (BasicBlock* bb : loop->getBlocks()) { - for (auto& inst : bb->getInstructions()) { - // 检查是否为基于归纳变量的计算 - if (auto* binInst = dynamic_cast(inst.get())) { - // 简化:检查操作数是否包含基本归纳变量 - for (Value* basicIV : characteristics->basicInductionVars) { - // 这里需要更复杂的分析来确定派生关系 - // 暂时简化处理 - } - } - } - } } -void LoopCharacteristicsPass::identifyLoopInvariants(Loop* loop, LoopCharacteristics* characteristics) { - // 收集循环不变量 +void LoopCharacteristicsPass::identifyBasicLoopInvariants(Loop* loop, LoopCharacteristics* characteristics) { + // 收集基础循环不变量(简化版本) for (BasicBlock* bb : loop->getBlocks()) { for (auto& inst : bb->getInstructions()) { Value* val = inst.get(); // 跳过phi指令和终结指令 - if (dynamic_cast(val)) { - continue; - } - - // 检查是否为终结指令 + if (dynamic_cast(val)) continue; if (auto* instPtr = dynamic_cast(val)) { - if (instPtr->isTerminator()) { - continue; - } + if (instPtr->isTerminator()) continue; } - if (isLoopInvariant(val, loop)) { + if (isBasicLoopInvariant(val, loop)) { characteristics->loopInvariants.insert(val); characteristics->invariantInsts.insert(static_cast(val)); if (DEBUG) - std::cout << " Found loop invariant: " << val->getName() << std::endl; + std::cout << " Found basic loop invariant: " << val->getName() << std::endl; } } } } -void LoopCharacteristicsPass::analyzeLoopBounds(Loop* loop, LoopCharacteristics* characteristics) { - // 简化的边界分析 - // 在实际实现中,需要分析循环的条件表达式来确定边界 - - // 检查是否有静态可确定的循环次数 +void LoopCharacteristicsPass::analyzeBasicLoopBounds(Loop* loop, LoopCharacteristics* characteristics) { + // 简化的基础边界分析 + // 检查是否有静态可确定的循环次数(简化版本) if (characteristics->isCountingLoop && !characteristics->basicInductionVars.empty()) { // 简化:如果是计数循环且有基本归纳变量,尝试确定循环次数 - // 这里需要更复杂的符号执行或约束求解 - - // 暂时设置一个保守估计 if (characteristics->instructionCount < 10) { - characteristics->staticTripCount = 100; // 假设小循环执行100次 + characteristics->staticTripCount = 100; // 简化估计 characteristics->hasKnownBounds = true; + + if (DEBUG) { + std::cout << " Estimated static trip count: " << *characteristics->staticTripCount << std::endl; + } } } } -void LoopCharacteristicsPass::analyzeMemoryAccessPatterns(Loop* loop, LoopCharacteristics* characteristics) { - // 使用外部别名分析结果 - 大幅简化版本 - std::map> accessMap; - - // 收集所有内存访问 - for (BasicBlock* bb : loop->getBlocks()) { - for (auto& inst : bb->getInstructions()) { - if (auto* loadInst = dynamic_cast(inst.get())) { - Value* ptr = loadInst->getPointer(); - accessMap[ptr].push_back(loadInst); - } else if (auto* storeInst = dynamic_cast(inst.get())) { - Value* ptr = storeInst->getPointer(); - accessMap[ptr].push_back(storeInst); - } - } - } - - // 分析每个内存位置的访问模式 - for (auto& [ptr, accesses] : accessMap) { - LoopCharacteristics::MemoryAccessPattern pattern; - - // 初始化基本字段 - pattern.isSequential = true; // 简化:假设大部分访问是顺序的 - pattern.isStrided = false; - pattern.stride = 1; - - // 使用别名分析结果 (简化:设置默认值,实际应该查询别名分析) - pattern.aliasType = AliasType::UNKNOWN_ALIAS; // 保守默认值 - pattern.isArrayParameter = false; - pattern.isGlobalArray = false; - pattern.hasConstantIndices = true; - - // 分类load和store - for (Instruction* inst : accesses) { - if (dynamic_cast(inst)) { - pattern.loadInsts.push_back(inst); - } else { - pattern.storeInsts.push_back(inst); - } - } - - characteristics->memoryPatterns[ptr] = pattern; - - if (DEBUG && (static_cast(pattern.aliasType) >= 2)) { // POSSIBLE_ALIAS及以上 - std::cout << " Found potential aliasing for memory access, type: " - << static_cast(pattern.aliasType) << std::endl; - } - } -} - -void LoopCharacteristicsPass::evaluateOptimizationOpportunities(Loop* loop, LoopCharacteristics* characteristics) { - // 评估循环展开机会 +void LoopCharacteristicsPass::evaluateBasicOptimizationOpportunities(Loop* loop, LoopCharacteristics* characteristics) { + // 评估基础循环展开机会 characteristics->benefitsFromUnrolling = characteristics->isInnermost && characteristics->instructionCount > 3 && @@ -351,27 +354,25 @@ void LoopCharacteristicsPass::evaluateOptimizationOpportunities(Loop* loop, Loop !characteristics->hasComplexControlFlow; if (characteristics->benefitsFromUnrolling) { - characteristics->suggestedUnrollFactor = estimateUnrollFactor(loop); + // 基于循环体大小估算展开因子 + if (characteristics->instructionCount <= 5) characteristics->suggestedUnrollFactor = 8; + else if (characteristics->instructionCount <= 10) characteristics->suggestedUnrollFactor = 4; + else if (characteristics->instructionCount <= 20) characteristics->suggestedUnrollFactor = 2; + else characteristics->suggestedUnrollFactor = 1; + } + + if (DEBUG) { + if (characteristics->benefitsFromUnrolling) { + std::cout << " Loop " << loop->getName() << " benefits from UNROLLING (factor=" + << characteristics->suggestedUnrollFactor << ")" << std::endl; + } } - - // 评估向量化机会 - characteristics->benefitsFromVectorization = benefitsFromVectorization(loop); - - // 评估并行化机会 - characteristics->isParallel = - !hasLoopCarriedDependence(loop) && - characteristics->isCountingLoop; - - // 评估分块机会 (主要针对嵌套循环) - characteristics->benefitsFromTiling = - !loop->isInnermost() && - characteristics->memoryOperationCount > characteristics->arithmeticOperationCount; } // ========== 辅助方法实现 ========== -bool LoopCharacteristicsPass::isInductionVariable(Value* val, Loop* loop) { - // 简化的归纳变量检测 +bool LoopCharacteristicsPass::isBasicInductionVariable(Value* val, Loop* loop) { + // 简化的基础归纳变量检测 auto* phiInst = dynamic_cast(val); if (!phiInst) return false; @@ -381,15 +382,14 @@ bool LoopCharacteristicsPass::isInductionVariable(Value* val, Loop* loop) { // 检查是否有来自循环内的更新 for (auto& [incomingBB, incomingVal] : phiInst->getIncomingValues()) { if (loop->contains(incomingBB)) { - // 简化:如果有来自循环内的值,认为可能是归纳变量 - return true; + return true; // 简化:有来自循环内的值就认为是基础归纳变量 } } return false; } -bool LoopCharacteristicsPass::isLoopInvariant(Value* val, Loop* loop) { +bool LoopCharacteristicsPass::isBasicLoopInvariant(Value* val, Loop* loop) { auto* inst = dynamic_cast(val); if (!inst) return true; // 非指令(如常量)认为是不变的 @@ -398,57 +398,19 @@ bool LoopCharacteristicsPass::isLoopInvariant(Value* val, Loop* loop) { return true; } - // 检查操作数是否都是循环不变的 - // 简化版本:如果是load指令且指针是不变的,认为可能是不变的 + // 简化的基础不变量检测:load指令且指针是循环外的 if (auto* loadInst = dynamic_cast(inst)) { Value* ptr = loadInst->getPointer(); - return isLoopInvariant(ptr, loop); + return isBasicLoopInvariant(ptr, loop); } - // 简化:对于其他指令,保守地认为是变化的 + // 保守:对于其他指令,认为是变化的 return false; } -bool LoopCharacteristicsPass::hasLoopCarriedDependence(Loop* loop) { - // 简化的依赖分析 - // 检查是否有写后读或写后写依赖跨越循环迭代 - - std::set writtenVars; - std::set readVars; - - for (BasicBlock* bb : loop->getBlocks()) { - for (auto& inst : bb->getInstructions()) { - if (auto* storeInst = dynamic_cast(inst.get())) { - writtenVars.insert(storeInst->getPointer()); - } else if (auto* loadInst = dynamic_cast(inst.get())) { - readVars.insert(loadInst->getPointer()); - } - } - } - - // 简化:如果有写后读到同一变量,假设存在依赖 - for (Value* written : writtenVars) { - if (readVars.count(written)) { - return true; // 可能存在依赖 - } - } - - return false; // 保守估计:没有明显依赖 -} - -int LoopCharacteristicsPass::estimateUnrollFactor(Loop* loop) { - // 基于循环体大小估算展开因子 - if (loop->getLoopSize() <= 2) return 8; // 很小的循环 - if (loop->getLoopSize() <= 5) return 4; // 小循环 - if (loop->getLoopSize() <= 10) return 2; // 中等循环 - return 1; // 大循环不建议展开 -} - -bool LoopCharacteristicsPass::benefitsFromVectorization(Loop* loop) { - // 简化的向量化收益评估 - return loop->isInnermost() && // 最内层循环 - loop->isSimpleLoop() && // 简单循环结构 - !hasLoopCarriedDependence(loop); // 没有明显的依赖 +bool LoopCharacteristicsPass::hasSimpleMemoryPattern(Loop* loop) { + // 检查是否有简单的内存访问模式 + return true; // 暂时简化处理 } } // namespace sysy diff --git a/src/midend/Pass/Analysis/LoopVectorization.cpp b/src/midend/Pass/Analysis/LoopVectorization.cpp new file mode 100644 index 0000000..5a6a64c --- /dev/null +++ b/src/midend/Pass/Analysis/LoopVectorization.cpp @@ -0,0 +1,803 @@ +#include "LoopVectorization.h" +#include "Dom.h" +#include "Loop.h" +#include "Liveness.h" +#include "AliasAnalysis.h" +#include "SideEffectAnalysis.h" +#include +#include +#include +#include + +extern int DEBUG; + +namespace sysy { + +// 定义 Pass 的唯一 ID +void *LoopVectorizationPass::ID = (void *)&LoopVectorizationPass::ID; + +std::vector DependenceVector::getDirectionVector() const { + std::vector direction; + direction.reserve(distances.size()); + + for (int dist : distances) { + if (dist > 0) direction.push_back(1); // 前向依赖 + else if (dist < 0) direction.push_back(-1); // 后向依赖 + else direction.push_back(0); // 无依赖 + } + + return direction; +} + +bool DependenceVector::isVectorizationSafe() const { + if (!isKnown) return false; // 未知依赖,不安全 + + // 对于向量化,我们主要关心最内层循环的依赖 + if (distances.empty()) return true; + + int innermostDistance = distances.back(); // 最内层循环的距离 + + // 前向依赖 (距离 > 0) 通常是安全的,可以通过调整向量化顺序处理 + // 后向依赖 (距离 < 0) 通常不安全,会阻止向量化 + // 距离 = 0 表示同一迭代内的依赖,通常安全 + + return innermostDistance >= 0; +} + +size_t LoopVectorizationResult::getVectorizableLoopCount() const { + size_t count = 0; + for (const auto& [loop, analysis] : VectorizationMap) { + if (analysis.isVectorizable) count++; + } + return count; +} + +size_t LoopVectorizationResult::getParallelizableLoopCount() const { + size_t count = 0; + for (const auto& [loop, analysis] : ParallelizationMap) { + if (analysis.isParallelizable) count++; + } + return count; +} + +std::vector LoopVectorizationResult::getVectorizationCandidates() const { + std::vector candidates; + for (const auto& [loop, analysis] : VectorizationMap) { + if (analysis.isVectorizable) { + candidates.push_back(loop); + } + } + + // 按建议的向量宽度排序,优先处理收益更大的循环 + std::sort(candidates.begin(), candidates.end(), + [this](Loop* a, Loop* b) { + const auto& analysisA = VectorizationMap.at(a); + const auto& analysisB = VectorizationMap.at(b); + return analysisA.suggestedVectorWidth > analysisB.suggestedVectorWidth; + }); + + return candidates; +} + +std::vector LoopVectorizationResult::getParallelizationCandidates() const { + std::vector candidates; + for (const auto& [loop, analysis] : ParallelizationMap) { + if (analysis.isParallelizable) { + candidates.push_back(loop); + } + } + + // 按建议的线程数排序 + std::sort(candidates.begin(), candidates.end(), + [this](Loop* a, Loop* b) { + const auto& analysisA = ParallelizationMap.at(a); + const auto& analysisB = ParallelizationMap.at(b); + return analysisA.suggestedThreadCount > analysisB.suggestedThreadCount; + }); + + return candidates; +} + +void LoopVectorizationResult::print() const { + if (!DEBUG) return; + + std::cout << "\n--- Loop Vectorization/Parallelization Analysis Results for Function: " + << AssociatedFunction->getName() << " ---" << std::endl; + + if (VectorizationMap.empty() && ParallelizationMap.empty()) { + std::cout << " No vectorization/parallelization analysis results." << std::endl; + return; + } + + // 统计信息 + std::cout << "\n=== Summary ===" << std::endl; + std::cout << "Total Loops Analyzed: " << VectorizationMap.size() << std::endl; + std::cout << "Vectorizable Loops: " << getVectorizableLoopCount() << std::endl; + std::cout << "Parallelizable Loops: " << getParallelizableLoopCount() << std::endl; + + // 详细分析结果 + for (const auto& [loop, vecAnalysis] : VectorizationMap) { + std::cout << "\n--- Loop: " << loop->getName() << " ---" << std::endl; + + // 向量化分析 (暂时搁置) + std::cout << " Vectorization: " << (vecAnalysis.isVectorizable ? "YES" : "NO") << std::endl; + if (!vecAnalysis.preventingFactors.empty()) { + std::cout << " Preventing Factors: "; + for (const auto& factor : vecAnalysis.preventingFactors) { + std::cout << factor << " "; + } + std::cout << std::endl; + } + + // 并行化分析 + auto parallelIt = ParallelizationMap.find(loop); + if (parallelIt != ParallelizationMap.end()) { + const auto& parAnalysis = parallelIt->second; + std::cout << " Parallelization: " << (parAnalysis.isParallelizable ? "YES" : "NO") << std::endl; + if (parAnalysis.isParallelizable) { + std::cout << " Suggested Thread Count: " << parAnalysis.suggestedThreadCount << std::endl; + if (parAnalysis.requiresReduction) { + std::cout << " Requires Reduction: Yes" << std::endl; + } + if (parAnalysis.requiresBarrier) { + std::cout << " Requires Barrier: Yes" << std::endl; + } + } else if (!parAnalysis.preventingFactors.empty()) { + std::cout << " Preventing Factors: "; + for (const auto& factor : parAnalysis.preventingFactors) { + std::cout << factor << " "; + } + std::cout << std::endl; + } + } + + // 依赖关系 + auto depIt = DependenceMap.find(loop); + if (depIt != DependenceMap.end()) { + const auto& dependences = depIt->second; + std::cout << " Dependences: " << dependences.size() << " found" << std::endl; + for (const auto& dep : dependences) { + if (dep.dependenceVector.isKnown) { + std::cout << " " << dep.source->getName() << " -> " << dep.sink->getName(); + std::cout << " ["; + for (size_t i = 0; i < dep.dependenceVector.distances.size(); ++i) { + if (i > 0) std::cout << ","; + std::cout << dep.dependenceVector.distances[i]; + } + std::cout << "]" << std::endl; + } + } + } + } + + std::cout << "-----------------------------------------------" << std::endl; +} + +bool LoopVectorizationPass::runOnFunction(Function *F, AnalysisManager &AM) { + if (F->getBasicBlocks().empty()) { + CurrentResult = std::make_unique(F); + return false; + } + + if (DEBUG) { + std::cout << "Running LoopVectorizationPass on function: " << F->getName() << std::endl; + } + + // 获取循环分析结果 + auto* loopAnalysisResult = AM.getAnalysisResult(F); + if (!loopAnalysisResult || !loopAnalysisResult->hasLoops()) { + CurrentResult = std::make_unique(F); + return false; + } + + // 获取循环特征分析结果 + auto* loopCharacteristics = AM.getAnalysisResult(F); + if (!loopCharacteristics) { + if (DEBUG) { + std::cout << "Warning: LoopCharacteristics analysis not available" << std::endl; + } + } + + // 获取别名分析结果 + auto* aliasAnalysis = AM.getAnalysisResult(F); + + // 获取副作用分析结果 + auto* sideEffectAnalysis = AM.getAnalysisResult(); + + CurrentResult = std::make_unique(F); + + // 分析每个循环的向量化/并行化可行性 + for (const auto& loop_ptr : loopAnalysisResult->getAllLoops()) { + Loop* loop = loop_ptr.get(); + + // 获取该循环的特征信息 + LoopCharacteristics* characteristics = nullptr; + if (loopCharacteristics) { + characteristics = const_cast(loopCharacteristics->getCharacteristics(loop)); + } + + analyzeLoop(loop, characteristics, aliasAnalysis, sideEffectAnalysis); + } + + if (DEBUG) { + std::cout << "LoopVectorizationPass completed. Found " + << CurrentResult->getVectorizableLoopCount() << " vectorizable loops, " + << CurrentResult->getParallelizableLoopCount() << " parallelizable loops" << std::endl; + } + + return false; // 分析遍不修改IR +} + +void LoopVectorizationPass::analyzeLoop(Loop* loop, LoopCharacteristics* characteristics, + AliasAnalysisResult* aliasAnalysis, SideEffectAnalysisResult* sideEffectAnalysis) { + if (DEBUG) { + std::cout << " Analyzing advanced features for loop: " << loop->getName() << std::endl; + } + + // 1. 计算精确依赖向量 + auto dependences = computeDependenceVectors(loop, aliasAnalysis); + CurrentResult->addDependenceAnalysis(loop, dependences); + + // 2. 分析向量化可行性 (暂时搁置,总是返回不可向量化) + auto vecAnalysis = analyzeVectorizability(loop, dependences, characteristics); + CurrentResult->addVectorizationAnalysis(loop, vecAnalysis); + + // 3. 分析并行化可行性 + auto parAnalysis = analyzeParallelizability(loop, dependences, characteristics); + CurrentResult->addParallelizationAnalysis(loop, parAnalysis); +} + +// ========== 依赖向量分析实现 ========== + +std::vector LoopVectorizationPass::computeDependenceVectors(Loop* loop, + AliasAnalysisResult* aliasAnalysis) { + std::vector dependences; + std::vector memoryInsts; + + // 收集所有内存操作指令 + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + if (dynamic_cast(inst.get()) || dynamic_cast(inst.get())) { + memoryInsts.push_back(inst.get()); + } + } + } + + // 分析每对内存操作之间的依赖关系 + for (size_t i = 0; i < memoryInsts.size(); ++i) { + for (size_t j = i + 1; j < memoryInsts.size(); ++j) { + Instruction* inst1 = memoryInsts[i]; + Instruction* inst2 = memoryInsts[j]; + + Value* ptr1 = nullptr; + Value* ptr2 = nullptr; + + if (auto* load = dynamic_cast(inst1)) { + ptr1 = load->getPointer(); + } else if (auto* store = dynamic_cast(inst1)) { + ptr1 = store->getPointer(); + } + + if (auto* load = dynamic_cast(inst2)) { + ptr2 = load->getPointer(); + } else if (auto* store = dynamic_cast(inst2)) { + ptr2 = store->getPointer(); + } + + if (!ptr1 || !ptr2) continue; + + // 检查是否可能存在别名关系 + bool mayAlias = false; + if (aliasAnalysis) { + mayAlias = aliasAnalysis->queryAlias(ptr1, ptr2) != AliasType::NO_ALIAS; + } else { + mayAlias = (ptr1 != ptr2); // 保守估计 + } + + if (mayAlias) { + // 创建依赖关系 + PreciseDependence dep(loop->getLoopDepth()); + dep.source = inst1; + dep.sink = inst2; + dep.memoryLocation = ptr1; + + // 确定依赖类型 + bool isStore1 = dynamic_cast(inst1) != nullptr; + bool isStore2 = dynamic_cast(inst2) != nullptr; + + if (isStore1 && !isStore2) { + dep.type = DependenceType::TRUE_DEPENDENCE; // Write -> Read (RAW) + } else if (!isStore1 && isStore2) { + dep.type = DependenceType::ANTI_DEPENDENCE; // Read -> Write (WAR) + } else if (isStore1 && isStore2) { + dep.type = DependenceType::OUTPUT_DEPENDENCE; // Write -> Write (WAW) + } else { + continue; // Read -> Read (RAR) - 跳过,不是真正的依赖 + } + + // 计算依赖向量 + dep.dependenceVector = computeAccessDependence(inst1, inst2, loop); + + // 判断是否允许并行化 + dep.allowsParallelization = dep.dependenceVector.isLoopIndependent() || + (dep.dependenceVector.isKnown && + std::all_of(dep.dependenceVector.distances.begin(), + dep.dependenceVector.distances.end(), + [](int d) { return d >= 0; })); + + dependences.push_back(dep); + + if (DEBUG && dep.dependenceVector.isKnown) { + std::cout << " Found dependence: " << inst1->getName() + << " -> " << inst2->getName() << " ["; + for (size_t k = 0; k < dep.dependenceVector.distances.size(); ++k) { + if (k > 0) std::cout << ","; + std::cout << dep.dependenceVector.distances[k]; + } + std::cout << "]" << std::endl; + } + } + } + } + + return dependences; +} + +DependenceVector LoopVectorizationPass::computeAccessDependence(Instruction* inst1, Instruction* inst2, Loop* loop) { + DependenceVector depVec(loop->getLoopDepth()); + + Value* ptr1 = nullptr; + Value* ptr2 = nullptr; + + if (auto* load = dynamic_cast(inst1)) { + ptr1 = load->getPointer(); + } else if (auto* store = dynamic_cast(inst1)) { + ptr1 = store->getPointer(); + } + + if (auto* load = dynamic_cast(inst2)) { + ptr2 = load->getPointer(); + } else if (auto* store = dynamic_cast(inst2)) { + ptr2 = store->getPointer(); + } + + if (!ptr1 || !ptr2) return depVec; + + // 尝试分析仿射关系 + if (areAccessesAffinelyRelated(ptr1, ptr2, loop)) { + auto coeff1 = extractInductionCoefficients(ptr1, loop); + auto coeff2 = extractInductionCoefficients(ptr2, loop); + + if (coeff1.size() == coeff2.size()) { + depVec.isKnown = true; + depVec.isConstant = true; + + for (size_t i = 0; i < coeff1.size(); ++i) { + depVec.distances[i] = coeff2[i] - coeff1[i]; + } + } + } + + return depVec; +} + +bool LoopVectorizationPass::areAccessesAffinelyRelated(Value* ptr1, Value* ptr2, Loop* loop) { + // 简化实现:检查是否都是基于归纳变量的数组访问 + // 真正的实现需要复杂的仿射关系分析 + + // 检查是否为 GEP 指令 + auto* gep1 = dynamic_cast(ptr1); + auto* gep2 = dynamic_cast(ptr2); + + if (!gep1 || !gep2) return false; + + // 检查是否访问同一个数组基址 + if (gep1->getBasePointer() != gep2->getBasePointer()) return false; + + // 简化:假设都是仿射的 + return true; +} + +// ========== 向量化分析实现 (暂时搁置) ========== + +VectorizationAnalysis LoopVectorizationPass::analyzeVectorizability(Loop* loop, + const std::vector& dependences, + LoopCharacteristics* characteristics) { + VectorizationAnalysis analysis; // 构造函数已设置为不可向量化 + + if (DEBUG) { + std::cout << " Vectorization analysis: DISABLED (temporarily)" << std::endl; + } + + // 向量化功能暂时搁置,总是返回不可向量化 + // 这里可以添加一些基本的诊断信息用于日志 + if (!loop->isInnermost()) { + analysis.preventingFactors.push_back("Not innermost loop"); + } + if (loop->getBlocks().size() > 1) { + analysis.preventingFactors.push_back("Complex control flow"); + } + if (!dependences.empty()) { + analysis.preventingFactors.push_back("Has dependences (not analyzed in detail)"); + } + + return analysis; +} + +// ========== 并行化分析实现 ========== + +ParallelizationAnalysis LoopVectorizationPass::analyzeParallelizability(Loop* loop, + const std::vector& dependences, + LoopCharacteristics* characteristics) { + ParallelizationAnalysis analysis; + + if (DEBUG) { + std::cout << " Analyzing parallelizability for loop: " << loop->getName() << std::endl; + std::cout << " Found " << dependences.size() << " dependences" << std::endl; + } + + // 按依赖类型分类分析 + bool hasTrueDependences = false; + bool hasAntiDependences = false; + bool hasOutputDependences = false; + + for (const auto& dep : dependences) { + switch (dep.type) { + case DependenceType::TRUE_DEPENDENCE: + hasTrueDependences = true; + // 真依赖通常是最难处理的,需要检查是否为归约模式 + if (dep.isReductionDependence) { + analysis.requiresReduction = true; + analysis.reductionVariables.insert(dep.memoryLocation); + } else { + analysis.preventingFactors.push_back("Non-reduction true dependence"); + } + break; + case DependenceType::ANTI_DEPENDENCE: + hasAntiDependences = true; + // 反依赖可以通过变量私有化解决 + analysis.privatizableVariables.insert(dep.memoryLocation); + break; + case DependenceType::OUTPUT_DEPENDENCE: + hasOutputDependences = true; + // 输出依赖可以通过变量私有化或原子操作解决 + analysis.sharedVariables.insert(dep.memoryLocation); + break; + } + } + + // 确定并行化类型 + analysis.parallelType = determineParallelizationType(loop, dependences); + + // 基于依赖类型评估可并行性 + if (!hasTrueDependences && !hasOutputDependences) { + // 只有反依赖或无依赖,完全可并行 + analysis.parallelType = ParallelizationAnalysis::EMBARRASSINGLY_PARALLEL; + analysis.isParallelizable = true; + } else if (analysis.requiresReduction) { + // 有归约模式,可以并行但需要特殊处理 + analysis.parallelType = ParallelizationAnalysis::REDUCTION_PARALLEL; + analysis.isParallelizable = true; + } else if (hasTrueDependences) { + // 有非归约的真依赖,通常不能并行化 + analysis.isParallelizable = false; + analysis.preventingFactors.push_back("Non-reduction loop-carried true dependences"); + } + + if (analysis.isParallelizable) { + // 进一步分析并行化收益和成本 + estimateParallelizationBenefit(loop, &analysis, characteristics); + analyzeSynchronizationNeeds(loop, &analysis, dependences); + analysis.suggestedThreadCount = estimateOptimalThreadCount(loop, characteristics); + } + + if (DEBUG) { + std::cout << " Parallelizable: " << (analysis.isParallelizable ? "YES" : "NO") << std::endl; + if (analysis.isParallelizable) { + std::cout << " Type: " << (int)analysis.parallelType << ", Threads: " << analysis.suggestedThreadCount << std::endl; + } + } + + return analysis; +} + +bool LoopVectorizationPass::checkParallelizationLegality(Loop* loop, const std::vector& dependences) { + // 检查所有依赖是否允许并行化 + for (const auto& dep : dependences) { + if (!dep.allowsParallelization) { + return false; + } + } + + // 检查是否有无法并行化的操作 + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + // 检查原子操作、同步操作等 + if (auto* call = dynamic_cast(inst.get())) { + // 简化:假设函数调用需要特殊处理 + // 在实际实现中,需要分析函数的副作用 + return false; + } + } + } + + return true; +} + +int LoopVectorizationPass::estimateOptimalThreadCount(Loop* loop, LoopCharacteristics* characteristics) { + // 基于循环特征估计最优线程数 + if (!characteristics) return 2; + + // 基于循环体大小和计算密度 + int baseThreads = 2; + + if (characteristics->instructionCount > 50) baseThreads = 4; + if (characteristics->instructionCount > 200) baseThreads = 8; + + // 基于计算与内存比率调整 + if (characteristics->computeToMemoryRatio > 2.0) { + baseThreads *= 2; // 计算密集型,可以使用更多线程 + } + + return std::min(baseThreads, 16); // 限制最大线程数 +} + +// ========== 辅助方法实现 ========== + +bool LoopVectorizationPass::isConstantStride(Value* ptr, Loop* loop, int& stride) { + // 简化实现:检查是否为常量步长访问 + stride = 1; // 默认步长 + + auto* gep = dynamic_cast(ptr); + if (!gep) return false; + + // 检查最后一个索引是否为归纳变量 + 常量 + if (gep->getNumIndices() > 0) { + Value* lastIndex = gep->getIndex(gep->getNumIndices() - 1); + + // 简化:假设是 i 或 i+c 的形式 + if (auto* binInst = dynamic_cast(lastIndex)) { + if (binInst->getKind() == Instruction::kAdd) { + // 检查是否为 i + constant + if (auto* constInt = dynamic_cast(binInst->getRhs())) { + stride = constInt->getInt(); + return true; + } + } + } + + // 默认为步长1的连续访问 + stride = 1; + return true; + } + + return false; +} + +std::vector LoopVectorizationPass::extractInductionCoefficients(Value* ptr, Loop* loop) { + // 简化实现:返回默认的仿射系数 + std::vector coefficients; + + // 假设是简单的 a[i] 形式,系数为 [0, 1] + coefficients.push_back(0); // 常数项 + coefficients.push_back(1); // 归纳变量系数 + + return coefficients; +} + +// ========== 缺失的方法实现 ========== + +ParallelizationAnalysis::ParallelizationType LoopVectorizationPass::determineParallelizationType( + Loop* loop, const std::vector& dependences) { + + // 检查是否有任何依赖 + if (dependences.empty()) { + return ParallelizationAnalysis::EMBARRASSINGLY_PARALLEL; + } + + // 检查是否只有归约模式 + bool hasReduction = false; + bool hasOtherDependences = false; + + for (const auto& dep : dependences) { + if (dep.isReductionDependence) { + hasReduction = true; + } else if (dep.type == DependenceType::TRUE_DEPENDENCE) { + hasOtherDependences = true; + } + } + + if (hasReduction && !hasOtherDependences) { + return ParallelizationAnalysis::REDUCTION_PARALLEL; + } else if (!hasOtherDependences) { + return ParallelizationAnalysis::EMBARRASSINGLY_PARALLEL; + } + + return ParallelizationAnalysis::NONE; +} + +void LoopVectorizationPass::analyzeReductionPatterns(Loop* loop, ParallelizationAnalysis* analysis) { + // 简化实现:查找常见的归约模式 + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + if (auto* binInst = dynamic_cast(inst.get())) { + if (binInst->getKind() == Instruction::kAdd || binInst->getKind() == Instruction::kMul) { + // 检查是否为累加/累乘模式 + Value* lhs = binInst->getLhs(); + if (hasReductionPattern(lhs, loop)) { + analysis->requiresReduction = true; + analysis->reductionVariables.insert(lhs); + } + } + } + } + } +} + +void LoopVectorizationPass::analyzeMemoryAccessPatterns(Loop* loop, ParallelizationAnalysis* analysis, + AliasAnalysisResult* aliasAnalysis) { + std::vector memoryAccesses; + + // 收集所有内存访问 + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + if (auto* load = dynamic_cast(inst.get())) { + memoryAccesses.push_back(load->getPointer()); + } else if (auto* store = dynamic_cast(inst.get())) { + memoryAccesses.push_back(store->getPointer()); + } + } + } + + // 分析内存访问独立性 + bool hasIndependentAccess = true; + for (size_t i = 0; i < memoryAccesses.size(); ++i) { + for (size_t j = i + 1; j < memoryAccesses.size(); ++j) { + if (!isIndependentMemoryAccess(memoryAccesses[i], memoryAccesses[j], loop)) { + hasIndependentAccess = false; + analysis->hasMemoryConflicts = true; + } + } + } + + analysis->hasIndependentAccess = hasIndependentAccess; +} + +void LoopVectorizationPass::estimateParallelizationBenefit(Loop* loop, ParallelizationAnalysis* analysis, + LoopCharacteristics* characteristics) { + if (!analysis->isParallelizable) { + analysis->parallelizationBenefit = 0.0; + return; + } + + // 基于计算复杂度和并行度计算收益 + double workComplexity = estimateWorkComplexity(loop); + double parallelFraction = 1.0; // 假设完全可并行 + + // 根据依赖调整并行度 + if (analysis->requiresReduction) { + parallelFraction *= 0.8; // 归约降低并行效率 + } + if (analysis->hasMemoryConflicts) { + parallelFraction *= 0.6; // 内存冲突降低效率 + } + + // Amdahl定律估算 + double serialFraction = 1.0 - parallelFraction; + int threadCount = analysis->suggestedThreadCount; + double speedup = 1.0 / (serialFraction + parallelFraction / threadCount); + + analysis->parallelizationBenefit = std::min((speedup - 1.0) / threadCount, 1.0); + + // 估算同步和通信开销 + analysis->synchronizationCost = analysis->requiresBarrier ? 100 : 0; + analysis->communicationCost = analysis->sharedVariables.size() * 50; +} + +void LoopVectorizationPass::identifyPrivatizableVariables(Loop* loop, ParallelizationAnalysis* analysis) { + // 简化实现:标识循环内定义的变量为可私有化 + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + if (!inst->getType()->isVoid()) { + // 如果变量只在循环内使用,可能可以私有化 + bool onlyUsedInLoop = true; + for (auto& use : inst->getUses()) { + if (auto* userInst = dynamic_cast(use->getUser())) { + if (!loop->contains(userInst->getParent())) { + onlyUsedInLoop = false; + break; + } + } + } + + if (onlyUsedInLoop) { + analysis->privatizableVariables.insert(inst.get()); + } + } + } + } +} + +void LoopVectorizationPass::analyzeSynchronizationNeeds(Loop* loop, ParallelizationAnalysis* analysis, + const std::vector& dependences) { + // 根据依赖类型确定同步需求 + for (const auto& dep : dependences) { + if (dep.type == DependenceType::OUTPUT_DEPENDENCE) { + analysis->requiresBarrier = true; + analysis->sharedVariables.insert(dep.memoryLocation); + } + } + + // 如果有归约,需要特殊的归约同步 + if (analysis->requiresReduction) { + analysis->requiresBarrier = true; + } +} + +bool LoopVectorizationPass::isIndependentMemoryAccess(Value* ptr1, Value* ptr2, Loop* loop) { + // 简化实现:基本的独立性检查 + if (ptr1 == ptr2) return false; + + // 如果是不同的基址,认为是独立的 + auto* gep1 = dynamic_cast(ptr1); + auto* gep2 = dynamic_cast(ptr2); + + if (gep1 && gep2) { + if (gep1->getBasePointer() != gep2->getBasePointer()) { + return true; // 不同的基址 + } + // 相同基址,需要更精细的分析(这里简化为不独立) + return false; + } + + return true; // 默认认为独立 +} + +double LoopVectorizationPass::estimateWorkComplexity(Loop* loop) { + double complexity = 0.0; + + for (BasicBlock* bb : loop->getBlocks()) { + for (auto& inst : bb->getInstructions()) { + // 基于指令类型分配复杂度权重 + if (auto* binInst = dynamic_cast(inst.get())) { + switch (binInst->getKind()) { + case Instruction::kAdd: + case Instruction::kSub: + complexity += 1.0; + break; + case Instruction::kMul: + complexity += 3.0; + break; + case Instruction::kDiv: + complexity += 10.0; + break; + default: + complexity += 2.0; + } + } else if (dynamic_cast(inst.get()) || dynamic_cast(inst.get())) { + complexity += 2.0; // 内存访问 + } else { + complexity += 1.0; // 其他指令 + } + } + } + + return complexity; +} + +bool LoopVectorizationPass::hasReductionPattern(Value* var, Loop* loop) { + // 简化实现:检查是否为简单的累加/累乘模式 + for (auto& use : var->getUses()) { + if (auto* binInst = dynamic_cast(use->getUser())) { + if (binInst->getKind() == Instruction::kAdd || binInst->getKind() == Instruction::kMul) { + // 检查是否为 var = var op something 的模式 + if (binInst->getLhs() == var || binInst->getRhs() == var) { + return true; + } + } + } + } + return false; +} + +} // namespace sysy