diff --git a/src/include/midend/Pass/Analysis/CallGraphAnalysis.h b/src/include/midend/Pass/Analysis/CallGraphAnalysis.h
new file mode 100644
index 0000000..8b7ac8e
--- /dev/null
+++ b/src/include/midend/Pass/Analysis/CallGraphAnalysis.h
@@ -0,0 +1,242 @@
+#pragma once
+
+#include "IR.h"
+#include "Pass.h"
+#include <map>
+#include <memory>
+#include <set>
+#include <string>
+#include <unordered_set>
+#include <vector>
+
+namespace sysy {
+
+// Forward declaration
+class CallGraphAnalysisResult;
+
+/**
+ * @brief Call graph node
+ * Holds the call-graph information recorded for a single function.
+ */
+struct CallGraphNode {
+  Function* function;          // Function this node describes
+  std::set<Function*> callers; // Functions that call this function
+  std::set<Function*> callees; // Functions this function calls
+
+  // Recursion information
+  bool isRecursive;     // True when the function takes part in a call cycle
+  bool isSelfRecursive; // True when the function calls itself directly
+  int recursiveDepth;   // 0 initially; analyzeRecursion() stores -1 for recursive functions (depth not computed)
+
+  // Call statistics
+  size_t totalCallers;  // Size of `callers`, refreshed by addCallEdge()
+  size_t totalCallees;  // Size of `callees`, refreshed by addCallEdge()
+  size_t callSiteCount; // Call instructions in this function that produced a call edge
+
+  CallGraphNode(Function* f) : function(f), isRecursive(false),
+      isSelfRecursive(false), recursiveDepth(0), totalCallers(0),
+      totalCallees(0), callSiteCount(0) {}
+};
+
+/**
+ * @brief Result of the call graph analysis
+ * Stores the call graph of a whole module together with its query interface.
+ */
+class CallGraphAnalysisResult : public AnalysisResultBase {
+public:
+  CallGraphAnalysisResult(Module* M) : AssociatedModule(M) {}
+  ~CallGraphAnalysisResult() override = default;
+
+  // ========== Basic queries ==========
+
+  /**
+   * Returns the call graph node of F, or nullptr when F has no node.
+   */
+  const CallGraphNode* getNode(Function* F) const {
+    auto it = nodes.find(F);
+    return (it != nodes.end()) ? it->second.get() : nullptr;
+  }
+
+  /**
+   * Returns the call graph node of F (non-const version), or nullptr.
+   */
+  CallGraphNode* getMutableNode(Function* F) {
+    auto it = nodes.find(F);
+    return (it != nodes.end()) ? it->second.get() : nullptr;
+  }
+
+  /**
+   * Returns every node of the call graph, keyed by function.
+   */
+  const std::map<Function*, std::unique_ptr<CallGraphNode>>& getAllNodes() const {
+    return nodes;
+  }
+
+  /**
+   * Returns true when F has a node in the call graph.
+   */
+  bool hasFunction(Function* F) const {
+    return nodes.find(F) != nodes.end();
+  }
+
+  // ========== Call relation queries ==========
+
+  /**
+   * Returns true when a call edge from caller to callee was recorded.
+   */
+  bool hasCallEdge(Function* caller, Function* callee) const {
+    auto node = getNode(caller);
+    return node && node->callees.count(callee) > 0;
+  }
+
+  /**
+   * Returns all callers of F (empty when F has no node).
+   */
+  std::vector<Function*> getCallers(Function* F) const {
+    auto node = getNode(F);
+    if (!node) return {};
+    return std::vector<Function*>(node->callers.begin(), node->callers.end());
+  }
+
+  /**
+   * Returns all functions called by F (empty when F has no node).
+   */
+  std::vector<Function*> getCallees(Function* F) const {
+    auto node = getNode(F);
+    if (!node) return {};
+    return std::vector<Function*>(node->callees.begin(), node->callees.end());
+  }
+
+  // ========== Recursion queries ==========
+
+  /**
+   * Returns true when F takes part in a recursive call cycle.
+   */
+  bool isRecursive(Function* F) const {
+    auto node = getNode(F);
+    return node && node->isRecursive;
+  }
+
+  /**
+   * Returns true when F calls itself directly.
+   */
+  bool isSelfRecursive(Function* F) const {
+    auto node = getNode(F);
+    return node && node->isSelfRecursive;
+  }
+
+  /**
+   * Returns the stored recursiveDepth of F (0 when F has no node).
+   */
+  int getRecursiveDepth(Function* F) const {
+    auto node = getNode(F);
+    return node ? node->recursiveDepth : 0;
+  }
+
+  // ========== Topological order and SCCs ==========
+
+  /**
+   * Returns the order built by computeTopologicalOrder(): reversed DFS post-order
+   * over call edges, i.e. a caller precedes its callees (call cycles aside).
+   */
+  const std::vector<Function*>& getTopologicalOrder() const {
+    return topologicalOrder;
+  }
+
+  /**
+   * Returns the list of strongly connected components.
+   * A component with more than one function is a group of mutually recursive functions.
+   */
+  const std::vector<std::vector<Function*>>& getStronglyConnectedComponents() const {
+    return sccs;
+  }
+
+  /**
+   * Returns the index of the SCC that contains F, or -1 when unknown.
+   */
+  int getSCCIndex(Function* F) const {
+    auto it = functionToSCC.find(F);
+    return (it != functionToSCC.end()) ? it->second : -1;
+  }
+
+  // ========== Statistics ==========
+
+  struct Statistics {
+    size_t totalFunctions;
+    size_t totalCallEdges;
+    size_t recursiveFunctions;
+    size_t selfRecursiveFunctions;
+    size_t stronglyConnectedComponents;
+    size_t maxSCCSize;
+    double avgCallersPerFunction;
+    double avgCalleesPerFunction;
+  };
+
+  Statistics getStatistics() const;
+
+  /**
+   * Prints the call graph analysis result to std::cout.
+   */
+  void print() const;
+
+  // ========== Construction interface (used by the pass) ==========
+
+  void addNode(Function* F);
+  void addCallEdge(Function* caller, Function* callee);
+  void computeTopologicalOrder();
+  void computeStronglyConnectedComponents();
+  void analyzeRecursion();
+
+private:
+  Module* AssociatedModule;                                  // Module this result belongs to
+  std::map<Function*, std::unique_ptr<CallGraphNode>> nodes; // Call graph nodes
+  std::vector<Function*> topologicalOrder;                   // Result of computeTopologicalOrder()
+  std::vector<std::vector<Function*>> sccs;                  // Strongly connected components
+  std::map<Function*, int> functionToSCC;                    // Function -> index into `sccs`
+
+  // Internal helpers
+  void dfsTopological(Function* F, std::unordered_set<Function*>& visited,
+                      std::vector<Function*>& result);
+  void tarjanSCC();
+  void tarjanDFS(Function* F, int& index, std::vector<int>& indices, std::vector<int>& lowlinks,
+                 std::vector<Function*>& stack, std::unordered_set<Function*>& onStack,
+                 const std::map<Function*, int>& functionIndex);
+};
+
+/**
+ * @brief SysY call graph analysis pass
+ * Module-level analysis pass that builds the function call graph of a module.
+ */
+class CallGraphAnalysisPass : public AnalysisPass {
+public:
+  // Unique pass ID
+  static void* ID;
+
+  CallGraphAnalysisPass() : AnalysisPass("CallGraphAnalysis", Pass::Granularity::Module) {}
+
+  // Implements getPassID
+  void* getPassID() const override { return &ID; }
+
+  // Main entry point
+  bool runOnModule(Module* M, AnalysisManager& AM) override;
+
+  // Hands the analysis result over to the caller (CurrentResult is empty afterwards)
+  std::unique_ptr<AnalysisResultBase> getResult() override { return std::move(CurrentResult); }
+
+private:
+  std::unique_ptr<CallGraphAnalysisResult> CurrentResult; // Result for the module being analysed
+
+  // ========== Main analysis steps ==========
+
+  void buildCallGraph(Module* M);                                // Builds the call graph
+  void scanFunctionCalls(Function* F);                           // Scans the calls made by F
+  void processCallInstruction(CallInst* call, Function* caller); // Handles one call instruction
+
+  // ========== Helpers ==========
+
+  bool isLibraryFunction(Function* F) const;   // True for SysY runtime library functions
+  bool isIntrinsicFunction(Function* F) const; // True for compiler intrinsics
+  void printStatistics() const;                // Prints the current result, if any
+};
+
+} // namespace sysy
diff --git a/src/midend/Pass/Analysis/CallGraphAnalysis.cpp b/src/midend/Pass/Analysis/CallGraphAnalysis.cpp
new file mode 100644
index 0000000..f0eab5b
--- /dev/null
+++ b/src/midend/Pass/Analysis/CallGraphAnalysis.cpp
@@ -0,0 +1,406 @@
+#include "CallGraphAnalysis.h"
+#include "SysYIRPrinter.h"
+#include <algorithm>
+#include <iostream>
+#include <string>
+
+extern int DEBUG;
+
+namespace sysy {
+
+// Static member initialisation: the ID's own address serves as the unique pass ID
+void* CallGraphAnalysisPass::ID = static_cast<void*>(&CallGraphAnalysisPass::ID);
+
+// ========== CallGraphAnalysisResult implementation ==========
+
+CallGraphAnalysisResult::Statistics CallGraphAnalysisResult::getStatistics() const {
+  Statistics stats = {};
+  stats.totalFunctions = nodes.size();
+
+  size_t totalCallEdges = 0;
+  size_t recursiveFunctions = 0;
+  size_t selfRecursiveFunctions = 0;
+  size_t totalCallers = 0;
+  size_t totalCallees = 0;
+
+  for (const auto& pair : nodes) {
+    const auto& node = pair.second;
+    totalCallEdges += node->callees.size();
+    totalCallers += node->callers.size();
+    totalCallees += node->callees.size();
+
+    if (node->isRecursive) recursiveFunctions++;
+    if (node->isSelfRecursive) selfRecursiveFunctions++;
+  }
+
+  stats.totalCallEdges = totalCallEdges;
+  stats.recursiveFunctions = recursiveFunctions;
+  stats.selfRecursiveFunctions = selfRecursiveFunctions;
+  stats.stronglyConnectedComponents = sccs.size();
+
+  // Size of the largest SCC
+  size_t maxSCCSize = 0;
+  for (const auto& scc : sccs) {
+    maxSCCSize = std::max(maxSCCSize, scc.size());
+  }
+  stats.maxSCCSize = maxSCCSize;
+
+  // Averages stay at 0 (from the value-initialisation above) for an empty graph
+  if (stats.totalFunctions > 0) {
+    stats.avgCallersPerFunction = static_cast<double>(totalCallers) / stats.totalFunctions;
+    stats.avgCalleesPerFunction = static_cast<double>(totalCallees) / stats.totalFunctions;
+  }
+
+  return stats;
+}
+
+void CallGraphAnalysisResult::print() const {
+  std::cout << "---- Call Graph Analysis Results for Module ----\n";
+
+  // Summary statistics
+  auto stats = getStatistics();
+  std::cout << " Statistics:\n";
+  std::cout << " Total Functions: " << stats.totalFunctions << "\n";
+  std::cout << " Total Call Edges: " << stats.totalCallEdges << "\n";
+  std::cout << " Recursive Functions: " << stats.recursiveFunctions << "\n";
+  std::cout << " Self-Recursive Functions: " << stats.selfRecursiveFunctions << "\n";
+  std::cout << " Strongly Connected Components: " << stats.stronglyConnectedComponents << "\n";
+  std::cout << " Max SCC Size: " << stats.maxSCCSize << "\n";
+  std::cout << " Avg Callers per Function: " << stats.avgCallersPerFunction << "\n";
+  std::cout << " Avg Callees per Function: " << stats.avgCalleesPerFunction << "\n";
+
+  // Topological order
+  std::cout << " Topological Order (" << topologicalOrder.size() << "):\n";
+  for (size_t i = 0; i < topologicalOrder.size(); ++i) {
+    std::cout << " " << i << ": " << topologicalOrder[i]->getName() << "\n";
+  }
+
+  // Strongly connected components
+  if (!sccs.empty()) {
+    std::cout << " Strongly Connected Components:\n";
+    for (size_t i = 0; i < sccs.size(); ++i) {
+      std::cout << " SCC " << i << " (size " << sccs[i].size() << "): ";
+      for (size_t j = 0; j < sccs[i].size(); ++j) {
+        if (j > 0) std::cout << ", ";
+        std::cout << sccs[i][j]->getName();
+      }
+      std::cout << "\n";
+    }
+  }
+
+  // Per-function details
+  std::cout << " Function Details:\n";
+  for (const auto& pair : nodes) {
+    const auto& node = pair.second;
+    std::cout << " Function: " << node->function->getName();
+
+    if (node->isRecursive) {
+      std::cout << " (Recursive";
+      if (node->isSelfRecursive) std::cout << ", Self";
+      if (node->recursiveDepth >= 0) std::cout << ", Depth=" << node->recursiveDepth;
+      std::cout << ")";
+    }
+    std::cout << "\n";
+
+    if (!node->callers.empty()) {
+      std::cout << " Callers (" << node->callers.size() << "): ";
+      bool first = true;
+      for (Function* caller : node->callers) {
+        if (!first) std::cout << ", ";
+        std::cout << caller->getName();
+        first = false;
+      }
+      std::cout << "\n";
+    }
+
+    if (!node->callees.empty()) {
+      std::cout << " Callees (" << node->callees.size() << "): ";
+      bool first = true;
+      for (Function* callee : node->callees) {
+        if (!first) std::cout << ", ";
+        std::cout << callee->getName();
+        first = false;
+      }
+      std::cout << "\n";
+    }
+  }
+
+  std::cout << "--------------------------------------------------\n";
+}
+
+void CallGraphAnalysisResult::addNode(Function* F) {
+  if (nodes.find(F) == nodes.end()) {
+    nodes[F] = std::make_unique<CallGraphNode>(F);
+  }
+}
+
+void CallGraphAnalysisResult::addCallEdge(Function* caller, Function* callee) {
+  // Make sure both endpoints have a node
+  addNode(caller);
+  addNode(callee);
+
+  // Record the edge in both directions
+  nodes[caller]->callees.insert(callee);
+  nodes[callee]->callers.insert(caller);
+
+  // Keep the cached counters in sync with the sets
+  nodes[caller]->totalCallees = nodes[caller]->callees.size();
+  nodes[callee]->totalCallers = nodes[callee]->callers.size();
+
+  // A self edge means direct recursion
+  if (caller == callee) {
+    nodes[caller]->isSelfRecursive = true;
+    nodes[caller]->isRecursive = true;
+  }
+}
+
+void CallGraphAnalysisResult::computeTopologicalOrder() {
+  topologicalOrder.clear();
+  std::unordered_set<Function*> visited;
+
+  // Start a DFS from every function that has not been reached yet
+  for (const auto& pair : nodes) {
+    Function* F = pair.first;
+    if (visited.find(F) == visited.end()) {
+      dfsTopological(F, visited, topologicalOrder);
+    }
+  }
+
+  // Reverse the post-order: afterwards a caller precedes its callees
+  std::reverse(topologicalOrder.begin(), topologicalOrder.end());
+}
+
+void CallGraphAnalysisResult::dfsTopological(Function* F, std::unordered_set<Function*>& visited,
+                                             std::vector<Function*>& result) {
+  visited.insert(F);
+
+  auto node = getNode(F);
+  if (node) {
+    // Visit every callee first
+    for (Function* callee : node->callees) {
+      if (visited.find(callee) == visited.end()) {
+        dfsTopological(callee, visited, result);
+      }
+    }
+  }
+
+  // Post-order: append F once all of its callees have been handled
+  result.push_back(F);
+}
+
+void CallGraphAnalysisResult::computeStronglyConnectedComponents() {
+  tarjanSCC();
+
+  // Record, for every function, the index of the SCC it belongs to
+  functionToSCC.clear();
+  for (size_t i = 0; i < sccs.size(); ++i) {
+    for (Function* F : sccs[i]) {
+      functionToSCC[F] = static_cast<int>(i);
+    }
+  }
+}
+
+void CallGraphAnalysisResult::tarjanSCC() {
+  sccs.clear();
+
+  std::vector<int> indices(nodes.size(), -1);
+  std::vector<int> lowlinks(nodes.size(), -1);
+  std::vector<Function*> stack;
+  std::unordered_set<Function*> onStack;
+  int index = 0;
+
+  // Give every node a dense slot in `indices` / `lowlinks`
+  std::map<Function*, int> functionIndex;
+  int idx = 0;
+  for (const auto& pair : nodes) {
+    functionIndex[pair.first] = idx++;
+  }
+
+  // Run Tarjan's DFS from every function that has not been visited yet
+  for (const auto& pair : nodes) {
+    Function* F = pair.first;
+    int fIdx = functionIndex[F];
+    if (indices[fIdx] == -1) {
+      tarjanDFS(F, index, indices, lowlinks, stack, onStack, functionIndex);
+    }
+  }
+}
+
+void CallGraphAnalysisResult::tarjanDFS(Function* F, int& index, std::vector<int>& indices,
+                                        std::vector<int>& lowlinks, std::vector<Function*>& stack,
+                                        std::unordered_set<Function*>& onStack,
+                                        const std::map<Function*, int>& functionIndex) {
+  // Slot numbers come from the map built once in tarjanSCC(). The previous
+  // function-local static map outlived the analysis, used a different numbering
+  // than tarjanSCC(), and gave slot 0 to every callee that had not been visited
+  // yet, so distinct functions could share a slot and be merged or skipped.
+  const int fIdx = functionIndex.at(F);
+
+  indices[fIdx] = index;
+  lowlinks[fIdx] = index;
+  index++;
+
+  stack.push_back(F);
+  onStack.insert(F);
+
+  auto node = getNode(F);
+  if (node) {
+    for (Function* callee : node->callees) {
+      // Every callee gets a node in addCallEdge(); skip defensively if one is missing.
+      const auto calleeSlot = functionIndex.find(callee);
+      if (calleeSlot == functionIndex.end()) {
+        continue;
+      }
+      const int calleeIdx = calleeSlot->second;
+
+      if (indices[calleeIdx] == -1) {
+        // Unvisited callee: recurse, then inherit its lowlink
+        tarjanDFS(callee, index, indices, lowlinks, stack, onStack, functionIndex);
+        lowlinks[fIdx] = std::min(lowlinks[fIdx], lowlinks[calleeIdx]);
+      } else if (onStack.find(callee) != onStack.end()) {
+        // Back edge into the component currently being built
+        lowlinks[fIdx] = std::min(lowlinks[fIdx], indices[calleeIdx]);
+      }
+    }
+  }
+
+  // F is the root of a strongly connected component: pop the component off the stack
+  if (lowlinks[fIdx] == indices[fIdx]) {
+    std::vector<Function*> scc;
+    Function* w;
+    do {
+      w = stack.back();
+      stack.pop_back();
+      onStack.erase(w);
+      scc.push_back(w);
+    } while (w != F);
+
+    sccs.push_back(std::move(scc));
+  }
+}
+
+void CallGraphAnalysisResult::analyzeRecursion() {
+  // Derive the recursion flags from the SCCs
+  for (const auto& scc : sccs) {
+    if (scc.size() > 1) {
+      // SCC with several functions: they are mutually recursive
+      for (Function* F : scc) {
+        auto* node = getMutableNode(F);
+        if (node) {
+          node->isRecursive = true;
+          node->recursiveDepth = -1; // Mutual recursion: depth is left undefined
+        }
+      }
+    } else if (scc.size() == 1) {
+      // Single-function SCC: recursive only when it calls itself
+      Function* F = scc[0];
+      auto* node = getMutableNode(F);
+      if (node && node->callees.count(F) > 0) {
+        node->isSelfRecursive = true;
+        node->isRecursive = true;
+        node->recursiveDepth = -1; // Simplification: the depth is not computed
+      }
+    }
+  }
+}
+
+// ========== CallGraphAnalysisPass implementation ==========
+
+bool CallGraphAnalysisPass::runOnModule(Module* M, AnalysisManager& AM) {
+  if (DEBUG) {
+    std::cout << "Running Call Graph Analysis on module\n";
+  }
+
+  // Create a fresh result object
+  CurrentResult = std::make_unique<CallGraphAnalysisResult>(M);
+
+  // Run the analysis steps in order
+  buildCallGraph(M);
+  CurrentResult->computeTopologicalOrder();
+  CurrentResult->computeStronglyConnectedComponents();
+  CurrentResult->analyzeRecursion();
+
+  if (DEBUG) {
+    CurrentResult->print();
+  }
+
+  return false; // An analysis pass does not modify the IR
+}
+
+void CallGraphAnalysisPass::buildCallGraph(Module* M) {
+  // 1. Create a node for every function that is neither a library function nor an intrinsic
+  for (auto& pair : M->getFunctions()) {
+    Function* F = pair.second.get();
+    if (!isLibraryFunction(F) && !isIntrinsicFunction(F)) {
+      CurrentResult->addNode(F);
+    }
+  }
+
+  // 2. Scan the call relations of all those functions
+  for (auto& pair : M->getFunctions()) {
+    Function* F = pair.second.get();
+    if (!isLibraryFunction(F) && !isIntrinsicFunction(F)) {
+      scanFunctionCalls(F);
+    }
+  }
+}
+
+void CallGraphAnalysisPass::scanFunctionCalls(Function* F) {
+  // Walk every instruction of every basic block of F
+  for (auto& BB : F->getBasicBlocks_NoRange()) {
+    for (auto& I : BB->getInstructions()) {
+      if (CallInst* call = dynamic_cast<CallInst*>(I.get())) {
+        processCallInstruction(call, F);
+      }
+    }
+  }
+}
+
+void CallGraphAnalysisPass::processCallInstruction(CallInst* call, Function* caller) {
+  Function* callee = call->getCallee();
+
+  if (!callee) {
+    // Indirect call: the target cannot be determined statically
+    return;
+  }
+
+  if (isLibraryFunction(callee) || isIntrinsicFunction(callee)) {
+    // Library functions and intrinsics are not part of the call graph
+    return;
+  }
+
+  // Record the call edge
+  CurrentResult->addCallEdge(caller, callee);
+
+  // Count this call site for the caller
+  auto* node = CurrentResult->getMutableNode(caller);
+  if (node) {
+    node->callSiteCount++;
+  }
+}
+
+bool CallGraphAnalysisPass::isLibraryFunction(Function* F) const {
+  const std::string& name = F->getName();
+
+  // SysY runtime library functions
+  return name == "getint" || name == "getch" || name == "getfloat" ||
+         name == "getarray" || name == "getfarray" ||
+         name == "putint" || name == "putch" || name == "putfloat" ||
+         name == "putarray" || name == "putfarray" ||
+         name == "_sysy_starttime" || name == "_sysy_stoptime";
+}
+
+bool CallGraphAnalysisPass::isIntrinsicFunction(Function* F) const {
+  const std::string& name = F->getName();
+
+  // Compiler intrinsics are recognised by prefix (more prefixes can be added later)
+  return name.compare(0, 5, "llvm.") == 0 || name.compare(0, 5, "sysy.") == 0;
+}
+
+void CallGraphAnalysisPass::printStatistics() const {
+  if (CurrentResult) {
+    CurrentResult->print();
+  }
+}
+
+} // namespace sysy