530 lines
18 KiB
C++
530 lines
18 KiB
C++
#include "SysYIRAnalyser.h"
|
||
#include <iostream>
|
||
|
||
|
||
namespace sysy {
|
||
|
||
|
||
void ControlFlowAnalysis::init() {
|
||
// 初始化分析器
|
||
auto &functions = pModule->getFunctions();
|
||
for (const auto &function : functions) {
|
||
auto func = function.second.get();
|
||
auto basicBlocks = func->getBasicBlocks();
|
||
for (auto &basicBlock : basicBlocks) {
|
||
blockAnalysisInfo[basicBlock.get()] = new BlockAnalysisInfo();
|
||
blockAnalysisInfo[basicBlock.get()]->clear();
|
||
}
|
||
functionAnalysisInfo[func] = new FunctionAnalysisInfo();
|
||
functionAnalysisInfo[func]->clear();
|
||
}
|
||
}
|
||
|
||
/// Entry point of the control-flow analysis: discards earlier results,
/// re-creates the per-block / per-function records and runs the passes.
void ControlFlowAnalysis::runControlFlowAnalysis() {
  // Start from a clean slate.
  clear();
  init();

  // Pass order matters: the dominance-frontier pass reads the immediate
  // dominators, dominator-tree children and depths written by the
  // dominator-tree pass.
  computeDomNode();            // dominator sets
  computeDomTree();            // immediate dominators / tree / depths
  computeDomFrontierAllBlk();  // dominance frontiers
}
|
||
|
||
/// In-place set intersection: after the call `dom` holds exactly the blocks
/// that were in `dom` and are also in `other`.
///
/// @param dom   set that is narrowed in place
/// @param other set to intersect with (not modified)
void ControlFlowAnalysis::intersectOP4Dom(std::unordered_set<BasicBlock *> &dom, const std::unordered_set<BasicBlock *> &other) {
  auto cursor = dom.begin();
  while (cursor != dom.end()) {
    if (other.count(*cursor) != 0) {
      // Present in both sets: keep it and move on.
      ++cursor;
      continue;
    }
    // Missing from `other`: drop it; erase() hands back the next valid iterator.
    cursor = dom.erase(cursor);
  }
}
|
||
|
||
/// Walks two blocks up the (partially built) dominator tree until they meet
/// and returns the block where they meet.
///
/// In each step the block with the larger recorded depth moves to its
/// immediate dominator; both move only when their depths are equal. The
/// previous version levelled the depths and then moved *both* blocks
/// unconditionally. That is only correct if depths along an idom chain are
/// contiguous, which computeDomTree() does not guarantee: it refreshes the
/// depth of the block whose idom changed, not of the blocks already attached
/// below it. With a gap in the chain, levelling could leave one block strictly
/// shallower (possibly already the common dominator) and the lock-step move
/// would then step past it.
///
/// Assumes the recorded depth strictly decreases when stepping from a block to
/// its immediate dominator and that both blocks have an idom chain ending at
/// the same root -- TODO confirm for all callers.
///
/// @param a first block
/// @param b second block
/// @return the first block reached from both `a` and `b`
// TODO: binary lifting (trade space for time); low priority.
auto ControlFlowAnalysis::findCommonDominator(BasicBlock *a, BasicBlock *b) -> BasicBlock * {
  while (a != b) {
    BlockAnalysisInfo *infoA = blockAnalysisInfo[a];
    BlockAnalysisInfo *infoB = blockAnalysisInfo[b];
    const auto depthA = infoA->getDomDepth();
    const auto depthB = infoB->getDomDepth();

    // The deeper block climbs; on equal depth (and a != b) both climb.
    if (depthA >= depthB) {
      a = const_cast<BasicBlock *>(infoA->getIdom());
    }
    if (depthB >= depthA) {
      b = const_cast<BasicBlock *>(infoB->getIdom());
    }
  }
  return a;
}
|
||
|
||
void ControlFlowAnalysis::computeDomNode(){
|
||
auto &functions = pModule->getFunctions();
|
||
// 分析每个函数内的基本块
|
||
for (const auto &function : functions) {
|
||
auto func = function.second.get();
|
||
auto basicBlocks = func->getBasicBlocks();
|
||
std::unordered_set<BasicBlock *> domSetTmp;
|
||
// 一开始把domSetTmp置为所有block
|
||
auto entry_block = func->getEntryBlock();
|
||
entry_block->setName("Entry");
|
||
blockAnalysisInfo[entry_block]->addDominants(entry_block);
|
||
for (auto &basicBlock : basicBlocks) {
|
||
domSetTmp.emplace(basicBlock.get());
|
||
}
|
||
// 初始化
|
||
for (auto &basicBlock : basicBlocks) {
|
||
if (basicBlock.get() != entry_block) {
|
||
blockAnalysisInfo[basicBlock.get()]->setDominants(domSetTmp);
|
||
// 先把所有block的必经结点都设为N
|
||
}
|
||
}
|
||
|
||
// 支配节点计算公式
|
||
//DOM[B]={B}∪ {⋂P∈pred(B) DOM[P]}
|
||
// 其中pred(B)是B的所有前驱结点
|
||
// 迭代计算支配结点,直到不再变化
|
||
// 这里使用迭代法,直到支配结点不再变化
|
||
// TODO:Lengauer-Tarjan 算法可以更高效地计算支配结点
|
||
// 或者按照CFG拓扑序遍历效率更高
|
||
bool changed = true;
|
||
while (changed) {
|
||
changed = false;
|
||
// 循环非start结点
|
||
for (auto &basicBlock : basicBlocks) {
|
||
if (basicBlock.get() != entry_block) {
|
||
auto olddom =
|
||
blockAnalysisInfo[basicBlock.get()]->getDominants();
|
||
|
||
std::unordered_set<BasicBlock *> dom =
|
||
blockAnalysisInfo[basicBlock->getPredecessors().front()]->getDominants();
|
||
|
||
// 对于每个基本块,计算其支配结点
|
||
// 取其前驱结点的支配结点的交集和自己
|
||
for (auto pred : basicBlock->getPredecessors()) {
|
||
intersectOP4Dom(dom, blockAnalysisInfo[pred]->getDominants());
|
||
}
|
||
dom.emplace(basicBlock.get());
|
||
blockAnalysisInfo[basicBlock.get()]->setDominants(dom);
|
||
|
||
if (dom != olddom) {
|
||
changed = true;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// TODO: SEMI-NCA算法改进
|
||
void ControlFlowAnalysis::computeDomTree() {
|
||
// 构造支配树
|
||
auto &functions = pModule->getFunctions();
|
||
for (const auto &function : functions) {
|
||
auto func = function.second.get();
|
||
auto basicBlocks = func->getBasicBlocks();
|
||
auto entry_block = func->getEntryBlock();
|
||
|
||
blockAnalysisInfo[entry_block]->setIdom(entry_block);
|
||
blockAnalysisInfo[entry_block]->setDomDepth(0); // 入口块深度为0
|
||
|
||
bool changed = true;
|
||
while (changed) {
|
||
changed = false;
|
||
|
||
for (auto &basicBlock : basicBlocks) {
|
||
if (basicBlock.get() == entry_block) continue;
|
||
|
||
BasicBlock *new_idom = nullptr;
|
||
for (auto pred : basicBlock->getPredecessors()) {
|
||
// 跳过未处理的前驱
|
||
if (blockAnalysisInfo[pred]->getIdom() == nullptr) continue;
|
||
// new_idom = (new_idom == nullptr) ? pred : findCommonDominator(new_idom, pred);
|
||
if (new_idom == nullptr)
|
||
new_idom = pred;
|
||
else
|
||
new_idom = findCommonDominator(new_idom, pred);
|
||
}
|
||
// 更新直接支配节点
|
||
if (new_idom && new_idom != blockAnalysisInfo[basicBlock.get()]->getIdom()) {
|
||
// 移除旧的支配关系
|
||
if (blockAnalysisInfo[basicBlock.get()]->getIdom()) {
|
||
blockAnalysisInfo[const_cast<BasicBlock*>(blockAnalysisInfo[basicBlock.get()]->getIdom())]->removeSdoms(basicBlock.get());
|
||
}
|
||
// 设置新的支配关系
|
||
|
||
// std::cout << "Block: " << basicBlock->getName()
|
||
// << " New Idom: " << new_idom->getName() << std::endl;
|
||
|
||
blockAnalysisInfo[basicBlock.get()]->setIdom(new_idom);
|
||
blockAnalysisInfo[new_idom]->addSdoms(basicBlock.get());
|
||
// 更新深度 = 直接支配节点深度 + 1
|
||
blockAnalysisInfo[basicBlock.get()]->setDomDepth(
|
||
blockAnalysisInfo[new_idom]->getDomDepth() + 1);
|
||
|
||
changed = true;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
// for (auto &basicBlock : basicBlocks) {
|
||
// if (basicBlock.get() != func->getEntryBlock()) {
|
||
// auto dominats =
|
||
// blockAnalysisInfo[basicBlock.get()]->getDominants();
|
||
// bool found = false;
|
||
// // 从前驱结点开始寻找直接支配结点
|
||
// std::queue<BasicBlock *> q;
|
||
// for (auto pred : basicBlock->getPredecessors()) {
|
||
// q.push(pred);
|
||
// }
|
||
// // BFS遍历前驱结点,直到找到直接支配结点
|
||
// while (!found && !q.empty()) {
|
||
// auto curr = q.front();
|
||
// q.pop();
|
||
// if (curr == basicBlock.get())
|
||
// continue;
|
||
// if (dominats.count(curr) != 0U) {
|
||
// blockAnalysisInfo[basicBlock.get()]->setIdom(curr);
|
||
// blockAnalysisInfo[curr]->addSdoms(basicBlock.get());
|
||
// found = true;
|
||
// } else {
|
||
// for (auto pred : curr->getPredecessors()) {
|
||
// q.push(pred);
|
||
// }
|
||
// }
|
||
// }
|
||
// }
|
||
// }
|
||
}
|
||
|
||
// std::unordered_set<BasicBlock *> ControlFlowAnalysis::computeDomFrontier(BasicBlock *block) {
|
||
// std::unordered_set<BasicBlock *> ret_list;
|
||
// // 计算 localDF
|
||
// for (auto local_successor : block->getSuccessors()) {
|
||
// if (local_successor->getIdom() != block) {
|
||
// ret_list.emplace(local_successor);
|
||
// }
|
||
// }
|
||
// // 计算 upDF
|
||
// for (auto up_successor : block->getSdoms()) {
|
||
// auto childrenDF = computeDF(up_successor);
|
||
// for (auto w : childrenDF) {
|
||
// if (block != w->getIdom() || block == w) {
|
||
// ret_list.emplace(w);
|
||
// }
|
||
// }
|
||
// }
|
||
|
||
// return ret_list;
|
||
// }
|
||
|
||
void ControlFlowAnalysis::computeDomFrontierAllBlk() {
|
||
auto &functions = pModule->getFunctions();
|
||
for (const auto &function : functions) {
|
||
auto func = function.second.get();
|
||
auto basicBlocks = func->getBasicBlocks();
|
||
|
||
// 按支配树深度排序(从深到浅)
|
||
std::vector<BasicBlock *> orderedBlocks;
|
||
for (auto &bb : basicBlocks) {
|
||
orderedBlocks.push_back(bb.get());
|
||
}
|
||
std::sort(orderedBlocks.begin(), orderedBlocks.end(),
|
||
[this](BasicBlock *a, BasicBlock *b) {
|
||
return blockAnalysisInfo[a]->getDomDepth() > blockAnalysisInfo[b]->getDomDepth();
|
||
});
|
||
|
||
// 计算支配边界
|
||
for (auto block : orderedBlocks) {
|
||
std::unordered_set<BasicBlock *> df;
|
||
|
||
// Local DF: 直接后继中不被当前块支配的
|
||
for (auto succ : block->getSuccessors()) {
|
||
// 当前块不支配该后继(即不是其直接支配节点)
|
||
if (blockAnalysisInfo[succ]->getIdom() != block) {
|
||
df.insert(succ);
|
||
}
|
||
}
|
||
|
||
// Up DF: 从支配子树中继承
|
||
for (auto child : blockAnalysisInfo[block]->getSdoms()) {
|
||
for (auto w : blockAnalysisInfo[child]->getDomFrontiers()) {
|
||
// 如果w不被当前块支配
|
||
if (block != blockAnalysisInfo[w]->getIdom()) {
|
||
df.insert(w);
|
||
}
|
||
}
|
||
}
|
||
|
||
blockAnalysisInfo[block]->setDomFrontiers(df);
|
||
}
|
||
}
|
||
}
|
||
|
||
// ==========================
|
||
// dataflow analysis utils
|
||
// ==========================
|
||
|
||
// 先引用学长的代码
|
||
// TODO: Worklist 增加逆后序遍历机制
|
||
/// Runs every analysis in `forwardAnalysisList` to a fixed point, one
/// function at a time.
///
/// All analyses are initialised once for the whole module. Then, per function,
/// each still-active analysis is applied to every basic block in
/// getBasicBlocks() order; an analysis stays active for the next sweep only
/// if at least one analyze() call of the current sweep returned true.
///
/// @param pModule module whose functions are analysed
void DataFlowAnalysisUtils::forwardAnalyze(Module *pModule) {
  for (auto &dataflow : forwardAnalysisList) {
    dataflow->init(pModule);
  }

  // analysis -> "reported a change during the current sweep"
  std::map<DataFlowAnalysis *, bool> pending;

  for (const auto &function : pModule->getFunctions()) {
    for (auto &dataflow : forwardAnalysisList) {
      pending.emplace(dataflow, false);
    }

    while (!pending.empty()) {
      // One sweep over all blocks for every analysis that is still active.
      for (const auto &block : function.second->getBasicBlocks()) {
        for (auto &entry : pending) {
          if (entry.first->analyze(pModule, block.get())) {
            entry.second = true;
          }
        }
      }

      // Retire analyses that did not change; re-arm the others.
      for (auto it = pending.begin(); it != pending.end();) {
        if (it->second) {
          it->second = false;
          ++it;
        } else {
          it = pending.erase(it);
        }
      }
    }
  }
}
|
||
|
||
/// Runs every analysis in `backwardAnalysisList` to a fixed point, one
/// function at a time.
///
/// The driver is the same as for the forward list: blocks are visited in
/// getBasicBlocks() order (not reversed), and an analysis is dropped from the
/// active set as soon as a complete sweep over the function produced no
/// change for it.
///
/// @param pModule module whose functions are analysed
void DataFlowAnalysisUtils::backwardAnalyze(Module *pModule) {
  for (auto &dataflow : backwardAnalysisList) {
    dataflow->init(pModule);
  }

  // Maps each still-active analysis to its "changed in this sweep" flag.
  std::map<DataFlowAnalysis *, bool> active;

  for (const auto &function : pModule->getFunctions()) {
    for (auto &dataflow : backwardAnalysisList) {
      active.emplace(dataflow, false);
    }

    while (!active.empty()) {
      for (const auto &block : function.second->getBasicBlocks()) {
        for (auto &slot : active) {
          const bool updated = slot.first->analyze(pModule, block.get());
          if (updated) {
            slot.second = true;
          }
        }
      }

      // Keep only the analyses that changed, with their flag reset.
      auto cursor = active.begin();
      while (cursor != active.end()) {
        if (!cursor->second) {
          cursor = active.erase(cursor);
          continue;
        }
        cursor->second = false;
        ++cursor;
      }
    }
  }
}
|
||
|
||
|
||
/// Returns the set of values an instruction reads (its "use" set for
/// live-variable analysis).
///
/// Every operand is first converted with `dynamic_cast<User *>`; operands that
/// are not `User`s become null and are removed from the result at the end.
/// Which operands count as "used" depends on the instruction kind, see the
/// per-case comments.
///
/// All operand accesses are bounds-checked. The previous code used
/// `std::next(operands.begin())`, `operands.begin() + 2` and `operands[0]`
/// directly, which is undefined behaviour when an instruction carries fewer
/// operands than the case assumes.
///
/// @param inst instruction to inspect
/// @return the used values, never containing nullptr
std::set<User *> ActiveVarAnalysis::getUsedSet(Instruction *inst) {
  using Kind = Instruction::Kind;

  std::vector<User *> operands;
  for (const auto &operand : inst->getOperands()) {
    operands.emplace_back(dynamic_cast<User *>(operand->getValue()));
  }

  std::set<User *> result;

  // Adds operands[first..end); does nothing when `first` is out of range.
  const auto addFrom = [&operands, &result](std::size_t first) {
    if (first < operands.size()) {
      result.insert(operands.begin() + static_cast<std::ptrdiff_t>(first), operands.end());
    }
  };
  // Adds operands[index]; does nothing when `index` is out of range.
  const auto addAt = [&operands, &result](std::size_t index) {
    if (index < operands.size()) {
      result.insert(operands[index]);
    }
  };

  switch (inst->getKind()) {
  // Phi and call: everything except operand 0. For phi, getDefine() treats
  // operand 0 as the defined value; for call it is presumably the callee --
  // TODO confirm.
  case Kind::kPhi:
  case Kind::kCall:
    addFrom(1);
    break;
  // Conditional branch: only operand 0 is read.
  case Kind::kCondBr:
    addAt(0);
    break;
  // Unconditional branch and alloca read nothing.
  case Kind::kBr:
  case Kind::kAlloca:
    break;
  // Store: operand 0 is the stored value, operand 1 the variable being
  // written (not a use), the remaining operands are the optional array
  // dimension operands.
  case Kind::kStore:
    addAt(0);
    addFrom(2);
    break;
  // Load / La: operand 0 counts as used only when it is a zero-dimensional
  // alloca, global or constant variable; all further operands always count.
  case Kind::kLoad:
  case Kind::kLa: {
    User *base = operands.empty() ? nullptr : operands[0];
    auto variable = dynamic_cast<AllocaInst *>(base);
    auto global = dynamic_cast<GlobalValue *>(base);
    auto constArray = dynamic_cast<ConstantVariable *>(base);
    if ((variable != nullptr && variable->getNumDims() == 0) || (global != nullptr && global->getNumDims() == 0) ||
        (constArray != nullptr && constArray->getNumDims() == 0)) {
      result.insert(base);
    }
    addFrom(1);
    break;
  }
  // GetSubArray: only the operands from index 2 on (array dimension info).
  case Kind::kGetSubArray:
    addFrom(2);
    break;
  // Memset: everything except operand 0.
  case Kind::kMemset:
    addFrom(1);
    break;
  case Kind::kInvalid:
  // Binary
  case Kind::kAdd:
  case Kind::kSub:
  case Kind::kMul:
  case Kind::kDiv:
  case Kind::kRem:
  case Kind::kICmpEQ:
  case Kind::kICmpNE:
  case Kind::kICmpLT:
  case Kind::kICmpLE:
  case Kind::kICmpGT:
  case Kind::kICmpGE:
  case Kind::kFAdd:
  case Kind::kFSub:
  case Kind::kFMul:
  case Kind::kFDiv:
  case Kind::kFCmpEQ:
  case Kind::kFCmpNE:
  case Kind::kFCmpLT:
  case Kind::kFCmpLE:
  case Kind::kFCmpGT:
  case Kind::kFCmpGE:
  case Kind::kAnd:
  case Kind::kOr:
  // Unary
  case Kind::kNeg:
  case Kind::kNot:
  case Kind::kFNot:
  case Kind::kFNeg:
  case Kind::kFtoI:
  case Kind::kItoF:
  // Terminator
  case Kind::kReturn:
    // All operands are read.
    addFrom(0);
    break;
  default:
    assert(false);
    break;
  }

  // Operands that were not Users were stored as nullptr above.
  result.erase(nullptr);
  return result;
}
|
||
|
||
/// Returns the value an instruction defines (its "def" for live-variable
/// analysis), or nullptr when it defines nothing that is tracked.
///
///  - store: the pointer operand, unless it is an alloca or global with a
///    non-zero number of dimensions (arrays are not tracked yet)
///  - phi: operand 0
///  - binary / unary / call / load / la: the instruction itself
///  - anything else: nullptr
///
/// @param inst instruction to inspect
/// @return the defined value or nullptr
User * ActiveVarAnalysis::getDefine(Instruction *inst) {
  if (inst->isStore()) {
    auto *store = dynamic_cast<StoreInst *>(inst);
    auto target = store->getPointer();
    auto *local = dynamic_cast<AllocaInst *>(target);
    auto *global = dynamic_cast<GlobalValue *>(target);

    // TODO: support array variables.
    if ((local != nullptr && local->getNumDims() != 0) || (global != nullptr && global->getNumDims() != 0)) {
      return nullptr;
    }
    return dynamic_cast<User *>(target);
  }

  if (inst->isPhi()) {
    return dynamic_cast<User *>(inst->getOperand(0));
  }

  if (inst->isBinary() || inst->isUnary() || inst->isCall() ||
      inst->isLoad() || inst->isLa()) {
    return dynamic_cast<User *>(inst);
  }

  return nullptr;
}
|
||
|
||
/// Prepares `activeTable`: every basic block of the module gets a vector of
/// `getNumInstructions() + 1` empty live sets (one per program point: before
/// each instruction, plus the block exit).
///
/// The vector is reset on every call. Previously `emplace` was a no-op for a
/// block that was already in the table, and the loop then appended another
/// `n + 1` sets onto the stale ones, so running init() a second time left
/// old results and a wrongly sized vector behind.
///
/// @param pModule module whose blocks are registered
void ActiveVarAnalysis::init(Module *pModule) {
  for (const auto &function : pModule->getFunctions()) {
    for (const auto &block : function.second->getBasicBlocks()) {
      // emplace() returns the existing entry when the block is already known.
      auto slot = activeTable.emplace(block.get(), std::vector<std::set<User *>>{});
      auto &programPoints = slot.first->second;
      programPoints.assign(block->getNumInstructions() + 1, std::set<User *>{});
    }
  }
}
|
||
|
||
// 活跃变量分析公式 每个块内的分析动作供分析器调用
|
||
/// One live-variable transfer step for a single basic block.
///
/// `activeTable.at(block)[i]` is the live set immediately before instruction
/// `i`; index `numInstructions` is the block exit.
///
///   OUT[B] = ∪_{S ∈ succ(B)} IN[S]          (IN[S] is entry 0 of S)
///   IN[i]  = use_i ∪ (OUT[i] − def_i)        (walked from last to first)
///
/// Fixes over the previous version: the instruction iterator is now
/// decremented *before* each use, so it is never moved on an empty block and
/// never stepped before begin() after the first instruction (both undefined
/// behaviour); successor sets are read by reference instead of being copied.
///
/// @param pModule unused here; part of the analysis interface
/// @param block   block to process; must have been registered by init()
/// @return true if any stored live set of this block changed
bool ActiveVarAnalysis::analyze(Module *pModule, BasicBlock *block) {
  (void)pModule;

  bool changed = false;
  std::set<User *> activeSet{};

  // Step 1: OUT[B] is the union of the entry sets of all successors.
  for (const auto &succ : block->getSuccessors()) {
    const auto &succEntrySet = activeTable.at(succ).front();
    activeSet.insert(succEntrySet.begin(), succEntrySet.end());
  }

  const auto &instructions = block->getInstructions();
  const auto numInstructions = instructions.size();
  auto &programPoints = activeTable.at(block);

  // Step 2: store OUT[B] at the exit slot if it differs from the old value.
  if (programPoints[numInstructions] != activeSet) {
    changed = true;
    programPoints[numInstructions] = activeSet;
  }

  // Step 3: walk the instructions backwards; slot i - 1 is the program point
  // in front of the instruction currently being processed.
  auto instructionIter = instructions.end();
  for (auto i = numInstructions; i > 0; --i) {
    --instructionIter;  // step onto the instruction that precedes slot i
    auto inst = instructionIter->get();

    auto used = getUsedSet(inst);
    User *defined = getDefine(inst);

    // IN[i] = use_i ∪ (OUT[i] − def_i)
    activeSet.erase(defined);
    activeSet.insert(used.begin(), used.end());

    if (programPoints[i - 1] != activeSet) {
      changed = true;
      programPoints[i - 1] = activeSet;
    }
  }

  return changed;
}
|
||
|
||
|
||
|
||
|
||
} // namespace sysy
|
||
|