From 8ca64610ebea02af370959aa58328daf8ba36a48 Mon Sep 17 00:00:00 2001 From: rain2133 <1370973498@qq.com> Date: Sun, 17 Aug 2025 16:33:15 +0800 Subject: [PATCH] =?UTF-8?q?[midend-GVN]=E9=87=8D=E6=9E=84GVN=E7=9A=84?= =?UTF-8?q?=E5=80=BC=E7=BC=96=E5=8F=B7=E7=B3=BB=E7=BB=9F?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/include/midend/Pass/Optimize/GVN.h | 43 +- src/midend/Pass/Optimize/GVN.cpp | 821 ++++++++----------------- 2 files changed, 291 insertions(+), 573 deletions(-) diff --git a/src/include/midend/Pass/Optimize/GVN.h b/src/include/midend/Pass/Optimize/GVN.h index 2aafd8d..552b82e 100644 --- a/src/include/midend/Pass/Optimize/GVN.h +++ b/src/include/midend/Pass/Optimize/GVN.h @@ -19,8 +19,11 @@ public: void run(Function* func, AnalysisManager* AM, bool& changed); private: - // 值编号的哈希表:Value -> 代表值 - std::unordered_map hashtable; + // 新的值编号系统 + std::unordered_map valueToNumber; // Value -> 值编号 + std::unordered_map numberToValue; // 值编号 -> 代表值 + std::unordered_map expressionToNumber; // 表达式 -> 值编号 + unsigned nextValueNumber = 1; // 已访问的基本块集合 std::unordered_set visited; @@ -39,31 +42,27 @@ private: void computeRPO(Function* func); void dfs(BasicBlock* bb); - // 检查哈希表并获取值编号 - Value* checkHashtable(Value* value); + // 新的值编号方法 + unsigned getValueNumber(Value* value); + unsigned assignValueNumber(Value* value); - // 为不同类型的指令获取值编号 - Value* getValueNumber(Instruction* inst); - Value* getValueNumber(BinaryInst* inst); - Value* getValueNumber(UnaryInst* inst); - Value* getValueNumber(GetElementPtrInst* inst); - Value* getValueNumber(LoadInst* inst); - Value* getValueNumber(CallInst* inst); + // 基本块处理 + void processBasicBlock(BasicBlock* bb, bool& changed); - // 访问指令并进行GVN优化 - void visitInstruction(Instruction* inst); + // 指令处理 + bool processInstruction(Instruction* inst); - // 检查是否可以安全地用一个值替换另一个值 - bool canReplace(Instruction* original, Value* replacement); + // 表达式构建和查找 + std::string buildExpressionKey(Instruction* inst); + Value* findExistingValue(const std::string& exprKey, Instruction* inst); - // 检查两个load指令之间是否有store指令修改了相同的内存位置 - bool hasInterveningStore(LoadInst* earlierLoad, LoadInst* laterLoad, Value* ptr); + // 支配关系和安全性检查 + bool dominates(Instruction* a, Instruction* b); + bool isMemorySafe(LoadInst* earlierLoad, LoadInst* laterLoad); - // 使受store指令影响的load指令失效 - void invalidateLoadsAffectedByStore(StoreInst* storeInst); - - // 生成表达式的标准化字符串 - std::string getCanonicalExpression(Instruction* inst); + // 清理方法 + void eliminateRedundantInstructions(bool& changed); + void invalidateMemoryValues(StoreInst* store); }; // GVN优化遍类 diff --git a/src/midend/Pass/Optimize/GVN.cpp b/src/midend/Pass/Optimize/GVN.cpp index a2f1c57..09b67a1 100644 --- a/src/midend/Pass/Optimize/GVN.cpp +++ b/src/midend/Pass/Optimize/GVN.cpp @@ -4,6 +4,8 @@ #include #include #include +#include +#include extern int DEBUG; @@ -62,9 +64,33 @@ void GVN::getAnalysisUsage(std::set &analysisDependencies, std::set operands; + Type* resultType; + + bool operator==(const ExpressionKey& other) const { + return type == other.type && opcode == other.opcode && + operands == other.operands && resultType == other.resultType; + } +}; + +struct ExpressionKeyHash { + size_t operator()(const ExpressionKey& key) const { + size_t hash = std::hash()(static_cast(key.type)) ^ + std::hash()(key.opcode); + for (auto op : key.operands) { + hash ^= std::hash()(op) + 0x9e3779b9 + (hash << 6) + (hash >> 2); + } + return hash; + } +}; + void GVNContext::run(Function *func, AnalysisManager *AM, bool &changed) { if (DEBUG) { std::cout << " Starting GVN analysis for function: " << func->getName() << std::endl; @@ -90,7 +116,10 @@ void GVNContext::run(Function *func, AnalysisManager *AM, bool &changed) { } // 清空状态 - hashtable.clear(); + valueToNumber.clear(); + numberToValue.clear(); + expressionToNumber.clear(); + nextValueNumber = 1; visited.clear(); rpoBlocks.clear(); needRemove.clear(); @@ -110,14 +139,7 @@ void GVNContext::run(Function *func, AnalysisManager *AM, bool &changed) { << ": " << bb->getName() << std::endl; } - int instCount = 0; - for (auto &instPtr : bb->getInstructions()) { - if (DEBUG) { - std::cout << " Processing instruction " << ++instCount - << ": " << instPtr->getName() << std::endl; - } - visitInstruction(instPtr.get()); - } + processBasicBlock(bb, changed); } if (DEBUG) { @@ -125,24 +147,11 @@ void GVNContext::run(Function *func, AnalysisManager *AM, bool &changed) { } // 删除冗余指令 - int removeCount = 0; - for (auto inst : needRemove) { - auto bb = inst->getParent(); - if (DEBUG) { - std::cout << " Removing redundant instruction " << ++removeCount - << "/" << needRemove.size() << ": " << inst->getName() << std::endl; - } - // 删除指令前先断开所有使用关系 - inst->replaceAllUsesWith(nullptr); - // 使用基本块的删除方法 - // bb->removeInst(inst); - SysYIROptUtils::usedelete(inst); - changed = true; - } + eliminateRedundantInstructions(changed); if (DEBUG) { std::cout << " GVN analysis completed for function: " << func->getName() << std::endl; - std::cout << " Total instructions analyzed: " << hashtable.size() << std::endl; + std::cout << " Total values numbered: " << valueToNumber.size() << std::endl; std::cout << " Instructions eliminated: " << needRemove.size() << std::endl; } } @@ -175,599 +184,309 @@ void GVNContext::dfs(BasicBlock *bb) { rpoBlocks.push_back(bb); } -Value *GVNContext::checkHashtable(Value *value) { - // 避免无限递归:如果已经在哈希表中,直接返回映射的值 - if (auto it = hashtable.find(value); it != hashtable.end()) { - if (DEBUG >= 2) { - std::cout << " Found " << value->getName() << " in hashtable, mapped to " - << it->second->getName() << std::endl; - } +unsigned GVNContext::getValueNumber(Value* value) { + // 如果已经有值编号,直接返回 + auto it = valueToNumber.find(value); + if (it != valueToNumber.end()) { return it->second; } + + // 为新值分配编号 + return assignValueNumber(value); +} - // 如果是指令,尝试获取其值编号 - if (auto inst = dynamic_cast(value)) { - if (auto valueNumber = getValueNumber(inst)) { - // 如果找到了等价的值,建立映射关系 - if (valueNumber != inst) { - hashtable[value] = valueNumber; - if (DEBUG >= 2) { - std::cout << " Mapping " << value->getName() << " to equivalent value " - << valueNumber->getName() << std::endl; - } - return valueNumber; - } - } - } - - // 没有找到等价值,将自己映射到自己 - hashtable[value] = value; +unsigned GVNContext::assignValueNumber(Value* value) { + unsigned number = nextValueNumber++; + valueToNumber[value] = number; + numberToValue[number] = value; + if (DEBUG >= 2) { - std::cout << " Mapping " << value->getName() << " to itself (unique)" << std::endl; + std::cout << " Assigned value number " << number + << " to " << value->getName() << std::endl; } - return value; + + return number; } -Value *GVNContext::getValueNumber(Instruction *inst) { - if (auto binary = dynamic_cast(inst)) { - return getValueNumber(binary); - } else if (auto unary = dynamic_cast(inst)) { - return getValueNumber(unary); - } else if (auto gep = dynamic_cast(inst)) { - return getValueNumber(gep); - } else if (auto load = dynamic_cast(inst)) { - return getValueNumber(load); - } else if (auto call = dynamic_cast(inst)) { - // 只为无副作用的函数调用进行GVN - if (sideEffectAnalysis && sideEffectAnalysis->isPureFunction(call->getCallee())) { - return getValueNumber(call); - } - return nullptr; - } - - return nullptr; -} - -Value *GVNContext::getValueNumber(BinaryInst *inst) { - auto lhs = checkHashtable(inst->getLhs()); - auto rhs = checkHashtable(inst->getRhs()); - - if (DEBUG) { - std::cout << " Checking binary instruction: " << inst->getName() - << " (kind: " << static_cast(inst->getKind()) << ")" << std::endl; - } - - for (auto [key, value] : hashtable) { - if (auto binary = dynamic_cast(key)) { - auto binLhs = checkHashtable(binary->getLhs()); - auto binRhs = checkHashtable(binary->getRhs()); - - if (binary->getKind() == inst->getKind()) { - // 检查操作数是否匹配 - bool operandsMatch = false; - if (lhs == binLhs && rhs == binRhs) { - operandsMatch = true; - } else if (inst->isCommutative() && lhs == binRhs && rhs == binLhs) { - operandsMatch = true; - } - - if (operandsMatch) { - // 检查支配关系,确保替换是安全的 - if (canReplace(inst, binary)) { - // 对于涉及load指令的情况,需要特别检查 - bool hasLoadOperands = (dynamic_cast(lhs) != nullptr) || - (dynamic_cast(rhs) != nullptr); - - if (hasLoadOperands) { - // 检查是否有任何load操作数之间有intervening store - bool hasIntervening = false; - - auto loadLhs = dynamic_cast(lhs); - auto loadRhs = dynamic_cast(rhs); - auto binLoadLhs = dynamic_cast(binLhs); - auto binLoadRhs = dynamic_cast(binRhs); - - if (loadLhs && binLoadLhs) { - if (hasInterveningStore(binLoadLhs, loadLhs, checkHashtable(loadLhs->getPointer()))) { - hasIntervening = true; - } - } - - if (!hasIntervening && loadRhs && binLoadRhs) { - if (hasInterveningStore(binLoadRhs, loadRhs, checkHashtable(loadRhs->getPointer()))) { - hasIntervening = true; - } - } - - // 对于交换操作数的情况,也需要检查 - if (!hasIntervening && inst->isCommutative()) { - if (loadLhs && binLoadRhs) { - if (hasInterveningStore(binLoadRhs, loadLhs, checkHashtable(loadLhs->getPointer()))) { - hasIntervening = true; - } - } - - if (!hasIntervening && loadRhs && binLoadLhs) { - if (hasInterveningStore(binLoadLhs, loadRhs, checkHashtable(loadRhs->getPointer()))) { - hasIntervening = true; - } - } - } - - if (hasIntervening) { - if (DEBUG) { - std::cout << " Found equivalent binary but load operands have intervening store, skipping" << std::endl; - } - continue; - } - } - - if (DEBUG) { - std::cout << " Found equivalent binary instruction: " << binary->getName() << std::endl; - } - return value; - } else { - if (DEBUG) { - std::cout << " Found equivalent binary but dominance check failed: " << binary->getName() << std::endl; - } - } - } - } - } - } - - if (DEBUG) { - std::cout << " No equivalent binary instruction found" << std::endl; - } - return inst; -} - -Value *GVNContext::getValueNumber(UnaryInst *inst) { - auto operand = checkHashtable(inst->getOperand()); - - for (auto [key, value] : hashtable) { - if (auto unary = dynamic_cast(key)) { - auto unOperand = checkHashtable(unary->getOperand()); - - if (unary->getKind() == inst->getKind() && operand == unOperand) { - return value; - } - } - } - - return inst; -} - -Value *GVNContext::getValueNumber(GetElementPtrInst *inst) { - auto ptr = checkHashtable(inst->getBasePointer()); - std::vector indices; - - // 使用正确的索引访问方法 - for (unsigned i = 0; i < inst->getNumIndices(); ++i) { - indices.push_back(checkHashtable(inst->getIndex(i))); - } - - for (auto [key, value] : hashtable) { - if (auto gep = dynamic_cast(key)) { - auto gepPtr = checkHashtable(gep->getBasePointer()); - - if (ptr == gepPtr && gep->getNumIndices() == inst->getNumIndices()) { - bool indicesMatch = true; - for (unsigned i = 0; i < inst->getNumIndices(); ++i) { - if (checkHashtable(gep->getIndex(i)) != indices[i]) { - indicesMatch = false; - break; - } - } - - if (indicesMatch && inst->getType() == gep->getType()) { - return value; - } - } - } - } - - return inst; -} - -Value *GVNContext::getValueNumber(LoadInst *inst) { - auto ptr = checkHashtable(inst->getPointer()); - - if (DEBUG) { - std::cout << " Checking load instruction: " << inst->getName() - << " from address: " << ptr->getName() << std::endl; - } - - for (auto [key, value] : hashtable) { - if (auto load = dynamic_cast(key)) { - auto loadPtr = checkHashtable(load->getPointer()); - - if (ptr == loadPtr && inst->getType() == load->getType()) { - if (DEBUG) { - std::cout << " Found potential equivalent load: " << load->getName() << std::endl; - } - - // 检查支配关系:load 必须支配 inst - if (!canReplace(inst, load)) { - if (DEBUG) { - std::cout << " Equivalent load does not dominate current load, skipping" << std::endl; - } - continue; - } - - // 检查是否有中间的store指令影响 - if (hasInterveningStore(load, inst, ptr)) { - if (DEBUG) { - std::cout << " Found intervening store, cannot reuse load value" << std::endl; - } - continue; // 如果有store指令,不能复用之前的load - } - - if (DEBUG) { - std::cout << " Can safely reuse load value from: " << load->getName() << std::endl; - } - return value; - } - } - } - - if (DEBUG) { - std::cout << " No equivalent load found" << std::endl; - } - return inst; -} - -Value *GVNContext::getValueNumber(CallInst *inst) { - // 此时已经确认是无副作用的函数调用,可以安全进行GVN - for (auto [key, value] : hashtable) { - if (auto call = dynamic_cast(key)) { - if (call->getCallee() == inst->getCallee() && call->getNumOperands() == inst->getNumOperands()) { - - bool argsMatch = true; - // 跳过第一个操作数(函数指针),从参数开始比较 - for (size_t i = 1; i < inst->getNumOperands(); ++i) { - if (checkHashtable(inst->getOperand(i)) != checkHashtable(call->getOperand(i))) { - argsMatch = false; - break; - } - } - - if (argsMatch) { - return value; - } - } - } - } - - return inst; -} - -void GVNContext::visitInstruction(Instruction *inst) { - // 跳过分支指令 - if (inst->isBranch()) { +void GVNContext::processBasicBlock(BasicBlock* bb, bool& changed) { + int instCount = 0; + for (auto &instPtr : bb->getInstructions()) { if (DEBUG) { - std::cout << " Skipping branch instruction: " << inst->getName() << std::endl; + std::cout << " Processing instruction " << ++instCount + << ": " << instPtr->getName() << std::endl; + } + + if (processInstruction(instPtr.get())) { + changed = true; } - return; } +} - // 如果是store指令,需要清理hashtable中可能被影响的load指令 - if (auto storeInst = dynamic_cast(inst)) { - invalidateLoadsAffectedByStore(storeInst); +bool GVNContext::processInstruction(Instruction* inst) { + // 跳过分支指令和其他不可优化的指令 + if (inst->isBranch() || dynamic_cast(inst) || + dynamic_cast(inst) || dynamic_cast(inst)) { + + // 如果是store指令,需要使相关的内存值失效 + if (auto store = dynamic_cast(inst)) { + invalidateMemoryValues(store); + } + + // 为这些指令分配值编号但不尝试优化 + getValueNumber(inst); + return false; } - + if (DEBUG) { - std::cout << " Visiting instruction: " << inst->getName() + std::cout << " Processing optimizable instruction: " << inst->getName() << " (kind: " << static_cast(inst->getKind()) << ")" << std::endl; } - - auto value = checkHashtable(inst); - - if (inst != value) { - if (auto instValue = dynamic_cast(value)) { - if (canReplace(inst, instValue)) { - inst->replaceAllUsesWith(instValue); - needRemove.insert(inst); - + + // 构建表达式键 + std::string exprKey = buildExpressionKey(inst); + if (exprKey.empty()) { + // 不可优化的指令,只分配值编号 + getValueNumber(inst); + return false; + } + + if (DEBUG >= 2) { + std::cout << " Expression key: " << exprKey << std::endl; + } + + // 查找已存在的等价值 + Value* existing = findExistingValue(exprKey, inst); + if (existing && existing != inst) { + // 检查支配关系 + if (auto existingInst = dynamic_cast(existing)) { + if (dominates(existingInst, inst)) { if (DEBUG) { - std::cout << " GVN: Replacing redundant instruction " << inst->getName() - << " with existing instruction " << instValue->getName() << std::endl; + std::cout << " GVN: Replacing " << inst->getName() + << " with existing " << existing->getName() << std::endl; } + + // 用已存在的值替换当前指令 + inst->replaceAllUsesWith(existing); + needRemove.insert(inst); + + // 将当前指令的值编号指向已存在的值 + unsigned existingNumber = getValueNumber(existing); + valueToNumber[inst] = existingNumber; + + return true; } else { if (DEBUG) { - std::cout << " Cannot replace instruction " << inst->getName() - << " with " << instValue->getName() << " (dominance check failed)" << std::endl; + std::cout << " Found equivalent but dominance check failed" << std::endl; } } } - } else { - if (DEBUG) { - std::cout << " Instruction " << inst->getName() << " is unique" << std::endl; - } } -} - -bool GVNContext::canReplace(Instruction *original, Value *replacement) { - auto replInst = dynamic_cast(replacement); - if (!replInst) { - return true; // 替换为常量总是安全的 + + // 没有找到等价值,为这个表达式分配新的值编号 + unsigned number = assignValueNumber(inst); + expressionToNumber[exprKey] = number; + + if (DEBUG) { + std::cout << " Instruction " << inst->getName() << " is unique" << std::endl; } - - auto originalBB = original->getParent(); - auto replBB = replInst->getParent(); - - // 如果replacement是Call指令,需要特殊处理 - if (auto callInst = dynamic_cast(replInst)) { - if (sideEffectAnalysis && !sideEffectAnalysis->isPureFunction(callInst->getCallee())) { - // 对于有副作用的函数,只有在同一个基本块且相邻时才能替换 - if (originalBB != replBB) { - return false; - } - - // 检查指令顺序 - auto &insts = originalBB->getInstructions(); - auto origIt = - std::find_if(insts.begin(), insts.end(), [original](const auto &ptr) { return ptr.get() == original; }); - auto replIt = - std::find_if(insts.begin(), insts.end(), [replInst](const auto &ptr) { return ptr.get() == replInst; }); - - if (origIt == insts.end() || replIt == insts.end()) { - return false; - } - - return std::abs(std::distance(origIt, replIt)) == 1; - } - } - - // 简单的支配关系检查:如果在同一个基本块,检查指令顺序 - if (originalBB == replBB) { - auto &insts = originalBB->getInstructions(); - auto origIt = - std::find_if(insts.begin(), insts.end(), [original](const auto &ptr) { return ptr.get() == original; }); - auto replIt = - std::find_if(insts.begin(), insts.end(), [replInst](const auto &ptr) { return ptr.get() == replInst; }); - - if (origIt == insts.end() || replIt == insts.end()) { - if (DEBUG) { - std::cout << " Cannot find instructions in basic block for dominance check" << std::endl; - } - return false; - } - - // 替换指令必须在原指令之前(支配原指令) - bool canRepl = std::distance(insts.begin(), replIt) < std::distance(insts.begin(), origIt); - if (DEBUG) { - std::cout << " Same block dominance check: " << (canRepl ? "PASS" : "FAIL") - << " (repl at " << std::distance(insts.begin(), replIt) - << ", orig at " << std::distance(insts.begin(), origIt) << ")" << std::endl; - } - return canRepl; - } - - // 使用支配关系检查(如果支配树分析可用) - if (domTree) { - auto dominators = domTree->getDominators(originalBB); - if (dominators && dominators->count(replBB)) { - return true; - } - } - + return false; } -bool GVNContext::hasInterveningStore(LoadInst* earlierLoad, LoadInst* laterLoad, Value* ptr) { - // 如果两个load在不同的基本块,需要更复杂的分析 +std::string GVNContext::buildExpressionKey(Instruction* inst) { + std::ostringstream oss; + + if (auto binary = dynamic_cast(inst)) { + oss << "binary_" << static_cast(binary->getKind()) << "_"; + oss << getValueNumber(binary->getLhs()) << "_" << getValueNumber(binary->getRhs()); + + // 对于可交换操作,确保操作数顺序一致 + if (binary->isCommutative()) { + unsigned lhsNum = getValueNumber(binary->getLhs()); + unsigned rhsNum = getValueNumber(binary->getRhs()); + if (lhsNum > rhsNum) { + oss.str(""); + oss << "binary_" << static_cast(binary->getKind()) << "_"; + oss << rhsNum << "_" << lhsNum; + } + } + } else if (auto unary = dynamic_cast(inst)) { + oss << "unary_" << static_cast(unary->getKind()) << "_"; + oss << getValueNumber(unary->getOperand()); + } else if (auto gep = dynamic_cast(inst)) { + oss << "gep_" << getValueNumber(gep->getBasePointer()); + for (unsigned i = 0; i < gep->getNumIndices(); ++i) { + oss << "_" << getValueNumber(gep->getIndex(i)); + } + } else if (auto load = dynamic_cast(inst)) { + oss << "load_" << getValueNumber(load->getPointer()); + oss << "_" << reinterpret_cast(load->getType()); // 类型区分 + } else if (auto call = dynamic_cast(inst)) { + // 只为无副作用的函数调用建立表达式 + if (sideEffectAnalysis && sideEffectAnalysis->isPureFunction(call->getCallee())) { + oss << "call_" << call->getCallee()->getName(); + for (size_t i = 1; i < call->getNumOperands(); ++i) { // 跳过函数指针 + oss << "_" << getValueNumber(call->getOperand(i)); + } + } else { + return ""; // 有副作用的函数调用不可优化 + } + } else { + return ""; // 不支持的指令类型 + } + + return oss.str(); +} + +Value* GVNContext::findExistingValue(const std::string& exprKey, Instruction* inst) { + auto it = expressionToNumber.find(exprKey); + if (it != expressionToNumber.end()) { + unsigned number = it->second; + auto valueIt = numberToValue.find(number); + if (valueIt != numberToValue.end()) { + Value* existing = valueIt->second; + + // 对于load指令,需要额外检查内存安全性 + if (auto loadInst = dynamic_cast(inst)) { + if (auto existingLoad = dynamic_cast(existing)) { + if (!isMemorySafe(existingLoad, loadInst)) { + return nullptr; + } + } + } + + return existing; + } + } + return nullptr; +} + +bool GVNContext::dominates(Instruction* a, Instruction* b) { + auto aBB = a->getParent(); + auto bBB = b->getParent(); + + // 同一基本块内的情况 + if (aBB == bBB) { + auto &insts = aBB->getInstructions(); + auto aIt = std::find_if(insts.begin(), insts.end(), + [a](const auto &ptr) { return ptr.get() == a; }); + auto bIt = std::find_if(insts.begin(), insts.end(), + [b](const auto &ptr) { return ptr.get() == b; }); + + if (aIt == insts.end() || bIt == insts.end()) { + return false; + } + + return std::distance(insts.begin(), aIt) < std::distance(insts.begin(), bIt); + } + + // 不同基本块的情况,使用支配树 + if (domTree) { + auto dominators = domTree->getDominators(bBB); + return dominators && dominators->count(aBB); + } + + return false; // 保守做法 +} + +bool GVNContext::isMemorySafe(LoadInst* earlierLoad, LoadInst* laterLoad) { + // 检查两个load是否访问相同的内存位置 + unsigned earlierPtr = getValueNumber(earlierLoad->getPointer()); + unsigned laterPtr = getValueNumber(laterLoad->getPointer()); + + if (earlierPtr != laterPtr) { + return false; // 不同的内存位置 + } + + // 检查类型是否匹配 + if (earlierLoad->getType() != laterLoad->getType()) { + return false; + } + + // 简单情况:如果在同一个基本块且没有中间的store,则安全 auto earlierBB = earlierLoad->getParent(); auto laterBB = laterLoad->getParent(); if (earlierBB != laterBB) { - // 跨基本块的情况:为了安全起见,暂时认为有intervening store - // 这是保守的做法,可能会错过一些优化机会,但确保正确性 - if (DEBUG) { - std::cout << " Cross-block load optimization: conservatively assuming intervening store" << std::endl; - } - return true; + // 跨基本块的情况需要更复杂的分析,暂时保守处理 + return false; } - // 同一基本块内的情况:检查指令序列 + // 同一基本块内检查是否有中间的store auto &insts = earlierBB->getInstructions(); - - // 找到两个load指令的位置 - auto earlierIt = std::find_if(insts.begin(), insts.end(), + auto earlierIt = std::find_if(insts.begin(), insts.end(), [earlierLoad](const auto &ptr) { return ptr.get() == earlierLoad; }); auto laterIt = std::find_if(insts.begin(), insts.end(), [laterLoad](const auto &ptr) { return ptr.get() == laterLoad; }); if (earlierIt == insts.end() || laterIt == insts.end()) { - if (DEBUG) { - std::cout << " Could not find load instructions in basic block" << std::endl; - } - return true; // 找不到指令,保守返回true + return false; } - // 确定实际的执行顺序(哪个load在前,哪个在后) - auto firstIt = earlierIt; - auto secondIt = laterIt; - - if (std::distance(insts.begin(), earlierIt) > std::distance(insts.begin(), laterIt)) { - // 如果"earlier"实际上在"later"之后,交换它们 - firstIt = laterIt; - secondIt = earlierIt; - if (DEBUG) { - std::cout << " Swapped load order: " << laterLoad->getName() - << " actually comes before " << earlierLoad->getName() << std::endl; - } + // 确保earlierLoad真的在laterLoad之前 + if (std::distance(insts.begin(), earlierIt) >= std::distance(insts.begin(), laterIt)) { + return false; } - // 检查两个load之间的所有指令 - for (auto it = std::next(firstIt); it != secondIt; ++it) { - auto inst = it->get(); - - // 检查是否是store指令 - if (auto storeInst = dynamic_cast(inst)) { - auto storePtr = checkHashtable(storeInst->getPointer()); - - // 如果store的目标地址与load的地址相同,说明内存被修改了 - if (storePtr == ptr) { - if (DEBUG) { - std::cout << " Found intervening store to same address: " << storeInst->getName() << std::endl; - } - return true; + // 检查中间是否有store指令修改了相同的内存位置 + for (auto it = std::next(earlierIt); it != laterIt; ++it) { + if (auto store = dynamic_cast(it->get())) { + unsigned storePtr = getValueNumber(store->getPointer()); + if (storePtr == earlierPtr) { + return false; // 找到中间的store } - - // TODO: 这里还应该检查别名分析,看store是否可能影响load的地址 - // 为了简化,现在只检查精确匹配 } // 检查函数调用是否可能修改内存 - if (auto callInst = dynamic_cast(inst)) { - if (sideEffectAnalysis && !sideEffectAnalysis->isPureFunction(callInst->getCallee())) { - // 如果是有副作用的函数调用,且load的是全局变量,则可能被修改 - if (auto globalPtr = dynamic_cast(ptr)) { - if (DEBUG) { - std::cout << " Found function call that may modify global variable: " << callInst->getName() << std::endl; - } - return true; - } - // TODO: 这里还应该检查函数是否可能修改通过指针参数传递的内存 + if (auto call = dynamic_cast(it->get())) { + if (sideEffectAnalysis && !sideEffectAnalysis->isPureFunction(call->getCallee())) { + // 保守处理:有副作用的函数可能修改内存 + return false; } } } - if (DEBUG) { - std::cout << " No intervening store found between loads" << std::endl; - } - - return false; // 没有找到会修改内存的指令 + return true; // 安全 } -void GVNContext::invalidateLoadsAffectedByStore(StoreInst* storeInst) { - auto storePtr = checkHashtable(storeInst->getPointer()); +void GVNContext::invalidateMemoryValues(StoreInst* store) { + unsigned storePtr = getValueNumber(store->getPointer()); if (DEBUG) { - std::cout << " Invalidating loads affected by store to address" << std::endl; + std::cout << " Invalidating memory values affected by store" << std::endl; } - // 查找hashtable中所有可能被这个store影响的指令 - std::vector toRemove; - std::set invalidatedLoads; + // 找到所有可能被这个store影响的load表达式 + std::vector toRemove; - // 第一步:找到所有被直接影响的load指令 - for (auto& [key, value] : hashtable) { - if (auto loadInst = dynamic_cast(key)) { - auto loadPtr = checkHashtable(loadInst->getPointer()); - - // 如果load的地址与store的地址相同,则需要从hashtable中移除 - if (loadPtr == storePtr) { - toRemove.push_back(key); - invalidatedLoads.insert(loadInst); - if (DEBUG) { - std::cout << " Invalidating load from same address: " << loadInst->getName() << std::endl; - } + for (auto& [exprKey, number] : expressionToNumber) { + if (exprKey.find("load_" + std::to_string(storePtr)) == 0) { + toRemove.push_back(exprKey); + if (DEBUG) { + std::cout << " Invalidating expression: " << exprKey << std::endl; } } } - // 第二步:找到所有依赖被失效load的指令(如binary指令) - bool foundMore = true; - while (foundMore) { - foundMore = false; - std::vector additionalToRemove; - - for (auto& [key, value] : hashtable) { - // 跳过已经标记要删除的指令 - if (std::find(toRemove.begin(), toRemove.end(), key) != toRemove.end()) { - continue; - } - - bool shouldInvalidate = false; - - // 检查binary指令的操作数 - if (auto binaryInst = dynamic_cast(key)) { - auto lhs = checkHashtable(binaryInst->getLhs()); - auto rhs = checkHashtable(binaryInst->getRhs()); - - if (invalidatedLoads.count(lhs) || invalidatedLoads.count(rhs)) { - shouldInvalidate = true; - if (DEBUG) { - std::cout << " Invalidating binary instruction due to invalidated operand: " - << binaryInst->getName() << std::endl; - } - } - } - // 检查unary指令的操作数 - else if (auto unaryInst = dynamic_cast(key)) { - auto operand = checkHashtable(unaryInst->getOperand()); - if (invalidatedLoads.count(operand)) { - shouldInvalidate = true; - if (DEBUG) { - std::cout << " Invalidating unary instruction due to invalidated operand: " - << unaryInst->getName() << std::endl; - } - } - } - // 检查GEP指令的操作数 - else if (auto gepInst = dynamic_cast(key)) { - auto basePtr = checkHashtable(gepInst->getBasePointer()); - if (invalidatedLoads.count(basePtr)) { - shouldInvalidate = true; - } else { - // 检查索引操作数 - for (unsigned i = 0; i < gepInst->getNumIndices(); ++i) { - if (invalidatedLoads.count(checkHashtable(gepInst->getIndex(i)))) { - shouldInvalidate = true; - break; - } - } - } - if (shouldInvalidate && DEBUG) { - std::cout << " Invalidating GEP instruction due to invalidated operand: " - << gepInst->getName() << std::endl; - } - } - - if (shouldInvalidate) { - additionalToRemove.push_back(key); - if (auto inst = dynamic_cast(key)) { - invalidatedLoads.insert(inst); - } - foundMore = true; - } - } - - // 将新找到的失效指令加入移除列表 - toRemove.insert(toRemove.end(), additionalToRemove.begin(), additionalToRemove.end()); - } - - // 从hashtable中移除所有被影响的指令 - for (auto key : toRemove) { - hashtable.erase(key); - } - - if (DEBUG && toRemove.size() > invalidatedLoads.size()) { - std::cout << " Total invalidated instructions: " << toRemove.size() - << " (including " << (toRemove.size() - invalidatedLoads.size()) << " dependent instructions)" << std::endl; + // 移除失效的表达式 + for (const auto& key : toRemove) { + expressionToNumber.erase(key); } } -std::string GVNContext::getCanonicalExpression(Instruction *inst) { - std::ostringstream oss; - - if (auto binary = dynamic_cast(inst)) { - oss << "binary_" << static_cast(binary->getKind()) << "_"; - oss << checkHashtable(binary->getLhs()) << "_"; - oss << checkHashtable(binary->getRhs()); - } else if (auto unary = dynamic_cast(inst)) { - oss << "unary_" << static_cast(unary->getKind()) << "_"; - oss << checkHashtable(unary->getOperand()); - } else if (auto gep = dynamic_cast(inst)) { - oss << "gep_" << checkHashtable(gep->getBasePointer()); - for (unsigned i = 0; i < gep->getNumIndices(); ++i) { - oss << "_" << checkHashtable(gep->getIndex(i)); +void GVNContext::eliminateRedundantInstructions(bool& changed) { + int removeCount = 0; + for (auto inst : needRemove) { + if (DEBUG) { + std::cout << " Removing redundant instruction " << ++removeCount + << "/" << needRemove.size() << ": " << inst->getName() << std::endl; } + + // 删除指令前先断开所有使用关系 + // inst->replaceAllUsesWith 已在 processInstruction 中调用 + SysYIROptUtils::usedelete(inst); + changed = true; } - - return oss.str(); } } // namespace sysy