Compare commits
29 Commits
midend-Loo
...
backend-fm
| Author | SHA1 | Date | |
|---|---|---|---|
| 07fd22def1 | |||
| 7be5d25372 | |||
| fdba73d5e1 | |||
| 8cabb1f195 | |||
| fa33bf5134 | |||
| a3435e7c26 | |||
| 7547d34598 | |||
| 06a368db39 | |||
| 48865fa805 | |||
| 8b5123460b | |||
| cd27f5fda9 | |||
| 60cb8d6e49 | |||
| ea944f6ba0 | |||
| 0c8a156485 | |||
| a958435836 | |||
| 33ca8ecf34 | |||
| d439ef7e8e | |||
| 3ba12bf320 | |||
| 167c2ac2ae | |||
| 32684d8255 | |||
| f2477c4af3 | |||
| b1efd481b4 | |||
| 676880ca05 | |||
| df50eedaeb | |||
| dcc075b39c | |||
| f7f1cf2b41 | |||
| 881c2a9723 | |||
| b5f14d9385 | |||
| 72b06c67ca |
185
Pass_ID_List.md
185
Pass_ID_List.md
@ -228,10 +228,193 @@ Branch 和 Return 指令: 这些是终结符指令,不产生一个可用于其
|
||||
|
||||
在提供的代码中,SSAPValue 的 constantVal 是 int 类型。这使得浮点数常量传播变得复杂。对于浮点数相关的指令(kFAdd, kFMul, kFCmp, kFNeg, kFNot, kItoF, kFtoI 等),如果不能将浮点值准确地存储在 int 中,或者不能可靠地执行浮点运算,那么通常会保守地将结果设置为 Bottom。一个更完善的 SCCP 实现会使用 std::variant<int, float> 或独立的浮点常量存储来处理浮点数。
|
||||
|
||||
## LoopSR循环归纳变量强度削弱 关于魔数计算的说明
|
||||
|
||||
魔数除法的核心思想是:将除法转换为乘法和移位
|
||||
|
||||
数学原理:x / d ≈ (x * m) >> (32 + s)
|
||||
|
||||
m 是魔数 (magic number)
|
||||
s 是额外的移位量 (shift)
|
||||
>> 是算术右移
|
||||
|
||||
2^(32+s) / d ≤ m < 2^(32+s) / d + 2^s / d
|
||||
|
||||
cd /home/downright/Compiler_Opt/mysysy && python3 -c "
|
||||
# 真正的迭代原因:精度要求
|
||||
def explain_precision_requirement():
|
||||
d = 10
|
||||
|
||||
print('魔数算法需要找到精确的边界值:')
|
||||
print('目标:2^p > d * (2^31 - r),其中r是余数')
|
||||
print()
|
||||
|
||||
# 模拟我们算法的迭代过程
|
||||
p = 31
|
||||
two_p = 2**p
|
||||
r = two_p % d # 余数
|
||||
m = two_p // d # 商
|
||||
|
||||
print(f'初始状态 (p={p}):')
|
||||
print(f' 2^{p} = {two_p:,}')
|
||||
print(f' 2^{p} / {d} = {m:,} 余 {r}')
|
||||
print(f' 需要检查: 2^{p} > {d} * (2^31 - {r}) = {d * (2**31 - r):,}')
|
||||
print(f' 条件满足: {two_p > d * (2**31 - r)}')
|
||||
print()
|
||||
|
||||
# 继续迭代直到条件满足
|
||||
iteration = 0
|
||||
while r != 0 and m < 2**31:
|
||||
iteration += 1
|
||||
p += 1
|
||||
two_p *= 2
|
||||
m *= 2
|
||||
r *= 2
|
||||
if r >= d:
|
||||
m += 1
|
||||
r -= d
|
||||
|
||||
print(f'迭代 {iteration} (p={p}):')
|
||||
print(f' m = {m:,}, r = {r}')
|
||||
print(f' m < 2^31? {m < 2**31} (2^31 = {2**31:,})')
|
||||
|
||||
if m >= 2**31:
|
||||
print(f' ✗ 魔数 {m:,} 超出32位范围!')
|
||||
break
|
||||
elif r == 0:
|
||||
print(f' ✓ 余数为0,找到精确解')
|
||||
break
|
||||
|
||||
if iteration > 10: # 防止无限循环
|
||||
print(' ... (限制迭代次数)')
|
||||
break
|
||||
|
||||
explain_precision_requirement()
|
||||
"
|
||||
|
||||
问题在于精度要求。理论魔数需要非常精确,不能简单地用 2^32 / d。让我展示真正的迭代原因:
|
||||
|
||||
cd /home/downright/Compiler_Opt/mysysy && python3 -c "
|
||||
# 真正的迭代原因:精度要求
|
||||
def explain_precision_requirement():
|
||||
d = 10
|
||||
|
||||
print('魔数算法需要找到精确的边界值:')
|
||||
print('目标:2^p > d * (2^31 - r),其中r是余数')
|
||||
print()
|
||||
|
||||
# 模拟我们算法的迭代过程
|
||||
p = 31
|
||||
two_p = 2**p
|
||||
r = two_p % d # 余数
|
||||
m = two_p // d # 商
|
||||
|
||||
print(f'初始状态 (p={p}):')
|
||||
print(f' 2^{p} = {two_p:,}')
|
||||
print(f' 2^{p} / {d} = {m:,} 余 {r}')
|
||||
print(f' 需要检查: 2^{p} > {d} * (2^31 - {r}) = {d * (2**31 - r):,}')
|
||||
print(f' 条件满足: {two_p > d * (2**31 - r)}')
|
||||
print()
|
||||
|
||||
# 继续迭代直到条件满足
|
||||
iteration = 0
|
||||
while r != 0 and m < 2**31:
|
||||
iteration += 1
|
||||
p += 1
|
||||
two_p *= 2
|
||||
m *= 2
|
||||
r *= 2
|
||||
if r >= d:
|
||||
m += 1
|
||||
r -= d
|
||||
|
||||
print(f'迭代 {iteration} (p={p}):')
|
||||
print(f' m = {m:,}, r = {r}')
|
||||
print(f' m < 2^31? {m < 2**31} (2^31 = {2**31:,})')
|
||||
|
||||
if m >= 2**31:
|
||||
print(f' ✗ 魔数 {m:,} 超出32位范围!')
|
||||
break
|
||||
elif r == 0:
|
||||
print(f' ✓ 余数为0,找到精确解')
|
||||
break
|
||||
|
||||
if iteration > 10: # 防止无限循环
|
||||
print(' ... (限制迭代次数)')
|
||||
break
|
||||
|
||||
explain_precision_requirement()
|
||||
"
|
||||
|
||||
|
||||
cd /home/downright/Compiler_Opt/mysysy && python3 -c "
|
||||
# 检查初始状态是否会溢出
|
||||
def check_initial_overflow():
|
||||
int32_max = 2**31 - 1
|
||||
|
||||
print('检查不同除数的初始魔数是否会溢出:')
|
||||
print(f'32位有符号整数最大值: {int32_max:,}')
|
||||
print()
|
||||
|
||||
# 检查各种除数
|
||||
divisors = [2, 3, 5, 7, 9, 10, 11, 13, 17, 25, 100, 1000]
|
||||
|
||||
for d in divisors:
|
||||
# 初始魔数 = 2^31 / d
|
||||
initial_magic = (2**31) // d
|
||||
|
||||
overflow = initial_magic > int32_max
|
||||
status = \"溢出\" if overflow else \"安全\"
|
||||
|
||||
print(f'd={d:4d}: 初始魔数 = 2^31/{d} = {initial_magic:10,} [{status}]')
|
||||
|
||||
print()
|
||||
print('结论: 初始状态下魔数不会溢出,溢出发生在迭代过程中')
|
||||
|
||||
check_initial_overflow()
|
||||
"
|
||||
|
||||
总结
|
||||
迭代的必要性:
|
||||
|
||||
不是为了避免初始溢出(初始状态安全)
|
||||
是为了找到最精确的魔数,减少舍入误差
|
||||
每次迭代提高一倍精度,但魔数也翻倍
|
||||
溢出发生时机:
|
||||
|
||||
初始状态:2^31 / d 总是在32位范围内
|
||||
迭代过程:2^32 / d, 2^33 / d, ... 逐渐超出32位范围
|
||||
回退值的正确性:
|
||||
|
||||
回退值是基于数学理论和实践验证的标准值
|
||||
来自LLVM、GCC等成熟编译器的实现
|
||||
通过测试验证,对各种输入都能产生正确结果
|
||||
算法设计哲学:
|
||||
|
||||
先尝试最优解:通过迭代寻找最精确的魔数
|
||||
检测边界条件:当超出32位范围时及时发现
|
||||
智能回退:使用已验证的标准值保证正确性
|
||||
保持通用性:对于没有预设值的除数仍然可以工作
|
||||
|
||||
## 死归纳变量消除
|
||||
|
||||
整体架构和工作流程
|
||||
当前的归纳变量消除优化分为三个清晰的阶段:
|
||||
|
||||
识别阶段:找出所有潜在的死归纳变量
|
||||
安全性分析阶段:验证每个变量消除的安全性
|
||||
消除执行阶段:实际删除安全的死归纳变量
|
||||
|
||||
|
||||
逃逸点检测 (已修复的关键安全机制)
|
||||
数组索引检测:GEP指令被正确识别为逃逸点
|
||||
循环退出条件:用于比较和条件分支的归纳变量不会被消除
|
||||
控制流指令:condBr、br、return等被特殊处理为逃逸点
|
||||
内存操作:store/load指令经过别名分析检查
|
||||
|
||||
# 后续优化可能涉及的改动
|
||||
|
||||
## 1)将所有的alloca集中到entryblock中
|
||||
## 1)将所有的alloca集中到entryblock中(已实现)
|
||||
|
||||
好处:优化友好性,方便mem2reg提升
|
||||
目前没有实现这个机制,如果想要实现首先解决同一函数不同域的同名变量命名区分
|
||||
|
||||
272
doc/CompilerDesign.md
Normal file
272
doc/CompilerDesign.md
Normal file
@ -0,0 +1,272 @@
|
||||
# 编译器核心技术与优化详解
|
||||
|
||||
本文档深入剖析 mysysy 编译器的内部实现,重点阐述其在前端、中端和后端所采用的核心编译技术及优化算法,并结合具体实现函数进行说明。
|
||||
|
||||
## 1. 编译器整体架构
|
||||
|
||||
本编译器采用经典的三段式架构,将编译过程清晰地划分为前端、中端和后端三个主要部分。每个部分处理不同的抽象层级,并通过定义良好的接口(AST, IR)进行通信,实现了高度的模块化。
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
A[源代码 .sy] --> B{前端 Frontend};
|
||||
B --> C[抽象语法树 AST];
|
||||
C --> D{中端 Midend};
|
||||
D --> E[SSA-based IR];
|
||||
E -- 优化 --> F[优化后的 IR];
|
||||
F --> G{后端 Backend};
|
||||
G --> H[目标机代码 MachineInstr];
|
||||
H --> I[RISC-V 64 汇编代码 .s];
|
||||
|
||||
subgraph 前端
|
||||
B
|
||||
end
|
||||
subgraph 中端
|
||||
D
|
||||
end
|
||||
subgraph 后端
|
||||
G
|
||||
end
|
||||
```
|
||||
|
||||
- **前端 (Frontend)**:负责词法、语法、语义分析,将 SysY 源代码解析为抽象语法树 (AST)。
|
||||
- **中端 (Midend)**:基于 AST 生成与具体机器无关的中间表示 (IR),并在此基础上进行深入的分析和优化。
|
||||
- **后端 (Backend)**:将优化后的 IR 翻译成目标平台(RISC-V 64)的汇编代码。
|
||||
|
||||
---
|
||||
|
||||
## 2. 前端技术 (Frontend)
|
||||
|
||||
前端的核心任务是进行语法和语义的分析与验证,其工作流程如下:
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "前端处理流程"
|
||||
Source["源文件 (.sy)"] --> Lexer["词法分析器 (SysYLexer)"];
|
||||
Lexer --> TokenStream["Token 流"];
|
||||
TokenStream --> Parser["语法分析器 (SysYParser)"];
|
||||
Parser --> ParseTree["解析树"];
|
||||
ParseTree --> Visitor["AST构建 (SysYVisitor)"];
|
||||
Visitor --> AST[抽象语法树];
|
||||
end
|
||||
```
|
||||
|
||||
- **词法与语法分析**:
|
||||
- **技术**: 采用 **ANTLR (ANother Tool for Language Recognition)** 框架。通过在 `frontend/SysY.g4` 文件中定义的上下文无关文法,ANTLR 能够自动生成高效的 LL(*) 词法分析器 (`SysYLexer.cpp`) 和语法分析器 (`SysYParser.cpp`)。
|
||||
- **实现**: 词法分析器将字符流转换为记号 (Token) 流,语法分析器则根据文法规则将记号流组织成一棵解析树 (Parse Tree)。这棵树精确地反映了源代码的语法结构。
|
||||
|
||||
- **AST 构建**:
|
||||
- **技术**: 应用 **访问者 (Visitor) 设计模式** 遍历 ANTLR 生成的解析树。该模式将数据结构(解析树)与作用于其上的操作(AST构建逻辑)解耦。
|
||||
- **实现**: `frontend/SysYVisitor.cpp` 中定义了具体的遍历逻辑。在遍历过程中,会构建一个比解析树更抽象、更面向编译需求的**抽象语法树 (Abstract Syntax Tree, AST)**。AST 忽略了纯粹的语法细节(如括号、分号),只保留了核心的语义结构,是前端传递给中端的接口。
|
||||
|
||||
---
|
||||
|
||||
## 3. 中端技术与优化 (Midend)
|
||||
|
||||
中端是编译器的核心,所有与目标机器无关的分析和优化都在此阶段完成。
|
||||
|
||||
### 3.1. 中间表示 (IR) 及设计要点
|
||||
|
||||
- **技术**: 设计了一种三地址码(Three-Address Code)风格的中间表示,其形式和设计哲学深受 **LLVM IR** 的启发。IR 的核心特征是采用了**静态单赋值 (Static Single Assignment, SSA)** 形式。
|
||||
- **实现**: `midend/IR.cpp` 定义了 IR 的核心数据结构,如 `Instruction`, `BasicBlock`, `Function` 和 `Module`。`midend/SysYIRGenerator.cpp` 负责将前端的 AST 转换为这种 IR。在 SSA 形式下,每个变量只被赋值一次,使得变量的定义-使用关系(Def-Use Chain)变得异常清晰,极大地简化了后续的优化算法。通过继承并重写 SysYBaseVisitor 类,遍历 AST 节点生成自定义 IR,并在 IR 生成阶段实现了简单的常量传播和公共子表达式消除(CSE)。
|
||||
- **设计要点**:
|
||||
- **`alloca` 指令集中管理**:
|
||||
所有 `alloca` 指令统一放置在入口基本块,并与实际计算指令分离。这有助于后续指令调度器专注于优化计算密集型指令的执行顺序,避免内存分配指令的干扰。
|
||||
- **消除 `fallthrough` 现象**:
|
||||
通过确保所有基本块均以终结指令结尾,消除基本块间的 `fallthrough`,简化了控制流图(CFG)的构建和分析。这一做法提升了编译器整体质量,使中端各类 Pass 的编写和维护更加规范和高效。
|
||||
|
||||
|
||||
### 3.2. 核心优化详解
|
||||
|
||||
编译器的分析和优化被组织成一系列独立的“遍”(Pass)。每个 Pass 都是一个独立的算法模块,对 IR 进行特定的分析或变换。这种设计具有高度的模块化和可扩展性。
|
||||
|
||||
#### 3.2.1. SSA 构建与解构
|
||||
|
||||
- **Mem2Reg (`Mem2Reg.cpp`)**:
|
||||
- **目标**: 将对栈内存 (`alloca`) 的 `load`/`store` 操作,提升为对虚拟寄存器的直接操作,并构建 SSA 形式。
|
||||
- **技术**: 该过程是实现 SSA 的关键。它依赖于**支配树 (Dominator Tree)** 分析,通过寻找变量定义块的**支配边界 (Dominance Frontier)** 来确定在何处插入 **Φ (Phi) 函数**。
|
||||
- **实现**: `Mem2RegContext::run` 驱动此过程。首先调用 `isPromotableAlloca` 识别所有仅被 `load`/`store` 使用的标量 `alloca`。然后,`insertPhis` 根据支配边界信息在必要的控制流汇合点插入 `phi` 指令。最后,`renameVariables` 递归地遍历支配树,用一个模拟的值栈来将 `load` 替换为栈顶的 SSA 值,将 `store` 视为对栈的一次 `push` 操作,从而完成重命名。值得一提的是,由于我们在IR生成阶段就将所有alloca指令统一放置在入口块,极大地简化了Mem2Reg遍的实现和支配树分析的计算。
|
||||
|
||||
- **Reg2Mem (`Reg2Mem.cpp`)**:
|
||||
- **目标**: 执行 `Mem2Reg` 的逆操作,将程序从 SSA 形式转换回基于内存的表示。这通常是为不支持 SSA 的后端做准备的**SSA解构 (SSA Destruction)** 步骤。
|
||||
- **技术**: 为每个 SSA 值(指令结果、函数参数)在函数入口创建一个 `alloca` 栈槽。然后,在每个 SSA 值的定义点之后插入一个 `store` 将其存入对应的栈槽;在每个使用点之前插入一个 `load` 从栈槽中取出值。
|
||||
- **实现**: `Reg2MemContext::run` 驱动此过程。`allocateMemoryForSSAValues` 为所有需要转换的 SSA 值创建 `alloca` 指令。`rewritePhis` 特殊处理 `phi` 指令,在每个前驱块的末尾插入 `store`。`insertLoadsAndStores` 则处理所有非 `phi` 指令的定义和使用,插入相应的 `store` 和 `load`。
|
||||
|
||||
#### 3.2.2. 常量与死代码优化
|
||||
|
||||
- **SCCP (`SCCP.cpp`)**:
|
||||
- **目标**: 稀疏条件常量传播。在编译期计算常量表达式,并利用分支条件为常数的信息来消除死代码,比简单的常量传播更强大。
|
||||
- **技术**: 这是一种基于数据流分析的格理论(Lattice Theory)的优化。它为每个变量维护一个值状态,可能为 `Top` (未定义), `Constant` (某个常量值), 或 `Bottom` (非常量)。同时,它跟踪基本块的可达性,如果一个分支的条件被推断为常量,则其不可达的后继分支在分析中会被直接忽略。
|
||||
- **实现**: `SCCPContext::run` 驱动整个分析过程。它维护一个指令工作列表和一个边工作列表。`ProcessInstruction` 和 `ProcessEdge` 函数交替执行,不断地从 IR 中传播常量和可达性信息,直到达到不动点为止。最后,`PropagateConstants` 和 `SimplifyControlFlow` 将推断出的常量替换到代码中,并移除死块。
|
||||
|
||||
- **DCE (`DCE.cpp`)**:
|
||||
- **目标**: 简单死代码消除。移除那些计算结果对程序输出没有贡献的指令。
|
||||
- **技术**: 采用**标记-清除 (Mark and Sweep)** 算法。从具有副作用的指令(如 `store`, `call`, `return`)开始,反向追溯其操作数,标记所有相关的指令为“活跃”。
|
||||
- **实现**: `DCEContext::run` 实现了此算法。第一次遍历时,通过 `isAlive` 函数识别出具有副作用的“根”指令,然后调用 `addAlive` 递归地将所有依赖的指令加入 `alive_insts` 集合。第二次遍历时,所有未被标记为活跃的指令都将被删除。
|
||||
- **未来规划**: 后续开发的更多分析遍将为 DCE 收集更多的 IR 信息,从而迭代出更健壮的 DCE 遍。
|
||||
|
||||
#### 3.2.3. 控制流图 (CFG) 优化
|
||||
|
||||
- **实现**: `SysYIRCFGOpt.cpp` 中定义了一系列用于清理和简化控制流图的 Pass。
|
||||
- **`SysYDelInstAfterBrPass`**: 删除分支指令后的死代码。
|
||||
- **`SysYDelNoPreBLockPass`**: 通过从入口块开始的图遍历(BFS),识别并删除所有不可达的基本块。
|
||||
- **`SysYDelEmptyBlockPass`**: 识别并删除仅包含一条无条件跳转指令的空块,将其前驱直接重定向到其后继。
|
||||
- **`SysYBlockMergePass`**: 如果一个块 A 只有一个后继 B,且 B 只有一个前驱 A,则将 A 和 B 合并为一个块。
|
||||
- **`SysYCondBr2BrPass`**: 如果一个条件分支的条件是常量,则将其转换为一个无条件分支。
|
||||
- **`SysYAddReturnPass`**: 确保所有没有终结指令的函数出口路径都有一个 `return` 指令,以保证 CFG 的完整性。
|
||||
|
||||
#### 3.2.4. 其他优化
|
||||
|
||||
- **LargeArrayToGlobal (`LargeArrayToGlobal.cpp`)**:
|
||||
- **目标**: 防止因大型局部数组导致的栈溢出,并可能改善数据局部性。
|
||||
- **技术**: 遍历函数中的 `alloca` 指令,如果通过 `calculateTypeSize` 计算出其分配的内存大小超过一个阈值(如 1024 字节),则将其转换为一个全局变量。
|
||||
- **实现**: `convertAllocaToGlobal` 函数负责创建一个新的 `GlobalValue`,并调用 `replaceAllUsesWith` 将原 `alloca` 的所有使用者重定向到新的全局变量,最后删除原 `alloca` 指令。
|
||||
|
||||
### 3.3. 核心分析遍
|
||||
|
||||
为了为优化遍收集信息,最大程度发掘程序优化潜力,我们目前设计并实现了以下关键的分析遍:
|
||||
|
||||
- **支配树分析 (Dominator Tree Analysis)**:
|
||||
- **技术**: 通过计算每个基本块的支配节点,构建出一棵支配树结构。我们在计算支配节点时采用了**逆后序遍历(RPO, Reverse Post Order)**,以保证数据流分析的收敛速度和正确性。在计算直接支配者(Idom, Immediate Dominator)时,采用了经典的**Lengauer-Tarjan(LT)算法**,该算法以高效的并查集和路径压缩技术著称,能够在接近线性的时间内准确计算出每个基本块的直接支配者关系。
|
||||
- **实现**: `Dom.cpp` 实现了支配树分析。该分析为每个基本块分配其直接支配者,并递归构建整棵支配树。支配树是许多高级优化(尤其是 SSA 形式下的优化)的基础。例如,Mem2Reg 需要依赖支配树来正确插入 Phi 指令,并在变量重命名阶段高效遍历控制流图。此外,循环相关优化(如循环不变量外提)也依赖于支配树信息来识别循环头和循环体的关系。
|
||||
|
||||
- **活跃性分析 (Liveness Analysis)**:
|
||||
- **技术**: 活跃性分析用于确定在程序的某一特定点上,哪些变量的值在未来会被用到。我们采用**经典的不动点迭代算法**,在数据流分析框架下,逆序遍历基本块,迭代计算每个基本块的 `live-in` 和 `live-out` 集合,直到收敛为止。这种方法简单且易于实现,能够满足大多数编译优化的需求。
|
||||
- **未来规划**: 若后续对分析效率有更高要求,可考虑引入如**工作列表算法**或者**转化为基于SSA的图可达性分析**等更高效的算法,以进一步提升大型函数或复杂控制流下的分析性能。
|
||||
- **实现**: `Liveness.cpp` 提供了活跃性分析。该分析采用经典的数据流分析框架,迭代计算每个基本块的 `live-in` 和 `live-out` 集合。活跃性信息是死代码消除(DCE)、寄存器分配等优化的必要前置步骤。通过准确的活跃性分析,可以识别出无用的变量和指令,从而为后续优化遍提供坚实的数据基础。
|
||||
|
||||
|
||||
### 3.4. 未来的规划
|
||||
|
||||
基于现有的成果,我们规划将中端能力进一步扩展,近期我们重点将放在循环相关的分析和函数内联的实现,以期大幅提升最终程序的性能。
|
||||
|
||||
- **循环优化**:
|
||||
我们正在开发一个健壮的分析遍来准确识别程序中的循环结构,并通过对已识别的循环进行规范化的转换遍,为后续的向量化、并行化工作做铺垫;同时,通过循环不变量提升、循环归纳变量分析与强度削减等优化,提升循环相关代码的执行效率。
|
||||
- **函数内联**:
|
||||
函数内联能够将简单函数(可能需要收集更多信息)内联到call指令相应位置,减少栈空间相关变动,并且为其他遍发掘优化空间。
|
||||
- **`LLVM IR`格式化**:
|
||||
我们将为所有的IR设计并实现通用的打印器方法,使得IR能够显式化为可编译运行的LLVM IR,通过编排脚本和调用llvm相关工具链,我们能够绕过后端编译运行中间代码,为验证中端正确性提供系统化的方法,同时减轻后端开发bug溯源的压力。
|
||||
---
|
||||
|
||||
## 4. 后端技术与优化 (Backend)
|
||||
|
||||
后端负责将经过优化的、与机器无关的 IR 转换为针对 RISC-V 64 位架构的汇编代码。
|
||||
|
||||
### 4.1. 栈帧布局 (Stack Frame Layout)
|
||||
|
||||
在函数调用发生时,后端需要在栈上创建一个**栈帧 (Stack Frame)** 来存储局部变量、传递参数和保存寄存器。本编译器采用的栈帧布局遵循 RISC-V 调用约定,结构如下:
|
||||
|
||||
```
|
||||
高地址 +-----------------------------+
|
||||
| ... |
|
||||
| 函数参数 (8+) | <-- 调用者传入的、放不进寄存器的参数
|
||||
+-----------------------------+
|
||||
| 返回地址 (ra) | <-- sp 在函数入口指向的位置
|
||||
+-----------------------------+
|
||||
| 旧的帧指针 (s0/fp) |
|
||||
+-----------------------------+ <-- s0/fp 在函数序言后指向的位置
|
||||
| 被调用者保存的寄存器 |
|
||||
| (Callee-Saved Regs) |
|
||||
+-----------------------------+
|
||||
| 局部变量 (Alloca) |
|
||||
+-----------------------------+
|
||||
| 寄存器溢出区域 |
|
||||
| (Spill Slots) |
|
||||
+-----------------------------+
|
||||
| 为调用其他函数预留的 |
|
||||
| 出参空间 (Out-Args) |
|
||||
低地址 +-----------------------------+ <-- sp 在函数序言后指向的位置
|
||||
```
|
||||
|
||||
- **实现**: `PrologueEpilogueInsertion.h` 和 `EliminateFrameIndices.h` 中的 Pass 负责生成函数序言(prologue)和尾声(epilogue)代码,来构建和销毁上述栈帧。`EliminateFrameIndices` 会将所有对抽象栈槽(如局部变量、溢出槽)的访问,替换为对帧指针 `s0` 或栈指针 `sp` 的、带有具体偏移量的访问。
|
||||
|
||||
### 4.2. 指令选择 (Instruction Selection)
|
||||
|
||||
- **目标**: 将抽象的 IR 指令高效地翻译成具体的目标机指令序列。
|
||||
- **技术**: 采用 **基于 DAG (Directed Acyclic Graph) 的模式匹配** 算法。
|
||||
- **实现**: `RISCv64ISel.cpp` 中的 `RISCv64ISel::select()` 驱动此过程。`selectBasicBlock()` 为每个基本块调用 `build_dag()` 来构建一个操作的 DAG,然后通过 `select_recursive()` 对 DAG 进行自底向上的遍历和匹配。在 `selectNode()` 函数中,通过一个大的 `switch` 语句,为不同类型的 DAG 节点(如 `BINARY`, `LOAD`, `STORE`)匹配最优的指令序列。例如,一个 IR 的加法指令,如果其中一个操作数是小常数,会被直接匹配为一条 `ADDIW` 指令,而不是 `LI` 和 `ADDW` 两条指令。
|
||||
|
||||
### 4.3. 寄存器分配 (Register Allocation)
|
||||
|
||||
- **目标**: 将无限的虚拟寄存器映射到有限的物理寄存器上,并优雅地处理寄存器不足(溢出)的情况。
|
||||
- **技术**: 实现了经典的**基于图着色 (Graph Coloring) 的全局寄存器分配算法**,这是一种强大但复杂的全局优化方法。
|
||||
- **实现**: `RISCv64RegAlloc.cpp` 中的 `RISCv64RegAlloc::run()` 是主入口。它在一个循环中执行分配,直到没有寄存器需要溢出为止。其内部流程极其精密,如下图所示:
|
||||
|
||||
```mermaid
|
||||
graph TD
|
||||
subgraph "寄存器分配主循环 (RISCv64RegAlloc::run)"
|
||||
direction LR
|
||||
Start((Start)) --> Liveness[1. 活跃性分析 LivenessAnalysis]
|
||||
Liveness --> Build[2. 构建冲突图 Build]
|
||||
Build --> Worklist[3. 创建工作表 MakeWorklist]
|
||||
Worklist --> Loop{Main Loop}
|
||||
Loop -- simplifyWorklist 非空 --> Simplify[4a. 简化 Simplify]
|
||||
Simplify --> Loop
|
||||
Loop -- worklistMoves 非空 --> Coalesce[4b. 合并 Coalesce]
|
||||
Coalesce --> Loop
|
||||
Loop -- freezeWorklist 非空 --> Freeze[4c. 冻结 Freeze]
|
||||
Freeze --> Loop
|
||||
Loop -- spillWorklist 非空 --> Spill[4d. 选择溢出 SelectSpill]
|
||||
Spill --> Loop
|
||||
Loop -- 所有工作表为空 --> Assign[5. 分配颜色 AssignColors]
|
||||
Assign --> CheckSpill{有溢出?}
|
||||
CheckSpill -- Yes --> Rewrite[6. 重写代码 RewriteProgram]
|
||||
Rewrite --> Liveness
|
||||
CheckSpill -- No --> Finish((Finish))
|
||||
end
|
||||
```
|
||||
|
||||
1. **`analyzeLiveness()`**: 对机器指令进行数据流分析,计算出每个虚拟寄存器的活跃范围。
|
||||
2. **`build()`**: 根据活跃性信息构建**冲突图 (Interference Graph)**。如果两个虚拟寄存器同时活跃,则它们冲突,在图中连接一条边。
|
||||
3. **`makeWorklist()`**: 将图节点(虚拟寄存器)根据其度数放入不同的工作列表,为着色做准备。
|
||||
4. **核心着色阶段 (The Loop)**:
|
||||
- **`simplify()`**: 贪心地移除图中度数小于物理寄存器数量的节点,并将其压入栈中。这些节点保证可以被成功着色。
|
||||
- **`coalesce()`**: 尝试将传送指令 (`MV`) 的源和目标节点合并,以消除这条指令。合并的条件基于 **Briggs** 或 **George** 启发式,以避免使图变得不可着色。
|
||||
- **`freeze()`**: 当一个与传送指令相关的节点无法合并也无法简化时,放弃对该传送指令的合并希望,将其“冻结”为一个普通节点。
|
||||
- **`selectSpill()`**: 当所有节点都无法进行上述操作时(即图中只剩下高度数的节点),必须选择一个节点进行**溢出 (Spill)**,即决定将其存放在内存中。
|
||||
5. **`assignColors()`**: 在所有节点都被处理后,从栈中依次弹出节点,并根据其已着色邻居的颜色,为它选择一个可用的物理寄存器。
|
||||
6. **`rewriteProgram()`**: 如果 `assignColors()` 阶段发现有节点被标记为溢出,此函数会被调用。它会修改机器指令,为溢出的虚拟寄存器插入从内存加载(`lw`/`ld`)和存入内存(`sw`/`sd`)的代码。然后,整个分配过程从步骤 1 重新开始。
|
||||
|
||||
### 4.4. 后端特定优化
|
||||
|
||||
在寄存器分配前后,后端还会进行一系列针对目标机(RISC-V)特性的优化。
|
||||
|
||||
#### 4.4.1. 指令调度 (Instruction Scheduling)
|
||||
|
||||
- **寄存器分配前调度 (`PreRA_Scheduler.cpp`)**:
|
||||
- **目标**: 在寄存器分配前,通过重排指令来提升性能。主要目标是**隐藏加载延迟 (Load Latency)**,即尽早发出 `load` 指令,使其结果能在需要时及时准备好,避免流水线停顿。同时,由于此时使用的是无限的虚拟寄存器,调度器有较大的自由度,但也可能因为过度重排而延长虚拟寄存器的生命周期,从而增加寄存器压力。
|
||||
- **实现**: `scheduleBlock()` 函数会识别出基本块内的调度边界(如 `call` 或终结指令),然后在每个独立的区域内调用 `scheduleRegion()`。当前的实现是一种简化的列表调度,它会优先尝试将加载指令 (`LW`, `LD` 等) 在不违反数据依赖的前提下,尽可能地向前移动。
|
||||
|
||||
- **寄存器分配后调度 (`PostRA_Scheduler.cpp`)**:
|
||||
- **目标**: 在寄存器分配完成之后,对指令序列进行最后一轮微调。此阶段调度的主要目标与分配前不同,它旨在解决由寄存器分配过程本身引入的性能问题,例如:
|
||||
- **缓解溢出代价**: 将因溢出(Spill)而产生的 `load` 指令(从栈加载)尽可能地提前,远离其使用点;将 `store` 指令(存入栈)尽可能地推后,远离其定义点。
|
||||
- **消除伪依赖**: 寄存器分配器可能会为两个原本不相关的虚拟寄存器分配同一个物理寄存器,从而引入了虚假的读后写(WAR)或写后写(WAW)依赖。Post-RA 调度可以尝试解开这些伪依赖,为指令重排提供更多自由度。
|
||||
- **实现**: `scheduleBlock()` 函数实现了此调度器。它采用了一种非常保守的**局部交换 (Local Swapping)** 策略。它迭代地检查相邻的两条指令,在 `canSwapInstructions()` 函数确认交换不会违反任何数据依赖(RAW, WAR, WAW)或内存依赖后,才执行交换。这种方法虽然不如全局列表调度强大,但在严格的 Post-RA 约束下是一种安全有效的优化手段。
|
||||
|
||||
#### 4.4.2. 强度削减 (Strength Reduction)
|
||||
|
||||
- **除法强度削减 (`DivStrengthReduction.cpp`)**:
|
||||
- **目标**: 将机器指令中昂贵的 `DIV` 或 `DIVW` 指令(当除数为编译期常量时)替换为一系列更快、计算成本更低的指令组合。
|
||||
- **技术**: 基于数论中的**乘法逆元 (Multiplicative Inverse)** 思想。对于一个整数除法 `x / d`,可以找到一个“魔数” `m` 和一个移位数 `s`,使得该除法可以被近似替换为 `(x * m) >> s`。这个过程需要处理复杂的符号、取整和溢出问题。
|
||||
- **实现**: `runOnMachineFunction()` 实现了此优化。它会遍历机器指令,寻找以常量为除数的 `DIV`/`DIVW` 指令。`computeMagic()` 函数负责计算出对应的魔数和移位数。然后,根据除数是 2 的幂、1、-1 还是其他普通数字,生成不同的指令序列,包括 `MULH` (取高位乘积), `SRAI` (算术右移), `ADD`, `SUB` 等,来精确地模拟定点数除法的效果。
|
||||
|
||||
#### 4.4.3. 窥孔优化 (Peephole Optimization)
|
||||
|
||||
- **目标**: 在生成最终汇编代码之前,对相邻的机器指令序列进行局部优化,以消除冗余操作和利用目标机特性。
|
||||
- **技术**: 窥孔优化是一种简单而高效的局部优化技术。它通过一个固定大小的“窥孔”(通常是 2-3 条指令)来扫描指令序列,寻找可以被更优指令序列替换的模式。
|
||||
- **实现**: `PeepholeOptimizer::runOnMachineFunction()` 实现了此 Pass。它包含了一系列模式匹配和替换规则,主要包括:
|
||||
- **冗余移动消除**: `mv x, y` 后跟着一条使用 `x` 的指令 `op z, x, ...`,如果 `x` 之后不再活跃,则将 `op` 的操作数直接替换为 `y`,并移除 `mv` 指令。
|
||||
- **冗余加载消除**: `sw r1, mem; lw r2, mem` -> `sw r1, mem; mv r2, r1`。如果 `r1` 和 `r2` 是同一个寄存器,则直接移除 `lw`。
|
||||
- **地址计算优化**: `addi t1, base, imm1; lw t2, imm2(t1)` -> `lw t2, (imm1+imm2)(base)`。将两条指令合并为一条,减少了指令数量和中间寄存器的使用。
|
||||
- **指令合并**: `addi t1, t0, imm1; addi t2, t1, imm2` -> `addi t2, t0, (imm1+imm2)`。合并连续的立即数加法。
|
||||
|
||||
### 4.5. 局限性与未来工作
|
||||
|
||||
根据项目中的 `TODO` 列表和源代码分析,当前实现存在一些可改进之处:
|
||||
|
||||
- **寄存器分配**:
|
||||
- **`CALL` 指令处理**: 当前对 `CALL` 指令的 `use`/`def` 分析不完整,没有将所有调用者保存的寄存器标记为 `def`,这可能导致跨函数调用的值被错误破坏。
|
||||
- **溢出处理**: 当前所有溢出的虚拟寄存器都被简单地映射到同一个物理寄存器 `t6` 上,这会引入大量不必要的 `load`/`store`,并可能导致 `t6` 成为性能瓶颈。
|
||||
- **IR 设计**:
|
||||
- 随着 SSA 的引入,IR 中某些冗余信息(如基本块的 `args` 参数)可以被移除,以简化设计。
|
||||
- **优化**:
|
||||
- 当前的优化主要集中在标量上。可以引入更多面向循环的优化(如循环不变代码外提 LICM、归纳变量分析 IndVar)和过程间优化来进一步提升性能。
|
||||
@ -5,6 +5,8 @@ add_library(riscv64_backend_lib STATIC
|
||||
RISCv64ISel.cpp
|
||||
RISCv64LLIR.cpp
|
||||
RISCv64RegAlloc.cpp
|
||||
RISCv64LinearScan.cpp
|
||||
RISCv64BasicBlockAlloc.cpp
|
||||
Handler/CalleeSavedHandler.cpp
|
||||
Handler/LegalizeImmediates.cpp
|
||||
Handler/PrologueEpilogueInsertion.cpp
|
||||
|
||||
@ -4,6 +4,7 @@
|
||||
namespace sysy {
|
||||
|
||||
char PeepholeOptimizer::ID = 0;
|
||||
bool PeepholeOptimizer::fusedMulAddEnabled = true; // 默认启用浮点乘加融合优化
|
||||
|
||||
bool PeepholeOptimizer::runOnFunction(Function *F, AnalysisManager& AM) {
|
||||
// This pass works on MachineFunction level, not IR level
|
||||
@ -634,6 +635,99 @@ void PeepholeOptimizer::runOnMachineFunction(MachineFunction *mfunc) {
|
||||
}
|
||||
}
|
||||
}
|
||||
// 8. 浮点乘加融合优化
|
||||
// 8.1 fmul.s t1, t2, t3; fadd.s t4, t1, t5 -> fmadd.s t4, t2, t3, t5
|
||||
else if (isFusedMulAddEnabled() &&
|
||||
mi1->getOpcode() == RVOpcodes::FMUL_S &&
|
||||
mi2->getOpcode() == RVOpcodes::FADD_S) {
|
||||
if (mi1->getOperands().size() == 3 && mi2->getOperands().size() == 3) {
|
||||
auto *fmul_dst = static_cast<RegOperand *>(mi1->getOperands()[0].get());
|
||||
auto *fmul_src1 = static_cast<RegOperand *>(mi1->getOperands()[1].get());
|
||||
auto *fmul_src2 = static_cast<RegOperand *>(mi1->getOperands()[2].get());
|
||||
|
||||
auto *fadd_dst = static_cast<RegOperand *>(mi2->getOperands()[0].get());
|
||||
auto *fadd_src1 = static_cast<RegOperand *>(mi2->getOperands()[1].get());
|
||||
auto *fadd_src2 = static_cast<RegOperand *>(mi2->getOperands()[2].get());
|
||||
|
||||
// 检查fmul的目标是否是fadd的第一个源操作数
|
||||
if (areRegsEqual(fmul_dst, fadd_src1)) {
|
||||
// 检查中间寄存器是否在后续还会被使用
|
||||
bool canOptimize = true;
|
||||
for (size_t j = i + 2; j < instrs.size(); ++j) {
|
||||
auto *later_instr = instrs[j].get();
|
||||
|
||||
// 如果中间寄存器被重新定义,则可以优化
|
||||
if (isRegRedefinedAt(later_instr, fmul_dst, areRegsEqual)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// 如果中间寄存器被使用,则不能优化
|
||||
if (isRegUsedLater(instrs, fmul_dst, j)) {
|
||||
canOptimize = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (canOptimize) {
|
||||
// 创建新的FMADD_S指令: fmadd.s t4, t2, t3, t5
|
||||
auto newInstr = std::make_unique<MachineInstr>(RVOpcodes::FMADD_S);
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fadd_dst));
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fmul_src1));
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fmul_src2));
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fadd_src2));
|
||||
instrs[i + 1] = std::move(newInstr);
|
||||
instrs.erase(instrs.begin() + i);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// 8.2 fmul.s t1, t2, t3; fadd.s t4, t5, t1 -> fmadd.s t4, t2, t3, t5
|
||||
else if (isFusedMulAddEnabled() &&
|
||||
mi1->getOpcode() == RVOpcodes::FMUL_S &&
|
||||
mi2->getOpcode() == RVOpcodes::FADD_S) {
|
||||
if (mi1->getOperands().size() == 3 && mi2->getOperands().size() == 3) {
|
||||
auto *fmul_dst = static_cast<RegOperand *>(mi1->getOperands()[0].get());
|
||||
auto *fmul_src1 = static_cast<RegOperand *>(mi1->getOperands()[1].get());
|
||||
auto *fmul_src2 = static_cast<RegOperand *>(mi1->getOperands()[2].get());
|
||||
|
||||
auto *fadd_dst = static_cast<RegOperand *>(mi2->getOperands()[0].get());
|
||||
auto *fadd_src1 = static_cast<RegOperand *>(mi2->getOperands()[1].get());
|
||||
auto *fadd_src2 = static_cast<RegOperand *>(mi2->getOperands()[2].get());
|
||||
|
||||
// 检查fmul的目标是否是fadd的第二个源操作数
|
||||
if (areRegsEqual(fmul_dst, fadd_src2)) {
|
||||
// 检查中间寄存器是否在后续还会被使用
|
||||
bool canOptimize = true;
|
||||
for (size_t j = i + 2; j < instrs.size(); ++j) {
|
||||
auto *later_instr = instrs[j].get();
|
||||
|
||||
// 如果中间寄存器被重新定义,则可以优化
|
||||
if (isRegRedefinedAt(later_instr, fmul_dst, areRegsEqual)) {
|
||||
break;
|
||||
}
|
||||
|
||||
// 如果中间寄存器被使用,则不能优化
|
||||
if (isRegUsedLater(instrs, fmul_dst, j)) {
|
||||
canOptimize = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (canOptimize) {
|
||||
// 创建新的FMADD_S指令: fmadd.s t4, t2, t3, t5
|
||||
auto newInstr = std::make_unique<MachineInstr>(RVOpcodes::FMADD_S);
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fadd_dst));
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fmul_src1));
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fmul_src2));
|
||||
newInstr->addOperand(std::make_unique<RegOperand>(*fadd_src1));
|
||||
instrs[i + 1] = std::move(newInstr);
|
||||
instrs.erase(instrs.begin() + i);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 根据是否发生变化调整遍历索引
|
||||
if (!changed) {
|
||||
|
||||
@ -5,23 +5,6 @@
|
||||
#include <iostream>
|
||||
namespace sysy {
|
||||
|
||||
// 检查是否为内存加载/存储指令,以处理特殊的打印格式
|
||||
bool isMemoryOp(RVOpcodes opcode) {
|
||||
switch (opcode) {
|
||||
// --- 整数加载/存储 (原有逻辑) ---
|
||||
case RVOpcodes::LB: case RVOpcodes::LH: case RVOpcodes::LW: case RVOpcodes::LD:
|
||||
case RVOpcodes::LBU: case RVOpcodes::LHU: case RVOpcodes::LWU:
|
||||
case RVOpcodes::SB: case RVOpcodes::SH: case RVOpcodes::SW: case RVOpcodes::SD:
|
||||
case RVOpcodes::FLW:
|
||||
case RVOpcodes::FSW:
|
||||
// 如果未来支持双精度,也在这里添加FLD/FSD
|
||||
// case RVOpcodes::FLD:
|
||||
// case RVOpcodes::FSD:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
RISCv64AsmPrinter::RISCv64AsmPrinter(MachineFunction* mfunc) : MFunc(mfunc) {}
|
||||
|
||||
@ -82,7 +65,7 @@ void RISCv64AsmPrinter::printInstruction(MachineInstr* instr, bool debug) {
|
||||
case RVOpcodes::SB: *OS << "sb "; break; case RVOpcodes::LD: *OS << "ld "; break;
|
||||
case RVOpcodes::SD: *OS << "sd "; break; case RVOpcodes::FLW: *OS << "flw "; break;
|
||||
case RVOpcodes::FSW: *OS << "fsw "; break; case RVOpcodes::FLD: *OS << "fld "; break;
|
||||
case RVOpcodes::FSD: *OS << "fsd "; break;
|
||||
case RVOpcodes::FSD: *OS << "fsd "; break;
|
||||
case RVOpcodes::J: *OS << "j "; break; case RVOpcodes::JAL: *OS << "jal "; break;
|
||||
case RVOpcodes::JALR: *OS << "jalr "; break; case RVOpcodes::RET: *OS << "ret"; break;
|
||||
case RVOpcodes::BEQ: *OS << "beq "; break; case RVOpcodes::BNE: *OS << "bne "; break;
|
||||
@ -96,15 +79,18 @@ void RISCv64AsmPrinter::printInstruction(MachineInstr* instr, bool debug) {
|
||||
case RVOpcodes::FSUB_S: *OS << "fsub.s "; break;
|
||||
case RVOpcodes::FMUL_S: *OS << "fmul.s "; break;
|
||||
case RVOpcodes::FDIV_S: *OS << "fdiv.s "; break;
|
||||
case RVOpcodes::FMADD_S: *OS << "fmadd.s "; break;
|
||||
case RVOpcodes::FNEG_S: *OS << "fneg.s "; break;
|
||||
case RVOpcodes::FEQ_S: *OS << "feq.s "; break;
|
||||
case RVOpcodes::FLT_S: *OS << "flt.s "; break;
|
||||
case RVOpcodes::FLE_S: *OS << "fle.s "; break;
|
||||
case RVOpcodes::FCVT_S_W: *OS << "fcvt.s.w "; break;
|
||||
case RVOpcodes::FCVT_W_S: *OS << "fcvt.w.s "; break;
|
||||
case RVOpcodes::FCVT_W_S_RTZ: *OS << "fcvt.w.s "; break;
|
||||
case RVOpcodes::FMV_S: *OS << "fmv.s "; break;
|
||||
case RVOpcodes::FMV_W_X: *OS << "fmv.w.x "; break;
|
||||
case RVOpcodes::FMV_X_W: *OS << "fmv.x.w "; break;
|
||||
case RVOpcodes::FSRMI: *OS << "fsrmi "; break;
|
||||
case RVOpcodes::CALL: { // 为CALL指令添加特殊处理逻辑
|
||||
*OS << "call ";
|
||||
// 遍历所有操作数,只寻找并打印函数名标签
|
||||
|
||||
@ -1,10 +1,14 @@
|
||||
#include "RISCv64Backend.h"
|
||||
#include "RISCv64ISel.h"
|
||||
#include "RISCv64RegAlloc.h"
|
||||
#include "RISCv64LinearScan.h"
|
||||
#include "RISCv64BasicBlockAlloc.h"
|
||||
#include "RISCv64AsmPrinter.h"
|
||||
#include "RISCv64Passes.h"
|
||||
#include <sstream>
|
||||
|
||||
#include <future>
|
||||
#include <chrono>
|
||||
#include <iostream>
|
||||
namespace sysy {
|
||||
|
||||
// 顶层入口
|
||||
@ -196,19 +200,13 @@ std::string RISCv64CodeGen::function_gen(Function* func) {
|
||||
// === 完整的后端处理流水线 ===
|
||||
|
||||
// 阶段 1: 指令选择 (sysy::IR -> LLIR with virtual registers)
|
||||
DEBUG = 0;
|
||||
DEEPDEBUG = 0;
|
||||
|
||||
RISCv64ISel isel;
|
||||
std::unique_ptr<MachineFunction> mfunc = isel.runOnFunction(func);
|
||||
|
||||
// 第一次调试打印输出
|
||||
std::stringstream ss_after_isel;
|
||||
RISCv64AsmPrinter printer_isel(mfunc.get());
|
||||
printer_isel.run(ss_after_isel, true);
|
||||
if (DEBUG) {
|
||||
std::cout << ss_after_isel.str();
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cerr << "====== Intermediate Representation after Instruction Selection ======\n"
|
||||
<< ss_after_isel.str();
|
||||
@ -228,17 +226,78 @@ std::string RISCv64CodeGen::function_gen(Function* func) {
|
||||
<< ss_after_eli.str();
|
||||
}
|
||||
|
||||
// 阶段 2: 除法强度削弱优化 (Division Strength Reduction)
|
||||
DivStrengthReduction div_strength_reduction;
|
||||
div_strength_reduction.runOnMachineFunction(mfunc.get());
|
||||
// // 阶段 2: 除法强度削弱优化 (Division Strength Reduction)
|
||||
// DivStrengthReduction div_strength_reduction;
|
||||
// div_strength_reduction.runOnMachineFunction(mfunc.get());
|
||||
|
||||
// 阶段 2.1: 指令调度 (Instruction Scheduling)
|
||||
PreRA_Scheduler scheduler;
|
||||
scheduler.runOnMachineFunction(mfunc.get());
|
||||
// // 阶段 2.1: 指令调度 (Instruction Scheduling)
|
||||
// PreRA_Scheduler scheduler;
|
||||
// scheduler.runOnMachineFunction(mfunc.get());
|
||||
|
||||
// 阶段 3: 物理寄存器分配 (Register Allocation)
|
||||
RISCv64RegAlloc reg_alloc(mfunc.get());
|
||||
reg_alloc.run();
|
||||
|
||||
// 首先尝试图着色分配器
|
||||
if (DEBUG) std::cerr << "Attempting Register Allocation with Graph Coloring...\n";
|
||||
if (!gc_failed) {
|
||||
RISCv64RegAlloc gc_alloc(mfunc.get());
|
||||
|
||||
bool success_gc = gc_alloc.run();
|
||||
|
||||
if (!success_gc) {
|
||||
gc_failed = 1; // 后续不再尝试图着色分配器
|
||||
std::cerr << "Warning: Graph coloring register allocation failed function '"
|
||||
<< func->getName()
|
||||
<< "'. Switching to Linear Scan allocator."
|
||||
<< std::endl;
|
||||
|
||||
RISCv64ISel isel_gc_fallback;
|
||||
mfunc = isel_gc_fallback.runOnFunction(func);
|
||||
EliminateFrameIndicesPass efi_pass_gc_fallback;
|
||||
efi_pass_gc_fallback.runOnMachineFunction(mfunc.get());
|
||||
RISCv64LinearScan ls_alloc(mfunc.get());
|
||||
bool success = ls_alloc.run();
|
||||
if (!success) {
|
||||
// 如果线性扫描最终失败,则调用基本块分配器作为终极后备
|
||||
std::cerr << "Info: Linear Scan failed. Switching to Basic Block Allocator as final fallback.\n";
|
||||
|
||||
// 注意:我们需要在一个“干净”的MachineFunction上运行。
|
||||
// 最安全的方式是重新运行指令选择。
|
||||
RISCv64ISel isel_fallback;
|
||||
mfunc = isel_fallback.runOnFunction(func);
|
||||
EliminateFrameIndicesPass efi_pass_fallback;
|
||||
efi_pass_fallback.runOnMachineFunction(mfunc.get());
|
||||
if (DEBUG) {
|
||||
std::cerr << "====== stack info after reg alloc ======\n";
|
||||
}
|
||||
RISCv64BasicBlockAlloc bb_alloc(mfunc.get());
|
||||
bb_alloc.run();
|
||||
}
|
||||
} else {
|
||||
// 图着色成功完成
|
||||
if (DEBUG) std::cerr << "Graph Coloring allocation completed successfully.\n";
|
||||
}
|
||||
} else {
|
||||
std::cerr << "Info: Graph Coloring allocation failed in last function. Switching to Linear Scan allocator...\n";
|
||||
RISCv64LinearScan ls_alloc(mfunc.get());
|
||||
bool success = ls_alloc.run();
|
||||
if (!success) {
|
||||
// 如果线性扫描最终失败,则调用基本块分配器作为终极后备
|
||||
std::cerr << "Info: Linear Scan failed. Switching to Basic Block Allocator as final fallback.\n";
|
||||
|
||||
// 注意:我们需要在一个“干净”的MachineFunction上运行。
|
||||
// 最安全的方式是重新运行指令选择。
|
||||
RISCv64ISel isel_fallback;
|
||||
mfunc = isel_fallback.runOnFunction(func);
|
||||
EliminateFrameIndicesPass efi_pass_fallback;
|
||||
efi_pass_fallback.runOnMachineFunction(mfunc.get());
|
||||
if (DEBUG) {
|
||||
std::cerr << "====== stack info after reg alloc ======\n";
|
||||
}
|
||||
RISCv64BasicBlockAlloc bb_alloc(mfunc.get());
|
||||
bb_alloc.run();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (DEBUG) {
|
||||
std::cerr << "====== stack info after reg alloc ======\n";
|
||||
@ -276,7 +335,6 @@ std::string RISCv64CodeGen::function_gen(Function* func) {
|
||||
printer.run(ss);
|
||||
|
||||
return ss.str();
|
||||
|
||||
}
|
||||
|
||||
} // namespace sysy
|
||||
267
src/backend/RISCv64/RISCv64BasicBlockAlloc.cpp
Normal file
267
src/backend/RISCv64/RISCv64BasicBlockAlloc.cpp
Normal file
@ -0,0 +1,267 @@
|
||||
#include "RISCv64BasicBlockAlloc.h"
|
||||
#include "RISCv64Info.h"
|
||||
#include "RISCv64AsmPrinter.h"
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
// 外部调试级别控制变量
|
||||
extern int DEBUG;
|
||||
extern int DEEPDEBUG;
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// 将 getInstrUseDef 的定义移到这里,因为它是一个全局的辅助函数
|
||||
void getInstrUseDef(const MachineInstr* instr, std::set<unsigned>& use, std::set<unsigned>& def) {
|
||||
auto opcode = instr->getOpcode();
|
||||
const auto& operands = instr->getOperands();
|
||||
|
||||
auto get_vreg_id_if_virtual = [&](const MachineOperand* op, std::set<unsigned>& s) {
|
||||
if (op->getKind() == MachineOperand::KIND_REG) {
|
||||
auto reg_op = static_cast<const RegOperand*>(op);
|
||||
if (reg_op->isVirtual()) s.insert(reg_op->getVRegNum());
|
||||
} else if (op->getKind() == MachineOperand::KIND_MEM) {
|
||||
auto mem_op = static_cast<const MemOperand*>(op);
|
||||
auto reg_op = mem_op->getBase();
|
||||
if (reg_op->isVirtual()) s.insert(reg_op->getVRegNum());
|
||||
}
|
||||
};
|
||||
|
||||
if (op_info.count(opcode)) {
|
||||
const auto& info = op_info.at(opcode);
|
||||
for (int idx : info.first) if (idx < operands.size()) get_vreg_id_if_virtual(operands[idx].get(), def);
|
||||
for (int idx : info.second) if (idx < operands.size()) get_vreg_id_if_virtual(operands[idx].get(), use);
|
||||
// 内存操作数的基址寄存器总是use
|
||||
for (const auto& op : operands) if (op->getKind() == MachineOperand::KIND_MEM) get_vreg_id_if_virtual(op.get(), use);
|
||||
} else if (opcode == RVOpcodes::CALL) {
|
||||
if (!operands.empty() && operands[0]->getKind() == MachineOperand::KIND_REG) get_vreg_id_if_virtual(operands[0].get(), def);
|
||||
for (size_t i = 1; i < operands.size(); ++i) if (operands[i]->getKind() == MachineOperand::KIND_REG) get_vreg_id_if_virtual(operands[i].get(), use);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Sets up the temporary-register pools and the argument-register mapping.
 *
 * The first eight non-float arguments are mapped to the registers starting at A0 and
 * the first eight float arguments to the registers starting at F10; arguments beyond
 * that get no entry in abi_vreg_map.
 *
 * @param mfunc Machine function to allocate registers for.
 */
RISCv64BasicBlockAlloc::RISCv64BasicBlockAlloc(MachineFunction* mfunc)
    : MFunc(mfunc), ISel(mfunc->getISel()) {
    // Pools of temporaries handed out by findFreeReg / spillReg.
    int_temps = {PhysicalReg::T0, PhysicalReg::T1, PhysicalReg::T2, PhysicalReg::T3, PhysicalReg::T6};
    fp_temps = {PhysicalReg::F0, PhysicalReg::F1, PhysicalReg::F2, PhysicalReg::F3, PhysicalReg::F4};
    int_temp_idx = 0;
    fp_temp_idx = 0;

    // Build the ABI register mapping for the function's arguments.
    if (!MFunc->getFunc()) return;

    constexpr int kMaxRegArgs = 8;
    int next_int_arg = 0;
    int next_fp_arg = 0;
    for (Argument* arg : MFunc->getFunc()->getArguments()) {
        const unsigned arg_vreg = ISel->getVReg(arg);
        const bool is_fp = arg->getType()->isFloat();
        int& next_slot = is_fp ? next_fp_arg : next_int_arg;
        if (next_slot >= kMaxRegArgs) continue;

        const PhysicalReg first_reg = is_fp ? PhysicalReg::F10 : PhysicalReg::A0;
        const auto preg = static_cast<PhysicalReg>(static_cast<int>(first_reg) + next_slot);
        ++next_slot;
        abi_vreg_map[arg_vreg] = preg;
    }
}
|
||||
|
||||
void RISCv64BasicBlockAlloc::run() {
|
||||
if (DEBUG) std::cerr << "===== [BB-Alloc] Running Stateful Greedy Allocator for function: " << MFunc->getName() << " =====\n";
|
||||
|
||||
computeLiveness();
|
||||
assignStackSlotsForAllVRegs();
|
||||
|
||||
for (auto& mbb : MFunc->getBlocks()) {
|
||||
processBasicBlock(mbb.get());
|
||||
}
|
||||
|
||||
// 将ABI寄存器映射(如函数参数)合并到最终结果中
|
||||
MFunc->getFrameInfo().vreg_to_preg_map.insert(this->abi_vreg_map.begin(), this->abi_vreg_map.end());
|
||||
}
|
||||
|
||||
/**
 * @brief Returns the next integer temporary in round-robin order.
 * @return The register at the current cursor; the cursor then advances and wraps.
 */
PhysicalReg RISCv64BasicBlockAlloc::getNextIntTemp() {
    const auto cursor = int_temp_idx;
    int_temp_idx = (int_temp_idx + 1) % int_temps.size();
    return int_temps[cursor];
}
|
||||
|
||||
/**
 * @brief Returns the next floating-point temporary in round-robin order.
 * @return The register at the current cursor; the cursor then advances and wraps.
 */
PhysicalReg RISCv64BasicBlockAlloc::getNextFpTemp() {
    const auto cursor = fp_temp_idx;
    fp_temp_idx = (fp_temp_idx + 1) % fp_temps.size();
    return fp_temps[cursor];
}
|
||||
|
||||
// Liveness hook of the basic-block allocator. Currently a no-op: the body contains no statements.
void RISCv64BasicBlockAlloc::computeLiveness() {
|
||||
// This step is required to determine which variables must be written back to the stack at the end of a block.
|
||||
// It is intentionally left empty for now to keep the focus elsewhere; a working liveness analysis is still needed to fill the live_out map.
|
||||
}
|
||||
|
||||
void RISCv64BasicBlockAlloc::assignStackSlotsForAllVRegs() {
|
||||
if (DEBUG) std::cerr << "[BB-Alloc] Assigning stack slots for all vregs.\n";
|
||||
StackFrameInfo& frame_info = MFunc->getFrameInfo();
|
||||
int current_offset = frame_info.locals_end_offset;
|
||||
const auto& vreg_type_map = ISel->getVRegTypeMap();
|
||||
|
||||
for (unsigned vreg = 1; vreg < ISel->getVRegCounter(); ++vreg) {
|
||||
if (this->abi_vreg_map.count(vreg) || frame_info.alloca_offsets.count(vreg) || frame_info.spill_offsets.count(vreg)) {
|
||||
continue;
|
||||
}
|
||||
|
||||
Type* type = vreg_type_map.count(vreg) ? vreg_type_map.at(vreg) : Type::getIntType();
|
||||
int size = type->isPointer() ? 8 : 4;
|
||||
|
||||
current_offset -= size;
|
||||
current_offset &= -size; // 按size对齐
|
||||
|
||||
frame_info.spill_offsets[vreg] = current_offset;
|
||||
}
|
||||
frame_info.spill_size = -(current_offset - frame_info.locals_end_offset);
|
||||
}
|
||||
|
||||
/**
 * @brief Rewrites one basic block so that its virtual register operands become
 *        physical temporaries.
 *
 * The register state (vreg_to_preg, preg_to_vreg, dirty_pregs) is reset on entry, so
 * no value is assumed to stay in a register across block boundaries. For every
 * instruction the used vregs are brought into registers (loads may be emitted), the
 * defined vregs receive a register (spill stores may be emitted) and the operands
 * are rewritten. After the last instruction each dirty register whose vreg owns a
 * spill slot is stored back to that slot.
 *
 * @param mbb Block whose instruction list is replaced by the rewritten sequence.
 */
void RISCv64BasicBlockAlloc::processBasicBlock(MachineBasicBlock* mbb) {
    if (DEEPDEBUG) std::cerr << " [BB-Alloc] Processing block " << mbb->getName() << "\n";

    vreg_to_preg.clear();
    preg_to_vreg.clear();
    dirty_pregs.clear();

    auto& instrs = mbb->getInstructions();
    std::vector<std::unique_ptr<MachineInstr>> new_instrs;
    const auto& vreg_type_map = ISel->getVRegTypeMap();

    for (auto& instr_ptr : instrs) {
        std::set<unsigned> use_vregs, def_vregs;
        getInstrUseDef(instr_ptr.get(), use_vregs, def_vregs);

        std::map<unsigned, PhysicalReg> current_instr_map;

        // 1. Make sure every used operand sits in a physical register.
        for (unsigned vreg : use_vregs) {
            current_instr_map[vreg] = ensureInReg(vreg, new_instrs);
        }

        // 2. Allocate a physical register for every defined operand.
        for (unsigned vreg : def_vregs) {
            current_instr_map[vreg] = allocReg(vreg, new_instrs);
        }

        // 3. Rewrite the instruction, replacing vregs with their pregs.
        for (const auto& pair : current_instr_map) {
            instr_ptr->replaceVRegWithPReg(pair.first, pair.second);
        }

        new_instrs.push_back(std::move(instr_ptr));
    }

    // 4. At the end of the block, write back every modified register. This fallback
    //    allocator does not consult liveness; it always stores dirty registers.
    // NOTE(review): the stores are appended after the block's last instruction. If
    // that instruction is a branch/jump/return they would not execute on the taken
    // path - confirm whether they must be placed before the terminator.
    StackFrameInfo& frame_info = MFunc->getFrameInfo();
    for (const auto& [preg, vreg] : preg_to_vreg) {
        if (!dirty_pregs.count(preg)) continue;

        // Skip stale entries: only the register that currently holds the vreg may be
        // written back, otherwise an outdated value could overwrite the newest one.
        if (!vreg_to_preg.count(vreg) || vreg_to_preg.at(vreg) != preg) continue;

        const auto slot_it = frame_info.spill_offsets.find(vreg);
        if (slot_it == frame_info.spill_offsets.end()) continue;

        // Vregs without a recorded type are treated as int, as in the other helpers
        // of this allocator (a plain at() would throw here).
        Type* type = vreg_type_map.count(vreg) ? vreg_type_map.at(vreg) : Type::getIntType();
        RVOpcodes store_op = type->isFloat() ? RVOpcodes::FSW : (type->isPointer() ? RVOpcodes::SD : RVOpcodes::SW);

        auto store = std::make_unique<MachineInstr>(store_op);
        store->addOperand(std::make_unique<RegOperand>(preg));
        store->addOperand(std::make_unique<MemOperand>(
            std::make_unique<RegOperand>(PhysicalReg::S0),
            std::make_unique<ImmOperand>(slot_it->second)
        ));
        new_instrs.push_back(std::move(store));
    }

    instrs = std::move(new_instrs);
}
|
||||
|
||||
/**
 * @brief Returns a physical register that holds the value of `vreg`.
 *
 * ABI-mapped vregs and vregs that already sit in a register are returned directly.
 * Otherwise a register is allocated and a load from the vreg's spill slot (relative
 * to S0) is appended to `new_instrs`; the freshly loaded register is marked clean.
 *
 * @param vreg       Virtual register that is about to be read.
 * @param new_instrs Instruction list under construction; receives any emitted code.
 * @return The physical register holding the value.
 */
PhysicalReg RISCv64BasicBlockAlloc::ensureInReg(unsigned vreg, std::vector<std::unique_ptr<MachineInstr>>& new_instrs) {
    const auto abi_it = abi_vreg_map.find(vreg);
    if (abi_it != abi_vreg_map.end()) return abi_it->second;

    const auto cached_it = vreg_to_preg.find(vreg);
    if (cached_it != vreg_to_preg.end()) return cached_it->second;

    const PhysicalReg target = allocReg(vreg, new_instrs);

    // Pick the load width from the vreg's type; unknown vregs are treated as int.
    const auto& types = ISel->getVRegTypeMap();
    const auto type_it = types.find(vreg);
    Type* type = type_it != types.end() ? type_it->second : Type::getIntType();

    RVOpcodes load_op = RVOpcodes::LW;
    if (type->isFloat()) {
        load_op = RVOpcodes::FLW;
    } else if (type->isPointer()) {
        load_op = RVOpcodes::LD;
    }

    const auto slot_offset = MFunc->getFrameInfo().spill_offsets.at(vreg);

    auto reload = std::make_unique<MachineInstr>(load_op);
    reload->addOperand(std::make_unique<RegOperand>(target));
    reload->addOperand(std::make_unique<MemOperand>(
        std::make_unique<RegOperand>(PhysicalReg::S0),
        std::make_unique<ImmOperand>(slot_offset)
    ));
    new_instrs.push_back(std::move(reload));

    // The register now mirrors the stack slot, so it is not dirty.
    dirty_pregs.erase(target);
    return target;
}
|
||||
|
||||
/**
 * @brief Provides a physical register that `vreg` may be written to.
 *
 * Resolution order:
 *  1. ABI-mapped vregs keep their fixed register.
 *  2. A vreg that already lives in a register keeps that register. Handing out a
 *     second register would leave a stale preg_to_vreg entry behind and would make
 *     an instruction such as `add v, v, x` read a register that never received v.
 *  3. Otherwise a free temporary is taken, or one is evicted through spillReg.
 * In every case the returned register is marked dirty.
 *
 * @param vreg       Virtual register that needs a register.
 * @param new_instrs Instruction list under construction; receives spill stores.
 * @return The physical register now associated with `vreg`.
 */
PhysicalReg RISCv64BasicBlockAlloc::allocReg(unsigned vreg, std::vector<std::unique_ptr<MachineInstr>>& new_instrs) {
    if (abi_vreg_map.count(vreg)) {
        const PhysicalReg abi_reg = abi_vreg_map.at(vreg);
        dirty_pregs.insert(abi_reg);  // A redefined argument is marked dirty as well.
        return abi_reg;
    }

    // Reuse the register the vreg already occupies instead of allocating another one.
    if (vreg_to_preg.count(vreg)) {
        const PhysicalReg current = vreg_to_preg.at(vreg);
        dirty_pregs.insert(current);
        return current;
    }

    // Vregs without a recorded type are treated as int, matching ensureInReg and
    // spillReg (a plain at() would throw for them).
    const auto& vreg_type_map = ISel->getVRegTypeMap();
    Type* type = vreg_type_map.count(vreg) ? vreg_type_map.at(vreg) : Type::getIntType();
    const bool is_fp = type->isFloat();

    PhysicalReg preg = findFreeReg(is_fp);
    if (preg == PhysicalReg::INVALID) {
        preg = spillReg(is_fp, new_instrs);
    }

    // Defensive: drop the reverse mapping of whatever vreg still claims this register.
    if (preg_to_vreg.count(preg)) {
        vreg_to_preg.erase(preg_to_vreg.at(preg));
    }
    vreg_to_preg[vreg] = preg;
    preg_to_vreg[preg] = vreg;
    dirty_pregs.insert(preg);

    return preg;
}
|
||||
|
||||
/**
 * @brief Looks for a temporary register that currently holds no vreg.
 * @param is_fp Selects the floating-point pool (fp_temps) instead of int_temps.
 * @return The first unoccupied register of the pool, or PhysicalReg::INVALID.
 */
PhysicalReg RISCv64BasicBlockAlloc::findFreeReg(bool is_fp) {
    const auto& pool = is_fp ? fp_temps : int_temps;
    const auto free_it = std::find_if(pool.begin(), pool.end(), [this](PhysicalReg candidate) {
        return preg_to_vreg.count(candidate) == 0;
    });
    return free_it != pool.end() ? *free_it : PhysicalReg::INVALID;
}
|
||||
|
||||
/**
 * @brief Frees one temporary register, chosen round-robin, for reuse.
 *
 * If the chosen register holds a vreg and is dirty, a store to that vreg's spill
 * slot (relative to S0) is appended to `new_instrs`. The evicted vreg's mappings
 * and the dirty flag are then removed.
 *
 * @param is_fp      Evict from the floating-point pool instead of the integer pool.
 * @param new_instrs Instruction list under construction; receives the spill store.
 * @return The register that is now free.
 */
PhysicalReg RISCv64BasicBlockAlloc::spillReg(bool is_fp, std::vector<std::unique_ptr<MachineInstr>>& new_instrs) {
    const PhysicalReg evicted = is_fp ? getNextFpTemp() : getNextIntTemp();

    const auto occupant = preg_to_vreg.find(evicted);
    if (occupant == preg_to_vreg.end()) {
        // Nothing lives in this register; it can be handed out as is.
        return evicted;
    }

    const unsigned victim_vreg = occupant->second;
    if (dirty_pregs.count(evicted)) {
        // Pick the store width from the victim's type; unknown vregs are treated as int.
        const auto& types = ISel->getVRegTypeMap();
        const auto type_it = types.find(victim_vreg);
        Type* type = type_it != types.end() ? type_it->second : Type::getIntType();

        RVOpcodes store_op = RVOpcodes::SW;
        if (type->isFloat()) {
            store_op = RVOpcodes::FSW;
        } else if (type->isPointer()) {
            store_op = RVOpcodes::SD;
        }

        const auto slot_offset = MFunc->getFrameInfo().spill_offsets.at(victim_vreg);

        auto spill = std::make_unique<MachineInstr>(store_op);
        spill->addOperand(std::make_unique<RegOperand>(evicted));
        spill->addOperand(std::make_unique<MemOperand>(
            std::make_unique<RegOperand>(PhysicalReg::S0),
            std::make_unique<ImmOperand>(slot_offset)
        ));
        new_instrs.push_back(std::move(spill));
    }

    vreg_to_preg.erase(victim_vreg);
    dirty_pregs.erase(evicted);
    preg_to_vreg.erase(evicted);
    return evicted;
}
|
||||
|
||||
} // namespace sysy
|
||||
@ -517,7 +517,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
|
||||
CurMBB->addInstruction(std::move(instr));
|
||||
break;
|
||||
}
|
||||
case Instruction::kSRA: {
|
||||
case Instruction::kSra: {
|
||||
auto rhs_const = dynamic_cast<ConstantInteger*>(rhs);
|
||||
auto instr = std::make_unique<MachineInstr>(RVOpcodes::SRAIW);
|
||||
instr->addOperand(std::make_unique<RegOperand>(dest_vreg));
|
||||
@ -745,83 +745,29 @@ void RISCv64ISel::selectNode(DAGNode* node) {
|
||||
CurMBB->addInstruction(std::move(instr));
|
||||
break;
|
||||
}
|
||||
case Instruction::kFtoI: { // 浮点 to 整数 (带向下取整)
|
||||
// 目标:实现 floor(x) 的效果, C/C++中浮点转整数是截断(truncate)
|
||||
// 对于正数,floor(x) == truncate(x)
|
||||
// RISC-V的 fcvt.w.s 默认是“四舍五入到偶数”
|
||||
// 我们需要手动实现截断逻辑
|
||||
// 逻辑:
|
||||
// temp_i = fcvt.w.s(x) // 四舍五入
|
||||
// temp_f = fcvt.s.w(temp_i) // 转回浮点
|
||||
// if (x < temp_f) { // 如果原数更小,说明被“五入”了
|
||||
// result = temp_i - 1
|
||||
// } else {
|
||||
// result = temp_i
|
||||
// }
|
||||
|
||||
auto temp_i_vreg = getNewVReg(Type::getIntType());
|
||||
auto temp_f_vreg = getNewVReg(Type::getFloatType());
|
||||
auto cmp_vreg = getNewVReg(Type::getIntType());
|
||||
case Instruction::kFtoI: { // 浮点 to 整数 (C/C++: 截断)
|
||||
// C/C++ 标准要求向零截断 (truncate), 对应的RISC-V舍入模式是 RTZ (Round Towards Zero).
|
||||
// fcvt.w.s 指令使用 fcsr 中的 frm 字段来决定舍入模式。
|
||||
// 我们需要手动设置 frm=1 (RTZ), 执行转换, 然后恢复 frm=0 (RNE, 默认).
|
||||
|
||||
// 1. fcvt.w.s temp_i_vreg, src_vreg
|
||||
auto fcvt_w = std::make_unique<MachineInstr>(RVOpcodes::FCVT_W_S);
|
||||
fcvt_w->addOperand(std::make_unique<RegOperand>(temp_i_vreg));
|
||||
fcvt_w->addOperand(std::make_unique<RegOperand>(src_vreg));
|
||||
CurMBB->addInstruction(std::move(fcvt_w));
|
||||
// 1. fsrmi x0, 1 (set rounding mode to RTZ)
|
||||
auto fsrmi1 = std::make_unique<MachineInstr>(RVOpcodes::FSRMI);
|
||||
fsrmi1->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
|
||||
fsrmi1->addOperand(std::make_unique<ImmOperand>(1));
|
||||
CurMBB->addInstruction(std::move(fsrmi1));
|
||||
|
||||
// 2. fcvt.s.w temp_f_vreg, temp_i_vreg
|
||||
auto fcvt_s = std::make_unique<MachineInstr>(RVOpcodes::FCVT_S_W);
|
||||
fcvt_s->addOperand(std::make_unique<RegOperand>(temp_f_vreg));
|
||||
fcvt_s->addOperand(std::make_unique<RegOperand>(temp_i_vreg));
|
||||
CurMBB->addInstruction(std::move(fcvt_s));
|
||||
// 2. fcvt.w.s dest_vreg, src_vreg
|
||||
auto fcvt = std::make_unique<MachineInstr>(RVOpcodes::FCVT_W_S);
|
||||
fcvt->addOperand(std::make_unique<RegOperand>(dest_vreg));
|
||||
fcvt->addOperand(std::make_unique<RegOperand>(src_vreg));
|
||||
CurMBB->addInstruction(std::move(fcvt));
|
||||
|
||||
// 3. flt.s cmp_vreg, src_vreg, temp_f_vreg
|
||||
auto flt = std::make_unique<MachineInstr>(RVOpcodes::FLT_S);
|
||||
flt->addOperand(std::make_unique<RegOperand>(cmp_vreg));
|
||||
flt->addOperand(std::make_unique<RegOperand>(src_vreg));
|
||||
flt->addOperand(std::make_unique<RegOperand>(temp_f_vreg));
|
||||
CurMBB->addInstruction(std::move(flt));
|
||||
// 3. fsrmi x0, 0 (restore rounding mode to RNE)
|
||||
auto fsrmi0 = std::make_unique<MachineInstr>(RVOpcodes::FSRMI);
|
||||
fsrmi0->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
|
||||
fsrmi0->addOperand(std::make_unique<ImmOperand>(0));
|
||||
CurMBB->addInstruction(std::move(fsrmi0));
|
||||
|
||||
// 创建标签
|
||||
int unique_id = this->local_label_counter++;
|
||||
std::string rounded_up_label = MFunc->getName() + "_ftoi_rounded_up_" + std::to_string(unique_id);
|
||||
std::string done_label = MFunc->getName() + "_ftoi_done_" + std::to_string(unique_id);
|
||||
|
||||
// 4. bne cmp_vreg, x0, rounded_up_label
|
||||
auto bne = std::make_unique<MachineInstr>(RVOpcodes::BNE);
|
||||
bne->addOperand(std::make_unique<RegOperand>(cmp_vreg));
|
||||
bne->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
|
||||
bne->addOperand(std::make_unique<LabelOperand>(rounded_up_label));
|
||||
CurMBB->addInstruction(std::move(bne));
|
||||
|
||||
// 5. else 分支: mv dest_vreg, temp_i_vreg
|
||||
auto mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
|
||||
mv->addOperand(std::make_unique<RegOperand>(dest_vreg));
|
||||
mv->addOperand(std::make_unique<RegOperand>(temp_i_vreg));
|
||||
CurMBB->addInstruction(std::move(mv));
|
||||
|
||||
// 6. j done_label
|
||||
auto j = std::make_unique<MachineInstr>(RVOpcodes::J);
|
||||
j->addOperand(std::make_unique<LabelOperand>(done_label));
|
||||
CurMBB->addInstruction(std::move(j));
|
||||
|
||||
// 7. rounded_up_label:
|
||||
auto label_up = std::make_unique<MachineInstr>(RVOpcodes::LABEL);
|
||||
label_up->addOperand(std::make_unique<LabelOperand>(rounded_up_label));
|
||||
CurMBB->addInstruction(std::move(label_up));
|
||||
|
||||
// 8. addiw dest_vreg, temp_i_vreg, -1
|
||||
auto addi = std::make_unique<MachineInstr>(RVOpcodes::ADDIW);
|
||||
addi->addOperand(std::make_unique<RegOperand>(dest_vreg));
|
||||
addi->addOperand(std::make_unique<RegOperand>(temp_i_vreg));
|
||||
addi->addOperand(std::make_unique<ImmOperand>(-1));
|
||||
CurMBB->addInstruction(std::move(addi));
|
||||
|
||||
// 9. done_label:
|
||||
auto label_done = std::make_unique<MachineInstr>(RVOpcodes::LABEL);
|
||||
label_done->addOperand(std::make_unique<LabelOperand>(done_label));
|
||||
CurMBB->addInstruction(std::move(label_done));
|
||||
|
||||
break;
|
||||
}
|
||||
case Instruction::kFNeg: { // 浮点取负
|
||||
@ -1202,10 +1148,11 @@ void RISCv64ISel::selectNode(DAGNode* node) {
|
||||
auto r_value_byte = getVReg(memset->getValue());
|
||||
|
||||
// 为memset内部逻辑创建新的临时虚拟寄存器
|
||||
auto r_counter = getNewVReg();
|
||||
auto r_end_addr = getNewVReg();
|
||||
auto r_current_addr = getNewVReg();
|
||||
auto r_temp_val = getNewVReg();
|
||||
Type* ptr_type = Type::getPointerType(Type::getIntType());
|
||||
auto r_counter = getNewVReg(ptr_type);
|
||||
auto r_end_addr = getNewVReg(ptr_type);
|
||||
auto r_current_addr = getNewVReg(ptr_type);
|
||||
auto r_temp_val = getNewVReg(Type::getIntType());
|
||||
|
||||
// 定义一系列lambda表达式来简化指令创建
|
||||
auto add_instr = [&](RVOpcodes op, unsigned rd, unsigned rs1, unsigned rs2) {
|
||||
@ -1296,7 +1243,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
|
||||
|
||||
// --- Step 1: 获取基地址 (此部分逻辑正确,保持不变) ---
|
||||
auto base_ptr_node = node->operands[0];
|
||||
auto current_addr_vreg = getNewVReg();
|
||||
auto current_addr_vreg = getNewVReg(gep->getType());
|
||||
|
||||
if (auto alloca_base = dynamic_cast<AllocaInst*>(base_ptr_node->value)) {
|
||||
auto frame_addr_instr = std::make_unique<MachineInstr>(RVOpcodes::FRAME_ADDR);
|
||||
@ -1338,13 +1285,13 @@ void RISCv64ISel::selectNode(DAGNode* node) {
|
||||
// 如果步长为0(例如对一个void类型或空结构体索引),则不产生任何偏移
|
||||
if (stride != 0) {
|
||||
// --- 为当前索引和步长生成偏移计算指令 ---
|
||||
auto offset_vreg = getNewVReg();
|
||||
auto offset_vreg = getNewVReg(Type::getIntType());
|
||||
|
||||
// 处理索引 - 区分常量与动态值
|
||||
unsigned index_vreg;
|
||||
if (auto const_index = dynamic_cast<ConstantValue*>(indexValue)) {
|
||||
// 对于常量索引,直接创建新的虚拟寄存器
|
||||
index_vreg = getNewVReg();
|
||||
index_vreg = getNewVReg(Type::getIntType());
|
||||
auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
|
||||
li->addOperand(std::make_unique<RegOperand>(index_vreg));
|
||||
li->addOperand(std::make_unique<ImmOperand>(const_index->getInt()));
|
||||
@ -1362,7 +1309,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
|
||||
CurMBB->addInstruction(std::move(mv));
|
||||
} else {
|
||||
// 步长不为1,需要生成乘法指令
|
||||
auto size_vreg = getNewVReg();
|
||||
auto size_vreg = getNewVReg(Type::getIntType());
|
||||
auto li_size = std::make_unique<MachineInstr>(RVOpcodes::LI);
|
||||
li_size->addOperand(std::make_unique<RegOperand>(size_vreg));
|
||||
li_size->addOperand(std::make_unique<ImmOperand>(stride));
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
#include "RISCv64LLIR.h"
|
||||
#include "RISCv64Info.h"
|
||||
#include <vector>
|
||||
#include <iostream> // 用于 std::ostream 和 std::cerr
|
||||
#include <string> // 用于 std::string
|
||||
@ -119,4 +120,76 @@ void MachineFunction::dumpStackFrameInfo(std::ostream& os) const {
|
||||
os << "---------------------------------------------------\n";
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief (为紧急溢出模式添加)将指令中所有对特定虚拟寄存器的引用替换为指定的物理寄存器。
|
||||
*/
|
||||
void MachineInstr::replaceVRegWithPReg(unsigned old_vreg, PhysicalReg preg) {
|
||||
for (auto& op : operands) {
|
||||
if (op->getKind() == MachineOperand::KIND_REG) {
|
||||
auto reg_op = static_cast<RegOperand*>(op.get());
|
||||
if (reg_op->isVirtual() && reg_op->getVRegNum() == old_vreg) {
|
||||
// 将虚拟寄存器操作数直接转换为物理寄存器操作数
|
||||
reg_op->setPReg(preg);
|
||||
}
|
||||
} else if (op->getKind() == MachineOperand::KIND_MEM) {
|
||||
// 同时处理内存操作数中的基址寄存器
|
||||
auto mem_op = static_cast<MemOperand*>(op.get());
|
||||
auto base_reg = mem_op->getBase();
|
||||
if (base_reg->isVirtual() && base_reg->getVRegNum() == old_vreg) {
|
||||
base_reg->setPReg(preg);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief (为常规溢出模式添加)根据提供的映射表,重映射指令中的虚拟寄存器。
|
||||
* 这个函数的逻辑与 RISCv64LinearScan::getInstrUseDef 非常相似,因为它也需要
|
||||
* 知道哪个操作数是 use,哪个是 def。
|
||||
*/
|
||||
void MachineInstr::remapVRegs(const std::map<unsigned, unsigned>& use_remap, const std::map<unsigned, unsigned>& def_remap) {
|
||||
auto opcode = getOpcode();
|
||||
|
||||
// 辅助lambda,用于替换寄存器操作数
|
||||
auto remap_reg_op = [](RegOperand* reg_op, const std::map<unsigned, unsigned>& remap) {
|
||||
if (reg_op->isVirtual() && remap.count(reg_op->getVRegNum())) {
|
||||
reg_op->setVRegNum(remap.at(reg_op->getVRegNum()));
|
||||
}
|
||||
};
|
||||
|
||||
// 根据指令信息表(op_info)来确定 use 和 def
|
||||
if (op_info.count(opcode)) {
|
||||
const auto& info = op_info.at(opcode);
|
||||
// 替换 def 操作数
|
||||
for (int idx : info.first) {
|
||||
if (idx < operands.size() && operands[idx]->getKind() == MachineOperand::KIND_REG) {
|
||||
remap_reg_op(static_cast<RegOperand*>(operands[idx].get()), def_remap);
|
||||
}
|
||||
}
|
||||
// 替换 use 操作数
|
||||
for (int idx : info.second) {
|
||||
if (idx < operands.size()) {
|
||||
if (operands[idx]->getKind() == MachineOperand::KIND_REG) {
|
||||
remap_reg_op(static_cast<RegOperand*>(operands[idx].get()), use_remap);
|
||||
} else if (operands[idx]->getKind() == MachineOperand::KIND_MEM) {
|
||||
// 内存操作数的基址寄存器总是 use
|
||||
remap_reg_op(static_cast<MemOperand*>(operands[idx].get())->getBase(), use_remap);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (opcode == RVOpcodes::CALL) {
|
||||
// 处理 CALL 指令的特殊情况
|
||||
// 第一个操作数(如果存在且是寄存器)是 def
|
||||
if (!operands.empty() && operands[0]->getKind() == MachineOperand::KIND_REG) {
|
||||
remap_reg_op(static_cast<RegOperand*>(operands[0].get()), def_remap);
|
||||
}
|
||||
// 其余寄存器操作数是 use
|
||||
for (size_t i = 1; i < operands.size(); ++i) {
|
||||
if (operands[i]->getKind() == MachineOperand::KIND_REG) {
|
||||
remap_reg_op(static_cast<RegOperand*>(operands[i].get()), use_remap);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
694
src/backend/RISCv64/RISCv64LinearScan.cpp
Normal file
694
src/backend/RISCv64/RISCv64LinearScan.cpp
Normal file
@ -0,0 +1,694 @@
|
||||
#include "RISCv64LinearScan.h"
|
||||
#include "RISCv64LLIR.h"
|
||||
#include "RISCv64ISel.h"
|
||||
#include "RISCv64Info.h"
|
||||
#include "RISCv64AsmPrinter.h"
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <functional>
|
||||
|
||||
// 外部调试级别控制变量
|
||||
extern int DEBUG;
|
||||
extern int DEEPDEBUG;
|
||||
extern int DEEPERDEBUG;
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// --- 调试辅助函数 ---
|
||||
// These helpers are self-contained and only used for logging.
|
||||
/**
 * @brief Returns the assembly name of a physical register, for logging only.
 * @param preg Register to name.
 * @return The register's name, or "UnknownPreg" when it is not in the table.
 */
static std::string pregToString(PhysicalReg preg) {
    // This table is a copy of the one in AsmPrinter, kept here to avoid dependency issues.
    static const std::map<PhysicalReg, std::string> kNames = {
        {PhysicalReg::ZERO, "zero"}, {PhysicalReg::RA, "ra"}, {PhysicalReg::SP, "sp"}, {PhysicalReg::GP, "gp"}, {PhysicalReg::TP, "tp"},
        {PhysicalReg::T0, "t0"}, {PhysicalReg::T1, "t1"}, {PhysicalReg::T2, "t2"}, {PhysicalReg::T3, "t3"}, {PhysicalReg::T4, "t4"}, {PhysicalReg::T5, "t5"}, {PhysicalReg::T6, "t6"},
        {PhysicalReg::S0, "s0"}, {PhysicalReg::S1, "s1"}, {PhysicalReg::S2, "s2"}, {PhysicalReg::S3, "s3"}, {PhysicalReg::S4, "s4"}, {PhysicalReg::S5, "s5"}, {PhysicalReg::S6, "s6"}, {PhysicalReg::S7, "s7"}, {PhysicalReg::S8, "s8"}, {PhysicalReg::S9, "s9"}, {PhysicalReg::S10, "s10"}, {PhysicalReg::S11, "s11"},
        {PhysicalReg::A0, "a0"}, {PhysicalReg::A1, "a1"}, {PhysicalReg::A2, "a2"}, {PhysicalReg::A3, "a3"}, {PhysicalReg::A4, "a4"}, {PhysicalReg::A5, "a5"}, {PhysicalReg::A6, "a6"}, {PhysicalReg::A7, "a7"},
        {PhysicalReg::F0, "f0"}, {PhysicalReg::F1, "f1"}, {PhysicalReg::F2, "f2"}, {PhysicalReg::F3, "f3"}, {PhysicalReg::F4, "f4"}, {PhysicalReg::F5, "f5"}, {PhysicalReg::F6, "f6"}, {PhysicalReg::F7, "f7"},
        {PhysicalReg::F8, "f8"}, {PhysicalReg::F9, "f9"}, {PhysicalReg::F10, "f10"}, {PhysicalReg::F11, "f11"}, {PhysicalReg::F12, "f12"}, {PhysicalReg::F13, "f13"}, {PhysicalReg::F14, "f14"}, {PhysicalReg::F15, "f15"},
        {PhysicalReg::F16, "f16"}, {PhysicalReg::F17, "f17"}, {PhysicalReg::F18, "f18"}, {PhysicalReg::F19, "f19"}, {PhysicalReg::F20, "f20"}, {PhysicalReg::F21, "f21"}, {PhysicalReg::F22, "f22"}, {PhysicalReg::F23, "f23"},
        {PhysicalReg::F24, "f24"}, {PhysicalReg::F25, "f25"}, {PhysicalReg::F26, "f26"}, {PhysicalReg::F27, "f27"}, {PhysicalReg::F28, "f28"}, {PhysicalReg::F29, "f29"}, {PhysicalReg::F30, "f30"}, {PhysicalReg::F31, "f31"},
        {PhysicalReg::INVALID, "INVALID"}
    };

    const auto entry = kNames.find(preg);
    if (entry == kNames.end()) {
        return "UnknownPreg";
    }
    return entry->second;
}
|
||||
|
||||
/**
 * @brief Renders a set as "{ a, b, c }" using a caller-supplied element formatter.
 *
 * Elements appear in the set's iteration order, separated by ", ". An empty set
 * yields "{  }" (opening and closing parts with nothing in between).
 *
 * @param s         Set to print.
 * @param formatter Converts one element to its textual form.
 * @return The formatted string.
 */
template<typename T>
static std::string setToString(const std::set<T>& s, std::function<std::string(T)> formatter) {
    std::string text = "{ ";
    const char* separator = "";
    for (const auto& element : s) {
        text += separator;
        text += formatter(element);
        separator = ", ";
    }
    text += " }";
    return text;
}
|
||||
|
||||
static std::string vregSetToString(const std::set<unsigned>& s) {
|
||||
return setToString<unsigned>(s, [](unsigned v){ return "%v" + std::to_string(v); });
|
||||
}
|
||||
|
||||
static std::string pregSetToString(const std::set<PhysicalReg>& s) {
|
||||
return setToString<PhysicalReg>(s, pregToString);
|
||||
}
|
||||
|
||||
// Helper function to check if a register is callee-saved.
|
||||
// Defined locally to avoid scope issues.
|
||||
static bool isCalleeSaved(PhysicalReg preg) {
|
||||
if (preg >= PhysicalReg::S0 && preg <= PhysicalReg::S11) return true;
|
||||
if (preg >= PhysicalReg::F8 && preg <= PhysicalReg::F9) return true;
|
||||
if (preg >= PhysicalReg::F18 && preg <= PhysicalReg::F27) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/**
 * @brief Prepares the linear-scan allocator for one machine function.
 *
 * Fills the lists of allocatable integer and floating-point registers and maps the
 * function's arguments to their ABI registers: the first eight non-float arguments
 * to the registers starting at A0, the first eight float arguments to the registers
 * starting at F10. Further arguments get no entry in abi_vreg_map.
 *
 * @param mfunc Machine function to allocate registers for.
 */
RISCv64LinearScan::RISCv64LinearScan(MachineFunction* mfunc)
    : MFunc(mfunc),
      ISel(mfunc->getISel()),
      vreg_type_map(ISel->getVRegTypeMap()) {

    allocable_int_regs = {
        PhysicalReg::T0, PhysicalReg::T1, PhysicalReg::T2, PhysicalReg::T3, PhysicalReg::T6,
        PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3, PhysicalReg::S4, PhysicalReg::S5, PhysicalReg::S6, PhysicalReg::S7,
        PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11,
    };
    allocable_fp_regs = {
        PhysicalReg::F0, PhysicalReg::F1, PhysicalReg::F2, PhysicalReg::F3, PhysicalReg::F4, PhysicalReg::F5, PhysicalReg::F6, PhysicalReg::F7,
        PhysicalReg::F10, PhysicalReg::F11, PhysicalReg::F12, PhysicalReg::F13, PhysicalReg::F14, PhysicalReg::F15, PhysicalReg::F16, PhysicalReg::F17,
        PhysicalReg::F8, PhysicalReg::F9, PhysicalReg::F18, PhysicalReg::F19, PhysicalReg::F20, PhysicalReg::F21, PhysicalReg::F22,
        PhysicalReg::F23, PhysicalReg::F24, PhysicalReg::F25, PhysicalReg::F26, PhysicalReg::F27,
        PhysicalReg::F28, PhysicalReg::F29, PhysicalReg::F30, PhysicalReg::F31,
    };

    // Without an IR function there are no arguments to map.
    if (!MFunc->getFunc()) return;

    constexpr int kArgRegsPerClass = 8;
    int int_args_seen = 0;
    int fp_args_seen = 0;
    for (Argument* arg : MFunc->getFunc()->getArguments()) {
        const unsigned arg_vreg = ISel->getVReg(arg);
        const bool is_float_arg = arg->getType()->isFloat();

        int& seen = is_float_arg ? fp_args_seen : int_args_seen;
        if (seen >= kArgRegsPerClass) continue;

        const int base_reg = static_cast<int>(is_float_arg ? PhysicalReg::F10 : PhysicalReg::A0);
        const auto preg = static_cast<PhysicalReg>(base_reg + seen);
        ++seen;
        abi_vreg_map[arg_vreg] = preg;
    }
}
|
||||
|
||||
bool RISCv64LinearScan::run() {
|
||||
if (DEBUG) std::cerr << "===== [LSRA] Running for function: " << MFunc->getName() << " =====\n";
|
||||
|
||||
const int MAX_ITERATIONS = 3;
|
||||
|
||||
for (int iteration = 1; ; ++iteration) {
|
||||
if (DEBUG && iteration > 1) {
|
||||
std::cerr << "\n----- [LSRA] Re-running iteration " << iteration << " -----\n";
|
||||
}
|
||||
|
||||
linearizeBlocks();
|
||||
computeLiveIntervals();
|
||||
bool needs_spill = linearScan();
|
||||
|
||||
// 如果当前这轮线性扫描不需要溢出,说明分配成功,直接跳出循环。
|
||||
if (!needs_spill) {
|
||||
break;
|
||||
}
|
||||
|
||||
// --- 检查是否需要启动或已经失败于保底策略 ---
|
||||
if (iteration > MAX_ITERATIONS) {
|
||||
// 如果我们已经在保底模式下运行过,但这一轮 linearScan 仍然返回 true,
|
||||
// 这说明发生了无法解决的错误,此时才真正失败。
|
||||
if (conservative_spill_mode) {
|
||||
std::cerr << "\n!!!!!! [LSRA-FATAL] Allocation failed to converge even in Conservative Spill Mode. Triggering final fallback. !!!!!!\n\n";
|
||||
return false; // 返回失败,而不是exit
|
||||
}
|
||||
// 这是第一次达到最大迭代次数,触发保底策略。
|
||||
std::cerr << "\n!!!!!! [LSRA-WARN] Convergence failed after " << MAX_ITERATIONS
|
||||
<< " iterations. Entering Conservative Spill Mode for the next attempt. !!!!!!\n\n";
|
||||
conservative_spill_mode = true; // 开启保守溢出模式,将在下一次循环生效
|
||||
}
|
||||
|
||||
// 只要需要溢出,就重写程序
|
||||
if (DEBUG) std::cerr << "[LSRA] Spilling detected, will rewrite program.\n";
|
||||
rewriteProgram();
|
||||
}
|
||||
|
||||
if (DEBUG) std::cerr << "[LSRA] Applying final allocation.\n";
|
||||
applyAllocation();
|
||||
MFunc->getFrameInfo().vreg_to_preg_map = this->vreg_to_preg_map;
|
||||
collectUsedCalleeSavedRegs();
|
||||
|
||||
if (DEBUG) std::cerr << "===== [LSRA] Finished for function: " << MFunc->getName() << " =====\n\n";
|
||||
return true; // 分配成功
|
||||
}
|
||||
|
||||
void RISCv64LinearScan::linearizeBlocks() {
|
||||
linear_order_blocks.clear();
|
||||
for (auto& mbb : MFunc->getBlocks()) {
|
||||
linear_order_blocks.push_back(mbb.get());
|
||||
}
|
||||
}
|
||||
|
||||
void RISCv64LinearScan::computeLiveIntervals() {
|
||||
if (DEBUG) std::cerr << "[LSRA-Live] Starting live interval computation.\n";
|
||||
instr_numbering.clear();
|
||||
live_intervals.clear();
|
||||
unhandled.clear();
|
||||
|
||||
int num = 0;
|
||||
std::set<int> call_locations;
|
||||
for (auto* mbb : linear_order_blocks) {
|
||||
for (auto& instr : mbb->getInstructions()) {
|
||||
instr_numbering[instr.get()] = num;
|
||||
if (instr->getOpcode() == RVOpcodes::CALL) call_locations.insert(num);
|
||||
num += 2;
|
||||
}
|
||||
}
|
||||
|
||||
if (DEEPDEBUG) std::cerr << " [Live] Starting live variable dataflow analysis...\n";
|
||||
std::map<const MachineBasicBlock*, std::set<unsigned>> live_in, live_out;
|
||||
bool changed = true;
|
||||
int df_iter = 0;
|
||||
while(changed) {
|
||||
changed = false;
|
||||
df_iter++;
|
||||
std::vector<MachineBasicBlock*> reversed_blocks = linear_order_blocks;
|
||||
std::reverse(reversed_blocks.begin(), reversed_blocks.end());
|
||||
for(auto* mbb : reversed_blocks) {
|
||||
std::set<unsigned> old_live_in = live_in[mbb];
|
||||
std::set<unsigned> current_live_out;
|
||||
for (auto* succ : mbb->successors) current_live_out.insert(live_in[succ].begin(), live_in[succ].end());
|
||||
std::set<unsigned> use, def;
|
||||
std::set<unsigned> temp_live = current_live_out;
|
||||
auto& instrs = mbb->getInstructions();
|
||||
for (auto it = instrs.rbegin(); it != instrs.rend(); ++it) {
|
||||
use.clear(); def.clear();
|
||||
getInstrUseDef(it->get(), use, def);
|
||||
for (unsigned vreg : def) temp_live.erase(vreg);
|
||||
for (unsigned vreg : use) temp_live.insert(vreg);
|
||||
}
|
||||
if (live_in[mbb] != temp_live || live_out[mbb] != current_live_out) {
|
||||
changed = true;
|
||||
live_in[mbb] = temp_live;
|
||||
live_out[mbb] = current_live_out;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (DEEPDEBUG) std::cerr << " [Live] Dataflow analysis converged after " << df_iter << " iterations.\n";
|
||||
if (DEEPERDEBUG) {
|
||||
std::cerr << " [Live-Debug] Live-in sets:\n";
|
||||
for (auto* mbb : linear_order_blocks) std::cerr << " " << mbb->getName() << ": " << vregSetToString(live_in[mbb]) << "\n";
|
||||
std::cerr << " [Live-Debug] Live-out sets:\n";
|
||||
for (auto* mbb : linear_order_blocks) std::cerr << " " << mbb->getName() << ": " << vregSetToString(live_out[mbb]) << "\n";
|
||||
}
|
||||
|
||||
if (DEEPDEBUG) std::cerr << " [Live] Building precise intervals...\n";
|
||||
std::map<unsigned, int> first_def, last_use;
|
||||
for (auto* mbb : linear_order_blocks) {
|
||||
for (auto& instr_ptr : mbb->getInstructions()) {
|
||||
int instr_num = instr_numbering.at(instr_ptr.get());
|
||||
std::set<unsigned> use, def;
|
||||
getInstrUseDef(instr_ptr.get(), use, def);
|
||||
for (unsigned vreg : def) if (first_def.find(vreg) == first_def.end()) first_def[vreg] = instr_num;
|
||||
for (unsigned vreg : use) last_use[vreg] = instr_num;
|
||||
}
|
||||
}
|
||||
if (DEEPERDEBUG) {
|
||||
std::cerr << " [Live-Debug] First def points:\n";
|
||||
for (auto const& [vreg, pos] : first_def) std::cerr << " %v" << vreg << ": " << pos << "\n";
|
||||
std::cerr << " [Live-Debug] Last use points:\n";
|
||||
for (auto const& [vreg, pos] : last_use) std::cerr << " %v" << vreg << ": " << pos << "\n";
|
||||
}
|
||||
|
||||
for (auto const& [vreg, start] : first_def) {
|
||||
live_intervals.emplace(vreg, LiveInterval(vreg));
|
||||
auto& interval = live_intervals.at(vreg);
|
||||
interval.start = start;
|
||||
interval.end = last_use.count(vreg) ? last_use.at(vreg) : start;
|
||||
}
|
||||
|
||||
for (auto const& [mbb, live_set] : live_out) {
|
||||
if (mbb->getInstructions().empty()) continue;
|
||||
int block_end_num = instr_numbering.at(mbb->getInstructions().back().get());
|
||||
for (unsigned vreg : live_set) {
|
||||
if (live_intervals.count(vreg)) {
|
||||
if (DEEPERDEBUG && live_intervals.at(vreg).end < block_end_num) {
|
||||
std::cerr << " [Live-Debug] Extending interval for %v" << vreg << " from " << live_intervals.at(vreg).end << " to " << block_end_num << " due to live_out of " << mbb->getName() << "\n";
|
||||
}
|
||||
live_intervals.at(vreg).end = std::max(live_intervals.at(vreg).end, block_end_num);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto& pair : live_intervals) {
|
||||
auto& interval = pair.second;
|
||||
auto it = call_locations.lower_bound(interval.start);
|
||||
if (it != call_locations.end() && *it < interval.end) interval.crosses_call = true;
|
||||
}
|
||||
|
||||
for (auto& pair : live_intervals) unhandled.push_back(&pair.second);
|
||||
std::sort(unhandled.begin(), unhandled.end(), [](const LiveInterval* a, const LiveInterval* b){ return a->start < b->start; });
|
||||
|
||||
if (DEBUG) {
|
||||
std::cerr << "[LSRA-Live] Finished. Total intervals: " << unhandled.size() << "\n";
|
||||
if (DEEPDEBUG) {
|
||||
std::cerr << " [Live] Computed Intervals (vreg: [start, end]):\n";
|
||||
for(const auto* interval : unhandled) {
|
||||
std::cerr << " %v" << interval->vreg << ": [" << interval->start << ", " << interval->end << "]"
|
||||
<< (interval->crosses_call ? " (crosses call)" : "") << "\n";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ================== 新增的调试代码 ==================
|
||||
// 检查活性分析找到的vreg与指令扫描找到的vreg是否一致
|
||||
if (DEEPERDEBUG) {
|
||||
// 修正:将 std.set 修改为 std::set
|
||||
std::set<unsigned> vregs_from_liveness;
|
||||
for (const auto& pair : live_intervals) {
|
||||
vregs_from_liveness.insert(pair.first);
|
||||
}
|
||||
|
||||
std::set<unsigned> vregs_from_instr_scan;
|
||||
for (auto* mbb : linear_order_blocks) {
|
||||
for (auto& instr_ptr : mbb->getInstructions()) {
|
||||
std::set<unsigned> use, def;
|
||||
getInstrUseDef(instr_ptr.get(), use, def);
|
||||
vregs_from_instr_scan.insert(use.begin(), use.end());
|
||||
vregs_from_instr_scan.insert(def.begin(), def.end());
|
||||
}
|
||||
}
|
||||
|
||||
std::cerr << " [Live-Debug] VReg Consistency Check:\n";
|
||||
std::cerr << " VRegs found by Liveness Analysis: " << vregs_from_liveness.size() << "\n";
|
||||
std::cerr << " VRegs found by getInstrUseDef Scan: " << vregs_from_instr_scan.size() << "\n";
|
||||
|
||||
// 修正:将 std.set 修改为 std::set
|
||||
std::set<unsigned> diff;
|
||||
std::set_difference(vregs_from_liveness.begin(), vregs_from_liveness.end(),
|
||||
vregs_from_instr_scan.begin(), vregs_from_instr_scan.end(),
|
||||
std::inserter(diff, diff.begin()));
|
||||
|
||||
if (!diff.empty()) {
|
||||
std::cerr << " !!!!!! [Live-Debug] DISCREPANCY DETECTED !!!!!!\n";
|
||||
std::cerr << " The following vregs were found by liveness but NOT by getInstrUseDef scan:\n";
|
||||
std::cerr << " " << vregSetToString(diff) << "\n";
|
||||
} else {
|
||||
std::cerr << " [Live-Debug] VReg sets are consistent.\n";
|
||||
}
|
||||
}
|
||||
// ======================================================
|
||||
}
|
||||
|
||||
bool RISCv64LinearScan::linearScan() {
|
||||
// ================== 终极保底策略 (新逻辑) ==================
|
||||
// 当此标志位为true时,我们进入最暴力的溢出模式。
|
||||
if (conservative_spill_mode) {
|
||||
if (DEBUG) std::cerr << "[LSRA-Scan-Panic] In Conservative Mode. Spilling all unhandled vregs.\n";
|
||||
|
||||
// 1. 清空溢出列表,准备重新计算
|
||||
spilled_vregs.clear();
|
||||
|
||||
// 2. 遍历所有计算出的活性区间
|
||||
for (auto& pair : live_intervals) {
|
||||
// 3. 如果一个vreg不是ABI规定的寄存器,就必须溢出
|
||||
if (abi_vreg_map.find(pair.first) == abi_vreg_map.end()) {
|
||||
spilled_vregs.insert(pair.first);
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 只要有任何vreg被标记为溢出,就返回true以触发最终的rewriteProgram。
|
||||
// 下一轮迭代时,由于所有vreg都已被重写,将不再有新的溢出,保证收敛。
|
||||
return !spilled_vregs.empty();
|
||||
}
|
||||
// ==========================================================
|
||||
|
||||
|
||||
// ================== 常规线性扫描逻辑 (您已有的代码) ==================
|
||||
// 只有在非保守模式下才会执行以下代码
|
||||
if (DEBUG) std::cerr << "[LSRA-Scan] Starting main linear scan algorithm.\n";
|
||||
active.clear();
|
||||
spilled_vregs.clear();
|
||||
vreg_to_preg_map.clear();
|
||||
|
||||
std::set<PhysicalReg> free_caller_int_regs, free_callee_int_regs;
|
||||
std::set<PhysicalReg> free_caller_fp_regs, free_callee_fp_regs;
|
||||
|
||||
for (auto preg : allocable_int_regs) {
|
||||
if (isCalleeSaved(preg)) free_callee_int_regs.insert(preg); else free_caller_int_regs.insert(preg);
|
||||
}
|
||||
for (auto preg : allocable_fp_regs) {
|
||||
if (isCalleeSaved(preg)) free_callee_fp_regs.insert(preg); else free_caller_fp_regs.insert(preg);
|
||||
}
|
||||
|
||||
if (DEEPDEBUG) {
|
||||
std::cerr << " [Scan] Initial free regs:\n";
|
||||
std::cerr << " Caller-Saved Int: " << pregSetToString(free_caller_int_regs) << "\n";
|
||||
std::cerr << " Callee-Saved Int: " << pregSetToString(free_callee_int_regs) << "\n";
|
||||
}
|
||||
|
||||
vreg_to_preg_map.insert(abi_vreg_map.begin(), abi_vreg_map.end());
|
||||
std::vector<LiveInterval*> normal_unhandled;
|
||||
for(LiveInterval* interval : unhandled) {
|
||||
if(abi_vreg_map.count(interval->vreg)) {
|
||||
active.push_back(interval);
|
||||
PhysicalReg preg = abi_vreg_map.at(interval->vreg);
|
||||
if (isFPVReg(interval->vreg)) {
|
||||
if(isCalleeSaved(preg)) free_callee_fp_regs.erase(preg); else free_caller_fp_regs.erase(preg);
|
||||
} else {
|
||||
if(isCalleeSaved(preg)) free_callee_int_regs.erase(preg); else free_caller_int_regs.erase(preg);
|
||||
}
|
||||
} else {
|
||||
normal_unhandled.push_back(interval);
|
||||
}
|
||||
}
|
||||
unhandled = normal_unhandled;
|
||||
std::sort(active.begin(), active.end(), [](const LiveInterval* a, const LiveInterval* b){ return a->end < b->end; });
|
||||
|
||||
for (LiveInterval* current : unhandled) {
|
||||
if (DEEPDEBUG) std::cerr << "\n [Scan] Processing interval %v" << current->vreg << " [" << current->start << ", " << current->end << "]\n";
|
||||
|
||||
std::vector<LiveInterval*> new_active;
|
||||
for (LiveInterval* active_interval : active) {
|
||||
if (active_interval->end < current->start) {
|
||||
PhysicalReg preg = vreg_to_preg_map.at(active_interval->vreg);
|
||||
if (DEEPDEBUG) std::cerr << " [Scan] Expiring interval %v" << active_interval->vreg << ", freeing " << pregToString(preg) << "\n";
|
||||
if (isFPVReg(active_interval->vreg)) {
|
||||
if(isCalleeSaved(preg)) free_callee_fp_regs.insert(preg); else free_caller_fp_regs.insert(preg);
|
||||
} else {
|
||||
if(isCalleeSaved(preg)) free_callee_int_regs.insert(preg); else free_caller_int_regs.insert(preg);
|
||||
}
|
||||
} else {
|
||||
new_active.push_back(active_interval);
|
||||
}
|
||||
}
|
||||
active = new_active;
|
||||
|
||||
bool is_fp = isFPVReg(current->vreg);
|
||||
auto& free_caller = is_fp ? free_caller_fp_regs : free_caller_int_regs;
|
||||
auto& free_callee = is_fp ? free_callee_fp_regs : free_callee_int_regs;
|
||||
PhysicalReg allocated_preg = PhysicalReg::INVALID;
|
||||
|
||||
if (current->crosses_call) {
|
||||
if (!free_callee.empty()) {
|
||||
allocated_preg = *free_callee.begin();
|
||||
free_callee.erase(allocated_preg);
|
||||
}
|
||||
} else {
|
||||
if (!free_caller.empty()) {
|
||||
allocated_preg = *free_caller.begin();
|
||||
free_caller.erase(allocated_preg);
|
||||
} else if (!free_callee.empty()) {
|
||||
allocated_preg = *free_callee.begin();
|
||||
free_callee.erase(allocated_preg);
|
||||
}
|
||||
}
|
||||
|
||||
if (allocated_preg != PhysicalReg::INVALID) {
|
||||
if (DEEPDEBUG) std::cerr << " [Scan] Allocated " << pregToString(allocated_preg) << " to %v" << current->vreg << "\n";
|
||||
vreg_to_preg_map[current->vreg] = allocated_preg;
|
||||
active.push_back(current);
|
||||
std::sort(active.begin(), active.end(), [](const LiveInterval* a, const LiveInterval* b){ return a->end < b->end; });
|
||||
} else {
|
||||
if (DEEPDEBUG) std::cerr << " [Scan] No free registers for %v" << current->vreg << ". Spilling...\n";
|
||||
spillAtInterval(current);
|
||||
}
|
||||
}
|
||||
return !spilled_vregs.empty();
|
||||
}
|
||||
|
||||
/**
 * @brief Resolves a register shortage for `current` by spilling either an active
 *        interval or `current` itself.
 *
 * The victim is the eligible active interval with the furthest end point. An active
 * interval is eligible only if handing its register to `current` is legal:
 *   - it is not pre-coloured through abi_vreg_map (conservative mode never spills
 *     those either, so the regular path must not);
 *   - it belongs to the same register class (int vs. floating point) as `current`;
 *   - when `current` crosses a call, its register is callee-saved, matching the
 *     restriction linearScan() applies when allocating from the free pools.
 * If such a victim exists and ends later than `current`, the victim is spilled and
 * `current` inherits its register; otherwise `current` is spilled.
 *
 * @param current The interval that could not obtain a free register.
 */
void RISCv64LinearScan::spillAtInterval(LiveInterval* current) {
    const bool current_is_fp = isFPVReg(current->vreg);

    // `active` is sorted by end point; `>=` keeps the last of several equal ends, so
    // the choice matches active.back() whenever back() is itself eligible.
    auto victim_it = active.end();
    for (auto it = active.begin(); it != active.end(); ++it) {
        const LiveInterval* candidate = *it;
        if (abi_vreg_map.count(candidate->vreg)) continue;             // pinned by the ABI
        if (isFPVReg(candidate->vreg) != current_is_fp) continue;       // wrong register class
        const auto assigned = vreg_to_preg_map.find(candidate->vreg);
        if (assigned == vreg_to_preg_map.end()) continue;               // nothing to hand over
        if (current->crosses_call && !isCalleeSaved(assigned->second)) continue;  // would be clobbered by the call
        if (victim_it == active.end() || candidate->end >= (*victim_it)->end) victim_it = it;
    }
    LiveInterval* spill_candidate = (victim_it != active.end()) ? *victim_it : nullptr;

    if (DEEPERDEBUG) {
        std::cerr << "      [Spill-Debug] Spill decision for current=%v" << current->vreg << "[" << current->start << "," << current->end << "]\n";
        std::cerr << "        [Spill-Debug] Active intervals (sorted by end point):\n";
        for (const auto* i : active) {
            // find() instead of operator[]: a debug dump must not insert map entries.
            const auto assigned = vreg_to_preg_map.find(i->vreg);
            const PhysicalReg shown = (assigned != vreg_to_preg_map.end()) ? assigned->second : PhysicalReg::INVALID;
            std::cerr << "          %v" << i->vreg << "[" << i->start << "," << i->end << "] in " << pregToString(shown) << "\n";
        }
        if (spill_candidate) {
            std::cerr << "        [Spill-Debug] Candidate is %v" << spill_candidate->vreg << ". Its end is " << spill_candidate->end << ", current's end is " << current->end << "\n";
        } else {
            std::cerr << "        [Spill-Debug] No active candidate.\n";
        }
    }

    if (spill_candidate && spill_candidate->end > current->end) {
        if (DEEPDEBUG) std::cerr << "      [Spill] Decision: Spilling active %v" << spill_candidate->vreg << ".\n";
        const unsigned victim_vreg = spill_candidate->vreg;
        const PhysicalReg preg = vreg_to_preg_map.at(victim_vreg);

        // Move the register from the victim to `current`.
        vreg_to_preg_map.erase(victim_vreg);
        vreg_to_preg_map[current->vreg] = preg;

        // The victim need not be the last element any more, so erase by position.
        active.erase(victim_it);
        active.push_back(current);
        std::sort(active.begin(), active.end(), [](const LiveInterval* a, const LiveInterval* b){ return a->end < b->end; });
        spilled_vregs.insert(victim_vreg);
    } else {
        if (DEEPDEBUG) std::cerr << "      [Spill] Decision: Spilling current %v" << current->vreg << ".\n";
        spilled_vregs.insert(current->vreg);
    }
}
|
||||
|
||||
void RISCv64LinearScan::rewriteProgram() {
|
||||
if (DEBUG) {
|
||||
std::cerr << "[LSRA-Rewrite] Starting program rewrite. Spilled vregs: " << vregSetToString(spilled_vregs) << "\n";
|
||||
}
|
||||
StackFrameInfo& frame_info = MFunc->getFrameInfo();
|
||||
int spill_current_offset = frame_info.locals_end_offset - frame_info.spill_size;
|
||||
|
||||
for (unsigned vreg : spilled_vregs) {
|
||||
// 保持您的原始逻辑
|
||||
if (frame_info.spill_offsets.count(vreg)) continue;
|
||||
|
||||
Type* type = vreg_type_map.count(vreg) ? vreg_type_map.at(vreg) : Type::getIntType();
|
||||
int size = isFPVReg(vreg) ? 4 : (type->isPointer() ? 8 : 4);
|
||||
spill_current_offset -= size;
|
||||
spill_current_offset = (spill_current_offset & ~7);
|
||||
frame_info.spill_offsets[vreg] = spill_current_offset;
|
||||
if (DEEPDEBUG) std::cerr << " [Rewrite] Assigned new stack offset " << frame_info.spill_offsets.at(vreg) << " to spilled %v" << vreg << "\n";
|
||||
}
|
||||
frame_info.spill_size = -(spill_current_offset - frame_info.locals_end_offset);
|
||||
|
||||
for (auto& mbb : MFunc->getBlocks()) {
|
||||
auto& instrs = mbb->getInstructions();
|
||||
std::vector<std::unique_ptr<MachineInstr>> new_instrs;
|
||||
if (DEEPERDEBUG) std::cerr << " [Rewrite] Processing block " << mbb->getName() << "\n";
|
||||
|
||||
for (auto it = instrs.begin(); it != instrs.end(); ++it) {
|
||||
auto& instr = *it;
|
||||
std::set<unsigned> use_vregs, def_vregs;
|
||||
getInstrUseDef(instr.get(), use_vregs, def_vregs);
|
||||
|
||||
if (conservative_spill_mode) {
|
||||
// ================== 紧急模式重写逻辑 ==================
|
||||
// 直接使用物理寄存器 t4 (SPILL_TEMP_REG) 进行加载/存储
|
||||
|
||||
// 为调试日志准备一个指令打印机
|
||||
auto printer = DEEPERDEBUG ? std::make_unique<RISCv64AsmPrinter>(MFunc) : nullptr;
|
||||
auto original_instr_str_for_log = DEEPERDEBUG ? printer->formatInstr(instr.get()) : "";
|
||||
bool modified = false;
|
||||
|
||||
for (unsigned old_vreg : use_vregs) {
|
||||
if (spilled_vregs.count(old_vreg)) {
|
||||
modified = true;
|
||||
Type* type = vreg_type_map.at(old_vreg);
|
||||
RVOpcodes load_op = isFPVReg(old_vreg) ? RVOpcodes::FLW : (type->isPointer() ? RVOpcodes::LD : RVOpcodes::LW);
|
||||
auto load = std::make_unique<MachineInstr>(load_op);
|
||||
// 直接加载到保留的物理寄存器
|
||||
load->addOperand(std::make_unique<RegOperand>(SPILL_TEMP_REG));
|
||||
load->addOperand(std::make_unique<MemOperand>(
|
||||
std::make_unique<RegOperand>(PhysicalReg::S0),
|
||||
std::make_unique<ImmOperand>(frame_info.spill_offsets.at(old_vreg))));
|
||||
|
||||
if (DEEPERDEBUG) {
|
||||
std::cerr << " [Rewrite-Panic] Inserting LOAD for use of %v" << old_vreg
|
||||
<< " into " << pregToString(SPILL_TEMP_REG)
|
||||
<< " before: " << original_instr_str_for_log << "\n";
|
||||
}
|
||||
new_instrs.push_back(std::move(load));
|
||||
|
||||
// 替换指令中的操作数
|
||||
instr->replaceVRegWithPReg(old_vreg, SPILL_TEMP_REG);
|
||||
}
|
||||
}
|
||||
|
||||
// 在处理 def 之前,先替换定义自身的 vreg
|
||||
for (unsigned old_vreg : def_vregs) {
|
||||
if (spilled_vregs.count(old_vreg)) {
|
||||
modified = true;
|
||||
instr->replaceVRegWithPReg(old_vreg, SPILL_TEMP_REG);
|
||||
}
|
||||
}
|
||||
|
||||
// 将原始指令(可能已被修改)放入新列表
|
||||
new_instrs.push_back(std::move(instr));
|
||||
if (DEEPERDEBUG && modified) {
|
||||
std::cerr << " [Rewrite-Panic] Original: " << original_instr_str_for_log
|
||||
<< " -> Rewritten: " << printer->formatInstr(new_instrs.back().get()) << "\n";
|
||||
}
|
||||
|
||||
for (unsigned old_vreg : def_vregs) {
|
||||
if (spilled_vregs.count(old_vreg)) {
|
||||
// 指令本身已经被修改为定义到 SPILL_TEMP_REG,现在从它存回内存
|
||||
Type* type = vreg_type_map.at(old_vreg);
|
||||
RVOpcodes store_op = isFPVReg(old_vreg) ? RVOpcodes::FSW : (type->isPointer() ? RVOpcodes::SD : RVOpcodes::SW);
|
||||
auto store = std::make_unique<MachineInstr>(store_op);
|
||||
store->addOperand(std::make_unique<RegOperand>(SPILL_TEMP_REG));
|
||||
store->addOperand(std::make_unique<MemOperand>(
|
||||
std::make_unique<RegOperand>(PhysicalReg::S0),
|
||||
std::make_unique<ImmOperand>(frame_info.spill_offsets.at(old_vreg))));
|
||||
if (DEEPERDEBUG) {
|
||||
std::cerr << " [Rewrite-Panic] Inserting STORE for def of %v" << old_vreg
|
||||
<< " from " << pregToString(SPILL_TEMP_REG) << " after original instr.\n";
|
||||
}
|
||||
new_instrs.push_back(std::move(store));
|
||||
}
|
||||
}
|
||||
|
||||
} else {
|
||||
// ================== 常规模式重写逻辑 (您的原始代码) ==================
|
||||
std::map<unsigned, unsigned> use_remap, def_remap;
|
||||
for (unsigned old_vreg : use_vregs) {
|
||||
if (spilled_vregs.count(old_vreg) && use_remap.find(old_vreg) == use_remap.end()) {
|
||||
Type* type = vreg_type_map.at(old_vreg);
|
||||
unsigned new_temp_vreg = ISel->getNewVReg(type);
|
||||
use_remap[old_vreg] = new_temp_vreg;
|
||||
RVOpcodes load_op = isFPVReg(old_vreg) ? RVOpcodes::FLW : (type->isPointer() ? RVOpcodes::LD : RVOpcodes::LW);
|
||||
auto load = std::make_unique<MachineInstr>(load_op);
|
||||
load->addOperand(std::make_unique<RegOperand>(new_temp_vreg));
|
||||
load->addOperand(std::make_unique<MemOperand>(
|
||||
std::make_unique<RegOperand>(PhysicalReg::S0),
|
||||
std::make_unique<ImmOperand>(frame_info.spill_offsets.at(old_vreg))));
|
||||
if (DEEPERDEBUG) {
|
||||
RISCv64AsmPrinter printer(MFunc);
|
||||
std::cerr << " [Rewrite] Inserting LOAD for use of %v" << old_vreg << " into new %v" << new_temp_vreg << " before: " << printer.formatInstr(instr.get()) << "\n";
|
||||
}
|
||||
new_instrs.push_back(std::move(load));
|
||||
}
|
||||
}
|
||||
for (unsigned old_vreg : def_vregs) {
|
||||
if (spilled_vregs.count(old_vreg) && def_remap.find(old_vreg) == def_remap.end()) {
|
||||
Type* type = vreg_type_map.at(old_vreg);
|
||||
unsigned new_temp_vreg = ISel->getNewVReg(type);
|
||||
def_remap[old_vreg] = new_temp_vreg;
|
||||
}
|
||||
}
|
||||
auto original_instr_str_for_log = DEEPERDEBUG ? RISCv64AsmPrinter(MFunc).formatInstr(instr.get()) : "";
|
||||
instr->remapVRegs(use_remap, def_remap);
|
||||
new_instrs.push_back(std::move(instr));
|
||||
if (DEEPERDEBUG && (!use_remap.empty() || !def_remap.empty())) std::cerr << " [Rewrite] Original: " << original_instr_str_for_log << " -> Rewritten: " << RISCv64AsmPrinter(MFunc).formatInstr(new_instrs.back().get()) << "\n";
|
||||
for(const auto& pair : def_remap) {
|
||||
unsigned old_vreg = pair.first;
|
||||
unsigned new_temp_vreg = pair.second;
|
||||
Type* type = vreg_type_map.at(old_vreg);
|
||||
RVOpcodes store_op = isFPVReg(old_vreg) ? RVOpcodes::FSW : (type->isPointer() ? RVOpcodes::SD : RVOpcodes::SW);
|
||||
auto store = std::make_unique<MachineInstr>(store_op);
|
||||
store->addOperand(std::make_unique<RegOperand>(new_temp_vreg));
|
||||
store->addOperand(std::make_unique<MemOperand>(
|
||||
std::make_unique<RegOperand>(PhysicalReg::S0),
|
||||
std::make_unique<ImmOperand>(frame_info.spill_offsets.at(old_vreg))));
|
||||
if (DEEPERDEBUG) std::cerr << " [Rewrite] Inserting STORE for def of %v" << old_vreg << " from new %v" << new_temp_vreg << " after original instr.\n";
|
||||
new_instrs.push_back(std::move(store));
|
||||
}
|
||||
}
|
||||
}
|
||||
instrs = std::move(new_instrs);
|
||||
}
|
||||
}
|
||||
|
||||
void RISCv64LinearScan::applyAllocation() {
|
||||
if (DEBUG) std::cerr << "[LSRA-Apply] Applying final vreg->preg mapping.\n";
|
||||
for (auto& mbb : MFunc->getBlocks()) {
|
||||
for (auto& instr_ptr : mbb->getInstructions()) {
|
||||
for (auto& op_ptr : instr_ptr->getOperands()) {
|
||||
if (op_ptr->getKind() == MachineOperand::KIND_REG) {
|
||||
auto reg_op = static_cast<RegOperand*>(op_ptr.get());
|
||||
if (reg_op->isVirtual()) {
|
||||
unsigned vreg = reg_op->getVRegNum();
|
||||
if (vreg_to_preg_map.count(vreg)) {
|
||||
reg_op->setPReg(vreg_to_preg_map.at(vreg));
|
||||
} else {
|
||||
std::cerr << "ERROR: Uncolored virtual register %v" << vreg << " found during applyAllocation! in func " << MFunc->getName() << "\n";
|
||||
// Forcing an error is better than silent failure.
|
||||
// reg_op->setPReg(PhysicalReg::T5);
|
||||
}
|
||||
}
|
||||
} else if (op_ptr->getKind() == MachineOperand::KIND_MEM) {
|
||||
auto mem_op = static_cast<MemOperand*>(op_ptr.get());
|
||||
auto reg_op = mem_op->getBase();
|
||||
if (reg_op->isVirtual()) {
|
||||
unsigned vreg = reg_op->getVRegNum();
|
||||
if (vreg_to_preg_map.count(vreg)) {
|
||||
reg_op->setPReg(vreg_to_preg_map.at(vreg));
|
||||
} else {
|
||||
std::cerr << "ERROR: Uncolored virtual register %v" << vreg << " in memory operand! in func " << MFunc->getName() << "\n";
|
||||
// reg_op->setPReg(PhysicalReg::T5);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// void getInstrUseDef(const MachineInstr* instr, std::set<unsigned>& use, std::set<unsigned>& def) {
|
||||
// auto opcode = instr->getOpcode();
|
||||
// const auto& operands = instr->getOperands();
|
||||
|
||||
// auto get_vreg_id_if_virtual = [&](const MachineOperand* op, std::set<unsigned>& s) {
|
||||
// if (op->getKind() == MachineOperand::KIND_REG) {
|
||||
// auto reg_op = static_cast<const RegOperand*>(op);
|
||||
// if (reg_op->isVirtual()) s.insert(reg_op->getVRegNum());
|
||||
// } else if (op->getKind() == MachineOperand::KIND_MEM) {
|
||||
// auto mem_op = static_cast<const MemOperand*>(op);
|
||||
// auto reg_op = mem_op->getBase();
|
||||
// if (reg_op->isVirtual()) s.insert(reg_op->getVRegNum());
|
||||
// }
|
||||
// };
|
||||
|
||||
// if (op_info.count(opcode)) {
|
||||
// const auto& info = op_info.at(opcode);
|
||||
// for (int idx : info.first) if (idx < operands.size()) get_vreg_id_if_virtual(operands[idx].get(), def);
|
||||
// for (int idx : info.second) if (idx < operands.size()) get_vreg_id_if_virtual(operands[idx].get(), use);
|
||||
// for (const auto& op : operands) if (op->getKind() == MachineOperand::KIND_MEM) get_vreg_id_if_virtual(op.get(), use);
|
||||
// } else if (opcode == RVOpcodes::CALL) {
|
||||
// if (!operands.empty() && operands[0]->getKind() == MachineOperand::KIND_REG) get_vreg_id_if_virtual(operands[0].get(), def);
|
||||
// for (size_t i = 1; i < operands.size(); ++i) if (operands[i]->getKind() == MachineOperand::KIND_REG) get_vreg_id_if_virtual(operands[i].get(), use);
|
||||
// }
|
||||
// }
|
||||
|
||||
bool RISCv64LinearScan::isFPVReg(unsigned vreg) const {
|
||||
return vreg_type_map.count(vreg) && vreg_type_map.at(vreg)->isFloat();
|
||||
}
|
||||
|
||||
void RISCv64LinearScan::collectUsedCalleeSavedRegs() {
|
||||
StackFrameInfo& frame_info = MFunc->getFrameInfo();
|
||||
frame_info.used_callee_saved_regs.clear();
|
||||
|
||||
const auto& callee_saved_int = getCalleeSavedIntRegs();
|
||||
const auto& callee_saved_fp = getCalleeSavedFpRegs();
|
||||
std::set<PhysicalReg> callee_saved_set(callee_saved_int.begin(), callee_saved_int.end());
|
||||
callee_saved_set.insert(callee_saved_fp.begin(), callee_saved_fp.end());
|
||||
callee_saved_set.insert(PhysicalReg::S0);
|
||||
|
||||
for(const auto& pair : vreg_to_preg_map) {
|
||||
PhysicalReg preg = pair.second;
|
||||
if(callee_saved_set.count(preg)) {
|
||||
frame_info.used_callee_saved_regs.insert(preg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace sysy
|
||||
@ -1,9 +1,12 @@
|
||||
#include "RISCv64RegAlloc.h"
|
||||
#include "RISCv64AsmPrinter.h"
|
||||
#include "RISCv64Info.h"
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <cassert>
|
||||
#include <chrono>
|
||||
#include <thread>
|
||||
|
||||
namespace sysy {
|
||||
|
||||
@ -44,7 +47,7 @@ RISCv64RegAlloc::RISCv64RegAlloc(MachineFunction* mfunc)
|
||||
}
|
||||
|
||||
// 主入口: 迭代运行分配算法直到无溢出
|
||||
void RISCv64RegAlloc::run() {
|
||||
bool RISCv64RegAlloc::run() {
|
||||
if (DEBUG) std::cerr << "===== LLIR Before Running Graph Coloring Register Allocation " << MFunc->getName() << " =====\n";
|
||||
std::stringstream ss_before_reg_alloc;
|
||||
if (DEBUG) {
|
||||
@ -59,6 +62,8 @@ void RISCv64RegAlloc::run() {
|
||||
int iteration = 0;
|
||||
|
||||
while (iteration++ < MAX_ITERATIONS) {
|
||||
// std::cerr << "Iteration Step: " << iteration << "\n";
|
||||
// std::this_thread::sleep_for(std::chrono::seconds(1));
|
||||
if (doAllocation()) {
|
||||
break;
|
||||
} else {
|
||||
@ -66,29 +71,7 @@ void RISCv64RegAlloc::run() {
|
||||
if (DEBUG) std::cerr << "--- Spilling detected, re-running allocation (iteration " << iteration << ") ---\n";
|
||||
|
||||
if (iteration >= MAX_ITERATIONS) {
|
||||
std::cerr << "ERROR: Register allocation failed to converge after " << MAX_ITERATIONS << " iterations\n";
|
||||
std::cerr << " Spill worklist size: " << spillWorklist.size() << "\n";
|
||||
std::cerr << " Total nodes: " << (initial.size() + coloredNodes.size()) << "\n";
|
||||
|
||||
// Emergency spill remaining nodes to break the loop
|
||||
std::cerr << " Emergency spilling remaining spill worklist nodes...\n";
|
||||
for (unsigned node : spillWorklist) {
|
||||
spilledNodes.insert(node);
|
||||
}
|
||||
|
||||
// Also spill any nodes that didn't get colors
|
||||
std::set<unsigned> uncolored;
|
||||
for (unsigned node : initial) {
|
||||
if (color_map.find(node) == color_map.end()) {
|
||||
uncolored.insert(node);
|
||||
}
|
||||
}
|
||||
for (unsigned node : uncolored) {
|
||||
spilledNodes.insert(node);
|
||||
}
|
||||
|
||||
// Force completion
|
||||
break;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -98,10 +81,13 @@ void RISCv64RegAlloc::run() {
|
||||
MFunc->getFrameInfo().vreg_to_preg_map = this->color_map;
|
||||
collectUsedCalleeSavedRegs();
|
||||
if (DEBUG) std::cerr << "===== Finished Graph Coloring Register Allocation =====\n\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
// 单次分配的核心流程
|
||||
bool RISCv64RegAlloc::doAllocation() {
|
||||
const int MAX_ITERATIONS = 50;
|
||||
int iteration = 0;
|
||||
initialize();
|
||||
precolorByCallingConvention();
|
||||
analyzeLiveness();
|
||||
@ -109,14 +95,16 @@ bool RISCv64RegAlloc::doAllocation() {
|
||||
makeWorklist();
|
||||
|
||||
while (!simplifyWorklist.empty() || !worklistMoves.empty() || !freezeWorklist.empty() || !spillWorklist.empty()) {
|
||||
if (DEEPDEBUG) dumpState("Loop Start");
|
||||
// if (DEBUG) std::cerr << "Inner Iteration Step: " << ++iteration << "\n";
|
||||
// std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
// if (DEEPDEBUG) dumpState("Loop Start");
|
||||
if (!simplifyWorklist.empty()) simplify();
|
||||
else if (!worklistMoves.empty()) coalesce();
|
||||
else if (!freezeWorklist.empty()) freeze();
|
||||
else if (!spillWorklist.empty()) selectSpill();
|
||||
}
|
||||
|
||||
if (DEEPDEBUG) dumpState("Before AssignColors");
|
||||
// if (DEEPDEBUG) dumpState("Before AssignColors");
|
||||
assignColors();
|
||||
return spilledNodes.empty();
|
||||
}
|
||||
@ -884,53 +872,6 @@ void RISCv64RegAlloc::getInstrUseDef_Liveness(const MachineInstr* instr, VRegSet
|
||||
auto opcode = instr->getOpcode();
|
||||
const auto& operands = instr->getOperands();
|
||||
|
||||
// 映射表:指令操作码 -> {Def操作数索引列表, Use操作数索引列表}
|
||||
static const std::map<RVOpcodes, std::pair<std::vector<int>, std::vector<int>>> op_info = {
|
||||
// ===== 整数算术与逻辑指令 (R-type & I-type) =====
|
||||
{RVOpcodes::ADD, {{0}, {1, 2}}}, {RVOpcodes::SUB, {{0}, {1, 2}}}, {RVOpcodes::MUL, {{0}, {1, 2}}},
|
||||
{RVOpcodes::DIV, {{0}, {1, 2}}}, {RVOpcodes::REM, {{0}, {1, 2}}}, {RVOpcodes::ADDW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SUBW, {{0}, {1, 2}}}, {RVOpcodes::MULW, {{0}, {1, 2}}}, {RVOpcodes::DIVW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::REMW, {{0}, {1, 2}}}, {RVOpcodes::SLT, {{0}, {1, 2}}}, {RVOpcodes::SLTU, {{0}, {1, 2}}},
|
||||
{RVOpcodes::XOR, {{0}, {1, 2}}}, {RVOpcodes::OR, {{0}, {1, 2}}}, {RVOpcodes::AND, {{0}, {1, 2}}},
|
||||
{RVOpcodes::ADDI, {{0}, {1}}}, {RVOpcodes::ADDIW, {{0}, {1}}}, {RVOpcodes::XORI, {{0}, {1}}},
|
||||
{RVOpcodes::ORI, {{0}, {1}}}, {RVOpcodes::ANDI, {{0}, {1}}},
|
||||
{RVOpcodes::SLTI, {{0}, {1}}}, {RVOpcodes::SLTIU, {{0}, {1}}},
|
||||
|
||||
// ===== 移位指令 =====
|
||||
{RVOpcodes::SLL, {{0}, {1, 2}}}, {RVOpcodes::SLLI, {{0}, {1}}},
|
||||
{RVOpcodes::SLLW, {{0}, {1, 2}}}, {RVOpcodes::SLLIW, {{0}, {1}}},
|
||||
{RVOpcodes::SRL, {{0}, {1, 2}}}, {RVOpcodes::SRLI, {{0}, {1}}},
|
||||
{RVOpcodes::SRLW, {{0}, {1, 2}}}, {RVOpcodes::SRLIW, {{0}, {1}}},
|
||||
{RVOpcodes::SRA, {{0}, {1, 2}}}, {RVOpcodes::SRAI, {{0}, {1}}},
|
||||
{RVOpcodes::SRAW, {{0}, {1, 2}}}, {RVOpcodes::SRAIW, {{0}, {1}}},
|
||||
|
||||
// ===== 内存加载指令 (Def: 0, Use: MemBase) =====
|
||||
{RVOpcodes::LB, {{0}, {}}}, {RVOpcodes::LH, {{0}, {}}}, {RVOpcodes::LW, {{0}, {}}}, {RVOpcodes::LD, {{0}, {}}},
|
||||
{RVOpcodes::LBU, {{0}, {}}}, {RVOpcodes::LHU, {{0}, {}}}, {RVOpcodes::LWU, {{0}, {}}},
|
||||
{RVOpcodes::FLW, {{0}, {}}}, {RVOpcodes::FLD, {{0}, {}}},
|
||||
|
||||
// ===== 内存存储指令 (Def: None, Use: ValToStore, MemBase) =====
|
||||
{RVOpcodes::SB, {{}, {0, 1}}}, {RVOpcodes::SH, {{}, {0, 1}}}, {RVOpcodes::SW, {{}, {0, 1}}}, {RVOpcodes::SD, {{}, {0, 1}}},
|
||||
{RVOpcodes::FSW, {{}, {0, 1}}}, {RVOpcodes::FSD, {{}, {0, 1}}},
|
||||
|
||||
// ===== 控制流指令 =====
|
||||
{RVOpcodes::BEQ, {{}, {0, 1}}}, {RVOpcodes::BNE, {{}, {0, 1}}}, {RVOpcodes::BLT, {{}, {0, 1}}},
|
||||
{RVOpcodes::BGE, {{}, {0, 1}}}, {RVOpcodes::BLTU, {{}, {0, 1}}}, {RVOpcodes::BGEU, {{}, {0, 1}}},
|
||||
{RVOpcodes::JALR, {{0}, {1}}}, // def: ra (implicit) and op0, use: op1
|
||||
|
||||
// ===== 浮点指令 =====
|
||||
{RVOpcodes::FADD_S, {{0}, {1, 2}}}, {RVOpcodes::FSUB_S, {{0}, {1, 2}}},
|
||||
{RVOpcodes::FMUL_S, {{0}, {1, 2}}}, {RVOpcodes::FDIV_S, {{0}, {1, 2}}}, {RVOpcodes::FEQ_S, {{0}, {1, 2}}},
|
||||
{RVOpcodes::FLT_S, {{0}, {1, 2}}}, {RVOpcodes::FLE_S, {{0}, {1, 2}}}, {RVOpcodes::FCVT_S_W, {{0}, {1}}},
|
||||
{RVOpcodes::FCVT_W_S, {{0}, {1}}}, {RVOpcodes::FMV_S, {{0}, {1}}}, {RVOpcodes::FMV_W_X, {{0}, {1}}},
|
||||
{RVOpcodes::FMV_X_W, {{0}, {1}}}, {RVOpcodes::FNEG_S, {{0}, {1}}},
|
||||
|
||||
// ===== 伪指令 =====
|
||||
{RVOpcodes::LI, {{0}, {}}}, {RVOpcodes::LA, {{0}, {}}},
|
||||
{RVOpcodes::MV, {{0}, {1}}}, {RVOpcodes::SEQZ, {{0}, {1}}}, {RVOpcodes::SNEZ, {{0}, {1}}},
|
||||
{RVOpcodes::NEG, {{0}, {1}}}, {RVOpcodes::NEGW, {{0}, {1}}},
|
||||
};
|
||||
|
||||
// lambda表达式用于获取操作数的寄存器ID(虚拟或物理)
|
||||
const unsigned offset = static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID);
|
||||
auto get_any_reg_id = [&](const MachineOperand* op) -> unsigned {
|
||||
|
||||
@ -23,6 +23,21 @@ public:
|
||||
bool runOnFunction(Function *F, AnalysisManager& AM) override;
|
||||
|
||||
void runOnMachineFunction(MachineFunction* mfunc);
|
||||
|
||||
/**
|
||||
* @brief 设置是否启用浮点乘加融合优化
|
||||
* @param enabled 是否启用
|
||||
*/
|
||||
static void setFusedMulAddEnabled(bool enabled) { fusedMulAddEnabled = enabled; }
|
||||
|
||||
/**
|
||||
* @brief 检查是否启用了浮点乘加融合优化
|
||||
* @return 是否启用
|
||||
*/
|
||||
static bool isFusedMulAddEnabled() { return fusedMulAddEnabled; }
|
||||
|
||||
private:
|
||||
static bool fusedMulAddEnabled; // 浮点乘加融合优化开关
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
@ -26,6 +26,7 @@ private:
|
||||
unsigned getTypeSizeInBytes(Type* type);
|
||||
|
||||
Module* module;
|
||||
bool gc_failed = false;
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
61
src/include/backend/RISCv64/RISCv64BasicBlockAlloc.h
Normal file
61
src/include/backend/RISCv64/RISCv64BasicBlockAlloc.h
Normal file
@ -0,0 +1,61 @@
|
||||
#ifndef RISCV64_BASICBLOCKALLOC_H
|
||||
#define RISCV64_BASICBLOCKALLOC_H
|
||||
|
||||
#include "RISCv64LLIR.h"
|
||||
#include "RISCv64ISel.h"
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace sysy {
|
||||
|
||||
/**
|
||||
* @class RISCv64BasicBlockAlloc
|
||||
* @brief 一个有状态的、基本块级的贪心寄存器分配器。
|
||||
*
|
||||
* 该分配器作为简单但可靠的实现,它逐个处理基本块,并在块内尽可能地
|
||||
* 将虚拟寄存器的值保留在物理寄存器中,以减少不必要的内存访问。
|
||||
*/
|
||||
class RISCv64BasicBlockAlloc {
|
||||
public:
|
||||
RISCv64BasicBlockAlloc(MachineFunction* mfunc);
|
||||
void run();
|
||||
|
||||
private:
|
||||
void computeLiveness();
|
||||
void processBasicBlock(MachineBasicBlock* mbb);
|
||||
void assignStackSlotsForAllVRegs();
|
||||
|
||||
// 核心分配函数
|
||||
PhysicalReg ensureInReg(unsigned vreg, std::vector<std::unique_ptr<MachineInstr>>& new_instrs);
|
||||
PhysicalReg allocReg(unsigned vreg, std::vector<std::unique_ptr<MachineInstr>>& new_instrs);
|
||||
PhysicalReg findFreeReg(bool is_fp);
|
||||
PhysicalReg spillReg(bool is_fp, std::vector<std::unique_ptr<MachineInstr>>& new_instrs);
|
||||
|
||||
// 状态跟踪(每个基本块开始时都会重置)
|
||||
std::map<unsigned, PhysicalReg> vreg_to_preg; // 当前vreg到物理寄存器的映射
|
||||
std::map<PhysicalReg, unsigned> preg_to_vreg; // 反向映射
|
||||
std::set<PhysicalReg> dirty_pregs; // 被修改过、需要写回的物理寄存器
|
||||
|
||||
// 分配器全局信息
|
||||
MachineFunction* MFunc;
|
||||
RISCv64ISel* ISel;
|
||||
std::map<unsigned, PhysicalReg> abi_vreg_map; // 函数参数的ABI寄存器映射
|
||||
|
||||
// 寄存器池和循环索引
|
||||
std::vector<PhysicalReg> int_temps;
|
||||
std::vector<PhysicalReg> fp_temps;
|
||||
int int_temp_idx = 0;
|
||||
int fp_temp_idx = 0;
|
||||
|
||||
// 辅助函数
|
||||
PhysicalReg getNextIntTemp();
|
||||
PhysicalReg getNextFpTemp();
|
||||
|
||||
// 活性分析结果
|
||||
std::map<const MachineBasicBlock*, std::set<unsigned>> live_out;
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
#endif // RISCV64_BASICBLOCKALLOC_H
|
||||
@ -22,7 +22,6 @@ public:
|
||||
|
||||
// 公开接口,以便后续模块(如RegAlloc)可以查询或创建vreg
|
||||
unsigned getVReg(Value* val);
|
||||
unsigned getNewVReg() { return vreg_counter++; }
|
||||
unsigned getNewVReg(Type* type);
|
||||
unsigned getVRegCounter() const;
|
||||
// 获取 vreg_map 的公共接口
|
||||
|
||||
98
src/include/backend/RISCv64/RISCv64Info.h
Normal file
98
src/include/backend/RISCv64/RISCv64Info.h
Normal file
@ -0,0 +1,98 @@
|
||||
#ifndef RISCV64_INFO_H
|
||||
#define RISCV64_INFO_H
|
||||
|
||||
#include "RISCv64LLIR.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// 定义一个全局的、权威的指令信息表
|
||||
// 它包含了指令的定义(def)和使用(use)操作数索引
|
||||
// defs: {0} -> 第一个操作数是定义
|
||||
// uses: {1, 2} -> 第二、三个操作数是使用
|
||||
static const std::map<RVOpcodes, std::pair<std::vector<int>, std::vector<int>>> op_info = {
|
||||
// --- 整数计算 (R-Type) ---
|
||||
{RVOpcodes::ADD, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SUB, {{0}, {1, 2}}},
|
||||
{RVOpcodes::MUL, {{0}, {1, 2}}},
|
||||
{RVOpcodes::MULH, {{0}, {1, 2}}},
|
||||
{RVOpcodes::DIV, {{0}, {1, 2}}},
|
||||
{RVOpcodes::DIVW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::REM, {{0}, {1, 2}}},
|
||||
{RVOpcodes::REMW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::ADDW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SUBW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::MULW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SLT, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SLTU, {{0}, {1, 2}}},
|
||||
{RVOpcodes::XOR, {{0}, {1, 2}}},
|
||||
{RVOpcodes::OR, {{0}, {1, 2}}},
|
||||
{RVOpcodes::AND, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SLL, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SRL, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SRA, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SLLW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SRLW, {{0}, {1, 2}}},
|
||||
{RVOpcodes::SRAW, {{0}, {1, 2}}},
|
||||
|
||||
// --- 整数计算 (I-Type) ---
|
||||
{RVOpcodes::ADDI, {{0}, {1}}},
|
||||
{RVOpcodes::ADDIW, {{0}, {1}}},
|
||||
{RVOpcodes::XORI, {{0}, {1}}},
|
||||
{RVOpcodes::ORI, {{0}, {1}}},
|
||||
{RVOpcodes::ANDI, {{0}, {1}}},
|
||||
{RVOpcodes::SLTI, {{0}, {1}}},
|
||||
{RVOpcodes::SLTIU, {{0}, {1}}},
|
||||
{RVOpcodes::SLLI, {{0}, {1}}},
|
||||
{RVOpcodes::SLLIW, {{0}, {1}}},
|
||||
{RVOpcodes::SRLI, {{0}, {1}}},
|
||||
{RVOpcodes::SRLIW, {{0}, {1}}},
|
||||
{RVOpcodes::SRAI, {{0}, {1}}},
|
||||
{RVOpcodes::SRAIW, {{0}, {1}}},
|
||||
|
||||
// --- 内存加载 ---
|
||||
{RVOpcodes::LW, {{0}, {}}}, {RVOpcodes::LH, {{0}, {}}}, {RVOpcodes::LB, {{0}, {}}},
|
||||
{RVOpcodes::LWU, {{0}, {}}}, {RVOpcodes::LHU, {{0}, {}}}, {RVOpcodes::LBU, {{0}, {}}},
|
||||
{RVOpcodes::LD, {{0}, {}}},
|
||||
{RVOpcodes::FLW, {{0}, {}}}, {RVOpcodes::FLD, {{0}, {}}},
|
||||
|
||||
// --- 内存存储 ---
|
||||
{RVOpcodes::SW, {{}, {0, 1}}}, {RVOpcodes::SH, {{}, {0, 1}}}, {RVOpcodes::SB, {{}, {0, 1}}},
|
||||
{RVOpcodes::SD, {{}, {0, 1}}},
|
||||
{RVOpcodes::FSW, {{}, {0, 1}}}, {RVOpcodes::FSD, {{}, {0, 1}}},
|
||||
|
||||
// --- 分支指令 ---
|
||||
{RVOpcodes::BEQ, {{}, {0, 1}}}, {RVOpcodes::BNE, {{}, {0, 1}}}, {RVOpcodes::BLT, {{}, {0, 1}}},
|
||||
{RVOpcodes::BGE, {{}, {0, 1}}}, {RVOpcodes::BLTU, {{}, {0, 1}}}, {RVOpcodes::BGEU, {{}, {0, 1}}},
|
||||
|
||||
// --- 跳转 ---
|
||||
{RVOpcodes::JAL, {{0}, {}}}, // JAL的rd是def,但通常用x0表示不关心返回值,这里简化
|
||||
{RVOpcodes::JALR, {{0}, {1}}},
|
||||
{RVOpcodes::RET, {{}, {}}}, // RET是伪指令,通常展开为JALR
|
||||
|
||||
// --- 伪指令 & 其他 ---
|
||||
{RVOpcodes::LI, {{0}, {}}}, {RVOpcodes::LA, {{0}, {}}},
|
||||
{RVOpcodes::MV, {{0}, {1}}},
|
||||
{RVOpcodes::NEG, {{0}, {1}}}, // sub rd, zero, rs1
|
||||
{RVOpcodes::NEGW, {{0}, {1}}}, // subw rd, zero, rs1
|
||||
{RVOpcodes::SEQZ, {{0}, {1}}},
|
||||
{RVOpcodes::SNEZ, {{0}, {1}}},
|
||||
|
||||
// --- 函数调用 ---
|
||||
// CALL的use/def在getInstrUseDef中有特殊处理逻辑,这里可以不列出
|
||||
|
||||
// --- 浮点指令 ---
|
||||
{RVOpcodes::FADD_S, {{0}, {1, 2}}}, {RVOpcodes::FSUB_S, {{0}, {1, 2}}},
|
||||
{RVOpcodes::FMUL_S, {{0}, {1, 2}}}, {RVOpcodes::FDIV_S, {{0}, {1, 2}}},
|
||||
{RVOpcodes::FMADD_S, {{0}, {1, 2, 3}}},
|
||||
{RVOpcodes::FEQ_S, {{0}, {1, 2}}}, {RVOpcodes::FLT_S, {{0}, {1, 2}}}, {RVOpcodes::FLE_S, {{0}, {1, 2}}},
|
||||
{RVOpcodes::FCVT_S_W, {{0}, {1}}}, {RVOpcodes::FCVT_W_S, {{0}, {1}}},
|
||||
{RVOpcodes::FCVT_W_S_RTZ, {{0}, {1}}},
|
||||
{RVOpcodes::FMV_S, {{0}, {1}}}, {RVOpcodes::FMV_W_X, {{0}, {1}}}, {RVOpcodes::FMV_X_W, {{0}, {1}}},
|
||||
{RVOpcodes::FNEG_S, {{0}, {1}}}
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
#endif // RISCV64_INFO_H
|
||||
@ -41,6 +41,8 @@ enum class PhysicalReg {
|
||||
// 假设 vreg_counter 不会达到这么大的值
|
||||
PHYS_REG_START_ID = 1000000,
|
||||
PHYS_REG_END_ID = PHYS_REG_START_ID + 320, // 预留足够的空间
|
||||
|
||||
INVALID, ///< 无效寄存器标记
|
||||
};
|
||||
|
||||
// RISC-V 指令操作码枚举
|
||||
@ -77,6 +79,7 @@ enum class RVOpcodes {
|
||||
FSUB_S, // fsub.s rd, rs1, rs2
|
||||
FMUL_S, // fmul.s rd, rs1, rs2
|
||||
FDIV_S, // fdiv.s rd, rs1, rs2
|
||||
FMADD_S, // fmadd.s rd, rs1, rs2, rs3
|
||||
|
||||
// 浮点比较 (单精度)
|
||||
FEQ_S, // feq.s rd, rs1, rs2 (结果写入整数寄存器rd)
|
||||
@ -86,6 +89,7 @@ enum class RVOpcodes {
|
||||
// 浮点转换
|
||||
FCVT_S_W, // fcvt.s.w rd, rs1 (有符号整数 -> 单精度浮点)
|
||||
FCVT_W_S, // fcvt.w.s rd, rs1 (单精度浮点 -> 有符号整数)
|
||||
FCVT_W_S_RTZ, // fcvt.w.s rd, rs1, rtz (使用向零截断模式)
|
||||
|
||||
// 浮点传送/移动
|
||||
FMV_S, // fmv.s rd, rs1 (浮点寄存器之间)
|
||||
@ -93,6 +97,9 @@ enum class RVOpcodes {
|
||||
FMV_X_W, // fmv.x.w rd, rs1 (浮点寄存器位模式 -> 整数寄存器)
|
||||
FNEG_S, // fneg.s rd, rs (浮点取负)
|
||||
|
||||
// 浮点控制状态寄存器 (CSR)
|
||||
FSRMI, // fsrmi rd, imm (设置舍入模式立即数)
|
||||
|
||||
// 伪指令
|
||||
FRAME_LOAD_W, // 从栈帧加载 32位 Word (对应 lw)
|
||||
FRAME_LOAD_D, // 从栈帧加载 64位 Doubleword (对应 ld)
|
||||
@ -249,6 +256,19 @@ public:
|
||||
void addOperand(std::unique_ptr<MachineOperand> operand) {
|
||||
operands.push_back(std::move(operand));
|
||||
}
|
||||
/**
|
||||
* @brief (为紧急溢出模式添加)将指令中所有对特定虚拟寄存器的引用替换为指定的物理寄存器。
|
||||
* * @param old_vreg 需要被替换的虚拟寄存器号。
|
||||
* @param preg 用于替换的物理寄存器。
|
||||
*/
|
||||
void replaceVRegWithPReg(unsigned old_vreg, PhysicalReg preg);
|
||||
|
||||
/**
|
||||
* @brief (为常规溢出模式添加)根据提供的映射表,重映射指令中的虚拟寄存器。
|
||||
* * @param use_remap 一个从旧vreg到新vreg的映射,用于指令的use操作数。
|
||||
* @param def_remap 一个从旧vreg到新vreg的映射,用于指令的def操作数。
|
||||
*/
|
||||
void remapVRegs(const std::map<unsigned, unsigned>& use_remap, const std::map<unsigned, unsigned>& def_remap);
|
||||
private:
|
||||
RVOpcodes opcode;
|
||||
std::vector<std::unique_ptr<MachineOperand>> operands;
|
||||
@ -313,6 +333,22 @@ private:
|
||||
std::vector<std::unique_ptr<MachineBasicBlock>> blocks;
|
||||
StackFrameInfo frame_info;
|
||||
};
|
||||
/// Tells whether @p opcode is one of the load/store opcodes listed below
/// (integer and floating-point). Any other opcode yields false.
inline bool isMemoryOp(RVOpcodes opcode) {
    switch (opcode) {
        // integer loads
        case RVOpcodes::LB:
        case RVOpcodes::LH:
        case RVOpcodes::LW:
        case RVOpcodes::LD:
        case RVOpcodes::LBU:
        case RVOpcodes::LHU:
        case RVOpcodes::LWU:
        // integer stores
        case RVOpcodes::SB:
        case RVOpcodes::SH:
        case RVOpcodes::SW:
        case RVOpcodes::SD:
        // floating-point loads and stores
        case RVOpcodes::FLW:
        case RVOpcodes::FSW:
        case RVOpcodes::FLD:
        case RVOpcodes::FSD:
            return true;
        default:
            break;
    }
    return false;
}
|
||||
|
||||
void getInstrUseDef(const MachineInstr* instr, std::set<unsigned>& use, std::set<unsigned>& def);
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
|
||||
81
src/include/backend/RISCv64/RISCv64LinearScan.h
Normal file
81
src/include/backend/RISCv64/RISCv64LinearScan.h
Normal file
@ -0,0 +1,81 @@
|
||||
#ifndef RISCV64_LINEARSCAN_H
|
||||
#define RISCV64_LINEARSCAN_H
|
||||
|
||||
#include "RISCv64LLIR.h"
|
||||
#include "RISCv64ISel.h"
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// 前向声明
|
||||
class MachineBasicBlock;
|
||||
class MachineFunction;
|
||||
class RISCv64ISel;
|
||||
|
||||
/// @brief Live interval of a single virtual register.
///
/// Stores the first and last instruction numbers of the interval. To keep
/// things simple, intervals with "holes" are not modelled.
struct LiveInterval {
    unsigned vreg = 0;          // virtual register this interval belongs to
    int start = -1;             // first instruction number (-1 until set)
    int end = -1;               // last instruction number (-1 until set)
    bool crosses_call = false;

    LiveInterval(unsigned vreg) { this->vreg = vreg; }

    /// Ordering used for sorting: ascending by start position only.
    bool operator<(const LiveInterval& other) const {
        return other.start > start;
    }
};
|
||||
|
||||
class RISCv64LinearScan {
|
||||
public:
|
||||
RISCv64LinearScan(MachineFunction* mfunc);
|
||||
bool run();
|
||||
|
||||
private:
|
||||
// --- 核心算法流程 ---
|
||||
void linearizeBlocks();
|
||||
void computeLiveIntervals();
|
||||
bool linearScan();
|
||||
void rewriteProgram();
|
||||
void applyAllocation();
|
||||
void spillAtInterval(LiveInterval* current);
|
||||
|
||||
// --- 辅助函数 ---
|
||||
bool isFPVReg(unsigned vreg) const;
|
||||
void collectUsedCalleeSavedRegs();
|
||||
|
||||
MachineFunction* MFunc;
|
||||
RISCv64ISel* ISel;
|
||||
|
||||
// --- 线性扫描数据结构 ---
|
||||
std::vector<MachineBasicBlock*> linear_order_blocks;
|
||||
std::map<const MachineInstr*, int> instr_numbering;
|
||||
std::map<unsigned, LiveInterval> live_intervals;
|
||||
|
||||
std::vector<LiveInterval*> unhandled;
|
||||
std::vector<LiveInterval*> active; // 活跃且已分配物理寄存器的区间
|
||||
|
||||
std::set<unsigned> spilled_vregs; // 记录在本轮被决定溢出的vreg
|
||||
|
||||
bool conservative_spill_mode = false;
|
||||
const PhysicalReg SPILL_TEMP_REG = PhysicalReg::T4;
|
||||
|
||||
// --- 寄存器池和分配结果 ---
|
||||
std::vector<PhysicalReg> allocable_int_regs;
|
||||
std::vector<PhysicalReg> allocable_fp_regs;
|
||||
std::map<unsigned, PhysicalReg> vreg_to_preg_map;
|
||||
std::map<unsigned, PhysicalReg> abi_vreg_map;
|
||||
|
||||
const std::map<unsigned, Type*>& vreg_type_map;
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
#endif // RISCV64_LINEARSCAN_H
|
||||
@ -1,6 +1,7 @@
|
||||
#ifndef RISCV64_PASSES_H
|
||||
#define RISCV64_PASSES_H
|
||||
|
||||
#include "Pass.h"
|
||||
#include "RISCv64LLIR.h"
|
||||
#include "Peephole.h"
|
||||
#include "PreRA_Scheduler.h"
|
||||
@ -9,10 +10,8 @@
|
||||
#include "LegalizeImmediates.h"
|
||||
#include "PrologueEpilogueInsertion.h"
|
||||
#include "EliminateFrameIndices.h"
|
||||
#include "Pass.h"
|
||||
#include "DivStrengthReduction.h"
|
||||
|
||||
|
||||
namespace sysy {
|
||||
|
||||
} // namespace sysy
|
||||
|
||||
@ -20,7 +20,7 @@ public:
|
||||
RISCv64RegAlloc(MachineFunction* mfunc);
|
||||
|
||||
// 模块主入口
|
||||
void run();
|
||||
bool run();
|
||||
|
||||
private:
|
||||
// 类型定义,与Python版本对应
|
||||
|
||||
@ -727,6 +727,7 @@ class Instruction : public User {
|
||||
kFCmpGE = 0x1UL << 20,
|
||||
kAnd = 0x1UL << 21,
|
||||
kOr = 0x1UL << 22,
|
||||
// kXor = 0x1UL << 46,
|
||||
// Unary
|
||||
kNeg = 0x1UL << 23,
|
||||
kNot = 0x1UL << 24,
|
||||
@ -751,8 +752,10 @@ class Instruction : public User {
|
||||
kPhi = 0x1UL << 39,
|
||||
kBitItoF = 0x1UL << 40,
|
||||
kBitFtoI = 0x1UL << 41,
|
||||
kSRA = 0x1UL << 42,
|
||||
kMulh = 0x1UL << 43
|
||||
kSrl = 0x1UL << 42, // 逻辑右移
|
||||
kSll = 0x1UL << 43, // 逻辑左移
|
||||
kSra = 0x1UL << 44, // 算术右移
|
||||
kMulh = 0x1UL << 45
|
||||
};
|
||||
|
||||
protected:
|
||||
@ -855,8 +858,14 @@ public:
|
||||
return "BitItoF";
|
||||
case kBitFtoI:
|
||||
return "BitFtoI";
|
||||
case kSRA:
|
||||
case kSrl:
|
||||
return "lshr";
|
||||
case kSll:
|
||||
return "shl";
|
||||
case kSra:
|
||||
return "ashr";
|
||||
case kMulh:
|
||||
return "mulh";
|
||||
default:
|
||||
return "Unknown";
|
||||
}
|
||||
@ -868,7 +877,7 @@ public:
|
||||
|
||||
bool isBinary() const {
|
||||
static constexpr uint64_t BinaryOpMask =
|
||||
(kAdd | kSub | kMul | kDiv | kRem | kAnd | kOr | kSRA | kMulh) |
|
||||
(kAdd | kSub | kMul | kDiv | kRem | kAnd | kOr | kSra | kSrl | kSll | kMulh) |
|
||||
(kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE);
|
||||
return kind & BinaryOpMask;
|
||||
}
|
||||
|
||||
@ -217,8 +217,14 @@ class IRBuilder {
|
||||
BinaryInst * createOrInst(Value *lhs, Value *rhs, const std::string &name = "") {
|
||||
return createBinaryInst(Instruction::kOr, Type::getIntType(), lhs, rhs, name);
|
||||
} ///< 创建按位或指令
|
||||
BinaryInst * createSRAInst(Value *lhs, Value *rhs, const std::string &name = "") {
|
||||
return createBinaryInst(Instruction::kSRA, Type::getIntType(), lhs, rhs, name);
|
||||
BinaryInst * createSllInst(Value *lhs, Value *rhs, const std::string &name = "") {
|
||||
return createBinaryInst(Instruction::kSll, Type::getIntType(), lhs, rhs, name);
|
||||
} ///< 创建逻辑左移指令
|
||||
BinaryInst * createSrlInst(Value *lhs, Value *rhs, const std::string &name = "") {
|
||||
return createBinaryInst(Instruction::kSrl, Type::getIntType(), lhs, rhs, name);
|
||||
} ///< 创建逻辑右移指令
|
||||
BinaryInst * createSraInst(Value *lhs, Value *rhs, const std::string &name = "") {
|
||||
return createBinaryInst(Instruction::kSra, Type::getIntType(), lhs, rhs, name);
|
||||
} ///< 创建算术右移指令
|
||||
BinaryInst * createMulhInst(Value *lhs, Value *rhs, const std::string &name = "") {
|
||||
return createBinaryInst(Instruction::kMulh, Type::getIntType(), lhs, rhs, name);
|
||||
|
||||
@ -20,6 +20,42 @@ namespace sysy {
|
||||
// 前向声明
|
||||
class LoopCharacteristicsResult;
|
||||
|
||||
/// Classification of an induction variable.
enum IVKind {
    kBasic,   // basic induction variable
    kLinear,  // linear induction variable
    kCmplx    // complex derived induction variable
};
|
||||
|
||||
struct InductionVarInfo {
|
||||
Value* div; // 派生归纳变量的指令
|
||||
Value* base = nullptr; // 其根phi或BIV或DIV
|
||||
std::pair<Value*, Value*> Multibase = {nullptr, nullptr}; // 多个BIV
|
||||
Instruction::Kind Instkind; // 操作类型
|
||||
int factor = 1; // 系数(如i*2+3的2)
|
||||
int offset = 0; // 常量偏移
|
||||
bool valid; // 是否线性可归约
|
||||
IVKind ivkind; // 归纳变量类型
|
||||
|
||||
|
||||
static std::unique_ptr<InductionVarInfo> createBasicBIV(Value* v, Instruction::Kind kind, Value* base = nullptr, int factor = 1, int offset = 0) {
|
||||
return std::make_unique<InductionVarInfo>(
|
||||
InductionVarInfo{v, base, {nullptr, nullptr}, kind, factor, offset, true, IVKind::kBasic}
|
||||
);
|
||||
}
|
||||
|
||||
static std::unique_ptr<InductionVarInfo> createSingleDIV(Value* v, Instruction::Kind kind, Value* base = nullptr, int factor = 1, int offset = 0) {
|
||||
return std::make_unique<InductionVarInfo>(
|
||||
InductionVarInfo{v, base, {nullptr, nullptr}, kind, factor, offset, true, IVKind::kLinear}
|
||||
);
|
||||
}
|
||||
|
||||
static std::unique_ptr<InductionVarInfo> createDoubleDIV(Value* v, Instruction::Kind kind, Value* base1 = nullptr, Value* base2 = nullptr, int factor = 1, int offset = 0) {
|
||||
return std::make_unique<InductionVarInfo>(
|
||||
InductionVarInfo{v, nullptr, {base1, base2}, kind, factor, offset, false, IVKind::kCmplx}
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 循环特征信息结构 - 基础循环分析阶段
|
||||
* 存储循环的基本特征信息,为后续精确分析提供基础
|
||||
@ -33,13 +69,13 @@ struct LoopCharacteristics {
|
||||
bool hasComplexControlFlow; // 是否有复杂控制流 (break, continue)
|
||||
bool isInnermost; // 是否为最内层循环
|
||||
|
||||
// ========== 基础归纳变量分析 ==========
|
||||
std::vector<Value*> basicInductionVars; // 基本归纳变量
|
||||
std::map<Value*, int> inductionSteps; // 归纳变量的步长(简化)
|
||||
// ========== 归纳变量分析 ==========
|
||||
|
||||
// ========== 基础循环不变量分析 ==========
|
||||
std::unordered_set<Value*> loopInvariants; // 循环不变量
|
||||
std::unordered_set<Instruction*> invariantInsts; // 可提升的不变指令
|
||||
|
||||
std::vector<std::unique_ptr<InductionVarInfo>> InductionVars; // 归纳变量
|
||||
|
||||
// ========== 基础边界分析 ==========
|
||||
std::optional<int> staticTripCount; // 静态循环次数(如果可确定)
|
||||
@ -307,6 +343,12 @@ private:
|
||||
|
||||
// ========== 辅助方法 ==========
|
||||
bool isClassicLoopInvariant(Value* val, Loop* loop, const std::unordered_set<Value*>& invariants);
|
||||
void findDerivedInductionVars(Value* root,
|
||||
Value* base, // 只传单一BIV base
|
||||
Loop* loop,
|
||||
std::vector<std::unique_ptr<InductionVarInfo>>& ivs,
|
||||
std::set<Value*>& visited
|
||||
);
|
||||
bool isBasicInductionVariable(Value* val, Loop* loop);
|
||||
bool hasSimpleMemoryPattern(Loop* loop); // 简单的内存模式检查
|
||||
};
|
||||
|
||||
252
src/include/midend/Pass/Optimize/InductionVariableElimination.h
Normal file
252
src/include/midend/Pass/Optimize/InductionVariableElimination.h
Normal file
@ -0,0 +1,252 @@
|
||||
#pragma once
|
||||
|
||||
#include "Pass.h"
|
||||
#include "IR.h"
|
||||
#include "LoopCharacteristics.h"
|
||||
#include "Loop.h"
|
||||
#include "Dom.h"
|
||||
#include "SideEffectAnalysis.h"
|
||||
#include "AliasAnalysis.h"
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// 前向声明
|
||||
class LoopCharacteristicsResult;
|
||||
class LoopAnalysisResult;
|
||||
|
||||
/**
|
||||
* @brief 死归纳变量信息
|
||||
* 记录一个可以被消除的归纳变量
|
||||
*/
|
||||
struct DeadInductionVariable {
|
||||
PhiInst* phiInst; // phi 指令
|
||||
std::vector<Instruction*> relatedInsts; // 相关的递增/递减指令
|
||||
Loop* containingLoop; // 所在循环
|
||||
bool canEliminate; // 是否可以安全消除
|
||||
|
||||
DeadInductionVariable(PhiInst* phi, Loop* loop)
|
||||
: phiInst(phi), containingLoop(loop), canEliminate(false) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 归纳变量消除上下文类
|
||||
* 封装归纳变量消除优化的核心逻辑和状态
|
||||
*/
|
||||
class InductionVariableEliminationContext {
|
||||
public:
|
||||
InductionVariableEliminationContext() {}
|
||||
|
||||
/**
|
||||
* 运行归纳变量消除优化
|
||||
* @param F 目标函数
|
||||
* @param AM 分析管理器
|
||||
* @return 是否修改了IR
|
||||
*/
|
||||
bool run(Function* F, AnalysisManager& AM);
|
||||
|
||||
private:
|
||||
// 分析结果缓存
|
||||
LoopAnalysisResult* loopAnalysis = nullptr;
|
||||
LoopCharacteristicsResult* loopCharacteristics = nullptr;
|
||||
DominatorTree* dominatorTree = nullptr;
|
||||
SideEffectAnalysisResult* sideEffectAnalysis = nullptr;
|
||||
AliasAnalysisResult* aliasAnalysis = nullptr;
|
||||
|
||||
// 死归纳变量存储
|
||||
std::vector<std::unique_ptr<DeadInductionVariable>> deadIVs;
|
||||
std::unordered_map<Loop*, std::vector<DeadInductionVariable*>> loopToDeadIVs;
|
||||
|
||||
// ========== 核心分析和优化阶段 ==========
|
||||
|
||||
/**
|
||||
* 阶段1:识别死归纳变量
|
||||
* 找出没有被有效使用的归纳变量
|
||||
*/
|
||||
void identifyDeadInductionVariables(Function* F);
|
||||
|
||||
/**
|
||||
* 阶段2:分析消除的安全性
|
||||
* 确保消除操作不会破坏程序语义
|
||||
*/
|
||||
void analyzeSafetyForElimination();
|
||||
|
||||
/**
|
||||
* 阶段3:执行归纳变量消除
|
||||
* 删除死归纳变量及其相关指令
|
||||
*/
|
||||
bool performInductionVariableElimination();
|
||||
|
||||
// ========== 辅助方法 ==========
|
||||
|
||||
/**
|
||||
* 检查归纳变量是否为死归纳变量
|
||||
* @param iv 归纳变量信息
|
||||
* @param loop 所在循环
|
||||
* @return 如果是死归纳变量返回相关信息,否则返回nullptr
|
||||
*/
|
||||
std::unique_ptr<DeadInductionVariable>
|
||||
isDeadInductionVariable(const InductionVarInfo* iv, Loop* loop);
|
||||
|
||||
/**
|
||||
* 递归分析phi指令及其使用链是否都是死代码
|
||||
* @param phiInst phi指令
|
||||
* @param loop 所在循环
|
||||
* @return phi指令是否可以安全删除
|
||||
*/
|
||||
bool isPhiInstructionDeadRecursively(PhiInst* phiInst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 递归分析指令的使用链是否都是死代码
|
||||
* @param inst 要分析的指令
|
||||
* @param loop 所在循环
|
||||
* @param visited 已访问的指令集合(避免无限递归)
|
||||
* @param currentPath 当前递归路径(检测循环依赖)
|
||||
* @return 指令的使用链是否都是死代码
|
||||
*/
|
||||
bool isInstructionUseChainDeadRecursively(Instruction* inst, Loop* loop,
|
||||
std::set<Instruction*>& visited,
|
||||
std::set<Instruction*>& currentPath);
|
||||
|
||||
/**
|
||||
* 检查循环是否有副作用
|
||||
* @param loop 要检查的循环
|
||||
* @return 循环是否有副作用
|
||||
*/
|
||||
bool loopHasSideEffects(Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查指令是否被用于循环退出条件
|
||||
* @param inst 要检查的指令
|
||||
* @param loop 所在循环
|
||||
* @return 是否被用于循环退出条件
|
||||
*/
|
||||
bool isUsedInLoopExitCondition(Instruction* inst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查指令的结果是否未被有效使用
|
||||
* @param inst 要检查的指令
|
||||
* @param loop 所在循环
|
||||
* @return 指令结果是否未被有效使用
|
||||
*/
|
||||
bool isInstructionResultUnused(Instruction* inst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查store指令是否存储到死地址(利用别名分析)
|
||||
* @param store store指令
|
||||
* @param loop 所在循环
|
||||
* @return 是否存储到死地址
|
||||
*/
|
||||
bool isStoreToDeadLocation(StoreInst* store, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查指令是否为死代码或只在循环内部使用
|
||||
* @param inst 要检查的指令
|
||||
* @param loop 所在循环
|
||||
* @return 是否为死代码或只在循环内部使用
|
||||
*/
|
||||
bool isInstructionDeadOrInternalOnly(Instruction* inst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查指令是否有效地为死代码(带递归深度限制)
|
||||
* @param inst 要检查的指令
|
||||
* @param loop 所在循环
|
||||
* @param maxDepth 最大递归深度
|
||||
* @return 指令是否有效地为死代码
|
||||
*/
|
||||
bool isInstructionEffectivelyDead(Instruction* inst, Loop* loop, int maxDepth);
|
||||
|
||||
/**
|
||||
* 检查store指令是否有后续的load操作
|
||||
* @param store store指令
|
||||
* @param loop 所在循环
|
||||
* @return 是否有后续的load操作
|
||||
*/
|
||||
bool hasSubsequentLoad(StoreInst* store, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查指令是否在循环外有使用
|
||||
* @param inst 要检查的指令
|
||||
* @param loop 所在循环
|
||||
* @return 是否在循环外有使用
|
||||
*/
|
||||
bool hasUsageOutsideLoop(Instruction* inst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查store指令是否在循环外有后续的load操作
|
||||
* @param store store指令
|
||||
* @param loop 所在循环
|
||||
* @return 是否在循环外有后续的load操作
|
||||
*/
|
||||
bool hasSubsequentLoadOutsideLoop(StoreInst* store, Loop* loop);
|
||||
|
||||
/**
|
||||
* 递归检查基本块子树中是否有对指定位置的load操作
|
||||
* @param bb 基本块
|
||||
* @param ptr 指针
|
||||
* @param visited 已访问的基本块集合
|
||||
* @return 是否有load操作
|
||||
*/
|
||||
bool hasLoadInSubtree(BasicBlock* bb, Value* ptr, std::set<BasicBlock*>& visited);
|
||||
|
||||
/**
|
||||
* 收集与归纳变量相关的所有指令
|
||||
* @param phiInst phi指令
|
||||
* @param loop 所在循环
|
||||
* @return 相关指令列表
|
||||
*/
|
||||
std::vector<Instruction*> collectRelatedInstructions(PhiInst* phiInst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查消除归纳变量的安全性
|
||||
* @param deadIV 死归纳变量
|
||||
* @return 是否可以安全消除
|
||||
*/
|
||||
bool isSafeToEliminate(const DeadInductionVariable* deadIV);
|
||||
|
||||
/**
|
||||
* 消除单个死归纳变量
|
||||
* @param deadIV 死归纳变量
|
||||
* @return 是否成功消除
|
||||
*/
|
||||
bool eliminateDeadInductionVariable(DeadInductionVariable* deadIV);
|
||||
|
||||
/**
|
||||
* 打印调试信息
|
||||
*/
|
||||
void printDebugInfo();
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 归纳变量消除优化遍
|
||||
* 消除循环中无用的归纳变量,减少寄存器压力
|
||||
*/
|
||||
class InductionVariableElimination : public OptimizationPass {
|
||||
public:
|
||||
// 唯一的 Pass ID
|
||||
static void *ID;
|
||||
|
||||
InductionVariableElimination()
|
||||
: OptimizationPass("InductionVariableElimination", Granularity::Function) {}
|
||||
|
||||
/**
|
||||
* 在函数上运行归纳变量消除优化
|
||||
* @param F 目标函数
|
||||
* @param AM 分析管理器
|
||||
* @return 是否修改了IR
|
||||
*/
|
||||
bool runOnFunction(Function* F, AnalysisManager& AM) override;
|
||||
|
||||
/**
|
||||
* 声明分析依赖和失效信息
|
||||
*/
|
||||
void getAnalysisUsage(std::set<void*>& analysisDependencies,
|
||||
std::set<void*>& analysisInvalidations) const override;
|
||||
|
||||
void* getPassID() const override { return &ID; }
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
240
src/include/midend/Pass/Optimize/LoopStrengthReduction.h
Normal file
240
src/include/midend/Pass/Optimize/LoopStrengthReduction.h
Normal file
@ -0,0 +1,240 @@
|
||||
#pragma once
|
||||
|
||||
#include "Pass.h"
|
||||
#include "IR.h"
|
||||
#include "LoopCharacteristics.h"
|
||||
#include "Loop.h"
|
||||
#include "Dom.h"
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <memory>
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// 前向声明
|
||||
class LoopCharacteristicsResult;
|
||||
class LoopAnalysisResult;
|
||||
|
||||
/**
|
||||
* @brief 强度削弱候选项信息
|
||||
* 记录一个可以进行强度削弱的表达式信息
|
||||
*/
|
||||
struct StrengthReductionCandidate {
|
||||
enum OpType {
|
||||
MULTIPLY, // 乘法: iv * const
|
||||
DIVIDE, // 除法: iv / 2^n (转换为右移)
|
||||
DIVIDE_CONST, // 除法: iv / const (使用mulh指令优化)
|
||||
REMAINDER // 取模: iv % 2^n (转换为位与)
|
||||
};
|
||||
|
||||
enum DivisionStrategy {
|
||||
SIMPLE_SHIFT, // 简单右移(仅适用于无符号或非负数)
|
||||
SIGNED_CORRECTION, // 有符号除法修正: (x + (x >> 31) & mask) >> k
|
||||
MULH_OPTIMIZATION // 使用mulh指令优化任意常数除法
|
||||
};
|
||||
|
||||
Instruction* originalInst; // 原始指令 (如 i*4, i/8, i%16)
|
||||
Value* inductionVar; // 归纳变量 (如 i)
|
||||
OpType operationType; // 操作类型
|
||||
DivisionStrategy divStrategy; // 除法策略(仅用于除法)
|
||||
int multiplier; // 乘数/除数/模数 (如 4, 8, 16)
|
||||
int shiftAmount; // 位移量 (对于2的幂)
|
||||
int offset; // 偏移量 (如常数项)
|
||||
BasicBlock* containingBlock; // 所在基本块
|
||||
Loop* containingLoop; // 所在循环
|
||||
bool hasNegativeValues; // 归纳变量是否可能为负数
|
||||
|
||||
// 强度削弱后的新变量
|
||||
PhiInst* newPhi = nullptr; // 新的 phi 指令
|
||||
Value* newInductionVar = nullptr; // 新的归纳变量
|
||||
|
||||
StrengthReductionCandidate(Instruction* inst, Value* iv, OpType opType, int value, int off,
|
||||
BasicBlock* bb, Loop* loop)
|
||||
: originalInst(inst), inductionVar(iv), operationType(opType),
|
||||
divStrategy(SIMPLE_SHIFT), multiplier(value), offset(off),
|
||||
containingBlock(bb), containingLoop(loop), hasNegativeValues(false) {
|
||||
|
||||
// 计算位移量(用于除法和取模的强度削弱)
|
||||
if (opType == DIVIDE || opType == REMAINDER) {
|
||||
shiftAmount = 0;
|
||||
int temp = value;
|
||||
while (temp > 1) {
|
||||
temp >>= 1;
|
||||
shiftAmount++;
|
||||
}
|
||||
} else {
|
||||
shiftAmount = 0;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 强度削弱上下文类
|
||||
* 封装强度削弱优化的核心逻辑和状态
|
||||
*/
|
||||
class StrengthReductionContext {
|
||||
public:
|
||||
StrengthReductionContext(IRBuilder* builder) : builder(builder) {}
|
||||
|
||||
/**
|
||||
* 运行强度削弱优化
|
||||
* @param F 目标函数
|
||||
* @param AM 分析管理器
|
||||
* @return 是否修改了IR
|
||||
*/
|
||||
bool run(Function* F, AnalysisManager& AM);
|
||||
|
||||
private:
|
||||
IRBuilder* builder;
|
||||
|
||||
// 分析结果缓存
|
||||
LoopAnalysisResult* loopAnalysis = nullptr;
|
||||
LoopCharacteristicsResult* loopCharacteristics = nullptr;
|
||||
DominatorTree* dominatorTree = nullptr;
|
||||
|
||||
// 候选项存储
|
||||
std::vector<std::unique_ptr<StrengthReductionCandidate>> candidates;
|
||||
std::unordered_map<Loop*, std::vector<StrengthReductionCandidate*>> loopToCandidates;
|
||||
|
||||
// ========== 核心分析和优化阶段 ==========
|
||||
|
||||
/**
|
||||
* 阶段1:识别强度削弱候选项
|
||||
* 扫描所有循环中的乘法指令,找出可以优化的模式
|
||||
*/
|
||||
void identifyStrengthReductionCandidates(Function* F);
|
||||
|
||||
/**
|
||||
* 阶段2:分析候选项的优化潜力
|
||||
* 评估每个候选项的收益,过滤掉不值得优化的情况
|
||||
*/
|
||||
void analyzeOptimizationPotential();
|
||||
|
||||
/**
|
||||
* 阶段3:执行强度削弱变换
|
||||
* 对选中的候选项执行实际的强度削弱优化
|
||||
*/
|
||||
bool performStrengthReduction();
|
||||
|
||||
// ========== 辅助分析函数 ==========
|
||||
|
||||
/**
|
||||
* 分析归纳变量是否可能取负值
|
||||
* @param ivInfo 归纳变量信息
|
||||
* @param loop 所属循环
|
||||
* @return 如果可能为负数返回true
|
||||
*/
|
||||
bool analyzeInductionVariableRange(const InductionVarInfo* ivInfo, Loop* loop) const;
|
||||
|
||||
/**
|
||||
* 计算用于除法优化的魔数和移位量
|
||||
* @param divisor 除数
|
||||
* @return {魔数, 移位量}
|
||||
*/
|
||||
std::pair<int, int> computeMulhMagicNumbers(int divisor) const;
|
||||
|
||||
/**
|
||||
* 生成除法替换代码
|
||||
* @param candidate 优化候选项
|
||||
* @param builder IR构建器
|
||||
* @return 替换值
|
||||
*/
|
||||
Value* generateDivisionReplacement(StrengthReductionCandidate* candidate, IRBuilder* builder) const;
|
||||
|
||||
/**
|
||||
* 生成任意常数除法替换代码
|
||||
* @param candidate 优化候选项
|
||||
* @param builder IR构建器
|
||||
* @return 替换值
|
||||
*/
|
||||
Value* generateConstantDivisionReplacement(StrengthReductionCandidate* candidate, IRBuilder* builder) const;
|
||||
|
||||
/**
|
||||
* 检查指令是否为强度削弱候选项
|
||||
* @param inst 要检查的指令
|
||||
* @param loop 所在循环
|
||||
* @return 如果是候选项返回候选项信息,否则返回nullptr
|
||||
*/
|
||||
std::unique_ptr<StrengthReductionCandidate>
|
||||
isStrengthReductionCandidate(Instruction* inst, Loop* loop);
|
||||
|
||||
/**
|
||||
* 检查值是否为循环的归纳变量
|
||||
* @param val 要检查的值
|
||||
* @param loop 循环
|
||||
* @param characteristics 循环特征信息
|
||||
* @return 如果是归纳变量返回归纳变量信息,否则返回nullptr
|
||||
*/
|
||||
const InductionVarInfo*
|
||||
getInductionVarInfo(Value* val, Loop* loop, const LoopCharacteristics* characteristics);
|
||||
|
||||
/**
|
||||
* 为候选项创建新的归纳变量
|
||||
* @param candidate 候选项
|
||||
* @return 是否成功创建
|
||||
*/
|
||||
bool createNewInductionVariable(StrengthReductionCandidate* candidate);
|
||||
|
||||
/**
|
||||
* 替换原始指令的所有使用
|
||||
* @param candidate 候选项
|
||||
* @return 是否成功替换
|
||||
*/
|
||||
bool replaceOriginalInstruction(StrengthReductionCandidate* candidate);
|
||||
|
||||
/**
|
||||
* 估算优化收益
|
||||
* 计算强度削弱后的性能提升
|
||||
* @param candidate 候选项
|
||||
* @return 估算的收益分数
|
||||
*/
|
||||
double estimateOptimizationBenefit(const StrengthReductionCandidate* candidate);
|
||||
|
||||
/**
|
||||
* 检查优化的合法性
|
||||
* @param candidate 候选项
|
||||
* @return 是否可以安全地进行优化
|
||||
*/
|
||||
bool isOptimizationLegal(const StrengthReductionCandidate* candidate);
|
||||
|
||||
/**
|
||||
* 打印调试信息
|
||||
*/
|
||||
void printDebugInfo();
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief 循环强度削弱优化遍
|
||||
* 将循环中的乘法运算转换为更高效的加法运算
|
||||
*/
|
||||
class LoopStrengthReduction : public OptimizationPass {
|
||||
public:
|
||||
// 唯一的 Pass ID
|
||||
static void *ID;
|
||||
|
||||
LoopStrengthReduction(IRBuilder* builder)
|
||||
: OptimizationPass("LoopStrengthReduction", Granularity::Function),
|
||||
builder(builder) {}
|
||||
|
||||
/**
|
||||
* 在函数上运行强度削弱优化
|
||||
* @param F 目标函数
|
||||
* @param AM 分析管理器
|
||||
* @return 是否修改了IR
|
||||
*/
|
||||
bool runOnFunction(Function* F, AnalysisManager& AM) override;
|
||||
|
||||
/**
|
||||
* 声明分析依赖和失效信息
|
||||
*/
|
||||
void getAnalysisUsage(std::set<void*>& analysisDependencies,
|
||||
std::set<void*>& analysisInvalidations) const override;
|
||||
|
||||
void* getPassID() const override { return &ID; }
|
||||
|
||||
private:
|
||||
IRBuilder* builder;
|
||||
};
|
||||
|
||||
} // namespace sysy
|
||||
@ -19,6 +19,8 @@ add_library(midend_lib STATIC
|
||||
Pass/Optimize/SCCP.cpp
|
||||
Pass/Optimize/LoopNormalization.cpp
|
||||
Pass/Optimize/LICM.cpp
|
||||
Pass/Optimize/LoopStrengthReduction.cpp
|
||||
Pass/Optimize/InductionVariableElimination.cpp
|
||||
Pass/Optimize/BuildCFG.cpp
|
||||
Pass/Optimize/LargeArrayToGlobal.cpp
|
||||
)
|
||||
|
||||
@ -779,7 +779,29 @@ void BinaryInst::print(std::ostream &os) const {
|
||||
printOperand(os, getRhs());
|
||||
os << "\n ";
|
||||
printVarName(os, this) << " = zext i1 %" << tmpName << " to i32";
|
||||
} else {
|
||||
} else if(kind == kMulh){
|
||||
// 模拟高位乘法:先扩展为i64,乘法,右移32位,截断为i32
|
||||
static int mulhCount = 0;
|
||||
mulhCount++;
|
||||
std::string lhsName = getLhs()->getName();
|
||||
std::string rhsName = getRhs()->getName();
|
||||
std::string tmpLhs = "tmp_mulh_lhs_" + std::to_string(mulhCount) + "_" + lhsName;
|
||||
std::string tmpRhs = "tmp_mulh_rhs_" + std::to_string(mulhCount) + rhsName;
|
||||
std::string tmpMul = "tmp_mulh_mul_" + std::to_string(mulhCount) + getName();
|
||||
std::string tmpHigh = "tmp_mulh_high_" + std::to_string(mulhCount) + getName();
|
||||
// printVarName(os, this) << " = "; // 输出最终变量名
|
||||
|
||||
// os << "; mulh emulation\n ";
|
||||
os << "%" << tmpLhs << " = sext i32 ";
|
||||
printOperand(os, getLhs());
|
||||
os << " to i64\n ";
|
||||
os << "%" << tmpRhs << " = sext i32 ";
|
||||
printOperand(os, getRhs());
|
||||
os << " to i64\n ";
|
||||
os << "%" << tmpMul << " = mul i64 %" << tmpLhs << ", %" << tmpRhs << "\n ";
|
||||
os << "%" << tmpHigh << " = ashr i64 %" << tmpMul << ", 32\n ";
|
||||
printVarName(os, this) << " = trunc i64 %" << tmpHigh << " to i32";
|
||||
}else {
|
||||
// 算术和逻辑指令
|
||||
printVarName(os, this) << " = ";
|
||||
os << getKindString() << " " << *getType() << " ";
|
||||
|
||||
@ -80,8 +80,8 @@ void LoopCharacteristicsResult::print() const {
|
||||
std::cout << std::endl;
|
||||
|
||||
// 归纳变量
|
||||
if (!chars->basicInductionVars.empty()) {
|
||||
std::cout << " Basic Induction Vars: " << chars->basicInductionVars.size() << std::endl;
|
||||
if (!chars->InductionVars.empty()) {
|
||||
std::cout << " Induction Vars: " << chars->InductionVars.size() << std::endl;
|
||||
}
|
||||
|
||||
// 循环不变量
|
||||
@ -282,28 +282,338 @@ void LoopCharacteristicsPass::analyzeBasicMemoryAccessPatterns(Loop* loop, LoopC
|
||||
}
|
||||
}
|
||||
|
||||
void LoopCharacteristicsPass::identifyBasicInductionVariables(Loop* loop, LoopCharacteristics* characteristics) {
|
||||
// 寻找基本归纳变量(简化版本)
|
||||
BasicBlock* header = loop->getHeader();
|
||||
bool LoopCharacteristicsPass::isBasicInductionVariable(Value* val, Loop* loop) {
|
||||
// 简化的基础归纳变量检测
|
||||
auto* phiInst = dynamic_cast<PhiInst*>(val);
|
||||
if (!phiInst) return false;
|
||||
|
||||
// 遍历循环头的phi指令,寻找基本归纳变量模式
|
||||
for (auto& inst : header->getInstructions()) {
|
||||
auto* phiInst = dynamic_cast<PhiInst*>(inst.get());
|
||||
if (!phiInst) continue;
|
||||
|
||||
// 检查phi指令是否符合基本归纳变量模式
|
||||
if (isBasicInductionVariable(phiInst, loop)) {
|
||||
characteristics->basicInductionVars.push_back(phiInst);
|
||||
characteristics->inductionSteps[phiInst] = 1; // 简化:默认步长为1
|
||||
|
||||
if (DEBUG)
|
||||
std::cout << " Found basic induction variable: " << phiInst->getName() << std::endl;
|
||||
// 检查phi指令是否在循环头
|
||||
if (phiInst->getParent() != loop->getHeader()) return false;
|
||||
|
||||
// 检查是否有来自循环内的更新
|
||||
for (auto& [incomingBB, incomingVal] : phiInst->getIncomingValues()) {
|
||||
if (loop->contains(incomingBB)) {
|
||||
return true; // 简化:有来自循环内的值就认为是基础归纳变量
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Collects the induction variables of `loop` and stores them in
// `characteristics->InductionVars`.
//
// Step 1 records every header phi accepted by isBasicInductionVariable() as a
// basic induction variable (BIV). Step 2 walks the users of each BIV through
// findDerivedInductionVars() to pick up derived induction variables (DIVs).
//
// loop            - loop being analysed; its header and block list are read.
// characteristics - receives the resulting vector (previous content is replaced).
void LoopCharacteristicsPass::identifyBasicInductionVariables(
    Loop* loop, LoopCharacteristics* characteristics) {
  BasicBlock* header = loop->getHeader();
  std::vector<std::unique_ptr<InductionVarInfo>> ivs;

  if (DEBUG) {
    std::cout << " === Identifying Induction Variables for Loop: " << loop->getName() << " ===" << std::endl;
    std::cout << " Loop header: " << header->getName() << std::endl;
    std::cout << " Loop blocks: ";
    for (auto* bb : loop->getBlocks()) {
      std::cout << bb->getName() << " ";
    }
    std::cout << std::endl;
  }

  // 1. Identify all BIVs among the phi instructions of the loop header.
  for (auto& inst : header->getInstructions()) {
    auto* phi = dynamic_cast<PhiInst*>(inst.get());
    if (!phi || !isBasicInductionVariable(phi, loop)) continue;

    ivs.push_back(InductionVarInfo::createBasicBIV(phi, Instruction::Kind::kPhi, phi));
    if (DEBUG) {
      std::cout << " [BIV] Found basic induction variable: " << phi->getName() << std::endl;
      std::cout << " Incoming values: ";
      for (auto& [incomingBB, incomingVal] : phi->getIncomingValues()) {
        std::cout << "{" << incomingBB->getName() << ": " << incomingVal->getName() << "} ";
      }
      std::cout << std::endl;
    }
  }

  if (DEBUG) {
    std::cout << " Found " << ivs.size() << " basic induction variables" << std::endl;
  }

  // 2. Recursively identify all DIVs reachable from the BIVs.
  std::set<Value*> visited;
  const size_t initialSize = ivs.size();

  // Snapshot the BIVs as raw pointers first: findDerivedInductionVars() appends
  // to `ivs`, which would invalidate iterators into the vector. The pointed-to
  // objects are owned by unique_ptr and are not moved by that growth.
  std::vector<InductionVarInfo*> bivList;
  for (size_t i = 0; i < initialSize; ++i) {
    if (ivs[i] && ivs[i]->ivkind == IVKind::kBasic) {
      bivList.push_back(ivs[i].get());
    }
  }

  for (auto* biv : bivList) {
    // The validity check must not depend on DEBUG: a record without a `div`
    // value cannot be used as a search root in any build configuration.
    if (!biv || !biv->div) {
      if (DEBUG) {
        std::cout << " ERROR: Invalid BIV pointer or div field is null" << std::endl;
      }
      continue;
    }
    if (DEBUG) {
      std::cout << " Searching for derived IVs from BIV: " << biv->div->getName() << std::endl;
    }
    findDerivedInductionVars(biv->div, biv->base, loop, ivs, visited);
  }

  if (DEBUG) {
    const size_t derivedCount = ivs.size() - initialSize;
    std::cout << " Found " << derivedCount << " derived induction variables" << std::endl;

    // Dump every induction variable that was found.
    std::cout << " === Final Induction Variables Summary ===" << std::endl;
    for (size_t i = 0; i < ivs.size(); ++i) {
      const auto& iv = ivs[i];
      std::cout << " [" << i << "] " << iv->div->getName()
                << " (kind: " << (iv->ivkind == IVKind::kBasic ? "Basic" :
                                  iv->ivkind == IVKind::kLinear ? "Linear" : "Complex") << ")" << std::endl;
      std::cout << " Operation: " << static_cast<int>(iv->Instkind) << std::endl;
      if (iv->base) {
        std::cout << " Base: " << iv->base->getName() << std::endl;
      }
      if (iv->Multibase.first || iv->Multibase.second) {
        std::cout << " Multi-base: ";
        if (iv->Multibase.first) std::cout << iv->Multibase.first->getName() << " ";
        if (iv->Multibase.second) std::cout << iv->Multibase.second->getName() << " ";
        std::cout << std::endl;
      }
      std::cout << " Factor: " << iv->factor << ", Offset: " << iv->offset << std::endl;
      std::cout << " Valid: " << (iv->valid ? "Yes" : "No") << std::endl;
    }
    std::cout << " =============================================" << std::endl;
  }

  characteristics->InductionVars = std::move(ivs);
}
|
||||
|
||||
|
||||
struct LinearExpr {
|
||||
// 表达为: a * base1 + b * base2 + offset
|
||||
Value* base1 = nullptr;
|
||||
Value* base2 = nullptr;
|
||||
int factor1 = 0;
|
||||
int factor2 = 0;
|
||||
int offset = 0;
|
||||
bool valid = false;
|
||||
bool isSimple = false; // 仅一个BIV时true
|
||||
};
|
||||
|
||||
// Recursively reduces `val` to a LinearExpr (factor1*base1 + factor2*base2 + offset).
//
// Recognised shapes:
//   - integer constants;
//   - values already recorded in `ivs` (basic, linear or complex);
//   - negation, addition and subtraction of recognised operands;
//   - multiplication of a recognised operand by a non-zero constant;
//   - division / remainder of a single-base operand by a positive power of two.
// Anything else yields an expression with valid == false.
//
// val  - value to analyse.
// loop - only forwarded to recursive calls; not inspected here.
// ivs  - induction variables known so far (read only).
//
// NOTE(review): division is reported with factor1 == -divisor and remainder with
// factor1 == -10000 - modulus. These markers share the value range of genuine
// negative factors and ignore the dividend's own factor; confirm that consumers
// handle this before relying on factor1 for such expressions.
static LinearExpr analyzeLinearExpr(Value* val, Loop* loop, std::vector<std::unique_ptr<InductionVarInfo>>& ivs) {
  const bool verbose = DEBUG >= 2;  // level 2 enables the detailed trace
  const LinearExpr notLinear{nullptr, nullptr, 0, 0, 0, false, false};
  // True when the expression references no base value at all (a constant, or an
  // invalid expression whose fields are all zero).
  const auto hasNoBase = [](const LinearExpr& e) { return !e.base1 && !e.base2; };

  if (verbose) {
    if (auto* inst = dynamic_cast<Instruction*>(val)) {
      std::cout << " Analyzing linear expression for: " << val->getName()
                << " (kind: " << static_cast<int>(inst->getKind()) << ")" << std::endl;
    } else {
      std::cout << " Analyzing linear expression for value: " << val->getName() << std::endl;
    }
  }

  // Leaf: integer constant.
  if (auto* cint = dynamic_cast<ConstantInteger*>(val)) {
    if (verbose) {
      std::cout << " -> Constant: " << cint->getInt() << std::endl;
    }
    return {nullptr, nullptr, 0, 0, cint->getInt(), true, false};
  }

  // Leaf: a value that is already a known induction variable.
  for (auto& iv : ivs) {
    if (iv->div != val) continue;

    if (iv->ivkind == IVKind::kBasic || iv->ivkind == IVKind::kLinear) {
      if (verbose) {
        std::cout << " -> Found " << (iv->ivkind == IVKind::kBasic ? "Basic" : "Linear")
                  << " IV with base: " << (iv->base ? iv->base->getName() : "null")
                  << ", factor: " << iv->factor << ", offset: " << iv->offset << std::endl;
      }
      return {iv->base, nullptr, iv->factor, 0, iv->offset, true, true};
    }
    // Complex (two-base) induction variable.
    if (iv->ivkind == IVKind::kCmplx) {
      if (verbose) {
        std::cout << " -> Found Complex IV with multi-base" << std::endl;
      }
      return {iv->Multibase.first, iv->Multibase.second, 1, 1, 0, true, false};
    }
  }

  if (auto* inst = dynamic_cast<Instruction*>(val)) {
    const auto kind = inst->getKind();

    // Unary minus: negate every coefficient.
    if (kind == Instruction::Kind::kNeg) {
      if (verbose) {
        std::cout << " -> Analyzing negation" << std::endl;
      }
      auto expr = analyzeLinearExpr(inst->getOperand(0), loop, ivs);
      if (!expr.valid) return expr;
      expr.factor1 = -expr.factor1;
      expr.factor2 = -expr.factor2;
      expr.offset = -expr.offset;
      expr.isSimple = (expr.base2 == nullptr);
      if (verbose) {
        std::cout << " -> Negation result: valid=" << expr.valid << ", simple=" << expr.isSimple << std::endl;
      }
      return expr;
    }

    // Addition / subtraction.
    if (kind == Instruction::Kind::kAdd || kind == Instruction::Kind::kSub) {
      if (verbose) {
        std::cout << " -> Analyzing " << (kind == Instruction::Kind::kAdd ? "addition" : "subtraction") << std::endl;
      }
      const auto expr0 = analyzeLinearExpr(inst->getOperand(0), loop, ivs);
      const auto expr1 = analyzeLinearExpr(inst->getOperand(1), loop, ivs);
      if (!expr0.valid || !expr1.valid) {
        if (verbose) {
          std::cout << " -> Failed: operand not linear (expr0.valid=" << expr0.valid << ", expr1.valid=" << expr1.valid << ")" << std::endl;
        }
        return notLinear;
      }

      const int sign = (kind == Instruction::Kind::kAdd ? 1 : -1);
      const int mergedOffset = expr0.offset + sign * expr1.offset;

      // expr0 carries the base(s), expr1 is a constant.
      if (expr0.base1 && hasNoBase(expr1)) {
        if (verbose) {
          std::cout << " -> Single BIV + constant pattern" << std::endl;
        }
        if (expr0.base2) {
          // Keep the second base; dropping it would misreport a two-base
          // expression as a single-base one.
          return {expr0.base1, expr0.base2, expr0.factor1, expr0.factor2, mergedOffset, true, false};
        }
        return {expr0.base1, nullptr, expr0.factor1, 0, mergedOffset, true, expr0.isSimple};
      }

      // expr0 is a constant, expr1 carries the base(s).
      if (hasNoBase(expr0) && expr1.base1) {
        if (verbose) {
          std::cout << " -> Constant + single BIV pattern" << std::endl;
        }
        if (expr1.base2) {
          return {expr1.base1, expr1.base2, sign * expr1.factor1, sign * expr1.factor2, mergedOffset, true, false};
        }
        return {expr1.base1, nullptr, sign * expr1.factor1, 0, mergedOffset, true, expr1.isSimple};
      }

      // Both operands are single-base expressions.
      if (expr0.base1 && expr1.base1 && !expr0.base2 && !expr1.base2) {
        if (expr0.base1 != expr1.base1) {
          // Linear combination of two different bases.
          if (verbose) {
            std::cout << " -> Double BIV linear combination" << std::endl;
          }
          return {expr0.base1, expr1.base1, expr0.factor1, sign * expr1.factor1, mergedOffset, true, false};
        }
        // Same base on both sides: fold the coefficients.
        if (verbose) {
          std::cout << " -> Same BIV combination" << std::endl;
        }
        return {expr0.base1, nullptr, expr0.factor1 + sign * expr1.factor1, 0, mergedOffset, true, true};
      }
      // Any other combination (constant with constant, or more than two bases)
      // is not handled and ends up as "not linear" below.
    }

    // Multiplication: exactly one side must be a non-zero constant.
    if (kind == Instruction::Kind::kMul) {
      if (verbose) {
        std::cout << " -> Analyzing multiplication" << std::endl;
      }
      const auto expr0 = analyzeLinearExpr(inst->getOperand(0), loop, ivs);
      const auto expr1 = analyzeLinearExpr(inst->getOperand(1), loop, ivs);

      if (expr0.base1 && hasNoBase(expr1) && expr1.offset) {
        if (verbose) {
          std::cout << " -> BIV * constant pattern" << std::endl;
        }
        const int c = expr1.offset;
        // Scale both coefficients so a second base is preserved, not lost.
        return {expr0.base1, expr0.base2, expr0.factor1 * c, expr0.base2 ? expr0.factor2 * c : 0,
                expr0.offset * c, true, expr0.base2 == nullptr};
      }
      if (hasNoBase(expr0) && expr0.offset && expr1.base1) {
        if (verbose) {
          std::cout << " -> Constant * BIV pattern" << std::endl;
        }
        const int c = expr0.offset;
        return {expr1.base1, expr1.base2, expr1.factor1 * c, expr1.base2 ? expr1.factor2 * c : 0,
                expr1.offset * c, true, expr1.base2 == nullptr};
      }
      // A product of two base-carrying expressions is not supported.
      if (verbose) {
        std::cout << " -> Multiplication pattern not supported" << std::endl;
      }
    }

    // Division: single-base expression divided by a positive power of two.
    if (kind == Instruction::Kind::kDiv) {
      if (verbose) {
        std::cout << " -> Analyzing division" << std::endl;
      }
      const auto expr0 = analyzeLinearExpr(inst->getOperand(0), loop, ivs);
      const auto expr1 = analyzeLinearExpr(inst->getOperand(1), loop, ivs);

      // The result can only name one base, so two-base dividends are rejected.
      if (expr0.base1 && !expr0.base2 && hasNoBase(expr1) && expr1.offset > 0) {
        const int divisor = expr1.offset;
        if ((divisor & (divisor - 1)) == 0) {  // power-of-two test
          if (verbose) {
            std::cout << " -> BIV / power_of_2 pattern (divisor=" << divisor << ")" << std::endl;
          }
          // Recorded as a special pattern: factor1 holds the negated divisor
          // for later strength reduction.
          return {expr0.base1, nullptr, -divisor, 0, expr0.offset / divisor, true, true};
        }
      }
      if (verbose) {
        std::cout << " -> Division pattern not supported (not power of 2)" << std::endl;
      }
    }

    // Remainder: single-base expression modulo a positive power of two.
    if (kind == Instruction::Kind::kRem) {
      if (verbose) {
        std::cout << " -> Analyzing remainder" << std::endl;
      }
      const auto expr0 = analyzeLinearExpr(inst->getOperand(0), loop, ivs);
      const auto expr1 = analyzeLinearExpr(inst->getOperand(1), loop, ivs);

      if (expr0.base1 && !expr0.base2 && hasNoBase(expr1) && expr1.offset > 0) {
        const int modulus = expr1.offset;
        if ((modulus & (modulus - 1)) == 0) {  // power-of-two test
          if (verbose) {
            std::cout << " -> BIV % power_of_2 pattern (modulus=" << modulus << ")" << std::endl;
          }
          // Special marker that distinguishes remainder from division.
          return {expr0.base1, nullptr, -10000 - modulus, 0, 0, true, true};
        }
      }
      if (verbose) {
        std::cout << " -> Remainder pattern not supported (not power of 2)" << std::endl;
      }
    }
  }

  // Everything else is not a linear expression.
  if (verbose) {
    std::cout << " -> Other case: not linear" << std::endl;
  }
  return notLinear;
}
|
||||
|
||||
void LoopCharacteristicsPass::identifyBasicLoopInvariants(Loop* loop, LoopCharacteristics* characteristics) {
|
||||
// 经典推进法:反复遍历,直到收敛
|
||||
// 经典推进法:反复遍历,直到收敛 TODO:优化
|
||||
bool changed;
|
||||
std::unordered_set<Value*> invariants = characteristics->loopInvariants; // 可能为空
|
||||
|
||||
@ -334,7 +644,7 @@ void LoopCharacteristicsPass::identifyBasicLoopInvariants(Loop* loop, LoopCharac
|
||||
void LoopCharacteristicsPass::analyzeBasicLoopBounds(Loop* loop, LoopCharacteristics* characteristics) {
|
||||
// 简化的基础边界分析
|
||||
// 检查是否有静态可确定的循环次数(简化版本)
|
||||
if (characteristics->isCountingLoop && !characteristics->basicInductionVars.empty()) {
|
||||
if (characteristics->isCountingLoop && !characteristics->InductionVars.empty()) {
|
||||
// 简化:如果是计数循环且有基本归纳变量,尝试确定循环次数
|
||||
if (characteristics->instructionCount < 10) {
|
||||
characteristics->staticTripCount = 100; // 简化估计
|
||||
@ -373,22 +683,97 @@ void LoopCharacteristicsPass::evaluateBasicOptimizationOpportunities(Loop* loop,
|
||||
|
||||
// ========== 辅助方法实现 ==========
|
||||
|
||||
bool LoopCharacteristicsPass::isBasicInductionVariable(Value* val, Loop* loop) {
|
||||
// 简化的基础归纳变量检测
|
||||
auto* phiInst = dynamic_cast<PhiInst*>(val);
|
||||
if (!phiInst) return false;
|
||||
|
||||
// 检查phi指令是否在循环头
|
||||
if (phiInst->getParent() != loop->getHeader()) return false;
|
||||
|
||||
// 检查是否有来自循环内的更新
|
||||
for (auto& [incomingBB, incomingVal] : phiInst->getIncomingValues()) {
|
||||
if (loop->contains(incomingBB)) {
|
||||
return true; // 简化:有来自循环内的值就认为是基础归纳变量
|
||||
// 递归识别DIV,支持线性与复杂归纳变量
|
||||
void LoopCharacteristicsPass::findDerivedInductionVars(
|
||||
Value* root,
|
||||
Value* base, // 只传单一BIV base
|
||||
Loop* loop,
|
||||
std::vector<std::unique_ptr<InductionVarInfo>>& ivs,
|
||||
std::set<Value*>& visited)
|
||||
{
|
||||
if (visited.count(root)) return;
|
||||
visited.insert(root);
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " Analyzing uses of: " << root->getName() << std::endl;
|
||||
}
|
||||
|
||||
for (auto use : root->getUses()) {
|
||||
auto user = use->getUser();
|
||||
Instruction* inst = dynamic_cast<Instruction*>(user);
|
||||
if (!inst) continue;
|
||||
if (!loop->contains(inst->getParent())) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Skipping user outside loop: " << inst->getName() << std::endl;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " Checking instruction: " << inst->getName()
|
||||
<< " (kind: " << static_cast<int>(inst->getKind()) << ")" << std::endl;
|
||||
}
|
||||
|
||||
// 线性归约分析
|
||||
auto expr = analyzeLinearExpr(inst, loop, ivs);
|
||||
|
||||
if (!expr.valid) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Linear expression analysis failed for: " << inst->getName() << std::endl;
|
||||
}
|
||||
// 复杂非线性归纳变量,作为kCmplx记录(假如你想追踪)
|
||||
// 这里假设expr.base1、base2都有效才记录double
|
||||
if (expr.base1 && expr.base2) {
|
||||
if (DEBUG) {
|
||||
std::cout << " [DIV-COMPLEX] Creating complex derived IV: " << inst->getName()
|
||||
<< " with bases: " << expr.base1->getName() << ", " << expr.base2->getName() << std::endl;
|
||||
}
|
||||
ivs.push_back(InductionVarInfo::createDoubleDIV(inst, inst->getKind(), expr.base1, expr.base2, 0, expr.offset));
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
// 单BIV线性
|
||||
if (expr.base1 && !expr.base2) {
|
||||
// 检查这个指令是否已经是一个已知的IV(特别是BIV),避免重复创建
|
||||
bool alreadyExists = false;
|
||||
for (const auto& existingIV : ivs) {
|
||||
if (existingIV->div == inst) {
|
||||
alreadyExists = true;
|
||||
if (DEBUG) {
|
||||
std::cout << " [DIV-SKIP] Instruction " << inst->getName()
|
||||
<< " already exists as IV, skipping creation" << std::endl;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!alreadyExists) {
|
||||
if (DEBUG) {
|
||||
std::cout << " [DIV-LINEAR] Creating single-base derived IV: " << inst->getName()
|
||||
<< " with base: " << expr.base1->getName()
|
||||
<< ", factor: " << expr.factor1
|
||||
<< ", offset: " << expr.offset << std::endl;
|
||||
}
|
||||
ivs.push_back(InductionVarInfo::createSingleDIV(inst, inst->getKind(), expr.base1, expr.factor1, expr.offset));
|
||||
findDerivedInductionVars(inst, expr.base1, loop, ivs, visited);
|
||||
}
|
||||
}
|
||||
// 双BIV线性
|
||||
else if (expr.base1 && expr.base2) {
|
||||
if (DEBUG) {
|
||||
std::cout << " [DIV-COMPLEX] Creating double-base derived IV: " << inst->getName()
|
||||
<< " with bases: " << expr.base1->getName() << ", " << expr.base2->getName()
|
||||
<< ", offset: " << expr.offset << std::endl;
|
||||
}
|
||||
ivs.push_back(InductionVarInfo::createDoubleDIV(inst, inst->getKind(), expr.base1, expr.base2, 0, expr.offset));
|
||||
// 双BIV情形一般不再递归下游
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " Finished analyzing uses of: " << root->getName() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
// 递归/推进式判定
|
||||
|
||||
916
src/midend/Pass/Optimize/InductionVariableElimination.cpp
Normal file
916
src/midend/Pass/Optimize/InductionVariableElimination.cpp
Normal file
@ -0,0 +1,916 @@
|
||||
#include "InductionVariableElimination.h"
|
||||
#include "LoopCharacteristics.h"
|
||||
#include "Loop.h"
|
||||
#include "Dom.h"
|
||||
#include "SideEffectAnalysis.h"
|
||||
#include "AliasAnalysis.h"
|
||||
#include "SysYIROptUtils.h"
|
||||
#include <iostream>
|
||||
#include <algorithm>
|
||||
|
||||
// 使用全局调试开关
|
||||
extern int DEBUG;
|
||||
|
||||
namespace sysy {
|
||||
|
||||
// 定义 Pass 的唯一 ID
|
||||
void *InductionVariableElimination::ID = (void *)&InductionVariableElimination::ID;
|
||||
|
||||
// Pass entry point: runs induction variable elimination on `F`.
// Returns true when the function was modified, false otherwise (including the
// case of a function without basic blocks).
bool InductionVariableElimination::runOnFunction(Function* F, AnalysisManager& AM) {
  // A function without any basic block has nothing to optimise.
  if (F->getBasicBlocks().empty()) return false;

  if (DEBUG) {
    std::cout << "Running InductionVariableElimination on function: " << F->getName() << std::endl;
  }

  // All the work is delegated to a fresh optimisation context.
  InductionVariableEliminationContext ctx;
  const bool changed = ctx.run(F, AM);

  if (DEBUG) {
    const char* verdict = changed ? "modified" : "did not modify";
    std::cout << "InductionVariableElimination " << verdict
              << " function: " << F->getName() << std::endl;
  }

  return changed;
}
|
||||
|
||||
void InductionVariableElimination::getAnalysisUsage(std::set<void*>& analysisDependencies,
|
||||
std::set<void*>& analysisInvalidations) const {
|
||||
// 依赖的分析
|
||||
analysisDependencies.insert(&LoopAnalysisPass::ID);
|
||||
analysisDependencies.insert(&LoopCharacteristicsPass::ID);
|
||||
analysisDependencies.insert(&DominatorTreeAnalysisPass::ID);
|
||||
analysisDependencies.insert(&SysYSideEffectAnalysisPass::ID);
|
||||
analysisDependencies.insert(&SysYAliasAnalysisPass::ID);
|
||||
|
||||
// 会使失效的分析(归纳变量消除会修改IR结构)
|
||||
analysisInvalidations.insert(&LoopCharacteristicsPass::ID);
|
||||
// 注意:支配树分析通常不会因为归纳变量消除而失效,因为我们不改变控制流
|
||||
}
|
||||
|
||||
// ========== InductionVariableEliminationContext 实现 ==========
|
||||
|
||||
// Drives the optimisation for one function: fetches the analysis results, then
// runs the three phases (identify candidates, analyse safety, eliminate).
// Returns true when the IR was modified.
bool InductionVariableEliminationContext::run(Function* F, AnalysisManager& AM) {
  // Debug-only tracing helper; prints nothing unless DEBUG is set.
  const auto trace = [](const char* message) {
    if (DEBUG) {
      std::cout << message << std::endl;
    }
  };

  trace(" Starting induction variable elimination analysis...");

  // Mandatory analyses: without any of them the pass gives up.
  loopAnalysis = AM.getAnalysisResult<LoopAnalysisResult, LoopAnalysisPass>(F);
  if (!loopAnalysis || !loopAnalysis->hasLoops()) {
    trace(" No loops found, skipping induction variable elimination");
    return false;
  }

  loopCharacteristics = AM.getAnalysisResult<LoopCharacteristicsResult, LoopCharacteristicsPass>(F);
  if (!loopCharacteristics) {
    trace(" LoopCharacteristics analysis not available");
    return false;
  }

  dominatorTree = AM.getAnalysisResult<DominatorTree, DominatorTreeAnalysisPass>(F);
  if (!dominatorTree) {
    trace(" DominatorTree analysis not available");
    return false;
  }

  // Optional analyses: the pass continues without them.
  sideEffectAnalysis = AM.getAnalysisResult<SideEffectAnalysisResult, SysYSideEffectAnalysisPass>();
  trace(sideEffectAnalysis ? " Using SideEffectAnalysis for safety checks"
                           : " SideEffectAnalysis not available, using conservative approach");

  aliasAnalysis = AM.getAnalysisResult<AliasAnalysisResult, SysYAliasAnalysisPass>(F);
  trace(aliasAnalysis ? " Using AliasAnalysis for memory safety checks"
                      : " AliasAnalysis not available, using conservative approach");

  // Phase 1: collect candidate dead induction variables.
  identifyDeadInductionVariables(F);

  if (deadIVs.empty()) {
    trace(" No dead induction variables found");
    return false;
  }

  if (DEBUG) {
    std::cout << " Found " << deadIVs.size() << " potentially dead induction variables" << std::endl;
  }

  // Phase 2: decide which candidates are safe to remove.
  analyzeSafetyForElimination();

  // Phase 3: perform the elimination.
  const bool changed = performInductionVariableElimination();

  if (DEBUG) {
    printDebugInfo();
  }

  return changed;
}
|
||||
|
||||
// Phase 1: scans every loop reported by the loop analysis and records each
// induction variable that isDeadInductionVariable() accepts, both in `deadIVs`
// (owning) and in `loopToDeadIVs` (per-loop, non-owning).
void InductionVariableEliminationContext::identifyDeadInductionVariables(Function* F) {
  if (DEBUG) {
    std::cout << " === Phase 1: Identifying Dead Induction Variables ===" << std::endl;
  }

  for (const auto& loopOwner : loopAnalysis->getAllLoops()) {
    Loop* loop = loopOwner.get();

    if (DEBUG) {
      std::cout << " Analyzing loop: " << loop->getName() << std::endl;
    }

    // Loops without characteristics or without induction variables are skipped.
    const LoopCharacteristics* loopInfo = loopCharacteristics->getCharacteristics(loop);
    if (!loopInfo) {
      if (DEBUG) {
        std::cout << " No characteristics available for loop" << std::endl;
      }
      continue;
    }
    if (loopInfo->InductionVars.empty()) {
      if (DEBUG) {
        std::cout << " No induction variables found in loop" << std::endl;
      }
      continue;
    }

    for (const auto& ivInfo : loopInfo->InductionVars) {
      auto candidate = isDeadInductionVariable(ivInfo.get(), loop);
      if (!candidate) continue;

      if (DEBUG) {
        std::cout << " Found potential dead IV: %" << candidate->phiInst->getName() << std::endl;
      }

      // Register the raw pointer first, then hand ownership to `deadIVs`.
      loopToDeadIVs[loop].push_back(candidate.get());
      deadIVs.push_back(std::move(candidate));
    }
  }

  if (DEBUG) {
    std::cout << " === End Phase 1: Found " << deadIVs.size() << " candidates ===" << std::endl;
  }
}
|
||||
|
||||
std::unique_ptr<DeadInductionVariable>
|
||||
InductionVariableEliminationContext::isDeadInductionVariable(const InductionVarInfo* iv, Loop* loop) {
|
||||
// 获取 phi 指令
|
||||
auto* phiInst = dynamic_cast<PhiInst*>(iv->div);
|
||||
if (!phiInst) {
|
||||
return nullptr; // 不是 phi 指令
|
||||
}
|
||||
|
||||
// 新的逻辑:递归分析整个use-def链,判断是否有真实的使用
|
||||
if (!isPhiInstructionDeadRecursively(phiInst, loop)) {
|
||||
return nullptr; // 有真实的使用,不能删除
|
||||
}
|
||||
|
||||
// 创建死归纳变量信息
|
||||
auto deadIV = std::make_unique<DeadInductionVariable>(phiInst, loop);
|
||||
deadIV->relatedInsts = collectRelatedInstructions(phiInst, loop);
|
||||
|
||||
return deadIV;
|
||||
}
|
||||
|
||||
// 递归分析phi指令及其使用链是否都是死代码
|
||||
bool InductionVariableEliminationContext::isPhiInstructionDeadRecursively(PhiInst* phiInst, Loop* loop) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 递归分析归纳变量 " << phiInst->getName() << " 的完整使用链" << std::endl;
|
||||
}
|
||||
|
||||
// 使用访问集合避免无限递归
|
||||
std::set<Instruction*> visitedInstructions;
|
||||
std::set<Instruction*> currentPath; // 用于检测循环依赖
|
||||
|
||||
// 核心逻辑:递归分析使用链,寻找任何"逃逸点"
|
||||
return isInstructionUseChainDeadRecursively(phiInst, loop, visitedInstructions, currentPath);
|
||||
}
|
||||
|
||||
// 递归分析指令的使用链是否都是死代码
|
||||
bool InductionVariableEliminationContext::isInstructionUseChainDeadRecursively(
|
||||
Instruction* inst, Loop* loop,
|
||||
std::set<Instruction*>& visited,
|
||||
std::set<Instruction*>& currentPath) {
|
||||
|
||||
if (DEBUG && visited.size() < 10) { // 限制debug输出
|
||||
std::cout << " 分析指令 " << inst->getName() << " (" << inst->getKindString() << ")" << std::endl;
|
||||
}
|
||||
|
||||
// 避免无限递归
|
||||
if (currentPath.count(inst) > 0) {
|
||||
// 发现循环依赖,这在归纳变量中是正常的,继续分析其他路径
|
||||
if (DEBUG && visited.size() < 10) {
|
||||
std::cout << " 发现循环依赖,继续分析其他路径" << std::endl;
|
||||
}
|
||||
return true; // 循环依赖本身不是逃逸点
|
||||
}
|
||||
|
||||
if (visited.count(inst) > 0) {
|
||||
// 已经分析过这个指令
|
||||
return true; // 假设之前的分析是正确的
|
||||
}
|
||||
|
||||
visited.insert(inst);
|
||||
currentPath.insert(inst);
|
||||
|
||||
// 1. 检查是否有副作用(逃逸点)
|
||||
if (sideEffectAnalysis && sideEffectAnalysis->hasSideEffect(inst)) {
|
||||
if (DEBUG && visited.size() < 10) {
|
||||
std::cout << " 指令有副作用,是逃逸点" << std::endl;
|
||||
}
|
||||
currentPath.erase(inst);
|
||||
return false; // 有副作用的指令是逃逸点
|
||||
}
|
||||
|
||||
// 1.5. 特殊检查:控制流指令永远不是死代码
|
||||
auto instKind = inst->getKind();
|
||||
if (instKind == Instruction::Kind::kCondBr ||
|
||||
instKind == Instruction::Kind::kBr ||
|
||||
instKind == Instruction::Kind::kReturn) {
|
||||
if (DEBUG && visited.size() < 10) {
|
||||
std::cout << " 控制流指令,是逃逸点" << std::endl;
|
||||
}
|
||||
currentPath.erase(inst);
|
||||
return false; // 控制流指令是逃逸点
|
||||
}
|
||||
|
||||
// 2. 检查指令的所有使用
|
||||
bool allUsesAreDead = true;
|
||||
for (auto use : inst->getUses()) {
|
||||
auto user = use->getUser();
|
||||
auto* userInst = dynamic_cast<Instruction*>(user);
|
||||
|
||||
if (!userInst) {
|
||||
// 被非指令使用(如函数返回值),是逃逸点
|
||||
if (DEBUG && visited.size() < 10) {
|
||||
std::cout << " 被非指令使用,是逃逸点" << std::endl;
|
||||
}
|
||||
allUsesAreDead = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// 检查使用是否在循环外(逃逸点)
|
||||
if (!loop->contains(userInst->getParent())) {
|
||||
if (DEBUG && visited.size() < 10) {
|
||||
std::cout << " 在循环外被 " << userInst->getName() << " 使用,是逃逸点" << std::endl;
|
||||
}
|
||||
allUsesAreDead = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// 特殊检查:如果使用者是循环的退出条件,需要进一步分析
|
||||
// 对于用于退出条件的归纳变量,需要更谨慎的处理
|
||||
if (isUsedInLoopExitCondition(userInst, loop)) {
|
||||
// 修复逻辑:用于循环退出条件的归纳变量通常不应该被消除
|
||||
// 除非整个循环都可以被证明是完全无用的(这需要更复杂的分析)
|
||||
if (DEBUG && visited.size() < 10) {
|
||||
std::cout << " 被用于循环退出条件,是逃逸点(避免破坏循环语义)" << std::endl;
|
||||
}
|
||||
allUsesAreDead = false;
|
||||
break;
|
||||
}
|
||||
|
||||
// 递归分析使用者的使用链
|
||||
if (!isInstructionUseChainDeadRecursively(userInst, loop, visited, currentPath)) {
|
||||
allUsesAreDead = false;
|
||||
break; // 找到逃逸点,不需要继续分析
|
||||
}
|
||||
}
|
||||
|
||||
currentPath.erase(inst);
|
||||
|
||||
if (allUsesAreDead && DEBUG && visited.size() < 10) {
|
||||
std::cout << " 指令 " << inst->getName() << " 的所有使用都是死代码" << std::endl;
|
||||
}
|
||||
|
||||
return allUsesAreDead;
|
||||
}
|
||||
|
||||
// 检查循环是否有副作用
|
||||
bool InductionVariableEliminationContext::loopHasSideEffects(Loop* loop) {
|
||||
// 遍历循环中的所有指令,检查是否有副作用
|
||||
for (BasicBlock* bb : loop->getBlocks()) {
|
||||
for (auto& inst : bb->getInstructions()) {
|
||||
Instruction* instPtr = inst.get();
|
||||
|
||||
// 使用副作用分析(如果可用)
|
||||
if (sideEffectAnalysis && sideEffectAnalysis->hasSideEffect(instPtr)) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 循环中发现有副作用的指令: " << instPtr->getName() << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// 如果没有副作用分析,使用保守的判断
|
||||
if (!sideEffectAnalysis) {
|
||||
auto kind = instPtr->getKind();
|
||||
// 这些指令通常有副作用
|
||||
if (kind == Instruction::Kind::kCall ||
|
||||
kind == Instruction::Kind::kStore ||
|
||||
kind == Instruction::Kind::kReturn) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 循环中发现潜在有副作用的指令: " << instPtr->getName() << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 重要修复:检查是否为嵌套循环的外层循环
|
||||
// 如果当前循环包含其他循环,那么它有潜在的副作用
|
||||
for (const auto& loop_ptr : loopAnalysis->getAllLoops()) {
|
||||
Loop* otherLoop = loop_ptr.get();
|
||||
if(loopAnalysis->getLowestCommonAncestor(otherLoop, loop) == loop) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 循环 " << loop->getName() << " 是其他循环的外层循环,视为有副作用" << std::endl;
|
||||
}
|
||||
return true; // 外层循环被视为有副作用
|
||||
}
|
||||
// if (otherLoop != loop && loop->contains(otherLoop->getHeader())) {
|
||||
// if (DEBUG) {
|
||||
// std::cout << " 循环 " << loop->getName() << " 包含子循环 " << otherLoop->getName() << ",视为有副作用" << std::endl;
|
||||
// }
|
||||
// return true; // 包含子循环的外层循环被视为有副作用
|
||||
// }
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " 循环 " << loop->getName() << " 无副作用" << std::endl;
|
||||
}
|
||||
return false; // 循环无副作用
|
||||
}
|
||||
|
||||
// 检查指令是否被用于循环退出条件
|
||||
bool InductionVariableEliminationContext::isUsedInLoopExitCondition(Instruction* inst, Loop* loop) {
|
||||
// 检查指令是否被循环的退出条件使用
|
||||
for (BasicBlock* exitingBB : loop->getExitingBlocks()) {
|
||||
auto terminatorIt = exitingBB->terminator();
|
||||
if (terminatorIt != exitingBB->end()) {
|
||||
Instruction* terminator = terminatorIt->get();
|
||||
if (terminator) {
|
||||
// 检查终结指令的操作数
|
||||
for (size_t i = 0; i < terminator->getNumOperands(); ++i) {
|
||||
if (terminator->getOperand(i) == inst) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 指令 " << inst->getName() << " 用于循环退出条件" << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// 对于条件分支,还需要检查条件指令的操作数
|
||||
if (terminator->getKind() == Instruction::Kind::kCondBr) {
|
||||
auto* condBr = dynamic_cast<CondBrInst*>(terminator);
|
||||
if (condBr) {
|
||||
Value* condition = condBr->getCondition();
|
||||
if (condition == inst) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 指令 " << inst->getName() << " 是循环条件" << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// 递归检查条件指令的操作数(比如比较指令)
|
||||
auto* condInst = dynamic_cast<Instruction*>(condition);
|
||||
if (condInst) {
|
||||
for (size_t i = 0; i < condInst->getNumOperands(); ++i) {
|
||||
if (condInst->getOperand(i) == inst) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 指令 " << inst->getName() << " 用于循环条件的操作数" << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// 检查指令的结果是否未被有效使用
|
||||
bool InductionVariableEliminationContext::isInstructionResultUnused(Instruction* inst, Loop* loop) {
|
||||
// 检查指令的所有使用
|
||||
if (inst->getUses().empty()) {
|
||||
return true; // 没有使用,肯定是未使用
|
||||
}
|
||||
|
||||
for (auto use : inst->getUses()) {
|
||||
auto user = use->getUser();
|
||||
auto* userInst = dynamic_cast<Instruction*>(user);
|
||||
|
||||
if (!userInst) {
|
||||
return false; // 被非指令使用,认为是有效使用
|
||||
}
|
||||
|
||||
// 如果在循环外被使用,认为是有效使用
|
||||
if (!loop->contains(userInst->getParent())) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// 递归检查使用这个结果的指令是否也是死代码
|
||||
// 为了避免无限递归,限制递归深度
|
||||
if (!isInstructionEffectivelyDead(userInst, loop, 3)) {
|
||||
return false; // 存在有效使用
|
||||
}
|
||||
}
|
||||
|
||||
return true; // 所有使用都是无效的
|
||||
}
|
||||
|
||||
// 检查store指令是否存储到死地址(利用别名分析)
|
||||
bool InductionVariableEliminationContext::isStoreToDeadLocation(StoreInst* store, Loop* loop) {
|
||||
if (!aliasAnalysis) {
|
||||
return false; // 没有别名分析,保守返回false
|
||||
}
|
||||
|
||||
Value* storePtr = store->getPointer();
|
||||
|
||||
// 检查是否存储到局部临时变量且该变量在循环外不被读取
|
||||
const MemoryLocation* memLoc = aliasAnalysis->getMemoryLocation(storePtr);
|
||||
if (!memLoc) {
|
||||
return false; // 无法确定内存位置
|
||||
}
|
||||
|
||||
// 如果是局部数组且只在循环内被访问
|
||||
if (memLoc->isLocalArray) {
|
||||
// 检查该内存位置是否在循环外被读取
|
||||
for (auto* accessInst : memLoc->accessInsts) {
|
||||
if (accessInst->getKind() == Instruction::Kind::kLoad) {
|
||||
if (!loop->contains(accessInst->getParent())) {
|
||||
return false; // 在循环外被读取,不是死存储
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " 存储到局部数组且仅在循环内访问" << std::endl;
|
||||
}
|
||||
return true; // 存储到仅循环内访问的局部数组
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// 检查指令是否有效死代码(带递归深度限制)
|
||||
bool InductionVariableEliminationContext::isInstructionEffectivelyDead(Instruction* inst, Loop* loop, int maxDepth) {
|
||||
if (maxDepth <= 0) {
|
||||
return false; // 达到递归深度限制,保守返回false
|
||||
}
|
||||
|
||||
// 利用副作用分析
|
||||
if (sideEffectAnalysis && sideEffectAnalysis->hasSideEffect(inst)) {
|
||||
return false; // 有副作用的指令不是死代码
|
||||
}
|
||||
|
||||
// 检查特殊指令类型
|
||||
switch (inst->getKind()) {
|
||||
case Instruction::Kind::kStore:
|
||||
// Store指令可能是死存储
|
||||
return isStoreToDeadLocation(dynamic_cast<StoreInst*>(inst), loop);
|
||||
|
||||
case Instruction::Kind::kCall:
|
||||
// 函数调用通常有副作用
|
||||
if (sideEffectAnalysis) {
|
||||
return !sideEffectAnalysis->hasSideEffect(inst);
|
||||
}
|
||||
return false; // 保守地认为函数调用有效果
|
||||
|
||||
case Instruction::Kind::kReturn:
|
||||
case Instruction::Kind::kBr:
|
||||
case Instruction::Kind::kCondBr:
|
||||
// 控制流指令不是死代码
|
||||
return false;
|
||||
|
||||
default:
|
||||
// 其他指令检查其使用是否有效
|
||||
break;
|
||||
}
|
||||
|
||||
// 检查指令的使用
|
||||
if (inst->getUses().empty()) {
|
||||
return true; // 没有使用的纯指令是死代码
|
||||
}
|
||||
|
||||
// 递归检查所有使用
|
||||
for (auto use : inst->getUses()) {
|
||||
auto user = use->getUser();
|
||||
auto* userInst = dynamic_cast<Instruction*>(user);
|
||||
|
||||
if (!userInst) {
|
||||
return false; // 被非指令使用
|
||||
}
|
||||
|
||||
if (!loop->contains(userInst->getParent())) {
|
||||
return false; // 在循环外被使用
|
||||
}
|
||||
|
||||
// 递归检查使用者
|
||||
if (!isInstructionEffectivelyDead(userInst, loop, maxDepth - 1)) {
|
||||
return false; // 存在有效使用
|
||||
}
|
||||
}
|
||||
|
||||
return true; // 所有使用都是死代码
|
||||
}
|
||||
|
||||
// 原有的函数保持兼容,但现在使用增强的死代码分析
|
||||
bool InductionVariableEliminationContext::isInstructionDeadOrInternalOnly(Instruction* inst, Loop* loop) {
|
||||
return isInstructionEffectivelyDead(inst, loop, 5);
|
||||
}
|
||||
|
||||
// 检查store指令是否有后续的load操作
|
||||
bool InductionVariableEliminationContext::hasSubsequentLoad(StoreInst* store, Loop* loop) {
|
||||
if (!aliasAnalysis) {
|
||||
// 没有别名分析,保守地假设有后续读取
|
||||
return true;
|
||||
}
|
||||
|
||||
Value* storePtr = store->getPointer();
|
||||
const MemoryLocation* storeLoc = aliasAnalysis->getMemoryLocation(storePtr);
|
||||
|
||||
if (!storeLoc) {
|
||||
// 无法确定内存位置,保守处理
|
||||
return true;
|
||||
}
|
||||
|
||||
// 在循环中和循环后查找对同一位置的load操作
|
||||
std::vector<BasicBlock*> blocksToCheck;
|
||||
|
||||
// 添加循环内的所有基本块
|
||||
for (auto* bb : loop->getBlocks()) {
|
||||
blocksToCheck.push_back(bb);
|
||||
}
|
||||
|
||||
// 添加循环的退出块
|
||||
auto exitBlocks = loop->getExitBlocks();
|
||||
for (auto* exitBB : exitBlocks) {
|
||||
blocksToCheck.push_back(exitBB);
|
||||
}
|
||||
|
||||
// 搜索load操作
|
||||
for (auto* bb : blocksToCheck) {
|
||||
for (auto& inst : bb->getInstructions()) {
|
||||
if (inst->getKind() == Instruction::Kind::kLoad) {
|
||||
LoadInst* loadInst = static_cast<LoadInst*>(inst.get());
|
||||
Value* loadPtr = loadInst->getPointer();
|
||||
const MemoryLocation* loadLoc = aliasAnalysis->getMemoryLocation(loadPtr);
|
||||
|
||||
if (loadLoc && aliasAnalysis->queryAlias(storePtr, loadPtr) != AliasType::NO_ALIAS) {
|
||||
// 找到可能读取同一位置的load操作
|
||||
if (DEBUG) {
|
||||
std::cout << " 找到后续load操作: " << loadInst->getName() << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 检查是否通过函数调用间接访问
|
||||
for (auto* bb : blocksToCheck) {
|
||||
for (auto& inst : bb->getInstructions()) {
|
||||
if (inst->getKind() == Instruction::Kind::kCall) {
|
||||
CallInst* callInst = static_cast<CallInst*>(inst.get());
|
||||
if (callInst && sideEffectAnalysis && sideEffectAnalysis->hasSideEffect(callInst)) {
|
||||
// 函数调用可能间接读取内存
|
||||
if (DEBUG) {
|
||||
std::cout << " 函数调用可能读取内存: " << callInst->getName() << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " 未找到后续load操作" << std::endl;
|
||||
}
|
||||
return false; // 没有找到后续读取
|
||||
}
|
||||
|
||||
// 检查指令是否在循环外有使用
|
||||
bool InductionVariableEliminationContext::hasUsageOutsideLoop(Instruction* inst, Loop* loop) {
|
||||
for (auto use : inst->getUses()) {
|
||||
auto user = use->getUser();
|
||||
auto* userInst = dynamic_cast<Instruction*>(user);
|
||||
|
||||
if (!userInst) {
|
||||
// 被非指令使用,可能在循环外
|
||||
return true;
|
||||
}
|
||||
|
||||
if (!loop->contains(userInst->getParent())) {
|
||||
// 在循环外被使用
|
||||
if (DEBUG) {
|
||||
std::cout << " 指令 " << inst->getName() << " 在循环外被 "
|
||||
<< userInst->getName() << " 使用" << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false; // 没有循环外使用
|
||||
}
|
||||
|
||||
// 检查store指令是否在循环外有后续的load操作
|
||||
bool InductionVariableEliminationContext::hasSubsequentLoadOutsideLoop(StoreInst* store, Loop* loop) {
|
||||
if (!aliasAnalysis) {
|
||||
// 没有别名分析,保守地假设有后续读取
|
||||
return true;
|
||||
}
|
||||
|
||||
Value* storePtr = store->getPointer();
|
||||
|
||||
// 检查循环的退出块及其后继
|
||||
auto exitBlocks = loop->getExitBlocks();
|
||||
std::set<BasicBlock*> visitedBlocks;
|
||||
|
||||
for (auto* exitBB : exitBlocks) {
|
||||
if (hasLoadInSubtree(exitBB, storePtr, visitedBlocks)) {
|
||||
if (DEBUG) {
|
||||
std::cout << " 找到循环外的后续load操作" << std::endl;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false; // 没有找到循环外的后续读取
|
||||
}
|
||||
|
||||
// 递归检查基本块子树中是否有对指定位置的load操作
|
||||
bool InductionVariableEliminationContext::hasLoadInSubtree(BasicBlock* bb, Value* ptr, std::set<BasicBlock*>& visited) {
|
||||
if (visited.count(bb) > 0) {
|
||||
return false; // 已经访问过,避免无限循环
|
||||
}
|
||||
visited.insert(bb);
|
||||
|
||||
// 检查当前基本块中的指令
|
||||
for (auto& inst : bb->getInstructions()) {
|
||||
if (inst->getKind() == Instruction::Kind::kLoad) {
|
||||
LoadInst* loadInst = static_cast<LoadInst*>(inst.get());
|
||||
if (aliasAnalysis && aliasAnalysis->queryAlias(ptr, loadInst->getPointer()) != AliasType::NO_ALIAS) {
|
||||
return true; // 找到了对相同或别名位置的load
|
||||
}
|
||||
} else if (inst->getKind() == Instruction::Kind::kCall) {
|
||||
// 函数调用可能间接读取内存
|
||||
CallInst* callInst = static_cast<CallInst*>(inst.get());
|
||||
if (sideEffectAnalysis && sideEffectAnalysis->hasSideEffect(callInst)) {
|
||||
return true; // 保守地认为函数调用可能读取内存
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 递归检查后继基本块(限制深度以避免过度搜索)
|
||||
static int searchDepth = 0;
|
||||
if (searchDepth < 10) { // 限制搜索深度
|
||||
searchDepth++;
|
||||
for (auto* succ : bb->getSuccessors()) {
|
||||
if (hasLoadInSubtree(succ, ptr, visited)) {
|
||||
searchDepth--;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
searchDepth--;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Collects the instructions inside `loop` that directly consume `phiInst`.
//
// Each instruction appears at most once in the result, in the order in which
// it is first met in the phi's use list. An instruction that references the
// phi through several operands would otherwise be listed once per use, and a
// later pass that deletes every listed instruction would then touch an
// already deleted object.
std::vector<Instruction*> InductionVariableEliminationContext::collectRelatedInstructions(
    PhiInst* phiInst, Loop* loop) {
  std::vector<Instruction*> relatedInsts;
  std::set<Instruction*> alreadyCollected;

  for (const auto& use : phiInst->getUses()) {
    auto* userInst = dynamic_cast<Instruction*>(use->getUser());

    // Only instructions located inside the loop are of interest.
    if (!userInst || !loop->contains(userInst->getParent())) {
      continue;
    }

    // insert() tells us whether this user was seen for the first time.
    if (alreadyCollected.insert(userInst).second) {
      relatedInsts.push_back(userInst);
    }
  }

  return relatedInsts;
}
|
||||
|
||||
void InductionVariableEliminationContext::analyzeSafetyForElimination() {
|
||||
if (DEBUG) {
|
||||
std::cout << " === Phase 2: Analyzing Safety for Elimination ===" << std::endl;
|
||||
}
|
||||
|
||||
// 为每个死归纳变量检查消除的安全性
|
||||
for (auto& deadIV : deadIVs) {
|
||||
bool isSafe = isSafeToEliminate(deadIV.get());
|
||||
deadIV->canEliminate = isSafe;
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " Dead IV " << deadIV->phiInst->getName()
|
||||
<< ": " << (isSafe ? "SAFE" : "UNSAFE") << " to eliminate" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
size_t safeCount = 0;
|
||||
for (const auto& deadIV : deadIVs) {
|
||||
if (deadIV->canEliminate) safeCount++;
|
||||
}
|
||||
std::cout << " === End Phase 2: " << safeCount << " of " << deadIVs.size()
|
||||
<< " variables are safe to eliminate ===" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
bool InductionVariableEliminationContext::isSafeToEliminate(const DeadInductionVariable* deadIV) {
|
||||
// 1. 确保归纳变量在循环头
|
||||
if (deadIV->phiInst->getParent() != deadIV->containingLoop->getHeader()) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Unsafe: phi not in loop header" << std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// 2. 确保相关指令都在循环内
|
||||
for (auto* inst : deadIV->relatedInsts) {
|
||||
if (!deadIV->containingLoop->contains(inst->getParent())) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Unsafe: related instruction outside loop" << std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// 3. 确保没有副作用
|
||||
for (auto* inst : deadIV->relatedInsts) {
|
||||
if (sideEffectAnalysis) {
|
||||
// 使用副作用分析进行精确检查
|
||||
if (sideEffectAnalysis->hasSideEffect(inst)) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Unsafe: related instruction " << inst->getName()
|
||||
<< " has side effects" << std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
// 没有副作用分析时使用保守策略:只允许基本算术运算
|
||||
auto kind = inst->getKind();
|
||||
if (kind != Instruction::Kind::kAdd &&
|
||||
kind != Instruction::Kind::kSub &&
|
||||
kind != Instruction::Kind::kMul) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Unsafe: related instruction may have side effects (conservative)" << std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 4. 确保不影响循环的退出条件
|
||||
for (BasicBlock* exitingBB : deadIV->containingLoop->getExitingBlocks()) {
|
||||
auto terminatorIt = exitingBB->terminator();
|
||||
if (terminatorIt != exitingBB->end()) {
|
||||
Instruction* terminator = terminatorIt->get();
|
||||
if (terminator) {
|
||||
for (size_t i = 0; i < terminator->getNumOperands(); ++i) {
|
||||
if (terminator->getOperand(i) == deadIV->phiInst) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Unsafe: phi used in loop exit condition" << std::endl;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool InductionVariableEliminationContext::performInductionVariableElimination() {
|
||||
if (DEBUG) {
|
||||
std::cout << " === Phase 3: Performing Induction Variable Elimination ===" << std::endl;
|
||||
}
|
||||
|
||||
bool modified = false;
|
||||
|
||||
for (auto& deadIV : deadIVs) {
|
||||
if (!deadIV->canEliminate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " Eliminating dead IV: " << deadIV->phiInst->getName() << std::endl;
|
||||
}
|
||||
|
||||
if (eliminateDeadInductionVariable(deadIV.get())) {
|
||||
if (DEBUG) {
|
||||
std::cout << " Successfully eliminated: " << deadIV->phiInst->getName() << std::endl;
|
||||
}
|
||||
modified = true;
|
||||
} else {
|
||||
if (DEBUG) {
|
||||
std::cout << " Failed to eliminate: " << deadIV->phiInst->getName() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (DEBUG) {
|
||||
std::cout << " === End Phase 3: " << (modified ? "Eliminations performed" : "No eliminations") << " ===" << std::endl;
|
||||
}
|
||||
|
||||
return modified;
|
||||
}
|
||||
|
||||
// Removes one dead induction variable from the IR: first every related
// instruction, then the phi itself.
//
// Returns true when the phi was found in its parent block and removed, false
// when it could not be located.
//
// NOTE(review): this assumes SysYIROptUtils::usedelete unlinks and destroys
// the instruction, as the disabled manual erase calls suggest. For that
// reason every name is copied before the deletion, and an instruction that is
// listed more than once is processed only once.
bool InductionVariableEliminationContext::eliminateDeadInductionVariable(DeadInductionVariable* deadIV) {
  // 1. Remove all related instructions.
  std::set<Instruction*> handled;
  for (auto* inst : deadIV->relatedInsts) {
    // A repeated pointer refers to an instruction that may already be gone;
    // skip it before touching it.
    if (!handled.insert(inst).second) {
      continue;
    }

    auto* bb = inst->getParent();
    auto it = bb->findInstIterator(inst);
    if (it != bb->end()) {
      const std::string instName = inst->getName();
      SysYIROptUtils::usedelete(it);
      if (DEBUG) {
        std::cout << " Removed related instruction: " << instName << std::endl;
      }
    }
  }

  // 2. Remove the phi instruction.
  auto* phi = deadIV->phiInst;
  auto* bb = phi->getParent();
  auto it = bb->findInstIterator(phi);
  if (it != bb->end()) {
    const std::string phiName = phi->getName();
    SysYIROptUtils::usedelete(it);
    if (DEBUG) {
      std::cout << " Removed phi instruction: " << phiName << std::endl;
    }
    return true;
  }

  return false;
}
|
||||
|
||||
void InductionVariableEliminationContext::printDebugInfo() {
|
||||
if (!DEBUG) return;
|
||||
|
||||
std::cout << "\n=== Induction Variable Elimination Summary ===" << std::endl;
|
||||
std::cout << "Total dead IVs found: " << deadIVs.size() << std::endl;
|
||||
|
||||
size_t eliminatedCount = 0;
|
||||
for (auto& [loop, loopDeadIVs] : loopToDeadIVs) {
|
||||
size_t loopEliminatedCount = 0;
|
||||
for (auto* deadIV : loopDeadIVs) {
|
||||
if (deadIV->canEliminate) {
|
||||
loopEliminatedCount++;
|
||||
eliminatedCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (loopEliminatedCount > 0) {
|
||||
std::cout << "Loop " << loop->getName() << ": " << loopEliminatedCount
|
||||
<< " of " << loopDeadIVs.size() << " IVs eliminated" << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
std::cout << "Total eliminated: " << eliminatedCount << " of " << deadIVs.size() << std::endl;
|
||||
std::cout << "=============================================" << std::endl;
|
||||
}
|
||||
|
||||
} // namespace sysy
|
||||
1121
src/midend/Pass/Optimize/LoopStrengthReduction.cpp
Normal file
1121
src/midend/Pass/Optimize/LoopStrengthReduction.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -15,6 +15,8 @@
|
||||
#include "LargeArrayToGlobal.h"
|
||||
#include "LoopNormalization.h"
|
||||
#include "LICM.h"
|
||||
#include "LoopStrengthReduction.h"
|
||||
#include "InductionVariableElimination.h"
|
||||
#include "Pass.h"
|
||||
#include <iostream>
|
||||
#include <queue>
|
||||
@ -70,6 +72,8 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
|
||||
registerOptimizationPass<Mem2Reg>(builderIR);
|
||||
registerOptimizationPass<LoopNormalizationPass>(builderIR);
|
||||
registerOptimizationPass<LICM>(builderIR);
|
||||
registerOptimizationPass<LoopStrengthReduction>(builderIR);
|
||||
registerOptimizationPass<InductionVariableElimination>();
|
||||
registerOptimizationPass<Reg2Mem>(builderIR);
|
||||
|
||||
registerOptimizationPass<SCCP>(builderIR);
|
||||
@ -136,17 +140,19 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
|
||||
|
||||
this->clearPasses();
|
||||
this->addPass(&LoopNormalizationPass::ID);
|
||||
this->addPass(&InductionVariableElimination::ID);
|
||||
this->addPass(&LICM::ID);
|
||||
this->addPass(&LoopStrengthReduction::ID);
|
||||
this->run();
|
||||
|
||||
if(DEBUG) {
|
||||
std::cout << "=== IR After Loop Normalization and LICM Optimizations ===\n";
|
||||
std::cout << "=== IR After Loop Normalization, LICM, and Strength Reduction Optimizations ===\n";
|
||||
printPasses();
|
||||
}
|
||||
|
||||
this->clearPasses();
|
||||
this->addPass(&Reg2Mem::ID);
|
||||
this->run();
|
||||
// this->clearPasses();
|
||||
// this->addPass(&Reg2Mem::ID);
|
||||
// this->run();
|
||||
|
||||
if(DEBUG) {
|
||||
std::cout << "=== IR After Reg2Mem Optimizations ===\n";
|
||||
|
||||
@ -262,10 +262,12 @@ void SysYIRGenerator::compute() {
|
||||
}
|
||||
|
||||
// 弹出BinaryExpStack的表达式
|
||||
while(begin < end) {
|
||||
int count = end - begin;
|
||||
for (int i = 0; i < count; i++) {
|
||||
BinaryExpStack.pop_back();
|
||||
BinaryExpLenStack.back()--;
|
||||
end--;
|
||||
}
|
||||
if (!BinaryExpLenStack.empty()) {
|
||||
BinaryExpLenStack.back() -= count;
|
||||
}
|
||||
|
||||
// 计算后缀表达式
|
||||
|
||||
@ -240,7 +240,9 @@ void SysYPrinter::printInst(Instruction *pInst) {
|
||||
case Kind::kMul:
|
||||
case Kind::kDiv:
|
||||
case Kind::kRem:
|
||||
case Kind::kSRA:
|
||||
case Kind::kSrl:
|
||||
case Kind::kSll:
|
||||
case Kind::kSra:
|
||||
case Kind::kMulh:
|
||||
case Kind::kFAdd:
|
||||
case Kind::kFSub:
|
||||
@ -274,7 +276,9 @@ void SysYPrinter::printInst(Instruction *pInst) {
|
||||
case Kind::kMul: std::cout << "mul"; break;
|
||||
case Kind::kDiv: std::cout << "sdiv"; break;
|
||||
case Kind::kRem: std::cout << "srem"; break;
|
||||
case Kind::kSRA: std::cout << "ashr"; break;
|
||||
case Kind::kSrl: std::cout << "lshr"; break;
|
||||
case Kind::kSll: std::cout << "shl"; break;
|
||||
case Kind::kSra: std::cout << "ashr"; break;
|
||||
case Kind::kMulh: std::cout << "mulh"; break;
|
||||
case Kind::kFAdd: std::cout << "fadd"; break;
|
||||
case Kind::kFSub: std::cout << "fsub"; break;
|
||||
|
||||
@ -35,7 +35,7 @@ void usage(int code) {
|
||||
"Supported options:\n"
|
||||
" -h \tprint help message and exit\n"
|
||||
" -f \tpretty-format the input file\n"
|
||||
" -s {ast,ir,asm,llvmir,asmd,ird}\tstop after generating AST/IR/Assembly\n"
|
||||
" -s {ast,ir,asm,asmd,ird}\tstop after generating AST/IR/Assembly\n"
|
||||
" -S \tcompile to assembly (.s file)\n"
|
||||
" -o <file>\tplace the output into <file>\n"
|
||||
" -O<level>\tenable optimization at <level> (e.g., -O0, -O1)\n";
|
||||
|
||||
Reference in New Issue
Block a user