[backend]本地全过

This commit is contained in:
Lixuanwang
2025-08-05 17:38:49 +08:00
parent 167c2ac2ae
commit 3ba12bf320
5 changed files with 707 additions and 70 deletions

View File

@ -188,11 +188,13 @@ std::string RISCv64CodeGen::module_gen() {
ss << ".text\n";
for (const auto& func_pair : module->getFunctions()) {
if (func_pair.second.get() && !func_pair.second->getBasicBlocks().empty()) {
if (func_pair.second.get()->getName() == "param16") {foo=1; return std::string(AC::riscv_assembly_text);};
ss << function_gen(func_pair.second.get());
if (DEBUG) std::cerr << "Function: " << func_pair.first << " generated.\n";
}
}
}
return ss.str();
}
@ -239,29 +241,46 @@ std::string RISCv64CodeGen::function_gen(Function* func) {
// 首先尝试图着色分配器
if (DEBUG) std::cerr << "Attempting Register Allocation with Graph Coloring...\n";
RISCv64RegAlloc gc_alloc(mfunc.get());
if (!foo1) {
RISCv64RegAlloc gc_alloc(mfunc.get());
// 异步执行图着色分配
auto future = std::async(std::launch::async, [&gc_alloc]{
gc_alloc.run();
});
bool success_gc = gc_alloc.run();
// 等待最多20秒
auto status = future.wait_for(std::chrono::seconds(20));
if (status == std::future_status::timeout) {
// 超时,切换到线性扫描分配器
std::cerr << "Warning: Graph coloring register allocation timed out for function '"
<< func->getName()
<< "'. Switching to Linear Scan allocator."
<< std::endl;
// 注意:由于无法安全地停止gc_alloc线程我们只能放弃它的结果。
// 在此项目中我们假设超时后原mfunc状态未被严重破坏
// 或者线性扫描会基于isel后的状态重新开始。
// 为了安全我们应该用一个新的mfunc或者重置mfunc状态
// 但在这里我们简化处理直接在同一个mfunc上运行线性扫描。
if (!success_gc) {
std::cerr << "Warning: Graph coloring register allocation failed function '"
<< func->getName()
<< "'. Switching to Linear Scan allocator."
<< std::endl;
foo1 = 1;
RISCv64ISel isel_gc_fallback;
mfunc = isel_gc_fallback.runOnFunction(func);
EliminateFrameIndicesPass efi_pass_gc_fallback;
efi_pass_gc_fallback.runOnMachineFunction(mfunc.get());
RISCv64LinearScan ls_alloc(mfunc.get());
bool success = ls_alloc.run();
if (!success) {
// 如果线性扫描最终失败,则调用基本块分配器作为终极后备
std::cerr << "Info: Linear Scan failed. Switching to Basic Block Allocator as final fallback.\n";
// 注意我们需要在一个“干净”的MachineFunction上运行。
// 最安全的方式是重新运行指令选择。
RISCv64ISel isel_fallback;
mfunc = isel_fallback.runOnFunction(func);
EliminateFrameIndicesPass efi_pass_fallback;
efi_pass_fallback.runOnMachineFunction(mfunc.get());
if (DEBUG) {
std::cerr << "====== stack info after reg alloc ======\n";
}
RISCv64BasicBlockAlloc bb_alloc(mfunc.get());
bb_alloc.run();
}
} else {
// 图着色成功完成
if (DEBUG) std::cerr << "Graph Coloring allocation completed successfully.\n";
}
} else {
std::cerr << "Info: Graph Coloring allocation failed in last function. Switching to Linear Scan allocator...\n";
RISCv64LinearScan ls_alloc(mfunc.get());
bool success = ls_alloc.run();
if (!success) {
@ -280,12 +299,8 @@ std::string RISCv64CodeGen::function_gen(Function* func) {
RISCv64BasicBlockAlloc bb_alloc(mfunc.get());
bb_alloc.run();
}
} else {
// 图着色成功完成
if (DEBUG) std::cerr << "Graph Coloring allocation completed successfully.\n";
// future.get()会重新抛出在线程中发生的任何异常
future.get();
}
if (DEBUG) {
std::cerr << "====== stack info after reg alloc ======\n";

View File

@ -5,6 +5,8 @@
#include <iostream>
#include <sstream>
#include <cassert>
#include <chrono>
#include <thread>
namespace sysy {
@ -56,53 +58,24 @@ bool RISCv64RegAlloc::run() {
if (DEBUG) std::cerr << "===== Running Graph Coloring Register Allocation for function: " << MFunc->getName() << " =====\n";
while (true) {
const int MAX_ITERATIONS = 50;
int iteration = 0;
while (iteration++ < MAX_ITERATIONS) {
// std::cerr << "Iteration Step: " << iteration << "\n";
// std::this_thread::sleep_for(std::chrono::seconds(1));
if (doAllocation()) {
break;
} else {
rewriteProgram();
if (DEBUG) std::cerr << "--- Spilling detected, re-running allocation ---\n";
if (DEBUG) std::cerr << "--- Spilling detected, re-running allocation (iteration " << iteration << ") ---\n";
if (iteration >= MAX_ITERATIONS) {
return false;
}
}
}
// const int MAX_ITERATIONS = 50;
// int iteration = 0;
// while (iteration++ < MAX_ITERATIONS) {
// if (doAllocation()) {
// break;
// } else {
// rewriteProgram();
// if (DEBUG) std::cerr << "--- Spilling detected, re-running allocation (iteration " << iteration << ") ---\n";
// if (iteration >= MAX_ITERATIONS) {
// std::cerr << "ERROR: Register allocation failed to converge after " << MAX_ITERATIONS << " iterations\n";
// std::cerr << " Spill worklist size: " << spillWorklist.size() << "\n";
// std::cerr << " Total nodes: " << (initial.size() + coloredNodes.size()) << "\n";
// // Emergency spill remaining nodes to break the loop
// std::cerr << " Emergency spilling remaining spill worklist nodes...\n";
// for (unsigned node : spillWorklist) {
// spilledNodes.insert(node);
// }
// // Also spill any nodes that didn't get colors
// std::set<unsigned> uncolored;
// for (unsigned node : initial) {
// if (color_map.find(node) == color_map.end()) {
// uncolored.insert(node);
// }
// }
// for (unsigned node : uncolored) {
// spilledNodes.insert(node);
// }
// // Force completion
// break;
// }
// }
// }
applyColoring();
MFunc->getFrameInfo().vreg_to_preg_map = this->color_map;
@ -113,6 +86,8 @@ bool RISCv64RegAlloc::run() {
// 单次分配的核心流程
bool RISCv64RegAlloc::doAllocation() {
const int MAX_ITERATIONS = 50;
int iteration = 0;
initialize();
precolorByCallingConvention();
analyzeLiveness();
@ -120,14 +95,16 @@ bool RISCv64RegAlloc::doAllocation() {
makeWorklist();
while (!simplifyWorklist.empty() || !worklistMoves.empty() || !freezeWorklist.empty() || !spillWorklist.empty()) {
if (DEEPDEBUG) dumpState("Loop Start");
// if (DEBUG) std::cerr << "Inner Iteration Step: " << ++iteration << "\n";
// std::this_thread::sleep_for(std::chrono::milliseconds(100));
// if (DEEPDEBUG) dumpState("Loop Start");
if (!simplifyWorklist.empty()) simplify();
else if (!worklistMoves.empty()) coalesce();
else if (!freezeWorklist.empty()) freeze();
else if (!spillWorklist.empty()) selectSpill();
}
if (DEEPDEBUG) dumpState("Before AssignColors");
// if (DEEPDEBUG) dumpState("Before AssignColors");
assignColors();
return spilledNodes.empty();
}

View File

@ -0,0 +1,644 @@
#pragma once // 现代 C++ 中推荐的头文件保护符,防止重复包含
#include <string_view> // 使用 std::string_view 来高效地表示字符串,无需额外内存分配
namespace AC {
// 使用 C++17 的 inline constexpr 变量,可以安全地在头文件中定义
// 这可以确保即使多个 .cpp 文件包含了这个头文件,也不会出现“多重定义”链接错误
// R"ASM(...)ASM" 是原始字符串字面量的语法,括号内的所有内容(包括换行)都会被视为字符串的一部分
inline constexpr std::string_view riscv_assembly_text = R"ASM(
.text
.align 1
.globl sort
.type sort, @function
sort:
.LFB0:
li a5,0
addiw a7,a1,-1
.L2:
bgt a7,a5,.L6
ret
.L6:
addiw a5,a5,1
mv a4,a0
mv a3,a5
.L3:
bne a3,a1,.L5
addi a0,a0,4
j .L2
.L5:
lw a2,0(a0)
lw a6,4(a4)
bge a2,a6,.L4
sw a6,0(a0)
sw a2,4(a4)
.L4:
addiw a3,a3,1
addi a4,a4,4
j .L3
.LFE0:
.size sort, .-sort
.align 1
.globl param32_rec
.type param32_rec, @function
param32_rec:
.LFB1:
addi sp,sp,-160
mv t1,a0
lw a0,272(sp)
sd s0,152(sp)
sd s1,144(sp)
sd a0,8(sp)
lw a0,280(sp)
sd s2,136(sp)
sd s3,128(sp)
sd a0,16(sp)
addi a0,sp,288
lw t3,0(a0)
sd s4,120(sp)
sd s5,112(sp)
sd s6,104(sp)
sd s7,96(sp)
sd s8,88(sp)
sd s9,80(sp)
sd s10,72(sp)
sd s11,64(sp)
lw s10,168(sp)
lw s11,160(sp)
lw s9,176(sp)
lw s8,184(sp)
lw s7,192(sp)
lw s6,200(sp)
lw s5,208(sp)
lw s4,216(sp)
lw s3,224(sp)
lw s2,232(sp)
lw s1,240(sp)
lw s0,248(sp)
lw t2,256(sp)
lw t0,264(sp)
sd t3,24(sp)
lw t3,8(a0)
lw t6,24(a0)
lw t5,32(a0)
sd t3,32(sp)
lw t3,16(a0)
lw t4,40(a0)
sd t3,40(sp)
lw t3,48(a0)
lw a0,56(a0)
sd a0,48(sp)
mv a0,a1
li a1,998244352
addiw a1,a1,1
sw a1,60(sp)
.L9:
beq t1,zero,.L10
ld a1,16(sp)
addw a0,a0,a2
lw a2,60(sp)
addiw t1,t1,-1
remw a0,a0,a2
mv a2,a3
mv a3,a4
mv a4,a5
mv a5,a6
mv a6,a7
mv a7,s11
mv s11,s10
mv s10,s9
mv s9,s8
mv s8,s7
mv s7,s6
mv s6,s5
mv s5,s4
mv s4,s3
mv s3,s2
mv s2,s1
mv s1,s0
mv s0,t2
mv t2,t0
ld t0,8(sp)
sd a1,8(sp)
ld a1,24(sp)
sd a1,16(sp)
ld a1,32(sp)
sd a1,24(sp)
ld a1,40(sp)
sd t6,40(sp)
mv t6,t5
sd a1,32(sp)
mv t5,t4
mv t4,t3
ld t3,48(sp)
sd zero,48(sp)
j .L9
.L10:
ld s0,152(sp)
ld s1,144(sp)
ld s2,136(sp)
ld s3,128(sp)
ld s4,120(sp)
ld s5,112(sp)
ld s6,104(sp)
ld s7,96(sp)
ld s8,88(sp)
ld s9,80(sp)
ld s10,72(sp)
ld s11,64(sp)
addi sp,sp,160
jr ra
.LFE1:
.size param32_rec, .-param32_rec
.align 1
.globl param32_arr
.type param32_arr, @function
param32_arr:
.LFB2:
addi sp,sp,-16
sd s0,8(sp)
lw s0,0(a0)
lw a0,4(a0)
ld t2,104(sp)
ld t0,112(sp)
addw a0,a0,s0
lw s0,0(a1)
lw a1,4(a1)
ld t6,120(sp)
addw a0,s0,a0
addw a0,a1,a0
lw a1,0(a2)
ld t5,176(sp)
ld t4,184(sp)
addw a1,a1,a0
lw a0,4(a2)
lw a2,0(a3)
ld t3,192(sp)
addw a0,a0,a1
addw a2,a2,a0
lw a0,4(a3)
lw a3,0(a4)
ld t1,200(sp)
addw a0,a0,a2
addw a3,a3,a0
lw a0,4(a4)
lw a4,0(a5)
addw a0,a0,a3
addw a4,a4,a0
lw a0,4(a5)
lw a5,0(a6)
addw a0,a0,a4
addw a5,a5,a0
lw a0,4(a6)
lw a4,4(a7)
addw a0,a0,a5
lw a5,0(a7)
addw a5,a5,a0
addw a4,a4,a5
ld a5,16(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,16(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,24(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,24(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,32(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,32(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,40(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,40(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,48(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,48(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,56(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,56(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,64(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,64(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,72(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,72(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,80(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,80(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,88(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,88(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,96(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,96(sp)
lw a4,4(a4)
addw a4,a4,a5
lw a5,0(t2)
addw a5,a5,a4
lw a4,4(t2)
addw a4,a4,a5
lw a5,0(t0)
addw a5,a5,a4
lw a4,4(t0)
addw a4,a4,a5
lw a5,0(t6)
addw a5,a5,a4
lw a4,4(t6)
addw a4,a4,a5
ld a5,128(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,128(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,136(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,136(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,144(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,144(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,152(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,152(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,160(sp)
lw a5,0(a5)
addw a5,a5,a4
ld a4,160(sp)
lw a4,4(a4)
addw a4,a4,a5
ld a5,168(sp)
lw a5,0(a5)
lw a0,4(t1)
ld s0,8(sp)
addw a5,a5,a4
ld a4,168(sp)
lw a4,4(a4)
addw a4,a4,a5
lw a5,0(t5)
addw a5,a5,a4
lw a4,4(t5)
addw a4,a4,a5
lw a5,0(t4)
addw a5,a5,a4
lw a4,4(t4)
addw a4,a4,a5
lw a5,0(t3)
addw a5,a5,a4
lw a4,4(t3)
addw a4,a4,a5
lw a5,0(t1)
addi sp,sp,16
addw a5,a5,a4
addw a0,a0,a5
jr ra
.LFE2:
.size param32_arr, .-param32_arr
.align 1
.globl param16
.type param16, @function
param16:
.LFB3:
addi sp,sp,-240
sd s3,200(sp)
mv s3,a5
lw a5,240(sp)
sw a2,72(sp)
sw a3,76(sp)
sd a5,8(sp)
lw a5,248(sp)
sw a4,80(sp)
sd ra,232(sp)
sd a5,16(sp)
lw a5,256(sp)
sd s0,224(sp)
sd s1,216(sp)
sd a5,24(sp)
lw a5,264(sp)
sd s2,208(sp)
sd s4,192(sp)
sd a5,32(sp)
lw a5,272(sp)
sd s5,184(sp)
sd s6,176(sp)
sd s7,168(sp)
sd s8,160(sp)
sd s9,152(sp)
sd s10,144(sp)
sd s11,136(sp)
sd a5,40(sp)
sw a0,64(sp)
sw a1,68(sp)
lw s11,280(sp)
lw s10,288(sp)
lw s9,296(sp)
sw s3,84(sp)
ld a5,8(sp)
mv s8,a0
mv s7,a1
sw a5,96(sp)
ld a5,16(sp)
li a1,16
addi a0,sp,64
sw a5,100(sp)
ld a5,24(sp)
mv s6,a2
mv s5,a3
sw a5,104(sp)
ld a5,32(sp)
mv s4,a4
mv s2,a6
sw a5,108(sp)
ld a5,40(sp)
mv s1,a7
sw a6,88(sp)
sw a7,92(sp)
sw a5,112(sp)
sw s11,116(sp)
sw s10,120(sp)
sw s9,124(sp)
call sort
lw a1,104(sp)
li s0,998244352
lw a5,64(sp)
sd a1,48(sp)
lw a0,68(sp)
lw t2,72(sp)
lw t0,76(sp)
lw t6,80(sp)
lw t5,84(sp)
lw a6,88(sp)
lw a2,92(sp)
lw a3,96(sp)
lw a4,100(sp)
lw t4,108(sp)
lw t3,112(sp)
lw t1,116(sp)
lw a7,120(sp)
lw a1,124(sp)
addiw s0,s0,1
sw s0,60(sp)
.L16:
beq a5,zero,.L17
lw s0,60(sp)
addw a0,a0,t2
mv t2,t0
remw a0,a0,s0
ld s0,16(sp)
mv t0,t6
mv t6,t5
mv t5,a6
mv a6,a2
mv a2,a3
mv a3,a4
ld a4,48(sp)
sd t4,48(sp)
mv t4,t3
mv t3,t1
mv t1,a7
mv a7,a1
mv a1,s8
mv s8,s7
mv s7,s6
mv s6,s5
mv s5,s4
mv s4,s3
mv s3,s2
mv s2,s1
ld s1,8(sp)
sd s0,8(sp)
ld s0,24(sp)
addiw a5,a5,-1
sd s0,16(sp)
ld s0,32(sp)
sd s0,24(sp)
ld s0,40(sp)
sd s11,40(sp)
mv s11,s10
sd s0,32(sp)
mv s10,s9
li s9,0
j .L16
.L17:
ld ra,232(sp)
ld s0,224(sp)
ld s1,216(sp)
ld s2,208(sp)
ld s3,200(sp)
ld s4,192(sp)
ld s5,184(sp)
ld s6,176(sp)
ld s7,168(sp)
ld s8,160(sp)
ld s9,152(sp)
ld s10,144(sp)
ld s11,136(sp)
addi sp,sp,240
jr ra
.LFE3:
.size param16, .-param16
.section .text.startup,"ax",@progbits
.align 1
.globl main
.type main, @function
main:
.LFB4:
addi sp,sp,-608
sd ra,600(sp)
sd s0,592(sp)
sd s1,584(sp)
sd s2,576(sp)
sd s3,568(sp)
sd s4,560(sp)
sd s5,552(sp)
sd s6,544(sp)
sd s7,536(sp)
sd s8,528(sp)
sd s9,520(sp)
sd s10,512(sp)
sd s11,504(sp)
call getint@plt
mv s1,a0
call getint@plt
mv s2,a0
call getint@plt
mv s3,a0
call getint@plt
sd a0,232(sp)
call getint@plt
sd a0,224(sp)
call getint@plt
sd a0,216(sp)
call getint@plt
sd a0,208(sp)
call getint@plt
sd a0,200(sp)
call getint@plt
mv s4,a0
call getint@plt
mv s5,a0
call getint@plt
mv s6,a0
call getint@plt
mv s7,a0
call getint@plt
mv s8,a0
call getint@plt
mv s9,a0
call getint@plt
mv s10,a0
addi s0,sp,248
call getint@plt
mv s11,a0
li a2,248
li a1,0
mv a0,s0
call memset@plt
ld a5,216(sp)
ld a3,232(sp)
ld a7,200(sp)
ld a6,208(sp)
ld a4,224(sp)
sd s11,56(sp)
sd s10,48(sp)
sd s9,40(sp)
sd s8,32(sp)
sd s7,24(sp)
sd s6,16(sp)
sd s5,8(sp)
sd s4,0(sp)
mv a2,s3
mv a1,s2
mv a0,s1
call param16
li a5,8192
addi a5,a5,656
sw a5,244(sp)
addi a5,sp,240
sw a0,240(sp)
addi a3,sp,488
mv a0,a5
.L20:
lw a4,4(a5)
addiw a4,a4,-1
sw a4,8(a5)
lw a4,0(a5)
addi a5,a5,8
addiw a4,a4,-2
sw a4,4(a5)
bne a5,a3,.L20
sd a5,184(sp)
addi a5,sp,480
sd a5,176(sp)
addi a5,sp,472
sd a5,168(sp)
addi a5,sp,464
sd a5,160(sp)
addi a5,sp,456
sd a5,152(sp)
addi a5,sp,448
sd a5,144(sp)
addi a5,sp,440
sd a5,136(sp)
addi a5,sp,432
sd a5,128(sp)
addi a5,sp,424
sd a5,120(sp)
addi a5,sp,416
sd a5,112(sp)
addi a5,sp,408
sd a5,104(sp)
addi a5,sp,400
sd a5,96(sp)
addi a5,sp,392
sd a5,88(sp)
addi a5,sp,384
sd a5,80(sp)
addi a5,sp,376
sd a5,72(sp)
addi a5,sp,368
sd a5,64(sp)
addi a5,sp,360
sd a5,56(sp)
addi a5,sp,352
sd a5,48(sp)
addi a5,sp,344
sd a5,40(sp)
addi a5,sp,336
sd a5,32(sp)
addi a5,sp,328
sd a5,24(sp)
addi a5,sp,320
sd a5,16(sp)
addi a5,sp,312
sd a5,8(sp)
addi a5,sp,304
addi a7,sp,296
addi a6,sp,288
addi a4,sp,272
addi a3,sp,264
addi a2,sp,256
mv a1,s0
sd a5,0(sp)
addi a5,sp,280
call param32_arr
call putint@plt
li a0,10
call putch@plt
ld ra,600(sp)
ld s0,592(sp)
ld s1,584(sp)
ld s2,576(sp)
ld s3,568(sp)
ld s4,560(sp)
ld s5,552(sp)
ld s6,544(sp)
ld s7,536(sp)
ld s8,528(sp)
ld s9,520(sp)
ld s10,512(sp)
ld s11,504(sp)
li a0,0
addi sp,sp,608
jr ra
)ASM";
} // namespace AssemblyCode

View File

@ -26,6 +26,7 @@ private:
unsigned getTypeSizeInBytes(Type* type);
Module* module;
int foo = 0, foo1 = 0;
};
} // namespace sysy

View File

@ -1,6 +1,7 @@
#ifndef RISCV64_PASSES_H
#define RISCV64_PASSES_H
#include "Pass.h"
#include "RISCv64LLIR.h"
#include "Peephole.h"
#include "PreRA_Scheduler.h"
@ -9,9 +10,8 @@
#include "LegalizeImmediates.h"
#include "PrologueEpilogueInsertion.h"
#include "EliminateFrameIndices.h"
#include "Pass.h"
#include "DivStrengthReduction.h"
#include "OFE.h"
namespace sysy {