Merge branch 'backend-rec' into backend

2025-07-28 23:40:58 +08:00
parent fcc3806342 792dc9c1f6
commit b0cecca081
6 changed files with 296 additions and 184 deletions
--- a/src/RISCv64AsmPrinter.cpp
+++ b/src/RISCv64AsmPrinter.cpp
@ -35,15 +35,26 @@ void RISCv64AsmPrinter::run(std::ostream& os, bool debug) {

 void RISCv64AsmPrinter::printPrologue() {
    StackFrameInfo& frame_info = MFunc->getFrameInfo();
-    // The prologue must reserve 16 bytes for saving ra and s0
-    int total_stack_size = frame_info.locals_size + frame_info.spill_size + 16;
+    // Compute the total stack frame size.
+    // It has three parts: the locals area, the register spill area, and the area reserved for callee-saved registers.
+    // On top of that comes a fixed 16 bytes for saving ra and s0.
+    int total_stack_size = frame_info.locals_size + 
+                           frame_info.spill_size + 
+                           frame_info.callee_saved_size + 
+                           16; 
+    
+    // Round the frame size up to a multiple of 16 so that sp stays 16-byte aligned
    int aligned_stack_size = (total_stack_size + 15) & ~15;
-    frame_info.total_size = aligned_stack_size;
+    frame_info.total_size = aligned_stack_size; // record the final frame size in the frame info

+    // Emit instructions only when stack space has to be allocated
    if (aligned_stack_size > 0) {
+        // 1. Allocate the whole frame at once. NOTE(review): the size is emitted as a raw immediate with no range check - confirm large frames are handled elsewhere
        *OS << "    addi sp, sp, -" << aligned_stack_size << "\n";
+        // 2. Save ra and s0 in the two highest 8-byte slots of the new frame (sp + size - 8 and sp + size - 16)
        *OS << "    sd ra, " << (aligned_stack_size - 8) << "(sp)\n";
        *OS << "    sd s0, " << (aligned_stack_size - 16) << "(sp)\n";
+        // 3. Set the new frame pointer s0 to the high-address end of the frame (sp + size)
        *OS << "    addi s0, sp, " << aligned_stack_size << "\n";
    }
 }
--- a/src/RISCv64Passes.cpp
+++ b/src/RISCv64Passes.cpp
@ -52,27 +52,25 @@ void PostRA_Scheduler::runOnMachineFunction(MachineFunction* mfunc) {
 }

 void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
+    // 【最终方案】: 此 Pass 负责分析、分配栈空间并插入 callee-saved 寄存器的保存/恢复指令。
+    // 它通过与 FrameInfo 协作，确保为 callee-saved 寄存器分配的空间与局部变量/溢出槽的空间不冲突。
+    // 这样做可以使生成的 sd/ld 指令能被后续的优化 Pass (如 PostRA-Scheduler) 处理。
+
    StackFrameInfo& frame_info = mfunc->getFrameInfo();
    std::set<PhysicalReg> used_callee_saved;

-    // 1. 扫描所有指令，找出被使用的s寄存器
+    // 1. 扫描所有指令，找出被使用的s寄存器 (s1-s11)
    for (auto& mbb : mfunc->getBlocks()) {
        for (auto& instr : mbb->getInstructions()) {
            for (auto& op : instr->getOperands()) {
-                
-                // 辅助Lambda，用于检查和插入寄存器
                auto check_and_insert_reg = [&](RegOperand* reg_op) {
                    if (!reg_op->isVirtual()) {
                        PhysicalReg preg = reg_op->getPReg();
-                        // --- 关键检查点 ---
-                        // 必须严格判断是否在 s0-s11 的范围内。
-                        // a0, t0 等寄存器绝对不应被视为被调用者保存寄存器。
-                        if (preg >= PhysicalReg::S0 && preg <= PhysicalReg::S11) {
+                        if (preg >= PhysicalReg::S1 && preg <= PhysicalReg::S11) {
                            used_callee_saved.insert(preg);
                        }
                    }
                };
-
                if (op->getKind() == MachineOperand::KIND_REG) {
                    check_and_insert_reg(static_cast<RegOperand*>(op.get()));
                } else if (op->getKind() == MachineOperand::KIND_MEM) {
@ -81,66 +79,82 @@ void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
            }
        }
    }
-    
-    // 如果没有使用s寄存器（除了可能作为帧指针的s0），则无需操作
-    if (used_callee_saved.empty() || (used_callee_saved.size() == 1 && used_callee_saved.count(PhysicalReg::S0))) {
-        return;
+
+    if (used_callee_saved.empty()) {
+        frame_info.callee_saved_size = 0; // 确保大小被初始化
+        return; // 无需操作
    }

-    // 将结果存入StackFrameInfo，供后续使用
-    frame_info.used_callee_saved_regs = used_callee_saved;
+    // 2. 计算为 callee-saved 寄存器分配的栈空间
+    //    这里的关键是，偏移的基准点要在局部变量和溢出槽之下。
+    int callee_saved_size = used_callee_saved.size() * 8;
+    frame_info.callee_saved_size = callee_saved_size; // 将大小存入 FrameInfo
+
+    // 3. 计算无冲突的栈偏移
+    //    栈向下增长，所以偏移是负数。
+    //    ra/s0 占用 -8 和 -16。局部变量和溢出区在它们之下。callee-saved 区在更下方。
+    //    我们使用相对于 s0 的偏移。s0 将指向栈顶 (sp + total_size)。
+    int base_offset = -16 - frame_info.locals_size - frame_info.spill_size;

-    // 2. 在函数序言插入保存指令
-    MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
-    auto& entry_instrs = entry_block->getInstructions();
-    auto prologue_end = entry_instrs.begin();
-    
-    // 找到序言结束的位置（通常是addi s0, sp, size之后）
-    for (auto it = entry_instrs.begin(); it != entry_instrs.end(); ++it) {
-        if ((*it)->getOpcode() == RVOpcodes::ADDI && 
-            (*it)->getOperands()[0]->getKind() == MachineOperand::KIND_REG &&
-            static_cast<RegOperand*>((*it)->getOperands()[0].get())->getPReg() == PhysicalReg::S0) 
-        {
-            prologue_end = std::next(it);
-            break;
-        }
-    }
-    
    // 为了栈帧布局确定性，对寄存器进行排序
    std::vector<PhysicalReg> sorted_regs(used_callee_saved.begin(), used_callee_saved.end());
    std::sort(sorted_regs.begin(), sorted_regs.end());
    
-    int current_offset = -16; // ra和s0已经占用了-8和-16的位置
+    // 4. 在函数序言插入保存指令
+    MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
+    auto& entry_instrs = entry_block->getInstructions();
+    auto prologue_end = entry_instrs.begin();
+    
+    // 找到序言结束的位置（通常是addi s0, sp, size之后，但为了让优化器看到，我们插在更前面）
+    // 合理的位置是在 IR 指令开始之前，即在任何非序言指令（如第一个标签）之前。
+    // 为简单起见，我们直接插入到块的开头，后续重排 pass 会处理。
+    // (更优的实现会寻找一个特定的插入点)
+
+    int current_offset = base_offset;
    for (PhysicalReg reg : sorted_regs) {
-        if (reg == PhysicalReg::S0) continue; // s0已经在序言中处理
-        current_offset -= 8;
        auto sd = std::make_unique<MachineInstr>(RVOpcodes::SD);
        sd->addOperand(std::make_unique<RegOperand>(reg));
        sd->addOperand(std::make_unique<MemOperand>(
-            std::make_unique<RegOperand>(PhysicalReg::S0), // 假设s0是帧指针
+            std::make_unique<RegOperand>(PhysicalReg::S0), // 基址为帧指针 s0
            std::make_unique<ImmOperand>(current_offset)
        ));
-        entry_instrs.insert(prologue_end, std::move(sd));
+        // 从头部插入，但要放在函数标签之后
+        entry_instrs.insert(entry_instrs.begin() + 1, std::move(sd)); 
+        current_offset -= 8;
    }

-    // 3. 在函数结尾（ret之前）插入恢复指令
+    // 5. 【已修复】在函数结尾（ret之前）插入恢复指令，使用反向遍历来避免迭代器失效
    for (auto& mbb : mfunc->getBlocks()) {
+        // 使用手动控制的反向循环
        for (auto it = mbb->getInstructions().begin(); it != mbb->getInstructions().end(); ++it) {
            if ((*it)->getOpcode() == RVOpcodes::RET) {
-                // 以相反的顺序恢复
-                current_offset = -16;
+                // 1. 创建一个临时vector来存储所有需要插入的恢复指令
+                std::vector<std::unique_ptr<MachineInstr>> restore_instrs;
+                
+                int current_offset_load = base_offset;
+                // 以相同的顺序（例如 s1, s2, ...）创建恢复指令
                for (PhysicalReg reg : sorted_regs) {
-                    if (reg == PhysicalReg::S0) continue;
-                    current_offset -= 8;
                    auto ld = std::make_unique<MachineInstr>(RVOpcodes::LD);
                    ld->addOperand(std::make_unique<RegOperand>(reg));
                    ld->addOperand(std::make_unique<MemOperand>(
                        std::make_unique<RegOperand>(PhysicalReg::S0),
-                        std::make_unique<ImmOperand>(current_offset)
+                        std::make_unique<ImmOperand>(current_offset_load)
                    ));
-                    mbb->getInstructions().insert(it, std::move(ld));
+                    restore_instrs.push_back(std::move(ld));
+                    current_offset_load -= 8;
                }
-                break; // 处理完一个基本块的ret即可
+
+                // 2. 使用 make_move_iterator 一次性将所有恢复指令插入到 RET 指令之前
+                //    这可以高效地转移指令的所有权，并且只让迭代器失效一次。
+                if (!restore_instrs.empty()) {
+                    mbb->getInstructions().insert(it, 
+                        std::make_move_iterator(restore_instrs.begin()),
+                        std::make_move_iterator(restore_instrs.end())
+                    );
+                }
+
+                // 找到了RET并处理完毕后，就可以跳出内层循环，继续寻找下一个基本块
+                break; 
            }
        }
    }
--- a/src/RISCv64RegAlloc.cpp
+++ b/src/RISCv64RegAlloc.cpp
@ -1,5 +1,6 @@
 #include "RISCv64RegAlloc.h"
 #include "RISCv64ISel.h"
+#include "RISCv64AsmPrinter.h" // For DEBUG output
 #include <algorithm>
 #include <vector>
 #include <iostream> // For DEBUG output
@ -30,7 +31,21 @@ void RISCv64RegAlloc::run() {
    // 阶段 1: 处理函数调用约定（参数寄存器预着色）
    handleCallingConvention();    
    // 阶段 2: 消除帧索引（为局部变量和栈参数分配栈偏移）
-    eliminateFrameIndices();      
+    eliminateFrameIndices();
+    { // 使用大括号创建一个局部作用域，避免printer变量泄露
+        if (DEBUG) {
+            std::cerr   << "\n===== LLIR after eliminateFrameIndices for function: " 
+                        << MFunc->getName() << " =====\n";
+            // 1. 创建一个 AsmPrinter 实例，传入当前的 MachineFunction
+            RISCv64AsmPrinter printer(MFunc);
+            // 2. 调用 run 方法，将结果打印到标准错误流 (std::cerr)
+            // 3. 必须将 debug 参数设为 true！
+            //    因为此时指令中仍然包含虚拟寄存器 (%vreg)，
+            //    debug模式下的 AsmPrinter 才能正确处理它们而不会报错。
+            printer.run(std::cerr, true);
+            std::cerr << "===== End of LLIR =====\n\n";
+        }
+    }
    // 阶段 3: 活跃性分析
    analyzeLiveness();            
    // 阶段 4: 构建干扰图（包含CALL指令对调用者保存寄存器的影响）
@ -258,41 +273,13 @@ void RISCv64RegAlloc::getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet&
    // JAL 和 JALR 指令定义 ra (x1)
    if (opcode == RVOpcodes::JAL || opcode == RVOpcodes::JALR) {
        // 使用 ra 对应的特殊虚拟寄存器ID
-        def.insert(static_cast<unsigned>(PhysicalReg::RA)); 
+        def.insert(preg_to_vreg_id_map.at(PhysicalReg::RA)); 
        first_reg_is_def = false; // JAL/JALR 的第一个操作数是 ra，已经处理为 def
    }
    
    // 2. CALL 指令的特殊处理
    if (opcode == RVOpcodes::CALL) {
-        // // [协议] 我们约定，ISel生成的CALL指令会遵循以下格式：
-        // // call %vreg_ret, @func_name, %vreg_arg1, %vreg_arg2, ...
-        // // 其中，第一个操作数（如果存在且是vreg）是返回值(def)，函数名是标签，其余是参数(use)。
-        // bool has_return_val = false;
-        // // 1.1 处理返回值 (def)
-        // if (!instr->getOperands().empty() && instr->getOperands().front()->getKind() == MachineOperand::KIND_REG) {
-        //     auto reg_op = static_cast<RegOperand*>(instr->getOperands().front().get());
-        //     if (reg_op->isVirtual()) {
-        //         def.insert(reg_op->getVRegNum());
-        //         has_return_val = true;
-        //     }
-        // }
-
-        // // 1.2 处理参数 (use)
-        // // 遍历所有操作数，跳过返回值(第一个)和函数名标签
-        // for (size_t i = 1; i < instr->getOperands().size(); ++i) {
-        //     auto& op = instr->getOperands()[i];
-        //     if (op->getKind() == MachineOperand::KIND_REG) {
-        //         auto reg_op = static_cast<RegOperand*>(op.get());
-        //         if (reg_op->isVirtual()) {
-        //             // 如果第一个操作数是返回值，则跳过它（因为它不是use）
-        //             if (i == 0 && has_return_val) {
-        //                 continue;
-        //             }
-        //             use.insert(reg_op->getVRegNum());
-        //         }
-        //     }
-        // }
-        // [新增] 根据我们在ISel中定义的新协议，解析操作数列表
+        // 根据 s1 分支 ISel 定义的协议来解析操作数列表
        bool first_reg_operand_is_def = true;
        for (auto& op : instr->getOperands()) {
            if (op->getKind() == MachineOperand::KIND_REG) {
@ -306,54 +293,55 @@ void RISCv64RegAlloc::getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet&
                        // 后续所有寄存器操作数都是参数 (use)
                        use.insert(reg_op->getVRegNum());
                    }
+                } else { // [修复] CALL指令也可能定义物理寄存器（如a0）
+                    if (first_reg_operand_is_def) {
+                         if (preg_to_vreg_id_map.count(reg_op->getPReg())) {
+                            def.insert(preg_to_vreg_id_map.at(reg_op->getPReg()));
+                        }
+                        first_reg_operand_is_def = false;
+                    } else {
+                         if (preg_to_vreg_id_map.count(reg_op->getPReg())) {
+                            use.insert(preg_to_vreg_id_map.at(reg_op->getPReg()));
+                        }
+                    }
                }
            }
        }
-
-        // [新增] CALL指令隐式地使用了通过物理寄存器(a0-a7)传递的参数
-        // 并且隐式地定义了a0(返回值)和所有调用者保存的寄存器。
-        // a0-a7的use/def关系已经被显式操作数和预着色处理。
-        // 调用者保存寄存器的冲突在 buildInterferenceGraph 中处理。
-        // 所以这里只需要解析我们协议中定义的显式操作数即可。
-
-        // **重要**: CALL指令隐式定义（杀死）了所有调用者保存的寄存器。
-        // **这部分逻辑不在getInstrUseDef中直接处理**。
-        // 而是通过`buildInterferenceGraph`中添加物理寄存器节点与活跃虚拟寄存器之间的干扰边来完成。
-        // 这样 Liveness Analysis 可以在虚拟寄存器层面进行，而物理寄存器干扰的复杂性则留给干扰图。
-
-        return; // CALL 指令处理完毕，直接返回
+        return; // CALL 指令处理完毕
    }
    
-    // 3. 对其他所有指令的通用处理逻辑
+    // 3. 对其他所有指令的通用处理逻辑 [已重构和修复]
    for (const auto& op : instr->getOperands()) {
        if (op->getKind() == MachineOperand::KIND_REG) {
            auto reg_op = static_cast<RegOperand*>(op.get());
            
-            if (reg_op->isVirtual()) { // 如果是虚拟寄存器
-                if (first_reg_is_def) { 
+            if (first_reg_is_def) {
+                // --- 处理定义（Def） ---
+                if (reg_op->isVirtual()) {
                    def.insert(reg_op->getVRegNum());
-                    first_reg_is_def = false; 
-                } else {
-                    use.insert(reg_op->getVRegNum());
+                } else { // 物理寄存器也可以是 Def
+                    if (preg_to_vreg_id_map.count(reg_op->getPReg())) {
+                        def.insert(preg_to_vreg_id_map.at(reg_op->getPReg()));
+                    }
                }
-            } else { // 如果是物理寄存器
-                if (!first_reg_is_def) {
-                    PhysicalReg preg = reg_op->getPReg();
-                    // [核心修复] 在访问map前，先检查key是否存在
-                    // 我们只关心那些参与图着色的物理寄存器节点的活跃性
-                    if (preg_to_vreg_id_map.count(preg)) {
-                        // 将物理寄存器对应的特殊ID加入Use集合
-                        use.insert(preg_to_vreg_id_map.at(preg));
+                first_reg_is_def = false; // **关键**：处理完第一个寄存器后，立即更新标志
+            } else {
+                // --- 处理使用（Use） ---
+                if (reg_op->isVirtual()) {
+                    use.insert(reg_op->getVRegNum());
+                } else { // 物理寄存器也可以是 Use
+                    if (preg_to_vreg_id_map.count(reg_op->getPReg())) {
+                        use.insert(preg_to_vreg_id_map.at(reg_op->getPReg()));
                    }
                }
            }
        } else if (op->getKind() == MachineOperand::KIND_MEM) {
+            // [保持不变] 内存操作数的处理逻辑看起来是正确的
            auto mem_op = static_cast<MemOperand*>(op.get());
            auto base_reg = mem_op->getBase();
            if (base_reg->isVirtual()) {
                use.insert(base_reg->getVRegNum());
            } else {
-                // [核心修复] 同样地，检查物理基址寄存器是否存在于map中
                PhysicalReg preg = base_reg->getPReg();
                if (preg_to_vreg_id_map.count(preg)) {
                    use.insert(preg_to_vreg_id_map.at(preg));
@ -362,14 +350,15 @@ void RISCv64RegAlloc::getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet&
            
            // 对于存储内存指令 (SW, SD)，要存储的值（第一个操作数）也是 `use`
            if ((opcode == RVOpcodes::SW || opcode == RVOpcodes::SD) && 
-                 !instr->getOperands().empty() && // 确保有操作数
-                 instr->getOperands().front()->getKind() == MachineOperand::KIND_REG) { // 且第一个操作数是寄存器
+                 !instr->getOperands().empty() &&
+                 instr->getOperands().front()->getKind() == MachineOperand::KIND_REG) {
                auto src_reg_op = static_cast<RegOperand*>(instr->getOperands().front().get());
                if (src_reg_op->isVirtual()) {
                    use.insert(src_reg_op->getVRegNum());
                } else {
-                    // 同样可以处理基址是物理寄存器的情况
-                    use.insert(preg_to_vreg_id_map.at(mem_op->getBase()->getPReg()));
+                    if (preg_to_vreg_id_map.count(src_reg_op->getPReg())) {
+                       use.insert(preg_to_vreg_id_map.at(src_reg_op->getPReg()));
+                    }
                }
            }
        }
@ -414,71 +403,104 @@ unsigned RISCv64RegAlloc::getTypeSizeInBytes(Type* type) {
 }

 void RISCv64RegAlloc::analyzeLiveness() {
-    bool changed = true;
-    int iteration = 0; // [new] iteration counter
-
-    // [new] helper that renders a LiveSet as a string
-    auto liveset_to_string = [](const LiveSet& s) {
-        std::string out = "{";
-        for (unsigned vreg : s) {
-            out += "%vreg" + std::to_string(vreg) + " ";
-        }
-        if (!s.empty()) out.pop_back();
-        out += "}";
-        return out;
-    };
-
-    while (changed) {
-        changed = false;
-        iteration++; // [new] count this iteration
-        if (DEEPDEBUG) {
-            std::cout << "\n===== Liveness Analysis Iteration " << iteration << " =====\n";
-        }
-
-        for (auto it = MFunc->getBlocks().rbegin(); it != MFunc->getBlocks().rend(); ++it) {
-            auto& mbb = *it;
-            LiveSet live_out;
-            for (auto succ : mbb->successors) {
-                if (!succ->getInstructions().empty()) {
-                    auto first_instr = succ->getInstructions().front().get();
-                    if (live_in_map.count(first_instr)) {
-                        live_out.insert(live_in_map.at(first_instr).begin(), live_in_map.at(first_instr).end());
-                    }
+    // === Phase 1: precompute the use and def sets of every basic block ===
+    // Doing this once up front avoids recomputing them inside the fixed-point loop
+    std::map<MachineBasicBlock*, LiveSet> block_uses;
+    std::map<MachineBasicBlock*, LiveSet> block_defs;
+    for (auto& mbb_ptr : MFunc->getBlocks()) {
+        MachineBasicBlock* mbb = mbb_ptr.get();
+        LiveSet uses, defs;
+        for (auto& instr_ptr : mbb->getInstructions()) {
+            LiveSet instr_use, instr_def;
+            getInstrUseDef(instr_ptr.get(), instr_use, instr_def);
+            // use[B] = use[B] U (instr_use - def[B]): only uses not already defined earlier in the block are upward-exposed
+            for (unsigned u : instr_use) {
+                if (defs.find(u) == defs.end()) {
+                    uses.insert(u);
                }
            }
+            // def[B] = def[B] U instr_def
+            defs.insert(instr_def.begin(), instr_def.end());
+        }
+        block_uses[mbb] = uses;
+        block_defs[mbb] = defs;
+    }

-            for (auto instr_it = mbb->getInstructions().rbegin(); instr_it != mbb->getInstructions().rend(); ++instr_it) {
-                MachineInstr* instr = instr_it->get();
-                LiveSet old_live_in = live_in_map[instr];
-                live_out_map[instr] = live_out;
-                
-                LiveSet use, def;
-                getInstrUseDef(instr, use, def);
+    // === Phase 2: iterate the data-flow equations at basic-block granularity until nothing changes ===
+    std::map<MachineBasicBlock*, LiveSet> block_live_in;
+    std::map<MachineBasicBlock*, LiveSet> block_live_out;
+    bool changed = true;
+    while (changed) {
+        changed = false;
+        // Blocks are visited in reverse of their order in the function's block list; a CFG-aware order may converge faster, but this simple order is adequate in most cases
+        for (auto it = MFunc->getBlocks().rbegin(); it != MFunc->getBlocks().rend(); ++it) {
+            auto& mbb = *it;
+            
+            // 2.1 Compute live_out[B] = U_{S in succ(B)} live_in[S]
+            LiveSet new_live_out;
+            for (auto succ : mbb->successors) {
+                new_live_out.insert(block_live_in[succ].begin(), block_live_in[succ].end());
+            }

-                LiveSet live_in = use;
-                LiveSet diff = live_out;
-                for (auto vreg : def) {
-                    diff.erase(vreg);
-                }
-                live_in.insert(diff.begin(), diff.end());
-                live_in_map[instr] = live_in;
-                
-                if (DEEPDEBUG && mbb->getName() == "if_exit.L1") {
-                    std::cout << "  Instr (" << (void*)instr << "): \n"
-                              << "    Use:      " << liveset_to_string(use) << "\n"
-                              << "    Def:      " << liveset_to_string(def) << "\n"
-                              << "    Live Out: " << liveset_to_string(live_out_map[instr]) << "\n"
-                              << "    Live In:  " << liveset_to_string(live_in) << std::endl;
-                }
+            // 2.2 Compute live_in[B] = use[B] U (live_out[B] - def[B])
+            LiveSet live_out_minus_def = new_live_out;
+            for (unsigned d : block_defs[mbb.get()]) {
+                live_out_minus_def.erase(d);
+            }
+            LiveSet new_live_in = block_uses[mbb.get()];
+            new_live_in.insert(live_out_minus_def.begin(), live_out_minus_def.end());

-                live_out = live_in;
-
-                if (live_in_map[instr] != old_live_in) {
-                    changed = true;
-                }
+            // 2.3 Check whether live_in or live_out changed to decide if the fixed point has been reached
+            if (block_live_out[mbb.get()] != new_live_out) {
+                changed = true;
+                block_live_out[mbb.get()] = new_live_out;
+            }
+            if (block_live_in[mbb.get()] != new_live_in) {
+                changed = true;
+                block_live_in[mbb.get()] = new_live_in;
            }
        }
    }
+
+    // === Phase 3: one instruction-granularity pass that fills the final live_in_map and live_out_map ===
+    // The block-level liveness is stable at this point, so a single pass over each block is enough
+    for (auto& mbb_ptr : MFunc->getBlocks()) {
+        MachineBasicBlock* mbb = mbb_ptr.get();
+        LiveSet live_out = block_live_out[mbb]; // start from the converged block-level live_out
+
+        for (auto instr_it = mbb->getInstructions().rbegin(); instr_it != mbb->getInstructions().rend(); ++instr_it) {
+            MachineInstr* instr = instr_it->get();
+            live_out_map[instr] = live_out;
+
+            LiveSet use, def;
+            getInstrUseDef(instr, use, def);
+
+            LiveSet live_in = use;
+            LiveSet diff = live_out;
+            for (auto vreg : def) {
+                diff.erase(vreg);
+            }
+            live_in.insert(diff.begin(), diff.end());
+            live_in_map[instr] = live_in;
+            
+            // This instruction's live_in becomes the live_out of the preceding instruction in the block
+            live_out = live_in;
+        }
+    }
+}
+
+// Helper that writes a register set to `os` on one line as "    <name>: { ... }" for debugging output.
+void RISCv64RegAlloc::printLiveSet(const LiveSet& s, const std::string& name, std::ostream& os) {
+    os << "    " << name << ": { ";
+    for (unsigned vreg : s) {
+        // For readability, IDs at or above PHYS_REG_START_ID (the special IDs standing for physical registers) are printed as preg(<id - PHYS_REG_START_ID>)
+        if (vreg >= static_cast<unsigned>(sysy::PhysicalReg::PHYS_REG_START_ID)) {
+            os << "preg(" << (vreg - static_cast<unsigned>(sysy::PhysicalReg::PHYS_REG_START_ID)) << ") ";
+        } else {
+            os << "%vreg" << vreg << " ";
+        }
+    }
+    os << "}\n";
 }

 void RISCv64RegAlloc::buildInterferenceGraph() {
@ -497,34 +519,74 @@ void RISCv64RegAlloc::buildInterferenceGraph() {
        all_vregs.insert(preg_to_vreg_id_map.at(preg));
    }

-
+    // 初始化干扰图邻接表
    for (auto vreg : all_vregs) { interference_graph[vreg] = {}; }

+    // 创建一个临时的AsmPrinter用于打印指令，方便调试
+    RISCv64AsmPrinter temp_printer(MFunc);
+    temp_printer.setStream(std::cerr);
+
    for (auto& mbb : MFunc->getBlocks()) {
-        for (auto& instr : mbb->getInstructions()) {
+        if (DEEPDEBUG) std::cerr << "--- Building Graph for Basic Block: " << mbb->getName() << " ---\n";
+        for (auto& instr_ptr : mbb->getInstructions()) {
+            MachineInstr* instr = instr_ptr.get();
+            if (DEEPDEBUG) {
+                // 打印当前正在处理的指令
+                std::cerr << "  Instr: ";
+                temp_printer.printInstruction(instr, true); // 使用 true 来打印虚拟寄存器
+            }
+            
            LiveSet def, use;
-            getInstrUseDef(instr.get(), use, def);
-            const LiveSet& live_out = live_out_map.at(instr.get());
+            getInstrUseDef(instr, use, def);
+            const LiveSet& live_out = live_out_map.at(instr);
+
+            // [新增调试逻辑] 打印所有相关的寄存器集合
+            if (DEEPDEBUG) {
+                printLiveSet(use, "Use     ", std::cerr);
+                printLiveSet(def, "Def     ", std::cerr);
+                printLiveSet(live_out, "Live_Out", std::cerr); // 这是我们最关心的信息
+            }

            // 标准干扰图构建：def 与 live_out 中的其他变量干扰
            for (unsigned d : def) {
                for (unsigned l : live_out) {
                    if (d != l) {
+                        // [新增调试逻辑] 打印添加的干扰边及其原因
+                        if (DEEPDEBUG && interference_graph[d].find(l) == interference_graph[d].end()) {
+                           std::cerr << "    Edge (Def-LiveOut): %vreg" << d << " <-> %vreg" << l << "\n";
+                        }
                        interference_graph[d].insert(l);
                        interference_graph[l].insert(d);
                    }
                }
            }

-            // *** 核心修改点：处理 CALL 指令的隐式 def ***
+            // 在非move指令中，def 与 use 互相干扰
+            if (instr->getOpcode() != RVOpcodes::MV) {
+                for (unsigned d : def) {
+                    for (unsigned u : use) {
+                        if (d != u) {
+                            // [新增调试逻辑] 打印添加的干扰边及其原因
+                            if (DEEPDEBUG && interference_graph[d].find(u) == interference_graph[d].end()) {
+                                std::cerr << "    Edge (Def-Use)    : %vreg" << d << " <-> %vreg" << u << "\n";
+                            }
+                            interference_graph[d].insert(u);
+                            interference_graph[u].insert(d);
+                        }
+                    }
+                }
+            }
+
+            // *** 处理 CALL 指令的隐式 def ***
            if (instr->getOpcode() == RVOpcodes::CALL) {
-                if (DEBUG) {
+                // 你的原始CALL调试信息
+                if (DEEPDEBUG) {
                    std::string live_out_str;
                    for (unsigned vreg : live_out) {
                        live_out_str += "%vreg" + std::to_string(vreg) + " ";
                    }
-                    std::cout << "[DEBUG] buildInterferenceGraph: CALL instruction found. Live out set is: {" 
-                            << live_out_str << "}" << std::endl;
+                    std::cerr << "[DEEPDEBUG] buildInterferenceGraph: CALL instruction found. Live out set is: {" 
+                              << live_out_str << "}" << std::endl;
                }
                // CALL 指令会定义（杀死）所有调用者保存的寄存器。
                // 因此，所有调用者保存的物理寄存器都与 CALL 指令的 live_out 中的所有变量冲突。
@ -535,12 +597,17 @@ void RISCv64RegAlloc::buildInterferenceGraph() {
                    // 将这个物理寄存器节点与 CALL 指令的 live_out 中的所有虚拟寄存器添加干扰边。
                    for (unsigned live_vreg_out : live_out) {
                        if (cs_vreg_id != live_vreg_out) { // 避免自己和自己干扰
+                            // [新增调试逻辑] 打印添加的干扰边及其原因
+                            if (DEEPDEBUG && interference_graph[cs_vreg_id].find(live_vreg_out) == interference_graph[cs_vreg_id].end()) {
+                                std::cerr << "    Edge (CALL)       : preg(" << static_cast<int>(cs_reg) << ") <-> %vreg" << live_vreg_out << "\n";
+                            }
                            interference_graph[cs_vreg_id].insert(live_vreg_out);
                            interference_graph[live_vreg_out].insert(cs_vreg_id);
                        }
                    }
                }
            }
+            if (DEEPDEBUG) std::cerr << "  ----------------\n";
        }
    }
 }
@ -548,7 +615,8 @@ void RISCv64RegAlloc::buildInterferenceGraph() {
 void RISCv64RegAlloc::colorGraph() {
    std::vector<unsigned> sorted_vregs;
    for (auto const& [vreg, neighbors] : interference_graph) {
-        if (color_map.find(vreg) == color_map.end()) {
+        // 只为未预着色的虚拟寄存器排序和着色
+        if (color_map.find(vreg) == color_map.end() && vreg < static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID)) {
            sorted_vregs.push_back(vreg);
        }
    }
@ -561,9 +629,18 @@ void RISCv64RegAlloc::colorGraph() {
    // 着色
    for (unsigned vreg : sorted_vregs) {
        std::set<PhysicalReg> used_colors;
-        for (unsigned neighbor : interference_graph.at(vreg)) {
-            if (color_map.count(neighbor)) {
-                used_colors.insert(color_map.at(neighbor));
+        for (unsigned neighbor_id : interference_graph.at(vreg)) {
+            // --- 关键改进 (来自 rec 分支) ---
+            
+            // 情况 1: 邻居是一个已经被着色的虚拟寄存器
+            if (color_map.count(neighbor_id)) {
+                used_colors.insert(color_map.at(neighbor_id));
+            } 
+            // 情况 2: 邻居本身就是一个代表物理寄存器的节点
+            else if (neighbor_id >= static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID)) {
+                // 从特殊ID反向解析出是哪个物理寄存器
+                PhysicalReg neighbor_preg = static_cast<PhysicalReg>(neighbor_id - static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID));
+                used_colors.insert(neighbor_preg);
            }
        }
        
--- a/src/include/RISCv64AsmPrinter.h
+++ b/src/include/RISCv64AsmPrinter.h
@ -12,22 +12,25 @@ namespace sysy {
 class RISCv64AsmPrinter {
 public:
    RISCv64AsmPrinter(MachineFunction* mfunc);
+    
    // Main entry point
    void run(std::ostream& os, bool debug = false);
-
+    void printInstruction(MachineInstr* instr, bool debug = false);
+    // Helper: points OS at the given output stream
+    void setStream(std::ostream& os) { OS = &os; }
 private:
    // Printing of the individual parts
    void printPrologue();
    void printEpilogue();
    void printBasicBlock(MachineBasicBlock* mbb, bool debug = false);
-    void printInstruction(MachineInstr* instr, bool debug = false);
-    
+

    // Helper functions
    std::string regToString(PhysicalReg reg);
    void printOperand(MachineOperand* op);

    MachineFunction* MFunc;
-    std::ostream* OS;
+    std::ostream* OS = nullptr;
+
 };

 } // namespace sysy
--- a/src/include/RISCv64LLIR.h
+++ b/src/include/RISCv64LLIR.h
@ -39,8 +39,8 @@ enum class PhysicalReg {

    // 用于内部表示物理寄存器在干扰图中的节点ID（一个简单的特殊ID，确保不与vreg_counter冲突）
    // 假设 vreg_counter 不会达到这么大的值
-    PHYS_REG_START_ID = 10000, 
-    PHYS_REG_END_ID = PHYS_REG_START_ID + 32, // 预留足够的空间
+    PHYS_REG_START_ID = 100000, 
+    PHYS_REG_END_ID = PHYS_REG_START_ID + 320, // 预留足够的空间
 };

 // RISC-V 指令操作码枚举
@ -195,6 +195,7 @@ struct StackFrameInfo {
    int locals_size = 0; // 仅为AllocaInst分配的大小
    int spill_size = 0; // 仅为溢出分配的大小
    int total_size = 0; // 总大小
+    int callee_saved_size = 0; // 保存寄存器的大小
    std::map<unsigned, int> alloca_offsets; // <AllocaInst的vreg, 栈偏移>
    std::map<unsigned, int> spill_offsets;  // <溢出vreg, 栈偏移>
    std::set<PhysicalReg> used_callee_saved_regs; // 使用的保存寄存器
--- a/src/include/RISCv64RegAlloc.h
+++ b/src/include/RISCv64RegAlloc.h
@ -4,6 +4,9 @@
 #include "RISCv64LLIR.h"
 #include "RISCv64ISel.h" // 包含 RISCv64ISel.h 以访问 ISel 和 Value 类型

+extern int DEBUG;
+extern int DEEPDEBUG;
+
 namespace sysy {

 class RISCv64RegAlloc {
@ -61,6 +64,9 @@ private:
    
    // 用于计算类型大小的辅助函数
    unsigned getTypeSizeInBytes(Type* type);
+
+    // 辅助函数，用于打印集合
+    static void printLiveSet(const LiveSet& s, const std::string& name, std::ostream& os);
    
 };