From 2e8b564d8fdebf705014b6c989d693dc93156049 Mon Sep 17 00:00:00 2001
From: Lixuanwang <xlwmail@nudt.edu.cn>
Date: Sun, 27 Jul 2025 01:07:08 +0800
Subject: [PATCH] =?UTF-8?q?[backend]=E4=BF=AE=E5=A4=8D=E4=BA=86=E9=80=92?=
 =?UTF-8?q?=E5=BD=92=E5=87=BD=E6=95=B0=E7=9A=84=E8=B0=83=E7=94=A8=E9=97=AE?=
 =?UTF-8?q?=E9=A2=98=EF=BC=8C=E5=BC=95=E5=85=A5=E4=BA=86=E6=96=B0=E7=9A=84?=
 =?UTF-8?q?bug=EF=BC=9F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/RISCv64Passes.cpp     | 90 ++++++++++++++++++++-------------------
 src/RISCv64RegAlloc.cpp   | 26 +++++++++--
 src/include/RISCv64LLIR.h |  1 +
 3 files changed, 70 insertions(+), 47 deletions(-)
diff --git a/src/RISCv64Passes.cpp b/src/RISCv64Passes.cpp
index efa333f..79716de 100644
--- a/src/RISCv64Passes.cpp
+++ b/src/RISCv64Passes.cpp
@@ -52,27 +52,25 @@ void PostRA_Scheduler::runOnMachineFunction(MachineFunction* mfunc) {
 }
 
 void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
+    // [Final design] This pass analyses callee-saved register usage, reserves stack space for those registers and inserts their save/restore instructions.
+    // It cooperates with FrameInfo so that the callee-saved area does not collide with the space of the locals / spill slots.
+    // Emitting them here lets the generated sd/ld instructions be processed by later optimisation passes (e.g. PostRA-Scheduler).
+
     StackFrameInfo& frame_info = mfunc->getFrameInfo();
     std::set<PhysicalReg> used_callee_saved;
 
-    // 1. 扫描所有指令，找出被使用的s寄存器
+    // 1. Scan every instruction and collect the s registers that are used (s1-s11).
     for (auto& mbb : mfunc->getBlocks()) {
         for (auto& instr : mbb->getInstructions()) {
             for (auto& op : instr->getOperands()) {
-                
-                // 辅助Lambda，用于检查和插入寄存器
                 auto check_and_insert_reg = [&](RegOperand* reg_op) {
                     if (!reg_op->isVirtual()) {
                         PhysicalReg preg = reg_op->getPReg();
-                        // --- 关键检查点 ---
-                        // 必须严格判断是否在 s0-s11 的范围内。
-                        // a0, t0 等寄存器绝对不应被视为被调用者保存寄存器。
-                        if (preg >= PhysicalReg::S0 && preg <= PhysicalReg::S11) {
+                        if (preg >= PhysicalReg::S1 && preg <= PhysicalReg::S11) { // s0 is left out: presumably the prologue saves it already - TODO confirm
                             used_callee_saved.insert(preg);
                         }
                     }
                 };
-
                 if (op->getKind() == MachineOperand::KIND_REG) {
                     check_and_insert_reg(static_cast<RegOperand*>(op.get()));
                 } else if (op->getKind() == MachineOperand::KIND_MEM) {
@@ -81,66 +79,72 @@ void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
             }
         }
     }
-    
-    // 如果没有使用s寄存器（除了可能作为帧指针的s0），则无需操作
-    if (used_callee_saved.empty() || (used_callee_saved.size() == 1 && used_callee_saved.count(PhysicalReg::S0))) {
-        return;
+
+    if (used_callee_saved.empty()) {
+        frame_info.callee_saved_size = 0; // make sure the size is initialised
+        return; // nothing to save or restore
     }
 
-    // 将结果存入StackFrameInfo，供后续使用
-    frame_info.used_callee_saved_regs = used_callee_saved;
+    // 2. Compute the stack space needed for the callee-saved registers (8 bytes each).
+    //    The key point is that their offsets start below the locals and the spill slots.
+    int callee_saved_size = used_callee_saved.size() * 8;
+    frame_info.callee_saved_size = callee_saved_size; // publish the size through FrameInfo
+
+    // 3. Compute conflict-free stack offsets.
+    //    The stack grows downwards, so the offsets are negative.
+    //    ra/s0 occupy -8 and -16, locals and spill slots sit below them, and the callee-saved
+    //    area comes last. Offsets are relative to s0, which will point at sp + total_size.
+    int base_offset = -16 - frame_info.locals_size - frame_info.spill_size;
 
-    // 2. 在函数序言插入保存指令
-    MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
-    auto& entry_instrs = entry_block->getInstructions();
-    auto prologue_end = entry_instrs.begin();
-    
-    // 找到序言结束的位置（通常是addi s0, sp, size之后）
-    for (auto it = entry_instrs.begin(); it != entry_instrs.end(); ++it) {
-        if ((*it)->getOpcode() == RVOpcodes::ADDI && 
-            (*it)->getOperands()[0]->getKind() == MachineOperand::KIND_REG &&
-            static_cast<RegOperand*>((*it)->getOperands()[0].get())->getPReg() == PhysicalReg::S0) 
-        {
-            prologue_end = std::next(it);
-            break;
-        }
-    }
-    
     // 为了栈帧布局确定性，对寄存器进行排序
     std::vector<PhysicalReg> sorted_regs(used_callee_saved.begin(), used_callee_saved.end());
     std::sort(sorted_regs.begin(), sorted_regs.end());
     
-    int current_offset = -16; // ra和s0已经占用了-8和-16的位置
+    // 4. Insert the save instructions into the function prologue.
+    MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
+    auto& entry_instrs = entry_block->getInstructions();
+    auto prologue_end = entry_instrs.begin();
+    
+    // The saves are no longer placed at the old "after addi s0, sp, size" prologue point.
+    // For simplicity they go to the front of the entry block, just past its first
+    // instruction; the original author expects a later reordering pass to cope with that.
+    // NOTE(review): the stores address memory through s0 - confirm s0 is set up before them.
+
+    int current_offset = base_offset; // lowest offset already in use; every slot steps 8 below it
     for (PhysicalReg reg : sorted_regs) {
-        if (reg == PhysicalReg::S0) continue; // s0已经在序言中处理
         current_offset -= 8;
         auto sd = std::make_unique<MachineInstr>(RVOpcodes::SD);
         sd->addOperand(std::make_unique<RegOperand>(reg));
         sd->addOperand(std::make_unique<MemOperand>(
-            std::make_unique<RegOperand>(PhysicalReg::S0), // 假设s0是帧指针
+            std::make_unique<RegOperand>(PhysicalReg::S0), // base register is the frame pointer s0
             std::make_unique<ImmOperand>(current_offset)
         ));
-        entry_instrs.insert(prologue_end, std::move(sd));
+        // Keep the block's first instruction (presumably the function label) in front; an empty block takes the save at index 0.
+        entry_instrs.insert(entry_instrs.begin() + (entry_instrs.empty() ? 0 : 1), std::move(sd));
     }
 
-    // 3. 在函数结尾（ret之前）插入恢复指令
+    // 5. Insert the restore instructions in front of every ret, walking each block backwards.
     for (auto& mbb : mfunc->getBlocks()) {
-        for (auto it = mbb->getInstructions().begin(); it != mbb->getInstructions().end(); ++it) {
+        // Hand-driven reverse loop.
+        for (auto it = mbb->getInstructions().end(); it != mbb->getInstructions().begin(); ) {
+            // Step back first: the iteration starts from end().
+            --it;
+
             if ((*it)->getOpcode() == RVOpcodes::RET) {
-                // 以相反的顺序恢复
-                current_offset = -16;
+                int current_offset_load = base_offset; // same starting point as the save loop
+                // Restore in the same register order as the saves (s1 first).
                 for (PhysicalReg reg : sorted_regs) {
-                    if (reg == PhysicalReg::S0) continue;
-                    current_offset -= 8;
+                    current_offset_load -= 8; // step down before use, mirroring the save loop
                     auto ld = std::make_unique<MachineInstr>(RVOpcodes::LD);
                     ld->addOperand(std::make_unique<RegOperand>(reg));
                     ld->addOperand(std::make_unique<MemOperand>(
                         std::make_unique<RegOperand>(PhysicalReg::S0),
-                        std::make_unique<ImmOperand>(current_offset)
+                        std::make_unique<ImmOperand>(current_offset_load)
                     ));
+                    // insert() invalidates 'it', so take the iterator it returns and step
+                    // past the new load: 'it' then designates the RET again.
-                    mbb->getInstructions().insert(it, std::move(ld));
+                    it = mbb->getInstructions().insert(it, std::move(ld)) + 1;
                 }
-                break; // 处理完一个基本块的ret即可
             }
         }
     }
diff --git a/src/RISCv64RegAlloc.cpp b/src/RISCv64RegAlloc.cpp
index 279fea9..c2ccc27 100644
--- a/src/RISCv64RegAlloc.cpp
+++ b/src/RISCv64RegAlloc.cpp
@@ -13,7 +13,8 @@ RISCv64RegAlloc::RISCv64RegAlloc(MachineFunction* mfunc) : MFunc(mfunc) {
         PhysicalReg::T4, PhysicalReg::T5, PhysicalReg::T6,
         PhysicalReg::A0, PhysicalReg::A1, PhysicalReg::A2, PhysicalReg::A3,
         PhysicalReg::A4, PhysicalReg::A5, PhysicalReg::A6, PhysicalReg::A7,
-        PhysicalReg::S0, PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3,
+        // PhysicalReg::S0 is deliberately not listed: it serves as the frame pointer.
+        PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3,
         PhysicalReg::S4, PhysicalReg::S5, PhysicalReg::S6, PhysicalReg::S7,
         PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11,
     };
@@ -457,10 +458,27 @@ void RISCv64RegAlloc::colorGraph() {
     // 着色
     for (unsigned vreg : sorted_vregs) {
         std::set<PhysicalReg> used_colors;
-        for (unsigned neighbor : interference_graph.at(vreg)) {
-            if (color_map.count(neighbor)) {
-                used_colors.insert(color_map.at(neighbor));
+        for (unsigned neighbor_id : interference_graph.at(vreg)) {
+            // --- begin change ---
+            
+            // Case 1: the neighbor is a virtual register that has already been colored
+            if (color_map.count(neighbor_id)) {
+                used_colors.insert(color_map.at(neighbor_id));
+            } 
+            // Case 2: the neighbor is itself a node that stands for a physical register
+            else if (neighbor_id >= static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID)) {
+                // A reverse mapping is needed to get from the special ID back to the PhysicalReg.
+                // A dedicated inverse map (e.g. inv_preg_to_vreg_id_map) would serve that purpose;
+                // here the ID is looked up by scanning preg_to_vreg_id_map instead.
+                for (auto const& [preg, id] : preg_to_vreg_id_map) {
+                    if (id == neighbor_id) {
+                        used_colors.insert(preg);
+                        break;
+                    }
+                }
             }
+            
+            // --- end change ---
         }
         
         bool colored = false;
diff --git a/src/include/RISCv64LLIR.h b/src/include/RISCv64LLIR.h
index 3f68566..5999182 100644
--- a/src/include/RISCv64LLIR.h
+++ b/src/include/RISCv64LLIR.h
@@ -195,6 +195,7 @@ struct StackFrameInfo {
     int locals_size = 0; // 仅为AllocaInst分配的大小
     int spill_size = 0; // 仅为溢出分配的大小
     int total_size = 0; // 总大小
+    int callee_saved_size = 0; // size in bytes of the callee-saved register save area
     std::map<unsigned, int> alloca_offsets; // <AllocaInst的vreg, 栈偏移>
     std::map<unsigned, int> spill_offsets;  // <溢出vreg, 栈偏移>
     std::set<PhysicalReg> used_callee_saved_regs; // 使用的保存寄存器