[backend-LAG]添加新的LargeArrayToGlobal中端Pass，以及栈保护逻辑

[backend]修复了寄存器分配器在处理全物理寄存器操作数时的bug
[backend]更新脚本，现在会拷贝.sy文件到tmp目录
2025-08-04 01:01:29 +08:00 · 2025-08-03 18:37:08 +08:00 · 2025-08-03 17:26:09 +08:00 · 2025-08-03 17:12:39 +08:00 · 2025-08-03 16:40:48 +08:00 · 2025-08-03 16:14:31 +08:00
30 changed files with 1299 additions and 507 deletions
--- a/script/runit-riscv64.sh
+++ b/script/runit-riscv64.sh
@ -60,11 +60,7 @@ display_file_content() {
 # Remove everything under TMP_DIR; the :? guard aborts instead of expanding to /* when TMP_DIR is unset or empty.
 clean_tmp() {
    echo "正在清理临时目录: ${TMP_DIR}"
-    rm -rf "${TMP_DIR}"/*.s \
-           "${TMP_DIR}"/*_sysyc_riscv64 \
-           "${TMP_DIR}"/*_sysyc_riscv64.actual_out \
-           "${TMP_DIR}"/*_sysyc_riscv64.expected_stdout \
-           "${TMP_DIR}"/*_sysyc_riscv64.o
+    rm -rf -- "${TMP_DIR:?}"/*
    echo "清理完成。"
 }

--- a/script/runit-single.sh
+++ b/script/runit-single.sh
@ -21,6 +21,7 @@ QEMU_RISCV64="qemu-riscv64"
 # --- 初始化变量 ---
 EXECUTE_MODE=false
 CLEAN_MODE=false
+OPTIMIZE_FLAG=""      # 用于存储 -O1 标志
 SYSYC_TIMEOUT=10      # sysyc 编译超时 (秒)
 GCC_TIMEOUT=10        # gcc 编译超时 (秒)
 EXEC_TIMEOUT=5        # qemu 自动化执行超时 (秒)
@ -39,6 +40,7 @@ show_help() {
    echo "选项:"
    echo "  -e, --executable         编译为可执行文件并运行测试 (必须)。"
    echo "  -c, --clean              清理 tmp 临时目录下的所有文件。"
+    echo "  -O1                      启用 sysyc 的 -O1 优化。"
    echo "  -sct N                   设置 sysyc 编译超时为 N 秒 (默认: 10)。"
    echo "  -gct N                   设置 gcc 交叉编译超时为 N 秒 (默认: 10)。"
    echo "  -et N                    设置 qemu 自动化执行超时为 N 秒 (默认: 5)。"
@ -80,6 +82,10 @@ while [[ "$#" -gt 0 ]]; do
            CLEAN_MODE=true
            shift # 消耗选项
            ;;
+        -O1)
+            OPTIMIZE_FLAG="-O1"
+            shift # 消耗选项
+            ;;
        -sct)
            if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then SYSYC_TIMEOUT="$2"; shift 2; else echo "错误: -sct 需要一个正整数参数。" >&2; exit 1; fi
            ;;
@ -144,6 +150,7 @@ mkdir -p "${TMP_DIR}"
 TOTAL_CASES=${#SY_FILES[@]}

 echo "SysY 单例测试运行器启动..."
+if [ -n "$OPTIMIZE_FLAG" ]; then echo "优化等级: ${OPTIMIZE_FLAG}"; fi
 echo "超时设置: sysyc=${SYSYC_TIMEOUT}s, gcc=${GCC_TIMEOUT}s, qemu=${EXEC_TIMEOUT}s"
 echo "失败输出最大行数: ${MAX_OUTPUT_LINES}"
 echo ""
@ -173,10 +180,11 @@ for sy_file in "${SY_FILES[@]}"; do
    if [ -f "${output_reference_file}" ]; then
        cp "${output_reference_file}" "${TMP_DIR}/$(basename "${output_reference_file}")"
    fi
-
+    
    # Step 1: compile with sysyc (IR dump first, then assembly; each status is checked right after its command)
    echo "  使用 sysyc 编译 (超时 ${SYSYC_TIMEOUT}s)..."
-    timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -s ir "${sy_file}" > "${ir_file}"
+    timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -s ir "${sy_file}" ${OPTIMIZE_FLAG} > "${ir_file}"
+    # timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -s asmd "${sy_file}" > "${assembly_debug_file}" 2>&1
    SYSYC_STATUS=$?
    if [ $SYSYC_STATUS -eq 124 ]; then
        echo -e "\e[31m错误: SysY 编译 ${sy_file} IR超时\e[0m"
@ -185,12 +193,11 @@ for sy_file in "${SY_FILES[@]}"; do
        echo -e "\e[31m错误: SysY 编译 ${sy_file} IR失败，退出码: ${SYSYC_STATUS}\e[0m"
        is_passed=0
    fi
-    timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -S "${sy_file}" -o "${assembly_file}"
+    timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -S "${sy_file}" ${OPTIMIZE_FLAG} -o "${assembly_file}"  # must stay directly above the $? check below
    if [ $? -ne 0 ]; then
        echo -e "\e[31m错误: SysY 编译失败或超时。\e[0m"
        is_passed=0
    fi
-    # timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -s asmd "${sy_file}" > "${assembly_debug_file}" 2>&1

    # 步骤 2: GCC 编译
    if [ "$is_passed" -eq 1 ]; then
--- a/script/runit.sh
+++ b/script/runit.sh
@ -16,8 +16,8 @@ SYSYC="${BUILD_BIN_DIR}/sysyc"
 GCC_RISCV64="riscv64-linux-gnu-gcc"
 QEMU_RISCV64="qemu-riscv64"

-# --- 新增功能: 初始化变量 ---
 EXECUTE_MODE=false
+OPTIMIZE_FLAG=""      # 用于存储 -O1 标志
 SYSYC_TIMEOUT=10      # sysyc 编译超时 (秒)
 GCC_TIMEOUT=10        # gcc 编译超时 (秒)
 EXEC_TIMEOUT=5        # qemu 执行超时 (秒)
@ -35,6 +35,7 @@ show_help() {
    echo "选项:"
    echo "  -e, --executable         编译为可执行文件并运行测试。"
    echo "  -c, --clean              清理 'tmp' 目录下的所有生成文件。"
+    echo "  -O1                      启用 sysyc 的 -O1 优化。"
    echo "  -set [f|h|p|all]...    指定要运行的测试集 (functional, h_functional, performance)。可多选，默认为 all。"
    echo "  -sct N                   设置 sysyc 编译超时为 N 秒 (默认: 10)。"
    echo "  -gct N                   设置 gcc 交叉编译超时为 N 秒 (默认: 10)。"
@ -85,9 +86,12 @@ while [[ "$#" -gt 0 ]]; do
            clean_tmp
            exit 0
            ;;
+        -O1)
+            OPTIMIZE_FLAG="-O1"
+            shift
+            ;;
        -set)
            shift # 移过 '-set'
-            # 消耗所有后续参数直到遇到下一个选项
            while [[ "$#" -gt 0 && ! "$1" =~ ^- ]]; do
                TEST_SETS+=("$1")
                shift
@ -125,7 +129,6 @@ SET_MAP[p]="performance"

 SEARCH_PATHS=()

-# 如果未指定测试集，或指定了 'all'，则搜索所有目录
 if [ ${#TEST_SETS[@]} -eq 0 ] || [[ " ${TEST_SETS[@]} " =~ " all " ]]; then
    SEARCH_PATHS+=("${TESTDATA_DIR}")
 else
@ -138,13 +141,13 @@ else
    done
 fi

-# 如果没有有效的搜索路径，则退出
 if [ ${#SEARCH_PATHS[@]} -eq 0 ]; then
    echo -e "\e[31m错误: 没有找到有效的测试集目录，测试中止。\e[0m"
    exit 1
 fi

 echo "SysY 测试运行器启动..."
+if [ -n "$OPTIMIZE_FLAG" ]; then echo "优化等级: ${OPTIMIZE_FLAG}"; fi
 echo "输入目录: ${SEARCH_PATHS[@]}"
 echo "临时目录: ${TMP_DIR}"
 echo "执行模式: ${EXECUTE_MODE}"
@ -154,7 +157,6 @@ if ${EXECUTE_MODE}; then
 fi
 echo ""

-# 使用构建好的路径查找 .sy 文件并排序
 sy_files=$(find "${SEARCH_PATHS[@]}" -name "*.sy" | sort -V)
 if [ -z "$sy_files" ]; then
    echo "在指定目录中未找到任何 .sy 文件。"
@ -162,7 +164,6 @@ if [ -z "$sy_files" ]; then
 fi
 TOTAL_CASES=$(echo "$sy_files" | wc -w)

-# --- 修复: 使用 here-string (<<<) 代替管道 (|) 来避免子 shell 问题 ---
 while IFS= read -r sy_file; do
    is_passed=1 # 1 表示通过, 0 表示失败

@ -176,10 +177,8 @@ while IFS= read -r sy_file; do
    output_actual_file="${TMP_DIR}/${output_base_name}_sysyc_riscv64.actual_out"

    echo "正在处理: $(basename "$sy_file") (路径: ${relative_path_no_ext}.sy)"
-
-    # 步骤 1: 使用 sysyc 编译 .sy 到 .s
    echo "  使用 sysyc 编译 (超时 ${SYSYC_TIMEOUT}s)..."
-    timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -S "${sy_file}" -o "${assembly_file}"
+    timeout -s KILL ${SYSYC_TIMEOUT} "${SYSYC}" -S "${sy_file}" -o "${assembly_file}" ${OPTIMIZE_FLAG}
    SYSYC_STATUS=$?
    if [ $SYSYC_STATUS -eq 124 ]; then
        echo -e "\e[31m错误: SysY 编译 ${sy_file} 超时\e[0m"
@ -189,9 +188,7 @@ while IFS= read -r sy_file; do
        is_passed=0
    fi

-    # 只有当 EXECUTE_MODE 为 true 且上一步成功时才继续
    if ${EXECUTE_MODE} && [ "$is_passed" -eq 1 ]; then
-        # 步骤 2: 使用 riscv64-linux-gnu-gcc 编译 .s 到可执行文件
        echo "  使用 gcc 编译 (超时 ${GCC_TIMEOUT}s)..."
        timeout -s KILL ${GCC_TIMEOUT} "${GCC_RISCV64}" "${assembly_file}" -o "${executable_file}" -L"${LIB_DIR}" -lsysy_riscv -static
        GCC_STATUS=$?
@ -213,7 +210,6 @@ while IFS= read -r sy_file; do
        continue
    fi

-    # 步骤 3, 4, 5: 只有当编译都成功时才执行
    if [ "$is_passed" -eq 1 ]; then
        echo "  正在执行 (超时 ${EXEC_TIMEOUT}s)..."
        
--- a/src/backend/RISCv64/CMakeLists.txt
+++ b/src/backend/RISCv64/CMakeLists.txt
@ -12,6 +12,7 @@ add_library(riscv64_backend_lib STATIC
    Optimize/Peephole.cpp
    Optimize/PostRA_Scheduler.cpp
    Optimize/PreRA_Scheduler.cpp
+    Optimize/DivStrengthReduction.cpp
 )

 # 包含后端模块所需的头文件路径
--- a/src/backend/RISCv64/Handler/CalleeSavedHandler.cpp
+++ b/src/backend/RISCv64/Handler/CalleeSavedHandler.cpp
@ -41,7 +41,7 @@ void CalleeSavedHandler::runOnMachineFunction(MachineFunction* mfunc) {
    });
    frame_info.callee_saved_regs_to_store = sorted_regs;

-    // 3. [关键修正] 更新栈帧总大小。
+    // 3. 更新栈帧总大小。
    // 这是初步计算，PEI Pass 会进行最终的对齐。
    frame_info.total_size = frame_info.locals_size + 
                            frame_info.spill_size + 
--- a/src/backend/RISCv64/Handler/EliminateFrameIndices.cpp
+++ b/src/backend/RISCv64/Handler/EliminateFrameIndices.cpp
@ -1,7 +1,7 @@
 #include "EliminateFrameIndices.h"
 #include "RISCv64ISel.h"
 #include <cassert>
-#include <vector> // [新增] 为插入指令而包含
+#include <vector>

 namespace sysy {

@ -33,11 +33,7 @@ void EliminateFrameIndicesPass::runOnMachineFunction(MachineFunction* mfunc) {
    Function* F = mfunc->getFunc();
    RISCv64ISel* isel = mfunc->getISel();
    
-    // 1. [已移除] 不再处理栈传递的参数
-    // 原先处理栈参数 (arg_idx >= 8) 的逻辑已被移除。
-    // 这项职责已完全转移到 PrologueEpilogueInsertionPass，以避免逻辑冲突和错误。
-    // [注释更新] -> 上述注释已过时。根据新方案，我们将在这里处理栈传递的参数，
-    // 以便在寄存器分配前就将数据流显式化，修复溢出逻辑的BUG。
+    // 在这里处理栈传递的参数，以便在寄存器分配前就将数据流显式化，修复溢出逻辑的BUG。

    // 2. 只为局部变量(AllocaInst)分配栈空间和计算偏移量
    // 局部变量从 s0 下方（负偏移量）开始分配，紧接着为 ra 和 s0 预留的16字节之后
@ -50,9 +46,13 @@ void EliminateFrameIndicesPass::runOnMachineFunction(MachineFunction* mfunc) {
                    Type* allocated_type = alloca->getType()->as<PointerType>()->getBaseType();
                    int size = getTypeSizeInBytes(allocated_type);
                    
-                    // RISC-V要求栈地址8字节对齐
-                    size = (size + 7) & ~7;
-                    if (size == 0) size = 8; // 至少分配8字节
+                    // 优化栈帧大小：对于大数组使用4字节对齐，小对象使用8字节对齐
+                    if (size >= 256) {  // 大数组优化
+                        size = (size + 3) & ~3;  // 4字节对齐
+                    } else {
+                        size = (size + 7) & ~7;  // 8字节对齐
+                    }
+                    if (size == 0) size = 4; // 最小4字节

                    local_var_offset += size;
                    unsigned alloca_vreg = isel->getVReg(alloca);
@ -65,13 +65,17 @@ void EliminateFrameIndicesPass::runOnMachineFunction(MachineFunction* mfunc) {
    
    // 记录仅由AllocaInst分配的局部变量的总大小
    frame_info.locals_size = local_var_offset - 16;
+    // 记录局部变量区域分配结束的最终偏移量
+    frame_info.locals_end_offset = -local_var_offset;

-    // 3. [核心修改] 在函数入口为所有栈传递的参数插入load指令
+    // 在函数入口为所有栈传递的参数插入load指令
    // 这个步骤至关重要：它在寄存器分配之前，为这些参数的vreg创建了明确的“定义(def)”指令。
    // 这解决了在高寄存器压力下，当这些vreg被溢出时，`rewriteProgram`找不到其定义点而崩溃的问题。
    if (F && isel && !mfunc->getBlocks().empty()) {
        MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
        std::vector<std::unique_ptr<MachineInstr>> arg_load_instrs;
+        
+        // 步骤 3.1: 生成所有加载栈参数的指令，暂存起来
        int arg_idx = 0;
        for (Argument* arg : F->getArguments()) {
            // 根据ABI，前8个整型/指针参数通过寄存器传递，这里只处理超出部分。
@ -104,17 +108,50 @@ void EliminateFrameIndicesPass::runOnMachineFunction(MachineFunction* mfunc) {
            arg_idx++;
        }
        
-        // 将所有新创建的参数加载指令一次性插入到入口块的起始位置
-        auto& entry_instrs = entry_block->getInstructions();
-        entry_instrs.insert(entry_instrs.begin(),
-                            std::make_move_iterator(arg_load_instrs.begin()),
-                            std::make_move_iterator(arg_load_instrs.end()));
+        //仅当有需要加载的栈参数时，才执行插入逻辑
+        if (!arg_load_instrs.empty()) {
+            auto& entry_instrs = entry_block->getInstructions();
+            auto insertion_point = entry_instrs.begin(); // 默认插入点为块的开头
+            auto last_arg_save_it = entry_instrs.end();
+
+            // 步骤 3.2: 寻找一个安全的插入点。
+            // 遍历入口块的指令，找到最后一条保存“寄存器传递参数”的伪指令。
+            // 这样可以确保我们在所有 a0-a7 参数被保存之后，才执行可能覆盖它们的加载指令。
+            for (auto it = entry_instrs.begin(); it != entry_instrs.end(); ++it) {
+                MachineInstr* instr = it->get();
+                // 寻找代表保存参数到栈的伪指令
+                if (instr->getOpcode() == RVOpcodes::FRAME_STORE_W ||
+                    instr->getOpcode() == RVOpcodes::FRAME_STORE_D ||
+                    instr->getOpcode() == RVOpcodes::FRAME_STORE_F) {
+                    
+                    // 检查被保存的值是否是寄存器参数 (arg_no < 8)
+                    auto& operands = instr->getOperands();
+                    if (operands.empty() || operands[0]->getKind() != MachineOperand::KIND_REG) continue;
+                    
+                    unsigned src_vreg = static_cast<RegOperand*>(operands[0].get())->getVRegNum();
+                    Value* ir_value = isel->getVRegValueMap().count(src_vreg) ? isel->getVRegValueMap().at(src_vreg) : nullptr;
+                    
+                    if (auto ir_arg = dynamic_cast<Argument*>(ir_value)) {
+                        if (ir_arg->getIndex() < 8) {
+                            last_arg_save_it = it; // 找到了一个保存寄存器参数的指令，更新位置
+                        }
+                    }
+                }
+            }
+
+            // 如果找到了这样的保存指令，我们的插入点就在它之后
+            if (last_arg_save_it != entry_instrs.end()) {
+                insertion_point = std::next(last_arg_save_it);
+            }
+
+            // 步骤 3.3: 在计算出的安全位置，一次性插入所有新创建的参数加载指令
+            entry_instrs.insert(insertion_point,
+                                std::make_move_iterator(arg_load_instrs.begin()),
+                                std::make_move_iterator(arg_load_instrs.end()));
+        }
    }

-
    // 4. 遍历所有机器指令，将访问局部变量的伪指令展开为真实指令
-    // 由于处理参数的逻辑已移除，这里的展开现在只针对局部变量，因此是正确的。
-    // [注释更新] -> 上述注释已过时。此部分逻辑保持不变，它正确地处理了局部变量。
    for (auto& mbb : mfunc->getBlocks()) {
        std::vector<std::unique_ptr<MachineInstr>> new_instructions;
        for (auto& instr_ptr : mbb->getInstructions()) {
--- a/src/backend/RISCv64/Handler/PrologueEpilogueInsertion.cpp
+++ b/src/backend/RISCv64/Handler/PrologueEpilogueInsertion.cpp
@ -27,8 +27,7 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
        );
    }
    
-    // 2. [新增] 确定需要保存的被调用者保存寄存器 (callee-saved)
-    // 这部分逻辑从 CalleeSavedHandler Pass 移入，以集中管理序言生成
+    // 2. 确定需要保存的被调用者保存寄存器 (callee-saved)
    auto& vreg_to_preg_map = frame_info.vreg_to_preg_map;
    std::set<PhysicalReg> used_callee_saved_regs_set;
    const auto& callee_saved_int = getCalleeSavedIntRegs();
@ -36,38 +35,44 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)

    for (const auto& pair : vreg_to_preg_map) {
        PhysicalReg preg = pair.second;
-        // 检查是否在整数或浮点 callee-saved 集合中
-        // 注意：s0作为帧指针，由序言/尾声逻辑特殊处理，不在此处保存
        bool is_int_cs = std::find(callee_saved_int.begin(), callee_saved_int.end(), preg) != callee_saved_int.end();
        bool is_fp_cs = std::find(callee_saved_fp.begin(), callee_saved_fp.end(), preg) != callee_saved_fp.end();
        if ((is_int_cs && preg != PhysicalReg::S0) || is_fp_cs) {
            used_callee_saved_regs_set.insert(preg);
        }
    }
-    // 为了确定性排序，并存入 frame_info 供尾声使用
    frame_info.callee_saved_regs_to_store.assign(
        used_callee_saved_regs_set.begin(), used_callee_saved_regs_set.end()
    );
    std::sort(frame_info.callee_saved_regs_to_store.begin(), frame_info.callee_saved_regs_to_store.end());
-    frame_info.callee_saved_size = frame_info.callee_saved_regs_to_store.size() * 8; // 每个寄存器8字节
+    frame_info.callee_saved_size = frame_info.callee_saved_regs_to_store.size() * 8;

-    // 3. 计算最终的栈帧总大小
+    // 3. 计算最终的栈帧总大小，包含栈溢出保护
    int total_stack_size = frame_info.locals_size + 
                           frame_info.spill_size + 
                           frame_info.callee_saved_size + 
-                           16; // 为 ra 和 s0 固定的16字节
+                           16;
    
-    int aligned_stack_size = (total_stack_size + 15) & ~15; // 16字节对齐
+    // 栈溢出保护：增加最大栈帧大小以容纳大型数组
+    const int MAX_STACK_FRAME_SIZE = 8192; // 8KB to handle large arrays like 256*4*2 = 2048 bytes
+    if (total_stack_size > MAX_STACK_FRAME_SIZE) {
+        // Only emit a warning here; the frame layout and alignment are not changed by this check.
+        std::cerr << "Warning: Stack frame size " << total_stack_size 
+                  << " exceeds recommended limit " << MAX_STACK_FRAME_SIZE << " for function " 
+                  << mfunc->getName() << std::endl;
+    }
+    
+    // 优化：减少对齐开销，使用16字节对齐而非更大的对齐
+    int aligned_stack_size = (total_stack_size + 15) & ~15;
    frame_info.total_size = aligned_stack_size;

-    // 只有在需要分配栈空间时才生成序言和尾声
    if (aligned_stack_size > 0) {
        // --- 4. 插入完整的序言 ---
        MachineBasicBlock* entry_block = mfunc->getBlocks().front().get();
        auto& entry_instrs = entry_block->getInstructions();
        std::vector<std::unique_ptr<MachineInstr>> prologue_instrs;

-        // 4.1. 分配栈帧: addi sp, sp, -aligned_stack_size
+        // 4.1. 分配栈帧
        auto alloc_stack = std::make_unique<MachineInstr>(RVOpcodes::ADDI);
        alloc_stack->addOperand(std::make_unique<RegOperand>(PhysicalReg::SP));
        alloc_stack->addOperand(std::make_unique<RegOperand>(PhysicalReg::SP));
@ -75,7 +80,6 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
        prologue_instrs.push_back(std::move(alloc_stack));

        // 4.2. 保存 ra 和 s0
-        // sd ra, (aligned_stack_size - 8)(sp)
        auto save_ra = std::make_unique<MachineInstr>(RVOpcodes::SD);
        save_ra->addOperand(std::make_unique<RegOperand>(PhysicalReg::RA));
        save_ra->addOperand(std::make_unique<MemOperand>(
@ -83,7 +87,6 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
            std::make_unique<ImmOperand>(aligned_stack_size - 8)
        ));
        prologue_instrs.push_back(std::move(save_ra));
-        // sd s0, (aligned_stack_size - 16)(sp)
        auto save_fp = std::make_unique<MachineInstr>(RVOpcodes::SD);
        save_fp->addOperand(std::make_unique<RegOperand>(PhysicalReg::S0));
        save_fp->addOperand(std::make_unique<MemOperand>(
@ -92,60 +95,54 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
        ));
        prologue_instrs.push_back(std::move(save_fp));
        
-        // 4.3. 设置新的帧指针 s0: addi s0, sp, aligned_stack_size
+        // 4.3. 设置新的帧指针 s0
        auto set_fp = std::make_unique<MachineInstr>(RVOpcodes::ADDI);
        set_fp->addOperand(std::make_unique<RegOperand>(PhysicalReg::S0));
        set_fp->addOperand(std::make_unique<RegOperand>(PhysicalReg::SP));
        set_fp->addOperand(std::make_unique<ImmOperand>(aligned_stack_size));
        prologue_instrs.push_back(std::move(set_fp));
        
-        // 4.4. [新增] 保存所有使用到的被调用者保存寄存器
-        // 它们保存在 s0 下方，紧接着 ra/s0 的位置
-        int callee_saved_offset = -16;
+        // 4.4. 保存所有使用到的被调用者保存寄存器
+        int next_available_offset = -(16 + frame_info.locals_size + frame_info.spill_size);
        for (const auto& reg : frame_info.callee_saved_regs_to_store) {
-            callee_saved_offset -= 8;
-            RVOpcodes store_op = (reg >= PhysicalReg::F0 && reg <= PhysicalReg::F31) ? RVOpcodes::FSD : RVOpcodes::SD;
+            // 采用“先使用，后更新”逻辑
+            RVOpcodes store_op = isFPR(reg) ? RVOpcodes::FSD : RVOpcodes::SD;
            auto save_cs_reg = std::make_unique<MachineInstr>(store_op);
            save_cs_reg->addOperand(std::make_unique<RegOperand>(reg));
            save_cs_reg->addOperand(std::make_unique<MemOperand>(
                std::make_unique<RegOperand>(PhysicalReg::S0),
-                std::make_unique<ImmOperand>(callee_saved_offset)
+                std::make_unique<ImmOperand>(next_available_offset) // 使用当前偏移
            ));
            prologue_instrs.push_back(std::move(save_cs_reg));
+            next_available_offset -= 8; // 为下一个寄存器准备偏移
        }

-        // 4.5. [核心修改] 加载栈传递参数的逻辑已从此移除
-        // 这项工作已经前移至 `EliminateFrameIndicesPass` 中完成，
-        // 以确保在寄存器分配前就将相关虚拟寄存器定义，从而修复溢出逻辑的bug。
-        
-        // 4.6. 将所有生成的序言指令一次性插入到函数入口
+        // 4.5. 将所有生成的序言指令一次性插入到函数入口
        entry_instrs.insert(entry_instrs.begin(), 
                            std::make_move_iterator(prologue_instrs.begin()),
                            std::make_move_iterator(prologue_instrs.end()));

        // --- 5. 插入完整的尾声 ---
        for (auto& mbb : mfunc->getBlocks()) {
-            // [修正] 使用前向迭代器查找RET指令，以确保在正确的位置（RET之前）插入尾声。
            for (auto it = mbb->getInstructions().begin(); it != mbb->getInstructions().end(); ++it) {
                if ((*it)->getOpcode() == RVOpcodes::RET) {
                    std::vector<std::unique_ptr<MachineInstr>> epilogue_instrs;
                    
-                    // 5.1. [新增] 恢复被调用者保存寄存器
-                    callee_saved_offset = -16;
+                    // 5.1. 恢复被调用者保存寄存器
+                    int next_available_offset_restore = -(16 + frame_info.locals_size + frame_info.spill_size);
                    for (const auto& reg : frame_info.callee_saved_regs_to_store) {
-                        callee_saved_offset -= 8;
-                        RVOpcodes load_op = (reg >= PhysicalReg::F0 && reg <= PhysicalReg::F31) ? RVOpcodes::FLD : RVOpcodes::LD;
+                        RVOpcodes load_op = isFPR(reg) ? RVOpcodes::FLD : RVOpcodes::LD;
                        auto restore_cs_reg = std::make_unique<MachineInstr>(load_op);
                        restore_cs_reg->addOperand(std::make_unique<RegOperand>(reg));
                        restore_cs_reg->addOperand(std::make_unique<MemOperand>(
                            std::make_unique<RegOperand>(PhysicalReg::S0),
-                            std::make_unique<ImmOperand>(callee_saved_offset)
+                            std::make_unique<ImmOperand>(next_available_offset_restore) // 使用当前偏移
                        ));
                        epilogue_instrs.push_back(std::move(restore_cs_reg));
+                        next_available_offset_restore -= 8; // 为下一个寄存器准备偏移
                    }

-                    // 5.2. 恢复 ra 和 s0 (注意基址现在是sp)
-                    // ld ra, (aligned_stack_size - 8)(sp)
+                    // 5.2. 恢复 ra 和 s0
                    auto restore_ra = std::make_unique<MachineInstr>(RVOpcodes::LD);
                    restore_ra->addOperand(std::make_unique<RegOperand>(PhysicalReg::RA));
                    restore_ra->addOperand(std::make_unique<MemOperand>(
@ -153,7 +150,6 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
                        std::make_unique<ImmOperand>(aligned_stack_size - 8)
                    ));
                    epilogue_instrs.push_back(std::move(restore_ra));
-                    // ld s0, (aligned_stack_size - 16)(sp)
                    auto restore_fp = std::make_unique<MachineInstr>(RVOpcodes::LD);
                    restore_fp->addOperand(std::make_unique<RegOperand>(PhysicalReg::S0));
                    restore_fp->addOperand(std::make_unique<MemOperand>(
@ -162,7 +158,7 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
                    ));
                    epilogue_instrs.push_back(std::move(restore_fp));

-                    // 5.3. 释放栈帧: addi sp, sp, aligned_stack_size
+                    // 5.3. 释放栈帧
                    auto dealloc_stack = std::make_unique<MachineInstr>(RVOpcodes::ADDI);
                    dealloc_stack->addOperand(std::make_unique<RegOperand>(PhysicalReg::SP));
                    dealloc_stack->addOperand(std::make_unique<RegOperand>(PhysicalReg::SP));
@ -174,7 +170,6 @@ void PrologueEpilogueInsertionPass::runOnMachineFunction(MachineFunction* mfunc)
                                                  std::make_move_iterator(epilogue_instrs.begin()),
                                                  std::make_move_iterator(epilogue_instrs.end()));
                    
-                    // 一个基本块通常只有一个终止指令，处理完就可以跳到下一个块
                    goto next_block;
                }
            }
--- a/src/backend/RISCv64/Optimize/DivStrengthReduction.cpp
+++ b/src/backend/RISCv64/Optimize/DivStrengthReduction.cpp
@ -0,0 +1,282 @@
+#include "DivStrengthReduction.h"
+#include <cmath>
+#include <cstdint>
+
+namespace sysy {
+
+char DivStrengthReduction::ID = 0;
+
+bool DivStrengthReduction::runOnFunction(Function *F, AnalysisManager& AM) {
+    // IR-level hook is a no-op: the rewrite is done in runOnMachineFunction(); this always returns false.
+    return false;
+}
+
+void DivStrengthReduction::runOnMachineFunction(MachineFunction *mfunc) {
+    if (!mfunc)
+        return;
+
+    bool debug = false; // Set to true for debugging
+    if (debug)
+        std::cout << "Running DivStrengthReduction optimization..." << std::endl;
+
+    int next_temp_reg = 1000;
+    auto createTempReg = [&]() -> int {
+        return next_temp_reg++;
+    };
+
+    struct MagicInfo {
+        int64_t magic;
+        int shift;
+    };
+    
+    auto computeMagic = [](int64_t d, bool is_32bit) -> MagicInfo {
+        int word_size = is_32bit ? 32 : 64;
+        uint64_t ad = std::abs(d);
+        
+        if (ad == 0) return {0, 0};
+        
+        int l = std::floor(std::log2(ad));
+        if ((ad & (ad - 1)) == 0) { // power of 2
+             l = 0; // special case for power of 2, shift will be calculated differently
+        }
+
+        __int128_t one = 1;
+        __int128_t num;
+        int total_shift;
+
+        if (is_32bit) {
+            total_shift = 31 + l;
+            num = one << total_shift;
+        } else {
+            total_shift = 63 + l;
+            num = one << total_shift;
+        }
+        
+        __int128_t den = ad;
+        int64_t magic = (num / den) + 1;
+        
+        return {magic, total_shift};
+    };
+
+    auto isPowerOfTwo = [](int64_t n) -> bool {
+        return n > 0 && (n & (n - 1)) == 0;
+    };
+
+    auto getPowerOfTwoExponent = [](int64_t n) -> int {
+        if (n <= 0 || (n & (n - 1)) != 0) return -1;
+        int shift = 0;
+        while (n > 1) {
+            n >>= 1;
+            shift++;
+        }
+        return shift;
+    };
+
+    struct InstructionReplacement {
+        size_t index;
+        size_t count_to_erase;
+        std::vector<std::unique_ptr<MachineInstr>> newInstrs;
+    };
+    
+    for (auto &mbb_uptr : mfunc->getBlocks()) {
+        auto &mbb = *mbb_uptr;
+        auto &instrs = mbb.getInstructions();
+        std::vector<InstructionReplacement> replacements;
+        
+        for (size_t i = 0; i < instrs.size(); ++i) {
+            auto *instr = instrs[i].get();
+            
+            bool is_32bit = (instr->getOpcode() == RVOpcodes::DIVW);
+            
+            if (instr->getOpcode() != RVOpcodes::DIV && !is_32bit) {
+                continue;
+            }
+            
+            if (instr->getOperands().size() != 3) {
+                continue;
+            }
+            
+            auto *dst_op = instr->getOperands()[0].get();
+            auto *src1_op = instr->getOperands()[1].get();
+            auto *src2_op = instr->getOperands()[2].get();
+
+            int64_t divisor = 0;
+            bool const_divisor_found = false;
+            size_t instructions_to_replace = 1;
+
+            if (src2_op->getKind() == MachineOperand::KIND_IMM) {
+                divisor = static_cast<ImmOperand *>(src2_op)->getValue();
+                const_divisor_found = true;
+            } else if (src2_op->getKind() == MachineOperand::KIND_REG) {
+                if (i > 0) {
+                    auto *prev_instr = instrs[i - 1].get();
+                    if (prev_instr->getOpcode() == RVOpcodes::LI && prev_instr->getOperands().size() == 2) {
+                        auto *li_dst_op = prev_instr->getOperands()[0].get();
+                        auto *li_imm_op = prev_instr->getOperands()[1].get();
+                        if (li_dst_op->getKind() == MachineOperand::KIND_REG && li_imm_op->getKind() == MachineOperand::KIND_IMM) {
+                            auto *div_reg_op = static_cast<RegOperand *>(src2_op);
+                            auto *li_dst_reg_op = static_cast<RegOperand *>(li_dst_op);
+                            if (div_reg_op->isVirtual() && li_dst_reg_op->isVirtual() &&
+                                div_reg_op->getVRegNum() == li_dst_reg_op->getVRegNum()) {
+                                divisor = static_cast<ImmOperand *>(li_imm_op)->getValue();
+                                const_divisor_found = true;
+                                instructions_to_replace = 2;
+                            }
+                        }
+                    }
+                }
+            }
+
+            if (!const_divisor_found) {
+                continue;
+            }
+            
+            auto *dst_reg = static_cast<RegOperand *>(dst_op);
+            auto *src1_reg = static_cast<RegOperand *>(src1_op);
+            
+            if (divisor == 0) continue;
+            
+            std::vector<std::unique_ptr<MachineInstr>> newInstrs;
+            
+            if (divisor == 1) {
+                auto moveInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::ADDW : RVOpcodes::ADD);
+                moveInstr->addOperand(std::make_unique<RegOperand>(*dst_reg));
+                moveInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                moveInstr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
+                newInstrs.push_back(std::move(moveInstr));
+            }
+            else if (divisor == -1) {
+                auto negInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SUBW : RVOpcodes::SUB);
+                negInstr->addOperand(std::make_unique<RegOperand>(*dst_reg));
+                negInstr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
+                negInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                newInstrs.push_back(std::move(negInstr));
+            }
+            else if (isPowerOfTwo(std::abs(divisor))) {
+                int shift = getPowerOfTwoExponent(std::abs(divisor));
+                int temp_reg = createTempReg();
+                
+                auto sraSignInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SRAIW : RVOpcodes::SRAI);
+                sraSignInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                sraSignInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                sraSignInstr->addOperand(std::make_unique<ImmOperand>(is_32bit ? 31 : 63));
+                newInstrs.push_back(std::move(sraSignInstr));
+                
+                auto srlInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SRLIW : RVOpcodes::SRLI);
+                srlInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                srlInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                srlInstr->addOperand(std::make_unique<ImmOperand>((is_32bit ? 32 : 64) - shift));
+                newInstrs.push_back(std::move(srlInstr));
+                
+                auto addInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::ADDW : RVOpcodes::ADD);
+                addInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                addInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                addInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                newInstrs.push_back(std::move(addInstr));
+                
+                auto sraInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SRAIW : RVOpcodes::SRAI);
+                sraInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                sraInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                sraInstr->addOperand(std::make_unique<ImmOperand>(shift));
+                newInstrs.push_back(std::move(sraInstr));
+
+                if (divisor < 0) {
+                    auto negInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SUBW : RVOpcodes::SUB);
+                    negInstr->addOperand(std::make_unique<RegOperand>(*dst_reg));
+                    negInstr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
+                    negInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    newInstrs.push_back(std::move(negInstr));
+                } else {
+                    auto moveInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::ADDW : RVOpcodes::ADD);
+                    moveInstr->addOperand(std::make_unique<RegOperand>(*dst_reg));
+                    moveInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    moveInstr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
+                    newInstrs.push_back(std::move(moveInstr));
+                }
+            }
+            else {
+                auto magic_info = computeMagic(divisor, is_32bit);
+                int magic_reg = createTempReg();
+                int temp_reg = createTempReg();
+
+                auto loadInstr = std::make_unique<MachineInstr>(RVOpcodes::LI);
+                loadInstr->addOperand(std::make_unique<RegOperand>(magic_reg));
+                loadInstr->addOperand(std::make_unique<ImmOperand>(magic_info.magic));
+                newInstrs.push_back(std::move(loadInstr));
+
+                if (is_32bit) {
+                    auto mulInstr = std::make_unique<MachineInstr>(RVOpcodes::MUL);
+                    mulInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    mulInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                    mulInstr->addOperand(std::make_unique<RegOperand>(magic_reg));
+                    newInstrs.push_back(std::move(mulInstr));
+
+                    auto sraInstr = std::make_unique<MachineInstr>(RVOpcodes::SRAI);
+                    sraInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    sraInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    sraInstr->addOperand(std::make_unique<ImmOperand>(magic_info.shift));
+                    newInstrs.push_back(std::move(sraInstr));
+                } else {
+                    auto mulhInstr = std::make_unique<MachineInstr>(RVOpcodes::MULH);
+                    mulhInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    mulhInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                    mulhInstr->addOperand(std::make_unique<RegOperand>(magic_reg));
+                    newInstrs.push_back(std::move(mulhInstr));
+                    
+                    int post_shift = magic_info.shift - 63;
+                    if (post_shift > 0) {
+                        auto sraInstr = std::make_unique<MachineInstr>(RVOpcodes::SRAI);
+                        sraInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                        sraInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                        sraInstr->addOperand(std::make_unique<ImmOperand>(post_shift));
+                        newInstrs.push_back(std::move(sraInstr));
+                    }
+                }
+                
+                int sign_reg = createTempReg();
+                auto sraSignInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SRAIW : RVOpcodes::SRAI);
+                sraSignInstr->addOperand(std::make_unique<RegOperand>(sign_reg));
+                sraSignInstr->addOperand(std::make_unique<RegOperand>(*src1_reg));
+                sraSignInstr->addOperand(std::make_unique<ImmOperand>(is_32bit ? 31 : 63));
+                newInstrs.push_back(std::move(sraSignInstr));
+
+                auto subInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SUBW : RVOpcodes::SUB);
+                subInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                subInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                subInstr->addOperand(std::make_unique<RegOperand>(sign_reg));
+                newInstrs.push_back(std::move(subInstr));
+
+                if (divisor < 0) {
+                    auto negInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::SUBW : RVOpcodes::SUB);
+                    negInstr->addOperand(std::make_unique<RegOperand>(*dst_reg));
+                    negInstr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
+                    negInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    newInstrs.push_back(std::move(negInstr));
+                } else {
+                    auto moveInstr = std::make_unique<MachineInstr>(is_32bit ? RVOpcodes::ADDW : RVOpcodes::ADD);
+                    moveInstr->addOperand(std::make_unique<RegOperand>(*dst_reg));
+                    moveInstr->addOperand(std::make_unique<RegOperand>(temp_reg));
+                    moveInstr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
+                    newInstrs.push_back(std::move(moveInstr));
+                }
+            }
+            
+            if (!newInstrs.empty()) {
+                size_t start_index = i;
+                if (instructions_to_replace == 2) {
+                    start_index = i - 1;
+                }
+                replacements.push_back({start_index, instructions_to_replace, std::move(newInstrs)});
+            }
+        }
+        
+        for (auto it = replacements.rbegin(); it != replacements.rend(); ++it) {
+            instrs.erase(instrs.begin() + it->index, instrs.begin() + it->index + it->count_to_erase);
+            instrs.insert(instrs.begin() + it->index, 
+                         std::make_move_iterator(it->newInstrs.begin()),
+                         std::make_move_iterator(it->newInstrs.end()));
+        }
+    }
+}
+
+} // namespace sysy
--- a/src/backend/RISCv64/RISCv64AsmPrinter.cpp
+++ b/src/backend/RISCv64/RISCv64AsmPrinter.cpp
@ -60,7 +60,7 @@ void RISCv64AsmPrinter::printInstruction(MachineInstr* instr, bool debug) {
        case RVOpcodes::ADD:   *OS << "add ";   break; case RVOpcodes::ADDI:  *OS << "addi ";  break;
        case RVOpcodes::ADDW:  *OS << "addw ";  break; case RVOpcodes::ADDIW: *OS << "addiw "; break;
        case RVOpcodes::SUB:   *OS << "sub ";   break; case RVOpcodes::SUBW:  *OS << "subw ";  break;
-        case RVOpcodes::MUL:   *OS << "mul ";   break; case RVOpcodes::MULW:  *OS << "mulw ";  break;
+        case RVOpcodes::MUL:   *OS << "mul ";   break; case RVOpcodes::MULW:  *OS << "mulw ";  break; case RVOpcodes::MULH:  *OS << "mulh ";  break;
        case RVOpcodes::DIV:   *OS << "div ";   break; case RVOpcodes::DIVW:  *OS << "divw ";  break;
        case RVOpcodes::REM:   *OS << "rem ";   break; case RVOpcodes::REMW:  *OS << "remw ";  break;
        case RVOpcodes::XOR:   *OS << "xor ";   break; case RVOpcodes::XORI:  *OS << "xori ";  break;
@ -104,7 +104,7 @@ void RISCv64AsmPrinter::printInstruction(MachineInstr* instr, bool debug) {
        case RVOpcodes::FMV_S:    *OS << "fmv.s ";    break;
        case RVOpcodes::FMV_W_X:  *OS << "fmv.w.x ";  break;
        case RVOpcodes::FMV_X_W:  *OS << "fmv.x.w ";  break;
-        case RVOpcodes::CALL: { // [核心修改] 为CALL指令添加特殊处理逻辑
+        case RVOpcodes::CALL: { // 为CALL指令添加特殊处理逻辑
            *OS << "call ";
            // 遍历所有操作数，只寻找并打印函数名标签
            for (const auto& op : instr->getOperands()) {
--- a/src/backend/RISCv64/RISCv64Backend.cpp
+++ b/src/backend/RISCv64/RISCv64Backend.cpp
@ -73,7 +73,7 @@ std::string RISCv64CodeGen::module_gen() {
    for (const auto& global_ptr : module->getGlobals()) {
        GlobalValue* global = global_ptr.get();
        
-        // [核心修改] 使用更健壮的逻辑来判断是否为大型零初始化数组
+        // 使用更健壮的逻辑来判断是否为大型零初始化数组
        bool is_all_zeros = true;
        const auto& init_values = global->getInitValues();
        
@ -171,222 +171,90 @@ std::string RISCv64CodeGen::module_gen() {
 }

 std::string RISCv64CodeGen::function_gen(Function* func) {
+    // Runs the complete backend pipeline for one function and returns the
+    // text emitted by the final RISCv64AsmPrinter run.
+    //
+    // NOTE(review): DEBUG/DEEPDEBUG are forced to 0 here, so the DEBUG-guarded
+    // dumps below stay off unless a callee re-enables them — confirm intent.
+    DEBUG = 0;
+    DEEPDEBUG = 0;
+
+    // Stage 1: instruction selection (sysy::IR -> LLIR with virtual registers)
+    RISCv64ISel isel;
+    std::unique_ptr<MachineFunction> mfunc = isel.runOnFunction(func);
+
+    // Render the post-ISel LLIR into a buffer for the debug dump below.
+    std::stringstream ss_after_isel;
+    RISCv64AsmPrinter printer_isel(mfunc.get());
+    printer_isel.run(ss_after_isel, true);
    if (DEBUG) {
-        // === 完整的后端处理流水线 ===
-
-        // 阶段 1: 指令选择 (sysy::IR -> LLIR with virtual registers)
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-
-        RISCv64ISel isel;
-        std::unique_ptr<MachineFunction> mfunc = isel.runOnFunction(func);
-
-        // 第一次调试打印输出
-        std::stringstream ss_after_isel;
-        RISCv64AsmPrinter printer_isel(mfunc.get());
-        printer_isel.run(ss_after_isel, true);
-        // if (DEBUG) {
-        //     std::cout << ss_after_isel.str();
-        // }
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-        DEBUG = 1;
-        DEEPDEBUG = 1;
-        if (DEBUG) {
-            std::cerr << "====== Intermediate Representation after Instruction Selection ======\n" 
-            << ss_after_isel.str();
-        }
-        // DEBUG = 0;
-        // DEEPDEBUG = 0;
-        // [新增] 阶段 2: 消除帧索引 (展开伪指令，计算局部变量偏移)
-        // 这个Pass必须在寄存器分配之前运行
-        EliminateFrameIndicesPass efi_pass;
-        efi_pass.runOnMachineFunction(mfunc.get());
-
-        if (DEBUG) {
-            std::cerr << "====== stack info after eliminate frame indices  ======\n";
-            mfunc->dumpStackFrameInfo(std::cerr);
-            // std::stringstream ss_after_eli;
-            // printer_isel.run(ss_after_eli, true);
-            // std::cerr << "====== LLIR after eliminate frame indices ======\n" 
-            // << ss_after_eli.str();
-        }
-
-        // // 阶段 2: 指令调度 (Instruction Scheduling)
-        // PreRA_Scheduler scheduler;
-        // scheduler.runOnMachineFunction(mfunc.get());
-
-        // DEBUG = 0;
-        // DEEPDEBUG = 0;
-        // DEBUG = 1;
-        // DEEPDEBUG = 1;
-        // 阶段 3: 物理寄存器分配 (Register Allocation)
-        RISCv64RegAlloc reg_alloc(mfunc.get());
-        reg_alloc.run();
-
-        // DEBUG = 0;
-        // DEEPDEBUG = 0;
-        DEBUG = 1;
-        DEEPDEBUG = 1;
-        if (DEBUG) {
-            std::cerr << "====== stack info after reg alloc ======\n";
-            mfunc->dumpStackFrameInfo(std::cerr);
-        }
-
-        // 阶段 3.1: 处理被调用者保存寄存器
-        CalleeSavedHandler callee_handler;
-        callee_handler.runOnMachineFunction(mfunc.get());
-
-        if (DEBUG) {
-            std::cerr << "====== stack info after callee handler ======\n";
-            mfunc->dumpStackFrameInfo(std::cerr);
-        }
-
-        // // 阶段 4: 窥孔优化 (Peephole Optimization)
-        // PeepholeOptimizer peephole;
-        // peephole.runOnMachineFunction(mfunc.get());
-
-        // 阶段 5: 局部指令调度 (Local Scheduling)
-        // PostRA_Scheduler local_scheduler;
-        // local_scheduler.runOnMachineFunction(mfunc.get());
-
-        // 阶段 3.2: 插入序言和尾声
-        PrologueEpilogueInsertionPass pei_pass;
-        pei_pass.runOnMachineFunction(mfunc.get());
-
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-
-        // 阶段 3.3: 大立即数合法化
-        LegalizeImmediatesPass legalizer;
-        legalizer.runOnMachineFunction(mfunc.get());
-
-        // 阶段 6: 代码发射 (Code Emission)
-        std::stringstream ss;
-        RISCv64AsmPrinter printer(mfunc.get());
-        printer.run(ss);
-
-        if (DEBUG) {
-            ss << "\n\n; --- Intermediate Representation after Instruction Selection ---\n" 
-            << ss_after_isel.str();
-        }
-        DEBUG = 1;
-        DEEPDEBUG = 1;
-        return ss.str();
+        std::cerr << "====== Intermediate Representation after Instruction Selection ======\n" 
+        << ss_after_isel.str();
    }
    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    
-    else {
-        // === 完整的后端处理流水线 ===
+    // Stage 2: eliminate frame indices (expand pseudo-instructions, compute local variable offsets)
+    // This pass must run before register allocation.
+    EliminateFrameIndicesPass efi_pass;
+    efi_pass.runOnMachineFunction(mfunc.get());

-        // 阶段 1: 指令选择 (sysy::IR -> LLIR with virtual registers)
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-
-        RISCv64ISel isel;
-        std::unique_ptr<MachineFunction> mfunc = isel.runOnFunction(func);
-
-        // 第一次调试打印输出
-        std::stringstream ss_after_isel;
-        RISCv64AsmPrinter printer_isel(mfunc.get());
-        printer_isel.run(ss_after_isel, true);
-        // if (DEBUG) {
-        //     std::cout << ss_after_isel.str();
-        // }
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-        // DEBUG = 1;
-        // DEEPDEBUG = 1;
-        if (DEBUG) {
-            std::cerr << "====== Intermediate Representation after Instruction Selection ======\n" 
-            << ss_after_isel.str();
-        }
-        // DEBUG = 0;
-        // DEEPDEBUG = 0;
-        // [新增] 阶段 2: 消除帧索引 (展开伪指令，计算局部变量偏移)
-        // 这个Pass必须在寄存器分配之前运行
-        EliminateFrameIndicesPass efi_pass;
-        efi_pass.runOnMachineFunction(mfunc.get());
-
-        if (DEBUG) {
-            std::cerr << "====== stack info after eliminate frame indices  ======\n";
-            mfunc->dumpStackFrameInfo(std::cerr);
-            std::stringstream ss_after_eli;
-            printer_isel.run(ss_after_eli, true);
-            std::cerr << "====== LLIR after eliminate frame indices ======\n" 
-            << ss_after_eli.str();
-        }
-
-        // // 阶段 2: 指令调度 (Instruction Scheduling)
-        // PreRA_Scheduler scheduler;
-        // scheduler.runOnMachineFunction(mfunc.get());
-
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-        // DEBUG = 1;
-        // DEEPDEBUG = 1;
-        // 阶段 3: 物理寄存器分配 (Register Allocation)
-        RISCv64RegAlloc reg_alloc(mfunc.get());
-        reg_alloc.run();
-
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-        // DEBUG = 1;
-        // DEEPDEBUG = 1;
-        if (DEBUG) {
-            std::cerr << "====== stack info after reg alloc ======\n";
-            mfunc->dumpStackFrameInfo(std::cerr);
-        }
-
-        // 阶段 3.1: 处理被调用者保存寄存器
-        CalleeSavedHandler callee_handler;
-        callee_handler.runOnMachineFunction(mfunc.get());
-
-        if (DEBUG) {
-            std::cerr << "====== stack info after callee handler ======\n";
-            mfunc->dumpStackFrameInfo(std::cerr);
-        }
-
-        // // 阶段 4: 窥孔优化 (Peephole Optimization)
-        // PeepholeOptimizer peephole;
-        // peephole.runOnMachineFunction(mfunc.get());
-
-        // 阶段 5: 局部指令调度 (Local Scheduling)
-        // PostRA_Scheduler local_scheduler;
-        // local_scheduler.runOnMachineFunction(mfunc.get());
-
-        // 阶段 3.2: 插入序言和尾声
-        PrologueEpilogueInsertionPass pei_pass;
-        pei_pass.runOnMachineFunction(mfunc.get());
-
-        DEBUG = 0;
-        DEEPDEBUG = 0;
-
-        // 阶段 3.3: 大立即数合法化
-        LegalizeImmediatesPass legalizer;
-        legalizer.runOnMachineFunction(mfunc.get());
-
-        // 阶段 6: 代码发射 (Code Emission)
-        std::stringstream ss;
-        RISCv64AsmPrinter printer(mfunc.get());
-        printer.run(ss);
-
-        if (DEBUG) {
-            ss << "\n\n; --- Intermediate Representation after Instruction Selection ---\n" 
-            << ss_after_isel.str();
-        }
-        return ss.str();
+    if (DEBUG) {
+        std::cerr << "====== stack info after eliminate frame indices  ======\n";
+        mfunc->dumpStackFrameInfo(std::cerr);
+        std::stringstream ss_after_eli;
+        printer_isel.run(ss_after_eli, true);
+        std::cerr << "====== LLIR after eliminate frame indices ======\n" 
+        << ss_after_eli.str();
    }
+
+    // Stage 3: division strength reduction
+    DivStrengthReduction div_strength_reduction;
+    div_strength_reduction.runOnMachineFunction(mfunc.get());
+
+    // Stage 4: pre-register-allocation instruction scheduling
+    PreRA_Scheduler scheduler;
+    scheduler.runOnMachineFunction(mfunc.get());
+
+    // Stage 5: physical register allocation
+    RISCv64RegAlloc reg_alloc(mfunc.get());
+    reg_alloc.run();
+
+    if (DEBUG) {
+        std::cerr << "====== stack info after reg alloc ======\n";
+        mfunc->dumpStackFrameInfo(std::cerr);
+    }
+
+    // Stage 6: handle callee-saved registers
+    CalleeSavedHandler callee_handler;
+    callee_handler.runOnMachineFunction(mfunc.get());
+
+    if (DEBUG) {
+        std::cerr << "====== stack info after callee handler ======\n";
+        mfunc->dumpStackFrameInfo(std::cerr);
+    }
+
+    // Stage 7: peephole optimization
+    PeepholeOptimizer peephole;
+    peephole.runOnMachineFunction(mfunc.get());
+
+    // Stage 8: post-register-allocation local instruction scheduling
+    PostRA_Scheduler local_scheduler;
+    local_scheduler.runOnMachineFunction(mfunc.get());
+
+    // Stage 9: insert prologue and epilogue
+    PrologueEpilogueInsertionPass pei_pass;
+    pei_pass.runOnMachineFunction(mfunc.get());
+
+    // Stage 10: legalize large immediates
+    LegalizeImmediatesPass legalizer;
+    legalizer.runOnMachineFunction(mfunc.get());
+
+    // Stage 11: code emission
+    std::stringstream ss;
+    RISCv64AsmPrinter printer(mfunc.get());
+    printer.run(ss);
+
+    return ss.str();
+    
 }

 } // namespace sysy
--- a/src/backend/RISCv64/RISCv64ISel.cpp
+++ b/src/backend/RISCv64/RISCv64ISel.cpp
@ -2,8 +2,8 @@
 #include <stdexcept>
 #include <set>
 #include <functional>
-#include <cmath> // For std::fabs
-#include <limits> // For std::numeric_limits
+#include <cmath>
+#include <limits>
 #include <iostream>

 namespace sysy {
@ -402,7 +402,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                Value* base = nullptr;
                Value* offset = nullptr;

-                // [修改] 扩展基地址的判断，使其可以识别 AllocaInst 或 GlobalValue
+                // 扩展基地址的判断，使其可以识别 AllocaInst 或 GlobalValue
                if (dynamic_cast<AllocaInst*>(lhs) || dynamic_cast<GlobalValue*>(lhs)) {
                    base = lhs;
                    offset = rhs;
@ -421,7 +421,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                        CurMBB->addInstruction(std::move(li));
                    }
                    
-                    // 2. [修改] 根据基地址的类型，生成不同的指令来获取基地址
+                    // 2. 根据基地址的类型，生成不同的指令来获取基地址
                    auto base_addr_vreg = getNewVReg(Type::getIntType()); // 创建一个新的临时vreg来存放基地址

                    // 情况一：基地址是局部栈变量
@ -452,7 +452,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                }
            }

-            // [V2优点] 在BINARY节点内部按需加载常量操作数。
+            // 在BINARY节点内部按需加载常量操作数。
            auto load_val_if_const = [&](Value* val) {
                if (auto c = dynamic_cast<ConstantValue*>(val)) {
                    if (DEBUG) {
@ -483,7 +483,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
            auto dest_vreg = getVReg(bin);
            auto lhs_vreg = getVReg(lhs);

-            // [V2优点] 融合 ADDIW 优化。
+            // 融合 ADDIW 优化。
            if (rhs_is_imm_opt) {
                auto rhs_const = dynamic_cast<ConstantValue*>(rhs);
                auto instr = std::make_unique<MachineInstr>(RVOpcodes::ADDIW);
@ -539,6 +539,15 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                    CurMBB->addInstruction(std::move(instr));
                    break;
                }
+                case Instruction::kSRA: { // emits SRAIW dest_vreg, lhs_vreg, <immediate taken from rhs>
+                    auto rhs_const = dynamic_cast<ConstantInteger*>(rhs); // nullptr when rhs is not a ConstantInteger
+                    auto instr = std::make_unique<MachineInstr>(RVOpcodes::SRAIW);
+                    instr->addOperand(std::make_unique<RegOperand>(dest_vreg));
+                    instr->addOperand(std::make_unique<RegOperand>(lhs_vreg));
+                    instr->addOperand(std::make_unique<ImmOperand>(rhs_const->getInt())); // NOTE(review): rhs_const is dereferenced unchecked — confirm kSRA always carries a constant shift amount
+                    CurMBB->addInstruction(std::move(instr));
+                    break;
+                }
                case BinaryInst::kICmpEQ: { // 等于 (a == b) -> (subw; seqz)
                    auto sub = std::make_unique<MachineInstr>(RVOpcodes::SUBW);
                    sub->addOperand(std::make_unique<RegOperand>(dest_vreg));
@ -943,7 +952,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
            
            // --- 步骤 3: 生成CALL指令 ---
            auto call_instr = std::make_unique<MachineInstr>(RVOpcodes::CALL);
-            // [协议] 如果函数有返回值，将它的目标虚拟寄存器作为第一个操作数
+            // 如果函数有返回值，将它的目标虚拟寄存器作为第一个操作数
            if (!call->getType()->isVoid()) {
                unsigned dest_vreg = getVReg(call);
                call_instr->addOperand(std::make_unique<RegOperand>(dest_vreg));
@ -1020,7 +1029,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                } else {
                    // --- 处理整数/指针返回值 ---
                    // 返回值需要被放入 a0
-                    // [V2优点] 在RETURN节点内加载常量返回值
+                    // 在RETURN节点内加载常量返回值
                    if (auto const_val = dynamic_cast<ConstantValue*>(ret_val)) {
                        auto li_instr = std::make_unique<MachineInstr>(RVOpcodes::LI);
                        li_instr->addOperand(std::make_unique<RegOperand>(PhysicalReg::A0));
@ -1034,7 +1043,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                    }
                }
            }
-            // [V1设计保留] 函数尾声（epilogue）不由RETURN节点生成，
+            // 函数尾声（epilogue）不由RETURN节点生成，
            // 而是由后续的AsmPrinter或其它Pass统一处理，这是一种常见且有效的模块化设计。
            auto ret_mi = std::make_unique<MachineInstr>(RVOpcodes::RET);
            CurMBB->addInstruction(std::move(ret_mi));
@ -1048,7 +1057,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
            auto then_bb_name = cond_br->getThenBlock()->getName();
            auto else_bb_name = cond_br->getElseBlock()->getName();

-            // [优化] 检查分支条件是否为编译期常量
+            // 检查分支条件是否为编译期常量
            if (auto const_cond = dynamic_cast<ConstantValue*>(condition)) {
                // 如果条件是常量，直接生成一个无条件跳转J，而不是BNE
                if (const_cond->getInt() != 0) { // 条件为 true
@ -1063,7 +1072,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
            } 
            // 如果条件不是常量，则执行标准流程
            else {
-                // [修复] 为条件变量生成加载指令（如果它是常量的话，尽管上面已经处理了）
+                // 为条件变量生成加载指令（如果它是常量的话，尽管上面已经处理了）
                // 这一步是为了逻辑完整，以防有其他类型的常量没有被捕获
                if (auto const_val = dynamic_cast<ConstantValue*>(condition)) {
                    auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
@ -1097,7 +1106,7 @@ void RISCv64ISel::selectNode(DAGNode* node) {
    }

        case DAGNode::MEMSET: {
-            // [V1设计保留] Memset的核心展开逻辑在虚拟寄存器层面是正确的，无需修改。
+            // Memset的核心展开逻辑在虚拟寄存器层面是正确的，无需修改。
            // 之前的bug是由于其输入（地址、值、大小）的虚拟寄存器未被正确初始化。
            // 在修复了CONSTANT/ALLOCA_ADDR的加载问题后，此处的逻辑现在可以正常工作。

@ -1280,14 +1289,19 @@ void RISCv64ISel::selectNode(DAGNode* node) {
                if (stride != 0) {
                    // --- 为当前索引和步长生成偏移计算指令 ---
                    auto offset_vreg = getNewVReg();
-                    auto index_vreg = getVReg(indexValue);
-
-                    // 如果索引是常量，先用 LI 指令加载到虚拟寄存器
+                    
+                    // 处理索引 - 区分常量与动态值
+                    unsigned index_vreg;
                    if (auto const_index = dynamic_cast<ConstantValue*>(indexValue)) {
+                        // 对于常量索引，直接创建新的虚拟寄存器
+                        index_vreg = getNewVReg();
                        auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
                        li->addOperand(std::make_unique<RegOperand>(index_vreg));
                        li->addOperand(std::make_unique<ImmOperand>(const_index->getInt()));
                        CurMBB->addInstruction(std::move(li));
+                    } else {
+                        // 对于动态索引，使用已存在的虚拟寄存器
+                        index_vreg = getVReg(indexValue);
                    }
                    
                    // 优化：如果步长是1，可以直接移动(MV)作为偏移量，无需乘法
@ -1445,7 +1459,7 @@ std::vector<std::unique_ptr<RISCv64ISel::DAGNode>> RISCv64ISel::build_dag(BasicB
            
            // 依次添加所有索引作为后续的操作数
            for (auto index : gep->getIndices()) {
-                // [修复] 从 Use 对象中获取真正的 Value*
+                // 从 Use 对象中获取真正的 Value*
                gep_node->operands.push_back(get_operand_node(index->getValue(), value_to_node, nodes_storage));
            }
        } else if (auto load = dynamic_cast<LoadInst*>(inst)) {
@ -1473,7 +1487,7 @@ std::vector<std::unique_ptr<RISCv64ISel::DAGNode>> RISCv64ISel::build_dag(BasicB
                    }
                }
            }
-            if (bin->getKind() >= Instruction::kFAdd) { // 假设浮点指令枚举值更大
+            if (bin->isFPBinary()) { // floating-point binary op -> build an FBINARY node
                auto fbin_node = create_node(DAGNode::FBINARY, bin, value_to_node, nodes_storage);
                fbin_node->operands.push_back(get_operand_node(bin->getLhs(), value_to_node, nodes_storage));
                fbin_node->operands.push_back(get_operand_node(bin->getRhs(), value_to_node, nodes_storage));
@ -1549,7 +1563,7 @@ unsigned RISCv64ISel::getTypeSizeInBytes(Type* type) {
    }
 }

-// [新] 打印DAG图以供调试的辅助函数
+// 打印DAG图以供调试的辅助函数
 void RISCv64ISel::print_dag(const std::vector<std::unique_ptr<DAGNode>>& dag, const std::string& bb_name) {
    // 检查是否有DEBUG宏或者全局变量，避免在非调试模式下打印
    // if (!DEBUG) return; 
--- a/src/backend/RISCv64/RISCv64LLIR.cpp
+++ b/src/backend/RISCv64/RISCv64LLIR.cpp
@ -44,7 +44,7 @@ std::string regToString(PhysicalReg reg) {
    }
 }

-// [新增] 打印栈帧信息的完整实现
+// 打印栈帧信息的完整实现
 void MachineFunction::dumpStackFrameInfo(std::ostream& os) const {
    const StackFrameInfo& info = frame_info;

--- a/src/backend/RISCv64/RISCv64RegAlloc.cpp
+++ b/src/backend/RISCv64/RISCv64RegAlloc.cpp
@ -93,7 +93,7 @@ bool RISCv64RegAlloc::doAllocation() {
 }

 void RISCv64RegAlloc::precolorByCallingConvention() {
-    // [新增] 在处理前，先清空颜色相关的状态，确保重试时不会出错
+    // 在处理前，先清空颜色相关的状态，确保重试时不会出错
    color_map.clear();
    coloredNodes.clear();

@ -168,7 +168,7 @@ void RISCv64RegAlloc::initialize() {
    spillWorklist.clear();
    spilledNodes.clear();
    coalescedNodes.clear();
-    coloredNodes.clear(); // [修正] 恢复对这两个集合的清除
+    coloredNodes.clear();
    selectStack.clear();

    coalescedMoves.clear();
@ -181,7 +181,7 @@ void RISCv64RegAlloc::initialize() {
    degree.clear();
    moveList.clear();
    alias.clear();
-    color_map.clear(); // [修正] 恢复对 color_map 的清除
+    color_map.clear();
 }

 // 活跃性分析（此部分为标准数据流分析，与现有版本类似但更精细）
@ -197,7 +197,6 @@ void RISCv64RegAlloc::analyzeLiveness() {
        VRegSet uses, defs;
        for (const auto& instr_ptr : mbb->getInstructions()) {
            VRegSet instr_use, instr_def;
-            // 使用新的、能看到物理寄存器的版本
            getInstrUseDef_Liveness(instr_ptr.get(), instr_use, instr_def);
            for (unsigned u : instr_use) {
                if (defs.find(u) == defs.end()) uses.insert(u);
@ -238,7 +237,7 @@ void RISCv64RegAlloc::analyzeLiveness() {
            const MachineInstr* instr = instr_it->get();
            live_out_map[instr] = current_live;
            VRegSet use, def;
-            // 使用新的、能看到物理寄存器的版本
+
            getInstrUseDef_Liveness(instr, use, def);
            for(auto d : def) current_live.erase(d);
            for(auto u : use) current_live.insert(u);
@ -247,7 +246,6 @@ void RISCv64RegAlloc::analyzeLiveness() {
    }
 }

-// [最终修正] 包含了所有正确逻辑和您已有调试代码的完整版本
 void RISCv64RegAlloc::build() {
    initial.clear(); 
    RISCv64AsmPrinter printer_inside_build(MFunc);
@ -403,10 +401,10 @@ void RISCv64RegAlloc::build() {
            }
            
            // --- 规则 3: Live_Out 集合内部的【虚拟寄存器】形成完全图 ---
-            // [优化与修正] 使用更高效的遍历，避免重复调用 addEdge(A,B) 和 addEdge(B,A)
+            // 使用更高效的遍历，避免重复调用 addEdge(A,B) 和 addEdge(B,A)
            for (auto it1 = live_out.begin(); it1 != live_out.end(); ++it1) {
                unsigned l1 = *it1;
-                // [关键修正] 只为虚拟寄存器 l1 添加边
+                // 只为虚拟寄存器 l1 添加边
                if (precolored.count(l1)) continue;

                for (auto it2 = std::next(it1); it2 != live_out.end(); ++it2) {
@ -463,6 +461,17 @@ void RISCv64RegAlloc::coalesce() {
    unsigned y = getAlias(*use.begin());
    unsigned u, v;
    if (precolored.count(y)) { u = y; v = x; } else { u = x; v = y; }
+    
+    // 防御性检查，处理物理寄存器之间的传送指令
+    if (precolored.count(u) && precolored.count(v)) {
+        // 如果 u 和 v 都是物理寄存器，我们不能合并它们。
+        // 这通常是一条寄存器拷贝指令，例如 `mv a2, a1`。
+        // 把它加入 constrainedMoves 列表，然后直接返回，不再处理。
+        constrainedMoves.insert(move);
+        // addWorklist(u) 和 addWorklist(v) 在这里也不需要调用，
+        // 因为它们只对虚拟寄存器有意义。
+        return;
+    }

    if (DEEPERDEBUG) std::cerr << "[Coalesce] Processing move between " << regIdToString(x) 
                           << " and " << regIdToString(y) << " (aliases " << regIdToString(u) 
@ -484,8 +493,6 @@ void RISCv64RegAlloc::coalesce() {
        addWorklist(v);
        return; // 立即返回，不再进行后续检查
    }
-
-    // --- 新的、拆分后的启发式检查逻辑 ---
    
    bool pre_interfere = adjList.at(v).count(u);
    
@ -504,8 +511,6 @@ void RISCv64RegAlloc::coalesce() {
        // --- 场景1：u是物理寄存器，使用 George 启发式 ---
        if (DEEPERDEBUG) std::cerr << "  -> Trying George Heuristic (u is precolored)...\n";
        
-        // ==================== [展开的 std::all_of 逻辑] ====================
-        
        // 步骤 1: 独立调用 adjacent(v) 获取邻居集合
        VRegSet neighbors_of_v = adjacent(v);
        if (DEEPERDEBUG) {
@ -537,7 +542,6 @@ void RISCv64RegAlloc::coalesce() {
        if (DEEPERDEBUG) {
             std::cerr << "  -> George Heuristic final result: " << (george_ok ? "OK" : "FAIL") << "\n";
        }
-        // =================================================================

        if (george_ok) {
            can_coalesce = true;
@ -622,11 +626,11 @@ void RISCv64RegAlloc::assignColors() {
        }
    }

-    // 步骤 2: [最终修正] 完整、正确地处理 coalescedNodes
+    // 步骤 2: 处理 coalescedNodes
    for (unsigned n : coalescedNodes) {
        unsigned root_alias = getAlias(n);
        
-        // --- 新的、健壮的逻辑，处理所有三种可能性 ---
+        // --- 处理所有三种可能性 ---

        // 情况 1: 别名本身就是物理寄存器 (修复当前bug)
        if (precolored.count(root_alias)) {
@ -649,9 +653,13 @@ void RISCv64RegAlloc::assignColors() {

 // 重写程序，插入溢出代码
 void RISCv64RegAlloc::rewriteProgram() {
-    // 1. 为溢出的旧vreg在栈上分配空间
    StackFrameInfo& frame_info = MFunc->getFrameInfo();
-    int spill_base_offset = frame_info.locals_size + frame_info.callee_saved_size;
+    // 使用 EFI Pass 确定的 locals_end_offset 作为溢出分配的基准。
+    // locals_end_offset 本身是负数，代表局部变量区域的下边界地址。
+    int spill_current_offset = frame_info.locals_end_offset;
+
+    // 保存溢出区域的起始点，用于最后计算总的 spill_size
+    const int spill_start_offset = frame_info.locals_end_offset;

    for (unsigned vreg : spilledNodes) {
        if (frame_info.spill_offsets.count(vreg)) continue;
@ -663,11 +671,19 @@ void RISCv64RegAlloc::rewriteProgram() {
            size = 8; // pointer
        }

-        spill_base_offset += size;
-        spill_base_offset = (spill_base_offset + 7) & ~7;
-        frame_info.spill_offsets[vreg] = -spill_base_offset; // [修正] 溢出槽也使用负偏移量
+        // 在当前偏移基础上继续向下(地址变得更负)分配空间
+        spill_current_offset -= size;
+        
+        // 对齐新的、更小的地址，RISC-V 要求8字节对齐
+        spill_current_offset = spill_current_offset & ~7;
+
+        // 将计算出的、不会冲突的正确偏移量存入 spill_offsets
+        frame_info.spill_offsets[vreg] = spill_current_offset;
    }
-    frame_info.spill_size = spill_base_offset - (frame_info.locals_size + frame_info.callee_saved_size);
+
+    // 更新总的溢出区域大小。
+    // spill_size = -(结束偏移 - 开始偏移)
+    frame_info.spill_size = -(spill_current_offset - spill_start_offset);

    // 2. 遍历所有指令，重写代码
    for (auto& mbb : MFunc->getBlocks()) {
@ -711,7 +727,7 @@ void RISCv64RegAlloc::rewriteProgram() {
                }
            }

-            // c. [核心修正] 创建一条全新的指令，用新vreg替换旧vreg
+            // c. 创建一条全新的指令，用新vreg替换旧vreg
            auto new_instr = std::make_unique<MachineInstr>(instr_ptr->getOpcode());
            for (const auto& op : instr_ptr->getOperands()) {
                if (op->getKind() == MachineOperand::KIND_REG) {
@ -781,7 +797,7 @@ void RISCv64RegAlloc::rewriteProgram() {
 }

 /**
- * @brief [最终修正] 获取一条指令完整的【虚拟】使用/定义寄存器集合
+ * @brief 获取一条指令完整的【虚拟】使用/定义寄存器集合
 * 这个函数将服务于图的构建（收集initial节点等）。
 */
 void RISCv64RegAlloc::getInstrUseDef(const MachineInstr* instr, VRegSet& use, VRegSet& def) {
@ -832,10 +848,8 @@ void RISCv64RegAlloc::getInstrUseDef(const MachineInstr* instr, VRegSet& use, VR
    }
 }

-// in file: RISCv64RegAlloc.cpp
-
 /**
- * @brief [最终修复版] 获取一条指令完整的、包含物理寄存器的Use/Def集合
+ * @brief 获取一条指令完整的、包含物理寄存器的Use/Def集合
 * 这个函数专门服务于活跃性分析，现已补全所有指令（包括伪指令）的逻辑。
 */
 void RISCv64RegAlloc::getInstrUseDef_Liveness(const MachineInstr* instr, VRegSet& use, VRegSet& def) {
@ -943,7 +957,6 @@ void RISCv64RegAlloc::getInstrUseDef_Liveness(const MachineInstr* instr, VRegSet
    else if (opcode == RVOpcodes::JALR) {
        // JALR rd, rs1, imm. Def: rd, Use: rs1. 
        // 同时也隐式定义了ra(x1)，但通常rd就是ra。为精确，我们只处理显式操作数。
-        // 旧版本逻辑：def.insert(ra); first_reg_is_def = false; -> 这是不精确的
        def.insert(get_any_reg_id(operands[0].get()));
        use.insert(get_any_reg_id(operands[1].get()));
    }
@ -952,7 +965,7 @@ void RISCv64RegAlloc::getInstrUseDef_Liveness(const MachineInstr* instr, VRegSet
        use.insert(offset + static_cast<unsigned>(PhysicalReg::A0));
        use.insert(offset + static_cast<unsigned>(PhysicalReg::F10)); // F10 is fa0
    }
-    // [关键Bug修复] 添加对 PSEUDO_KEEPALIVE 的处理
+    // 添加对 PSEUDO_KEEPALIVE 的处理
    else if (opcode == RVOpcodes::PSEUDO_KEEPALIVE) {
        // keepalive的所有操作数都是use，以确保它们的生命周期延续到该点
        for (const auto& op : operands) {
@ -1212,7 +1225,7 @@ bool RISCv64RegAlloc::briggsHeuristic(unsigned u, unsigned v) {

 // George启发式
 bool RISCv64RegAlloc::georgeHeuristic(unsigned t, unsigned u) {
-    // 关键修正：如果 t 不是一个待分配的虚拟寄存器（即它是物理寄存器），
+    // 如果 t 不是一个待分配的虚拟寄存器（即它是物理寄存器），
    // 那么它已经被预着色，总是满足 George 启发式条件。
    // 我们通过检查 degree.count(t) 来判断 t 是否在我们的虚拟寄存器工作集中。
    if (degree.count(t) == 0) {
@ -1224,44 +1237,6 @@ bool RISCv64RegAlloc::georgeHeuristic(unsigned t, unsigned u) {
    return degree.at(t) < K || precolored.count(u) || adjList.at(t).count(u);
 }

-// void RISCv64RegAlloc::combine(unsigned u, unsigned v) {
-//     // 1. 从相应的工作列表中移除即将被合并的节点 v
-//     if (freezeWorklist.count(v)) {
-//         freezeWorklist.erase(v);
-//     } else {
-//         spillWorklist.erase(v);
-//     }
-
-//     // 2. 将 v 加入 coalescedNodes 集合，并设置其别名
-//     coalescedNodes.insert(v);
-//     alias[v] = u;
-
-//     // 3. 将 v 的传送指令列表合并到 u
-//     if (moveList.count(u) && moveList.count(v)) {
-//         moveList.at(u).insert(moveList.at(v).begin(), moveList.at(v).end());
-//     } else if (moveList.count(v)) {
-//         moveList[u] = moveList.at(v);
-//     }
-    
-//     // [Bug修复] 移除了论文伪代码中不存在的 enableMoves({v}) 调用。
-
-//     // 4. [核心Bug修复] 遍历 v 的“当前有效”邻居 t (使用 adjacent(v) 而非 adjList.at(v))
-//     //    将它们与 u 连接，并更新它们的度数。
-//     for (unsigned t : adjacent(v)) { 
-//         addEdge(t, u);
-//         decrementDegree(t);
-//     }
-
-//     // 5. 检查合并后的节点 u 的状态，如果其度数变高，可能需要将其移到 spillWorklist
-//     if (!precolored.count(u)) {
-//         int K = isFPVReg(u) ? K_fp : K_int;
-//         if (degree.at(u) >= K && freezeWorklist.count(u)) {
-//             freezeWorklist.erase(u);
-//             spillWorklist.insert(u);
-//         }
-//     }
-// }
-
 void RISCv64RegAlloc::combine(unsigned u, unsigned v) {
    freezeWorklist.erase(v);
    spillWorklist.erase(v);
@ -1483,7 +1458,6 @@ std::string RISCv64RegAlloc::regToString(PhysicalReg reg) {
 std::string RISCv64RegAlloc::regIdToString(unsigned id) {
    const unsigned offset = static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID);

-    // 使用更健壮的检查方式
    if (id >= offset && precolored.count(id)) {
        // 先减去偏移量，得到原始的、小的枚举值
        PhysicalReg reg = static_cast<PhysicalReg>(id - offset);
--- a/src/include/backend/RISCv64/Optimize/DivStrengthReduction.h
+++ b/src/include/backend/RISCv64/Optimize/DivStrengthReduction.h
@ -0,0 +1,30 @@
+#ifndef RISCV64_DIV_STRENGTH_REDUCTION_H
+#define RISCV64_DIV_STRENGTH_REDUCTION_H
+
+#include "RISCv64LLIR.h"
+#include "Pass.h"
+
+namespace sysy {
+
+/**
+ * @class DivStrengthReduction
+ * @brief Division strength-reduction optimizer.
+ * Rewrites division into multiplication using the magic-number technique;
+ * intended for divisions whose divisor is a constant.
+ */
+class DivStrengthReduction : public Pass {
+public:
+    static char ID; // its address is what getPassID() returns
+    
+    DivStrengthReduction() : Pass("div-strength-reduction", Granularity::Function, PassKind::Optimization) {}
+    
+    void *getPassID() const override { return &ID; }
+    
+    bool runOnFunction(Function *F, AnalysisManager& AM) override;
+    
+    void runOnMachineFunction(MachineFunction* mfunc); // entry point taking a backend MachineFunction
+};
+
+} // namespace sysy
+
+#endif // RISCV64_DIV_STRENGTH_REDUCTION_H
--- a/src/include/backend/RISCv64/RISCv64Backend.h
+++ b/src/include/backend/RISCv64/RISCv64Backend.h
@ -24,7 +24,7 @@ private:

    // 私有辅助函数，用于根据类型计算其占用的字节数。
    unsigned getTypeSizeInBytes(Type* type);
-
+    
    Module* module;
 };

--- a/src/include/backend/RISCv64/RISCv64LLIR.h
+++ b/src/include/backend/RISCv64/RISCv64LLIR.h
@ -39,14 +39,14 @@ enum class PhysicalReg {

    // 用于内部表示物理寄存器在干扰图中的节点ID（一个简单的特殊ID，确保不与vreg_counter冲突）
    // 假设 vreg_counter 不会达到这么大的值
-    PHYS_REG_START_ID = 100000, 
+    PHYS_REG_START_ID = 1000000, 
    PHYS_REG_END_ID = PHYS_REG_START_ID + 320, // 预留足够的空间
 };

 // RISC-V 指令操作码枚举
 enum class RVOpcodes {
    // 算术指令
-    ADD, ADDI, ADDW, ADDIW, SUB, SUBW, MUL, MULW, DIV, DIVW, REM, REMW,
+    ADD, ADDI, ADDW, ADDIW, SUB, SUBW, MUL, MULW, MULH, DIV, DIVW, REM, REMW,
    // 逻辑指令
    XOR, XORI, OR, ORI, AND, ANDI,
    // 移位指令
@ -280,14 +280,15 @@ private:
 // 栈帧信息
 struct StackFrameInfo {
    int locals_size = 0; // 仅为AllocaInst分配的大小
+    int locals_end_offset = 0; // 记录局部变量分配结束后的偏移量(相对于s0，为负)
    int spill_size = 0; // 仅为溢出分配的大小
    int total_size = 0; // 总大小
    int callee_saved_size = 0; // 保存寄存器的大小
    std::map<unsigned, int> alloca_offsets; // <AllocaInst的vreg, 栈偏移>
    std::map<unsigned, int> spill_offsets;  // <溢出vreg, 栈偏移>
    std::set<PhysicalReg> used_callee_saved_regs; // 使用的保存寄存器
-    std::map<unsigned, PhysicalReg> vreg_to_preg_map; // [新增] RegAlloc最终的分配结果
-    std::vector<PhysicalReg> callee_saved_regs_to_store; // [新增] 已排序的、需要存取的被调用者保存寄存器
+    std::map<unsigned, PhysicalReg> vreg_to_preg_map; // RegAlloc最终的分配结果
+    std::vector<PhysicalReg> callee_saved_regs_to_store; // 已排序的、需要存取的被调用者保存寄存器
 };

 // 机器函数
--- a/src/include/backend/RISCv64/RISCv64Passes.h
+++ b/src/include/backend/RISCv64/RISCv64Passes.h
@ -10,6 +10,8 @@
 #include "PrologueEpilogueInsertion.h"
 #include "EliminateFrameIndices.h"
 #include "Pass.h"
+#include "DivStrengthReduction.h"
+

 namespace sysy {

--- a/src/include/midend/IR.h
+++ b/src/include/midend/IR.h
@ -728,6 +728,8 @@ class Instruction : public User {
    kPhi = 0x1UL << 39,
    kBitItoF = 0x1UL << 40,
    kBitFtoI = 0x1UL << 41,
+    kSRA = 0x1UL << 42,
+    kMulh = 0x1UL << 43
  };

 protected:
@ -824,6 +826,14 @ public:
        return "Memset";
      case kPhi:
        return "Phi";
+      case kBitItoF:
+        return "BitItoF";
+      case kBitFtoI:
+        return "BitFtoI";
+      case kSRA:
+        return "SRA";
+      case kMulh:  // added together with kSRA; without this case it would print as "Unknown"
+        return "Mulh";
      default:
        return "Unknown";
    }
@ -835,11 +845,15 @ public:

  bool isBinary() const {
    static constexpr uint64_t BinaryOpMask =
-        (kAdd | kSub | kMul | kDiv | kRem | kAnd | kOr) |
-        (kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE) |
+        (kAdd | kSub | kMul | kDiv | kRem | kAnd | kOr | kSRA | kMulh) |  // integer arithmetic / logic / shift kinds
+        (kICmpEQ | kICmpNE | kICmpLT | kICmpGT | kICmpLE | kICmpGE);      // integer comparison kinds
+    return kind & BinaryOpMask;  // floating-point kinds are no longer matched here; see isFPBinary()
+  }
+  bool isFPBinary() const {  // true when kind is a floating-point arithmetic or comparison kind
+    static constexpr uint64_t FPBinaryOpMask =
        (kFAdd | kFSub | kFMul | kFDiv) |
        (kFCmpEQ | kFCmpNE | kFCmpLT | kFCmpGT | kFCmpLE | kFCmpGE);
-    return kind & BinaryOpMask;
+    return kind & FPBinaryOpMask;  // non-zero iff kind's bit is part of the FP mask
  }
  bool isUnary() const {
    static constexpr uint64_t UnaryOpMask = 
--- a/src/include/midend/IRBuilder.h
+++ b/src/include/midend/IRBuilder.h
@ -217,6 +217,12 @@ class IRBuilder {
  BinaryInst * createOrInst(Value *lhs, Value *rhs, const std::string &name = "") {
    return createBinaryInst(Instruction::kOr, Type::getIntType(), lhs, rhs, name);
  }  ///< 创建按位或指令
+  BinaryInst * createSRAInst(Value *lhs, Value *rhs, const std::string &name = "") {
+    return createBinaryInst(Instruction::kSRA, Type::getIntType(), lhs, rhs, name);
+  }  ///< Create an arithmetic-right-shift (kSRA) instruction with integer result type
+  BinaryInst * createMulhInst(Value *lhs, Value *rhs, const std::string &name = "") {
+    return createBinaryInst(Instruction::kMulh, Type::getIntType(), lhs, rhs, name);
+  }  ///< Create a high-half multiply (kMulh) instruction with integer result type
  CallInst * createCallInst(Function *callee, const std::vector<Value *> &args, const std::string &name = "") {
    std::string newName;
    if (name.empty() && callee->getReturnType() != Type::getVoidType()) {
--- a/src/include/midend/Pass/Analysis/Dom.h
+++ b/src/include/midend/Pass/Analysis/Dom.h
@ -6,30 +6,82 @@
 #include <set>
 #include <vector>
 #include <algorithm>
+#include <functional>

 namespace sysy {

-// 支配树分析结果类 (保持不变)
+// Dominator-tree analysis result
 class DominatorTree : public AnalysisResultBase {
 public:
    DominatorTree(Function* F);
+    // Get all dominators of the given basic block
    const std::set<BasicBlock*>* getDominators(BasicBlock* BB) const;
-    BasicBlock* getImmediateDominator(BasicBlock* BB) const;
-    const std::set<BasicBlock*>* getDominanceFrontier(BasicBlock* BB) const;
+    // Get the immediate dominator of the given basic block
+    BasicBlock* getImmediateDominator(BasicBlock* BB) const;  
+    // Get the dominance frontier of the given basic block
+    const std::set<BasicBlock*>* getDominanceFrontier(BasicBlock* BB) const;   
+    // Get the children of the given basic block in the dominator tree
    const std::set<BasicBlock*>* getDominatorTreeChildren(BasicBlock* BB) const;
+    // Extra getters: the complete dominator / immediate-dominator / dominance-frontier maps (optional; mainly for debugging or special cases)
    const std::map<BasicBlock*, std::set<BasicBlock*>>& getDominatorsMap() const { return Dominators; }
    const std::map<BasicBlock*, BasicBlock*>& getIDomsMap() const { return IDoms; }
    const std::map<BasicBlock*, std::set<BasicBlock*>>& getDominanceFrontiersMap() const { return DominanceFrontiers; }
+
+    // Compute the dominator set of every basic block
    void computeDominators(Function* F);
-    void computeIDoms(Function* F);
+    // Compute the immediate dominator of every basic block (uses the Lengauer-Tarjan algorithm internally)
+    void computeIDoms(Function* F); 
+    // Compute the dominance frontier of every basic block
    void computeDominanceFrontiers(Function* F);
+    // Compute the dominator-tree structure (the direct children of each node)
    void computeDominatorTreeChildren(Function* F);
 private:
+    // Function this dominator tree belongs to
    Function* AssociatedFunction;
-    std::map<BasicBlock*, std::set<BasicBlock*>> Dominators;
-    std::map<BasicBlock*, BasicBlock*> IDoms;
-    std::map<BasicBlock*, std::set<BasicBlock*>> DominanceFrontiers;
-    std::map<BasicBlock*, std::set<BasicBlock*>> DominatorTreeChildren;
+    std::map<BasicBlock*, std::set<BasicBlock*>> Dominators;       // dominator set of each basic block
+    std::map<BasicBlock*, BasicBlock*> IDoms;                      // immediate dominator of each basic block
+    std::map<BasicBlock*, std::set<BasicBlock*>> DominanceFrontiers; // dominance frontier of each basic block
+    std::map<BasicBlock*, std::set<BasicBlock*>> DominatorTreeChildren; // dominator-tree children of each basic block
+
+    // ==========================================================
+    // Working data and helpers for the Lengauer-Tarjan algorithm.
+    // Kept private to encapsulate the algorithm and avoid polluting the namespace.
+    // ==========================================================
+
+    // DFS traversal:
+    std::map<BasicBlock*, int> dfnum_map;            // DFS number of each basic block
+    std::vector<BasicBlock*> vertex_vec;             // reverse lookup: DFS number -> basic block
+    std::map<BasicBlock*, BasicBlock*> parent_map;   // parent of each basic block in the DFS tree
+    int df_counter;                                  // DFS counter; after the DFS, the number of visited blocks (N)
+
+    // Semi-dominators:
+    std::map<BasicBlock*, BasicBlock*> sdom_map;     // semi-dominator of each basic block
+    std::map<BasicBlock*, BasicBlock*> idom_map;     // immediate dominator (IDom) of each basic block
+    std::map<BasicBlock*, std::vector<BasicBlock*>> bucket_map; // buckets: nodes sharing a semi-dominator, kept to defer their IDom computation
+
+    // Union-find state (used by evalAndCompress_lt_helper):
+    std::map<BasicBlock*, BasicBlock*> ancestor_map; // union-find parent pointer (rewritten by path compression)
+    std::map<BasicBlock*, BasicBlock*> label_map;    // per node: the node with the smallest sdom found on its path so far (initially itself)
+
+    // ==========================================================
+    // Private computation helpers
+    // ==========================================================
+
+    // Compute the reverse post-order (RPO) of the basic blocks.
+    // computeDominators iterates the blocks in this order.
+    std::vector<BasicBlock*> computeReversePostOrder(Function* F);
+
+    // DFS helper specific to Lengauer-Tarjan;
+    // fills dfnum_map, vertex_vec and parent_map.
+    void dfs_lt_helper(BasicBlock* u);                 
+    
+    // Combines the union-find Find with the Lengauer-Tarjan Eval:
+    // updates labels while compressing, yielding the node with the smallest sdom on the path.
+    BasicBlock* evalAndCompress_lt_helper(BasicBlock* i); 
+    
+    // Union-find Link:
+    // attaches v_child below u_parent in the union-find forest.
+    void link_lt_helper(BasicBlock* u_parent, BasicBlock* v_child); 
 };


--- a/src/include/midend/Pass/Optimize/BuildCFG.h
+++ b/src/include/midend/Pass/Optimize/BuildCFG.h
@ -0,0 +1,20 @@
+#pragma once
+
+#include "IR.h"
+#include "Pass.h"
+#include <queue>
+#include <set>
+
+namespace sysy {
+
+class BuildCFG : public OptimizationPass {
+public:
+  static void *ID;
+  BuildCFG() : OptimizationPass("BuildCFG", Granularity::Function) {}
+  bool runOnFunction(Function *F, AnalysisManager &AM) override;
+  void getAnalysisUsage(std::set<void *> &analysisDependencies, std::set<void *> &analysisInvalidations) const override;
+  void *getPassID() const override { return &ID; }
+
+};
+
+} // namespace sysy
--- a/src/include/midend/Pass/Optimize/LargeArrayToGlobal.h
+++ b/src/include/midend/Pass/Optimize/LargeArrayToGlobal.h
@ -0,0 +1,24 @@
+#pragma once
+
+#include "../Pass.h"
+
+namespace sysy {
+
+class LargeArrayToGlobalPass : public OptimizationPass {
+public:
+    static void *ID;
+
+    LargeArrayToGlobalPass() : OptimizationPass("LargeArrayToGlobal", Granularity::Module) {}
+
+    bool runOnModule(Module *M, AnalysisManager &AM) override;
+    void *getPassID() const override {
+        return &ID;
+    }
+
+private:
+    unsigned calculateTypeSize(Type *type);
+    void convertAllocaToGlobal(AllocaInst *alloca, Function *F, Module *M);
+    std::string generateUniqueGlobalName(AllocaInst *alloca, Function *F);
+};
+
+} // namespace sysy
--- a/src/include/midend/Pass/Pass.h
+++ b/src/include/midend/Pass/Pass.h
@ -279,7 +279,7 @@ private:
  IRBuilder *pBuilder;

 public:
-  PassManager() = default;
+  PassManager() = delete;
  ~PassManager() = default;

  PassManager(Module *module, IRBuilder *builder) : pmodule(module) ,pBuilder(builder), analysisManager(module) {}
--- a/src/midend/CMakeLists.txt
+++ b/src/midend/CMakeLists.txt
@ -11,6 +11,8 @@ add_library(midend_lib STATIC
    Pass/Optimize/Reg2Mem.cpp
    Pass/Optimize/SysYIRCFGOpt.cpp
    Pass/Optimize/SCCP.cpp
+    Pass/Optimize/BuildCFG.cpp
+    Pass/Optimize/LargeArrayToGlobal.cpp
 )

 # 包含中端模块所需的头文件路径
--- a/src/midend/Pass/Analysis/Dom.cpp
+++ b/src/midend/Pass/Analysis/Dom.cpp
@ -1,21 +1,30 @@
 #include "Dom.h"
-#include <algorithm> // for std::set_intersection, std::set_difference, std::set_union
+#include <algorithm> // for std::set_intersection, std::reverse
 #include <iostream>  // for debug output
 #include <limits>    // for std::numeric_limits
 #include <queue>
+#include <functional> // for std::function
+#include <map>
+#include <vector>
+#include <set>

 namespace sysy {

-// 初始化 支配树静态 ID
+// ==============================================================
+// Static ID of DominatorTreeAnalysisPass
+// ==============================================================
 void *DominatorTreeAnalysisPass::ID = (void *)&DominatorTreeAnalysisPass::ID;
+
 // ==============================================================
 // DominatorTree 结果类的实现
 // ==============================================================

+// Constructor: records the associated function; performs no computation
 DominatorTree::DominatorTree(Function *F) : AssociatedFunction(F) {
-  // 构造时可以不计算，在分析遍运行里计算并填充
+  // Nothing is computed here; the analysis pass calls the compute* methods to fill the maps
 }

+// Getter methods
 const std::set<BasicBlock *> *DominatorTree::getDominators(BasicBlock *BB) const {
  auto it = Dominators.find(BB);
  if (it != Dominators.end()) {
@ -48,7 +57,7 @@ const std::set<BasicBlock *> *DominatorTree::getDominatorTreeChildren(BasicBlock
  return nullptr;
 }

-// 辅助函数：打印 BasicBlock 集合
+// Helper: print a set of BasicBlocks (does nothing unless DEBUG is set)
 void printBBSet(const std::string &prefix, const std::set<BasicBlock *> &s) {
  if (!DEBUG)
    return;
@ -63,24 +72,52 @@ void printBBSet(const std::string &prefix, const std::set<BasicBlock *> &s) {
  std::cout << "}" << std::endl;
 }

+// Helper: list the blocks reachable from the entry block in reverse post-order (RPO).
+std::vector<BasicBlock*> DominatorTree::computeReversePostOrder(Function* F) {
+    std::vector<BasicBlock*> order;   // filled in post-order, reversed before returning
+    std::set<BasicBlock*> seen;       // blocks the DFS has already entered
+
+    // Recursive DFS: a block is appended only after all of its successors.
+    std::function<void(BasicBlock*)> walk = [&](BasicBlock* block) {
+        seen.insert(block);
+        for (BasicBlock* next : block->getSuccessors()) {
+            if (seen.count(next) != 0) {
+                continue;
+            }
+            walk(next);
+        }
+        order.push_back(block);
+    };
+    walk(F->getEntryBlock());
+    // Post-order -> reverse post-order.
+    std::reverse(order.begin(), order.end());
+    if (DEBUG) {
+        std::cout << "--- Computed RPO: ";
+        for (auto it = order.begin(); it != order.end(); ++it) {
+            std::cout << (*it)->getName() << " ";
+        }
+        std::cout << "---" << std::endl;
+    }
+    return order;
+}
+
+// computeDominators: iterative computation of the dominator sets (independent of the IDom algorithm)
 void DominatorTree::computeDominators(Function *F) {
  if (DEBUG)
    std::cout << "--- Computing Dominators ---" << std::endl;

  BasicBlock *entryBlock = F->getEntryBlock();
-  std::vector<BasicBlock *> bbs_in_order; // 用于确定遍历顺序，如果需要的话
+  std::vector<BasicBlock*> bbs_rpo = computeReversePostOrder(F);

-  // 初始化：入口块只被自己支配，其他块被所有块支配
-  for (const auto &bb_ptr : F->getBasicBlocks()) {
-    BasicBlock *bb = bb_ptr.get();
-    bbs_in_order.push_back(bb); // 收集所有块
+  for (BasicBlock *bb : bbs_rpo) {
    if (bb == entryBlock) {
+      Dominators[bb].clear();
      Dominators[bb].insert(bb);
-      if (DEBUG)
-        std::cout << "Init Dominators[" << bb->getName() << "]: {" << bb->getName() << "}" << std::endl;
+      if (DEBUG) std::cout << "Init Dominators[" << bb->getName() << "]: {" << bb->getName() << "}" << std::endl;
    } else {
-      for (const auto &all_bb_ptr : F->getBasicBlocks()) {
-        Dominators[bb].insert(all_bb_ptr.get());
+      Dominators[bb].clear();
+      for (BasicBlock *all_bb : bbs_rpo) {
+        Dominators[bb].insert(all_bb);
      }
      if (DEBUG) {
        std::cout << "Init Dominators[" << bb->getName() << "]: ";
@ -94,35 +131,29 @@ void DominatorTree::computeDominators(Function *F) {
  while (changed) {
    changed = false;
    iteration++;
-    if (DEBUG)
-      std::cout << "Iteration " << iteration << std::endl;
+    if (DEBUG) std::cout << "Iteration " << iteration << std::endl;

-    // 确保遍历顺序一致性，例如可以按照DFS或BFS顺序，或者简单的迭代器顺序
-    // 如果Function::getBasicBlocks()返回的迭代器顺序稳定，则无需bbs_in_order
-    for (const auto &bb_ptr : F->getBasicBlocks()) { // 假设这个迭代器顺序稳定
-      BasicBlock *bb = bb_ptr.get();
-      if (bb == entryBlock)
-        continue;
+    for (BasicBlock *bb : bbs_rpo) {
+      if (bb == entryBlock) continue;

-      // 计算所有前驱的支配者集合的交集
      std::set<BasicBlock *> newDom;
      bool firstPredProcessed = false;

      for (BasicBlock *pred : bb->getPredecessors()) {
-        // 确保前驱的支配者集合已经计算过
-        if (Dominators.count(pred)) {
-          if (!firstPredProcessed) {
-            newDom = Dominators[pred];
-            firstPredProcessed = true;
-          } else {
-            std::set<BasicBlock *> intersection;
-            std::set_intersection(newDom.begin(), newDom.end(), Dominators[pred].begin(), Dominators[pred].end(),
-                                  std::inserter(intersection, intersection.begin()));
-            newDom = intersection;
-          }
+        if(DEBUG){
+          std::cout << "  Processing predecessor: " << pred->getName() << std::endl;
        }
+        if (!Dominators.count(pred)) continue; // pred was not reached by the RPO walk, so it has no set; operator[] would insert an empty one and wipe out newDom
+        if (!firstPredProcessed) { // the first usable predecessor seeds the intersection
+          newDom = Dominators[pred];
+          firstPredProcessed = true;
+        } else { // every further predecessor narrows newDom to the common dominators
+          std::set<BasicBlock *> intersection;
+          std::set_intersection(newDom.begin(), newDom.end(), Dominators[pred].begin(), Dominators[pred].end(), std::inserter(intersection, intersection.begin()));
+          newDom = intersection;
+        }
      }
-      newDom.insert(bb); // BB 永远支配自己
+      newDom.insert(bb);

      if (newDom != Dominators[bb]) {
        if (DEBUG) {
@ -140,78 +171,242 @@ void DominatorTree::computeDominators(Function *F) {
    std::cout << "--- Dominators Computation Finished ---" << std::endl;
 }

-void DominatorTree::computeIDoms(Function *F) {
-  if (DEBUG)
-    std::cout << "--- Computing Immediate Dominators (IDoms) ---" << std::endl;
+// ==============================================================
+// Lengauer-Tarjan helper data structures and functions (private members)
+// ==============================================================

-  BasicBlock *entryBlock = F->getEntryBlock();
-  IDoms[entryBlock] = nullptr; // 入口块没有即时支配者
-
-  // 遍历所有非入口块
-  for (const auto &bb_ptr : F->getBasicBlocks()) {
-    BasicBlock *bb = bb_ptr.get();
-    if (bb == entryBlock)
-      continue;
-
-    BasicBlock *currentIDom = nullptr;
-    const std::set<BasicBlock *> *domsOfBB = getDominators(bb);
-    if (!domsOfBB) {
-      if (DEBUG)
-        std::cerr << "Warning: Dominators for " << bb->getName() << " not found!" << std::endl;
-      continue;
+// Lengauer-Tarjan DFS: gives u the next DFS number and records it in dfnum_map / vertex_vec,
+// then recurses into each unvisited successor after storing u as its parent in parent_map.
+void DominatorTree::dfs_lt_helper(BasicBlock* u) {
+    dfnum_map[u] = df_counter;
+    if (df_counter >= vertex_vec.size()) { // grow on demand
+        vertex_vec.resize(df_counter + 1);
    }
+    vertex_vec[df_counter] = u;
+    if (DEBUG) std::cout << "  DFS: Visiting " << u->getName() << ", dfnum = " << df_counter << std::endl;
+    df_counter++;

-    // 遍历bb的所有严格支配者 D (即 bb 的支配者中除了 bb 自身)
-    for (BasicBlock *D_candidate : *domsOfBB) {
-      if (D_candidate == bb)
-        continue; // 跳过bb自身
-
-      bool D_candidate_is_IDom = true;
-      // 检查是否存在另一个块 X，使得 D_candidate 严格支配 X 且 X 严格支配 bb
-      // 或者更直接的，检查 D_candidate 是否被 bb 的所有其他严格支配者所支配
-      for (BasicBlock *X_other_dom : *domsOfBB) {
-        if (X_other_dom == bb || X_other_dom == D_candidate)
-          continue; // 跳过bb自身和D_candidate
-
-        // 如果 X_other_dom 严格支配 bb (它在 domsOfBB 中且不是bb自身)
-        // 并且 X_other_dom 不被 D_candidate 支配，那么 D_candidate 就不是 IDom
-        const std::set<BasicBlock *> *domsOfX_other_dom = getDominators(X_other_dom);
-        if (domsOfX_other_dom && domsOfX_other_dom->count(D_candidate)) { // X_other_dom 支配 D_candidate
-          // D_candidate 被另一个支配者 X_other_dom 支配
-          // 这说明 D_candidate 位于 X_other_dom 的“下方”，X_other_dom 更接近 bb
-          // 因此 D_candidate 不是 IDom
-          D_candidate_is_IDom = false;
-          break;
+    for (BasicBlock* v : u->getSuccessors()) {
+        if (dfnum_map.find(v) == dfnum_map.end()) { // v has not been visited yet
+            parent_map[v] = u;
+            if (DEBUG) std::cout << "    DFS: Setting parent[" << v->getName() << "] = " << u->getName() << std::endl;
+            dfs_lt_helper(v);
        }
-      }
-      if (D_candidate_is_IDom) {
-        currentIDom = D_candidate;
-        break; // 找到即时支配者，可以退出循环，因为它是唯一的
-      }
    }
-    IDoms[bb] = currentIDom;
-    if (DEBUG) {
-      std::cout << "  IDom[" << bb->getName() << "] = " << (currentIDom ? currentIDom->getName() : "nullptr")
-                << std::endl;
-    }
-  }
-  if (DEBUG)
-    std::cout << "--- Immediate Dominators Computation Finished ---" << std::endl;
 }

-/*
-for each node n in a postorder traversal of the dominator tree:
-  df[n] = empty set
-  // compute DF_local(n)
-  for each child y of n in the CFG:
-    if idom[y] != n:
-      df[n] = df[n] U {y}
-  // compute DF_up(n)
-  for each child c of n in the dominator tree:
-    for each element w in df[c]:
-      if idom[w] != n:
-        df[n] = df[n] U {w}
-*/
+// Union-find Find with path compression, combined with the Lengauer-Tarjan Eval.
+// While walking up the ancestor chain it updates label_map so that label_map[i] names the
+// node whose sdom has the smallest DFS number on that chain; that label is the return value.
+BasicBlock* DominatorTree::evalAndCompress_lt_helper(BasicBlock* i) {
+    if (DEBUG) std::cout << "    Eval: Processing " << i->getName() << std::endl;
+    // i is a root when it has no ancestor entry or the entry is nullptr
+    if (ancestor_map.find(i) == ancestor_map.end() || ancestor_map[i] == nullptr) {
+        if (DEBUG) std::cout << "      Eval: " << i->getName() << " is root, returning itself." << std::endl;
+        return i; // a root has no ancestors, so it is returned as-is
+    }
+    
+    // i has an ancestor: evaluate that ancestor first (recursively)
+    BasicBlock* root_ancestor = evalAndCompress_lt_helper(ancestor_map[i]);
+    
+    // ancestor_map[i] still names i's old ancestor here; it is overwritten only further down.
+    // Compare the ancestor's label with i's own label by the DFS number of their sdom and
+    // keep the smaller one in label_map[i], so label_map[i] reflects the whole chain above i.
+    // The comparison is strict, so on a tie i keeps its current label.
+    if (sdom_map.count(label_map[ancestor_map[i]]) && // the ancestor's label must have an sdom
+        sdom_map.count(label_map[i]) &&                // i's label must have an sdom
+        dfnum_map[sdom_map[label_map[ancestor_map[i]]]] < dfnum_map[sdom_map[label_map[i]]]) {
+        if (DEBUG) std::cout << "      Eval: Updating label for " << i->getName() << " from " 
+                              << label_map[i]->getName() << " to " << label_map[ancestor_map[i]]->getName() << std::endl;
+        label_map[i] = label_map[ancestor_map[i]];
+    }
+    
+    ancestor_map[i] = root_ancestor; // compress: re-point i at the recursive result. NOTE(review): for a non-root ancestor that result is its label, not necessarily the set root - confirm intended
+    if (DEBUG) std::cout << "      Eval: Path compression for " << i->getName() << ", new ancestor = " 
+                          << (root_ancestor ? root_ancestor->getName() : "nullptr") << std::endl;
+    
+    return label_map[i]; // the Eval result is the label, not the set root
+}
+
+// Link: attach v_child below u_parent in the union-find forest
+// and reset v_child's label to itself.
+void DominatorTree::link_lt_helper(BasicBlock* u_parent, BasicBlock* v_child) {
+    ancestor_map[v_child] = u_parent; // set the union-find parent
+    label_map[v_child] = v_child;     // label starts as the node itself
+    if (DEBUG) std::cout << "  Link: " << v_child->getName() << " linked to " << u_parent->getName() << std::endl;
+}
+
+// ==============================================================
+// computeIDoms: Lengauer-Tarjan implementation
+// ==============================================================
+void DominatorTree::computeIDoms(Function *F) {
+    if (DEBUG) std::cout << "--- Computing Immediate Dominators (IDoms) using Lengauer-Tarjan ---" << std::endl;
+
+    BasicBlock *entryBlock = F->getEntryBlock();
+
+    // 1. Reset every Lengauer-Tarjan working structure
+    dfnum_map.clear();
+    vertex_vec.clear();
+    parent_map.clear();
+    sdom_map.clear();
+    idom_map.clear();
+    bucket_map.clear();
+    ancestor_map.clear();
+    label_map.clear();
+    df_counter = 0; // DFS numbering starts at 0
+
+    // Pre-size vertex_vec so the DFS rarely needs to resize it
+    vertex_vec.resize(F->getBasicBlocks().size() + 1); 
+    // Before the DFS, give every basic block an initial sdom and label (itself),
+    // so every block is present in both maps before Phase 2 starts
+    for (auto &bb_ptr : F->getBasicBlocks()) {
+        BasicBlock* bb = bb_ptr.get();
+        sdom_map[bb] = bb; // sdom(bb) starts as bb itself
+        label_map[bb] = bb; // label(bb) starts as bb itself (used by the union-find path compression)
+    }
+    // Make sure the entry block is initialised too (in case the loop above did not visit it)
+    sdom_map[entryBlock] = entryBlock;
+    label_map[entryBlock] = entryBlock;
+    // Phase 1: DFS from the entry block;
+    // fills dfnum_map, vertex_vec and parent_map
+    dfs_lt_helper(entryBlock);
+    idom_map[entryBlock] = nullptr; // the entry block has no immediate dominator
+    if (DEBUG) std::cout << "  IDom[" << entryBlock->getName() << "] = nullptr" << std::endl;
+
+    if (DEBUG) std::cout << "  Sdom[" << entryBlock->getName() << "] = " << entryBlock->getName() << std::endl;
+    
+    // Initialise the union-find ancestor and label of every block the DFS visited
+    for (auto const& [bb_key, dfn_val] : dfnum_map) {
+        ancestor_map[bb_key] = nullptr; // each block starts as the root of its own set
+        label_map[bb_key] = bb_key;   // initial label is the block itself
+    }
+
+    if (DEBUG) {
+        std::cout << "  --- DFS Phase Complete ---" << std::endl;
+        std::cout << "  dfnum_map:" << std::endl;
+        for (auto const& [bb, dfn] : dfnum_map) {
+            std::cout << "    " << bb->getName() << " -> " << dfn << std::endl;
+        }
+        std::cout << "  vertex_vec (by dfnum):" << std::endl;
+        for (size_t k = 0; k < df_counter; ++k) {
+            if (vertex_vec[k]) std::cout << "    [" << k << "] -> " << vertex_vec[k]->getName() << std::endl;
+        }
+        std::cout << "  parent_map:" << std::endl;
+        for (auto const& [child, parent] : parent_map) {
+            std::cout << "    " << child->getName() << " -> " << (parent ? parent->getName() : "nullptr") << std::endl;
+        }
+        std::cout << "  ------------------------" << std::endl;
+    }
+
+
+    // Phase 2: compute semi-dominators (sdom).
+    // Nodes are visited in decreasing DFS-number order; the entry block (DFS number 0)
+    // is excluded. The bucket handling of Phase 3 part 1 runs inside the same loop.
+    if (DEBUG) std::cout << "--- Phase 2: Computing Semi-Dominators (sdom) ---" << std::endl;
+    for (int i = df_counter - 1; i >= 1; --i) { // from the largest DFS number down to 1
+        BasicBlock* w = vertex_vec[i]; // node being processed
+        if (DEBUG) std::cout << "  Processing node w: " << w->getName() << " (dfnum=" << i << ")" << std::endl;
+
+
+        // For every predecessor v of w
+        for (BasicBlock* v : w->getPredecessors()) {
+            if (DEBUG) std::cout << "    Considering predecessor v: " << v->getName() << std::endl;
+            // Skip predecessors the DFS never visited (absent from dfnum_map)
+            if (dfnum_map.find(v) == dfnum_map.end()) {
+                if (DEBUG) std::cout << "      Predecessor " << v->getName() << " not in DFS tree, skipping." << std::endl;
+                continue; 
+            }
+
+            // evalAndCompress yields the node with the smallest sdom on v's ancestor chain
+            BasicBlock* u_with_min_sdom_on_path = evalAndCompress_lt_helper(v);
+            if (DEBUG) std::cout << "      Eval(" << v->getName() << ") returned " 
+                                  << u_with_min_sdom_on_path->getName() << std::endl;
+            if (DEBUG && sdom_map.count(u_with_min_sdom_on_path) && sdom_map.count(w)) {
+                std::cout << "      Comparing sdom: dfnum[" << sdom_map[u_with_min_sdom_on_path]->getName() << "] (" << dfnum_map[sdom_map[u_with_min_sdom_on_path]] 
+                          << ") vs dfnum[" << sdom_map[w]->getName() << "] (" << dfnum_map[sdom_map[w]] << ")" << std::endl;
+            }
+            // Compare sdom(u) with sdom(w) by DFS number
+            if (sdom_map.count(u_with_min_sdom_on_path) && sdom_map.count(w) &&
+                dfnum_map[sdom_map[u_with_min_sdom_on_path]] < dfnum_map[sdom_map[w]]) {
+                if (DEBUG) std::cout << "      Updating sdom[" << w->getName() << "] from " 
+                                      << sdom_map[w]->getName() << " to " 
+                                      << sdom_map[u_with_min_sdom_on_path]->getName() << std::endl;
+                sdom_map[w] = sdom_map[u_with_min_sdom_on_path]; // update sdom(w)
+                if (DEBUG) std::cout << "      Sdom update applied. New sdom[" << w->getName() << "] = " << sdom_map[w]->getName() << std::endl;
+            }
+        }
+        
+        // Put w into the bucket of sdom(w)
+        bucket_map[sdom_map[w]].push_back(w);
+        if (DEBUG) std::cout << "    Adding " << w->getName() << " to bucket of sdom(" << w->getName() << "): " 
+                              << sdom_map[w]->getName() << std::endl;
+
+        // Link w below its DFS-tree parent in the union-find forest
+        if (parent_map.count(w) && parent_map[w] != nullptr) {
+            link_lt_helper(parent_map[w], w);
+        }
+        
+        // Phase 3, part 1: process every node in parent[w]'s bucket to assign its idom
+        if (parent_map.count(w) && parent_map[w] != nullptr) {
+            BasicBlock* p = parent_map[w]; // p is w's DFS-tree parent
+            if (DEBUG) std::cout << "    Processing bucket for parent " << p->getName() << std::endl;
+
+            // Iterate over a copy: the original bucket is cleared after this loop
+            std::vector<BasicBlock*> nodes_in_p_bucket_copy = bucket_map[p];
+            for (BasicBlock* y : nodes_in_p_bucket_copy) {
+                if (DEBUG) std::cout << "      Processing node y from bucket: " << y->getName() << std::endl;
+                // Node with the smallest sdom on y's ancestor chain
+                BasicBlock* u = evalAndCompress_lt_helper(y);
+                if (DEBUG) std::cout << "        Eval(" << y->getName() << ") returned " << u->getName() << std::endl;
+                
+                // Decide idom(y):
+                // if sdom(eval(y)) == sdom(parent(w)), then idom(y) = parent(w)
+                // else idom(y) = eval(y)
+                if (sdom_map.count(u) && sdom_map.count(p) &&
+                    dfnum_map[sdom_map[u]] < dfnum_map[sdom_map[p]]) {
+                    idom_map[y] = u; // idom(y) := u; replaced in the final pass if it differs from sdom(y)
+                    if (DEBUG) std::cout << "        IDom[" << y->getName() << "] set to " << u->getName() << std::endl;
+                } else {
+                    idom_map[y] = p; // p is y's idom
+                    if (DEBUG) std::cout << "        IDom[" << y->getName() << "] set to " << p->getName() << std::endl;
+                }
+            }
+            bucket_map[p].clear(); // empty the bucket so its nodes are not processed again
+            if (DEBUG) std::cout << "    Cleared bucket for parent " << p->getName() << std::endl;
+        }
+    }
+
+    // Phase 3, part 2: finalise idom for the nodes whose idom != sdom
+    if (DEBUG) std::cout << "--- Phase 3: Finalizing Immediate Dominators (idom) ---" << std::endl;
+    for (int i = 1; i < df_counter; ++i) { // increasing DFS number, skipping the entry block (DFS number 0)
+        BasicBlock* w = vertex_vec[i];
+        if (DEBUG) std::cout << "  Finalizing node w: " << w->getName() << std::endl;
+        if (idom_map.count(w) && sdom_map.count(w) && idom_map[w] != sdom_map[w]) {
+            // w's true idom is the idom of its current idom
+            if (DEBUG) std::cout << "    idom[" << w->getName() << "] (" << idom_map[w]->getName() 
+                                  << ") != sdom[" << w->getName() << "] (" << sdom_map[w]->getName() << ")" << std::endl;
+            if (idom_map.count(idom_map[w])) {
+                idom_map[w] = idom_map[idom_map[w]];
+                if (DEBUG) std::cout << "    Updating idom[" << w->getName() << "] to idom(idom(w)): " 
+                                      << idom_map[w]->getName() << std::endl;
+            } else {
+                 if (DEBUG) std::cout << "    Warning: idom(idom(" << w->getName() << ")) not found, leaving idom[" << w->getName() << "] as is." << std::endl;
+            }
+        }
+        if (DEBUG) {
+            std::cout << "  Final IDom[" << w->getName() << "] = " << (idom_map[w] ? idom_map[w]->getName() : "nullptr") << std::endl;
+        }
+    }
+
+    // Publish the result: copy idom_map into the DominatorTree member IDoms
+    IDoms = idom_map; 
+
+    if (DEBUG) std::cout << "--- Immediate Dominators Computation Finished ---" << std::endl;
+}
+
+// ==============================================================
+// computeDominanceFrontiers and computeDominatorTreeChildren
+// ==============================================================

 void DominatorTree::computeDominanceFrontiers(Function *F) {
  if (DEBUG)
@ -221,21 +416,17 @@ void DominatorTree::computeDominanceFrontiers(Function *F) {
    BasicBlock *X = bb_ptr_X.get();
    DominanceFrontiers[X].clear();

-    // 遍历所有可能的 Z (X支配Z，或者Z就是X)
    for (const auto &bb_ptr_Z : F->getBasicBlocks()) {
      BasicBlock *Z = bb_ptr_Z.get();
      const std::set<BasicBlock *> *domsOfZ = getDominators(Z);

-      // 如果 X 不支配 Z，则 Z 与 DF(X) 无关
-      if (!domsOfZ || domsOfZ->find(X) == domsOfZ->end()) {
+      if (!domsOfZ || domsOfZ->find(X) == domsOfZ->end()) { // Z 不被 X 支配
        continue;
      }

-      // 遍历 Z 的所有后继 Y
      for (BasicBlock *Y : Z->getSuccessors()) {
-        // 如果 Y 不被 X 严格支配，则 Y 在 DF(X) 中
-        // Y 不被 X 严格支配意味着 (Y不被X支配) 或 (Y就是X)
        const std::set<BasicBlock *> *domsOfY = getDominators(Y);
+        // 如果 Y == X，或者 Y 不被 X 严格支配 (即 Y 不被 X 支配)
        if (Y == X || (domsOfY && domsOfY->find(X) == domsOfY->end())) {
          DominanceFrontiers[X].insert(Y);
        }
@ -274,23 +465,21 @@ void DominatorTree::computeDominatorTreeChildren(Function *F) {
 }

 // ==============================================================
-// DominatorTreeAnalysisPass 的实现
+// DominatorTreeAnalysisPass 的实现 (保持不变)
 // ==============================================================

 bool DominatorTreeAnalysisPass::runOnFunction(Function *F, AnalysisManager &AM) {
  // 每次运行时清空旧数据，确保重新计算
  CurrentDominatorTree = std::make_unique<DominatorTree>(F);
-  // 不需要手动清空map，unique_ptr会创建新的DominatorTree对象，其map是空的

  CurrentDominatorTree->computeDominators(F);
-  CurrentDominatorTree->computeIDoms(F); // 修正后的IDoms算法
+  CurrentDominatorTree->computeIDoms(F); // 修正后的LT算法
  CurrentDominatorTree->computeDominanceFrontiers(F);
  CurrentDominatorTree->computeDominatorTreeChildren(F);
-  return false; // 分析遍通常返回 false，表示不修改 IR
+  return false;
 }

 std::unique_ptr<AnalysisResultBase> DominatorTreeAnalysisPass::getResult() {
-  // 返回计算好的 DominatorTree 实例，所有权转移给 AnalysisManager
  return std::move(CurrentDominatorTree);
 }

--- a/src/midend/Pass/Optimize/BuildCFG.cpp
+++ b/src/midend/Pass/Optimize/BuildCFG.cpp
@ -0,0 +1,79 @@
+#include "BuildCFG.h"
+#include "Dom.h"
+#include "Liveness.h"
+#include <iostream>
+#include <queue>
+#include <set>
+
+namespace sysy {
+
+void *BuildCFG::ID = (void *)&BuildCFG::ID; // 定义唯一的 Pass ID
+
+// Reports which analyses this pass needs and which ones it invalidates.
+void BuildCFG::getAnalysisUsage(std::set<void *> &analysisDependencies, std::set<void *> &analysisInvalidations) const {
+  // BuildCFG has no prerequisites, so analysisDependencies is left untouched.
+  // Counter-example (do NOT do this):
+  // analysisDependencies.insert(&DominatorTreeAnalysisPass::ID);
+
+  // Rewriting the CFG edges makes every CFG-based analysis result stale, so
+  // each of them has to be listed as invalidated.
+  analysisInvalidations.insert(&LivenessAnalysisPass::ID);
+  analysisInvalidations.insert(&DominatorTreeAnalysisPass::ID);
+}
+
+// Rebuilds the predecessor/successor lists of every basic block in F from the
+// branch instruction that ends each block.
+//
+// F  - function whose CFG edges are reconstructed.
+// AM - analysis manager (not used by this pass).
+// Returns true when at least one edge was created.
+bool BuildCFG::runOnFunction(Function *F, AnalysisManager &AM) {
+  if (DEBUG) {
+    std::cout << "Running BuildCFG pass on function: " << F->getName() << std::endl;
+  }
+
+  bool changed = false;
+
+  // 1. Drop all existing edges so that no stale link survives the rebuild.
+  for (auto &bb : F->getBasicBlocks()) {
+    bb->clearPredecessors();
+    bb->clearSuccessors();
+  }
+
+  // 2. Re-derive the edges from the last instruction of each block.
+  for (auto &bb : F->getBasicBlocks()) {
+    // terminator() is dereferenced below, which is only meaningful when the
+    // block holds at least one instruction, so empty blocks are skipped first.
+    if (bb->getInstructions().empty()) {
+      continue;
+    }
+
+    Instruction *termInst = bb->terminator()->get();
+    if (!termInst) {
+      continue;
+    }
+
+    // Anything that is not a branch (e.g. a return) has no successors.
+    if (!termInst->isBranch()) {
+      continue;
+    }
+
+    if (termInst->isUnconditional()) {
+      // Unconditional jump: exactly one outgoing edge.
+      auto *brInst = dynamic_cast<UncondBrInst *>(termInst);
+      assert(brInst && "Unconditional branch must be an UncondBrInst");
+      BasicBlock *succ = dynamic_cast<BasicBlock *>(brInst->getBlock());
+      assert(succ && "Branch instruction's target must be a BasicBlock");
+
+      bb->addSuccessor(succ);
+      succ->addPredecessor(bb.get());
+      changed = true;
+    } else if (termInst->isConditional()) {
+      // Conditional jump: one edge per branch target.
+      auto *brInst = dynamic_cast<CondBrInst *>(termInst);
+      assert(brInst && "Conditional branch must be a CondBrInst");
+      BasicBlock *trueSucc = dynamic_cast<BasicBlock *>(brInst->getThenBlock());
+      BasicBlock *falseSucc = dynamic_cast<BasicBlock *>(brInst->getElseBlock());
+      assert(trueSucc && falseSucc && "Branch instruction's targets must be BasicBlocks");
+
+      bb->addSuccessor(trueSucc);
+      trueSucc->addPredecessor(bb.get());
+      bb->addSuccessor(falseSucc);
+      falseSucc->addPredecessor(bb.get());
+      changed = true;
+    }
+  }
+
+  return changed;
+}
+
+} // namespace sysy
--- a/src/midend/Pass/Optimize/LargeArrayToGlobal.cpp
+++ b/src/midend/Pass/Optimize/LargeArrayToGlobal.cpp
@ -0,0 +1,143 @@
+#include "../../include/midend/Pass/Optimize/LargeArrayToGlobal.h"
+#include "../../IR.h"
+#include <unordered_map>
+#include <sstream>
+#include <string>
+
+namespace sysy {
+
+// Renders an IR type as short human-readable text for diagnostic output.
+// Arrays are printed recursively as "[<count> x <element>]"; a null type
+// yields "null" and any kind not listed below yields "unknown".
+static std::string typeToString(Type *type) {
+    if (type == nullptr) {
+        return "null";
+    }
+
+    const auto kind = type->getKind();
+    if (kind == Type::kInt) {
+        return "int";
+    }
+    if (kind == Type::kFloat) {
+        return "float";
+    }
+    if (kind == Type::kPointer) {
+        return "ptr";
+    }
+    if (kind != Type::kArray) {
+        return "unknown";
+    }
+
+    auto *arr = type->as<ArrayType>();
+    std::string text = "[";
+    text += std::to_string(arr->getNumElements());
+    text += " x ";
+    text += typeToString(arr->getElementType());
+    text += "]";
+    return text;
+}
+
+void *LargeArrayToGlobalPass::ID = &LargeArrayToGlobalPass::ID;
+
+// Scans every function that has at least one basic block for alloca
+// instructions whose allocated type occupies 1024 bytes or more and hands each
+// of them to convertAllocaToGlobal.
+//
+// M  - module to transform; a null module is a no-op.
+// AM - analysis manager (not used by this pass).
+// Returns true when at least one alloca was selected for conversion.
+bool LargeArrayToGlobalPass::runOnModule(Module *M, AnalysisManager &AM) {
+    if (!M) {
+        return false;
+    }
+
+    // Candidates are collected first and converted afterwards, so no
+    // instruction list is modified while it is being iterated.
+    std::vector<std::pair<AllocaInst*, Function*>> allocasToConvert;
+
+    for (auto &funcPair : M->getFunctions()) {
+        Function *F = funcPair.second.get();
+        if (!F || F->getBasicBlocks().begin() == F->getBasicBlocks().end()) {
+            continue; // missing function or function without any basic block
+        }
+
+        for (auto &BB : F->getBasicBlocks()) {
+            for (auto &inst : BB->getInstructions()) {
+                auto *alloca = dynamic_cast<AllocaInst*>(inst.get());
+                if (!alloca) {
+                    continue;
+                }
+
+                Type *allocatedType = alloca->getAllocatedType();
+                unsigned size = calculateTypeSize(allocatedType);
+
+                // Diagnostics are emitted only in debug mode, like the other
+                // passes, so normal runs keep stdout clean.
+                if (DEBUG) {
+                    std::cout << "LargeArrayToGlobalPass: Found alloca with size " << size
+                              << " for type " << typeToString(allocatedType) << std::endl;
+                }
+
+                // Allocations of 1KB (1024 bytes) or more become globals.
+                if (size >= 1024) {
+                    if (DEBUG) {
+                        std::cout << "LargeArrayToGlobalPass: Converting array of size " << size << " to global" << std::endl;
+                    }
+                    allocasToConvert.emplace_back(alloca, F);
+                }
+            }
+        }
+    }
+
+    bool changed = false;
+    for (auto [alloca, F] : allocasToConvert) {
+        convertAllocaToGlobal(alloca, F, M);
+        changed = true;
+    }
+
+    return changed;
+}
+
+// Returns the number of bytes occupied by a value of the given type:
+// 4 for int/float, 8 for pointers, element count times element size for
+// arrays (recursively), and 0 for a null type or any other kind.
+//
+// The array product is computed in 64 bits and clamped to the largest
+// unsigned value, so an enormous array can never wrap around to a small size
+// and slip under the caller's size threshold.
+unsigned LargeArrayToGlobalPass::calculateTypeSize(Type *type) {
+    if (!type) return 0;
+
+    switch (type->getKind()) {
+        case Type::kInt:
+        case Type::kFloat:
+            return 4;
+        case Type::kPointer:
+            return 8;
+        case Type::kArray: {
+            auto *arrayType = type->as<ArrayType>();
+            const unsigned long long total =
+                static_cast<unsigned long long>(arrayType->getNumElements()) *
+                calculateTypeSize(arrayType->getElementType());
+            const unsigned maxSize = static_cast<unsigned>(-1);
+            // Saturate instead of silently truncating to 32 bits.
+            return total > maxSize ? maxSize : static_cast<unsigned>(total);
+        }
+        default:
+            return 0;
+    }
+}
+
+// Replaces one stack allocation with a newly created global variable.
+//
+// alloca - allocation to promote; it is erased from F once its uses have
+//          been redirected to the global.
+// F      - function that contains alloca.
+// M      - module that receives the new global.
+//
+// Nothing is changed when the module fails to create the global.
+//
+// NOTE(review): every activation of F shares the resulting global, so this
+// looks unsafe if F can be re-entered (e.g. through recursion) while the
+// array is live - confirm that this cannot happen for converted allocas.
+void LargeArrayToGlobalPass::convertAllocaToGlobal(AllocaInst *alloca, Function *F, Module *M) {
+    Type *allocatedType = alloca->getAllocatedType();
+
+    // Create a unique name for the global variable
+    std::string globalName = generateUniqueGlobalName(alloca, F);
+
+    // Create the global variable - GlobalValue expects pointer type
+    Type *pointerType = Type::getPointerType(allocatedType);
+    GlobalValue *globalVar = M->createGlobalValue(globalName, pointerType);
+
+    if (!globalVar) {
+        return;
+    }
+
+    // Replace all uses of the alloca with the global variable
+    alloca->replaceAllUsesWith(globalVar);
+
+    // Remove the alloca instruction from its basic block. The search stops at
+    // the first match: once erased, the instruction is presumably destroyed
+    // (the list stores smart pointers), so comparing further entries against
+    // the now-dangling pointer would be pointless at best.
+    for (auto &BB : F->getBasicBlocks()) {
+        auto &instructions = BB->getInstructions();
+        for (auto it = instructions.begin(); it != instructions.end(); ++it) {
+            if (it->get() == alloca) {
+                instructions.erase(it);
+                return;
+            }
+        }
+    }
+}
+
+// Builds a name of the form "<function>.<alloca-name>.<n>" for a promoted
+// alloca. An alloca without a name uses "array" as its name part. <n> is a
+// counter kept per "<function>.<alloca-name>" key in a function-local static
+// map; it starts at 0 and grows by one on every call with the same key.
+std::string LargeArrayToGlobalPass::generateUniqueGlobalName(AllocaInst *alloca, Function *F) {
+    static std::unordered_map<std::string, int> nameCounter;
+
+    std::string stem = alloca->getName();
+    if (stem.empty()) {
+        stem = "array";
+    }
+
+    const std::string key = F->getName() + "." + stem;
+    const int serial = nameCounter[key]++;
+
+    std::ostringstream formatted;
+    formatted << key << "." << serial;
+    return formatted.str();
+}
+
+} // namespace sysy
--- a/src/midend/Pass/Pass.cpp
+++ b/src/midend/Pass/Pass.cpp
@ -6,6 +6,8 @@
 #include "Mem2Reg.h"
 #include "Reg2Mem.h"
 #include "SCCP.h"
+#include "BuildCFG.h"
+#include "LargeArrayToGlobal.h"
 #include "Pass.h"
 #include <iostream>
 #include <queue>
@ -35,10 +37,13 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
        3. 添加优化passid
    */
    // 注册分析遍
-    registerAnalysisPass<sysy::DominatorTreeAnalysisPass>();
-    registerAnalysisPass<sysy::LivenessAnalysisPass>();
+    registerAnalysisPass<DominatorTreeAnalysisPass>();
+    registerAnalysisPass<LivenessAnalysisPass>();

    // 注册优化遍
+    registerOptimizationPass<BuildCFG>();
+    registerOptimizationPass<LargeArrayToGlobalPass>();
+    
    registerOptimizationPass<SysYDelInstAfterBrPass>();
    registerOptimizationPass<SysYDelNoPreBLockPass>();
    registerOptimizationPass<SysYBlockMergePass>();
@ -58,6 +63,16 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
      if (DEBUG) std::cout << "Applying -O1 optimizations.\n";
      if (DEBUG) std::cout << "--- Running custom optimization sequence ---\n";

+      if(DEBUG) {
+        std::cout << "=== IR Before CFGOpt Optimizations ===\n";
+        printPasses();
+      }
+
+      this->clearPasses();
+      this->addPass(&BuildCFG::ID);
+      this->addPass(&LargeArrayToGlobalPass::ID);
+      this->run();
+
      this->clearPasses(); 
      this->addPass(&SysYDelInstAfterBrPass::ID);
      this->addPass(&SysYDelNoPreBLockPass::ID);
--- a/src/midend/SysYIRGenerator.cpp
+++ b/src/midend/SysYIRGenerator.cpp
@ -15,6 +15,29 @@
 using namespace std;
 namespace sysy {

+// Computes the multiplier used to replace a signed 32-bit division by the
+// constant d with a multiplication.
+//
+// d - the divisor.
+// Returns {m, k} where k is the bit length of |d| (one less when |d| is a
+// power of two) and m = floor(2^(32 + k - 1) / |d|) + 1.
+// Returns {0, 0} for d == 1 and d == -1.
+// Throws std::runtime_error when d == 0.
+std::pair<long long, int> calculate_signed_magic(int d) {
+    if (d == 0) throw std::runtime_error("Division by zero");
+    if (d == 1 || d == -1) return {0, 0}; // Not used by strength reduction
+
+    // |d| is formed in unsigned arithmetic: negating INT_MIN as a signed int
+    // would overflow, which is undefined behaviour.
+    const unsigned int ad =
+        (d > 0) ? static_cast<unsigned int>(d) : 0u - static_cast<unsigned int>(d);
+
+    // k = number of significant bits in ad.
+    int k = 0;
+    for (unsigned int temp = ad; temp > 0; temp >>= 1) {
+        k++;
+    }
+    if ((ad & (ad - 1)) == 0) { // if power of 2
+        k--;
+    }
+
+    // k is at most 31, so the shift is at most 62 and a 64-bit value is wide
+    // enough; no compiler-specific 128-bit type is needed.
+    const unsigned long long m_val = 1ULL << (32 + k - 1);
+    const unsigned long long m_prime = m_val / ad;
+    const long long m = static_cast<long long>(m_prime + 1);
+
+    return {m, k};
+}
+

 // std::vector<Value*> BinaryValueStack;  ///< 用于存储value的栈
 // std::vector<int> BinaryOpStack;  ///< 用于存储二元表达式的操作符栈 
@ -249,7 +272,26 @@ void SysYIRGenerator::compute() {
            case BinaryOp::ADD: resultValue = builder.createAddInst(lhs, rhs); break;
            case BinaryOp::SUB: resultValue = builder.createSubInst(lhs, rhs); break;
            case BinaryOp::MUL: resultValue = builder.createMulInst(lhs, rhs); break;
-            case BinaryOp::DIV: resultValue = builder.createDivInst(lhs, rhs); break;
+            case BinaryOp::DIV: {
+              // Strength-reduce a signed division by a positive power-of-two
+              // constant. A bare arithmetic shift rounds toward negative
+              // infinity (-7 >> 1 == -4) while integer division truncates
+              // toward zero (-7 / 2 == -3), so a negative dividend is first
+              // biased by (divisor - 1).
+              ConstantInteger *rhsConst = dynamic_cast<ConstantInteger *>(rhs);
+              int divisor = 0;
+              int shift = -1; // -1: not a power-of-two constant, emit a real division
+              if (rhsConst) {
+                divisor = rhsConst->getInt();
+                if (divisor > 0 && (divisor & (divisor - 1)) == 0) {
+                  shift = 0;
+                  for (int temp = divisor; temp > 1; temp >>= 1) {
+                    shift++;
+                  }
+                }
+              }
+
+              if (shift > 0) {
+                // signMask is -1 for a negative dividend and 0 otherwise
+                // (int is 32 bits wide in this IR).
+                auto *signMask = builder.createSRAInst(lhs, ConstantInteger::get(31));
+                // signMask * (divisor - 1) is -(divisor - 1) or 0, so the
+                // subtraction adds (divisor - 1) exactly when lhs < 0.
+                auto *bias = builder.createMulInst(signMask, ConstantInteger::get(divisor - 1));
+                auto *adjusted = builder.createSubInst(lhs, bias);
+                resultValue = builder.createSRAInst(adjusted, ConstantInteger::get(shift));
+              } else if (shift == 0) {
+                // Division by 1: a shift by zero is already exact.
+                resultValue = builder.createSRAInst(lhs, ConstantInteger::get(shift));
+              } else {
+                resultValue = builder.createDivInst(lhs, rhs);
+              }
+              break;
+            }
            case BinaryOp::MOD: resultValue = builder.createRemInst(lhs, rhs); break;
            }
          } else if (commonType == Type::getFloatType()) {
@ -1037,6 +1079,7 @@ std::any SysYIRGenerator::visitFuncDef(SysYParser::FuncDefContext *ctx){
  
  // 从 entryBB 无条件跳转到 funcBodyEntry
  builder.createUncondBrInst(funcBodyEntry);
+  BasicBlock::conectBlocks(entry, funcBodyEntry); // 连接 entryBB 和 funcBodyEntry
  builder.setPosition(funcBodyEntry,funcBodyEntry->end()); // 将插入点设置到 funcBodyEntry

  for (auto item : ctx->blockStmt()->blockItem()) {
--- a/src/midend/SysYIRPrinter.cpp
+++ b/src/midend/SysYIRPrinter.cpp
@ -240,6 +240,8 @@ void SysYPrinter::printInst(Instruction *pInst) {
    case Kind::kMul:
    case Kind::kDiv:
    case Kind::kRem:
+    case Kind::kSRA:
+    case Kind::kMulh:
    case Kind::kFAdd:
    case Kind::kFSub:
    case Kind::kFMul:
@ -272,6 +274,8 @@ void SysYPrinter::printInst(Instruction *pInst) {
        case Kind::kMul: std::cout << "mul"; break;
        case Kind::kDiv: std::cout << "sdiv"; break;
        case Kind::kRem: std::cout << "srem"; break;
+        case Kind::kSRA: std::cout << "ashr"; break;
+        case Kind::kMulh: std::cout << "mulh"; break;
        case Kind::kFAdd: std::cout << "fadd"; break;
        case Kind::kFSub: std::cout << "fsub"; break;
        case Kind::kFMul: std::cout << "fmul"; break;
Author	SHA1	Message	Date
CGH0S7	6550c8a25b	[backend-LAG]添加新的LargeArrayToGlobal中端Pass，以及栈保护逻辑	2025-08-04 01:01:29 +08:00
Lixuanwang	e4ad23a1a5	[backend]修复了寄存器分配器在处理全物理寄存器操作数时的bug	2025-08-03 18:37:08 +08:00
Lixuanwang	ec91a4e259	[backend]更新脚本，现在会拷贝.sy文件到tmp目录	2025-08-03 17:26:09 +08:00
Lixuanwang	92c89f7616	[midend]修正了脚本错误	2025-08-03 17:12:39 +08:00
Lixuanwang	66047dc6a3	Merge branch 'buildcfg' into midend	2025-08-03 16:40:48 +08:00
rain2133	22cf18a1d6	[midend-BuildCFG]修复逻辑	2025-08-03 16:14:31 +08:00
Lixuanwang	19a433c94f	[midend]为脚本添加了-O1参数，支持测试性能	2025-08-03 15:41:29 +08:00
Lixuanwang	45dfbc8d59	Merge branch 'backend' into midend	2025-08-03 15:25:51 +08:00
Lixuanwang	f8e423f579	合并backend、backend-IRC到midend	2025-08-03 15:18:52 +08:00
Lixuanwang	5b43f208ac	Merge branch 'backend-divopt' into midend	2025-08-03 14:53:22 +08:00
Lixuanwang	845f969c2e	[backend-IRC]修复了现场管理与溢出处理的栈偏移量错误问题	2025-08-03 14:42:19 +08:00
CGH0S7	9c5d9ea78c	[optimize]删除多余测试文件	2025-08-03 14:38:27 +08:00
CGH0S7	0ce742a86e	[optimize]添加更为通用的除法强度削减Pass, 不受除数限制替换div指令，不影响当前分数	2025-08-03 14:37:33 +08:00
CGH0S7	f312792fe9	[optimze]添加基础的除法指令优化，目前只对除以2的幂数生效	2025-08-03 13:46:42 +08:00
rain2133	32ea24df56	[midend]修复entryBB和funcBodyEntry的初始化，Dom计算引进逆后续遍历和LT算法，Pass先默认关掉CFGOpt	2025-08-03 00:51:49 +08:00
歪比歪比	a1cf60c420	[midend-BuildCFG]新增BuildCFG优化通道，实现控制流图的构建与分析	2025-08-02 22:48:21 +08:00
Lixuanwang	f879a0f521	[midend]修复了后端不适配中端全局变量定义的问题	2025-08-02 22:06:37 +08:00