From 042b1a5d9964a658092056fe57bcc3195ffda87b Mon Sep 17 00:00:00 2001
From: CGH0S7 <776459475@qq.com>
Date: Tue, 19 Aug 2025 00:13:32 +0800
Subject: [PATCH 1/9] =?UTF-8?q?[midend-tco]=E4=BF=AE=E5=A4=8D=E5=91=BD?=
 =?UTF-8?q?=E5=90=8D=E9=87=8D=E5=A4=8D=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/midend/Pass/Optimize/TailCallOpt.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/midend/Pass/Optimize/TailCallOpt.cpp b/src/midend/Pass/Optimize/TailCallOpt.cpp
index df2d0aa..5fd8ea0 100644
--- a/src/midend/Pass/Optimize/TailCallOpt.cpp
+++ b/src/midend/Pass/Optimize/TailCallOpt.cpp
@@ -55,8 +55,8 @@ bool TailCallOpt::runOnFunction(Function *F, AnalysisManager &AM) {
 
   // 创建一个新的入口基本块，作为循环的前置块
   auto original_entry = F->getEntryBlock();
-  auto new_entry = F->addBasicBlock("tco.entry");
-  auto loop_header = F->addBasicBlock("tco.loop_header");
+  auto new_entry = F->addBasicBlock("tco.entry." + F->getName());
+  auto loop_header = F->addBasicBlock("tco.loop_header." + F->getName());
   
   // 将原入口块中的所有指令移动到循环头块
   loop_header->getInstructions().splice(loop_header->end(), original_entry->getInstructions());

From d79857feb96ffe3fe853450d3a637ee3b50fc5ca Mon Sep 17 00:00:00 2001
From: Lixuanwang <xlwmail@nudt.edu.cn>
Date: Tue, 19 Aug 2025 08:29:43 +0800
Subject: [PATCH 2/9] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E8=84=9A=E6=9C=AC?=
 =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=BC=BA=E8=BE=93=E5=87=BA=E6=88=AA=E6=96=AD?=
 =?UTF-8?q?=E9=80=BB=E8=BE=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 script/runit-single.sh | 31 ++++++++++++++++++++++---------
 script/runit.sh        | 33 ++++++++++++++++++++++-----------
 2 files changed, 44 insertions(+), 20 deletions(-)

diff --git a/script/runit-single.sh b/script/runit-single.sh
index bfbbcdc..786986f 100644
--- a/script/runit-single.sh
+++ b/script/runit-single.sh
@@ -20,18 +20,19 @@ QEMU_RISCV64="qemu-riscv64"
 
 # --- 初始化变量 ---
 EXECUTE_MODE=false
-IR_EXECUTE_MODE=false # 新增
+IR_EXECUTE_MODE=false
 CLEAN_MODE=false
 OPTIMIZE_FLAG=""
 SYSYC_TIMEOUT=30
-LLC_TIMEOUT=10 # 新增
+LLC_TIMEOUT=10
 GCC_TIMEOUT=10
 EXEC_TIMEOUT=30
 MAX_OUTPUT_LINES=20
+MAX_OUTPUT_CHARS=1000
 SY_FILES=()
 PASSED_CASES=0
 FAILED_CASES_LIST=""
-INTERRUPTED=false # 新增
+INTERRUPTED=false
 
 # =================================================================
 # --- 函数定义 ---
@@ -50,22 +51,31 @@ show_help() {
     echo "  -gct N                   设置 gcc 交叉编译超时为 N 秒 (默认: 10)。"
     echo "  -et N                    设置 qemu 自动化执行超时为 N 秒 (默认: 30)。"
     echo "  -ml N, --max-lines N     当输出对比失败时，最多显示 N 行内容 (默认: 20)。"
+    echo "  -mc N, --max-chars N     当输出对比失败时，最多显示 N 个字符 (默认: 1000)。"
     echo "  -h, --help               显示此帮助信息并退出。"
     echo ""
     echo "可在任何时候按 Ctrl+C 来中断测试并显示当前已完成的测例总结。"
 }
 
+# 显示文件内容并根据行数和字符数截断的函数
 display_file_content() {
     local file_path="$1"
     local title="$2"
     local max_lines="$3"
+    local max_chars="$4" # 新增参数
     if [ ! -f "$file_path" ]; then return; fi
     echo -e "$title"
     local line_count
+    local char_count
     line_count=$(wc -l < "$file_path")
+    char_count=$(wc -c < "$file_path")
+
     if [ "$line_count" -gt "$max_lines" ]; then
         head -n "$max_lines" "$file_path"
-        echo -e "\e[33m[... 输出已截断，共 ${line_count} 行 ...]\e[0m"
+        echo -e "\e[33m[... 输出因行数过多 (共 ${line_count} 行) 而截断 ...]\e[0m"
+    elif [ "$char_count" -gt "$max_chars" ]; then
+        head -c "$max_chars" "$file_path"
+        echo -e "\n\e[33m[... 输出因字符数过多 (共 ${char_count} 字符) 而截断 ...]\e[0m"
     else
         cat "$file_path"
     fi
@@ -131,6 +141,7 @@ while [[ "$#" -gt 0 ]]; do
         -gct) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then GCC_TIMEOUT="$2"; shift 2; else echo "错误: -gct 需要一个正整数参数。" >&2; exit 1; fi ;;
         -et) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then EXEC_TIMEOUT="$2"; shift 2; else echo "错误: -et 需要一个正整数参数。" >&2; exit 1; fi ;;
         -ml|--max-lines) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then MAX_OUTPUT_LINES="$2"; shift 2; else echo "错误: --max-lines 需要一个正整数参数。" >&2; exit 1; fi ;;
+        -mc|--max-chars) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then MAX_OUTPUT_CHARS="$2"; shift 2; else echo "错误: --max-chars 需要一个正整数参数。" >&2; exit 1; fi ;;
         -h|--help) show_help; exit 0 ;;
         -*) echo "未知选项: $1"; show_help; exit 1 ;;
         *)
@@ -180,6 +191,8 @@ TOTAL_CASES=${#SY_FILES[@]}
 echo "SysY 单例测试运行器启动..."
 if [ -n "$OPTIMIZE_FLAG" ]; then echo "优化等级: ${OPTIMIZE_FLAG}"; fi
 echo "超时设置: sysyc=${SYSYC_TIMEOUT}s, llc=${LLC_TIMEOUT}s, gcc=${GCC_TIMEOUT}s, qemu=${EXEC_TIMEOUT}s"
+echo "失败输出最大行数: ${MAX_OUTPUT_LINES}"
+echo "失败输出最大字符数: ${MAX_OUTPUT_CHARS}"
 echo ""
 
 for sy_file in "${SY_FILES[@]}"; do
@@ -260,8 +273,8 @@ for sy_file in "${SY_FILES[@]}"; do
                         out_ok=1
                         if ! diff -q <(tr -d '[:space:]' < "${output_actual_file}") <(tr -d '[:space:]' < "${EXPECTED_STDOUT_FILE}") >/dev/null 2>&1; then
                             echo -e "\e[31m  标准输出测试失败。\e[0m"; out_ok=0
-                            display_file_content "${EXPECTED_STDOUT_FILE}" "    \e[36m--- 期望输出 ---\e[0m" "${MAX_OUTPUT_LINES}"
-                            display_file_content "${output_actual_file}" "    \e[36m--- 实际输出 ---\e[0m" "${MAX_OUTPUT_LINES}"
+                            display_file_content "${EXPECTED_STDOUT_FILE}" "    \e[36m--- 期望输出 ---\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
+                            display_file_content "${output_actual_file}" "    \e[36m--- 实际输出 ---\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
                         fi
 
                         if [ "$ret_ok" -eq 1 ] && [ "$out_ok" -eq 1 ]; then echo -e "\e[32m  返回码与标准输出测试成功。\e[0m"; else is_passed=0; fi
@@ -271,8 +284,8 @@ for sy_file in "${SY_FILES[@]}"; do
                             echo -e "\e[32m  标准输出测试成功。\e[0m"
                         else
                             echo -e "\e[31m  标准输出测试失败。\e[0m"; is_passed=0
-                            display_file_content "${output_reference_file}" "    \e[36m--- 期望输出 ---\e[0m" "${MAX_OUTPUT_LINES}"
-                            display_file_content "${output_actual_file}" "    \e[36m--- 实际输出 ---\e[0m" "${MAX_OUTPUT_LINES}"
+                            display_file_content "${output_reference_file}" "    \e[36m--- 期望输出 ---\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
+                            display_file_content "${output_actual_file}" "    \e[36m--- 实际输出 ---\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
                         fi
                     fi
                 else
@@ -301,4 +314,4 @@ for sy_file in "${SY_FILES[@]}"; do
 done
 
 # --- 打印最终总结 ---
-print_summary
\ No newline at end of file
+print_summary
diff --git a/script/runit.sh b/script/runit.sh
index e27c905..c090415 100644
--- a/script/runit.sh
+++ b/script/runit.sh
@@ -27,11 +27,12 @@ LLC_TIMEOUT=10
 GCC_TIMEOUT=10
 EXEC_TIMEOUT=30
 MAX_OUTPUT_LINES=20
+MAX_OUTPUT_CHARS=1000
 TEST_SETS=()
 TOTAL_CASES=0
 PASSED_CASES=0
 FAILED_CASES_LIST=""
-INTERRUPTED=false # 新增：用于标记是否被中断
+INTERRUPTED=false
 
 # =================================================================
 # --- 函数定义 ---
@@ -53,6 +54,7 @@ show_help() {
     echo "  -gct N                   设置 gcc 交叉编译超时为 N 秒 (默认: 10)。"
     echo "  -et N                    设置 qemu 执行超时为 N 秒 (默认: 30)。"
     echo "  -ml N, --max-lines N     当输出对比失败时，最多显示 N 行内容 (默认: 20)。"
+    echo "  -mc N, --max-chars N     当输出对比失败时，最多显示 N 个字符 (默认: 1000)。"
     echo "  -h, --help               显示此帮助信息并退出。"
     echo ""
     echo "注意: 默认行为 (无 -e 或 -eir) 是将 .sy 文件同时编译为 .s (汇编) 和 .ll (IR)，不执行。"
@@ -60,18 +62,25 @@ show_help() {
 }
 
 
-# 显示文件内容并根据行数截断的函数
+# 显示文件内容并根据行数和字符数截断的函数
 display_file_content() {
     local file_path="$1"
     local title="$2"
     local max_lines="$3"
+    local max_chars="$4" # 新增参数
     if [ ! -f "$file_path" ]; then return; fi
     echo -e "$title"
     local line_count
+    local char_count
     line_count=$(wc -l < "$file_path")
+    char_count=$(wc -c < "$file_path")
+
     if [ "$line_count" -gt "$max_lines" ]; then
         head -n "$max_lines" "$file_path"
-        echo -e "\e[33m[... 输出已截断，共 ${line_count} 行 ...]\e[0m"
+        echo -e "\e[33m[... 输出因行数过多 (共 ${line_count} 行) 而截断 ...]\e[0m"
+    elif [ "$char_count" -gt "$max_chars" ]; then
+        head -c "$max_chars" "$file_path"
+        echo -e "\n\e[33m[... 输出因字符数过多 (共 ${char_count} 字符) 而截断 ...]\e[0m"
     else
         cat "$file_path"
     fi
@@ -151,6 +160,7 @@ while [[ "$#" -gt 0 ]]; do
         -gct) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then GCC_TIMEOUT="$2"; shift 2; else echo "错误: -gct 需要一个正整数参数。" >&2; exit 1; fi ;;
         -et) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then EXEC_TIMEOUT="$2"; shift 2; else echo "错误: -et 需要一个正整数参数。" >&2; exit 1; fi ;;
         -ml|--max-lines) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then MAX_OUTPUT_LINES="$2"; shift 2; else echo "错误: --max-lines 需要一个正整数参数。" >&2; exit 1; fi ;;
+        -mc|--max-chars) if [[ -n "$2" && "$2" =~ ^[0-9]+$ ]]; then MAX_OUTPUT_CHARS="$2"; shift 2; else echo "错误: --max-chars 需要一个正整数参数。" >&2; exit 1; fi ;;
         -h|--help) show_help; exit 0 ;;
         *) echo "未知选项: $1"; show_help; exit 1 ;;
     esac
@@ -204,6 +214,7 @@ echo "运行模式: ${RUN_MODE_INFO}"
 echo "${TIMEOUT_INFO}"
 if ${EXECUTE_MODE} || ${IR_EXECUTE_MODE}; then
     echo "失败输出最大行数: ${MAX_OUTPUT_LINES}"
+    echo "失败输出最大字符数: ${MAX_OUTPUT_CHARS}"
 fi
 echo ""
 
@@ -298,8 +309,8 @@ while IFS= read -r sy_file; do
                              [ "$test_logic_passed" -eq 1 ] && echo -e "\e[32m  标准输出测试成功\e[0m"
                         else
                             echo -e "\e[31m  标准输出测试失败\e[0m"
-                            display_file_content "${EXPECTED_STDOUT_FILE}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
-                            display_file_content "${output_actual_file_from_ir}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
+                            display_file_content "${EXPECTED_STDOUT_FILE}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
+                            display_file_content "${output_actual_file_from_ir}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
                             test_logic_passed=0
                         fi
                     else
@@ -308,8 +319,8 @@ while IFS= read -r sy_file; do
                             echo -e "\e[32m  成功: 输出与参考输出匹配\e[0m"
                         else
                             echo -e "\e[31m  失败: 输出不匹配\e[0m"
-                            display_file_content "${output_reference_file}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
-                            display_file_content "${output_actual_file_from_ir}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
+                            display_file_content "${output_reference_file}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
+                            display_file_content "${output_actual_file_from_ir}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
                             test_logic_passed=0
                         fi
                     fi
@@ -375,8 +386,8 @@ while IFS= read -r sy_file; do
                             [ "$test_logic_passed" -eq 1 ] && echo -e "\e[32m  标准输出测试成功\e[0m"
                         else
                             echo -e "\e[31m  标准输出测试失败\e[0m"
-                            display_file_content "${EXPECTED_STDOUT_FILE}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
-                            display_file_content "${output_actual_file_S}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
+                            display_file_content "${EXPECTED_STDOUT_FILE}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
+                            display_file_content "${output_actual_file_S}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
                             test_logic_passed=0
                         fi
                     else
@@ -385,8 +396,8 @@ while IFS= read -r sy_file; do
                             echo -e "\e[32m  成功: 输出与参考输出匹配\e[0m"
                         else
                             echo -e "\e[31m  失败: 输出不匹配\e[0m"
-                            display_file_content "${output_reference_file}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
-                            display_file_content "${output_actual_file_S}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}"
+                            display_file_content "${output_reference_file}" "    \e[36m---------- 期望输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
+                            display_file_content "${output_actual_file_S}" "    \e[36m---------- 实际输出 ----------\e[0m" "${MAX_OUTPUT_LINES}" "${MAX_OUTPUT_CHARS}"
                             test_logic_passed=0
                         fi
                     fi

From 06b4df79eebc42bf0efba4b54021074b81a1db90 Mon Sep 17 00:00:00 2001
From: Lixuanwang <xlwmail@nudt.edu.cn>
Date: Tue, 19 Aug 2025 08:30:55 +0800
Subject: [PATCH 3/9] =?UTF-8?q?[backend]=E6=9A=82=E6=97=B6=E7=A6=81?=
 =?UTF-8?q?=E7=94=A8=E4=B8=AD=E7=AB=AF=E5=BC=BA=E5=BA=A6=E5=89=8A=E5=BC=B1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/backend/RISCv64/RISCv64Backend.cpp |  4 ++--
 src/midend/Pass/Pass.cpp               | 14 +++++++-------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/src/backend/RISCv64/RISCv64Backend.cpp b/src/backend/RISCv64/RISCv64Backend.cpp
index 6be37fc..f6e4515 100644
--- a/src/backend/RISCv64/RISCv64Backend.cpp
+++ b/src/backend/RISCv64/RISCv64Backend.cpp
@@ -208,12 +208,12 @@ std::string RISCv64CodeGen::function_gen(Function* func) {
     std::stringstream ss_after_isel;
     RISCv64AsmPrinter printer_isel(mfunc.get());
     printer_isel.run(ss_after_isel, true);
-    DEBUG = 1;
+    // DEBUG = 1;
     if (DEBUG) {
         std::cerr << "====== Intermediate Representation after Instruction Selection ======\n" 
         << ss_after_isel.str();
     }
-    DEBUG = 0;
+    // DEBUG = 0;
     // 阶段 2: 消除帧索引 (展开伪指令，计算局部变量偏移)
     EliminateFrameIndicesPass efi_pass;
     efi_pass.runOnMachineFunction(mfunc.get());
diff --git a/src/midend/Pass/Pass.cpp b/src/midend/Pass/Pass.cpp
index f711ba6..be7404f 100644
--- a/src/midend/Pass/Pass.cpp
+++ b/src/midend/Pass/Pass.cpp
@@ -185,19 +185,19 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
         printPasses();
       }
       
-      this->clearPasses();
-      this->addPass(&LoopStrengthReduction::ID);
-      this->run();
+      // this->clearPasses();
+      // this->addPass(&LoopStrengthReduction::ID);
+      // this->run();
 
       if(DEBUG) {
         std::cout << "=== IR After Loop Normalization, and Strength Reduction Optimizations ===\n";
         printPasses();
       }
 
-      // 全局强度削弱优化，包括代数优化和魔数除法
-      this->clearPasses();
-      this->addPass(&GlobalStrengthReduction::ID);
-      this->run();
+      // // 全局强度削弱优化，包括代数优化和魔数除法
+      // this->clearPasses();
+      // this->addPass(&GlobalStrengthReduction::ID);
+      // this->run();
 
       if(DEBUG) {
         std::cout << "=== IR After Global Strength Reduction Optimizations ===\n";

From 1ab937961f8eeee8b41202d920a55d004fc4ffef Mon Sep 17 00:00:00 2001
From: Lixuanwang <xlwmail@nudt.edu.cn>
Date: Tue, 19 Aug 2025 14:09:08 +0800
Subject: [PATCH 4/9] =?UTF-8?q?[backend-O1]=E4=BF=AE=E5=A4=8D=E5=90=8E?=
 =?UTF-8?q?=E7=AB=AF=E5=9C=A8-O1=E6=83=85=E5=86=B5=E4=B8=8B=E5=AD=98?=
 =?UTF-8?q?=E5=9C=A8=E7=9A=84=E5=A4=A7=E9=87=8Fbug?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/backend/RISCv64/RISCv64ISel.cpp           |  54 +++++++
 src/backend/RISCv64/RISCv64RegAlloc.cpp       | 134 ++++++++++--------
 src/include/backend/RISCv64/RISCv64ISel.h     |   1 +
 src/include/backend/RISCv64/RISCv64RegAlloc.h |   1 +
 src/midend/Pass/Optimize/Reg2Mem.cpp          |  36 ++---
 src/sysyc.cpp                                 |   2 +-
 6 files changed, 153 insertions(+), 75 deletions(-)

diff --git a/src/backend/RISCv64/RISCv64ISel.cpp b/src/backend/RISCv64/RISCv64ISel.cpp
index a0ad2f1..22ccc22 100644
--- a/src/backend/RISCv64/RISCv64ISel.cpp
+++ b/src/backend/RISCv64/RISCv64ISel.cpp
@@ -103,6 +103,60 @@ void RISCv64ISel::select() {
         }
     }
 
+    if (optLevel > 0) {
+        if (F && !F->getBasicBlocks().empty()) {
+            // 定位到第一个MachineBasicBlock，也就是函数入口
+            BasicBlock* first_ir_block = F->getBasicBlocks_NoRange().front().get();
+            CurMBB = bb_map.at(first_ir_block);
+
+            int int_arg_idx = 0;
+            int fp_arg_idx = 0;
+
+            for (Argument* arg : F->getArguments()) {
+                Type* arg_type = arg->getType();
+
+                // --- 处理整数/指针参数 ---
+                if (!arg_type->isFloat() && int_arg_idx < 8) {
+                    // 1. 获取参数原始的、将被预着色为 a0-a7 的 vreg
+                    unsigned original_vreg = getVReg(arg);
+
+                    // 2. 创建一个新的、安全的 vreg 来持有参数的值
+                    unsigned saved_vreg = getNewVReg(arg_type);
+
+                    // 3. 生成 mv saved_vreg, original_vreg 指令
+                    auto mv = std::make_unique<MachineInstr>(RVOpcodes::MV);
+                    mv->addOperand(std::make_unique<RegOperand>(saved_vreg));
+                    mv->addOperand(std::make_unique<RegOperand>(original_vreg));
+                    CurMBB->addInstruction(std::move(mv));
+
+                    // 4.【关键】更新vreg映射表，将arg的vreg指向新的、安全的vreg
+                    //    这样，后续所有对该参数的 getVReg(arg) 调用都会自动获得 saved_vreg，
+                    //    使得函数体内的代码都使用这个被保存过的值。
+                    vreg_map[arg] = saved_vreg;
+
+                    int_arg_idx++;
+                }
+                // --- 处理浮点参数 ---
+                else if (arg_type->isFloat() && fp_arg_idx < 8) {
+                    unsigned original_vreg = getVReg(arg);
+                    unsigned saved_vreg = getNewVReg(arg_type);
+
+                    // 对于浮点数，使用 fmv.s 指令
+                    auto fmv = std::make_unique<MachineInstr>(RVOpcodes::FMV_S);
+                    fmv->addOperand(std::make_unique<RegOperand>(saved_vreg));
+                    fmv->addOperand(std::make_unique<RegOperand>(original_vreg));
+                    CurMBB->addInstruction(std::move(fmv));
+
+                    // 同样更新映射
+                    vreg_map[arg] = saved_vreg;
+
+                    fp_arg_idx++;
+                }
+                // 对于栈传递的参数，则无需处理
+            }
+        }
+    }
+
     // 遍历基本块，进行指令选择
     for (const auto& bb_ptr : F->getBasicBlocks()) {
         selectBasicBlock(bb_ptr.get());
diff --git a/src/backend/RISCv64/RISCv64RegAlloc.cpp b/src/backend/RISCv64/RISCv64RegAlloc.cpp
index 84d397e..ee40c00 100644
--- a/src/backend/RISCv64/RISCv64RegAlloc.cpp
+++ b/src/backend/RISCv64/RISCv64RegAlloc.cpp
@@ -127,20 +127,46 @@ void RISCv64RegAlloc::precolorByCallingConvention() {
     int int_arg_idx = 0;
     int float_arg_idx = 0;
 
-    for (Argument* arg : F->getArguments()) {
-        unsigned vreg = ISel->getVReg(arg);
-        
-        if (arg->getType()->isFloat()) {
-            if (float_arg_idx < 8) { // fa0-fa7
-                auto preg = static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::F10) + float_arg_idx);
-                color_map[vreg] = preg;
-                float_arg_idx++;
+    if (optLevel > 0)
+    {
+        for (const auto& pair : vreg_to_value_map) {
+            unsigned vreg = pair.first;
+            Value* val = pair.second;
+
+            // 检查这个 Value* 是不是一个 Argument 对象
+            if (auto arg = dynamic_cast<Argument*>(val)) {
+                // 如果是，那么 vreg 就是最初分配给这个参数的 vreg
+                int arg_idx = arg->getIndex();
+
+                if (arg->getType()->isFloat()) {
+                    if (arg_idx < 8) { // fa0-fa7
+                        auto preg = static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::F10) + arg_idx);
+                        color_map[vreg] = preg;
+                    }
+                } else { // 整数或指针
+                    if (arg_idx < 8) { // a0-a7
+                        auto preg = static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::A0) + arg_idx);
+                        color_map[vreg] = preg;
+                    }
+                }
             }
-        } else { // 整数或指针
-            if (int_arg_idx < 8) { // a0-a7
-                auto preg = static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::A0) + int_arg_idx);
-                color_map[vreg] = preg;
-                int_arg_idx++;
+        }
+    } else {
+        for (Argument* arg : F->getArguments()) {
+            unsigned vreg = ISel->getVReg(arg);
+            
+            if (arg->getType()->isFloat()) {
+                if (float_arg_idx < 8) { // fa0-fa7
+                    auto preg = static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::F10) + float_arg_idx);
+                    color_map[vreg] = preg;
+                    float_arg_idx++;
+                }
+            } else { // 整数或指针
+                if (int_arg_idx < 8) { // a0-a7
+                    auto preg = static_cast<PhysicalReg>(static_cast<int>(PhysicalReg::A0) + int_arg_idx);
+                    color_map[vreg] = preg;
+                    int_arg_idx++;
+                }
             }
         }
     }
@@ -477,16 +503,18 @@ void RISCv64RegAlloc::coalesce() {
     unsigned x = getAlias(*def.begin());
     unsigned y = getAlias(*use.begin());
     unsigned u, v;
-    if (precolored.count(y)) { u = y; v = x; } else { u = x; v = y; }
+
+    // 进一步修正：标准化u和v的逻辑，必须同时考虑物理寄存器和已预着色的虚拟寄存器。
+    // 目标是确保如果两个操作数中有一个是预着色的，它一定会被赋给 u。
+    if (precolored.count(y) || coloredNodes.count(y)) { 
+        u = y; v = x; 
+    } else { 
+        u = x; v = y; 
+    }
     
     // 防御性检查，处理物理寄存器之间的传送指令
     if (precolored.count(u) && precolored.count(v)) {
-        // 如果 u 和 v 都是物理寄存器，我们不能合并它们。
-        // 这通常是一条寄存器拷贝指令，例如 `mv a2, a1`。
-        // 把它加入 constrainedMoves 列表，然后直接返回，不再处理。
         constrainedMoves.insert(move);
-        // addWorklist(u) 和 addWorklist(v) 在这里也不需要调用，
-        // 因为它们只对虚拟寄存器有意义。
         return;
     }
 
@@ -498,7 +526,7 @@ void RISCv64RegAlloc::coalesce() {
         if (DEEPERDEBUG) std::cerr << "  -> Trivial coalesce (u == v).\n";
         coalescedMoves.insert(move);
         addWorklist(u);
-        return; // 处理完毕，提前返回
+        return;
     }
 
     if (isFPVReg(u) != isFPVReg(v)) {
@@ -508,10 +536,13 @@ void RISCv64RegAlloc::coalesce() {
         constrainedMoves.insert(move);
         addWorklist(u);
         addWorklist(v);
-        return; // 立即返回，不再进行后续检查
+        return;
     }
     
-    bool pre_interfere = adjList.at(v).count(u);
+    // 注意：如果v已经是u的邻居， pre_interfere 会为true。
+    // 但如果v不在adjList中（例如v是预着色节点），我们需要检查u是否在v的邻居中。
+    // 为了简化，我们假设adjList包含了所有虚拟寄存器。对于(Phys, Virt)对，冲突信息存储在Virt节点的邻接表中。
+    bool pre_interfere = (adjList.count(v) && adjList.at(v).count(u)) || (adjList.count(u) && adjList.at(u).count(v));
     
     if (pre_interfere) {
         if (DEEPERDEBUG) std::cerr << "  -> Constrained (nodes already interfere).\n";
@@ -521,63 +552,50 @@ void RISCv64RegAlloc::coalesce() {
         return;
     }
 
-    bool is_u_precolored = precolored.count(u);
+    // 考虑物理寄存器和已预着色的虚拟寄存器
+    bool u_is_effectively_precolored = precolored.count(u) || coloredNodes.count(u);
     bool can_coalesce = false;
     
-    if (is_u_precolored) {
-        // --- 场景1：u是物理寄存器，使用 George 启发式 ---
-        if (DEEPERDEBUG) std::cerr << "  -> Trying George Heuristic (u is precolored)...\n";
+    if (u_is_effectively_precolored) {
+        // --- 场景1：u是物理寄存器或已预着色虚拟寄存器，使用 George 启发式 ---
+        if (DEEPERDEBUG) std::cerr << "  -> Trying George Heuristic (u is effectively precolored)...\n";
         
-        // 步骤 1: 独立调用 adjacent(v) 获取邻居集合
         VRegSet neighbors_of_v = adjacent(v);
         if (DEEPERDEBUG) {
             std::cerr << "      - Neighbors of " << regIdToString(v) << " to check are (" << neighbors_of_v.size() << "): { ";
             for (unsigned id : neighbors_of_v) std::cerr << regIdToString(id) << " ";
             std::cerr << "}\n";
         }
-
-        // 步骤 2: 使用显式的 for 循环来代替 std::all_of
-        bool george_ok = true; // 默认假设成功，任何一个邻居失败都会将此设为 false
+        
+        bool george_ok = true;
         for (unsigned t : neighbors_of_v) {
-            if (DEEPERDEBUG) {
-                std::cerr << "      - Checking neighbor " << regIdToString(t) << ":\n";
-            }
+            if (DEEPERDEBUG) std::cerr << "      - Checking neighbor " << regIdToString(t) << ":\n";
 
-            // 步骤 3: 独立调用启发式函数
-            bool heuristic_result = georgeHeuristic(t, u);
+            unsigned u_phys_id = precolored.count(u) ? u : (static_cast<unsigned>(PhysicalReg::PHYS_REG_START_ID) + static_cast<unsigned>(color_map.at(u)));
+            bool heuristic_result = georgeHeuristic(t, u_phys_id);
             
             if (DEEPERDEBUG) {
-                std::cerr << "          - georgeHeuristic(" << regIdToString(t) << ", " << regIdToString(u) << ") -> " << (heuristic_result ? "OK" : "FAIL") << "\n";
+                std::cerr << "          - georgeHeuristic(" << regIdToString(t) << ", " << regIdToString(u_phys_id) << ") -> " << (heuristic_result ? "OK" : "FAIL") << "\n";
             }
 
             if (!heuristic_result) {
-                george_ok = false; // 只要有一个邻居不满足条件，整个检查就失败
-                break;             // 并且可以立即停止检查其他邻居
+                george_ok = false;
+                break;
             }
         }
         
-        if (DEEPERDEBUG) {
-             std::cerr << "  -> George Heuristic final result: " << (george_ok ? "OK" : "FAIL") << "\n";
-        }
-
-        if (george_ok) {
-            can_coalesce = true;
-        }
+        if (DEEPERDEBUG) std::cerr << "  -> George Heuristic final result: " << (george_ok ? "OK" : "FAIL") << "\n";
+        if (george_ok) can_coalesce = true;
 
     } else {
-        // --- 场景2：u和v都是虚拟寄存器，使用 Briggs 启发式 ---
+        // --- 场景2：u和v都是未着色的虚拟寄存器，使用 Briggs 启发式 ---
         if (DEEPERDEBUG) std::cerr << "  -> Trying Briggs Heuristic (u and v are virtual)...\n";
         
         bool briggs_ok = briggsHeuristic(u, v);
         if (DEEPERDEBUG) std::cerr << "      - briggsHeuristic(" << regIdToString(u) << ", " << regIdToString(v) << ") -> " << (briggs_ok ? "OK" : "FAIL") << "\n";
-
-        if (briggs_ok) {
-            can_coalesce = true;
-        }
+        if (briggs_ok) can_coalesce = true;
     }
 
-    // --- 根据启发式结果进行最终决策 ---
-    
     if (can_coalesce) {
         if (DEEPERDEBUG) std::cerr << "  -> Heuristic OK. Combining " << regIdToString(v) << " into " << regIdToString(u) << ".\n";
         coalescedMoves.insert(move);
@@ -1133,7 +1151,7 @@ unsigned RISCv64RegAlloc::getAlias(unsigned n) {
 }
 
 void RISCv64RegAlloc::addWorklist(unsigned u) {
-    if (precolored.count(u)) return;
+    if (precolored.count(u) || color_map.count(u)) return;
 
     int K = isFPVReg(u) ? K_fp : K_int;
     if (!moveRelated(u) && degree.at(u) < K) {
@@ -1208,8 +1226,12 @@ bool RISCv64RegAlloc::georgeHeuristic(unsigned t, unsigned u) {
     }
     
     int K = isFPVReg(t) ? K_fp : K_int;
-    // adjList.at(t) 现在是安全的，因为 degree.count(t) > 0 保证了 adjList.count(t) > 0
-    return degree.at(t) < K || precolored.count(u) || adjList.at(t).count(u);
+    
+    // 缺陷 #2 修正: 移除了致命的 || precolored.count(u) 条件。
+    // 在此函数的上下文中，u 总是预着色的物理寄存器ID，导致旧的条件永远为true，使整个启发式失效。
+    // 正确的逻辑是检查：邻居t的度数是否小于K，或者t是否已经与u冲突。
+    // return degree.at(t) < K || adjList.at(t).count(u);
+    return degree.at(t) < K || !adjList.at(t).count(u);
 }
 
 void RISCv64RegAlloc::combine(unsigned u, unsigned v) {
@@ -1257,7 +1279,7 @@ void RISCv64RegAlloc::freezeMoves(unsigned u) {
         activeMoves.erase(move);
         frozenMoves.insert(move);
 
-        if (!precolored.count(v_alias) && nodeMoves(v_alias).empty() && degree.at(v_alias) < (isFPVReg(v_alias) ? K_fp : K_int)) {
+        if (!precolored.count(v_alias) && !coloredNodes.count(v_alias) && nodeMoves(v_alias).empty() && degree.at(v_alias) < (isFPVReg(v_alias) ? K_fp : K_int)) {
             freezeWorklist.erase(v_alias);
             simplifyWorklist.insert(v_alias);
             if (DEEPERDEBUG) {
diff --git a/src/include/backend/RISCv64/RISCv64ISel.h b/src/include/backend/RISCv64/RISCv64ISel.h
index 35fb7a7..7c52fb0 100644
--- a/src/include/backend/RISCv64/RISCv64ISel.h
+++ b/src/include/backend/RISCv64/RISCv64ISel.h
@@ -11,6 +11,7 @@ namespace sysy {
 
 extern int DEBUG;
 extern int DEEPDEBUG;
+extern int optLevel;
 
 namespace sysy {
 
diff --git a/src/include/backend/RISCv64/RISCv64RegAlloc.h b/src/include/backend/RISCv64/RISCv64RegAlloc.h
index 1d76fac..123c403 100644
--- a/src/include/backend/RISCv64/RISCv64RegAlloc.h
+++ b/src/include/backend/RISCv64/RISCv64RegAlloc.h
@@ -12,6 +12,7 @@ extern int DEBUG;
 extern int DEEPDEBUG;
 extern int DEBUGLENGTH; // 用于限制调试输出的长度
 extern int DEEPERDEBUG; // 用于更深层次的调试输出
+extern int optLevel;
 
 namespace sysy {
 
diff --git a/src/midend/Pass/Optimize/Reg2Mem.cpp b/src/midend/Pass/Optimize/Reg2Mem.cpp
index cd52f51..3e4303a 100644
--- a/src/midend/Pass/Optimize/Reg2Mem.cpp
+++ b/src/midend/Pass/Optimize/Reg2Mem.cpp
@@ -70,20 +70,20 @@ void Reg2MemContext::allocateMemoryForSSAValues(Function *func) {
 
   // 1. 为函数参数分配内存
   builder->setPosition(entryBlock, entryBlock->begin()); // 确保在入口块的开始位置插入
-  for (auto arg : func->getArguments()) {
-    // 默认情况下，将所有参数是提升到内存
-    if (isPromotableToMemory(arg)) {
-      // 参数的类型就是 AllocaInst 需要分配的类型
-      AllocaInst *alloca = builder->createAllocaInst(Type::getPointerType(arg->getType()), arg->getName() + ".reg2mem");
-      // 将参数值 store 到 alloca 中 (这是 Mem2Reg 逆转的关键一步)
-      valueToAllocaMap[arg] = alloca;
+  // for (auto arg : func->getArguments()) {
+  //   // 默认情况下，将所有参数是提升到内存
+  //   if (isPromotableToMemory(arg)) {
+  //     // 参数的类型就是 AllocaInst 需要分配的类型
+  //     AllocaInst *alloca = builder->createAllocaInst(Type::getPointerType(arg->getType()), arg->getName() + ".reg2mem");
+  //     // 将参数值 store 到 alloca 中 (这是 Mem2Reg 逆转的关键一步)
+  //     valueToAllocaMap[arg] = alloca;
 
-      // 确保 alloca 位于入口块的顶部，但在所有参数的 store 指令之前
-      // 通常 alloca 都在 entry block 的最开始
-      // 这里我们只是创建，并让 builder 决定插入位置 (通常在当前插入点)
-      // 如果需要严格控制顺序，可能需要手动 insert 到 instruction list
-    }
-  }
+  //     // 确保 alloca 位于入口块的顶部，但在所有参数的 store 指令之前
+  //     // 通常 alloca 都在 entry block 的最开始
+  //     // 这里我们只是创建，并让 builder 决定插入位置 (通常在当前插入点)
+  //     // 如果需要严格控制顺序，可能需要手动 insert 到 instruction list
+  //   }
+  // }
 
   // 2. 为指令结果分配内存
   // 遍历所有基本块和指令，找出所有需要分配 Alloca 的指令结果
@@ -123,11 +123,11 @@ void Reg2MemContext::allocateMemoryForSSAValues(Function *func) {
   }
 
   // 插入所有参数的初始 Store 指令
-  for (auto arg : func->getArguments()) {
-      if (valueToAllocaMap.count(arg)) { // 检查是否为其分配了 alloca
-          builder->createStoreInst(arg, valueToAllocaMap[arg]);
-      }
-  }
+  // for (auto arg : func->getArguments()) {
+  //     if (valueToAllocaMap.count(arg)) { // 检查是否为其分配了 alloca
+  //         builder->createStoreInst(arg, valueToAllocaMap[arg]);
+  //     }
+  // }
   
   builder->setPosition(entryBlock, entryBlock->terminator());
 }
diff --git a/src/sysyc.cpp b/src/sysyc.cpp
index 78930a0..7ef22f9 100644
--- a/src/sysyc.cpp
+++ b/src/sysyc.cpp
@@ -28,7 +28,7 @@ static string argStopAfter;
 static string argInputFile;
 static bool argFormat = false; // 目前未使用，但保留
 static string argOutputFilename;
-static int optLevel = 0; // 优化级别，默认为0 (不加-O参数时)
+int optLevel = 0; // 优化级别，默认为0 (不加-O参数时)
 
 void usage(int code) {
   const char *msg = "Usage: sysyc [options] inputfile\n\n"

From 7af38270982418c4ebd10c79b6d6787c8b855f6d Mon Sep 17 00:00:00 2001
From: CGH0S7 <776459475@qq.com>
Date: Tue, 19 Aug 2025 16:37:52 +0800
Subject: [PATCH 5/9] =?UTF-8?q?[midend-m2r]=E4=BF=AE=E6=94=B9Mem2Reg.cpp?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/midend/Pass/Optimize/Mem2Reg.cpp | 296 +++++++++++++++------------
 1 file changed, 166 insertions(+), 130 deletions(-)

diff --git a/src/midend/Pass/Optimize/Mem2Reg.cpp b/src/midend/Pass/Optimize/Mem2Reg.cpp
index f1c7d59..5573eb7 100644
--- a/src/midend/Pass/Optimize/Mem2Reg.cpp
+++ b/src/midend/Pass/Optimize/Mem2Reg.cpp
@@ -7,6 +7,8 @@
 #include "SysYIROptUtils.h"
 #include <cassert>   // 用于断言
 #include <iostream>  // 用于调试输出
+#include <queue>     // 用于工作队列
+#include <unordered_set> // 用于已访问集合
 
 namespace sysy {
 
@@ -100,29 +102,31 @@ bool Mem2RegContext::isPromotableAlloca(AllocaInst *alloca) {
       // 这种 GEP 有两个操作数：基指针和索引。
 
       // 检查 GEP 的操作数数量和索引值
-      // GEP 的操作数通常是：<base_pointer>, <index_1>, <index_2>, ...
-      // 对于一个 `i32*` 类型的 `alloca`，如果它被 GEP 使用，那么 GEP 的第一个索引通常是 `0`
-      // （表示解引用指针本身），后续索引才是数组元素的索引。
-      // 如果 GEP 的操作数数量大于 2 (即 `base_ptr` 和 `index_0` 之外还有其他索引)，
-      // 或者 `index_0` 不是常量 0，则它可能是一个复杂的数组访问。
-      // 假设 `gep->getNumOperands()` 和 `gep->getOperand(idx)->getValue()`
-      // 假设 `ConstantInt` 类用于表示常量整数值
-      if (gep->getNumOperands() > 2) { // 如果有超过一个索引（除了基指针的第一个隐式索引）
-        // std::cerr << "Mem2Reg: Not promotable (GEP with multiple indices): " << alloca->name() << std::endl;
-        return false; // 复杂 GEP，通常表示数组或结构体字段访问
+      // GEP 的操作数包括：基指针 + 索引列表
+      // getNumOperands() == 1 表示只有基指针，没有索引（不应该出现）
+      // getNumOperands() == 2 表示基指针 + 1个索引
+      // getNumOperands() > 2 表示基指针 + 多个索引（复杂访问）
+      
+      if (gep->getNumOperands() < 2) {
+        // 没有索引的GEP是无效的
+        return false;
       }
-      if (gep->getNumOperands() == 2) {                            // 只有基指针和一个索引
-        Value *firstIndexVal = gep->getOperand(1); // 获取第一个索引值
-        if (auto constInt = dynamic_cast<ConstantInteger *>(firstIndexVal)) {
-          if (constInt->getInt() != 0) {
-            // std::cerr << "Mem2Reg: Not promotable (GEP with non-zero first index): " << alloca->name() << std::endl;
-            return false; // 索引不是0，表示访问数组的非第一个元素
-          }
-        } else {
-          // std::cerr << "Mem2Reg: Not promotable (GEP with non-constant first index): " << alloca->name() <<
-          // std::endl;
-          return false; // 索引不是常量，表示动态数组访问
+      
+      if (gep->getNumOperands() > 2) {
+        // 多个索引，表示复杂的数组或结构体访问
+        return false;
+      }
+      
+      // 只有一个索引的情况，检查索引是否为常量0
+      Value *firstIndexVal = gep->getOperand(1);
+      if (auto constInt = dynamic_cast<ConstantInteger *>(firstIndexVal)) {
+        if (constInt->getInt() != 0) {
+          // 索引不是0，表示访问数组元素
+          return false;
         }
+      } else {
+        // 索引不是常量，表示动态数组访问
+        return false;
       }
 
       // 此外，GEP 的结果也必须只被 LoadInst 或 StoreInst 使用
@@ -151,23 +155,35 @@ bool Mem2RegContext::isPromotableAlloca(AllocaInst *alloca) {
 
 // 收集所有对给定 AllocaInst 进行存储的 StoreInst
 void Mem2RegContext::collectStores(AllocaInst *alloca) {
-  // 遍历 alloca 的所有用途
-  for (auto use : alloca->getUses()) {
-    auto user = use->getUser();
-    if (!user)
-      continue;
-
-    if (auto storeInst = dynamic_cast<StoreInst *>(user)) {
-      allocaToStoresMap[alloca].insert(storeInst);
-      allocaToDefBlocksMap[alloca].insert(storeInst->getParent());
-    } else if (auto gep = dynamic_cast<GetElementPtrInst *>(user)) {
-      // 如果是 GEP，递归收集其下游的 store
-      for (auto gep_use : gep->getUses()) {
-        if (auto gep_store = dynamic_cast<StoreInst *>(gep_use->getUser())) {
-          allocaToStoresMap[alloca].insert(gep_store);
-          allocaToDefBlocksMap[alloca].insert(gep_store->getParent());
-        }
+  // 使用工作队列处理所有可能的用户，包括嵌套的GEP
+  std::queue<Value*> workQueue;
+  std::unordered_set<Value*> visited;
+  
+  // 初始化工作队列
+  workQueue.push(alloca);
+  visited.insert(alloca);
+  
+  while (!workQueue.empty()) {
+    Value* current = workQueue.front();
+    workQueue.pop();
+    
+    // 遍历当前值的所有用户
+    for (auto use : current->getUses()) {
+      auto user = use->getUser();
+      if (!user || visited.count(user))
+        continue;
+      
+      visited.insert(user);
+      
+      if (auto storeInst = dynamic_cast<StoreInst *>(user)) {
+        // 找到一个store指令
+        allocaToStoresMap[alloca].insert(storeInst);
+        allocaToDefBlocksMap[alloca].insert(storeInst->getParent());
+      } else if (auto gep = dynamic_cast<GetElementPtrInst *>(user)) {
+        // 找到一个GEP指令，将其加入工作队列继续处理
+        workQueue.push(gep);
       }
+      // 其他类型的用户不处理
     }
   }
 }
@@ -211,56 +227,70 @@ void Mem2RegContext::insertPhis(AllocaInst *alloca, const std::unordered_set<Bas
 }
 
 // 对支配树进行深度优先遍历，重命名变量并替换 load/store 指令
-// 移除了 AllocaInst *currentAlloca 参数，因为这个函数是为整个基本块处理所有可提升的 Alloca
 void Mem2RegContext::renameVariables(BasicBlock *currentBB) {
-  // 1. 在函数开始时，记录每个 promotableAlloca 的当前栈深度。
-  // 这将用于在函数返回时精确地回溯栈状态。
+  // 记录每个 alloca 在此基本块开始时的栈深度，用于退出时精确回溯
   std::map<AllocaInst *, size_t> originalStackSizes;
   for (auto alloca : promotableAllocas) {
     originalStackSizes[alloca] = allocaToValueStackMap[alloca].size();
   }
 
   // --------------------------------------------------------------------
-  // 处理当前基本块的指令
+  // 第一步：处理当前基本块开头的 PHI 指令，为它们分配新的 SSA 值
+  // --------------------------------------------------------------------
+  for (auto alloca : promotableAllocas) {
+    if (allocaToPhiMap[alloca].count(currentBB)) {
+      PhiInst *phiInst = allocaToPhiMap[alloca][currentBB];
+      // 将 PHI 指令本身作为新的 SSA 值压入栈顶
+      allocaToValueStackMap[alloca].push(phiInst);
+      if (DEBUG) {
+        std::cout << "Mem2Reg: Pushed PHI " << (phiInst->getName().empty() ? "anonymous" : phiInst->getName()) 
+                  << " for alloca " << alloca->getName() << ". Stack size: " << allocaToValueStackMap[alloca].size() << std::endl;
+      }
+    }
+  }
+
+  // --------------------------------------------------------------------
+  // 第二步：处理当前基本块中的非PHI指令，替换 load/store 指令
   // --------------------------------------------------------------------
   for (auto instIter = currentBB->getInstructions().begin(); instIter != currentBB->getInstructions().end();) {
     Instruction *inst = instIter->get();
-      bool instDeleted = false;
+    bool instDeleted = false;
 
-    // 处理 Phi 指令 (如果是当前 alloca 的 Phi)
-    if (auto phiInst = dynamic_cast<PhiInst *>(inst)) {
-      // 检查这个 Phi 是否是为某个可提升的 alloca 插入的
-      for (auto alloca : promotableAllocas) {
-        if (allocaToPhiMap[alloca].count(currentBB) && allocaToPhiMap[alloca][currentBB] == phiInst) {
-          // 为 Phi 指令的输出创建一个新的 SSA 值，并压入值栈
-          allocaToValueStackMap[alloca].push(phiInst);
-          if (DEBUG) {
-            std::cout << "Mem2Reg: Pushed Phi " << (phiInst->getName().empty() ? "anonymous" : phiInst->getName()) << " for alloca " << alloca->getName()
-              << ". Stack size: " << allocaToValueStackMap[alloca].size() << std::endl;
-          }
-          break; // 找到对应的 alloca，处理下一个指令
-        }
-      }
+    // 跳过PHI指令，它们已在第一步处理
+    if (dynamic_cast<PhiInst *>(inst)) {
+      ++instIter;
+      continue;
     }
+
     // 处理 LoadInst
-    else if (auto loadInst = dynamic_cast<LoadInst *>(inst)) {
+    if (auto loadInst = dynamic_cast<LoadInst *>(inst)) {
       for (auto alloca : promotableAllocas) {
-        // 检查 LoadInst 的指针是否直接是 alloca，或者是指向 alloca 的 GEP
         Value *ptrOperand = loadInst->getPointer();
-        if (ptrOperand == alloca || (dynamic_cast<GetElementPtrInst *>(ptrOperand) &&
-                                     dynamic_cast<GetElementPtrInst *>(ptrOperand)->getBasePointer() == alloca)) {
-          assert(!allocaToValueStackMap[alloca].empty() && "Value stack empty for alloca during load replacement!");
-          if (DEBUG) {
-            std::cout << "Mem2Reg: Replacing load "
-                      << (ptrOperand->getName().empty() ? "anonymous" : ptrOperand->getName()) << " with SSA value "
-                      << (allocaToValueStackMap[alloca].top()->getName().empty()
-                              ? "anonymous"
-                              : allocaToValueStackMap[alloca].top()->getName())
-                      << " for alloca " << alloca->getName() << std::endl;
-            std::cout << "Mem2Reg: allocaToValueStackMap[" << alloca->getName()
-                      << "] size: " << allocaToValueStackMap[alloca].size() << std::endl;
+        
+        // 优化：只做一次dynamic_cast
+        auto gepPtr = dynamic_cast<GetElementPtrInst *>(ptrOperand);
+        if (ptrOperand == alloca || 
+            (gepPtr && gepPtr->getOperand(0) == alloca)) {  // GEP的第一个操作数是基指针
+          
+          if (allocaToValueStackMap[alloca].empty()) {
+            // 栈为空时使用未定义值而非崩溃
+            if (DEBUG) {
+              std::cerr << "Warning: Value stack empty for alloca " << alloca->getName() 
+                        << " during load replacement. Using undefined value." << std::endl;
+            }
+            Value *undefValue = UndefinedValue::get(alloca->getType()->as<PointerType>()->getBaseType());
+            loadInst->replaceAllUsesWith(undefValue);
+          } else {
+            Value *currentValue = allocaToValueStackMap[alloca].top();
+            
+            if (DEBUG) {
+              std::cout << "Mem2Reg: Replacing load with SSA value " 
+                        << (currentValue->getName().empty() ? "anonymous" : currentValue->getName())
+                        << " for alloca " << alloca->getName() << std::endl;
+            }
+            
+            loadInst->replaceAllUsesWith(currentValue);
           }
-          loadInst->replaceAllUsesWith(allocaToValueStackMap[alloca].top());
           instIter = SysYIROptUtils::usedelete(instIter);
           instDeleted = true;
           break;
@@ -270,98 +300,104 @@ void Mem2RegContext::renameVariables(BasicBlock *currentBB) {
     // 处理 StoreInst
     else if (auto storeInst = dynamic_cast<StoreInst *>(inst)) {
       for (auto alloca : promotableAllocas) {
-        // 检查 StoreInst 的指针是否直接是 alloca，或者是指向 alloca 的 GEP
         Value *ptrOperand = storeInst->getPointer();
-        if (ptrOperand == alloca || (dynamic_cast<GetElementPtrInst *>(ptrOperand) &&
-                                     dynamic_cast<GetElementPtrInst *>(ptrOperand)->getBasePointer() == alloca)) {
+        
+        // 优化：只做一次dynamic_cast
+        auto gepPtr = dynamic_cast<GetElementPtrInst *>(ptrOperand);
+        if (ptrOperand == alloca || 
+            (gepPtr && gepPtr->getOperand(0) == alloca)) {  // GEP的第一个操作数是基指针
+          
+          Value *storedValue = storeInst->getValue();
+          allocaToValueStackMap[alloca].push(storedValue);
+          
           if (DEBUG) {
-            std::cout << "Mem2Reg: Replacing store to "
-                      << (ptrOperand->getName().empty() ? "anonymous" : ptrOperand->getName()) << " with SSA value "
-                      << (storeInst->getValue()->getName().empty() ? "anonymous" : storeInst->getValue()->getName())
-                      << " for alloca " << alloca->getName() << std::endl;
-            std::cout << "Mem2Reg: allocaToValueStackMap[" << alloca->getName()
-                      << "] size before push: " << allocaToValueStackMap[alloca].size() << std::endl;
+            std::cout << "Mem2Reg: Replacing store with SSA value " 
+                      << (storedValue->getName().empty() ? "anonymous" : storedValue->getName())
+                      << " for alloca " << alloca->getName() 
+                      << ". Stack size: " << allocaToValueStackMap[alloca].size() << std::endl;
           }
-          allocaToValueStackMap[alloca].push(storeInst->getValue());
+          
           instIter = SysYIROptUtils::usedelete(instIter);
           instDeleted = true;
-          if (DEBUG) {
-            std::cout << "Mem2Reg: allocaToValueStackMap[" << alloca->getName()
-                      << "] size after push: " << allocaToValueStackMap[alloca].size() << std::endl;
-          }
           break;
         }
       }
     }
+
     if (!instDeleted) {
-      ++instIter; // 如果指令没有被删除，移动到下一个
+      ++instIter;
     }
   }
+
   // --------------------------------------------------------------------
-  // 处理后继基本块的 Phi 指令参数
+  // 第三步：为后继基本块中的 PHI 指令填充参数
   // --------------------------------------------------------------------
   for (auto successorBB : currentBB->getSuccessors()) {
-    if (!successorBB)
-      continue;
+    if (!successorBB) continue;
+    
     for (auto alloca : promotableAllocas) {
-      // 如果后继基本块包含为当前 alloca 插入的 Phi 指令
       if (allocaToPhiMap[alloca].count(successorBB)) {
-        auto phiInst = allocaToPhiMap[alloca][successorBB];
-        // 为 Phi 指令添加来自当前基本块的参数
-        // 参数值是当前 alloca 值栈顶部的 SSA 值
-        assert(!allocaToValueStackMap[alloca].empty() && "Value stack empty for alloca when setting phi operand!");
-        phiInst->addIncoming(allocaToValueStackMap[alloca].top(), currentBB);
-        if (DEBUG) {
-          std::cout << "Mem2Reg: Added incoming arg to Phi "
-                    << (phiInst->getName().empty() ? "anonymous" : phiInst->getName()) << " from "
-                    << currentBB->getName() << " with value "
-                    << (allocaToValueStackMap[alloca].top()->getName().empty()
-                            ? "anonymous"
-                            : allocaToValueStackMap[alloca].top()->getName())
-                    << std::endl;
+        PhiInst *phiInst = allocaToPhiMap[alloca][successorBB];
+        if (!allocaToValueStackMap[alloca].empty()) {
+          Value *currentValue = allocaToValueStackMap[alloca].top();
+          phiInst->addIncoming(currentValue, currentBB);
+          
+          if (DEBUG) {
+            std::cout << "Mem2Reg: Added incoming arg to PHI " 
+                      << (phiInst->getName().empty() ? "anonymous" : phiInst->getName())
+                      << " from " << currentBB->getName() 
+                      << " with value " << (currentValue->getName().empty() ? "anonymous" : currentValue->getName())
+                      << std::endl;
+          }
+        } else {
+          // 栈为空时使用未定义值
+          if (DEBUG) {
+            std::cerr << "Warning: Value stack empty for alloca " << alloca->getName() 
+                      << " when setting phi operand. Using undefined value." << std::endl;
+          }
+          Value *undefValue = UndefinedValue::get(alloca->getType()->as<PointerType>()->getBaseType());
+          phiInst->addIncoming(undefValue, currentBB);
         }
       }
     }
   }
-  // --------------------------------------------------------------------
-  // 递归访问支配树的子节点
-  // --------------------------------------------------------------------
-  const std::set<BasicBlock *> *dominatedBlocks = dt->getDominatorTreeChildren(currentBB);
-  if (dominatedBlocks) { // 检查是否存在子节点
-    if(DEBUG){
-      std::cout << "Mem2Reg: Processing dominated blocks for " << currentBB->getName() << std::endl;
-      for (auto dominatedBB : *dominatedBlocks) {
-        std::cout << "Mem2Reg: Dominated block: " << (dominatedBB ? dominatedBB->getName() : "null") << std::endl;
-      }
-    }
-    for (auto dominatedBB : *dominatedBlocks) {
-      if (dominatedBB) { // 确保子块有效
-        if (DEBUG) {
-          std::cout << "Mem2Reg: Recursively renaming variables in dominated block: " << dominatedBB->getName()
-                    << std::endl;
-        }
-        renameVariables(dominatedBB); // 递归调用，不再传递 currentAlloca
-      }
-    }
-  }
 
   // --------------------------------------------------------------------
-  // 退出基本块时，弹出在此块中压入值栈的 SSA 值，恢复栈到进入该块时的状态
+  // 第四步：递归处理支配树的子节点
+  // --------------------------------------------------------------------
+  const std::set<BasicBlock *> *dominatedBlocks = dt->getDominatorTreeChildren(currentBB);
+  if (dominatedBlocks) {
+    if (DEBUG) {
+      std::cout << "Mem2Reg: Processing " << dominatedBlocks->size() 
+                << " dominated blocks for " << currentBB->getName() << std::endl;
+    }
+    
+    for (auto dominatedBB : *dominatedBlocks) {
+      if (dominatedBB) {
+        if (DEBUG) {
+          std::cout << "Mem2Reg: Recursively processing dominated block: " 
+                    << dominatedBB->getName() << std::endl;
+        }
+        renameVariables(dominatedBB);
+      }
+    }
+  }
+
+  // --------------------------------------------------------------------
+  // 第五步：退出时恢复值栈状态
   // --------------------------------------------------------------------
   for (auto alloca : promotableAllocas) {
     while (allocaToValueStackMap[alloca].size() > originalStackSizes[alloca]) {
       if (DEBUG) {
         std::cout << "Mem2Reg: Popping value "
-                  << (allocaToValueStackMap[alloca].top()->getName().empty()
-                          ? "anonymous"
-                          : allocaToValueStackMap[alloca].top()->getName())
-                  << " for alloca " << alloca->getName() << ". Stack size: " << allocaToValueStackMap[alloca].size()
+                  << (allocaToValueStackMap[alloca].top()->getName().empty() ? "anonymous" : allocaToValueStackMap[alloca].top()->getName())
+                  << " for alloca " << alloca->getName() 
+                  << ". Stack size: " << allocaToValueStackMap[alloca].size() 
                   << " -> " << (allocaToValueStackMap[alloca].size() - 1) << std::endl;
       }
       allocaToValueStackMap[alloca].pop();
     }
   }
-
 }
 
 // 删除所有原始的 AllocaInst、LoadInst 和 StoreInst

From 3c49183280a545e3d6c182d4dd62d0afbc004df7 Mon Sep 17 00:00:00 2001
From: CGH0S7 <776459475@qq.com>
Date: Tue, 19 Aug 2025 17:32:01 +0800
Subject: [PATCH 6/9] =?UTF-8?q?[midend-m2r]=E7=A7=BB=E9=99=A4=E9=94=99?=
 =?UTF-8?q?=E8=AF=AF=E7=9A=84LAG=E4=BC=98=E5=8C=96=EF=BC=8Cperformance?=
 =?UTF-8?q?=E9=80=9A=E8=BF=87?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 doc/CompilerDesign.md                         |  58 ++++---
 .../midend/Pass/Optimize/LargeArrayToGlobal.h |  24 ---
 src/midend/CMakeLists.txt                     |   1 -
 .../Pass/Optimize/LargeArrayToGlobal.cpp      | 145 ------------------
 src/midend/Pass/Pass.cpp                      |  10 +-
 5 files changed, 29 insertions(+), 209 deletions(-)
 delete mode 100644 src/include/midend/Pass/Optimize/LargeArrayToGlobal.h
 delete mode 100644 src/midend/Pass/Optimize/LargeArrayToGlobal.cpp

diff --git a/doc/CompilerDesign.md b/doc/CompilerDesign.md
index f939a09..4b37c9d 100644
--- a/doc/CompilerDesign.md
+++ b/doc/CompilerDesign.md
@@ -74,31 +74,30 @@ graph TD
   - **消除 `fallthrough` 现象**：  
   通过确保所有基本块均以终结指令结尾，消除基本块间的 `fallthrough`，简化了控制流图（CFG）的构建和分析。这一做法提升了编译器整体质量，使中端各类 Pass 的编写和维护更加规范和高效。
 
-
 ### 3.2. 核心优化详解
 
 编译器的分析和优化被组织成一系列独立的“遍”（Pass）。每个 Pass 都是一个独立的算法模块，对 IR 进行特定的分析或变换。这种设计具有高度的模块化和可扩展性。
 
 #### 3.2.1. SSA 构建与解构
 
-- **Mem2Reg (`Mem2Reg.cpp`)**: 
+- **Mem2Reg (`Mem2Reg.cpp`)**:
   - **目标**: 将对栈内存 (`alloca`) 的 `load`/`store` 操作，提升为对虚拟寄存器的直接操作，并构建 SSA 形式。
   - **技术**: 该过程是实现 SSA 的关键。它依赖于**支配树 (Dominator Tree)** 分析，通过寻找变量定义块的**支配边界 (Dominance Frontier)** 来确定在何处插入 **Φ (Phi) 函数**。
   - **实现**: `Mem2RegContext::run` 驱动此过程。首先调用 `isPromotableAlloca` 识别所有仅被 `load`/`store` 使用的标量 `alloca`。然后，`insertPhis` 根据支配边界信息在必要的控制流汇合点插入 `phi` 指令。最后，`renameVariables` 递归地遍历支配树，用一个模拟的值栈来将 `load` 替换为栈顶的 SSA 值，将 `store` 视为对栈的一次 `push` 操作，从而完成重命名。值得一提的是，由于我们在IR生成阶段就将所有alloca指令统一放置在入口块，极大地简化了Mem2Reg遍的实现和支配树分析的计算。
 
-- **Reg2Mem (`Reg2Mem.cpp`)**: 
+- **Reg2Mem (`Reg2Mem.cpp`)**:
   - **目标**: 执行 `Mem2Reg` 的逆操作，将程序从 SSA 形式转换回基于内存的表示。这通常是为不支持 SSA 的后端做准备的**SSA解构 (SSA Destruction)** 步骤。
   - **技术**: 为每个 SSA 值（指令结果、函数参数）在函数入口创建一个 `alloca` 栈槽。然后，在每个 SSA 值的定义点之后插入一个 `store` 将其存入对应的栈槽；在每个使用点之前插入一个 `load` 从栈槽中取出值。
   - **实现**: `Reg2MemContext::run` 驱动此过程。`allocateMemoryForSSAValues` 为所有需要转换的 SSA 值创建 `alloca` 指令。`rewritePhis` 特殊处理 `phi` 指令，在每个前驱块的末尾插入 `store`。`insertLoadsAndStores` 则处理所有非 `phi` 指令的定义和使用，插入相应的 `store` 和 `load`。虽然
 
 #### 3.2.2. 常量与死代码优化
 
-- **SCCP (`SCCP.cpp`)**: 
+- **SCCP (`SCCP.cpp`)**:
   - **目标**: 稀疏条件常量传播。在编译期计算常量表达式，并利用分支条件为常数的信息来消除死代码，比简单的常量传播更强大。
   - **技术**: 这是一种基于数据流分析的格理论（Lattice Theory）的优化。它为每个变量维护一个值状态，可能为 `Top` (未定义), `Constant` (某个常量值), 或 `Bottom` (非常量)。同时，它跟踪基本块的可达性，如果一个分支的条件被推断为常量，则其不可达的后继分支在分析中会被直接忽略。
   - **实现**: `SCCPContext::run` 驱动整个分析过程。它维护一个指令工作列表和一个边工作列表。`ProcessInstruction` 和 `ProcessEdge` 函数交替执行，不断地从 IR 中传播常量和可达性信息，直到达到不动点为止。最后，`PropagateConstants` 和 `SimplifyControlFlow` 将推断出的常量替换到代码中，并移除死块。
 
-- **DCE (`DCE.cpp`)**: 
+- **DCE (`DCE.cpp`)**:
   - **目标**: 简单死代码消除。移除那些计算结果对程序输出没有贡献的指令。
   - **技术**: 采用**标记-清除 (Mark and Sweep)** 算法。从具有副作用的指令（如 `store`, `call`, `return`）开始，反向追溯其操作数，标记所有相关的指令为“活跃”。
   - **实现**: `DCEContext::run` 实现了此算法。第一次遍历时，通过 `isAlive` 函数识别出具有副作用的“根”指令，然后调用 `addAlive` 递归地将所有依赖的指令加入 `alive_insts` 集合。第二次遍历时，所有未被标记为活跃的指令都将被删除。
@@ -116,24 +115,18 @@ graph TD
 
 #### 3.2.4. 其他优化
 
-- **LargeArrayToGlobal (`LargeArrayToGlobal.cpp`)**: 
-  - **目标**: 防止因大型局部数组导致的栈溢出，并可能改善数据局部性。
-  - **技术**: 遍历函数中的 `alloca` 指令，如果通过 `calculateTypeSize` 计算出其分配的内存大小超过一个阈值（如 1024 字节），则将其转换为一个全局变量。
-  - **实现**: `convertAllocaToGlobal` 函数负责创建一个新的 `GlobalValue`，并调用 `replaceAllUsesWith` 将原 `alloca` 的所有使用者重定向到新的全局变量，最后删除原 `alloca` 指令。
-
-  #### 3.3. 核心分析遍
+#### 3.3. 核心分析遍
 
   为了为优化遍收集信息，最大程度发掘程序优化潜力，我们目前设计并实现了以下关键的分析遍：
 
-  - **支配树分析 (Dominator Tree Analysis)**:
-    - **技术**: 通过计算每个基本块的支配节点，构建出一棵支配树结构。我们在计算支配节点时采用了**逆后序遍历（RPO, Reverse Post Order）**，以保证数据流分析的收敛速度和正确性。在计算直接支配者（Idom, Immediate Dominator）时，采用了经典的**Lengauer-Tarjan（LT）算法**，该算法以高效的并查集和路径压缩技术著称，能够在线性时间内准确计算出每个基本块的直接支配者关系。
-    - **实现**: `Dom.cpp` 实现了支配树分析。该分析为每个基本块分配其直接支配者，并递归构建整棵支配树。支配树是许多高级优化（尤其是 SSA 形式下的优化）的基础。例如，Mem2Reg 需要依赖支配树来正确插入 Phi 指令，并在变量重命名阶段高效遍历控制流图。此外，循环相关优化（如循环不变量外提）也依赖于支配树信息来识别循环头和循环体的关系。
-
-  - **活跃性分析 (Liveness Analysis)**:
-    - **技术**: 活跃性分析用于确定在程序的某一特定点上，哪些变量的值在未来会被用到。我们采用**经典的不动点迭代算法**，在数据流分析框架下，逆序遍历基本块，迭代计算每个基本块的 `live-in` 和 `live-out` 集合，直到收敛为止。这种方法简单且易于实现，能够满足大多数编译优化的需求。
-    - **未来规划**: 若后续对分析效率有更高要求，可考虑引入如**工作列表算法**或者**转化为基于SSA的图可达性分析**等更高效的算法，以进一步提升大型函数或复杂控制流下的分析性能。
-    - **实现**: `Liveness.cpp` 提供了活跃性分析。该分析采用经典的数据流分析框架，迭代计算每个基本块的 `live-in` 和 `live-out` 集合。活跃性信息是死代码消除（DCE）、寄存器分配等优化的必要前置步骤。通过准确的活跃性分析，可以识别出无用的变量和指令，从而为后续优化遍提供坚实的数据基础。
+- **支配树分析 (Dominator Tree Analysis)**:
+  - **技术**: 通过计算每个基本块的支配节点，构建出一棵支配树结构。我们在计算支配节点时采用了**逆后序遍历（RPO, Reverse Post Order）**，以保证数据流分析的收敛速度和正确性。在计算直接支配者（Idom, Immediate Dominator）时，采用了经典的**Lengauer-Tarjan（LT）算法**，该算法以高效的并查集和路径压缩技术著称，能够在线性时间内准确计算出每个基本块的直接支配者关系。
+  - **实现**: `Dom.cpp` 实现了支配树分析。该分析为每个基本块分配其直接支配者，并递归构建整棵支配树。支配树是许多高级优化（尤其是 SSA 形式下的优化）的基础。例如，Mem2Reg 需要依赖支配树来正确插入 Phi 指令，并在变量重命名阶段高效遍历控制流图。此外，循环相关优化（如循环不变量外提）也依赖于支配树信息来识别循环头和循环体的关系。
 
+- **活跃性分析 (Liveness Analysis)**:
+  - **技术**: 活跃性分析用于确定在程序的某一特定点上，哪些变量的值在未来会被用到。我们采用**经典的不动点迭代算法**，在数据流分析框架下，逆序遍历基本块，迭代计算每个基本块的 `live-in` 和 `live-out` 集合，直到收敛为止。这种方法简单且易于实现，能够满足大多数编译优化的需求。
+  - **未来规划**: 若后续对分析效率有更高要求，可考虑引入如**工作列表算法**或者**转化为基于SSA的图可达性分析**等更高效的算法，以进一步提升大型函数或复杂控制流下的分析性能。
+  - **实现**: `Liveness.cpp` 提供了活跃性分析。该分析采用经典的数据流分析框架，迭代计算每个基本块的 `live-in` 和 `live-out` 集合。活跃性信息是死代码消除（DCE）、寄存器分配等优化的必要前置步骤。通过准确的活跃性分析，可以识别出无用的变量和指令，从而为后续优化遍提供坚实的数据基础。
 
 ### 3.4. 未来的规划
 
@@ -145,6 +138,7 @@ graph TD
   函数内联能够将简单函数（可能需要收集更多信息）内联到call指令相应位置，减少栈空间相关变动，并且为其他遍发掘优化空间。
 - **`LLVM IR`格式化**:
   我们将为所有的IR设计并实现通用的打印器方法，使得IR能够显式化为可编译运行的LLVM IR，通过编排脚本和调用llvm相关工具链，我们能够绕过后端编译运行中间代码，为验证中端正确性提供系统化的方法，同时减轻后端开发bug溯源的压力。
+
 ---
 
 ## 4. 后端技术与优化 (Backend)
@@ -215,16 +209,16 @@ graph TD
     end
 ```
 
-  1.  **`analyzeLiveness()`**: 对机器指令进行数据流分析，计算出每个虚拟寄存器的活跃范围。
-  2.  **`build()`**: 根据活跃性信息构建**冲突图 (Interference Graph)**。如果两个虚拟寄存器同时活跃，则它们冲突，在图中连接一条边。
-  3.  **`makeWorklist()`**: 将图节点（虚拟寄存器）根据其度数放入不同的工作列表，为着色做准备。
-  4.  **核心着色阶段 (The Loop)**:
+  1. **`analyzeLiveness()`**: 对机器指令进行数据流分析，计算出每个虚拟寄存器的活跃范围。
+  2. **`build()`**: 根据活跃性信息构建**冲突图 (Interference Graph)**。如果两个虚拟寄存器同时活跃，则它们冲突，在图中连接一条边。
+  3. **`makeWorklist()`**: 将图节点（虚拟寄存器）根据其度数放入不同的工作列表，为着色做准备。
+  4. **核心着色阶段 (The Loop)**:
       - **`simplify()`**: 贪心地移除图中度数小于物理寄存器数量的节点，并将其压入栈中。这些节点保证可以被成功着色。
       - **`coalesce()`**: 尝试将传送指令 (`MV`) 的源和目标节点合并，以消除这条指令。合并的条件基于 **Briggs** 或 **George** 启发式，以避免使图变得不可着色。
       - **`freeze()`**: 当一个与传送指令相关的节点无法合并也无法简化时，放弃对该传送指令的合并希望，将其“冻结”为一个普通节点。
       - **`selectSpill()`**: 当所有节点都无法进行上述操作时（即图中只剩下高度数的节点），必须选择一个节点进行**溢出 (Spill)**，即决定将其存放在内存中。
-  5.  **`assignColors()`**: 在所有节点都被处理后，从栈中依次弹出节点，并根据其已着色邻居的颜色，为它选择一个可用的物理寄存器。
-  6.  **`rewriteProgram()`**: 如果 `assignColors()` 阶段发现有节点被标记为溢出，此函数会被调用。它会修改机器指令，为溢出的虚拟寄存器插入从内存加载（`lw`/`ld`）和存入内存（`sw`/`sd`）的代码。然后，整个分配过程从步骤 1 重新开始。
+  5. **`assignColors()`**: 在所有节点都被处理后，从栈中依次弹出节点，并根据其已着色邻居的颜色，为它选择一个可用的物理寄存器。
+  6. **`rewriteProgram()`**: 如果 `assignColors()` 阶段发现有节点被标记为溢出，此函数会被调用。它会修改机器指令，为溢出的虚拟寄存器插入从内存加载（`lw`/`ld`）和存入内存（`sw`/`sd`）的代码。然后，整个分配过程从步骤 1 重新开始。
 
 ### 4.4. 后端特定优化
 
@@ -232,11 +226,11 @@ graph TD
 
 #### 4.4.1. 指令调度 (Instruction Scheduling)
 
-- **寄存器分配前调度 (`PreRA_Scheduler.cpp`)**: 
+- **寄存器分配前调度 (`PreRA_Scheduler.cpp`)**:
   - **目标**: 在寄存器分配前，通过重排指令来提升性能。主要目标是**隐藏加载延迟 (Load Latency)**，即尽早发出 `load` 指令，使其结果能在需要时及时准备好，避免流水线停顿。同时，由于此时使用的是无限的虚拟寄存器，调度器有较大的自由度，但也可能因为过度重排而延长虚拟寄存器的生命周期，从而增加寄存器压力。
   - **实现**: `scheduleBlock()` 函数会识别出基本块内的调度边界（如 `call` 或终结指令），然后在每个独立的区域内调用 `scheduleRegion()`。当前的实现是一种简化的列表调度，它会优先尝试将加载指令 (`LW`, `LD` 等) 在不违反数据依赖的前提下，尽可能地向前移动。
 
-- **寄存器分配后调度 (`PostRA_Scheduler.cpp`)**: 
+- **寄存器分配后调度 (`PostRA_Scheduler.cpp`)**:
   - **目标**: 在寄存器分配完成之后，对指令序列进行最后一轮微调。此阶段调度的主要目标与分配前不同，它旨在解决由寄存器分配过程本身引入的性能问题，例如：
     - **缓解溢出代价**: 将因溢出（Spill）而产生的 `load` 指令（从栈加载）尽可能地提前，远离其使用点；将 `store` 指令（存入栈）尽可能地推后，远离其定义点。
     - **消除伪依赖**: 寄存器分配器可能会为两个原本不相关的虚拟寄存器分配同一个物理寄存器，从而引入了虚假的写后读（WAR）或写后写（WAW）依赖。Post-RA 调度可以尝试解开这些伪依赖，为指令重排提供更多自由度。
@@ -244,7 +238,7 @@ graph TD
 
 #### 4.4.2. 强度削减 (Strength Reduction)
 
-- **除法强度削减 (`DivStrengthReduction.cpp`)**: 
+- **除法强度削减 (`DivStrengthReduction.cpp`)**:
   - **目标**: 将机器指令中昂贵的 `DIV` 或 `DIVW` 指令（当除数为编译期常量时）替换为一系列更快、计算成本更低的指令组合。
   - **技术**: 基于数论中的**乘法逆元 (Multiplicative Inverse)** 思想。对于一个整数除法 `x / d`，可以找到一个“魔数” `m` 和一个移位数 `s`，使得该除法可以被近似替换为 `(x * m) >> s`。这个过程需要处理复杂的符号、取整和溢出问题。
   - **实现**: `runOnMachineFunction()` 实现了此优化。它会遍历机器指令，寻找以常量为除数的 `DIV`/`DIVW` 指令。`computeMagic()` 函数负责计算出对应的魔数和移位数。然后，根据除数是 2 的幂、1、-1 还是其他普通数字，生成不同的指令序列，包括 `MULH` (取高位乘积), `SRAI` (算术右移), `ADD`, `SUB` 等，来精确地模拟定点数除法的效果。
@@ -263,10 +257,10 @@ graph TD
 
 根据项目中的 `TODO` 列表和源代码分析，当前实现存在一些可改进之处：
 
-- **寄存器分配**: 
+- **寄存器分配**:
   - **`CALL` 指令处理**: 当前对 `CALL` 指令的 `use`/`def` 分析不完整，没有将所有调用者保存的寄存器标记为 `def`，这可能导致跨函数调用的值被错误破坏。
   - **溢出处理**: 当前所有溢出的虚拟寄存器都被简单地映射到同一个物理寄存器 `t6` 上，这会引入大量不必要的 `load`/`store`，并可能导致 `t6` 成为性能瓶颈。
-- **IR 设计**: 
+- **IR 设计**:
   - 随着 SSA 的引入，IR 中某些冗余信息（如基本块的 `args` 参数）可以被移除，以简化设计。
-- **优化**: 
-  - 当前的优化主要集中在标量上。可以引入更多面向循环的优化（如循环不变代码外提 LICM、归纳变量分析 IndVar）和过程间优化来进一步提升性能。
\ No newline at end of file
+- **优化**:
+  - 当前的优化主要集中在标量上。可以引入更多面向循环的优化（如循环不变代码外提 LICM、归纳变量分析 IndVar）和过程间优化来进一步提升性能。
diff --git a/src/include/midend/Pass/Optimize/LargeArrayToGlobal.h b/src/include/midend/Pass/Optimize/LargeArrayToGlobal.h
deleted file mode 100644
index 39c5a52..0000000
--- a/src/include/midend/Pass/Optimize/LargeArrayToGlobal.h
+++ /dev/null
@@ -1,24 +0,0 @@
-#pragma once
-
-#include "../Pass.h"
-
-namespace sysy {
-
-class LargeArrayToGlobalPass : public OptimizationPass {
-public:
-    static void *ID;
-
-    LargeArrayToGlobalPass() : OptimizationPass("LargeArrayToGlobal", Granularity::Module) {}
-
-    bool runOnModule(Module *M, AnalysisManager &AM) override;
-    void *getPassID() const override {
-        return &ID;
-    }
-
-private:
-    unsigned calculateTypeSize(Type *type);
-    void convertAllocaToGlobal(AllocaInst *alloca, Function *F, Module *M);
-    std::string generateUniqueGlobalName(AllocaInst *alloca, Function *F);
-};
-
-} // namespace sysy
\ No newline at end of file
diff --git a/src/midend/CMakeLists.txt b/src/midend/CMakeLists.txt
index 73bafde..b07cf2b 100644
--- a/src/midend/CMakeLists.txt
+++ b/src/midend/CMakeLists.txt
@@ -24,7 +24,6 @@ add_library(midend_lib STATIC
     Pass/Optimize/InductionVariableElimination.cpp
     Pass/Optimize/GlobalStrengthReduction.cpp
     Pass/Optimize/BuildCFG.cpp
-    Pass/Optimize/LargeArrayToGlobal.cpp
     Pass/Optimize/TailCallOpt.cpp
 )
 
diff --git a/src/midend/Pass/Optimize/LargeArrayToGlobal.cpp b/src/midend/Pass/Optimize/LargeArrayToGlobal.cpp
deleted file mode 100644
index 12f380e..0000000
--- a/src/midend/Pass/Optimize/LargeArrayToGlobal.cpp
+++ /dev/null
@@ -1,145 +0,0 @@
-#include "../../include/midend/Pass/Optimize/LargeArrayToGlobal.h"
-#include "../../IR.h"
-#include <unordered_map>
-#include <sstream>
-#include <string>
-
-namespace sysy {
-
-// Helper function to convert type to string
-static std::string typeToString(Type *type) {
-    if (!type) return "null";
-    
-    switch (type->getKind()) {
-        case Type::kInt:
-            return "int";
-        case Type::kFloat:
-            return "float";
-        case Type::kPointer:
-            return "ptr";
-        case Type::kArray: {
-            auto *arrayType = type->as<ArrayType>();
-            return "[" + std::to_string(arrayType->getNumElements()) + " x " + 
-                   typeToString(arrayType->getElementType()) + "]";
-        }
-        default:
-            return "unknown";
-    }
-}
-
-void *LargeArrayToGlobalPass::ID = &LargeArrayToGlobalPass::ID;
-
-bool LargeArrayToGlobalPass::runOnModule(Module *M, AnalysisManager &AM) {
-        bool changed = false;
-        
-        if (!M) {
-            return false;
-        }
-
-        // Collect all alloca instructions from all functions
-        std::vector<std::pair<AllocaInst*, Function*>> allocasToConvert;
-        
-        for (auto &funcPair : M->getFunctions()) {
-            Function *F = funcPair.second.get();
-            if (!F || F->getBasicBlocks().begin() == F->getBasicBlocks().end()) {
-                continue;
-            }
-            
-            for (auto &BB : F->getBasicBlocks()) {
-                for (auto &inst : BB->getInstructions()) {
-                    if (auto *alloca = dynamic_cast<AllocaInst*>(inst.get())) {
-                        Type *allocatedType = alloca->getAllocatedType();
-                        
-                        // Calculate the size of the allocated type
-                        unsigned size = calculateTypeSize(allocatedType);
-                        if(DEBUG){
-                            // Debug: print size information
-                             std::cout << "LargeArrayToGlobalPass: Found alloca with size " << size 
-                                  << " for type " << typeToString(allocatedType) << std::endl;
-                        }
-                        
-                        // Convert arrays of 1KB (1024 bytes) or larger to global variables
-                        if (size >= 1024) {
-                            if(DEBUG)
-                                std::cout << "LargeArrayToGlobalPass: Converting array of size " << size << " to global" << std::endl;
-                            allocasToConvert.emplace_back(alloca, F);
-                        }
-                    }
-                }
-            }
-        }
-
-        // Convert the collected alloca instructions to global variables
-        for (auto [alloca, F] : allocasToConvert) {
-            convertAllocaToGlobal(alloca, F, M);
-            changed = true;
-        }
-
-return changed;
-    }
-
-unsigned LargeArrayToGlobalPass::calculateTypeSize(Type *type) {
-    if (!type) return 0;
-
-    switch (type->getKind()) {
-        case Type::kInt:
-        case Type::kFloat:
-            return 4;
-        case Type::kPointer:
-            return 8;
-        case Type::kArray: {
-            auto *arrayType = type->as<ArrayType>();
-            return arrayType->getNumElements() * calculateTypeSize(arrayType->getElementType());
-        }
-        default:
-            return 0;
-    }
-}
-
-void LargeArrayToGlobalPass::convertAllocaToGlobal(AllocaInst *alloca, Function *F, Module *M) {
-    Type *allocatedType = alloca->getAllocatedType();
-    
-    // Create a unique name for the global variable
-    std::string globalName = generateUniqueGlobalName(alloca, F);
-    
-    // Create the global variable - GlobalValue expects pointer type
-    Type *pointerType = Type::getPointerType(allocatedType);
-    GlobalValue *globalVar = M->createGlobalValue(globalName, pointerType);
-    
-    if (!globalVar) {
-        return;
-    }
-    
-    // Replace all uses of the alloca with the global variable
-    alloca->replaceAllUsesWith(globalVar);
-    
-    // Remove the alloca instruction from its basic block
-    for (auto &BB : F->getBasicBlocks()) {
-        auto &instructions = BB->getInstructions();
-        for (auto it = instructions.begin(); it != instructions.end(); ++it) {
-            if (it->get() == alloca) {
-                instructions.erase(it);
-                break;
-            }
-        }
-    }
-}
-
-std::string LargeArrayToGlobalPass::generateUniqueGlobalName(AllocaInst *alloca, Function *F) {
-    std::string baseName = alloca->getName();
-    if (baseName.empty()) {
-        baseName = "array";
-    }
-    
-    // Ensure uniqueness by appending function name and counter
-    static std::unordered_map<std::string, int> nameCounter;
-    std::string key = F->getName() + "." + baseName;
-    
-    int counter = nameCounter[key]++;
-    std::ostringstream oss;
-    oss << key << "." << counter;
-    
-    return oss.str();
-}
-
-} // namespace sysy
\ No newline at end of file
diff --git a/src/midend/Pass/Pass.cpp b/src/midend/Pass/Pass.cpp
index be7404f..93401ee 100644
--- a/src/midend/Pass/Pass.cpp
+++ b/src/midend/Pass/Pass.cpp
@@ -13,7 +13,6 @@
 #include "GVN.h"
 #include "SCCP.h"
 #include "BuildCFG.h"
-#include "LargeArrayToGlobal.h"
 #include "LoopNormalization.h"
 #include "LICM.h"
 #include "LoopStrengthReduction.h"
@@ -61,8 +60,6 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
 
     // 注册优化遍
     registerOptimizationPass<BuildCFG>();
-    registerOptimizationPass<LargeArrayToGlobalPass>();
-
     registerOptimizationPass<GVN>();
     
     registerOptimizationPass<SysYDelInstAfterBrPass>();
@@ -98,7 +95,6 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
 
       this->clearPasses();
       this->addPass(&BuildCFG::ID);
-      this->addPass(&LargeArrayToGlobalPass::ID);
       this->run();
 
       this->clearPasses(); 
@@ -128,9 +124,9 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
         printPasses();
       }
 
-      this->clearPasses();
-      this->addPass(&Mem2Reg::ID);
-      this->run();
+      // this->clearPasses();
+      // this->addPass(&Mem2Reg::ID);
+      // this->run();
 
       if(DEBUG) {
         std::cout << "=== IR After Mem2Reg Optimizations ===\n";

From d465fb02a51291737ed96a234cded7a1b0f5c9a6 Mon Sep 17 00:00:00 2001
From: CGH0S7 <776459475@qq.com>
Date: Tue, 19 Aug 2025 17:44:54 +0800
Subject: [PATCH 7/9] =?UTF-8?q?[midend-m2r]=E6=81=A2=E5=A4=8D=E5=8E=9F?=
 =?UTF-8?q?=E6=9C=89mem2reg?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/midend/Pass/Optimize/Mem2Reg.cpp | 282 ++++++++++++---------------
 src/midend/Pass/Pass.cpp             |   6 +-
 2 files changed, 126 insertions(+), 162 deletions(-)

diff --git a/src/midend/Pass/Optimize/Mem2Reg.cpp b/src/midend/Pass/Optimize/Mem2Reg.cpp
index 5573eb7..f1c7d59 100644
--- a/src/midend/Pass/Optimize/Mem2Reg.cpp
+++ b/src/midend/Pass/Optimize/Mem2Reg.cpp
@@ -7,8 +7,6 @@
 #include "SysYIROptUtils.h"
 #include <cassert>   // 用于断言
 #include <iostream>  // 用于调试输出
-#include <queue>     // 用于工作队列
-#include <unordered_set> // 用于已访问集合
 
 namespace sysy {
 
@@ -102,31 +100,29 @@ bool Mem2RegContext::isPromotableAlloca(AllocaInst *alloca) {
       // 这种 GEP 有两个操作数：基指针和索引。
 
       // 检查 GEP 的操作数数量和索引值
-      // GEP 的操作数包括：基指针 + 索引列表
-      // getNumOperands() == 1 表示只有基指针，没有索引（不应该出现）
-      // getNumOperands() == 2 表示基指针 + 1个索引
-      // getNumOperands() > 2 表示基指针 + 多个索引（复杂访问）
-      
-      if (gep->getNumOperands() < 2) {
-        // 没有索引的GEP是无效的
-        return false;
+      // GEP 的操作数通常是：<base_pointer>, <index_1>, <index_2>, ...
+      // 对于一个 `i32*` 类型的 `alloca`，如果它被 GEP 使用，那么 GEP 的第一个索引通常是 `0`
+      // （表示解引用指针本身），后续索引才是数组元素的索引。
+      // 如果 GEP 的操作数数量大于 2 (即 `base_ptr` 和 `index_0` 之外还有其他索引)，
+      // 或者 `index_0` 不是常量 0，则它可能是一个复杂的数组访问。
+      // 假设 `gep->getNumOperands()` 和 `gep->getOperand(idx)->getValue()`
+      // 假设 `ConstantInt` 类用于表示常量整数值
+      if (gep->getNumOperands() > 2) { // 如果有超过一个索引（除了基指针的第一个隐式索引）
+        // std::cerr << "Mem2Reg: Not promotable (GEP with multiple indices): " << alloca->name() << std::endl;
+        return false; // 复杂 GEP，通常表示数组或结构体字段访问
       }
-      
-      if (gep->getNumOperands() > 2) {
-        // 多个索引，表示复杂的数组或结构体访问
-        return false;
-      }
-      
-      // 只有一个索引的情况，检查索引是否为常量0
-      Value *firstIndexVal = gep->getOperand(1);
-      if (auto constInt = dynamic_cast<ConstantInteger *>(firstIndexVal)) {
-        if (constInt->getInt() != 0) {
-          // 索引不是0，表示访问数组元素
-          return false;
+      if (gep->getNumOperands() == 2) {                            // 只有基指针和一个索引
+        Value *firstIndexVal = gep->getOperand(1); // 获取第一个索引值
+        if (auto constInt = dynamic_cast<ConstantInteger *>(firstIndexVal)) {
+          if (constInt->getInt() != 0) {
+            // std::cerr << "Mem2Reg: Not promotable (GEP with non-zero first index): " << alloca->name() << std::endl;
+            return false; // 索引不是0，表示访问数组的非第一个元素
+          }
+        } else {
+          // std::cerr << "Mem2Reg: Not promotable (GEP with non-constant first index): " << alloca->name() <<
+          // std::endl;
+          return false; // 索引不是常量，表示动态数组访问
         }
-      } else {
-        // 索引不是常量，表示动态数组访问
-        return false;
       }
 
       // 此外，GEP 的结果也必须只被 LoadInst 或 StoreInst 使用
@@ -155,35 +151,23 @@ bool Mem2RegContext::isPromotableAlloca(AllocaInst *alloca) {
 
 // 收集所有对给定 AllocaInst 进行存储的 StoreInst
 void Mem2RegContext::collectStores(AllocaInst *alloca) {
-  // 使用工作队列处理所有可能的用户，包括嵌套的GEP
-  std::queue<Value*> workQueue;
-  std::unordered_set<Value*> visited;
-  
-  // 初始化工作队列
-  workQueue.push(alloca);
-  visited.insert(alloca);
-  
-  while (!workQueue.empty()) {
-    Value* current = workQueue.front();
-    workQueue.pop();
-    
-    // 遍历当前值的所有用户
-    for (auto use : current->getUses()) {
-      auto user = use->getUser();
-      if (!user || visited.count(user))
-        continue;
-      
-      visited.insert(user);
-      
-      if (auto storeInst = dynamic_cast<StoreInst *>(user)) {
-        // 找到一个store指令
-        allocaToStoresMap[alloca].insert(storeInst);
-        allocaToDefBlocksMap[alloca].insert(storeInst->getParent());
-      } else if (auto gep = dynamic_cast<GetElementPtrInst *>(user)) {
-        // 找到一个GEP指令，将其加入工作队列继续处理
-        workQueue.push(gep);
+  // Walk every use of the alloca.
+  for (auto use : alloca->getUses()) {
+    auto user = use->getUser();
+    if (!user)
+      continue;
+
+    if (auto storeInst = dynamic_cast<StoreInst *>(user)) {
+      allocaToStoresMap[alloca].insert(storeInst);
+      allocaToDefBlocksMap[alloca].insert(storeInst->getParent());
+    } else if (auto gep = dynamic_cast<GetElementPtrInst *>(user)) {
+      // For a GEP user, also record stores that use the GEP result directly (one level only; nested GEPs are not followed).
+      for (auto gep_use : gep->getUses()) {
+        if (auto gep_store = dynamic_cast<StoreInst *>(gep_use->getUser())) {
+          allocaToStoresMap[alloca].insert(gep_store);
+          allocaToDefBlocksMap[alloca].insert(gep_store->getParent());
+        }
+      }
-      // 其他类型的用户不处理
     }
   }
 }
@@ -227,70 +211,56 @@ void Mem2RegContext::insertPhis(AllocaInst *alloca, const std::unordered_set<Bas
 }
 
 // 对支配树进行深度优先遍历，重命名变量并替换 load/store 指令
+// 移除了 AllocaInst *currentAlloca 参数，因为这个函数是为整个基本块处理所有可提升的 Alloca
 void Mem2RegContext::renameVariables(BasicBlock *currentBB) {
-  // 记录每个 alloca 在此基本块开始时的栈深度，用于退出时精确回溯
+  // 1. 在函数开始时，记录每个 promotableAlloca 的当前栈深度。
+  // 这将用于在函数返回时精确地回溯栈状态。
   std::map<AllocaInst *, size_t> originalStackSizes;
   for (auto alloca : promotableAllocas) {
     originalStackSizes[alloca] = allocaToValueStackMap[alloca].size();
   }
 
   // --------------------------------------------------------------------
-  // 第一步：处理当前基本块开头的 PHI 指令，为它们分配新的 SSA 值
-  // --------------------------------------------------------------------
-  for (auto alloca : promotableAllocas) {
-    if (allocaToPhiMap[alloca].count(currentBB)) {
-      PhiInst *phiInst = allocaToPhiMap[alloca][currentBB];
-      // 将 PHI 指令本身作为新的 SSA 值压入栈顶
-      allocaToValueStackMap[alloca].push(phiInst);
-      if (DEBUG) {
-        std::cout << "Mem2Reg: Pushed PHI " << (phiInst->getName().empty() ? "anonymous" : phiInst->getName()) 
-                  << " for alloca " << alloca->getName() << ". Stack size: " << allocaToValueStackMap[alloca].size() << std::endl;
-      }
-    }
-  }
-
-  // --------------------------------------------------------------------
-  // 第二步：处理当前基本块中的非PHI指令，替换 load/store 指令
+  // 处理当前基本块的指令
   // --------------------------------------------------------------------
   for (auto instIter = currentBB->getInstructions().begin(); instIter != currentBB->getInstructions().end();) {
     Instruction *inst = instIter->get();
-    bool instDeleted = false;
+      bool instDeleted = false;
 
-    // 跳过PHI指令，它们已在第一步处理
-    if (dynamic_cast<PhiInst *>(inst)) {
-      ++instIter;
-      continue;
-    }
-
-    // 处理 LoadInst
-    if (auto loadInst = dynamic_cast<LoadInst *>(inst)) {
+    // 处理 Phi 指令 (如果是当前 alloca 的 Phi)
+    if (auto phiInst = dynamic_cast<PhiInst *>(inst)) {
+      // 检查这个 Phi 是否是为某个可提升的 alloca 插入的
       for (auto alloca : promotableAllocas) {
-        Value *ptrOperand = loadInst->getPointer();
-        
-        // 优化：只做一次dynamic_cast
-        auto gepPtr = dynamic_cast<GetElementPtrInst *>(ptrOperand);
-        if (ptrOperand == alloca || 
-            (gepPtr && gepPtr->getOperand(0) == alloca)) {  // GEP的第一个操作数是基指针
-          
-          if (allocaToValueStackMap[alloca].empty()) {
-            // 栈为空时使用未定义值而非崩溃
-            if (DEBUG) {
-              std::cerr << "Warning: Value stack empty for alloca " << alloca->getName() 
-                        << " during load replacement. Using undefined value." << std::endl;
-            }
-            Value *undefValue = UndefinedValue::get(alloca->getType()->as<PointerType>()->getBaseType());
-            loadInst->replaceAllUsesWith(undefValue);
-          } else {
-            Value *currentValue = allocaToValueStackMap[alloca].top();
-            
-            if (DEBUG) {
-              std::cout << "Mem2Reg: Replacing load with SSA value " 
-                        << (currentValue->getName().empty() ? "anonymous" : currentValue->getName())
-                        << " for alloca " << alloca->getName() << std::endl;
-            }
-            
-            loadInst->replaceAllUsesWith(currentValue);
+        if (allocaToPhiMap[alloca].count(currentBB) && allocaToPhiMap[alloca][currentBB] == phiInst) {
+          // 为 Phi 指令的输出创建一个新的 SSA 值，并压入值栈
+          allocaToValueStackMap[alloca].push(phiInst);
+          if (DEBUG) {
+            std::cout << "Mem2Reg: Pushed Phi " << (phiInst->getName().empty() ? "anonymous" : phiInst->getName()) << " for alloca " << alloca->getName()
+              << ". Stack size: " << allocaToValueStackMap[alloca].size() << std::endl;
           }
+          break; // 找到对应的 alloca，处理下一个指令
+        }
+      }
+    }
+    // 处理 LoadInst
+    else if (auto loadInst = dynamic_cast<LoadInst *>(inst)) {
+      for (auto alloca : promotableAllocas) {
+        // 检查 LoadInst 的指针是否直接是 alloca，或者是指向 alloca 的 GEP
+        Value *ptrOperand = loadInst->getPointer();
+        if (ptrOperand == alloca || (dynamic_cast<GetElementPtrInst *>(ptrOperand) &&
+                                     dynamic_cast<GetElementPtrInst *>(ptrOperand)->getBasePointer() == alloca)) {
+          assert(!allocaToValueStackMap[alloca].empty() && "Value stack empty for alloca during load replacement!");
+          if (DEBUG) {
+            std::cout << "Mem2Reg: Replacing load "
+                      << (ptrOperand->getName().empty() ? "anonymous" : ptrOperand->getName()) << " with SSA value "
+                      << (allocaToValueStackMap[alloca].top()->getName().empty()
+                              ? "anonymous"
+                              : allocaToValueStackMap[alloca].top()->getName())
+                      << " for alloca " << alloca->getName() << std::endl;
+            std::cout << "Mem2Reg: allocaToValueStackMap[" << alloca->getName()
+                      << "] size: " << allocaToValueStackMap[alloca].size() << std::endl;
+          }
+          loadInst->replaceAllUsesWith(allocaToValueStackMap[alloca].top());
           instIter = SysYIROptUtils::usedelete(instIter);
           instDeleted = true;
           break;
@@ -300,104 +270,98 @@ void Mem2RegContext::renameVariables(BasicBlock *currentBB) {
     // 处理 StoreInst
     else if (auto storeInst = dynamic_cast<StoreInst *>(inst)) {
       for (auto alloca : promotableAllocas) {
+        // 检查 StoreInst 的指针是否直接是 alloca，或者是指向 alloca 的 GEP
         Value *ptrOperand = storeInst->getPointer();
-        
-        // 优化：只做一次dynamic_cast
-        auto gepPtr = dynamic_cast<GetElementPtrInst *>(ptrOperand);
-        if (ptrOperand == alloca || 
-            (gepPtr && gepPtr->getOperand(0) == alloca)) {  // GEP的第一个操作数是基指针
-          
-          Value *storedValue = storeInst->getValue();
-          allocaToValueStackMap[alloca].push(storedValue);
-          
+        if (ptrOperand == alloca || (dynamic_cast<GetElementPtrInst *>(ptrOperand) &&
+                                     dynamic_cast<GetElementPtrInst *>(ptrOperand)->getBasePointer() == alloca)) {
           if (DEBUG) {
-            std::cout << "Mem2Reg: Replacing store with SSA value " 
-                      << (storedValue->getName().empty() ? "anonymous" : storedValue->getName())
-                      << " for alloca " << alloca->getName() 
-                      << ". Stack size: " << allocaToValueStackMap[alloca].size() << std::endl;
+            std::cout << "Mem2Reg: Replacing store to "
+                      << (ptrOperand->getName().empty() ? "anonymous" : ptrOperand->getName()) << " with SSA value "
+                      << (storeInst->getValue()->getName().empty() ? "anonymous" : storeInst->getValue()->getName())
+                      << " for alloca " << alloca->getName() << std::endl;
+            std::cout << "Mem2Reg: allocaToValueStackMap[" << alloca->getName()
+                      << "] size before push: " << allocaToValueStackMap[alloca].size() << std::endl;
           }
-          
+          allocaToValueStackMap[alloca].push(storeInst->getValue());
           instIter = SysYIROptUtils::usedelete(instIter);
           instDeleted = true;
+          if (DEBUG) {
+            std::cout << "Mem2Reg: allocaToValueStackMap[" << alloca->getName()
+                      << "] size after push: " << allocaToValueStackMap[alloca].size() << std::endl;
+          }
           break;
         }
       }
     }
-
     if (!instDeleted) {
-      ++instIter;
+      ++instIter; // 如果指令没有被删除，移动到下一个
     }
   }
-
   // --------------------------------------------------------------------
-  // 第三步：为后继基本块中的 PHI 指令填充参数
+  // 处理后继基本块的 Phi 指令参数
   // --------------------------------------------------------------------
   for (auto successorBB : currentBB->getSuccessors()) {
-    if (!successorBB) continue;
-    
+    if (!successorBB)
+      continue;
     for (auto alloca : promotableAllocas) {
+      // 如果后继基本块包含为当前 alloca 插入的 Phi 指令
       if (allocaToPhiMap[alloca].count(successorBB)) {
-        PhiInst *phiInst = allocaToPhiMap[alloca][successorBB];
-        if (!allocaToValueStackMap[alloca].empty()) {
-          Value *currentValue = allocaToValueStackMap[alloca].top();
-          phiInst->addIncoming(currentValue, currentBB);
-          
-          if (DEBUG) {
-            std::cout << "Mem2Reg: Added incoming arg to PHI " 
-                      << (phiInst->getName().empty() ? "anonymous" : phiInst->getName())
-                      << " from " << currentBB->getName() 
-                      << " with value " << (currentValue->getName().empty() ? "anonymous" : currentValue->getName())
-                      << std::endl;
-          }
-        } else {
-          // 栈为空时使用未定义值
-          if (DEBUG) {
-            std::cerr << "Warning: Value stack empty for alloca " << alloca->getName() 
-                      << " when setting phi operand. Using undefined value." << std::endl;
-          }
-          Value *undefValue = UndefinedValue::get(alloca->getType()->as<PointerType>()->getBaseType());
-          phiInst->addIncoming(undefValue, currentBB);
+        auto phiInst = allocaToPhiMap[alloca][successorBB];
+        // 为 Phi 指令添加来自当前基本块的参数
+        // 参数值是当前 alloca 值栈顶部的 SSA 值
+        assert(!allocaToValueStackMap[alloca].empty() && "Value stack empty for alloca when setting phi operand!");
+        phiInst->addIncoming(allocaToValueStackMap[alloca].top(), currentBB);
+        if (DEBUG) {
+          std::cout << "Mem2Reg: Added incoming arg to Phi "
+                    << (phiInst->getName().empty() ? "anonymous" : phiInst->getName()) << " from "
+                    << currentBB->getName() << " with value "
+                    << (allocaToValueStackMap[alloca].top()->getName().empty()
+                            ? "anonymous"
+                            : allocaToValueStackMap[alloca].top()->getName())
+                    << std::endl;
         }
       }
     }
   }
-
   // --------------------------------------------------------------------
-  // 第四步：递归处理支配树的子节点
+  // 递归访问支配树的子节点
   // --------------------------------------------------------------------
   const std::set<BasicBlock *> *dominatedBlocks = dt->getDominatorTreeChildren(currentBB);
-  if (dominatedBlocks) {
-    if (DEBUG) {
-      std::cout << "Mem2Reg: Processing " << dominatedBlocks->size() 
-                << " dominated blocks for " << currentBB->getName() << std::endl;
+  if (dominatedBlocks) { // 检查是否存在子节点
+    if(DEBUG){
+      std::cout << "Mem2Reg: Processing dominated blocks for " << currentBB->getName() << std::endl;
+      for (auto dominatedBB : *dominatedBlocks) {
+        std::cout << "Mem2Reg: Dominated block: " << (dominatedBB ? dominatedBB->getName() : "null") << std::endl;
+      }
     }
-    
     for (auto dominatedBB : *dominatedBlocks) {
-      if (dominatedBB) {
+      if (dominatedBB) { // 确保子块有效
         if (DEBUG) {
-          std::cout << "Mem2Reg: Recursively processing dominated block: " 
-                    << dominatedBB->getName() << std::endl;
+          std::cout << "Mem2Reg: Recursively renaming variables in dominated block: " << dominatedBB->getName()
+                    << std::endl;
         }
-        renameVariables(dominatedBB);
+        renameVariables(dominatedBB); // 递归调用，不再传递 currentAlloca
       }
     }
   }
 
   // --------------------------------------------------------------------
-  // 第五步：退出时恢复值栈状态
+  // 退出基本块时，弹出在此块中压入值栈的 SSA 值，恢复栈到进入该块时的状态
   // --------------------------------------------------------------------
   for (auto alloca : promotableAllocas) {
     while (allocaToValueStackMap[alloca].size() > originalStackSizes[alloca]) {
       if (DEBUG) {
         std::cout << "Mem2Reg: Popping value "
-                  << (allocaToValueStackMap[alloca].top()->getName().empty() ? "anonymous" : allocaToValueStackMap[alloca].top()->getName())
-                  << " for alloca " << alloca->getName() 
-                  << ". Stack size: " << allocaToValueStackMap[alloca].size() 
+                  << (allocaToValueStackMap[alloca].top()->getName().empty()
+                          ? "anonymous"
+                          : allocaToValueStackMap[alloca].top()->getName())
+                  << " for alloca " << alloca->getName() << ". Stack size: " << allocaToValueStackMap[alloca].size()
                   << " -> " << (allocaToValueStackMap[alloca].size() - 1) << std::endl;
       }
       allocaToValueStackMap[alloca].pop();
     }
   }
+
 }
 
 // 删除所有原始的 AllocaInst、LoadInst 和 StoreInst
diff --git a/src/midend/Pass/Pass.cpp b/src/midend/Pass/Pass.cpp
index 93401ee..0678e4e 100644
--- a/src/midend/Pass/Pass.cpp
+++ b/src/midend/Pass/Pass.cpp
@@ -124,9 +124,9 @@ void PassManager::runOptimizationPipeline(Module* moduleIR, IRBuilder* builderIR
         printPasses();
       }
 
-      // this->clearPasses();
-      // this->addPass(&Mem2Reg::ID);
-      // this->run();
+      this->clearPasses();
+      this->addPass(&Mem2Reg::ID);
+      this->run();
 
       if(DEBUG) {
         std::cout << "=== IR After Mem2Reg Optimizations ===\n";

From 363ead0dddb5dba6c32255808c74dbdd64b1fdf7 Mon Sep 17 00:00:00 2001
From: rain2133 <1370973498@qq.com>
Date: Tue, 19 Aug 2025 20:01:33 +0800
Subject: [PATCH 8/9] =?UTF-8?q?[backend-O1]=E4=BF=AE=E5=A4=8D=E8=AE=A1?=
 =?UTF-8?q?=E6=97=B6=E5=87=BD=E6=95=B0=E5=91=BD=E5=90=8D=E9=94=99=E8=AF=AF?=
 =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 src/include/midend/SysYIRGenerator.h |  1 +
 src/midend/SysYIRGenerator.cpp       | 10 ++++++++++
 2 files changed, 11 insertions(+)

diff --git a/src/include/midend/SysYIRGenerator.h b/src/include/midend/SysYIRGenerator.h
index b4d4e57..70fca7f 100644
--- a/src/include/midend/SysYIRGenerator.h
+++ b/src/include/midend/SysYIRGenerator.h
@@ -51,6 +51,7 @@ public:
                          Module *pModule, IRBuilder *pBuilder);
 
   static void initExternalFunction(Module *pModule, IRBuilder *pBuilder);
+  static void modify_timefuncname(Module *pModule);
 };
 
 class SysYIRGenerator : public SysYBaseVisitor {
diff --git a/src/midend/SysYIRGenerator.cpp b/src/midend/SysYIRGenerator.cpp
index b2a52be..e6840a0 100644
--- a/src/midend/SysYIRGenerator.cpp
+++ b/src/midend/SysYIRGenerator.cpp
@@ -674,6 +674,8 @@ std::any SysYIRGenerator::visitCompUnit(SysYParser::CompUnitContext *ctx) {
   pModule->enterNewScope();
   visitChildren(ctx);
   pModule->leaveScope();
+
+  Utils::modify_timefuncname(pModule);
   return pModule;
 }
 
@@ -2403,4 +2405,12 @@ void Utils::initExternalFunction(Module *pModule, IRBuilder *pBuilder) {
 
 }
 
+// Renames the `starttime`/`stoptime` externals to `_sysy_starttime`/`_sysy_stoptime`; a null lookup result is skipped, not dereferenced.
+void Utils::modify_timefuncname(Module *pModule){
+  if (auto starttimeFunc = pModule->getExternalFunction("starttime"))
+    starttimeFunc->setName("_sysy_starttime");
+  if (auto stoptimeFunc = pModule->getExternalFunction("stoptime"))
+    stoptimeFunc->setName("_sysy_stoptime");
+}
+
 } // namespace sysy
\ No newline at end of file

From ef682354467a392e0356f058c4d3bf6fb8c27808 Mon Sep 17 00:00:00 2001
From: Lixuanwang <xlwmail@nudt.edu.cn>
Date: Tue, 19 Aug 2025 20:04:58 +0800
Subject: [PATCH 9/9] =?UTF-8?q?[backend-O1]=E6=9B=B4=E6=96=B0sylib?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lib/libsysy_riscv.a | Bin 24880 -> 24140 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/lib/libsysy_riscv.a b/lib/libsysy_riscv.a
index 164fe306d000ea62c44a4a0121f03cb5a98ea8f2..05a509ff91c6d95efff80ab64ccca69dd972a10d 100644
GIT binary patch
delta 1965
zcmcK4?@wD*7zc1p9ot~HlVz)g#74}D!O?Qq27{@k8>2v3%g^CZSBlf>rqs;=3c5Of
zHJNkhmTxediuwkfx*D`rQCDM7SWC)_5lJ?+i*HC4Gm^cKgcmmPd+&7#`vYv!&v~Bb
z-1D4!?uCv`h|~pfYWuM*<%RmEJ2xvUD?cks9REMR{@%gfL4R-GqeuF()cEvImsSn-
z9Nb@eNS@9sL`vkW*t)H%3Ri?QtnQ5F8hUc0UnqYWgbinJFNto*`+H?HVIE%_9lPfr
zA?gq*mZBbMnu76ODH`CQZUkuxBeW4SbT=|u#lA^$m5+KbL{o^<C>A!ES<fz!!ix%;
zLZCzze9cF4^RrUuk(mSc<uy%6YgZa=QpDh}nYmX7<7R|u3Nf23_~zzvj(xXu$ncsL
z*zHox!e^IIYzofUt)dli*4nV3%~(pM_!iz$vuH<v@g5A*SxlD7g7Ia&3k`a*p2M#&
zO7+|h+JPXQ#RzT141Ew8tzs{e;yd`t%wjM48Fylo&SI`i7JR>~^W|GQTQ*B^1z!3P
zLi9I8=nqJ5Hj5+{89$7oEs}8ueFR~eK#cCgycV0bO5uZxUPM3b!l)L{Q9rWFrI<nm
z?M9$n7F<|%C)aP&6ZH*DBT_DLbhCnM<yH}gvBE6UD5{WRKRW0y2-C+9qm!7YkHfT0
z?d_rg^wWzNrB7gvCXihz#gnL@Nd)Kt4AX;{q)%ayUPV!rb~5@j!de`oJ($;G({?F>
zaM1+%=`$GB;yHQ<+4rgS3VIj;n#3@D7L!`MNROcCe)X<8XfMKA9HV`h*J9HHQuM<`
z*`Gd#Q7xXM&m-HRJvlmnfEEwaK}>4#B7Ffx)mnf0BEni6qen4cEpeenL7~$sLU1`{
z!H>(HyJw}IY~YZ_kW-465T{dEpvPd@p?3Asml2`~MCdC>(<F>LrFaz%`Wk}tb&Sxf
znAxfCxw^(G!m!t<@61=jnv?$rtW6-!?9LqumYDqs%PuK?hIbd=I%~&wskI0*^}eq$
zdjrN=^<6vY2?T37<5F<0)+*k_3~Nbb=t<aJY9AjR!Vu*?^errCbxWNTZ^KI`5Td6L
zsna`MtFuC>RghnAWiLm)L=UrpdaJ$<#}xC+oJ}CZSsQn+YS3p7a6HNUOoO^M-XL+E
z+0uWr#cnG+bqYG&>e`T7iZh7NX{6~{7#r2QchGkcq|+Fo?_ox(XXyK|?^f$RI)Wj3
z5pntf7PPv>BgKdCQug;qoOdf2_gK{(m?@1smDkEO4lhr|Y=9YeVU}WcnHkT-%(k0n
zYEtKXO;U_vh$axHA7g<|!_q9pC-BlKgy=a$DEFbC!q}qTnS*|YAmtwPbIi~wWaxR=
zTeXw4N}O!s6KhrPE6(gXvn6KSqfNro%;(jn&V|~f7(;||7y2cP?P~0xUm@7O5jWZu
z@jLD|u1DUU72=PmY0vsq$ltfbpTc#%%G`z`{m=WxCbaps8Q#g|rFY;XUol@p(&wZ(
zT~3h)cbC&p)q)dU)gm8BCM_+<^;e5RxcyGU);0Pve{oLL8ol8FF8M2S4z%dmYOL$7
qG>jS1*j;TZUaJ?&O!}C|VFDxFC5Fz8nC`C5IlNb&7{+PeUH<~$DPx5I

delta 3063
zcmchZTWlOx9me-;;&$T>shtJeDO1RTQ{O#yE=|%pw%7698z*ZDO`LTSLsNR$rg35*
z0YhteXeCw%>l4Bc7Q(1PeOOQ}b8*lDb-6`!8+-HQhkdA_X@j&N?F;x}h4{^^9r6I;
zjgjV?@BGex&-v!e8GF(x^Tu1|+ecnHxNlGKIA7}X`5yB5%xnLT^W2qdSFWAAGO2&;
z9g*L>vTJhu^5o>RFI@0lnSAk*XY`8MxBb9@mKOceIR4eq#lM+&>7(KD``-J+yS{+w
z`7U`sc$u)iADs6;6*gag`M>uqp0aZOl3IQdQRu;{v2D+7Tkg7H`S*0~g!!sDulilL
zd35b8>z?+%y1mN^_=dg8GPlwl;QRlxZt>!Sf4@)3vVfkbD08p=v!(iHu+o(3?8E=o
z@9lQ|k3Z(=uFAzvSNvgT{SD*u?c$a`<30@zyLj>4-sG3>^2Lh1e!m&k?i$COP-{(4
zu^L+&4VL0H0VQf|b5b+V6fUY|TyC?!K)+pUn-PU-gNoojr6_Kyw)p!*Z9qxruoj?M
z+!I>IS#@szhxBmW{c6Maw^*903zSrnaaa>|j@FtiCF`oa6xq6Jlai^gWE@i;RAarZ
zXPPaw)dv*Adr9#L{MG%I()GoWMzmb-m?xF5cf6X$hHCRGMH}34q79^Mu+1q=<B|=w
zs>qOP2q+89Y8`4d+UB&%8-r@YjVg?bHx?_W8Uvbx{#u#FpqKj$&M`5WCdV7At*P3)
zsqv;tMjLMmD$(Sw(+n<!8`Cl_*Hj#>(8M}6yR(IwSx4MiMR8Nj#r{My>j+)edUJ8M
zb2!JGSGiN1E#g$0w>06nvrRZb%{jJN*E}xmu)eHH)(NNp{gX@!YuI9&-)Ow0I9dkh
zn9phk&1f0@obu?ZLamN@N>OxG)93{)petHOpH}%k*N4&1YXZHgdGr}&(BI1JuQaz%
zX4`L@XVr>};eMx?{lStC^Q>><9P^x((X{gD^9mhs%sYyraZRH)wSc~$b#z7L2OV=!
zVRTg!=!=?1Gs>WsWFB?Qgj&(d8b{yP49Xp$SCsdB=#XP36-6_eM!%p1bXDu<7gc`P
z^<nf&n(+KQdQ}-TBXh(t*VKx#V(6DOgRW>9{fhFQ4?X6XDMis0O`~7c0?PQ0IjU{9
zWItZ*&hWUSS7}YrV%2Efp_TPorp-}*2m8Ct?Hi}{I;}Zc%+%(XZzzwhD0I|Opwm+9
zXf^9RO>34`iq`M7ezeluM%l&fydiMAJ)pLBn-Rn9>|^MuF!Mv_peve(rlEJBS!e;e
zD$~K!>)^fWB2NeJTZir5J2U}ZgU&%y&~IuPnuE?Bvs8fEoi<M()X9#<-BJ`c)kzxq
z6P@g6Xj%)<Ec8^jr5w}>v*+2bc9^{QC3plM58LLBCc^A(+}oPRrQO;d^3q@M`b`&k
zyWH7Yaj`Cv(;x5RuLJZQ%|KJoHE0@|^J?=@`<Si6y_On}1r#~v-evR{d7-n=1T+bK
zTMN)EbOV}$THTv@yUB|`)kj`@yxXmw=q4|837UkaH><Pwg6sP&Sv>(Y_IR0l$lOC_
zXdIe=W;6p$LD!&ZXwIw6L+xHN50JT+%)RdOiuRHjItxuelh9jQfM%f^&>Yn2bH^?3
z<1bqui3dsCM`GwLO+e?MOVA`V4P8|RT7Xs!k++|`{a)UF^7fM#8iyvJ<b|f7YtS?_
z=hfz+_5gV!<Q*XIfVaj2<b}>c6VN1-ywEIk1Db<cgYFJ3A0+P}d5@EKki1axLg%1M
z&?GbsB`>r9tr{lp5P65ZyhG$2A}=%!O+Z&P15H8KplN8%tIb302zgJCH$vWsx4seb
zLT8}~Xc9_ZXcoEw%|Wf>Zf*H-@*XGeN%9^iFO<B{Ip`8J2~9(9Dg!M*tD@u`ChxG9
zcbL4x<b}qe2`G7?Dd-wB4b6GAd8mEDHj4_K;EluGR1`Ob`-!G;NgP>mS=>*xj<ZgB
zD|?cpxSuJEi{r?No5THF^SHEEm%$ZqOEOX3v#6I57sFlGI4<F_8C(kY3oYYv(XxlU
zZ$kd!2RT3B+&a#es=2jt_vW{V+xwTTF0NCh`dVFu-aQ>Cale)RtbDZ6ytfz{3Er>9
z=0M32V2}HsvgDJve%HNJ<1)^zV`lR!>mZ<S3v%7!2k^c3@Ygo$-LVJm;jeFUcc#dH
z;O@NCH{b2Le$T{1cd5^}#odVq7wb-MGuyw#{VV@uP#-<BLy^&{(l;L|{@`STE{;Cx
zzgeox=?bMrYuQ$}M?28HXF5#3E}ZG`myPI$XWIEjyG^5ZM32PU`K-AR>+nDSK=Eg>
z+8vi3C=Pf-e~lg5(K%9VZK!GNkpJ*bJw4X`kvF^#Sf6iCMX|Gt%O7=ptiiupt@p>;
QcN`uqc6>)a8vXdc0AGlaasU7T