[midend-llvmirprint]修复进度（162/199），修复若干打印问题，修复若干ir生成逻辑问题

2025-08-09 21:28:44 +08:00
parent c507b98199
commit 6b9ad0566d
4 changed files with 160 additions and 54 deletions
--- a/src/include/midend/IR.h
+++ b/src/include/midend/IR.h
@ -856,7 +856,7 @@ public:
      case kBitFtoI:
        return "BitFtoI";
      case kSRA:
-        return "SRA";
+        return "ashr";
      default:
        return "Unknown";
    }
--- a/src/include/midend/IRBuilder.h
+++ b/src/include/midend/IRBuilder.h
@ -350,38 +350,31 @@ class IRBuilder {
    Type *currentWalkType = pointerType->as<PointerType>()->getBaseType();

    // 遍历所有索引来深入类型层次结构。
-    // `indices` 向量包含了所有 GEP 索引，包括由 `visitLValue` 等函数添加的初始 `0` 索引。
+    // Important: the first index only steps through the base pointer; aggregate indexing starts at the second index.
    for (int i = 0; i < indices.size(); ++i) {
-        if (currentWalkType->isArray()) {
-            // 情况一：当前遍历类型是 `ArrayType`。
-            // 索引用于选择数组元素，`currentWalkType` 更新为数组的元素类型。
-            currentWalkType = currentWalkType->as<ArrayType>()->getElementType();
-        } else if (currentWalkType->isPointer()) {
-            // 情况二：当前遍历类型是 `PointerType`。
-            // 这意味着我们正在通过一个指针来访问其指向的内存。
-            // 索引用于选择该指针所指向的“数组”的元素。
-            // `currentWalkType` 更新为该指针所指向的基础类型。
-            // 例如：如果 `currentWalkType` 是 `i32*`，它将变为 `i32`。
-            // 如果 `currentWalkType` 是 `[10 x i32]*`，它将变为 `[10 x i32]`。
-            currentWalkType = currentWalkType->as<PointerType>()->getBaseType();
+        if (i == 0) {
+            // First index: steps through the base pointer and leaves currentWalkType unchanged.
+            // Example: for `[4 x i32]* ptr, i32 0` the leading 0 only selects the object ptr points to,
+            // so currentWalkType stays `[4 x i32]`.
+            continue;
        } else {
-            // 情况三：当前遍历类型是标量类型 (例如 `i32`, `float` 等非聚合、非指针类型)。
-            //
-            // 如果 `currentWalkType` 是标量，并且当前索引 `i` **不是** `indices` 向量中的最后一个索引，
-            // 这意味着尝试对一个标量类型进行进一步的结构性索引，这是**无效的**。
-            // 例如：`int x; x[0];` 对应的 GEP 链中，`x` 的类型是 `i32`，再加 `[0]` 索引就是错误。
-            //
-            // 如果 `currentWalkType` 是标量，且这是**最后一个索引** (`i == indices.size() - 1`)，
-            // 那么 GEP 是合法的，它只是计算一个偏移地址，最终的类型就是这个标量类型。
-            // 此时 `currentWalkType` 保持不变，循环结束。
-            if (i < indices.size() - 1) { 
-                assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate/non-pointer type with further indices.");
-                return nullptr; // 返回空指针表示类型推断失败
+            // Remaining indices: each one walks one level into the current type.
+            if (currentWalkType->isArray()) {
+                // Array: the index selects an element, so descend to the element type.
+                currentWalkType = currentWalkType->as<ArrayType>()->getElementType();
+            } else if (currentWalkType->isPointer()) {
+                // Pointer: descend to the type it points to and keep going.
+                currentWalkType = currentWalkType->as<PointerType>()->getBaseType();
+            } else {
+                // Scalar: accepted only as the last step; a further index is rejected below.
+                if (i < indices.size() - 1) { 
+                    assert(false && "Invalid GEP indexing: attempting to index into a non-aggregate/non-pointer type with further indices.");
+                    return nullptr;
+                }
            }
-            // 如果是最后一个索引，且当前类型是标量，则类型保持不变，这是合法的。
-            // 循环会自然结束，返回正确的 `currentWalkType`。
        }
    }
+    
    // 所有索引处理完毕后，`currentWalkType` 就是 GEP 指令最终计算出的地址所指向的元素的类型。
    return currentWalkType;
  }
--- a/src/midend/IR.cpp
+++ b/src/midend/IR.cpp
@ -556,8 +556,41 @@ void BasicBlock::print(std::ostream &os) const {
  os << "  ";
  printBlockName(os, this);
  os << ":\n";
+  
+  bool reachedTerminator = false;
  for (auto &inst : instructions) {
-    os << "    " << *inst << '\n';
+    // Skip dead code that follows a terminator instruction
+    if (reachedTerminator) {
+      continue;
+    }
+    
+    os << "    ";
+    
+    // Integer logical-not is printed here as two instructions instead of via UnaryInst::print
+    if (auto* unaryInst = dynamic_cast<UnaryInst*>(inst.get())) {
+      if (unaryInst->getKind() == Instruction::kNot && unaryInst->getType()->isInt()) {
+        // Emit two lines: compare the operand with 0, then zero-extend the i1 result to i32
+        os << "%tmp_not_" << unaryInst->getName() << " = icmp eq " 
+           << *unaryInst->getOperand()->getType() << " ";
+        printOperand(os, unaryInst->getOperand());
+        os << ", 0\n    %";
+        os << unaryInst->getName() << " = zext i1 %tmp_not_" << unaryInst->getName() << " to i32";
+        os << '\n';
+        
+        // Record whether the instruction just printed is a terminator
+        if (inst->isTerminator()) {
+          reachedTerminator = true;
+        }
+        continue;
+      }
+    }
+    
+    os << *inst << '\n';
+    
+    // Record whether the instruction just printed is a terminator
+    if (inst->isTerminator()) {
+      reachedTerminator = true;
+    }
  }
 }

@ -619,10 +652,11 @@ void UnaryInst::print(std::ostream &os) const {
    printOperand(os, getOperand());
    break;
  case kNot:
+    // BasicBlock::print special-cases integer logical-not, so this case is not expected to run for it
    os << "xor " << *getOperand()->getType() << " ";
    printOperand(os, getOperand());
    os << ", -1";
-    return;
+    break;
  case kFNot:
    os << "fcmp une " << *getOperand()->getType() << " ";
    printOperand(os, getOperand());
@ -698,8 +732,41 @@ void UncondBrInst::print(std::ostream &os) const {
 }

 void CondBrInst::print(std::ostream &os) const {
-  os << "br i1 ";  // 条件分支的条件总是假定为i1类型
-  printOperand(os, getCondition());
+  Value* condition = getCondition();
+  
+  // 检查条件是否来自比较指令
+  if (auto* binaryInst = dynamic_cast<BinaryInst*>(condition)) {
+    auto kind = binaryInst->getKind();
+    if (kind == kICmpEQ || kind == kICmpNE || kind == kICmpLT || 
+        kind == kICmpGT || kind == kICmpLE || kind == kICmpGE ||
+        kind == kFCmpEQ || kind == kFCmpNE || kind == kFCmpLT || 
+        kind == kFCmpGT || kind == kFCmpLE || kind == kFCmpGE) {
+      // 比较指令返回i1类型，直接使用
+      os << "br i1 ";
+      printOperand(os, condition);
+    } else {
+      // 其他指令返回i32，需要转换
+      static int tmpCondCounter = 0;
+      std::string condName = condition->getName();
+      if (condName.empty()) {
+        condName = "const" + std::to_string(++tmpCondCounter);
+      }
+      os << "%tmp_cond_" << condName << " = icmp ne i32 ";
+      printOperand(os, condition);
+      os << ", 0\n    br i1 %tmp_cond_" << condName;
+    }
+  } else {
+    // 对于非BinaryInst的条件（如变量），假设是i32需要转换
+    static int tmpCondCounter = 0;
+    std::string condName = condition->getName();
+    if (condName.empty()) {
+      condName = "const" + std::to_string(++tmpCondCounter);
+    }
+    os << "%tmp_cond_" << condName << " = icmp ne i32 ";
+    printOperand(os, condition);
+    os << ", 0\n    br i1 %tmp_cond_" << condName;
+  }
+  
  os << ", label %";
  printBlockName(os, getThenBlock());
  os << ", label %";
@ -731,12 +798,19 @@ void LoadInst::print(std::ostream &os) const {
 }

 void MemsetInst::print(std::ostream &os) const {
-  os << "call void @llvm.memset.p0i8.i32(i8* ";
-  printOperand(os, getPointer());
-  os << ", i8 ";
-  printOperand(os, getValue()); // value
+  Value* ptr = getPointer();
+  
+  // Generate a temporary bitcast instruction before the memset call
+  // This is done at print time to avoid modifying the IR structure
+  os << "%tmp_bitcast_" << ptr->getName() << " = bitcast " << *ptr->getType() << " ";
+  printOperand(os, ptr);
+  os << " to i8*\n  ";
+  
+  // Now call memset with the bitcast result
+  os << "call void @llvm.memset.p0i8.i32(i8* %tmp_bitcast_" << ptr->getName() << ", i8 ";
+  printOperand(os, getValue());
  os << ", i32 ";
-  printOperand(os, getSize()); // size
+  printOperand(os, getSize());
  os << ", i1 false)";
 }

@ -1064,7 +1138,18 @@ void renameValues(Function* function) {
  
  // 重命名指令
  for (auto& block : function->getBasicBlocks()) {
+    bool reachedTerminator = false;
    for (auto& inst : block->getInstructions()) {
+      // Skip dead code after a terminator so it is not given a number
+      if (reachedTerminator) {
+        continue;
+      }
+      
+      // Remember once this block's terminator has been seen
+      if (inst->isTerminator()) {
+        reachedTerminator = true;
+      }
+      
      // 只有产生值的指令需要重命名
      if (!inst->getType()->isVoid() && needsRename(inst->getName())) {
        valueNames[inst.get()] = "%" + std::to_string(tempCounter++);
@ -1130,6 +1215,9 @@ void Module::print(std::ostream& os) const {
    os << ")\n";
  }
  
+  // Declare the memset intrinsic unconditionally, whether or not the module uses it
+  os << "declare void @llvm.memset.p0i8.i32(i8*, i8, i32, i1)\n";
+  
  if (!getExternalFunctions().empty()) {
    os << "\n"; // 外部函数和普通函数之间加空行
  }
--- a/src/midend/SysYIRGenerator.cpp
+++ b/src/midend/SysYIRGenerator.cpp
@ -1360,11 +1360,12 @@ std::any SysYIRGenerator::visitAssignStmt(SysYParser::AssignStmtContext *ctx) {
        }
      }
    } else {
-      if (LType == Type::getFloatType()) {
+      if (LType == Type::getFloatType() && RType != Type::getFloatType()) {
        RValue = builder.createItoFInst(RValue);
-      } else { // 假设如果不是浮点型，就是整型
+      } else if (LType != Type::getFloatType() && RType == Type::getFloatType()) {
        RValue = builder.createFtoIInst(RValue);
      }
+      // When both sides are float, or neither is, no conversion is inserted
    }
  }
  
@ -1691,8 +1692,10 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
        gepBasePointer = alloc;
        gepIndices.push_back(ConstantInteger::get(0));
        if (dims.empty() && declaredNumDims > 0) {
-          // 数组名单独出现（没有索引）：在SysY中，数组名应该退化为指向第一个元素的指针
-          // 需要添加额外的0索引来获取第一个元素的地址
+          // Array name used on its own (no subscripts): in SysY a multi-dimensional array name decays to a pointer to its first row
+          // For a 2-D array T[M][N] that is T(*)[N], which needs the GEP: getelementptr T[M][N], T[M][N]* ptr, i32 0, i32 0
+          // First i32 0: selects the array object itself; second i32 0: selects row 0
+          // Result type: T[N]*
          gepIndices.push_back(ConstantInteger::get(0));
        } else {
          // 正常的数组元素访问
@ -1703,7 +1706,8 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
      gepBasePointer = glob;
      gepIndices.push_back(ConstantInteger::get(0));
      if (dims.empty() && declaredNumDims > 0) {
-        // 全局数组名单独出现（没有索引）：应该退化为指向第一个元素的指针
+        // Global array name used on its own (no subscripts): it should decay to a pointer to its first row
+        // One extra i32 0 index is appended for that
        gepIndices.push_back(ConstantInteger::get(0));
      } else {
        // 正常的数组元素访问
@ -1713,7 +1717,8 @@ std::any SysYIRGenerator::visitLValue(SysYParser::LValueContext *ctx) {
      gepBasePointer = constV;
      gepIndices.push_back(ConstantInteger::get(0));
      if (dims.empty() && declaredNumDims > 0) {
-        // 常量数组名单独出现（没有索引）：应该退化为指向第一个元素的指针
+        // Constant array name used on its own (no subscripts): it should decay to a pointer to its first row
+        // One extra i32 0 index is appended for that
        gepIndices.push_back(ConstantInteger::get(0));
      } else {
        // 正常的数组元素访问
@ -1835,15 +1840,27 @@ std::any SysYIRGenerator::visitCall(SysYParser::CallContext *ctx) {
          } else if (formalParamExpectedValueType->isFloat() && actualArgType->isInt()) {
            args[i] = builder.createItoFInst(args[i]);
          }
-          // 2. 指针类型转换 (例如数组退化：`[N x T]*` 到 `T*`，或兼容指针类型之间) TODO：不清楚有没有这种样例
+          // 2. Pointer type conversion (e.g. array decay from `[N x T]*` to `T*`, or between compatible pointer types)
          // 这种情况常见于数组参数，实参可能是一个更具体的数组指针类型，
-          // 而形参是其退化后的基础指针类型。LLVM 的 `bitcast` 指令可以用于
-          // 在相同大小的指针类型之间进行转换，这对于数组退化至关重要。
-          // else if (formalParamType->isPointer() && actualArgType->isPointer()) {
-            // 检查指针基类型是否兼容，或者是否是数组退化导致的类型不同。
-            // 使用 bitcast，
-            // args[i] = builder.createBitCastInst(args[i], formalParamType);
-          // }
+          // while the formal parameter is its decayed base pointer type.
+          else if (formalParamExpectedValueType->isPointer() && actualArgType->isPointer()) {
+            // Check whether this is an array-pointer to element-pointer decay,
+            // e.g. [N x T]* -> T*
+            auto formalPtrType = formalParamExpectedValueType->as<PointerType>();
+            auto actualPtrType = actualArgType->as<PointerType>();
+            
+            if (formalPtrType && actualPtrType && actualPtrType->getBaseType()->isArray()) {
+              auto actualArrayType = actualPtrType->getBaseType()->as<ArrayType>();
+              if (actualArrayType && 
+                  formalPtrType->getBaseType() == actualArrayType->getElementType()) {
+                // Array decay: build a GEP that yields the address of the array's first element
+                std::vector<Value*> indices;
+                indices.push_back(ConstantInteger::get(0)); // first index: step through the pointer
+                indices.push_back(ConstantInteger::get(0)); // second index: select the array's first element
+                args[i] = getGEPAddressInst(args[i], indices);
+              }
+            }
+          }
          // 3. 其他未预期的类型不匹配
          // 如果代码执行到这里，说明存在编译器前端未处理的类型不兼容或错误。
          else {
@ -2227,15 +2244,23 @@ void Utils::createExternalFunction(
    const std::vector<std::string> &paramNames,
    const std::vector<std::vector<Value *>> &paramDims, Type *returnType,
    const std::string &funcName, Module *pModule, IRBuilder *pBuilder) {
-  auto funcType = Type::getFunctionType(returnType, paramTypes);
+  // Adjust parameter types using paramDims: array parameters are converted to pointer types
+  std::vector<Type *> adjustedParamTypes = paramTypes;
+  for (int i = 0; i < paramTypes.size() && i < paramDims.size(); ++i) {
+    if (!paramDims[i].empty()) {
+      // Non-empty dimension info marks an array parameter; wrap its type in a pointer
+      adjustedParamTypes[i] = Type::getPointerType(paramTypes[i]);
+    }
+  }
+  auto funcType = Type::getFunctionType(returnType, adjustedParamTypes);
  auto function = pModule->createExternalFunction(funcName, funcType);
  auto entry = function->getEntryBlock();
  pBuilder->setPosition(entry, entry->end());

  for (int i = 0; i < paramTypes.size(); ++i) {
-    auto arg = new Argument(paramTypes[i], function, i, paramNames[i]);
+    auto arg = new Argument(adjustedParamTypes[i], function, i, paramNames[i]);
    auto alloca = pBuilder->createAllocaInst(
-        Type::getPointerType(paramTypes[i]), paramNames[i]);
+        Type::getPointerType(adjustedParamTypes[i]), paramNames[i]);
    function->insertArgument(arg);
    auto store = pBuilder->createStoreInst(arg, alloca);
    pModule->addVariable(paramNames[i], alloca);