[backend]重构了后端

This commit is contained in:
Lixuanwang
2025-07-19 16:06:35 +08:00
parent 75e61bf274
commit d4a6996d74
10 changed files with 1336 additions and 1564 deletions

View File

@ -28,6 +28,9 @@ add_executable(sysyc
Mem2Reg.cpp
Reg2Mem.cpp
RISCv64Backend.cpp
RISCv64ISel.cpp
RISCv64RegAlloc.cpp
RISCv64AsmPrinter.cpp
)
# 设置 include 路径,包含 ANTLR 运行时库和项目头文件

246
src/RISCv64AsmPrinter.cpp Normal file
View File

@ -0,0 +1,246 @@
#include "RISCv64AsmPrinter.h"
#include <stdexcept>
namespace sysy {
/// Prints the assembly text of one machine function to `os`.
///
/// @param mfunc  Function to print; its name is used for the symbol and label.
/// @param os     Output stream. Its address is stored in `OS`, which all other
///               print helpers write through.
void RISCv64AsmPrinter::runOnMachineFunction(MachineFunction* mfunc, std::ostream& os) {
    OS = &os;

    // Section directive, exported symbol and entry label.
    const auto& symbol = mfunc->getName();
    os << ".text\n";
    os << ".globl " << symbol << "\n";
    os << symbol << ":\n";

    // Stack frame setup comes right after the entry label.
    printPrologue(mfunc);

    // Blocks are printed in the order the function stores them.
    for (const auto& block : mfunc->getBlocks()) {
        printBasicBlock(block.get());
    }
}
/// Prints the function prologue: allocates the stack frame, saves `ra` and
/// `s0` in the two topmost 8-byte slots of the frame, and sets `s0 = sp`.
///
/// Nothing is printed when the (16-byte aligned) frame size is zero.
///
/// RISC-V I-type/S-type immediates are 12-bit signed (-2048..2047). Frames
/// larger than 2048 bytes therefore cannot use `addi sp, sp, -N` or
/// `sd ra, N-8(sp)` directly; for those the offsets are built in `t0`, which
/// is a caller-saved temporary and holds no live value at function entry.
/// The resulting frame layout is identical in both cases.
void RISCv64AsmPrinter::printPrologue(MachineFunction* mfunc) {
    const int stack_size = mfunc->getFrameInfo().frame_size;
    // Round the frame size up to a multiple of 16 bytes.
    const int aligned_stack_size = (stack_size + 15) & ~15;
    if (aligned_stack_size <= 0) {
        return;
    }

    // On RV64 both ra and s0 are 8 bytes wide.
    const int ra_offset = aligned_stack_size - 8;
    const int s0_offset = aligned_stack_size - 16;

    if (aligned_stack_size <= 2048) {
        // Every immediate below fits into 12 signed bits.
        *OS << " addi sp, sp, -" << aligned_stack_size << "\n";
        *OS << " sd ra, " << ra_offset << "(sp)\n";
        *OS << " sd s0, " << s0_offset << "(sp)\n";
    } else {
        // Large frame: materialize the adjustment and the save-area address.
        *OS << " li t0, -" << aligned_stack_size << "\n";
        *OS << " add sp, sp, t0\n";
        *OS << " li t0, " << s0_offset << "\n";
        *OS << " add t0, sp, t0\n";
        *OS << " sd ra, 8(t0)\n";
        *OS << " sd s0, 0(t0)\n";
    }
    *OS << " mv s0, sp\n";
}
/// Prints the function epilogue: reloads `ra` and `s0` from the two topmost
/// 8-byte slots of the frame and releases the frame. The `ret` itself is
/// printed by the caller.
///
/// Nothing is printed when the (16-byte aligned) frame size is zero.
///
/// `addi` and `ld` only accept 12-bit signed immediates (-2048..2047), so
/// frames of 2048 bytes or more (note that +2048 is already out of range)
/// compute the save-area address in the caller-saved temporary `t0` instead.
void RISCv64AsmPrinter::printEpilogue(MachineFunction* mfunc) {
    const int stack_size = mfunc->getFrameInfo().frame_size;
    // Same rounding as the prologue so both agree on the frame size.
    const int aligned_stack_size = (stack_size + 15) & ~15;
    if (aligned_stack_size <= 0) {
        return;
    }

    const int ra_offset = aligned_stack_size - 8;
    const int s0_offset = aligned_stack_size - 16;

    if (aligned_stack_size <= 2047) {
        *OS << " ld ra, " << ra_offset << "(sp)\n";
        *OS << " ld s0, " << s0_offset << "(sp)\n";
        *OS << " addi sp, sp, " << aligned_stack_size << "\n";
    } else {
        // t0 = address of the saved s0; saved ra sits 8 bytes above it and
        // the old stack pointer 16 bytes above it.
        *OS << " li t0, " << s0_offset << "\n";
        *OS << " add t0, sp, t0\n";
        *OS << " ld ra, 8(t0)\n";
        *OS << " ld s0, 0(t0)\n";
        *OS << " addi sp, t0, 16\n";
    }
}
/// Prints one basic block: its label (skipped when the block has no name)
/// followed by every instruction in order.
void RISCv64AsmPrinter::printBasicBlock(MachineBasicBlock* mbb) {
    const auto& label = mbb->getName();
    if (!label.empty()) {
        *OS << label << ":\n";
    }

    for (const auto& machine_instr : mbb->getInstructions()) {
        printInstruction(machine_instr.get(), mbb);
    }
}
/// Prints a single machine instruction as one line of assembly.
///
/// Special cases:
///  - `LABEL` pseudo-instructions are printed as `name:` with no indentation
///    and no operand list.
///  - `RET` is preceded by the function epilogue of the enclosing function.
///  - `LW`/`SW`/`LD`/`SD` print at most their first two operands
///    (`reg, offset(base)`).
///
/// @param instr      Instruction to print.
/// @param parent_bb  Block containing `instr`; used to reach the function for
///                   the epilogue.
/// @throws std::runtime_error for an opcode without a known mnemonic or a
///         `LABEL` without a name operand.
void RISCv64AsmPrinter::printInstruction(MachineInstr* instr, MachineBasicBlock* parent_bb) {
    const auto opcode = instr->getOpcode();
    const auto& operands = instr->getOperands();

    // Labels are not instructions: emit them flush-left and stop. Handling
    // this before any indentation is written avoids having to "erase"
    // already printed characters, which is not possible on a file stream.
    if (opcode == RVOpcodes::LABEL) {
        if (operands.empty()) {
            throw std::runtime_error("LABEL instruction without a name operand in AsmPrinter");
        }
        printOperand(operands[0].get());
        *OS << ":\n";
        return;
    }

    // The epilogue consists of complete, already indented lines, so it has
    // to be printed before the indentation of the `ret` line itself.
    if (opcode == RVOpcodes::RET) {
        printEpilogue(parent_bb->getParent());
    }

    // Map the opcode to its assembly mnemonic.
    const char* mnemonic = nullptr;
    switch (opcode) {
        // Arithmetic
        case RVOpcodes::ADD: mnemonic = "add "; break;
        case RVOpcodes::ADDI: mnemonic = "addi "; break;
        case RVOpcodes::ADDW: mnemonic = "addw "; break;
        case RVOpcodes::ADDIW: mnemonic = "addiw "; break;
        case RVOpcodes::SUB: mnemonic = "sub "; break;
        case RVOpcodes::SUBW: mnemonic = "subw "; break;
        case RVOpcodes::MUL: mnemonic = "mul "; break;
        case RVOpcodes::MULW: mnemonic = "mulw "; break;
        case RVOpcodes::DIV: mnemonic = "div "; break;
        case RVOpcodes::DIVW: mnemonic = "divw "; break;
        case RVOpcodes::REM: mnemonic = "rem "; break;
        case RVOpcodes::REMW: mnemonic = "remw "; break;
        // Logical
        case RVOpcodes::XOR: mnemonic = "xor "; break;
        case RVOpcodes::XORI: mnemonic = "xori "; break;
        case RVOpcodes::OR: mnemonic = "or "; break;
        case RVOpcodes::ORI: mnemonic = "ori "; break;
        case RVOpcodes::AND: mnemonic = "and "; break;
        case RVOpcodes::ANDI: mnemonic = "andi "; break;
        // Shift
        case RVOpcodes::SLL: mnemonic = "sll "; break;
        case RVOpcodes::SLLI: mnemonic = "slli "; break;
        case RVOpcodes::SLLW: mnemonic = "sllw "; break;
        case RVOpcodes::SLLIW: mnemonic = "slliw "; break;
        case RVOpcodes::SRL: mnemonic = "srl "; break;
        case RVOpcodes::SRLI: mnemonic = "srli "; break;
        case RVOpcodes::SRLW: mnemonic = "srlw "; break;
        case RVOpcodes::SRLIW: mnemonic = "srliw "; break;
        case RVOpcodes::SRA: mnemonic = "sra "; break;
        case RVOpcodes::SRAI: mnemonic = "srai "; break;
        case RVOpcodes::SRAW: mnemonic = "sraw "; break;
        case RVOpcodes::SRAIW: mnemonic = "sraiw "; break;
        // Compare
        case RVOpcodes::SLT: mnemonic = "slt "; break;
        case RVOpcodes::SLTI: mnemonic = "slti "; break;
        case RVOpcodes::SLTU: mnemonic = "sltu "; break;
        case RVOpcodes::SLTIU: mnemonic = "sltiu "; break;
        // Memory
        case RVOpcodes::LW: mnemonic = "lw "; break;
        case RVOpcodes::LH: mnemonic = "lh "; break;
        case RVOpcodes::LB: mnemonic = "lb "; break;
        case RVOpcodes::LWU: mnemonic = "lwu "; break;
        case RVOpcodes::LHU: mnemonic = "lhu "; break;
        case RVOpcodes::LBU: mnemonic = "lbu "; break;
        case RVOpcodes::SW: mnemonic = "sw "; break;
        case RVOpcodes::SH: mnemonic = "sh "; break;
        case RVOpcodes::SB: mnemonic = "sb "; break;
        case RVOpcodes::LD: mnemonic = "ld "; break;
        case RVOpcodes::SD: mnemonic = "sd "; break;
        // Control flow
        case RVOpcodes::J: mnemonic = "j "; break;
        case RVOpcodes::JAL: mnemonic = "jal "; break;
        case RVOpcodes::JALR: mnemonic = "jalr "; break;
        case RVOpcodes::RET: mnemonic = "ret"; break;
        case RVOpcodes::BEQ: mnemonic = "beq "; break;
        case RVOpcodes::BNE: mnemonic = "bne "; break;
        case RVOpcodes::BLT: mnemonic = "blt "; break;
        case RVOpcodes::BGE: mnemonic = "bge "; break;
        case RVOpcodes::BLTU: mnemonic = "bltu "; break;
        case RVOpcodes::BGEU: mnemonic = "bgeu "; break;
        // Pseudo-instructions
        case RVOpcodes::LI: mnemonic = "li "; break;
        case RVOpcodes::LA: mnemonic = "la "; break;
        case RVOpcodes::MV: mnemonic = "mv "; break;
        case RVOpcodes::NEG: mnemonic = "neg "; break;
        case RVOpcodes::NEGW: mnemonic = "negw "; break;
        case RVOpcodes::SEQZ: mnemonic = "seqz "; break;
        case RVOpcodes::SNEZ: mnemonic = "snez "; break;
        // Call
        case RVOpcodes::CALL: mnemonic = "call "; break;
        default:
            throw std::runtime_error("Unknown opcode in AsmPrinter");
    }

    // Instruction indentation followed by the mnemonic.
    *OS << " " << mnemonic;

    // Word/doubleword loads and stores are `reg, offset(base)`: only the
    // first two operands are meaningful, anything beyond is ignored.
    size_t operand_count = operands.size();
    const bool is_word_mem_access = opcode == RVOpcodes::LW || opcode == RVOpcodes::SW ||
                                    opcode == RVOpcodes::LD || opcode == RVOpcodes::SD;
    if (is_word_mem_access && operand_count > 2) {
        operand_count = 2;
    }

    // Comma-separated operand list.
    for (size_t i = 0; i < operand_count; ++i) {
        if (i != 0) {
            *OS << ", ";
        }
        printOperand(operands[i].get());
    }
    *OS << "\n";
}
/// Prints one operand in assembly syntax. A null operand prints nothing.
///
///  - register:  physical name, or `%vreg<N>` for a virtual register
///  - immediate: its value
///  - label:     its name
///  - memory:    `offset(base)`, both parts printed recursively
void RISCv64AsmPrinter::printOperand(MachineOperand* op) {
    if (op == nullptr) {
        return;
    }

    const auto kind = op->getKind();

    if (kind == MachineOperand::KIND_REG) {
        auto* reg = static_cast<RegOperand*>(op);
        if (!reg->isVirtual()) {
            *OS << regToString(reg->getPReg());
            return;
        }
        // No virtual register should be left at this stage; print a
        // recognizable placeholder instead of failing.
        *OS << "%vreg" << reg->getVRegNum();
        return;
    }

    if (kind == MachineOperand::KIND_IMM) {
        *OS << static_cast<ImmOperand*>(op)->getValue();
        return;
    }

    if (kind == MachineOperand::KIND_LABEL) {
        *OS << static_cast<LabelOperand*>(op)->getName();
        return;
    }

    if (kind == MachineOperand::KIND_MEM) {
        auto* mem = static_cast<MemOperand*>(op);
        printOperand(mem->getOffset());
        *OS << "(";
        printOperand(mem->getBase());
        *OS << ")";
    }
}
// Converts a physical register to its assembly name (migrated from the
// original RISCv64Backend.cpp). Registers without an entry in the table
// yield "UNKNOWN_REG".
std::string RISCv64AsmPrinter::regToString(PhysicalReg reg) {
    struct RegName {
        PhysicalReg reg;
        const char* name;
    };
    static const RegName kRegNames[] = {
        {PhysicalReg::ZERO, "x0"}, {PhysicalReg::RA, "ra"},   {PhysicalReg::SP, "sp"},
        {PhysicalReg::GP, "gp"},   {PhysicalReg::TP, "tp"},   {PhysicalReg::T0, "t0"},
        {PhysicalReg::T1, "t1"},   {PhysicalReg::T2, "t2"},   {PhysicalReg::S0, "s0"},
        {PhysicalReg::S1, "s1"},   {PhysicalReg::A0, "a0"},   {PhysicalReg::A1, "a1"},
        {PhysicalReg::A2, "a2"},   {PhysicalReg::A3, "a3"},   {PhysicalReg::A4, "a4"},
        {PhysicalReg::A5, "a5"},   {PhysicalReg::A6, "a6"},   {PhysicalReg::A7, "a7"},
        {PhysicalReg::S2, "s2"},   {PhysicalReg::S3, "s3"},   {PhysicalReg::S4, "s4"},
        {PhysicalReg::S5, "s5"},   {PhysicalReg::S6, "s6"},   {PhysicalReg::S7, "s7"},
        {PhysicalReg::S8, "s8"},   {PhysicalReg::S9, "s9"},   {PhysicalReg::S10, "s10"},
        {PhysicalReg::S11, "s11"}, {PhysicalReg::T3, "t3"},   {PhysicalReg::T4, "t4"},
        {PhysicalReg::T5, "t5"},   {PhysicalReg::T6, "t6"},
    };

    for (const RegName& entry : kRegNames) {
        if (entry.reg == reg) {
            return entry.name;
        }
    }
    return "UNKNOWN_REG";
}
} // namespace sysy

File diff suppressed because it is too large Load Diff

605
src/RISCv64ISel.cpp Normal file
View File

@ -0,0 +1,605 @@
#include "RISCv64ISel.h"
#include <stdexcept>
#include <iostream>
#include <functional>
#include <set>
namespace sysy {
// Starts with both counters at zero; runOnFunction() resets them again for
// every function it processes.
RISCv64ISel::RISCv64ISel() : vreg_counter(0), local_label_counter(0) {}
// Returns the virtual register assigned to an IR value, allocating a new
// one on first use.
//
// Virtual register numbers start at 1; number 0 is never handed out (it is
// reserved by convention for the physical zero register x0).
//
// Throws std::runtime_error when `val` is null.
unsigned RISCv64ISel::getVReg(Value* val) {
    if (val == nullptr) {
        throw std::runtime_error("Cannot get vreg for a null Value.");
    }

    // Already mapped: hand back the existing number.
    const auto known = vreg_map.find(val);
    if (known != vreg_map.end()) {
        return known->second;
    }

    // Skip number 0 on the very first allocation.
    if (vreg_counter == 0) {
        vreg_counter = 1;
    }
    const auto fresh = vreg_counter++;
    vreg_map[val] = fresh;
    return fresh;
}
// Main entry point: runs instruction selection on one IR function.
//
// Resets all per-function state (value->vreg map, block map, both counters),
// builds a new MachineFunction named after `func`, and returns ownership of
// it. Returns nullptr when `func` is null.
std::unique_ptr<MachineFunction> RISCv64ISel::runOnFunction(Function* func) {
    F = func;
    if (func == nullptr) {
        return nullptr;
    }

    MFunc = std::make_unique<MachineFunction>(F->getName());

    // Forget everything recorded for the previously selected function.
    vreg_map.clear();
    bb_map.clear();
    vreg_counter = 0;
    local_label_counter = 0;

    select();

    // MFunc is a member, so the move is required to give up ownership.
    return std::move(MFunc);
}
// 指令选择主流程
void RISCv64ISel::select() {
// 1. 为所有基本块创建对应的MachineBasicBlock
for (const auto& bb_ptr : F->getBasicBlocks()) {
BasicBlock* bb = bb_ptr.get();
auto mbb = std::make_unique<MachineBasicBlock>(bb->getName(), MFunc.get());
bb_map[bb] = mbb.get();
MFunc->addBlock(std::move(mbb));
}
// 2. 为函数参数创建虚拟寄存器
// ====================== 已修正 ======================
// 根据 IR.h, 参数列表存储在入口基本块中
if (F->getEntryBlock()) {
for (auto* arg_alloca : F->getEntryBlock()->getArguments()) {
getVReg(arg_alloca);
}
}
// =====================================================
// 3. 遍历每个基本块,生成指令
for (const auto& bb_ptr : F->getBasicBlocks()) {
selectBasicBlock(bb_ptr.get());
}
// 4. 设置基本块的前驱后继关系
for (const auto& bb_ptr : F->getBasicBlocks()) {
BasicBlock* bb = bb_ptr.get();
CurMBB = bb_map.at(bb);
for (auto succ : bb->getSuccessors()) {
CurMBB->successors.push_back(bb_map.at(succ));
}
for (auto pred : bb->getPredecessors()) {
CurMBB->predecessors.push_back(bb_map.at(pred));
}
}
}
// Selects instructions for a single basic block.
//
// Builds the block's DAG, then walks the IR instructions in their original
// order. For each instruction that has a DAG node, the node's operands are
// selected first (depth-first) and then the node itself; every node is
// selected at most once.
void RISCv64ISel::selectBasicBlock(BasicBlock* bb) {
    CurMBB = bb_map.at(bb);
    auto dag = build_dag(bb);

    // Index every node that carries an IR value. If several nodes share a
    // value, the one created last wins.
    std::map<Value*, DAGNode*> value_to_node;
    for (const auto& node : dag) {
        if (node->value) {
            value_to_node[node->value] = node.get();
        }
    }

    std::set<DAGNode*> selected_nodes;
    std::function<void(DAGNode*)> select_recursive = [&](DAGNode* node) {
        if (!node || selected_nodes.count(node)) return;
        for (auto operand : node->operands) {
            select_recursive(operand);
        }
        // The node is selected only after all of its operands.
        selectNode(node);
        selected_nodes.insert(node);
    };

    // Drive selection in IR order. The map above already contains every DAG
    // node with a non-null value, so an instruction missing from it has no
    // node at all and is skipped; no second scan over the DAG is needed.
    for (const auto& inst_ptr : bb->getInstructions()) {
        const auto found = value_to_node.find(inst_ptr.get());
        if (found != value_to_node.end()) {
            select_recursive(found->second);
        }
    }
}
// Lowers one DAG node to MachineInstr objects appended to CurMBB.
//
// Operands that are constants or globals have no defining instruction of
// their own, so every user materializes them right before use (`li` for a
// constant, `la` for a global).
//
// Throws std::runtime_error for unsupported node kinds or binary/unary
// operators.
void RISCv64ISel::selectNode(DAGNode* node) {
    // ---- emission helpers (all registers are virtual register numbers) ----
    auto emitRRR = [this](RVOpcodes op, unsigned rd, unsigned rs1, unsigned rs2) {
        auto mi = std::make_unique<MachineInstr>(op);
        mi->addOperand(std::make_unique<RegOperand>(rd));
        mi->addOperand(std::make_unique<RegOperand>(rs1));
        mi->addOperand(std::make_unique<RegOperand>(rs2));
        CurMBB->addInstruction(std::move(mi));
    };
    auto emitRR = [this](RVOpcodes op, unsigned rd, unsigned rs) {
        auto mi = std::make_unique<MachineInstr>(op);
        mi->addOperand(std::make_unique<RegOperand>(rd));
        mi->addOperand(std::make_unique<RegOperand>(rs));
        CurMBB->addInstruction(std::move(mi));
    };
    auto emitRRI = [this](RVOpcodes op, unsigned rd, unsigned rs, int64_t imm) {
        auto mi = std::make_unique<MachineInstr>(op);
        mi->addOperand(std::make_unique<RegOperand>(rd));
        mi->addOperand(std::make_unique<RegOperand>(rs));
        mi->addOperand(std::make_unique<ImmOperand>(imm));
        CurMBB->addInstruction(std::move(mi));
    };
    // `op reg, off(base)` for loads and stores.
    auto emitMem = [this](RVOpcodes op, unsigned reg, unsigned base, int64_t off) {
        auto mi = std::make_unique<MachineInstr>(op);
        mi->addOperand(std::make_unique<RegOperand>(reg));
        mi->addOperand(std::make_unique<MemOperand>(std::make_unique<RegOperand>(base),
                                                    std::make_unique<ImmOperand>(off)));
        CurMBB->addInstruction(std::move(mi));
    };
    auto emitBranch = [this](RVOpcodes op, unsigned rs1, unsigned rs2, const std::string& label) {
        auto mi = std::make_unique<MachineInstr>(op);
        mi->addOperand(std::make_unique<RegOperand>(rs1));
        mi->addOperand(std::make_unique<RegOperand>(rs2));
        mi->addOperand(std::make_unique<LabelOperand>(label));
        CurMBB->addInstruction(std::move(mi));
    };
    // Instruction whose only operand is a label (J, LABEL).
    auto emitLabelOp = [this](RVOpcodes op, const std::string& label) {
        auto mi = std::make_unique<MachineInstr>(op);
        mi->addOperand(std::make_unique<LabelOperand>(label));
        CurMBB->addInstruction(std::move(mi));
    };
    // Defines the vreg of a constant (li) or global (la); no-op otherwise.
    auto materialize = [this](Value* val) {
        if (auto c = dynamic_cast<ConstantValue*>(val)) {
            auto li = std::make_unique<MachineInstr>(RVOpcodes::LI);
            li->addOperand(std::make_unique<RegOperand>(getVReg(c)));
            li->addOperand(std::make_unique<ImmOperand>(c->getInt()));
            CurMBB->addInstruction(std::move(li));
        } else if (auto g = dynamic_cast<GlobalValue*>(val)) {
            auto la = std::make_unique<MachineInstr>(RVOpcodes::LA);
            la->addOperand(std::make_unique<RegOperand>(getVReg(g)));
            la->addOperand(std::make_unique<LabelOperand>(g->getName()));
            CurMBB->addInstruction(std::move(la));
        }
    };

    switch (node->kind) {
        case DAGNode::CONSTANT:
        case DAGNode::ALLOCA_ADDR:
            // These nodes emit nothing themselves; their users handle them
            // on demand. A vreg is still reserved so the alloca address has
            // a name.
            if (node->value) getVReg(node->value);
            break;

        case DAGNode::LOAD: {
            // A LOAD node without operands is the DAG builder's placeholder
            // for a value defined outside this block: it already lives in
            // its vreg, so there is nothing to load.
            if (node->operands.empty()) {
                if (node->value) getVReg(node->value);
                break;
            }
            // lw rd, 0(ptr); a later pass rewrites stack accesses to
            // offset(s0).
            auto dest_vreg = getVReg(node->value);
            Value* pointer = node->operands[0]->value;
            materialize(pointer);  // la for a global pointer
            auto ptr_vreg = getVReg(pointer);
            emitMem(RVOpcodes::LW, dest_vreg, ptr_vreg, 0);
            break;
        }

        case DAGNode::STORE: {
            // sw value, 0(ptr)
            Value* stored = node->operands[0]->value;
            Value* pointer = node->operands[1]->value;
            materialize(stored);
            auto val_vreg = getVReg(stored);
            materialize(pointer);  // la for a global pointer
            auto ptr_vreg = getVReg(pointer);
            emitMem(RVOpcodes::SW, val_vreg, ptr_vreg, 0);
            break;
        }

        case DAGNode::BINARY: {
            auto bin = dynamic_cast<BinaryInst*>(node->value);
            if (!bin) break;
            Value* lhs = bin->getLhs();
            Value* rhs = bin->getRhs();

            // add with a small constant rhs -> single immediate add.
            if (bin->getKind() == BinaryInst::kAdd) {
                if (auto rhs_const = dynamic_cast<ConstantValue*>(rhs)) {
                    const auto imm = rhs_const->getInt();
                    if (imm >= -2048 && imm < 2048) {
                        // The lhs may itself be a constant or a global.
                        materialize(lhs);
                        auto dest_vreg = getVReg(bin);
                        auto lhs_vreg = getVReg(lhs);
                        // Pointers are 64-bit: addiw would truncate them.
                        const RVOpcodes op =
                            lhs->getType()->isPointer() ? RVOpcodes::ADDI : RVOpcodes::ADDIW;
                        emitRRI(op, dest_vreg, lhs_vreg, imm);
                        return;
                    }
                }
            }

            materialize(lhs);
            materialize(rhs);
            auto dest_vreg = getVReg(bin);
            auto lhs_vreg = getVReg(lhs);
            auto rhs_vreg = getVReg(rhs);

            switch (bin->getKind()) {
                case BinaryInst::kAdd: {
                    // Full-width add as soon as a pointer is involved.
                    const bool pointer_math =
                        lhs->getType()->isPointer() || rhs->getType()->isPointer();
                    emitRRR(pointer_math ? RVOpcodes::ADD : RVOpcodes::ADDW,
                            dest_vreg, lhs_vreg, rhs_vreg);
                    break;
                }
                case BinaryInst::kSub:
                    emitRRR(RVOpcodes::SUBW, dest_vreg, lhs_vreg, rhs_vreg);
                    break;
                case BinaryInst::kMul:
                    emitRRR(RVOpcodes::MULW, dest_vreg, lhs_vreg, rhs_vreg);
                    break;
                case Instruction::kDiv:
                    emitRRR(RVOpcodes::DIVW, dest_vreg, lhs_vreg, rhs_vreg);
                    break;
                case Instruction::kRem:
                    emitRRR(RVOpcodes::REMW, dest_vreg, lhs_vreg, rhs_vreg);
                    break;
                case BinaryInst::kICmpEQ:
                    // (lhs - rhs) == 0
                    emitRRR(RVOpcodes::SUBW, dest_vreg, lhs_vreg, rhs_vreg);
                    emitRR(RVOpcodes::SEQZ, dest_vreg, dest_vreg);
                    break;
                case BinaryInst::kICmpNE:
                    // (lhs - rhs) != 0
                    emitRRR(RVOpcodes::SUBW, dest_vreg, lhs_vreg, rhs_vreg);
                    emitRR(RVOpcodes::SNEZ, dest_vreg, dest_vreg);
                    break;
                case BinaryInst::kICmpLT:
                    emitRRR(RVOpcodes::SLT, dest_vreg, lhs_vreg, rhs_vreg);
                    break;
                case BinaryInst::kICmpGT:
                    // lhs > rhs  <=>  rhs < lhs
                    emitRRR(RVOpcodes::SLT, dest_vreg, rhs_vreg, lhs_vreg);
                    break;
                case BinaryInst::kICmpLE:
                    // lhs <= rhs  <=>  !(rhs < lhs)
                    emitRRR(RVOpcodes::SLT, dest_vreg, rhs_vreg, lhs_vreg);
                    emitRRI(RVOpcodes::XORI, dest_vreg, dest_vreg, 1);
                    break;
                case BinaryInst::kICmpGE:
                    // lhs >= rhs  <=>  !(lhs < rhs)
                    emitRRR(RVOpcodes::SLT, dest_vreg, lhs_vreg, rhs_vreg);
                    emitRRI(RVOpcodes::XORI, dest_vreg, dest_vreg, 1);
                    break;
                default:
                    throw std::runtime_error("Unsupported binary instruction in ISel");
            }
            break;
        }

        case DAGNode::UNARY: {
            auto unary = dynamic_cast<UnaryInst*>(node->value);
            if (!unary) break;
            auto dest_vreg = getVReg(unary);
            materialize(unary->getOperand());
            auto src_vreg = getVReg(unary->getOperand());
            switch (unary->getKind()) {
                case UnaryInst::kNeg: {
                    // subw rd, x0, rs
                    auto instr = std::make_unique<MachineInstr>(RVOpcodes::SUBW);
                    instr->addOperand(std::make_unique<RegOperand>(dest_vreg));
                    instr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
                    instr->addOperand(std::make_unique<RegOperand>(src_vreg));
                    CurMBB->addInstruction(std::move(instr));
                    break;
                }
                case UnaryInst::kNot:
                    emitRR(RVOpcodes::SEQZ, dest_vreg, src_vreg);
                    break;
                default:
                    throw std::runtime_error("Unsupported unary instruction in ISel");
            }
            break;
        }

        case DAGNode::CALL: {
            auto call = dynamic_cast<CallInst*>(node->value);
            if (!call) break;
            // Only the call itself and the move of its result are emitted
            // here; argument passing is left to a dedicated calling
            // convention pass.
            emitLabelOp(RVOpcodes::CALL, call->getCallee()->getName());
            if (!call->getType()->isVoid()) {
                auto mv_instr = std::make_unique<MachineInstr>(RVOpcodes::MV);
                mv_instr->addOperand(std::make_unique<RegOperand>(getVReg(call)));      // dest
                mv_instr->addOperand(std::make_unique<RegOperand>(PhysicalReg::A0));    // src
                CurMBB->addInstruction(std::move(mv_instr));
            }
            break;
        }

        case DAGNode::RETURN: {
            auto ret_inst = dynamic_cast<ReturnInst*>(node->value);
            if (ret_inst && ret_inst->hasReturnValue()) {
                // Move the result into a0. A constant result (`return 0;`)
                // has to be loaded into its vreg first.
                Value* result = ret_inst->getReturnValue();
                materialize(result);
                auto mv_instr = std::make_unique<MachineInstr>(RVOpcodes::MV);
                mv_instr->addOperand(std::make_unique<RegOperand>(PhysicalReg::A0));
                mv_instr->addOperand(std::make_unique<RegOperand>(getVReg(result)));
                CurMBB->addInstruction(std::move(mv_instr));
            }
            // The ret pseudo-instruction; the epilogue is added when printing.
            auto instr = std::make_unique<MachineInstr>(RVOpcodes::RET);
            CurMBB->addInstruction(std::move(instr));
            break;
        }

        case DAGNode::BRANCH: {
            if (auto cond_br = dynamic_cast<CondBrInst*>(node->value)) {
                // bne cond, x0, then_block
                materialize(cond_br->getCondition());
                auto br_instr = std::make_unique<MachineInstr>(RVOpcodes::BNE);
                br_instr->addOperand(std::make_unique<RegOperand>(getVReg(cond_br->getCondition())));
                br_instr->addOperand(std::make_unique<RegOperand>(PhysicalReg::ZERO));
                br_instr->addOperand(std::make_unique<LabelOperand>(cond_br->getThenBlock()->getName()));
                CurMBB->addInstruction(std::move(br_instr));
                // NOTE(review): no `j else_block` is emitted, so the false
                // edge relies on falling through into the next block. This
                // is only correct if the else block is laid out right after
                // this one or a later branch pass inserts the jump; confirm.
            } else if (auto uncond_br = dynamic_cast<UncondBrInst*>(node->value)) {
                emitLabelOp(RVOpcodes::J, uncond_br->getBlock()->getName());
            }
            break;
        }

        case DAGNode::MEMSET: {
            // Inline byte fill: 8-byte stores for the bulk, then single
            // bytes for the remainder.
            // NOTE(review): the instruction's getBegin() operand is part of
            // the DAG node but is not used here; confirm that is intended.
            auto memset_inst = dynamic_cast<MemsetInst*>(node->value);
            if (!memset_inst) break;

            // Size and fill value are typically constants.
            materialize(memset_inst->getPointer());
            materialize(memset_inst->getSize());
            materialize(memset_inst->getValue());
            auto r_dest_addr = getVReg(memset_inst->getPointer());
            auto r_num_bytes = getVReg(memset_inst->getSize());
            auto r_value_byte = getVReg(memset_inst->getValue());

            // Temporaries. The shift scratch is a separate vreg so the fill
            // value's own register is never overwritten.
            auto r_counter = vreg_counter++;
            auto r_end_addr = vreg_counter++;
            auto r_current_addr = vreg_counter++;
            auto r_temp_val = vreg_counter++;
            auto r_shifted = vreg_counter++;

            // local_label_counter restarts at 0 for every function, so the
            // function name is part of the label to keep labels unique
            // within the assembly file.
            const int unique_id = this->local_label_counter++;
            const std::string label_suffix = "_" + std::to_string(unique_id);
            const std::string label_prefix = std::string(F->getName()) + "_";
            const std::string loop_start_label = label_prefix + "memset_loop_start" + label_suffix;
            const std::string loop_end_label = label_prefix + "memset_loop_end" + label_suffix;
            const std::string remainder_label = label_prefix + "memset_remainder" + label_suffix;
            const std::string done_label = label_prefix + "memset_done" + label_suffix;

            // Replicate the low byte of the value into all 8 bytes.
            emitRRI(RVOpcodes::ANDI, r_temp_val, r_value_byte, 255);
            emitRRI(RVOpcodes::SLLI, r_shifted, r_temp_val, 8);
            emitRRR(RVOpcodes::OR, r_temp_val, r_temp_val, r_shifted);
            emitRRI(RVOpcodes::SLLI, r_shifted, r_temp_val, 16);
            emitRRR(RVOpcodes::OR, r_temp_val, r_temp_val, r_shifted);
            emitRRI(RVOpcodes::SLLI, r_shifted, r_temp_val, 32);
            emitRRR(RVOpcodes::OR, r_temp_val, r_temp_val, r_shifted);

            // end = dest + size; cur = dest; counter = dest + (size & ~7)
            emitRRR(RVOpcodes::ADD, r_end_addr, r_dest_addr, r_num_bytes);
            emitRR(RVOpcodes::MV, r_current_addr, r_dest_addr);
            emitRRI(RVOpcodes::ANDI, r_counter, r_num_bytes, -8);
            emitRRR(RVOpcodes::ADD, r_counter, r_dest_addr, r_counter);

            // 8-byte store loop.
            emitLabelOp(RVOpcodes::LABEL, loop_start_label);
            emitBranch(RVOpcodes::BGEU, r_current_addr, r_counter, loop_end_label);
            emitMem(RVOpcodes::SD, r_temp_val, r_current_addr, 0);
            emitRRI(RVOpcodes::ADDI, r_current_addr, r_current_addr, 8);
            emitLabelOp(RVOpcodes::J, loop_start_label);
            emitLabelOp(RVOpcodes::LABEL, loop_end_label);

            // Remaining bytes, one at a time.
            emitLabelOp(RVOpcodes::LABEL, remainder_label);
            emitBranch(RVOpcodes::BGEU, r_current_addr, r_end_addr, done_label);
            emitMem(RVOpcodes::SB, r_temp_val, r_current_addr, 0);
            emitRRI(RVOpcodes::ADDI, r_current_addr, r_current_addr, 1);
            emitLabelOp(RVOpcodes::J, remainder_label);
            emitLabelOp(RVOpcodes::LABEL, done_label);
            break;
        }

        default:
            throw std::runtime_error("Unsupported DAGNode kind in ISel: " + std::to_string(node->kind));
    }
}
// --- DAG construction (migrated almost unchanged from the original
// RISCv64Backend.cpp) ---

// Returns the DAG node for `val`, creating it when necessary.
//
// An existing node is reused when `val` is already in `value_to_node`,
// except for STORE, RETURN, BRANCH and MEMSET, which always get a new node.
// New nodes are owned by `nodes_storage`. Only nodes that produce a value
// (non-void instructions) and globals are recorded for reuse.
RISCv64ISel::DAGNode* RISCv64ISel::create_node(DAGNode::NodeKind kind, Value* val, std::map<Value*, DAGNode*>& value_to_node, std::vector<std::unique_ptr<DAGNode>>& nodes_storage) {
    const bool always_fresh = kind == DAGNode::STORE || kind == DAGNode::RETURN ||
                              kind == DAGNode::BRANCH || kind == DAGNode::MEMSET;
    if (val && !always_fresh) {
        const auto existing = value_to_node.find(val);
        if (existing != value_to_node.end()) {
            return existing->second;
        }
    }

    nodes_storage.push_back(std::make_unique<DAGNode>(kind));
    DAGNode* created = nodes_storage.back().get();
    created->value = val;

    if (val) {
        const bool value_producing_inst =
            !val->getType()->isVoid() && dynamic_cast<Instruction*>(val) != nullptr;
        if (value_producing_inst || dynamic_cast<GlobalValue*>(val) != nullptr) {
            value_to_node[val] = created;
        }
    }
    return created;
}
// Returns the DAG node to use for an operand value.
//
// Values already present in `value_to_node` reuse their node. Otherwise a
// node is created by kind: constants and globals become CONSTANT, allocas
// become ALLOCA_ADDR, and anything else (for example a parameter or a value
// from another block) falls back to a LOAD node that has no operands.
RISCv64ISel::DAGNode* RISCv64ISel::get_operand_node(Value* val_ir, std::map<Value*, DAGNode*>& value_to_node, std::vector<std::unique_ptr<DAGNode>>& nodes_storage) {
    const auto existing = value_to_node.find(val_ir);
    if (existing != value_to_node.end()) {
        return existing->second;
    }

    DAGNode::NodeKind kind = DAGNode::LOAD;  // fallback for unknown values
    if (dynamic_cast<ConstantValue*>(val_ir) || dynamic_cast<GlobalValue*>(val_ir)) {
        kind = DAGNode::CONSTANT;
    } else if (dynamic_cast<AllocaInst*>(val_ir)) {
        kind = DAGNode::ALLOCA_ADDR;
    }
    return create_node(kind, val_ir, value_to_node, nodes_storage);
}
// Builds the selection DAG of one basic block.
//
// Every supported instruction gets a node whose operands point at the nodes
// of the values it uses; instructions of any other type are ignored. The
// returned vector owns all nodes, in creation order.
std::vector<std::unique_ptr<RISCv64ISel::DAGNode>> RISCv64ISel::build_dag(BasicBlock* bb) {
    std::vector<std::unique_ptr<DAGNode>> storage;
    std::map<Value*, DAGNode*> known_nodes;

    // Shorthands that thread the two bookkeeping containers through.
    auto make_node = [&](DAGNode::NodeKind kind, Value* value) {
        return create_node(kind, value, known_nodes, storage);
    };
    auto operand_node = [&](Value* value) {
        return get_operand_node(value, known_nodes, storage);
    };

    for (const auto& owned_inst : bb->getInstructions()) {
        Instruction* inst = owned_inst.get();

        if (auto* alloca_inst = dynamic_cast<AllocaInst*>(inst)) {
            make_node(DAGNode::ALLOCA_ADDR, alloca_inst);
            continue;
        }
        if (auto* store_inst = dynamic_cast<StoreInst*>(inst)) {
            // operands: [value, pointer]
            auto* n = make_node(DAGNode::STORE, store_inst);
            n->operands.push_back(operand_node(store_inst->getValue()));
            n->operands.push_back(operand_node(store_inst->getPointer()));
            continue;
        }
        if (auto* memset_inst = dynamic_cast<MemsetInst*>(inst)) {
            // operands: [pointer, begin, size, value]
            auto* n = make_node(DAGNode::MEMSET, memset_inst);
            n->operands.push_back(operand_node(memset_inst->getPointer()));
            n->operands.push_back(operand_node(memset_inst->getBegin()));
            n->operands.push_back(operand_node(memset_inst->getSize()));
            n->operands.push_back(operand_node(memset_inst->getValue()));
            continue;
        }
        if (auto* load_inst = dynamic_cast<LoadInst*>(inst)) {
            auto* n = make_node(DAGNode::LOAD, load_inst);
            n->operands.push_back(operand_node(load_inst->getPointer()));
            continue;
        }
        if (auto* binary_inst = dynamic_cast<BinaryInst*>(inst)) {
            // Already has a node: nothing to add.
            if (known_nodes.count(binary_inst)) continue;
            auto* n = make_node(DAGNode::BINARY, binary_inst);
            n->operands.push_back(operand_node(binary_inst->getLhs()));
            n->operands.push_back(operand_node(binary_inst->getRhs()));
            continue;
        }
        if (auto* unary_inst = dynamic_cast<UnaryInst*>(inst)) {
            if (known_nodes.count(unary_inst)) continue;
            auto* n = make_node(DAGNode::UNARY, unary_inst);
            n->operands.push_back(operand_node(unary_inst->getOperand()));
            continue;
        }
        if (auto* call_inst = dynamic_cast<CallInst*>(inst)) {
            if (known_nodes.count(call_inst)) continue;
            auto* n = make_node(DAGNode::CALL, call_inst);
            for (auto argument : call_inst->getArguments()) {
                n->operands.push_back(operand_node(argument->getValue()));
            }
            continue;
        }
        if (auto* return_inst = dynamic_cast<ReturnInst*>(inst)) {
            auto* n = make_node(DAGNode::RETURN, return_inst);
            if (return_inst->hasReturnValue()) {
                n->operands.push_back(operand_node(return_inst->getReturnValue()));
            }
            continue;
        }
        if (auto* cond_branch = dynamic_cast<CondBrInst*>(inst)) {
            auto* n = make_node(DAGNode::BRANCH, cond_branch);
            n->operands.push_back(operand_node(cond_branch->getCondition()));
            continue;
        }
        if (auto* plain_branch = dynamic_cast<UncondBrInst*>(inst)) {
            make_node(DAGNode::BRANCH, plain_branch);
        }
    }
    return storage;
}
} // namespace sysy

265
src/RISCv64RegAlloc.cpp Normal file
View File

@ -0,0 +1,265 @@
#include "RISCv64RegAlloc.h"
#include <algorithm>
#include <vector>
namespace sysy {
/// Creates a register allocator for `mfunc` and sets up the pool of
/// allocatable integer registers.
///
/// Special-purpose registers are excluded: zero, ra, sp, gp, tp and also
/// s0, because the generated prologue sets `s0 = sp` and stack accesses are
/// rewritten to `offset(s0)`; handing s0 to a virtual register would destroy
/// the frame base.
///
/// NOTE(review): s1-s11 are callee-saved by the RISC-V calling convention,
/// but the printed prologue only saves ra and s0; confirm that another pass
/// saves/restores the ones that get used. a0-a7 are also in the pool while
/// the selector uses a0 directly for call results and return values.
RISCv64RegAlloc::RISCv64RegAlloc(MachineFunction* mfunc) : MFunc(mfunc) {
    allocable_int_regs = {
        PhysicalReg::T0, PhysicalReg::T1, PhysicalReg::T2, PhysicalReg::T3,
        PhysicalReg::T4, PhysicalReg::T5, PhysicalReg::T6,
        PhysicalReg::A0, PhysicalReg::A1, PhysicalReg::A2, PhysicalReg::A3,
        PhysicalReg::A4, PhysicalReg::A5, PhysicalReg::A6, PhysicalReg::A7,
        PhysicalReg::S1, PhysicalReg::S2, PhysicalReg::S3,
        PhysicalReg::S4, PhysicalReg::S5, PhysicalReg::S6, PhysicalReg::S7,
        PhysicalReg::S8, PhysicalReg::S9, PhysicalReg::S10, PhysicalReg::S11,
    };
}
// Runs the allocation phases in order: liveness analysis, interference
// graph construction, graph coloring, and finally rewriting the function.
void RISCv64RegAlloc::run() {
analyzeLiveness();
buildInterferenceGraph();
colorGraph();
rewriteFunction();
}
/// Collects the virtual registers an instruction reads (`use`) and writes
/// (`def`). Results are inserted into the given sets; callers are expected
/// to pass empty sets.
///
/// Model (simplified; a precise version would be defined per opcode):
///  - the first register operand is the destination, all later register
///    operands are sources;
///  - stores and conditional branches have no destination, so all of their
///    register operands are sources;
///  - the base register of a memory operand is always a source.
///
/// Physical registers are never recorded, but a physical register in the
/// destination position still occupies it: in `mv a0, v5` the virtual
/// register v5 is a use, not a def.
///
/// NOTE(review): implicit register effects (e.g. registers written by a
/// CALL) are not modelled here.
void RISCv64RegAlloc::getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet& def) {
    bool has_dest = true;
    switch (instr->getOpcode()) {
        // Stores: the first register is the value being stored.
        case RVOpcodes::SB:
        case RVOpcodes::SH:
        case RVOpcodes::SW:
        case RVOpcodes::SD:
        // Conditional branches: both registers are compared.
        case RVOpcodes::BEQ:
        case RVOpcodes::BNE:
        case RVOpcodes::BLT:
        case RVOpcodes::BGE:
        case RVOpcodes::BLTU:
        case RVOpcodes::BGEU:
            has_dest = false;
            break;
        default:
            break;
    }

    // True until the destination slot (first register operand) is consumed.
    bool dest_pending = has_dest;
    for (const auto& op : instr->getOperands()) {
        if (op->getKind() == MachineOperand::KIND_REG) {
            auto reg_op = static_cast<RegOperand*>(op.get());
            const bool is_dest = dest_pending;
            dest_pending = false;  // consumed even by a physical register
            if (!reg_op->isVirtual()) {
                continue;
            }
            if (is_dest) {
                def.insert(reg_op->getVRegNum());
            } else {
                use.insert(reg_op->getVRegNum());
            }
        } else if (op->getKind() == MachineOperand::KIND_MEM) {
            auto mem_op = static_cast<MemOperand*>(op.get());
            if (mem_op->getBase()->isVirtual()) {
                use.insert(mem_op->getBase()->getVRegNum());
            }
        }
    }
}
void RISCv64RegAlloc::analyzeLiveness() {
bool changed = true;
while (changed) {
changed = false;
// 逆序遍历基本块
for (auto it = MFunc->getBlocks().rbegin(); it != MFunc->getBlocks().rend(); ++it) {
auto& mbb = *it;
LiveSet live_out;
for (auto succ : mbb->successors) {
// live_out[B] = Union(live_in[S]) for all S in succ(B)
if (!succ->getInstructions().empty()) {
auto first_instr = succ->getInstructions().front().get();
if (live_in_map.count(first_instr)) {
live_out.insert(live_in_map.at(first_instr).begin(), live_in_map.at(first_instr).end());
}
}
}
// 逆序遍历指令
for (auto instr_it = mbb->getInstructions().rbegin(); instr_it != mbb->getInstructions().rend(); ++instr_it) {
MachineInstr* instr = instr_it->get();
LiveSet old_live_in = live_in_map[instr];
LiveSet old_live_out = live_out_map[instr];
// 更新 live_out
live_out_map[instr] = live_out;
LiveSet use, def;
getInstrUseDef(instr, use, def);
// live_in[i] = use[i] U (live_out[i] - def[i])
LiveSet live_in = use;
LiveSet diff = live_out;
for (auto vreg : def) {
diff.erase(vreg);
}
live_in.insert(diff.begin(), diff.end());
live_in_map[instr] = live_in;
// 为下一次迭代准备live_out
live_out = live_in;
if (live_in_map[instr] != old_live_in || live_out_map[instr] != old_live_out) {
changed = true;
}
}
}
}
}
void RISCv64RegAlloc::buildInterferenceGraph() {
std::set<unsigned> all_vregs;
// 收集所有虚拟寄存器
for (auto const& [instr, live_set] : live_out_map) {
all_vregs.insert(live_set.begin(), live_set.end());
}
// 初始化图
for (auto vreg : all_vregs) {
interference_graph[vreg] = {};
}
for (auto& mbb : MFunc->getBlocks()) {
for (auto& instr : mbb->getInstructions()) {
LiveSet def, use;
getInstrUseDef(instr.get(), use, def);
const LiveSet& live_out = live_out_map.at(instr.get());
for (unsigned d : def) {
for (unsigned l : live_out) {
if (d != l) {
interference_graph[d].insert(l);
interference_graph[l].insert(d);
}
}
}
}
}
}
void RISCv64RegAlloc::colorGraph() {
std::vector<unsigned> sorted_vregs;
for (auto const& [vreg, neighbors] : interference_graph) {
sorted_vregs.push_back(vreg);
}
// 按度数降序排序 (简单贪心策略)
std::sort(sorted_vregs.begin(), sorted_vregs.end(), [&](unsigned a, unsigned b) {
return interference_graph[a].size() > interference_graph[b].size();
});
for (unsigned vreg : sorted_vregs) {
std::set<PhysicalReg> used_colors;
// 查找邻居已用的颜色
for (unsigned neighbor : interference_graph.at(vreg)) {
if (color_map.count(neighbor)) {
used_colors.insert(color_map.at(neighbor));
}
}
// 寻找一个可用的颜色
bool colored = false;
for (PhysicalReg preg : allocable_int_regs) {
if (used_colors.find(preg) == used_colors.end()) {
color_map[vreg] = preg;
colored = true;
break;
}
}
if (!colored) {
// 无法分配,需要溢出
spilled_vregs.insert(vreg);
}
}
}
void RISCv64RegAlloc::rewriteFunction() {
// 1. 为所有溢出的vreg分配栈槽
StackFrameInfo& frame_info = MFunc->getFrameInfo();
int current_offset = frame_info.frame_size; // 假设从现有栈大小后开始分配
for (unsigned vreg : spilled_vregs) {
current_offset += 4; // 假设所有溢出变量都占4字节
frame_info.spill_slots[vreg] = -current_offset; // 栈向下增长,所以是负偏移
}
frame_info.frame_size = current_offset;
// 2. 遍历所有指令替换vreg并插入spill代码
for (auto& mbb : MFunc->getBlocks()) {
std::vector<std::unique_ptr<MachineInstr>> new_instructions;
for (auto& instr_ptr : mbb->getInstructions()) {
LiveSet use, def;
getInstrUseDef(instr_ptr.get(), use, def);
// 为use的溢出变量插入LOAD
for (unsigned vreg : use) {
if (spilled_vregs.count(vreg)) {
int offset = frame_info.spill_slots.at(vreg);
auto load = std::make_unique<MachineInstr>(RVOpcodes::LW);
load->addOperand(std::make_unique<RegOperand>(vreg)); // 临时用vreg号代表稍后替换
load->addOperand(std::make_unique<MemOperand>(
std::make_unique<RegOperand>(PhysicalReg::S0), // 基址用帧指针s0
std::make_unique<ImmOperand>(offset)
));
new_instructions.push_back(std::move(load));
}
}
// 添加原始指令
new_instructions.push_back(std::move(instr_ptr));
// 为def的溢出变量插入STORE
for (unsigned vreg : def) {
if (spilled_vregs.count(vreg)) {
int offset = frame_info.spill_slots.at(vreg);
auto store = std::make_unique<MachineInstr>(RVOpcodes::SW);
store->addOperand(std::make_unique<RegOperand>(vreg)); // 临时用vreg号代表
store->addOperand(std::make_unique<MemOperand>(
std::make_unique<RegOperand>(PhysicalReg::S0),
std::make_unique<ImmOperand>(offset)
));
new_instructions.push_back(std::move(store));
}
}
}
mbb->getInstructions() = std::move(new_instructions);
}
// 3. 最后一遍扫描将所有RegOperand从vreg替换为preg
for (auto& mbb : MFunc->getBlocks()) {
for (auto& instr_ptr : mbb->getInstructions()) {
for (auto& op_ptr : instr_ptr->getOperands()) {
if(op_ptr->getKind() == MachineOperand::KIND_REG) {
auto reg_op = static_cast<RegOperand*>(op_ptr.get());
if (reg_op->isVirtual()) {
unsigned vreg = reg_op->getVRegNum();
if (color_map.count(vreg)) {
reg_op->setPReg(color_map.at(vreg));
} else if (spilled_vregs.count(vreg)) {
// 对于spill的vreg, 使用一个固定的临时寄存器, 比如t6
reg_op->setPReg(PhysicalReg::T6);
}
}
} else if (op_ptr->getKind() == MachineOperand::KIND_MEM) {
auto mem_op = static_cast<MemOperand*>(op_ptr.get());
auto base_reg_op = mem_op->getBase();
if(base_reg_op->isVirtual()){
unsigned vreg = base_reg_op->getVRegNum();
if(color_map.count(vreg)) base_reg_op->setPReg(color_map.at(vreg));
}
}
}
}
}
}
} // namespace sysy

View File

@ -0,0 +1,38 @@
#ifndef RISCV64_ASMPRINTER_H
#define RISCV64_ASMPRINTER_H
#include "RISCv64LLIR.h"
#include <iostream>
namespace sysy {
/// Prints a MachineFunction as RISC-V 64 assembly text.
class RISCv64AsmPrinter {
public:
    /// Main entry point: prints the whole @p mfunc to the output stream @p os.
    void runOnMachineFunction(MachineFunction* mfunc, std::ostream& os);

private:
    /// Prints a single basic block.
    void printBasicBlock(MachineBasicBlock* mbb);
    /// Prints a single instruction; @p parent_bb is the block that contains it.
    void printInstruction(MachineInstr* instr, MachineBasicBlock* parent_bb);
    /// Prints the function prologue.
    void printPrologue(MachineFunction* mfunc);
    /// Prints the function epilogue.
    void printEpilogue(MachineFunction* mfunc);
    /// Converts a physical register enumerator to its textual name
    /// (migrated from the former RISCv64Backend).
    std::string regToString(PhysicalReg reg);
    /// Prints a single operand.
    void printOperand(MachineOperand* op);

    /// Current output stream. Null until runOnMachineFunction() sets it, so an
    /// accidental early use fails deterministically instead of dereferencing
    /// an indeterminate pointer.
    std::ostream* OS = nullptr;
};
} // namespace sysy
#endif // RISCV64_ASMPRINTER_H

View File

@ -1,130 +1,29 @@
#ifndef RISCV64_BACKEND_H
#define RISCV64_BACKEND_H
#include "IR.h"
#include "IR.h" // 只需包含高层IR定义
#include <string>
#include <vector>
#include <map>
#include <set>
#include <memory>
#include <iostream>
#include <functional> // For std::function
extern int DEBUG;
extern int DEEPDEBUG;
namespace sysy {
// Result of liveness analysis: holds both the live_in and the live_out sets,
// keyed by IR instruction; the set elements are strings.
// NOTE(review): this chunk appears to be a flattened diff of the header; this
// struct may belong to the removed (pre-refactoring) side - confirm.
struct LivenessResult {
std::map<Instruction*, std::set<std::string>> live_in;
std::map<Instruction*, std::set<std::string>> live_out;
};
// RISCv64CodeGen is now a high-level driver.
// NOTE(review): this block appears to interleave the removed and the added
// lines of a diff (for example module_gen/function_gen are declared both in the
// public and in the private section). The code is left untouched; only the
// comments were translated.
class RISCv64CodeGen {
public:
enum class PhysicalReg {
ZERO, RA, SP, GP, TP, T0, T1, T2, S0, S1, A0, A1, A2, A3, A4, A5, A6, A7, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, T3, T4, T5, T6,
F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15,F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31
};
// Move DAGNode and RegAllocResult to public section
struct DAGNode {
enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET };
NodeKind kind;
Value* value = nullptr; // For IR Value
std::string inst; // Generated RISC-V instruction(s) for this node
std::string result_vreg; // Virtual register assigned to this node's result
std::vector<DAGNode*> operands;
std::vector<DAGNode*> users; // For debugging and potentially optimizations
DAGNode(NodeKind k) : kind(k) {}
// Debugging / helper
std::string getNodeKindString() const {
switch (kind) {
case CONSTANT: return "CONSTANT";
case LOAD: return "LOAD";
case STORE: return "STORE";
case BINARY: return "BINARY";
case CALL: return "CALL";
case RETURN: return "RETURN";
case BRANCH: return "BRANCH";
case ALLOCA_ADDR: return "ALLOCA_ADDR";
case UNARY: return "UNARY";
case MEMSET: return "MEMSET";
default: return "UNKNOWN";
}
}
};
struct RegAllocResult {
std::map<std::string, PhysicalReg> vreg_to_preg; // Virtual register to Physical Register mapping
std::map<Value*, int> stack_map; // Value (AllocaInst) to stack offset
int stack_size = 0; // Total stack frame size for locals and spills
};
RISCv64CodeGen(Module* mod) : module(mod) {}
// The only public entry point
std::string code_gen();
std::string module_gen();
std::string function_gen(Function* func);
// basicBlock_gen declaration changed: it takes an additional int block_idx parameter
std::string basicBlock_gen(BasicBlock* bb, const RegAllocResult& alloc, int block_idx);
// DAG related
std::vector<std::unique_ptr<DAGNode>> build_dag(BasicBlock* bb);
void select_instructions(DAGNode* node, const RegAllocResult& alloc);
// emit_instructions parameters changed so that it can append assembly directly to the main stringstream
void emit_instructions(DAGNode* node, std::stringstream& ss, const RegAllocResult& alloc, std::set<DAGNode*>& emitted_nodes);
// Register Allocation related
LivenessResult liveness_analysis(Function* func);
std::map<std::string, std::set<std::string>> build_interference_graph(const LivenessResult& liveness);
void color_graph(std::map<std::string, PhysicalReg>& vreg_to_preg,
const std::map<std::string, std::set<std::string>>& interference_graph);
RegAllocResult register_allocation(Function* func);
void eliminate_phi(Function* func); // Phi elimination is typically done before DAG building
// Utility
std::string reg_to_string(PhysicalReg reg);
void print_dag(const std::vector<std::unique_ptr<DAGNode>>& dag, const std::string& bb_name);
private:
static const std::vector<PhysicalReg> allocable_regs;
std::map<Value*, std::string> value_vreg_map; // Maps IR Value* to its virtual register name
// Module-level code generation (handles global variables and drives function generation)
std::string module_gen();
// Function-level code generation (implements the new pipeline)
std::string function_gen(Function* func);
Module* module;
int vreg_counter = 0; // Counter for unique virtual register names
int alloca_offset_counter = 0; // Counter for alloca offsets
// Member that stores all DAGNodes of the current function so that their lifetime spans the whole function's code generation.
// This makes the complete set of DAG nodes reachable across multiple basicBlock_gen calls.
std::vector<std::unique_ptr<DAGNode>> current_function_dag_nodes;
// Pseudo-name prefix for blocks with an empty label; the block index is appended to keep it unique
const std::string ENTRY_BLOCK_PSEUDO_NAME = "entry_block_";
int local_label_counter = 0; // Used to generate unique local labels (e.g. memset loops, jumps to anonymous blocks)
// !!! Changed: the get_operand_node helper now takes references to value_to_node and nodes_storage,
// because they are managed locally by build_dag
DAGNode* get_operand_node(
Value* val_ir,
std::map<Value*, DAGNode*>& value_to_node,
std::vector<std::unique_ptr<DAGNode>>& nodes_storage
);
// !!! New: the create_node helper also takes references to value_to_node and nodes_storage,
// and it is now a real member function rather than a lambda
DAGNode* create_node(
DAGNode::NodeKind kind,
Value* val,
std::map<Value*, DAGNode*>& value_to_node,
std::vector<std::unique_ptr<DAGNode>>& nodes_storage
);
std::vector<std::unique_ptr<Instruction>> temp_instructions_storage; // Stores the temporary BinaryInsts created in build_dag
};
} // namespace sysy

61
src/include/RISCv64ISel.h Normal file
View File

@ -0,0 +1,61 @@
#ifndef RISCV64_ISEL_H
#define RISCV64_ISEL_H
#include "IR.h"
#include "RISCv64LLIR.h"
#include <memory>
#include <map>
namespace sysy {
/// Instruction selector: lowers a high-level IR function to a low-level
/// (LLIR) MachineFunction.
class RISCv64ISel {
public:
    RISCv64ISel();

    /// Main entry point: converts one high-level IR function into an LLIR function.
    std::unique_ptr<MachineFunction> runOnFunction(Function* func);

private:
    /// DAG node definition; an internal implementation detail of ISel.
    struct DAGNode {
        enum NodeKind { CONSTANT, LOAD, STORE, BINARY, CALL, RETURN, BRANCH, ALLOCA_ADDR, UNARY, MEMSET };
        NodeKind kind;
        Value* value = nullptr;
        std::vector<DAGNode*> operands;
        DAGNode(NodeKind k) : kind(k) {}
    };

    /// Generates LLIR for the current function.
    void select();
    /// Generates instructions for a single basic block.
    void selectBasicBlock(BasicBlock* bb);
    /// Core routine: selects and emits MachineInstrs for a DAG node.
    void selectNode(DAGNode* node);

    // --- DAG construction (migrated from the former RISCv64Backend) ---
    std::vector<std::unique_ptr<DAGNode>> build_dag(BasicBlock* bb);
    DAGNode* get_operand_node(Value* val_ir, std::map<Value*, DAGNode*>& value_to_node, std::vector<std::unique_ptr<DAGNode>>& nodes_storage);
    DAGNode* create_node(DAGNode::NodeKind kind, Value* val, std::map<Value*, DAGNode*>& value_to_node, std::vector<std::unique_ptr<DAGNode>>& nodes_storage);

    // --- Helpers ---
    /// Returns the virtual register number for an IR Value, allocating one if needed.
    unsigned getVReg(Value* val);

    // Default member initializers keep the object in a defined state even if a
    // constructor does not set a member explicitly (the constructor body is
    // defined elsewhere and still overrides these values if it initializes them).
    Function* F = nullptr;                    // high-level IR function being processed
    std::unique_ptr<MachineFunction> MFunc;   // LLIR function under construction
    MachineBasicBlock* CurMBB = nullptr;      // machine basic block being filled

    // Mappings
    std::map<Value*, unsigned> vreg_map;
    std::map<const BasicBlock*, MachineBasicBlock*> bb_map;
    std::map<Value*, DAGNode*> value_to_node_map;  // used for lookups in selectNode

    unsigned vreg_counter = 0;
    int local_label_counter = 0;
};
} // namespace sysy
#endif // RISCV64_ISEL_H

View File

@ -163,8 +163,11 @@ public:
// Creates an instruction with the given opcode and no operands.
MachineInstr(RVOpcodes opcode) : opcode(opcode) {}
// Returns the opcode of this instruction.
RVOpcodes getOpcode() const { return opcode; }
// Note: returns a const reference, because the operand list of an instruction is normally not modified directly
const std::vector<std::unique_ptr<MachineOperand>>& getOperands() const { return operands; }
// Non-const overload, intended for internal modification
std::vector<std::unique_ptr<MachineOperand>>& getOperands() { return operands; }
// Appends an operand; the instruction takes ownership of it.
void addOperand(std::unique_ptr<MachineOperand> operand) {
operands.push_back(std::move(operand));
}
@ -181,9 +184,12 @@ public:
: name(name), parent(parent) {}
// Returns the name of this basic block.
const std::string& getName() const { return name; }
// NOTE(review): the const getInstructions() accessor appears twice in this
// hunk (here and below); this looks like the removed and the re-added line of
// the diff shown together - confirm that the real header declares it once.
const std::vector<std::unique_ptr<MachineInstr>>& getInstructions() const { return instructions; }
// Returns the function this block belongs to.
MachineFunction* getParent() const { return parent; }
// Both a const and a non-const version are provided
const std::vector<std::unique_ptr<MachineInstr>>& getInstructions() const { return instructions; }
std::vector<std::unique_ptr<MachineInstr>>& getInstructions() { return instructions; }
// Appends an instruction; the block takes ownership of it.
void addInstruction(std::unique_ptr<MachineInstr> instr) {
instructions.push_back(std::move(instr));
}
@ -210,9 +216,12 @@ public:
// Creates an empty machine function with the given name.
MachineFunction(const std::string& name) : name(name) {}
// Returns the name of this function.
const std::string& getName() const { return name; }
// NOTE(review): the const getBlocks() accessor appears twice in this hunk
// (here and below); this looks like the removed and the re-added line of the
// diff shown together - confirm that the real header declares it once.
const std::vector<std::unique_ptr<MachineBasicBlock>>& getBlocks() const { return blocks; }
// Returns the mutable stack frame information of this function.
StackFrameInfo& getFrameInfo() { return frame_info; }
// Both a const and a non-const version are provided
const std::vector<std::unique_ptr<MachineBasicBlock>>& getBlocks() const { return blocks; }
std::vector<std::unique_ptr<MachineBasicBlock>>& getBlocks() { return blocks; }
// Appends a basic block; the function takes ownership of it.
void addBlock(std::unique_ptr<MachineBasicBlock> block) {
blocks.push_back(std::move(block));
}

View File

@ -0,0 +1,57 @@
#ifndef RISCV64_REGALLOC_H
#define RISCV64_REGALLOC_H
#include "RISCv64LLIR.h"
#include <map>
#include <set>
#include <vector>
namespace sysy {
// Register allocator for one MachineFunction: liveness analysis, interference
// graph construction, graph colouring and a final rewrite of the function.
class RISCv64RegAlloc {
public:
// Binds the allocator to the function it will rewrite.
RISCv64RegAlloc(MachineFunction* mfunc);
// Main entry point of the pass
void run();
private:
using LiveSet = std::set<unsigned>; // Set of live virtual registers
using InterferenceGraph = std::map<unsigned, std::set<unsigned>>;
// Liveness analysis
void analyzeLiveness();
// Builds the interference graph
void buildInterferenceGraph();
// Assigns registers by graph colouring
void colorGraph();
// Rewrites the function: replaces virtual registers with physical ones and inserts spill code
void rewriteFunction();
// Helper: computes the use/def sets of an instruction
void getInstrUseDef(MachineInstr* instr, LiveSet& use, LiveSet& def);
MachineFunction* MFunc;
// Liveness analysis results
std::map<MachineInstr*, LiveSet> live_in_map;
std::map<MachineInstr*, LiveSet> live_out_map;
// Interference graph
InterferenceGraph interference_graph;
// Graph colouring results
std::map<unsigned, PhysicalReg> color_map; // vreg -> preg
std::set<unsigned> spilled_vregs; // Set of spilled vregs
// Pool of available physical registers
std::vector<PhysicalReg> allocable_int_regs;
std::vector<PhysicalReg> allocable_float_regs; // (reserved for future floating-point support)
};
} // namespace sysy
#endif // RISCV64_REGALLOC_H