nudt csapp 2025 finished

This commit is contained in:
2025-06-18 10:30:41 +08:00
parent 603789006c
commit 6d5eb3ea2f
4 changed files with 302 additions and 0 deletions

252
Y86_as/as86.py Normal file
View File

@ -0,0 +1,252 @@
import sys
import re
import struct
# Y86-64 lookup tables.

# Register name -> 4-bit register id. Ids follow the position in the tuple
# below; the final entry 'F' (0xF) is the id used when an encoding has no
# register in that slot.
REG = {
    name: number
    for number, name in enumerate((
        '%rax', '%rcx', '%rdx', '%rbx', '%rsp', '%rbp', '%rsi', '%rdi',
        '%r8', '%r9', '%r10', '%r11', '%r12', '%r13', '%r14', 'F',
    ))
}

# Mnemonic -> (icode, ifun).
# Each row is one instruction code; a mnemonic's position inside the row is
# its function code.
INS = {
    mnemonic: (icode, ifun)
    for icode, mnemonics in (
        (0x0, ('halt',)),
        (0x1, ('nop',)),
        (0x2, ('rrmovq', 'cmovle', 'cmovl', 'cmove',
               'cmovne', 'cmovge', 'cmovg')),
        (0x3, ('irmovq',)),
        (0x4, ('rmmovq',)),
        (0x5, ('mrmovq',)),
        (0x6, ('addq', 'subq', 'andq', 'xorq')),
        (0x7, ('jmp', 'jle', 'jl', 'je', 'jne', 'jge', 'jg')),
        (0x8, ('call',)),
        (0x9, ('ret',)),
        (0xA, ('pushq',)),
        (0xB, ('popq',)),
    )
    for ifun, mnemonic in enumerate(mnemonics)
}
class Y86Assembler:
    """
    A two-pass assembler for the Y86-64 instruction set.

    The first pass walks the source to give every label an address; the
    second pass encodes each instruction/directive and records one listing
    entry per emitted source line.

    Besides the standard mnemonics in ``INS`` it accepts a non-standard
    ``addq $imm, rB`` form, encoded as a 10-byte instruction with icode 0xC.

    Attributes:
        symbol_table: Maps label name to address.
        pc: Address at which the next byte would be placed.
        byte_code: Maps address to (size, hex_string, original_line). Lines
            sharing an address overwrite each other here.
        listing: (address, size, hex_string, original_line) for every
            recorded line, in source order. Unlike ``byte_code`` it keeps a
            label-only line even when an instruction follows at the same
            address.
    """

    # Mnemonics encoded as an opcode byte followed by one rA:rB register byte.
    _TWO_REGISTER_OPS = frozenset((
        'rrmovq', 'cmovle', 'cmovl', 'cmove', 'cmovne', 'cmovge', 'cmovg',
        'addq', 'subq', 'andq', 'xorq',
    ))
    # Mnemonics encoded as an opcode byte followed by an 8-byte destination.
    _BRANCH_OPS = frozenset((
        'jmp', 'jle', 'jl', 'je', 'jne', 'jge', 'jg', 'call',
    ))

    def __init__(self):
        self.symbol_table = {}
        self.pc = 0
        self.byte_code = {}  # Maps address to (size, hex_string, original_line)
        self.listing = []  # (address, size, hex_string, original_line), source order

    def assemble(self, filepath):
        """
        Assembles a Y86-64 source file.

        Args:
            filepath (str): The path to the input .txt or .ys file.

        Returns:
            str: The formatted machine code output, or an ``Error: ...``
            message when the file does not exist.
        """
        try:
            with open(filepath, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            return f"Error: File not found at '{filepath}'"
        # Drop results of any previous run so one assembler object can be
        # reused without stale labels or code leaking into the new output.
        self.symbol_table.clear()
        self.byte_code.clear()
        self.listing.clear()
        print("🚀 Starting assembly process...")
        self._first_pass(lines)
        print("✅ First pass complete. Symbol table built.")
        self._second_pass(lines)
        print("✅ Second pass complete. Machine code generated.")
        return self._format_output()

    def _parse_line(self, line):
        """
        Strips comments and splits a line into label, instruction, operands.

        Everything from the first '#' or '|' onwards is discarded.

        Returns:
            tuple: (label or None, instruction or None, list of operands).
        """
        line = line.split('#')[0].split('|')[0].strip()
        if not line:
            return None, None, []
        label, instruction, operands_str = None, None, ''
        if ':' in line:
            label, rest = line.split(':', 1)
            # Tolerate "name :" so the stored label has no trailing blanks.
            label = label.strip()
            line = rest.strip()
        parts = line.split(maxsplit=1)
        if parts:
            instruction = parts[0]
            if len(parts) > 1:
                operands_str = parts[1]
        # Split operands by comma, but not inside parentheses
        operands = re.split(r',\s*(?![^()]*\))', operands_str) if operands_str else []
        return label, instruction, [op.strip() for op in operands]

    def _get_instruction_size(self, instruction, operands):
        """
        Calculates the size of an instruction or directive in bytes.

        Raises:
            KeyError: If ``instruction`` is not a known mnemonic/directive.
        """
        if not instruction:
            return 0
        if instruction in ['.quad']:
            return 8
        if instruction in ['.pos', '.align']:
            return 0
        # Custom `addq $val, rB`: 1 (icode) + 1 (reg) + 8 (val)
        if instruction == 'addq' and operands and operands[0].startswith('$'):
            return 10
        icode = INS[instruction][0]
        if icode in [0x0, 0x1, 0x9]:
            return 1
        if icode in [0x2, 0x6, 0xA, 0xB]:
            return 2
        if icode in [0x7, 0x8]:
            return 9
        if icode in [0x3, 0x4, 0x5]:
            return 10
        return 0

    @staticmethod
    def _parse_int(text):
        """Parses decimal or 0x-prefixed text; '08' style is read as decimal."""
        try:
            return int(text, 0)
        except ValueError:
            return int(text, 10)

    @classmethod
    def _align_up(cls, address, operand):
        """
        Rounds ``address`` up to the next multiple of the `.align` operand.

        Raises:
            ValueError: If the alignment is not a positive integer.
        """
        boundary = cls._parse_int(operand)
        if boundary <= 0:
            raise ValueError(f"Invalid alignment: {operand}")
        # Ceiling division works for any positive boundary, not only powers
        # of two (the usual `& -n` mask trick does not).
        return -(-address // boundary) * boundary

    @staticmethod
    def _pack_quad(value):
        """
        Encodes ``value`` as 8 little-endian bytes (two's complement).

        Raises:
            ValueError: If the value fits neither a signed nor an unsigned
                64-bit integer.
        """
        if not -(1 << 63) <= value < (1 << 64):
            raise ValueError(f"Value does not fit in 64 bits: {value}")
        return struct.pack('<Q', value & 0xFFFFFFFFFFFFFFFF)

    def _record(self, address, size, hex_code, source_line):
        """Stores one output line in both ``byte_code`` and ``listing``."""
        self.byte_code[address] = (size, hex_code, source_line)
        self.listing.append((address, size, hex_code, source_line))

    def _first_pass(self, lines):
        """
        Builds the symbol table by mapping labels to addresses.

        A label seen twice keeps its first address and triggers a warning.
        """
        self.pc = 0
        for line_num, line in enumerate(lines, 1):
            label, instruction, operands = self._parse_line(line)
            if label:
                if label in self.symbol_table:
                    print(f"Warning: Duplicate label '{label}' on line {line_num}. Using first definition.")
                else:
                    self.symbol_table[label] = self.pc
            if not instruction:
                continue
            if instruction == '.pos':
                self.pc = int(operands[0], 0)
            elif instruction == '.align':
                self.pc = self._align_up(self.pc, operands[0])
            else:
                self.pc += self._get_instruction_size(instruction, operands)

    def _second_pass(self, lines):
        """
        Encodes every line and records the result.

        `.pos` only moves the location counter. `.align` is recorded (with
        no bytes) only when it actually moves the counter. Label-only lines
        are recorded with no bytes.
        """
        self.pc = 0
        for line in lines:
            original_line = line.strip()
            label, instruction, operands = self._parse_line(line)
            if instruction == '.pos':
                self.pc = int(operands[0], 0)
                continue
            if instruction == '.align':
                new_pc = self._align_up(self.pc, operands[0])
                if new_pc != self.pc:
                    self._record(self.pc, 0, '', original_line)
                self.pc = new_pc
                continue
            if not instruction:
                if label:
                    self._record(self.pc, 0, '', original_line)
                continue
            size = self._get_instruction_size(instruction, operands)
            code = self._encode(instruction, operands)
            if code:
                self._record(self.pc, size, code.hex(), original_line)
            self.pc += size

    def _encode(self, instruction, operands):
        """
        Returns the machine bytes for one instruction or `.quad` directive.

        Raises:
            KeyError: On an unknown mnemonic or register name.
            ValueError: On a bad immediate, label or memory operand.
        """
        code = bytearray()
        if instruction == '.quad':
            code.extend(self._pack_quad(self._parse_value(operands[0])))
            return code
        if instruction == 'addq' and operands and operands[0].startswith('$'):
            # Custom `addq $imm, rB`: icode 0xC, ifun 0, rA slot = F (none).
            code.append((0xC << 4) | 0x0)
            code.append((REG['F'] << 4) | REG[operands[1]])
            code.extend(self._pack_quad(self._parse_value(operands[0])))
            return code
        icode, ifun = INS[instruction]
        code.append((icode << 4) | ifun)
        if instruction in self._TWO_REGISTER_OPS:
            code.append((REG[operands[0]] << 4) | REG[operands[1]])
        elif instruction in ('pushq', 'popq'):
            code.append((REG[operands[0]] << 4) | 0xF)
        elif instruction == 'irmovq':
            code.append((REG['F'] << 4) | REG[operands[1]])
            code.extend(self._pack_quad(self._parse_value(operands[0])))
        elif instruction == 'rmmovq':
            # rmmovq rA, D(rB)
            rA = REG[operands[0]]
            disp, base = self._parse_mem(operands[1])
            code.append((rA << 4) | REG[base])
            code.extend(self._pack_quad(disp))
        elif instruction == 'mrmovq':
            # mrmovq D(rB), rA
            rA = REG[operands[1]]
            disp, base = self._parse_mem(operands[0])
            code.append((rA << 4) | REG[base])
            code.extend(self._pack_quad(disp))
        elif instruction in self._BRANCH_OPS:
            code.extend(self._pack_quad(self._parse_value(operands[0])))
        # halt / nop / ret consist of the opcode byte alone.
        return code

    def _parse_value(self, s):
        """
        Converts a string operand to an integer, resolving labels.

        A leading '$' is ignored. Labels take precedence over numbers.

        Raises:
            ValueError: If ``s`` is neither a known label nor a number.
        """
        s = s.strip()
        if s.startswith('$'):
            s = s[1:]
        if s in self.symbol_table:
            return self.symbol_table[s]
        try:
            return int(s, 0)  # Handles decimal and '0x' hex
        except ValueError:
            raise ValueError(f"Invalid immediate value or unresolved label: {s}")

    def _parse_mem(self, s):
        """
        Parses memory operands like 'D(%rB)' or '(%rB)'.

        The displacement D may be decimal or 0x-prefixed hex, optionally
        negative; a missing displacement means 0.

        Returns:
            tuple: (displacement as int, register name).

        Raises:
            ValueError: If ``s`` is not a memory operand.
        """
        match = re.match(r'(-?(?:0[xX][0-9a-fA-F]+|\d+))?\((%r\w+)\)', s)
        if not match:
            raise ValueError(f"Invalid memory operand: {s}")
        disp_str, reg = match.groups()
        disp = self._parse_int(disp_str) if disp_str else 0
        return disp, reg

    def _format_output(self):
        """
        Formats the final output string.

        Lines are ordered by address; lines at the same address keep their
        source order (the sort is stable).
        """
        if self.listing:
            entries = sorted(self.listing, key=lambda entry: entry[0])
        else:
            # Nothing went through _record(): fall back to byte_code alone.
            entries = [(addr, *self.byte_code[addr]) for addr in sorted(self.byte_code)]
        output = []
        for addr, size, hex_code, line in entries:
            addr_hex = f"0x{addr:03x}:"
            # Lines that generate no code (labels, .align) get no hex column
            if size == 0:
                output.append(f"{addr_hex: <10}| {line}")
            else:
                formatted_code = f"{hex_code:<20}"
                output.append(f"{addr_hex: <10}{formatted_code}| {line}")
        return '\n'.join(output)
# Command-line entry point: assemble the single file named on the command
# line and print the listing.
if __name__ == '__main__':
    if len(sys.argv) != 2:
        # Show the script under the name it was actually invoked with.
        print(f"Usage: python {sys.argv[0]} <source_file.txt>")
        sys.exit(1)
    assembler = Y86Assembler()
    result = assembler.assemble(sys.argv[1])
    # assemble() reports a missing file as an "Error: ..." string; a real
    # listing never starts that way (every line begins with an address), so
    # treat it as a failure rather than printing it as assembled code.
    if result.startswith("Error:"):
        print(result, file=sys.stderr)
        sys.exit(1)
    print("\n--- Assembled Code ---")
    print(result)

40
Y86_as/input.txt Normal file
View File

@ -0,0 +1,40 @@
# Sample program: sums the 4-element array with sum() and halts.
# Execution begins at address 0
.pos 0
irmovq stack, %rsp # Set up stack pointer
call main # Execute main program
halt # Terminate program
# Array of 4 elements, placed at a fixed address (0x18)
.pos 0x18
array:
.quad 0xd000d000d000d
# NOTE(review): the next value has one more hex digit than its neighbours --
# confirm whether 0xc000c000c000c was intended.
.quad 0xc000c000c000c0
.quad 0xb000b000b000b
.quad 0xa000a000a000a
# main: calls sum(array, 4)
main:
irmovq array, %rdi
irmovq $4, %rsi
call sum
ret
# long sum(long *start, long count)
# start in %rdi, count in %rsi
# %r8 holds the constant 8 (element size), %r9 the constant 1,
# %rax accumulates the total.
sum:
irmovq $8,%r8
irmovq $1,%r9
xorq %rax,%rax
# andq of count with itself leaves it unchanged and sets the condition
# codes tested by the jne at `test`
andq %rsi,%rsi
jmp test
loop:
mrmovq (%rdi),%r10
addq %r10,%rax
addq %r8,%rdi # start++
subq %r9,%rsi # count--
# The subq above also sets the condition codes tested at `test`
test:
jne loop # Stop when 0
ret # Return
# Label `stack` (loaded into %rsp at startup) is placed at 0x200
.pos 0x200
stack:

10
Y86_as/sample2024.txt Normal file
View File

@ -0,0 +1,10 @@
# Sample loop placed at address 0x200.
# Each iteration swaps the quad at 8(%rdx) with the quad at 8(%rax), then
# advances both pointers.
.pos 0x200
sum:
# Load both quads before storing so neither value is lost
mrmovq 8(%rdx), %rcx
mrmovq 8(%rax),%rsi
rmmovq %rsi,8(%rdx)
rmmovq %rcx, 8(%rax)
# `addq $imm, reg` is not a standard Y86-64 form; as86.py encodes it as a
# 10-byte immediate add
addq $8, %rdx
addq $120, %rax
# %rax = %rax - %rdi; repeat while the result is non-zero
subq %rdi, %rax
jne sum

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.1 MiB