nudt csapp 2025 finished
This commit is contained in:
252
Y86_as/as86.py
Normal file
252
Y86_as/as86.py
Normal file
@ -0,0 +1,252 @@
|
||||
import sys
|
||||
import re
|
||||
import struct
|
||||
|
||||
# Y86-64 constants.
# Register names mapped to their 4-bit ids. The ids are consecutive, so each
# one is simply the name's position in the tuple below. The last entry, 'F'
# (0xF), is the id used when an instruction leaves a register slot empty.
REG = {
    name: number
    for number, name in enumerate((
        '%rax', '%rcx', '%rdx', '%rbx',
        '%rsp', '%rbp', '%rsi', '%rdi',
        '%r8', '%r9', '%r10', '%r11',
        '%r12', '%r13', '%r14', 'F',
    ))
}
|
||||
|
||||
# Mnemonic -> (icode, ifun).
# icode selects the instruction family (high nibble of the first byte);
# ifun selects the variant inside that family (low nibble).
INS = {
    # No-operand instructions
    'halt': (0x0, 0x0),
    'nop': (0x1, 0x0),
    # Register-to-register moves (unconditional and conditional)
    'rrmovq': (0x2, 0x0),
    'cmovle': (0x2, 0x1),
    'cmovl': (0x2, 0x2),
    'cmove': (0x2, 0x3),
    'cmovne': (0x2, 0x4),
    'cmovge': (0x2, 0x5),
    'cmovg': (0x2, 0x6),
    # Moves that carry an 8-byte immediate or displacement
    'irmovq': (0x3, 0x0),
    'rmmovq': (0x4, 0x0),
    'mrmovq': (0x5, 0x0),
    # Arithmetic / logic on two registers
    'addq': (0x6, 0x0),
    'subq': (0x6, 0x1),
    'andq': (0x6, 0x2),
    'xorq': (0x6, 0x3),
    # Jumps
    'jmp': (0x7, 0x0),
    'jle': (0x7, 0x1),
    'jl': (0x7, 0x2),
    'je': (0x7, 0x3),
    'jne': (0x7, 0x4),
    'jge': (0x7, 0x5),
    'jg': (0x7, 0x6),
    # Procedure call / return and stack operations
    'call': (0x8, 0x0),
    'ret': (0x9, 0x0),
    'pushq': (0xA, 0x0),
    'popq': (0xB, 0x0),
}
|
||||
|
||||
class Y86Assembler:
    """
    A two-pass assembler for the Y86-64 instruction set.

    The first pass walks the source and assigns an address to every label.
    The second pass encodes each instruction/directive and records one
    listing row per source line that should appear in the output.

    Attributes:
        symbol_table (dict): Label name -> address.
        pc (int): Address of the source line currently being processed.
        byte_code (dict): Address -> (size, hex_string, original_line).
            When several rows share one address (for example a label placed
            directly above an instruction) only the last row is kept here;
            the complete listing used for output is kept separately.
    """

    def __init__(self):
        self.symbol_table = {}
        self.pc = 0
        self.byte_code = {}  # Maps address to (size, hex_string, original_line)
        # Every listing row as (address, size, hex_string, original_line) in
        # source order, so rows that share an address are not overwritten.
        self._listing = []

    def assemble(self, filepath):
        """
        Assembles a Y86-64 source file.

        Progress messages are printed to stdout while the passes run.

        Args:
            filepath (str): The path to the input .txt or .ys file.

        Returns:
            str: The formatted machine code output, or an error message
            string when the file does not exist.
        """
        try:
            with open(filepath, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            return f"Error: File not found at '{filepath}'"

        # Start from a clean slate so that reusing one assembler for several
        # files does not leak labels or listing rows between runs.
        self.symbol_table = {}
        self.byte_code = {}
        self._listing = []

        print("🚀 Starting assembly process...")
        self._first_pass(lines)
        print("✅ First pass complete. Symbol table built.")
        self._second_pass(lines)
        print("✅ Second pass complete. Machine code generated.")
        return self._format_output()

    def _parse_line(self, line):
        """
        Strips comments and splits a line into label, instruction, operands.

        Everything after '#' or '|' is discarded.

        Returns:
            tuple: (label or None, instruction or None, list of operands).
        """
        line = line.split('#')[0].split('|')[0].strip()
        if not line:
            return None, None, []

        label, instruction, operands_str = None, None, ''
        if ':' in line:
            label, rest = line.split(':', 1)
            # Tolerate "name :" so the stored label matches references to it.
            label = label.strip()
            line = rest.strip()

        parts = line.split(maxsplit=1)
        if parts:
            instruction = parts[0]
            if len(parts) > 1:
                operands_str = parts[1]

        # Split operands by comma, but not inside parentheses
        operands = re.split(r',\s*(?![^()]*\))', operands_str) if operands_str else []

        return label, instruction, [op.strip() for op in operands]

    def _get_instruction_size(self, instruction, operands):
        """
        Calculates the size of an instruction in bytes.

        Raises:
            KeyError: If the mnemonic is not a known instruction/directive.
        """
        if not instruction:
            return 0

        if instruction == '.quad':
            return 8
        if instruction in ('.pos', '.align'):
            return 0

        # Custom `addq $val, rB` form: 1 (icode) + 1 (reg) + 8 (val)
        if instruction == 'addq' and operands and operands[0].startswith('$'):
            return 10

        icode = INS[instruction][0]
        if icode in (0x0, 0x1, 0x9):
            return 1
        if icode in (0x2, 0x6, 0xA, 0xB):
            return 2
        if icode in (0x7, 0x8):
            return 9
        if icode in (0x3, 0x4, 0x5):
            return 10
        return 0

    def _aligned_pc(self, boundary_str):
        """
        Returns the current pc rounded up to the boundary of an `.align`.

        The mask arithmetic assumes the boundary is a power of two.

        Raises:
            ValueError: If the boundary is not a positive decimal integer
                (a zero or negative boundary would silently corrupt the pc).
        """
        boundary = int(boundary_str)
        if boundary < 1:
            raise ValueError(f"Invalid .align boundary: {boundary_str}")
        return (self.pc + boundary - 1) & -boundary

    def _first_pass(self, lines):
        """
        Builds the symbol table by mapping labels to addresses.

        A label that is defined twice keeps its first address and a warning
        is printed.
        """
        self.pc = 0
        for line_num, line in enumerate(lines, 1):
            label, instruction, operands = self._parse_line(line)

            if label:
                if label in self.symbol_table:
                    print(f"Warning: Duplicate label '{label}' on line {line_num}. Using first definition.")
                else:
                    self.symbol_table[label] = self.pc

            if not instruction:
                continue

            if instruction == '.pos':
                self.pc = int(operands[0], 0)
            elif instruction == '.align':
                self.pc = self._aligned_pc(operands[0])
            else:
                self.pc += self._get_instruction_size(instruction, operands)

    def _second_pass(self, lines):
        """
        Encodes every instruction and records the listing rows.

        Label-only lines, and `.align` lines that actually move the pc, are
        recorded as zero-size rows so they still show up in the output.
        """
        self.pc = 0
        for line in lines:
            original_line = line.strip()
            label, instruction, operands = self._parse_line(line)

            if instruction == '.pos':
                self.pc = int(operands[0], 0)
                continue

            if instruction == '.align':
                new_pc = self._aligned_pc(operands[0])
                if new_pc != self.pc:
                    self._record(self.pc, 0, '', original_line)
                self.pc = new_pc
                continue

            if not instruction:
                if label:
                    self._record(self.pc, 0, '', original_line)
                continue

            size = self._get_instruction_size(instruction, operands)
            code = self._encode(instruction, operands)
            self._record(self.pc, size, code.hex(), original_line)
            self.pc += size

    def _record(self, addr, size, hex_code, original_line):
        """Stores one listing row, both by address and in source order."""
        self.byte_code[addr] = (size, hex_code, original_line)
        self._listing.append((addr, size, hex_code, original_line))

    @staticmethod
    def _pack64(value):
        """
        Encodes an integer as 8 little-endian bytes.

        Negative values down to -2**63 are stored in two's complement and
        values up to 2**64 - 1 are stored as-is, so both `-1` and
        `0xffffffffffffffff` are accepted.

        Raises:
            struct.error: If the value does not fit in 64 bits.
        """
        if -(1 << 63) <= value < 0:
            value += 1 << 64
        return struct.pack('<Q', value)

    def _encode(self, instruction, operands):
        """
        Returns the machine-code bytes for one instruction or `.quad`.

        Raises:
            KeyError: Unknown mnemonic or register name.
            IndexError: Missing operand.
            ValueError: Malformed immediate, label or memory operand.
        """
        if instruction == '.quad':
            return bytearray(self._pack64(self._parse_value(operands[0])))

        # Custom `addq $imm, rB`: encoded with the otherwise unused icode 0xC.
        if instruction == 'addq' and operands and operands[0].startswith('$'):
            code = bytearray([(0xC << 4) | 0x0])
            rB = REG[operands[1]]
            rA = REG['F']  # No source register, so rA is F
            code.append((rA << 4) | rB)
            code.extend(self._pack64(self._parse_value(operands[0])))
            return code

        icode, ifun = INS[instruction]
        code = bytearray([(icode << 4) | ifun])

        if icode in (0x2, 0x6):
            # rrmovq / cmovXX / OPq: rA, rB
            rA = REG[operands[0]]
            rB = REG[operands[1]]
            code.append((rA << 4) | rB)
        elif icode in (0xA, 0xB):
            # pushq / popq: rA only, rB slot is F
            rA = REG[operands[0]]
            code.append((rA << 4) | 0xF)
        elif icode in (0x3, 0x4, 0x5):
            if icode == 0x3:
                # irmovq $V, rB
                rA, rB = REG['F'], REG[operands[1]]
                val = self._parse_value(operands[0])
            elif icode == 0x4:
                # rmmovq rA, D(rB)
                rA = REG[operands[0]]
                val, rB_name = self._parse_mem(operands[1])
                rB = REG[rB_name]
            else:
                # mrmovq D(rB), rA
                rA = REG[operands[1]]
                val, rB_name = self._parse_mem(operands[0])
                rB = REG[rB_name]
            code.append((rA << 4) | rB)
            code.extend(self._pack64(val))
        elif icode in (0x7, 0x8):
            # jXX / call: 8-byte destination
            code.extend(self._pack64(self._parse_value(operands[0])))
        # halt / nop / ret consist of the opcode byte only.

        return code

    def _parse_value(self, s):
        """
        Converts a string operand to an integer, resolving labels.

        A leading '$' is ignored. Labels take precedence over numbers.

        Raises:
            ValueError: If the text is neither a known label nor a number.
        """
        s = s.strip()
        if s.startswith('$'):
            s = s[1:]

        if s in self.symbol_table:
            return self.symbol_table[s]
        try:
            return int(s, 0)  # Handles decimal and '0x' hex
        except ValueError:
            raise ValueError(f"Invalid immediate value or unresolved label: {s}")

    def _parse_mem(self, s):
        """
        Parses memory operands like 'D(%rB)' or '(%rB)'.

        The displacement D may be decimal ('-8') or hexadecimal ('0x10').

        Returns:
            tuple: (displacement as int, register name).

        Raises:
            ValueError: If the operand does not have that shape.
        """
        # The hex alternative is listed first so '0x10' is not read as '0'.
        match = re.match(r'(-?(?:0[xX][0-9a-fA-F]+|\d+))?\((\%r\w+)\)', s)
        if not match:
            raise ValueError(f"Invalid memory operand: {s}")

        disp_str, reg = match.groups()
        if not disp_str:
            disp = 0
        elif 'x' in disp_str.lower():
            disp = int(disp_str, 16)
        else:
            disp = int(disp_str)
        return disp, reg

    def _format_output(self):
        """
        Formats the final output string.

        Rows are ordered by address. Rows that share an address keep their
        source order, so a label line is printed above the instruction that
        it names.
        """
        rows = self._listing
        if not rows:
            # Nothing was recorded through _record; fall back to whatever is
            # stored in byte_code.
            rows = [(addr,) + tuple(entry) for addr, entry in self.byte_code.items()]

        output = []
        # sorted() is stable, which preserves source order per address.
        for addr, size, hex_code, line in sorted(rows, key=lambda row: row[0]):
            addr_hex = f"0x{addr:03x}:"

            # Handle lines that generate no code (labels, .align)
            if size == 0:
                output.append(f"{addr_hex: <10}| {line}")
            else:
                formatted_code = f"{hex_code:<20}"
                output.append(f"{addr_hex: <10}{formatted_code}| {line}")

        return '\n'.join(output)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # The script takes exactly one positional argument: the source file path.
    cli_args = sys.argv[1:]
    if len(cli_args) != 1:
        print("Usage: python Y86_64_assembler.py <source_file.txt>")
        sys.exit(1)

    (source_path,) = cli_args
    # assemble() prints its own progress messages and returns the listing
    # (or an error message) as a single string.
    listing = Y86Assembler().assemble(source_path)
    print("\n--- Assembled Code ---", listing, sep="\n")
|
||||
40
Y86_as/input.txt
Normal file
40
Y86_as/input.txt
Normal file
@ -0,0 +1,40 @@
|
||||
# Execution begins at address 0
|
||||
.pos 0
|
||||
irmovq stack, %rsp # Set up stack pointer
|
||||
call main # Execute main program
|
||||
halt # Terminate program
|
||||
|
||||
# Array of 4 elements
|
||||
.pos 0x18
|
||||
array:
|
||||
.quad 0xd000d000d000d
|
||||
.quad 0xc000c000c000c0
|
||||
.quad 0xb000b000b000b
|
||||
.quad 0xa000a000a000a
|
||||
|
||||
main:
|
||||
irmovq array, %rdi
|
||||
irmovq $4, %rsi
|
||||
call sum
|
||||
ret
|
||||
|
||||
# long sum(long *start, long count)
|
||||
# start in %rdi, count in %rsi
|
||||
sum:
|
||||
irmovq $8,%r8
|
||||
irmovq $1,%r9
|
||||
xorq %rax,%rax
|
||||
andq %rsi,%rsi
|
||||
jmp test
|
||||
loop:
|
||||
mrmovq (%rdi),%r10
|
||||
addq %r10,%rax
|
||||
addq %r8,%rdi # start++
|
||||
subq %r9,%rsi # count--
|
||||
# Set CC
|
||||
test:
|
||||
jne loop # Stop when 0
|
||||
ret # Return
|
||||
|
||||
.pos 0x200
|
||||
stack:
|
||||
10
Y86_as/sample2024.txt
Normal file
10
Y86_as/sample2024.txt
Normal file
@ -0,0 +1,10 @@
|
||||
.pos 0x200
|
||||
sum:
|
||||
mrmovq 8(%rdx), %rcx
|
||||
mrmovq 8(%rax),%rsi
|
||||
rmmovq %rsi,8(%rdx)
|
||||
rmmovq %rcx, 8(%rax)
|
||||
addq $8, %rdx
|
||||
addq $120, %rax
|
||||
subq %rdi, %rax
|
||||
jne sum
|
||||
BIN
Y86_as/屏幕截图_20250613_013251.png
Normal file
BIN
Y86_as/屏幕截图_20250613_013251.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 3.1 MiB |
Reference in New Issue
Block a user