"""Two-pass assembler for the Y86-64 instruction set.

NOTE(review): the copy of this file that was reviewed had its line structure
collapsed and a span of text missing (the end of ``_second_pass``, the
``_parse_value`` / ``_format_output`` helpers and the start of the script
entry point).  Those parts are reconstructed below and are marked with
``NOTE(review)`` wherever the original behaviour could not be recovered.
"""

import re
import struct
import sys

# Y86-64 Constants

# Register mapping from name to number.  'F' (0xF) is the id written into a
# register field when the instruction has no register in that position.
REG = {
    '%rax': 0x0, '%rcx': 0x1, '%rdx': 0x2, '%rbx': 0x3,
    '%rsp': 0x4, '%rbp': 0x5, '%rsi': 0x6, '%rdi': 0x7,
    '%r8': 0x8, '%r9': 0x9, '%r10': 0xa, '%r11': 0xb,
    '%r12': 0xc, '%r13': 0xd, '%r14': 0xe, 'F': 0xF
}

# Instruction mapping from mnemonic to (icode, ifun).
# icode is the instruction code, ifun is the function code.
INS = {
    'halt': (0x0, 0x0), 'nop': (0x1, 0x0),
    'rrmovq': (0x2, 0x0), 'cmovle': (0x2, 0x1), 'cmovl': (0x2, 0x2),
    'cmove': (0x2, 0x3), 'cmovne': (0x2, 0x4), 'cmovge': (0x2, 0x5),
    'cmovg': (0x2, 0x6),
    'irmovq': (0x3, 0x0), 'rmmovq': (0x4, 0x0), 'mrmovq': (0x5, 0x0),
    'addq': (0x6, 0x0), 'subq': (0x6, 0x1), 'andq': (0x6, 0x2),
    'xorq': (0x6, 0x3),
    'jmp': (0x7, 0x0), 'jle': (0x7, 0x1), 'jl': (0x7, 0x2),
    'je': (0x7, 0x3), 'jne': (0x7, 0x4), 'jge': (0x7, 0x5),
    'jg': (0x7, 0x6),
    'call': (0x8, 0x0), 'ret': (0x9, 0x0),
    'pushq': (0xA, 0x0), 'popq': (0xB, 0x0)
}

# Encoded size in bytes of every icode used by INS.
_SIZE_BY_ICODE = {
    0x0: 1, 0x1: 1, 0x9: 1,
    0x2: 2, 0x6: 2, 0xA: 2, 0xB: 2,
    0x7: 9, 0x8: 9,
    0x3: 10, 0x4: 10, 0x5: 10,
}

# NOTE(review): opcode byte for the custom `addq $val, rB` form.  The bytes
# the original emitted are not recoverable; 0xC0 is the iaddq encoding from
# CS:APP and matches the 10-byte size used by _get_instruction_size.
# Confirm against the simulator that consumes this output.
IMMEDIATE_ADDQ_OPCODE = 0xC0

_QUAD_MASK = 0xFFFFFFFFFFFFFFFF

# Split operands by comma, but not inside parentheses.
_OPERAND_SPLIT = re.compile(r',\s*(?![^()]*\))')

# Memory operand of the form D(rB); the displacement D may be empty.
_MEMORY_OPERAND = re.compile(r'^(?P<disp>[^()]*)\((?P<base>[^()]*)\)$')


def _parse_int(text):
    """Parse an integer literal, accepting 0x/0o/0b prefixes and plain decimal.

    Raises:
        ValueError: If *text* is not an integer literal.
    """
    try:
        return int(text, 0)
    except ValueError:
        # Base 0 rejects decimals with leading zeros ("010"); plain int()
        # accepted them in the original .align handling, so keep doing so.
        return int(text, 10)


def _pack_quad(value):
    """Return *value* as 8 little-endian bytes (two's complement if negative)."""
    return struct.pack('<Q', value & _QUAD_MASK)


def _expect_operands(instruction, operands, count):
    """Raise ValueError unless *operands* has exactly *count* entries."""
    if len(operands) != count:
        raise ValueError(
            f"'{instruction}' expects {count} operand(s), got {len(operands)}"
        )


class Y86Assembler:
    """
    A two-pass assembler for the Y86-64 instruction set.

    The first pass assigns an address to every label; the second pass encodes
    each line into bytes using those addresses.
    """

    def __init__(self):
        self.symbol_table = {}  # Maps label name to address
        self.pc = 0             # Address of the line currently being processed
        self.byte_code = {}     # Maps address to (size, hex_string, original_line)

    def assemble(self, filepath):
        """
        Assembles a Y86-64 source file.

        Args:
            filepath (str): The path to the input .txt or .ys file.

        Returns:
            str: The formatted machine code output, or a message starting
            with "Error:" when the file is missing or a line cannot be
            assembled.
        """
        try:
            with open(filepath, 'r') as f:
                lines = f.readlines()
        except FileNotFoundError:
            return f"Error: File not found at '{filepath}'"

        # Start from a clean slate so an instance can assemble several files
        # without labels or listing rows leaking from one into the next.
        self.symbol_table = {}
        self.byte_code = {}

        print("🚀 Starting assembly process...")
        try:
            self._first_pass(lines)
            print("✅ First pass complete. Symbol table built.")
            self._second_pass(lines)
            print("✅ Second pass complete. Machine code generated.")
        except ValueError as exc:
            # Report syntax errors the same way as a missing file instead of
            # letting a traceback escape.
            return f"Error: {exc}"
        return self._format_output()

    def _parse_line(self, line):
        """Strips comments and splits a line into label, instruction, and operands.

        Everything after the first '#' or '|' is treated as a comment.

        Returns:
            tuple: (label, instruction, operands).  label and instruction are
            None when absent; operands is a list of stripped strings.
        """
        line = line.split('#')[0].split('|')[0].strip()
        if not line:
            return None, None, []

        label = None
        if ':' in line:
            label, line = line.split(':', 1)
            # Strip so "loop :" and "loop:" define the same symbol.
            label = label.strip()
            line = line.strip()

        instruction, operands = None, []
        parts = line.split(maxsplit=1)
        if parts:
            instruction = parts[0]
            if len(parts) > 1:
                operands = [op.strip() for op in _OPERAND_SPLIT.split(parts[1])]
        return label, instruction, operands

    def _get_instruction_size(self, instruction, operands):
        """Calculates the size of an instruction in bytes.

        Raises:
            ValueError: If *instruction* is not a known mnemonic or directive.
        """
        if not instruction:
            return 0
        if instruction == '.quad':
            return 8
        if instruction in ('.pos', '.align'):
            return 0
        # Custom `addq $val, rB` has size 10: 1 (icode) + 1 (reg) + 8 (val).
        if instruction == 'addq' and operands and operands[0].startswith('$'):
            return 10
        if instruction not in INS:
            raise ValueError(f"Unknown instruction '{instruction}'")
        return _SIZE_BY_ICODE[INS[instruction][0]]

    def _directive_target(self, instruction, operands):
        """Return the address that a .pos or .align directive moves the pc to.

        Raises:
            ValueError: If the operand is missing, is not an integer, or (for
                .align) is not a positive power of two.
        """
        if not operands or not operands[0]:
            raise ValueError(f"'{instruction}' requires a numeric operand")
        value = _parse_int(operands[0])
        if instruction == '.pos':
            return value
        # The mask below only rounds correctly for powers of two.
        if value <= 0 or value & (value - 1):
            raise ValueError(
                f"'.align' operand must be a positive power of two, got {value}"
            )
        return (self.pc + value - 1) & -value

    def _first_pass(self, lines):
        """
        Builds the symbol table by mapping labels to addresses.

        Raises:
            ValueError: If a line holds an unknown instruction or a malformed
                directive; the message is prefixed with the line number.
        """
        self.pc = 0
        for line_num, line in enumerate(lines, 1):
            label, instruction, operands = self._parse_line(line)

            if label:
                if label in self.symbol_table:
                    print(f"Warning: Duplicate label '{label}' on line {line_num}. Using first definition.")
                else:
                    self.symbol_table[label] = self.pc

            if not instruction:
                continue

            try:
                if instruction in ('.pos', '.align'):
                    self.pc = self._directive_target(instruction, operands)
                else:
                    self.pc += self._get_instruction_size(instruction, operands)
            except ValueError as exc:
                raise ValueError(f"line {line_num}: {exc}") from None

    def _second_pass(self, lines):
        """
        Encodes every line and records it in ``self.byte_code``.

        Label-only lines and .align directives that move the pc are recorded
        as zero-size entries; an entry later written at the same address
        replaces them.

        NOTE(review): everything after the .quad handling is reconstructed;
        verify the encodings against the intended simulator.

        Raises:
            ValueError: If a line cannot be encoded; the message is prefixed
                with the line number.
        """
        self.pc = 0
        for line_num, line in enumerate(lines, 1):
            original_line = line.strip()
            label, instruction, operands = self._parse_line(line)

            try:
                if instruction in ('.pos', '.align'):
                    new_pc = self._directive_target(instruction, operands)
                    if instruction == '.align' and new_pc != self.pc:
                        self.byte_code[self.pc] = (0, '', original_line)
                    self.pc = new_pc
                    continue

                if not instruction:
                    if label:
                        self.byte_code[self.pc] = (0, '', original_line)
                    continue

                code = self._encode(instruction, operands)
            except ValueError as exc:
                raise ValueError(f"line {line_num}: {exc}") from None

            self.byte_code[self.pc] = (len(code), code.hex(), original_line)
            self.pc += len(code)

    def _encode(self, instruction, operands):
        """Return the bytes for one instruction or .quad directive.

        NOTE(review): reconstructed from the published Y86-64 encoding.

        Raises:
            ValueError: On an unknown mnemonic, a wrong operand count, an
                unknown register or an unresolvable constant.
        """
        if instruction == '.quad':
            _expect_operands(instruction, operands, 1)
            return _pack_quad(self._parse_value(operands[0]))

        no_reg = REG['F']

        if instruction == 'addq' and operands and operands[0].startswith('$'):
            _expect_operands(instruction, operands, 2)
            reg_byte = (no_reg << 4) | self._parse_register(operands[1])
            immediate = self._parse_value(operands[0])
            return bytes([IMMEDIATE_ADDQ_OPCODE, reg_byte]) + _pack_quad(immediate)

        if instruction not in INS:
            raise ValueError(f"Unknown instruction '{instruction}'")
        icode, ifun = INS[instruction]
        opcode = (icode << 4) | ifun

        if icode in (0x0, 0x1, 0x9):        # halt, nop, ret
            _expect_operands(instruction, operands, 0)
            return bytes([opcode])

        if icode in (0x2, 0x6):             # rrmovq / cmovXX / OPq: rA, rB
            _expect_operands(instruction, operands, 2)
            ra = self._parse_register(operands[0])
            rb = self._parse_register(operands[1])
            return bytes([opcode, (ra << 4) | rb])

        if icode == 0x3:                    # irmovq V, rB
            _expect_operands(instruction, operands, 2)
            value = self._parse_value(operands[0])
            rb = self._parse_register(operands[1])
            return bytes([opcode, (no_reg << 4) | rb]) + _pack_quad(value)

        if icode == 0x4:                    # rmmovq rA, D(rB)
            _expect_operands(instruction, operands, 2)
            ra = self._parse_register(operands[0])
            displacement, rb = self._parse_memory(operands[1])
            return bytes([opcode, (ra << 4) | rb]) + _pack_quad(displacement)

        if icode == 0x5:                    # mrmovq D(rB), rA
            _expect_operands(instruction, operands, 2)
            displacement, rb = self._parse_memory(operands[0])
            ra = self._parse_register(operands[1])
            return bytes([opcode, (ra << 4) | rb]) + _pack_quad(displacement)

        if icode in (0x7, 0x8):             # jXX / call Dest
            _expect_operands(instruction, operands, 1)
            return bytes([opcode]) + _pack_quad(self._parse_value(operands[0]))

        # pushq / popq rA
        _expect_operands(instruction, operands, 1)
        ra = self._parse_register(operands[0])
        return bytes([opcode, (ra << 4) | no_reg])

    def _parse_register(self, name):
        """Return the register id for *name*, or raise ValueError if unknown."""
        try:
            return REG[name]
        except KeyError:
            raise ValueError(f"Unknown register '{name}'") from None

    def _parse_memory(self, text):
        """Split a memory operand into (displacement, base_register_id).

        Accepts ``D(rB)``, ``(rB)`` and a bare address/label, which is encoded
        with the no-register id as its base.
        """
        match = _MEMORY_OPERAND.match(text)
        if match is None:
            return self._parse_value(text), REG['F']
        disp_text = match.group('disp').strip()
        displacement = self._parse_value(disp_text) if disp_text else 0
        return displacement, self._parse_register(match.group('base').strip())

    def _parse_value(self, text):
        """Resolve an immediate, address or label to an integer.

        A leading '$' is ignored; labels are looked up in the symbol table
        before the text is tried as an integer literal.

        NOTE(review): reconstructed; the original helper was not recoverable.

        Raises:
            ValueError: If *text* is neither a known label nor an integer.
        """
        token = text.strip()
        if token.startswith('$'):
            token = token[1:]
        if token in self.symbol_table:
            return self.symbol_table[token]
        try:
            return _parse_int(token)
        except ValueError:
            raise ValueError(
                f"Undefined symbol or invalid constant '{text}'"
            ) from None

    def _format_output(self):
        """Render ``self.byte_code`` as one listing row per recorded address.

        NOTE(review): reconstructed; the original layout was not recoverable.
        Rows follow the usual ``0xADDR: HEX | source`` object-file style,
        sorted by address.
        """
        rows = []
        for address in sorted(self.byte_code):
            _size, hex_string, source = self.byte_code[address]
            rows.append(f"0x{address:03x}: {hex_string:<20} | {source}")
        return "\n".join(rows)


if __name__ == "__main__":
    if len(sys.argv) != 2:
        # NOTE(review): the original usage text was not recoverable.
        print(f"Usage: python {sys.argv[0]} <input_file>")
        sys.exit(1)

    assembler = Y86Assembler()
    result = assembler.assemble(sys.argv[1])
    print("\n--- Assembled Code ---")
    print(result)