diff --git a/fpga/.gitignore b/fpga/.gitignore
index a0991ff4..814384f3 100644
--- a/fpga/.gitignore
+++ b/fpga/.gitignore
@@ -1,3 +1 @@
-*
-!.gitignore
-!Makefile
+generated-src
diff --git a/fpga/Makefile b/fpga/Makefile
index b984431c..748a5029 100644
--- a/fpga/Makefile
+++ b/fpga/Makefile
@@ -76,6 +76,16 @@ $(BIT_FILE): $(synth_list_f)
.PHONY: bit
bit: $(BIT_FILE)
+# Implement the post-synthesis checkpoint with run_impl_bitstream.tcl. The
+# script writes checkpoints, reports, the bitstream and the debug-probe file
+# under $(build_dir)/obj/debug_output.
+.PHONY: debug-bitstream
+debug-bitstream: $(build_dir)/obj/post_synth.dcp
+	cd $(build_dir) && vivado \
+		-nojournal -mode batch \
+		-source $(sim_dir)/scripts/run_impl_bitstream.tcl \
+		-tclargs \
+			$< \
+			xcvu9p-flga2104-2l-e \
+			$(build_dir)/obj/debug_output
+
# Build .mcs
MCS_FILE := $(build_dir)/obj/$(MODEL).mcs
$(MCS_FILE): $(BIT_FILE)
diff --git a/fpga/scripts/run_impl_bitstream.tcl b/fpga/scripts/run_impl_bitstream.tcl
new file mode 100644
index 00000000..ec3828e8
--- /dev/null
+++ b/fpga/scripts/run_impl_bitstream.tcl
@@ -0,0 +1,45 @@
+#### Command line arguments to this script
+# argv[0] = absolute path to post_synth checkpoint file
+# argv[1] = part
+# argv[2] = output directory
+
+# Stop early on a wrong argument count rather than running the flow with empty values
+if {$argc != 3} {
+    puts "Error: expected 3 arguments, got $argc"
+    puts {Usage: run_impl_bitstream.tcl checkpoint part output_dir}
+    exit 1
+}
+
+lassign $argv synth_checkpoint_file part output_dir
+
+# Set the project part to the part passed into this script
+set_part ${part}
+
+# Create output directory if it doesn't exist
+file mkdir ${output_dir}
+file mkdir ${output_dir}/reports
+file mkdir ${output_dir}/outputs
+
+# Load synthesis checkpoint
+open_checkpoint ${synth_checkpoint_file}
+
+# Run implementation and save reports as needed
+opt_design
+place_design
+phys_opt_design
+write_checkpoint -force ${output_dir}/outputs/post_place
+report_timing_summary -file ${output_dir}/reports/post_place_timing_summary.rpt
+report_drc -file ${output_dir}/reports/post_place_drc.rpt
+
+route_design
+write_checkpoint -force ${output_dir}/outputs/post_route
+report_timing_summary -file ${output_dir}/reports/post_route_timing_summary.rpt
+report_timing -sort_by group -max_paths 100 -path_type summary -file ${output_dir}/reports/post_route_timing.rpt
+report_clock_utilization -file ${output_dir}/reports/post_route_clock_utilization.rpt
+report_utilization -file ${output_dir}/reports/post_route_utilization.rpt
+report_drc -file ${output_dir}/reports/post_route_drc.rpt
+report_cdc -details -file ${output_dir}/reports/post_route_cdc.rpt
+report_clock_interaction -file ${output_dir}/reports/post_route_clock_interaction.rpt
+report_bus_skew -file ${output_dir}/reports/post_route_bus_skew.rpt
+
+# Analyse only the paths with negative slack. When timing is met the list is
+# empty, so skip the report instead of handing it an empty path list, which
+# could abort the script before the bitstream is written.
+set failing_paths [get_timing_paths -max_paths 1000 -slack_lesser_than 0]
+if {[llength $failing_paths] > 0} {
+    report_design_analysis -logic_level_distribution -of_timing_paths $failing_paths -file ${output_dir}/reports/post_route_timing_violations.rpt
+}
+
+write_verilog -force ${output_dir}/outputs/post_route.v
+write_xdc -no_fixed_only -force ${output_dir}/outputs/post_route.xdc
+
+write_bitstream -force ${output_dir}/outputs/top.bit
+write_debug_probes -force ${output_dir}/outputs/debug_nets.ltx
diff --git a/fpga/scripts/write_mmi.tcl b/fpga/scripts/write_mmi.tcl
new file mode 100644
index 00000000..e577dd2b
--- /dev/null
+++ b/fpga/scripts/write_mmi.tcl
@@ -0,0 +1,75 @@
+# write_mmi filepath inst
+#   Writes a description of the block RAM cells found under instance `inst`
+#   to the file `filepath`.
+#   NOTE(review): most string literals passed to `puts` below are empty or
+#   truncated (see the "> 3]" fragment); they look like markup lost in
+#   extraction - restore them from the upstream script before using this.
+proc write_mmi {filepath inst} {
+ # Scope cell lookups to $inst (the bare call presumably resets to the top first - confirm)
+ current_instance
+ current_instance $inst
+ set chn [open $filepath w]
+ puts $chn ""
+ puts $chn ""
+ puts $chn "\t"
+ # Group BLOCKRAM cells by RTL_RAM_NAME: each entry holds a `cells` list and a `size` (RTL_RAM_BITS)
+ set brams [dict create]
+ foreach cell [get_cells -hierarchical -filter { PRIMITIVE_GROUP =~ BLOCKRAM }] {
+ set name [get_property RTL_RAM_NAME $cell]
+ dict update brams $name name {
+ dict lappend name cells $cell
+ dict set name size [get_property RTL_RAM_BITS $cell]
+ }
+ }
+ # Sort order for cells: ascending bram_addr_begin, then ascending bram_slice_begin
+ proc compare {a b} {
+ set a_addr [get_property bram_addr_begin $a]
+ set b_addr [get_property bram_addr_begin $b]
+ if {$a_addr > $b_addr} {
+ return 1
+ } elseif {$a_addr < $b_addr} {
+ return -1
+ }
+ set a_slice [get_property bram_slice_begin $a]
+ set b_slice [get_property bram_slice_begin $b]
+ if {$a_slice > $b_slice} {
+ return 1
+ } elseif {$a_slice < $b_slice} {
+ return -1
+ }
+ return 0
+ }
+ dict for {name desc} $brams {
+ dict with desc {
+ puts $chn "\t\t> 3]\">"
+ puts $chn "\t\t\t"
+ foreach cell [lsort -command compare $cells] {
+ # NOTE(review): the switch has no default arm, so any REF_NAME other than RAMB36E1/RAMB36E2 leaves `type` empty
+ set type [switch [get_property REF_NAME $cell] \
+ RAMB36E2 {expr {"RAMB32"}} \
+ RAMB36E1 {expr {"RAMB32"}}]
+ # The part of LOC after the first underscore is kept as the placement
+ set loc [lindex [split [get_property LOC $cell] "_"] 1]
+ set lsb [get_property bram_slice_begin $cell]
+ set msb [get_property bram_slice_end $cell]
+ set addr_bgn [get_property bram_addr_begin $cell]
+ set addr_end [get_property bram_addr_end $cell]
+ puts $chn "\t\t\t\t"
+ puts $chn "\t\t\t\t\t"
+ puts $chn "\t\t\t\t\t"
+ puts $chn "\t\t\t\t\t"
+ puts $chn "\t\t\t\t"
+ }
+ puts $chn "\t\t\t"
+ puts $chn "\t\t"
+ }
+ }
+ puts $chn "\t"
+ puts $chn "\t"
+ puts $chn "\t\t"
+ puts $chn "\t"
+ puts $chn ""
+ close $chn
+ current_instance
+
+}
+
+# Script entry point: expects exactly three arguments (checkpoint, mmi_file, instance).
+if {$argc != 3} {
+    puts $argc
+    puts {Error: Invalid number of arguments}
+    puts {Usage: write_mmi.tcl checkpoint mmi_file instance}
+    # Abort here; continuing would open a checkpoint with missing arguments
+    exit 1
+}
+
+lassign $argv checkpoint mmi_file instance
+
+open_checkpoint $checkpoint
+write_mmi $mmi_file $instance
diff --git a/fpga/src/main/resources/vcu118/sdboot/.gitignore b/fpga/src/main/resources/vcu118/sdboot/.gitignore
new file mode 100644
index 00000000..378eac25
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/.gitignore
@@ -0,0 +1 @@
+build
diff --git a/fpga/src/main/resources/vcu118/sdboot/Makefile b/fpga/src/main/resources/vcu118/sdboot/Makefile
new file mode 100644
index 00000000..b9c21470
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/Makefile
@@ -0,0 +1,39 @@
+# Builds the sdboot image (ELF, raw binary and disassembly).
+# RISCV environment variable must be set
+# PBUS_CLK must be passed in; it is compiled in as TL_CLK with a UL suffix
+ROOT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
+BUILD_DIR := $(ROOT_DIR)/build
+
+# Remove a target whose recipe failed (e.g. the redirected objdump output)
+# so a truncated file is never mistaken for an up-to-date one.
+.DELETE_ON_ERROR:
+
+CC=$(RISCV)/bin/riscv64-unknown-elf-gcc
+OBJCOPY=$(RISCV)/bin/riscv64-unknown-elf-objcopy
+OBJDUMP=$(RISCV)/bin/riscv64-unknown-elf-objdump
+CFLAGS=-march=rv64ima -mcmodel=medany -O2 -std=gnu11 -Wall -nostartfiles
+CFLAGS+= -fno-common -g -DENTROPY=0 -mabi=lp64 -DNONSMP_HART=0
+CFLAGS+= -I $(ROOT_DIR)/include -I.
+LFLAGS=-static -nostdlib -L $(ROOT_DIR)/linker -T sdboot.elf.lds
+
+# Headers and linker scripts the image is built from; listed as
+# prerequisites so that editing any of them triggers a rebuild.
+hdrs := common.h kprintf.h $(wildcard $(ROOT_DIR)/include/*.h $(ROOT_DIR)/include/devices/*.h)
+lds := $(wildcard $(ROOT_DIR)/linker/*.lds)
+
+#PBUS_CLK passed in
+elf := $(BUILD_DIR)/sdboot.elf
+$(elf): head.S kprintf.c sd.c $(hdrs) $(lds) | $(BUILD_DIR)
+	$(CC) $(CFLAGS) -DTL_CLK="$(PBUS_CLK)UL" $(LFLAGS) -o $@ head.S sd.c kprintf.c
+
+# Output directory; order-only prerequisite so its mtime never forces a rebuild
+$(BUILD_DIR):
+	mkdir -p $@
+
+.PHONY: elf
+elf: $(elf)
+
+bin := $(BUILD_DIR)/sdboot.bin
+$(bin): $(elf) | $(BUILD_DIR)
+	$(OBJCOPY) -O binary --change-addresses=-0x10000 $< $@
+
+.PHONY: bin
+bin: $(bin)
+
+dump := $(BUILD_DIR)/sdboot.dump
+$(dump): $(elf) | $(BUILD_DIR)
+	$(OBJDUMP) -D -S $< > $@
+
+.PHONY: dump
+dump: $(dump)
+
+.PHONY: clean
+clean::
+	rm -rf $(BUILD_DIR)
diff --git a/fpga/src/main/resources/vcu118/sdboot/common.h b/fpga/src/main/resources/vcu118/sdboot/common.h
new file mode 100644
index 00000000..4f71e103
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/common.h
@@ -0,0 +1,9 @@
+#ifndef _SDBOOT_COMMON_H
+#define _SDBOOT_COMMON_H
+
+/* PAYLOAD_DEST: address head.S jumps to once main returns; defaults to
+ * MEMORY_MEM_ADDR unless the build defines it first. */
+#ifndef PAYLOAD_DEST
+ #define PAYLOAD_DEST MEMORY_MEM_ADDR
+#endif
+
+
+#endif
diff --git a/fpga/src/main/resources/vcu118/sdboot/head.S b/fpga/src/main/resources/vcu118/sdboot/head.S
new file mode 100644
index 00000000..662a6fd2
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/head.S
@@ -0,0 +1,20 @@
+// See LICENSE for license details.
+#include <platform.h>
+#include <smp.h>
+#include "common.h"
+
+// Boot ROM entry point.
+//  1. smp_pause: every hart except NONSMP_HART branches ahead to the wait
+//     loop inside smp_resume.
+//  2. The remaining hart sets up a stack at PAYLOAD_DEST + 0xffff000 and
+//     runs main.
+//  3. smp_resume: all harts are released again.
+//  4. Each hart jumps to PAYLOAD_DEST with a0 = mhartid and a1 = address of
+//     the dtb label.
+  .section .text.init
+  .option norvc
+  .globl _prog_start
+_prog_start:
+  smp_pause(s1, s2)
+  li sp, (PAYLOAD_DEST + 0xffff000)
+  call main
+  smp_resume(s1, s2)
+  csrr a0, mhartid // hartid for next level bootloader
+  la a1, dtb // dtb address for next level bootloader
+  li s1, PAYLOAD_DEST
+  jr s1
+
+// The dtb label marks the end of this file's .rodata contribution; nothing is
+// emitted after it here.
+  .section .rodata
+dtb:
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/bits.h b/fpga/src/main/resources/vcu118/sdboot/include/bits.h
new file mode 100644
index 00000000..bfe656fe
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/bits.h
@@ -0,0 +1,36 @@
+// See LICENSE for license details.
+#ifndef _RISCV_BITS_H
+#define _RISCV_BITS_H
+
+/* Branch-prediction hints for conditions that are almost always true/false */
+#define likely(x) __builtin_expect((x), 1)
+#define unlikely(x) __builtin_expect((x), 0)
+
+/* Round a up / down to a multiple of b (b > 0, a >= 0).
+ * ROUNDUP uses the add-then-truncate form so that ROUNDUP(0, b) is 0; the
+ * previous ((a)-1)/(b)+1 form returned b for a signed zero. */
+#define ROUNDUP(a, b) ((((a)+(b)-1)/(b))*(b))
+#define ROUNDDOWN(a, b) ((a)/(b)*(b))
+
+/* NOTE: these evaluate their arguments more than once */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi)
+
+/* Read / replace the field selected by the contiguous bit mask `which`.
+ * (which & ~(which-1)) isolates the lowest set bit of the mask, so dividing
+ * or multiplying by it shifts the field down to / up from bit 0. */
+#define EXTRACT_FIELD(val, which) (((val) & (which)) / ((which) & ~((which)-1)))
+#define INSERT_FIELD(val, which, fieldval) (((val) & ~(which)) | ((fieldval) * ((which) & ~((which)-1))))
+
+/* STR(x) stringifies x after macro expansion; XSTR(x) stringifies it as written */
+#define STR(x) XSTR(x)
+#define XSTR(x) #x
+
+/* Register-width dependent instruction mnemonics and register size */
+#if __riscv_xlen == 64
+# define SLL32 sllw
+# define STORE sd
+# define LOAD ld
+# define LWU lwu
+# define LOG_REGBYTES 3
+#else
+# define SLL32 sll
+# define STORE sw
+# define LOAD lw
+# define LWU lw
+# define LOG_REGBYTES 2
+#endif
+#define REGBYTES (1 << LOG_REGBYTES)
+
+#endif
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/const.h b/fpga/src/main/resources/vcu118/sdboot/include/const.h
new file mode 100644
index 00000000..8dcffbb0
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/const.h
@@ -0,0 +1,18 @@
+// See LICENSE for license details.
+/* Derived from <linux/const.h> */
+
+#ifndef _SIFIVE_CONST_H
+#define _SIFIVE_CONST_H
+
+/* _AC(X,Y) writes constant X with type suffix Y, and _AT(T,X) casts X to
+ * type T. When assembling, both reduce to the bare X so the same headers
+ * can be used from assembly sources. */
+#ifdef __ASSEMBLER__
+#define _AC(X,Y) X
+#define _AT(T,X) X
+#else
+#define _AC(X,Y) (X##Y)
+#define _AT(T,X) ((T)(X))
+#endif /* !__ASSEMBLER__*/
+
+/* Single-bit masks of unsigned long / unsigned long long type */
+#define _BITUL(x) (_AC(1,UL) << (x))
+#define _BITULL(x) (_AC(1,ULL) << (x))
+
+#endif /* _SIFIVE_CONST_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/devices/clint.h b/fpga/src/main/resources/vcu118/sdboot/include/devices/clint.h
new file mode 100644
index 00000000..c2b05bae
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/devices/clint.h
@@ -0,0 +1,14 @@
+// See LICENSE for license details.
+
+#ifndef _SIFIVE_CLINT_H
+#define _SIFIVE_CLINT_H
+
+
+/* CLINT register offsets; the *_size values are presumably the size in
+ * bytes of one entry of each register - confirm against the CLINT docs. */
+#define CLINT_MSIP 0x0000
+#define CLINT_MSIP_size 0x4
+#define CLINT_MTIMECMP 0x4000
+#define CLINT_MTIMECMP_size 0x8
+#define CLINT_MTIME 0xBFF8
+#define CLINT_MTIME_size 0x8
+
+#endif /* _SIFIVE_CLINT_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/devices/gpio.h b/fpga/src/main/resources/vcu118/sdboot/include/devices/gpio.h
new file mode 100644
index 00000000..f7f0acb4
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/devices/gpio.h
@@ -0,0 +1,24 @@
+// See LICENSE for license details.
+
+#ifndef _SIFIVE_GPIO_H
+#define _SIFIVE_GPIO_H
+
+/* GPIO register offsets, 4 bytes apart (presumably relative to the GPIO
+ * controller base address - confirm). */
+#define GPIO_INPUT_VAL (0x00)
+#define GPIO_INPUT_EN (0x04)
+#define GPIO_OUTPUT_EN (0x08)
+#define GPIO_OUTPUT_VAL (0x0C)
+#define GPIO_PULLUP_EN (0x10)
+#define GPIO_DRIVE (0x14)
+#define GPIO_RISE_IE (0x18)
+#define GPIO_RISE_IP (0x1C)
+#define GPIO_FALL_IE (0x20)
+#define GPIO_FALL_IP (0x24)
+#define GPIO_HIGH_IE (0x28)
+#define GPIO_HIGH_IP (0x2C)
+#define GPIO_LOW_IE (0x30)
+#define GPIO_LOW_IP (0x34)
+#define GPIO_IOF_EN (0x38)
+#define GPIO_IOF_SEL (0x3C)
+#define GPIO_OUTPUT_XOR (0x40)
+
+#endif /* _SIFIVE_GPIO_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/devices/plic.h b/fpga/src/main/resources/vcu118/sdboot/include/devices/plic.h
new file mode 100644
index 00000000..4d5b2d8d
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/devices/plic.h
@@ -0,0 +1,31 @@
+// See LICENSE for license details.
+
+#ifndef PLIC_H
+#define PLIC_H
+
+/* _AC() comes from const.h */
+#include <const.h>
+
+/* PLIC register block offsets and the per-source / per-target address
+ * shifts used to index into each block. */
+
+// 32 bits per source
+#define PLIC_PRIORITY_OFFSET _AC(0x0000,UL)
+#define PLIC_PRIORITY_SHIFT_PER_SOURCE 2
+// 1 bit per source (1 address)
+#define PLIC_PENDING_OFFSET _AC(0x1000,UL)
+#define PLIC_PENDING_SHIFT_PER_SOURCE 0
+
+//0x80 per target
+#define PLIC_ENABLE_OFFSET _AC(0x2000,UL)
+#define PLIC_ENABLE_SHIFT_PER_TARGET 7
+
+
+// Threshold and claim registers: 1 << 12 bytes per target
+#define PLIC_THRESHOLD_OFFSET _AC(0x200000,UL)
+#define PLIC_CLAIM_OFFSET _AC(0x200004,UL)
+#define PLIC_THRESHOLD_SHIFT_PER_TARGET 12
+#define PLIC_CLAIM_SHIFT_PER_TARGET 12
+
+#define PLIC_MAX_SOURCE 1023
+#define PLIC_SOURCE_MASK 0x3FF
+
+#define PLIC_MAX_TARGET 15871
+#define PLIC_TARGET_MASK 0x3FFF
+
+#endif /* PLIC_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/devices/spi.h b/fpga/src/main/resources/vcu118/sdboot/include/devices/spi.h
new file mode 100644
index 00000000..7118572a
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/devices/spi.h
@@ -0,0 +1,79 @@
+// See LICENSE for license details.
+
+#ifndef _SIFIVE_SPI_H
+#define _SIFIVE_SPI_H
+
+/* Register offsets */
+
+#define SPI_REG_SCKDIV 0x00
+#define SPI_REG_SCKMODE 0x04
+#define SPI_REG_CSID 0x10
+#define SPI_REG_CSDEF 0x14
+#define SPI_REG_CSMODE 0x18
+
+/* NOTE(review): these four offsets are 2 bytes apart, unlike the 4-byte
+ * spacing elsewhere; presumably 16-bit delay fields - confirm. */
+#define SPI_REG_DCSSCK 0x28
+#define SPI_REG_DSCKCS 0x2a
+#define SPI_REG_DINTERCS 0x2c
+#define SPI_REG_DINTERXFR 0x2e
+
+#define SPI_REG_FMT 0x40
+#define SPI_REG_TXFIFO 0x48
+#define SPI_REG_RXFIFO 0x4c
+#define SPI_REG_TXCTRL 0x50
+#define SPI_REG_RXCTRL 0x54
+
+#define SPI_REG_FCTRL 0x60
+#define SPI_REG_FFMT 0x64
+
+#define SPI_REG_IE 0x70
+#define SPI_REG_IP 0x74
+
+/* Fields */
+
+#define SPI_SCK_POL 0x1
+#define SPI_SCK_PHA 0x2
+
+/* Each field macro masks its argument and shifts it to the field position */
+#define SPI_FMT_PROTO(x) ((x) & 0x3)
+#define SPI_FMT_ENDIAN(x) (((x) & 0x1) << 2)
+#define SPI_FMT_DIR(x) (((x) & 0x1) << 3)
+#define SPI_FMT_LEN(x) (((x) & 0xf) << 16)
+
+/* TXCTRL register */
+#define SPI_TXWM(x) ((x) & 0xffff)
+/* RXCTRL register */
+#define SPI_RXWM(x) ((x) & 0xffff)
+
+#define SPI_IP_TXWM 0x1
+#define SPI_IP_RXWM 0x2
+
+#define SPI_FCTRL_EN 0x1
+
+#define SPI_INSN_CMD_EN 0x1
+#define SPI_INSN_ADDR_LEN(x) (((x) & 0x7) << 1)
+#define SPI_INSN_PAD_CNT(x) (((x) & 0xf) << 4)
+#define SPI_INSN_CMD_PROTO(x) (((x) & 0x3) << 8)
+#define SPI_INSN_ADDR_PROTO(x) (((x) & 0x3) << 10)
+#define SPI_INSN_DATA_PROTO(x) (((x) & 0x3) << 12)
+#define SPI_INSN_CMD_CODE(x) (((x) & 0xff) << 16)
+#define SPI_INSN_PAD_CODE(x) (((x) & 0xff) << 24)
+
+/* Bit 31 of the FIFO registers.
+ * NOTE(review): (1 << 31) shifts into the sign bit of a signed int, so the
+ * value is negative where int is 32 bits; an unsigned constant would avoid
+ * relying on that. */
+#define SPI_TXFIFO_FULL (1 << 31)
+#define SPI_RXFIFO_EMPTY (1 << 31)
+
+/* Values */
+
+#define SPI_CSMODE_AUTO 0
+#define SPI_CSMODE_HOLD 2
+#define SPI_CSMODE_OFF 3
+
+#define SPI_DIR_RX 0
+#define SPI_DIR_TX 1
+
+#define SPI_PROTO_S 0
+#define SPI_PROTO_D 1
+#define SPI_PROTO_Q 2
+
+#define SPI_ENDIAN_MSB 0
+#define SPI_ENDIAN_LSB 1
+
+#endif /* _SIFIVE_SPI_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/devices/uart.h b/fpga/src/main/resources/vcu118/sdboot/include/devices/uart.h
new file mode 100644
index 00000000..aecfd912
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/devices/uart.h
@@ -0,0 +1,28 @@
+// See LICENSE for license details.
+
+#ifndef _SIFIVE_UART_H
+#define _SIFIVE_UART_H
+
+/* Register offsets */
+#define UART_REG_TXFIFO 0x00
+#define UART_REG_RXFIFO 0x04
+#define UART_REG_TXCTRL 0x08
+#define UART_REG_RXCTRL 0x0c
+#define UART_REG_IE 0x10
+#define UART_REG_IP 0x14
+#define UART_REG_DIV 0x18
+
+/* TXCTRL register */
+#define UART_TXEN 0x1
+#define UART_TXNSTOP 0x2
+/* Watermark value is placed in bits 31:16 */
+#define UART_TXWM(x) (((x) & 0xffff) << 16)
+
+/* RXCTRL register */
+#define UART_RXEN 0x1
+/* Watermark value is placed in bits 31:16 */
+#define UART_RXWM(x) (((x) & 0xffff) << 16)
+
+/* IP register */
+#define UART_IP_TXWM 0x1
+#define UART_IP_RXWM 0x2
+
+#endif /* _SIFIVE_UART_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/platform.h b/fpga/src/main/resources/vcu118/sdboot/include/platform.h
new file mode 100644
index 00000000..c240e0e5
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/platform.h
@@ -0,0 +1,108 @@
+// See LICENSE for license details.
+
+#ifndef _EAGLE_PLATFORM_H
+#define _EAGLE_PLATFORM_H
+
+#include "const.h"
+#include "riscv_test_defaults.h"
+#include "devices/clint.h"
+#include "devices/gpio.h"
+#include "devices/plic.h"
+#include "devices/spi.h"
+#include "devices/uart.h"
+
+ // Some things missing from the official encoding.h
+ // MCAUSE_INT is the top bit of an XLEN-wide value, MCAUSE_CAUSE all bits below it
+#if __riscv_xlen == 32
+ #define MCAUSE_INT 0x80000000UL
+ #define MCAUSE_CAUSE 0x7FFFFFFFUL
+#else
+ #define MCAUSE_INT 0x8000000000000000UL
+ #define MCAUSE_CAUSE 0x7FFFFFFFFFFFFFFFUL
+#endif
+
+/****************************************************************************
+ * Platform definitions
+ *****************************************************************************/
+
+// CPU info
+#define NUM_CORES 1
+#define GLOBAL_INT_SIZE 38
+#define GLOBAL_INT_MAX_PRIORITY 7
+
+// Memory map
+// Each region has a base address and a size; memories use the _MEM_ infix
+// and peripherals the _CTRL_ infix.
+#define CLINT_CTRL_ADDR _AC(0x2000000,UL)
+#define CLINT_CTRL_SIZE _AC(0x10000,UL)
+#define DEBUG_CTRL_ADDR _AC(0x0,UL)
+#define DEBUG_CTRL_SIZE _AC(0x1000,UL)
+#define ERROR_MEM_ADDR _AC(0x3000,UL)
+#define ERROR_MEM_SIZE _AC(0x1000,UL)
+#define GPIO_CTRL_ADDR _AC(0x64002000,UL)
+#define GPIO_CTRL_SIZE _AC(0x1000,UL)
+#define MASKROM_MEM_ADDR _AC(0x10000,UL)
+#define MASKROM_MEM_SIZE _AC(0x10000,UL)
+#define MEMORY_MEM_ADDR _AC(0x80000000,UL)
+#define MEMORY_MEM_SIZE _AC(0x10000000,UL)
+#define PLIC_CTRL_ADDR _AC(0xc000000,UL)
+#define PLIC_CTRL_SIZE _AC(0x4000000,UL)
+#define SPI_CTRL_ADDR _AC(0x64001000,UL)
+#define SPI_CTRL_SIZE _AC(0x1000,UL)
+#define SPI1_CTRL_ADDR _AC(0x64004000,UL)
+#define SPI1_CTRL_SIZE _AC(0x1000,UL)
+#define TEST_CTRL_ADDR _AC(0x4000,UL)
+#define TEST_CTRL_SIZE _AC(0x1000,UL)
+#define UART_CTRL_ADDR _AC(0x64000000,UL)
+#define UART_CTRL_SIZE _AC(0x1000,UL)
+#define UART1_CTRL_ADDR _AC(0x64003000,UL)
+#define UART1_CTRL_SIZE _AC(0x1000,UL)
+#define I2C_CTRL_ADDR _AC(0x64005000,UL)
+#define I2C_CTRL_SIZE _AC(0x1000,UL)
+
+// IOF masks
+
+
+// Interrupt numbers
+// NOTE(review): GPIO starts at 4 and SPI at 36, so GPIO presumably owns sources 4..35 - confirm
+#define UART_INT_BASE 1
+#define UART1_INT_BASE 2
+#define I2C_INT_BASE 3
+#define GPIO_INT_BASE 4
+#define SPI_INT_BASE 36
+#define SPI1_INT_BASE 37
+
+// Helper functions
+// Volatile accessors for the 64-, 32- and 16-bit word at byte offset `i` from
+// base address `p`. They use the <stdint.h> fixed-width types, which this
+// header does not include itself.
+#define _REG64(p, i) (*(volatile uint64_t *)((p) + (i)))
+#define _REG32(p, i) (*(volatile uint32_t *)((p) + (i)))
+#define _REG16(p, i) (*(volatile uint16_t *)((p) + (i)))
+// Bulk set bits in `reg` to either 0 or 1.
+// E.g. SET_BITS(MY_REG, 0x00000007, 0) would generate MY_REG &= ~0x7
+// E.g. SET_BITS(MY_REG, 0x00000007, 1) would generate MY_REG |= 0x7
+// Wrapped in do { } while (0) so it behaves as one statement, including
+// inside an unbraced if/else; terminate each use with a semicolon.
+#define SET_BITS(reg, mask, value) \
+  do { if ((value) == 0) { (reg) &= ~(mask); } else { (reg) |= (mask); } } while (0)
+// 32-bit register accessors, one per region of the memory map.
+// NOTE(review): AXI_PCIE_HOST_1_00_A_CTRL_ADDR is not defined in this header,
+// so the two AXI_PCIE accessors only compile if it is supplied elsewhere.
+#define AXI_PCIE_HOST_1_00_A_REG(offset) _REG32(AXI_PCIE_HOST_1_00_A_CTRL_ADDR, offset)
+#define CLINT_REG(offset) _REG32(CLINT_CTRL_ADDR, offset)
+#define DEBUG_REG(offset) _REG32(DEBUG_CTRL_ADDR, offset)
+// The error, mask ROM and main memory regions are named *_MEM_ADDR in the
+// memory map; the previous *_CTRL_ADDR spellings were never defined.
+#define ERROR_REG(offset) _REG32(ERROR_MEM_ADDR, offset)
+#define GPIO_REG(offset) _REG32(GPIO_CTRL_ADDR, offset)
+#define MASKROM_REG(offset) _REG32(MASKROM_MEM_ADDR, offset)
+#define MEMORY_REG(offset) _REG32(MEMORY_MEM_ADDR, offset)
+#define PLIC_REG(offset) _REG32(PLIC_CTRL_ADDR, offset)
+#define SPI_REG(offset) _REG32(SPI_CTRL_ADDR, offset)
+#define SPI1_REG(offset) _REG32(SPI1_CTRL_ADDR, offset)
+#define TEST_REG(offset) _REG32(TEST_CTRL_ADDR, offset)
+#define UART_REG(offset) _REG32(UART_CTRL_ADDR, offset)
+#define UART1_REG(offset) _REG32(UART1_CTRL_ADDR, offset)
+#define I2C_REG(offset) _REG32(I2C_CTRL_ADDR, offset)
+// 64-bit register accessors
+#define AXI_PCIE_HOST_1_00_A_REG64(offset) _REG64(AXI_PCIE_HOST_1_00_A_CTRL_ADDR, offset)
+#define CLINT_REG64(offset) _REG64(CLINT_CTRL_ADDR, offset)
+#define DEBUG_REG64(offset) _REG64(DEBUG_CTRL_ADDR, offset)
+#define ERROR_REG64(offset) _REG64(ERROR_MEM_ADDR, offset)
+#define GPIO_REG64(offset) _REG64(GPIO_CTRL_ADDR, offset)
+#define MASKROM_REG64(offset) _REG64(MASKROM_MEM_ADDR, offset)
+#define MEMORY_REG64(offset) _REG64(MEMORY_MEM_ADDR, offset)
+#define PLIC_REG64(offset) _REG64(PLIC_CTRL_ADDR, offset)
+#define SPI_REG64(offset) _REG64(SPI_CTRL_ADDR, offset)
+#define SPI1_REG64(offset) _REG64(SPI1_CTRL_ADDR, offset)
+#define TEST_REG64(offset) _REG64(TEST_CTRL_ADDR, offset)
+#define UART_REG64(offset) _REG64(UART_CTRL_ADDR, offset)
+#define UART1_REG64(offset) _REG64(UART1_CTRL_ADDR, offset)
+#define I2C_REG64(offset) _REG64(I2C_CTRL_ADDR, offset)
+
+// Misc
+
+
+#endif /* _EAGLE_PLATFORM_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/riscv_test_defaults.h b/fpga/src/main/resources/vcu118/sdboot/include/riscv_test_defaults.h
new file mode 100644
index 00000000..a2dea3d4
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/riscv_test_defaults.h
@@ -0,0 +1,81 @@
+// See LICENSE for license details.
+#ifndef _RISCV_TEST_DEFAULTS_H
+#define _RISCV_TEST_DEFAULTS_H
+
+/* Assembly test-harness macros. TESTNUM is the register holding the test
+ * number; TESTBASE is the address the pass/fail status word is stored to. */
+#define TESTNUM x28
+#define TESTBASE 0x4000
+
+/* The RVTEST_RV* macros each define an assembler macro named `init`;
+ * the integer-only variants define it empty. */
+#define RVTEST_RV32U \
+ .macro init; \
+ .endm
+
+#define RVTEST_RV64U \
+ .macro init; \
+ .endm
+
+/* The floating-point variants test the 'F' bit of misa and, when set, set
+ * MSTATUS_FS in mstatus and clear the FP status register (fssr x0).
+ * MSTATUS_FS is not defined in this header. */
+#define RVTEST_RV32UF \
+ .macro init; \
+ /* If FPU exists, initialize FCSR. */ \
+ csrr t0, misa; \
+ andi t0, t0, 1 << ('F' - 'A'); \
+ beqz t0, 1f; \
+ /* Enable FPU if it exists. */ \
+ li t0, MSTATUS_FS; \
+ csrs mstatus, t0; \
+ fssr x0; \
+1: ; \
+ .endm
+
+#define RVTEST_RV64UF \
+ .macro init; \
+ /* If FPU exists, initialize FCSR. */ \
+ csrr t0, misa; \
+ andi t0, t0, 1 << ('F' - 'A'); \
+ beqz t0, 1f; \
+ /* Enable FPU if it exists. */ \
+ li t0, MSTATUS_FS; \
+ csrs mstatus, t0; \
+ fssr x0; \
+1: ; \
+ .endm
+
+/* Opens .text.init, defines the global _prog_start label and expands `init` */
+#define RVTEST_CODE_BEGIN \
+ .section .text.init; \
+ .globl _prog_start; \
+_prog_start: \
+ init;
+
+#define RVTEST_CODE_END \
+ unimp
+
+/* Pass: store 0x5555 to TESTBASE, then spin forever */
+#define RVTEST_PASS \
+ fence; \
+ li t0, TESTBASE; \
+ li t1, 0x5555; \
+ sw t1, 0(t0); \
+1: \
+ j 1b;
+
+/* Fail: store (a0 << 16) + 0x3333 to TESTBASE, then spin forever */
+#define RVTEST_FAIL \
+ li t0, TESTBASE; \
+ li t1, 0x3333; \
+ slli a0, a0, 16; \
+ add a0, a0, t1; \
+ sw a0, 0(t0); \
+1: \
+ j 1b;
+
+#define EXTRA_DATA
+
+/* Data section delimiters: begin_signature / end_signature labels, with the
+ * status message strings emitted just before end_signature */
+#define RVTEST_DATA_BEGIN \
+ EXTRA_DATA \
+ .align 4; .global begin_signature; begin_signature:
+
+#define RVTEST_DATA_END \
+ _msg_init: .asciz "RUN\r\n"; \
+ _msg_pass: .asciz "PASS"; \
+ _msg_fail: .asciz "FAIL "; \
+ _msg_end: .asciz "\r\n"; \
+ .align 4; .global end_signature; end_signature:
+
+#endif /* _RISCV_TEST_DEFAULTS_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/sections.h b/fpga/src/main/resources/vcu118/sdboot/include/sections.h
new file mode 100644
index 00000000..6e1f0518
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/sections.h
@@ -0,0 +1,17 @@
+// See LICENSE for license details.
+#ifndef _SECTIONS_H
+#define _SECTIONS_H
+
+/* Section boundary symbols, declared as byte arrays so that their addresses
+ * can be taken. linker/sdboot.elf.lds PROVIDEs _ram, _ram_end, _ftext,
+ * _etext, _fbss, _ebss and _end.
+ * NOTE(review): _rom and _rom_end are not defined by that script. */
+extern unsigned char _rom[];
+extern unsigned char _rom_end[];
+
+extern unsigned char _ram[];
+extern unsigned char _ram_end[];
+
+extern unsigned char _ftext[];
+extern unsigned char _etext[];
+extern unsigned char _fbss[];
+extern unsigned char _ebss[];
+extern unsigned char _end[];
+
+#endif /* _SECTIONS_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/include/smp.h b/fpga/src/main/resources/vcu118/sdboot/include/smp.h
new file mode 100644
index 00000000..145ceb37
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/include/smp.h
@@ -0,0 +1,142 @@
+#ifndef SIFIVE_SMP
+#define SIFIVE_SMP
+#include "platform.h"
+
+// The maximum number of HARTs this code supports
+#ifndef MAX_HARTS
+#define MAX_HARTS 32
+#endif
+// First address past the per-hart software-interrupt words (4 bytes per hart)
+// of each CLINT. Parenthesized so the sum stays intact inside larger
+// expressions.
+#define CLINT_END_HART_IPI (CLINT_CTRL_ADDR + (MAX_HARTS*4))
+#define CLINT1_END_HART_IPI (CLINT1_CTRL_ADDR + (MAX_HARTS*4))
+
+// The hart that non-SMP tests should run on
+#ifndef NONSMP_HART
+#define NONSMP_HART 0
+#endif
+
+/* If your test cannot handle multiple-threads, use this:
+ * smp_disable(reg1, reg2)
+ * Both registers are overwritten. The hart whose mhartid equals NONSMP_HART
+ * continues at hart0_entry; every other hart loops on wfi forever.
+ * The global label hart0_entry means this macro can be expanded only once
+ * per assembly unit.
+ */
+#define smp_disable(reg1, reg2) \
+ csrr reg1, mhartid ;\
+ li reg2, NONSMP_HART ;\
+ beq reg1, reg2, hart0_entry ;\
+42: ;\
+ wfi ;\
+ j 42b ;\
+hart0_entry:
+
+/* If your test needs to temporarily block multiple-threads, do this:
+ * smp_pause(reg1, reg2)
+ * ... single-threaded work ...
+ * smp_resume(reg1, reg2)
+ * ... multi-threaded work ...
+ *
+ * smp_pause writes 0x8 to mie, then sends every hart other than NONSMP_HART
+ * to the next local label 42, which is the wfi wait loop inside smp_resume.
+ * smp_pause must therefore be followed by smp_resume. Both registers are
+ * overwritten.
+ */
+
+#define smp_pause(reg1, reg2) \
+ li reg2, 0x8 ;\
+ csrw mie, reg2 ;\
+ li reg1, NONSMP_HART ;\
+ csrr reg2, mhartid ;\
+ bne reg1, reg2, 42f
+
+#ifdef CLINT1_CTRL_ADDR
+// If a second CLINT exists, then make sure we:
+// 1) Trigger a software interrupt on all harts of both CLINTs.
+// 2) Locate your own hart's software interrupt pending register and clear it.
+// 3) Wait for all harts on both CLINTs to clear their software interrupt
+// pending register.
+// WARNING: This code makes these assumptions, which are only true for Fadu as
+// of now:
+// 1) hart0 uses CLINT0 at offset 0
+// 2) hart2 uses CLINT1 at offset 0
+// 3) hart3 uses CLINT1 at offset 1
+// 4) There are no other harts or CLINTs in the system.
+// Both reg1 and reg2 are overwritten. Local labels 41 and 42 are reused
+// several times; the first 42 is also the target of the branch in smp_pause.
+#define smp_resume(reg1, reg2) \
+ /* Trigger software interrupt on CLINT0 */ \
+ li reg1, CLINT_CTRL_ADDR ;\
+41: ;\
+ li reg2, 1 ;\
+ sw reg2, 0(reg1) ;\
+ addi reg1, reg1, 4 ;\
+ li reg2, CLINT_END_HART_IPI ;\
+ blt reg1, reg2, 41b ;\
+ /* Trigger software interrupt on CLINT1 */ \
+ li reg1, CLINT1_CTRL_ADDR ;\
+41: ;\
+ li reg2, 1 ;\
+ sw reg2, 0(reg1) ;\
+ addi reg1, reg1, 4 ;\
+ li reg2, CLINT1_END_HART_IPI ;\
+ blt reg1, reg2, 41b ;\
+ /* Wait to receive software interrupt */ \
+42: ;\
+ wfi ;\
+ csrr reg2, mip ;\
+ andi reg2, reg2, 0x8 ;\
+ beqz reg2, 42b ;\
+ /* Clear own software interrupt bit */ \
+ csrr reg2, mhartid ;\
+ bnez reg2, 41f; \
+ /* hart0 case: Use CLINT0 */ \
+ li reg1, CLINT_CTRL_ADDR ;\
+ slli reg2, reg2, 2 ;\
+ add reg2, reg2, reg1 ;\
+ sw zero, 0(reg2) ;\
+ j 42f; \
+41: \
+ /* hart 2, 3 case: Use CLINT1 and remap hart IDs to 0 and 1 */ \
+ li reg1, CLINT1_CTRL_ADDR ;\
+ addi reg2, reg2, -2; \
+ slli reg2, reg2, 2 ;\
+ add reg2, reg2, reg1 ;\
+ sw zero, 0(reg2) ; \
+42: \
+ /* Wait for all software interrupt bits to be cleared on CLINT0 */ \
+ li reg1, CLINT_CTRL_ADDR ;\
+41: ;\
+ lw reg2, 0(reg1) ;\
+ bnez reg2, 41b ;\
+ addi reg1, reg1, 4 ;\
+ li reg2, CLINT_END_HART_IPI ;\
+ blt reg1, reg2, 41b; \
+ /* Wait for all software interrupt bits to be cleared on CLINT1 */ \
+ li reg1, CLINT1_CTRL_ADDR ;\
+41: ;\
+ lw reg2, 0(reg1) ;\
+ bnez reg2, 41b ;\
+ addi reg1, reg1, 4 ;\
+ li reg2, CLINT1_END_HART_IPI ;\
+ blt reg1, reg2, 41b; \
+ /* End smp_resume() */
+
+#else
+
+// Single-CLINT smp_resume. Both reg1 and reg2 are overwritten.
+// 1) Write 1 to each of the MAX_HARTS 4-byte words starting at
+// CLINT_CTRL_ADDR.
+// 2) Label 42 (also the target of the branch in smp_pause): wfi until bit
+// 0x8 of mip is set.
+// 3) Clear this hart's own word (CLINT_CTRL_ADDR + 4 * mhartid).
+// 4) Spin until every word in the range reads zero.
+#define smp_resume(reg1, reg2) \
+ li reg1, CLINT_CTRL_ADDR ;\
+41: ;\
+ li reg2, 1 ;\
+ sw reg2, 0(reg1) ;\
+ addi reg1, reg1, 4 ;\
+ li reg2, CLINT_END_HART_IPI ;\
+ blt reg1, reg2, 41b ;\
+42: ;\
+ wfi ;\
+ csrr reg2, mip ;\
+ andi reg2, reg2, 0x8 ;\
+ beqz reg2, 42b ;\
+ li reg1, CLINT_CTRL_ADDR ;\
+ csrr reg2, mhartid ;\
+ slli reg2, reg2, 2 ;\
+ add reg2, reg2, reg1 ;\
+ sw zero, 0(reg2) ;\
+41: ;\
+ lw reg2, 0(reg1) ;\
+ bnez reg2, 41b ;\
+ addi reg1, reg1, 4 ;\
+ li reg2, CLINT_END_HART_IPI ;\
+ blt reg1, reg2, 41b
+
+#endif /* ifdef CLINT1_CTRL_ADDR */
+
+#endif
diff --git a/fpga/src/main/resources/vcu118/sdboot/kprintf.c b/fpga/src/main/resources/vcu118/sdboot/kprintf.c
new file mode 100644
index 00000000..57627011
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/kprintf.c
@@ -0,0 +1,75 @@
+// See LICENSE for license details.
+#include <stdarg.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "kprintf.h"
+
+/* Emit every character of the NUL-terminated string s; no line ending is added. */
+static inline void _kputs(const char *s)
+{
+	while (*s != '\0') {
+		kputc(*s);
+		s++;
+	}
+}
+
+/* Print the string s followed by a carriage return and a line feed. */
+void kputs(const char *s)
+{
+	_kputs(s);
+	_kputs("\r\n");
+}
+
+/*
+ * Minimal printf for the console.
+ *
+ * Supported conversions:
+ *   %x   unsigned int, sizeof(unsigned int) * 2 lower-case hex digits
+ *   %lx  unsigned long, sizeof(unsigned long) * 2 lower-case hex digits
+ *   %hx  low byte of an unsigned int, 2 lower-case hex digits
+ *   %s   NUL-terminated string
+ *   %c   single character
+ *   %%   literal percent sign
+ * Hex output is always zero-padded to the full width. Any other conversion
+ * character is dropped without consuming an argument.
+ */
+void kprintf(const char *fmt, ...)
+{
+	va_list vl;
+	bool is_format, is_long, is_char;
+	char c;
+
+	va_start(vl, fmt);
+	is_format = false;
+	is_long = false;
+	is_char = false;
+	while ((c = *fmt++) != '\0') {
+		if (is_format) {
+			switch (c) {
+			case 'l':
+				/* Length modifier: stay in format state for the next character */
+				is_long = true;
+				continue;
+			case 'h':
+				is_char = true;
+				continue;
+			case 'x': {
+				unsigned long n;
+				long i;
+				/* i is the bit position of the most significant nibble to print */
+				if (is_long) {
+					n = va_arg(vl, unsigned long);
+					i = (sizeof(unsigned long) << 3) - 4;
+				} else {
+					n = va_arg(vl, unsigned int);
+					i = is_char ? 4 : (sizeof(unsigned int) << 3) - 4;
+				}
+				for (; i >= 0; i -= 4) {
+					long d;
+					d = (n >> i) & 0xF;
+					kputc(d < 10 ? '0' + d : 'a' + d - 10);
+				}
+				break;
+			}
+			case 's':
+				_kputs(va_arg(vl, const char *));
+				break;
+			case 'c':
+				kputc(va_arg(vl, int));
+				break;
+			case '%':
+				/* "%%" prints one percent sign; previously it printed nothing */
+				kputc('%');
+				break;
+			}
+			is_format = false;
+			is_long = false;
+			is_char = false;
+		} else if (c == '%') {
+			is_format = true;
+		} else {
+			kputc(c);
+		}
+	}
+	va_end(vl);
+}
diff --git a/fpga/src/main/resources/vcu118/sdboot/kprintf.h b/fpga/src/main/resources/vcu118/sdboot/kprintf.h
new file mode 100644
index 00000000..26cc8055
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/kprintf.h
@@ -0,0 +1,49 @@
+// See LICENSE for license details.
+#ifndef _SDBOOT_KPRINTF_H
+#define _SDBOOT_KPRINTF_H
+
+#include <platform.h>
+#include <stdint.h>
+
+/* 32-bit word at byte offset i from the uint32_t pointer p */
+#define REG32(p, i) ((p)[(i) >> 2])
+
+/* When UART_CTRL_ADDR is not already defined, derive it from UART_NUM
+ * (default 0) as UART<UART_NUM>_CTRL_ADDR. */
+#ifndef UART_CTRL_ADDR
+  #ifndef UART_NUM
+    #define UART_NUM 0
+  #endif
+
+  #define _CONCAT3(A, B, C) A ## B ## C
+  #define _UART_CTRL_ADDR(UART_NUM) _CONCAT3(UART, UART_NUM, _CTRL_ADDR)
+  #define UART_CTRL_ADDR _UART_CTRL_ADDR(UART_NUM)
+#endif
+static volatile uint32_t * const uart = (void *)(UART_CTRL_ADDR);
+
+/*
+ * Write one character to the UART transmit FIFO, retrying while the value
+ * read back from the register is negative (bit 31 set).
+ * With the atomic extension the check and the write are one amoor.w;
+ * otherwise the register is polled and then written.
+ */
+static inline void kputc(char c)
+{
+	volatile uint32_t *tx = &REG32(uart, UART_REG_TXFIFO);
+#ifdef __riscv_atomic
+	int32_t r;
+	do {
+		__asm__ __volatile__ (
+			"amoor.w %0, %2, %1\n"
+			: "=r" (r), "+A" (*tx)
+			: "r" (c));
+	} while (r < 0);
+#else
+	while ((int32_t)(*tx) < 0);
+	*tx = c;
+#endif
+}
+
+extern void kputs(const char *);
+extern void kprintf(const char *, ...);
+
+/* dprintf / dputs forward to kprintf / kputs when DEBUG is defined and
+ * expand to an empty statement otherwise. */
+#ifdef DEBUG
+#define dprintf(s, ...) kprintf((s), ##__VA_ARGS__)
+#define dputs(s) kputs((s))
+#else
+#define dprintf(s, ...) do { } while (0)
+#define dputs(s) do { } while (0)
+#endif
+
+#endif /* _SDBOOT_KPRINTF_H */
diff --git a/fpga/src/main/resources/vcu118/sdboot/linker/memory.lds b/fpga/src/main/resources/vcu118/sdboot/linker/memory.lds
new file mode 100644
index 00000000..997de4d3
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/linker/memory.lds
@@ -0,0 +1,5 @@
+/* Memory regions used by sdboot.elf.lds.
+ * NOTE(review): the lengths differ from include/platform.h, which gives
+ * MASKROM_MEM_SIZE = 0x10000 and MEMORY_MEM_SIZE = 0x10000000 for the same
+ * base addresses - confirm which values are intended. */
+MEMORY
+{
+ bootrom_mem (rx) : ORIGIN = 0x10000, LENGTH = 0x2000
+ memory_mem (rwx) : ORIGIN = 0x80000000, LENGTH = 0x40000000
+}
diff --git a/fpga/src/main/resources/vcu118/sdboot/linker/sdboot.elf.lds b/fpga/src/main/resources/vcu118/sdboot/linker/sdboot.elf.lds
new file mode 100644
index 00000000..34610c94
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/linker/sdboot.elf.lds
@@ -0,0 +1,79 @@
+/* Link layout for the sdboot image: code and all initialized data go into
+ * bootrom_mem, only .bss is placed in memory_mem. */
+OUTPUT_ARCH("riscv")
+ENTRY(_prog_start)
+
+INCLUDE memory.lds
+
+PHDRS
+{
+ text PT_LOAD;
+ data PT_LOAD;
+ bss PT_LOAD;
+}
+
+SECTIONS
+{
+ PROVIDE(_ram = ORIGIN(memory_mem));
+ PROVIDE(_ram_end = _ram + LENGTH(memory_mem));
+
+ /* .text.init is listed first so that it is placed at the start of bootrom_mem */
+ .text ALIGN((ORIGIN(bootrom_mem) + 0x0), 8) : AT(ALIGN((ORIGIN(bootrom_mem) + 0x0), 8)) {
+ PROVIDE(_ftext = .);
+ *(.text.init)
+ *(.text.unlikely .text.unlikely.*)
+ *(.text .text.* .gnu.linkonce.t.*)
+ PROVIDE(_etext = .);
+ . += 0x40; /* to create a gap between .text and .data b/c ifetch can fetch ahead from .data */
+ } >bootrom_mem :text
+
+ .eh_frame ALIGN((ADDR(.text) + SIZEOF(.text)), 8) : AT(ALIGN((LOADADDR(.text) + SIZEOF(.text)), 8)) {
+ *(.eh_frame)
+ } >bootrom_mem :text
+
+ /* Each following section starts at the 8-byte aligned end of the previous
+ * one, for both its address and its load address */
+ .srodata ALIGN((ADDR(.eh_frame) + SIZEOF(.eh_frame)), 8) : AT(ALIGN((LOADADDR(.eh_frame) + SIZEOF(.eh_frame)), 8)) ALIGN_WITH_INPUT {
+ *(.srodata.cst16)
+ *(.srodata.cst8)
+ *(.srodata.cst4)
+ *(.srodata.cst2)
+ *(.srodata.*)
+ } >bootrom_mem :data
+
+ .data ALIGN((ADDR(.srodata) + SIZEOF(.srodata)), 8) : AT(ALIGN((LOADADDR(.srodata) + SIZEOF(.srodata)), 8)) ALIGN_WITH_INPUT {
+ *(.data .data.* .gnu.linkonce.d.*)
+ *(.tohost) /* TODO: Support sections that aren't explicitly listed in this linker script */
+ } >bootrom_mem :data
+
+ .sdata ALIGN((ADDR(.data) + SIZEOF(.data)), 8) : AT(ALIGN((LOADADDR(.data) + SIZEOF(.data)), 8)) ALIGN_WITH_INPUT {
+ *(.sdata .sdata.* .gnu.linkonce.s.*)
+ } >bootrom_mem :data
+
+ .rodata ALIGN((ADDR(.sdata) + SIZEOF(.sdata)), 8) : AT(ALIGN((LOADADDR(.sdata) + SIZEOF(.sdata)), 8)) ALIGN_WITH_INPUT {
+ *(.rodata .rodata.* .gnu.linkonce.r.*)
+ } >bootrom_mem :data
+
+ /* NOTE(review): _data and _data_lma are taken from .rodata, not .data - confirm this is intended */
+ PROVIDE(_data = ADDR(.rodata));
+ PROVIDE(_data_lma = LOADADDR(.rodata));
+ PROVIDE(_edata = .);
+
+ .bss ALIGN((ORIGIN(memory_mem) + 0x0), 8) : AT(ALIGN((ORIGIN(memory_mem) + 0x0), 8)) ALIGN(8) {
+ PROVIDE(_fbss = .);
+ PROVIDE(__global_pointer$ = . + 0x7C0);
+ *(.sbss .sbss.* .gnu.linkonce.sb.*)
+ *(.bss .bss.* .gnu.linkonce.b.*)
+ . = ALIGN(8);
+ PROVIDE(_ebss = .);
+ } >memory_mem :bss
+
+ PROVIDE(_end = .);
+
+ /*
+ * heap_stack_region_usable_end: (ORIGIN(memory_mem) + LENGTH(memory_mem))
+ * heap_stack_min_size: 4096
+ * heap_stack_max_size: 1048576
+ */
+ PROVIDE(_sp = ALIGN(MIN((ORIGIN(memory_mem) + LENGTH(memory_mem)), _ebss + 1048576) - 7, 8));
+ PROVIDE(_heap_end = _sp - 2048);
+
+ /* This section is a noop and is only used for the ASSERT */
+ .stack : {
+ ASSERT(_sp >= (_ebss + 4096), "Error: No room left for the heap and stack");
+ }
+}
diff --git a/fpga/src/main/resources/vcu118/sdboot/sd.c b/fpga/src/main/resources/vcu118/sdboot/sd.c
new file mode 100644
index 00000000..bdd9d62a
--- /dev/null
+++ b/fpga/src/main/resources/vcu118/sdboot/sd.c
@@ -0,0 +1,236 @@
+// See LICENSE for license details.
+#include <stdint.h>
+
+#include <platform.h>
+
+#include "common.h"
+
+#define DEBUG
+#include "kprintf.h"
+
+#define MAX_CORES 8
+
+// Payload size in 512-byte sectors: (1 << 11) sectors = 1 MiB, so (16 << 11) sectors = 16 MiB
+#define PAYLOAD_SIZE (16 << 11)
+
+// The sector at which the BBL partition starts
+#define BBL_PARTITION_START_SECTOR 34
+
+#ifndef TL_CLK
+#error Must define TL_CLK
+#endif
+
+#define F_CLK TL_CLK
+
+static volatile uint32_t * const spi = (void *)(SPI_CTRL_ADDR);
+
+static inline uint8_t spi_xfer(uint8_t d)
+{
+    int32_t rx; /* signed on purpose: a set bit 31 in the RX register reads as negative (presumably "FIFO empty" -- confirm) */
+
+    REG32(spi, SPI_REG_TXFIFO) = d; /* queue the outgoing byte */
+    for (;;) { /* re-read the RX register until a non-negative value appears */
+        rx = REG32(spi, SPI_REG_RXFIFO);
+        if (rx >= 0) return rx; /* truncated to the low 8 bits by the return type */
+    }
+}
+
+static inline uint8_t sd_dummy(void)
+{
+ return spi_xfer(0xFF); /* send one 0xFF filler byte; the byte received during that exchange is the result */
+}
+
+static uint8_t sd_cmd(uint8_t cmd, uint32_t arg, uint8_t crc)
+{
+    /* Send one command frame (cmd, 4 argument bytes, crc) and poll for the response byte. */
+    unsigned long tries;
+    uint8_t resp;
+    int shift;
+
+    /* CS mode stays at HOLD after this call; sd_cmd_end() switches it back to AUTO. */
+    REG32(spi, SPI_REG_CSMODE) = SPI_CSMODE_HOLD;
+    sd_dummy();
+    spi_xfer(cmd);
+    /* The 32-bit argument goes out most-significant byte first. */
+    for (shift = 24; shift >= 0; shift -= 8)
+        spi_xfer(arg >> shift);
+    spi_xfer(crc);
+
+    /* Poll at most 1000 times; a byte with bit 7 clear is taken as the response. */
+    for (tries = 1000; tries > 0; tries--) {
+        resp = sd_dummy();
+        if ((resp & 0x80) == 0)
+            return resp;
+    }
+    kputs("sd_cmd: timeout");
+    /* Timed out: the last byte read (bit 7 still set) is what the caller gets. */
+    return resp;
+}
+
+static inline void sd_cmd_end(void)
+{
+ sd_dummy(); /* one more filler-byte exchange before the mode switch below */
+ REG32(spi, SPI_REG_CSMODE) = SPI_CSMODE_AUTO; /* undoes the SPI_CSMODE_HOLD that sd_cmd() sets */
+}
+
+
+static void sd_poweron(void)
+{
+    int pulses = 10; /* number of filler bytes clocked out while CS mode is OFF */
+    /* Initial clock divisor: F_CLK / 300000. */
+    REG32(spi, SPI_REG_SCKDIV) = (F_CLK / 300000UL);
+    REG32(spi, SPI_REG_CSMODE) = SPI_CSMODE_OFF;
+    while (pulses-- > 0)
+        sd_dummy();
+    /* Leave the controller in AUTO chip-select mode for the commands that follow. */
+    REG32(spi, SPI_REG_CSMODE) = SPI_CSMODE_AUTO;
+}
+
+static int sd_cmd0(void)
+{
+    uint8_t resp; /* result is 0 when the card answers 0x01, 1 otherwise */
+    dputs("CMD0");
+    resp = sd_cmd(0x40, 0, 0x95);
+    sd_cmd_end();
+    return (resp == 0x01) ? 0 : 1;
+}
+
+static int sd_cmd8(void)
+{
+ int rc; /* 0 on success; becomes 1 as soon as any check below fails */
+ dputs("CMD8");
+ rc = (sd_cmd(0x48, 0x000001AA, 0x87) != 0x01); /* first response byte must be 0x01 */
+ sd_dummy(); /* command version; reserved (read and ignored) */
+ sd_dummy(); /* reserved (read and ignored) */
+ rc |= ((sd_dummy() & 0xF) != 0x1); /* voltage: low nibble must be 0x1 */
+ rc |= (sd_dummy() != 0xAA); /* check pattern: must equal 0xAA, the low byte of the argument sent */
+ sd_cmd_end();
+ return rc;
+}
+
+static void sd_cmd55(void)
+{
+ sd_cmd(0x77, 0, 0x65); /* response byte is discarded; sd_acmd41() issues this before every 0x69 command */
+ sd_cmd_end();
+}
+
+static int sd_acmd41(void)
+{
+ uint8_t r; /* most recent response byte to the 0x69 command */
+ dputs("ACMD41");
+ do { /* NOTE(review): no retry limit -- this spins for as long as the response stays 0x01 */
+ sd_cmd55();
+ r = sd_cmd(0x69, 0x40000000, 0x77); /* HCS = 1 */
+ } while (r == 0x01); /* NOTE(review): unlike the other commands, no sd_cmd_end() follows the 0x69 command -- confirm intended */
+ return (r != 0x00); /* 0 only when the final response byte is 0x00 */
+}
+
+static int sd_cmd58(void)
+{
+ int rc; /* 0 on success; 1 if either check below fails */
+ dputs("CMD58");
+ rc = (sd_cmd(0x7A, 0, 0xFD) != 0x00); /* response byte must be 0x00 */
+ rc |= ((sd_dummy() & 0x80) != 0x80); /* Power up status: bit 7 of the first trailing byte must be set */
+ sd_dummy(); /* the remaining three trailing bytes are read and ignored */
+ sd_dummy();
+ sd_dummy();
+ sd_cmd_end();
+ return rc;
+}
+
+static int sd_cmd16(void)
+{
+    uint8_t resp; /* result is 0 when the card answers 0x00, 1 otherwise */
+    dputs("CMD16");
+    resp = sd_cmd(0x50, 0x200, 0x15); /* argument 0x200 = 512 */
+    sd_cmd_end();
+    return (resp != 0x00) ? 1 : 0;
+}
+
+static uint16_t crc16_round(uint16_t crc, uint8_t data) { /* fold one data byte into a running 16-bit CRC (bytewise 0x1021 update) */
+    uint16_t acc = (uint16_t)((crc << 8) | (crc >> 8)); /* swap the two bytes of the running value */
+    acc ^= data;
+    acc ^= (acc & 0xff) >> 4; /* bits 7..4 folded into bits 3..0 */
+    acc ^= (uint16_t)(acc << 12);
+    acc ^= (uint16_t)((acc & 0xff) << 5);
+    return acc;
+}
+
+#define SPIN_SHIFT 6 /* the spinner advances once every (1 << 6) = 64 counts */
+#define SPIN_UPDATE(i) (!((i) & ((1 << SPIN_SHIFT)-1))) /* true when the low SPIN_SHIFT bits of i are all zero */
+#define SPIN_INDEX(i) (((i) >> SPIN_SHIFT) & 0x3) /* index 0..3 into spinner[] */
+
+static const char spinner[] = { '-', '/', '|', '\\' }; /* the four glyphs copy() cycles through while loading */
+
+static int copy(void)
+{
+ volatile uint8_t *p = (void *)(PAYLOAD_DEST); /* write cursor; starts at PAYLOAD_DEST and advances one byte per received byte */
+ long i = PAYLOAD_SIZE; /* number of 512-byte blocks still to read */
+ int rc = 0; /* 0 = success, 1 = CRC failure */
+
+ dputs("CMD18");
+ kprintf("LOADING ");
+
+ // Bring-up setting: keep the slower F_CLK / 5000000UL divisor until this path is proven to work.
+ // The faster alternative that was tried here is F_CLK / 16666666UL.
+ REG32(spi, SPI_REG_SCKDIV) = (F_CLK / 5000000UL);
+ if (sd_cmd(0x52, BBL_PARTITION_START_SECTOR, 0xE1) != 0x00) { /* 0x52 = 0x40 | 18 (CMD18); the argument is the first sector to read */
+ sd_cmd_end();
+ return 1;
+ }
+ do { /* one iteration per 512-byte block */
+ uint16_t crc, crc_exp; /* locally computed CRC vs. the CRC received after the block */
+ long n;
+
+ crc = 0;
+ n = 512;
+ while (sd_dummy() != 0xFE); /* discard bytes until the 0xFE start byte; NOTE(review): no timeout here */
+ do {
+ uint8_t x = sd_dummy();
+ *p++ = x;
+ crc = crc16_round(crc, x);
+ } while (--n > 0);
+
+ crc_exp = ((uint16_t)sd_dummy() << 8); /* received CRC arrives high byte first */
+ crc_exp |= sd_dummy();
+
+ if (crc != crc_exp) {
+ kputs("\b- CRC mismatch ");
+ rc = 1;
+ break; /* stop reading; the 0x4C command below is still sent */
+ }
+
+ if (SPIN_UPDATE(i)) { /* redraw the progress spinner whenever the low 6 bits of i are zero */
+ kputc('\b');
+ kputc(spinner[SPIN_INDEX(i)]);
+ }
+ } while (--i > 0);
+ sd_cmd_end();
+
+ sd_cmd(0x4C, 0, 0x01); /* 0x4C = 0x40 | 12 (CMD12); sent on both the success and CRC-failure paths, response ignored */
+ sd_cmd_end();
+ kputs("\b ");
+ return rc;
+}
+
+int main(void)
+{
+    int failed;
+    /* Set UART_TXEN in the UART TX control register before the first kputs(). */
+    REG32(uart, UART_REG_TXCTRL) = UART_TXEN;
+    kputs("INIT");
+    sd_poweron();
+    /* Short-circuit chain: the first step that returns non-zero stops the sequence. */
+    failed = sd_cmd0() || sd_cmd8() || sd_acmd41() ||
+             sd_cmd58() || sd_cmd16() || copy();
+    if (failed) {
+        kputs("ERROR");
+        return 1;
+    }
+
+    kputs("BOOT");
+
+    /* fence.i after copy() has written the payload, before returning to the caller. */
+    __asm__ __volatile__ ("fence.i" : : : "memory");
+
+    return 0;
+}
diff --git a/fpga/src/main/scala/vcu118/bringup/TestHarness.scala b/fpga/src/main/scala/vcu118/bringup/TestHarness.scala
index 28c3ae14..4eaea05b 100644
--- a/fpga/src/main/scala/vcu118/bringup/TestHarness.scala
+++ b/fpga/src/main/scala/vcu118/bringup/TestHarness.scala
@@ -134,8 +134,7 @@ class BringupVCU118FPGATestHarness(override implicit val p: Parameters) extends
/*** DDR ***/
- val ddrWrangler = LazyModule(new ResetWrangler)
- val ddrPlaced = dp(DDROverlayKey).head.place(DDRDesignInput(dp(ExtMem).get.master.base, ddrWrangler.node, harnessSysPLL))
+ val ddrPlaced = dp(DDROverlayKey).head.place(DDRDesignInput(dp(ExtMem).get.master.base, dutWrangler.node, harnessSysPLL))
// connect 1 mem. channel to the FPGA DDR
val inParams = topDesign match { case td: ChipTop =>