diff --git a/CMakeLists.txt b/CMakeLists.txt index eab734e9..95dd15d8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -26,8 +26,10 @@ endif() if (BUILD_TARGET STREQUAL "smp-x86") set(ARCH "x86_64") + option(ENABLE_TOFU "Built-in tofu driver support" OFF) elseif (BUILD_TARGET STREQUAL "smp-arm64") set(ARCH "arm64") + option(ENABLE_TOFU "Built-in tofu driver support" ON) endif() include(GNUInstallDirs) @@ -252,6 +254,7 @@ message("KBUILD_C_FLAGS: ${KBUILD_C_FLAGS}") message("MAP_KERNEL_START: ${MAP_KERNEL_START}") message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}") message("ENABLE_PERF: ${ENABLE_PERF}") +message("ENABLE_TOFU: ${ENABLE_TOFU}") message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}") message("ENABLE_QLMPI: ${ENABLE_QLMPI}") message("ENABLE_UTI: ${ENABLE_UTI}") diff --git a/arch/arm64/kernel/fault.c b/arch/arm64/kernel/fault.c index 0e7b583a..04dbc5ce 100644 --- a/arch/arm64/kernel/fault.c +++ b/arch/arm64/kernel/fault.c @@ -223,7 +223,8 @@ static int do_translation_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - if (addr < USER_END) + // XXX: Handle kernel space page faults for Tofu driver + //if (addr < USER_END) return do_page_fault(addr, esr, regs); do_bad_area(addr, esr, regs); diff --git a/arch/arm64/kernel/include/arch-memory.h b/arch/arm64/kernel/include/arch-memory.h index f5341f5f..eab5cc18 100644 --- a/arch/arm64/kernel/include/arch-memory.h +++ b/arch/arm64/kernel/include/arch-memory.h @@ -94,7 +94,7 @@ extern char _end[]; # define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000) # define TASK_UNMAPPED_BASE UL(0x0000100000000000) # define USER_END UL(0x0000400000000000) -# define MAP_VMAP_START UL(0xffff780000000000) +# define MAP_VMAP_START UL(0xffff7bdfffff0000) # define MAP_VMAP_SIZE UL(0x0000000100000000) # define MAP_FIXED_START UL(0xffff7ffffbdd0000) # define MAP_ST_START UL(0xffff800000000000) @@ -142,6 +142,7 @@ extern char _end[]; # define __PTL1_SHIFT 16 # define PTL4_INDEX_MASK 0 # define PTL3_INDEX_MASK ((UL(1) << 6) - 1) +# 
define PTL3_INDEX_MASK_LINUX ((UL(1) << 10) - 1) # define PTL2_INDEX_MASK ((UL(1) << 13) - 1) # define PTL1_INDEX_MASK PTL2_INDEX_MASK # define __PTL4_CONT_SHIFT (__PTL4_SHIFT + 0) @@ -829,7 +830,13 @@ static inline int pte_is_head(pte_t *ptep, pte_t *old, size_t cont_size) return page_is_contiguous_head(ptep, cont_size); } -struct page_table; +typedef pte_t translation_table_t; +struct page_table { + translation_table_t* tt; + translation_table_t* tt_pa; + int asid; +}; + void arch_adjust_allocate_page_size(struct page_table *pt, uintptr_t fault_addr, pte_t *ptep, @@ -849,7 +856,6 @@ void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable); void set_address_space_id(struct page_table *pt, int asid); int get_address_space_id(const struct page_table *pt); -typedef pte_t translation_table_t; void set_translation_table(struct page_table *pt, translation_table_t* tt); translation_table_t* get_translation_table(const struct page_table *pt); translation_table_t* get_translation_table_as_paddr(const struct page_table *pt); diff --git a/arch/arm64/kernel/include/io.h b/arch/arm64/kernel/include/io.h index 3150f284..10fc9480 100644 --- a/arch/arm64/kernel/include/io.h +++ b/arch/arm64/kernel/include/io.h @@ -80,6 +80,10 @@ static inline uint64_t __raw_readq(const volatile void *addr) return val; } +/* IO barriers */ +#define __iormb() rmb() +#define __iowmb() wmb() + /* * Relaxed I/O memory access primitives. These follow the Device memory * ordering rules but do not guarantee any ordering relative to Normal memory @@ -95,5 +99,20 @@ static inline uint64_t __raw_readq(const volatile void *addr) #define writel_relaxed(v,c) ((void)__raw_writel((uint32_t)(v),(c))) #define writeq_relaxed(v,c) ((void)__raw_writeq((uint64_t)(v),(c))) +/* + * I/O memory access primitives. Reads are ordered relative to any + * following Normal memory access. Writes are ordered relative to any prior + * Normal memory access. 
+ */ +#define readb(c) ({ uint8_t __v = readb_relaxed(c); __iormb(); __v; }) +#define readw(c) ({ uint16_t __v = readw_relaxed(c); __iormb(); __v; }) +#define readl(c) ({ uint32_t __v = readl_relaxed(c); __iormb(); __v; }) +#define readq(c) ({ uint64_t __v = readq_relaxed(c); __iormb(); __v; }) + +#define writeb(v,c) ({ __iowmb(); writeb_relaxed((v),(c)); }) +#define writew(v,c) ({ __iowmb(); writew_relaxed((v),(c)); }) +#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c)); }) +#define writeq(v,c) ({ __iowmb(); writeq_relaxed((v),(c)); }) + #endif /* __KERNEL__ */ #endif /* __ASM_IO_H */ diff --git a/arch/arm64/kernel/memory.c b/arch/arm64/kernel/memory.c index 9d59849e..1274f35c 100644 --- a/arch/arm64/kernel/memory.c +++ b/arch/arm64/kernel/memory.c @@ -150,12 +150,6 @@ void flush_tlb_single(unsigned long addr) arch_flush_tlb_single(asid, addr); } -struct page_table { - translation_table_t* tt; - translation_table_t* tt_pa; - int asid; -}; - extern struct page_table swapper_page_table; static struct page_table *init_pt = &swapper_page_table; static ihk_spinlock_t init_pt_lock; @@ -223,6 +217,11 @@ static inline int ptl4_index(unsigned long addr) int idx = (addr >> PTL4_SHIFT) & PTL4_INDEX_MASK; return idx; } +static inline int ptl3_index_linux(unsigned long addr) +{ + int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK_LINUX; + return idx; +} static inline int ptl3_index(unsigned long addr) { int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK; @@ -281,6 +280,38 @@ static inline pte_t* ptl4_offset(const translation_table_t* ptl4, unsigned long } return ptep; } + +static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr) +{ + pte_t* ptep = NULL; + pte_t pte = 0; + unsigned long phys = 0; + translation_table_t* ptl3 = NULL; + int idx = 0; + + switch (CONFIG_ARM64_PGTABLE_LEVELS) + { + case 4: + pte = ptl4_val(l4p); + phys = pte & PT_PHYSMASK; + ptl3 = phys_to_virt(phys); + idx = ptl3_index_linux(addr); + ptep = (pte_t*)ptl3 + idx; + break; + case 
3: + ptl3 = (translation_table_t*)l4p; + idx = ptl3_index_linux(addr); + ptep = (pte_t*)ptl3 + idx; + break; + case 2: + case 1: + /* When there is no PTL3, pass along the page table address instead of an entry. */ + ptep = (pte_t*)l4p; + break; + } + return ptep; +} + static inline pte_t* ptl3_offset(const pte_t* l4p, unsigned long addr) { pte_t* ptep = NULL; @@ -959,7 +990,12 @@ static void init_normal_area(struct page_table *pt) int i; tt = get_translation_table(pt); - + + setup(tt, + arm64_st_phys_base, + arm64_st_phys_base + (1UL << 40)); + return; + for (i = 0; i < ihk_mc_get_nr_memory_chunks(); i++) { unsigned long map_start, map_end; int numa_id; @@ -1287,6 +1323,57 @@ out: return ret; } +int ihk_mc_linux_pt_virt_to_phys_size(struct page_table *pt, + const void *virt, + unsigned long *phys, + unsigned long *size) +{ + unsigned long v = (unsigned long)virt; + pte_t* ptep; + translation_table_t* tt; + + unsigned long paddr; + unsigned long lsize; + + tt = get_translation_table(pt); + + ptep = ptl4_offset(tt, v); + if (!ptl4_present(ptep)) { + return -EFAULT; + } + + ptep = ptl3_offset_linux(ptep, v); + if (!ptl3_present(ptep)) { + return -EFAULT; + } + if (ptl3_type_block(ptep)) { + paddr = ptl3_phys(ptep); + lsize = PTL3_SIZE; + goto out; + } + + ptep = ptl2_offset(ptep, v); + if (!ptl2_present(ptep)) { + return -EFAULT; + } + if (ptl2_type_block(ptep)) { + paddr = ptl2_phys(ptep); + lsize = PTL2_SIZE; + goto out; + } + + ptep = ptl1_offset(ptep, v); + if (!ptl1_present(ptep)) { + return -EFAULT; + } + paddr = ptl1_phys(ptep); + lsize = PTL1_SIZE; +out: + *phys = paddr | (v & (lsize - 1)); + if(size) *size = lsize; + return 0; +} + int ihk_mc_pt_virt_to_phys_size(struct page_table *pt, const void *virt, @@ -1348,7 +1435,6 @@ int ihk_mc_pt_virt_to_phys(struct page_table *pt, return ihk_mc_pt_virt_to_phys_size(pt, virt, phys, NULL); } - int ihk_mc_pt_print_pte(struct page_table *pt, void *virt) { const unsigned long v = (unsigned long)virt; @@ -1360,6 +1446,15 @@ int ihk_mc_pt_print_pte(struct 
page_table *pt, void *virt) } tt = get_translation_table(pt); + __kprintf("%s: 0x%lx, CONFIG_ARM64_PGTABLE_LEVELS: %d, ptl4_index: %ld, ptl3_index: %ld, ptl2_index: %ld, ptl1_index: %ld\n", + __func__, + v, + CONFIG_ARM64_PGTABLE_LEVELS, + ptl4_index(v), + ptl3_index(v), + ptl2_index(v), + ptl1_index(v)); + ptep = ptl4_offset(tt, v); __kprintf("l4 table: 0x%lX l4idx: %d\n", virt_to_phys(tt), ptl4_index(v)); if (!(ptl4_present(ptep))) { @@ -2147,6 +2242,198 @@ static void unmap_free_stat(struct page *page, unsigned long phys, } } +/* + * Kernel space page table clearing functions. + */ +struct clear_kernel_range_args { + int free_physical; +}; + +static int clear_kernel_range_middle(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end, int level); + +static int clear_kernel_range_l1(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + const struct table { + unsigned long pgsize; + unsigned long cont_pgsize; + } tbl = { + .pgsize = PTL1_SIZE, + .cont_pgsize = PTL1_CONT_SIZE + }; + + struct clear_kernel_range_args *args = args0; + uint64_t phys = 0; + pte_t old; + size_t clear_size; + + if (ptl1_null(ptep)) { + return -ENOENT; + } + + old = xchg(ptep, PTE_NULL); + if (!pte_is_present(&old)) + return 0; + + arch_flush_tlb_single(0, base); + clear_size = pte_is_contiguous(&old) ? 
+ tbl.cont_pgsize : tbl.pgsize; + + dkprintf("%s: 0x%lx:%lu unmapped\n", + __func__, base, clear_size); + + if (args->free_physical) { + phys = ptl1_phys(&old); + ihk_mc_free_pages(phys_to_virt(phys), clear_size >> PAGE_SHIFT); + } + + return 0; +} + +static int clear_kernel_range_l2(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + return clear_kernel_range_middle(args0, ptep, base, start, end, 2); +} + +static int clear_kernel_range_l3(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + return clear_kernel_range_middle(args0, ptep, base, start, end, 3); +} + +static int clear_kernel_range_l4(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end) +{ + return clear_kernel_range_middle(args0, ptep, base, start, end, 4); +} + +static int clear_kernel_range_middle(void *args0, pte_t *ptep, uint64_t base, + uint64_t start, uint64_t end, int level) +{ + const struct table { + walk_pte_t* walk; + walk_pte_fn_t* callback; + unsigned long pgsize; + unsigned long cont_pgsize; + } table[] = { + {walk_pte_l1, clear_kernel_range_l1, PTL2_SIZE, PTL2_CONT_SIZE}, /*PTL2*/ + {walk_pte_l2, clear_kernel_range_l2, PTL3_SIZE, PTL3_CONT_SIZE}, /*PTL3*/ + {walk_pte_l3, clear_kernel_range_l3, PTL4_SIZE, PTL4_CONT_SIZE}, /*PTL4*/ + }; + const struct table tbl = table[level-2]; + + struct clear_kernel_range_args *args = args0; + uint64_t phys = 0; + translation_table_t *tt; + int error; + pte_t old; + size_t clear_size; + + if (ptl_null(ptep, level)) { + return -ENOENT; + } + + dkprintf("%s(level: %d): 0x%lx in 0x%lx-0x%lx\n", + __func__, level, base, start, end); + + if (ptl_type_page(ptep, level) + && ((base < start) || (end < (base + tbl.pgsize)))) { + error = -EINVAL; + ekprintf("clear_range_middle(%p,%p,%lx,%lx,%lx,%d):" + "split page. 
%d\n", + args0, ptep, base, start, end, level, error); + return error; + } + + if (ptl_type_page(ptep, level)) { + old = xchg(ptep, PTE_NULL); + + if (!ptl_present(&old, level)) { + return 0; + } + + arch_flush_tlb_single(0, base); + + clear_size = pte_is_contiguous(&old) ? + tbl.cont_pgsize : tbl.pgsize; + + dkprintf("%s(level: %d): 0x%lx:%lu unmapped\n", + __func__, level, base, clear_size); + + if (args->free_physical) { + phys = ptl_phys(&old, level); + ihk_mc_free_pages(phys_to_virt(phys), clear_size >> PAGE_SHIFT); + } + + return 0; + } + + tt = (translation_table_t*)phys_to_virt(ptl_phys(ptep, level)); + error = tbl.walk(tt, base, start, end, tbl.callback, args0); + if (error && (error != -ENOENT)) { + return error; + } + + if (args->free_physical) { + if ((start <= base) && ((base + tbl.pgsize) <= end)) { + ptl_clear(ptep, level); + arch_flush_tlb_single(0, base); + ihk_mc_free_pages(tt, 1); + } + } + + return 0; +} + +static int clear_kernel_range(uintptr_t start, uintptr_t end, int free_physical) +{ + const struct table { + walk_pte_t* walk; + walk_pte_fn_t* callback; + } tables[] = { + {walk_pte_l2, clear_kernel_range_l2}, /*second*/ + {walk_pte_l3, clear_kernel_range_l3}, /*first*/ + {walk_pte_l4, clear_kernel_range_l4}, /*zero*/ + }; + const struct table initial_lookup = tables[CONFIG_ARM64_PGTABLE_LEVELS - 2]; + + int error; + struct clear_kernel_range_args args; + translation_table_t* tt; + unsigned long irqflags; + + dkprintf("%s: start: 0x%lx, end: 0x%lx, free phys: %d\n", + __func__, start, end, free_physical); + + if (start <= USER_END) + return -EINVAL; + + args.free_physical = free_physical; + + irqflags = ihk_mc_spinlock_lock(&init_pt_lock); + tt = get_translation_table(get_init_page_table()); + error = initial_lookup.walk(tt, 0, + (start & ~(0xffff000000000000)), + (end & ~(0xffff000000000000)), + initial_lookup.callback, &args); + dkprintf("%s: start: 0x%lx, end: 0x%lx, free phys: %d, ret: %d\n", + __func__, start, end, free_physical, 
error); + + ihk_mc_spinlock_unlock(&init_pt_lock, irqflags); + return error; +} + +int ihk_mc_clear_kernel_range(void *start, void *end) +{ +#define KEEP_PHYSICAL 0 + return clear_kernel_range((uintptr_t)start, (uintptr_t)end, KEEP_PHYSICAL); +} + +/* + * User space page table clearing functions. + */ struct clear_range_args { int free_physical; struct memobj *memobj; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index d9471053..852d04b9 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -174,9 +174,13 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) arch_show_interrupt_context(regs); +#if 0 info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_ILLOPC; +#endif + info.si_signo = SIGSTOP; + info.si_errno = 0; info._sifields._sigfault.si_addr = (void*)regs->pc; arm64_notify_die("Oops - bad mode", regs, &info, 0); diff --git a/config.h.in b/config.h.in index f0323e1a..6f35d33f 100644 --- a/config.h.in +++ b/config.h.in @@ -16,6 +16,9 @@ /* whether perf is enabled */ #cmakedefine ENABLE_PERF 1 +/* whether built-in tofu driver is enabled */ +#cmakedefine ENABLE_TOFU 1 + /* whether qlmpi is enabled */ #cmakedefine ENABLE_QLMPI 1 diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 003caaa2..4360c0f7 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -152,6 +152,7 @@ struct program_load_desc { int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int straight_map; size_t straight_map_threshold; + int enable_tofu; int nr_processes; int process_rank; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; @@ -198,6 +199,7 @@ struct syscall_response { unsigned long req_thread_status; long ret; unsigned long fault_address; + void *pde_data; }; struct syscall_ret_desc { diff --git a/executer/kernel/mcctrl/syscall.c b/executer/kernel/mcctrl/syscall.c index 5c5cb565..443d717c 100644 --- a/executer/kernel/mcctrl/syscall.c +++ 
b/executer/kernel/mcctrl/syscall.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -52,6 +53,7 @@ #include "mcctrl.h" #include #include +#include #define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6) @@ -1869,6 +1871,52 @@ void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet, res->ret = ret; res->stid = stid; +#ifdef ENABLE_TOFU + /* Record PDE_DATA after ioctl() calls for Tofu driver */ + if (packet->req.number == __NR_ioctl && ret == 0) { + char *pathbuf, *fullpath; + struct fd f = fdget(packet->req.args[0]); + + if (!f.file) { + goto out_notify; + } + + pathbuf = kmalloc(PATH_MAX, GFP_ATOMIC); + if (!pathbuf) { + goto out_fdput; + } + + fullpath = d_path(&f.file->f_path, pathbuf, PATH_MAX); + if (IS_ERR(fullpath)) { + goto out_free; + } + + if (!strncmp("/proc/tofu/dev/", fullpath, 15)) { + res->pde_data = PDE_DATA(file_inode(f.file)); + printk("%s: fd: %ld, path: %s, PDE_DATA: 0x%lx\n", + __func__, + packet->req.args[0], + fullpath, + (unsigned long)res->pde_data); + printk("%s: pgd_index: %ld, pmd_index: %ld, pte_index: %ld\n", + __func__, + pgd_index((unsigned long)res->pde_data), + pmd_index((unsigned long)res->pde_data), + pte_index((unsigned long)res->pde_data)); +#ifdef CONFIG_ARM64 + printk("CONFIG_ARM64_VA_BITS: %d, PGDIR_SHIFT: %d\n", + CONFIG_ARM64_VA_BITS, PGDIR_SHIFT); +#endif + } + +out_free: + kfree(pathbuf); +out_fdput: + fdput(f); + } + +out_notify: +#endif if (__notify_syscall_requester(os, packet, res) < 0) { printk("%s: WARNING: failed to notify PID %d\n", __FUNCTION__, packet->pid); diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index a210ee8c..2f9f5aab 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -200,6 +200,7 @@ static char *mpol_bind_nodes = NULL; static int uti_thread_rank = 0; static int uti_use_last_cpu = 0; static int enable_uti = 0; +static int enable_tofu = 0; /* Partitioned execution (e.g., for MPI) */ static int nr_processes = 0; @@ -1724,6 +1725,12 
@@ static struct option mcexec_options[] = { .flag = &enable_uti, .val = 1, }, + { + .name = "enable-tofu", + .has_arg = no_argument, + .flag = &enable_tofu, + .val = 1, + }, { .name = "debug-mcexec", .has_arg = no_argument, @@ -2698,6 +2705,7 @@ int main(int argc, char **argv) desc->straight_map = straight_map; desc->straight_map_threshold = straight_map_threshold; + desc->enable_tofu = enable_tofu; /* user_start and user_end are set by this call */ if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) { diff --git a/ihk b/ihk index 66ef07bf..bda97c4c 160000 --- a/ihk +++ b/ihk @@ -1 +1 @@ -Subproject commit 66ef07bfb2f41644f5ddb81296113cbe711eb992 +Subproject commit bda97c4cf7385896700868c4efbabe62f685ae82 diff --git a/kernel/CMakeLists.txt b/kernel/CMakeLists.txt index 710f78d2..82b3d94a 100644 --- a/kernel/CMakeLists.txt +++ b/kernel/CMakeLists.txt @@ -51,6 +51,12 @@ set(MCKERNEL_SRCS ${IHK_FULL_SOURCE_DIR}/cokernel/smp/${ARCH}/setup.c ) +if (ENABLE_TOFU) + list(APPEND MCKERNEL_SRCS + tofu/tof_utofu_main.c + ) +endif() + if (ENABLE_UBSAN) add_compile_options(-fsanitize=undefined) list(APPEND MCKERNEL_SRCS ubsan.c) diff --git a/kernel/host.c b/kernel/host.c index 4322d381..801adafa 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -549,6 +549,15 @@ static int process_msg_prepare_process(unsigned long rphys) proc->straight_map = pn->straight_map; proc->straight_map_threshold = pn->straight_map_threshold; +#ifdef ENABLE_TOFU + proc->enable_tofu = pn->enable_tofu; + if (proc->enable_tofu) { + extern void tof_utofu_finalize(void); + + tof_utofu_finalize(); + } +#endif + #ifdef PROFILE_ENABLE proc->profile = pn->profile; thread->profile = pn->profile; diff --git a/kernel/include/kref.h b/kernel/include/kref.h new file mode 100644 index 00000000..47dc0140 --- /dev/null +++ b/kernel/include/kref.h @@ -0,0 +1,84 @@ +/* + * kref.h - library routines for handling generic reference counted objects + * (based on Linux implementation) + * + * This file is 
released under the GPLv2. + * + */ + +#ifndef _KREF_H_ +#define _KREF_H_ + +#include +#include + +/* + * Bit 30 marks a kref as McKernel internal. + * This can be used to distinguish krefs from Linux and + * it also ensures that a non deallocated kref will not + * crash the Linux allocator. + */ +#define MCKERNEL_KREF_MARK (1U << 30) + +struct kref { + ihk_atomic_t refcount; +}; + +#define KREF_INIT(n) { .refcount = IHK_ATOMIC_INIT(MCKERNEL_KREF_MARK + (n)), } /* (n) is parenthesized so expression arguments expand safely */ + +/** + * kref_init - initialize object. + * @kref: object in question. + */ +static inline void kref_init(struct kref *kref) +{ + ihk_atomic_set(&kref->refcount, MCKERNEL_KREF_MARK + 1); +} + +static inline unsigned int kref_read(const struct kref *kref) +{ + return (ihk_atomic_read(&kref->refcount) & ~(MCKERNEL_KREF_MARK)); +} + +static inline unsigned int kref_is_mckernel(const struct kref *kref) +{ + return (ihk_atomic_read(&kref->refcount) & (MCKERNEL_KREF_MARK)); +} + +/** + * kref_get - increment refcount for object. + * @kref: object. + */ +static inline void kref_get(struct kref *kref) +{ + ihk_atomic_inc(&kref->refcount); +} + +/** + * kref_put - decrement refcount for object. + * @kref: object. + * @release: pointer to the function that will clean up the object when the + * last reference to the object is released. + * This pointer is required, and it is not acceptable to pass kfree + * in as this function. If the caller does pass kfree to this + * function, you will be publicly mocked mercilessly by the kref + * maintainer, and anyone else who happens to notice it. You have + * been warned. + * + * Decrement the refcount, and if 0, call release(). + * Return 1 if the object was removed, otherwise return 0. Beware, if this + * function returns 0, you still can not count on the kref from remaining in + * memory. Only use the return value if you want to see if the kref is now + * gone, not present. 
+ */ +static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) +{ + //if (ihk_atomic_dec_and_test(&kref->refcount)) { + if (ihk_atomic_sub_return(1, &kref->refcount) == MCKERNEL_KREF_MARK) { + release(kref); + return 1; + } + return 0; +} + +#endif /* _KREF_H_ */ diff --git a/kernel/include/mman.h b/kernel/include/mman.h index a618040f..f4ec6d18 100644 --- a/kernel/include/mman.h +++ b/kernel/include/mman.h @@ -79,4 +79,14 @@ extern int sysctl_overcommit_memory; +/* + * This looks more complex than it should be. But we need to + * get the type for the ~ right in round_down (it needs to be + * as wide as the result!), and we want to evaluate the macro + * arguments just once each. + */ +#define __round_mask(x, y) ((__typeof__(x))((y)-1)) +#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1) +#define round_down(x, y) ((x) & ~__round_mask(x, y)) + #endif /* HEADER_MMAN_H */ diff --git a/kernel/include/process.h b/kernel/include/process.h index 89a4fbf7..aafd0ce1 100644 --- a/kernel/include/process.h +++ b/kernel/include/process.h @@ -565,6 +565,7 @@ struct process { int thp_disable; int straight_map; + int enable_tofu; size_t straight_map_threshold; // perf_event @@ -589,6 +590,9 @@ struct process { int coredump_barrier_count, coredump_barrier_count2; mcs_rwlock_lock_t coredump_lock; // lock for coredump +#define MAX_FD_PDE 1024 + void *fd_pde_data[MAX_FD_PDE]; + char *fd_path[MAX_FD_PDE]; }; /* diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index f0a757c2..89071e0f 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -219,6 +219,7 @@ struct program_load_desc { int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */ int straight_map; size_t straight_map_threshold; + int enable_tofu; int nr_processes; int process_rank; __cpu_set_unit cpu_set[PLD_CPU_SET_SIZE]; @@ -329,6 +330,7 @@ struct syscall_response { unsigned long req_thread_status; long ret; unsigned long fault_address; + void 
*pde_data; }; struct syscall_post { diff --git a/kernel/include/tofu/generate_headers.sh b/kernel/include/tofu/generate_headers.sh new file mode 100755 index 00000000..f60d0bcc --- /dev/null +++ b/kernel/include/tofu/generate_headers.sh @@ -0,0 +1,34 @@ +#!/bin/bash + +SCRIPT="`readlink -f ${BASH_SOURCE[0]:-}`" +SCRIPT_DIR=$(dirname ${SCRIPT}) +CURRENT_DIR=`pwd` + +cd ${SCRIPT_DIR} + +DWARF_TOOL=~/src/mckernel-apollo+a64fx/mckernel/tools/dwarf-extract-struct/dwarf-extract-struct + +KMODULE=tof_utofu.ko +if ! tar zxvf /lib/modules/`uname -r`+debug/extra/tof_module.tar.gz ${KMODULE} 2>&1 > /dev/null; then + echo "error: uncompressing kernel module with debug symbols" + cd - + exit 1 +fi + +${DWARF_TOOL} ${KMODULE} tof_utofu_device enabled > tofu_generated-tof_utofu_device.h +${DWARF_TOOL} ${KMODULE} tof_utofu_cq common tni cqid trans steering mb num_stag | sed "s/struct FILL_IN_MANUALLY trans;/#include \"tof_utofu_cq_trans.h\"/g" > tofu_generated-tof_utofu_cq.h +${DWARF_TOOL} ${KMODULE} tof_utofu_mbpt ucq iova sg nsgents mbptstart pgsz kref > tofu_generated-tof_utofu_mbpt.h +rm ${KMODULE} + +KMODULE=tof_core.ko +if ! 
tar zxvf /lib/modules/`uname -r`+debug/extra/tof_module.tar.gz ${KMODULE} 2>&1 > /dev/null; then + echo "error: uncompressing kernel module with debug symbols" + cd - + exit 1 +fi + +${DWARF_TOOL} ${KMODULE} tof_core_cq reg | sed "s/struct FILL_IN_MANUALLY reg;/#include \"tof_core_cq_reg.h\"/g" > tofu_generated-tof_core_cq.h +rm ${KMODULE} + +#cat tofu_generated*.h +cd - diff --git a/kernel/include/tofu/tof_core_cq_reg.h b/kernel/include/tofu/tof_core_cq_reg.h new file mode 100644 index 00000000..423534aa --- /dev/null +++ b/kernel/include/tofu/tof_core_cq_reg.h @@ -0,0 +1,4 @@ + struct { + void *cq; + void *cqs; + } reg; diff --git a/kernel/include/tofu/tof_icc.h b/kernel/include/tofu/tof_icc.h new file mode 100644 index 00000000..45dd3418 --- /dev/null +++ b/kernel/include/tofu/tof_icc.h @@ -0,0 +1,836 @@ +#ifndef _TOF_ICC_H_ +#define _TOF_ICC_H_ + +#include +#include +typedef uint64_t phys_addr_t; + +/* @ref.impl include/linux/bitops.h */ +/* + * Create a contiguous bitmask starting at bit position @l and ending at + * position @h. For example + * GENMASK_ULL(39, 21) gives us the 64bit vector 0x000000ffffe00000. 
+ */ +#define GENMASK(h, l) \ + (((~0UL) << (l)) & (~0UL >> (BITS_PER_LONG - 1 - (h)))) + +/* constants related to the Tofu Interconnect D */ + +#define TOF_ICC_NTNIS 6 +#define TOF_ICC_NCQS 12 +#define TOF_ICC_NBGS 48 +#define TOF_ICC_NBCHS 16 +#define TOF_ICC_NPORTS 10 +#define TOF_ICC_NVMSIDS 16 + +#define TOF_ICC_RH_LEN 8 +#define TOF_ICC_ECRC_LEN 4 +#define TOF_ICC_FRAME_ALIGN 32 +#define TOF_ICC_TLP_LEN(len) (((len) + 1) * TOF_ICC_FRAME_ALIGN) +#define TOF_ICC_TLP_PAYLOAD_MAX (TOF_ICC_TLP_LEN(61) - TOF_ICC_ECRC_LEN) +#define TOF_ICC_FRAME_LEN(len) (TOF_ICC_RH_LEN + TOF_ICC_TLP_LEN(len)) +#define TOF_ICC_FRAME_LEN_MIN TOF_ICC_FRAME_LEN(2) +#define TOF_ICC_FRAME_LEN_MAX TOF_ICC_FRAME_LEN(61) +#define TOF_ICC_FRAME_BUF_SIZE_BITS 11 +#define TOF_ICC_FRAME_BUF_SIZE (1 << TOF_ICC_FRAME_BUF_SIZE_BITS) +#define TOF_ICC_FRAME_BUF_ALIGN_BITS 8 +#define TOF_ICC_FRAME_BUF_ALIGN (1 << TOF_ICC_FRAME_BUF_ALIGN_BITS) +#define TOF_ICC_PB_SIZE_BITS 11 +#define TOF_ICC_PB_SIZE (1 << TOF_ICC_PB_SIZE_BITS) +#define TOF_ICC_PB_ALIGN_BITS 11 +#define TOF_ICC_PB_ALIGN (1 << TOF_ICC_PB_ALIGN_BITS) + +#define TOF_ICC_ST_ALIGN_BITS 8 +#define TOF_ICC_ST_ALIGN (1 << TOF_ICC_ST_ALIGN_BITS) + +#define TOF_ICC_MBT_ALIGN_BITS 8 +#define TOF_ICC_MBT_ALIGN (1 << TOF_ICC_MBT_ALIGN_BITS) + +#define TOF_ICC_MBPT_ALIGN_BITS 8 +#define TOF_ICC_MBPT_ALIGN (1 << TOF_ICC_MBPT_ALIGN_BITS) + +#define TOF_ICC_BG_BSEQ_SIZE_BITS 24 +#define TOF_ICC_BG_BSEQ_SIZE (1 << TOF_ICC_BG_BSEQ_SIZE_BITS) + +#define TOF_ICC_BCH_DMA_ALIGN_BITS 8 +#define TOF_ICC_BCH_DMA_ALIGN (1 << TOF_ICC_BCH_DMA_ALIGN_BITS) + +/* this is a CPU-specific constant, but referred in the ICC spec. 
*/ +#define TOF_ICC_CACHE_LINE_SIZE_BITS 8 +#define TOF_ICC_CACHE_LINE_SIZE (1 << TOF_ICC_CACHE_LINE_SIZE_BITS) + +#define TOF_ICC_TOQ_DESC_SIZE_BITS 5 +#define TOF_ICC_TOQ_DESC_SIZE (1 << TOF_ICC_TOQ_DESC_SIZE_BITS) +#define TOF_ICC_TCQ_DESC_SIZE_BITS 3 +#define TOF_ICC_TCQ_DESC_SIZE (1 << TOF_ICC_TCQ_DESC_SIZE_BITS) +#define TOF_ICC_TCQ_NLINE_BITS (TOF_ICC_CACHE_LINE_SIZE_BITS - TOF_ICC_TCQ_DESC_SIZE_BITS) +#define TOF_ICC_MRQ_DESC_SIZE_BITS 5 +#define TOF_ICC_MRQ_DESC_SIZE (1 << TOF_ICC_MRQ_DESC_SIZE_BITS) +#define TOF_ICC_PBQ_DESC_SIZE_BITS 3 +#define TOF_ICC_PBQ_DESC_SIZE (1 << TOF_ICC_PBQ_DESC_SIZE_BITS) +#define TOF_ICC_PRQ_DESC_SIZE_BITS 3 +#define TOF_ICC_PRQ_DESC_SIZE (1 << TOF_ICC_PRQ_DESC_SIZE_BITS) +#define TOF_ICC_PRQ_NLINE_BITS (TOF_ICC_CACHE_LINE_SIZE_BITS - TOF_ICC_PBQ_DESC_SIZE_BITS) + +#define TOF_ICC_TOQ_SIZE_NTYPES 6 +#define TOF_ICC_TOQ_SIZE_BITS(size) ((size) * 2 + 11) +#define TOF_ICC_TOQ_SIZE(size) (1 << TOF_ICC_TOQ_SIZE_BITS(size)) +#define TOF_ICC_TOQ_LEN(size) (TOF_ICC_TOQ_SIZE(size) * TOF_ICC_TOQ_DESC_SIZE) +#define TOF_ICC_TCQ_LEN(size) (TOF_ICC_TOQ_SIZE(size) * TOF_ICC_TCQ_DESC_SIZE) + +#define TOF_ICC_MRQ_SIZE_NTYPES 6 +#define TOF_ICC_MRQ_SIZE_BITS(size) ((size) * 2 + 11) +#define TOF_ICC_MRQ_SIZE(size) (1 << TOF_ICC_MRQ_SIZE_BITS(size)) +#define TOF_ICC_MRQ_LEN(size) (TOF_ICC_MRQ_SIZE(size) * TOF_ICC_MRQ_DESC_SIZE) + +#define TOF_ICC_PBQ_SIZE_NTYPES 6 +#define TOF_ICC_PBQ_SIZE_BITS(size) ((size) * 2 + 11) +#define TOF_ICC_PBQ_SIZE(size) (1 << TOF_ICC_PBQ_SIZE_BITS(size)) +#define TOF_ICC_PBQ_LEN(size) (TOF_ICC_PBQ_SIZE(size) * TOF_ICC_PBQ_DESC_SIZE) + +#define TOF_ICC_PRQ_SIZE_NTYPES 6 +#define TOF_ICC_PRQ_SIZE_BITS(size) ((size) * 2 + 11) +#define TOF_ICC_PRQ_SIZE(size) (1 << TOF_ICC_PRQ_SIZE_BITS(size)) +#define TOF_ICC_PRQ_LEN(size) (TOF_ICC_PRQ_SIZE(size) * TOF_ICC_PRQ_DESC_SIZE) + +#define TOF_ICC_STEERING_TABLE_ALIGN_BITS 8 +#define TOF_ICC_STEERING_TABLE_ALIGN (1 << TOF_ICC_STEERING_TABLE_ALIGN_BITS) +#define 
TOF_ICC_STEERING_SIZE_BITS 4 +#define TOF_ICC_STEERING_SIZE (1 << TOF_ICC_STEERING_SIZE_BITS) + +#define TOF_ICC_MB_TABLE_ALIGN_BITS 8 +#define TOF_ICC_MB_TABLE_ALIGN (1 << TOF_ICC_MB_TABLE_ALIGN_BITS) +#define TOF_ICC_MB_SIZE_BITS 4 +#define TOF_ICC_MB_SIZE (1 << TOF_ICC_MB_SIZE_BITS) +#define TOF_ICC_MB_PS_ENCODE(bits) ((bits) % 9 == 3 ? (bits) / 9 - 1 : (bits) / 13 + 3) + +#define TOF_ICC_MBPT_ALIGN_BITS 8 +#define TOF_ICC_MBPT_ALIGN (1 << TOF_ICC_MBPT_ALIGN_BITS) +#define TOF_ICC_MBPT_SIZE_BITS 3 +#define TOF_ICC_MBPT_SIZE (1 << TOF_ICC_MBPT_SIZE_BITS) + +#define TOF_ICC_X_BITS 5 +#define TOF_ICC_Y_BITS 5 +#define TOF_ICC_Z_BITS 5 +#define TOF_ICC_A_BITS 1 +#define TOF_ICC_B_BITS 2 +#define TOF_ICC_C_BITS 1 +#define TOF_ICC_MAX_X_SIZE (1 << TOF_ICC_X_BITS) +#define TOF_ICC_MAX_Y_SIZE (1 << TOF_ICC_Y_BITS) +#define TOF_ICC_MAX_Z_SIZE (1 << TOF_ICC_Z_BITS) +#define TOF_ICC_A_SIZE 2 +#define TOF_ICC_B_SIZE 3 +#define TOF_ICC_C_SIZE 2 +#define TOF_ICC_X_MASK ((1 << TOF_ICC_X_BITS) - 1) +#define TOF_ICC_Y_MASK ((1 << TOF_ICC_Y_BITS) - 1) +#define TOF_ICC_Z_MASK ((1 << TOF_ICC_Z_BITS) - 1) +#define TOF_ICC_A_MASK ((1 << TOF_ICC_A_BITS) - 1) +#define TOF_ICC_B_MASK ((1 << TOF_ICC_B_BITS) - 1) +#define TOF_ICC_C_MASK ((1 << TOF_ICC_C_BITS) - 1) +#define TOF_ICC_ABC_SIZE (TOF_ICC_A_SIZE * TOF_ICC_B_SIZE * TOF_ICC_C_SIZE) + +static inline int tof_icc_get_framelen(int len){ + len = TOF_ICC_RH_LEN + round_up(len + TOF_ICC_ECRC_LEN, TOF_ICC_FRAME_ALIGN); + if(len < TOF_ICC_FRAME_LEN_MIN){ + len = TOF_ICC_FRAME_LEN_MIN; + } + return len; +} + +/** Descriptors **/ +/** commands and rcodes **/ +enum { + TOF_ICC_TOQ_NOP, + TOF_ICC_TOQ_PUT, + TOF_ICC_TOQ_WRITE_PIGGYBACK_BUFFER, + TOF_ICC_TOQ_PUT_PIGGYBACK, + TOF_ICC_TOQ_GET, + TOF_ICC_TOQ_GETL, + TOF_ICC_TOQ_ATOMIC_READ_MODIFY_WRITE = 0xe, + TOF_ICC_TOQ_TRANSMIT_RAW_PACKET1 = 0x10, + TOF_ICC_TOQ_TRANSMIT_RAW_PACKET2, + TOF_ICC_TOQ_TRANSMIT_SYSTEM_PACKET1, + TOF_ICC_TOQ_TRANSMIT_SYSTEM_PACKET2, + + TOF_ICC_TOQ_NCOMMANDS, +}; + 
+enum { + TOF_ICC_MRQ_ATOMIC_READ_MODIFY_WRITE_HALFWAY_NOTICE = 0x1, + TOF_ICC_MRQ_ATOMIC_READ_MODIFY_WRITE_NOTICE, + TOF_ICC_MRQ_ATOMIC_READ_MODIFY_WRITE_REMOTE_ERROR, + TOF_ICC_MRQ_PUT_HALFWAY_NOTICE, + TOF_ICC_MRQ_PUT_LAST_HALFWAY_NOTICE, + TOF_ICC_MRQ_GET_HALFWAY_NOTICE, + TOF_ICC_MRQ_GET_LAST_HALFWAY_NOTICE, + TOF_ICC_MRQ_PUT_NOTICE, + TOF_ICC_MRQ_PUT_LAST_NOTICE, + TOF_ICC_MRQ_GET_NOTICE, + TOF_ICC_MRQ_GET_LAST_NOTICE, + TOF_ICC_MRQ_PUT_REMOTE_ERROR, + TOF_ICC_MRQ_PUT_LAST_REMOTE_ERROR, + TOF_ICC_MRQ_GET_REMOTE_ERROR, + TOF_ICC_MRQ_GET_LAST_REMOTE_ERROR, + + TOF_ICC_MRQ_NCOMMANDS, +}; + +enum { + TOF_ICC_PRQ_UNKNOWN_TLP, + TOF_ICC_PRQ_SYSTEM_TLP, + TOF_ICC_PRQ_ADDRESS_RANGE_EXCEPTION = 0x6, + TOF_ICC_PRQ_CQ_EXCEPTION = 0x8, + TOF_ICC_PRQ_ILLEGAL_TLP_FLAGS, + TOF_ICC_PRQ_ILLEGAL_TLP_LENGTH, + TOF_ICC_PRQ_CQ_ERROR = 0xc, +}; + +/** structures **/ +struct tof_icc_steering_entry { + uint64_t res1:6; + uint64_t readonly:1; + uint64_t enable:1; + uint64_t mbva:32; + uint64_t res2:8; + uint64_t mbid:16; + uint64_t length; /* for optimization */ +}; + +struct tof_icc_mb_entry { + uint64_t ps:3; + uint64_t res1:4; + uint64_t enable:1; + uint64_t ipa:32; + uint64_t res2:24; + uint64_t npage; /* for optimization */ +}; + +struct tof_icc_mbpt_entry { + uint64_t res1:7; + uint64_t enable:1; + uint64_t res2:4; + uint64_t ipa:28; + uint64_t res3:24; +}; + +struct tof_icc_cq_stag_offset { + uint64_t offset:40; + uint64_t stag:18; + uint64_t cqid:6; +}; + +struct tof_icc_toq_common_header1 { + uint8_t interrupt:1; + uint8_t res1:4; + uint8_t source_type:2; + uint8_t flip:1; + uint8_t command; + union { + uint8_t mtu; + struct { + uint8_t res:4; + uint8_t op:4; + } armw; + } mtuop; + uint8_t sps:4; + uint8_t pa:1; + uint8_t pb:2; + uint8_t pc:1; + uint8_t rx; + uint8_t ry; + uint8_t rz; + uint8_t ra:1; + uint8_t rb:2; + uint8_t rc:1; + uint8_t res3:1; + uint8_t ri:3; +}; + +struct tof_icc_toq_common_header2 { + uint8_t gap; + uint8_t s:1; + uint8_t r:1; + uint8_t q:1; + 
uint8_t p:1; + uint8_t res1:1; + uint8_t j:1; + uint8_t res2:2; + uint16_t edata; + union{ + struct { + uint32_t length:24; + uint32_t res:8; + } normal; + struct { + uint32_t length:6; + uint32_t res:26; + } piggyback; + } len; +}; + +struct tof_icc_toq_descriptor { + struct tof_icc_toq_common_header1 head1; + uint64_t res[3]; +}; + +struct tof_icc_toq_nop { + struct tof_icc_toq_common_header1 head1; + uint64_t res[3]; +}; + +struct tof_icc_toq_put { + struct tof_icc_toq_common_header1 head1; + struct tof_icc_toq_common_header2 head2; + struct tof_icc_cq_stag_offset remote; + struct tof_icc_cq_stag_offset local; +}; + +struct tof_icc_toq_write_piggyback_buffer { + struct tof_icc_toq_common_header1 head1; + uint64_t data[3]; +}; + +struct tof_icc_toq_put_piggyback { + struct tof_icc_toq_common_header1 head1; + struct tof_icc_toq_common_header2 head2; + struct tof_icc_cq_stag_offset remote; + uint64_t data; +}; + +struct tof_icc_toq_get { + struct tof_icc_toq_common_header1 head1; + struct tof_icc_toq_common_header2 head2; + struct tof_icc_cq_stag_offset remote; + struct tof_icc_cq_stag_offset local; +}; + +struct tof_icc_toq_atomic_read_modify_write { + struct tof_icc_toq_common_header1 head1; + struct tof_icc_toq_common_header2 head2; + struct tof_icc_cq_stag_offset remote; + uint64_t data; +}; + +struct tof_icc_toq_transmit_raw_packet1 { + struct tof_icc_toq_common_header1 head1; + uint8_t gap; + uint8_t res4[3]; + uint32_t length:12; + uint32_t res5:20; + uint64_t res6; + uint64_t pa:48; /* for optimization */ + uint64_t res7:16; +}; + +struct tof_icc_toq_transmit_raw_packet2 { + uint8_t interrupt:1; + uint8_t res1:4; + uint8_t source_type:2; + uint8_t flip:1; + uint8_t command; + uint8_t res2:7; + uint8_t e:1; + uint8_t res3[4]; + uint8_t port:5; + uint8_t res4:1; + uint8_t vc:2; + uint8_t gap; + uint8_t res5[3]; + uint32_t length:12; + uint32_t res6:20; + uint64_t res7; + uint64_t pa:48; /* for optimization */ + uint64_t res8:16; +}; + +struct 
tof_icc_toq_transmit_system_packet { + struct tof_icc_toq_common_header1 head1; /* rx, ry, rz should be rdx, rdy, rdz */ + uint8_t gap; + uint8_t res4[3]; + uint32_t length:12; + uint32_t res5:20; + uint64_t res6; + uint64_t pa:48; /* for optimization */ + uint64_t res7:16; +}; + +struct tof_icc_tcq_descriptor { + uint8_t res1:5; + uint8_t counter_unmatch:1; + uint8_t res2:1; + uint8_t flip:1; + uint8_t rcode; + uint8_t res3[2]; + union{ + struct { + uint32_t length:24; + uint32_t res:8; + } normal; + struct { + uint32_t length:6; + uint32_t res:26; + } piggyback; + } len; +}; + +struct tof_icc_mrq_common_header1 { + uint8_t res1:7; + uint8_t flip:1; + uint8_t id; + uint8_t rcode; + uint8_t res2:4; + uint8_t pa:1; + uint8_t pb:2; + uint8_t pc:1; + uint8_t x; + uint8_t y; + uint8_t z; + uint8_t a:1; + uint8_t b:2; + uint8_t c:1; + uint8_t res3:1; + uint8_t i:3; +}; + +struct tof_icc_mrq_common_header2 { + uint8_t res1; + uint8_t res2:4; + uint8_t initial:1; + uint8_t res3:3; + uint16_t edata; + union { + struct { + uint32_t length:11; + uint32_t res:21; + } normal; + struct { + uint32_t op:4; + uint32_t res:28; + } armw; + } lenop; +}; + +struct tof_icc_mrq_atomic_read_modify_write_halfway_notice { + struct tof_icc_mrq_common_header1 head1; + struct tof_icc_mrq_common_header2 head2; + struct tof_icc_cq_stag_offset local; + struct tof_icc_cq_stag_offset remote; +}; + +struct tof_icc_mrq_descriptor { + struct tof_icc_mrq_common_header1 head1; + struct tof_icc_mrq_common_header2 head2; + struct tof_icc_cq_stag_offset cso1; + struct tof_icc_cq_stag_offset cso2; +}; + +struct tof_icc_pbq_descriptor { + uint64_t res1:7; + uint64_t f:1; + uint64_t res2:3; + uint64_t pa:29; + uint64_t res3:24; +}; + +struct tof_icc_prq_descriptor { + uint64_t rcode:7; + uint64_t f:1; + uint64_t res1:3; + uint64_t pa:29; + uint64_t res2:8; + uint64_t w:1; + uint64_t res3:5; + uint64_t l:1; + uint64_t e:1; + uint64_t res4:8; +}; + + +/** Registers **/ +/* useful packed structures */ +struct 
tof_icc_reg_subnet { + uint64_t lz:6; + uint64_t sz:6; + uint64_t nz:6; + uint64_t ly:6; + uint64_t sy:6; + uint64_t ny:6; + uint64_t lx:6; + uint64_t sx:6; + uint64_t nx:6; + uint64_t res:10; +}; + +struct tof_icc_reg_bg_address { + uint32_t bgid:6; + uint32_t tni:3; + uint32_t c:1; + uint32_t b:2; + uint32_t a:1; + uint32_t z:5; + uint32_t y:5; + uint32_t x:5; + uint32_t pc:1; + uint32_t pb:2; + uint32_t pa:1; +}; + +/* relative offset of interrupt controller registers */ +#define TOF_ICC_IRQREG_IRR 0x0 +#define TOF_ICC_IRQREG_IMR 0x8 +#define TOF_ICC_IRQREG_IRC 0x10 +#define TOF_ICC_IRQREG_IMC 0x18 +#define TOF_ICC_IRQREG_ICL 0x20 + +/* TOFU REGISTERS */ +#define tof_icc_reg_pa 0x40000000 + +/* CQ */ +#define TOF_ICC_REG_CQ_PA(tni, cqid) (tof_icc_reg_pa + 0 + (tni) * 0x1000000 + (cqid) * 0x10000) +#define TOF_ICC_REG_CQ_TOQ_DIRECT_DESCRIPTOR 0x0 +#define TOF_ICC_REG_CQ_TOQ_FETCH_START 0x40 +#define TOF_ICC_REG_CQ_MRQ_FULL_POINTER 0x48 +#define TOF_ICC_REG_CQ_TOQ_PIGGYBACK_BUFFER0 0x50 +#define TOF_ICC_REG_CQ_TOQ_PIGGYBACK_BUFFER1 0x58 +#define TOF_ICC_REG_CQ_TOQ_PIGGYBACK_BUFFER2 0x60 +#define TOF_ICC_REG_CQ_TCQ_NUM_NOTICE 0x68 +#define TOF_ICC_REG_CQ_MRQ_NUM_NOTICE 0x70 +#define TOF_ICC_REG_CQ_TX_PAYLOAD_BYTE 0x78 +#define TOF_ICC_REG_CQ_RX_PAYLOAD_BYTE 0x80 +#define TOF_ICC_REG_CQ_DUMP_START 0x0 +#define TOF_ICC_REG_CQ_DUMP_END 0x88 + +/* BCH */ +#define TOF_ICC_REG_BCH_PA(tni, bgid) (tof_icc_reg_pa + 0x0000e00000 + (tni) * 0x1000000 + (bgid) * 0x10000) +#define TOF_ICC_REG_BCH_IDATA 0x800 +#define TOF_ICC_REG_BCH_READY 0x840 +#define TOF_ICC_REG_BCH_READY_STATE BIT(63) +#define TOF_ICC_REG_BCH_IGNORED_SIGNAL_COUNT 0x848 +#define TOF_ICC_REG_BCH_DUMP_START 0x800 +#define TOF_ICC_REG_BCH_DUMP_END 0x850 + +/* CQS */ +#define TOF_ICC_REG_CQS_PA(tni, cqid) (tof_icc_reg_pa + 0x0000400000 + (tni) * 0x1000000 + (cqid) * 0x10000) +#define TOF_ICC_REG_CQS_STATUS 0x0 +#define TOF_ICC_REG_CQS_STATUS_DESCRIPTOR_PROCESS_STOP BIT(63) +#define 
TOF_ICC_REG_CQS_STATUS_DESCRIPTOR_FETCH_STOP BIT(62) +#define TOF_ICC_REG_CQS_STATUS_BLANK_ENTRY_FLIP_BIT BIT(61) +#define TOF_ICC_REG_CQS_STATUS_CACHE_FLUSH_BUSY BIT(60) +#define TOF_ICC_REG_CQS_STATUS_CQ_ENABLE BIT(59) +#define TOF_ICC_REG_CQS_STATUS_SESSION_DEAD BIT(58) +#define TOF_ICC_REG_CQS_STATUS_SESSION_OFFSET_OVERFLOW BIT(57) +#define TOF_ICC_REG_CQS_STATUS_SESSION_OFFSET GENMASK(56, 32) +#define TOF_ICC_REG_CQS_STATUS_NEXT_DESCRIPTOR_OFFSET GENMASK(29, 5) +#define TOF_ICC_REG_CQS_ENABLE 0x8 +#define TOF_ICC_REG_CQS_CACHE_FLUSH 0x10 +#define TOF_ICC_REG_CQS_FETCH_STOP 0x18 +#define TOF_ICC_REG_CQS_MODE 0x20 +#define TOF_ICC_REG_CQS_MODE_SYSTEM BIT(63) +#define TOF_ICC_REG_CQS_MODE_TRP2_ENABLE BIT(62) +#define TOF_ICC_REG_CQS_MODE_TRP1_ENABLE BIT(61) +#define TOF_ICC_REG_CQS_MODE_SESSION BIT(60) +#define TOF_ICC_REG_CQS_MODE_SUBNET_NX GENMASK(53, 48) +#define TOF_ICC_REG_CQS_MODE_SUBNET_SX GENMASK(47, 42) +#define TOF_ICC_REG_CQS_MODE_SUBNET_LX GENMASK(41, 36) +#define TOF_ICC_REG_CQS_MODE_SUBNET_NY GENMASK(35, 30) +#define TOF_ICC_REG_CQS_MODE_SUBNET_SY GENMASK(29, 24) +#define TOF_ICC_REG_CQS_MODE_SUBNET_LY GENMASK(23, 18) +#define TOF_ICC_REG_CQS_MODE_SUBNET_NZ GENMASK(17, 12) +#define TOF_ICC_REG_CQS_MODE_SUBNET_SZ GENMASK(11, 6) +#define TOF_ICC_REG_CQS_MODE_SUBNET_LZ GENMASK(5, 0) +#define TOF_ICC_REG_CQS_GPID 0x28 +#define TOF_ICC_REG_CQS_TOQ_IPA 0x30 +#define TOF_ICC_REG_CQS_TOQ_SIZE 0x38 +#define TOF_ICC_REG_CQS_TCQ_IPA 0x40 +#define TOF_ICC_REG_CQS_TCQ_IPA_CACHE_INJECTION BIT(63) +#define TOF_ICC_REG_CQS_MRQ_IPA 0x48 +#define TOF_ICC_REG_CQS_MRQ_IPA_CACHE_INJECTION BIT(63) +#define TOF_ICC_REG_CQS_MRQ_SIZE 0x50 +#define TOF_ICC_REG_CQS_MRQ_MASK 0x58 +#define TOF_ICC_REG_CQS_TCQ_DESCRIPTOR_COALESCING_TIMER 0x60 +#define TOF_ICC_REG_CQS_MRQ_DESCRIPTOR_COALESCING_TIMER 0x68 +#define TOF_ICC_REG_CQS_MRQ_INTERRUPT_COALESCING_TIMER 0x70 +#define TOF_ICC_REG_CQS_MRQ_INTERRUPT_COALESCING_COUNT 0x78 +#define TOF_ICC_REG_CQS_TOQ_DIRECT_SOURCE_COUNT 0x80 
+#define TOF_ICC_REG_CQS_TOQ_DIRECT_DESCRIPTOR_COUNT 0x88 +#define TOF_ICC_REG_CQS_MEMORY_BLOCK_TABLE_ENABLE 0x90 +#define TOF_ICC_REG_CQS_MEMORY_BLOCK_TABLE_IPA 0x98 +#define TOF_ICC_REG_CQS_MEMORY_BLOCK_TABLE_SIZE 0xa0 +#define TOF_ICC_REG_CQS_STEERING_TABLE_ENABLE 0xa8 +#define TOF_ICC_REG_CQS_STEERING_TABLE_IPA 0xb0 +#define TOF_ICC_REG_CQS_STEERING_TABLE_SIZE 0xb8 +#define TOF_ICC_REG_CQS_MRQ_INTERRUPT_MASK 0xc0 +#define TOF_ICC_REG_CQS_IRR 0xc8 +#define TOF_ICC_REG_CQS_IMR 0xd0 +#define TOF_ICC_REG_CQS_IRC 0xd8 +#define TOF_ICC_REG_CQS_IMC 0xe0 +#define TOF_ICC_REG_CQS_ICL 0xe8 +#define TOF_ICC_REG_CQS_DUMP_START 0x0 +#define TOF_ICC_REG_CQS_DUMP_END 0xf0 + +/* BGS */ +#define TOF_ICC_REG_BGS_PA(tni, bgid) (tof_icc_reg_pa + 0x0000800000 + (tni) * 0x1000000 + (bgid) * 0x10000) +#define TOF_ICC_REG_BGS_ENABLE 0x0 +#define TOF_ICC_REG_BGS_IRR 0x8 +#define TOF_ICC_REG_BGS_IMR 0x10 +#define TOF_ICC_REG_BGS_IRC 0x18 +#define TOF_ICC_REG_BGS_IMC 0x20 +#define TOF_ICC_REG_BGS_ICL 0x28 +#define TOF_ICC_REG_BGS_STATE 0x30 +#define TOF_ICC_REG_BGS_STATE_ENABLE BIT(0) +#define TOF_ICC_REG_BGS_EXCEPTION_INFO_GPID_UNMATCH 0x38 +#define TOF_ICC_REG_BGS_EXCEPTION_INFO_GPID_UNMATCH_BG_ADDRESS GENMASK(27, 0) +#define TOF_ICC_REG_BGS_EXCEPTION_INFO_ADDRESS_UNMATCH 0x40 +#define TOF_ICC_REG_BGS_EXCEPTION_INFO_ADDRESS_UNMATCH_BG_ADDRESS GENMASK(27, 0) +#define TOF_ICC_REG_BGS_SIGNAL_A 0x48 +#define TOF_ICC_REG_BGS_SIGNAL_A_SIG_RECV BIT(63) +#define TOF_ICC_REG_BGS_SIGNAL_A_TLP_RECV BIT(62) +#define TOF_ICC_REG_BGS_SIGNAL_A_SIG_SEND BIT(61) +#define TOF_ICC_REG_BGS_SIGNAL_A_OP_TYPE GENMASK(3, 0) +#define TOF_ICC_REG_BGS_SIGNAL_B 0x50 +#define TOF_ICC_REG_BGS_SIGNAL_B_SIG_RECV BIT(63) +#define TOF_ICC_REG_BGS_SIGNAL_B_TLP_RECV BIT(62) +#define TOF_ICC_REG_BGS_SIGNAL_B_SIG_SEND BIT(61) +#define TOF_ICC_REG_BGS_SIGNAL_B_OP_TYPE GENMASK(3, 0) +#define TOF_ICC_REG_BGS_SIGNAL_MASK 0x58 +#define TOF_ICC_REG_BGS_SIGNAL_MASK_SIG_RECV BIT(63) +#define TOF_ICC_REG_BGS_SIGNAL_MASK_TLP_RECV 
BIT(62) +#define TOF_ICC_REG_BGS_SIGNAL_MASK_SIG_SEND BIT(61) +#define TOF_ICC_REG_BGS_SIGNAL_MASK_TLP_SEND BIT(60) +#define TOF_ICC_REG_BGS_LOCAL_LINK 0x60 +#define TOF_ICC_REG_BGS_LOCAL_LINK_BGID_RECV GENMASK(37, 32) +#define TOF_ICC_REG_BGS_LOCAL_LINK_BGID_SEND GENMASK(5, 0) +#define TOF_ICC_REG_BGS_REMOTE_LINK 0x68 +#define TOF_ICC_REG_BGS_REMOTE_LINK_BG_ADDRESS_RECV GENMASK(59, 32) +#define TOF_ICC_REG_BGS_REMOTE_LINK_BG_ADDRESS_SEND GENMASK(31, 0) +#define TOF_ICC_REG_BGS_SUBNET_SIZE 0x70 +#define TOF_ICC_REG_BGS_GPID_BSEQ 0x78 +#define TOF_ICC_REG_BGS_DATA_A0 0x108 +#define TOF_ICC_REG_BGS_DATA_AE 0x178 +#define TOF_ICC_REG_BGS_DATA_B0 0x188 +#define TOF_ICC_REG_BGS_DATA_BE 0x1f8 +#define TOF_ICC_REG_BGS_BCH_MASK 0x800 +#define TOF_ICC_REG_BGS_BCH_MASK_MASK BIT(63) +#define TOF_ICC_REG_BGS_BCH_MASK_STATUS 0x808 +#define TOF_ICC_REG_BGS_BCH_MASK_STATUS_RUN BIT(63) +#define TOF_ICC_REG_BGS_BCH_NOTICE_IPA 0x810 +#define TOF_ICC_REG_BGS_DUMP_START 0x0 +#define TOF_ICC_REG_BGS_DUMP_END 0x818 + +/* TNI */ +#define TOF_ICC_REG_TNI_PA(tni) (tof_icc_reg_pa + 0x0000c00000 + (tni) * 0x1000000) +#define TOF_ICC_REG_TNI_IRR 0x8 +#define TOF_ICC_REG_TNI_IMR 0x10 +#define TOF_ICC_REG_TNI_IRC 0x18 +#define TOF_ICC_REG_TNI_IMC 0x20 +#define TOF_ICC_REG_TNI_ICL 0x28 +#define TOF_ICC_REG_TNI_STATE 0x30 +#define TOF_ICC_REG_TNI_STATE_MASK GENMASK(1, 0) +#define TOF_ICC_REG_TNI_STATE_DISABLE 0 +#define TOF_ICC_REG_TNI_STATE_NORMAL 2 +#define TOF_ICC_REG_TNI_STATE_ERROR 3 +#define TOF_ICC_REG_TNI_ENABLE 0x38 +#define TOF_ICC_REG_TNI_CQ_PRESENT 0x40 +#define TOF_ICC_REG_TNI_EXCEPTION_INFO_INACTIVE_BG 0x48 +#define TOF_ICC_REG_TNI_EXCEPTION_INFO_INACTIVE_BG_DEST_BG GENMASK(37, 32) +#define TOF_ICC_REG_TNI_EXCEPTION_INFO_INACTIVE_BG_SOURCE_BG_ADDRESS GENMASK(27, 0) +#define TOF_ICC_REG_TNI_PRQ_FULL_POINTER 0x100 +#define TOF_ICC_REG_TNI_PBQ_PA 0x108 +#define TOF_ICC_REG_TNI_PBQ_SIZE 0x110 +#define TOF_ICC_REG_TNI_PRQ_PA 0x118 +#define TOF_ICC_REG_TNI_PRQ_PA_CACHE_INJECTION BIT(63) 
+#define TOF_ICC_REG_TNI_PRQ_SIZE 0x120 +#define TOF_ICC_REG_TNI_PRQ_MASK 0x128 +#define TOF_ICC_REG_TNI_PRQ_ENTRY_COALESCING_TIMER 0x130 +#define TOF_ICC_REG_TNI_PRQ_INTERRUPT_COALESCING_TIMER 0x138 +#define TOF_ICC_REG_TNI_PRQ_INTERRUPT_COALESCING_COUNT 0x140 +#define TOF_ICC_REG_TNI_SEND_COUNT 0x148 +#define TOF_ICC_REG_TNI_NO_SEND_COUNT 0x150 +#define TOF_ICC_REG_TNI_BLOCK_SEND_COUNT 0x158 +#define TOF_ICC_REG_TNI_RECEIVE_COUNT 0x160 +#define TOF_ICC_REG_TNI_NO_RECEIVE_COUNT 0x168 +#define TOF_ICC_REG_TNI_NUM_SEND_TLP 0x170 +#define TOF_ICC_REG_TNI_BYTE_SEND_TLP 0x178 +#define TOF_ICC_REG_TNI_NUM_SEND_SYSTEM_TLP 0x180 +#define TOF_ICC_REG_TNI_NUM_RECEIVE_TLP 0x188 +#define TOF_ICC_REG_TNI_BYTE_RECEIVE_TLP 0x190 +#define TOF_ICC_REG_TNI_NUM_RECEIVE_NULLIFIED_TLP 0x198 +#define TOF_ICC_REG_TNI_RX_NUM_UNKNOWN_TLP 0x1a0 +#define TOF_ICC_REG_TNI_RX_NUM_SYSTEM_TLP 0x1a8 +#define TOF_ICC_REG_TNI_RX_NUM_EXCEPTION_TLP 0x1b0 +#define TOF_ICC_REG_TNI_RX_NUM_DISCARD_UNKNOWN_TLP 0x1b8 +#define TOF_ICC_REG_TNI_RX_NUM_DISCARD_SYSTEM_TLP 0x1c0 +#define TOF_ICC_REG_TNI_RX_NUM_DISCARD_EXCEPTION_TLP 0x1c8 +#define TOF_ICC_REG_TNI_DUMP_START 0x8 +#define TOF_ICC_REG_TNI_DUMP_END 0x1d0 + +/* Port */ +#define TOF_ICC_REG_PORT_PA(port) (tof_icc_reg_pa + 0x0006000000 + (port) * 0x1000) +#define TOF_ICC_REG_PORT_TX_VC0_ZERO_CREDIT_COUNT 0x0 +#define TOF_ICC_REG_PORT_TX_VC1_ZERO_CREDIT_COUNT 0x8 +#define TOF_ICC_REG_PORT_TX_VC2_ZERO_CREDIT_COUNT 0x10 +#define TOF_ICC_REG_PORT_TX_VC3_ZERO_CREDIT_COUNT 0x18 +#define TOF_ICC_REG_PORT_FREE_RUN_COUNT 0x80 +#define TOF_ICC_REG_PORT_NUM_SEND_DLLP 0xc0 +#define TOF_ICC_REG_PORT_NUM_SEND_TLP 0xc8 +#define TOF_ICC_REG_PORT_BYTE_SEND_TLP 0xd0 +#define TOF_ICC_REG_PORT_NUM_SEND_SYSTEM_TLP 0xd8 +#define TOF_ICC_REG_PORT_NUM_SEND_NULLIFIED_TLP 0xe0 +#define TOF_ICC_REG_PORT_NUM_TX_DISCARD_SYSTEM_TLP 0xe8 +#define TOF_ICC_REG_PORT_NUM_TX_DISCARD_NORMAL_TLP 0xf0 +#define TOF_ICC_REG_PORT_NUM_TX_FILTERED_NORMAL_TLP 0xf8 +#define 
TOF_ICC_REG_PORT_NUM_VIRTUAL_CUT_THROUGH_TLP 0x100 +#define TOF_ICC_REG_PORT_NUM_GENERATE_NULLIFIED_TLP 0x108 +#define TOF_ICC_REG_PORT_NUM_RECEIVE_DLLP 0x110 +#define TOF_ICC_REG_PORT_NUM_RECEIVE_TLP 0x118 +#define TOF_ICC_REG_PORT_BYTE_RECEIVE_TLP 0x120 +#define TOF_ICC_REG_PORT_NUM_RECEIVE_SYSTEM_TLP 0x128 +#define TOF_ICC_REG_PORT_NUM_RECEIVE_NULLIFIED_TLP 0x130 +#define TOF_ICC_REG_PORT_NUM_RX_DISCARD_SYSTEM_TLP 0x138 +#define TOF_ICC_REG_PORT_NUM_RX_DISCARD_NORMAL_TLP 0x140 +#define TOF_ICC_REG_PORT_NUM_RX_FILTERED_NORMAL_TLP 0x158 +#define TOF_ICC_REG_PORT_NUM_RX_DISCARD_NULLIFIED_TLP 0x160 +#define TOF_ICC_REG_PORT_FRAME_LCRC_ERROR_COUNT 0x170 +#define TOF_ICC_REG_PORT_TX_RETRY_BUFFER_CE_COUNT 0x180 +#define TOF_ICC_REG_PORT_RX_VC_BUFFER_CE_COUNT 0x188 +#define TOF_ICC_REG_PORT_XB_CE_COUNT 0x190 +#define TOF_ICC_REG_PORT_ACK_NACK_TIME_OUT_COUNT 0x198 +#define TOF_ICC_REG_PORT_SLICE0_FCS_ERROR_COUNT 0x1a0 +#define TOF_ICC_REG_PORT_SLICE1_FCS_ERROR_COUNT 0x1a8 +#define TOF_ICC_REG_PORT_DUMP_START 0x0 +#define TOF_ICC_REG_PORT_DUMP_END 0x1b0 + +/* XB */ +#define TOF_ICC_REG_XB_PA (tof_icc_reg_pa + 0x000600f000) +#define TOF_ICC_REG_XB_STQ_ENABLE 0x0 +#define TOF_ICC_REG_XB_STQ_UPDATE_INTERVAL 0x8 +#define TOF_ICC_REG_XB_STQ_PA 0x10 +#define TOF_ICC_REG_XB_STQ_SIZE 0x18 +#define TOF_ICC_REG_XB_STQ_NEXT_OFFSET 0x20 +#define TOF_ICC_REG_XB_DUMP_START 0x0 +#define TOF_ICC_REG_XB_DUMP_END 0x28 + +#define TOF_ICC_XB_TC_DATA_CYCLE_COUNT(tni) ((tni) * 0x10 + 0x0) +#define TOF_ICC_XB_TC_WAIT_CYCLE_COUNT(tni) ((tni) * 0x10 + 0x8) +#define TOF_ICC_XB_TD_DATA_CYCLE_COUNT(tnr) ((tnr) * 0x10 + 0x60) +#define TOF_ICC_XB_TD_WAIT_CYCLE_COUNT(tnr) ((tnr) * 0x10 + 0x68) + +/* Tofu */ +#define TOF_ICC_REG_TOFU_PA (tof_icc_reg_pa + 0x0007000000) +#define TOF_ICC_REG_TOFU_NODE_ADDRESS 0x0 +#define TOF_ICC_REG_TOFU_NODE_ADDRESS_X GENMASK(22, 18) +#define TOF_ICC_REG_TOFU_NODE_ADDRESS_Y GENMASK(17, 13) +#define TOF_ICC_REG_TOFU_NODE_ADDRESS_Z GENMASK(12, 8) +#define 
TOF_ICC_REG_TOFU_NODE_ADDRESS_A BIT(7) +#define TOF_ICC_REG_TOFU_NODE_ADDRESS_B GENMASK(6, 5) +#define TOF_ICC_REG_TOFU_NODE_ADDRESS_C BIT(4) +#define TOF_ICC_REG_TOFU_PORT_SETTING 0x8 +#define TOF_ICC_REG_TOFU_TD_TLP_FILTER(tnr) ((tnr) * 0x10 + 0x10) +#define TOF_ICC_REG_TOFU_TD_SETTINGS(tnr) ((tnr) * 0x10 + 0x18) +#define TOF_ICC_REG_TOFU_TNR_MSI_BASE 0xc0 +#define TOF_ICC_REG_TOFU_TNR_IRR 0xc8 +#define TOF_ICC_REG_TOFU_TNR_IMR 0xd0 +#define TOF_ICC_REG_TOFU_TNR_IRC 0xd8 +#define TOF_ICC_REG_TOFU_TNR_IMC 0xe0 +#define TOF_ICC_REG_TOFU_TNR_ICL 0xe8 +#define TOF_ICC_REG_TOFU_TNI_VMS(tni, vmsid) ((tni) * 0x100 + (vmsid) * 0x8 + 0x100) +#define TOF_ICC_REG_TOFU_TNI_VMS_CQ00(tni) ((tni) * 0x100 + 0x180) +#define TOF_ICC_REG_TOFU_TNI_VMS_BG00(tni) ((tni) * 0x100 + 0x1a0) +#define TOF_ICC_REG_TOFU_TNI_VMS_BG16(tni) ((tni) * 0x100 + 0x1a8) +#define TOF_ICC_REG_TOFU_TNI_VMS_BG32(tni) ((tni) * 0x100 + 0x1b0) +#define TOF_ICC_REG_TOFU_TNI_MSI_BASE(tni) ((tni) * 0x100 + 0x1c0) +#define TOF_ICC_REG_TOFU_DUMP_START 0x0 +#define TOF_ICC_REG_TOFU_DUMP_END 0x6c8 + +/** Interrupts **/ +#define TOF_ICC_IRQ_CQS_TOQ_READ_EXCEPTION BIT(0) +#define TOF_ICC_IRQ_CQS_TOQ_DIRECT_DESCRIPTOR_EXCEPTION BIT(1) +#define TOF_ICC_IRQ_CQS_TOQ_MARKED_UE BIT(2) +#define TOF_ICC_IRQ_CQS_TCQ_WRITE_EXCEPTION BIT(3) +#define TOF_ICC_IRQ_CQS_TOQ_SOURCE_TYPE_EXCEPTION BIT(4) +#define TOF_ICC_IRQ_CQS_TCQ_WRITE_ACKNOWLEDGE BIT(5) +#define TOF_ICC_IRQ_CQS_MRQ_WRITE_ACKNOWLEDGE BIT(7) +#define TOF_ICC_IRQ_CQS_MRQ_WRITE_EXCEPTION BIT(8) +#define TOF_ICC_IRQ_CQS_MRQ_OVERFLOW BIT(9) +#define TOF_ICC_IRQ_CQS_STEERING_READ_EXCEPTION BIT(36) +#define TOF_ICC_IRQ_CQS_MB_READ_EXCEPTION BIT(38) +#define TOF_ICC_IRQ_CQS_PAYLOAD_READ_EXCEPTION BIT(39) +#define TOF_ICC_IRQ_CQS_PAYLOAD_WRITE_EXCEPTION BIT(40) +/* Just for convenience of the irr value; no CQS CACHEFLUSH_TIMEOUT interrupt actually exists */ +#define TOF_ICC_DUMMY_IRQ_CQS_CACHEFLUSH_TIMEOUT BIT(63) + +#define TOF_ICC_IRQ_BGS_NODE_ADDRESS_UNMATCH BIT(0) +#define 
TOF_ICC_IRQ_BGS_BG_RECV_ADDRESS_EXCEPTION BIT(1) +#define TOF_ICC_IRQ_BGS_BG_SEND_ADDRESS_EXCEPTION BIT(2) +#define TOF_ICC_IRQ_BGS_GPID_UNMATCH BIT(3) +#define TOF_ICC_IRQ_BGS_BSEQ_UNMATCH BIT(4) +#define TOF_ICC_IRQ_BGS_SIGNAL_STATE_ERROR BIT(5) +#define TOF_ICC_IRQ_BGS_SYNCHRONIZATION_ACKNOWLEDGE BIT(24) +#define TOF_ICC_IRQ_BGS_ERROR_SYNCHRONIZATION_ACKNOWLEDGE BIT(25) +#define TOF_ICC_IRQ_BGS_DMA_COMPLETION_EXCEPTION BIT(26) + +#define TOF_ICC_IRQ_TNI_PBQ_READ_EXCEPTION BIT(0) +#define TOF_ICC_IRQ_TNI_PBQ_MARKED_UE BIT(1) +#define TOF_ICC_IRQ_TNI_PBQ_UNDERFLOW BIT(2) +#define TOF_ICC_IRQ_TNI_PRQ_PACKET_DISCARD BIT(3) +#define TOF_ICC_IRQ_TNI_PRQ_WRITE_ACKNOWLEDGE BIT(4) +#define TOF_ICC_IRQ_TNI_PRQ_WRITE_EXCEPTION BIT(5) +#define TOF_ICC_IRQ_TNI_PRQ_OVERFLOW BIT(6) +#define TOF_ICC_IRQ_TNI_INACTIVE_BG BIT(16) +#define TOF_ICC_IRQ_TNI_STAGE2_TRANSLATION_FAULT BIT(32) + +#define TOF_ICC_IRQ_TNR_TNR0_RX_FILTER_OUT BIT(0) +#define TOF_ICC_IRQ_TNR_TNR0_TX_FILTER_OUT BIT(1) +#define TOF_ICC_IRQ_TNR_TNR0_PORT_ERROR BIT(2) +#define TOF_ICC_IRQ_TNR_TNR0_DATELINE_ERROR BIT(3) +#define TOF_ICC_IRQ_TNR_TNR0_ROUTING_ERROR BIT(4) +#define TOF_ICC_IRQ_TNR_TNR1_RX_FILTER_OUT BIT(6) +#define TOF_ICC_IRQ_TNR_TNR1_TX_FILTER_OUT BIT(7) +#define TOF_ICC_IRQ_TNR_TNR1_PORT_ERROR BIT(8) +#define TOF_ICC_IRQ_TNR_TNR1_DATELINE_ERROR BIT(9) +#define TOF_ICC_IRQ_TNR_TNR1_ROUTING_ERROR BIT(10) +#define TOF_ICC_IRQ_TNR_TNR2_RX_FILTER_OUT BIT(12) +#define TOF_ICC_IRQ_TNR_TNR2_TX_FILTER_OUT BIT(13) +#define TOF_ICC_IRQ_TNR_TNR2_PORT_ERROR BIT(14) +#define TOF_ICC_IRQ_TNR_TNR2_DATELINE_ERROR BIT(15) +#define TOF_ICC_IRQ_TNR_TNR2_ROUTING_ERROR BIT(16) +#define TOF_ICC_IRQ_TNR_TNR3_RX_FILTER_OUT BIT(18) +#define TOF_ICC_IRQ_TNR_TNR3_TX_FILTER_OUT BIT(19) +#define TOF_ICC_IRQ_TNR_TNR3_PORT_ERROR BIT(20) +#define TOF_ICC_IRQ_TNR_TNR3_DATELINE_ERROR BIT(21) +#define TOF_ICC_IRQ_TNR_TNR3_ROUTING_ERROR BIT(22) +#define TOF_ICC_IRQ_TNR_TNR4_RX_FILTER_OUT BIT(24) +#define 
TOF_ICC_IRQ_TNR_TNR4_TX_FILTER_OUT BIT(25) +#define TOF_ICC_IRQ_TNR_TNR4_PORT_ERROR BIT(26) +#define TOF_ICC_IRQ_TNR_TNR4_DATELINE_ERROR BIT(27) +#define TOF_ICC_IRQ_TNR_TNR4_ROUTING_ERROR BIT(28) +#define TOF_ICC_IRQ_TNR_TNR5_RX_FILTER_OUT BIT(30) +#define TOF_ICC_IRQ_TNR_TNR5_TX_FILTER_OUT BIT(31) +#define TOF_ICC_IRQ_TNR_TNR5_PORT_ERROR BIT(32) +#define TOF_ICC_IRQ_TNR_TNR5_DATELINE_ERROR BIT(33) +#define TOF_ICC_IRQ_TNR_TNR5_ROUTING_ERROR BIT(34) +#define TOF_ICC_IRQ_TNR_TNR6_RX_FILTER_OUT BIT(36) +#define TOF_ICC_IRQ_TNR_TNR6_TX_FILTER_OUT BIT(37) +#define TOF_ICC_IRQ_TNR_TNR6_PORT_ERROR BIT(38) +#define TOF_ICC_IRQ_TNR_TNR6_DATELINE_ERROR BIT(39) +#define TOF_ICC_IRQ_TNR_TNR6_ROUTING_ERROR BIT(40) +#define TOF_ICC_IRQ_TNR_TNR7_RX_FILTER_OUT BIT(42) +#define TOF_ICC_IRQ_TNR_TNR7_TX_FILTER_OUT BIT(43) +#define TOF_ICC_IRQ_TNR_TNR7_PORT_ERROR BIT(44) +#define TOF_ICC_IRQ_TNR_TNR7_DATELINE_ERROR BIT(45) +#define TOF_ICC_IRQ_TNR_TNR7_ROUTING_ERROR BIT(46) +#define TOF_ICC_IRQ_TNR_TNR8_RX_FILTER_OUT BIT(48) +#define TOF_ICC_IRQ_TNR_TNR8_TX_FILTER_OUT BIT(49) +#define TOF_ICC_IRQ_TNR_TNR8_PORT_ERROR BIT(50) +#define TOF_ICC_IRQ_TNR_TNR8_DATELINE_ERROR BIT(51) +#define TOF_ICC_IRQ_TNR_TNR8_ROUTING_ERROR BIT(52) +#define TOF_ICC_IRQ_TNR_TNR9_RX_FILTER_OUT BIT(54) +#define TOF_ICC_IRQ_TNR_TNR9_TX_FILTER_OUT BIT(55) +#define TOF_ICC_IRQ_TNR_TNR9_PORT_ERROR BIT(56) +#define TOF_ICC_IRQ_TNR_TNR9_DATELINE_ERROR BIT(57) +#define TOF_ICC_IRQ_TNR_TNR9_ROUTING_ERROR BIT(58) + +#endif + +/* vim: set noet ts=8 sw=8 sts=0 tw=0 : */ + diff --git a/kernel/include/tofu/tof_uapi.h b/kernel/include/tofu/tof_uapi.h new file mode 100644 index 00000000..8d9fec80 --- /dev/null +++ b/kernel/include/tofu/tof_uapi.h @@ -0,0 +1,319 @@ +#ifndef _TOF_UAPI_H_ +#define _TOF_UAPI_H_ + +#include +#include + +enum tof_sig_errno_cq { + TOF_TOQ_DIRECT_DESCRIPTOR_EXCEPTION, + TOF_TOQ_SOURCE_TYPE_EXCEPTION, + TOF_MRQ_OVERFLOW, + TOF_CQS_CACHEFLUSH_TIMEOUT, +}; + +enum tof_sig_errno_bg { + 
TOF_NODE_ADDRESS_UNMATCH, + TOF_BSEQ_UNMATCH, + TOF_SIGNAL_STATE_ERROR, + TOF_ERROR_SYNCHRONIZATION_ACKNOWLEDGE, +}; + +#define TOF_UAPI_VERSION 0x2a00 + +struct tof_init_cq { + uint16_t version; + uint8_t session_mode; + uint8_t toq_size; + uint8_t mrq_size; + uint8_t num_stag; + uint8_t tcq_cinj; + uint8_t mrq_cinj; + void *toq_mem; + void *tcq_mem; + void *mrq_mem; +}; + +struct tof_alloc_stag { + uint32_t flags; + int stag; + uint64_t offset; + void *va; + uint64_t len; +}; + +struct tof_free_stags { + uint16_t num; + int *stags; +}; + +struct tof_addr { + uint8_t pa; + uint8_t pb; + uint8_t pc; + uint8_t x; + uint8_t y; + uint8_t z; + uint8_t a; + uint8_t b; + uint8_t c; +}; + +struct tof_set_bg { + int tni; + int gate; + int source_lgate; + struct tof_addr source_raddr; + int source_rtni; + int source_rgate; + int dest_lgate; + struct tof_addr dest_raddr; + int dest_rtni; + int dest_rgate; +}; + +struct tof_enable_bch { + void *addr; + int bseq; + int num; + struct tof_set_bg *bgs; +}; + +struct tof_set_subnet { + int res0; + int res1; + uint8_t nx; + uint8_t sx; + uint8_t lx; + uint8_t ny; + uint8_t sy; + uint8_t ly; + uint8_t nz; + uint8_t sz; + uint8_t lz; +}; + +struct tof_reg_user { + uid_t uid; + uint32_t gpid; + struct tof_set_subnet subnet; + uint64_t *cqmask; + uint64_t *bgmask; +}; + +struct tof_notify_linkdown { + int num; + struct { + uint8_t x; + uint8_t y; + uint8_t z; + uint8_t a; + uint8_t b; + uint8_t c; + uint16_t ports; + } *items; +}; + +struct tof_get_port_stat { + int port_no; + uint64_t mask; + uint64_t pa[31]; +}; + +struct tof_get_cq_stat { + int tni; + int cqid; + uint64_t txbyte; + uint64_t rxbyte; +}; + +struct tof_load_register { + uint64_t pa; + uint64_t len; + void *buf; +}; + +struct tof_load_resource { + uint64_t rsc_id; + uint64_t offset; + uint64_t len; + void *buf; +}; + +union tof_trans_table_bitfield { + struct { + uint64_t start:36; + uint64_t len:27; + uint64_t ps_code:1; + } bits; + uint64_t atomic; +}; + +struct 
tof_trans_table { + union tof_trans_table_bitfield steering; + union tof_trans_table_bitfield mbpt; +}; + +void tof_utofu_set_linkdown_callback(void (*callback)(int, const void *)); +void tof_utofu_unset_linkdown_callback(void); + +#define TOF_MMAP_CQ_REGISTER 0 +#define TOF_MMAP_CQ_TRANSTABLE (PAGE_SIZE) +#define TOF_MMAP_BCH_REGISTER 0 +#define TOF_MMAP_XB_STQ 0 + +#define TOF_ST_RDWR 0x0 +#define TOF_ST_RDONLY 0x1 +#define TOF_ST_LPG 0x2 + +#define TOF_STAG_TRANS_PS_CODE_64KB 0 +#define TOF_STAG_TRANS_PS_CODE_2MB 1 + +#define TOF_IOC_MAGIC 'd' +#define TOF_IOCTL_INIT_CQ _IOWR(TOF_IOC_MAGIC, 0, long) +#define TOF_IOCTL_ALLOC_STAG _IOWR(TOF_IOC_MAGIC, 1, long) +#define TOF_IOCTL_FREE_STAGS _IOWR(TOF_IOC_MAGIC, 2, long) +#define TOF_IOCTL_ENABLE_BCH _IOWR(TOF_IOC_MAGIC, 3, long) +#define TOF_IOCTL_DISABLE_BCH _IOWR(TOF_IOC_MAGIC, 4, long) +#define TOF_IOCTL_SET_RT_SIGNAL _IOWR(TOF_IOC_MAGIC, 5, long) +#define TOF_IOCTL_SET_SUBNET _IOWR(TOF_IOC_MAGIC, 6, long) +#define TOF_IOCTL_REG_USER _IOWR(TOF_IOC_MAGIC, 7, long) +#define TOF_IOCTL_NOTIFY_LINKDOWN _IOWR(TOF_IOC_MAGIC, 8, long) +#define TOF_IOCTL_GET_PORT_STAT _IOWR(TOF_IOC_MAGIC, 9, long) +#define TOF_IOCTL_GET_CQ_STAT _IOWR(TOF_IOC_MAGIC, 10, long) +#define TOF_IOCTL_LOAD_REGISTER _IOWR(TOF_IOC_MAGIC, 11, long) +#define TOF_IOCTL_LOAD_RESOURCE _IOWR(TOF_IOC_MAGIC, 12, long) + +enum { + /* TOQ (0 - 71) */ + TOF_RSC_TNI0_TOQ0 = 0, TOF_RSC_TNI0_TOQ1, TOF_RSC_TNI0_TOQ2, TOF_RSC_TNI0_TOQ3, + TOF_RSC_TNI0_TOQ4, TOF_RSC_TNI0_TOQ5, TOF_RSC_TNI0_TOQ6, TOF_RSC_TNI0_TOQ7, + TOF_RSC_TNI0_TOQ8, TOF_RSC_TNI0_TOQ9, TOF_RSC_TNI0_TOQ10, TOF_RSC_TNI0_TOQ11, + TOF_RSC_TNI1_TOQ0, TOF_RSC_TNI1_TOQ1, TOF_RSC_TNI1_TOQ2, TOF_RSC_TNI1_TOQ3, + TOF_RSC_TNI1_TOQ4, TOF_RSC_TNI1_TOQ5, TOF_RSC_TNI1_TOQ6, TOF_RSC_TNI1_TOQ7, + TOF_RSC_TNI1_TOQ8, TOF_RSC_TNI1_TOQ9, TOF_RSC_TNI1_TOQ10, TOF_RSC_TNI1_TOQ11, + TOF_RSC_TNI2_TOQ0, TOF_RSC_TNI2_TOQ1, TOF_RSC_TNI2_TOQ2, TOF_RSC_TNI2_TOQ3, + TOF_RSC_TNI2_TOQ4, TOF_RSC_TNI2_TOQ5, TOF_RSC_TNI2_TOQ6, 
TOF_RSC_TNI2_TOQ7, + TOF_RSC_TNI2_TOQ8, TOF_RSC_TNI2_TOQ9, TOF_RSC_TNI2_TOQ10, TOF_RSC_TNI2_TOQ11, + TOF_RSC_TNI3_TOQ0, TOF_RSC_TNI3_TOQ1, TOF_RSC_TNI3_TOQ2, TOF_RSC_TNI3_TOQ3, + TOF_RSC_TNI3_TOQ4, TOF_RSC_TNI3_TOQ5, TOF_RSC_TNI3_TOQ6, TOF_RSC_TNI3_TOQ7, + TOF_RSC_TNI3_TOQ8, TOF_RSC_TNI3_TOQ9, TOF_RSC_TNI3_TOQ10, TOF_RSC_TNI3_TOQ11, + TOF_RSC_TNI4_TOQ0, TOF_RSC_TNI4_TOQ1, TOF_RSC_TNI4_TOQ2, TOF_RSC_TNI4_TOQ3, + TOF_RSC_TNI4_TOQ4, TOF_RSC_TNI4_TOQ5, TOF_RSC_TNI4_TOQ6, TOF_RSC_TNI4_TOQ7, + TOF_RSC_TNI4_TOQ8, TOF_RSC_TNI4_TOQ9, TOF_RSC_TNI4_TOQ10, TOF_RSC_TNI4_TOQ11, + TOF_RSC_TNI5_TOQ0, TOF_RSC_TNI5_TOQ1, TOF_RSC_TNI5_TOQ2, TOF_RSC_TNI5_TOQ3, + TOF_RSC_TNI5_TOQ4, TOF_RSC_TNI5_TOQ5, TOF_RSC_TNI5_TOQ6, TOF_RSC_TNI5_TOQ7, + TOF_RSC_TNI5_TOQ8, TOF_RSC_TNI5_TOQ9, TOF_RSC_TNI5_TOQ10, TOF_RSC_TNI5_TOQ11, + + /* TCQ (72 - 143) */ + TOF_RSC_TNI0_TCQ0, TOF_RSC_TNI0_TCQ1, TOF_RSC_TNI0_TCQ2, TOF_RSC_TNI0_TCQ3, + TOF_RSC_TNI0_TCQ4, TOF_RSC_TNI0_TCQ5, TOF_RSC_TNI0_TCQ6, TOF_RSC_TNI0_TCQ7, + TOF_RSC_TNI0_TCQ8, TOF_RSC_TNI0_TCQ9, TOF_RSC_TNI0_TCQ10, TOF_RSC_TNI0_TCQ11, + TOF_RSC_TNI1_TCQ0, TOF_RSC_TNI1_TCQ1, TOF_RSC_TNI1_TCQ2, TOF_RSC_TNI1_TCQ3, + TOF_RSC_TNI1_TCQ4, TOF_RSC_TNI1_TCQ5, TOF_RSC_TNI1_TCQ6, TOF_RSC_TNI1_TCQ7, + TOF_RSC_TNI1_TCQ8, TOF_RSC_TNI1_TCQ9, TOF_RSC_TNI1_TCQ10, TOF_RSC_TNI1_TCQ11, + TOF_RSC_TNI2_TCQ0, TOF_RSC_TNI2_TCQ1, TOF_RSC_TNI2_TCQ2, TOF_RSC_TNI2_TCQ3, + TOF_RSC_TNI2_TCQ4, TOF_RSC_TNI2_TCQ5, TOF_RSC_TNI2_TCQ6, TOF_RSC_TNI2_TCQ7, + TOF_RSC_TNI2_TCQ8, TOF_RSC_TNI2_TCQ9, TOF_RSC_TNI2_TCQ10, TOF_RSC_TNI2_TCQ11, + TOF_RSC_TNI3_TCQ0, TOF_RSC_TNI3_TCQ1, TOF_RSC_TNI3_TCQ2, TOF_RSC_TNI3_TCQ3, + TOF_RSC_TNI3_TCQ4, TOF_RSC_TNI3_TCQ5, TOF_RSC_TNI3_TCQ6, TOF_RSC_TNI3_TCQ7, + TOF_RSC_TNI3_TCQ8, TOF_RSC_TNI3_TCQ9, TOF_RSC_TNI3_TCQ10, TOF_RSC_TNI3_TCQ11, + TOF_RSC_TNI4_TCQ0, TOF_RSC_TNI4_TCQ1, TOF_RSC_TNI4_TCQ2, TOF_RSC_TNI4_TCQ3, + TOF_RSC_TNI4_TCQ4, TOF_RSC_TNI4_TCQ5, TOF_RSC_TNI4_TCQ6, TOF_RSC_TNI4_TCQ7, + TOF_RSC_TNI4_TCQ8, TOF_RSC_TNI4_TCQ9, TOF_RSC_TNI4_TCQ10, 
TOF_RSC_TNI4_TCQ11, + TOF_RSC_TNI5_TCQ0, TOF_RSC_TNI5_TCQ1, TOF_RSC_TNI5_TCQ2, TOF_RSC_TNI5_TCQ3, + TOF_RSC_TNI5_TCQ4, TOF_RSC_TNI5_TCQ5, TOF_RSC_TNI5_TCQ6, TOF_RSC_TNI5_TCQ7, + TOF_RSC_TNI5_TCQ8, TOF_RSC_TNI5_TCQ9, TOF_RSC_TNI5_TCQ10, TOF_RSC_TNI5_TCQ11, + + /* MRQ (144 - 215) */ + TOF_RSC_TNI0_MRQ0, TOF_RSC_TNI0_MRQ1, TOF_RSC_TNI0_MRQ2, TOF_RSC_TNI0_MRQ3, + TOF_RSC_TNI0_MRQ4, TOF_RSC_TNI0_MRQ5, TOF_RSC_TNI0_MRQ6, TOF_RSC_TNI0_MRQ7, + TOF_RSC_TNI0_MRQ8, TOF_RSC_TNI0_MRQ9, TOF_RSC_TNI0_MRQ10, TOF_RSC_TNI0_MRQ11, + TOF_RSC_TNI1_MRQ0, TOF_RSC_TNI1_MRQ1, TOF_RSC_TNI1_MRQ2, TOF_RSC_TNI1_MRQ3, + TOF_RSC_TNI1_MRQ4, TOF_RSC_TNI1_MRQ5, TOF_RSC_TNI1_MRQ6, TOF_RSC_TNI1_MRQ7, + TOF_RSC_TNI1_MRQ8, TOF_RSC_TNI1_MRQ9, TOF_RSC_TNI1_MRQ10, TOF_RSC_TNI1_MRQ11, + TOF_RSC_TNI2_MRQ0, TOF_RSC_TNI2_MRQ1, TOF_RSC_TNI2_MRQ2, TOF_RSC_TNI2_MRQ3, + TOF_RSC_TNI2_MRQ4, TOF_RSC_TNI2_MRQ5, TOF_RSC_TNI2_MRQ6, TOF_RSC_TNI2_MRQ7, + TOF_RSC_TNI2_MRQ8, TOF_RSC_TNI2_MRQ9, TOF_RSC_TNI2_MRQ10, TOF_RSC_TNI2_MRQ11, + TOF_RSC_TNI3_MRQ0, TOF_RSC_TNI3_MRQ1, TOF_RSC_TNI3_MRQ2, TOF_RSC_TNI3_MRQ3, + TOF_RSC_TNI3_MRQ4, TOF_RSC_TNI3_MRQ5, TOF_RSC_TNI3_MRQ6, TOF_RSC_TNI3_MRQ7, + TOF_RSC_TNI3_MRQ8, TOF_RSC_TNI3_MRQ9, TOF_RSC_TNI3_MRQ10, TOF_RSC_TNI3_MRQ11, + TOF_RSC_TNI4_MRQ0, TOF_RSC_TNI4_MRQ1, TOF_RSC_TNI4_MRQ2, TOF_RSC_TNI4_MRQ3, + TOF_RSC_TNI4_MRQ4, TOF_RSC_TNI4_MRQ5, TOF_RSC_TNI4_MRQ6, TOF_RSC_TNI4_MRQ7, + TOF_RSC_TNI4_MRQ8, TOF_RSC_TNI4_MRQ9, TOF_RSC_TNI4_MRQ10, TOF_RSC_TNI4_MRQ11, + TOF_RSC_TNI5_MRQ0, TOF_RSC_TNI5_MRQ1, TOF_RSC_TNI5_MRQ2, TOF_RSC_TNI5_MRQ3, + TOF_RSC_TNI5_MRQ4, TOF_RSC_TNI5_MRQ5, TOF_RSC_TNI5_MRQ6, TOF_RSC_TNI5_MRQ7, + TOF_RSC_TNI5_MRQ8, TOF_RSC_TNI5_MRQ9, TOF_RSC_TNI5_MRQ10, TOF_RSC_TNI5_MRQ11, + + /* PBQ (216 - 221) */ + TOF_RSC_TNI0_PBQ, TOF_RSC_TNI1_PBQ, TOF_RSC_TNI2_PBQ, TOF_RSC_TNI3_PBQ, + TOF_RSC_TNI4_PBQ, TOF_RSC_TNI5_PBQ, + + /* PRQ (222 - 227) */ + TOF_RSC_TNI0_PRQ, TOF_RSC_TNI1_PRQ, TOF_RSC_TNI2_PRQ, TOF_RSC_TNI3_PRQ, + TOF_RSC_TNI4_PRQ, TOF_RSC_TNI5_PRQ, + + /* STEERINGTABLE (228 
- 299) */ + TOF_RSC_TNI0_STEERINGTABLE0, TOF_RSC_TNI0_STEERINGTABLE1, TOF_RSC_TNI0_STEERINGTABLE2, + TOF_RSC_TNI0_STEERINGTABLE3, TOF_RSC_TNI0_STEERINGTABLE4, TOF_RSC_TNI0_STEERINGTABLE5, + TOF_RSC_TNI0_STEERINGTABLE6, TOF_RSC_TNI0_STEERINGTABLE7, TOF_RSC_TNI0_STEERINGTABLE8, + TOF_RSC_TNI0_STEERINGTABLE9, TOF_RSC_TNI0_STEERINGTABLE10, TOF_RSC_TNI0_STEERINGTABLE11, + TOF_RSC_TNI1_STEERINGTABLE0, TOF_RSC_TNI1_STEERINGTABLE1, TOF_RSC_TNI1_STEERINGTABLE2, + TOF_RSC_TNI1_STEERINGTABLE3, TOF_RSC_TNI1_STEERINGTABLE4, TOF_RSC_TNI1_STEERINGTABLE5, + TOF_RSC_TNI1_STEERINGTABLE6, TOF_RSC_TNI1_STEERINGTABLE7, TOF_RSC_TNI1_STEERINGTABLE8, + TOF_RSC_TNI1_STEERINGTABLE9, TOF_RSC_TNI1_STEERINGTABLE10, TOF_RSC_TNI1_STEERINGTABLE11, + TOF_RSC_TNI2_STEERINGTABLE0, TOF_RSC_TNI2_STEERINGTABLE1, TOF_RSC_TNI2_STEERINGTABLE2, + TOF_RSC_TNI2_STEERINGTABLE3, TOF_RSC_TNI2_STEERINGTABLE4, TOF_RSC_TNI2_STEERINGTABLE5, + TOF_RSC_TNI2_STEERINGTABLE6, TOF_RSC_TNI2_STEERINGTABLE7, TOF_RSC_TNI2_STEERINGTABLE8, + TOF_RSC_TNI2_STEERINGTABLE9, TOF_RSC_TNI2_STEERINGTABLE10, TOF_RSC_TNI2_STEERINGTABLE11, + TOF_RSC_TNI3_STEERINGTABLE0, TOF_RSC_TNI3_STEERINGTABLE1, TOF_RSC_TNI3_STEERINGTABLE2, + TOF_RSC_TNI3_STEERINGTABLE3, TOF_RSC_TNI3_STEERINGTABLE4, TOF_RSC_TNI3_STEERINGTABLE5, + TOF_RSC_TNI3_STEERINGTABLE6, TOF_RSC_TNI3_STEERINGTABLE7, TOF_RSC_TNI3_STEERINGTABLE8, + TOF_RSC_TNI3_STEERINGTABLE9, TOF_RSC_TNI3_STEERINGTABLE10, TOF_RSC_TNI3_STEERINGTABLE11, + TOF_RSC_TNI4_STEERINGTABLE0, TOF_RSC_TNI4_STEERINGTABLE1, TOF_RSC_TNI4_STEERINGTABLE2, + TOF_RSC_TNI4_STEERINGTABLE3, TOF_RSC_TNI4_STEERINGTABLE4, TOF_RSC_TNI4_STEERINGTABLE5, + TOF_RSC_TNI4_STEERINGTABLE6, TOF_RSC_TNI4_STEERINGTABLE7, TOF_RSC_TNI4_STEERINGTABLE8, + TOF_RSC_TNI4_STEERINGTABLE9, TOF_RSC_TNI4_STEERINGTABLE10, TOF_RSC_TNI4_STEERINGTABLE11, + TOF_RSC_TNI5_STEERINGTABLE0, TOF_RSC_TNI5_STEERINGTABLE1, TOF_RSC_TNI5_STEERINGTABLE2, TOF_RSC_TNI5_STEERINGTABLE3, TOF_RSC_TNI5_STEERINGTABLE4, TOF_RSC_TNI5_STEERINGTABLE5, + TOF_RSC_TNI5_STEERINGTABLE6, TOF_RSC_TNI5_STEERINGTABLE7, TOF_RSC_TNI5_STEERINGTABLE8, + 
TOF_RSC_TNI5_STEERINGTABLE9, TOF_RSC_TNI5_STEERINGTABLE10, TOF_RSC_TNI5_STEERINGTABLE11, + + /* MBTABLE (300 - 371) */ + TOF_RSC_TNI0_MBTABLE0, TOF_RSC_TNI0_MBTABLE1, TOF_RSC_TNI0_MBTABLE2, + TOF_RSC_TNI0_MBTABLE3, TOF_RSC_TNI0_MBTABLE4, TOF_RSC_TNI0_MBTABLE5, + TOF_RSC_TNI0_MBTABLE6, TOF_RSC_TNI0_MBTABLE7, TOF_RSC_TNI0_MBTABLE8, + TOF_RSC_TNI0_MBTABLE9, TOF_RSC_TNI0_MBTABLE10, TOF_RSC_TNI0_MBTABLE11, + TOF_RSC_TNI1_MBTABLE0, TOF_RSC_TNI1_MBTABLE1, TOF_RSC_TNI1_MBTABLE2, + TOF_RSC_TNI1_MBTABLE3, TOF_RSC_TNI1_MBTABLE4, TOF_RSC_TNI1_MBTABLE5, + TOF_RSC_TNI1_MBTABLE6, TOF_RSC_TNI1_MBTABLE7, TOF_RSC_TNI1_MBTABLE8, + TOF_RSC_TNI1_MBTABLE9, TOF_RSC_TNI1_MBTABLE10, TOF_RSC_TNI1_MBTABLE11, + TOF_RSC_TNI2_MBTABLE0, TOF_RSC_TNI2_MBTABLE1, TOF_RSC_TNI2_MBTABLE2, + TOF_RSC_TNI2_MBTABLE3, TOF_RSC_TNI2_MBTABLE4, TOF_RSC_TNI2_MBTABLE5, + TOF_RSC_TNI2_MBTABLE6, TOF_RSC_TNI2_MBTABLE7, TOF_RSC_TNI2_MBTABLE8, + TOF_RSC_TNI2_MBTABLE9, TOF_RSC_TNI2_MBTABLE10, TOF_RSC_TNI2_MBTABLE11, + TOF_RSC_TNI3_MBTABLE0, TOF_RSC_TNI3_MBTABLE1, TOF_RSC_TNI3_MBTABLE2, + TOF_RSC_TNI3_MBTABLE3, TOF_RSC_TNI3_MBTABLE4, TOF_RSC_TNI3_MBTABLE5, + TOF_RSC_TNI3_MBTABLE6, TOF_RSC_TNI3_MBTABLE7, TOF_RSC_TNI3_MBTABLE8, + TOF_RSC_TNI3_MBTABLE9, TOF_RSC_TNI3_MBTABLE10, TOF_RSC_TNI3_MBTABLE11, + TOF_RSC_TNI4_MBTABLE0, TOF_RSC_TNI4_MBTABLE1, TOF_RSC_TNI4_MBTABLE2, + TOF_RSC_TNI4_MBTABLE3, TOF_RSC_TNI4_MBTABLE4, TOF_RSC_TNI4_MBTABLE5, + TOF_RSC_TNI4_MBTABLE6, TOF_RSC_TNI4_MBTABLE7, TOF_RSC_TNI4_MBTABLE8, + TOF_RSC_TNI4_MBTABLE9, TOF_RSC_TNI4_MBTABLE10, TOF_RSC_TNI4_MBTABLE11, + TOF_RSC_TNI5_MBTABLE0, TOF_RSC_TNI5_MBTABLE1, TOF_RSC_TNI5_MBTABLE2, + TOF_RSC_TNI5_MBTABLE3, TOF_RSC_TNI5_MBTABLE4, TOF_RSC_TNI5_MBTABLE5, + TOF_RSC_TNI5_MBTABLE6, TOF_RSC_TNI5_MBTABLE7, TOF_RSC_TNI5_MBTABLE8, + TOF_RSC_TNI5_MBTABLE9, TOF_RSC_TNI5_MBTABLE10, TOF_RSC_TNI5_MBTABLE11, + + TOF_RSC_NUM /* 372 */ +}; +#define TOF_RSC_TOQ(TNI, CQID) (TOF_RSC_TNI0_TOQ0 + ((TNI) * 12) + (CQID)) +#define TOF_RSC_TCQ(TNI, CQID) (TOF_RSC_TNI0_TCQ0 + ((TNI) * 
12) + (CQID)) +#define TOF_RSC_MRQ(TNI, CQID) (TOF_RSC_TNI0_MRQ0 + ((TNI) * 12) + (CQID)) +#define TOF_RSC_PBQ(TNI) (TOF_RSC_TNI0_PBQ + (TNI)) +#define TOF_RSC_PRQ(TNI) (TOF_RSC_TNI0_PRQ + (TNI)) +#define TOF_RSC_STT(TNI, CQID) (TOF_RSC_TNI0_STEERINGTABLE0 + ((TNI) * 12) + (CQID)) +#define TOF_RSC_MBT(TNI, CQID) (TOF_RSC_TNI0_MBTABLE0 + ((TNI) * 12) + (CQID)) + +#endif + +/* vim: set noet ts=8 sw=8 sts=0 tw=0 : */ + diff --git a/kernel/include/tofu/tof_utofu_cq_trans.h b/kernel/include/tofu/tof_utofu_cq_trans.h new file mode 100644 index 00000000..8c7c7656 --- /dev/null +++ b/kernel/include/tofu/tof_utofu_cq_trans.h @@ -0,0 +1,6 @@ + struct { + struct tof_utofu_trans_list *mru; + struct tof_trans_table *table; + int mruhead; + ihk_spinlock_t mru_lock; + } trans; diff --git a/kernel/include/tofu/tofu_generated-tof_core_cq.h b/kernel/include/tofu/tofu_generated-tof_core_cq.h new file mode 100644 index 00000000..adeb6309 --- /dev/null +++ b/kernel/include/tofu/tofu_generated-tof_core_cq.h @@ -0,0 +1,9 @@ +struct tof_core_cq { + union { + char whole_struct[264]; + struct { + char padding0[56]; + #include "tof_core_cq_reg.h" + }; + }; +}; diff --git a/kernel/include/tofu/tofu_generated-tof_utofu_cq.h b/kernel/include/tofu/tofu_generated-tof_utofu_cq.h new file mode 100644 index 00000000..a742c536 --- /dev/null +++ b/kernel/include/tofu/tofu_generated-tof_utofu_cq.h @@ -0,0 +1,33 @@ +struct tof_utofu_cq { + union { + char whole_struct[384]; + struct { + char padding0[0]; + struct tof_utofu_device common; + }; + struct { + char padding1[80]; + uint8_t tni; + }; + struct { + char padding2[81]; + uint8_t cqid; + }; + struct { + char padding3[104]; + #include "tof_utofu_cq_trans.h" + }; + struct { + char padding4[128]; + struct tof_icc_steering_entry *steering; + }; + struct { + char padding5[136]; + struct tof_icc_mb_entry *mb; + }; + struct { + char padding6[186]; + uint8_t num_stag; + }; + }; +}; diff --git a/kernel/include/tofu/tofu_generated-tof_utofu_device.h 
b/kernel/include/tofu/tofu_generated-tof_utofu_device.h new file mode 100644 index 00000000..ffc7404a --- /dev/null +++ b/kernel/include/tofu/tofu_generated-tof_utofu_device.h @@ -0,0 +1,9 @@ +struct tof_utofu_device { + union { + char whole_struct[80]; + struct { + char padding0[0]; + bool enabled; + }; + }; +}; diff --git a/kernel/include/tofu/tofu_generated-tof_utofu_mbpt.h b/kernel/include/tofu/tofu_generated-tof_utofu_mbpt.h new file mode 100644 index 00000000..81ab721c --- /dev/null +++ b/kernel/include/tofu/tofu_generated-tof_utofu_mbpt.h @@ -0,0 +1,33 @@ +struct tof_utofu_mbpt { + union { + char whole_struct[56]; + struct { + char padding0[0]; + struct kref kref; + }; + struct { + char padding1[8]; + struct tof_utofu_cq *ucq; + }; + struct { + char padding2[16]; + uintptr_t iova; + }; + struct { + char padding3[24]; + struct scatterlist *sg; + }; + struct { + char padding4[32]; + size_t nsgents; + }; + struct { + char padding5[40]; + uintptr_t mbptstart; + }; + struct { + char padding6[48]; + size_t pgsz; + }; + }; +}; diff --git a/kernel/init.c b/kernel/init.c index 8781c127..96f8aab4 100644 --- a/kernel/init.c +++ b/kernel/init.c @@ -345,6 +345,9 @@ static void populate_sysfs(void) int host_ikc_inited = 0; extern int num_processors; +#ifdef ENABLE_TOFU +extern void tof_utofu_init_globals(void); +#endif static void post_init(void) { @@ -370,6 +373,9 @@ static void post_init(void) sysfs_init(); populate_sysfs(); +#ifdef ENABLE_TOFU + tof_utofu_init_globals(); +#endif } #ifdef DCFA_RUN extern void user_main(); diff --git a/kernel/mem.c b/kernel/mem.c index c16c7066..8bb1b771 100644 --- a/kernel/mem.c +++ b/kernel/mem.c @@ -741,6 +741,16 @@ distance_based: memory_nodes[node].nodes_by_distance[i].id); break; } + else { + if (i == 0) + kprintf("%s: distance: CPU @ node %d failed to allocate " + "%d pages from node %d\n", + __FUNCTION__, + ihk_mc_get_numa_id(), + npages, + memory_nodes[node].nodes_by_distance[i].id); + + } } if (pa) break; @@ -1333,6 +1343,9 @@ 
static void unhandled_page_fault(struct thread *thread, void *fault_addr, static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) { struct thread *thread = cpu_local_var(current); +#ifdef ENABLE_TOFU + unsigned long addr = (unsigned long)fault_addr; +#endif int error; #ifdef PROFILE_ENABLE uint64_t t_s = 0; @@ -1349,6 +1362,49 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) cpu_enable_interrupt(); +#ifdef ENABLE_TOFU + if (!(reason & PF_USER) && + (addr > 0xffff000000000000 && + addr < 0xffff800000000000)) { + int error; + int ihk_mc_linux_pt_virt_to_phys_size(struct page_table *pt, + const void *virt, + unsigned long *phys, + unsigned long *size); + + unsigned long phys, size; + enum ihk_mc_pt_attribute attr = PTATTR_WRITABLE | PTATTR_ACTIVE; + + if (ihk_mc_linux_pt_virt_to_phys_size(ihk_mc_get_linux_kernel_pgt(), + fault_addr, &phys, &size) < 0) { + kprintf("%s: failed to resolve 0x%lx from Linux PT..\n", + __func__, addr); + goto out_linux; + } + +retry_linux: + if ((error = ihk_mc_pt_set_page(NULL, fault_addr, phys, attr)) < 0) { + if (error == -EBUSY) { + kprintf("%s: WARNING: updating 0x%lx -> 0x%lx" + " to reflect Linux kernel mapping..\n", + __func__, addr, phys); + ihk_mc_clear_kernel_range(fault_addr, fault_addr + PAGE_SIZE); + goto retry_linux; + } + else { + kprintf("%s: failed to set up 0x%lx -> 0x%lx Linux kernel mapping..\n", + __func__, addr, phys); + goto out_linux; + } + } + + dkprintf("%s: Linux kernel mapping 0x%lx -> 0x%lx set\n", + __func__, addr, phys); + goto out_ok; + } +out_linux: +#endif + if ((uintptr_t)fault_addr < PAGE_SIZE || !thread) { error = -EINVAL; } else { @@ -1394,6 +1450,9 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs) goto out; } +#ifdef ENABLE_TOFU +out_ok: +#endif error = 0; preempt_enable(); out: @@ -2735,3 +2794,25 @@ int ihk_mc_get_mem_user_page(void *arg0, page_table_t pt, pte_t *ptep, void *pga return 0; } + +pte_t 
*ihk_mc_pt_lookup_fault_pte(struct process_vm *vm, void *virt, + int pgshift, void **basep, size_t *sizep, int *p2alignp) +{ + int faulted = 0; + pte_t *ptep; + /* Look the PTE up; if it is missing or not present, fault the page in once via page_fault_process_vm() and look it up again. */ +retry: + ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table, + virt, pgshift, basep, sizep, p2alignp); + if (!faulted && (!ptep || !pte_is_present(ptep))) { + page_fault_process_vm(vm, virt, PF_POPULATE | PF_USER); + faulted = 1; + goto retry; + } + + if (faulted && ptep && pte_is_present(ptep)) { + kprintf("%s: successfully faulted 0x%lx\n", __FUNCTION__, (unsigned long)virt); + } + + return ptep; +} diff --git a/kernel/process.c b/kernel/process.c index 78b36c9b..70f2e55c 100644 --- a/kernel/process.c +++ b/kernel/process.c @@ -2013,6 +2013,14 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang int private_range, patching_to_rdonly; int devfile_or_hugetlbfs_or_premap, regfile_or_shm; + if (cpu_local_var(current)->profile) { + dkprintf("%s: 0x%lx @ %s\n", + __func__, fault_addr, + range->memobj && range->memobj->path ? + range->memobj->path : + range->private_data ?
"XPMEM" : ""); + } + dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx)\n", vm, range->start, range->end, range->flag, fault_addr, reason); ihk_mc_spinlock_lock_noirq(&vm->page_table_lock); /*****/ @@ -2852,6 +2860,11 @@ release_process(struct process *proc) /* no process left */ mcs_rwlock_reader_lock(&rset->pid1->children_lock, &lock); if (list_empty(&rset->pid1->children_list)) { +#ifdef ENABLE_TOFU + extern void tof_utofu_finalize(void); + + tof_utofu_finalize(); +#endif hugefileobj_cleanup(); } mcs_rwlock_reader_unlock(&rset->pid1->children_lock, &lock); diff --git a/kernel/profile.c b/kernel/profile.c index cbab0a6d..45673097 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -473,7 +473,7 @@ int do_profile(int flag) if (flag & PROF_ON) { if (!thread->profile) { thread->profile = 1; - thread->profile_start_ts = 0; + thread->profile_start_ts = now_ts; } } else if (flag & PROF_OFF) { diff --git a/kernel/syscall.c b/kernel/syscall.c index 69b6118b..41dd6aff 100644 --- a/kernel/syscall.c +++ b/kernel/syscall.c @@ -220,6 +220,7 @@ long do_syscall(struct syscall_request *req, int cpu) req->ttid = 0; } res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING; + res.pde_data = NULL; send_syscall(req, cpu, &res); if (req->rtid == -1) { @@ -380,6 +381,31 @@ long do_syscall(struct syscall_request *req, int cpu) rc = res.ret; + if (req->number == __NR_ioctl && rc == 0) { + if (cpu_local_var(current)->proc->enable_tofu && + res.pde_data && (unsigned long)req->args[0] < MAX_FD_PDE && + thread->proc->fd_path[req->args[0]] && !thread->proc->fd_pde_data[req->args[0]] && + !strncmp(thread->proc->fd_path[req->args[0]], + "/proc/tofu/dev/", 15)) { + /* The fd was range-checked and its path confirmed non-NULL above, before the per-fd tables were indexed. */ + if (req->args[0] < MAX_FD_PDE) { + unsigned long irqstate; + + irqstate = ihk_mc_spinlock_lock(&thread->proc->mckfd_lock); + thread->proc->fd_pde_data[req->args[0]] = res.pde_data; + ihk_mc_spinlock_unlock(&thread->proc->mckfd_lock, irqstate); + + kprintf("%s: PID: %d, ioctl fd: %d, filename: " + "%s, pde_data: 0x%lx\n", + __FUNCTION__, + thread->proc->pid, + req->args[0], +
thread->proc->fd_path[req->args[0]], + res.pde_data); + } + } + } + if(req->number != __NR_exit_group){ --thread->in_syscall_offload; } @@ -1265,6 +1291,23 @@ void terminate(int rc, int sig) mcs_rwlock_writer_unlock(&proc->threads_lock, &lock); mcs_rwlock_writer_unlock_noirq(&proc->update_lock, &updatelock); +#ifdef ENABLE_TOFU + /* Tofu: clean up stags, must be done before mcexec is gone */ + if (proc->enable_tofu) { + int fd; + + for (fd = 0; fd < MAX_FD_PDE; ++fd) { + /* Tofu? */ + if (proc->enable_tofu && proc->fd_pde_data[fd]) { + extern void tof_utofu_release_cq(void *pde_data); + + tof_utofu_release_cq(proc->fd_pde_data[fd]); + proc->fd_pde_data[fd] = NULL; + } + } + } +#endif + terminate_mcexec(rc, sig); mcs_rwlock_writer_lock(&proc->threads_lock, &lock); @@ -1419,7 +1462,6 @@ void terminate(int rc, int sig) #endif // clean up memory - finalize_process(proc); preempt_disable(); @@ -1907,6 +1949,10 @@ straight_out: } if (flags & (MAP_POPULATE | MAP_LOCKED)) { + dkprintf("%s: 0x%lx:%lu %s%s|\n", + __func__, addr, len, + flags & MAP_POPULATE ? "|MAP_POPULATE" : "", + flags & MAP_LOCKED ? "|MAP_LOCKED" : ""); populated_mapping = 1; } @@ -3810,6 +3856,23 @@ SYSCALL_DECLARE(ioctl) break; ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate); +#ifdef ENABLE_TOFU + /* Tofu? A negative fd must not index fd_pde_data; close() applies the same range check. */ + if (proc->enable_tofu && + fd >= 0 && fd < MAX_FD_PDE && thread->proc->fd_pde_data[fd]) { + extern long tof_utofu_unlocked_ioctl_cq(int fd, + unsigned int cmd, unsigned long arg); + + rc = tof_utofu_unlocked_ioctl_cq(fd, + ihk_mc_syscall_arg1(ctx), + ihk_mc_syscall_arg2(ctx)); + + /* Do we need to offload?
*/ + if (rc != -ENOTSUPP) + return rc; + } +#endif + if(fdp && fdp->ioctl_cb){ //kprintf("ioctl: found system fd %d\n", fd); rc = fdp->ioctl_cb(fdp, ctx); @@ -3817,6 +3880,7 @@ SYSCALL_DECLARE(ioctl) else{ rc = syscall_generic_forwarding(__NR_ioctl, ctx); } + return rc; } @@ -3852,7 +3916,12 @@ SYSCALL_DECLARE(open) } out: - kfree(pathname); + if (rc > 0 && rc < MAX_FD_PDE) { + cpu_local_var(current)->proc->fd_path[rc] = pathname; + } + else { + kfree(pathname); + } return rc; } @@ -3888,7 +3957,12 @@ SYSCALL_DECLARE(openat) } out: - kfree(pathname); + if (rc > 0 && rc < MAX_FD_PDE) { + cpu_local_var(current)->proc->fd_path[rc] = pathname; + } + else { + kfree(pathname); + } return rc; } @@ -3936,6 +4010,26 @@ SYSCALL_DECLARE(close) long irqstate; irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock); + +#ifdef ENABLE_TOFU + /* Clear path and PDE data */ + if (fd >= 0 && fd < MAX_FD_PDE) { + /* Tofu? */ + if (thread->proc->fd_pde_data[fd]) { + extern void tof_utofu_release_cq(void *pde_data); + + tof_utofu_release_cq(thread->proc->fd_pde_data[fd]); + thread->proc->fd_pde_data[fd] = NULL; + } + + if (thread->proc->fd_path[fd]) { + dkprintf("%s: %d -> %s\n", __func__, fd, thread->proc->fd_path[fd]); + kfree(thread->proc->fd_path[fd]); + thread->proc->fd_path[fd] = NULL; + } + } +#endif + for(fdp = proc->mckfd, fdq = NULL; fdp; fdq = fdp, fdp = fdp->next) if(fdp->fd == fd) break; @@ -10682,6 +10776,7 @@ long syscall(int num, ihk_mc_user_context_t *ctx) */ if (num < PROFILE_SYSCALL_MAX) { profile_event_add(num, (ts - thread->profile_start_ts)); + thread->profile_start_ts = rdtsc(); } else { if (num != __NR_profile) { diff --git a/kernel/tofu/tof_utofu_main.c b/kernel/tofu/tof_utofu_main.c new file mode 100644 index 00000000..0d6fb567 --- /dev/null +++ b/kernel/tofu/tof_utofu_main.c @@ -0,0 +1,1390 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +/* DWARF generated 
headers */ +#include +#include +#include +#include + +#define TOF_UTOFU_VERSION TOF_UAPI_VERSION +#define TOF_UTOFU_NUM_STAG_NTYPES 3 +#define TOF_UTOFU_NUM_STAG_BITS(size) ((size) + 13) +#define TOF_UTOFU_NUM_STAG(size) ((uint64_t)1 << TOF_UTOFU_NUM_STAG_BITS(size)) +#define TOF_UTOFU_STAG_TRANS_BITS 3 +#define TOF_UTOFU_STAG_TRANS_SIZE ((uint64_t)1 << TOF_UTOFU_STAG_TRANS_BITS) +#define TOF_UTOFU_STAG_TRANS_TABLE_LEN(size) (TOF_UTOFU_NUM_STAG(size) * TOF_UTOFU_STAG_TRANS_SIZE) +#define TOF_UTOFU_STEERING_TABLE_LEN(size) (TOF_UTOFU_NUM_STAG(size) * TOF_ICC_STEERING_SIZE) +#define TOF_UTOFU_MB_TABLE_LEN(size) (TOF_UTOFU_NUM_STAG(size) * TOF_ICC_MB_SIZE) +#define TOF_UTOFU_STAG_MEM_LEN(size) (TOF_UTOFU_STEERING_TABLE_LEN(size) * 4) +#define TOF_UTOFU_SPECIAL_STAG 4096 + +#define TOF_UTOFU_ICC_COMMON_REGISTER (tof_icc_reg_pa + 0x0B000000) +#define TOF_UTOFU_REG_START tof_icc_reg_pa +#define TOF_UTOFU_REG_END (TOF_UTOFU_ICC_COMMON_REGISTER + 0x000FFFFF) + +#define TOF_UTOFU_SET_SUBNET_TNI 0 /* This number is kernel TNIs number in setting subnet */ +#define TOF_UTOFU_KCQ 11 +#define TOF_UTOFU_LINKDOWN_PORT_MASK 0x000003FF + +#define TOF_UTOFU_ALLOC_STAG_LPG 0x2 +#define TOF_UTOFU_BLANK_MBVA (-1) + +#define TOF_UTOFU_MRU_EMPTY (-1) + +/* The `const' in roundup() prevents gcc-3.3 from calling __divdi3 */ +#define roundup(x, y) ( \ +{ \ + const typeof(y) __y = y; \ + (((x) + (__y - 1)) / __y) * __y; \ +} \ +) +#define rounddown(x, y) ( \ +{ \ + typeof(x) __x = (x); \ + __x - (__x % (y)); \ +} \ +) + +struct tof_utofu_trans_list { + int16_t prev; + int16_t next; + uint8_t pgszbits; + struct tof_utofu_mbpt *mbpt; +}; + +static inline uintptr_t tof_utofu_get_stag_start(struct tof_utofu_cq *ucq, int stag) +{ + return ((uintptr_t)ucq->trans.table[stag].steering.bits.start) << PAGE_SHIFT; +} +static inline size_t tof_utofu_get_stag_len(struct tof_utofu_cq *ucq, int stag) +{ + return ((size_t)ucq->trans.table[stag].steering.bits.len) << PAGE_SHIFT; +} +static inline uintptr_t 
tof_utofu_get_mbpt_start(struct tof_utofu_cq *ucq, int stag) +{ + return ((uintptr_t)ucq->trans.table[stag].mbpt.bits.start) << PAGE_SHIFT; +} +static inline size_t tof_utofu_get_mbpt_len(struct tof_utofu_cq *ucq, int stag) +{ + return ((size_t)ucq->trans.table[stag].mbpt.bits.len) << PAGE_SHIFT; +} +/* Log a bare return code together with the calling function; callers pass an integer rc, not a format string. */ +#define raw_rc_output(rc, args...) kprintf("%s: rc: %d\n", __func__, (int)(rc)) +static int tof_utofu_raw_rc_output_supress = 1; +static int tof_utofu_mbpt_address_match = 1; +/* Store in *_pgszbits the smallest page shift found for [addr, addr + len), looked up with ihk_mc_pt_lookup_fault_pte(); returns 0, or -EFAULT when a page has no present PTE. */ +static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len, + uint8_t *_pgszbits, bool readonly) { + uint8_t cur_shift; + uint8_t min_shift = U8_MAX; + uintptr_t start, end, va; + struct process *proc = cpu_local_var(current)->proc; + struct process_vm *vm = cpu_local_var(current)->vm; + int p2align; + size_t psize; + pte_t *ptep; + //struct vm_range *range; + + if(addr < PAGE_SIZE){ + *_pgszbits = PAGE_SHIFT; + return 0; + } + + start = round_down(addr, PAGE_SIZE); + end = round_up(addr + len, PAGE_SIZE); + + //range = lookup_process_memory_range(vm, start, end); + //if (!range) { + // return -EFAULT; + //} + + /* Special case for straight mapping */ + if (proc->straight_va && (void *)start >= proc->straight_va && + (void *)end < proc->straight_va + proc->straight_len) { + + if (end - start < PTL2_SIZE) { + *_pgszbits = PTL1_SHIFT; + } + else { + *_pgszbits = PTL2_SHIFT; + } + return 0; + } + + for (va = start; va < end; va += ((size_t)1 << cur_shift)) { + ptep = ihk_mc_pt_lookup_fault_pte(vm, (void *)va, + 0, NULL, &psize, &p2align); + + if (unlikely(!ptep || !pte_is_present(ptep))) { + kprintf("%s: ERROR: no valid PTE for 0x%lx\n", + __func__, va); + return -EFAULT; + } + + cur_shift = p2align + PAGE_SHIFT; + + if (cur_shift < min_shift) { + min_shift = cur_shift; + } + + if (min_shift <= PAGE_SHIFT) { + break; + } + } + +#if 0 + /* Tofu only supports 64kB and 2MB pages */ + if (min_shift > PTL1_CONT_SHIFT) + min_shift = PTL1_CONT_SHIFT; +#endif + + *_pgszbits = min_shift; + return 0;
+} + +static int tof_utofu_trans_search(struct tof_utofu_cq *ucq, uintptr_t start, uintptr_t end, uint8_t pgszbits, bool readonly){ + struct tof_utofu_trans_list *mru = ucq->trans.mru; + uintptr_t stagstart, stagend; + int stag; + + stag = ucq->trans.mruhead; + if(stag == TOF_UTOFU_MRU_EMPTY){ + if(unlikely(!tof_utofu_raw_rc_output_supress)){ + raw_rc_output(-ENOENT); + } + return -ENOENT; + } + do { + stagstart = tof_utofu_get_stag_start(ucq, stag); + stagend = stagstart + tof_utofu_get_stag_len(ucq, stag); + if(stag >= TOF_UTOFU_SPECIAL_STAG && ((stag & 0x1) == readonly) && (mru[stag].pgszbits == pgszbits)) { + if ((tof_utofu_mbpt_address_match & 0x1)) { + if ((stagstart == start) && (stagend == end)) { + kprintf("%s: found stag: %d\n", __func__, stag); + return stag; + } + } + else { + if ((stagstart <= start) && (end <= stagend)) { + return stag; + } + } + } + stag = ucq->trans.mru[stag].next; + } while(stag != ucq->trans.mruhead); + if(unlikely(!tof_utofu_raw_rc_output_supress)){ + raw_rc_output(-ENOENT); + } + dkprintf("%s: -ENOENT\n", __func__); + return -ENOENT; +} + +static int tof_utofu_reserve_stag(struct tof_utofu_cq *ucq, bool readonly){ + int stag; + for(stag = TOF_UTOFU_SPECIAL_STAG + readonly; stag < TOF_UTOFU_NUM_STAG(ucq->num_stag); stag += 2){ + if(!ucq->steering[stag].enable){ + dkprintf("%s: could use: %d\n", __func__, stag); + return stag; + } + } + return -1; +} + +static int tof_utofu_calc_mbptstart(int64_t start, int64_t end, size_t mbpt_npages, uint8_t pgszbits, + uintptr_t *mbptstart) +{ +#if 0 + struct vm_area_struct *vma; + int64_t len = mbpt_npages << pgszbits; + size_t pgsz = (size_t)1 << pgszbits; + + vma = find_vma(current->mm, start); + if(vma == NULL || vma->vm_start > start || vma->vm_end < end){ + return -ENOENT; + } + + if(vma->vm_flags & VM_GROWSDOWN){ + /* stack */ + /* we cannot extend MBPTs to lower address. + * therefore, we allocate rather large MBPT. 
*/ + int64_t upperbound; + uintptr_t mbpttail; + upperbound = round_up(vma->vm_end, pgsz); + if ((start + len) < 0) { + mbpttail = upperbound; + } + else { + mbpttail = min(upperbound, start + len); + } + *mbptstart = mbpttail - len; + }else{ + int64_t lowerbound; + lowerbound = round_down(vma->vm_start, pgsz); + *mbptstart = max(lowerbound, end - len); + } +#else + *mbptstart = start; +#endif + + return 0; +} + +int16_t *tof_ib_stag_list; +ihk_spinlock_t *tof_ib_stag_lock; +int *tof_ib_stag_list_Rp_addr; +#define tof_ib_stag_list_Rp (*tof_ib_stag_list_Rp_addr) +int *tof_ib_stag_list_Wp_addr; +#define tof_ib_stag_list_Wp (*tof_ib_stag_list_Wp_addr) +#define TOF_IB_MAX_STAG 0x4000 + +static int16_t tof_ib_stag_alloc(void){ + int16_t ret; + unsigned long flags; + + linux_spin_lock_irqsave(tof_ib_stag_lock, flags); + + if(tof_ib_stag_list_Rp != tof_ib_stag_list_Wp){ + ret = tof_ib_stag_list[tof_ib_stag_list_Rp]; + tof_ib_stag_list_Rp = (tof_ib_stag_list_Rp + 1) % TOF_IB_MAX_STAG; + } + else{ + /* empty */ + ret = -ENOENT; + } + linux_spin_unlock_irqrestore(tof_ib_stag_lock, flags); + + dkprintf("%s: stag: %d allocated\n", __func__, ret); + return ret; +} + +static void tof_ib_stag_free(int16_t stag){ + int16_t next; + unsigned long flags; + + linux_spin_lock_irqsave(tof_ib_stag_lock, flags); + + next = (tof_ib_stag_list_Wp + 1) % TOF_IB_MAX_STAG; + if(next != tof_ib_stag_list_Rp){ /* next == tof_ib_stag_list_Rp is full. 
*/ + tof_ib_stag_list[tof_ib_stag_list_Wp] = stag; + tof_ib_stag_list_Wp = next; + } + linux_spin_unlock_irqrestore(tof_ib_stag_lock, flags); + + dkprintf("%s: stag: %d freed\n", __func__, stag); +} + +struct tof_util_aligned_mem { + void *mem; + int nr_pages; + uint32_t offset; /* should be less than PAGE_SIZE */ +}; +static struct tof_util_aligned_mem *tof_ib_mbpt_mem = NULL; +static struct tof_icc_steering_entry *tof_ib_steering = NULL; +static struct tof_icc_mb_entry *tof_ib_mb = NULL; + +static int tof_ib_steering_enable(int stag, uint64_t mbpt_ipa, size_t npages, size_t length, uint64_t mbva){ + struct tof_icc_steering_entry *steering = &tof_ib_steering[stag]; + struct tof_icc_mb_entry *mb = &tof_ib_mb[stag]; + if(steering->enable != 0 || mb->enable != 0){ + return -EBUSY; + } + mb->ps = TOF_ICC_MB_PS_ENCODE(PAGE_SHIFT); /* will be 0 */ + mb->enable = 1; + mb->ipa = mbpt_ipa >> 8; + mb->npage = npages; + steering->readonly = 0; + steering->mbid = stag; + steering->mbva = mbva >> 8; + steering->length = length; + dma_wmb(); + steering->enable = 1; + return 0; +} + +int tof_core_cq_cacheflush(int tni, int cqid); + +#define TOF_IB_TNI_OFFSET 3 +#define TOF_IB_KCQ (TOF_ICC_NCQS - 1) +#define TOF_IB_ROUTE_CHECK_STAG 0 +#define TOF_IB_ROUTE_CHECK_DMAADDR 0 + +#define TOF_IB_SEND_MTU (7 * 256 - sizeof(struct tof_ib_send_header)) +#define TOF_IB_SEND_MAXLEN (32 * TOF_IB_SEND_MTU) + +#define TOF_IB_TIMER_DELAY 1 + +#define TOF_IB_MAX_STAG 0x4000 + +#define TOF_IB_MAX_QPNO 800000 +#define TOF_IB_MAX_QPID 4 + +static void tof_ib_steering_disable(int stag){ + struct tof_icc_steering_entry *steering = &tof_ib_steering[stag]; + struct tof_icc_mb_entry *mb = &tof_ib_mb[stag]; + steering->enable = 0; + dma_wmb(); + mb->enable = 0; + dma_wmb(); + tof_core_cq_cacheflush(TOF_IB_TNI_OFFSET, TOF_IB_KCQ); + tof_core_cq_cacheflush(TOF_IB_TNI_OFFSET + 1, TOF_IB_KCQ); +} + +static inline uint64_t tof_ib_dmaaddr_pack(uint32_t stag, uint32_t offset){ + return (uint64_t)stag << 32 | 
offset; +} + +static inline uint32_t tof_ib_dmaaddr_stag(uint64_t dmaaddr){ + return dmaaddr >> 32; +} + +/* + * McKernel scatterlist is simply a contiguous buffer + * This greatly simplifes dealing with it. + */ +struct scatterlist { + void *pages; + unsigned int offset; + unsigned int length; + unsigned long dma_address; + unsigned int dma_length; +}; + +#if 0 +static int tof_ib_map_sg(struct scatterlist *sg, int nents){ + struct tof_icc_mbpt_entry *mbpt; + int stag; + int ret; + int i; + int nr_pages; + + //if(!tof_ib_sg_is_contiguous(sg, nents)){ + // tof_info(7002, "SG is not contiguous\n"); + // return 0; + //} + + for(i = 0; ; i++){ + stag = tof_ib_stag_alloc(); + if(stag >= 0){ + break; + } + if(i % 10000 == 0){ + //tof_warn(6013, "Cannot allocate STag\n"); + kprintf("%s: WARNING: cannot allocate STag\n", __func__); + } + //schedule(); + } + + //ret = tof_util_aligned_alloc(&tof_ib_mbpt_mem[stag], nents * TOF_ICC_MBPT_SIZE, TOF_ICC_MBPT_ALIGN); + //if(ret < 0){ + // tof_ib_stag_free(stag); + // return 0; + //} + + nr_pages = (nents * TOF_ICC_MBPT_SIZE + (PAGE_SIZE - 1)) / PAGE_SIZE; + tof_ib_mbpt_mem[stag].mem = ihk_mc_alloc_pages(nr_pages, IHK_MC_AP_NOWAIT); + if (!tof_ib_mbpt_mem[stag].mem) { + tof_ib_stag_free(stag); + return 0; + } + tof_ib_mbpt_mem[stag].nr_pages = nr_pages; + tof_ib_mbpt_mem[stag].offset = 0; + + mbpt = tof_ib_mbpt_mem[stag].mem; + for(i = 0; i < nents; i++){ + //uint64_t paddr = sg_phys(&sg[i]) - sg[i].offset; + uint64_t paddr = virt_to_phys(sg->pages) + i * PAGE_SIZE; + mbpt[i].ipa = paddr >> 12; + mbpt[i].enable = 1; + //sg[i].dma_address = tof_ib_dmaaddr_pack(stag, i * PAGE_SIZE + sg[i].offset); + //sg[i].dma_length = sg[i].length; + } + sg->dma_address = tof_ib_dmaaddr_pack(stag, 0); + sg->dma_length = sg->length; + + //ret = tof_ib_steering_enable(stag, tof_util_get_pa(mbpt), nents, (size_t)nents << PAGE_SHIFT, 0); + ret = tof_ib_steering_enable(stag, virt_to_phys(mbpt), nents, (size_t)nents << PAGE_SHIFT, 0); + if(ret < 0){ + 
/* something going wrong */ + tof_ib_stag_free(stag); + //tof_util_aligned_free(&tof_ib_mbpt_mem[stag]); + ihk_mc_free_pages(tof_ib_mbpt_mem[stag].mem, nr_pages); + return 0; + } + return nents; +} +#endif + +static void tof_ib_unmap_sg(struct scatterlist *sg, int nents){ + int stag; + //if(!tof_ib_sg_is_contiguous(sg, nents)){ + // tof_info(7002, "SG is not contiguous\n"); + // return; + //} + stag = tof_ib_dmaaddr_stag(sg->dma_address); + tof_ib_steering_disable(stag); + tof_ib_stag_free(stag); + //tof_util_aligned_free(&tof_ib_mbpt_mem[stag]); + ihk_mc_free_pages(tof_ib_mbpt_mem[stag].mem, + tof_ib_mbpt_mem[stag].nr_pages); + tof_ib_mbpt_mem[stag].mem = NULL; + tof_ib_mbpt_mem[stag].nr_pages = 0; +} + + +static int tof_utofu_alloc_mbpt(struct tof_utofu_cq *ucq, uint32_t npages, struct tof_utofu_mbpt **pmbpt, int stag){ + size_t nsgents = npages / (PAGE_SIZE >> TOF_ICC_MBPT_SIZE_BITS); + //int i; + int ret; + struct scatterlist *sg; + struct tof_utofu_mbpt *mbpt; + + //sg = tof_util_alloc(nsgents * sizeof(*sg), GFP_ATOMIC); + sg = kmalloc(sizeof(*sg), IHK_MC_AP_NOWAIT); + if(sg == NULL){ + raw_rc_output(-ENOMEM); + return -ENOMEM; + } + memset(sg, 0, sizeof(*sg)); + + //sg_init_table(sg, nsgents); + //for(i = 0; i < nsgents; i++){ + // void *buf; + // buf = (void *)tof_util_get_free_pages(GFP_ATOMIC, 0); + // if(buf == NULL){ + // ret = -ENOMEM; + // raw_rc_output(ret); + // goto free_ent; + // } + // memset(buf, 0, PAGE_SIZE); + // sg_set_buf(&sg[i], buf, PAGE_SIZE); + //} + sg->pages = ihk_mc_alloc_pages(nsgents, IHK_MC_AP_NOWAIT); + if (!sg->pages) { + raw_rc_output(-ENOMEM); + ret = -ENOMEM; + goto free_sg; + } + memset(sg->pages, 0, PAGE_SIZE * nsgents); + + //mbpt = tof_util_alloc(sizeof(*mbpt), GFP_ATOMIC); + mbpt = kmalloc(sizeof(*mbpt), IHK_MC_AP_NOWAIT); + if(mbpt == NULL){ + raw_rc_output(-ENOMEM); + ret = -ENOMEM; + goto free_sg_pages; + } + + //ret = tof_smmu_iova_map_sg(ucq->tni, ucq->cqid, sg, nsgents); + //if(ret == 0){ + // ret = -EINVAL; + // 
goto free_ent; + //} + + sg->dma_address = -1; + { + unsigned long phys = virt_to_phys(sg->pages); + int i; + + for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { + unsigned long start, end; + + ihk_mc_get_memory_chunk(i, &start, &end, NULL); + + // Since chunks are contiguous, if end falls in, + // the whole region is covered.. + if (phys < start || phys > end) { + continue; + } + + ihk_mc_get_memory_chunk_dma_addr(i, ucq->tni, ucq->cqid, + (uintptr_t *)&sg->dma_address); + sg->dma_address += (phys - start); + break; + } + } + + if (sg->dma_address == -1) { + kprintf("%s: error: obtaining sg DMA address\n", __func__); + ret = -EINVAL; + goto free_ent; + } + + //atomic64_inc((atomic64_t *)&kref_init_count); + kref_init(&mbpt->kref); + mbpt->ucq = ucq; + //mbpt->iova = sg_dma_address(sg); + mbpt->iova = sg->dma_address; + mbpt->sg = sg; + mbpt->nsgents = nsgents; + *pmbpt = mbpt; + dkprintf("%s: mbpt iova: 0x%lx\n", __func__, mbpt->iova); + + return 0; +free_ent: + //for(i = i - 1; i >= 0; i--){ + // tof_util_free_pages((unsigned long)sg_virt(&sg[i]), 0); + //} + kfree(mbpt); +free_sg_pages: + ihk_mc_free_pages(sg->pages, nsgents); +free_sg: + kfree(sg); + + return ret; +} + +static uintptr_t tof_utofu_disable_mbpt(struct tof_utofu_mbpt *mbpt, int idx){ + int i0, i1; + struct tof_icc_mbpt_entry *ent; + uintptr_t ipa; + i0 = idx / (PAGE_SIZE / TOF_ICC_MBPT_SIZE); + i1 = idx - i0 * (PAGE_SIZE / TOF_ICC_MBPT_SIZE); + //ent = sg_virt(&mbpt->sg[i0]); + ent = mbpt->sg->pages + (i0 * PAGE_SIZE); + if(!ent[i1].enable){ + return 0; + } + ent[i1].enable = 0; + ipa = (uint64_t)ent[i1].ipa << 12; + ent[i1].ipa = 0; + return ipa; +} + +static void tof_utofu_enable_mbpt(struct tof_utofu_mbpt *mbpt, int idx, uintptr_t iova){ + int i0, i1; + struct tof_icc_mbpt_entry *ent; + i0 = idx / (PAGE_SIZE / TOF_ICC_MBPT_SIZE); + i1 = idx - i0 * (PAGE_SIZE / TOF_ICC_MBPT_SIZE); + //ent = sg_virt(&mbpt->sg[i0]); + ent = mbpt->sg->pages + (i0 * PAGE_SIZE); + ent[i1].ipa = iova>>12; + 
dma_wmb(); + ent[i1].enable = 1; +} + +static struct tof_icc_mbpt_entry *tof_utofu_get_mbpt_entry(struct tof_utofu_mbpt *mbpt, int idx){ + int i0, i1; + struct tof_icc_mbpt_entry *ent; + i0 = idx / (PAGE_SIZE / TOF_ICC_MBPT_SIZE); + i1 = idx - i0 * (PAGE_SIZE / TOF_ICC_MBPT_SIZE); + //ent = sg_virt(&mbpt->sg[i0]); + ent = mbpt->sg->pages + (i0 * PAGE_SIZE); + return &(ent[i1]); +} + +static bool tof_utofu_mbpt_is_enabled(struct tof_utofu_mbpt *mbpt, int idx) { + struct tof_icc_mbpt_entry *ent = tof_utofu_get_mbpt_entry(mbpt, idx); + return (ent->enable == 1); +} + +static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq, + struct tof_utofu_mbpt *mbpt, + uintptr_t start, + uintptr_t end, + uint32_t ix, + size_t pgsz, + bool readonly) +{ + //struct page *page; + struct process *proc = cpu_local_var(current)->proc; + uintptr_t iova = 0, va; + int ret; + unsigned long phys = 0; + + /* Special case for straight mapping */ + if (proc->straight_va && (void *)start >= proc->straight_va && + (void *)end < proc->straight_va + proc->straight_len) { + + for (va = start; va < end; va += pgsz, ix++) { + if (tof_utofu_mbpt_is_enabled(mbpt, ix)) { + /* this page is already mapped to mbpt */ + kprintf("%s: 0x%lx already mapped...\n", __func__, va); + continue; + } + + /* Not yet resolved? 
*/ + if (!iova) { + int i; + + phys = proc->straight_pa + + ((void *)va - proc->straight_va); + + iova = -1; + for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { + unsigned long start, end; + + ihk_mc_get_memory_chunk(i, &start, &end, NULL); + + if (phys < start || phys > end) { + continue; + } + + ihk_mc_get_memory_chunk_dma_addr(i, ucq->tni, ucq->cqid, + (uintptr_t *)&iova); + iova += (phys - start); + break; + } + + if (iova == -1) { + return -EINVAL; + } + } + + tof_utofu_enable_mbpt(mbpt, ix, iova); + iova += pgsz; + } + + return 0; + } + + for(va = start; va < end; va += pgsz, ix++){ + + if (tof_utofu_mbpt_is_enabled(mbpt, ix)) { + /* this page is already mapped to mbpt */ + continue; + } + + //ret = get_user_pages(va, 1, readonly ? 0 : FOLL_WRITE, &page, NULL); + //if(ret < 1){ + // raw_rc_output(ret); + // if(tof_utofu_stag_debug & 0x4){ + // tof_info(9999, "[%s] get_user_pages: ret=%d va=0x%lx readonly=%d\n", current->comm, ret, va, readonly); + // } + // if(ret == -EFAULT && !readonly){ + // return -EPERM; + // } + // return -ENOMEM; + //} + + ret = ihk_mc_pt_virt_to_phys( + cpu_local_var(current)->vm->address_space->page_table, + (void *)va, &phys); + + if (ret) { + raw_rc_output(ret); + return -ENOMEM; + } + + //iova = tof_smmu_get_ipa_cq(ucq->tni, ucq->cqid, + // pfn_to_kaddr(page_to_pfn(page)), pgsz); + //if (iova == 0) { + // put_page(page); + // raw_rc_output(ret); + // return -ENOMEM; + //} + + iova = -1; + { + int i; + for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) { + unsigned long start, end; + + ihk_mc_get_memory_chunk(i, &start, &end, NULL); + + if (phys < start || phys > end) { + continue; + } + + ihk_mc_get_memory_chunk_dma_addr(i, ucq->tni, ucq->cqid, + (uintptr_t *)&iova); + iova += (phys - start); + break; + } + } + + if (iova == -1) { + return -EINVAL; + } + + dkprintf("%s: VA: 0x%lx -> iova (phys): 0x%lx\n", + __func__, va, phys); + + /* Check ovalap MBPT IOVA */ + //ret = tof_utofu_check_overlap_mbpt_iova(iova, ucq, mbpt, ix); 
+ //if(unlikely(ret)){ + // put_page(page); + // return ret; + //} + + tof_utofu_enable_mbpt(mbpt, ix, iova); + //put_page(page); + } + return 0; +} + +static void tof_utofu_free_mbpt(struct tof_utofu_cq *ucq, struct tof_utofu_mbpt *mbpt){ + int i; + + for(i = 0; i < mbpt->nsgents * PAGE_SIZE / sizeof(struct tof_icc_mbpt_entry); i++){ + tof_utofu_disable_mbpt(mbpt, i); + //uintptr_t iova; + //iova = tof_utofu_disable_mbpt(mbpt, i); + //if(iova){ + /* This appears to be doing nothing, see tof_ib_dma_ops->unmap_page */ + //tof_smmu_release_ipa_cq(ucq->tni, ucq->cqid, iova, mbpt->pgsz); + //} + } + + //tof_smmu_iova_unmap_sg(ucq->tni, ucq->cqid, mbpt->sg, mbpt->nsgents); + // Do nothing in McKernel.. + + //for(i = 0; i < mbpt->nsgents; i++){ + // tof_util_free_pages((unsigned long)sg_virt(&mbpt->sg[i]), 0); + //} + ihk_mc_free_pages(mbpt->sg->pages, mbpt->nsgents); + + //tof_util_free(mbpt->sg); + kfree(mbpt->sg); + + //tof_util_free(mbpt); + kfree(mbpt); + dkprintf("%s: mbpt %p freed\n", __func__, mbpt); +} + +static void tof_utofu_enable_steering(struct tof_utofu_cq *ucq, int stag, uintptr_t mbva, size_t length, bool readonly){ + struct tof_icc_steering_entry *steering = &ucq->steering[stag]; + + steering->length = length; + steering->readonly = readonly; + steering->mbva = mbva>>8; + steering->mbid = stag; + dma_wmb(); + steering->enable = 1; +} + +static void tof_utofu_enable_mb(struct tof_utofu_cq *ucq, int stag, uintptr_t iova, uint8_t pgszbits, size_t npages){ + struct tof_icc_mb_entry *mb = &ucq->mb[stag]; + + mb->npage = npages; + mb->ps = TOF_ICC_MB_PS_ENCODE(pgszbits); + mb->ipa = iova>>8; + dma_wmb(); + mb->enable = 1; +} + +static void tof_utofu_trans_mru_delete(struct tof_utofu_cq *ucq, int stag){ + struct tof_utofu_trans_list *mru = ucq->trans.mru; + int prev = mru[stag].prev; + int next = mru[stag].next; + if(prev == TOF_UTOFU_MRU_EMPTY || next == TOF_UTOFU_MRU_EMPTY){ /* already deleted */ + return; + } + if(prev == stag){ /* a single entry */ + 
ucq->trans.mruhead = TOF_UTOFU_MRU_EMPTY; + }else{ + if(ucq->trans.mruhead == stag){ + ucq->trans.mruhead = next; + } + mru[prev].next = next; + mru[next].prev = prev; + } + mru[stag].prev = TOF_UTOFU_MRU_EMPTY; + mru[stag].next = TOF_UTOFU_MRU_EMPTY; +} + +static void tof_utofu_trans_mru_insert(struct tof_utofu_cq *ucq, int stag, uint8_t pgszbits, struct tof_utofu_mbpt *mbpt){ + struct tof_utofu_trans_list *mru = ucq->trans.mru; + mru[stag].pgszbits = pgszbits; + mru[stag].mbpt = mbpt; + if(ucq->trans.mruhead == TOF_UTOFU_MRU_EMPTY){ + mru[stag].prev = stag; + mru[stag].next = stag; + }else{ + int next = ucq->trans.mruhead; + int prev = mru[next].prev; + mru[stag].prev = prev; + mru[stag].next = next; + mru[prev].next = stag; + mru[next].prev = stag; + } + ucq->trans.mruhead = stag; +} + +static void tof_utofu_trans_update(struct tof_utofu_cq *ucq, int stag, uintptr_t start, size_t len, uint8_t pgszbits, struct tof_utofu_mbpt *mbpt){ + struct tof_trans_table *table = ucq->trans.table; + union { + struct tof_trans_table ent; + uint64_t atomic; + } tmp; + tmp.ent.steering.bits.start = start >> PAGE_SHIFT; + tmp.ent.steering.bits.len = len >> PAGE_SHIFT; + tmp.ent.steering.bits.ps_code = (pgszbits == PAGE_SHIFT)? 
TOF_STAG_TRANS_PS_CODE_64KB:TOF_STAG_TRANS_PS_CODE_2MB; + //atomic64_set((atomic64_t *)&table[stag], tmp.atomic); + ihk_atomic64_set((ihk_atomic64_t *)&table[stag], tmp.atomic); + + linux_spin_lock(&ucq->trans.mru_lock); + tof_utofu_trans_mru_delete(ucq, stag); + tof_utofu_trans_mru_insert(ucq, stag, pgszbits, mbpt); + linux_spin_unlock(&ucq->trans.mru_lock); +} + + + +static void tof_utofu_trans_disable(struct tof_utofu_cq *ucq, int stag){ + struct tof_trans_table *table = ucq->trans.table; + //atomic64_set((atomic64_t *)&table[stag], 0); + ihk_atomic64_set((ihk_atomic64_t *)&table[stag], 0); + tof_utofu_trans_mru_delete(ucq, stag); +} + +static void tof_utofu_trans_enable(struct tof_utofu_cq *ucq, int stag, uintptr_t start, size_t len, uintptr_t mbptstart, size_t mbptlen, uint8_t pgszbits, struct tof_utofu_mbpt *mbpt){ + struct tof_trans_table *table = ucq->trans.table; + table[stag].mbpt.bits.start = mbptstart >> PAGE_SHIFT; + table[stag].mbpt.bits.len = mbptlen >> PAGE_SHIFT; + table[stag].mbpt.bits.ps_code = (pgszbits == PAGE_SHIFT)? 
TOF_STAG_TRANS_PS_CODE_64KB:TOF_STAG_TRANS_PS_CODE_2MB; + wmb(); + tof_utofu_trans_update(ucq, stag, start, len, pgszbits, mbpt); +} + +static int tof_utofu_alloc_new_steering(struct tof_utofu_cq *ucq, int stag, uintptr_t start, uintptr_t end, uint8_t pgszbits, uintptr_t plus_mbva, bool readonly){ + uintptr_t mbptstart; + size_t pgsz = (size_t)1 << pgszbits; + size_t npages, mbpt_npages; + uint32_t ix; + int ret; + struct tof_utofu_mbpt *mbpt; + uintptr_t mbva; + + npages = (end - start) >> pgszbits; + mbpt_npages = roundup(npages, PAGE_SIZE / TOF_ICC_MBPT_SIZE); + ret = tof_utofu_calc_mbptstart((int64_t)start, (int64_t)end, mbpt_npages, pgszbits, &mbptstart); + if (ret < 0) { + raw_rc_output(ret); + return ret; + } + + ret = tof_utofu_alloc_mbpt(ucq, mbpt_npages, &mbpt, stag); + if(ret < 0){ + raw_rc_output(ret); + return ret; + } + mbpt->mbptstart = mbptstart; + mbpt->pgsz = pgsz; + + ix = (start - mbptstart) >> pgszbits; + ret = tof_utofu_update_mbpt_entries(ucq, mbpt, start, end, ix, pgsz, readonly); + if (ret < 0) { + raw_rc_output(ret); + //if(ret == -EFAULT){ + // tof_warn(9999, "Founds the overlap MBPT iova. abnormal end. 
Target TNI=%d CQ=%d Stag[%d] comm=%s pid=%d\n" + // ,ucq->tni, ucq->cqid, stag, current->comm, current->pid); + //} + tof_utofu_free_mbpt(ucq, mbpt); + return ret; + } + + if(plus_mbva == TOF_UTOFU_BLANK_MBVA) { + mbva = 0; + } else { + mbva = start - mbptstart + plus_mbva; + } + //if(tof_utofu_stag_debug & 0x1){ + // tof_info(9999, "[%s] tni=%d cq=%d stag=%d mbva=%ld start=0x%lx end=0x%lx mbptstart=0x%lx npages=%ld mbpt_npages=%ld plus_mbva=%ld pgszbits=%d\n", current->comm, ucq->tni, ucq->cqid, stag, mbva, start, end, mbptstart, npages, mbpt_npages, plus_mbva, pgszbits); + //} + tof_utofu_enable_mb(ucq, stag, mbpt->iova, pgszbits, mbpt_npages); + tof_utofu_enable_steering(ucq, stag, mbva, end - mbptstart - mbva, readonly); + tof_utofu_trans_enable(ucq, stag, start, end - start, mbptstart, mbpt_npages * TOF_ICC_MBPT_SIZE, pgszbits, mbpt); + + return 0; +} + +static void tof_utofu_release_stag(struct tof_utofu_cq *ucq, int stag){ + /* nothing to do */ + /* tof_utofu_reserve_stag() and tof_utofu_release_stag() are in a same ucq_lock region */ + return; +} + +static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned long arg) { + struct tof_utofu_cq *ucq; + struct tof_alloc_stag req; + struct process_vm *vm = cpu_local_var(current)->vm; + bool readonly; + uintptr_t start; + uintptr_t end; + uint8_t pgszbits; + size_t pgsz; + int ret = -ENOTSUPP; + + ucq = container_of(dev, struct tof_utofu_cq, common); + if(!ucq->common.enabled){ + return -EPERM; + } + if(copy_from_user(&req, (void *)arg, sizeof(req)) != 0){ + return -EFAULT; + } + dkprintf("%s: [IN] tni=%d cqid=%d flags=%u stag=%d va=%p len=%llx\n", + __func__, ucq->tni, ucq->cqid, req.flags, req.stag, req.va, req.len); + + if(req.stag < -1 || req.stag >= TOF_UTOFU_SPECIAL_STAG || + req.va == NULL || req.len == 0){ + return -EINVAL; + } + dkprintf("%s: ucq->steering: 0x%lx\n", __func__, ucq->steering); + if(req.stag >= 0 && ucq->steering[req.stag].enable){ + return -EBUSY; + } + + readonly = 
(req.flags & 1) != 0; + ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock); + pgszbits = PAGE_SHIFT; + if (req.flags & TOF_UTOFU_ALLOC_STAG_LPG) { + ret = tof_utofu_get_pagesize_locked((uintptr_t)req.va, + req.len, &pgszbits, readonly); + if(ret < 0){ + kprintf("%s: ret: %d\n", __func__, ret); + ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock); + return ret; + } + } + pgsz = (size_t)1 << pgszbits; + start = round_down((uintptr_t)req.va, pgsz); + end = round_up((uintptr_t)req.va + req.len, pgsz); + dkprintf("%s: 0x%lx:%llu, start: 0x%lx, end: 0x%lx, pgsz: %d\n", + __func__, req.va, req.len, start, end, pgsz); + + //down(&ucq->ucq_sem); + if(req.stag < 0){ +#if 1 + /* normal stag */ + int stag; + linux_spin_lock(&ucq->trans.mru_lock); + stag = tof_utofu_trans_search(ucq, start, end, pgszbits, readonly); + linux_spin_unlock(&ucq->trans.mru_lock); + if(stag < 0){ + struct tof_utofu_mbpt *mbpt = NULL; + stag = tof_utofu_reserve_stag(ucq, readonly); + if(stag < 0){ + //up(&ucq->ucq_sem); + ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock); + return -ENOSPC; + } + + /* With tof_utofu_disable_extend, this call does nothing */ + //spin_lock(&ucq->trans.mru_lock); + //mbpt = tof_utofu_mbpt_search(ucq, start, end, readonly, pgszbits); + //spin_unlock(&ucq->trans.mru_lock); + if (mbpt == NULL) { + ret = tof_utofu_alloc_new_steering(ucq, stag, start, end, pgszbits, + TOF_UTOFU_BLANK_MBVA, readonly); + } + //else { + // ret = tof_utofu_extend_steering(ucq, stag, mbpt, start, end, pgszbits, readonly); + //} + if(ret < 0){ + tof_utofu_release_stag(ucq, stag); + } + } + else{ + ret = 0; + } + req.stag = stag; + req.offset = (uintptr_t)req.va - tof_utofu_get_mbpt_start(ucq, stag); +#endif + } + else{ + /* special stag */ + uintptr_t plus_mbva; + if(ucq->steering[req.stag].enable){ + kprintf("%s: ret: %d\n", __func__, -EBUSY); + //up(&ucq->ucq_sem); + ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock); + return -EBUSY; + } + plus_mbva = 
round_down((uintptr_t)req.va, 256) - start; + ret = tof_utofu_alloc_new_steering(ucq, req.stag, start, end, pgszbits, plus_mbva, readonly); + req.offset = (uintptr_t)req.va & 0xff; + } + + //up(&ucq->ucq_sem); + ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock); + + if(ret == 0){ + if(copy_to_user((void *)arg, &req, sizeof(req)) != 0){ + kprintf("%s: ret: %d\n", __func__, -EFAULT); + ret = -EFAULT; + } + } + + //if(unlikely(tof_utofu_stag_debug & 0x100)){ + // tof_info(9999, "[%s] ucq=%d:%d stag=%d offset=%llu va=%p len=%llu flags=%d\n", + // current->comm, ucq->tni, ucq->cqid, req.stag, req.offset, req.va, req.len, req.flags); + //} + + dkprintf("%s: [OUT] tni=%d cqid=%d stag=%d offset=0x%llx ret=%d\n", + __func__, ucq->tni, ucq->cqid, req.stag, req.offset, ret); + return ret; +} + +static void tof_utofu_mbpt_release(struct kref *kref) +{ + struct tof_utofu_mbpt *mbpt = container_of(kref, struct tof_utofu_mbpt, kref); + //atomic64_inc((atomic64_t *)&kref_free_count); + tof_utofu_free_mbpt(mbpt->ucq, mbpt); +} + +//static struct tof_core_cq tof_core_cq[TOF_ICC_NTNIS][TOF_ICC_NCQS]; +static struct tof_core_cq *tof_core_cq; + +struct tof_core_cq *tof_core_cq_get(int tni, int cqid){ + if((unsigned int)tni >= TOF_ICC_NTNIS || + (unsigned int)cqid >= TOF_ICC_NCQS){ + return NULL; + } + //return tof_core_cq[tni][cqid]; + + // Convert [][] notion into pointer aritmethic + return tof_core_cq + (tni * TOF_ICC_NCQS) + cqid; +} + +static inline void tof_writeq_relaxed(uint64_t val, void *reg, off_t offset){ + writeq_relaxed(val, (char *)reg + offset); +} + +static inline uint64_t tof_readq(void *reg, off_t offset){ + return readq((char *)reg + offset); +} + +static inline bool tof_core_readq_spin(void *reg, off_t offset, uint64_t mask, + uint64_t expect, unsigned long timeout){ + uint64_t val; + unsigned long cyc; + cyc = rdtsc(); + do{ + val = tof_readq(reg, offset); + if(rdtsc() - cyc > timeout){ + return false; + } + }while((val & mask) != expect); + return true; +} 
+ +static int tof_core_cq_cache_flush_timeout_panic_disabled = 1; +static int tof_core_cq_cacheflush_is_cqs_steering_table_bit_disabled = 1; + +#define TOF_CORE_KCQID (TOF_ICC_NCQS - 1) + +static int tof_core_cacheflush_timeout(struct tof_core_cq *timeout_cq){ + int tni, cqid; + + for(tni = 0; tni < TOF_ICC_NTNIS; tni++){ + for(cqid = 0; cqid < TOF_ICC_NCQS; cqid++){ + struct tof_core_cq *cq = tof_core_cq_get(tni, cqid); + + if(cqid == TOF_CORE_KCQID){ + continue; + } + /* write 0 to steering table enable bit of CQS reg -> MRQ RCODE 10h issued */ + if(tof_core_cq_cacheflush_is_cqs_steering_table_bit_disabled){ + tof_writeq_relaxed(0, cq->reg.cqs, TOF_ICC_REG_CQS_STEERING_TABLE_ENABLE); + wmb(); + } + /* send signal */ + //if(tof_core_cq_cacheflush_is_send_signal_enabled){ + // tof_core_irq_handler_cq_user(&cq->irq, TOF_ICC_DUMMY_IRQ_CQS_CACHEFLUSH_TIMEOUT, timeout_cq); + //} + kprintf("%s WARNING: no signal sent.. \n", __func__); + } + } + return 0; +} + +static int tof_core_cq_cache_flush_timeout_sec = 3; +static int tof_core_cq_cache_flush_2nd_timeout_sec = 3600; +int tof_core_cq_cacheflush_timeout_dbg_msg_disabled = 1; + +// Assuming 2 GHz.. 
+#define TOF_CORE_TIMEOUT_SEC(n) ((n) * 2 * 1000000000) + +int tof_core_cq_cacheflush(int tni, int cqid){ + struct tof_core_cq *cq; + cq = tof_core_cq_get(tni, cqid); + tof_writeq_relaxed(1, cq->reg.cqs, TOF_ICC_REG_CQS_CACHE_FLUSH); + if(!tof_core_readq_spin(cq->reg.cqs, TOF_ICC_REG_CQS_STATUS, + TOF_ICC_REG_CQS_STATUS_CACHE_FLUSH_BUSY, + 0, TOF_CORE_TIMEOUT_SEC(tof_core_cq_cache_flush_timeout_sec))){ + if(likely(tof_core_cq_cache_flush_timeout_panic_disabled)){ + + //tof_warn(2018, "cache flush timeout: tni=%d cqid=%d", tni, cqid); + kprintf("%s: cache flush timeout: tni=%d cqid=%d", __func__, tni, cqid); + + /* cacheflush timeout processing for user CQ in TNI */ + tof_core_cacheflush_timeout(cq); + + /* Check cacheflush status change */ + if(!tof_core_readq_spin(cq->reg.cqs, TOF_ICC_REG_CQS_STATUS, + TOF_ICC_REG_CQS_STATUS_CACHE_FLUSH_BUSY, + 0, TOF_CORE_TIMEOUT_SEC(tof_core_cq_cache_flush_2nd_timeout_sec))){ + //tof_info(9999, "been exceeded cacheflush timeout status check time=%d : tni=%d cqid=%d",tof_core_cq_cache_flush_2nd_timeout_sec,tni,cqid); + //tof_panic(8, "cache flush timeout: tni=%d cqid=%d", tni, cqid); + kprintf("%s: cache flush timeout: tni=%d cqid=%d", __func__, tni, cqid); + panic("cache flush timeout"); + } + else{ + //if(!tof_core_cq_cacheflush_timeout_dbg_msg_disabled){ + // tof_info(9999, "been changed within cacheflush timeout status check time=%d : tni=%d cqid=%d",tof_core_cq_cache_flush_2nd_timeout_sec,tni,cqid); + //} + } + }else{ + //tof_panic(8, "cache flush timeout: tni=%d cqid=%d", tni, cqid); + kprintf("%s: cache flush timeout: tni=%d cqid=%d", __func__, tni, cqid); + panic("cache flush timeout"); + } + } + return 0; +} + +static int tof_utofu_cq_cacheflush(struct tof_utofu_cq *ucq){ + return tof_core_cq_cacheflush(ucq->tni, ucq->cqid); +} + + +static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag){ + if(stag < 0 || stag >= TOF_UTOFU_NUM_STAG(ucq->num_stag) || + ucq->steering == NULL){ + return -EINVAL; + } + 
if(!(ucq->steering[stag].enable)){ + return -ENOENT; + } + if (!kref_is_mckernel(&ucq->trans.mru[stag].mbpt->kref)) { + kprintf("%s: stag: %d is not an McKernel kref\n", __func__, stag); + return -EINVAL; + } + //if(unlikely(tof_utofu_stag_debug & 0x20)){ + // tof_info(9999, "[%s] ucq=%d:%d stag=%d\n", current->comm, ucq->tni, ucq->cqid, stag); + //} + ucq->steering[stag].enable = 0; + ucq->mb[stag].enable = 0; + tof_utofu_trans_disable(ucq, stag); + dma_wmb(); + tof_utofu_cq_cacheflush(ucq); + kref_put(&ucq->trans.mru[stag].mbpt->kref, tof_utofu_mbpt_release); + ucq->trans.mru[stag].mbpt = NULL; + dkprintf("%s: stag: %d deallocated\n", __func__, stag); + return 0; +} + + +static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned long arg){ + struct tof_utofu_cq *ucq; + struct tof_free_stags req; + int i, no_free_cnt = 0, ret; + + ucq = container_of(dev, struct tof_utofu_cq, common); + + if(copy_from_user(&req, (void *)arg, sizeof(req)) != 0){ + raw_rc_output(-EFAULT); + return -EFAULT; + } + //tof_log_if("[IN] tni=%d cqid=%d num=%u stags=%p\n", ucq->tni, ucq->cqid, req.num, req.stags); + dkprintf("%: [IN] tni=%d cqid=%d num=%u stags=%p\n", + __func__, ucq->tni, ucq->cqid, req.num, req.stags); + + if(req.num > 1024 || req.stags == NULL){ + return -EINVAL; + } + for(i = 0; i < req.num; i++){ + int stag; + if(copy_from_user(&stag, &req.stags[i], sizeof(stag)) != 0){ + raw_rc_output(-EFAULT); + return -EFAULT; + } + linux_spin_lock(&ucq->trans.mru_lock); + ret = tof_utofu_free_stag(ucq, stag); + linux_spin_unlock(&ucq->trans.mru_lock); + if(ret == 0){ + int result = -1; + if(copy_to_user(&req.stags[i], &result, sizeof(result)) != 0){ + raw_rc_output(-EFAULT); + return -EFAULT; + } + } + else if(ret == -ENOENT){ + no_free_cnt++; + continue; /* continue free tag process */ + } + else{ + req.num = i - no_free_cnt; + if(copy_to_user((void *)arg, &req, sizeof(req)) != 0){ + return -EFAULT; + } + //tof_log_if("[OUT] tni=%d cqid=%d num=%u stags=%p ret=%d 
no_free_cnt=%d\n", ucq->tni, ucq->cqid, req.num, req.stags, ret, no_free_cnt); + return ret; + } + } + + req.num = i - no_free_cnt; + if(copy_to_user((void *)arg, &req, sizeof(req)) != 0){ + return -EFAULT; + } + //tof_log_if("[OUT] tni=%d cqid=%d num=%u stags=%p no_free_cnt=%d\n", ucq->tni, ucq->cqid, req.num, req.stags, no_free_cnt); + dkprintf("%s: [OUT] tni=%d cqid=%d num=%u stags=%p no_free_cnt=%d\n", + __func__, ucq->tni, ucq->cqid, req.num, req.stags, no_free_cnt); + + return no_free_cnt > 0 ? -ENOENT : 0; +} + +void tof_utofu_release_cq(void *pde_data) +{ + struct tof_utofu_cq *ucq; + int stag; + struct tof_utofu_device *dev; + + dev = (struct tof_utofu_device *)pde_data; + ucq = container_of(dev, struct tof_utofu_cq, common); + + if (!ucq->common.enabled) { + kprintf("%s: UCQ TNI %d, CQ %d is disabled\n", + __func__, ucq->tni, ucq->cqid); + return; + } + + for (stag = 0; stag < TOF_UTOFU_NUM_STAG(ucq->num_stag); stag++) { + linux_spin_lock(&ucq->trans.mru_lock); + tof_utofu_free_stag(ucq, stag); + linux_spin_unlock(&ucq->trans.mru_lock); + } + + kprintf("%s: UCQ (pde: %p) TNI %d, CQ %d\n", + __func__, pde_data, ucq->tni, ucq->cqid); +} + +long tof_utofu_unlocked_ioctl_cq(int fd, + unsigned int cmd, unsigned long arg) { + int ret = -ENOTSUPP; + struct thread *thread = cpu_local_var(current); + struct tof_utofu_device *dev; + + /* ENOTSUPP inidicates proceed with offload */ + if (fd >= MAX_FD_PDE || !thread->proc->fd_pde_data[fd]) { + return -ENOTSUPP; + } + + dev = (struct tof_utofu_device *)thread->proc->fd_pde_data[fd]; + +#if 0 + switch (cmd) { + case TOF_IOCTL_INIT_CQ: + kprintf("%s: TOF_IOCTL_INIT_CQ @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_ALLOC_STAG: + kprintf("%s: TOF_IOCTL_ALLOC_STAG @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_FREE_STAGS: + kprintf("%s: TOF_IOCTL_FREE_STAGS @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_SET_RT_SIGNAL: + kprintf("%s: TOF_IOCTL_SET_RT_SIGNAL @ %d\n", __func__, fd); + break; + + case 
TOF_IOCTL_GET_PORT_STAT: + kprintf("%s: TOF_IOCTL_GET_PORT_STAT @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_GET_CQ_STAT: + kprintf("%s: TOF_IOCTL_GET_CQ_STAT @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_ENABLE_BCH: + kprintf("%s: TOF_IOCTL_ENABLE_BCH @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_DISABLE_BCH: + kprintf("%s: TOF_IOCTL_DISABLE_BCH @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_SET_SUBNET: + kprintf("%s: TOF_IOCTL_SET_SUBNET @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_REG_USER: + kprintf("%s: TOF_IOCTL_REG_USER @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_NOTIFY_LINKDOWN: + kprintf("%s: TOF_IOCTL_NOTIFY_LINKDOWN @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_LOAD_REGISTER: + kprintf("%s: TOF_IOCTL_LOAD_REGISTER @ %d\n", __func__, fd); + break; + + case TOF_IOCTL_LOAD_RESOURCE: + kprintf("%s: TOF_IOCTL_LOAD_RESOURCE @ %d\n", __func__, fd); + break; + + default: + kprintf("%s: unknown @ %d\n", __func__, fd); + break; + } +#endif + + switch (cmd) { + case TOF_IOCTL_ALLOC_STAG: + ret = tof_utofu_ioctl_alloc_stag(dev, arg); + break; + case TOF_IOCTL_FREE_STAGS: + ret = tof_utofu_ioctl_free_stags(dev, arg); + break; + default: + ret = -ENOTSUPP; + } + + return ret; +} + +extern struct tofu_globals *ihk_mc_get_tofu_globals(void); +void tof_utofu_init_globals(void) +{ + struct tofu_globals *tg = ihk_mc_get_tofu_globals(); + + tof_ib_stag_list = (int16_t *)tg->tof_ib_stag_list_addr; + tof_ib_stag_lock = (ihk_spinlock_t *)tg->tof_ib_stag_lock_addr; + tof_ib_stag_list_Rp_addr = (int *)tg->tof_ib_stag_list_Rp_addr; + tof_ib_stag_list_Wp_addr = (int *)tg->tof_ib_stag_list_Wp_addr; + tof_ib_mbpt_mem = + (struct tof_util_aligned_mem *)tg->tof_ib_mbpt_mem_addr; + tof_ib_steering = + (struct tof_icc_steering_entry *)tg->tof_ib_steering_addr; + tof_ib_mb = + (struct tof_icc_mb_entry *)tg->tof_ib_mb_addr; + tof_core_cq = + (struct tof_core_cq *)tg->tof_core_cq_addr; + + dkprintf("%s: tof_ib_stag_lock: 0x%lx\n", + __func__, 
tg->tof_ib_stag_lock_addr); + dkprintf("%s: tof_ib_stag_list_Wp_addr: 0x%lx\n", + __func__, tg->tof_ib_stag_list_Wp_addr); + dkprintf("%s: tof_ib_stag_list_Wp: %d\n", + __func__, *((int *)tg->tof_ib_stag_list_Wp_addr)); + kprintf("%s: linux_vmalloc_start: %p\n", __func__, tg->linux_vmalloc_start); + kprintf("Tofu globals initialized.\n"); +} + + +void tof_utofu_finalize(void) +{ + struct tofu_globals *tg = ihk_mc_get_tofu_globals(); + + ihk_mc_clear_kernel_range((void *)tg->linux_vmalloc_start, + (void *)tg->linux_vmalloc_end); +} + diff --git a/lib/include/ihk/lock.h b/lib/include/ihk/lock.h index b10c735c..b7dfa509 100644 --- a/lib/include/ihk/lock.h +++ b/lib/include/ihk/lock.h @@ -292,5 +292,38 @@ void ihk_mc_spinlock_lock(ihk_spinlock_t *, unsigned long *); void ihk_mc_spinlock_unlock(ihk_spinlock_t *, unsigned long *); #endif +/* + * Linux queued_spin_lock compatible spin_lock, without the queue. + */ +#define _Q_LOCKED_OFFSET 0 +#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET) + +#define linux_spin_lock(lock) \ + do { \ + while (!__sync_bool_compare_and_swap( \ + (unsigned int *)lock, 0, \ + _Q_LOCKED_VAL)) { \ + cpu_pause(); \ + } \ + } while (0) + +#define linux_spin_unlock(lock) \ + do { \ + smp_store_release(lock, 0); \ + } while (0) + +#define linux_spin_lock_irqsave(lock, flags) \ + do { \ + flags = cpu_disable_interrupt_save(); \ + linux_spin_lock(lock); \ + } while (0) + +#define linux_spin_unlock_irqrestore(lock, flags) \ + do { \ + linux_spin_unlock(lock); \ + cpu_restore_interrupt(flags); \ + } while (0) + + #endif diff --git a/lib/include/ihk/mm.h b/lib/include/ihk/mm.h index 93c89d3a..2569c860 100644 --- a/lib/include/ihk/mm.h +++ b/lib/include/ihk/mm.h @@ -170,6 +170,7 @@ int ihk_mc_pt_change_page(page_table_t pt, void *virt, enum ihk_mc_pt_attribute); int ihk_mc_pt_clear_page(page_table_t pt, void *virt); int ihk_mc_pt_clear_large_page(page_table_t pt, void *virt); +int ihk_mc_clear_kernel_range(void *start, void *end); int 
ihk_mc_pt_clear_range(page_table_t pt, struct process_vm *vm, void *start, void *end); int ihk_mc_pt_free_range(page_table_t pt, struct process_vm *vm, @@ -178,6 +179,8 @@ int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end, enum ihk_mc_pt_attribute clrattr, enum ihk_mc_pt_attribute setattr); pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift, void **pgbasep, size_t *pgsizep, int *p2alignp); +pte_t *ihk_mc_pt_lookup_fault_pte(struct process_vm *vm, void *virt, + int pgshift, void **basep, size_t *sizep, int *p2alignp); int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start, void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr, int pgshift, struct vm_range *range, int overwrite); @@ -221,6 +224,9 @@ int ihk_mc_get_memory_chunk(int id, unsigned long *start, unsigned long *end, int *numa_id); +int ihk_mc_get_memory_chunk_dma_addr(int id, + int tni, int cqid, + uintptr_t *dma_addr); void remote_flush_tlb_cpumask(struct process_vm *vm, unsigned long addr, int cpu_id); diff --git a/lib/include/limits.h b/lib/include/limits.h index 5223889e..7b9c2929 100644 --- a/lib/include/limits.h +++ b/lib/include/limits.h @@ -13,12 +13,35 @@ #ifndef __HEADER_LIMITS #define __HEADER_LIMITS -#define INT_MAX 0x7fffffff -#define INT_MIN -0x80000000 -#define UINT_MAX 0xffffffff -#define LONG_MAX 0x7fffffffffffffffL -#define LONG_MIN -0x8000000000000000L -#define ULONG_MAX 0xffffffffffffffffL +#define USHRT_MAX ((uint16_t)(~0U)) +#define SHRT_MAX ((int16_t)(USHRT_MAX>>1)) +#define SHRT_MIN ((int16_t)(-SHRT_MAX - 1)) +#define INT_MAX ((int)(~0U>>1)) +#define INT_MIN (-INT_MAX - 1) +#define UINT_MAX (~0U) +#define LONG_MAX ((long)(~0UL>>1)) +#define LONG_MIN (-LONG_MAX - 1) +#define ULONG_MAX (~0UL) +#define LLONG_MAX ((long long)(~0ULL>>1)) +#define LLONG_MIN (-LLONG_MAX - 1) +#define ULLONG_MAX (~0ULL) +#define SIZE_MAX (~(size_t)0) +typedef uint64_t phys_addr_t; +#define PHYS_ADDR_MAX (~(phys_addr_t)0) + +#define U8_MAX 
((uint8_t)~0U) +#define S8_MAX ((int8_t)(U8_MAX>>1)) +#define S8_MIN ((int8_t)(-S8_MAX - 1)) +#define U16_MAX ((uint16_t)~0U) +#define S16_MAX ((int16_t)(U16_MAX>>1)) +#define S16_MIN ((int16_t)(-S16_MAX - 1)) +#define U32_MAX ((uint32_t)~0U) +#define S32_MAX ((int32_t)(U32_MAX>>1)) +#define S32_MIN ((int32_t)(-S32_MAX - 1)) +#define U64_MAX ((uint64_t)~0ULL) +#define S64_MAX ((int64_t)(U64_MAX>>1)) +#define S64_MIN ((int64_t)(-S64_MAX - 1)) + #define IOV_MAX 1024 #ifndef PATH_MAX diff --git a/lib/include/mc_perf_event.h b/lib/include/mc_perf_event.h index 6ff0836d..94051d51 100644 --- a/lib/include/mc_perf_event.h +++ b/lib/include/mc_perf_event.h @@ -45,7 +45,7 @@ struct perf_event_attr; ((nr) << _IOC_NRSHIFT) | \ ((size) << _IOC_SIZESHIFT)) -#ifndef __KERNEL__ +#ifndef _IOC_TYPECHECK #define _IOC_TYPECHECK(t) (sizeof(t)) #endif diff --git a/tools/crash/mckernel.c b/tools/crash/mckernel.c index 753ba4ec..4af7123a 100644 --- a/tools/crash/mckernel.c +++ b/tools/crash/mckernel.c @@ -1179,6 +1179,7 @@ pgshift_to_string(int pgshift) case 21: return "2M"; case 29: return "512M"; case 30: return "1G"; + case 34: return "16G"; case 39: return "512G"; case 42: return "4T"; case 55: return "32P"; @@ -1645,9 +1646,8 @@ static void cmd_mcinfo(void) { #ifdef x86 - -#endif fprintf(fp, "LINUX_PAGE_OFFSET: 0x%lx\n", LINUX_PAGE_OFFSET); +#endif #ifdef ARM64 fprintf(fp, "V2PHYS_OFFSET: 0x%lx\n", V2PHYS_OFFSET); diff --git a/tools/dwarf-extract-struct/dwarf-extract-struct b/tools/dwarf-extract-struct/dwarf-extract-struct new file mode 100755 index 00000000..620c5157 Binary files /dev/null and b/tools/dwarf-extract-struct/dwarf-extract-struct differ diff --git a/tools/dwarf-extract-struct/dwarf-extract-struct.c b/tools/dwarf-extract-struct/dwarf-extract-struct.c new file mode 100644 index 00000000..7bb5944a --- /dev/null +++ b/tools/dwarf-extract-struct/dwarf-extract-struct.c @@ -0,0 +1,729 @@ +/* + * Trivial dwarf parser to extract part of a struct from debug infos + * + * Author: 
Dominique Martinet + * License: WTFPLv2 + * + * Canonical source: http://cgit.notk.org/asmadeus/dwarf-extract-struct.git + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include "libdwarf/dwarf.h" +#include "libdwarf/libdwarf.h" + + +static void parse_dwarf(Dwarf_Debug dbg, const char *struct_name, + const char *field_names[], int field_count); +static void find_struct(Dwarf_Debug dbg, Dwarf_Die die, const char *struct_name, + const char *field_names[], int field_count, int level); +static void find_fields(Dwarf_Debug dbg, Dwarf_Die struct_die, Dwarf_Die die, + const char *struct_name, const char *field_names[], + int field_count, int level); +static void print_field(Dwarf_Debug dbg, Dwarf_Die die, const char *field_name, + int pad_num); + +int debug = 0; + +void usage(const char *argv[]) { + fprintf(stderr, "%s debug_file struct_name [field [field...]]\n", + argv[0]); +} + +int main(int argc, const char *argv[]) { + Dwarf_Debug dbg = 0; + int fd = -1; + const char *filepath; + const char *struct_name; + int res = DW_DLV_ERROR; + Dwarf_Error error; + Dwarf_Handler errhand = 0; + Dwarf_Ptr errarg = 0; + + if(argc < 3) { + usage(argv); + exit(1); + } + + filepath = argv[1]; + struct_name = argv[2]; + + fd = open(filepath,O_RDONLY); + if(fd < 0) { + fprintf(stderr, "Failure attempting to open %s\n",filepath); + } + res = dwarf_init(fd, DW_DLC_READ, errhand, errarg, &dbg, &error); + if(res != DW_DLV_OK) { + fprintf(stderr, "Giving up, cannot do DWARF processing\n"); + exit(1); + } + + parse_dwarf(dbg, struct_name, argv + 3, argc - 3); + + res = dwarf_finish(dbg,&error); + if(res != DW_DLV_OK) { + fprintf(stderr, "dwarf_finish failed!\n"); + } + close(fd); + return 0; +} + +static void parse_dwarf(Dwarf_Debug dbg, const char *struct_name, + const char *field_names[], int field_count) { + Dwarf_Bool is_info = 1; + Dwarf_Unsigned cu_length; + Dwarf_Half cu_version; + Dwarf_Off cu_abbrev_offset; + Dwarf_Half cu_pointer_size; + 
Dwarf_Half cu_offset_size; + Dwarf_Half cu_extension_size; + Dwarf_Sig8 type_signature; + Dwarf_Unsigned type_offset; + Dwarf_Unsigned cu_next_offset; + Dwarf_Error err; + int rc; + + + /* Iterate compile and type units */ + for (is_info = 0; is_info < 2; ++is_info) { + rc = dwarf_next_cu_header_c(dbg, is_info, &cu_length, + &cu_version, &cu_abbrev_offset, &cu_pointer_size, + &cu_offset_size, &cu_extension_size, &type_signature, + &type_offset, &cu_next_offset, &err); + + while (rc != DW_DLV_NO_ENTRY) { + Dwarf_Die die; + + if (rc != DW_DLV_OK) { + fprintf(stderr, "error dwarf_next_cu_header_c: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + + rc = dwarf_siblingof(dbg, NULL, &die, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, "first dwarf_siblingof failed: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + + find_struct(dbg, die, struct_name, field_names, field_count, 0); + + rc = dwarf_next_cu_header_c(dbg, is_info, &cu_length, + &cu_version, &cu_abbrev_offset, &cu_pointer_size, + &cu_offset_size, &cu_extension_size, &type_signature, + &type_offset, &cu_next_offset, &err); + } + } + + fprintf(stderr, "struct %s not found\n", struct_name); + exit(2); +} + +static void find_struct(Dwarf_Debug dbg, Dwarf_Die die, const char *struct_name, + const char *field_names[], int field_count, int level) { + Dwarf_Die next; + Dwarf_Error err; + int rc; + + if (level > 1) + return; + + do { + char *name; + const char *tag_name; + Dwarf_Half tag; + + rc = dwarf_diename(die, &name, &err); + if (rc == DW_DLV_NO_ENTRY) { + name = NULL; + } else if (rc != DW_DLV_OK) { + fprintf(stderr, "dwarf_diename error: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + + if (debug) { + printf("diename: %s\n", name); + } + + rc = dwarf_tag(die, &tag, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, "dwarf_tag error: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + + if (debug) { + rc = dwarf_get_TAG_name(tag, &tag_name); + if (rc != DW_DLV_OK) { + fprintf(stderr, + 
"dwarf_get_TAG_name error: %d\n", rc); + exit(1); + } + + printf("<%d> %p <%d> %s: %s\n", level, die, tag, + tag_name, name ? name : ""); + } + + rc = dwarf_child(die, &next, &err); + if (rc == DW_DLV_ERROR) { + fprintf(stderr, "dwarf_child error: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + if (rc == DW_DLV_OK) { + if (tag == DW_TAG_structure_type + && name && strcasecmp(name, struct_name) == 0) { + find_fields(dbg, die, next, struct_name, + field_names, field_count, + level + 1); + fprintf(stderr, + "Found struct %s but it did not have all members given!\nMissing:\n", + struct_name); + for (rc = 0; rc < field_count; rc++) { + if (field_names[rc]) + fprintf(stderr, "%s\n", + field_names[rc]); + } + exit(3); + } + find_struct(dbg, next, struct_name, field_names, + field_count, level + 1); + dwarf_dealloc(dbg, next, DW_DLA_DIE); + } + + + rc = dwarf_siblingof(dbg, die, &next, &err); + dwarf_dealloc(dbg, die, DW_DLA_DIE); + if (name) + dwarf_dealloc(dbg, name, DW_DLA_STRING); + + if (rc != DW_DLV_OK) + break; + + die = next; + } while (die); +} + +static int dwarf_get_offset(Dwarf_Debug dbg, Dwarf_Die die, + int *poffset, Dwarf_Error *perr) { + Dwarf_Attribute attr; + Dwarf_Unsigned offset; + int rc; + + rc = dwarf_attr(die, DW_AT_data_member_location, &attr, perr); + if (rc != DW_DLV_OK) { + return rc; + } + Dwarf_Half form; + rc = dwarf_whatform(attr, &form, perr); + if (rc != DW_DLV_OK) { + fprintf(stderr, "Error getting whatform: %s\n", + dwarf_errmsg(*perr)); + exit(5); + } + if (form == DW_FORM_data1 || form == DW_FORM_data2 + || form == DW_FORM_data2 || form == DW_FORM_data4 + || form == DW_FORM_data8 || form == DW_FORM_udata) { + dwarf_formudata(attr, &offset, 0); + } else if (form == DW_FORM_sdata) { + Dwarf_Signed soffset; + dwarf_formsdata(attr, &soffset, 0); + if (soffset < 0) { + fprintf(stderr, + "unsupported negative offset\n"); + exit(5); + } + offset = (Dwarf_Unsigned) soffset; + } else { + Dwarf_Locdesc **locdescs; + Dwarf_Signed len; + if 
(dwarf_loclist_n(attr, &locdescs, &len, perr) + == DW_DLV_ERROR) { + fprintf(stderr, "unsupported member offset\n"); + exit(5); + } + if (len != 1 + || locdescs[0]->ld_cents != 1 + || (locdescs[0]->ld_s[0]).lr_atom + != DW_OP_plus_uconst) { + fprintf(stderr, + "unsupported location expression\n"); + exit(5); + } + offset = (locdescs[0]->ld_s[0]).lr_number; + } + dwarf_dealloc(dbg, attr, DW_DLA_ATTR); + + *poffset = (int) offset; + return DW_DLV_OK; +} + +static int dwarf_get_size(Dwarf_Debug dbg, Dwarf_Die die, + int *psize, Dwarf_Error *perr) { + Dwarf_Attribute attr; + Dwarf_Unsigned size; + int rc; + + rc = dwarf_attr(die, DW_AT_byte_size, &attr, perr); + if (rc != DW_DLV_OK) { + return rc; + } + Dwarf_Half form; + rc = dwarf_whatform(attr, &form, perr); + if (rc != DW_DLV_OK) { + fprintf(stderr, "Error getting whatform: %s\n", + dwarf_errmsg(*perr)); + exit(5); + } + if (form == DW_FORM_data1 || form == DW_FORM_data2 + || form == DW_FORM_data2 || form == DW_FORM_data4 + || form == DW_FORM_data8 || form == DW_FORM_udata) { + dwarf_formudata(attr, &size, 0); + } else if (form == DW_FORM_sdata) { + Dwarf_Signed ssize; + dwarf_formsdata(attr, &ssize, 0); + if (ssize < 0) { + fprintf(stderr, + "unsupported negative size\n"); + exit(5); + } + size = (Dwarf_Unsigned) ssize; + } else { + Dwarf_Locdesc **locdescs; + Dwarf_Signed len; + if (dwarf_loclist_n(attr, &locdescs, &len, perr) + == DW_DLV_ERROR) { + fprintf(stderr, "unsupported member size\n"); + exit(5); + } + if (len != 1 + || locdescs[0]->ld_cents != 1 + || (locdescs[0]->ld_s[0]).lr_atom + != DW_OP_plus_uconst) { + fprintf(stderr, + "unsupported location expression\n"); + exit(5); + } + size = (locdescs[0]->ld_s[0]).lr_number; + } + dwarf_dealloc(dbg, attr, DW_DLA_ATTR); + + *psize = (int) size; + return DW_DLV_OK; +} + +static int dwarf_get_arraysize(Dwarf_Debug dbg, Dwarf_Die die, + int *psize, Dwarf_Error *perr) { + Dwarf_Attribute attr; + Dwarf_Unsigned lower_bound, upper_bound; + int rc; + Dwarf_Die 
child; + Dwarf_Half form; + + rc = dwarf_child(die, &child, perr); + if (rc == DW_DLV_NO_ENTRY) { + fprintf(stderr, + "Could not deref child of array: no entry\n"); + return rc; + } + if (rc != DW_DLV_OK) { + fprintf(stderr, + "Could not get child entry of array: %s\n", + dwarf_errmsg(*perr)); + return rc; + } + + rc = dwarf_attr(child, DW_AT_lower_bound, &attr, perr); + /* Not present? Assume zero */ + if (rc != DW_DLV_OK) { + lower_bound = 0; + goto upper; + } + + rc = dwarf_whatform(attr, &form, perr); + if (rc != DW_DLV_OK) { + fprintf(stderr, "Error getting whatform: %s\n", + dwarf_errmsg(*perr)); + exit(5); + } + + if (form == DW_FORM_data1 || form == DW_FORM_data2 + || form == DW_FORM_data2 || form == DW_FORM_data4 + || form == DW_FORM_data8 || form == DW_FORM_udata) { + dwarf_formudata(attr, &lower_bound, 0); + } else if (form == DW_FORM_sdata) { + Dwarf_Signed ssize; + dwarf_formsdata(attr, &ssize, 0); + if (ssize < 0) { + fprintf(stderr, + "unsupported negative size\n"); + exit(5); + } + lower_bound = (Dwarf_Unsigned) ssize; + } else { + Dwarf_Locdesc **locdescs; + Dwarf_Signed len; + if (dwarf_loclist_n(attr, &locdescs, &len, perr) + == DW_DLV_ERROR) { + fprintf(stderr, "unsupported member size\n"); + exit(5); + } + if (len != 1 + || locdescs[0]->ld_cents != 1 + || (locdescs[0]->ld_s[0]).lr_atom + != DW_OP_plus_uconst) { + fprintf(stderr, + "unsupported location expression\n"); + exit(5); + } + lower_bound = (locdescs[0]->ld_s[0]).lr_number; + } + dwarf_dealloc(dbg, attr, DW_DLA_ATTR); + +upper: + rc = dwarf_attr(child, DW_AT_upper_bound, &attr, perr); + if (rc != DW_DLV_OK) { + return rc; + } + + rc = dwarf_whatform(attr, &form, perr); + if (rc != DW_DLV_OK) { + fprintf(stderr, "Error getting whatform: %s\n", + dwarf_errmsg(*perr)); + exit(5); + } + + if (form == DW_FORM_data1 || form == DW_FORM_data2 + || form == DW_FORM_data2 || form == DW_FORM_data4 + || form == DW_FORM_data8 || form == DW_FORM_udata) { + dwarf_formudata(attr, &upper_bound, 0); + } 
else if (form == DW_FORM_sdata) { + Dwarf_Signed ssize; + dwarf_formsdata(attr, &ssize, 0); + if (ssize < 0) { + fprintf(stderr, + "unsupported negative size\n"); + exit(5); + } + upper_bound = (Dwarf_Unsigned) ssize; + } else { + Dwarf_Locdesc **locdescs; + Dwarf_Signed len; + if (dwarf_loclist_n(attr, &locdescs, &len, perr) + == DW_DLV_ERROR) { + fprintf(stderr, "unsupported member size\n"); + exit(5); + } + if (len != 1 + || locdescs[0]->ld_cents != 1 + || (locdescs[0]->ld_s[0]).lr_atom + != DW_OP_plus_uconst) { + fprintf(stderr, + "unsupported location expression\n"); + exit(5); + } + upper_bound = (locdescs[0]->ld_s[0]).lr_number; + } + dwarf_dealloc(dbg, attr, DW_DLA_ATTR); + + *psize = ((int)upper_bound - (int)lower_bound + 1); + return DW_DLV_OK; +} + + + +static int deref_type(Dwarf_Debug dbg, Dwarf_Die type_die, + Dwarf_Die *new_type_die, Dwarf_Half *ptype_tag, + Dwarf_Error *perr) { + Dwarf_Attribute pointer_attr; + Dwarf_Off pointer_off; + int rc; + + rc = dwarf_attr(type_die, DW_AT_type, &pointer_attr, + perr); + if (rc != DW_DLV_OK) + return rc; + + rc = dwarf_global_formref(pointer_attr, &pointer_off, + perr); + if (rc != DW_DLV_OK) + return rc; + + rc = dwarf_offdie_b(dbg, pointer_off, 1, new_type_die, + perr); + if (rc != DW_DLV_OK) + return rc; + + dwarf_dealloc(dbg, pointer_attr, DW_DLA_ATTR); + + if (ptype_tag) + rc = dwarf_tag(*new_type_die, ptype_tag, perr); + + return rc; +} + +static void find_fields(Dwarf_Debug dbg, Dwarf_Die struct_die, Dwarf_Die die, + const char *struct_name, const char *field_names[], + int field_count, int level) { + Dwarf_Die next; + Dwarf_Error err; + int rc, i, printed_count = 0; + int size; + + printf("struct %s {\n\tunion {\n", + struct_name); + + rc = dwarf_get_size(dbg, struct_die, &size, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, "could not get size for struct %s: %s\n", + struct_name, dwarf_errmsg(err)); + exit(1); + } + printf("\t\tchar whole_struct[%d];\n", size); + + do { + char *name; + const char 
*tag_name; + Dwarf_Half tag; + + rc = dwarf_diename(die, &name, &err); + if (rc == DW_DLV_NO_ENTRY) { + name = NULL; + } else if (rc != DW_DLV_OK) { + fprintf(stderr, "dwarf_diename error: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + + rc = dwarf_tag(die, &tag, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, "dwarf_tag error: %d %s\n", + rc, dwarf_errmsg(err)); + exit(1); + } + + if (debug) { + rc = dwarf_get_TAG_name(tag, &tag_name); + if (rc != DW_DLV_OK) { + fprintf(stderr, + "dwarf_get_TAG_name error: %d\n", rc); + exit(1); + } + + printf("<%d> %p <%d> %s: %s\n", level, die, tag, + tag_name, name ? name : ""); + } + + if (tag == DW_TAG_member && name) { + for (i = 0; i < field_count; i++) { + if (!field_names[i]) + continue; + if (strcasecmp(name, field_names[i]) == 0) { + print_field(dbg, die, field_names[i], + printed_count); + field_names[i] = NULL; + printed_count++; + break; + } + } + if (printed_count == field_count) { + printf("\t};\n};\n"); + exit(0); + } + } + + rc = dwarf_siblingof(dbg, die, &next, &err); + dwarf_dealloc(dbg, die, DW_DLA_DIE); + if (name) + dwarf_dealloc(dbg, name, DW_DLA_STRING); + + if (rc != DW_DLV_OK) + break; + + die = next; + } while (die); +} + +static void print_field(Dwarf_Debug dbg, Dwarf_Die die, const char *field_name, + int padnum) { + Dwarf_Attribute attr; + Dwarf_Error err; + int offset = 0; + char type_buf[1024]; + char array_buf[128] = ""; + char pointer_buf[128] = ""; + int rc; + + rc = dwarf_get_offset(dbg, die, &offset, &err); + if (rc == DW_DLV_NO_ENTRY) { + fprintf(stderr, "Found %s but no offset, assuming 0\n", + field_name); + } else if (rc != DW_DLV_OK) { + fprintf(stderr, "Error getting dwarf attr offset: %s\n", + dwarf_errmsg(err)); + exit(4); + } + + rc = dwarf_attr(die, DW_AT_type, &attr, &err); + if (rc == DW_DLV_NO_ENTRY) { + fprintf(stderr, + "Found %s but no type, can't assume that one out..\n", + field_name); + exit(6); + } else if (rc != DW_DLV_OK) { + fprintf(stderr, "Error getting dwarf 
attrlist: %s\n", + dwarf_errmsg(err)); + exit(6); + } else { + Dwarf_Die type_die, next; + Dwarf_Off type_off; + Dwarf_Half type_tag; + char *type_name; + int pointer = 0; + int embeded_struct = 0; + + rc = dwarf_global_formref(attr, &type_off, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, + "Error getting ref offset for type: %s\n", + dwarf_errmsg(err)); + exit(7); + } + + rc = dwarf_offdie_b(dbg, type_off, 1, &type_die, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, + "Error getting die from offset for type: %s\n", + dwarf_errmsg(err)); + exit(7); + } + + rc = dwarf_tag(type_die, &type_tag, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, "dwarf_tag error: %d %s\n", + rc, dwarf_errmsg(err)); + exit(7); + } + + while (type_tag == DW_TAG_pointer_type) { + pointer_buf[pointer++] = '*'; + + rc = deref_type(dbg, type_die, &next, + &type_tag, &err); + /* No entry here means void* */ + if (rc == DW_DLV_NO_ENTRY) + break; + + if (rc != DW_DLV_OK) { + fprintf(stderr, + "Could not deref type for %s: %s\n", + field_name, dwarf_errmsg(err)); + exit(7); + } + + dwarf_dealloc(dbg, type_die, DW_DLA_DIE); + type_die = next; + } + + if (type_tag == DW_TAG_array_type) { + int next_offset, size; + + rc = deref_type(dbg, type_die, &next, + &type_tag, &err); + if (rc == DW_DLV_NO_ENTRY) { + fprintf(stderr, + "Could not deref array type for %s: no entry\n", + field_name); + exit(7); + } + if (rc != DW_DLV_OK) { + fprintf(stderr, + "Could not deref type for %s: %s\n", + field_name, dwarf_errmsg(err)); + exit(7); + } + + rc = dwarf_get_arraysize(dbg, type_die, &size, &err); + if (rc != DW_DLV_OK) { + fprintf(stderr, + "Could not get array size for %s: %s\n", + field_name, dwarf_errmsg(err)); + exit(7); + } + type_die = next; + + snprintf(array_buf, 128, "[%d]", size); + } + + /* If it's still pointer at this point, it's void * */ + if (type_tag != DW_TAG_pointer_type) { + rc = dwarf_diename(type_die, &type_name, &err); + if (rc != DW_DLV_OK) { +#if 0 + fprintf(stderr, 
"dwarf_diename error: %s\n", + rc == DW_DLV_NO_ENTRY ? + "no name" : dwarf_errmsg(err)); + const char *tag_name; + + rc = dwarf_get_TAG_name(type_tag, &tag_name); + if (rc != DW_DLV_OK) { + fprintf(stderr, + "dwarf_get_TAG_name error: %d\n", + rc); + } + + fprintf(stderr, "Bad tag %s (%d)?\n", + tag_name, type_tag); + exit(7); +#endif + if (rc == DW_DLV_NO_ENTRY) { + embeded_struct = 1; + } + } + } + + if (type_tag == DW_TAG_structure_type) { + snprintf(type_buf, 1024, "struct %s %s", + embeded_struct ? "FILL_IN_MANUALLY" : type_name, pointer_buf); + } else if (type_tag == DW_TAG_enumeration_type) { + snprintf(type_buf, 1024, "enum %s %s", + type_name, pointer_buf); + } else if (type_tag == DW_TAG_base_type + || type_tag == DW_TAG_typedef) { + snprintf(type_buf, 1024, "%s %s", type_name, + pointer_buf); + } else if (type_tag == DW_TAG_pointer_type) { + snprintf(type_buf, 1024, "void %s", pointer_buf); + } else { + const char *tag_name; + + rc = dwarf_get_TAG_name(type_tag, &tag_name); + if (rc != DW_DLV_OK) { + fprintf(stderr, + "dwarf_get_TAG_name error: %d\n", rc); + } + + fprintf(stderr, + "Type tag %s (%d) is not implemented, please add it\n", + tag_name, type_tag); + exit(7); + } + + if (type_tag != DW_TAG_pointer_type) + dwarf_dealloc(dbg, type_name, DW_DLA_STRING); + dwarf_dealloc(dbg, attr, DW_DLA_ATTR); + dwarf_dealloc(dbg, type_die, DW_DLA_DIE); + } + + printf("\t\tstruct {\n\t\t\tchar padding%i[%u];\n\t\t\t%s%s%s;\n\t\t};\n", + padnum, (unsigned int) offset, + type_buf, field_name, array_buf); +}