Compare commits
161 Commits
developmen
...
1.5.1-knl+
| Author | SHA1 | Date | |
|---|---|---|---|
| 8f117cc0dc | |||
| 0b9a657a01 | |||
| c2d6651cd2 | |||
| d979444049 | |||
| faa357d5a6 | |||
| 653aba17a1 | |||
| 7736e25ca4 | |||
| 73d16a9d79 | |||
| 922bd7e6eb | |||
| 0d99072109 | |||
| 3ced3f6080 | |||
| d9ff940528 | |||
| cd63ec877d | |||
| 6c0bb9e576 | |||
| ca9894108b | |||
| 3f26e44f85 | |||
| bacfb0c2b9 | |||
| 09f63483cc | |||
| 2f0c2aae9e | |||
| f7b277a623 | |||
| a3aa96af19 | |||
| 91d732308d | |||
| 166c6105ef | |||
| 5a2f8388a6 | |||
| 8164b63fc2 | |||
| af22ce62d2 | |||
| 2eca75ead8 | |||
| 22992780cf | |||
| 3043591e9a | |||
| 7e7c0f9ed3 | |||
| 7193f165cc | |||
| c8c42576fd | |||
| 0412e1fcc6 | |||
| 238e346586 | |||
| 0e57c715ad | |||
| 3facd3dcca | |||
| ec5328de69 | |||
| 880dd6ddb2 | |||
| 898708b8b4 | |||
| b08331b21a | |||
| c196c996dd | |||
| 20e179f6dc | |||
| 32fbc015f5 | |||
| 558c250bb3 | |||
| 96ea2d3658 | |||
| 9c91298ccf | |||
| b08da83a51 | |||
| fcc8310454 | |||
| 96b8b30516 | |||
| 521e0dc707 | |||
| e2e773d883 | |||
| 04d22d90a3 | |||
| f6405081a6 | |||
| 5bea237581 | |||
| 33ad55e72b | |||
| 6848c2ecf7 | |||
| 79f9a2d31a | |||
| 2900ce20f7 | |||
| 002b78372d | |||
| 5fce5e4e3c | |||
| 7a1ad31183 | |||
| 54bdb3419d | |||
| 03fed4d1c8 | |||
| 6279f69f5c | |||
| 6959d5ead4 | |||
| a5aa68744f | |||
| 89c5aaa9e9 | |||
| 15422d886f | |||
| f139bef0cb | |||
| de82cf8779 | |||
| 662895c020 | |||
| d23939da8c | |||
| 67529f21ff | |||
| 5c11ff0950 | |||
| ce4eb0d409 | |||
| 04434320fc | |||
| 50fafa6d71 | |||
| f5ced648ef | |||
| 0f8f88ca46 | |||
| e99f19e812 | |||
| 9a36e5d213 | |||
| 4816f27639 | |||
| 9c0b8aa812 | |||
| 23f178d718 | |||
| 159c18b98b | |||
| 1847a3ac11 | |||
| 15b16ffbbb | |||
| e64d89cd48 | |||
| 7366da4390 | |||
| 2dc85ee417 | |||
| 73cc07f98e | |||
| 815e2244ca | |||
| 163af73554 | |||
| fd316f3ca3 | |||
| 122588bc4d | |||
| 70238982c2 | |||
| 5b5191ef64 | |||
| a65faeaed4 | |||
| 4dea1842e0 | |||
| 5353b11f90 | |||
| abdbf96254 | |||
| bd170e63ba | |||
| d35fa16417 | |||
| 6406a0df6b | |||
| 52e8f03b4b | |||
| b071a3f32c | |||
| 90258f00bd | |||
| 28eb649056 | |||
| 744ebacf65 | |||
| 62e438a0aa | |||
| 5ac582a678 | |||
| 51bc28acca | |||
| c43654d69b | |||
| c1d2db6a73 | |||
| aeef55d1b0 | |||
| 6e289e8d9f | |||
| 3b5363c533 | |||
| 60f6862db2 | |||
| 39deff4e10 | |||
| 7f03c18d4d | |||
| 640dba627f | |||
| ae368d97d4 | |||
| 99c216d91e | |||
| 3c357dc30a | |||
| 37866e61ab | |||
| 076e6b9b12 | |||
| fa6db686b4 | |||
| 74a636a612 | |||
| 1c4a6568e6 | |||
| 7d2e2f93b0 | |||
| 7005110697 | |||
| c4ca4ae3ab | |||
| b024a486b9 | |||
| fe4c461f2f | |||
| b60a980088 | |||
| ec66229063 | |||
| b875b5186f | |||
| 5cf884ef41 | |||
| 64e2639adc | |||
| 14b360e867 | |||
| 4a0e389953 | |||
| 34363c2b68 | |||
| 8a1d756cb1 | |||
| e36abe57e7 | |||
| b2c8cc50dc | |||
| b9b4a4fe36 | |||
| 4b652c9353 | |||
| 60ac94cbb9 | |||
| 42bbf5f2a4 | |||
| e29a40331d | |||
| 655de2cd82 | |||
| 205747594b | |||
| 21f9a1ea33 | |||
| aed099fbcb | |||
| 48515970a0 | |||
| b888f31b30 | |||
| 7982008b5b | |||
| f658173269 | |||
| ca7edf1df8 | |||
| 9a5f3ad4e6 | |||
| cfbab0ee82 |
1
.gitignore
vendored
1
.gitignore
vendored
@ -14,3 +14,4 @@ elfboot/elfboot_test
|
||||
linux/executer/mcexec
|
||||
linux/mod_test*
|
||||
linux/target
|
||||
kernel/script/dwarf-extract-struct
|
||||
|
||||
@ -1,5 +1,6 @@
|
||||
TARGET = @TARGET@
|
||||
SBINDIR = @SBINDIR@
|
||||
BINDIR = @BINDIR@
|
||||
INCDIR = @INCDIR@
|
||||
ETCDIR = @ETCDIR@
|
||||
MANDIR = @MANDIR@
|
||||
@ -47,6 +48,7 @@ install:
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86_64/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
install -m 755 arch/x86_64/tools/mpimcexec $(BINDIR)/mpimcexec; \
|
||||
install -m 755 arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh $(SBINDIR)/mcoverlay-destroy.sh; \
|
||||
install -m 755 arch/x86_64/tools/mcoverlay-create-smp-x86.sh $(SBINDIR)/mcoverlay-create.sh; \
|
||||
install -m 755 arch/x86_64/tools/eclair-dump-backtrace.exp $(SBINDIR)/eclair-dump-backtrace.exp;\
|
||||
@ -57,6 +59,7 @@ install:
|
||||
install -m 644 kernel/include/swapfmt.h $(INCDIR); \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86_64/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
install -m 644 arch/x86_64/tools/mpimcexec.1 $(MANDIR)/man1/mpimcexec.1; \
|
||||
;; \
|
||||
*) \
|
||||
echo "unknown target $(TARGET)" >&2 \
|
||||
|
||||
@ -1225,6 +1225,13 @@ void cpu_pause(void)
|
||||
asm volatile("pause" ::: "memory");
|
||||
}
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/arch/x86/include/asm/processor.h */
|
||||
/* REP NOP (PAUSE) is a good thing to insert into busy-wait loops. */
|
||||
void cpu_relax(void)
|
||||
{
|
||||
asm volatile("rep; nop" ::: "memory");
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled > 0;
|
||||
|
||||
@ -14,7 +14,17 @@
|
||||
int __kprintf(const char *format, ...);
|
||||
#endif
|
||||
|
||||
typedef int ihk_spinlock_t;
|
||||
typedef unsigned short __ticket_t;
|
||||
typedef unsigned int __ticketpair_t;
|
||||
|
||||
typedef struct ihk_spinlock {
|
||||
union {
|
||||
__ticketpair_t head_tail;
|
||||
struct __raw_tickets {
|
||||
__ticket_t head, tail;
|
||||
} tickets;
|
||||
};
|
||||
} ihk_spinlock_t;
|
||||
|
||||
extern void preempt_enable(void);
|
||||
extern void preempt_disable(void);
|
||||
@ -23,9 +33,9 @@ extern void preempt_disable(void);
|
||||
|
||||
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
|
||||
{
|
||||
*lock = 0;
|
||||
lock->head_tail = 0;
|
||||
}
|
||||
#define SPIN_LOCK_UNLOCKED 0
|
||||
#define SPIN_LOCK_UNLOCKED { .head_tail = 0 }
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
#define ihk_mc_spinlock_lock_noirq(l) { \
|
||||
@ -39,40 +49,24 @@ __kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
|
||||
|
||||
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
int inc = 0x00010000;
|
||||
int tmp;
|
||||
|
||||
#if 0
|
||||
asm volatile("lock ; xaddl %0, %1\n"
|
||||
"movzwl %w0, %2\n\t"
|
||||
"shrl $16, %0\n\t"
|
||||
"1:\t"
|
||||
"cmpl %0, %2\n\t"
|
||||
"je 2f\n\t"
|
||||
"rep ; nop\n\t"
|
||||
"movzwl %1, %2\n\t"
|
||||
"jmp 1b\n"
|
||||
"2:"
|
||||
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
|
||||
#endif
|
||||
register struct __raw_tickets inc = { .tail = 0x0002 };
|
||||
|
||||
preempt_disable();
|
||||
|
||||
asm volatile("lock; xaddl %0, %1\n"
|
||||
"movzwl %w0, %2\n\t"
|
||||
"shrl $16, %0\n\t"
|
||||
"1:\t"
|
||||
"cmpl %0, %2\n\t"
|
||||
"je 2f\n\t"
|
||||
"rep ; nop\n\t"
|
||||
"movzwl %1, %2\n\t"
|
||||
/* don't need lfence here, because loads are in-order */
|
||||
"jmp 1b\n"
|
||||
"2:"
|
||||
: "+r" (inc), "+m" (*lock), "=&r" (tmp)
|
||||
:
|
||||
: "memory", "cc");
|
||||
asm volatile ("lock xaddl %0, %1\n"
|
||||
: "+r" (inc), "+m" (*(lock)) : : "memory", "cc");
|
||||
|
||||
if (inc.head == inc.tail)
|
||||
goto out;
|
||||
|
||||
for (;;) {
|
||||
if (*((volatile __ticket_t *)&lock->tickets.head) == inc.tail)
|
||||
goto out;
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
out:
|
||||
barrier(); /* make sure nothing creeps before the lock is taken */
|
||||
}
|
||||
|
||||
#ifdef DEBUG_SPINLOCK
|
||||
@ -106,8 +100,11 @@ __kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id());
|
||||
#endif
|
||||
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
|
||||
{
|
||||
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
|
||||
|
||||
__ticket_t inc = 0x0002;
|
||||
|
||||
asm volatile ("lock addw %1, %0\n"
|
||||
: "+m" (lock->tickets.head) : "ri" (inc) : "memory", "cc");
|
||||
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
|
||||
@ -40,18 +40,42 @@
|
||||
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
|
||||
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
|
||||
|
||||
#define GB_PAGE_SHIFT 30
|
||||
#define GB_PAGE_SIZE (1UL << GB_PAGE_SHIFT)
|
||||
#define GB_PAGE_MASK (~((unsigned long)GB_PAGE_SIZE - 1))
|
||||
#define GB_PAGE_P2ALIGN (GB_PAGE_SHIFT - PAGE_SHIFT)
|
||||
|
||||
|
||||
#define USER_END 0x0000800000000000UL
|
||||
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
|
||||
|
||||
/*
|
||||
* Canonical negative addresses (i.e., the smallest kernel virtual address)
|
||||
* on x86 64 bit mode (in its most restricted 48 bit format) starts from
|
||||
* 0xffff800000000000, but Linux starts mapping physical memory at 0xffff880000000000.
|
||||
* The 0x80000000000 long gap (8TBs, i.e., 16 PGD level entries in the page tables)
|
||||
* is used for Xen hyervisor (see arch/x86/include/asm/page.h) and that is
|
||||
* what we utilize for McKernel.
|
||||
* This gives us the benefit of being able to use Linux kernel virtual
|
||||
* addresses identically as in Linux.
|
||||
*
|
||||
* NOTE: update these also in eclair.c when modified!
|
||||
*/
|
||||
#define MAP_ST_START 0xffff800000000000UL
|
||||
#define MAP_VMAP_START 0xfffff00000000000UL
|
||||
#define MAP_FIXED_START 0xffffffff70000000UL
|
||||
#define MAP_KERNEL_START 0xffffffff80000000UL
|
||||
#define MAP_VMAP_START 0xffff850000000000UL
|
||||
#define MAP_FIXED_START 0xffff860000000000UL
|
||||
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
|
||||
/*
|
||||
* MAP_KERNEL_START is 8MB below MODULES_END in Linux.
|
||||
* Placing the LWK image in the virtual address space at the end of
|
||||
* the Linux modules section enables us to map the LWK TEXT in Linux
|
||||
* as well, so that Linux can also call into LWK text.
|
||||
*/
|
||||
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
|
||||
#define STACK_TOP(region) ((region)->user_end)
|
||||
|
||||
#define MAP_VMAP_SIZE 0x0000000100000000UL
|
||||
|
||||
#define KERNEL_PHYS_OFFSET MAP_ST_START
|
||||
|
||||
#define PTL4_SHIFT 39
|
||||
#define PTL4_SIZE (1UL << PTL4_SHIFT)
|
||||
#define PTL3_SHIFT 30
|
||||
|
||||
@ -133,7 +133,7 @@ static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
|
||||
* Note 2: xchg has side effect, so that attribute volatile is necessary,
|
||||
* but generally the primitive is invalid, *ptr is output argument. --ANK
|
||||
*/
|
||||
#define __xg(x) ((volatile long *)(x))
|
||||
#define __xg(x) ((volatile typeof(x))(x))
|
||||
|
||||
#define xchg4(ptr, x) \
|
||||
({ \
|
||||
|
||||
@ -39,7 +39,7 @@ SYSCALL_HANDLED(15, rt_sigreturn)
|
||||
SYSCALL_HANDLED(16, ioctl)
|
||||
SYSCALL_DELEGATED(17, pread64)
|
||||
SYSCALL_DELEGATED(18, pwrite64)
|
||||
SYSCALL_DELEGATED(20, writev)
|
||||
SYSCALL_HANDLED(20, writev)
|
||||
SYSCALL_DELEGATED(21, access)
|
||||
SYSCALL_DELEGATED(23, select)
|
||||
SYSCALL_HANDLED(24, sched_yield)
|
||||
|
||||
@ -107,9 +107,17 @@ void init_boot_processor_local(void)
|
||||
@ ensures \result == %gs;
|
||||
@ assigns \nothing;
|
||||
*/
|
||||
extern int num_processors;
|
||||
int ihk_mc_get_processor_id(void)
|
||||
{
|
||||
int id;
|
||||
void *gs;
|
||||
|
||||
gs = (void *)rdmsr(MSR_GS_BASE);
|
||||
if (gs < (void *)locals ||
|
||||
gs > ((void *)locals + LOCALS_SPAN * num_processors)) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
asm volatile("movl %%gs:0, %0" : "=r"(id));
|
||||
|
||||
|
||||
@ -41,6 +41,8 @@ extern char _head[], _end[];
|
||||
|
||||
extern unsigned long x86_kernel_phys_base;
|
||||
|
||||
int safe_kernel_map = 0;
|
||||
|
||||
/* Arch specific early allocation routine */
|
||||
void *early_alloc_pages(int nr_pages)
|
||||
{
|
||||
@ -109,6 +111,7 @@ struct page_table {
|
||||
};
|
||||
|
||||
static struct page_table *init_pt;
|
||||
static int init_pt_loaded = 0;
|
||||
static ihk_spinlock_t init_pt_lock;
|
||||
|
||||
static int use_1gb_page = 0;
|
||||
@ -172,19 +175,23 @@ static void init_normal_area(struct page_table *pt)
|
||||
unsigned long map_start, map_end, phys, pt_phys;
|
||||
int ident_index, virt_index;
|
||||
|
||||
map_start = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_START, 0);
|
||||
/*
|
||||
* This has to start from 0x00, see load_file() in IHK-SMP.
|
||||
* For security reasons, we could skip holes in the LWK
|
||||
* assigned physical memory, but Linux mappings already map
|
||||
* those anyway.
|
||||
*/
|
||||
map_start = 0;
|
||||
map_end = ihk_mc_get_memory_address(IHK_MC_GMA_MAP_END, 0);
|
||||
|
||||
kprintf("map_start = %lx, map_end = %lx\n", map_start, map_end);
|
||||
ident_index = map_start >> PTL4_SHIFT;
|
||||
virt_index = (MAP_ST_START >> PTL4_SHIFT) & (PT_ENTRIES - 1);
|
||||
|
||||
memset(pt, 0, sizeof(struct page_table));
|
||||
|
||||
for (phys = (map_start & ~(PTL4_SIZE - 1)); phys < map_end;
|
||||
phys += PTL4_SIZE) {
|
||||
pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL), phys,
|
||||
map_start, map_end);
|
||||
for (phys = map_start; phys < map_end; phys += PTL4_SIZE) {
|
||||
pt_phys = setup_l3(ihk_mc_alloc_pages(1, IHK_MC_AP_CRITICAL),
|
||||
phys, map_start, map_end);
|
||||
|
||||
pt->entry[ident_index++] = pt_phys | PFL4_PDIR_ATTR;
|
||||
pt->entry[virt_index++] = pt_phys | PFL4_PDIR_ATTR;
|
||||
@ -724,6 +731,26 @@ static void destroy_page_table(int level, struct page_table *pt)
|
||||
return;
|
||||
}
|
||||
|
||||
void ihk_mc_pt_destroy_pgd_subtree(struct page_table *pt, void *virt)
|
||||
{
|
||||
int l4idx, l3idx, l2idx, l1idx;
|
||||
unsigned long v = (unsigned long)virt;
|
||||
struct page_table *lower;
|
||||
|
||||
GET_VIRT_INDICES(v, l4idx, l3idx, l2idx, l1idx);
|
||||
|
||||
if (!(pt->entry[l4idx] & PF_PRESENT))
|
||||
return;
|
||||
|
||||
lower = (struct page_table *)
|
||||
phys_to_virt(pt->entry[l4idx] & PT_PHYSMASK);
|
||||
destroy_page_table(3, lower);
|
||||
|
||||
pt->entry[l4idx] = 0;
|
||||
dkprintf("%s: virt: 0x%lx, l4idx: %d subtree destroyed\n",
|
||||
__FUNCTION__, virt, l4idx);
|
||||
}
|
||||
|
||||
void ihk_mc_pt_destroy(struct page_table *pt)
|
||||
{
|
||||
const int level = 4; /* PML4 */
|
||||
@ -1960,6 +1987,28 @@ out:
|
||||
return ptep;
|
||||
}
|
||||
|
||||
pte_t *ihk_mc_pt_lookup_fault_pte(struct process_vm *vm, void *virt,
|
||||
int pgshift, void **basep, size_t *sizep, int *p2alignp)
|
||||
{
|
||||
int faulted = 0;
|
||||
pte_t *ptep;
|
||||
|
||||
retry:
|
||||
ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
||||
virt, pgshift, basep, sizep, p2alignp);
|
||||
if (!faulted && (!ptep || !pte_is_present(ptep))) {
|
||||
page_fault_process_vm(vm, virt, PF_POPULATE | PF_USER);
|
||||
faulted = 1;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (faulted && ptep && pte_is_present(ptep)) {
|
||||
kprintf("%s: successfully faulted 0x%lx\n", __FUNCTION__, virt);
|
||||
}
|
||||
|
||||
return ptep;
|
||||
}
|
||||
|
||||
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift,
|
||||
void **basep, size_t *sizep, int *p2alignp)
|
||||
{
|
||||
@ -2259,7 +2308,7 @@ out:
|
||||
|
||||
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
|
||||
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
|
||||
int pgshift, struct vm_range *range)
|
||||
int pgshift, struct vm_range *range)
|
||||
{
|
||||
int error;
|
||||
struct set_range_args args;
|
||||
@ -2603,6 +2652,61 @@ void init_low_area(struct page_table *pt)
|
||||
set_pt_large_page(pt, 0, 0, PTATTR_NO_EXECUTE|PTATTR_WRITABLE);
|
||||
}
|
||||
|
||||
static void init_linux_kernel_mapping(struct page_table *pt)
|
||||
{
|
||||
unsigned long map_start, map_end, phys;
|
||||
void *virt;
|
||||
int nr_memory_chunks, chunk_id, numa_id;
|
||||
|
||||
/* In case of safe_kernel_map option (safe_kernel_map == 1),
|
||||
processing to prevent destruction of the memory area on Linux side
|
||||
is executed */
|
||||
if (safe_kernel_map == 0) {
|
||||
kprintf("Straight-map entire physical memory\n");
|
||||
|
||||
/* Map 2 TB for now */
|
||||
map_start = 0;
|
||||
map_end = 0x20000000000;
|
||||
|
||||
virt = (void *)LINUX_PAGE_OFFSET;
|
||||
|
||||
kprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
|
||||
LINUX_PAGE_OFFSET, LINUX_PAGE_OFFSET + map_end, 0, map_end);
|
||||
|
||||
for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE) {
|
||||
if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) {
|
||||
kprintf("%s: error setting mapping for 0x%lx\n", __FUNCTION__, virt);
|
||||
}
|
||||
virt += LARGE_PAGE_SIZE;
|
||||
}
|
||||
} else {
|
||||
kprintf("Straight-map physical memory areas allocated to McKernel\n");
|
||||
|
||||
nr_memory_chunks = ihk_mc_get_nr_memory_chunks();
|
||||
if (nr_memory_chunks == 0) {
|
||||
kprintf("%s: ERROR: No memory chunk available.\n", __FUNCTION__);
|
||||
return;
|
||||
}
|
||||
|
||||
for (chunk_id = 0; chunk_id < nr_memory_chunks; chunk_id++) {
|
||||
if (ihk_mc_get_memory_chunk(chunk_id, &map_start, &map_end, &numa_id)) {
|
||||
kprintf("%s: ERROR: Memory chunk id (%d) out of range.\n", __FUNCTION__, chunk_id);
|
||||
continue;
|
||||
}
|
||||
|
||||
dkprintf("Linux kernel virtual: 0x%lx - 0x%lx -> 0x%lx - 0x%lx\n",
|
||||
LINUX_PAGE_OFFSET + map_start, LINUX_PAGE_OFFSET + map_end, map_start, map_end);
|
||||
|
||||
virt = (void *)(LINUX_PAGE_OFFSET + map_start);
|
||||
for (phys = map_start; phys < map_end; phys += LARGE_PAGE_SIZE, virt += LARGE_PAGE_SIZE) {
|
||||
if (set_pt_large_page(pt, virt, phys, PTATTR_WRITABLE) != 0) {
|
||||
kprintf("%s: set_pt_large_page() failed for 0x%lx\n", __FUNCTION__, virt);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void init_vsyscall_area(struct page_table *pt)
|
||||
{
|
||||
extern char vsyscall_page[];
|
||||
@ -2628,13 +2732,15 @@ void init_page_table(void)
|
||||
|
||||
/* Normal memory area */
|
||||
init_normal_area(init_pt);
|
||||
init_linux_kernel_mapping(init_pt);
|
||||
init_fixed_area(init_pt);
|
||||
init_low_area(init_pt);
|
||||
init_text_area(init_pt);
|
||||
init_vsyscall_area(init_pt);
|
||||
|
||||
load_page_table(init_pt);
|
||||
kprintf("Page table is now at %p\n", init_pt);
|
||||
init_pt_loaded = 1;
|
||||
kprintf("Page table is now at 0x%lx\n", init_pt);
|
||||
}
|
||||
|
||||
extern void __reserve_arch_pages(unsigned long, unsigned long,
|
||||
@ -2662,17 +2768,33 @@ void ihk_mc_reserve_arch_pages(struct ihk_page_allocator_desc *pa_allocator,
|
||||
unsigned long virt_to_phys(void *v)
|
||||
{
|
||||
unsigned long va = (unsigned long)v;
|
||||
|
||||
|
||||
if (va >= MAP_KERNEL_START) {
|
||||
dkprintf("%s: MAP_KERNEL_START <= 0x%lx <= LINUX_PAGE_OFFSET\n",
|
||||
__FUNCTION__, va);
|
||||
return va - MAP_KERNEL_START + x86_kernel_phys_base;
|
||||
} else {
|
||||
}
|
||||
else if (va >= LINUX_PAGE_OFFSET) {
|
||||
return va - LINUX_PAGE_OFFSET;
|
||||
}
|
||||
else if (va >= MAP_FIXED_START) {
|
||||
return va - MAP_FIXED_START;
|
||||
}
|
||||
else {
|
||||
dkprintf("%s: MAP_ST_START <= 0x%lx <= MAP_FIXED_START\n",
|
||||
__FUNCTION__, va);
|
||||
return va - MAP_ST_START;
|
||||
}
|
||||
}
|
||||
|
||||
void *phys_to_virt(unsigned long p)
|
||||
{
|
||||
return (void *)(p + MAP_ST_START);
|
||||
/* Before loading our own PT use straight mapping */
|
||||
if (!init_pt_loaded) {
|
||||
return (void *)(p + MAP_ST_START);
|
||||
}
|
||||
|
||||
return (void *)(p + LINUX_PAGE_OFFSET);
|
||||
}
|
||||
|
||||
int copy_from_user(void *dst, const void *src, size_t siz)
|
||||
|
||||
@ -44,11 +44,12 @@ fi
|
||||
|
||||
turbo=""
|
||||
ihk_irq=""
|
||||
safe_kernel_map=""
|
||||
umask_old=`umask`
|
||||
idle_halt=""
|
||||
allow_oversubscribe=""
|
||||
|
||||
while getopts :tk:c:m:o:f:r:q:i:d:e:hO OPT
|
||||
while getopts :stk:c:m:o:f:r:q:i:d:e:hO OPT
|
||||
do
|
||||
case ${OPT} in
|
||||
f) facility=${OPTARG}
|
||||
@ -61,6 +62,8 @@ do
|
||||
;;
|
||||
m) mem=${OPTARG}
|
||||
;;
|
||||
s) safe_kernel_map="safe_kernel_map"
|
||||
;;
|
||||
r) ikc_map=${OPTARG}
|
||||
;;
|
||||
q) ihk_irq=${OPTARG}
|
||||
@ -82,6 +85,9 @@ do
|
||||
esac
|
||||
done
|
||||
|
||||
redirect_kmsg=0
|
||||
turbo="turbo"
|
||||
|
||||
# Start ihkmond
|
||||
pid=`pidof ihkmond`
|
||||
if [ "${pid}" != "" ]; then
|
||||
@ -299,16 +305,25 @@ if ! grep -E 'ihk\s' /proc/modules &>/dev/null; then
|
||||
fi
|
||||
fi
|
||||
|
||||
# Increase swappiness so that we have better chance to allocate memory for IHK
|
||||
echo 100 > /proc/sys/vm/swappiness
|
||||
# Copy modules under /tmp to avoid loading from shared FS
|
||||
if mkdir -p /tmp/mcos-kmod; then
|
||||
cp ${KMODDIR}/* /tmp/mcos-kmod/
|
||||
KMODDIR="/tmp/mcos-kmod/"
|
||||
fi
|
||||
|
||||
# Drop Linux caches to free memory
|
||||
sync && echo 3 > /proc/sys/vm/drop_caches
|
||||
# Fujitsu drops caches for us in between jobs so don't do it on OFP
|
||||
if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" == "" ]; then
|
||||
# Increase swappiness so that we have better chance to allocate memory for IHK
|
||||
echo 100 > /proc/sys/vm/swappiness
|
||||
|
||||
# Merge free memory areas into large, physically contigous ones
|
||||
echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
|
||||
# Drop Linux caches to free memory
|
||||
sync && echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
sync
|
||||
# Merge free memory areas into large, physically contigous ones
|
||||
echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
|
||||
|
||||
sync
|
||||
fi
|
||||
|
||||
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
||||
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
|
||||
@ -329,41 +344,41 @@ if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
|
||||
error_exit "ihk_loaded"
|
||||
fi
|
||||
|
||||
# Offline-reonline RAM (special case for OFP SNC-4 flat mode)
|
||||
if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-7" ]; then
|
||||
for i in 0 1 2 3; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 0 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 1 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
for i in 4 5 6 7; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 0 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
for i in 4 5 6 7; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 1 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
fi
|
||||
|
||||
# Offline-reonline RAM (special case for OFP Quadrant flat mode)
|
||||
if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-1" ]; then
|
||||
for i in 1; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 0 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
for i in 1; do
|
||||
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
echo 1 > $f 2>&1 > /dev/null;
|
||||
done
|
||||
done
|
||||
fi
|
||||
# # Offline-reonline RAM (special case for OFP SNC-4 flat mode)
|
||||
# if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-7" ]; then
|
||||
# for i in 0 1 2 3; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 0 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 1 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# for i in 4 5 6 7; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 0 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# for i in 4 5 6 7; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 1 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# fi
|
||||
#
|
||||
# # Offline-reonline RAM (special case for OFP Quadrant flat mode)
|
||||
# if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-1" ]; then
|
||||
# for i in 1; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 0 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# for i in 1; do
|
||||
# find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
|
||||
# echo 1 | tee $f 2>/dev/null 1>/dev/null
|
||||
# done
|
||||
# done
|
||||
# fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
|
||||
echo "error: reserving memory" >&2
|
||||
@ -440,7 +455,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
|
||||
fi
|
||||
|
||||
# Set kernel arguments
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo $safe_kernel_map $idle_halt dump_level=${DUMP_LEVEL} $extra_kopts $allow_oversubscribe"; then
|
||||
echo "error: setting kernel arguments" >&2
|
||||
error_exit "os_created"
|
||||
fi
|
||||
|
||||
60
arch/x86_64/tools/mpimcexec.1in
Normal file
60
arch/x86_64/tools/mpimcexec.1in
Normal file
@ -0,0 +1,60 @@
|
||||
.\" Man page for mpimcexec
|
||||
.\"
|
||||
|
||||
.TH MPIMCEXEC 1 "@MCKERNEL_RELEASE_DATE@" "Version @MCKERNEL_VERSION@" MCKERNEL @MCKERNEL_VERSION@"
|
||||
.SH NAME
|
||||
mpimcexec \- run an MPI application on McKernel
|
||||
.\"
|
||||
|
||||
.\" ---------------------------- SYNOPSIS ----------------------------
|
||||
.SH SYNOPSIS
|
||||
.B mpimcexec \fR [\fIoptions\fR] \fI<command>\fR
|
||||
|
||||
.\" ---------------------------- DESCRIPTION ----------------------------
|
||||
.SH DESCRIPTION
|
||||
mpimcexec is a wrapper script for running MPI applications on McKernel.
|
||||
It internally calls mpiexec to spawn mcexec on compute nodes, which in
|
||||
turn runs \fI<command>\fR on McKernel. mpimcexec specifies a number of
|
||||
mcexec arguments that enable high performance execution.
|
||||
|
||||
.\" ---------------------------- OPTIONS ----------------------------
|
||||
.SH OPTIONS
|
||||
|
||||
.TP
|
||||
.B -ppn N, --ppn N, --ranks-per-node N
|
||||
Specify the number of MPI ranks per node. This argument is required.
|
||||
.TP
|
||||
.B -n N, --n N, --ranks N
|
||||
Specify the number of total MPI ranks.
|
||||
e.g.,
|
||||
$ mpimcexec -n 32 -ppn 4 ./a.out
|
||||
.br
|
||||
In the above example, 32 MPI processes are invoked
|
||||
on eight compute nodes each of which has four processes.
|
||||
.TP
|
||||
.B --nodes N
|
||||
Specify the number of compute nodes.
|
||||
By default, all nodes, specified by "PJM --mpi proc" option, are used.
|
||||
.TP
|
||||
.B --env, -env
|
||||
Pass an additional environment variable
|
||||
.TP
|
||||
.B -m N, --numa N
|
||||
Specify preferred NUMA node.
|
||||
.TP
|
||||
.B -h <file name>, ---hostfile <file name>
|
||||
Specify a host file for MPI.
|
||||
.TP
|
||||
.B --help
|
||||
Show help message.
|
||||
|
||||
.PP
|
||||
.\" ---------------------------- SEE ALSO ----------------------------
|
||||
.SH SEE ALSO
|
||||
\fBmcexec\fR (1), \fBmpiexec\fR (1)
|
||||
|
||||
.\" ---------------------------- AUTHORS ----------------------------
|
||||
.SH AUTHORS
|
||||
Copyright (C) 2018 McKernel Development Team, RIKEN, Japan
|
||||
|
||||
|
||||
147
arch/x86_64/tools/mpimcexec.in
Executable file
147
arch/x86_64/tools/mpimcexec.in
Executable file
@ -0,0 +1,147 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# OFP McKernel MPI wrapper script
|
||||
# author: Balazs Gerofi <bgerofi@riken.jp>
|
||||
# Copyright (C) 2018 RIKEN R-CCS
|
||||
#
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="${prefix}/bin"
|
||||
|
||||
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
|
||||
echo "You need at least bash-4.0 to run this script." >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
RANKS=""
|
||||
NODES=""
|
||||
PPN=""
|
||||
MPI_ENV=""
|
||||
COMMAND=""
|
||||
NUMA=""
|
||||
HOSTFILE=""
|
||||
|
||||
if [ ! -z "${PJM_PROC_BY_NODE}" ]; then
|
||||
PPN=${PJM_PROC_BY_NODE}
|
||||
elif [ ! -z "${MPI_LOCALNRANKS}" ]; then
|
||||
PPN=${MPI_LOCALNRANKS}
|
||||
fi
|
||||
|
||||
help_exit() {
|
||||
echo ""
|
||||
echo "Spawn an McKernel MPI job on Oakforest-PACS."
|
||||
echo "usage: `basename $0` -ppn ranks_per_node [--nodes nodes] [-n ranks] [--env additional_environment]... command"
|
||||
echo ""
|
||||
echo " -ppn | --ppn | --ranks-per-node Number of MPI ranks per node (required)"
|
||||
echo " -n | --n | --ranks Total number of MPI ranks in the job"
|
||||
echo " --nodes Number of nodes to be used"
|
||||
echo " --env | -env Pass an additional environment variable"
|
||||
echo " -m | --numa Preferred NUMA node(s)"
|
||||
echo " -h | --hostfile Host file for MPI"
|
||||
echo " --help Show help message"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Parse options
|
||||
while true; do
|
||||
case $1 in
|
||||
-ppn | --ppn | --ranks-per-node )
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "error: needs an interger value for -ppn, --ppn, or --ranks-per-node option"
|
||||
help_exit
|
||||
fi
|
||||
PPN=$2
|
||||
shift 2
|
||||
;;
|
||||
-n | --n | --ranks )
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "error: needs an interger value for -n, --n, or --ranks option"
|
||||
help_exit
|
||||
fi
|
||||
RANKS=$2
|
||||
shift 2
|
||||
;;
|
||||
-m | --numa )
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "error: needs an interger value for -m or --numa option"
|
||||
help_exit
|
||||
fi
|
||||
NUMA="-m $2"
|
||||
shift 2
|
||||
;;
|
||||
--nodes )
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "error: needs an interger value for --nodes option"
|
||||
help_exit
|
||||
fi
|
||||
NODES=$2
|
||||
shift 2
|
||||
;;
|
||||
--env | -env )
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "error: needs an environment variable name for -env or --env option"
|
||||
help_exit
|
||||
fi
|
||||
if [ -z "`echo $2 | grep I_MPI_PIN`" ]; then
|
||||
MPI_ENV=`echo "${MPI_ENV} -env $2" | xargs`
|
||||
fi
|
||||
shift 2
|
||||
;;
|
||||
-h | --hostfile )
|
||||
if [ $# -lt 2 ]; then
|
||||
echo "error: needs a file name for -h or --hostfile option"
|
||||
help_exit
|
||||
fi
|
||||
HOSTFILE="-hostfile $2"
|
||||
shift 2
|
||||
;;
|
||||
--help )
|
||||
help_exit
|
||||
;;
|
||||
* )
|
||||
COMMAND=$@
|
||||
break
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [ -z ${PPN} ]; then
|
||||
echo "error: please specify the number of ranks per node"
|
||||
help_exit
|
||||
fi
|
||||
|
||||
# Unless explicitly specified, use Fujitsu inherited value
|
||||
if [ -z ${NODES} ]; then
|
||||
NODES=${PJM_VNODES}
|
||||
fi
|
||||
|
||||
if [ -z ${RANKS} ] && [ -z ${NODES} ]; then
|
||||
echo "error: please specify the total number of ranks or the number of nodes"
|
||||
help_exit
|
||||
fi
|
||||
|
||||
if [ "x${COMMAND}" = "x" ]; then
|
||||
echo "error: please specify command"
|
||||
help_exit
|
||||
fi
|
||||
|
||||
# Calculate total job size if not specified
|
||||
if [ -z ${RANKS} ]; then
|
||||
let RANKS=(${PPN}*${NODES})
|
||||
fi
|
||||
|
||||
# Support direct SSH when not executed from Fujitsu job system
|
||||
if [ -z ${PJM_VNODES} ]; then
|
||||
HOSTFILE="-launcher-exec ssh ${HOSTFILE}"
|
||||
fi
|
||||
|
||||
export I_MPI_PIN=off
|
||||
export PSM2_RCVTHREAD=0
|
||||
export HFI_NO_CPUAFFINITY=1
|
||||
export I_MPI_COLL_INTRANODE_SHM_THRESHOLD=4194304
|
||||
export PSM2_MQ_RNDV_HFI_WINDOW=4194304
|
||||
export PSM2_MQ_EAGER_SDMA_SZ=65536
|
||||
export PSM2_MQ_RNDV_HFI_THRESH=200000
|
||||
|
||||
mpirun ${HOSTFILE} -n ${RANKS} -ppn ${PPN} ${MPI_ENV} ${BINDIR}/mcexec -n ${PPN} ${NUMA} --enable-hfi1 --mpol-threshold=1M --stack-premap=4M,4G --extend-heap-by=8M --disable-sched-yield --mpol-shm-premap ${COMMAND}
|
||||
|
||||
30
configure
vendored
30
configure
vendored
@ -1,6 +1,6 @@
|
||||
#! /bin/sh
|
||||
# Guess values for system-dependent variables and create Makefiles.
|
||||
# Generated by GNU Autoconf 2.69 for mckernel 1.5.0.
|
||||
# Generated by GNU Autoconf 2.69 for mckernel 1.5.1-knl+hfi.
|
||||
#
|
||||
#
|
||||
# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc.
|
||||
@ -577,8 +577,8 @@ MAKEFLAGS=
|
||||
# Identity of this package.
|
||||
PACKAGE_NAME='mckernel'
|
||||
PACKAGE_TARNAME='mckernel'
|
||||
PACKAGE_VERSION='1.5.0'
|
||||
PACKAGE_STRING='mckernel 1.5.0'
|
||||
PACKAGE_VERSION='1.5.1-knl+hfi'
|
||||
PACKAGE_STRING='mckernel 1.5.1-knl+hfi'
|
||||
PACKAGE_BUGREPORT=''
|
||||
PACKAGE_URL=''
|
||||
|
||||
@ -1262,7 +1262,7 @@ if test "$ac_init_help" = "long"; then
|
||||
# Omit some internal or obsolete options to make the list less imposing.
|
||||
# This message is too long to be a string in the A/UX 3.1 sh.
|
||||
cat <<_ACEOF
|
||||
\`configure' configures mckernel 1.5.0 to adapt to many kinds of systems.
|
||||
\`configure' configures mckernel 1.5.1-knl+hfi to adapt to many kinds of systems.
|
||||
|
||||
Usage: $0 [OPTION]... [VAR=VALUE]...
|
||||
|
||||
@ -1323,7 +1323,7 @@ fi
|
||||
|
||||
if test -n "$ac_init_help"; then
|
||||
case $ac_init_help in
|
||||
short | recursive ) echo "Configuration of mckernel 1.5.0:";;
|
||||
short | recursive ) echo "Configuration of mckernel 1.5.1-knl+hfi:";;
|
||||
esac
|
||||
cat <<\_ACEOF
|
||||
|
||||
@ -1431,7 +1431,7 @@ fi
|
||||
test -n "$ac_init_help" && exit $ac_status
|
||||
if $ac_init_version; then
|
||||
cat <<\_ACEOF
|
||||
mckernel configure 1.5.0
|
||||
mckernel configure 1.5.1-knl+hfi
|
||||
generated by GNU Autoconf 2.69
|
||||
|
||||
Copyright (C) 2012 Free Software Foundation, Inc.
|
||||
@ -1729,7 +1729,7 @@ cat >config.log <<_ACEOF
|
||||
This file contains any messages produced by compilers while
|
||||
running configure, to aid debugging if configure makes a mistake.
|
||||
|
||||
It was created by mckernel $as_me 1.5.0, which was
|
||||
It was created by mckernel $as_me 1.5.1-knl+hfi, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
$ $0 $@
|
||||
@ -2082,11 +2082,11 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
|
||||
|
||||
|
||||
|
||||
IHK_VERSION=1.5.0
|
||||
MCKERNEL_VERSION=1.5.0
|
||||
IHK_VERSION=1.5.1-knl+hfi
|
||||
MCKERNEL_VERSION=1.5.1-knl+hfi
|
||||
DCFA_VERSION=DCFA_VERSION_m4
|
||||
IHK_RELEASE_DATE=2018-04-05
|
||||
MCKERNEL_RELEASE_DATE=2018-04-05
|
||||
IHK_RELEASE_DATE=2019-05-14
|
||||
MCKERNEL_RELEASE_DATE=2019-05-14
|
||||
DCFA_RELEASE_DATE=DCFA_RELEASE_DATE_m4
|
||||
|
||||
|
||||
@ -5060,7 +5060,7 @@ ac_config_headers="$ac_config_headers config.h"
|
||||
|
||||
# POSTK_DEBUG_ARCH_DEP_37
|
||||
# AC_CONFIG_FILES arch dependfiles separate
|
||||
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/Makefile"
|
||||
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/user/mcexec.1:executer/user/mcexec.1in executer/user/vmcore2mckdump executer/user/arch/$ARCH/Makefile executer/user/arch/x86_64/Makefile executer/kernel/mcctrl/Makefile executer/kernel/mcctrl/arch/$ARCH/Makefile executer/kernel/mcoverlayfs/Makefile executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile executer/kernel/mcoverlayfs/linux-4.0.9/Makefile executer/kernel/mcoverlayfs/linux-4.6.7/Makefile executer/include/qlmpilib.h kernel/Makefile kernel/Makefile.build kernel/include/swapfmt.h arch/x86_64/tools/mcreboot-attached-mic.sh arch/x86_64/tools/mcshutdown-attached-mic.sh arch/x86_64/tools/mcreboot-builtin-x86.sh arch/x86_64/tools/mcreboot-smp-x86.sh arch/x86_64/tools/mcstop+release-smp-x86.sh arch/x86_64/tools/mcoverlay-destroy-smp-x86.sh arch/x86_64/tools/mcoverlay-create-smp-x86.sh arch/x86_64/tools/eclair-dump-backtrace.exp arch/x86_64/tools/mcshutdown-builtin-x86.sh arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in arch/x86_64/tools/mpimcexec arch/x86_64/tools/mpimcexec.1:arch/x86_64/tools/mpimcexec.1in arch/x86_64/tools/irqbalance_mck.service arch/x86_64/tools/irqbalance_mck.in tools/mcstat/Makefile"
|
||||
|
||||
|
||||
if test "$TARGET" = "smp-x86"; then
|
||||
@ -5585,7 +5585,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1
|
||||
# report actual input values of CONFIG_FILES etc. instead of their
|
||||
# values after options handling.
|
||||
ac_log="
|
||||
This file was extended by mckernel $as_me 1.5.0, which was
|
||||
This file was extended by mckernel $as_me 1.5.1-knl+hfi, which was
|
||||
generated by GNU Autoconf 2.69. Invocation command line was
|
||||
|
||||
CONFIG_FILES = $CONFIG_FILES
|
||||
@ -5647,7 +5647,7 @@ _ACEOF
|
||||
cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
|
||||
ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
|
||||
ac_cs_version="\\
|
||||
mckernel config.status 1.5.0
|
||||
mckernel config.status 1.5.1-knl+hfi
|
||||
configured by $0, generated by GNU Autoconf 2.69,
|
||||
with options \\"\$ac_cs_config\\"
|
||||
|
||||
@ -5795,6 +5795,8 @@ do
|
||||
"arch/x86_64/tools/eclair-dump-backtrace.exp") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/eclair-dump-backtrace.exp" ;;
|
||||
"arch/x86_64/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcshutdown-builtin-x86.sh" ;;
|
||||
"arch/x86_64/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in" ;;
|
||||
"arch/x86_64/tools/mpimcexec") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mpimcexec" ;;
|
||||
"arch/x86_64/tools/mpimcexec.1") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/mpimcexec.1:arch/x86_64/tools/mpimcexec.1in" ;;
|
||||
"arch/x86_64/tools/irqbalance_mck.service") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.service" ;;
|
||||
"arch/x86_64/tools/irqbalance_mck.in") CONFIG_FILES="$CONFIG_FILES arch/x86_64/tools/irqbalance_mck.in" ;;
|
||||
"tools/mcstat/Makefile") CONFIG_FILES="$CONFIG_FILES tools/mcstat/Makefile" ;;
|
||||
|
||||
10
configure.ac
10
configure.ac
@ -1,9 +1,9 @@
|
||||
# configure.ac COPYRIGHT FUJITSU LIMITED 2015-2016
|
||||
AC_PREREQ(2.63)
|
||||
m4_define([IHK_VERSION_m4],[1.5.0])dnl
|
||||
m4_define([MCKERNEL_VERSION_m4],[1.5.0])dnl
|
||||
m4_define([IHK_RELEASE_DATE_m4],[2018-04-05])dnl
|
||||
m4_define([MCKERNEL_RELEASE_DATE_m4],[2018-04-05])dnl
|
||||
m4_define([IHK_VERSION_m4],[1.5.1-knl+hfi])dnl
|
||||
m4_define([MCKERNEL_VERSION_m4],[1.5.1-knl+hfi])dnl
|
||||
m4_define([IHK_RELEASE_DATE_m4],[2019-05-14])dnl
|
||||
m4_define([MCKERNEL_RELEASE_DATE_m4],[2019-05-14])dnl
|
||||
|
||||
AC_INIT([mckernel], MCKERNEL_VERSION_m4)
|
||||
|
||||
@ -568,6 +568,8 @@ AC_CONFIG_FILES([
|
||||
arch/x86_64/tools/eclair-dump-backtrace.exp
|
||||
arch/x86_64/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86_64/tools/mcreboot.1:arch/x86_64/tools/mcreboot.1in
|
||||
arch/x86_64/tools/mpimcexec
|
||||
arch/x86_64/tools/mpimcexec.1:arch/x86_64/tools/mpimcexec.1in
|
||||
arch/x86_64/tools/irqbalance_mck.service
|
||||
arch/x86_64/tools/irqbalance_mck.in
|
||||
tools/mcstat/Makefile
|
||||
|
||||
@ -91,6 +91,7 @@ struct program_image_section {
|
||||
|
||||
struct get_cpu_set_arg {
|
||||
int nr_processes;
|
||||
int *process_rank;
|
||||
void *cpu_set;
|
||||
size_t cpu_set_size; // Size in bytes
|
||||
int *target_core;
|
||||
@ -109,6 +110,8 @@ typedef unsigned long __cpu_set_unit;
|
||||
#define MPOL_NO_BSS 0x04
|
||||
#define MPOL_SHM_PREMAP 0x08
|
||||
|
||||
#define MCEXEC_HFI1 0x01
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
int status;
|
||||
@ -137,12 +140,14 @@ struct program_load_desc {
|
||||
unsigned long envs_len;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
unsigned long mcexec_flags;
|
||||
unsigned long mpol_flags;
|
||||
unsigned long mpol_threshold;
|
||||
unsigned long heap_extension;
|
||||
long stack_premap;
|
||||
unsigned long mpol_bind_mask;
|
||||
int nr_processes;
|
||||
int process_rank;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
|
||||
int profile;
|
||||
@ -189,6 +194,7 @@ struct syscall_response {
|
||||
long ret;
|
||||
unsigned long fault_address;
|
||||
unsigned long fault_reason;
|
||||
void *private_data;
|
||||
};
|
||||
|
||||
struct syscall_ret_desc {
|
||||
|
||||
@ -692,6 +692,7 @@ static long mcexec_get_cpuset(ihk_os_t os, unsigned long arg)
|
||||
wake_up_interruptible(&pli_next->pli_wq);
|
||||
/* Reset process counter */
|
||||
pe->nr_processes_left = pe->nr_processes;
|
||||
pe->process_rank = 0;
|
||||
}
|
||||
|
||||
/* Wait for the rest if not the last or if the last but
|
||||
@ -923,6 +924,15 @@ next_cpu:
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
/* Copy rank */
|
||||
if (copy_to_user(req.process_rank, &pe->process_rank,
|
||||
sizeof(int))) {
|
||||
printk("%s: error copying process rank to user\n",
|
||||
__FUNCTION__);
|
||||
ret = -EINVAL;
|
||||
goto put_and_unlock_out;
|
||||
}
|
||||
|
||||
/* mcexec NUMA to bind to */
|
||||
mcexec_linux_numa = cpu_to_node(mckernel_cpu_2_linux_cpu(udp, cpu));
|
||||
if (copy_to_user(req.mcexec_linux_numa, &mcexec_linux_numa,
|
||||
@ -970,6 +980,7 @@ next_cpu:
|
||||
}
|
||||
/* Otherwise wake up next process in list */
|
||||
else {
|
||||
++pe->process_rank;
|
||||
pli_next = list_first_entry(&pe->pli_list,
|
||||
struct process_list_item, list);
|
||||
list_del(&pli_next->list);
|
||||
@ -1062,7 +1073,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/* NOTE: per-process data is refcounted.
|
||||
* For every get call the user should call put. */
|
||||
struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
|
||||
@ -1192,7 +1202,7 @@ int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
|
||||
return -1;
|
||||
}
|
||||
|
||||
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
|
||||
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %lu\n",
|
||||
__FUNCTION__,
|
||||
packet->req.rtid,
|
||||
packet->req.ttid,
|
||||
@ -1362,7 +1372,7 @@ retry_alloc:
|
||||
}
|
||||
|
||||
packet->req.valid = 0; /* ack */
|
||||
dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
|
||||
dprintk("%s: system call: %lu, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
|
||||
"args[3]: %lu, args[4]: %lu, args[5]: %lu\n",
|
||||
__FUNCTION__,
|
||||
packet->req.number,
|
||||
@ -1487,7 +1497,7 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys, desc.size, NULL, 0);
|
||||
#endif
|
||||
|
||||
dprintk("mcexec_load_syscall: %s (desc.size: %d)\n", rpm, desc.size);
|
||||
dprintk("mcexec_load_syscall: %p (desc.size: %lu)\n", rpm, desc.size);
|
||||
|
||||
if (copy_to_user((void *__user)desc.dest, rpm, desc.size)) {
|
||||
return -EFAULT;
|
||||
|
||||
@ -314,6 +314,7 @@ struct mcctrl_part_exec {
|
||||
struct mutex lock;
|
||||
int nr_processes;
|
||||
int nr_processes_left;
|
||||
int process_rank;
|
||||
cpumask_t cpus_used;
|
||||
struct list_head pli_list;
|
||||
};
|
||||
|
||||
@ -2065,6 +2065,17 @@ void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
|
||||
/* Map response structure and notify offloading thread */
|
||||
res->ret = ret;
|
||||
res->stid = stid;
|
||||
res->private_data = 0;
|
||||
|
||||
/* Special case for open() to return private_data */
|
||||
if (packet->req.number == __NR_open && ret > 0) {
|
||||
struct fd f;
|
||||
f = fdget(ret);
|
||||
if (f.file) {
|
||||
res->private_data = f.file->private_data;
|
||||
fdput(f);
|
||||
}
|
||||
}
|
||||
|
||||
if (__notify_syscall_requester(os, packet, res) < 0) {
|
||||
printk("%s: WARNING: failed to notify PID %d\n",
|
||||
|
||||
@ -42,8 +42,8 @@ ifeq ($(ARCH), arm64)
|
||||
eclair: eclair.c arch/$(ARCH)/arch-eclair.c
|
||||
$(CC) -I.. -I. -I./arch/$(ARCH)/include -I$(VPATH)/.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
else
|
||||
eclair: eclair.c
|
||||
$(CC) $(CFLAGS) -I${IHKDIR} -o $@ $^ $(LIBS)
|
||||
eclair: eclair.c arch/$(ARCH)/arch-eclair.c
|
||||
$(CC) -I.. -I$(VPATH) -I$(VPATH)/arch/$(ARCH)/include $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
endif
|
||||
|
||||
ldump2mcdump.so: ldump2mcdump.c
|
||||
|
||||
@ -2,8 +2,18 @@
|
||||
#ifndef HEADER_USER_X86_ECLAIR_H
|
||||
#define HEADER_USER_X86_ECLAIR_H
|
||||
|
||||
#define MAP_KERNEL 0xFFFFFFFF80000000
|
||||
#define MAP_ST 0xFFFF800000000000
|
||||
#ifndef POSTK_DEBUG_ARCH_DEP_34
|
||||
#define MAP_ST_START 0xffff800000000000UL
|
||||
#define MAP_VMAP_START 0xffff850000000000UL
|
||||
#define MAP_FIXED_START 0xffff860000000000UL
|
||||
#define LINUX_PAGE_OFFSET 0xffff880000000000UL
|
||||
#define MAP_KERNEL_START 0xFFFFFFFFFE800000UL
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
|
||||
/* TODO: these should be updated when McKernel changes */
|
||||
#define MCKERNEL_ELF_START "0xFFFFFFFFFE801000"
|
||||
#define MCKERNEL_ELF_LEN "0x0000000000100000"
|
||||
|
||||
|
||||
#define ARCH_CLV_SPAN "x86_cpu_local_variables_span"
|
||||
|
||||
|
||||
@ -8,9 +8,7 @@
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_33
|
||||
#include "../config.h"
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_33 */
|
||||
#include <bfd.h>
|
||||
#include <fcntl.h>
|
||||
#include <inttypes.h>
|
||||
@ -22,10 +20,8 @@
|
||||
#include <arpa/inet.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <ihk/ihk_host_user.h>
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_34
|
||||
#include <eclair.h>
|
||||
#include <arch-eclair.h>
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
|
||||
#define CPU_TID_BASE 1000000
|
||||
|
||||
@ -85,11 +81,7 @@ static struct thread_info *curr_thread = NULL;
|
||||
static uintptr_t ihk_mc_switch_context = -1;
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_34
|
||||
uintptr_t lookup_symbol(char *name) {
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
static uintptr_t lookup_symbol(char *name) {
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
int i;
|
||||
|
||||
for (i = 0; i < nsyms; ++i) {
|
||||
@ -101,22 +93,22 @@ static uintptr_t lookup_symbol(char *name) {
|
||||
return NOSYMBOL;
|
||||
} /* lookup_symbol() */
|
||||
|
||||
#define NOPHYS ((uintptr_t)-1)
|
||||
|
||||
static uintptr_t virt_to_phys(uintptr_t va) {
|
||||
#ifndef POSTK_DEBUG_ARCH_DEP_34
|
||||
#define MAP_KERNEL 0xFFFFFFFF80000000
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
if (va >= MAP_KERNEL) {
|
||||
return (va - MAP_KERNEL + kernel_base);
|
||||
if (va >= MAP_KERNEL_START) {
|
||||
return va - MAP_KERNEL_START + kernel_base;
|
||||
}
|
||||
#ifndef POSTK_DEBUG_ARCH_DEP_34
|
||||
#define MAP_ST 0xFFFF800000000000
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
if (va >= MAP_ST) {
|
||||
return (va - MAP_ST);
|
||||
else if (va >= LINUX_PAGE_OFFSET) {
|
||||
return va - LINUX_PAGE_OFFSET;
|
||||
}
|
||||
if (0) printf("virt_to_phys(%lx): -1\n", va);
|
||||
#define NOPHYS ((uintptr_t)-1)
|
||||
else if (va >= MAP_FIXED_START) {
|
||||
return va - MAP_FIXED_START;
|
||||
}
|
||||
else if (va >= MAP_ST_START) {
|
||||
return va - MAP_ST_START;
|
||||
}
|
||||
|
||||
return NOPHYS;
|
||||
} /* virt_to_phys() */
|
||||
|
||||
@ -673,11 +665,7 @@ static int setup_dump(char *fname) {
|
||||
return 0;
|
||||
} /* setup_dump() */
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_38
|
||||
static ssize_t print_hex(char *buf, size_t buf_size, char *str) {
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
static ssize_t print_hex(char *buf, char *str) {
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
|
||||
char *p;
|
||||
char *q;
|
||||
@ -702,11 +690,7 @@ static ssize_t print_hex(char *buf, char *str) {
|
||||
return (q - buf);
|
||||
} /* print_hex() */
|
||||
|
||||
#if defined(POSTK_DEBUG_ARCH_DEP_34) && defined(POSTK_DEBUG_ARCH_DEP_38)
|
||||
ssize_t print_bin(char *buf, size_t buf_size, void *data, size_t size) {
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/
|
||||
static ssize_t print_bin(char *buf, void *data, size_t size) {
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 && POSTK_DEBUG_ARCH_DEP_38*/
|
||||
uint8_t *p;
|
||||
char *q;
|
||||
int i;
|
||||
@ -733,13 +717,8 @@ static ssize_t print_bin(char *buf, void *data, size_t size) {
|
||||
return (q - buf);
|
||||
} /* print_bin() */
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_38
|
||||
static void command(const char *cmd, char *res, size_t res_size) {
|
||||
const char *p;
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
static void command(char *cmd, char *res) {
|
||||
char *p;
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
char *rbp;
|
||||
|
||||
p = cmd;
|
||||
@ -801,11 +780,7 @@ static void command(char *cmd, char *res) {
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
rbp += sprintf(rbp, "l");
|
||||
if (0)
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_38
|
||||
rbp += print_hex(rbp, res_size, str);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
rbp += print_hex(rbp, str);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
rbp += sprintf(rbp, "%s", str);
|
||||
}
|
||||
else if (!strcmp(p, "D")) {
|
||||
@ -814,20 +789,9 @@ static void command(char *cmd, char *res) {
|
||||
}
|
||||
else if (!strcmp(p, "g")) {
|
||||
if (curr_thread->cpu < 0) {
|
||||
#ifndef POSTK_DEBUG_ARCH_DEP_34
|
||||
struct x86_kregs {
|
||||
uintptr_t rsp, rbp, rbx, rsi;
|
||||
uintptr_t rdi, r12, r13, r14;
|
||||
uintptr_t r15, rflags, rsp0;
|
||||
};
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
|
||||
int error;
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_34
|
||||
struct arch_kregs kregs;
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
struct x86_kregs kregs;
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
|
||||
error = read_mem(curr_thread->process+K(CTX_OFFSET),
|
||||
&kregs, sizeof(kregs));
|
||||
@ -836,36 +800,7 @@ static void command(char *cmd, char *res) {
|
||||
break;
|
||||
}
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_34
|
||||
print_kregs(rbp, res_size, &kregs);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rax */
|
||||
rbp += print_bin(rbp, &kregs.rbx, sizeof(uint64_t));
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rcx */
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* rdx */
|
||||
rbp += print_bin(rbp, &kregs.rsi, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &kregs.rdi, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &kregs.rbp, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &kregs.rsp, sizeof(uint64_t));
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r8 */
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r9 */
|
||||
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r10 */
|
||||
rbp += sprintf(rbp, "xxxxxxxxxxxxxxxx"); /* r11 */
|
||||
rbp += print_bin(rbp, &kregs.r12, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &kregs.r13, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &kregs.r14, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &kregs.r15, sizeof(uint64_t));
|
||||
rbp += print_bin(rbp, &ihk_mc_switch_context,
|
||||
sizeof(uint64_t)); /* rip */
|
||||
rbp += print_bin(rbp, &kregs.rflags, sizeof(uint32_t));
|
||||
rbp += sprintf(rbp, "xxxxxxxx"); /* cs */
|
||||
rbp += sprintf(rbp, "xxxxxxxx"); /* ss */
|
||||
rbp += sprintf(rbp, "xxxxxxxx"); /* ds */
|
||||
rbp += sprintf(rbp, "xxxxxxxx"); /* es */
|
||||
rbp += sprintf(rbp, "xxxxxxxx"); /* fs */
|
||||
rbp += sprintf(rbp, "xxxxxxxx"); /* gs */
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
}
|
||||
else {
|
||||
int error;
|
||||
@ -943,11 +878,7 @@ static void command(char *cmd, char *res) {
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_34 */
|
||||
rbp += sprintf(rbp, "l");
|
||||
if (0)
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_38
|
||||
rbp += print_hex(rbp, res_size, str);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
rbp += print_hex(rbp, str);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
rbp += sprintf(rbp, "%s", str);
|
||||
}
|
||||
else if (!strncmp(p, "T", 1)) {
|
||||
@ -1039,11 +970,7 @@ static void command(char *cmd, char *res) {
|
||||
else {
|
||||
q += sprintf(q, "status=%#x", ti->status);
|
||||
}
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_38
|
||||
rbp += print_hex(rbp, res_size, buf);
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
rbp += print_hex(rbp, buf);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
}
|
||||
} while (0);
|
||||
|
||||
@ -1272,11 +1199,7 @@ int main(int argc, char *argv[]) {
|
||||
}
|
||||
mode = 0;
|
||||
fputc('+', ofp);
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_38
|
||||
command(lbuf, rbuf, sizeof(rbuf));
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
command(lbuf, rbuf);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_38 */
|
||||
sum = 0;
|
||||
for (p = rbuf; *p != '\0'; ++p) {
|
||||
sum += *p;
|
||||
|
||||
@ -3,11 +3,7 @@
|
||||
#ifndef HEADER_USER_COMMON_ECLAIR_H
|
||||
#define HEADER_USER_COMMON_ECLAIR_H
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_76 /* header path fix */
|
||||
#include "../config.h"
|
||||
#else /* POSTK_DEBUG_ARCH_DEP_76 */
|
||||
#include <config.h>
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_76 */
|
||||
#include <stdio.h>
|
||||
#include <inttypes.h>
|
||||
#include <arch-eclair.h>
|
||||
|
||||
@ -73,6 +73,13 @@ e.g.: 10k means 10Kibyte, 100M 100Mibyte, 1G 1Gibyte
|
||||
Enable system call profiling. After the execution, profiling
|
||||
information may be obtained by the ihkosctl tool.
|
||||
|
||||
.TP
|
||||
.B -m N
|
||||
Specify the NUMA memory policy. In the case of Quadrant&Flat mode, NUMA node
|
||||
0 is CPU cores and NUMA node 1 is MCDRAM. Thus, option "-m 1"
|
||||
means that user's memory areas are assigned in MCDRAM.
|
||||
|
||||
|
||||
.TP
|
||||
.B --mpol-no-heap, --mpol-no-stack, --mpol-no-bss,
|
||||
Disregard NUMA memory policy in the heap/stack/BSS areas.
|
||||
@ -93,7 +100,7 @@ This option eliminates potential kernel resource contention by
|
||||
avoiding page faults in the shared memory region.
|
||||
|
||||
.TP
|
||||
.B -m N, --mpol-threshold=N
|
||||
.B -M N, --mpol-threshold=N
|
||||
Specify the threshold of memory size for respecting the memory
|
||||
allocation policy in NUMA machines. If the size of memory allocation
|
||||
is smaller than the one specified in this option, the memory area is
|
||||
|
||||
@ -221,6 +221,7 @@ static int mpol_no_stack = 0;
|
||||
static int mpol_no_bss = 0;
|
||||
static int mpol_shm_premap = 0;
|
||||
static int no_bind_ikc_map = 0;
|
||||
static int hfi1_enabled = 0;
|
||||
static unsigned long mpol_threshold = 0;
|
||||
static unsigned long heap_extension = (4*1024);
|
||||
static int profile = 0;
|
||||
@ -1653,6 +1654,8 @@ static void destroy_local_environ(char **local_env)
|
||||
unsigned long atobytes(char *string)
|
||||
{
|
||||
unsigned long mult = 1;
|
||||
unsigned long ret;
|
||||
char orig_postfix = 0;
|
||||
char *postfix;
|
||||
errno = ERANGE;
|
||||
|
||||
@ -1664,19 +1667,26 @@ unsigned long atobytes(char *string)
|
||||
|
||||
if (*postfix == 'k' || *postfix == 'K') {
|
||||
mult = 1024;
|
||||
orig_postfix = *postfix;
|
||||
*postfix = 0;
|
||||
}
|
||||
else if (*postfix == 'm' || *postfix == 'M') {
|
||||
mult = 1024 * 1024;
|
||||
orig_postfix = *postfix;
|
||||
*postfix = 0;
|
||||
}
|
||||
else if (*postfix == 'g' || *postfix == 'G') {
|
||||
mult = 1024 * 1024 * 1024;
|
||||
orig_postfix = *postfix;
|
||||
*postfix = 0;
|
||||
}
|
||||
|
||||
ret = atol(string) * mult;
|
||||
if (orig_postfix)
|
||||
*postfix = orig_postfix;
|
||||
|
||||
errno = 0;
|
||||
return atol(string) * mult;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct option mcexec_options[] = {
|
||||
@ -1744,6 +1754,12 @@ static struct option mcexec_options[] = {
|
||||
.flag = &disable_sched_yield,
|
||||
.val = 1,
|
||||
},
|
||||
{
|
||||
.name = "enable-hfi1",
|
||||
.has_arg = no_argument,
|
||||
.flag = &hfi1_enabled,
|
||||
.val = 1,
|
||||
},
|
||||
{
|
||||
.name = "extend-heap-by",
|
||||
.has_arg = required_argument,
|
||||
@ -2416,6 +2432,7 @@ int main(int argc, char **argv)
|
||||
struct get_cpu_set_arg cpu_set_arg;
|
||||
int mcexec_linux_numa = 0;
|
||||
int ikc_mapped = 0;
|
||||
int process_rank = -1;
|
||||
cpu_set_t mcexec_cpu_set;
|
||||
|
||||
CPU_ZERO(&mcexec_cpu_set);
|
||||
@ -2424,6 +2441,7 @@ int main(int argc, char **argv)
|
||||
cpu_set_arg.cpu_set_size = sizeof(desc->cpu_set);
|
||||
cpu_set_arg.nr_processes = nr_processes;
|
||||
cpu_set_arg.target_core = &target_core;
|
||||
cpu_set_arg.process_rank = &process_rank;
|
||||
cpu_set_arg.mcexec_linux_numa = &mcexec_linux_numa;
|
||||
cpu_set_arg.mcexec_cpu_set = &mcexec_cpu_set;
|
||||
cpu_set_arg.mcexec_cpu_set_size = sizeof(mcexec_cpu_set);
|
||||
@ -2436,6 +2454,7 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
desc->cpu = target_core;
|
||||
desc->process_rank = process_rank;
|
||||
|
||||
/* Bind to CPU cores where the LWK process' IKC target maps to */
|
||||
if (ikc_mapped && !no_bind_ikc_map) {
|
||||
@ -2523,6 +2542,11 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
desc->mcexec_flags = 0;
|
||||
if (hfi1_enabled) {
|
||||
desc->mcexec_flags |= MCEXEC_HFI1;
|
||||
}
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {
|
||||
perror("prepare");
|
||||
close(fd);
|
||||
@ -3308,7 +3332,6 @@ int main_loop(struct thread_data_s *my_thread)
|
||||
memset(&w, '\0', sizeof w);
|
||||
w.cpu = cpu;
|
||||
w.pid = getpid();
|
||||
|
||||
while (((ret = ioctl(fd, MCEXEC_UP_WAIT_SYSCALL, (unsigned long)&w)) == 0) || (ret == -1 && errno == EINTR)) {
|
||||
|
||||
if (ret) {
|
||||
@ -3499,6 +3522,7 @@ int main_loop(struct thread_data_s *my_thread)
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) {
|
||||
fprintf(stderr, "__NR_gettid(): error transfering TIDs\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
free(tids);
|
||||
@ -4189,6 +4213,7 @@ return_execve2:
|
||||
}
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case 801: {// swapout
|
||||
#ifdef ENABLE_QLMPI
|
||||
int rc;
|
||||
@ -4378,6 +4403,11 @@ return_linux_spawn:
|
||||
break;
|
||||
}
|
||||
|
||||
case __NR_writev:
|
||||
ret = do_generic_syscall(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
if (archdep_syscall(&w, &ret)) {
|
||||
ret = do_generic_syscall(&w);
|
||||
|
||||
@ -8,6 +8,7 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o
|
||||
OBJS += rbtree.o
|
||||
OBJS += pager.o
|
||||
OBJS += file_ops.o user_sdma.o sdma.o user_exp_rcv.o chip.o
|
||||
# POSTK_DEBUG_ARCH_DEP_18 coredump arch separation.
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
|
||||
@ -68,6 +68,11 @@ static void ap_wait(void)
|
||||
init_host_ikc2mckernel();
|
||||
init_host_ikc2linux(ikc_cpu);
|
||||
mcs_lock_unlock_noirq(&ap_syscall_semaphore, &mcs_node);
|
||||
|
||||
{
|
||||
extern void hfi1_kmalloc_cache_prealloc(void);
|
||||
hfi1_kmalloc_cache_prealloc();
|
||||
}
|
||||
}
|
||||
|
||||
/* one of them listens */
|
||||
|
||||
126
kernel/chip.c
Normal file
126
kernel/chip.c
Normal file
@ -0,0 +1,126 @@
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains all of the code that is specific to the HFI chip,
|
||||
* or what we use of them.
|
||||
*/
|
||||
|
||||
#include <hfi1/hfi.h>
|
||||
#include <hfi1/chip_registers.h>
|
||||
#include <hfi1/chip.h>
|
||||
|
||||
//#define DEBUG_PRINT_CHIP
|
||||
|
||||
#ifdef DEBUG_PRINT_CHIP
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if(0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
|
||||
/*
 * Program a single entry of the chip's receive array (RcvArray).
 *
 * index is the index into the receive array
 * type  is the entry type (PT_INVALID invalidates the entry; values
 *       above PT_INVALID are rejected)
 * pa    is the physical address of the buffer the entry should map
 * order is the encoded buffer size written to the BUF_SIZE field
 */
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
		  u32 type, unsigned long pa, u16 order)
{
	u64 reg;
	/* prefer the write-combining mapping of the array when available */
	void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc :
			      (dd->kregbase1 + RCV_ARRAY));

	/* nothing to do when the device is not present/accessible */
	if (!(dd->flags & HFI1_PRESENT))
		goto done;

	if (type == PT_INVALID) {
		/* an invalidated entry is written with a zero address */
		pa = 0;
	} else if (type > PT_INVALID) {
		kprintf("unexpected receive array type %u for index %u, not handled\n",
			type, index);
		goto done;
	}

#ifdef TIDRDMA_DEBUG
	hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx",
		  pt_name(type), index, pa, (unsigned long)order);
#endif

#define RT_ADDR_SHIFT 12	/* 4KB kernel address boundary */
	/* compose: write-enable | buffer size | 4KB-aligned physical address */
	reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK
		| (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT
		| ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK)
			<< RCV_ARRAY_RT_ADDR_SHIFT;
	dkprintf("type %d, index 0x%x, pa 0x%lx, bsize 0x%lx, reg 0x%llx\n",
		 type, index, pa, (unsigned long)order, reg);
	/* each RcvArray entry is one 64-bit register */
	writeq(reg, base + (index * 8));

	if (type == PT_EAGER)
		/*
		 * Eager entries are written one-by-one so we have to push them
		 * after we write the entry.
		 */
		flush_wc();
done:
	return;
}
|
||||
|
||||
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd)
|
||||
{
|
||||
struct hfi1_devdata *dd = rcd->dd;
|
||||
u32 i;
|
||||
|
||||
#if 0
|
||||
/* this could be optimized */
|
||||
for (i = rcd->eager_base; i < rcd->eager_base +
|
||||
rcd->egrbufs.alloced; i++)
|
||||
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
|
||||
#endif
|
||||
for (i = rcd->expected_base;
|
||||
i < rcd->expected_base + rcd->expected_count; i++)
|
||||
hfi1_put_tid(dd, i, PT_INVALID, 0, 0);
|
||||
}
|
||||
|
||||
@ -5,7 +5,7 @@ PHDRS
|
||||
}
|
||||
SECTIONS
|
||||
{
|
||||
. = 0xffffffff80001000;
|
||||
. = 0xFFFFFFFFFE801000;
|
||||
_head = .;
|
||||
|
||||
.text : {
|
||||
|
||||
291
kernel/file_ops.c
Normal file
291
kernel/file_ops.c
Normal file
@ -0,0 +1,291 @@
|
||||
#include <hfi1/file_ops.h>
|
||||
#include <hfi1/hfi.h>
|
||||
#include <hfi1/user_sdma.h>
|
||||
#include <hfi1/sdma.h>
|
||||
#include <hfi1/ihk_hfi1_common.h>
|
||||
#include <hfi1/user_exp_rcv.h>
|
||||
#include <errno.h>
|
||||
|
||||
//#define DEBUG_PRINT_FOPS
|
||||
|
||||
#ifdef DEBUG_PRINT_FOPS
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
long hfi1_file_ioctl(void *private_data, unsigned int cmd,
|
||||
unsigned long arg, unsigned long t_s)
|
||||
{
|
||||
struct hfi1_filedata *fd = private_data;
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
struct hfi1_tid_info tinfo;
|
||||
unsigned long addr;
|
||||
int ret = -ENOTSUPP;
|
||||
|
||||
hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd);
|
||||
if (cmd != HFI1_IOCTL_ASSIGN_CTXT &&
|
||||
cmd != HFI1_IOCTL_GET_VERS &&
|
||||
!uctxt)
|
||||
return -EINVAL;
|
||||
|
||||
switch (cmd) {
|
||||
case HFI1_IOCTL_ASSIGN_CTXT:
|
||||
#if 0
|
||||
if (uctxt)
|
||||
return -EINVAL;
|
||||
|
||||
if (copy_from_user(&uinfo,
|
||||
(struct hfi1_user_info __user *)arg,
|
||||
sizeof(uinfo)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = assign_ctxt(fp, &uinfo);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
ret = setup_ctxt(fp);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = user_init(fp);
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_ASSIGN_CTXT \n", __FUNCTION__);
|
||||
break;
|
||||
case HFI1_IOCTL_CTXT_INFO:
|
||||
#if 0
|
||||
ret = get_ctxt_info(fp, (void __user *)(unsigned long)arg,
|
||||
sizeof(struct hfi1_ctxt_info));
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_CTXT_INFO \n", __FUNCTION__);
|
||||
break;
|
||||
case HFI1_IOCTL_USER_INFO:
|
||||
#if 0
|
||||
ret = get_base_info(fp, (void __user *)(unsigned long)arg,
|
||||
sizeof(struct hfi1_base_info));
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_USER_INFO \n", __FUNCTION__);
|
||||
break;
|
||||
case HFI1_IOCTL_CREDIT_UPD:
|
||||
#if 0
|
||||
if (uctxt)
|
||||
sc_return_credits(uctxt->sc);
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_CREDIT_UPD \n", __FUNCTION__);
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_TID_UPDATE:
|
||||
dkprintf("%s: HFI1_IOCTL_TID_UPDATE \n", __FUNCTION__);
|
||||
if (copy_from_user(&tinfo,
|
||||
(struct hfi11_tid_info __user *)arg,
|
||||
sizeof(tinfo)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hfi1_user_exp_rcv_setup(fd, &tinfo);
|
||||
if (!ret) {
|
||||
/*
|
||||
* Copy the number of tidlist entries we used
|
||||
* and the length of the buffer we registered.
|
||||
* These fields are adjacent in the structure so
|
||||
* we can copy them at the same time.
|
||||
*/
|
||||
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
|
||||
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
|
||||
sizeof(tinfo.tidcnt) +
|
||||
sizeof(tinfo.length)))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_TID_FREE:
|
||||
dkprintf("%s: HFI1_IOCTL_TID_FREE \n", __FUNCTION__);
|
||||
if (copy_from_user(&tinfo,
|
||||
(struct hfi11_tid_info __user *)arg,
|
||||
sizeof(tinfo)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hfi1_user_exp_rcv_clear(fd, &tinfo);
|
||||
if (ret)
|
||||
break;
|
||||
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
|
||||
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
|
||||
sizeof(tinfo.tidcnt)))
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_TID_INVAL_READ:
|
||||
dkprintf("%s: HFI1_IOCTL_TID_INVAL_READ \n", __FUNCTION__);
|
||||
if (copy_from_user(&tinfo,
|
||||
(struct hfi11_tid_info __user *)arg,
|
||||
sizeof(tinfo)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = hfi1_user_exp_rcv_invalid(fd, &tinfo);
|
||||
if (ret)
|
||||
break;
|
||||
addr = arg + offsetof(struct hfi1_tid_info, tidcnt);
|
||||
if (copy_to_user((void __user *)addr, &tinfo.tidcnt,
|
||||
sizeof(tinfo.tidcnt)))
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_RECV_CTRL:
|
||||
#if 0
|
||||
ret = get_user(uval, (int __user *)arg);
|
||||
if (ret != 0)
|
||||
return -EFAULT;
|
||||
ret = manage_rcvq(uctxt, fd->subctxt, uval);
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_RECV_CTRL \n", __FUNCTION__);
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_POLL_TYPE:
|
||||
#if 0
|
||||
ret = get_user(uval, (int __user *)arg);
|
||||
if (ret != 0)
|
||||
return -EFAULT;
|
||||
uctxt->poll_type = (typeof(uctxt->poll_type))uval;
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_POLL_TYPE \n", __FUNCTION__);
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_ACK_EVENT:
|
||||
#if 0
|
||||
ret = get_user(ul_uval, (unsigned long __user *)arg);
|
||||
if (ret != 0)
|
||||
return -EFAULT;
|
||||
ret = user_event_ack(uctxt, fd->subctxt, ul_uval);
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_ACK_EVENT \n", __FUNCTION__);
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_SET_PKEY:
|
||||
#if 0
|
||||
ret = get_user(uval16, (u16 __user *)arg);
|
||||
if (ret != 0)
|
||||
return -EFAULT;
|
||||
if (HFI1_CAP_IS_USET(PKEY_CHECK))
|
||||
ret = set_ctxt_pkey(uctxt, fd->subctxt, uval16);
|
||||
else
|
||||
return -EPERM;
|
||||
#endif
|
||||
ret = -ENODEV;
|
||||
dkprintf("%s: HFI1_IOCTL_SET_PKEY \n", __FUNCTION__);
|
||||
break;
|
||||
|
||||
case HFI1_IOCTL_CTXT_RESET: {
|
||||
#if 0
|
||||
struct send_context *sc;
|
||||
struct hfi1_devdata *dd;
|
||||
|
||||
if (!uctxt || !uctxt->dd || !uctxt->sc)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* There is no protection here. User level has to
|
||||
* guarantee that no one will be writing to the send
|
||||
* context while it is being re-initialized.
|
||||
* If user level breaks that guarantee, it will break
|
||||
* it's own context and no one else's.
|
||||
*/
|
||||
dd = uctxt->dd;
|
||||
sc = uctxt->sc;
|
||||
/*
|
||||
* Wait until the interrupt handler has marked the
|
||||
* context as halted or frozen. Report error if we time
|
||||
* out.
|
||||
*/
|
||||
wait_event_interruptible_timeout(
|
||||
sc->halt_wait, (sc->flags & SCF_HALTED),
|
||||
msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
|
||||
if (!(sc->flags & SCF_HALTED))
|
||||
return -ENOLCK;
|
||||
|
||||
/*
|
||||
* If the send context was halted due to a Freeze,
|
||||
* wait until the device has been "unfrozen" before
|
||||
* resetting the context.
|
||||
*/
|
||||
if (sc->flags & SCF_FROZEN) {
|
||||
wait_event_interruptible_timeout(
|
||||
dd->event_queue,
|
||||
!(ACCESS_ONCE(dd->flags) & HFI1_FROZEN),
|
||||
msecs_to_jiffies(SEND_CTXT_HALT_TIMEOUT));
|
||||
if (dd->flags & HFI1_FROZEN)
|
||||
return -ENOLCK;
|
||||
|
||||
if (dd->flags & HFI1_FORCED_FREEZE)
|
||||
/*
|
||||
* Don't allow context reset if we are into
|
||||
* forced freeze
|
||||
*/
|
||||
return -ENODEV;
|
||||
|
||||
sc_disable(sc);
|
||||
ret = sc_enable(sc);
|
||||
hfi1_rcvctrl(dd, HFI1_RCVCTRL_CTXT_ENB,
|
||||
uctxt->ctxt);
|
||||
} else {
|
||||
ret = sc_restart(sc);
|
||||
}
|
||||
if (!ret)
|
||||
sc_return_credits(sc);
|
||||
break;
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_CTXT_RESET \n", __FUNCTION__);
|
||||
break;
|
||||
}
|
||||
|
||||
case HFI1_IOCTL_GET_VERS:
|
||||
#if 0
|
||||
uval = HFI1_USER_SWVERSION;
|
||||
if (put_user(uval, (int __user *)arg))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
dkprintf("%s: HFI1_IOCTL_GET_VERS \n", __FUNCTION__);
|
||||
break;
|
||||
|
||||
default:
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t hfi1_aio_write(void *private_data, const struct iovec *iovec, unsigned long dim)
|
||||
{
|
||||
struct hfi1_filedata *fd = private_data;
|
||||
struct hfi1_user_sdma_pkt_q *pq = fd->pq;
|
||||
struct hfi1_user_sdma_comp_q *cq = fd->cq;
|
||||
int done = 0, reqs = 0;
|
||||
|
||||
if (!cq || !pq)
|
||||
return -EIO;
|
||||
|
||||
if (!dim)
|
||||
return -EINVAL;
|
||||
|
||||
hfi1_cdbg(SDMA, "SDMA request from %u:%u (%lu)",
|
||||
fd->uctxt->ctxt, fd->subctxt, dim);
|
||||
|
||||
if (atomic_read(&pq->n_reqs) == pq->n_max_reqs)
|
||||
return -ENOSPC;
|
||||
|
||||
while (dim) {
|
||||
int ret;
|
||||
unsigned long count = 0;
|
||||
|
||||
ret = hfi1_user_sdma_process_request(
|
||||
private_data, (struct iovec *)(iovec + done),
|
||||
dim, &count);
|
||||
if (ret) {
|
||||
reqs = ret;
|
||||
break;
|
||||
}
|
||||
dim -= count;
|
||||
done += count;
|
||||
reqs++;
|
||||
}
|
||||
|
||||
return reqs;
|
||||
}
|
||||
|
||||
@ -479,9 +479,11 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
proc->sgid = pn->cred[6];
|
||||
proc->fsgid = pn->cred[7];
|
||||
proc->termsig = SIGCHLD;
|
||||
proc->mcexec_flags = pn->mcexec_flags;
|
||||
proc->mpol_flags = pn->mpol_flags;
|
||||
proc->mpol_threshold = pn->mpol_threshold;
|
||||
proc->nr_processes = pn->nr_processes;
|
||||
proc->process_rank = pn->process_rank;
|
||||
proc->heap_extension = pn->heap_extension;
|
||||
|
||||
/* Update NUMA binding policy if requested */
|
||||
|
||||
@ -19,10 +19,17 @@
|
||||
* CPU Local Storage (cls)
|
||||
*/
|
||||
|
||||
struct kmalloc_cache_header {
|
||||
struct kmalloc_cache_header *next;
|
||||
};
|
||||
|
||||
struct kmalloc_header {
|
||||
unsigned int front_magic;
|
||||
unsigned int cpu_id;
|
||||
struct list_head list;
|
||||
int cpu_id;
|
||||
union {
|
||||
struct list_head list;
|
||||
struct kmalloc_cache_header *cache;
|
||||
};
|
||||
int size; /* The size of this chunk without the header */
|
||||
unsigned int end_magic;
|
||||
/* 32 bytes */
|
||||
@ -99,6 +106,12 @@ struct cpu_local_var {
|
||||
struct list_head smp_func_req_list;
|
||||
|
||||
struct process_vm *on_fork_vm;
|
||||
|
||||
/* HFI1 related per-core kmalloc caches */
|
||||
struct kmalloc_cache_header txreq_cache;
|
||||
struct kmalloc_cache_header tids_cache;
|
||||
struct kmalloc_cache_header tidlist_cache;
|
||||
struct kmalloc_cache_header tid_node_cache;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
|
||||
|
||||
60
kernel/include/hfi1/chip.h
Normal file
60
kernel/include/hfi1/chip.h
Normal file
@ -0,0 +1,60 @@
|
||||
#ifndef _CHIP_H
|
||||
#define _CHIP_H
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains all of the defines that is specific to the HFI chip
|
||||
*/
|
||||
|
||||
#define MAX_EXPECTED_BUFFER (2048 * 1024)
|
||||
|
||||
void hfi1_put_tid(struct hfi1_devdata *dd, u32 index,
|
||||
u32 type, unsigned long pa, u16 order);
|
||||
void hfi1_clear_tids(struct hfi1_ctxtdata *rcd);
|
||||
|
||||
#endif /* _CHIP_H */
|
||||
64
kernel/include/hfi1/chip_registers.h
Normal file
64
kernel/include/hfi1/chip_registers.h
Normal file
@ -0,0 +1,64 @@
|
||||
#ifndef DEF_CHIP_REG
|
||||
#define DEF_CHIP_REG
|
||||
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#define CORE 0x000000000000
|
||||
|
||||
|
||||
#define RXE (CORE + 0x000001000000)
|
||||
|
||||
|
||||
#define RCV_ARRAY (RXE + 0x000000200000)
|
||||
#define RCV_ARRAY_CNT (RXE + 0x000000000018)
|
||||
#define RCV_ARRAY_RT_ADDR_MASK 0xFFFFFFFFFull
|
||||
#define RCV_ARRAY_RT_ADDR_SHIFT 0
|
||||
#define RCV_ARRAY_RT_BUF_SIZE_SHIFT 36
|
||||
#define RCV_ARRAY_RT_WRITE_ENABLE_SMASK 0x8000000000000000ull
|
||||
|
||||
#endif /* DEF_CHIP_REG */
|
||||
411
kernel/include/hfi1/common.h
Normal file
411
kernel/include/hfi1/common.h
Normal file
@ -0,0 +1,411 @@
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _COMMON_H
|
||||
#define _COMMON_H
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
#include "update/hfi1_user.h"
|
||||
#else
|
||||
#include <hfi1/hfi1_user.h>
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
|
||||
/*
|
||||
* This file contains defines, structures, etc. that are used
|
||||
* to communicate between kernel and user code.
|
||||
*/
|
||||
|
||||
/* version of protocol header (known to chip also). In the long run,
|
||||
* we should be able to generate and accept a range of version numbers;
|
||||
* for now we only accept one, and it's compiled in.
|
||||
*/
|
||||
#define IPS_PROTO_VERSION 2
|
||||
|
||||
/*
|
||||
* These are compile time constants that you may want to enable or disable
|
||||
* if you are trying to debug problems with code or performance.
|
||||
* HFI1_VERBOSE_TRACING define as 1 if you want additional tracing in
|
||||
* fast path code
|
||||
* HFI1_TRACE_REGWRITES define as 1 if you want register writes to be
|
||||
* traced in fast path code
|
||||
* _HFI1_TRACING define as 0 if you want to remove all tracing in a
|
||||
* compilation unit
|
||||
*/
|
||||
|
||||
/*
|
||||
* If a packet's QP[23:16] bits match this value, then it is
|
||||
* a PSM packet and the hardware will expect a KDETH header
|
||||
* following the BTH.
|
||||
*/
|
||||
#define DEFAULT_KDETH_QP 0x80
|
||||
|
||||
/* driver/hw feature set bitmask */
|
||||
#define HFI1_CAP_USER_SHIFT 24
|
||||
#define HFI1_CAP_MASK ((1UL << HFI1_CAP_USER_SHIFT) - 1)
|
||||
/* locked flag - if set, only HFI1_CAP_WRITABLE_MASK bits can be set */
|
||||
#define HFI1_CAP_LOCKED_SHIFT 63
|
||||
#define HFI1_CAP_LOCKED_MASK 0x1ULL
|
||||
#define HFI1_CAP_LOCKED_SMASK (HFI1_CAP_LOCKED_MASK << HFI1_CAP_LOCKED_SHIFT)
|
||||
/* extra bits used between kernel and user processes */
|
||||
#define HFI1_CAP_MISC_SHIFT (HFI1_CAP_USER_SHIFT * 2)
|
||||
#define HFI1_CAP_MISC_MASK ((1ULL << (HFI1_CAP_LOCKED_SHIFT - \
|
||||
HFI1_CAP_MISC_SHIFT)) - 1)
|
||||
|
||||
#define HFI1_CAP_KSET(cap) ({ hfi1_cap_mask |= HFI1_CAP_##cap; hfi1_cap_mask; })
|
||||
#define HFI1_CAP_KCLEAR(cap) \
|
||||
({ \
|
||||
hfi1_cap_mask &= ~HFI1_CAP_##cap; \
|
||||
hfi1_cap_mask; \
|
||||
})
|
||||
#define HFI1_CAP_USET(cap) \
|
||||
({ \
|
||||
hfi1_cap_mask |= (HFI1_CAP_##cap << HFI1_CAP_USER_SHIFT); \
|
||||
hfi1_cap_mask; \
|
||||
})
|
||||
#define HFI1_CAP_UCLEAR(cap) \
|
||||
({ \
|
||||
hfi1_cap_mask &= ~(HFI1_CAP_##cap << HFI1_CAP_USER_SHIFT); \
|
||||
hfi1_cap_mask; \
|
||||
})
|
||||
#define HFI1_CAP_SET(cap) \
|
||||
({ \
|
||||
hfi1_cap_mask |= (HFI1_CAP_##cap | (HFI1_CAP_##cap << \
|
||||
HFI1_CAP_USER_SHIFT)); \
|
||||
hfi1_cap_mask; \
|
||||
})
|
||||
#define HFI1_CAP_CLEAR(cap) \
|
||||
({ \
|
||||
hfi1_cap_mask &= ~(HFI1_CAP_##cap | \
|
||||
(HFI1_CAP_##cap << HFI1_CAP_USER_SHIFT)); \
|
||||
hfi1_cap_mask; \
|
||||
})
|
||||
#define HFI1_CAP_LOCK() \
|
||||
({ hfi1_cap_mask |= HFI1_CAP_LOCKED_SMASK; hfi1_cap_mask; })
|
||||
#define HFI1_CAP_LOCKED() (!!(hfi1_cap_mask & HFI1_CAP_LOCKED_SMASK))
|
||||
/*
|
||||
* The set of capability bits that can be changed after initial load
|
||||
* This set is the same for kernel and user contexts. However, for
|
||||
* user contexts, the set can be further filtered by using the
|
||||
* HFI1_CAP_RESERVED_MASK bits.
|
||||
*/
|
||||
#define HFI1_CAP_WRITABLE_MASK (HFI1_CAP_SDMA_AHG | \
|
||||
HFI1_CAP_HDRSUPP | \
|
||||
HFI1_CAP_MULTI_PKT_EGR | \
|
||||
HFI1_CAP_NODROP_RHQ_FULL | \
|
||||
HFI1_CAP_NODROP_EGR_FULL | \
|
||||
HFI1_CAP_ALLOW_PERM_JKEY | \
|
||||
HFI1_CAP_STATIC_RATE_CTRL | \
|
||||
HFI1_CAP_PRINT_UNIMPL | \
|
||||
HFI1_CAP_TID_UNMAP | \
|
||||
HFI1_CAP_OPFN | \
|
||||
HFI1_CAP_TID_RDMA)
|
||||
/*
|
||||
* A set of capability bits that are "global" and are not allowed to be
|
||||
* set in the user bitmask.
|
||||
*/
|
||||
#define HFI1_CAP_RESERVED_MASK ((HFI1_CAP_SDMA | \
|
||||
HFI1_CAP_USE_SDMA_HEAD | \
|
||||
HFI1_CAP_EXTENDED_PSN | \
|
||||
HFI1_CAP_PRINT_UNIMPL | \
|
||||
HFI1_CAP_NO_INTEGRITY | \
|
||||
HFI1_CAP_PKEY_CHECK | \
|
||||
HFI1_CAP_TID_RDMA | \
|
||||
HFI1_CAP_OPFN) << \
|
||||
HFI1_CAP_USER_SHIFT)
|
||||
/*
|
||||
* Set of capabilities that need to be enabled for kernel context in
|
||||
* order to be allowed for user contexts, as well.
|
||||
*/
|
||||
#define HFI1_CAP_MUST_HAVE_KERN (HFI1_CAP_STATIC_RATE_CTRL)
|
||||
/* Default enabled capabilities (both kernel and user) */
|
||||
#define HFI1_CAP_MASK_DEFAULT (HFI1_CAP_HDRSUPP | \
|
||||
HFI1_CAP_NODROP_RHQ_FULL | \
|
||||
HFI1_CAP_NODROP_EGR_FULL | \
|
||||
HFI1_CAP_SDMA | \
|
||||
HFI1_CAP_PRINT_UNIMPL | \
|
||||
HFI1_CAP_STATIC_RATE_CTRL | \
|
||||
HFI1_CAP_PKEY_CHECK | \
|
||||
HFI1_CAP_MULTI_PKT_EGR | \
|
||||
HFI1_CAP_EXTENDED_PSN | \
|
||||
((HFI1_CAP_HDRSUPP | \
|
||||
HFI1_CAP_MULTI_PKT_EGR | \
|
||||
HFI1_CAP_STATIC_RATE_CTRL | \
|
||||
HFI1_CAP_PKEY_CHECK | \
|
||||
HFI1_CAP_EARLY_CREDIT_RETURN) << \
|
||||
HFI1_CAP_USER_SHIFT))
|
||||
/*
|
||||
* A bitmask of kernel/global capabilities that should be communicated
|
||||
* to user level processes.
|
||||
*/
|
||||
#define HFI1_CAP_K2U (HFI1_CAP_SDMA | \
|
||||
HFI1_CAP_EXTENDED_PSN | \
|
||||
HFI1_CAP_PKEY_CHECK | \
|
||||
HFI1_CAP_NO_INTEGRITY)
|
||||
|
||||
#define HFI1_USER_SWVERSION ((HFI1_USER_SWMAJOR << HFI1_SWMAJOR_SHIFT) | \
|
||||
HFI1_USER_SWMINOR)
|
||||
|
||||
#ifndef HFI1_KERN_TYPE
|
||||
#define HFI1_KERN_TYPE 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Similarly, this is the kernel version going back to the user. It's
|
||||
* slightly different, in that we want to tell if the driver was built as
|
||||
* part of a Intel release, or from the driver from openfabrics.org,
|
||||
* kernel.org, or a standard distribution, for support reasons.
|
||||
* The high bit is 0 for non-Intel and 1 for Intel-built/supplied.
|
||||
*
|
||||
* It's returned by the driver to the user code during initialization in the
|
||||
* spi_sw_version field of hfi1_base_info, so the user code can in turn
|
||||
* check for compatibility with the kernel.
|
||||
*/
|
||||
#define HFI1_KERN_SWVERSION ((HFI1_KERN_TYPE << 31) | HFI1_USER_SWVERSION)
|
||||
|
||||
/*
|
||||
* Define the driver version number. This is something that refers only
|
||||
* to the driver itself, not the software interfaces it supports.
|
||||
*/
|
||||
#ifndef HFI1_DRIVER_VERSION_BASE
|
||||
#define HFI1_DRIVER_VERSION_BASE "0.9-294"
|
||||
#endif
|
||||
|
||||
/* create the final driver version string */
|
||||
#ifdef HFI1_IDSTR
|
||||
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE " " HFI1_IDSTR
|
||||
#else
|
||||
#define HFI1_DRIVER_VERSION HFI1_DRIVER_VERSION_BASE
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Diagnostics can send a packet by writing the following
|
||||
* struct to the diag packet special file.
|
||||
*
|
||||
* This allows a custom PBC qword, so that special modes and deliberate
|
||||
* changes to CRCs can be used.
|
||||
*/
|
||||
#define _DIAG_PKT_VERS 1
|
||||
struct diag_pkt {
|
||||
__u16 version; /* structure version */
|
||||
__u16 unit; /* which device */
|
||||
__u16 sw_index; /* send sw index to use */
|
||||
__u16 len; /* data length, in bytes */
|
||||
__u16 port; /* port number */
|
||||
__u16 unused;
|
||||
__u32 flags; /* call flags */
|
||||
__u64 data; /* user data pointer */
|
||||
__u64 pbc; /* PBC for the packet */
|
||||
};
|
||||
|
||||
/* diag_pkt flags */
|
||||
#define F_DIAGPKT_WAIT 0x1 /* wait until packet is sent */
|
||||
|
||||
/*
|
||||
* The next set of defines are for packet headers, and chip register
|
||||
* and memory bits that are visible to and/or used by user-mode software.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Receive Header Flags
|
||||
*/
|
||||
#define RHF_PKT_LEN_SHIFT 0
|
||||
#define RHF_PKT_LEN_MASK 0xfffull
|
||||
#define RHF_PKT_LEN_SMASK (RHF_PKT_LEN_MASK << RHF_PKT_LEN_SHIFT)
|
||||
|
||||
#define RHF_RCV_TYPE_SHIFT 12
|
||||
#define RHF_RCV_TYPE_MASK 0x7ull
|
||||
#define RHF_RCV_TYPE_SMASK (RHF_RCV_TYPE_MASK << RHF_RCV_TYPE_SHIFT)
|
||||
|
||||
#define RHF_USE_EGR_BFR_SHIFT 15
|
||||
#define RHF_USE_EGR_BFR_MASK 0x1ull
|
||||
#define RHF_USE_EGR_BFR_SMASK (RHF_USE_EGR_BFR_MASK << RHF_USE_EGR_BFR_SHIFT)
|
||||
|
||||
#define RHF_EGR_INDEX_SHIFT 16
|
||||
#define RHF_EGR_INDEX_MASK 0x7ffull
|
||||
#define RHF_EGR_INDEX_SMASK (RHF_EGR_INDEX_MASK << RHF_EGR_INDEX_SHIFT)
|
||||
|
||||
#define RHF_DC_INFO_SHIFT 27
|
||||
#define RHF_DC_INFO_MASK 0x1ull
|
||||
#define RHF_DC_INFO_SMASK (RHF_DC_INFO_MASK << RHF_DC_INFO_SHIFT)
|
||||
|
||||
#define RHF_RCV_SEQ_SHIFT 28
|
||||
#define RHF_RCV_SEQ_MASK 0xfull
|
||||
#define RHF_RCV_SEQ_SMASK (RHF_RCV_SEQ_MASK << RHF_RCV_SEQ_SHIFT)
|
||||
|
||||
#define RHF_EGR_OFFSET_SHIFT 32
|
||||
#define RHF_EGR_OFFSET_MASK 0xfffull
|
||||
#define RHF_EGR_OFFSET_SMASK (RHF_EGR_OFFSET_MASK << RHF_EGR_OFFSET_SHIFT)
|
||||
#define RHF_HDRQ_OFFSET_SHIFT 44
|
||||
#define RHF_HDRQ_OFFSET_MASK 0x1ffull
|
||||
#define RHF_HDRQ_OFFSET_SMASK (RHF_HDRQ_OFFSET_MASK << RHF_HDRQ_OFFSET_SHIFT)
|
||||
#define RHF_K_HDR_LEN_ERR (0x1ull << 53)
|
||||
#define RHF_DC_UNC_ERR (0x1ull << 54)
|
||||
#define RHF_DC_ERR (0x1ull << 55)
|
||||
#define RHF_RCV_TYPE_ERR_SHIFT 56
|
||||
#define RHF_RCV_TYPE_ERR_MASK 0x7ul
|
||||
#define RHF_RCV_TYPE_ERR_SMASK (RHF_RCV_TYPE_ERR_MASK << RHF_RCV_TYPE_ERR_SHIFT)
|
||||
#define RHF_TID_ERR (0x1ull << 59)
|
||||
#define RHF_LEN_ERR (0x1ull << 60)
|
||||
#define RHF_ECC_ERR (0x1ull << 61)
|
||||
#define RHF_VCRC_ERR (0x1ull << 62)
|
||||
#define RHF_ICRC_ERR (0x1ull << 63)
|
||||
|
||||
#define RHF_ERROR_SMASK 0xffe0000000000000ull /* bits 63:53 */
|
||||
|
||||
/* RHF receive types */
|
||||
#define RHF_RCV_TYPE_EXPECTED 0
|
||||
#define RHF_RCV_TYPE_EAGER 1
|
||||
#define RHF_RCV_TYPE_IB 2 /* normal IB, IB Raw, or IPv6 */
|
||||
#define RHF_RCV_TYPE_ERROR 3
|
||||
#define RHF_RCV_TYPE_BYPASS 4
|
||||
#define RHF_RCV_TYPE_INVALID5 5
|
||||
#define RHF_RCV_TYPE_INVALID6 6
|
||||
#define RHF_RCV_TYPE_INVALID7 7
|
||||
|
||||
/* RHF receive type error - expected packet errors */
|
||||
#define RHF_RTE_EXPECTED_FLOW_SEQ_ERR 0x2
|
||||
#define RHF_RTE_EXPECTED_FLOW_GEN_ERR 0x4
|
||||
|
||||
/* RHF receive type error - eager packet errors */
|
||||
#define RHF_RTE_EAGER_NO_ERR 0x0
|
||||
|
||||
/* RHF receive type error - IB packet errors */
|
||||
#define RHF_RTE_IB_NO_ERR 0x0
|
||||
|
||||
/* RHF receive type error - error packet errors */
|
||||
#define RHF_RTE_ERROR_NO_ERR 0x0
|
||||
#define RHF_RTE_ERROR_OP_CODE_ERR 0x1
|
||||
#define RHF_RTE_ERROR_KHDR_MIN_LEN_ERR 0x2
|
||||
#define RHF_RTE_ERROR_KHDR_HCRC_ERR 0x3
|
||||
#define RHF_RTE_ERROR_KHDR_KVER_ERR 0x4
|
||||
#define RHF_RTE_ERROR_CONTEXT_ERR 0x5
|
||||
#define RHF_RTE_ERROR_KHDR_TID_ERR 0x6
|
||||
|
||||
/* RHF receive type error - bypass packet errors */
|
||||
#define RHF_RTE_BYPASS_NO_ERR 0x0
|
||||
|
||||
/* IB - LRH header constants */
|
||||
#define HFI1_LRH_GRH 0x0003 /* 1. word of IB LRH - next header: GRH */
|
||||
#define HFI1_LRH_BTH 0x0002 /* 1. word of IB LRH - next header: BTH */
|
||||
|
||||
/* misc. */
|
||||
#define SIZE_OF_CRC 1
|
||||
|
||||
#define LIM_MGMT_P_KEY 0x7FFF
|
||||
#define FULL_MGMT_P_KEY 0xFFFF
|
||||
|
||||
#define DEFAULT_P_KEY LIM_MGMT_P_KEY
|
||||
#define HFI1_FECN_SHIFT 31
|
||||
#define HFI1_FECN_MASK 1
|
||||
#define HFI1_FECN_SMASK BIT(HFI1_FECN_SHIFT)
|
||||
#define HFI1_BECN_SHIFT 30
|
||||
#define HFI1_BECN_MASK 1
|
||||
#define HFI1_BECN_SMASK BIT(HFI1_BECN_SHIFT)
|
||||
|
||||
#define HFI1_PSM_IOC_BASE_SEQ 0x0
|
||||
|
||||
/* Number of BTH.PSN bits used for sequence number in expected rcvs */
|
||||
#define HFI1_KDETH_BTH_SEQ_SHIFT 11
|
||||
#define HFI1_KDETH_BTH_SEQ_MASK (BIT(HFI1_KDETH_BTH_SEQ_SHIFT) - 1)
|
||||
|
||||
/* Load a 64-bit Receive Header Flags (RHF) value from the header queue. */
static inline __u64 rhf_to_cpu(const __le32 *rbuf)
{
	return __le64_to_cpu(*((__le64 *)rbuf));
}

/* Mask down to only the error bits (63:53) of an RHF. */
static inline u64 rhf_err_flags(u64 rhf)
{
	return rhf & RHF_ERROR_SMASK;
}

/* Receive type of the packet (one of RHF_RCV_TYPE_*). */
static inline u32 rhf_rcv_type(u64 rhf)
{
	return (rhf >> RHF_RCV_TYPE_SHIFT) & RHF_RCV_TYPE_MASK;
}

/* Receive-type-specific error code (RHF_RTE_* values). */
static inline u32 rhf_rcv_type_err(u64 rhf)
{
	return (rhf >> RHF_RCV_TYPE_ERR_SHIFT) & RHF_RCV_TYPE_ERR_MASK;
}

/* return size is in bytes, not DWORDs (hence the final << 2) */
static inline u32 rhf_pkt_len(u64 rhf)
{
	return ((rhf & RHF_PKT_LEN_SMASK) >> RHF_PKT_LEN_SHIFT) << 2;
}

/* Index of the eager buffer associated with the packet. */
static inline u32 rhf_egr_index(u64 rhf)
{
	return (rhf >> RHF_EGR_INDEX_SHIFT) & RHF_EGR_INDEX_MASK;
}

/* Hardware receive sequence number of the RHF. */
static inline u32 rhf_rcv_seq(u64 rhf)
{
	return (rhf >> RHF_RCV_SEQ_SHIFT) & RHF_RCV_SEQ_MASK;
}

/* returned offset is in DWORDS */
static inline u32 rhf_hdrq_offset(u64 rhf)
{
	return (rhf >> RHF_HDRQ_OFFSET_SHIFT) & RHF_HDRQ_OFFSET_MASK;
}

/* Non-zero when the packet payload used an eager buffer (bit 15). */
static inline u64 rhf_use_egr_bfr(u64 rhf)
{
	return rhf & RHF_USE_EGR_BFR_SMASK;
}

/* DC info bit of the RHF (bit 27), returned unshifted. */
static inline u64 rhf_dc_info(u64 rhf)
{
	return rhf & RHF_DC_INFO_SMASK;
}

/* Offset into the eager buffer (RHF EgrOffset field). */
static inline u32 rhf_egr_buf_offset(u64 rhf)
{
	return (rhf >> RHF_EGR_OFFSET_SHIFT) & RHF_EGR_OFFSET_MASK;
}
|
||||
#endif /* _COMMON_H */
|
||||
9
kernel/include/hfi1/file_ops.h
Normal file
9
kernel/include/hfi1/file_ops.h
Normal file
@ -0,0 +1,9 @@
|
||||
#ifndef _HFI1_FILE_OPS_H_
|
||||
#define _HFI1_FILE_OPS_H_
|
||||
|
||||
#include <ihk/types.h>
|
||||
#include <uio.h>
|
||||
|
||||
ssize_t hfi1_aio_write(void *private_data, const struct iovec *iovec, unsigned long dim);
|
||||
|
||||
#endif
|
||||
1232
kernel/include/hfi1/hfi.h
Normal file
1232
kernel/include/hfi1/hfi.h
Normal file
File diff suppressed because it is too large
Load Diff
41
kernel/include/hfi1/hfi1_generated_ctxtdata.h
Normal file
41
kernel/include/hfi1/hfi1_generated_ctxtdata.h
Normal file
@ -0,0 +1,41 @@
|
||||
struct hfi1_ctxtdata {
|
||||
union {
|
||||
char whole_struct[1160];
|
||||
struct {
|
||||
char padding0[144];
|
||||
u16 ctxt;
|
||||
};
|
||||
struct {
|
||||
char padding1[168];
|
||||
u32 rcv_array_groups;
|
||||
};
|
||||
struct {
|
||||
char padding2[172];
|
||||
u32 eager_base;
|
||||
};
|
||||
struct {
|
||||
char padding3[176];
|
||||
u32 expected_count;
|
||||
};
|
||||
struct {
|
||||
char padding4[180];
|
||||
u32 expected_base;
|
||||
};
|
||||
struct {
|
||||
char padding5[184];
|
||||
struct exp_tid_set tid_group_list;
|
||||
};
|
||||
struct {
|
||||
char padding6[208];
|
||||
struct exp_tid_set tid_used_list;
|
||||
};
|
||||
struct {
|
||||
char padding7[232];
|
||||
struct exp_tid_set tid_full_list;
|
||||
};
|
||||
struct {
|
||||
char padding8[392];
|
||||
struct hfi1_devdata *dd;
|
||||
};
|
||||
};
|
||||
};
|
||||
65
kernel/include/hfi1/hfi1_generated_devdata.h
Normal file
65
kernel/include/hfi1/hfi1_generated_devdata.h
Normal file
@ -0,0 +1,65 @@
|
||||
struct hfi1_devdata {
|
||||
union {
|
||||
char whole_struct[7808];
|
||||
struct {
|
||||
char padding0[3368];
|
||||
u8 *kregbase1;
|
||||
};
|
||||
struct {
|
||||
char padding1[3376];
|
||||
resource_size_t physaddr;
|
||||
};
|
||||
struct {
|
||||
char padding2[3704];
|
||||
u64 default_desc1;
|
||||
};
|
||||
struct {
|
||||
char padding3[3736];
|
||||
dma_addr_t sdma_pad_phys;
|
||||
};
|
||||
struct {
|
||||
char padding4[3760];
|
||||
struct sdma_engine *per_sdma;
|
||||
};
|
||||
struct {
|
||||
char padding5[3768];
|
||||
struct sdma_vl_map *sdma_map;
|
||||
};
|
||||
struct {
|
||||
char padding6[3816];
|
||||
void *piobase;
|
||||
};
|
||||
struct {
|
||||
char padding7[3824];
|
||||
void *rcvarray_wc;
|
||||
};
|
||||
struct {
|
||||
char padding8[4040];
|
||||
long unsigned int *events;
|
||||
};
|
||||
struct {
|
||||
char padding9[4076];
|
||||
u32 chip_rcv_contexts;
|
||||
};
|
||||
struct {
|
||||
char padding10[4080];
|
||||
u32 chip_rcv_array_count;
|
||||
};
|
||||
struct {
|
||||
char padding11[7264];
|
||||
struct hfi1_pportdata *pport;
|
||||
};
|
||||
struct {
|
||||
char padding12[7296];
|
||||
u16 flags;
|
||||
};
|
||||
struct {
|
||||
char padding13[7299];
|
||||
u8 first_dyn_alloc_ctxt;
|
||||
};
|
||||
struct {
|
||||
char padding14[7368];
|
||||
u64 sc2vl[4];
|
||||
};
|
||||
};
|
||||
};
|
||||
49
kernel/include/hfi1/hfi1_generated_filedata.h
Normal file
49
kernel/include/hfi1/hfi1_generated_filedata.h
Normal file
@ -0,0 +1,49 @@
|
||||
struct hfi1_filedata {
|
||||
union {
|
||||
char whole_struct[104];
|
||||
struct {
|
||||
char padding0[0];
|
||||
struct hfi1_devdata *dd;
|
||||
};
|
||||
struct {
|
||||
char padding1[8];
|
||||
struct hfi1_ctxtdata *uctxt;
|
||||
};
|
||||
struct {
|
||||
char padding2[16];
|
||||
struct hfi1_user_sdma_comp_q *cq;
|
||||
};
|
||||
struct {
|
||||
char padding3[24];
|
||||
struct hfi1_user_sdma_pkt_q *pq;
|
||||
};
|
||||
struct {
|
||||
char padding4[32];
|
||||
u16 subctxt;
|
||||
};
|
||||
struct {
|
||||
char padding5[56];
|
||||
struct tid_rb_node **entry_to_rb;
|
||||
};
|
||||
struct {
|
||||
char padding6[64];
|
||||
spinlock_t tid_lock;
|
||||
};
|
||||
struct {
|
||||
char padding7[72];
|
||||
u32 tid_used;
|
||||
};
|
||||
struct {
|
||||
char padding8[80];
|
||||
u32 *invalid_tids;
|
||||
};
|
||||
struct {
|
||||
char padding9[88];
|
||||
u32 invalid_tid_idx;
|
||||
};
|
||||
struct {
|
||||
char padding10[92];
|
||||
spinlock_t invalid_lock;
|
||||
};
|
||||
};
|
||||
};
|
||||
29
kernel/include/hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h
Normal file
29
kernel/include/hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h
Normal file
@ -0,0 +1,29 @@
|
||||
struct hfi1_user_sdma_pkt_q {
|
||||
union {
|
||||
char whole_struct[352];
|
||||
struct {
|
||||
char padding0[4];
|
||||
u16 n_max_reqs;
|
||||
};
|
||||
struct {
|
||||
char padding1[8];
|
||||
atomic_t n_reqs;
|
||||
};
|
||||
struct {
|
||||
char padding2[16];
|
||||
struct hfi1_devdata *dd;
|
||||
};
|
||||
struct {
|
||||
char padding3[32];
|
||||
struct user_sdma_request *reqs;
|
||||
};
|
||||
struct {
|
||||
char padding4[40];
|
||||
long unsigned int *req_in_use;
|
||||
};
|
||||
struct {
|
||||
char padding5[288];
|
||||
enum pkt_q_sdma_state state;
|
||||
};
|
||||
};
|
||||
};
|
||||
9
kernel/include/hfi1/hfi1_generated_pportdata.h
Normal file
9
kernel/include/hfi1/hfi1_generated_pportdata.h
Normal file
@ -0,0 +1,9 @@
|
||||
struct hfi1_pportdata {
|
||||
union {
|
||||
char whole_struct[12992];
|
||||
struct {
|
||||
char padding0[2113];
|
||||
u8 vls_operational;
|
||||
};
|
||||
};
|
||||
};
|
||||
81
kernel/include/hfi1/hfi1_generated_sdma_engine.h
Normal file
81
kernel/include/hfi1/hfi1_generated_sdma_engine.h
Normal file
@ -0,0 +1,81 @@
|
||||
struct sdma_engine {
|
||||
union {
|
||||
char whole_struct[1472];
|
||||
struct {
|
||||
char padding0[0];
|
||||
struct hfi1_devdata *dd;
|
||||
};
|
||||
struct {
|
||||
char padding1[16];
|
||||
void *tail_csr;
|
||||
};
|
||||
struct {
|
||||
char padding2[72];
|
||||
struct hw_sdma_desc *descq;
|
||||
};
|
||||
struct {
|
||||
char padding3[80];
|
||||
unsigned int descq_full_count;
|
||||
};
|
||||
struct {
|
||||
char padding4[88];
|
||||
struct sdma_txreq **tx_ring;
|
||||
};
|
||||
struct {
|
||||
char padding5[104];
|
||||
u32 sdma_mask;
|
||||
};
|
||||
struct {
|
||||
char padding6[112];
|
||||
struct sdma_state state;
|
||||
};
|
||||
struct {
|
||||
char padding7[180];
|
||||
u8 sdma_shift;
|
||||
};
|
||||
struct {
|
||||
char padding8[181];
|
||||
u8 this_idx;
|
||||
};
|
||||
struct {
|
||||
char padding9[256];
|
||||
spinlock_t tail_lock;
|
||||
};
|
||||
struct {
|
||||
char padding10[260];
|
||||
u32 descq_tail;
|
||||
};
|
||||
struct {
|
||||
char padding11[264];
|
||||
long unsigned int ahg_bits;
|
||||
};
|
||||
struct {
|
||||
char padding12[272];
|
||||
u16 desc_avail;
|
||||
};
|
||||
struct {
|
||||
char padding13[274];
|
||||
u16 tx_tail;
|
||||
};
|
||||
struct {
|
||||
char padding14[276];
|
||||
u16 descq_cnt;
|
||||
};
|
||||
struct {
|
||||
char padding15[320];
|
||||
seqlock_t head_lock;
|
||||
};
|
||||
struct {
|
||||
char padding16[328];
|
||||
u32 descq_head;
|
||||
};
|
||||
struct {
|
||||
char padding17[704];
|
||||
spinlock_t flushlist_lock;
|
||||
};
|
||||
struct {
|
||||
char padding18[712];
|
||||
struct list_head flushlist;
|
||||
};
|
||||
};
|
||||
};
|
||||
17
kernel/include/hfi1/hfi1_generated_sdma_state.h
Normal file
17
kernel/include/hfi1/hfi1_generated_sdma_state.h
Normal file
@ -0,0 +1,17 @@
|
||||
struct sdma_state {
|
||||
union {
|
||||
char whole_struct[64];
|
||||
struct {
|
||||
char padding0[40];
|
||||
enum sdma_states current_state;
|
||||
};
|
||||
struct {
|
||||
char padding1[48];
|
||||
unsigned int go_s99_running;
|
||||
};
|
||||
struct {
|
||||
char padding2[52];
|
||||
enum sdma_states previous_state;
|
||||
};
|
||||
};
|
||||
};
|
||||
89
kernel/include/hfi1/hfi1_generated_user_sdma_request.h
Normal file
89
kernel/include/hfi1/hfi1_generated_user_sdma_request.h
Normal file
@ -0,0 +1,89 @@
|
||||
struct user_sdma_request {
|
||||
union {
|
||||
char whole_struct[768];
|
||||
struct {
|
||||
char padding0[0];
|
||||
struct hfi1_pkt_header hdr;
|
||||
};
|
||||
struct {
|
||||
char padding1[64];
|
||||
struct hfi1_user_sdma_pkt_q *pq;
|
||||
};
|
||||
struct {
|
||||
char padding2[72];
|
||||
struct hfi1_user_sdma_comp_q *cq;
|
||||
};
|
||||
struct {
|
||||
char padding3[80];
|
||||
struct sdma_engine *sde;
|
||||
};
|
||||
struct {
|
||||
char padding4[88];
|
||||
struct sdma_req_info info;
|
||||
};
|
||||
struct {
|
||||
char padding5[96];
|
||||
u32 *tids;
|
||||
};
|
||||
struct {
|
||||
char padding6[104];
|
||||
u32 data_len;
|
||||
};
|
||||
struct {
|
||||
char padding7[108];
|
||||
u16 n_tids;
|
||||
};
|
||||
struct {
|
||||
char padding8[110];
|
||||
u8 data_iovs;
|
||||
};
|
||||
struct {
|
||||
char padding9[111];
|
||||
s8 ahg_idx;
|
||||
};
|
||||
struct {
|
||||
char padding10[128];
|
||||
u64 seqcomp;
|
||||
};
|
||||
struct {
|
||||
char padding11[136];
|
||||
u64 seqsubmitted;
|
||||
};
|
||||
struct {
|
||||
char padding12[192];
|
||||
struct list_head txps;
|
||||
};
|
||||
struct {
|
||||
char padding13[208];
|
||||
u64 seqnum;
|
||||
};
|
||||
struct {
|
||||
char padding14[216];
|
||||
u32 tidoffset;
|
||||
};
|
||||
struct {
|
||||
char padding15[220];
|
||||
u32 koffset;
|
||||
};
|
||||
struct {
|
||||
char padding16[224];
|
||||
u32 sent;
|
||||
};
|
||||
struct {
|
||||
char padding17[228];
|
||||
u16 tididx;
|
||||
};
|
||||
struct {
|
||||
char padding18[230];
|
||||
u8 iov_idx;
|
||||
};
|
||||
struct {
|
||||
char padding19[231];
|
||||
u8 has_error;
|
||||
};
|
||||
struct {
|
||||
char padding20[232];
|
||||
struct user_sdma_iovec iovs[8];
|
||||
};
|
||||
};
|
||||
};
|
||||
33
kernel/include/hfi1/hfi1_generated_user_sdma_txreq.h
Normal file
33
kernel/include/hfi1/hfi1_generated_user_sdma_txreq.h
Normal file
@ -0,0 +1,33 @@
|
||||
struct user_sdma_txreq {
|
||||
union {
|
||||
char whole_struct[264];
|
||||
struct {
|
||||
char padding0[0];
|
||||
struct hfi1_pkt_header hdr;
|
||||
};
|
||||
struct {
|
||||
char padding1[64];
|
||||
struct sdma_txreq txreq;
|
||||
};
|
||||
struct {
|
||||
char padding2[224];
|
||||
struct list_head list;
|
||||
};
|
||||
struct {
|
||||
char padding3[240];
|
||||
struct user_sdma_request *req;
|
||||
};
|
||||
struct {
|
||||
char padding4[248];
|
||||
u16 flags;
|
||||
};
|
||||
struct {
|
||||
char padding5[252];
|
||||
unsigned int busycount;
|
||||
};
|
||||
struct {
|
||||
char padding6[256];
|
||||
u64 seqnum;
|
||||
};
|
||||
};
|
||||
};
|
||||
444
kernel/include/hfi1/hfi1_user.h
Normal file
444
kernel/include/hfi1/hfi1_user.h
Normal file
@ -0,0 +1,444 @@
|
||||
/*
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* Copyright(c) 2015 Intel Corporation.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Copyright(c) 2015 Intel Corporation.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file contains defines, structures, etc. that are used
|
||||
* to communicate between kernel and user code.
|
||||
*/
|
||||
|
||||
#ifndef _LINUX__HFI1_USER_H
|
||||
#define _LINUX__HFI1_USER_H
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
#include <linux/types.h>
|
||||
#else
|
||||
#include <mc_perf_event.h>
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
|
||||
/*
|
||||
* This version number is given to the driver by the user code during
|
||||
* initialization in the spu_userversion field of hfi1_user_info, so
|
||||
* the driver can check for compatibility with user code.
|
||||
*
|
||||
* The major version changes when data structures change in an incompatible
|
||||
* way. The driver must be the same for initialization to succeed.
|
||||
*/
|
||||
#define HFI1_USER_SWMAJOR 6
|
||||
|
||||
/*
|
||||
* Minor version differences are always compatible
|
||||
* a within a major version, however if user software is larger
|
||||
* than driver software, some new features and/or structure fields
|
||||
* may not be implemented; the user code must deal with this if it
|
||||
* cares, or it must abort after initialization reports the difference.
|
||||
*/
|
||||
#define HFI1_USER_SWMINOR 3
|
||||
|
||||
/*
|
||||
* We will encode the major/minor inside a single 32bit version number.
|
||||
*/
|
||||
#define HFI1_SWMAJOR_SHIFT 16
|
||||
|
||||
/*
|
||||
* Set of HW and driver capability/feature bits.
|
||||
* These bit values are used to configure enabled/disabled HW and
|
||||
* driver features. The same set of bits are communicated to user
|
||||
* space.
|
||||
*/
|
||||
#define HFI1_CAP_DMA_RTAIL (1UL << 0) /* Use DMA'ed RTail value */
|
||||
#define HFI1_CAP_SDMA (1UL << 1) /* Enable SDMA support */
|
||||
#define HFI1_CAP_SDMA_AHG (1UL << 2) /* Enable SDMA AHG support */
|
||||
#define HFI1_CAP_EXTENDED_PSN (1UL << 3) /* Enable Extended PSN support */
|
||||
#define HFI1_CAP_HDRSUPP (1UL << 4) /* Enable Header Suppression */
|
||||
#define HFI1_CAP_TID_RDMA (1UL << 5) /* Enable TID RDMA operations */
|
||||
#define HFI1_CAP_USE_SDMA_HEAD (1UL << 6) /* DMA Hdr Q tail vs. use CSR */
|
||||
#define HFI1_CAP_MULTI_PKT_EGR (1UL << 7) /* Enable multi-packet Egr buffs*/
|
||||
#define HFI1_CAP_NODROP_RHQ_FULL (1UL << 8) /* Don't drop on Hdr Q full */
|
||||
#define HFI1_CAP_NODROP_EGR_FULL (1UL << 9) /* Don't drop on EGR buffs full */
|
||||
#define HFI1_CAP_TID_UNMAP (1UL << 10) /* Disable Expected TID caching */
|
||||
#define HFI1_CAP_PRINT_UNIMPL (1UL << 11) /* Show for unimplemented feats */
|
||||
#define HFI1_CAP_ALLOW_PERM_JKEY (1UL << 12) /* Allow use of permissive JKEY */
|
||||
#define HFI1_CAP_NO_INTEGRITY (1UL << 13) /* Enable ctxt integrity checks */
|
||||
#define HFI1_CAP_PKEY_CHECK (1UL << 14) /* Enable ctxt PKey checking */
|
||||
#define HFI1_CAP_STATIC_RATE_CTRL (1UL << 15) /* Allow PBC.StaticRateControl */
|
||||
#define HFI1_CAP_OPFN (1UL << 16) /* Enable the OPFN protocol */
|
||||
#define HFI1_CAP_SDMA_HEAD_CHECK (1UL << 17) /* SDMA head checking */
|
||||
#define HFI1_CAP_EARLY_CREDIT_RETURN (1UL << 18) /* early credit return */
|
||||
|
||||
#define HFI1_RCVHDR_ENTSIZE_2 (1UL << 0)
|
||||
#define HFI1_RCVHDR_ENTSIZE_16 (1UL << 1)
|
||||
#define HFI1_RCVDHR_ENTSIZE_32 (1UL << 2)
|
||||
|
||||
/* User commands. */
|
||||
#define HFI1_CMD_ASSIGN_CTXT 1 /* allocate HFI and context */
|
||||
#define HFI1_CMD_CTXT_INFO 2 /* find out what resources we got */
|
||||
#define HFI1_CMD_USER_INFO 3 /* set up userspace */
|
||||
#define HFI1_CMD_TID_UPDATE 4 /* update expected TID entries */
|
||||
#define HFI1_CMD_TID_FREE 5 /* free expected TID entries */
|
||||
#define HFI1_CMD_CREDIT_UPD 6 /* force an update of PIO credit */
|
||||
|
||||
#define HFI1_CMD_RECV_CTRL 8 /* control receipt of packets */
|
||||
#define HFI1_CMD_POLL_TYPE 9 /* set the kind of polling we want */
|
||||
#define HFI1_CMD_ACK_EVENT 10 /* ack & clear user status bits */
|
||||
#define HFI1_CMD_SET_PKEY 11 /* set context's pkey */
|
||||
#define HFI1_CMD_CTXT_RESET 12 /* reset context's HW send context */
|
||||
#define HFI1_CMD_TID_INVAL_READ 13 /* read TID cache invalidations */
|
||||
#define HFI1_CMD_GET_VERS 14 /* get the version of the user cdev */
|
||||
|
||||
/*
|
||||
* User IOCTLs can not go above 128 if they do then see common.h and change the
|
||||
* base for the snoop ioctl
|
||||
*/
|
||||
#define IB_IOCTL_MAGIC 0x1b /* See Documentation/ioctl/ioctl-number.txt */
|
||||
|
||||
/*
|
||||
* Make the ioctls occupy the last 0xf0-0xff portion of the IB range
|
||||
*/
|
||||
#define __NUM(cmd) (HFI1_CMD_##cmd + 0xe0)
|
||||
|
||||
struct hfi1_cmd;
|
||||
#define HFI1_IOCTL_ASSIGN_CTXT \
|
||||
_IOWR(IB_IOCTL_MAGIC, __NUM(ASSIGN_CTXT), struct hfi1_user_info)
|
||||
#define HFI1_IOCTL_CTXT_INFO \
|
||||
_IOW(IB_IOCTL_MAGIC, __NUM(CTXT_INFO), struct hfi1_ctxt_info)
|
||||
#define HFI1_IOCTL_USER_INFO \
|
||||
_IOW(IB_IOCTL_MAGIC, __NUM(USER_INFO), struct hfi1_base_info)
|
||||
#define HFI1_IOCTL_TID_UPDATE \
|
||||
_IOWR(IB_IOCTL_MAGIC, __NUM(TID_UPDATE), struct hfi1_tid_info)
|
||||
#define HFI1_IOCTL_TID_FREE \
|
||||
_IOWR(IB_IOCTL_MAGIC, __NUM(TID_FREE), struct hfi1_tid_info)
|
||||
#define HFI1_IOCTL_CREDIT_UPD \
|
||||
_IO(IB_IOCTL_MAGIC, __NUM(CREDIT_UPD))
|
||||
#define HFI1_IOCTL_RECV_CTRL \
|
||||
_IOW(IB_IOCTL_MAGIC, __NUM(RECV_CTRL), int)
|
||||
#define HFI1_IOCTL_POLL_TYPE \
|
||||
_IOW(IB_IOCTL_MAGIC, __NUM(POLL_TYPE), int)
|
||||
#define HFI1_IOCTL_ACK_EVENT \
|
||||
_IOW(IB_IOCTL_MAGIC, __NUM(ACK_EVENT), unsigned long)
|
||||
#define HFI1_IOCTL_SET_PKEY \
|
||||
_IOW(IB_IOCTL_MAGIC, __NUM(SET_PKEY), __u16)
|
||||
#define HFI1_IOCTL_CTXT_RESET \
|
||||
_IO(IB_IOCTL_MAGIC, __NUM(CTXT_RESET))
|
||||
#define HFI1_IOCTL_TID_INVAL_READ \
|
||||
_IOWR(IB_IOCTL_MAGIC, __NUM(TID_INVAL_READ), struct hfi1_tid_info)
|
||||
#define HFI1_IOCTL_GET_VERS \
|
||||
_IOR(IB_IOCTL_MAGIC, __NUM(GET_VERS), int)
|
||||
|
||||
#define _HFI1_EVENT_FROZEN_BIT 0
|
||||
#define _HFI1_EVENT_LINKDOWN_BIT 1
|
||||
#define _HFI1_EVENT_LID_CHANGE_BIT 2
|
||||
#define _HFI1_EVENT_LMC_CHANGE_BIT 3
|
||||
#define _HFI1_EVENT_SL2VL_CHANGE_BIT 4
|
||||
#define _HFI1_EVENT_TID_MMU_NOTIFY_BIT 5
|
||||
#define _HFI1_MAX_EVENT_BIT _HFI1_EVENT_TID_MMU_NOTIFY_BIT
|
||||
|
||||
#define HFI1_EVENT_FROZEN (1UL << _HFI1_EVENT_FROZEN_BIT)
|
||||
#define HFI1_EVENT_LINKDOWN (1UL << _HFI1_EVENT_LINKDOWN_BIT)
|
||||
#define HFI1_EVENT_LID_CHANGE (1UL << _HFI1_EVENT_LID_CHANGE_BIT)
|
||||
#define HFI1_EVENT_LMC_CHANGE (1UL << _HFI1_EVENT_LMC_CHANGE_BIT)
|
||||
#define HFI1_EVENT_SL2VL_CHANGE (1UL << _HFI1_EVENT_SL2VL_CHANGE_BIT)
|
||||
#define HFI1_EVENT_TID_MMU_NOTIFY (1UL << _HFI1_EVENT_TID_MMU_NOTIFY_BIT)
|
||||
|
||||
/*
|
||||
* These are the status bits readable (in ASCII form, 64bit value)
|
||||
* from the "status" sysfs file. For binary compatibility, values
|
||||
* must remain as is; removed states can be reused for different
|
||||
* purposes.
|
||||
*/
|
||||
#define HFI1_STATUS_INITTED 0x1 /* basic initialization done */
|
||||
/* Chip has been found and initialized */
|
||||
#define HFI1_STATUS_CHIP_PRESENT 0x20
|
||||
/* IB link is at ACTIVE, usable for data traffic */
|
||||
#define HFI1_STATUS_IB_READY 0x40
|
||||
/* link is configured, LID, MTU, etc. have been set */
|
||||
#define HFI1_STATUS_IB_CONF 0x80
|
||||
/* A Fatal hardware error has occurred. */
|
||||
#define HFI1_STATUS_HWERROR 0x200
|
||||
|
||||
/*
|
||||
* Number of supported shared contexts.
|
||||
* This is the maximum number of software contexts that can share
|
||||
* a hardware send/receive context.
|
||||
*/
|
||||
#define HFI1_MAX_SHARED_CTXTS 8
|
||||
|
||||
/*
|
||||
* Poll types
|
||||
*/
|
||||
#define HFI1_POLL_TYPE_ANYRCV 0x0
|
||||
#define HFI1_POLL_TYPE_URGENT 0x1
|
||||
|
||||
/*
|
||||
* This structure is passed to the driver to tell it where
|
||||
* user code buffers are, sizes, etc. The offsets and sizes of the
|
||||
* fields must remain unchanged, for binary compatibility. It can
|
||||
* be extended, if userversion is changed so user code can tell, if needed
|
||||
*/
|
||||
struct hfi1_user_info {
|
||||
/*
|
||||
* version of user software, to detect compatibility issues.
|
||||
* Should be set to HFI1_USER_SWVERSION.
|
||||
*/
|
||||
__u32 userversion;
|
||||
__u32 pad;
|
||||
/*
|
||||
* If two or more processes wish to share a context, each process
|
||||
* must set the subcontext_cnt and subcontext_id to the same
|
||||
* values. The only restriction on the subcontext_id is that
|
||||
* it be unique for a given node.
|
||||
*/
|
||||
__u16 subctxt_cnt;
|
||||
__u16 subctxt_id;
|
||||
/* 128bit UUID passed in by PSM. */
|
||||
__u8 uuid[16];
|
||||
};
|
||||
|
||||
struct hfi1_ctxt_info {
|
||||
__u64 runtime_flags; /* chip/drv runtime flags (HFI1_CAP_*) */
|
||||
__u32 rcvegr_size; /* size of each eager buffer */
|
||||
__u16 num_active; /* number of active units */
|
||||
__u16 unit; /* unit (chip) assigned to caller */
|
||||
__u16 ctxt; /* ctxt on unit assigned to caller */
|
||||
__u16 subctxt; /* subctxt on unit assigned to caller */
|
||||
__u16 rcvtids; /* number of Rcv TIDs for this context */
|
||||
__u16 credits; /* number of PIO credits for this context */
|
||||
__u16 numa_node; /* NUMA node of the assigned device */
|
||||
__u16 rec_cpu; /* cpu # for affinity (0xffff if none) */
|
||||
__u16 send_ctxt; /* send context in use by this user context */
|
||||
__u16 egrtids; /* number of RcvArray entries for Eager Rcvs */
|
||||
__u16 rcvhdrq_cnt; /* number of RcvHdrQ entries */
|
||||
__u16 rcvhdrq_entsize; /* size (in bytes) for each RcvHdrQ entry */
|
||||
__u16 sdma_ring_size; /* number of entries in SDMA request ring */
|
||||
};
|
||||
|
||||
struct hfi1_tid_info {
|
||||
/* virtual address of first page in transfer */
|
||||
__u64 vaddr;
|
||||
/* pointer to tid array. this array is big enough */
|
||||
__u64 tidlist;
|
||||
/* number of tids programmed by this request */
|
||||
__u32 tidcnt;
|
||||
/* length of transfer buffer programmed by this request */
|
||||
__u32 length;
|
||||
};
|
||||
|
||||
enum hfi1_sdma_comp_state {
|
||||
FREE = 0,
|
||||
QUEUED,
|
||||
COMPLETE,
|
||||
ERROR
|
||||
};
|
||||
|
||||
/*
|
||||
* SDMA completion ring entry
|
||||
*/
|
||||
struct hfi1_sdma_comp_entry {
|
||||
__u32 status;
|
||||
__u32 errcode;
|
||||
};
|
||||
|
||||
/*
|
||||
* Device status and notifications from driver to user-space.
|
||||
*/
|
||||
struct hfi1_status {
|
||||
__u64 dev; /* device/hw status bits */
|
||||
__u64 port; /* port state and status bits */
|
||||
char freezemsg[0];
|
||||
};
|
||||
|
||||
/*
|
||||
* This structure is returned by the driver immediately after
|
||||
* open to get implementation-specific info, and info specific to this
|
||||
* instance.
|
||||
*
|
||||
* This struct must have explicit pad fields where type sizes
|
||||
* may result in different alignments between 32 and 64 bit
|
||||
* programs, since the 64 bit * bit kernel requires the user code
|
||||
* to have matching offsets
|
||||
*/
|
||||
struct hfi1_base_info {
|
||||
/* version of hardware, for feature checking. */
|
||||
__u32 hw_version;
|
||||
/* version of software, for feature checking. */
|
||||
__u32 sw_version;
|
||||
/* Job key */
|
||||
__u16 jkey;
|
||||
__u16 padding1;
|
||||
/*
|
||||
* The special QP (queue pair) value that identifies PSM
|
||||
* protocol packet from standard IB packets.
|
||||
*/
|
||||
__u32 bthqp;
|
||||
/* PIO credit return address, */
|
||||
__u64 sc_credits_addr;
|
||||
/*
|
||||
* Base address of write-only pio buffers for this process.
|
||||
* Each buffer has sendpio_credits*64 bytes.
|
||||
*/
|
||||
__u64 pio_bufbase_sop;
|
||||
/*
|
||||
* Base address of write-only pio buffers for this process.
|
||||
* Each buffer has sendpio_credits*64 bytes.
|
||||
*/
|
||||
__u64 pio_bufbase;
|
||||
/* address where receive buffer queue is mapped into */
|
||||
__u64 rcvhdr_bufbase;
|
||||
/* base address of Eager receive buffers. */
|
||||
__u64 rcvegr_bufbase;
|
||||
/* base address of SDMA completion ring */
|
||||
__u64 sdma_comp_bufbase;
|
||||
/*
|
||||
* User register base for init code, not to be used directly by
|
||||
* protocol or applications. Always maps real chip register space.
|
||||
* the register addresses are:
|
||||
* ur_rcvhdrhead, ur_rcvhdrtail, ur_rcvegrhead, ur_rcvegrtail,
|
||||
* ur_rcvtidflow
|
||||
*/
|
||||
__u64 user_regbase;
|
||||
/* notification events */
|
||||
__u64 events_bufbase;
|
||||
/* status page */
|
||||
__u64 status_bufbase;
|
||||
/* rcvhdrtail update */
|
||||
__u64 rcvhdrtail_base;
|
||||
/*
|
||||
* shared memory pages for subctxts if ctxt is shared; these cover
|
||||
* all the processes in the group sharing a single context.
|
||||
* all have enough space for the num_subcontexts value on this job.
|
||||
*/
|
||||
__u64 subctxt_uregbase;
|
||||
__u64 subctxt_rcvegrbuf;
|
||||
__u64 subctxt_rcvhdrbuf;
|
||||
};
|
||||
|
||||
enum sdma_req_opcode {
|
||||
EXPECTED = 0,
|
||||
EAGER
|
||||
};
|
||||
|
||||
#define HFI1_SDMA_REQ_VERSION_MASK 0xF
|
||||
#define HFI1_SDMA_REQ_VERSION_SHIFT 0x0
|
||||
#define HFI1_SDMA_REQ_OPCODE_MASK 0xF
|
||||
#define HFI1_SDMA_REQ_OPCODE_SHIFT 0x4
|
||||
#define HFI1_SDMA_REQ_IOVCNT_MASK 0xFF
|
||||
#define HFI1_SDMA_REQ_IOVCNT_SHIFT 0x8
|
||||
|
||||
struct sdma_req_info {
|
||||
/*
|
||||
* bits 0-3 - version (currently unused)
|
||||
* bits 4-7 - opcode (enum sdma_req_opcode)
|
||||
* bits 8-15 - io vector count
|
||||
*/
|
||||
__u16 ctrl;
|
||||
/*
|
||||
* Number of fragments contained in this request.
|
||||
* User-space has already computed how many
|
||||
* fragment-sized packet the user buffer will be
|
||||
* split into.
|
||||
*/
|
||||
__u16 npkts;
|
||||
/*
|
||||
* Size of each fragment the user buffer will be
|
||||
* split into.
|
||||
*/
|
||||
__u16 fragsize;
|
||||
/*
|
||||
* Index of the slot in the SDMA completion ring
|
||||
* this request should be using. User-space is
|
||||
* in charge of managing its own ring.
|
||||
*/
|
||||
__u16 comp_idx;
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
* SW KDETH header.
|
||||
* swdata is SW defined portion.
|
||||
*/
|
||||
struct hfi1_kdeth_header {
|
||||
__le32 ver_tid_offset;
|
||||
__le16 jkey;
|
||||
__le16 hcrc;
|
||||
__le32 swdata[7];
|
||||
} __attribute__((packed));
|
||||
|
||||
/*
|
||||
* Structure describing the headers that User space uses. The
|
||||
* structure above is a subset of this one.
|
||||
*/
|
||||
struct hfi1_pkt_header {
|
||||
__le16 pbc[4];
|
||||
__be16 lrh[4];
|
||||
__be32 bth[3];
|
||||
struct hfi1_kdeth_header kdeth;
|
||||
} __attribute__((packed));
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
/*
|
||||
* The list of usermode accessible registers.
|
||||
*/
|
||||
enum hfi1_ureg {
|
||||
/* (RO) DMA RcvHdr to be used next. */
|
||||
ur_rcvhdrtail = 0,
|
||||
/* (RW) RcvHdr entry to be processed next by host. */
|
||||
ur_rcvhdrhead = 1,
|
||||
/* (RO) Index of next Eager index to use. */
|
||||
ur_rcvegrindextail = 2,
|
||||
/* (RW) Eager TID to be processed next */
|
||||
ur_rcvegrindexhead = 3,
|
||||
/* (RO) Receive Eager Offset Tail */
|
||||
ur_rcvegroffsettail = 4,
|
||||
/* For internal use only; max register number. */
|
||||
ur_maxreg,
|
||||
/* (RW) Receive TID flow table */
|
||||
ur_rcvtidflowtable = 256
|
||||
};
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
#endif /* _LINIUX__HFI1_USER_H */
|
||||
310
kernel/include/hfi1/ihk_hfi1_common.h
Normal file
310
kernel/include/hfi1/ihk_hfi1_common.h
Normal file
@ -0,0 +1,310 @@
|
||||
#ifndef _IHK_HFI1_COMMON_H_
|
||||
#define _IHK_HFI1_COMMON_H_
|
||||
|
||||
#include <ihk/atomic.h>
|
||||
#include <ihk/types.h>
|
||||
#include <ihk/cpu.h>
|
||||
#include <kmalloc.h>
|
||||
#include <lwk/compiler.h>
|
||||
#include <arch-lock.h>
|
||||
#include <page.h>
|
||||
#include <string.h>
|
||||
#include <lwk/stddef.h>
|
||||
|
||||
//#define VERBOSE_DEBUG
|
||||
|
||||
#define IF_VA_ARGS(...) , ##__VA_ARGS__
|
||||
//#define TP(msg, ...) kprintf("%s(%d):" msg "\n", __FUNCTION__, __LINE__ IF_VA_ARGS(__VA_ARGS__))
|
||||
#define TP(msg, ...) do {} while(0)
|
||||
|
||||
#ifdef VERBOSE_DEBUG
|
||||
#define SDMA_DBG(req, fmt, ...) kprintf("%s(%d): DBG:" fmt "\n", __FUNCTION__, __LINE__ IF_VA_ARGS(__VA_ARGS__));
|
||||
#define SDMA_Q_DBG(req, fmt, ...) kprintf("%s(%d): Q_DBG:" fmt "\n", __FUNCTION__, __LINE__ IF_VA_ARGS(__VA_ARGS__));
|
||||
#define hfi1_cdbg(...) kprintf("%s(%d): hfi1_cdbg: %s \n", __FUNCTION__, __LINE__, #__VA_ARGS__);
|
||||
#else
|
||||
#define SDMA_DBG(req, fmt, ...) do {} while(0)
|
||||
#define SDMA_Q_DBG(req, fmt, ...) do {} while(0)
|
||||
#define hfi1_cdbg(...) do {} while(0)
|
||||
#endif
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/include/linux/compiler.h */
|
||||
#define WARN_ON(condition) ({ \
|
||||
int __ret_warn_on = !!(condition); \
|
||||
if (unlikely(__ret_warn_on)) \
|
||||
kprintf("%s(%d): WARN: %s\n", __FUNCTION__, __LINE__, #condition); \
|
||||
unlikely(__ret_warn_on); \
|
||||
})
|
||||
|
||||
#define WARN_ON_ONCE WARN_ON // use the local definition
|
||||
|
||||
#ifndef ARRAY_SIZE
|
||||
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
|
||||
#endif
|
||||
|
||||
/* From: mckernel/kernel/include/xpmem_private.h */
|
||||
#define offset_in_page(p) ((unsigned long)(p) & ~PAGE_MASK)
|
||||
#define min(x, y) ({ \
|
||||
__typeof__(x) _min1 = (x); \
|
||||
__typeof__(y) _min2 = (y); \
|
||||
(void) (&_min1 == &_min2); \
|
||||
_min1 < _min2 ? _min1 : _min2;})
|
||||
|
||||
|
||||
#define BIT_ULL(nr) (1ULL << (nr))
|
||||
|
||||
/* Disable debug macros */
|
||||
#define trace_hfi1_ahg_allocate(...) do {} while(0)
|
||||
#define trace_hfi1_ahg_deallocate(...) do {} while(0)
|
||||
|
||||
/* Byte swapping */
|
||||
#define be32_to_cpu(x) __builtin_bswap32(x)
|
||||
#define be16_to_cpu(x) __builtin_bswap16(x)
|
||||
#define le32_to_cpu(x) x
|
||||
#define le16_to_cpu(x) x
|
||||
#define cpu_to_le16(x) x
|
||||
#define cpu_to_le32(x) x
|
||||
#define cpu_to_le64(x) x
|
||||
#define __cpu_to_le64(x) x
|
||||
#define __le64_to_cpu(x) x
|
||||
#define __le32_to_cpu(x) x
|
||||
#define __le16_to_cpu(x) x
|
||||
#define cpu_to_be16(x) __builtin_bswap16(x)
|
||||
#define cpu_to_be32(x) __builtin_bswap32(x)
|
||||
|
||||
/* Compiler */
|
||||
#ifndef likely
|
||||
# define likely(x) __builtin_expect(!!(x), 1)
|
||||
#endif
|
||||
|
||||
#ifndef unlikely
|
||||
# define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
/* Atomic ops */
|
||||
#define atomic_inc ihk_atomic_inc
|
||||
#define atomic_dec ihk_atomic_dec
|
||||
#define atomic_read ihk_atomic_read
|
||||
#define atomic_add ihk_atomic_add
|
||||
#define atomic_t ihk_atomic_t
|
||||
typedef ihk_spinlock_t spinlock_t;
|
||||
|
||||
|
||||
/*
|
||||
* Linux queued_spin_lock compatible spin_lock, without the queue.
|
||||
*/
|
||||
#define _Q_LOCKED_OFFSET 0
|
||||
#define _Q_LOCKED_VAL (1U << _Q_LOCKED_OFFSET)
|
||||
|
||||
#define linux_spin_lock(lock) \
|
||||
do { \
|
||||
while (!__sync_bool_compare_and_swap( \
|
||||
(unsigned int *)lock, 0, \
|
||||
_Q_LOCKED_VAL)) { \
|
||||
cpu_pause(); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define linux_spin_unlock(lock) \
|
||||
do { \
|
||||
ihk_atomic_sub(_Q_LOCKED_VAL, (ihk_atomic_t *)lock); \
|
||||
} while (0)
|
||||
|
||||
#define linux_spin_lock_irqsave(lock, flags) \
|
||||
do { \
|
||||
flags = cpu_disable_interrupt_save(); \
|
||||
linux_spin_lock(lock); \
|
||||
} while (0)
|
||||
|
||||
#define linux_spin_unlock_irqrestore(lock, flags) \
|
||||
do { \
|
||||
linux_spin_unlock(lock); \
|
||||
cpu_restore_interrupt(flags); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/*****************************************************/
|
||||
|
||||
#define ____cacheline_aligned_in_smp __attribute__((aligned(64)))
|
||||
#define smp_wmb() barrier()
|
||||
#define smp_rmb() barrier()
|
||||
#define __iomem
|
||||
#define __rcu
|
||||
#define __percpu
|
||||
#define send_routine void *
|
||||
|
||||
#define GFP_KERNEL 0
|
||||
// TODO: double check GFP_ATOMIC
|
||||
#define GFP_ATOMIC 0
|
||||
|
||||
/* hfi1 pio.h */
|
||||
#define SC_MAX 4 /* count of send context types */
|
||||
|
||||
/* kernel-xppsl_1.5.2/include/linux/seqlock.h */
|
||||
/***********************************************/
|
||||
typedef struct seqcount {
|
||||
unsigned sequence;
|
||||
} seqcount_t;
|
||||
|
||||
typedef struct {
|
||||
struct seqcount seqcount;
|
||||
spinlock_t lock;
|
||||
} seqlock_t;
|
||||
|
||||
static inline unsigned raw_seqcount_begin(const seqcount_t *s)
|
||||
{
|
||||
unsigned ret = ACCESS_ONCE(s->sequence);
|
||||
smp_rmb();
|
||||
return ret & ~1;
|
||||
}
|
||||
/***********************************************/
|
||||
|
||||
/* kernel-xppsl_1.5.2/include/linux/kref.h */
|
||||
struct kref {
|
||||
atomic_t refcount;
|
||||
};
|
||||
|
||||
struct wait_queue_head_t {
|
||||
spinlock_t lock;
|
||||
struct list_head task_list;
|
||||
};
|
||||
typedef struct wait_queue_head_t wait_queue_head_t;
|
||||
|
||||
struct completion {
|
||||
unsigned int done;
|
||||
wait_queue_head_t wait;
|
||||
};
|
||||
|
||||
/* kernel-xppsl_1.5.2/include/linux/interrupt.h */
|
||||
struct tasklet_struct
|
||||
{
|
||||
struct tasklet_struct *next;
|
||||
unsigned long state;
|
||||
atomic_t count;
|
||||
void (*func)(unsigned long);
|
||||
unsigned long data;
|
||||
};
|
||||
|
||||
/* Misc */
|
||||
/* From: kernel-xppsl_1.5.2/include/linux/kernel.h */
|
||||
#define min_t(type, x, y) ({ \
|
||||
type __min1 = (x); \
|
||||
type __min2 = (y); \
|
||||
__min1 < __min2 ? __min1: __min2; })
|
||||
|
||||
#define SIZE_MAX (~(size_t)0)
|
||||
#define MAX_TID_PAIR_ENTRIES 1024 /* max receive expected pairs */
|
||||
#define PIO_BLOCK_SIZE 64 /* bytes */
|
||||
/* From: chip.c/h */
|
||||
#define TXE_NUM_SDMA_ENGINES 16
|
||||
#define CCE_NUM_INT_CSRS 12
|
||||
//num_vls = HFI1_MAX_VLS_SUPPORTED;
|
||||
//num_vls = dd->chip_sdma_engines;
|
||||
#define HFI1_MAX_VLS_SUPPORTED 8
|
||||
|
||||
|
||||
/* integer typedefs */
|
||||
typedef __signed__ char __s8;
|
||||
typedef unsigned char __u8;
|
||||
|
||||
typedef __signed__ short __s16;
|
||||
typedef unsigned short __u16;
|
||||
|
||||
typedef __signed__ int __s32;
|
||||
typedef unsigned int __u32;
|
||||
|
||||
typedef __signed__ long long __s64;
|
||||
typedef unsigned long long __u64;
|
||||
|
||||
typedef __u64 u64;
|
||||
typedef __s64 s64;
|
||||
|
||||
typedef __u32 u32;
|
||||
typedef __s32 s32;
|
||||
|
||||
typedef __u16 u16;
|
||||
typedef __s16 s16;
|
||||
|
||||
typedef __u8 u8;
|
||||
typedef __s8 s8;
|
||||
|
||||
typedef __u16 __le16;
|
||||
typedef __u16 __be16;
|
||||
typedef __u32 __le32;
|
||||
typedef __u32 __be32;
|
||||
typedef __u64 __le64;
|
||||
typedef __u64 __be64;
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
/* TODO: There should be a header file that I can include */
|
||||
typedef _Bool bool;
|
||||
|
||||
/* TODO: double check this typedef */
|
||||
typedef u64 dma_addr_t;
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/include/linux/types.h */
|
||||
typedef unsigned gfp_t;
|
||||
#define CONFIG_PHYS_ADDR_T_64BIT
|
||||
#ifdef CONFIG_PHYS_ADDR_T_64BIT
|
||||
typedef u64 phys_addr_t;
|
||||
#else
|
||||
typedef u32 phys_addr_t;
|
||||
#endif
|
||||
typedef phys_addr_t resource_size_t;
|
||||
|
||||
/* kernel-xppsl_1.5.2/include/asm-generic/io.h */
|
||||
#ifndef __raw_writeq
|
||||
/*
 * __raw_writeq - raw 64-bit MMIO store, no barrier and no byte swap
 * (from kernel-xppsl_1.5.2/include/asm-generic/io.h).
 * @b:    value to store
 * @addr: device address to store to
 *
 * The volatile access keeps the compiler from eliding or reordering
 * the store relative to other volatile accesses.
 * NOTE(review): __force is a sparse annotation macro; assumed to be
 * defined (possibly empty) earlier in this shim — confirm.
 */
static inline void __raw_writeq(u64 b, volatile void __iomem *addr)
|
||||
{
|
||||
*(volatile u64 __force *) addr = b;
|
||||
}
|
||||
#endif
|
||||
#define writeq(b, addr) __raw_writeq(__cpu_to_le64(b), addr)
|
||||
|
||||
|
||||
/* TODO: I'm not sure if this definition is correct */
|
||||
#define LOCK_PREFIX "lock; "
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/arch/x86/include/asm/bitops.h */
|
||||
#define BITOP_ADDR(x) "+m" (*(volatile long *) (x))
|
||||
#define LINUX_ADDR BITOP_ADDR(addr)
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/arch/x86/include/asm/bitops.h */
|
||||
/*
 * test_and_set_bit - atomically set bit @nr in *@addr and return its
 * previous value (0 if it was clear, non-zero (-1) if already set).
 *
 * LOCK-prefixed BTS sets the bit and latches the old bit in CF; SBB
 * then materializes CF as 0 or -1 in @oldbit.  The "memory" clobber
 * makes this a full compiler barrier, as in the Linux original.
 */
static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
|
||||
{
|
||||
int oldbit;
|
||||
|
||||
asm volatile(LOCK_PREFIX "bts %2,%1\n\t"
|
||||
"sbb %0,%0" : "=r" (oldbit), LINUX_ADDR : "Ir" (nr) : "memory");
|
||||
|
||||
return oldbit;
|
||||
}
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/arch/x86/include/asm/atomic.h */
|
||||
/*
 * atomic_dec_and_test - atomically decrement @v and test for zero.
 *
 * Returns non-zero iff the decremented value is 0.  LOCK DECL does
 * the atomic decrement; SETE captures the resulting zero flag in @c.
 * The "memory" clobber makes this a compiler barrier.
 */
static inline int atomic_dec_and_test(atomic_t *v)
|
||||
{
|
||||
unsigned char c;
|
||||
|
||||
asm volatile(LOCK_PREFIX "decl %0; sete %1"
|
||||
: "+m" (v->counter), "=qm" (c)
|
||||
: : "memory");
|
||||
return c != 0;
|
||||
}
|
||||
|
||||
/* From: kernel-xppsl_1.5.2/include/linux/slab.h */
|
||||
/*
 * kmalloc_array - allocate memory for an array, with overflow check
 * (local shim of the Linux include/linux/slab.h API).
 * @n:     number of elements
 * @size:  element size in bytes
 * @flags: allocation flags, passed through to kmalloc()
 *
 * Returns NULL when n * size would overflow size_t; otherwise returns
 * whatever kmalloc() returns.  Memory is NOT zeroed (see kcalloc).
 */
static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
{
	int overflows = (size != 0) && (n > SIZE_MAX / size);

	if (overflows)
		return NULL;
	return kmalloc(n * size, flags);
}
|
||||
|
||||
/*
 * kcalloc - allocate zero-initialized memory for an array
 * (local shim of the Linux include/linux/slab.h API).
 * @n:     number of elements
 * @size:  element size in bytes
 * @flags: allocation flags, passed through to kmalloc()
 *
 * Returns a pointer to zero-filled storage for @n * @size bytes, or
 * NULL on allocation failure or when n * size would overflow size_t.
 * The overflow check matches kmalloc_array() above (and the real
 * Linux kcalloc); the original shim omitted it, so a huge @n could
 * wrap to a small allocation and invite out-of-bounds writes.
 */
static inline void *kcalloc(size_t n, size_t size, gfp_t flags)
{
	void *mem;

	/* Reject multiplication overflow, as the real kcalloc does. */
	if (size != 0 && n > SIZE_MAX / size)
		return NULL;

	mem = kmalloc(n * size, flags);
	if (mem)
		memset(mem, 0, n * size);
	return mem;
}
|
||||
|
||||
#endif
|
||||
446
kernel/include/hfi1/iowait.h
Normal file
446
kernel/include/hfi1/iowait.h
Normal file
@ -0,0 +1,446 @@
|
||||
#ifndef _HFI1_IOWAIT_H
|
||||
#define _HFI1_IOWAIT_H
|
||||
/*
|
||||
* Copyright(c) 2015 - 2017 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
#include <linux/list.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
#include "sdma_txreq.h"
|
||||
|
||||
/*
|
||||
* typedef (*restart_t)() - restart callback
|
||||
* @work: pointer to work structure
|
||||
*/
|
||||
typedef void (*restart_t)(struct work_struct *work);
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
|
||||
#define IOWAIT_PENDING_IB 0x0
|
||||
#define IOWAIT_PENDING_TID 0x1
|
||||
|
||||
/*
|
||||
* A QP can have multiple Send Engines (SEs).
|
||||
*
|
||||
* The current use case is for supporting a TID RDMA
|
||||
* packet build/xmit mechanism independent from verbs.
|
||||
*/
|
||||
#define IOWAIT_SES 2
|
||||
#define IOWAIT_IB_SE 0
|
||||
#define IOWAIT_TID_SE 1
|
||||
|
||||
struct sdma_txreq;
|
||||
struct sdma_engine;
|
||||
/**
|
||||
* @iowork: the work struct
|
||||
* @tx_head: list of prebuilt packets
|
||||
* @iow: the parent iowait structure
|
||||
*
|
||||
* This structure is the work item (process) specific
|
||||
* details associated with the each of the two SEs of the
|
||||
* QP.
|
||||
*
|
||||
* The workstruct and the queued TXs are unique to each
|
||||
* SE.
|
||||
*/
|
||||
struct iowait;
|
||||
struct iowait_work {
|
||||
char iowork[32]; // struct work_struct iowork;
|
||||
struct list_head tx_head;
|
||||
struct iowait *iow;
|
||||
};
|
||||
|
||||
/**
|
||||
* @list: used to add/insert into QP/PQ wait lists
|
||||
* @tx_head: overflow list of sdma_txreq's
|
||||
* @sleep: no space callback
|
||||
* @wakeup: space callback wakeup
|
||||
* @sdma_drained: sdma count drained
|
||||
* @lock: lock protected head of wait queue
|
||||
* @iowork: workqueue overhead
|
||||
* @wait_dma: wait for sdma_busy == 0
|
||||
* @wait_pio: wait for pio_busy == 0
|
||||
* @sdma_busy: # of packets in flight
|
||||
* @count: total number of descriptors in tx_head'ed list
|
||||
* @tx_limit: limit for overflow queuing
|
||||
* @tx_count: number of tx entry's in tx_head'ed list
|
||||
* @flags: wait flags (one per QP)
|
||||
* @wait: SE array
|
||||
*
|
||||
* This is to be embedded in user's state structure
|
||||
* (QP or PQ).
|
||||
*
|
||||
* The sleep and wakeup members are a
|
||||
* bit misnamed. They do not strictly
|
||||
* speaking sleep or wake up, but they
|
||||
* are callbacks for the ULP to implement
|
||||
* what ever queuing/dequeuing of
|
||||
* the embedded iowait and its containing struct
|
||||
* when a resource shortage like SDMA ring space is seen.
|
||||
*
|
||||
* Both potentially have locks help
|
||||
* so sleeping is not allowed.
|
||||
*
|
||||
* The wait_dma member along with the iow
|
||||
*
|
||||
* The lock field is used by waiters to record
|
||||
* the seqlock_t that guards the list head.
|
||||
 * Waiters explicitly know that, but the destroy
|
||||
* code that unwaits QPs does not.
|
||||
*/
|
||||
/* The original size on Linux is 240 B */
|
||||
struct iowait {
|
||||
struct list_head list;
|
||||
int (*sleep)(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
uint seq,
|
||||
bool pkts_sent
|
||||
);
|
||||
void (*wakeup)(struct iowait *wait, int reason);
|
||||
void (*sdma_drained)(struct iowait *wait);
|
||||
seqlock_t *lock;
|
||||
wait_queue_head_t wait_dma;
|
||||
wait_queue_head_t wait_pio;
|
||||
atomic_t sdma_busy;
|
||||
atomic_t pio_busy;
|
||||
u32 count;
|
||||
u32 tx_limit;
|
||||
u32 tx_count;
|
||||
unsigned long flags;
|
||||
struct iowait_work wait[IOWAIT_SES];
|
||||
u8 starved_cnt;
|
||||
};
|
||||
|
||||
#define SDMA_AVAIL_REASON 0
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
void iowait_set_flag(struct iowait *wait, u32 flag);
|
||||
bool iowait_flag_set(struct iowait *wait, u32 flag);
|
||||
void iowait_clear_flag(struct iowait *wait, u32 flag);
|
||||
|
||||
void iowait_init(
|
||||
struct iowait *wait,
|
||||
u32 tx_limit,
|
||||
void (*func)(struct work_struct *work),
|
||||
void (*tidfunc)(struct work_struct *work),
|
||||
int (*sleep)(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx,
|
||||
uint seq,
|
||||
bool pkts_sent),
|
||||
void (*wakeup)(struct iowait *wait, int reason),
|
||||
void (*sdma_drained)(struct iowait *wait));
|
||||
|
||||
/**
|
||||
* iowait_schedule() - schedule the default send engine work
|
||||
* @wait: wait struct to schedule
|
||||
* @wq: workqueue for schedule
|
||||
* @cpu: cpu
|
||||
*/
|
||||
static inline bool iowait_schedule(
|
||||
struct iowait *wait,
|
||||
struct workqueue_struct *wq,
|
||||
int cpu)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_IB_SE].iowork);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_tid_schedule - schedule the tid SE
|
||||
* @wait: the iowait structure
|
||||
* @wq: the work queue
|
||||
* @cpu: the cpu
|
||||
*/
|
||||
static inline bool iowait_tid_schedule(
|
||||
struct iowait *wait,
|
||||
struct workqueue_struct *wq,
|
||||
int cpu)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return !!queue_work_on(cpu, wq, &wait->wait[IOWAIT_TID_SE].iowork);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_sdma_drain() - wait for DMAs to drain
|
||||
* @wait: iowait structure
|
||||
*
|
||||
* This will delay until the iowait sdmas have
|
||||
* completed.
|
||||
*/
|
||||
static inline void iowait_sdma_drain(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
wait_event(wait->wait_dma, !atomic_read(&wait->sdma_busy));
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_sdma_pending() - return sdma pending count
|
||||
*
|
||||
* @wait: iowait structure
|
||||
*
|
||||
*/
|
||||
static inline int iowait_sdma_pending(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return atomic_read(&wait->sdma_busy);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_sdma_inc - note sdma io pending
|
||||
* @wait: iowait structure
|
||||
*/
|
||||
static inline void iowait_sdma_inc(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
atomic_inc(&wait->sdma_busy);
|
||||
}
|
||||
|
||||
#endif
|
||||
/**
|
||||
* iowait_sdma_add - add count to pending
|
||||
 * @wait: iowait structure
 * @count: amount to add to the pending sdma count
|
||||
*/
|
||||
static inline void iowait_sdma_add(struct iowait *wait, int count)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
atomic_add(count, &wait->sdma_busy);
|
||||
}
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
/**
|
||||
* iowait_pio_drain() - wait for pios to drain
|
||||
*
|
||||
* @wait: iowait structure
|
||||
*
|
||||
* This will delay until the iowait pios have
|
||||
* completed.
|
||||
*/
|
||||
static inline void iowait_pio_drain(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
wait_event_timeout(wait->wait_pio,
|
||||
!atomic_read(&wait->pio_busy),
|
||||
HZ);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_pio_pending() - return pio pending count
|
||||
*
|
||||
* @wait: iowait structure
|
||||
*
|
||||
*/
|
||||
static inline int iowait_pio_pending(struct iowait *w)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return atomic_read(&w->pio_busy);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_drain_wakeup() - trigger iowait_drain() waiter
|
||||
* @wait: iowait structure
|
||||
*
|
||||
* This will trigger any waiters.
|
||||
*/
|
||||
static inline void iowait_drain_wakeup(struct iowait *w)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
wake_up(&w->wait_dma);
|
||||
wake_up(&w->wait_pio);
|
||||
if (w->sdma_drained)
|
||||
w->sdma_drained(w);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_pio_inc - note pio pending
|
||||
* @wait: iowait structure
|
||||
*/
|
||||
static inline void iowait_pio_inc(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
atomic_inc(&wait->pio_busy);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_pio_dec - note pio complete
|
||||
* @wait: iowait structure
|
||||
*/
|
||||
static inline int iowait_pio_dec(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
if (!wait)
|
||||
return 0;
|
||||
return atomic_dec_and_test(&wait->pio_busy);
|
||||
}
|
||||
|
||||
/**
|
||||
 * iowait_sdma_dec - note sdma complete
|
||||
* @wait: iowait structure
|
||||
*/
|
||||
static inline int iowait_sdma_dec(struct iowait *wait)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
if (!wait)
|
||||
return 0;
|
||||
return atomic_dec_and_test(&wait->sdma_busy);
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_get_txhead() - get packet off of iowait list
|
||||
 * @wait: wait structure
|
||||
*/
|
||||
static inline struct sdma_txreq *iowait_get_txhead(struct iowait_work *wait)
|
||||
{
|
||||
struct sdma_txreq *tx = NULL;
|
||||
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
if (!list_empty(&wait->tx_head)) {
|
||||
tx = list_first_entry(
|
||||
&wait->tx_head,
|
||||
struct sdma_txreq,
|
||||
list);
|
||||
list_del_init(&tx->list);
|
||||
}
|
||||
return tx;
|
||||
}
|
||||
|
||||
static inline u16 iowait_get_desc(struct iowait_work *w)
|
||||
{
|
||||
u16 num_desc = 0;
|
||||
struct sdma_txreq *tx = NULL;
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
|
||||
if (!list_empty(&w->tx_head)) {
|
||||
tx = list_first_entry(
|
||||
&w->tx_head,
|
||||
struct sdma_txreq,
|
||||
list);
|
||||
num_desc = tx->num_desc;
|
||||
}
|
||||
return num_desc;
|
||||
}
|
||||
|
||||
static inline u32 iowait_get_all_desc(struct iowait *w)
|
||||
{
|
||||
u32 num_desc = 0;
|
||||
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
num_desc = iowait_get_desc(&w->wait[IOWAIT_IB_SE]);
|
||||
num_desc += iowait_get_desc(&w->wait[IOWAIT_TID_SE]);
|
||||
return num_desc;
|
||||
}
|
||||
|
||||
/**
|
||||
 * iowait_packet_queued() - determine if a packet is queued
|
||||
* @wait: the wait structure
|
||||
*/
|
||||
static inline bool iowait_packet_queued(struct iowait_work *w)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return !list_empty(&w->tx_head);
|
||||
}
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
/**
|
||||
* inc_wait_count - increment wait counts
|
||||
* @w: the log work struct
|
||||
* @n: the count
|
||||
*/
|
||||
/*
 * iowait_inc_wait_count - account one queued tx carrying @n
 * descriptors against @w's parent iowait.  A NULL @w is ignored.
 */
static inline void iowait_inc_wait_count(struct iowait_work *w, u16 n)
{
	struct iowait *parent;

	hfi1_cdbg(AIOWRITE, ".");
	if (!w)
		return;

	parent = w->iow;
	parent->tx_count++;
	parent->count += n;
}
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
/**
|
||||
* iowait_get_tid_work - return iowait_work for tid SE
|
||||
* @w: the iowait struct
|
||||
*/
|
||||
static inline struct iowait_work *iowait_get_tid_work(struct iowait *w)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return &w->wait[IOWAIT_TID_SE];
|
||||
}
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
/**
|
||||
* iowait_get_ib_work - return iowait_work for ib SE
|
||||
* @w: the iowait struct
|
||||
*/
|
||||
static inline struct iowait_work *iowait_get_ib_work(struct iowait *w)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
return &w->wait[IOWAIT_IB_SE];
|
||||
}
|
||||
|
||||
/**
|
||||
* iowait_ioww_to_iow - return iowait given iowait_work
|
||||
* @w: the iowait_work struct
|
||||
*/
|
||||
static inline struct iowait *iowait_ioww_to_iow(struct iowait_work *w)
|
||||
{
|
||||
hfi1_cdbg(AIOWRITE, ".");
|
||||
if (likely(w))
|
||||
return w->iow;
|
||||
return NULL;
|
||||
}
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
void iowait_cancel_work(struct iowait *w);
|
||||
int iowait_set_work_flag(struct iowait_work *w);
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
|
||||
#endif
|
||||
983
kernel/include/hfi1/sdma.h
Normal file
983
kernel/include/hfi1/sdma.h
Normal file
@ -0,0 +1,983 @@
|
||||
#ifndef _HFI1_SDMA_H
|
||||
#define _HFI1_SDMA_H
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
#include <hfi1/hfi.h>
|
||||
#include <hfi1/ihk_hfi1_common.h>
|
||||
#include <hfi1/sdma_txreq.h>
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
#include <linux/types.h>
|
||||
#include <linux/list.h>
|
||||
#include <asm/byteorder.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/rculist.h>
|
||||
|
||||
#include "hfi.h"
|
||||
#include "verbs.h"
|
||||
#include "sdma_txreq.h"
|
||||
|
||||
#define hfi1_cdbg(which, fmt, ...) \
|
||||
__hfi1_trace_##which(__func__, fmt, ##__VA_ARGS__)
|
||||
extern void __hfi1_trace_AIOWRITE(const char *func, char *fmt, ...);
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
|
||||
/* Hardware limit */
|
||||
#define MAX_DESC 64
|
||||
/* Hardware limit for SDMA packet size */
|
||||
#define MAX_SDMA_PKT_SIZE ((16 * 1024) - 1)
|
||||
|
||||
#define SDMA_TXREQ_S_OK 0
|
||||
#define SDMA_TXREQ_S_SENDERROR 1
|
||||
#define SDMA_TXREQ_S_ABORTED 2
|
||||
#define SDMA_TXREQ_S_SHUTDOWN 3
|
||||
|
||||
/* flags bits */
|
||||
#define SDMA_TXREQ_F_URGENT 0x0001
|
||||
#define SDMA_TXREQ_F_AHG_COPY 0x0002
|
||||
#define SDMA_TXREQ_F_USE_AHG 0x0004
|
||||
|
||||
#define SDMA_MAP_NONE 0
|
||||
#define SDMA_MAP_SINGLE 1
|
||||
#define SDMA_MAP_PAGE 2
|
||||
|
||||
#define SDMA_AHG_VALUE_MASK 0xffff
|
||||
#define SDMA_AHG_VALUE_SHIFT 0
|
||||
#define SDMA_AHG_INDEX_MASK 0xf
|
||||
#define SDMA_AHG_INDEX_SHIFT 16
|
||||
#define SDMA_AHG_FIELD_LEN_MASK 0xf
|
||||
#define SDMA_AHG_FIELD_LEN_SHIFT 20
|
||||
#define SDMA_AHG_FIELD_START_MASK 0x1f
|
||||
#define SDMA_AHG_FIELD_START_SHIFT 24
|
||||
#define SDMA_AHG_UPDATE_ENABLE_MASK 0x1
|
||||
#define SDMA_AHG_UPDATE_ENABLE_SHIFT 31
|
||||
|
||||
/* AHG modes */
|
||||
|
||||
/*
|
||||
* Be aware the ordering and values
|
||||
* for SDMA_AHG_APPLY_UPDATE[123]
|
||||
* are assumed in generating a skip
|
||||
* count in submit_tx() in sdma.c
|
||||
*/
|
||||
#define SDMA_AHG_NO_AHG 0
|
||||
#define SDMA_AHG_COPY 1
|
||||
#define SDMA_AHG_APPLY_UPDATE1 2
|
||||
#define SDMA_AHG_APPLY_UPDATE2 3
|
||||
#define SDMA_AHG_APPLY_UPDATE3 4
|
||||
|
||||
/*
|
||||
* Bits defined in the send DMA descriptor.
|
||||
*/
|
||||
#define SDMA_DESC0_FIRST_DESC_FLAG BIT_ULL(63)
|
||||
#define SDMA_DESC0_LAST_DESC_FLAG BIT_ULL(62)
|
||||
#define SDMA_DESC0_BYTE_COUNT_SHIFT 48
|
||||
#define SDMA_DESC0_BYTE_COUNT_WIDTH 14
|
||||
#define SDMA_DESC0_BYTE_COUNT_MASK \
|
||||
((1ULL << SDMA_DESC0_BYTE_COUNT_WIDTH) - 1)
|
||||
#define SDMA_DESC0_BYTE_COUNT_SMASK \
|
||||
(SDMA_DESC0_BYTE_COUNT_MASK << SDMA_DESC0_BYTE_COUNT_SHIFT)
|
||||
#define SDMA_DESC0_PHY_ADDR_SHIFT 0
|
||||
#define SDMA_DESC0_PHY_ADDR_WIDTH 48
|
||||
#define SDMA_DESC0_PHY_ADDR_MASK \
|
||||
((1ULL << SDMA_DESC0_PHY_ADDR_WIDTH) - 1)
|
||||
#define SDMA_DESC0_PHY_ADDR_SMASK \
|
||||
(SDMA_DESC0_PHY_ADDR_MASK << SDMA_DESC0_PHY_ADDR_SHIFT)
|
||||
|
||||
#define SDMA_DESC1_HEADER_UPDATE1_SHIFT 32
|
||||
#define SDMA_DESC1_HEADER_UPDATE1_WIDTH 32
|
||||
#define SDMA_DESC1_HEADER_UPDATE1_MASK \
|
||||
((1ULL << SDMA_DESC1_HEADER_UPDATE1_WIDTH) - 1)
|
||||
#define SDMA_DESC1_HEADER_UPDATE1_SMASK \
|
||||
(SDMA_DESC1_HEADER_UPDATE1_MASK << SDMA_DESC1_HEADER_UPDATE1_SHIFT)
|
||||
#define SDMA_DESC1_HEADER_MODE_SHIFT 13
|
||||
#define SDMA_DESC1_HEADER_MODE_WIDTH 3
|
||||
#define SDMA_DESC1_HEADER_MODE_MASK \
|
||||
((1ULL << SDMA_DESC1_HEADER_MODE_WIDTH) - 1)
|
||||
#define SDMA_DESC1_HEADER_MODE_SMASK \
|
||||
(SDMA_DESC1_HEADER_MODE_MASK << SDMA_DESC1_HEADER_MODE_SHIFT)
|
||||
#define SDMA_DESC1_HEADER_INDEX_SHIFT 8
|
||||
#define SDMA_DESC1_HEADER_INDEX_WIDTH 5
|
||||
#define SDMA_DESC1_HEADER_INDEX_MASK \
|
||||
((1ULL << SDMA_DESC1_HEADER_INDEX_WIDTH) - 1)
|
||||
#define SDMA_DESC1_HEADER_INDEX_SMASK \
|
||||
(SDMA_DESC1_HEADER_INDEX_MASK << SDMA_DESC1_HEADER_INDEX_SHIFT)
|
||||
#define SDMA_DESC1_HEADER_DWS_SHIFT 4
|
||||
#define SDMA_DESC1_HEADER_DWS_WIDTH 4
|
||||
#define SDMA_DESC1_HEADER_DWS_MASK \
|
||||
((1ULL << SDMA_DESC1_HEADER_DWS_WIDTH) - 1)
|
||||
#define SDMA_DESC1_HEADER_DWS_SMASK \
|
||||
(SDMA_DESC1_HEADER_DWS_MASK << SDMA_DESC1_HEADER_DWS_SHIFT)
|
||||
#define SDMA_DESC1_GENERATION_SHIFT 2
|
||||
#define SDMA_DESC1_GENERATION_WIDTH 2
|
||||
#define SDMA_DESC1_GENERATION_MASK \
|
||||
((1ULL << SDMA_DESC1_GENERATION_WIDTH) - 1)
|
||||
#define SDMA_DESC1_GENERATION_SMASK \
|
||||
(SDMA_DESC1_GENERATION_MASK << SDMA_DESC1_GENERATION_SHIFT)
|
||||
#define SDMA_DESC1_INT_REQ_FLAG BIT_ULL(1)
|
||||
#define SDMA_DESC1_HEAD_TO_HOST_FLAG BIT_ULL(0)
|
||||
|
||||
enum sdma_states {
|
||||
sdma_state_s00_hw_down,
|
||||
sdma_state_s10_hw_start_up_halt_wait,
|
||||
sdma_state_s15_hw_start_up_clean_wait,
|
||||
sdma_state_s20_idle,
|
||||
sdma_state_s30_sw_clean_up_wait,
|
||||
sdma_state_s40_hw_clean_up_wait,
|
||||
sdma_state_s50_hw_halt_wait,
|
||||
sdma_state_s60_idle_halt_wait,
|
||||
sdma_state_s80_hw_freeze,
|
||||
sdma_state_s82_freeze_sw_clean,
|
||||
sdma_state_s99_running,
|
||||
};
|
||||
|
||||
enum sdma_events {
|
||||
sdma_event_e00_go_hw_down,
|
||||
sdma_event_e10_go_hw_start,
|
||||
sdma_event_e15_hw_halt_done,
|
||||
sdma_event_e25_hw_clean_up_done,
|
||||
sdma_event_e30_go_running,
|
||||
sdma_event_e40_sw_cleaned,
|
||||
sdma_event_e50_hw_cleaned,
|
||||
sdma_event_e60_hw_halted,
|
||||
sdma_event_e70_go_idle,
|
||||
sdma_event_e80_hw_freeze,
|
||||
sdma_event_e81_hw_frozen,
|
||||
sdma_event_e82_hw_unfreeze,
|
||||
sdma_event_e85_link_down,
|
||||
sdma_event_e90_sw_halted,
|
||||
};
|
||||
|
||||
struct sdma_set_state_action {
|
||||
unsigned op_enable:1;
|
||||
unsigned op_intenable:1;
|
||||
unsigned op_halt:1;
|
||||
unsigned op_cleanup:1;
|
||||
unsigned go_s99_running_tofalse:1;
|
||||
unsigned go_s99_running_totrue:1;
|
||||
};
|
||||
|
||||
#include <hfi1/hfi1_generated_sdma_state.h>
|
||||
|
||||
/**
|
||||
* DOC: sdma exported routines
|
||||
*
|
||||
* These sdma routines fit into three categories:
|
||||
* - The SDMA API for building and submitting packets
|
||||
* to the ring
|
||||
*
|
||||
* - Initialization and tear down routines to buildup
|
||||
* and tear down SDMA
|
||||
*
|
||||
* - ISR entrances to handle interrupts, state changes
|
||||
* and errors
|
||||
*/
|
||||
|
||||
/**
|
||||
* DOC: sdma PSM/verbs API
|
||||
*
|
||||
* The sdma API is designed to be used by both PSM
|
||||
* and verbs to supply packets to the SDMA ring.
|
||||
*
|
||||
* The usage of the API is as follows:
|
||||
*
|
||||
* Embed a struct iowait in the QP or
|
||||
* PQ. The iowait should be initialized with a
|
||||
* call to iowait_init().
|
||||
*
|
||||
* The user of the API should create an allocation method
|
||||
* for their version of the txreq. slabs, pre-allocated lists,
|
||||
* and dma pools can be used. Once the user's overload of
|
||||
* the sdma_txreq has been allocated, the sdma_txreq member
|
||||
* must be initialized with sdma_txinit() or sdma_txinit_ahg().
|
||||
*
|
||||
* The txreq must be declared with the sdma_txreq first.
|
||||
*
|
||||
* The tx request, once initialized, is manipulated with calls to
|
||||
* sdma_txadd_daddr(), sdma_txadd_page(), or sdma_txadd_kvaddr()
|
||||
* for each disjoint memory location. It is the user's responsibility
|
||||
* to understand the packet boundaries and page boundaries to do the
|
||||
* appropriate number of sdma_txadd_* calls.. The user
|
||||
* must be prepared to deal with failures from these routines due to
|
||||
* either memory allocation or dma_mapping failures.
|
||||
*
|
||||
* The mapping specifics for each memory location are recorded
|
||||
* in the tx. Memory locations added with sdma_txadd_page()
|
||||
* and sdma_txadd_kvaddr() are automatically mapped when added
|
||||
 * to the tx and unmapped as part of the progress processing in the
|
||||
* SDMA interrupt handling.
|
||||
*
|
||||
* sdma_txadd_daddr() is used to add an dma_addr_t memory to the
|
||||
* tx. An example of a use case would be a pre-allocated
|
||||
* set of headers allocated via dma_pool_alloc() or
|
||||
* dma_alloc_coherent(). For these memory locations, it
|
||||
* is the responsibility of the user to handle that unmapping.
|
||||
* (This would usually be at an unload or job termination.)
|
||||
*
|
||||
* The routine sdma_send_txreq() is used to submit
|
||||
* a tx to the ring after the appropriate number of
|
||||
* sdma_txadd_* have been done.
|
||||
*
|
||||
* If it is desired to send a burst of sdma_txreqs, sdma_send_txlist()
|
||||
* can be used to submit a list of packets.
|
||||
*
|
||||
* The user is free to use the link overhead in the struct sdma_txreq as
|
||||
* long as the tx isn't in flight.
|
||||
*
|
||||
* The extreme degenerate case of the number of descriptors
|
||||
* exceeding the ring size is automatically handled as
|
||||
* memory locations are added. An overflow of the descriptor
|
||||
* array that is part of the sdma_txreq is also automatically
|
||||
* handled.
|
||||
*
|
||||
*/
|
||||
|
||||
/**
|
||||
* DOC: Infrastructure calls
|
||||
*
|
||||
* sdma_init() is used to initialize data structures and
|
||||
* CSRs for the desired number of SDMA engines.
|
||||
*
|
||||
* sdma_start() is used to kick the SDMA engines initialized
|
||||
* with sdma_init(). Interrupts must be enabled at this
|
||||
* point since aspects of the state machine are interrupt
|
||||
* driven.
|
||||
*
|
||||
* sdma_engine_error() and sdma_engine_interrupt() are
|
||||
* entrances for interrupts.
|
||||
*
|
||||
* sdma_map_init() is for the management of the mapping
|
||||
* table when the number of vls is changed.
|
||||
*
|
||||
*/
|
||||
|
||||
/*
|
||||
* struct hw_sdma_desc - raw 128 bit SDMA descriptor
|
||||
*
|
||||
* This is the raw descriptor in the SDMA ring
|
||||
*/
|
||||
struct hw_sdma_desc {
|
||||
/* private: don't use directly */
|
||||
__le64 qw[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct sdma_engine - Data pertaining to each SDMA engine.
|
||||
* @dd: a back-pointer to the device data
|
||||
* @ppd: per port back-pointer
|
||||
* @imask: mask for irq manipulation
|
||||
* @idle_mask: mask for determining if an interrupt is due to sdma_idle
|
||||
*
|
||||
* This structure has the state for each sdma_engine.
|
||||
*
|
||||
 * Accessing non-public fields is not supported
|
||||
* since the private members are subject to change.
|
||||
*/
|
||||
/* The original size on Linux is 1472 B */
|
||||
|
||||
#include <hfi1/hfi1_generated_sdma_engine.h>
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
int sdma_init(struct hfi1_devdata *dd, u8 port);
|
||||
void sdma_start(struct hfi1_devdata *dd);
|
||||
void sdma_exit(struct hfi1_devdata *dd);
|
||||
void sdma_all_running(struct hfi1_devdata *dd);
|
||||
void sdma_all_idle(struct hfi1_devdata *dd);
|
||||
void sdma_freeze_notify(struct hfi1_devdata *dd, int go_idle);
|
||||
void sdma_freeze(struct hfi1_devdata *dd);
|
||||
void sdma_unfreeze(struct hfi1_devdata *dd);
|
||||
void sdma_wait(struct hfi1_devdata *dd);
|
||||
|
||||
/**
|
||||
* sdma_empty() - idle engine test
|
||||
* @engine: sdma engine
|
||||
*
|
||||
* Currently used by verbs as a latency optimization.
|
||||
*
|
||||
* Return:
|
||||
* 1 - empty, 0 - non-empty
|
||||
*/
|
||||
static inline int sdma_empty(struct sdma_engine *sde)
|
||||
{
|
||||
return sde->descq_tail == sde->descq_head;
|
||||
}
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
static inline u16 sdma_descq_freecnt(struct sdma_engine *sde)
|
||||
{
|
||||
return sde->descq_cnt -
|
||||
(sde->descq_tail -
|
||||
ACCESS_ONCE(sde->descq_head)) - 1;
|
||||
}
|
||||
|
||||
static inline u16 sdma_descq_inprocess(struct sdma_engine *sde)
|
||||
{
|
||||
return sde->descq_cnt - sdma_descq_freecnt(sde);
|
||||
}
|
||||
|
||||
/*
|
||||
* Either head_lock or tail lock required to see
|
||||
* a steady state.
|
||||
*/
|
||||
static inline int __sdma_running(struct sdma_engine *engine)
|
||||
{
|
||||
return engine->state.current_state == sdma_state_s99_running;
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_running() - state suitability test
|
||||
* @engine: sdma engine
|
||||
*
|
||||
* sdma_running probes the internal state to determine if it is suitable
|
||||
* for submitting packets.
|
||||
*
|
||||
* Return:
|
||||
* 1 - ok to submit, 0 - not ok to submit
|
||||
*
|
||||
*/
|
||||
static inline int sdma_running(struct sdma_engine *engine)
|
||||
{
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
linux_spin_lock_irqsave(&engine->tail_lock, flags);
|
||||
ret = __sdma_running(engine);
|
||||
linux_spin_unlock_irqrestore(&engine->tail_lock, flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void _sdma_txreq_ahgadd(
|
||||
struct sdma_txreq *tx,
|
||||
u8 num_ahg,
|
||||
u8 ahg_entry,
|
||||
u32 *ahg,
|
||||
u8 ahg_hlen);
|
||||
|
||||
/**
|
||||
* sdma_txinit_ahg() - initialize an sdma_txreq struct with AHG
|
||||
* @tx: tx request to initialize
|
||||
* @flags: flags to key last descriptor additions
|
||||
* @tlen: total packet length (pbc + headers + data)
|
||||
* @ahg_entry: ahg entry to use (0 - 31)
|
||||
* @num_ahg: ahg descriptor for first descriptor (0 - 9)
|
||||
* @ahg: array of AHG descriptors (up to 9 entries)
|
||||
* @ahg_hlen: number of bytes from ASIC entry to use
|
||||
* @cb: callback
|
||||
*
|
||||
 * The allocation of the sdma_txreq and its enclosing structure is user
|
||||
* dependent. This routine must be called to initialize the user independent
|
||||
* fields.
|
||||
*
|
||||
* The currently supported flags are SDMA_TXREQ_F_URGENT,
|
||||
* SDMA_TXREQ_F_AHG_COPY, and SDMA_TXREQ_F_USE_AHG.
|
||||
*
|
||||
* SDMA_TXREQ_F_URGENT is used for latency sensitive situations where the
|
||||
* completion is desired as soon as possible.
|
||||
*
|
||||
* SDMA_TXREQ_F_AHG_COPY causes the header in the first descriptor to be
|
||||
* copied to chip entry. SDMA_TXREQ_F_USE_AHG causes the code to add in
|
||||
* the AHG descriptors into the first 1 to 3 descriptors.
|
||||
*
|
||||
* Completions of submitted requests can be gotten on selected
|
||||
* txreqs by giving a completion routine callback to sdma_txinit() or
|
||||
* sdma_txinit_ahg(). The environment in which the callback runs
|
||||
* can be from an ISR, a tasklet, or a thread, so no sleeping
|
||||
* kernel routines can be used. Aspects of the sdma ring may
|
||||
* be locked so care should be taken with locking.
|
||||
*
|
||||
* The callback pointer can be NULL to avoid any callback for the packet
|
||||
* being submitted. The callback will be provided this tx, a status, and a flag.
|
||||
*
|
||||
* The status will be one of SDMA_TXREQ_S_OK, SDMA_TXREQ_S_SENDERROR,
|
||||
* SDMA_TXREQ_S_ABORTED, or SDMA_TXREQ_S_SHUTDOWN.
|
||||
*
|
||||
 * The flag, if the iowait had been used, indicates that the iowait
|
||||
* sdma_busy count has reached zero.
|
||||
*
|
||||
* user data portion of tlen should be precise. The sdma_txadd_* entrances
|
||||
* will pad with a descriptor references 1 - 3 bytes when the number of bytes
|
||||
* specified in tlen have been supplied to the sdma_txreq.
|
||||
*
|
||||
* ahg_hlen is used to determine the number of on-chip entry bytes to
|
||||
* use as the header. This is for cases where the stored header is
|
||||
* larger than the header to be used in a packet. This is typical
|
||||
* for verbs where an RDMA_WRITE_FIRST is larger than the packet in
|
||||
* and RDMA_WRITE_MIDDLE.
|
||||
*
|
||||
*/
|
||||
static inline int sdma_txinit_ahg(
|
||||
struct sdma_txreq *tx,
|
||||
u16 flags,
|
||||
u16 tlen,
|
||||
u8 ahg_entry,
|
||||
u8 num_ahg,
|
||||
u32 *ahg,
|
||||
u8 ahg_hlen,
|
||||
void (*cb)(struct sdma_txreq *, int))
|
||||
{
|
||||
if (tlen == 0)
|
||||
return -ENODATA;
|
||||
if (tlen > MAX_SDMA_PKT_SIZE)
|
||||
return -EMSGSIZE;
|
||||
tx->desc_limit = ARRAY_SIZE(tx->descs);
|
||||
tx->descp = &tx->descs[0];
|
||||
INIT_LIST_HEAD(&tx->list);
|
||||
tx->num_desc = 0;
|
||||
tx->flags = flags;
|
||||
tx->complete = cb;
|
||||
tx->coalesce_buf = NULL;
|
||||
tx->wait = NULL;
|
||||
tx->packet_len = tlen;
|
||||
tx->tlen = tx->packet_len;
|
||||
tx->descs[0].qw[0] = SDMA_DESC0_FIRST_DESC_FLAG;
|
||||
tx->descs[0].qw[1] = 0;
|
||||
if (flags & SDMA_TXREQ_F_AHG_COPY)
|
||||
tx->descs[0].qw[1] |=
|
||||
(((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
|
||||
<< SDMA_DESC1_HEADER_INDEX_SHIFT) |
|
||||
(((u64)SDMA_AHG_COPY & SDMA_DESC1_HEADER_MODE_MASK)
|
||||
<< SDMA_DESC1_HEADER_MODE_SHIFT);
|
||||
else if (flags & SDMA_TXREQ_F_USE_AHG && num_ahg)
|
||||
_sdma_txreq_ahgadd(tx, num_ahg, ahg_entry, ahg, ahg_hlen);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_txinit() - initialize an sdma_txreq struct (no AHG)
|
||||
* @tx: tx request to initialize
|
||||
* @flags: flags to key last descriptor additions
|
||||
* @tlen: total packet length (pbc + headers + data)
|
||||
* @cb: callback pointer
|
||||
*
|
||||
 * The allocation of the sdma_txreq and its enclosing structure is user
|
||||
* dependent. This routine must be called to initialize the user
|
||||
* independent fields.
|
||||
*
|
||||
* The currently supported flags is SDMA_TXREQ_F_URGENT.
|
||||
*
|
||||
* SDMA_TXREQ_F_URGENT is used for latency sensitive situations where the
|
||||
* completion is desired as soon as possible.
|
||||
*
|
||||
* Completions of submitted requests can be gotten on selected
|
||||
* txreqs by giving a completion routine callback to sdma_txinit() or
|
||||
* sdma_txinit_ahg(). The environment in which the callback runs
|
||||
* can be from an ISR, a tasklet, or a thread, so no sleeping
|
||||
* kernel routines can be used. The head size of the sdma ring may
|
||||
* be locked so care should be taken with locking.
|
||||
*
|
||||
* The callback pointer can be NULL to avoid any callback for the packet
|
||||
* being submitted.
|
||||
*
|
||||
* The callback, if non-NULL, will be provided this tx and a status. The
|
||||
* status will be one of SDMA_TXREQ_S_OK, SDMA_TXREQ_S_SENDERROR,
|
||||
* SDMA_TXREQ_S_ABORTED, or SDMA_TXREQ_S_SHUTDOWN.
|
||||
*
|
||||
*/
|
||||
static inline int sdma_txinit(
|
||||
struct sdma_txreq *tx,
|
||||
u16 flags,
|
||||
u16 tlen,
|
||||
void (*cb)(struct sdma_txreq *, int))
|
||||
{
|
||||
return sdma_txinit_ahg(tx, flags, tlen, 0, 0, NULL, 0, cb);
|
||||
}
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
/* helpers - don't use */
|
||||
static inline int sdma_mapping_type(struct sdma_desc *d)
|
||||
{
|
||||
return (d->qw[1] & SDMA_DESC1_GENERATION_SMASK)
|
||||
>> SDMA_DESC1_GENERATION_SHIFT;
|
||||
}
|
||||
|
||||
static inline size_t sdma_mapping_len(struct sdma_desc *d)
|
||||
{
|
||||
return (d->qw[0] & SDMA_DESC0_BYTE_COUNT_SMASK)
|
||||
>> SDMA_DESC0_BYTE_COUNT_SHIFT;
|
||||
}
|
||||
|
||||
static inline dma_addr_t sdma_mapping_addr(struct sdma_desc *d)
|
||||
{
|
||||
return (d->qw[0] & SDMA_DESC0_PHY_ADDR_SMASK)
|
||||
>> SDMA_DESC0_PHY_ADDR_SHIFT;
|
||||
}
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
static inline void make_tx_sdma_desc(
|
||||
struct sdma_txreq *tx,
|
||||
int type,
|
||||
dma_addr_t addr,
|
||||
size_t len)
|
||||
{
|
||||
struct sdma_desc *desc = &tx->descp[tx->num_desc];
|
||||
|
||||
if (!tx->num_desc) {
|
||||
/* qw[0] zero; qw[1] first, ahg mode already in from init */
|
||||
desc->qw[1] |= ((u64)type & SDMA_DESC1_GENERATION_MASK)
|
||||
<< SDMA_DESC1_GENERATION_SHIFT;
|
||||
} else {
|
||||
desc->qw[0] = 0;
|
||||
desc->qw[1] = ((u64)type & SDMA_DESC1_GENERATION_MASK)
|
||||
<< SDMA_DESC1_GENERATION_SHIFT;
|
||||
}
|
||||
desc->qw[0] |= (((u64)addr & SDMA_DESC0_PHY_ADDR_MASK)
|
||||
<< SDMA_DESC0_PHY_ADDR_SHIFT) |
|
||||
(((u64)len & SDMA_DESC0_BYTE_COUNT_MASK)
|
||||
<< SDMA_DESC0_BYTE_COUNT_SHIFT);
|
||||
}
|
||||
|
||||
/* helper to extend txreq */
|
||||
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
|
||||
int type, void *kvaddr, struct page *page,
|
||||
unsigned long offset, u16 len);
|
||||
void __sdma_txclean(struct hfi1_devdata *, struct sdma_txreq *);
|
||||
|
||||
static inline void sdma_txclean(struct hfi1_devdata *dd, struct sdma_txreq *tx)
|
||||
{
|
||||
if (tx->num_desc)
|
||||
__sdma_txclean(dd, tx);
|
||||
}
|
||||
int _pad_sdma_tx_descs(struct hfi1_devdata *, struct sdma_txreq *);
|
||||
|
||||
/* helpers used by public routines */
|
||||
static inline void _sdma_close_tx(struct hfi1_devdata *dd,
|
||||
struct sdma_txreq *tx)
|
||||
{
|
||||
tx->descp[tx->num_desc].qw[0] |=
|
||||
SDMA_DESC0_LAST_DESC_FLAG;
|
||||
tx->descp[tx->num_desc].qw[1] |=
|
||||
dd->default_desc1;
|
||||
if (tx->flags & SDMA_TXREQ_F_URGENT)
|
||||
tx->descp[tx->num_desc].qw[1] |=
|
||||
(SDMA_DESC1_HEAD_TO_HOST_FLAG |
|
||||
SDMA_DESC1_INT_REQ_FLAG);
|
||||
}
|
||||
|
||||
static inline int _sdma_txadd_daddr(
|
||||
struct hfi1_devdata *dd,
|
||||
int type,
|
||||
struct sdma_txreq *tx,
|
||||
dma_addr_t addr,
|
||||
u16 len)
|
||||
{
|
||||
int rval = 0;
|
||||
|
||||
make_tx_sdma_desc(
|
||||
tx,
|
||||
type,
|
||||
addr, len);
|
||||
WARN_ON(len > tx->tlen);
|
||||
tx->tlen -= len;
|
||||
/* special cases for last */
|
||||
if (!tx->tlen) {
|
||||
if (tx->packet_len & (sizeof(u32) - 1)) {
|
||||
rval = _pad_sdma_tx_descs(dd, tx);
|
||||
if (rval)
|
||||
return rval;
|
||||
} else {
|
||||
_sdma_close_tx(dd, tx);
|
||||
}
|
||||
}
|
||||
tx->num_desc++;
|
||||
return rval;
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_txadd_page() - add a page to the sdma_txreq
|
||||
* @dd: the device to use for mapping
|
||||
* @tx: tx request to which the page is added
|
||||
* @page: page to map
|
||||
* @offset: offset within the page
|
||||
* @len: length in bytes
|
||||
*
|
||||
* This is used to add a page/offset/length descriptor.
|
||||
*
|
||||
* The mapping/unmapping of the page/offset/len is automatically handled.
|
||||
*
|
||||
* Return:
|
||||
* 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't
|
||||
* extend/coalesce descriptor array
|
||||
*/
|
||||
static inline int sdma_txadd_page(
|
||||
struct hfi1_devdata *dd,
|
||||
struct sdma_txreq *tx,
|
||||
dma_addr_t paddr,
|
||||
u16 len)
|
||||
{
|
||||
return _sdma_txadd_daddr(
|
||||
dd, SDMA_MAP_PAGE, tx, paddr, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_txadd_daddr() - add a dma address to the sdma_txreq
|
||||
* @dd: the device to use for mapping
|
||||
* @tx: sdma_txreq to which the page is added
|
||||
* @addr: dma address mapped by caller
|
||||
* @len: length in bytes
|
||||
*
|
||||
* This is used to add a descriptor for memory that is already dma mapped.
|
||||
*
|
||||
* In this case, there is no unmapping as part of the progress processing for
|
||||
* this memory location.
|
||||
*
|
||||
* Return:
|
||||
* 0 - success, -ENOMEM - couldn't extend descriptor array
|
||||
*/
|
||||
|
||||
static inline int sdma_txadd_daddr(
|
||||
struct hfi1_devdata *dd,
|
||||
struct sdma_txreq *tx,
|
||||
dma_addr_t addr,
|
||||
u16 len)
|
||||
{
|
||||
int rval;
|
||||
|
||||
if ((unlikely(tx->num_desc == tx->desc_limit))) {
|
||||
rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_NONE,
|
||||
NULL, NULL, 0, 0);
|
||||
if (rval <= 0)
|
||||
return rval;
|
||||
}
|
||||
|
||||
return _sdma_txadd_daddr(dd, SDMA_MAP_NONE, tx, addr, len);
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_txadd_kvaddr() - add a kernel virtual address to sdma_txreq
|
||||
* @dd: the device to use for mapping
|
||||
* @tx: sdma_txreq to which the page is added
|
||||
* @kvaddr: the kernel virtual address
|
||||
* @len: length in bytes
|
||||
*
|
||||
* This is used to add a descriptor referenced by the indicated kvaddr and
|
||||
* len.
|
||||
*
|
||||
* The mapping/unmapping of the kvaddr and len is automatically handled.
|
||||
*
|
||||
* Return:
|
||||
* 0 - success, -ENOSPC - mapping fail, -ENOMEM - couldn't extend/coalesce
|
||||
* descriptor array
|
||||
*/
|
||||
static inline int sdma_txadd_kvaddr(
|
||||
struct hfi1_devdata *dd,
|
||||
struct sdma_txreq *tx,
|
||||
void *kvaddr,
|
||||
u16 len)
|
||||
{
|
||||
dma_addr_t addr;
|
||||
int rval;
|
||||
|
||||
if ((unlikely(tx->num_desc == tx->desc_limit))) {
|
||||
rval = ext_coal_sdma_tx_descs(dd, tx, SDMA_MAP_SINGLE,
|
||||
kvaddr, NULL, 0, len);
|
||||
if (rval <= 0)
|
||||
return rval;
|
||||
}
|
||||
|
||||
addr = virt_to_phys(kvaddr);
|
||||
|
||||
return _sdma_txadd_daddr(
|
||||
dd, SDMA_MAP_SINGLE, tx, addr, len);
|
||||
}
|
||||
|
||||
struct iowait_wait;
|
||||
int sdma_send_txreq(struct sdma_engine *sde,
|
||||
struct iowait_work *wait,
|
||||
struct sdma_txreq *tx);
|
||||
int sdma_send_txlist(struct sdma_engine *sde,
|
||||
struct iowait_work *wait,
|
||||
struct list_head *tx_list,
|
||||
u32 *count);
|
||||
|
||||
int sdma_ahg_alloc(struct sdma_engine *sde);
|
||||
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index);
|
||||
|
||||
/**
|
||||
* sdma_build_ahg - build ahg descriptor
|
||||
* @data
|
||||
* @dwindex
|
||||
* @startbit
|
||||
* @bits
|
||||
*
|
||||
* Build and return a 32 bit descriptor.
|
||||
*/
|
||||
static inline u32 sdma_build_ahg_descriptor(
|
||||
u16 data,
|
||||
u8 dwindex,
|
||||
u8 startbit,
|
||||
u8 bits)
|
||||
{
|
||||
return (u32)(1UL << SDMA_AHG_UPDATE_ENABLE_SHIFT |
|
||||
((startbit & SDMA_AHG_FIELD_START_MASK) <<
|
||||
SDMA_AHG_FIELD_START_SHIFT) |
|
||||
((bits & SDMA_AHG_FIELD_LEN_MASK) <<
|
||||
SDMA_AHG_FIELD_LEN_SHIFT) |
|
||||
((dwindex & SDMA_AHG_INDEX_MASK) <<
|
||||
SDMA_AHG_INDEX_SHIFT) |
|
||||
((data & SDMA_AHG_VALUE_MASK) <<
|
||||
SDMA_AHG_VALUE_SHIFT));
|
||||
}
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
/**
|
||||
 * sdma_progress - use seq number to detect head progress
|
||||
* @sde: sdma_engine to check
|
||||
* @seq: base seq count
|
||||
* @tx: txreq for which we need to check descriptor availability
|
||||
*
|
||||
* This is used in the appropriate spot in the sleep routine
|
||||
* to check for potential ring progress. This routine gets the
|
||||
* seqcount before queuing the iowait structure for progress.
|
||||
*
|
||||
* If the seqcount indicates that progress needs to be checked,
|
||||
* re-submission is detected by checking whether the descriptor
|
||||
* queue has enough descriptor for the txreq.
|
||||
*/
|
||||
static inline unsigned sdma_progress(struct sdma_engine *sde, unsigned seq,
|
||||
struct sdma_txreq *tx)
|
||||
{
|
||||
if (read_seqretry(&sde->head_lock, seq)) {
|
||||
sde->desc_avail = sdma_descq_freecnt(sde);
|
||||
if (tx->num_desc > sde->desc_avail)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_iowait_schedule() - initialize wait structure
|
||||
* @sde: sdma_engine to schedule
|
||||
* @wait: wait struct to schedule
|
||||
*
|
||||
* This function initializes the iowait
|
||||
* structure embedded in the QP or PQ.
|
||||
*
|
||||
*/
|
||||
static inline void sdma_iowait_schedule(
|
||||
struct sdma_engine *sde,
|
||||
struct iowait *wait)
|
||||
{
|
||||
struct hfi1_pportdata *ppd = sde->dd->pport;
|
||||
|
||||
iowait_schedule(wait, ppd->hfi1_wq, sde->cpu);
|
||||
}
|
||||
|
||||
/* for use by interrupt handling */
|
||||
void sdma_engine_error(struct sdma_engine *sde, u64 status);
|
||||
void sdma_engine_interrupt(struct sdma_engine *sde, u64 status);
|
||||
|
||||
/*
|
||||
*
|
||||
* The diagram below details the relationship of the mapping structures
|
||||
*
|
||||
* Since the mapping now allows for non-uniform engines per vl, the
|
||||
* number of engines for a vl is either the vl_engines[vl] or
|
||||
* a computation based on num_sdma/num_vls:
|
||||
*
|
||||
* For example:
|
||||
* nactual = vl_engines ? vl_engines[vl] : num_sdma/num_vls
|
||||
*
|
||||
* n = roundup to next highest power of 2 using nactual
|
||||
*
|
||||
* In the case where there are num_sdma/num_vls doesn't divide
|
||||
* evenly, the extras are added from the last vl downward.
|
||||
*
|
||||
* For the case where n > nactual, the engines are assigned
|
||||
* in a round robin fashion wrapping back to the first engine
|
||||
* for a particular vl.
|
||||
*
|
||||
* dd->sdma_map
|
||||
* | sdma_map_elem[0]
|
||||
* | +--------------------+
|
||||
* v | mask |
|
||||
* sdma_vl_map |--------------------|
|
||||
* +--------------------------+ | sde[0] -> eng 1 |
|
||||
* | list (RCU) | |--------------------|
|
||||
* |--------------------------| ->| sde[1] -> eng 2 |
|
||||
* | mask | --/ |--------------------|
|
||||
* |--------------------------| -/ | * |
|
||||
* | actual_vls (max 8) | -/ |--------------------|
|
||||
* |--------------------------| --/ | sde[n] -> eng n |
|
||||
* | vls (max 8) | -/ +--------------------+
|
||||
* |--------------------------| --/
|
||||
* | map[0] |-/
|
||||
* |--------------------------| +--------------------+
|
||||
* | map[1] |--- | mask |
|
||||
* |--------------------------| \---- |--------------------|
|
||||
* | * | \-- | sde[0] -> eng 1+n |
|
||||
* | * | \---- |--------------------|
|
||||
* | * | \->| sde[1] -> eng 2+n |
|
||||
* |--------------------------| |--------------------|
|
||||
* | map[vls - 1] |- | * |
|
||||
* +--------------------------+ \- |--------------------|
|
||||
* \- | sde[m] -> eng m+n |
|
||||
* \ +--------------------+
|
||||
* \-
|
||||
* \
|
||||
* \- +--------------------+
|
||||
* \- | mask |
|
||||
* \ |--------------------|
|
||||
* \- | sde[0] -> eng 1+m+n|
|
||||
* \- |--------------------|
|
||||
* >| sde[1] -> eng 2+m+n|
|
||||
* |--------------------|
|
||||
* | * |
|
||||
* |--------------------|
|
||||
* | sde[o] -> eng o+m+n|
|
||||
* +--------------------+
|
||||
*
|
||||
*/
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
/**
|
||||
* struct sdma_map_elem - mapping for a vl
|
||||
* @mask - selector mask
|
||||
* @sde - array of engines for this vl
|
||||
*
|
||||
* The mask is used to "mod" the selector
|
||||
* to produce index into the trailing
|
||||
* array of sdes.
|
||||
*/
|
||||
struct sdma_map_elem {
|
||||
u32 mask;
|
||||
struct sdma_engine *sde[0];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct sdma_map_el - mapping for a vl
|
||||
* @engine_to_vl - map of an engine to a vl
|
||||
* @list - rcu head for free callback
|
||||
* @mask - vl mask to "mod" the vl to produce an index to map array
|
||||
* @actual_vls - number of vls
|
||||
* @vls - number of vls rounded to next power of 2
|
||||
* @map - array of sdma_map_elem entries
|
||||
*
|
||||
* This is the parent mapping structure. The trailing
|
||||
* members of the struct point to sdma_map_elem entries, which
|
||||
* in turn point to an array of sde's for that vl.
|
||||
*/
|
||||
struct sdma_vl_map {
|
||||
s8 engine_to_vl[TXE_NUM_SDMA_ENGINES];
|
||||
char list[16]; // struct rcu_head list;
|
||||
u32 mask;
|
||||
u8 actual_vls;
|
||||
u8 vls;
|
||||
struct sdma_map_elem *map[0];
|
||||
};
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
int sdma_map_init(
|
||||
struct hfi1_devdata *dd,
|
||||
u8 port,
|
||||
u8 num_vls,
|
||||
u8 *vl_engines);
|
||||
|
||||
/* slow path */
|
||||
void _sdma_engine_progress_schedule(struct sdma_engine *sde);
|
||||
|
||||
/**
|
||||
* sdma_engine_progress_schedule() - schedule progress on engine
|
||||
* @sde: sdma_engine to schedule progress
|
||||
*
|
||||
* This is the fast path.
|
||||
*
|
||||
*/
|
||||
static inline void sdma_engine_progress_schedule(
|
||||
struct sdma_engine *sde)
|
||||
{
|
||||
if (!sde || sdma_descq_inprocess(sde) < (sde->descq_cnt / 8))
|
||||
return;
|
||||
_sdma_engine_progress_schedule(sde);
|
||||
}
|
||||
|
||||
struct sdma_engine *sdma_select_engine_sc(
|
||||
struct hfi1_devdata *dd,
|
||||
u32 selector,
|
||||
u8 sc5);
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
struct sdma_engine *sdma_select_engine_vl(
|
||||
struct hfi1_devdata *dd,
|
||||
u32 selector,
|
||||
u8 vl);
|
||||
|
||||
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
|
||||
u32 selector, u8 vl);
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
ssize_t sdma_get_cpu_to_sde_map(struct sdma_engine *sde, char *buf);
|
||||
ssize_t sdma_set_cpu_to_sde_map(struct sdma_engine *sde, const char *buf,
|
||||
size_t count);
|
||||
int sdma_engine_get_vl(struct sdma_engine *sde);
|
||||
void sdma_seqfile_dump_sde(struct seq_file *s, struct sdma_engine *);
|
||||
void sdma_seqfile_dump_cpu_list(struct seq_file *s, struct hfi1_devdata *dd,
|
||||
unsigned long cpuid);
|
||||
|
||||
#ifdef CONFIG_SDMA_VERBOSITY
|
||||
void sdma_dumpstate(struct sdma_engine *);
|
||||
#endif
|
||||
/* return a pointer to the basename component of path string @s */
static inline char *slashstrip(char *s)
{
	char *last = s;
	char *p;

	for (p = s; *p; p++)
		if (*p == '/')
			last = p + 1;
	return last;
}
|
||||
|
||||
u16 sdma_get_descq_cnt(void);
|
||||
|
||||
extern uint mod_num_sdma;
|
||||
|
||||
void sdma_update_lmc(struct hfi1_devdata *dd, u64 mask, u32 lid);
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
#endif
|
||||
137
kernel/include/hfi1/sdma_txreq.h
Normal file
137
kernel/include/hfi1/sdma_txreq.h
Normal file
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Copyright(c) 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef HFI1_SDMA_TXREQ_H
|
||||
#define HFI1_SDMA_TXREQ_H
|
||||
|
||||
#include <hfi1/iowait.h>
|
||||
|
||||
/* increased for AHG */
|
||||
#define NUM_DESC 6
|
||||
|
||||
/*
|
||||
* struct sdma_desc - canonical fragment descriptor
|
||||
*
|
||||
* This is the descriptor carried in the tx request
|
||||
* corresponding to each fragment.
|
||||
*
|
||||
*/
|
||||
struct sdma_desc {
|
||||
/* private: don't use directly */
|
||||
u64 qw[2];
|
||||
};
|
||||
|
||||
/**
|
||||
* struct sdma_txreq - the sdma_txreq structure (one per packet)
|
||||
* @list: for use by user and by queuing for wait
|
||||
*
|
||||
* This is the representation of a packet which consists of some
|
||||
 * number of fragments. Storage is provided within the structure
|
||||
* for all fragments.
|
||||
*
|
||||
 * The storage for the descriptors is automatically extended as needed
|
||||
 * when the current allocation is exceeded.
|
||||
*
|
||||
* The user (Verbs or PSM) may overload this structure with fields
|
||||
* specific to their use by putting this struct first in their struct.
|
||||
* The method of allocation of the overloaded structure is user dependent
|
||||
*
|
||||
* The list is the only public field in the structure.
|
||||
*
|
||||
*/
|
||||
|
||||
#define SDMA_TXREQ_S_OK 0
|
||||
#define SDMA_TXREQ_S_SENDERROR 1
|
||||
#define SDMA_TXREQ_S_ABORTED 2
|
||||
#define SDMA_TXREQ_S_SHUTDOWN 3
|
||||
|
||||
/* flags bits */
|
||||
#define SDMA_TXREQ_F_URGENT 0x0001
|
||||
#define SDMA_TXREQ_F_AHG_COPY 0x0002
|
||||
#define SDMA_TXREQ_F_USE_AHG 0x0004
|
||||
|
||||
struct sdma_txreq;
|
||||
typedef void (*callback_t)(struct sdma_txreq *, int);
|
||||
|
||||
struct iowait_wait;
|
||||
struct sdma_txreq {
|
||||
struct list_head list;
|
||||
/* private: */
|
||||
struct sdma_desc *descp;
|
||||
/* private: */
|
||||
void *coalesce_buf;
|
||||
/* private: */
|
||||
struct iowait *wait;
|
||||
/* private: */
|
||||
callback_t complete;
|
||||
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
|
||||
u64 sn;
|
||||
#endif
|
||||
/* private: - used in coalesce/pad processing */
|
||||
u16 packet_len;
|
||||
/* private: - down-counted to trigger last */
|
||||
u16 tlen;
|
||||
/* private: */
|
||||
u16 num_desc;
|
||||
/* private: */
|
||||
u16 desc_limit;
|
||||
/* private: */
|
||||
u16 next_descq_idx;
|
||||
/* private: */
|
||||
u16 coalesce_idx;
|
||||
/* private: flags */
|
||||
u16 flags;
|
||||
/* private: */
|
||||
struct sdma_desc descs[NUM_DESC];
|
||||
};
|
||||
|
||||
static inline int sdma_txreq_built(struct sdma_txreq *tx)
|
||||
{
|
||||
return tx->num_desc;
|
||||
}
|
||||
|
||||
#endif /* HFI1_SDMA_TXREQ_H */
|
||||
175
kernel/include/hfi1/user_exp_rcv.h
Normal file
175
kernel/include/hfi1/user_exp_rcv.h
Normal file
@ -0,0 +1,175 @@
|
||||
#ifndef _HFI1_USER_EXP_RCV_H
|
||||
#define _HFI1_USER_EXP_RCV_H
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
#include "hfi1/hfi.h"
|
||||
|
||||
#define EXP_TID_TIDLEN_MASK 0x7FFULL
|
||||
#define EXP_TID_TIDLEN_SHIFT 0
|
||||
#define EXP_TID_TIDCTRL_MASK 0x3ULL
|
||||
#define EXP_TID_TIDCTRL_SHIFT 20
|
||||
#define EXP_TID_TIDIDX_MASK 0x3FFULL
|
||||
#define EXP_TID_TIDIDX_SHIFT 22
|
||||
#define EXP_TID_GET(tid, field) \
|
||||
(((tid) >> EXP_TID_TID##field##_SHIFT) & EXP_TID_TID##field##_MASK)
|
||||
|
||||
#define EXP_TID_SET(field, value) \
|
||||
(((value) & EXP_TID_TID##field##_MASK) << \
|
||||
EXP_TID_TID##field##_SHIFT)
|
||||
#define EXP_TID_CLEAR(tid, field) ({ \
|
||||
(tid) &= ~(EXP_TID_TID##field##_MASK << \
|
||||
EXP_TID_TID##field##_SHIFT); \
|
||||
})
|
||||
#define EXP_TID_RESET(tid, field, value) do { \
|
||||
EXP_TID_CLEAR(tid, field); \
|
||||
(tid) |= EXP_TID_SET(field, (value)); \
|
||||
} while (0)
|
||||
|
||||
struct tid_group {
|
||||
struct list_head list;
|
||||
unsigned base;
|
||||
u8 size;
|
||||
u8 used;
|
||||
u8 map;
|
||||
};
|
||||
|
||||
/*
 * Per-registration node kept in an RB tree, describing one pinned user
 * buffer bound to an RcvArray entry.
 * NOTE(review): field semantics inferred from names — verify against
 * the registration/invalidate paths.
 */
struct tid_rb_node {
	uintptr_t phys;			/* presumably physical address of buffer */
	u32 len;			/* presumably buffer length in bytes */
	u32 rcventry;			/* presumably RcvArray entry index */
	struct tid_group *grp;		/* owning entry group */
	bool freed;			/* presumably set once entry released */

	struct rb_root *rb_root;	/* tree this node belongs to */
	struct hfi1_filedata *fd;	/* owning file/context data */
	unsigned long start;		/* presumably user VA range start */
	unsigned long end;		/* presumably user VA range end */
	struct rb_node rb_node;		/* RB-tree linkage */
	struct deferred_unmap_range *range; /* deferred-unmap bookkeeping */
};
|
||||
|
||||
/*
 * A run of pages described by a starting index and a count
 * (see find_phys_blocks(), which fills arrays of these).
 */
struct tid_pageset {
	u16 idx;	/* presumably index of the first page of the run */
	u16 count;	/* presumably number of pages in the run */
};
|
||||
|
||||
/*
 * Write an "empty" RcvArray entry.
 * This function exists so the TID registration code can use it
 * to write to unused/unneeded entries and still take advantage
 * of the WC performance improvements. The HFI will ignore this
 * write to the RcvArray entry.
 */
static inline void rcv_array_wc_fill(struct hfi1_devdata *dd, u32 index)
{
	/*
	 * Doing the WC fill writes only makes sense if the device is
	 * present and the RcvArray has been mapped as WC memory.
	 */
	if ((dd->flags & HFI1_PRESENT) && dd->rcvarray_wc)
		writeq(0, dd->rcvarray_wc + (index * 8)); /* 8 bytes/entry */
}
|
||||
|
||||
/*
 * Encode an RcvArray entry index as TID info: the IDX field holds the
 * pair index (entry / 2) and the CTRL field selects which entry of the
 * pair it is (bit 0 -> CTRL = 1, bit 1 set -> CTRL = 2).
 */
static inline u32 rcventry2tidinfo(u32 rcventry)
{
	u32 odd = rcventry & 0x1;

	return EXP_TID_SET(IDX, rcventry >> 1) |
		EXP_TID_SET(CTRL, 1 << odd);
}
|
||||
|
||||
/* Reset an expected-TID set to the empty state. */
static inline void exp_tid_group_init(struct exp_tid_set *set)
{
	INIT_LIST_HEAD(&set->list);
	set->count = 0;
}
|
||||
|
||||
/*
 * Unlink @grp from @set and decrement the set's group count.
 * The group's list node is reinitialized so it can be re-added later.
 */
static inline void tid_group_remove(struct tid_group *grp,
		struct exp_tid_set *set)
{
	list_del_init(&grp->list);
	set->count--;
}
|
||||
|
||||
/* Append @grp to the tail of @set and bump the set's group count. */
static inline void tid_group_add_tail(struct tid_group *grp,
		struct exp_tid_set *set)
{
	list_add_tail(&grp->list, &set->list);
	set->count++;
}
|
||||
|
||||
/*
 * Detach and return the first group on @set's list.
 * NOTE(review): there is no empty-list guard here — calling this with
 * set->count == 0 would hand back a bogus pointer derived from the
 * list head itself; confirm all callers check set->count first.
 */
static inline struct tid_group *tid_group_pop(struct exp_tid_set *set)
{
	struct tid_group *grp =
		list_first_entry(&set->list, struct tid_group, list);
	list_del_init(&grp->list);
	set->count--;
	return grp;
}
|
||||
|
||||
/* Move @group from set @s1 to the tail of set @s2. */
static inline void tid_group_move(struct tid_group *group,
		struct exp_tid_set *s1,
		struct exp_tid_set *s2)
{
	tid_group_remove(group, s1);
	tid_group_add_tail(group, s2);
}
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *);
|
||||
int alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
|
||||
void free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd);
|
||||
int hfi1_user_exp_rcv_init(struct file *);
|
||||
int hfi1_user_exp_rcv_free(struct hfi1_filedata *);
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
|
||||
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *, struct hfi1_tid_info *);
|
||||
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *, struct hfi1_tid_info *);
|
||||
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *, struct hfi1_tid_info *);
|
||||
|
||||
#endif /* _HFI1_USER_EXP_RCV_H */
|
||||
139
kernel/include/hfi1/user_sdma.h
Normal file
139
kernel/include/hfi1/user_sdma.h
Normal file
@ -0,0 +1,139 @@
|
||||
|
||||
#ifndef _HFI1_USER_SDMA_H
|
||||
#define _HFI1_USER_SDMA_H
|
||||
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <hfi1/ihk_hfi1_common.h>
|
||||
#include <hfi1/iowait.h>
|
||||
#include <string.h>
|
||||
#include <hfi1/hfi1_user.h>
|
||||
#include <uio.h>
|
||||
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
#include <linux/device.h>
|
||||
#include <linux/wait.h>
|
||||
|
||||
#include "common.h"
|
||||
#include "iowait.h"
|
||||
#include "user_exp_rcv.h"
|
||||
|
||||
extern uint extended_psn;
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
/*
|
||||
* Define fields in the KDETH header so we can update the header
|
||||
* template.
|
||||
*/
|
||||
#define KDETH_OFFSET_SHIFT 0
|
||||
#define KDETH_OFFSET_MASK 0x7fff
|
||||
#define KDETH_OM_SHIFT 15
|
||||
#define KDETH_OM_MASK 0x1
|
||||
#define KDETH_TID_SHIFT 16
|
||||
#define KDETH_TID_MASK 0x3ff
|
||||
#define KDETH_TIDCTRL_SHIFT 26
|
||||
#define KDETH_TIDCTRL_MASK 0x3
|
||||
#define KDETH_INTR_SHIFT 28
|
||||
#define KDETH_INTR_MASK 0x1
|
||||
#define KDETH_SH_SHIFT 29
|
||||
#define KDETH_SH_MASK 0x1
|
||||
#define KDETH_KVER_SHIFT 30
|
||||
#define KDETH_KVER_MASK 0x3
|
||||
#define KDETH_JKEY_SHIFT 0x0
|
||||
#define KDETH_JKEY_MASK 0xff
|
||||
#define KDETH_HCRC_UPPER_SHIFT 16
|
||||
#define KDETH_HCRC_UPPER_MASK 0xff
|
||||
#define KDETH_HCRC_LOWER_SHIFT 24
|
||||
#define KDETH_HCRC_LOWER_MASK 0xff
|
||||
|
||||
#define AHG_KDETH_INTR_SHIFT 12
|
||||
#define AHG_KDETH_SH_SHIFT 13
|
||||
#define AHG_KDETH_ARRAY_SIZE 9
|
||||
|
||||
#define KDETH_GET(val, field) \
|
||||
(((le32_to_cpu((val))) >> KDETH_##field##_SHIFT) & KDETH_##field##_MASK)
|
||||
#define KDETH_SET(dw, field, val) do { \
|
||||
u32 dwval = le32_to_cpu(dw); \
|
||||
dwval &= ~(KDETH_##field##_MASK << KDETH_##field##_SHIFT); \
|
||||
dwval |= (((val) & KDETH_##field##_MASK) << \
|
||||
KDETH_##field##_SHIFT); \
|
||||
dw = cpu_to_le32(dwval); \
|
||||
} while (0)
|
||||
#define KDETH_RESET(dw, field, val) ({ dw = 0; KDETH_SET(dw, field, val); })
|
||||
|
||||
/* KDETH OM multipliers and switch over point */
|
||||
#define KDETH_OM_SMALL 4
|
||||
#define KDETH_OM_SMALL_SHIFT 2
|
||||
#define KDETH_OM_LARGE 64
|
||||
#define KDETH_OM_LARGE_SHIFT 6
|
||||
#define KDETH_OM_MAX_SIZE (1 << ((KDETH_OM_LARGE / KDETH_OM_SMALL) + 1))
|
||||
|
||||
enum pkt_q_sdma_state {
|
||||
SDMA_PKT_Q_ACTIVE,
|
||||
SDMA_PKT_Q_DEFERRED,
|
||||
};
|
||||
|
||||
#include <hfi1/hfi1_generated_hfi1_user_sdma_pkt_q.h>
|
||||
|
||||
struct hfi1_user_sdma_comp_q {
|
||||
u16 nentries;
|
||||
struct hfi1_sdma_comp_entry *comps;
|
||||
};
|
||||
|
||||
int hfi1_user_sdma_process_request(void *private_data, struct iovec *iovec,
|
||||
unsigned long dim, unsigned long *count);
|
||||
#ifdef __HFI1_ORIG__
|
||||
|
||||
int hfi1_user_sdma_alloc_queues(struct hfi1_ctxtdata *, struct file *);
|
||||
int hfi1_user_sdma_free_queues(struct hfi1_filedata *);
|
||||
int hfi1_user_sdma_process_request(struct file *, struct iovec *, unsigned long,
|
||||
unsigned long *);
|
||||
|
||||
#endif /* __HFI1_ORIG__ */
|
||||
#endif /* _HFI1_SDMA_H */
|
||||
@ -38,4 +38,104 @@ int memcheckall();
|
||||
int freecheck(int runcount);
|
||||
void kmalloc_consolidate_free_list(void);
|
||||
|
||||
#ifndef unlikely
|
||||
#define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Generic lockless kmalloc cache.
|
||||
*/
|
||||
static inline void kmalloc_cache_free(void *elem)
|
||||
{
|
||||
struct kmalloc_cache_header *current = NULL;
|
||||
struct kmalloc_cache_header *new =
|
||||
(struct kmalloc_cache_header *)elem;
|
||||
struct kmalloc_header *header;
|
||||
register struct kmalloc_cache_header *cache;
|
||||
|
||||
if (unlikely(!elem))
|
||||
return;
|
||||
|
||||
/* Get cache pointer from kmalloc header */
|
||||
header = (struct kmalloc_header *)((void *)elem -
|
||||
sizeof(struct kmalloc_header));
|
||||
if (unlikely(!header->cache)) {
|
||||
kprintf("%s: WARNING: no cache for 0x%lx\n",
|
||||
__FUNCTION__, elem);
|
||||
return;
|
||||
}
|
||||
|
||||
cache = header->cache;
|
||||
|
||||
retry:
|
||||
current = cache->next;
|
||||
new->next = current;
|
||||
|
||||
if (!__sync_bool_compare_and_swap(&cache->next, current, new)) {
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Populate @cache with @nr_elem freshly kmalloc()ed elements of @size
 * bytes each; a no-op if the cache already holds elements.
 * Allocation failures are logged and skipped (best effort).
 * NOTE(review): the cache->next emptiness check is a plain read, not
 * atomic — concurrent preallocation could over-fill the cache; confirm
 * callers serialize this (e.g. per-CPU use only).
 */
static inline void kmalloc_cache_prealloc(struct kmalloc_cache_header *cache,
		size_t size, int nr_elem)
{
	struct kmalloc_cache_header *elem;
	int i;

	if (unlikely(cache->next))
		return;

	for (i = 0; i < nr_elem; ++i) {
		struct kmalloc_header *header;

		elem = (struct kmalloc_cache_header *)
			kmalloc(size, IHK_MC_AP_NOWAIT);

		if (!elem) {
			kprintf("%s: ERROR: allocating cache element\n", __FUNCTION__);
			continue;
		}

		/*
		 * Store cache pointer in kmalloc_header; assumes kmalloc()
		 * places a kmalloc_header directly before the payload
		 * (same layout kmalloc_cache_free() relies on).
		 */
		header = (struct kmalloc_header *)((void *)elem -
				sizeof(struct kmalloc_header));
		header->cache = cache;

		/* Push the element onto the cache's free list */
		kmalloc_cache_free(elem);
	}
}
|
||||
|
||||
static inline void *kmalloc_cache_alloc(struct kmalloc_cache_header *cache,
|
||||
size_t size)
|
||||
{
|
||||
register struct kmalloc_cache_header *first, *next;
|
||||
|
||||
retry:
|
||||
next = NULL;
|
||||
first = cache->next;
|
||||
|
||||
if (first) {
|
||||
next = first->next;
|
||||
|
||||
if (!__sync_bool_compare_and_swap(&cache->next,
|
||||
first, next)) {
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
else {
|
||||
//kprintf("%s: calling pre-alloc for 0x%lx...\n",
|
||||
// __FUNCTION__, cache);
|
||||
kprintf("%s: calling pre-alloc for 0x%lx (offs: %lu)...\n",
|
||||
__FUNCTION__, cache,
|
||||
((unsigned long)cache -
|
||||
(unsigned long)&cpu_local_var(txreq_cache)) /
|
||||
sizeof(struct kmalloc_cache_header));
|
||||
|
||||
kmalloc_cache_prealloc(cache, size, 512);
|
||||
goto retry;
|
||||
}
|
||||
|
||||
return (void *)first;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#ifndef __LWK_COMPILER_H
|
||||
#define __LWK_COMPILER_H
|
||||
|
||||
#include <ihk/cpu.h>
|
||||
|
||||
#ifndef __ASSEMBLY__
|
||||
|
||||
#ifdef __CHECKER__
|
||||
@ -175,11 +177,6 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
|
||||
# define unlikely(x) __builtin_expect(!!(x), 0)
|
||||
#endif
|
||||
|
||||
/* Optimization barrier */
|
||||
#ifndef barrier
|
||||
# define barrier() __memory_barrier()
|
||||
#endif
|
||||
|
||||
#ifndef barrier_data
|
||||
# define barrier_data(ptr) barrier()
|
||||
#endif
|
||||
@ -490,4 +487,66 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val,
|
||||
(_________p1); \
|
||||
})
|
||||
|
||||
extern void *memcpy(void *dest, const void *src, size_t n);
|
||||
|
||||
/*
 * Copy @size bytes from @p into @res, using a single volatile load when
 * @size matches a machine access width (1/2/4/8); otherwise fall back
 * to memcpy() bracketed by compiler barriers.  Backend for READ_ONCE().
 */
static __always_inline void __read_once_size(const volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(unsigned char *)res = *(volatile unsigned char *)p; break;
	case 2: *(unsigned short *)res = *(volatile unsigned short *)p; break;
	case 4: *(unsigned int *)res = *(volatile unsigned int *)p; break;
	case 8: *(unsigned long long *)res = *(volatile unsigned long long *)p; break;
	default:
		/* Odd sizes: prevent the compiler from caching/reordering */
		barrier();
		memcpy((void *)res, (const void *)p, size);
		barrier();
	}
}
|
||||
|
||||
/*
 * Copy @size bytes from @res to @p, using a single volatile store when
 * @size matches a machine access width (1/2/4/8); otherwise fall back
 * to memcpy() bracketed by compiler barriers.  Backend for WRITE_ONCE().
 */
static __always_inline void __write_once_size(volatile void *p, void *res, int size)
{
	switch (size) {
	case 1: *(volatile unsigned char *)p = *(unsigned char *)res; break;
	case 2: *(volatile unsigned short *)p = *(unsigned short *)res; break;
	case 4: *(volatile unsigned int *)p = *(unsigned int *)res; break;
	case 8: *(volatile unsigned long long *)p = *(unsigned long long *)res; break;
	default:
		/* Odd sizes: prevent the compiler from caching/reordering */
		barrier();
		memcpy((void *)p, (const void *)res, size);
		barrier();
	}
}
|
||||
|
||||
/*
|
||||
* Prevent the compiler from merging or refetching reads or writes. The
|
||||
* compiler is also forbidden from reordering successive instances of
|
||||
* READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the
|
||||
* compiler is aware of some particular ordering. One way to make the
|
||||
* compiler aware of ordering is to put the two invocations of READ_ONCE,
|
||||
* WRITE_ONCE or ACCESS_ONCE() in different C statements.
|
||||
*
|
||||
* In contrast to ACCESS_ONCE these two macros will also work on aggregate
|
||||
* data types like structs or unions. If the size of the accessed data
|
||||
* type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
|
||||
* READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a
|
||||
* compile-time warning.
|
||||
*
|
||||
* Their two major use cases are: (1) Mediating communication between
|
||||
* process-level code and irq/NMI handlers, all running on the same CPU,
|
||||
* and (2) Ensuring that the compiler does not fold, spindle, or otherwise
|
||||
* mutilate accesses that either do not require ordering or that interact
|
||||
* with an explicit memory barrier or atomic instruction that provides the
|
||||
* required ordering.
|
||||
*/
|
||||
|
||||
#define READ_ONCE(x) \
|
||||
({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; })
|
||||
|
||||
#define WRITE_ONCE(x, val) \
|
||||
({ typeof(x) __val = (val); __write_once_size(&(x), &__val, sizeof(__val)); __val; })
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif /* __LWK_COMPILER_H */
|
||||
|
||||
@ -32,6 +32,7 @@
|
||||
#define VR_STACK 0x1
|
||||
#define VR_RESERVED 0x2
|
||||
#define VR_AP_USER 0x4
|
||||
#define VR_PREALLOC 0x8
|
||||
#define VR_IO_NOCACHE 0x100
|
||||
#define VR_REMOTE 0x200
|
||||
#define VR_WRITE_COMBINED 0x400
|
||||
@ -387,6 +388,8 @@ struct vm_range {
|
||||
int pgshift; /* page size. 0 means THP */
|
||||
int padding;
|
||||
void *private_data;
|
||||
unsigned long lowest_accesed;
|
||||
unsigned long faulted_size;
|
||||
};
|
||||
|
||||
struct vm_range_numa_policy {
|
||||
@ -552,6 +555,7 @@ struct process {
|
||||
|
||||
long maxrss;
|
||||
long maxrss_children;
|
||||
unsigned long mcexec_flags;
|
||||
/* Memory policy flags and memory specific options */
|
||||
unsigned long mpol_flags;
|
||||
size_t mpol_threshold;
|
||||
@ -572,6 +576,21 @@ struct process {
|
||||
unsigned long profile_elapsed_ts;
|
||||
#endif // PROFILE_ENABLE
|
||||
int nr_processes; /* For partitioned execution */
|
||||
int process_rank; /* Rank in partition */
|
||||
|
||||
#define MAX_FD_PRIV 256
|
||||
void *fd_priv_table[MAX_FD_PRIV];
|
||||
/* HFI1 specific */
|
||||
void *hfi1_kregbase;
|
||||
void *hfi1_piobase;
|
||||
void *hfi1_rcvarray_wc;
|
||||
size_t hfi1_rcvarray_wc_len;
|
||||
void *hfi1_cq_comps;
|
||||
void *hfi1_events;
|
||||
size_t hfi1_cq_comps_len;
|
||||
ihk_spinlock_t hfi1_lock;
|
||||
struct rb_root hfi1_reg_tree;
|
||||
struct rb_root hfi1_inv_tree;
|
||||
};
|
||||
|
||||
/*
|
||||
@ -699,9 +718,35 @@ struct thread {
|
||||
|
||||
#define VM_RANGE_CACHE_SIZE 4
|
||||
|
||||
/*
 * Describes a virtual address range whose unmapping has been deferred;
 * presumably queued on vm->vm_deferred_unmap_range_list — confirm with
 * the users of this struct.
 */
struct deferred_unmap_range {
	struct process_vm *vm;		/* address space the range belongs to */
	void *addr;			/* start address of the range */
	size_t len;			/* length of the range in bytes */
	struct list_head list;		/* linkage on the deferred-unmap list */

	/*
	 * List operations as well as the refcnt are protected
	 * by vm->vm_deferred_unmap_lock.
	 */
	int refcnt;
};
|
||||
|
||||
/*
 * Initialize a deferred_unmap_range descriptor: record the owning VM
 * and the address range, and start with an empty list node and a zero
 * refcount (refcnt is later managed under vm->vm_deferred_unmap_lock).
 *
 * Marked static inline: a plain static function defined in a shared
 * header gets duplicated into (and warned about in) every translation
 * unit that includes it but does not call it.
 */
static inline void init_deferred_unmap_range(
		struct deferred_unmap_range *range,
		struct process_vm *vm,
		void *addr, size_t len)
{
	range->vm = vm;
	range->addr = addr;
	range->len = len;
	INIT_LIST_HEAD(&range->list);
	range->refcnt = 0;
}
|
||||
|
||||
struct process_vm {
|
||||
struct address_space *address_space;
|
||||
struct rb_root vm_range_tree;
|
||||
struct list_head vm_deferred_unmap_range_list;
|
||||
struct vm_regions region;
|
||||
struct process *proc; /* process that reside on the same page */
|
||||
void *opt;
|
||||
@ -711,6 +756,7 @@ struct process_vm {
|
||||
|
||||
ihk_spinlock_t page_table_lock;
|
||||
ihk_spinlock_t memory_range_lock;
|
||||
ihk_spinlock_t vm_deferred_unmap_lock;
|
||||
// to protect the followings:
|
||||
// 1. addition of process "memory range" (extend_process_region, add_process_memory_range)
|
||||
// 2. addition of process page table (allocate_pages, update_process_page_table)
|
||||
@ -829,4 +875,8 @@ void proc_init();
|
||||
void set_timer();
|
||||
struct sig_pending *hassigpending(struct thread *thread);
|
||||
|
||||
#define VERIFY_READ 0
|
||||
#define VERIFY_WRITE 1
|
||||
int access_ok(struct process_vm *vm, int type, uintptr_t addr, size_t len);
|
||||
|
||||
#endif
|
||||
|
||||
@ -28,7 +28,7 @@ struct profile_event {
|
||||
* [PROFILE_SYSCALL_MAX,PROFILE_OFFLOAD_MAX) - syscall offloads
|
||||
* [PROFILE_OFFLOAD_MAX,PROFILE_EVENT_MAX) - general events
|
||||
*
|
||||
* XXX: Make sure to fill in prof_event_names in profile.c
|
||||
* XXX: Make sure to fill in profile_event_names in profile.c
|
||||
* for each added profiled event.
|
||||
*/
|
||||
enum profile_event_type {
|
||||
@ -44,6 +44,12 @@ enum profile_event_type {
|
||||
PROFILE_mmap_anon_no_contig_phys,
|
||||
PROFILE_mmap_regular_file,
|
||||
PROFILE_mmap_device_file,
|
||||
PROFILE_sdma_0,
|
||||
PROFILE_sdma_1,
|
||||
PROFILE_sdma_2,
|
||||
PROFILE_sdma_3,
|
||||
PROFILE_sdma_4,
|
||||
PROFILE_sdma_5,
|
||||
PROFILE_EVENT_MAX /* Should be the last event type */
|
||||
};
|
||||
|
||||
|
||||
@ -166,6 +166,8 @@ typedef unsigned long __cpu_set_unit;
|
||||
#define MPOL_NO_BSS 0x04
|
||||
#define MPOL_SHM_PREMAP 0x08
|
||||
|
||||
#define MCEXEC_HFI1 0x01
|
||||
|
||||
struct program_load_desc {
|
||||
int num_sections;
|
||||
int status;
|
||||
@ -194,12 +196,14 @@ struct program_load_desc {
|
||||
unsigned long envs_len;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long interp_align;
|
||||
unsigned long mcexec_flags;
|
||||
unsigned long mpol_flags;
|
||||
unsigned long mpol_threshold;
|
||||
unsigned long heap_extension;
|
||||
long stack_premap;
|
||||
unsigned long mpol_bind_mask;
|
||||
int nr_processes;
|
||||
int process_rank;
|
||||
char shell_path[SHELL_PATH_MAX_LEN];
|
||||
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
|
||||
int profile;
|
||||
@ -293,6 +297,7 @@ struct syscall_response {
|
||||
long ret;
|
||||
unsigned long fault_address;
|
||||
unsigned long fault_reason;
|
||||
void *private_data;
|
||||
};
|
||||
|
||||
struct syscall_post {
|
||||
|
||||
@ -125,6 +125,8 @@ char *find_command_line(char *name)
|
||||
return strstr(cmdline, name);
|
||||
}
|
||||
|
||||
extern int safe_kernel_map;
|
||||
|
||||
static void parse_kargs(void)
|
||||
{
|
||||
char *ptr;
|
||||
@ -145,6 +147,11 @@ static void parse_kargs(void)
|
||||
}
|
||||
ihk_mc_set_dump_level(dump_level);
|
||||
|
||||
ptr = find_command_line("safe_kernel_map");
|
||||
if (ptr) {
|
||||
safe_kernel_map = 1;
|
||||
}
|
||||
|
||||
/* idle_halt option */
|
||||
ptr = find_command_line("idle_halt");
|
||||
if (ptr) {
|
||||
@ -353,6 +360,11 @@ static void post_init(void)
|
||||
}
|
||||
init_host_ikc2mckernel();
|
||||
init_host_ikc2linux(ikc_cpu);
|
||||
|
||||
{
|
||||
extern void hfi1_kmalloc_cache_prealloc(void);
|
||||
hfi1_kmalloc_cache_prealloc();
|
||||
}
|
||||
}
|
||||
|
||||
arch_setup_vdso();
|
||||
|
||||
105
kernel/mem.c
105
kernel/mem.c
@ -698,6 +698,22 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
|
||||
|
||||
break;
|
||||
}
|
||||
else {
|
||||
dkprintf("%s: couldn't fulfill user policy for"
|
||||
" %d contiguous pages from node %d "
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
"(free pages left: %d)"
|
||||
#endif
|
||||
"\n",
|
||||
__FUNCTION__,
|
||||
npages,
|
||||
numa_id
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
, memory_nodes[numa_id].nr_free_pages
|
||||
#endif
|
||||
);
|
||||
//return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (pa) break;
|
||||
@ -719,8 +735,8 @@ static void *mckernel_allocate_aligned_pages_node(int npages, int p2align,
|
||||
#ifdef PROFILE_ENABLE
|
||||
profile_event_add(PROFILE_mpol_alloc_missed, npages * 4096);
|
||||
#endif
|
||||
dkprintf("%s: couldn't fulfill user policy for %d pages\n",
|
||||
__FUNCTION__, npages);
|
||||
dkprintf("%s: couldn't fulfill user policy for %d pages from node %d\n",
|
||||
__FUNCTION__, npages, i);
|
||||
}
|
||||
|
||||
distance_based:
|
||||
@ -926,6 +942,8 @@ static void query_free_mem_interrupt_handler(void *priv)
|
||||
/* Iterate memory allocators */
|
||||
for (i = 0; i < ihk_mc_get_nr_numa_nodes(); ++i) {
|
||||
#ifdef IHK_RBTREE_ALLOCATOR
|
||||
kprintf("McKernel free pages in NUMA node %d: %d\n",
|
||||
i, memory_nodes[i].nr_free_pages);
|
||||
pages += memory_nodes[i].nr_free_pages;
|
||||
#else
|
||||
struct ihk_page_allocator_desc *pa_allocator;
|
||||
@ -981,6 +999,8 @@ void coredump(struct thread *thread, void *regs)
|
||||
struct coretable *coretable;
|
||||
int chunks;
|
||||
|
||||
return;
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_67 /* use limit corefile size. (temporarily fix.) */
|
||||
if (thread->proc->rlimit[MCK_RLIMIT_CORE].rlim_cur == 0) {
|
||||
return;
|
||||
@ -1168,6 +1188,59 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
|
||||
dkprintf("%s: addr: %p, reason: %lx, regs: %p\n",
|
||||
__FUNCTION__, fault_addr, reason, regs);
|
||||
|
||||
/* Linux ioremap address? */
|
||||
if ((unsigned long)fault_addr >= 0xFFFFC90000000000 &&
|
||||
(unsigned long)fault_addr < 0xFFFFFFFF80000000) {
|
||||
pte_t *lptep;
|
||||
pte_t *ptep;
|
||||
enum ihk_mc_pt_attribute attr =
|
||||
PTATTR_UNCACHABLE | PTATTR_WRITABLE;
|
||||
unsigned long phys;
|
||||
void *virt = fault_addr;
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
|
||||
if (!vm) {
|
||||
goto regular_handler;
|
||||
}
|
||||
|
||||
/* Is this a valid address in Linux? */
|
||||
lptep = ihk_mc_pt_lookup_pte(ihk_mc_get_linux_kernel_pgt(),
|
||||
virt, 0, 0, 0, 0);
|
||||
if (!lptep || !pte_is_present(lptep)) {
|
||||
kprintf("%s: ERROR: no mapping in Linux for: 0x%lx?\n",
|
||||
__FUNCTION__, virt);
|
||||
terminate(0, SIGKILL);
|
||||
goto regular_handler;
|
||||
}
|
||||
|
||||
phys = pte_get_phys(lptep);
|
||||
|
||||
if (ihk_mc_pt_set_page(vm->address_space->page_table,
|
||||
virt, phys, attr) < 0) {
|
||||
/* Not necessarily an error.. */
|
||||
kprintf("%s: WARNING: mapping: 0x%lx -> 0x%lx\n",
|
||||
__FUNCTION__, virt, phys);
|
||||
}
|
||||
|
||||
ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table,
|
||||
virt, 0, 0, 0, 0);
|
||||
if (!ptep) {
|
||||
kprintf("%s: ERROR: no PTE in McKernel for: 0x%lx?\n",
|
||||
__FUNCTION__, virt);
|
||||
goto regular_handler;
|
||||
}
|
||||
|
||||
*ptep = *lptep;
|
||||
dkprintf("%s: Linux ioremap address 0x%lx -> 0x%lx "
|
||||
"mapped on demand\n",
|
||||
__FUNCTION__, virt, phys);
|
||||
|
||||
flush_tlb_single((unsigned long)virt);
|
||||
error = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
regular_handler:
|
||||
preempt_disable();
|
||||
|
||||
cpu_enable_interrupt();
|
||||
@ -2192,6 +2265,7 @@ static void *___kmalloc(int size, ihk_mc_ap_flag flag)
|
||||
int npages;
|
||||
unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
|
||||
|
||||
retry_malloc:
|
||||
/* KMALLOC_MIN_SIZE bytes aligned size. */
|
||||
if (size & KMALLOC_MIN_MASK) {
|
||||
size = ((size + KMALLOC_MIN_SIZE - 1) & ~(KMALLOC_MIN_MASK));
|
||||
@ -2223,10 +2297,36 @@ split_and_return:
|
||||
}
|
||||
|
||||
list_del(&chunk->list);
|
||||
ZERO_LIST_HEAD(&chunk->list);
|
||||
cpu_restore_interrupt(kmalloc_irq_flags);
|
||||
return ((void *)chunk + sizeof(struct kmalloc_header));
|
||||
}
|
||||
/* See remote list before falling back to page_alloc */
|
||||
else {
|
||||
int retry = 0;
|
||||
struct kmalloc_header *chunk, *tmp;
|
||||
unsigned long irqflags =
|
||||
ihk_mc_spinlock_lock(
|
||||
&cpu_local_var(remote_free_list_lock));
|
||||
|
||||
/* Clean up remotely deallocated chunks */
|
||||
list_for_each_entry_safe(chunk, tmp,
|
||||
&cpu_local_var(remote_free_list), list) {
|
||||
|
||||
list_del(&chunk->list);
|
||||
___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
|
||||
if (chunk->size >= size) {
|
||||
retry = 1;
|
||||
}
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock),
|
||||
irqflags);
|
||||
/* Found anything? */
|
||||
if (retry) {
|
||||
goto retry_malloc;
|
||||
}
|
||||
}
|
||||
|
||||
/* Allocate new memory and add it to free list */
|
||||
npages = (size + sizeof(struct kmalloc_header) + (PAGE_SIZE - 1))
|
||||
@ -2585,3 +2685,4 @@ int ihk_mc_get_mem_user_page(void *arg0, page_table_t pt, pte_t *ptep, void *pga
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
226
kernel/process.c
226
kernel/process.c
@ -138,12 +138,17 @@ init_process(struct process *proc, struct process *parent)
|
||||
INIT_LIST_HEAD(&proc->ptraced_siblings_list);
|
||||
mcs_rwlock_init(&proc->update_lock);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_63 */
|
||||
|
||||
// Double check the inheritance from parent
|
||||
memset(proc->fd_priv_table, 0, MAX_FD_PRIV * sizeof(void *));
|
||||
|
||||
INIT_LIST_HEAD(&proc->threads_list);
|
||||
INIT_LIST_HEAD(&proc->children_list);
|
||||
INIT_LIST_HEAD(&proc->ptraced_children_list);
|
||||
mcs_rwlock_init(&proc->threads_lock);
|
||||
mcs_rwlock_init(&proc->children_lock);
|
||||
ihk_mc_spinlock_init(&proc->mckfd_lock);
|
||||
ihk_mc_spinlock_init(&proc->hfi1_lock);
|
||||
waitq_init(&proc->waitpid_q);
|
||||
ihk_atomic_set(&proc->refcount, 2);
|
||||
proc->monitoring_event = NULL;
|
||||
@ -256,10 +261,12 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process
|
||||
int i;
|
||||
ihk_mc_spinlock_init(&vm->memory_range_lock);
|
||||
ihk_mc_spinlock_init(&vm->page_table_lock);
|
||||
ihk_mc_spinlock_init(&vm->vm_deferred_unmap_lock);
|
||||
|
||||
ihk_atomic_set(&vm->refcount, 1);
|
||||
vm->vm_range_tree = RB_ROOT;
|
||||
vm->vm_range_numa_policy_tree = RB_ROOT;
|
||||
INIT_LIST_HEAD(&vm->vm_deferred_unmap_range_list);
|
||||
vm->address_space = asp;
|
||||
vm->proc = owner;
|
||||
vm->exiting = 0;
|
||||
@ -1008,6 +1015,10 @@ int free_process_memory_range(struct process_vm *vm, struct vm_range *range)
|
||||
if (vm->range_cache[i] == range)
|
||||
vm->range_cache[i] = NULL;
|
||||
}
|
||||
|
||||
if (range->flag & VR_STACK) {
|
||||
kprintf("%s: VR_STACK faulted_size: %lu\n", __FUNCTION__, range->faulted_size);
|
||||
}
|
||||
kfree(range);
|
||||
|
||||
dkprintf("free_process_memory_range(%p,%lx-%lx): 0\n",
|
||||
@ -1223,6 +1234,9 @@ int add_process_memory_range(struct process_vm *vm,
|
||||
range->pgshift = pgshift;
|
||||
range->private_data = NULL;
|
||||
|
||||
range->lowest_accesed = end;
|
||||
range->faulted_size = 0;
|
||||
|
||||
rc = 0;
|
||||
if (phys == NOPHYS) {
|
||||
/* Nothing to map */
|
||||
@ -1259,6 +1273,138 @@ int add_process_memory_range(struct process_vm *vm,
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and map physical memory,
|
||||
* interpret NUMA policy.
|
||||
* TODO: move out to a function..
|
||||
*/
|
||||
if (flag & VR_PREALLOC && phys == NOPHYS) {
|
||||
|
||||
#if 0
|
||||
unsigned long addr = start;
|
||||
enum ihk_mc_pt_attribute ptattr;
|
||||
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
|
||||
unsigned long irqflags;
|
||||
unsigned long len = 0;
|
||||
void *frame = NULL;
|
||||
int npages;
|
||||
int p2align;
|
||||
|
||||
len = end - addr;
|
||||
|
||||
/* Figure out size */
|
||||
if (len >= LARGE_PAGE_SIZE) {
|
||||
p2align = LARGE_PAGE_P2ALIGN;
|
||||
}
|
||||
else {
|
||||
p2align = PAGE_P2ALIGN;
|
||||
}
|
||||
npages = len >> PAGE_SHIFT;
|
||||
|
||||
frame = ihk_mc_alloc_aligned_pages_user(npages,
|
||||
p2align,
|
||||
IHK_MC_AP_NOWAIT | (range->flag & VR_AP_USER ? IHK_MC_AP_USER : 0),
|
||||
-1);
|
||||
if (!frame) {
|
||||
kprintf("%s: error: out of memory\n", __FUNCTION__);
|
||||
panic("panic");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
irqflags = ihk_mc_spinlock_lock(&vm->page_table_lock);
|
||||
|
||||
rc = ihk_mc_pt_set_range(vm->address_space->page_table,
|
||||
vm,
|
||||
(void *)addr,
|
||||
(void *)addr + len,
|
||||
virt_to_phys(frame),
|
||||
ptattr,
|
||||
PAGE_SHIFT + p2align,
|
||||
range);
|
||||
|
||||
if (rc) {
|
||||
kprintf("%s: ERROR: mapping\n", __FUNCTION__);
|
||||
ihk_mc_spinlock_unlock(&vm->page_table_lock, irqflags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock(&vm->page_table_lock, irqflags);
|
||||
|
||||
memset(frame, 0, len);
|
||||
addr += len;
|
||||
|
||||
|
||||
#else
|
||||
unsigned long addr = start;
|
||||
enum ihk_mc_pt_attribute ptattr;
|
||||
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
|
||||
|
||||
while (addr < end) {
|
||||
unsigned long irqflags;
|
||||
unsigned long len = 0;
|
||||
void *frame = NULL;
|
||||
int npages;
|
||||
int p2align;
|
||||
|
||||
len = end - addr;
|
||||
|
||||
/* Figure out size */
|
||||
if (len >= LARGE_PAGE_SIZE) {
|
||||
len = LARGE_PAGE_SIZE;
|
||||
p2align = LARGE_PAGE_P2ALIGN;
|
||||
}
|
||||
else {
|
||||
len = PAGE_SIZE;
|
||||
p2align = PAGE_P2ALIGN;
|
||||
}
|
||||
|
||||
npages = len >> PAGE_SHIFT;
|
||||
#if 0
|
||||
frame = ihk_mc_alloc_aligned_pages_node_user(npages,
|
||||
p2align,
|
||||
IHK_MC_AP_NOWAIT | (range->flag & VR_AP_USER ? IHK_MC_AP_USER : 0),
|
||||
node, -1);
|
||||
node = 1 - node;
|
||||
#else
|
||||
frame = ihk_mc_alloc_aligned_pages_user(npages,
|
||||
p2align,
|
||||
IHK_MC_AP_NOWAIT | (range->flag & VR_AP_USER ? IHK_MC_AP_USER : 0),
|
||||
-1);
|
||||
#endif
|
||||
if (!frame) {
|
||||
kprintf("%s: error: out of memory\n", __FUNCTION__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
irqflags = ihk_mc_spinlock_lock(&vm->page_table_lock);
|
||||
|
||||
rc = ihk_mc_pt_set_range(vm->address_space->page_table,
|
||||
vm,
|
||||
(void *)addr,
|
||||
(void *)addr + len,
|
||||
virt_to_phys(frame),
|
||||
ptattr,
|
||||
PAGE_SHIFT + p2align,
|
||||
range);
|
||||
|
||||
if (rc) {
|
||||
kprintf("%s: ERROR: mapping\n", __FUNCTION__);
|
||||
ihk_mc_spinlock_unlock(&vm->page_table_lock, irqflags);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ihk_mc_spinlock_unlock(&vm->page_table_lock, irqflags);
|
||||
|
||||
memset(frame, 0, len);
|
||||
addr += len;
|
||||
}
|
||||
#endif
|
||||
dkprintf("%s: 0x%lx:%lu mapped\n",
|
||||
__FUNCTION__,
|
||||
start,
|
||||
end - start);
|
||||
}
|
||||
|
||||
/* Clear content! */
|
||||
if (phys != NOPHYS && !(flag & (VR_REMOTE | VR_DEMAND_PAGING))
|
||||
&& ((flag & VR_PROT_MASK) != VR_PROT_NONE)) {
|
||||
@ -1777,6 +1923,22 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
|
||||
}
|
||||
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
|
||||
}
|
||||
|
||||
if (pgsize > LARGE_PAGE_SIZE) {
|
||||
dkprintf("%s: 0x%lx, pgsize: %lu\n",
|
||||
__FUNCTION__, pgaddr, pgsize);
|
||||
}
|
||||
|
||||
if (range->flag & VR_STACK) {
|
||||
range->faulted_size += pgsize;
|
||||
|
||||
if (range->lowest_accesed > (unsigned long)pgaddr) {
|
||||
dkprintf("%s: VR_STACK @ 0x%lx, pgsize: %lu, distance: %lu\n",
|
||||
__FUNCTION__, pgaddr, pgsize, range->end - (unsigned long)pgaddr);
|
||||
range->lowest_accesed = (unsigned long)pgaddr;
|
||||
}
|
||||
}
|
||||
|
||||
/*****/
|
||||
dkprintf("%s: ptep=%lx,pte_is_null=%d,pte_is_fileoff=%d\n", __FUNCTION__, ptep, ptep ? pte_is_null(ptep) : -1, ptep ? pte_is_fileoff(ptep, pgsize) : -1);
|
||||
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
|
||||
@ -2148,6 +2310,8 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
struct vm_range *range;
|
||||
int stack_populated_size = 0;
|
||||
int stack_align_padding = 0;
|
||||
int p2align = LARGE_PAGE_P2ALIGN;
|
||||
int pgshift = LARGE_PAGE_SHIFT;
|
||||
|
||||
/* Create stack range */
|
||||
end = STACK_TOP(&thread->vm->region) & LARGE_PAGE_MASK;
|
||||
@ -2170,18 +2334,27 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
else if (size < minsz) {
|
||||
size = minsz;
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (minsz >= GB_PAGE_SIZE) {
|
||||
end = end & GB_PAGE_MASK;
|
||||
p2align = GB_PAGE_P2ALIGN;
|
||||
pgshift = GB_PAGE_SHIFT;
|
||||
}
|
||||
#endif
|
||||
|
||||
start = (end - size) & LARGE_PAGE_MASK;
|
||||
|
||||
/* Apply user allocation policy to stacks */
|
||||
/* TODO: make threshold kernel or mcexec argument */
|
||||
ap_flag = (size >= proc->mpol_threshold &&
|
||||
!(proc->mpol_flags & MPOL_NO_STACK)) ? IHK_MC_AP_USER : 0;
|
||||
dkprintf("%s: max size: %lu, mapped size: %lu %s\n",
|
||||
__FUNCTION__, size, minsz,
|
||||
kprintf("%s: stack: 0x%lx-0x%lx:%lu, mapped: %lu %s\n",
|
||||
__FUNCTION__, start, end, size, minsz,
|
||||
ap_flag ? "(IHK_MC_AP_USER)" : "");
|
||||
|
||||
stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
|
||||
LARGE_PAGE_P2ALIGN, IHK_MC_AP_NOWAIT | ap_flag, start);
|
||||
p2align, IHK_MC_AP_NOWAIT | ap_flag, start);
|
||||
|
||||
if (!stack) {
|
||||
kprintf("%s: error: couldn't allocate initial stack\n",
|
||||
@ -2208,8 +2381,7 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
|
||||
thread->vm, (void *)(end - minsz),
|
||||
(void *)end, virt_to_phys(stack),
|
||||
arch_vrflag_to_ptattr(vrflag, PF_POPULATE, NULL),
|
||||
LARGE_PAGE_SHIFT, range
|
||||
);
|
||||
pgshift, range);
|
||||
|
||||
if (error) {
|
||||
kprintf("init_process_stack:"
|
||||
@ -3677,3 +3849,47 @@ debug_log(unsigned long arg)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int access_ok(struct process_vm *vm, int type, uintptr_t addr, size_t len) {
|
||||
struct vm_range *range, *next;
|
||||
int first = true;
|
||||
|
||||
range = lookup_process_memory_range(vm, addr, addr + len);
|
||||
|
||||
if (!range || range->start > addr) {
|
||||
kprintf("%s: No VM range at 0x%llx, refusing access\n",
|
||||
__FUNCTION__, addr);
|
||||
return -EFAULT;
|
||||
}
|
||||
do {
|
||||
if (first) {
|
||||
first = false;
|
||||
} else {
|
||||
next = next_process_memory_range(vm, range);
|
||||
if (!next) {
|
||||
kprintf("%s: No VM range after 0x%llx, but checking until 0x%llx. Refusing access\n",
|
||||
__FUNCTION__, range->end, addr + len);
|
||||
return -EFAULT;
|
||||
}
|
||||
if (range->end != next->start) {
|
||||
kprintf("%s: 0x%llx - 0x%llx and 0x%llx - 0x%llx are not adjacent (request was %0x%llx-0x%llx %zu)\n",
|
||||
__FUNCTION__, range->start, range->end,
|
||||
next->start, next->end,
|
||||
addr, addr+len, len);
|
||||
return -EFAULT;
|
||||
}
|
||||
range = next;
|
||||
}
|
||||
|
||||
if ((type == VERIFY_WRITE && !(range->flag & VR_PROT_WRITE)) ||
|
||||
(type == VERIFY_READ && !(range->flag & VR_PROT_READ))) {
|
||||
kprintf("%s: 0x%llx - 0x%llx does not have prot %s (request was %0x%llx-0x%llx %zu)\n",
|
||||
__FUNCTION__, range->start, range->end,
|
||||
type == VERIFY_WRITE ? "write" : "ready",
|
||||
addr, addr+len, len);
|
||||
return -EACCES;
|
||||
}
|
||||
} while (addr + len > range->end);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -69,6 +69,12 @@ char *profile_event_names[] =
|
||||
"mmap_anon_no_contig_phys",
|
||||
"mmap_regular_file",
|
||||
"mmap_device_file",
|
||||
"sdma_0",
|
||||
"sdma_1",
|
||||
"sdma_2",
|
||||
"sdma_3",
|
||||
"sdma_4",
|
||||
"sdma_5",
|
||||
""
|
||||
};
|
||||
|
||||
|
||||
714
kernel/script/dwarf-extract-struct.c
Normal file
714
kernel/script/dwarf-extract-struct.c
Normal file
@ -0,0 +1,714 @@
|
||||
/*
|
||||
* Trivial dwarf parser to extract part of a struct from debug infos
|
||||
*
|
||||
* Author: Dominique Martinet <dominique.martinet@cea.fr>
|
||||
* License: WTFPLv2
|
||||
*
|
||||
* Canonical source: http://cgit.notk.org/asmadeus/dwarf-extract-struct.git
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <fcntl.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <strings.h>
|
||||
#include <errno.h>
|
||||
#include "libdwarf/dwarf.h"
|
||||
#include "libdwarf/libdwarf.h"
|
||||
|
||||
|
||||
static void parse_dwarf(Dwarf_Debug dbg, const char *struct_name,
|
||||
const char *field_names[], int field_count);
|
||||
static void find_struct(Dwarf_Debug dbg, Dwarf_Die die, const char *struct_name,
|
||||
const char *field_names[], int field_count, int level);
|
||||
static void find_fields(Dwarf_Debug dbg, Dwarf_Die struct_die, Dwarf_Die die,
|
||||
const char *struct_name, const char *field_names[],
|
||||
int field_count, int level);
|
||||
static void print_field(Dwarf_Debug dbg, Dwarf_Die die, const char *field_name,
|
||||
int pad_num);
|
||||
|
||||
int debug = 0;
|
||||
|
||||
/* Print a one-line usage summary for this tool to stderr. */
void usage(const char *argv[]) {
	const char *prog = argv[0];

	fprintf(stderr, "%s debug_file struct_name [field [field...]]\n",
		prog);
}
|
||||
|
||||
int main(int argc, const char *argv[]) {
|
||||
Dwarf_Debug dbg = 0;
|
||||
int fd = -1;
|
||||
const char *filepath;
|
||||
const char *struct_name;
|
||||
int res = DW_DLV_ERROR;
|
||||
Dwarf_Error error;
|
||||
Dwarf_Handler errhand = 0;
|
||||
Dwarf_Ptr errarg = 0;
|
||||
|
||||
if(argc < 3) {
|
||||
usage(argv);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
filepath = argv[1];
|
||||
struct_name = argv[2];
|
||||
|
||||
fd = open(filepath,O_RDONLY);
|
||||
if(fd < 0) {
|
||||
fprintf(stderr, "Failure attempting to open %s\n",filepath);
|
||||
}
|
||||
res = dwarf_init(fd, DW_DLC_READ, errhand, errarg, &dbg, &error);
|
||||
if(res != DW_DLV_OK) {
|
||||
fprintf(stderr, "Giving up, cannot do DWARF processing\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
parse_dwarf(dbg, struct_name, argv + 3, argc - 3);
|
||||
|
||||
res = dwarf_finish(dbg,&error);
|
||||
if(res != DW_DLV_OK) {
|
||||
fprintf(stderr, "dwarf_finish failed!\n");
|
||||
}
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Iterate over every compilation-unit header in the debug info and walk
 * each CU's DIE tree via find_struct() looking for struct_name.
 *
 * find_struct() exits the process when the struct and all requested
 * fields are found (or partially found), so falling out of the loop
 * means the struct was never seen: report and exit(2).
 */
static void parse_dwarf(Dwarf_Debug dbg, const char *struct_name,
		const char *field_names[], int field_count) {
	Dwarf_Bool is_info = 1;
	Dwarf_Unsigned cu_length;
	Dwarf_Half cu_version;
	Dwarf_Off cu_abbrev_offset;
	Dwarf_Half cu_pointer_size;
	Dwarf_Half cu_offset_size;
	Dwarf_Half cu_extension_size;
	Dwarf_Sig8 type_signature;
	Dwarf_Unsigned type_offset;
	Dwarf_Unsigned cu_next_offset;
	Dwarf_Error err;
	int rc;


	while (1) {
		Dwarf_Die die;

		/* Advance libdwarf's internal cursor to the next CU */
		rc = dwarf_next_cu_header_c(dbg, is_info, &cu_length,
			&cu_version, &cu_abbrev_offset, &cu_pointer_size,
			&cu_offset_size, &cu_extension_size, &type_signature,
			&type_offset, &cu_next_offset, &err);

		/* DW_DLV_NO_ENTRY: all CUs consumed */
		if (rc == DW_DLV_NO_ENTRY)
			break;
		if (rc != DW_DLV_OK) {
			fprintf(stderr, "error dwarf_next_cu_header_c: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}


		/* NULL die + siblingof yields the CU's root DIE */
		rc = dwarf_siblingof(dbg, NULL, &die, &err);
		if (rc != DW_DLV_OK) {
			fprintf(stderr, "first dwarf_siblingof failed: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}

		find_struct(dbg, die, struct_name, field_names, field_count, 0);
	}

	fprintf(stderr, "struct %s not found\n", struct_name);
	exit(2);
}
|
||||
|
||||
/*
 * Depth-limited walk over a DIE sibling chain searching for a
 * DW_TAG_structure_type named struct_name (compared case-insensitively).
 *
 * On a match the struct's children are handed to find_fields(), which
 * exits the process once every requested field has been printed; if it
 * returns, some fields were missing — list them and exit(3).
 *
 * Recursion is capped at level > 1: structure definitions are expected
 * at CU scope or one level below it.
 */
static void find_struct(Dwarf_Debug dbg, Dwarf_Die die, const char *struct_name,
		const char *field_names[], int field_count, int level) {
	Dwarf_Die next;
	Dwarf_Error err;
	int rc;

	if (level > 1)
		return;

	do {
		char *name;
		const char *tag_name;
		Dwarf_Half tag;

		/* The DIE name is optional (anonymous entries) */
		rc = dwarf_diename(die, &name, &err);
		if (rc == DW_DLV_NO_ENTRY) {
			name = NULL;
		} else if (rc != DW_DLV_OK) {
			fprintf(stderr, "dwarf_diename error: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}

		rc = dwarf_tag(die, &tag, &err);
		if (rc != DW_DLV_OK) {
			fprintf(stderr, "dwarf_tag error: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}

		if (debug) {
			rc = dwarf_get_TAG_name(tag, &tag_name);
			if (rc != DW_DLV_OK) {
				fprintf(stderr,
					"dwarf_get_TAG_name error: %d\n", rc);
				exit(1);
			}

			printf("<%d> %p <%d> %s: %s\n", level, die, tag,
				tag_name, name ? name : "<no name>");
		}

		rc = dwarf_child(die, &next, &err);
		if (rc == DW_DLV_ERROR) {
			fprintf(stderr, "dwarf_child error: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}
		if (rc == DW_DLV_OK) {
			if (tag == DW_TAG_structure_type
					&& name && strcasecmp(name, struct_name) == 0) {
				/* Match: find_fields() exits on full success,
				 * so reaching the code below means some of the
				 * requested members were absent. */
				find_fields(dbg, die, next, struct_name,
					field_names, field_count,
					level + 1);
				fprintf(stderr,
					"Found struct %s but it did not have all members given!\nMissing:\n",
					struct_name);
				/* NOTE(review): rc is reused here as a plain
				 * loop index; entries already printed were
				 * NULLed out by find_fields(). */
				for (rc = 0; rc < field_count; rc++) {
					if (field_names[rc])
						fprintf(stderr, "%s\n",
							field_names[rc]);
				}
				exit(3);
			}
			/* Not our struct: recurse into the children */
			find_struct(dbg, next, struct_name, field_names,
				field_count, level + 1);
			dwarf_dealloc(dbg, next, DW_DLA_DIE);
		}


		/* Step to the next sibling and release the current DIE/name */
		rc = dwarf_siblingof(dbg, die, &next, &err);
		dwarf_dealloc(dbg, die, DW_DLA_DIE);
		if (name)
			dwarf_dealloc(dbg, name, DW_DLA_STRING);

		if (rc != DW_DLV_OK)
			break;

		die = next;
	} while (die);
}
|
||||
|
||||
/*
 * Read the DW_AT_data_member_location attribute of a member DIE and
 * store the member's byte offset within its struct in *poffset.
 *
 * Constant forms (data1/2/4/8, udata, sdata) are read directly; any
 * other form is treated as a location expression, which must be a
 * single DW_OP_plus_uconst operation.
 *
 * Returns DW_DLV_OK on success, or the dwarf_attr() result when the
 * attribute is missing/unreadable. Exits(5) on unsupported encodings.
 */
static int dwarf_get_offset(Dwarf_Debug dbg, Dwarf_Die die,
		int *poffset, Dwarf_Error *perr) {
	Dwarf_Attribute attr;
	Dwarf_Unsigned offset;
	Dwarf_Half form;
	int rc;

	rc = dwarf_attr(die, DW_AT_data_member_location, &attr, perr);
	if (rc != DW_DLV_OK) {
		return rc;
	}

	rc = dwarf_whatform(attr, &form, perr);
	if (rc != DW_DLV_OK) {
		fprintf(stderr, "Error getting whatform: %s\n",
			dwarf_errmsg(*perr));
		exit(5);
	}
	/* Fix: DW_FORM_data2 was tested twice (copy-paste duplicate) */
	if (form == DW_FORM_data1 || form == DW_FORM_data2
			|| form == DW_FORM_data4
			|| form == DW_FORM_data8 || form == DW_FORM_udata) {
		dwarf_formudata(attr, &offset, 0);
	} else if (form == DW_FORM_sdata) {
		Dwarf_Signed soffset;

		dwarf_formsdata(attr, &soffset, 0);
		if (soffset < 0) {
			fprintf(stderr,
				"unsupported negative offset\n");
			exit(5);
		}
		offset = (Dwarf_Unsigned) soffset;
	} else {
		/* Fallback: offset encoded as a location expression */
		Dwarf_Locdesc **locdescs;
		Dwarf_Signed len;

		if (dwarf_loclist_n(attr, &locdescs, &len, perr)
				== DW_DLV_ERROR) {
			fprintf(stderr, "unsupported member offset\n");
			exit(5);
		}
		if (len != 1
				|| locdescs[0]->ld_cents != 1
				|| (locdescs[0]->ld_s[0]).lr_atom
					!= DW_OP_plus_uconst) {
			fprintf(stderr,
				"unsupported location expression\n");
			exit(5);
		}
		offset = (locdescs[0]->ld_s[0]).lr_number;
	}
	dwarf_dealloc(dbg, attr, DW_DLA_ATTR);

	*poffset = (int) offset;
	return DW_DLV_OK;
}
|
||||
|
||||
/*
 * Read the DW_AT_byte_size attribute of a DIE (typically a struct) and
 * store it in *psize.
 *
 * Mirrors dwarf_get_offset(): constant forms are read directly, other
 * forms are accepted only as a single DW_OP_plus_uconst location
 * expression.
 *
 * Returns DW_DLV_OK on success, or the dwarf_attr() result when the
 * attribute is missing/unreadable. Exits(5) on unsupported encodings.
 */
static int dwarf_get_size(Dwarf_Debug dbg, Dwarf_Die die,
		int *psize, Dwarf_Error *perr) {
	Dwarf_Attribute attr;
	Dwarf_Unsigned size;
	Dwarf_Half form;
	int rc;

	rc = dwarf_attr(die, DW_AT_byte_size, &attr, perr);
	if (rc != DW_DLV_OK) {
		return rc;
	}

	rc = dwarf_whatform(attr, &form, perr);
	if (rc != DW_DLV_OK) {
		fprintf(stderr, "Error getting whatform: %s\n",
			dwarf_errmsg(*perr));
		exit(5);
	}
	/* Fix: DW_FORM_data2 was tested twice (copy-paste duplicate) */
	if (form == DW_FORM_data1 || form == DW_FORM_data2
			|| form == DW_FORM_data4
			|| form == DW_FORM_data8 || form == DW_FORM_udata) {
		dwarf_formudata(attr, &size, 0);
	} else if (form == DW_FORM_sdata) {
		Dwarf_Signed ssize;

		dwarf_formsdata(attr, &ssize, 0);
		if (ssize < 0) {
			fprintf(stderr,
				"unsupported negative size\n");
			exit(5);
		}
		size = (Dwarf_Unsigned) ssize;
	} else {
		/* Fallback: size encoded as a location expression */
		Dwarf_Locdesc **locdescs;
		Dwarf_Signed len;

		if (dwarf_loclist_n(attr, &locdescs, &len, perr)
				== DW_DLV_ERROR) {
			fprintf(stderr, "unsupported member size\n");
			exit(5);
		}
		if (len != 1
				|| locdescs[0]->ld_cents != 1
				|| (locdescs[0]->ld_s[0]).lr_atom
					!= DW_OP_plus_uconst) {
			fprintf(stderr,
				"unsupported location expression\n");
			exit(5);
		}
		size = (locdescs[0]->ld_s[0]).lr_number;
	}
	dwarf_dealloc(dbg, attr, DW_DLA_ATTR);

	*psize = (int) size;
	return DW_DLV_OK;
}
|
||||
|
||||
/*
 * Compute the element count of a DW_TAG_array_type DIE from its
 * subrange child: upper_bound - lower_bound + 1 (lower bound defaults
 * to 0 when absent, per DWARF convention for C).
 *
 * Each bound accepts the constant forms (data1/2/4/8, udata, sdata) or
 * a single DW_OP_plus_uconst location expression, like
 * dwarf_get_offset()/dwarf_get_size().
 *
 * Returns DW_DLV_OK on success or a libdwarf status on failure;
 * exits(5) on unsupported encodings.
 */
static int dwarf_get_arraysize(Dwarf_Debug dbg, Dwarf_Die die,
		int *psize, Dwarf_Error *perr) {
	Dwarf_Attribute attr;
	Dwarf_Unsigned lower_bound, upper_bound;
	int rc;
	Dwarf_Die child;
	Dwarf_Half form;

	/* The bounds live on the array's DW_TAG_subrange_type child */
	rc = dwarf_child(die, &child, perr);
	if (rc == DW_DLV_NO_ENTRY) {
		fprintf(stderr,
			"Could not deref child of array: no entry\n");
		return rc;
	}
	if (rc != DW_DLV_OK) {
		fprintf(stderr,
			"Could not get child entry of array: %s\n",
			dwarf_errmsg(*perr));
		return rc;
	}

	rc = dwarf_attr(child, DW_AT_lower_bound, &attr, perr);
	/* Not present? Assume zero */
	if (rc != DW_DLV_OK) {
		lower_bound = 0;
		goto upper;
	}

	rc = dwarf_whatform(attr, &form, perr);
	if (rc != DW_DLV_OK) {
		fprintf(stderr, "Error getting whatform: %s\n",
			dwarf_errmsg(*perr));
		exit(5);
	}

	/* Fix: DW_FORM_data2 was tested twice (copy-paste duplicate) */
	if (form == DW_FORM_data1 || form == DW_FORM_data2
			|| form == DW_FORM_data4
			|| form == DW_FORM_data8 || form == DW_FORM_udata) {
		dwarf_formudata(attr, &lower_bound, 0);
	} else if (form == DW_FORM_sdata) {
		Dwarf_Signed ssize;

		dwarf_formsdata(attr, &ssize, 0);
		if (ssize < 0) {
			fprintf(stderr,
				"unsupported negative size\n");
			exit(5);
		}
		lower_bound = (Dwarf_Unsigned) ssize;
	} else {
		Dwarf_Locdesc **locdescs;
		Dwarf_Signed len;

		if (dwarf_loclist_n(attr, &locdescs, &len, perr)
				== DW_DLV_ERROR) {
			fprintf(stderr, "unsupported member size\n");
			exit(5);
		}
		if (len != 1
				|| locdescs[0]->ld_cents != 1
				|| (locdescs[0]->ld_s[0]).lr_atom
					!= DW_OP_plus_uconst) {
			fprintf(stderr,
				"unsupported location expression\n");
			exit(5);
		}
		lower_bound = (locdescs[0]->ld_s[0]).lr_number;
	}
	dwarf_dealloc(dbg, attr, DW_DLA_ATTR);

upper:
	rc = dwarf_attr(child, DW_AT_upper_bound, &attr, perr);
	if (rc != DW_DLV_OK) {
		return rc;
	}

	rc = dwarf_whatform(attr, &form, perr);
	if (rc != DW_DLV_OK) {
		fprintf(stderr, "Error getting whatform: %s\n",
			dwarf_errmsg(*perr));
		exit(5);
	}

	/* Fix: DW_FORM_data2 was tested twice (copy-paste duplicate) */
	if (form == DW_FORM_data1 || form == DW_FORM_data2
			|| form == DW_FORM_data4
			|| form == DW_FORM_data8 || form == DW_FORM_udata) {
		dwarf_formudata(attr, &upper_bound, 0);
	} else if (form == DW_FORM_sdata) {
		Dwarf_Signed ssize;

		dwarf_formsdata(attr, &ssize, 0);
		if (ssize < 0) {
			fprintf(stderr,
				"unsupported negative size\n");
			exit(5);
		}
		upper_bound = (Dwarf_Unsigned) ssize;
	} else {
		Dwarf_Locdesc **locdescs;
		Dwarf_Signed len;

		if (dwarf_loclist_n(attr, &locdescs, &len, perr)
				== DW_DLV_ERROR) {
			fprintf(stderr, "unsupported member size\n");
			exit(5);
		}
		if (len != 1
				|| locdescs[0]->ld_cents != 1
				|| (locdescs[0]->ld_s[0]).lr_atom
					!= DW_OP_plus_uconst) {
			fprintf(stderr,
				"unsupported location expression\n");
			exit(5);
		}
		upper_bound = (locdescs[0]->ld_s[0]).lr_number;
	}
	dwarf_dealloc(dbg, attr, DW_DLA_ATTR);

	*psize = ((int)upper_bound - (int)lower_bound + 1);
	return DW_DLV_OK;
}
|
||||
|
||||
|
||||
|
||||
static int deref_type(Dwarf_Debug dbg, Dwarf_Die type_die,
|
||||
Dwarf_Die *new_type_die, Dwarf_Half *ptype_tag,
|
||||
Dwarf_Error *perr) {
|
||||
Dwarf_Attribute pointer_attr;
|
||||
Dwarf_Off pointer_off;
|
||||
int rc;
|
||||
|
||||
rc = dwarf_attr(type_die, DW_AT_type, &pointer_attr,
|
||||
perr);
|
||||
if (rc != DW_DLV_OK)
|
||||
return rc;
|
||||
|
||||
rc = dwarf_global_formref(pointer_attr, &pointer_off,
|
||||
perr);
|
||||
if (rc != DW_DLV_OK)
|
||||
return rc;
|
||||
|
||||
rc = dwarf_offdie_b(dbg, pointer_off, 1, new_type_die,
|
||||
perr);
|
||||
if (rc != DW_DLV_OK)
|
||||
return rc;
|
||||
|
||||
dwarf_dealloc(dbg, pointer_attr, DW_DLA_ATTR);
|
||||
|
||||
if (ptype_tag)
|
||||
rc = dwarf_tag(*new_type_die, ptype_tag, perr);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*
 * Walk the member DIEs of a matched struct and print each requested
 * field (via print_field) wrapped in a generated partial-struct
 * definition: a union of a whole-struct char array and one padded
 * struct per extracted member.
 *
 * Requested entries in field_names[] are NULLed out once printed; when
 * printed_count reaches field_count the definition is closed and the
 * process exits(0). Returning from this function therefore means some
 * fields were not found (the caller reports them).
 */
static void find_fields(Dwarf_Debug dbg, Dwarf_Die struct_die, Dwarf_Die die,
		const char *struct_name, const char *field_names[],
		int field_count, int level) {
	Dwarf_Die next;
	Dwarf_Error err;
	int rc, i, printed_count = 0;
	int size;

	printf("struct %s {\n\tunion {\n",
		struct_name);

	/* The union's first member spans the full original struct */
	rc = dwarf_get_size(dbg, struct_die, &size, &err);
	if (rc != DW_DLV_OK) {
		fprintf(stderr, "could not get size for struct %s: %s\n",
			struct_name, dwarf_errmsg(err));
		exit(1);
	}
	printf("\t\tchar whole_struct[%d];\n", size);

	do {
		char *name;
		const char *tag_name;
		Dwarf_Half tag;

		/* Member name is optional (anonymous members) */
		rc = dwarf_diename(die, &name, &err);
		if (rc == DW_DLV_NO_ENTRY) {
			name = NULL;
		} else if (rc != DW_DLV_OK) {
			fprintf(stderr, "dwarf_diename error: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}

		rc = dwarf_tag(die, &tag, &err);
		if (rc != DW_DLV_OK) {
			fprintf(stderr, "dwarf_tag error: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(1);
		}

		if (debug) {
			rc = dwarf_get_TAG_name(tag, &tag_name);
			if (rc != DW_DLV_OK) {
				fprintf(stderr,
					"dwarf_get_TAG_name error: %d\n", rc);
				exit(1);
			}

			printf("<%d> %p <%d> %s: %s\n", level, die, tag,
				tag_name, name ? name : "<no name>");
		}

		if (tag == DW_TAG_member && name) {
			/* Is this member one of the requested fields? */
			for (i = 0; i < field_count; i++) {
				if (!field_names[i])
					continue;
				if (strcasecmp(name, field_names[i]) == 0) {
					print_field(dbg, die, field_names[i],
						printed_count);
					/* Mark as done for the caller's
					 * missing-field report */
					field_names[i] = NULL;
					printed_count++;
					break;
				}
			}
			if (printed_count == field_count) {
				/* All fields emitted: close and succeed */
				printf("\t};\n};\n");
				exit(0);
			}
		}

		/* Advance to the next sibling, releasing the current DIE */
		rc = dwarf_siblingof(dbg, die, &next, &err);
		dwarf_dealloc(dbg, die, DW_DLA_DIE);
		if (name)
			dwarf_dealloc(dbg, name, DW_DLA_STRING);

		if (rc != DW_DLV_OK)
			break;

		die = next;
	} while (die);
}
|
||||
|
||||
/*
 * Emit one extracted member as an anonymous padded struct:
 *
 *	struct { char padding<N>[<offset>]; <type> <name>[<count>]; };
 *
 * Resolves the member's offset (dwarf_get_offset), then its type chain:
 * pointer levels are collected into pointer_buf, one array level into
 * array_buf, and the final named type into type_buf. Exits with a
 * distinct non-zero code on each unrecoverable DWARF error.
 *
 * NOTE(review): only a single array dimension is handled, and only
 * struct/enum/base/typedef/void-pointer final types are supported.
 */
static void print_field(Dwarf_Debug dbg, Dwarf_Die die, const char *field_name,
		int padnum) {
	Dwarf_Attribute attr;
	Dwarf_Error err;
	int offset = 0;
	char type_buf[1024];
	char array_buf[128] = "";
	char pointer_buf[128] = "";
	int rc;

	rc = dwarf_get_offset(dbg, die, &offset, &err);
	if (rc == DW_DLV_NO_ENTRY) {
		/* Missing offset: keep the initialized 0 and continue */
		fprintf(stderr, "Found %s but no offset, assuming 0\n",
			field_name);
	} else if (rc != DW_DLV_OK) {
		fprintf(stderr, "Error getting dwarf attr offset: %s\n",
			dwarf_errmsg(err));
		exit(4);
	}

	rc = dwarf_attr(die, DW_AT_type, &attr, &err);
	if (rc == DW_DLV_NO_ENTRY) {
		fprintf(stderr,
			"Found %s but no type, can't assume that one out..\n",
			field_name);
		exit(6);
	} else if (rc != DW_DLV_OK) {
		fprintf(stderr, "Error getting dwarf attrlist: %s\n",
			dwarf_errmsg(err));
		exit(6);
	} else {
		Dwarf_Die type_die, next;
		Dwarf_Off type_off;
		Dwarf_Half type_tag;
		char *type_name;
		int pointer = 0;

		/* Resolve the member's type attribute to its DIE */
		rc = dwarf_global_formref(attr, &type_off, &err);
		if (rc != DW_DLV_OK) {
			fprintf(stderr,
				"Error getting ref offset for type: %s\n",
				dwarf_errmsg(err));
			exit(7);
		}

		rc = dwarf_offdie_b(dbg, type_off, 1, &type_die, &err);
		if (rc != DW_DLV_OK) {
			fprintf(stderr,
				"Error getting die from offset for type: %s\n",
				dwarf_errmsg(err));
			exit(7);
		}

		rc = dwarf_tag(type_die, &type_tag, &err);
		if (rc != DW_DLV_OK) {
			fprintf(stderr, "dwarf_tag error: %d %s\n",
				rc, dwarf_errmsg(err));
			exit(7);
		}

		/* Peel pointer levels, recording one '*' per level */
		while (type_tag == DW_TAG_pointer_type) {
			pointer_buf[pointer++] = '*';

			rc = deref_type(dbg, type_die, &next,
				&type_tag, &err);
			/* No entry here means void* */
			if (rc == DW_DLV_NO_ENTRY)
				break;

			if (rc != DW_DLV_OK) {
				fprintf(stderr,
					"Could not deref type for %s: %s\n",
					field_name, dwarf_errmsg(err));
				exit(7);
			}

			dwarf_dealloc(dbg, type_die, DW_DLA_DIE);
			type_die = next;
		}

		/* A single array dimension: record "[count]" */
		if (type_tag == DW_TAG_array_type) {
			int next_offset, size;

			rc = deref_type(dbg, type_die, &next,
				&type_tag, &err);
			if (rc == DW_DLV_NO_ENTRY) {
				fprintf(stderr,
					"Could not deref array type for %s: no entry\n",
					field_name);
				exit(7);
			}
			if (rc != DW_DLV_OK) {
				fprintf(stderr,
					"Could not deref type for %s: %s\n",
					field_name, dwarf_errmsg(err));
				exit(7);
			}

			rc = dwarf_get_arraysize(dbg, type_die, &size, &err);
			if (rc != DW_DLV_OK) {
				fprintf(stderr,
					"Could not get array size for %s: %s\n",
					field_name, dwarf_errmsg(err));
				exit(7);
			}
			type_die = next;

			snprintf(array_buf, 128, "[%d]", size);
		}

		/* If it's still pointer at this point, it's void * */
		if (type_tag != DW_TAG_pointer_type) {
			rc = dwarf_diename(type_die, &type_name, &err);
			if (rc != DW_DLV_OK) {
				fprintf(stderr, "dwarf_diename error: %s\n",
					rc == DW_DLV_NO_ENTRY ?
					"no name" : dwarf_errmsg(err));
				const char *tag_name;

				rc = dwarf_get_TAG_name(type_tag, &tag_name);
				if (rc != DW_DLV_OK) {
					fprintf(stderr,
						"dwarf_get_TAG_name error: %d\n",
						rc);
				}

				fprintf(stderr, "Bad tag %s (%d)?\n",
					tag_name, type_tag);
				exit(7);
			}
		}

		/* Render the type prefix for the generated declaration */
		if (type_tag == DW_TAG_structure_type) {
			snprintf(type_buf, 1024, "struct %s %s",
				type_name, pointer_buf);
		} else if (type_tag == DW_TAG_enumeration_type) {
			snprintf(type_buf, 1024, "enum %s %s",
				type_name, pointer_buf);
		} else if (type_tag == DW_TAG_base_type
				|| type_tag == DW_TAG_typedef) {
			snprintf(type_buf, 1024, "%s %s", type_name,
				pointer_buf);
		} else if (type_tag == DW_TAG_pointer_type) {
			snprintf(type_buf, 1024, "void %s", pointer_buf);
		} else {
			const char *tag_name;

			rc = dwarf_get_TAG_name(type_tag, &tag_name);
			if (rc != DW_DLV_OK) {
				fprintf(stderr,
					"dwarf_get_TAG_name error: %d\n", rc);
			}

			fprintf(stderr,
				"Type tag %s (%d) is not implemented, please add it\n",
				tag_name, type_tag);
			exit(7);
		}

		/* type_name was only fetched for non-void-pointer types */
		if (type_tag != DW_TAG_pointer_type)
			dwarf_dealloc(dbg, type_name, DW_DLA_STRING);
		dwarf_dealloc(dbg, attr, DW_DLA_ATTR);
		dwarf_dealloc(dbg, type_die, DW_DLA_DIE);
	}

	printf("\t\tstruct {\n\t\t\tchar padding%i[%u];\n\t\t\t%s%s%s;\n\t\t};\n",
		padnum, (unsigned int) offset,
		type_buf, field_name, array_buf);
}
|
||||
71
kernel/script/regenerate_hfi1_header.sh
Executable file
71
kernel/script/regenerate_hfi1_header.sh
Executable file
@ -0,0 +1,71 @@
|
||||
#!/bin/bash

# Regenerate the hfi1_generated_*.h partial-struct headers from the
# DWARF debug info of the hfi1 kernel module, using the
# dwarf-extract-struct helper tool in this directory.
#
# usage:
# /path/to/regenerate_hfi1_header.sh [hfi1.ko]

# Resolve this script's real directory (following symlinks)
SCRIPT_PATH="${BASH_SOURCE[0]}"
ROOTDIR=$(readlink -m "$SCRIPT_PATH")
ROOTDIR=$(dirname "$ROOTDIR")
set -e -u

# static configuration-ish
declare -r DES_BIN="${ROOTDIR}/dwarf-extract-struct"
declare -r DES_SRC="${DES_BIN}.c"
declare -r HDR_PREFIX="${ROOTDIR}/../include/hfi1/hfi1_generated_"

# Print a message to stderr and abort
error() {
	echo "$@" >&2
	exit 1
}

# Use the module path given as $1, falling back to modinfo lookup
HFI1_KO="${1-$(modinfo -n hfi1)}" || \
	error "Could not find hfi1 module and no argument given. Usage: $0 [hfi1.ko]"


# (Re)build the extractor only when the binary is older than its source
[[ "$DES_BIN" -nt "$DES_SRC" ]] || \
	gcc -o "$DES_BIN" -g -ldwarf "$DES_SRC" || \
	error "Could not compile, install libdwarf-devel ?"

# One extractor invocation per struct; each emits a header containing
# only the listed members (padded to their original offsets).
"$DES_BIN" "$HFI1_KO" hfi1_pportdata \
	vls_operational > "${HDR_PREFIX}pportdata.h"

"$DES_BIN" "$HFI1_KO" hfi1_ctxtdata \
	ctxt rcv_array_groups eager_base expected_count expected_base \
	tid_group_list tid_used_list tid_full_list dd \
	> "${HDR_PREFIX}ctxtdata.h"

"$DES_BIN" "$HFI1_KO" hfi1_devdata \
	per_sdma sdma_pad_phys sdma_map pport chip_rcv_array_count \
	kregbase1 piobase physaddr rcvarray_wc default_desc1 flags \
	sc2vl events first_dyn_alloc_ctxt chip_rcv_contexts \
	> "${HDR_PREFIX}devdata.h"

"$DES_BIN" "$HFI1_KO" hfi1_filedata \
	uctxt pq cq dd subctxt entry_to_rb tid_lock tid_used \
	invalid_tids invalid_tid_idx invalid_lock \
	> "${HDR_PREFIX}filedata.h"

"$DES_BIN" "$HFI1_KO" sdma_state \
	current_state go_s99_running previous_state\
	> "${HDR_PREFIX}sdma_state.h"

"$DES_BIN" "$HFI1_KO" sdma_engine \
	dd tail_lock desc_avail tail_csr flushlist flushlist_lock \
	descq_head descq_tail descq_cnt state sdma_shift sdma_mask\
	descq tx_ring tx_tail head_lock descq_full_count ahg_bits\
	this_idx \
	> "${HDR_PREFIX}sdma_engine.h"

"$DES_BIN" "$HFI1_KO" user_sdma_request \
	data_iovs pq cq txps info hdr tidoffset data_len \
	iov_idx sent seqnum has_error koffset tididx \
	tids n_tids sde ahg_idx iovs seqcomp seqsubmitted \
	> "${HDR_PREFIX}user_sdma_request.h"

"$DES_BIN" "$HFI1_KO" user_sdma_txreq \
	hdr txreq list req flags busycount seqnum \
	> "${HDR_PREFIX}user_sdma_txreq.h"

"$DES_BIN" "$HFI1_KO" hfi1_user_sdma_pkt_q \
	dd req_in_use reqs n_reqs state n_max_reqs \
	> "${HDR_PREFIX}hfi1_user_sdma_pkt_q.h"
|
||||
682
kernel/sdma.c
Normal file
682
kernel/sdma.c
Normal file
@ -0,0 +1,682 @@
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <hfi1/ihk_hfi1_common.h>
|
||||
#include <hfi1/user_sdma.h>
|
||||
#include <hfi1/sdma.h>
|
||||
#include <hfi1/common.h>
|
||||
|
||||
//#define DEBUG_PRINT_SDMA
|
||||
|
||||
#ifdef DEBUG_PRINT_SC
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
unsigned long hfi1_cap_mask = HFI1_CAP_MASK_DEFAULT;
|
||||
|
||||
/* must be a power of 2 >= 64 <= 32768 */
|
||||
#define SDMA_DESCQ_CNT 2048
|
||||
#define SDMA_DESC_INTR 64
|
||||
#define INVALID_TAIL 0xffff
|
||||
|
||||
#define SDMA_TAIL_UPDATE_THRESH 0x1F
|
||||
|
||||
/**
|
||||
* sdma_select_engine_vl() - select sdma engine
|
||||
* @dd: devdata
|
||||
* @selector: a spreading factor
|
||||
* @vl: this vl
|
||||
*
|
||||
*
|
||||
* This function returns an engine based on the selector and a vl. The
|
||||
* mapping fields are protected by RCU.
|
||||
*/
|
||||
struct sdma_engine *sdma_select_engine_vl(
|
||||
struct hfi1_devdata *dd,
|
||||
u32 selector,
|
||||
u8 vl)
|
||||
{
|
||||
struct sdma_vl_map *m;
|
||||
struct sdma_map_elem *e;
|
||||
struct sdma_engine *rval;
|
||||
|
||||
/* NOTE This should only happen if SC->VL changed after the initial
|
||||
* checks on the QP/AH
|
||||
* Default will return engine 0 below
|
||||
*/
|
||||
if (vl >= HFI1_MAX_VLS_SUPPORTED) {
|
||||
rval = NULL;
|
||||
goto done;
|
||||
}
|
||||
|
||||
m = ACCESS_ONCE(dd->sdma_map);
|
||||
if (unlikely(!m)) {
|
||||
return &dd->per_sdma[0];
|
||||
}
|
||||
e = m->map[vl & m->mask];
|
||||
rval = e->sde[selector & e->mask];
|
||||
|
||||
done:
|
||||
rval = !rval ? &dd->per_sdma[0] : rval;
|
||||
// trace_hfi1_sdma_engine_select(dd, selector, vl, rval->this_idx);
|
||||
hfi1_cdbg(AIOWRITE, "-");
|
||||
return rval;
|
||||
}
|
||||
|
||||
int sdma_select_user_engine_idx(void)
|
||||
{
|
||||
int idx = 0;
|
||||
int idx_start = 0;
|
||||
int idx_modulo = 16;
|
||||
|
||||
/* Hash on rank if MPI job */
|
||||
if (cpu_local_var(current)->proc->nr_processes > 1) {
|
||||
idx = idx_start +
|
||||
(cpu_local_var(current)->proc->process_rank % idx_modulo);
|
||||
}
|
||||
/* Otherwise, CPU id */
|
||||
else {
|
||||
idx = ihk_mc_get_processor_id() % idx_modulo;
|
||||
}
|
||||
|
||||
return idx;
|
||||
}
|
||||
|
||||
/*
|
||||
* sdma_select_user_engine() - select sdma engine based on user setup
|
||||
* @dd: devdata
|
||||
* @selector: a spreading factor
|
||||
* @vl: this vl
|
||||
*
|
||||
* This function returns an sdma engine for a user sdma request.
|
||||
* User defined sdma engine affinity setting is honored when applicable,
|
||||
* otherwise system default sdma engine mapping is used. To ensure correct
|
||||
* ordering, the mapping from <selector, vl> to sde must remain unchanged.
|
||||
*/
|
||||
struct sdma_engine *sdma_select_user_engine(struct hfi1_devdata *dd,
|
||||
u32 selector, u8 vl)
|
||||
{
|
||||
return &dd->per_sdma[sdma_select_user_engine_idx()];
|
||||
}
|
||||
|
||||
/*
|
||||
* return the mode as indicated by the first
|
||||
* descriptor in the tx.
|
||||
*/
|
||||
static inline u8 ahg_mode(struct sdma_txreq *tx)
|
||||
{
|
||||
return (tx->descp[0].qw[1] & SDMA_DESC1_HEADER_MODE_SMASK)
|
||||
>> SDMA_DESC1_HEADER_MODE_SHIFT;
|
||||
}
|
||||
|
||||
/**
|
||||
* __sdma_txclean() - clean tx of mappings, descp *kmalloc's
|
||||
* @dd: hfi1_devdata for unmapping
|
||||
* @tx: tx request to clean
|
||||
*
|
||||
* This is used in the progress routine to clean the tx or
|
||||
* by the ULP to toss an in-process tx build.
|
||||
*
|
||||
* The code can be called multiple times without issue.
|
||||
*
|
||||
*/
|
||||
void __sdma_txclean(
	struct hfi1_devdata *dd,
	struct sdma_txreq *tx)
{
	/* Idempotent: num_desc is zeroed below, so re-entry is a no-op. */
	if (tx->num_desc) {
		/* TODO: enable sdma_unmap_desc */
#if 0
		u16 i;
		u8 skip = 0, mode = ahg_mode(tx);

		/* unmap first */
		//sdma_unmap_desc(dd, &tx->descp[0]);
		/* determine number of AHG descriptors to skip */
		if (mode > SDMA_AHG_APPLY_UPDATE1)
			skip = mode >> 1;
//		for (i = 1 + skip; i < tx->num_desc; i++)
//			sdma_unmap_desc(dd, &tx->descp[i]);
#endif
		tx->num_desc = 0;
	}
	/* kfree(NULL) is a no-op, so no guard needed */
	kfree(tx->coalesce_buf);
	tx->coalesce_buf = NULL;
	/* kmalloc'ed descp */
	/*
	 * desc_limit > ARRAY_SIZE(descs) is the marker that descp was
	 * extended by _extend_sdma_tx_descs(); restore the embedded limit
	 * and free the extension.
	 * NOTE(review): tx->descp is not pointed back at tx->descs here —
	 * presumably re-initialized on the next tx init; confirm.
	 */
	if (unlikely(tx->desc_limit > ARRAY_SIZE(tx->descs))) {
		tx->desc_limit = ARRAY_SIZE(tx->descs);
		kfree(tx->descp);
	}
}
|
||||
|
||||
/*
 * Publish a new descriptor-queue tail to the hardware.
 * The barrier must precede the MMIO write so the device never observes
 * the advanced tail before the descriptor contents are visible.
 */
static inline void sdma_update_tail(struct sdma_engine *sde, u16 tail)
{
	/* Commit writes to memory and advance the tail on the chip */
	smp_wmb(); /* see get_txhead() */
	writeq(tail, sde->tail_csr);
}
|
||||
|
||||
/*
|
||||
* add the generation number into
|
||||
* the qw1 and return
|
||||
*/
|
||||
/*
 * Stamp the current 2-bit ring generation (derived from descq_tail)
 * into the generation field of qw1 and return the result.
 */
static inline u64 add_gen(struct sdma_engine *sde, u64 qw1)
{
	u64 gen = (sde->descq_tail >> sde->sdma_shift) & 3;

	return (qw1 & ~SDMA_DESC1_GENERATION_SMASK) |
	       ((gen & SDMA_DESC1_GENERATION_MASK)
			<< SDMA_DESC1_GENERATION_SHIFT);
}
|
||||
|
||||
/*
|
||||
* This routine submits the indicated tx
|
||||
*
|
||||
* Space has already been guaranteed and
|
||||
* tail side of ring is locked.
|
||||
*
|
||||
* The hardware tail update is done
|
||||
* in the caller and that is facilitated
|
||||
* by returning the new tail.
|
||||
*
|
||||
* There is special case logic for ahg
|
||||
* to not add the generation number for
|
||||
* up to 2 descriptors that follow the
|
||||
* first descriptor.
|
||||
*
|
||||
*/
|
||||
static inline u16 submit_tx(struct sdma_engine *sde, struct sdma_txreq *tx)
{
	int i;
	u16 tail;
	struct sdma_desc *descp = tx->descp;
	u8 skip = 0, mode = ahg_mode(tx);

	/* First descriptor always carries the real generation number. */
	tail = sde->descq_tail & sde->sdma_mask;
	sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
	sde->descq[tail].qw[1] = cpu_to_le64(add_gen(sde, descp->qw[1]));
//	trace_hfi1_sdma_descriptor(sde, descp->qw[0], descp->qw[1],
//				   tail, &sde->descq[tail]);
	tail = ++sde->descq_tail & sde->sdma_mask;
	descp++;
	/*
	 * AHG modes above APPLY_UPDATE1 are followed by edit descriptors
	 * (mode >> 1 of them) that must NOT get a generation stamped in.
	 */
	if (mode > SDMA_AHG_APPLY_UPDATE1)
		skip = mode >> 1;
	for (i = 1; i < tx->num_desc; i++, descp++) {
		u64 qw1;

		sde->descq[tail].qw[0] = cpu_to_le64(descp->qw[0]);
		if (skip) {
			/* edits don't have generation */
			qw1 = descp->qw[1];
			skip--;
		} else {
			/* replace generation with real one for non-edits */
			qw1 = add_gen(sde, descp->qw[1]);
		}
		sde->descq[tail].qw[1] = cpu_to_le64(qw1);
//		trace_hfi1_sdma_descriptor(sde, descp->qw[0], qw1,
//					   tail, &sde->descq[tail]);
		/* wrap within the ring via sdma_mask */
		tail = ++sde->descq_tail & sde->sdma_mask;
	}

	/* remember where this tx ends for completion processing */
	tx->next_descq_idx = tail;
#ifdef CONFIG_HFI1_DEBUG_SDMA_ORDER
	tx->sn = sde->tail_sn++;
//	trace_hfi1_sdma_in_sn(sde, tx->sn);
	WARN_ON_ONCE(sde->tx_ring[sde->tx_tail & sde->sdma_mask]);
#endif
	/* record tx for retirement when the hardware head passes it */
	sde->tx_ring[sde->tx_tail++ & sde->sdma_mask] = tx;
	sde->desc_avail -= tx->num_desc;
	/* hardware tail update is left to the caller (see header comment) */
	return tail;
}
|
||||
|
||||
/*
|
||||
* Check for progress
|
||||
*/
|
||||
/*
 * Re-check descriptor availability for a tx that did not fit.
 *
 * Returns:
 *   -EAGAIN - space is (now) available, caller should retry the submit
 *   -EBUSY  - still no space and no sleep callback to defer to
 *   other   - whatever the iowait sleep callback returned
 */
static int sdma_check_progress(
	struct sdma_engine *sde,
	struct iowait_work *wait,
	struct sdma_txreq *tx,
	bool pkts_sent)
{
	int ret;

	hfi1_cdbg(AIOWRITE, "+");
	/* refresh the cached free count before deciding */
	sde->desc_avail = sdma_descq_freecnt(sde);
	if (tx->num_desc <= sde->desc_avail)
		return -EAGAIN;
	/* pulse the head_lock */
	if (wait && iowait_ioww_to_iow(wait)->sleep) {
		unsigned seq;

		/*
		 * Sample the head_lock seqcount so the sleep callback can
		 * detect a concurrent head advance (i.e. space freed up).
		 */
		seq = raw_seqcount_begin(
			(const seqcount_t *)&sde->head_lock.seqcount);
		ret = wait->iow->sleep(sde, wait, tx, seq, pkts_sent);
		if (ret == -EAGAIN)
			sde->desc_avail = sdma_descq_freecnt(sde);
	} else {
		ret = -EBUSY;
	}
	hfi1_cdbg(AIOWRITE, "-");
	return ret;
}
|
||||
|
||||
/**
|
||||
* sdma_send_txlist() - submit a list of tx req to ring
|
||||
* @sde: sdma engine to use
|
||||
* @wait: SE wait structure to use when full (may be NULL)
|
||||
* @tx_list: list of sdma_txreqs to submit
|
||||
* @count: pointer to a u32 which, after return will contain the total number of
|
||||
* sdma_txreqs removed from the tx_list. This will include sdma_txreqs
|
||||
* whose SDMA descriptors are submitted to the ring and the sdma_txreqs
|
||||
* which are added to SDMA engine flush list if the SDMA engine state is
|
||||
* not running.
|
||||
*
|
||||
* The call submits the list into the ring.
|
||||
*
|
||||
* If the iowait structure is non-NULL and not equal to the iowait list
|
||||
* the unprocessed part of the list will be appended to the list in wait.
|
||||
*
|
||||
* In all cases, the tx_list will be updated so the head of the tx_list is
|
||||
* the list of descriptors that have yet to be transmitted.
|
||||
*
|
||||
* The intent of this call is to provide a more efficient
|
||||
* way of submitting multiple packets to SDMA while holding the tail
|
||||
* side locking.
|
||||
*
|
||||
* Return:
|
||||
* 0 - Success,
|
||||
* -EINVAL - sdma_txreq incomplete, -EBUSY - no space in ring (wait == NULL)
|
||||
* -EIOCBQUEUED - tx queued to iowait, -ECOMM bad sdma state
|
||||
*/
|
||||
int sdma_send_txlist(struct sdma_engine *sde, struct iowait_work *wait,
		     struct list_head *tx_list, u32 *count_out)
{
	struct sdma_txreq *tx, *tx_next;
	int ret = 0;
	unsigned long flags;
	u16 tail = INVALID_TAIL;
	/*
	 * flush_count is never incremented in this McKernel port (the
	 * not-running flush path spins instead of flushing, see below);
	 * it is kept so total_count matches the upstream accounting.
	 */
	u32 submit_count = 0, flush_count = 0, total_count;

retry_lock:
	linux_spin_lock_irqsave(&sde->tail_lock, flags);
retry:
	list_for_each_entry_safe(tx, tx_next, tx_list, list) {
		tx->wait = iowait_ioww_to_iow(wait);
		if (unlikely(!__sdma_running(sde))) {
			kprintf("%s: !__sdma_running \n", __FUNCTION__);
			goto unlock_noconn;
		}
		if (unlikely(tx->num_desc > sde->desc_avail)) {
			goto nodesc;
		}
		if (unlikely(tx->tlen)) {
			/* tx still has unconsumed payload: malformed request */
			ret = -EINVAL;
			goto update_tail;
		}
		list_del_init(&tx->list);
		tail = submit_tx(sde, tx);
		submit_count++;
		/*
		 * Batch hardware tail writes: ring the doorbell every
		 * SDMA_TAIL_UPDATE_THRESH+1 submissions instead of per tx.
		 */
		if (tail != INVALID_TAIL &&
		    (submit_count & SDMA_TAIL_UPDATE_THRESH) == 0) {
			sdma_update_tail(sde, tail);
			tail = INVALID_TAIL;
		}
	}

update_tail:
	total_count = submit_count + flush_count;
	if (wait)
		iowait_sdma_add(iowait_ioww_to_iow(wait), total_count);
	/* flush any tail not yet published by the batching above */
	if (tail != INVALID_TAIL)
		sdma_update_tail(sde, tail);
	linux_spin_unlock_irqrestore(&sde->tail_lock, flags);
	*count_out = total_count;
	return ret;

unlock_noconn:
nodesc:
	{
		/*
		 * Either way, we spin.
		 * We never sleep in McKernel so release the lock occasionally
		 * to give a chance to Linux.
		 */
		unsigned long ts = rdtsc();

		/* bounded spin (~5M TSC ticks) waiting for descriptors */
		while ((tx->num_desc > sde->desc_avail) &&
		       (rdtsc() - ts) < 5000000) {
			sde->desc_avail = sdma_descq_freecnt(sde);
			cpu_pause();
		}

		if (tx->num_desc <= sde->desc_avail) {
			ret = 0;
			/* space freed up: resume the list under the held lock */
			goto retry;
		}

		dkprintf("%s: releasing lock and reiterating.. \n", __FUNCTION__);
		/* drop the lock briefly so Linux can make progress */
		linux_spin_unlock_irqrestore(&sde->tail_lock, flags);
		cpu_pause();
		ret = 0;
		goto retry_lock;
	}
}
|
||||
|
||||
/*
|
||||
* _extend_sdma_tx_descs() - helper to extend txreq
|
||||
*
|
||||
* This is called once the initial nominal allocation
|
||||
* of descriptors in the sdma_txreq is exhausted.
|
||||
*
|
||||
* The code will bump the allocation up to the max
|
||||
* of MAX_DESC (64) descriptors. There doesn't seem
|
||||
* much point in an interim step. The last descriptor
|
||||
* is reserved for coalesce buffer in order to support
|
||||
* cases where input packet has >MAX_DESC iovecs.
|
||||
*
|
||||
*/
|
||||
static int _extend_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int i;

	/* Handle last descriptor */
	if (unlikely((tx->num_desc == (MAX_DESC - 1)))) {
		/* if tlen is 0, it is for padding, release last descriptor */
		if (!tx->tlen) {
			tx->desc_limit = MAX_DESC;
		} else if (!tx->coalesce_buf) {
			/* allocate coalesce buffer with space for padding */
			tx->coalesce_buf = kmalloc(tx->tlen + sizeof(u32),
						   GFP_ATOMIC);
			if (!tx->coalesce_buf)
				goto enomem;
			tx->coalesce_idx = 0;
		}
		return 0;
	}

	/* already at the hard limit: cannot grow further */
	if (unlikely(tx->num_desc == MAX_DESC))
		goto enomem;

	/* grow straight to MAX_DESC; no interim sizes (see header comment) */
	tx->descp = kmalloc_array(
			MAX_DESC,
			sizeof(struct sdma_desc),
			GFP_ATOMIC);
	if (!tx->descp)
		goto enomem;

	/* reserve last descriptor for coalescing */
	tx->desc_limit = MAX_DESC - 1;
	/* copy ones already built */
	for (i = 0; i < tx->num_desc; i++)
		tx->descp[i] = tx->descs[i];
	return 0;
enomem:
	/* release anything partially built (coalesce buf, extended descp) */
	__sdma_txclean(dd, tx);
	return -ENOMEM;
}
|
||||
|
||||
/*
|
||||
* ext_coal_sdma_tx_descs() - extend or coalesce sdma tx descriptors
|
||||
*
|
||||
* This is called once the initial nominal allocation of descriptors
|
||||
* in the sdma_txreq is exhausted.
|
||||
*
|
||||
* This function calls _extend_sdma_tx_descs to extend or allocate
|
||||
* coalesce buffer. If there is a allocated coalesce buffer, it will
|
||||
* copy the input packet data into the coalesce buffer. It also adds
|
||||
* coalesce buffer descriptor once when whole packet is received.
|
||||
*
|
||||
* Return:
|
||||
* <0 - error
|
||||
* 0 - coalescing, don't populate descriptor
|
||||
* 1 - continue with populating descriptor
|
||||
*/
|
||||
/*
 * In this McKernel port the whole coalescing path is compiled out
 * (__HFI1_ORIG__ is not defined): the function unconditionally returns
 * 1, i.e. "continue with populating descriptor" — no extension and no
 * coalesce-buffer copy is performed.
 */
int ext_coal_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx,
			   int type, void *kvaddr, struct page *page,
			   unsigned long offset, u16 len)
{
	//TODO: ext_coal_sdma_tx_descs
#ifdef __HFI1_ORIG__
	int pad_len, rval;
	dma_addr_t addr;

	rval = _extend_sdma_tx_descs(dd, tx);
	if (rval) {
		__sdma_txclean(dd, tx);
		return rval;
	}

	/* If coalesce buffer is allocated, copy data into it */
	if (tx->coalesce_buf) {
		if (type == SDMA_MAP_NONE) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		if (type == SDMA_MAP_PAGE) {
			kvaddr = kmap(page);
			kvaddr += offset;
		} else if (WARN_ON(!kvaddr)) {
			__sdma_txclean(dd, tx);
			return -EINVAL;
		}

		memcpy(tx->coalesce_buf + tx->coalesce_idx, kvaddr, len);
		tx->coalesce_idx += len;
		if (type == SDMA_MAP_PAGE)
			kunmap(page);

		/* If there is more data, return */
		if (tx->tlen - tx->coalesce_idx)
			return 0;

		/* Whole packet is received; add any padding */
		pad_len = tx->packet_len & (sizeof(u32) - 1);
		if (pad_len) {
			pad_len = sizeof(u32) - pad_len;
			memset(tx->coalesce_buf + tx->coalesce_idx, 0, pad_len);
			/* padding is taken care of for coalescing case */
			tx->packet_len += pad_len;
			tx->tlen += pad_len;
		}

		/* dma map the coalesce buffer */
		addr = dma_map_single(&dd->pcidev->dev,
				      tx->coalesce_buf,
				      tx->tlen,
				      DMA_TO_DEVICE);

		if (unlikely(dma_mapping_error(&dd->pcidev->dev, addr))) {
			__sdma_txclean(dd, tx);
			return -ENOSPC;
		}

		/* Add descriptor for coalesce buffer */
		tx->desc_limit = MAX_DESC;
		return _sdma_txadd_daddr(dd, SDMA_MAP_SINGLE, tx,
					 addr, tx->tlen);
	}
#endif /* __HFI1_ORIG__ */
	return 1;
}
|
||||
|
||||
/* tx not dword sized - pad */
|
||||
/* tx not dword sized - pad */
int _pad_sdma_tx_descs(struct hfi1_devdata *dd, struct sdma_txreq *tx)
{
	int rval = 0;

	/* claim one more descriptor slot for the padding descriptor */
	tx->num_desc++;
	if ((unlikely(tx->num_desc == tx->desc_limit))) {
		rval = _extend_sdma_tx_descs(dd, tx);
		if (rval) {
			__sdma_txclean(dd, tx);
			return rval;
		}
	}
	/* finish the one just added */
	/* point it at the shared pad buffer, sized to reach dword alignment */
	make_tx_sdma_desc(
		tx,
		SDMA_MAP_NONE,
		dd->sdma_pad_phys,
		sizeof(u32) - (tx->packet_len & (sizeof(u32) - 1)));
	_sdma_close_tx(dd, tx);
	return rval;
}
|
||||
|
||||
/*
|
||||
* Add ahg to the sdma_txreq
|
||||
*
|
||||
* The logic will consume up to 3
|
||||
* descriptors at the beginning of
|
||||
* sdma_txreq.
|
||||
*/
|
||||
void _sdma_txreq_ahgadd(
	struct sdma_txreq *tx,
	u8 num_ahg,
	u8 ahg_entry,
	u32 *ahg,
	u8 ahg_hlen)
{
	u32 i, shift = 0, desc = 0;
	u8 mode;

	/* hlen must be a dword multiple, not 4, and at most 9 updates */
	WARN_ON_ONCE(num_ahg > 9 || (ahg_hlen & 3) || ahg_hlen == 4);
	/* compute mode */
	if (num_ahg == 1)
		mode = SDMA_AHG_APPLY_UPDATE1;
	else if (num_ahg <= 5)
		mode = SDMA_AHG_APPLY_UPDATE2;
	else
		mode = SDMA_AHG_APPLY_UPDATE3;
	tx->num_desc++;
	/* initialize to consumed descriptors to zero */
	switch (mode) {
	case SDMA_AHG_APPLY_UPDATE3:
		tx->num_desc++;
		tx->descs[2].qw[0] = 0;
		tx->descs[2].qw[1] = 0;
		/* FALLTHROUGH */
	case SDMA_AHG_APPLY_UPDATE2:
		tx->num_desc++;
		tx->descs[1].qw[0] = 0;
		tx->descs[1].qw[1] = 0;
		break;
	}
	/* convert header length from bytes to dwords */
	ahg_hlen >>= 2;
	/* pack index, dword length, mode and the first update into desc 0 */
	tx->descs[0].qw[1] |=
		(((u64)ahg_entry & SDMA_DESC1_HEADER_INDEX_MASK)
			<< SDMA_DESC1_HEADER_INDEX_SHIFT) |
		(((u64)ahg_hlen & SDMA_DESC1_HEADER_DWS_MASK)
			<< SDMA_DESC1_HEADER_DWS_SHIFT) |
		(((u64)mode & SDMA_DESC1_HEADER_MODE_MASK)
			<< SDMA_DESC1_HEADER_MODE_SHIFT) |
		(((u64)ahg[0] & SDMA_DESC1_HEADER_UPDATE1_MASK)
			<< SDMA_DESC1_HEADER_UPDATE1_SHIFT);
	/*
	 * Remaining updates are packed 32 bits at a time, two per qword,
	 * alternating qw[0]/qw[1]; desc advances every two updates.
	 */
	for (i = 0; i < (num_ahg - 1); i++) {
		if (!shift && !(i & 2))
			desc++;
		tx->descs[desc].qw[!!(i & 2)] |=
			(((u64)ahg[i + 1])
				<< shift);
		shift = (shift + 32) & 63;
	}
}
|
||||
|
||||
/**
|
||||
* sdma_ahg_alloc - allocate an AHG entry
|
||||
* @sde: engine to allocate from
|
||||
*
|
||||
* Return:
|
||||
* 0-31 when successful, -EOPNOTSUPP if AHG is not enabled,
|
||||
* -ENOSPC if an entry is not available
|
||||
*/
|
||||
int sdma_ahg_alloc(struct sdma_engine *sde)
|
||||
{
|
||||
int nr;
|
||||
int oldbit;
|
||||
|
||||
if (!sde) {
|
||||
trace_hfi1_ahg_allocate(sde, -EINVAL);
|
||||
return -EINVAL;
|
||||
}
|
||||
while (1) {
|
||||
nr = ffz(ACCESS_ONCE(sde->ahg_bits));
|
||||
if (nr > 31) {
|
||||
trace_hfi1_ahg_allocate(sde, -ENOSPC);
|
||||
return -ENOSPC;
|
||||
}
|
||||
oldbit = test_and_set_bit(nr, &sde->ahg_bits);
|
||||
if (!oldbit)
|
||||
break;
|
||||
cpu_relax();
|
||||
}
|
||||
trace_hfi1_ahg_allocate(sde, nr);
|
||||
return nr;
|
||||
}
|
||||
|
||||
/**
|
||||
* sdma_ahg_free - free an AHG entry
|
||||
* @sde: engine to return AHG entry
|
||||
* @ahg_index: index to free
|
||||
*
|
||||
* This routine frees the indicate AHG entry.
|
||||
*/
|
||||
void sdma_ahg_free(struct sdma_engine *sde, int ahg_index)
|
||||
{
|
||||
if (!sde)
|
||||
return;
|
||||
trace_hfi1_ahg_deallocate(sde, ahg_index);
|
||||
if (ahg_index < 0 || ahg_index > 31)
|
||||
return;
|
||||
clear_bit(ahg_index, &sde->ahg_bits);
|
||||
}
|
||||
178
kernel/syscall.c
178
kernel/syscall.c
@ -67,6 +67,8 @@
|
||||
#include <lwk/stddef.h>
|
||||
#include <futex.h>
|
||||
|
||||
#include <hfi1/file_ops.h>
|
||||
|
||||
#define SYSCALL_BY_IKC
|
||||
|
||||
//#define DEBUG_PRINT_SC
|
||||
@ -268,6 +270,7 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
||||
req->rtid = cpu_local_var(current)->tid;
|
||||
req->ttid = 0;
|
||||
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
|
||||
res.private_data = NULL;
|
||||
#ifdef POSTK_DEBUG_TEMP_FIX_26 /* do_syscall arg pid is not targetpid */
|
||||
send_syscall(req, cpu, target_pid, &res);
|
||||
#else /* POSTK_DEBUG_TEMP_FIX_26 */
|
||||
@ -477,6 +480,21 @@ long do_syscall(struct syscall_request *req, int cpu, int pid)
|
||||
}
|
||||
#endif // PROFILE_ENABLE
|
||||
|
||||
if (req->number == __NR_open && rc > 0) {
|
||||
if ((cpu_local_var(current)->proc->mcexec_flags & MCEXEC_HFI1) &&
|
||||
res.private_data &&
|
||||
!strncmp((const char *)req->args[0], "/dev/hfi", 8)) {
|
||||
|
||||
if (rc >= 0 && rc < MAX_FD_PRIV) {
|
||||
thread->proc->fd_priv_table[rc] = res.private_data;
|
||||
}
|
||||
dkprintf("%s: PID: %d, open fd: %d, filename: "
|
||||
"%s, private_data: 0x%lx\n",
|
||||
__FUNCTION__, thread->proc->pid,
|
||||
rc, req->args[0], res.private_data);
|
||||
}
|
||||
}
|
||||
|
||||
monitor->status = mstatus;
|
||||
monitor->counter++;
|
||||
return rc;
|
||||
@ -1060,6 +1078,12 @@ void terminate(int rc, int sig)
|
||||
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
|
||||
|
||||
vm = proc->vm;
|
||||
|
||||
{
|
||||
extern int hfi1_unmap_device_addresses(struct process *proc);
|
||||
hfi1_unmap_device_addresses(proc);
|
||||
}
|
||||
|
||||
free_all_process_memory_range(vm);
|
||||
|
||||
if (proc->saved_cmdline) {
|
||||
@ -1220,6 +1244,8 @@ interrupt_syscall(struct thread *thread, int sig)
|
||||
SYSCALL_DECLARE(exit_group)
|
||||
{
|
||||
dkprintf("sys_exit_group,pid=%d\n", cpu_local_var(current)->proc->pid);
|
||||
dkprintf("%s: PID: %d, TID: %d\n", __FUNCTION__,
|
||||
cpu_local_var(current)->proc->pid, cpu_local_var(current)->tid);
|
||||
terminate((int)ihk_mc_syscall_arg0(ctx), 0);
|
||||
|
||||
return 0;
|
||||
@ -1268,6 +1294,19 @@ int do_munmap(void *addr, size_t len)
|
||||
int error;
|
||||
int ro_freed;
|
||||
|
||||
/*
|
||||
* TODO: do call back registration for address space changes..
|
||||
*/
|
||||
{
|
||||
extern int hfi1_user_exp_rcv_overlapping(
|
||||
unsigned long start, unsigned long end);
|
||||
unsigned long start = (unsigned long)addr;
|
||||
|
||||
if (hfi1_user_exp_rcv_overlapping(start, start + len)) {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
begin_free_pages_pending();
|
||||
error = remove_process_memory_range(cpu_local_var(current)->vm,
|
||||
(intptr_t)addr, (intptr_t)addr+len, &ro_freed);
|
||||
@ -1282,6 +1321,7 @@ int do_munmap(void *addr, size_t len)
|
||||
}
|
||||
}
|
||||
finish_free_pages_pending();
|
||||
|
||||
dkprintf("%s: 0x%lx:%lu, error: %ld\n",
|
||||
__FUNCTION__, addr, len, error);
|
||||
return error;
|
||||
@ -1289,7 +1329,7 @@ int do_munmap(void *addr, size_t len)
|
||||
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_27
|
||||
#else
|
||||
static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *addrp)
|
||||
static int search_free_space(size_t len, int pgshift, intptr_t *addrp)
|
||||
{
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct vm_regions *region = &thread->vm->region;
|
||||
@ -1298,17 +1338,17 @@ static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *a
|
||||
struct vm_range *range;
|
||||
size_t pgsize = (size_t)1 << pgshift;
|
||||
|
||||
dkprintf("search_free_space(%lx,%lx,%d,%p)\n", len, hint, pgshift, addrp);
|
||||
dkprintf("%s: len: %lu, pgshift: %d\n",
|
||||
__FUNCTION__, len, pgshift);
|
||||
|
||||
addr = hint;
|
||||
addr = region->map_end;
|
||||
for (;;) {
|
||||
addr = (addr + pgsize - 1) & ~(pgsize - 1);
|
||||
if ((region->user_end <= addr)
|
||||
|| ((region->user_end - len) < addr)) {
|
||||
ekprintf("search_free_space(%lx,%lx,%p):"
|
||||
"no space. %lx %lx\n",
|
||||
len, hint, addrp, addr,
|
||||
region->user_end);
|
||||
ekprintf("%s: error: addr 0x%lx is outside the user region\n",
|
||||
__FUNCTION__, addr);
|
||||
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@ -1320,12 +1360,13 @@ static int search_free_space(size_t len, intptr_t hint, int pgshift, intptr_t *a
|
||||
addr = range->end;
|
||||
}
|
||||
|
||||
region->map_end = addr + len;
|
||||
error = 0;
|
||||
*addrp = addr;
|
||||
|
||||
out:
|
||||
dkprintf("search_free_space(%lx,%lx,%d,%p): %d %lx\n",
|
||||
len, hint, pgshift, addrp, error, addr);
|
||||
dkprintf("%s: len: %lu, pgshift: %d, addr: 0x%lx\n",
|
||||
__FUNCTION__, len, pgshift, addr);
|
||||
return error;
|
||||
}
|
||||
#endif
|
||||
@ -1420,20 +1461,18 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* choose mapping address */
|
||||
/* Obtain mapping address */
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_27
|
||||
error = search_free_space(cpu_local_var(current), len,
|
||||
region->map_end, PAGE_SHIFT + p2align, &addr);
|
||||
#else
|
||||
error = search_free_space(len, region->map_end,
|
||||
PAGE_SHIFT + p2align, &addr);
|
||||
error = search_free_space(len, PAGE_SHIFT + p2align, &addr);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_27 */
|
||||
if (error) {
|
||||
ekprintf("do_mmap:search_free_space(%lx,%lx,%d) failed. %d\n",
|
||||
len, region->map_end, p2align, error);
|
||||
goto out;
|
||||
}
|
||||
region->map_end = addr + len;
|
||||
}
|
||||
|
||||
/* do the map */
|
||||
@ -1537,6 +1576,24 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
vrflags |= VR_AP_USER;
|
||||
}
|
||||
|
||||
#if 1
|
||||
if (len < (unsigned long)4*1024*1024*1024) {
|
||||
phys = NOPHYS;
|
||||
vrflags |= VR_PREALLOC;
|
||||
}
|
||||
else {
|
||||
kprintf("%s: big ANON mapping!!: %lu\n", __FUNCTION__, len);
|
||||
/* Give demand paging a chance */
|
||||
vrflags |= VR_DEMAND_PAGING;
|
||||
populated_mapping = 0;
|
||||
error = zeroobj_create(&memobj);
|
||||
if (error) {
|
||||
ekprintf("%s: zeroobj_create failed, error: %d\n",
|
||||
__FUNCTION__, error);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
#else
|
||||
p = ihk_mc_alloc_aligned_pages_user(npages, p2align,
|
||||
IHK_MC_AP_NOWAIT | ap_flag, addr0);
|
||||
if (p == NULL) {
|
||||
@ -1568,6 +1625,7 @@ do_mmap(const intptr_t addr0, const size_t len0, const int prot,
|
||||
__FUNCTION__, addr, len, npages, p2align);
|
||||
phys = virt_to_phys(p);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else if (flags & MAP_SHARED) {
|
||||
dkprintf("%s: MAP_SHARED,flags=%x,len=%ld\n", __FUNCTION__, flags, len);
|
||||
@ -1701,10 +1759,10 @@ out:
|
||||
if (memobj) {
|
||||
memobj_release(memobj);
|
||||
}
|
||||
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
|
||||
dkprintf("%s: 0x%lx:%8lu-0x%lx, (req: 0x%lx:%lu), prot: %x, flags: %x, "
|
||||
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
|
||||
__FUNCTION__,
|
||||
addr, len, addr0, len0, prot, flags,
|
||||
addr, len, addr+len, addr0, len0, prot, flags,
|
||||
fd, off0, error, addr);
|
||||
|
||||
return (!error)? addr: error;
|
||||
@ -3078,6 +3136,22 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * writev() intercept: hfi1 device fds take the in-kernel SDMA fast
 * path; all other fds are forwarded to Linux as usual.
 */
SYSCALL_DECLARE(writev)
{
	struct process *proc = cpu_local_var(current)->proc;
	int fd = ihk_mc_syscall_arg0(ctx);
	struct iovec *iovec = (struct iovec *)ihk_mc_syscall_arg1(ctx);
	int iovcnt = ihk_mc_syscall_arg2(ctx);
	/* private_data was stashed at open() time for hfi1 fds only */
	void *private_data = (fd < 0 || fd >= MAX_FD_PRIV) ? NULL : proc->fd_priv_table[fd];

	if (private_data) {
		return hfi1_aio_write(private_data, iovec, iovcnt);
	}
	else {
		return syscall_generic_forwarding(__NR_writev, ctx);
	}
}
|
||||
|
||||
SYSCALL_DECLARE(read)
|
||||
{
|
||||
int fd = ihk_mc_syscall_arg0(ctx);
|
||||
@ -3111,6 +3185,9 @@ SYSCALL_DECLARE(ioctl)
|
||||
struct process *proc = thread->proc;
|
||||
struct mckfd *fdp;
|
||||
long irqstate;
|
||||
void *private_data = (fd < 0 || fd >= MAX_FD_PRIV) ? NULL : proc->fd_priv_table[fd];
|
||||
unsigned long t_s = rdtsc();
|
||||
int sub_rc = 0;
|
||||
|
||||
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
||||
for(fdp = proc->mckfd; fdp; fdp = fdp->next)
|
||||
@ -3118,13 +3195,44 @@ SYSCALL_DECLARE(ioctl)
|
||||
break;
|
||||
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||
|
||||
if(fdp && fdp->ioctl_cb){
|
||||
//kprintf("ioctl: found system fd %d\n", fd);
|
||||
if (private_data) {
|
||||
extern long hfi1_file_ioctl(void *private_data,
|
||||
unsigned int cmd,
|
||||
unsigned long arg,
|
||||
unsigned long t_s);
|
||||
|
||||
rc = hfi1_file_ioctl(private_data,
|
||||
ihk_mc_syscall_arg1(ctx),
|
||||
ihk_mc_syscall_arg2(ctx),
|
||||
t_s);
|
||||
|
||||
/* Continue forwarding iff hfi1 didn't handle it */
|
||||
// TODO: improve heuristics?
|
||||
if (rc != -ENOTSUPP && rc != -ENODEV)
|
||||
return rc;
|
||||
|
||||
if (rc == -ENODEV) {
|
||||
sub_rc = rc;
|
||||
}
|
||||
}
|
||||
|
||||
if (fdp && fdp->ioctl_cb) {
|
||||
rc = fdp->ioctl_cb(fdp, ctx);
|
||||
}
|
||||
else{
|
||||
else {
|
||||
rc = syscall_generic_forwarding(__NR_ioctl, ctx);
|
||||
}
|
||||
|
||||
if (private_data && sub_rc == -ENODEV) {
|
||||
extern int hfi1_map_device_addresses(void *fd);
|
||||
|
||||
if (hfi1_map_device_addresses(private_data) < 0) {
|
||||
kprintf("%s: Could not map hfi1 device addresses\n",
|
||||
__FUNCTION__);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -3145,6 +3253,11 @@ SYSCALL_DECLARE(open)
|
||||
return -EFAULT;
|
||||
}
|
||||
dkprintf("open(): pathname=%s\n", xpmem_wk);
|
||||
|
||||
if (!strcmp(xpmem_wk, "/proc/sys/vm/overcommit_memory")) {
|
||||
return -ENOENT;
|
||||
}
|
||||
|
||||
rc = strcmp(xpmem_wk, XPMEM_DEV_PATH);
|
||||
#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
|
||||
if (!rc) {
|
||||
@ -3231,6 +3344,11 @@ SYSCALL_DECLARE(close)
|
||||
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||
rc = syscall_generic_forwarding(__NR_close, ctx);
|
||||
}
|
||||
|
||||
if (fd >= 0 && fd < MAX_FD_PRIV) {
|
||||
thread->proc->fd_priv_table[fd] = NULL;
|
||||
}
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
@ -4908,7 +5026,6 @@ SYSCALL_DECLARE(shmat)
|
||||
struct process_vm *vm = thread->vm;
|
||||
size_t len;
|
||||
int error;
|
||||
struct vm_regions *region = &vm->region;
|
||||
intptr_t addr;
|
||||
int prot;
|
||||
int vrflags;
|
||||
@ -4977,7 +5094,7 @@ SYSCALL_DECLARE(shmat)
|
||||
error = search_free_space(cpu_local_var(current), len,
|
||||
region->map_end, obj->pgshift, &addr);
|
||||
#else
|
||||
error = search_free_space(len, region->map_end, obj->pgshift, &addr);
|
||||
error = search_free_space(len, obj->pgshift, &addr);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_27 */
|
||||
if (error) {
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
@ -4985,7 +5102,6 @@ SYSCALL_DECLARE(shmat)
|
||||
dkprintf("shmat(%#x,%p,%#x):search_free_space failed. %d\n", shmid, shmaddr, shmflg, error);
|
||||
return error;
|
||||
}
|
||||
region->map_end = addr + len;
|
||||
}
|
||||
|
||||
vrflags = VR_NONE;
|
||||
@ -5438,6 +5554,7 @@ do_exit(int code)
|
||||
int sig = code & 255;
|
||||
|
||||
dkprintf("sys_exit,pid=%d\n", proc->pid);
|
||||
dkprintf("%s: PID: %d, TID: %d\n", __FUNCTION__, proc->pid, thread->tid);
|
||||
|
||||
mcs_rwlock_reader_lock(&proc->threads_lock, &lock);
|
||||
nproc = 0;
|
||||
@ -7721,6 +7838,10 @@ SYSCALL_DECLARE(mremap)
|
||||
uintptr_t lckstart = -1;
|
||||
uintptr_t lckend = -1;
|
||||
|
||||
/* Not for lammps for now.. */
|
||||
if (!strcmp("./lammps", thread->proc->saved_cmdline))
|
||||
return -ENOSYS;
|
||||
|
||||
dkprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx)\n",
|
||||
oldaddr, oldsize0, newsize0, flags, newaddr);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
@ -7816,8 +7937,8 @@ SYSCALL_DECLARE(mremap)
|
||||
vm->region.map_end,
|
||||
range->pgshift, (intptr_t *)&newstart);
|
||||
#else
|
||||
error = search_free_space(newsize, vm->region.map_end,
|
||||
range->pgshift, (intptr_t *)&newstart);
|
||||
error = search_free_space(newsize, range->pgshift,
|
||||
(intptr_t *)&newstart);
|
||||
#endif /* POSTK_DEBUG_ARCH_DEP_27 */
|
||||
if (error) {
|
||||
ekprintf("sys_mremap(%#lx,%#lx,%#lx,%#x,%#lx):"
|
||||
@ -7848,8 +7969,10 @@ SYSCALL_DECLARE(mremap)
|
||||
if (range->memobj) {
|
||||
memobj_ref(range->memobj);
|
||||
}
|
||||
|
||||
/* Drop VR_PREALLOC to create vm_range without physical pages */
|
||||
error = add_process_memory_range(thread->vm, newstart, newend, -1,
|
||||
range->flag, range->memobj,
|
||||
range->flag & ~VR_PREALLOC, range->memobj,
|
||||
range->objoff + (oldstart - range->start),
|
||||
range->pgshift, NULL);
|
||||
if (error) {
|
||||
@ -9443,6 +9566,10 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
|
||||
}
|
||||
#endif // PROFILE_ENABLE
|
||||
|
||||
if (thread->proc->nohost) { // mcexec termination was detected
|
||||
terminate(0, SIGKILL);
|
||||
}
|
||||
|
||||
#if defined(POSTK_DEBUG_TEMP_FIX_60) && defined(POSTK_DEBUG_TEMP_FIX_56)
|
||||
check_need_resched();
|
||||
#elif defined(POSTK_DEBUG_TEMP_FIX_60) /* sched_yield called check_signal fix. */
|
||||
@ -9470,9 +9597,6 @@ long syscall(int num, ihk_mc_user_context_t *ctx)
|
||||
#endif // DISABLE_SCHED_YIELD
|
||||
set_cputime(0);
|
||||
|
||||
if (thread->proc->nohost) { // mcexec termination was detected
|
||||
terminate(0, SIGKILL);
|
||||
}
|
||||
//kprintf("syscall=%d returns %lx(%ld)\n", num, l, l);
|
||||
|
||||
return l;
|
||||
|
||||
778
kernel/user_exp_rcv.c
Normal file
778
kernel/user_exp_rcv.c
Normal file
@ -0,0 +1,778 @@
|
||||
/*
|
||||
* Copyright(c) 2015, 2016 Intel Corporation.
|
||||
*
|
||||
* This file is provided under a dual BSD/GPLv2 license. When using or
|
||||
* redistributing this file, you may do so under either license.
|
||||
*
|
||||
* GPL LICENSE SUMMARY
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of version 2 of the GNU General Public License as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*
|
||||
* BSD LICENSE
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in
|
||||
* the documentation and/or other materials provided with the
|
||||
* distribution.
|
||||
* - Neither the name of Intel Corporation nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*/
|
||||
|
||||
#include <hfi1/ihk_hfi1_common.h>
|
||||
#include <hfi1/common.h>
|
||||
#include <hfi1/hfi.h>
|
||||
#include <hfi1/chip.h>
|
||||
#include <hfi1/user_exp_rcv.h>
|
||||
#include <hfi1/user_sdma.h> // for hfi1_map_device_addresses
|
||||
|
||||
//#define DEBUG_PRINT_USER_EXP_RCV
|
||||
|
||||
#ifdef DEBUG_PRINT_USER_EXP_RCV
|
||||
#define dkprintf(...) kprintf(__VA_ARGS__)
|
||||
#else
|
||||
#define dkprintf(...) do { if(0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
static int program_rcvarray(struct hfi1_filedata *, unsigned long, uintptr_t,
|
||||
size_t, u32 *);
|
||||
static int set_rcvarray_entry(struct hfi1_filedata *, unsigned long, uintptr_t,
|
||||
u32, struct tid_group *, int, u32);
|
||||
static int unprogram_rcvarray(struct hfi1_filedata *, u32, struct tid_group **);
|
||||
static void clear_tid_node(struct hfi1_filedata *, struct tid_rb_node *);
|
||||
static int tid_rb_invalidate(struct hfi1_filedata *fdata,
|
||||
struct tid_rb_node *node);
|
||||
|
||||
static int hfi1_rb_tree_insert(struct rb_root *root,
|
||||
struct tid_rb_node *new_node);
|
||||
static void __hfi1_rb_tree_remove(struct tid_rb_node *tid_node);
|
||||
static struct tid_rb_node *__hfi1_search_rb_overlapping_node(
|
||||
struct rb_root *root,
|
||||
unsigned long start,
|
||||
unsigned long end);
|
||||
|
||||
/*
|
||||
* RcvArray entry allocation for Expected Receives is done by the
|
||||
* following algorithm:
|
||||
*/
|
||||
int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo)
|
||||
{
|
||||
int ret = -EFAULT;
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
uintptr_t vaddr, vaddr_end, base_vaddr = 0;
|
||||
u32 *tidlist;
|
||||
u16 tididx = 0;
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
size_t base_pgsize, len = 0;
|
||||
pte_t *ptep;
|
||||
u64 phys;
|
||||
|
||||
if (!tinfo->length)
|
||||
return -EINVAL;
|
||||
|
||||
if (tinfo->length / PAGE_SIZE > uctxt->expected_count) {
|
||||
kprintf("Expected buffer too big\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* TODO: sizeof(*tidlist) * uctxt->expected_count); */
|
||||
tidlist = kmalloc_cache_alloc(&cpu_local_var(tidlist_cache),
|
||||
sizeof(*tidlist) * 2048);
|
||||
|
||||
if (!tidlist)
|
||||
return -ENOMEM;
|
||||
|
||||
#if 0
|
||||
/* Verify that access is OK for the user buffer */
|
||||
if (access_ok(vm, VERIFY_WRITE, tinfo->vaddr, tinfo->length)) {
|
||||
kprintf("%s: access_ok() failed for 0x%lx:%lu\n",
|
||||
__FUNCTION__, tinfo->vaddr, tinfo->length);
|
||||
return -EFAULT;
|
||||
}
|
||||
#endif
|
||||
|
||||
vaddr_end = tinfo->vaddr + tinfo->length;
|
||||
dkprintf("%s: vaddr: 0x%llx, length: %zu (end: 0x%lx)\n",
|
||||
__FUNCTION__, tinfo->vaddr, tinfo->length,
|
||||
tinfo->vaddr + tinfo->length);
|
||||
|
||||
vaddr = tinfo->vaddr;
|
||||
|
||||
ptep = ihk_mc_pt_lookup_fault_pte(vm,
|
||||
(void*)vaddr, 0,
|
||||
(void**)&base_vaddr,
|
||||
&base_pgsize, 0);
|
||||
if (unlikely(!ptep || !pte_is_present(ptep))) {
|
||||
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
|
||||
__FUNCTION__, vaddr);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
while (vaddr < vaddr_end) {
|
||||
phys = pte_get_phys(ptep) + (vaddr - base_vaddr);
|
||||
len = (base_vaddr + base_pgsize - vaddr);
|
||||
ret = 0;
|
||||
|
||||
/* Are we right at a page border? */
|
||||
if (len == 0) {
|
||||
ptep = ihk_mc_pt_lookup_fault_pte(vm,
|
||||
(void*)vaddr, 0,
|
||||
(void**)&base_vaddr,
|
||||
&base_pgsize, 0);
|
||||
if (unlikely(!ptep || !pte_is_present(ptep))) {
|
||||
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
|
||||
__FUNCTION__, vaddr);
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
phys = pte_get_phys(ptep) + (vaddr - base_vaddr);
|
||||
len = (base_vaddr + base_pgsize - vaddr);
|
||||
}
|
||||
|
||||
/* Collect max physically contiguous chunk */
|
||||
while (len < MAX_EXPECTED_BUFFER &&
|
||||
vaddr + len < vaddr_end) {
|
||||
uintptr_t __base_vaddr;
|
||||
size_t __base_pgsize;
|
||||
pte_t *__ptep;
|
||||
int contiguous = 0;
|
||||
|
||||
/* Look up next page */
|
||||
__ptep = ihk_mc_pt_lookup_fault_pte(vm,
|
||||
(void*)vaddr + len, 0,
|
||||
(void**)&__base_vaddr,
|
||||
&__base_pgsize, 0);
|
||||
if (unlikely(!__ptep || !pte_is_present(__ptep))) {
|
||||
kprintf("%s: ERRROR: no valid PTE for 0x%lx\n",
|
||||
__FUNCTION__, vaddr + len);
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Contiguous? */
|
||||
if (pte_get_phys(__ptep) == pte_get_phys(ptep) + base_pgsize) {
|
||||
len += __base_pgsize;
|
||||
contiguous = 1;
|
||||
}
|
||||
|
||||
base_pgsize = __base_pgsize;
|
||||
base_vaddr = __base_vaddr;
|
||||
ptep = __ptep;
|
||||
|
||||
if (!contiguous)
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret == -EFAULT)
|
||||
break;
|
||||
|
||||
if (len > vaddr_end - vaddr) {
|
||||
len = vaddr_end - vaddr;
|
||||
}
|
||||
|
||||
if (len > MAX_EXPECTED_BUFFER) {
|
||||
len = MAX_EXPECTED_BUFFER;
|
||||
}
|
||||
|
||||
ret = program_rcvarray(fd, vaddr, phys, len, tidlist + tididx);
|
||||
if (ret <= 0) {
|
||||
kprintf("%s: failed to program RcvArray entries for len: %lu"
|
||||
", vaddr: 0x%lx, vaddr_end: 0x%lx, ret: %d\n",
|
||||
__FUNCTION__, len, vaddr, vaddr_end, ret);
|
||||
panic("program_rcvarray() failed");
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
dkprintf("%s: vaddr: 0x%lx -> phys: 0x%llx:%lu programmed\n",
|
||||
__FUNCTION__, vaddr, phys, len);
|
||||
|
||||
tididx += ret;
|
||||
vaddr += len;
|
||||
}
|
||||
|
||||
if (ret > 0) {
|
||||
linux_spin_lock(&fd->tid_lock);
|
||||
fd->tid_used += tididx;
|
||||
linux_spin_unlock(&fd->tid_lock);
|
||||
tinfo->tidcnt = tididx;
|
||||
|
||||
if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist,
|
||||
tidlist, sizeof(*tidlist)*tididx)) {
|
||||
/*
|
||||
* On failure to copy to the user level, we need to undo
|
||||
* everything done so far so we don't leak resources.
|
||||
*/
|
||||
tinfo->tidlist = (unsigned long)&tidlist;
|
||||
hfi1_user_exp_rcv_clear(fd, tinfo);
|
||||
tinfo->tidlist = 0;
|
||||
ret = -EFAULT;
|
||||
}
|
||||
|
||||
dkprintf("%s: range: 0x%llx:%lu -> %d TIDs programmed\n",
|
||||
__FUNCTION__, tinfo->vaddr, tinfo->length, tinfo->tidcnt);
|
||||
}
|
||||
|
||||
kmalloc_cache_free(tidlist);
|
||||
return ret > 0 ? 0 : ret;
|
||||
}
|
||||
|
||||
int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo)
|
||||
{
|
||||
int ret = 0;
|
||||
u32 *tidinfo;
|
||||
unsigned tididx;
|
||||
|
||||
tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL);
|
||||
if (!tidinfo)
|
||||
return -ENOMEM;
|
||||
|
||||
if (copy_from_user(tidinfo, (void __user *)(unsigned long)
|
||||
tinfo->tidlist, sizeof(tidinfo[0]) *
|
||||
tinfo->tidcnt)) {
|
||||
ret = -EFAULT;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Technically should never be needed (because mapped previously
|
||||
* on update), but this call is no-op if addresses have been set
|
||||
* previously
|
||||
if (hfi1_map_device_addresses(fd) < 0) {
|
||||
kprintf("%s: Could not map hfi1 device addresses\n",
|
||||
__FUNCTION__);
|
||||
return -EINVAL;
|
||||
}
|
||||
*/
|
||||
|
||||
for (tididx = 0; tididx < tinfo->tidcnt; tididx++) {
|
||||
ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL);
|
||||
if (ret) {
|
||||
kprintf("Failed to unprogram rcv array %d\n",
|
||||
ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
dkprintf("%s: 0x%llx:%lu -> %d TIDs unprogrammed\n",
|
||||
__FUNCTION__, tinfo->vaddr, tinfo->length, tinfo->tidcnt);
|
||||
|
||||
linux_spin_lock(&fd->tid_lock);
|
||||
fd->tid_used -= tididx;
|
||||
linux_spin_unlock(&fd->tid_lock);
|
||||
|
||||
tinfo->tidcnt = tididx;
|
||||
done:
|
||||
kfree(tidinfo);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* program_rcvarray() - program an RcvArray group with receive buffers
|
||||
*/
|
||||
static int program_rcvarray(struct hfi1_filedata *fd,
|
||||
unsigned long vaddr,
|
||||
uintptr_t phys,
|
||||
size_t len, u32 *ptid)
|
||||
{
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
struct hfi1_devdata *dd = uctxt->dd;
|
||||
u16 idx = 0;
|
||||
u32 tidinfo = 0, rcventry;
|
||||
int ret = -ENOMEM, count = 0;
|
||||
struct tid_group *grp = NULL;
|
||||
|
||||
/* lock is taken at loop edges */
|
||||
linux_spin_lock(&fd->tid_lock);
|
||||
while (len > 0) {
|
||||
size_t tid_len;
|
||||
size_t tid_npages;
|
||||
|
||||
if (!grp) {
|
||||
if (!uctxt->tid_used_list.count) {
|
||||
if (!uctxt->tid_group_list.count) {
|
||||
linux_spin_unlock(&fd->tid_lock);
|
||||
/* return what we have so far */
|
||||
kprintf("%s: ERROR: no grp?\n", __FUNCTION__);
|
||||
return count ? count : -ENOMEM;
|
||||
}
|
||||
|
||||
grp = tid_group_pop(&uctxt->tid_group_list);
|
||||
} else {
|
||||
grp = tid_group_pop(&uctxt->tid_used_list);
|
||||
}
|
||||
}
|
||||
|
||||
/* Find the first unused entry in the group */
|
||||
for (; idx < grp->size; idx++) {
|
||||
if (!(grp->map & (1 << idx))) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
linux_spin_unlock(&fd->tid_lock);
|
||||
|
||||
tid_len = (len > MAX_EXPECTED_BUFFER) ? MAX_EXPECTED_BUFFER :
|
||||
(1 << (fls(len) - 1));
|
||||
tid_npages = (tid_len > PAGE_SIZE) ? tid_len >> PAGE_SHIFT : 1;
|
||||
|
||||
rcventry = grp->base + idx;
|
||||
rcv_array_wc_fill(dd, rcventry);
|
||||
tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base) |
|
||||
EXP_TID_SET(LEN, tid_npages);
|
||||
ret = set_rcvarray_entry(fd, vaddr, phys, rcventry,
|
||||
grp, tid_npages, tidinfo);
|
||||
if (ret) {
|
||||
kprintf("%s: set_rcvarray_entry() failed: %d\n",
|
||||
__FUNCTION__, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ptid[count++] = tidinfo;
|
||||
len -= tid_len;
|
||||
vaddr += tid_len;
|
||||
phys += tid_len;
|
||||
|
||||
linux_spin_lock(&fd->tid_lock);
|
||||
grp->used++;
|
||||
grp->map |= 1 << idx++;
|
||||
|
||||
/* optimization: keep same group if possible. */
|
||||
if (grp->used < grp->size && len > 0)
|
||||
continue;
|
||||
|
||||
if (grp->used == grp->size)
|
||||
tid_group_add_tail(grp, &uctxt->tid_full_list);
|
||||
else
|
||||
tid_group_add_tail(grp, &uctxt->tid_used_list);
|
||||
idx = 0;
|
||||
grp = NULL;
|
||||
}
|
||||
linux_spin_unlock(&fd->tid_lock);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
static int set_rcvarray_entry(struct hfi1_filedata *fd,
|
||||
unsigned long vaddr, uintptr_t phys,
|
||||
u32 rcventry, struct tid_group *grp,
|
||||
int npages, u32 tidinfo)
|
||||
{
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
struct hfi1_devdata *dd = uctxt->dd;
|
||||
struct tid_rb_node *node;
|
||||
|
||||
/*
|
||||
* Allocate the node first so we can handle a potential
|
||||
* failure before we've programmed anything.
|
||||
*/
|
||||
node = kmalloc_cache_alloc(&cpu_local_var(tid_node_cache),
|
||||
sizeof(*node));
|
||||
if (!node) {
|
||||
kprintf("%s: ERROR: allocating node\n", __FUNCTION__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
dkprintf("Registering rcventry %d, phys 0x%p, len %u\n", rcventry,
|
||||
phys, npages << PAGE_SHIFT);
|
||||
|
||||
node->phys = phys;
|
||||
node->len = npages << PAGE_SHIFT;
|
||||
node->rcventry = rcventry;
|
||||
node->grp = grp;
|
||||
node->freed = false;
|
||||
node->fd = fd;
|
||||
node->start = vaddr;
|
||||
node->end = vaddr + node->len;
|
||||
node->range = NULL;
|
||||
|
||||
// TODO: check node->rcventry - uctxt->expected_base is within
|
||||
// [0; uctxt->expected_count[ ?
|
||||
fd->entry_to_rb[node->rcventry - uctxt->expected_base] = node;
|
||||
hfi1_rb_tree_insert(
|
||||
&cpu_local_var(current)->proc->hfi1_reg_tree,
|
||||
node);
|
||||
dkprintf("%s: node (0x%lx:%lu) programmed, tidinfo: %d\n",
|
||||
__FUNCTION__, vaddr, node->len, tidinfo);
|
||||
|
||||
hfi1_put_tid(dd, rcventry, PT_EXPECTED, phys, fls(npages));
|
||||
#if 0
|
||||
trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages,
|
||||
node->mmu.addr, node->phys, phys);
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo)
|
||||
{
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
unsigned long *ev = uctxt->dd->events +
|
||||
(((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
|
||||
HFI1_MAX_SHARED_CTXTS) + fd->subctxt);
|
||||
int ret = 0;
|
||||
|
||||
if (!fd->invalid_tids)
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* copy_to_user() can sleep, which will leave the invalid_lock
|
||||
* locked and cause the MMU notifier to be blocked on the lock
|
||||
* for a long time.
|
||||
* Copy the data to a local buffer so we can release the lock.
|
||||
*
|
||||
* McKernel: copy to userspace directly.
|
||||
*/
|
||||
|
||||
linux_spin_lock(&fd->invalid_lock);
|
||||
if (fd->invalid_tid_idx) {
|
||||
dkprintf("%s: fd->invalid_tid_idx: %d to be notified\n",
|
||||
__FUNCTION__, fd->invalid_tid_idx);
|
||||
|
||||
if (copy_to_user((void __user *)tinfo->tidlist,
|
||||
fd->invalid_tids,
|
||||
sizeof(*(fd->invalid_tids)) *
|
||||
fd->invalid_tid_idx)) {
|
||||
ret = -EFAULT;
|
||||
}
|
||||
else {
|
||||
tinfo->tidcnt = fd->invalid_tid_idx;
|
||||
memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) *
|
||||
fd->invalid_tid_idx);
|
||||
/*
|
||||
* Reset the user flag while still holding the lock.
|
||||
* Otherwise, PSM can miss events.
|
||||
*/
|
||||
clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
|
||||
dkprintf("%s: fd->invalid_tid_idx: %d notified\n",
|
||||
__FUNCTION__, fd->invalid_tid_idx);
|
||||
fd->invalid_tid_idx = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
tinfo->tidcnt = 0;
|
||||
}
|
||||
linux_spin_unlock(&fd->invalid_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo,
|
||||
struct tid_group **grp)
|
||||
{
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
struct tid_rb_node *node;
|
||||
u8 tidctrl = EXP_TID_GET(tidinfo, CTRL);
|
||||
u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry;
|
||||
|
||||
if (tididx >= uctxt->expected_count) {
|
||||
kprintf("Invalid RcvArray entry (%u) index for ctxt %u\n",
|
||||
tididx, uctxt->ctxt);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (tidctrl == 0x3) {
|
||||
kprintf("tidctrl = 3 for rcventry %d\n",
|
||||
tididx + 2 + uctxt->expected_base);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
rcventry = tididx + (tidctrl - 1);
|
||||
|
||||
node = fd->entry_to_rb[rcventry];
|
||||
dkprintf("%s: node (0x%lx:%lu), tidinfo: %d\n",
|
||||
__FUNCTION__, node->start, node->end - node->start, tidinfo);
|
||||
|
||||
if (!node || node->rcventry != (uctxt->expected_base + rcventry)) {
|
||||
kprintf("bad entry %d\n", rcventry);
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
if (node->range) {
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
struct deferred_unmap_range *range = node->range;
|
||||
|
||||
//ihk_mc_spinlock_lock_noirq(&vm->vm_deferred_unmap_lock);
|
||||
|
||||
if (--range->refcnt == 0) {
|
||||
list_del(&range->list);
|
||||
}
|
||||
else {
|
||||
range = NULL;
|
||||
}
|
||||
//ihk_mc_spinlock_unlock_noirq(&vm->vm_deferred_unmap_lock);
|
||||
|
||||
if (range) {
|
||||
dkprintf("%s: executing deferred unmap: 0x%lx:%lu-0x%lx\n",
|
||||
__FUNCTION__, range->addr, range->len,
|
||||
range->addr + range->len);
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
|
||||
do_munmap(range->addr, range->len);
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
|
||||
|
||||
kfree(range);
|
||||
}
|
||||
}
|
||||
|
||||
if (grp)
|
||||
*grp = node->grp;
|
||||
|
||||
dkprintf("Clearing rcventry %d, phys 0x%p, len %u\n", node->rcventry,
|
||||
node->phys, node->len);
|
||||
|
||||
fd->entry_to_rb[rcventry] = NULL;
|
||||
clear_tid_node(fd, node);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void clear_tid_node(struct hfi1_filedata *fd, struct tid_rb_node *node)
|
||||
{
|
||||
struct hfi1_ctxtdata *uctxt = fd->uctxt;
|
||||
struct hfi1_devdata *dd = uctxt->dd;
|
||||
|
||||
|
||||
hfi1_put_tid(dd, node->rcventry, PT_INVALID, 0, 0);
|
||||
/*
|
||||
* Make sure device has seen the write before we unpin the
|
||||
* pages.
|
||||
*/
|
||||
flush_wc();
|
||||
barrier();
|
||||
|
||||
__hfi1_rb_tree_remove(node);
|
||||
|
||||
linux_spin_lock(&fd->tid_lock);
|
||||
node->grp->used--;
|
||||
node->grp->map &= ~(1 << (node->rcventry - node->grp->base));
|
||||
|
||||
if (node->grp->used == node->grp->size - 1)
|
||||
tid_group_move(node->grp, &uctxt->tid_full_list,
|
||||
&uctxt->tid_used_list);
|
||||
else if (!node->grp->used)
|
||||
tid_group_move(node->grp, &uctxt->tid_used_list,
|
||||
&uctxt->tid_group_list);
|
||||
linux_spin_unlock(&fd->tid_lock);
|
||||
kmalloc_cache_free(node);
|
||||
}
|
||||
|
||||
|
||||
int hfi1_user_exp_rcv_overlapping(unsigned long start, unsigned long end)
|
||||
{
|
||||
int ret = 0;
|
||||
struct process_vm *vm = cpu_local_var(current)->vm;
|
||||
struct tid_rb_node *node;
|
||||
struct deferred_unmap_range *range;
|
||||
|
||||
dkprintf("%s: 0x%lx:%lu\n", __FUNCTION__, start, end - start);
|
||||
|
||||
//ihk_mc_spinlock_lock_noirq(&vm->vm_deferred_unmap_lock);
|
||||
|
||||
node = __hfi1_search_rb_overlapping_node(
|
||||
&cpu_local_var(current)->proc->hfi1_reg_tree,
|
||||
start, end);
|
||||
if (!node || node->freed) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
range = kmalloc(sizeof(*range), IHK_MC_AP_NOWAIT);
|
||||
if (!range) {
|
||||
kprintf("%s: ERROR: allocating memory\n", __FUNCTION__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
init_deferred_unmap_range(range, vm, (void *)start, end - start);
|
||||
|
||||
while (node) {
|
||||
struct hfi1_filedata *fd = node->fd;
|
||||
struct hfi1_ctxtdata *uctxt = fd ? fd->uctxt : NULL;
|
||||
|
||||
/* Sanity check */
|
||||
if (!uctxt ||
|
||||
fd->entry_to_rb[node->rcventry - uctxt->expected_base] != node) {
|
||||
kprintf("%s: ERROR: inconsistent TID node\n", __FUNCTION__);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
dkprintf("%s: node (0x%lx:%lu) deferred and invalidated"
|
||||
" in munmap for 0x%lx:%lu-0x%lx\n",
|
||||
__FUNCTION__, node->start, node->len, start, end - start, end);
|
||||
tid_rb_invalidate(fd, node);
|
||||
if (node->range) {
|
||||
kprintf("%s: WARNING: node->range is already set for 0x%lx:%lu\n",
|
||||
__FUNCTION__, start, end);
|
||||
}
|
||||
else {
|
||||
node->range = range;
|
||||
}
|
||||
++range->refcnt;
|
||||
|
||||
node = __hfi1_search_rb_overlapping_node(
|
||||
&cpu_local_var(current)->proc->hfi1_reg_tree,
|
||||
start, end);
|
||||
}
|
||||
|
||||
if (range->refcnt == 0) {
|
||||
kfree(range);
|
||||
}
|
||||
else {
|
||||
list_add_tail(&range->list, &vm->vm_deferred_unmap_range_list);
|
||||
ret = range->refcnt;
|
||||
}
|
||||
|
||||
//ihk_mc_spinlock_unlock_noirq(&vm->vm_deferred_unmap_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int hfi1_rb_tree_insert(struct rb_root *root,
|
||||
struct tid_rb_node *new_node)
|
||||
{
|
||||
struct rb_node **new = &(root->rb_node), *parent = NULL;
|
||||
struct tid_rb_node *tid_node;
|
||||
|
||||
while (*new) {
|
||||
tid_node = rb_entry(*new, struct tid_rb_node, rb_node);
|
||||
parent = *new;
|
||||
|
||||
if (new_node->end <= tid_node->start) {
|
||||
new = &((*new)->rb_left);
|
||||
}
|
||||
else if (new_node->start >= tid_node->end) {
|
||||
new = &((*new)->rb_right);
|
||||
}
|
||||
else {
|
||||
kprintf("%s: ERROR: overlapping TID nodes, "
|
||||
"node (0x%lx:%lu) <=> new (0x%lx:%lu)\n",
|
||||
__FUNCTION__,
|
||||
tid_node->start, tid_node->len,
|
||||
new_node->start, new_node->len);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
rb_link_node(&new_node->rb_node, parent, new);
|
||||
rb_insert_color(&new_node->rb_node, root);
|
||||
new_node->rb_root = root;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void __hfi1_rb_tree_remove(struct tid_rb_node *tid_node)
|
||||
{
|
||||
if (!tid_node->rb_root) {
|
||||
kprintf("%s: ERROR: node without rb_root??\n",
|
||||
__FUNCTION__);
|
||||
return;
|
||||
}
|
||||
rb_erase(&tid_node->rb_node, tid_node->rb_root);
|
||||
tid_node->rb_root = NULL;
|
||||
}
|
||||
|
||||
static struct tid_rb_node *__hfi1_search_rb_overlapping_node(
|
||||
struct rb_root *root,
|
||||
unsigned long start,
|
||||
unsigned long end)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
struct tid_rb_node *tid_node = NULL;
|
||||
|
||||
while (node) {
|
||||
tid_node = rb_entry(node, struct tid_rb_node, rb_node);
|
||||
|
||||
if (end <= tid_node->start) {
|
||||
node = node->rb_left;
|
||||
}
|
||||
else if (start >= tid_node->end) {
|
||||
node = node->rb_right;
|
||||
}
|
||||
else if (tid_node->freed) {
|
||||
node = rb_next(node);
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return node ? tid_node : NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Always return 0 from this function. A non-zero return indicates that the
|
||||
* remove operation will be called and that memory should be unpinned.
|
||||
* However, the driver cannot unpin out from under PSM. Instead, retain the
|
||||
* memory (by returning 0) and inform PSM that the memory is going away. PSM
|
||||
* will call back later when it has removed the memory from its list.
|
||||
*
|
||||
* XXX: in McKernel we attach tid nodes to memory ranges that are
|
||||
* about to be unmapped. Once we got all of them cleared, the actual
|
||||
* unmap is performed.
|
||||
*/
|
||||
static int tid_rb_invalidate(struct hfi1_filedata *fdata,
|
||||
struct tid_rb_node *node)
|
||||
{
|
||||
struct hfi1_ctxtdata *uctxt = fdata->uctxt;
|
||||
|
||||
if (node->freed)
|
||||
return 0;
|
||||
|
||||
node->freed = true;
|
||||
__hfi1_rb_tree_remove(node);
|
||||
hfi1_rb_tree_insert(
|
||||
&cpu_local_var(current)->proc->hfi1_inv_tree,
|
||||
node);
|
||||
|
||||
linux_spin_lock(&fdata->invalid_lock);
|
||||
if (fdata->invalid_tid_idx < uctxt->expected_count) {
|
||||
fdata->invalid_tids[fdata->invalid_tid_idx] =
|
||||
rcventry2tidinfo(node->rcventry - uctxt->expected_base);
|
||||
fdata->invalid_tids[fdata->invalid_tid_idx] |=
|
||||
EXP_TID_SET(LEN, node->len >> PAGE_SHIFT);
|
||||
if (!fdata->invalid_tid_idx) {
|
||||
unsigned long *ev;
|
||||
|
||||
/*
|
||||
* hfi1_set_uevent_bits() sets a user event flag
|
||||
* for all processes. Because calling into the
|
||||
* driver to process TID cache invalidations is
|
||||
* expensive and TID cache invalidations are
|
||||
* handled on a per-process basis, we can
|
||||
* optimize this to set the flag only for the
|
||||
* process in question.
|
||||
*/
|
||||
ev = uctxt->dd->events +
|
||||
(((uctxt->ctxt - uctxt->dd->first_dyn_alloc_ctxt) *
|
||||
HFI1_MAX_SHARED_CTXTS) + fdata->subctxt);
|
||||
set_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev);
|
||||
}
|
||||
fdata->invalid_tid_idx++;
|
||||
}
|
||||
linux_spin_unlock(&fdata->invalid_lock);
|
||||
return 0;
|
||||
}
|
||||
1635
kernel/user_sdma.c
Normal file
1635
kernel/user_sdma.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -19,7 +19,7 @@ void panic(const char *msg)
|
||||
|
||||
kprintf("%s\n", msg);
|
||||
|
||||
arch_print_stack();
|
||||
//arch_print_stack();
|
||||
|
||||
while (1) {
|
||||
cpu_halt();
|
||||
|
||||
@ -24,6 +24,7 @@ void cpu_halt(void);
|
||||
void cpu_safe_halt(void);
|
||||
void cpu_restore_interrupt(unsigned long);
|
||||
void cpu_pause(void);
|
||||
void cpu_relax(void);
|
||||
|
||||
#define barrier() arch_barrier()
|
||||
|
||||
@ -76,6 +77,7 @@ void ihk_mc_init_ap(void);
|
||||
void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
|
||||
void *stack_pointer,
|
||||
void (*next_function)(void));
|
||||
void *ihk_mc_get_linux_kernel_pgt(void);
|
||||
|
||||
int ihk_mc_get_extra_reg_id(unsigned long hw_config, unsigned long hw_config_ext);
|
||||
unsigned int ihk_mc_get_nr_extra_regs();
|
||||
|
||||
@ -176,7 +176,10 @@ int ihk_mc_pt_free_range(page_table_t pt, struct process_vm *vm,
|
||||
int ihk_mc_pt_change_attr_range(page_table_t pt, void *start, void *end,
|
||||
enum ihk_mc_pt_attribute clrattr,
|
||||
enum ihk_mc_pt_attribute setattr);
|
||||
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift, void **pgbasep, size_t *pgsizep, int *p2alignp);
|
||||
pte_t *ihk_mc_pt_lookup_pte(page_table_t pt, void *virt, int pgshift,
|
||||
void **pgbasep, size_t *pgsizep, int *p2alignp);
|
||||
pte_t *ihk_mc_pt_lookup_fault_pte(struct process_vm *vm, void *virt,
|
||||
int pgshift, void **basep, size_t *sizep, int *p2alignp);
|
||||
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
|
||||
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr,
|
||||
int pgshift, struct vm_range *range);
|
||||
|
||||
@ -41,6 +41,12 @@ static inline void INIT_LIST_HEAD(struct list_head *list)
|
||||
list->prev = list;
|
||||
}
|
||||
|
||||
static inline void ZERO_LIST_HEAD(struct list_head *list)
|
||||
{
|
||||
list->next = 0;
|
||||
list->prev = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a new entry between two known consecutive entries.
|
||||
*
|
||||
|
||||
@ -46,9 +46,7 @@ struct perf_event_attr;
|
||||
((nr) << _IOC_NRSHIFT) | \
|
||||
((size) << _IOC_SIZESHIFT))
|
||||
|
||||
#ifndef __KERNEL__
|
||||
#define _IOC_TYPECHECK(t) (sizeof(t))
|
||||
#endif
|
||||
|
||||
/* used to create numbers */
|
||||
#define _IO(type,nr) _IOC(_IOC_NONE,(type),(nr),0)
|
||||
|
||||
@ -16,19 +16,6 @@
|
||||
|
||||
#include <arch-memory.h>
|
||||
|
||||
#ifndef KERNEL_PHYS_OFFSET
|
||||
#define KERNEL_PHYS_OFFSET 0
|
||||
|
||||
static unsigned long virt_to_phys(void *v)
|
||||
{
|
||||
return (unsigned long)v - KERNEL_PHYS_OFFSET;
|
||||
}
|
||||
static void *phys_to_virt(unsigned long p)
|
||||
{
|
||||
return (void *)(p + KERNEL_PHYS_OFFSET);
|
||||
}
|
||||
#endif
|
||||
|
||||
struct process_vm;
|
||||
|
||||
unsigned long virt_to_phys(void *v);
|
||||
|
||||
100
test/mem_dest_prev/LTP_testcase.txt
Normal file
100
test/mem_dest_prev/LTP_testcase.txt
Normal file
@ -0,0 +1,100 @@
|
||||
brk01
|
||||
clone01
|
||||
clone03
|
||||
clone04
|
||||
clone06
|
||||
clone07
|
||||
close01
|
||||
close02
|
||||
dup01
|
||||
dup02
|
||||
dup03
|
||||
dup04
|
||||
dup05
|
||||
dup06
|
||||
dup07
|
||||
fork01
|
||||
fork02
|
||||
fork03
|
||||
fork04
|
||||
fork07
|
||||
fork08
|
||||
fork09
|
||||
fork10
|
||||
fork11
|
||||
mbind01
|
||||
mem01
|
||||
mem02
|
||||
mem03
|
||||
memcpy01
|
||||
memfd_create02
|
||||
memset01
|
||||
mkdir01
|
||||
mkdir08
|
||||
mkdirat01
|
||||
mknodat01
|
||||
mmap001
|
||||
mmap01
|
||||
mmap02
|
||||
mmap03
|
||||
mmap04
|
||||
mmap06
|
||||
mmap07
|
||||
mmap08
|
||||
mmap09
|
||||
mmap12
|
||||
mmapstress02
|
||||
mmapstress04
|
||||
mmapstress05
|
||||
mremap01
|
||||
mremap05
|
||||
open01
|
||||
open03
|
||||
open04
|
||||
open06
|
||||
open07
|
||||
open09
|
||||
open13
|
||||
poll01
|
||||
posix_fadvise01
|
||||
read01
|
||||
read02
|
||||
read03
|
||||
read04
|
||||
sbrk01
|
||||
sbrk02
|
||||
sendfile02
|
||||
sendfile03
|
||||
sendfile04
|
||||
sendfile05
|
||||
sendfile06
|
||||
sendfile07
|
||||
sendfile08
|
||||
sendfile09
|
||||
semctl01
|
||||
semctl03
|
||||
semctl05
|
||||
socket01
|
||||
socket02
|
||||
stream01
|
||||
stream02
|
||||
stream03
|
||||
stream04
|
||||
stream05
|
||||
unlink05
|
||||
unlink06
|
||||
unlink07
|
||||
unlink08
|
||||
vfork01
|
||||
vfork02
|
||||
vma01
|
||||
vmsplice01
|
||||
vmsplice02
|
||||
write01
|
||||
write03
|
||||
write04
|
||||
write05
|
||||
writetest
|
||||
writev01
|
||||
writev02
|
||||
writev07
|
||||
25
test/mem_dest_prev/README
Normal file
25
test/mem_dest_prev/README
Normal file
@ -0,0 +1,25 @@
|
||||
===================
|
||||
Advance preparation
|
||||
===================
|
||||
1)Implement patch of test_memtest_destroy.patch
|
||||
cd mckernel
|
||||
patch -p0 < test_memtest_destroy.patch
|
||||
make
|
||||
make install
|
||||
|
||||
|
||||
2)Compile command execution processing
|
||||
cd mckernel/test/mem_dest_prev/mcexec_test_proc/
|
||||
make
|
||||
|
||||
3)Write the LTP path to LTP_DIR in the configuration file
|
||||
vi config
|
||||
|
||||
ex) LTP_DIR=$HOME/test/mem_dest_prev/ltp/testcases/bin/
|
||||
|
||||
|
||||
==========
|
||||
How to run
|
||||
==========
|
||||
./go_test_McKernal.sh
|
||||
|
||||
13
test/mem_dest_prev/config
Normal file
13
test/mem_dest_prev/config
Normal file
@ -0,0 +1,13 @@
|
||||
MCMOD_DIR=$HOME/ppos
|
||||
LTP_DIR=$HOME/test/mem_dest_prev/ltp/testcases/bin/
|
||||
LTP_TESTCASE_FILE=LTP_testcase.txt
|
||||
MCRBT_OPT_LTP="-m 3G@0,3G@1 -s"
|
||||
USR_PROC="mcexec_test_proc/memtest_destroy"
|
||||
OS_IDX=0
|
||||
|
||||
export MCMOD_DIR
|
||||
export LTP_DIR
|
||||
export LTP_TESTCASE_FILE
|
||||
export MCRBT_OPT_LTP
|
||||
export USR_PROC
|
||||
export OS_IDX
|
||||
101
test/mem_dest_prev/go_test_McKernal.sh
Executable file
101
test/mem_dest_prev/go_test_McKernal.sh
Executable file
@ -0,0 +1,101 @@
|
||||
#!/bin/sh
|
||||
|
||||
# read config
|
||||
source ./config
|
||||
|
||||
#logfile="./result/test_result.log"
|
||||
|
||||
# mcexec processのkill
|
||||
./utils/kill_mcexec.sh &> /dev/null
|
||||
|
||||
for test_case in `ls -1 ./testcases/*.txt`
|
||||
do
|
||||
# read testcase param
|
||||
source ${test_case}
|
||||
case_name=`basename ${test_case} .txt`
|
||||
echo "####################"
|
||||
echo "Test No:${case_name}"
|
||||
|
||||
# Out-of-range address Test(Before correspondence)
|
||||
echo ">>> Out-of-range address Test(Before correspondence) Start"
|
||||
|
||||
# stop mckernel
|
||||
sudo ${MCMOD_DIR}/sbin/mcstop+release.sh
|
||||
sleep 1
|
||||
# boot mckernel
|
||||
echo "${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_BEFORE%,}"
|
||||
sudo ${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_BEFORE%,}
|
||||
sleep 1
|
||||
|
||||
echo " ${MCMOD_DIR}/bin/mcexec ${USR_PROC}"
|
||||
timeout -sKILL 5 ${MCMOD_DIR}/bin/mcexec ${USR_PROC}
|
||||
STATUS=$?
|
||||
|
||||
echo "${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg"
|
||||
sudo ${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg
|
||||
|
||||
if [ "$STATUS" -ne 21 ];
|
||||
then
|
||||
echo ">>> Out-of-range address Test End(Timeout!!!)"
|
||||
else
|
||||
echo ">>> Out-of-range address Test End"
|
||||
fi
|
||||
|
||||
# Out-of-range address Test(After correspondence)
|
||||
echo ">>> Out-of-range address(After correspondence) Test Start"
|
||||
|
||||
# stop mckernel
|
||||
sudo ${MCMOD_DIR}/sbin/mcstop+release.sh
|
||||
sleep 1
|
||||
# boot mckernel
|
||||
echo "${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_AFTER%,}"
|
||||
sudo ${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_AFTER%,}
|
||||
sleep 1
|
||||
|
||||
echo " ${MCMOD_DIR}/bin/mcexec ${USR_PROC}"
|
||||
timeout -sKILL 5 ${MCMOD_DIR}/bin/mcexec ${USR_PROC}
|
||||
STATUS=$?
|
||||
|
||||
echo "${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg"
|
||||
sudo ${MCMOD_DIR}/sbin/ihkosctl ${OS_IDX} kmsg
|
||||
|
||||
if [ "$STATUS" -ne 21 ];
|
||||
then
|
||||
echo ">>> Out-of-range address Test End(Timeout!!!)"
|
||||
else
|
||||
echo ">>> Out-of-range address Test End"
|
||||
fi
|
||||
done
|
||||
|
||||
### LTP START ##################################################
|
||||
# stop mckernel
|
||||
sudo ${MCMOD_DIR}/sbin/mcstop+release.sh
|
||||
sleep 1
|
||||
|
||||
# boot mckernel
|
||||
echo "${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_LTP%,}"
|
||||
sudo ${MCMOD_DIR}/sbin/mcreboot.sh ${MCRBT_OPT_LTP%,}
|
||||
sleep 1
|
||||
|
||||
if [ ! -e "/dev/mcos0" ]; then
|
||||
echo "Error: failed to mcreboot"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
||||
TEST_NUM=`wc -l ${LTP_TESTCASE_FILE} | awk '{print $1}'`
|
||||
echo ">>> LTP Test Start( $TEST_NUM counts )"
|
||||
|
||||
# exec mckernel test program
|
||||
COUNT=0
|
||||
while read line
|
||||
do
|
||||
((COUNT++))
|
||||
echo "$COUNT:${MCMOD_DIR}/bin/mcexec ${LTP_DIR}$line"
|
||||
# ${MCMOD_DIR}/bin/mcexec ${LTP_DIR}$line &>> ${logfile}
|
||||
${MCMOD_DIR}/bin/mcexec ${LTP_DIR}$line
|
||||
done < ${LTP_TESTCASE_FILE}
|
||||
|
||||
echo ">>> LTP Test End"
|
||||
### LTP END ####################################################
|
||||
|
||||
7
test/mem_dest_prev/mcexec_test_proc/Makefile
Normal file
7
test/mem_dest_prev/mcexec_test_proc/Makefile
Normal file
@ -0,0 +1,7 @@
|
||||
OBJS = memtest_destroy
|
||||
|
||||
all:$(OBJS)
|
||||
|
||||
clean:
|
||||
rm $(OBJS)
|
||||
|
||||
13
test/mem_dest_prev/mcexec_test_proc/memtest_destroy.c
Normal file
13
test/mem_dest_prev/mcexec_test_proc/memtest_destroy.c
Normal file
@ -0,0 +1,13 @@
|
||||
#include <stdio.h>
|
||||
#define _GNU_SOURCE
|
||||
#include <unistd.h>
|
||||
#include <sys/syscall.h>
|
||||
|
||||
main() {
|
||||
int rst = 0;
|
||||
|
||||
rst = syscall(950);
|
||||
printf("mem_destroy result:%d\n",rst);
|
||||
|
||||
return;
|
||||
}
|
||||
50
test/mem_dest_prev/test_memtest_destroy.patch
Normal file
50
test/mem_dest_prev/test_memtest_destroy.patch
Normal file
@ -0,0 +1,50 @@
|
||||
diff --git arch/x86/kernel/include/syscall_list.h arch/x86/kernel/include/syscall_list.h
|
||||
index 42d1e2e..f5769b8 100644
|
||||
--- arch/x86/kernel/include/syscall_list.h
|
||||
+++ arch/x86/kernel/include/syscall_list.h
|
||||
@@ -156,5 +156,7 @@ SYSCALL_HANDLED(__NR_profile, profile)
|
||||
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
|
||||
SYSCALL_HANDLED(731, util_indicate_clone)
|
||||
SYSCALL_HANDLED(732, get_system)
|
||||
+/* McKernel Specific */
|
||||
+SYSCALL_HANDLED(950, mem_destroy)
|
||||
|
||||
/**** End of File ****/
|
||||
diff --git arch/x86/kernel/syscall.c arch/x86/kernel/syscall.c
|
||||
index 2260b66..e96776a 100644
|
||||
--- arch/x86/kernel/syscall.c
|
||||
+++ arch/x86/kernel/syscall.c
|
||||
@@ -1887,4 +1887,33 @@ save_uctx(void *uctx, struct x86_user_context *regs)
|
||||
ctx->fregsize = 0;
|
||||
}
|
||||
|
||||
+
|
||||
+#define ADD_ADDR_VAL 0x400
|
||||
+SYSCALL_DECLARE(mem_destroy)
|
||||
+{
|
||||
+ int rst = 0;
|
||||
+ int mem_chunks_num, chunk_id, get_numa_id;
|
||||
+ unsigned long get_start, get_end;
|
||||
+ unsigned long *addr;
|
||||
+
|
||||
+ mem_chunks_num = ihk_mc_get_nr_memory_chunks();
|
||||
+ kprintf("%s: memory chunk %d.\n", __FUNCTION__, mem_chunks_num);
|
||||
+
|
||||
+ for (chunk_id = 0; chunk_id < mem_chunks_num; chunk_id++) {
|
||||
+ rst = ihk_mc_get_memory_chunk(chunk_id, &get_start, &get_end, &get_numa_id);
|
||||
+ kprintf("%s: mem chunk[%d] numa ID(%d)\n"
|
||||
+ ,__FUNCTION__ ,chunk_id ,get_numa_id);
|
||||
+ kprintf(" phys(0x%lx - 0x%lx) virt(0x%lx - 0x%lx)\n"
|
||||
+ ,get_start ,get_end ,phys_to_virt(get_start) ,phys_to_virt(get_end));
|
||||
+ }
|
||||
+
|
||||
+ addr = phys_to_virt(get_end + ADD_ADDR_VAL);
|
||||
+#if 1
|
||||
+ *addr = 0x1;
|
||||
+#endif
|
||||
+ kprintf("%s: Address out of range 0x%lx(val:%d)\n",__FUNCTION__ ,addr ,*addr);
|
||||
+
|
||||
+ return rst;
|
||||
+}
|
||||
+
|
||||
/*** End of File ***/
|
||||
2
test/mem_dest_prev/testcases/0001.txt
Normal file
2
test/mem_dest_prev/testcases/0001.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 3G 1`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 3G 1` -s"
|
||||
2
test/mem_dest_prev/testcases/0002.txt
Normal file
2
test/mem_dest_prev/testcases/0002.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 2`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 2` -s"
|
||||
2
test/mem_dest_prev/testcases/0003.txt
Normal file
2
test/mem_dest_prev/testcases/0003.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 4`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 4` -s"
|
||||
2
test/mem_dest_prev/testcases/0004.txt
Normal file
2
test/mem_dest_prev/testcases/0004.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 8`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 8` -s"
|
||||
2
test/mem_dest_prev/testcases/0005.txt
Normal file
2
test/mem_dest_prev/testcases/0005.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 16`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 16` -s"
|
||||
2
test/mem_dest_prev/testcases/0006.txt
Normal file
2
test/mem_dest_prev/testcases/0006.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 32`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 32` -s"
|
||||
2
test/mem_dest_prev/testcases/0007.txt
Normal file
2
test/mem_dest_prev/testcases/0007.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 48`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 48` -s"
|
||||
2
test/mem_dest_prev/testcases/0008.txt
Normal file
2
test/mem_dest_prev/testcases/0008.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 64`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 64` -s"
|
||||
2
test/mem_dest_prev/testcases/0009.txt
Normal file
2
test/mem_dest_prev/testcases/0009.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 96`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 96` -s"
|
||||
2
test/mem_dest_prev/testcases/0010.txt
Normal file
2
test/mem_dest_prev/testcases/0010.txt
Normal file
@ -0,0 +1,2 @@
|
||||
MCRBT_OPT_BEFORE="-m `./utils/gen_mem_chunks.sh "0 1" 32M 128`"
|
||||
MCRBT_OPT_AFTER="-m `./utils/gen_mem_chunks.sh "0 1" 32M 128` -s"
|
||||
16
test/mem_dest_prev/utils/gen_mem_chunks.sh
Executable file
16
test/mem_dest_prev/utils/gen_mem_chunks.sh
Executable file
@ -0,0 +1,16 @@
|
||||
#!/bin/sh
|
||||
|
||||
NUMAS=$1
|
||||
MEM_SIZE=$2
|
||||
REP=$3
|
||||
CHUNKS=""
|
||||
|
||||
for numa in ${NUMAS}
|
||||
do
|
||||
for rep in `seq 1 ${REP}`
|
||||
do
|
||||
CHUNKS="${CHUNKS}${MEM_SIZE}@${numa},"
|
||||
done
|
||||
done
|
||||
|
||||
echo ${CHUNKS%,}
|
||||
10
test/mem_dest_prev/utils/kill_mcexec.sh
Executable file
10
test/mem_dest_prev/utils/kill_mcexec.sh
Executable file
@ -0,0 +1,10 @@
|
||||
#!/bin/sh
|
||||
|
||||
count=`pgrep -c -f 'mcexec '`
|
||||
if [ ${count} -gt 0 ]
|
||||
then
|
||||
echo "kill process :" ${count}
|
||||
pgrep -l -f 'mcexec '
|
||||
pgrep -f 'mcexec ' | xargs kill -9
|
||||
fi
|
||||
|
||||
Reference in New Issue
Block a user