remote TLB invalidation code for multi-threaded applications (e.g., during munmap())

This commit is contained in:
Balazs Gerofi bgerofi@riken.jp
2014-07-22 12:24:07 +09:00
parent a5b36e2b51
commit 101a0f6e4a
11 changed files with 435 additions and 33 deletions

View File

@ -407,6 +407,7 @@ void setup_x86_ap(void (*next_func)(void))
void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs);
void check_signal(unsigned long rc, void *regs);
extern void tlb_flush_handler(int vector);
void handle_interrupt(int vector, struct x86_regs *regs)
{
@ -419,7 +420,8 @@ void handle_interrupt(int vector, struct x86_regs *regs)
if (vector < 0 || vector > 255) {
panic("Invalid interrupt vector.");
} else if (vector < 32) {
}
else if (vector < 32) {
if (vector == 8 ||
(vector >= 10 && vector <= 15) || vector == 17) {
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
@ -430,7 +432,13 @@ void handle_interrupt(int vector, struct x86_regs *regs)
}
arch_show_interrupt_context(regs);
panic("Unhandled exception");
} else {
}
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
tlb_flush_handler(vector);
}
else {
list_for_each_entry(h, &handlers[vector - 32], list) {
if (h->func) {
h->func(h->priv);

View File

@ -24,4 +24,217 @@ static inline int fls(int x)
return r + 1;
}
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 */
static inline int ffs(int x)
{
int r;
/*
 * bsfl writes the zero-based index of the lowest set bit of x into r.
 * For x == 0 the jnz is not taken, so r is forced to -1 and the
 * "+ 1" below turns that into the documented 0 result.
 */
asm("bsfl %1,%0\n\t"
"jnz 1f\n\t"
"movl $-1,%0\n"
"1:" : "=r" (r) : "rm" (x));
/* Convert the zero-based index (or -1) to the 1-based convention. */
return r + 1;
}
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Returns the zero-based index of the least significant set bit of @word.
 * Undefined if no bit exists, so code should check against 0 first.
 */
static inline unsigned long __ffs(unsigned long word)
{
/* The same variable is used as bsf source and destination, so the
 * index overwrites the input value. */
asm("bsf %1,%0"
: "=r" (word)
: "rm" (word));
return word;
}
/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Returns the zero-based index of the least significant clear bit of @word.
 * Undefined if no zero exists, so code should check against ~0UL first.
 */
static inline unsigned long ffz(unsigned long word)
{
/* Scanning the complement for a set bit finds the first zero bit of the
 * original value; the index overwrites word. */
asm("bsf %1,%0"
: "=r" (word)
: "r" (~word));
return word;
}
/*
 * Memory operand shared by the bit-modifying helpers below. Expands to a
 * dereference of the variable named "addr" in the enclosing function, so it
 * can only be used where such a variable is in scope.
 */
#define ADDR (*(volatile long *)addr)
/*
 * set_bit - set bit @nr in the bitmap starting at @addr
 * @nr: index of the bit to set
 * @addr: start of the bitmap
 *
 * The bts instruction carries a lock prefix, and the "memory" clobber keeps
 * the compiler from moving memory accesses across the operation.
 */
static inline void set_bit(int nr, volatile unsigned long *addr)
{
asm volatile("lock; btsl %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
}
/*
 * clear_bit - clear bit @nr in the bitmap starting at @addr
 * @nr: index of the bit to clear
 * @addr: start of the bitmap
 *
 * Counterpart of set_bit(): a lock-prefixed btr on the memory operand, with
 * a "memory" clobber so the compiler does not reorder memory accesses
 * around it.
 */
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
asm volatile("lock; btrl %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
}
/*
 * for_each_set_bit - loop header that visits every set bit of a bitmap
 * @bit:  lvalue that receives the index of the current set bit
 * @addr: start of the bitmap
 * @size: number of bits in the bitmap
 *
 * Starts at find_first_bit() and advances with find_next_bit() until the
 * returned index reaches @size. All three arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define for_each_set_bit(bit, addr, size) \
for ((bit) = find_first_bit((addr), (size)); \
(bit) < (size); \
(bit) = find_next_bit((addr), (size), (bit) + 1))
/* Index of the unsigned long that holds bit number nr. */
#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
/*
 * Find the next set bit in a memory region.
 *
 * Searches the bitmap at addr, which holds size bits, for the first set bit
 * whose index is >= offset. Returns that index, or size when there is no
 * such bit (including when offset is already >= size).
 */
static unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
		unsigned long offset)
{
	const unsigned long *word;
	unsigned long base;		/* bit index of bit 0 of *word */
	unsigned long remaining;	/* bits still to examine, from base */
	unsigned long skip;		/* bits to ignore in the first word */
	unsigned long bits;

	if (offset >= size)
		return size;

	word = addr + BITOP_WORD(offset);
	base = offset & ~(BITS_PER_LONG-1);
	remaining = size - base;
	skip = offset % BITS_PER_LONG;

	if (skip) {
		/* Partial first word: drop the bits below offset. */
		bits = *word++ & (~0UL << skip);
		if (remaining < BITS_PER_LONG)
			goto last_word;
		if (bits)
			return base + __ffs(bits);
		remaining -= BITS_PER_LONG;
		base += BITS_PER_LONG;
	}

	/* Whole words. */
	for (; remaining >= BITS_PER_LONG;
	     remaining -= BITS_PER_LONG, base += BITS_PER_LONG) {
		bits = *word++;
		if (bits)
			return base + __ffs(bits);
	}

	if (!remaining)
		return base;
	bits = *word;

last_word:
	/* Trailing partial word: drop the bits at or above size. */
	bits &= ~0UL >> (BITS_PER_LONG - remaining);
	if (!bits)
		return base + remaining;	/* nothing set */
	return base + __ffs(bits);
}
/*
 * Find the next cleared bit in a memory region.
 *
 * Searches the bitmap at addr, which holds size bits, for the first clear
 * bit whose index is >= offset. Returns that index, or size when there is
 * no such bit (including when offset is already >= size).
 *
 * This implementation of find_{first,next}_zero_bit was stolen from
 * Linus' asm-alpha/bitops.h.
 */
static unsigned long find_next_zero_bit(const unsigned long *addr,
		unsigned long size, unsigned long offset)
{
	const unsigned long *word;
	unsigned long base;		/* bit index of bit 0 of *word */
	unsigned long remaining;	/* bits still to examine, from base */
	unsigned long skip;		/* bits to ignore in the first word */
	unsigned long bits;

	if (offset >= size)
		return size;

	word = addr + BITOP_WORD(offset);
	base = offset & ~(BITS_PER_LONG-1);
	remaining = size - base;
	skip = offset % BITS_PER_LONG;

	if (skip) {
		/* Partial first word: treat the bits below offset as set. */
		bits = *word++ | (~0UL >> (BITS_PER_LONG - skip));
		if (remaining < BITS_PER_LONG)
			goto last_word;
		if (bits != ~0UL)
			return base + ffz(bits);
		remaining -= BITS_PER_LONG;
		base += BITS_PER_LONG;
	}

	/* Whole words. */
	for (; remaining >= BITS_PER_LONG;
	     remaining -= BITS_PER_LONG, base += BITS_PER_LONG) {
		bits = *word++;
		if (bits != ~0UL)
			return base + ffz(bits);
	}

	if (!remaining)
		return base;
	bits = *word;

last_word:
	/* Trailing partial word: treat the bits at or above size as set. */
	bits |= ~0UL << remaining;
	if (bits == ~0UL)
		return base + remaining;	/* no zero bit */
	return base + ffz(bits);
}
/*
 * Find the first set bit in a memory region.
 *
 * Scans the size-bit bitmap at addr from bit 0 upward and returns the index
 * of the lowest set bit, or size when no bit is set.
 */
static unsigned long find_first_bit(const unsigned long *addr,
		unsigned long size)
{
	const unsigned long *word = addr;
	unsigned long base = 0;			/* bit index of bit 0 of *word */
	unsigned long remaining = size;		/* bits still to examine */
	unsigned long bits;

	/* Whole words. */
	for (; remaining >= BITS_PER_LONG;
	     remaining -= BITS_PER_LONG, base += BITS_PER_LONG) {
		bits = *word++;
		if (bits)
			return base + __ffs(bits);
	}

	if (!remaining)
		return base;

	/* Trailing partial word: only the low "remaining" bits count. */
	bits = *word & (~0UL >> (BITS_PER_LONG - remaining));
	if (!bits)
		return base + remaining;	/* nothing set */
	return base + __ffs(bits);
}
/*
 * Find the first cleared bit in a memory region.
 *
 * Scans the size-bit bitmap at addr from bit 0 upward and returns the index
 * of the lowest clear bit, or size when every bit is set.
 */
static unsigned long find_first_zero_bit(const unsigned long *addr,
		unsigned long size)
{
	const unsigned long *word = addr;
	unsigned long base = 0;			/* bit index of bit 0 of *word */
	unsigned long remaining = size;		/* bits still to examine */
	unsigned long bits;

	/* Whole words. */
	for (; remaining >= BITS_PER_LONG;
	     remaining -= BITS_PER_LONG, base += BITS_PER_LONG) {
		bits = *word++;
		if (bits != ~0UL)
			return base + ffz(bits);
	}

	if (!remaining)
		return base;

	/* Trailing partial word: treat the bits at or above size as set. */
	bits = *word | (~0UL << remaining);
	if (bits == ~0UL)
		return base + remaining;	/* no zero bit */
	return base + ffz(bits);
}
#endif

View File

@ -31,5 +31,9 @@ typedef int64_t off_t;
#define NULL ((void *)0)
#define BITS_PER_LONG_SHIFT 6
#define BITS_PER_LONG (1 << BITS_PER_LONG_SHIFT)
#endif

View File

@ -1005,6 +1005,7 @@ struct clear_range_args {
int free_physical;
uint8_t padding[4];
struct memobj *memobj;
struct process_vm *vm;
};
static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
@ -1032,6 +1033,8 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
ihk_mc_free_pages(phys_to_virt(phys), 1);
}
}
remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
return 0;
}
@ -1079,6 +1082,8 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
}
}
remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
return 0;
}
@ -1122,8 +1127,9 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
return walk_pte_l3(pt, base, start, end, &clear_range_l3, args0);
}
static int clear_range(struct page_table *pt, uintptr_t start, uintptr_t end,
int free_physical, struct memobj *memobj)
static int clear_range(struct page_table *pt, struct process_vm *vm,
uintptr_t start, uintptr_t end, int free_physical,
struct memobj *memobj)
{
int error;
struct clear_range_args args;
@ -1137,22 +1143,25 @@ static int clear_range(struct page_table *pt, uintptr_t start, uintptr_t end,
args.free_physical = free_physical;
args.memobj = memobj;
args.vm = vm;
error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args);
return error;
}
int ihk_mc_pt_clear_range(page_table_t pt, void *start, void *end)
int ihk_mc_pt_clear_range(page_table_t pt, struct process_vm *vm,
void *start, void *end)
{
#define KEEP_PHYSICAL 0
return clear_range(pt, (uintptr_t)start, (uintptr_t)end,
return clear_range(pt, vm, (uintptr_t)start, (uintptr_t)end,
KEEP_PHYSICAL, NULL);
}
int ihk_mc_pt_free_range(page_table_t pt, void *start, void *end, struct memobj *memobj)
int ihk_mc_pt_free_range(page_table_t pt, struct process_vm *vm,
void *start, void *end, struct memobj *memobj)
{
#define FREE_PHYSICAL 1
return clear_range(pt, (uintptr_t)start, (uintptr_t)end,
return clear_range(pt, vm, (uintptr_t)start, (uintptr_t)end,
FREE_PHYSICAL, memobj);
}
@ -1474,6 +1483,7 @@ struct set_range_args {
enum ihk_mc_pt_attribute attr;
int padding;
uintptr_t diff;
struct process_vm *vm;
};
int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
@ -1489,7 +1499,7 @@ int set_range_l1(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
error = -EBUSY;
ekprintf("set_range_l1(%lx,%lx,%lx):page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL);
(void)clear_range(args->pt, args->vm, start, base, KEEP_PHYSICAL, NULL);
goto out;
}
@ -1536,7 +1546,7 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l2(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base,
(void)clear_range(args->pt, args->vm, start, base,
KEEP_PHYSICAL, NULL);
goto out;
}
@ -1548,7 +1558,7 @@ int set_range_l2(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l2(%lx,%lx,%lx):"
"page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL);
(void)clear_range(args->pt, args->vm, start, base, KEEP_PHYSICAL, NULL);
goto out;
}
else {
@ -1604,7 +1614,7 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l3(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base,
(void)clear_range(args->pt, args->vm, start, base,
KEEP_PHYSICAL, NULL);
goto out;
}
@ -1615,7 +1625,7 @@ int set_range_l3(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l3(%lx,%lx,%lx):"
"page exists. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base, KEEP_PHYSICAL, NULL);
(void)clear_range(args->pt, args->vm, start, base, KEEP_PHYSICAL, NULL);
goto out;
}
else {
@ -1653,7 +1663,7 @@ int set_range_l4(void *args0, pte_t *ptep, uintptr_t base, uintptr_t start,
ekprintf("set_range_l4(%lx,%lx,%lx):"
"__alloc_new_pt failed. %d %lx\n",
base, start, end, error, *ptep);
(void)clear_range(args->pt, start, base,
(void)clear_range(args->pt, args->vm, start, base,
KEEP_PHYSICAL, NULL);
goto out;
}
@ -1678,8 +1688,8 @@ out:
return error;
}
int ihk_mc_pt_set_range(page_table_t pt, void *start, void *end,
uintptr_t phys, enum ihk_mc_pt_attribute attr)
int ihk_mc_pt_set_range(page_table_t pt, struct process_vm *vm, void *start,
void *end, uintptr_t phys, enum ihk_mc_pt_attribute attr)
{
int error;
struct set_range_args args;
@ -1691,6 +1701,7 @@ int ihk_mc_pt_set_range(page_table_t pt, void *start, void *end,
args.phys = phys;
args.attr = attr;
args.diff = (uintptr_t)start ^ phys;
args.vm = vm;
error = walk_pte_l4(pt, 0, (uintptr_t)start, (uintptr_t)end,
&set_range_l4, &args);
@ -1805,9 +1816,11 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
struct move_args {
uintptr_t src;
uintptr_t dest;
struct process_vm *vm;
};
static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, void *pgaddr, size_t pgsize)
static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep,
void *pgaddr, size_t pgsize)
{
int error;
struct move_args *args = arg0;
@ -1833,7 +1846,7 @@ static int move_one_page(void *arg0, page_table_t pt, pte_t *ptep, void *pgaddr,
phys = apte & PT_PHYSMASK;
attr = apte & ~PT_PHYSMASK;
error = ihk_mc_pt_set_range(pt, (void *)dest,
error = ihk_mc_pt_set_range(pt, args->vm, (void *)dest,
(void *)(dest + pgsize), phys, attr);
if (error) {
kprintf("move_one_page(%p,%p,%p %#lx,%p,%#lx):"
@ -1849,7 +1862,8 @@ out:
return error;
}
int move_pte_range(page_table_t pt, void *src, void *dest, size_t size)
int move_pte_range(page_table_t pt, struct process_vm *vm,
void *src, void *dest, size_t size)
{
int error;
struct move_args args;
@ -1857,6 +1871,7 @@ int move_pte_range(page_table_t pt, void *src, void *dest, size_t size)
dkprintf("move_pte_range(%p,%p,%p,%#lx)\n", pt, src, dest, size);
args.src = (uintptr_t)src;
args.dest = (uintptr_t)dest;
args.vm = vm;
error = visit_pte_range(pt, src, src+size, VPTEF_SKIP_NULL,
&move_one_page, &args);