remote_flush_tlb_array_cpumask(): bundle remote TLB invalidations

This commit is contained in:
Balazs Gerofi
2017-07-21 15:34:48 +09:00
parent bc423255d9
commit 277ac81bb2
3 changed files with 97 additions and 43 deletions

View File

@ -1077,8 +1077,27 @@ struct clear_range_args {
int free_physical;
struct memobj *memobj;
struct process_vm *vm;
unsigned long *addr;
int nr_addr;
int max_nr_addr;
};
/*
 * remote_flush_tlb_add_addr(): queue @addr for a batched remote TLB
 * invalidation.  Addresses accumulate in args->addr[]; once the array is
 * full (args->max_nr_addr entries) the pending batch is sent to the other
 * CPUs via remote_flush_tlb_array_cpumask() and the array is restarted
 * with @addr as its first element.
 *
 * NOTE(review): the final, partial batch is NOT flushed here — the caller
 * must flush it (clear_range() does so after walk_pte_l4() returns).
 */
static void remote_flush_tlb_add_addr(struct clear_range_args *args,
unsigned long addr)
{
/* Fast path: room left in the current batch — just record the address. */
if (args->nr_addr < args->max_nr_addr) {
args->addr[args->nr_addr] = addr;
++args->nr_addr;
return;
}
/* Batch full: invalidate everything collected so far on the remote CPUs,
 * then start a new batch containing only @addr. */
remote_flush_tlb_array_cpumask(args->vm, args->addr, args->nr_addr,
ihk_mc_get_processor_id());
args->addr[0] = addr;
args->nr_addr = 1;
}
static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
uint64_t start, uint64_t end)
{
@ -1092,7 +1111,7 @@ static int clear_range_l1(void *args0, pte_t *ptep, uint64_t base,
}
old = xchg(ptep, PTE_NULL);
remote_flush_tlb_cpumask(args->vm, base, ihk_mc_get_processor_id());
remote_flush_tlb_add_addr(args, base);
page = NULL;
if (!pte_is_fileoff(&old, PTL1_SIZE)) {
@ -1141,8 +1160,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
if (*ptep & PFL2_SIZE) {
old = xchg(ptep, PTE_NULL);
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
remote_flush_tlb_add_addr(args, base);
page = NULL;
if (!pte_is_fileoff(&old, PTL2_SIZE)) {
@ -1174,8 +1192,7 @@ static int clear_range_l2(void *args0, pte_t *ptep, uint64_t base,
if ((start <= base) && ((base + PTL2_SIZE) <= end)) {
*ptep = PTE_NULL;
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
remote_flush_tlb_add_addr(args, base);
ihk_mc_free_pages(pt, 1);
}
@ -1207,8 +1224,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
if (*ptep & PFL3_SIZE) {
old = xchg(ptep, PTE_NULL);
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
remote_flush_tlb_add_addr(args, base);
page = NULL;
if (!pte_is_fileoff(&old, PTL3_SIZE)) {
@ -1239,8 +1255,7 @@ static int clear_range_l3(void *args0, pte_t *ptep, uint64_t base,
if (use_1gb_page && (start <= base) && ((base + PTL3_SIZE) <= end)) {
*ptep = PTE_NULL;
remote_flush_tlb_cpumask(args->vm, base,
ihk_mc_get_processor_id());
remote_flush_tlb_add_addr(args, base);
ihk_mc_free_pages(pt, 1);
}
@ -1260,8 +1275,10 @@ static int clear_range_l4(void *args0, pte_t *ptep, uint64_t base,
return walk_pte_l3(pt, base, start, end, &clear_range_l3, args0);
}
static int clear_range(struct page_table *pt, struct process_vm *vm,
uintptr_t start, uintptr_t end, int free_physical,
#define TLB_INVALID_ARRAY_PAGES (4)
static int clear_range(struct page_table *pt, struct process_vm *vm,
uintptr_t start, uintptr_t end, int free_physical,
struct memobj *memobj)
{
int error;
@ -1276,6 +1293,17 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
return -EINVAL;
}
/* TODO: embed this in tlb_flush_entry? */
args.addr = (unsigned long *)ihk_mc_alloc_pages(
TLB_INVALID_ARRAY_PAGES, IHK_MC_AP_CRITICAL);
if (!args.addr) {
ekprintf("%s: error: allocating address array\n", __FUNCTION__);
return -ENOMEM;
}
args.nr_addr = 0;
args.max_nr_addr = (TLB_INVALID_ARRAY_PAGES * PAGE_SIZE /
sizeof(uint64_t));
args.free_physical = free_physical;
if (memobj && (memobj->flags & MF_DEV_FILE)) {
args.free_physical = 0;
@ -1287,6 +1315,13 @@ static int clear_range(struct page_table *pt, struct process_vm *vm,
args.vm = vm;
error = walk_pte_l4(pt, 0, start, end, &clear_range_l4, &args);
if (args.nr_addr) {
remote_flush_tlb_array_cpumask(vm, args.addr, args.nr_addr,
ihk_mc_get_processor_id());
}
ihk_mc_free_pages(args.addr, TLB_INVALID_ARRAY_PAGES);
return error;
}

View File

@ -885,86 +885,98 @@ void coredump(struct thread *thread, void *regs)
freecore(&coretable);
}
void remote_flush_tlb_cpumask(struct process_vm *vm,
/*
 * remote_flush_tlb_cpumask(): single-address convenience wrapper around
 * remote_flush_tlb_array_cpumask().  An address of zero denotes a full
 * TLB flush (see the array variant).
 */
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id)
{
/* Stack copy so the array variant can take a one-element array. */
unsigned long __addr = addr;
return remote_flush_tlb_array_cpumask(vm, &__addr, 1, cpu_id);
}
/*
 * remote_flush_tlb_array_cpumask(): interrupt every other CPU in @vm's
 * cpu_set and have each one invalidate the @nr_addr page addresses in
 * @addr[] (addr[0] == 0 denotes a full TLB flush).  Flushes the same
 * addresses locally as well, then spins until all remote CPUs have
 * acknowledged by decrementing flush_entry->pending.
 *
 * NOTE(review): this span is a rendered diff hunk — adjacent duplicate or
 * near-duplicate lines below are the removed (old) and added (new)
 * versions of the same statement from the commit, not genuine repeated
 * statements.  Do not treat this text as compilable source.
 */
void remote_flush_tlb_array_cpumask(struct process_vm *vm,
unsigned long *addr,
int nr_addr,
int cpu_id)
{
unsigned long cpu;
int flush_ind;
struct tlb_flush_entry *flush_entry;
cpu_set_t _cpu_set;
/* Pick the IRQ-vector slot from the first address (old code hashed the
 * scalar addr; new code hashes addr[0]). */
if (addr) {
flush_ind = (addr >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
if (addr[0]) {
flush_ind = (addr[0] >> PAGE_SHIFT) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
}
/* Zero address denotes full TLB flush */
else {
else {
/* Random.. */
flush_ind = (rdtsc()) % IHK_TLB_FLUSH_IRQ_VECTOR_SIZE;
}
flush_entry = &tlb_flush_vector[flush_ind];
flush_entry = &tlb_flush_vector[flush_ind];
/* Take a copy of the cpu set so that we don't hold the lock
* all the way while interrupting other cores */
ihk_mc_spinlock_lock_noirq(&vm->address_space->cpu_set_lock);
memcpy(&_cpu_set, &vm->address_space->cpu_set, sizeof(cpu_set_t));
ihk_mc_spinlock_unlock_noirq(&vm->address_space->cpu_set_lock);
dkprintf("trying to aquire flush_entry->lock flush_ind: %d\n", flush_ind);
/* Publish the flush request in the shared vector slot; the lock keeps
 * the slot stable until every remote CPU has acknowledged. */
ihk_mc_spinlock_lock_noirq(&flush_entry->lock);
flush_entry->vm = vm;
flush_entry->addr = addr;
flush_entry->nr_addr = nr_addr;
ihk_atomic_set(&flush_entry->pending, 0);
dkprintf("lock aquired, iterating cpu mask.. flush_ind: %d\n", flush_ind);
/* Loop through CPUs in this address space and interrupt them for
* TLB flush on the specified address */
for_each_set_bit(cpu, (const unsigned long*)&_cpu_set.__bits, CPU_SETSIZE) {
if (ihk_mc_get_processor_id() == cpu)
if (ihk_mc_get_processor_id() == cpu)
continue;
/* One pending increment per interrupted CPU; each remote handler
 * decrements it when done. */
ihk_atomic_inc(&flush_entry->pending);
dkprintf("remote_flush_tlb_cpumask: flush_ind: %d, addr: 0x%lX, interrupting cpu: %d\n",
flush_ind, addr, cpu);
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id,
ihk_mc_interrupt_cpu(get_x86_cpu_local_variable(cpu)->apic_id,
flush_ind + IHK_TLB_FLUSH_IRQ_VECTOR_START);
}
#ifdef DEBUG_IC_TLB
{
unsigned long tsc;
tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */
#endif
/* Local flush: old code flushed the single scalar address, new code
 * walks the address array (addr[0] == 0 means full flush). */
if (flush_entry->addr) {
flush_tlb_single(flush_entry->addr & PAGE_MASK);
if (flush_entry->addr[0]) {
int i;
for (i = 0; i < flush_entry->nr_addr; ++i) {
flush_tlb_single(flush_entry->addr[i] & PAGE_MASK);
}
}
/* Zero address denotes full TLB flush */
else {
flush_tlb();
}
/* Flush on this core */
flush_tlb_single(addr & PAGE_MASK);
/* Wait for all cores */
while (ihk_atomic_read(&flush_entry->pending) != 0) {
cpu_pause();
#ifdef DEBUG_IC_TLB
if (rdtsc() > tsc) {
kprintf("waited 10 secs for remote TLB!! -> panic_all()\n");
panic_all_cores("waited 10 secs for remote TLB!!\n");
kprintf("waited 10 secs for remote TLB!! -> panic_all()\n");
panic_all_cores("waited 10 secs for remote TLB!!\n");
}
#endif
}
#ifdef DEBUG_IC_TLB
}
#endif
ihk_mc_spinlock_unlock_noirq(&flush_entry->lock);
}
@ -975,25 +987,27 @@ void tlb_flush_handler(int vector)
#endif // PROFILE_ENABLE
int flags = cpu_disable_interrupt_save();
struct tlb_flush_entry *flush_entry = &tlb_flush_vector[vector -
struct tlb_flush_entry *flush_entry = &tlb_flush_vector[vector -
IHK_TLB_FLUSH_IRQ_VECTOR_START];
dkprintf("decreasing pending cnt for %d\n",
vector - IHK_TLB_FLUSH_IRQ_VECTOR_START);
/* Decrease counter */
ihk_atomic_dec(&flush_entry->pending);
if (flush_entry->addr[0]) {
int i;
dkprintf("flusing TLB for addr: 0x%lX\n", flush_entry->addr);
if (flush_entry->addr) {
flush_tlb_single(flush_entry->addr & PAGE_MASK);
for (i = 0; i < flush_entry->nr_addr; ++i) {
flush_tlb_single(flush_entry->addr[i] & PAGE_MASK);
dkprintf("flusing TLB for addr: 0x%lX\n", flush_entry->addr[i]);
}
}
/* Zero address denotes full TLB flush */
else {
flush_tlb();
}
/* Decrease counter */
dkprintf("decreasing pending cnt for %d\n",
vector - IHK_TLB_FLUSH_IRQ_VECTOR_START);
ihk_atomic_dec(&flush_entry->pending);
cpu_restore_interrupt(flags);
#ifdef PROFILE_ENABLE
{

View File

@ -215,6 +215,10 @@ int ihk_mc_get_memory_chunk(int id,
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);
void remote_flush_tlb_array_cpumask(struct process_vm *vm,
unsigned long *addr,
int nr_addr,
int cpu_id);
int ihk_set_kmsg(unsigned long addr, unsigned long size);
char *ihk_get_kargs();
@ -226,7 +230,8 @@ extern void (*__tlb_flush_handler)(int vector);
/*
 * Per-vector TLB shootdown request slot shared between the initiating CPU
 * and the interrupted CPUs.  NOTE(review): rendered diff — the scalar
 * `unsigned long addr;` line is the removed old field; the pointer form
 * below it is its replacement.
 */
struct tlb_flush_entry {
struct process_vm *vm;		/* address space being flushed */
unsigned long addr;
unsigned long *addr;		/* array of page addresses; addr[0]==0 => full flush */
int nr_addr;			/* number of valid entries in addr[] */
ihk_atomic_t pending;		/* remote CPUs that have not yet acknowledged */
ihk_spinlock_t lock;		/* serializes use of this slot */
} __attribute__((aligned(64)));	/* presumably cache-line aligned to avoid false sharing — TODO confirm line size */