diff --git a/executer/include/uprotocol.h b/executer/include/uprotocol.h index 5aebd6c2..9615352a 100644 --- a/executer/include/uprotocol.h +++ b/executer/include/uprotocol.h @@ -111,10 +111,8 @@ typedef unsigned long __cpu_set_unit; struct program_load_desc { int num_sections; - int status; int cpu; int pid; - int err; int stack_prot; int pgid; int cred[8]; diff --git a/executer/kernel/mcctrl/control.c b/executer/kernel/mcctrl/control.c index 195e012e..34f9d60e 100644 --- a/executer/kernel/mcctrl/control.c +++ b/executer/kernel/mcctrl/control.c @@ -107,6 +107,7 @@ static long mcexec_prepare_image(ihk_os_t os, struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); struct mcctrl_per_proc_data *ppd = NULL; int num_sections; + int free_ikc_pointers = 1; desc = kmalloc(sizeof(*desc), GFP_KERNEL); if (!desc) { @@ -140,9 +141,9 @@ static long mcexec_prepare_image(ihk_os_t os, goto put_and_free_out; } - pdesc = kmalloc(sizeof(struct program_load_desc) + - sizeof(struct program_image_section) - * num_sections, GFP_KERNEL); + pdesc = kmalloc(sizeof(struct program_load_desc) + + sizeof(struct program_image_section) * num_sections, + GFP_KERNEL); memcpy(pdesc, desc, sizeof(struct program_load_desc)); if (copy_from_user(pdesc->sections, udesc->sections, @@ -167,10 +168,10 @@ static long mcexec_prepare_image(ihk_os_t os, ret = -EFAULT; goto put_and_free_out; } - + envs = kmalloc(pdesc->envs_len, GFP_KERNEL); if (copy_from_user(envs, pdesc->envs, pdesc->envs_len)) { - ret = -EFAULT; + ret = -EFAULT; goto put_and_free_out; } @@ -187,36 +188,24 @@ static long mcexec_prepare_image(ihk_os_t os, dprintk("# of sections: %d\n", pdesc->num_sections); dprintk("%p (%lx)\n", pdesc, isp.arg); - - pdesc->status = 0; - mb(); - ret = mcctrl_ikc_send(os, pdesc->cpu, &isp); - if(ret < 0) { - printk("%s: ERROR mcctrl_ikc_send: %d\n", __FUNCTION__, ret); - goto put_and_free_out; - } - ret = wait_event_interruptible(ppd->wq_prepare, pdesc->status); + ret = mcctrl_ikc_send_wait(os, pdesc->cpu, &isp, 0, &free_ikc_pointers, + 3, pdesc, args, envs); if (ret < 0) { - printk("%s: ERROR after wait: %d\n", __FUNCTION__, ret); - goto put_and_free_out; - } - - if (pdesc->err < 0) { - ret = pdesc->err; + /* either send or remote prepare_process failed */ goto put_and_free_out; } /* Update rpgtable */ ppd->rpgtable = pdesc->rpgtable; - - if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + + + if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) + sizeof(struct program_image_section) * num_sections)) { - ret = -EFAULT; + ret = -EFAULT; goto put_and_free_out; } - dprintk("%s: pid %d, rpgtable: 0x%lx added\n", + dprintk("%s: pid %d, rpgtable: 0x%lx added\n", __FUNCTION__, ppd->pid, ppd->rpgtable); ret = 0; @@ -224,10 +213,12 @@ static long mcexec_prepare_image(ihk_os_t os, put_and_free_out: mcctrl_put_per_proc_data(ppd); free_out: - kfree(args); - kfree(pdesc); - kfree(envs); - kfree(desc); + if (free_ikc_pointers) { + kfree(args); + kfree(pdesc); + kfree(envs); + kfree(desc); + } return ret; } @@ -1691,7 +1682,6 @@ int mcexec_create_per_process_data(ihk_os_t os) INIT_LIST_HEAD(&ppd->wq_list); INIT_LIST_HEAD(&ppd->wq_req_list); INIT_LIST_HEAD(&ppd->wq_list_exact); - init_waitqueue_head(&ppd->wq_prepare); init_waitqueue_head(&ppd->wq_procfs); spin_lock_init(&ppd->wq_list_lock); memset(&ppd->cpu_set, 0, sizeof(cpumask_t)); @@ -2925,28 +2915,6 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg, return -EINVAL; } -void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err) -{ - struct program_load_desc *desc = phys_to_virt(arg); - struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); - struct mcctrl_per_proc_data *ppd = NULL; - - ppd = mcctrl_get_per_proc_data(usrdata, desc->pid); - if (!ppd) { - printk("%s: ERROR: no per process data for PID %d\n", - __FUNCTION__, desc->pid); - return; - } - - desc->err = err; - desc->status = 1; - mb(); - - wake_up_all(&ppd->wq_prepare); - mcctrl_put_per_proc_data(ppd); -} - - /* Per-CPU register manipulation functions */ struct mcctrl_os_cpu_response { int done; diff --git a/executer/kernel/mcctrl/ikc.c b/executer/kernel/mcctrl/ikc.c index e0d2a4ab..c0b50a45 100644 --- a/executer/kernel/mcctrl/ikc.c +++ b/executer/kernel/mcctrl/ikc.c @@ -49,7 +49,6 @@ //struct mcctrl_channel *channels; -void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err); static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c); int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet); void sig_done(unsigned long arg, int err); @@ -58,6 +57,119 @@ void mcctrl_os_read_write_cpu_response(ihk_os_t os, struct ikc_scd_packet *pisp); void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp); +struct mcctrl_wakeup_desc { + int status; + int err; + wait_queue_head_t wq; + struct list_head chain; + int free_addrs_count; + void *free_addrs[]; +}; + +/* Assumes usrdata->wakeup_descs_lock taken */ +static void mcctrl_wakeup_desc_cleanup(ihk_os_t os, + struct mcctrl_wakeup_desc *desc) +{ + int i; + + list_del(&desc->chain); + + for (i = 0; i < desc->free_addrs_count; i++) { + kfree(desc->free_addrs[i]); + } +} + +static void mcctrl_wakeup_cb(ihk_os_t os, struct ikc_scd_packet *packet) +{ + struct mcctrl_wakeup_desc *desc = packet->reply; + + + WRITE_ONCE(desc->err, packet->err); + + /* + * Check if the other side is still waiting, and signal it we're done. + * + * Setting status needs to be done last because the other side could + * wake up opportunistically between this set and the wake_up call. + * + * If the other side is no longer waiting, free the memory that was + * left for us. + */ + if (cmpxchg(&desc->status, 0, 1)) { + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + unsigned long flags; + + spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags); + mcctrl_wakeup_desc_cleanup(os, desc); + spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags); + return; + } + + wake_up_interruptible(&desc->wq); +} + +int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp, + long int timeout, int *do_frees, int free_addrs_count, ...) +{ + int ret, i; + va_list ap; + struct mcctrl_wakeup_desc *desc; + + *do_frees = 1; + desc = kmalloc(sizeof(struct mcctrl_wakeup_desc) + + (free_addrs_count + 1) * sizeof(void *), + GFP_KERNEL); + if (!desc) { + pr_warn("%s: Could not allocate wakeup descriptor", __func__); + return -ENOMEM; + } + pisp->reply = desc; + va_start(ap, free_addrs_count); + for (i = 0; i < free_addrs_count; i++) { + desc->free_addrs[i] = va_arg(ap, void*); + } + desc->free_addrs[free_addrs_count] = desc; + va_end(ap); + desc->free_addrs_count = free_addrs_count + 1; + init_waitqueue_head(&desc->wq); + WRITE_ONCE(desc->err, 0); + WRITE_ONCE(desc->status, 0); + + ret = mcctrl_ikc_send(os, cpu, pisp); + if (ret < 0) { + pr_warn("%s: mcctrl_ikc_send failed: %d\n", __func__, ret); + kfree(desc); + return ret; + } + + if (timeout) { + ret = wait_event_interruptible_timeout(desc->wq, + desc->status, timeout); + } else { + ret = wait_event_interruptible(desc->wq, desc->status); + } + + /* + * Check if wait aborted (signal..) or timed out, and notify + * the callback it will need to free things for us + */ + if (!cmpxchg(&desc->status, 0, 1)) { + struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + unsigned long flags; + + spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags); + list_add(&desc->chain, &usrdata->wakeup_descs_list); + spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags); + *do_frees = 0; + return ret < 0 ? ret : -ETIME; + } + + ret = READ_ONCE(desc->err); + kfree(desc); + return ret; +} + + /* XXX: this runs in atomic context! */ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, void *__packet, void *__os) @@ -72,11 +184,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, break; case SCD_MSG_PREPARE_PROCESS_ACKED: - mcexec_prepare_ack(__os, pisp->arg, 0); - break; - - case SCD_MSG_PREPARE_PROCESS_NACKED: - mcexec_prepare_ack(__os, pisp->arg, pisp->err); + mcctrl_wakeup_cb(__os, pisp); break; case SCD_MSG_SYSCALL_ONESIDE: @@ -157,10 +265,15 @@ static int dummy_packet_handler(struct ihk_ikc_channel_desc *c, int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp) { - struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct mcctrl_usrdata *usrdata; - if (cpu < 0 || os == NULL || usrdata == NULL || - cpu >= usrdata->num_channels || !usrdata->channels[cpu].c) { + if (cpu < 0 || os == NULL) { + return -EINVAL; + } + + usrdata = ihk_host_os_get_usrdata(os); + if (usrdata == NULL || cpu >= usrdata->num_channels || + !usrdata->channels[cpu].c) { return -EINVAL; } return ihk_ikc_send(usrdata->channels[cpu].c, pisp, 0); @@ -354,6 +467,8 @@ int prepare_ikc_channels(ihk_os_t os) mutex_init(&usrdata->part_exec.lock); INIT_LIST_HEAD(&usrdata->part_exec.pli_list); usrdata->part_exec.nr_processes = -1; + INIT_LIST_HEAD(&usrdata->wakeup_descs_list); + spin_lock_init(&usrdata->wakeup_descs_lock); return 0; @@ -375,7 +490,9 @@ void __destroy_ikc_channel(ihk_os_t os, struct mcctrl_channel *pmc) void destroy_ikc_channels(ihk_os_t os) { int i; + unsigned long flags; struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os); + struct mcctrl_wakeup_desc *mwd_entry, *mwd_next; if (!usrdata) { printk("%s: WARNING: no mcctrl_usrdata found\n", __FUNCTION__); @@ -395,6 +512,12 @@ void destroy_ikc_channels(ihk_os_t os) ihk_ikc_destroy_channel(usrdata->ikc2linux[i]); } } + spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags); + list_for_each_entry_safe(mwd_entry, mwd_next, + &usrdata->wakeup_descs_list, chain) { + mcctrl_wakeup_desc_cleanup(os, mwd_entry); + } + spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags); kfree(usrdata->channels); kfree(usrdata->ikc2linux); diff --git a/executer/kernel/mcctrl/mcctrl.h b/executer/kernel/mcctrl/mcctrl.h index e2724562..fc51d51d 100644 --- a/executer/kernel/mcctrl/mcctrl.h +++ b/executer/kernel/mcctrl/mcctrl.h @@ -48,7 +48,6 @@ #define SCD_MSG_PREPARE_PROCESS 0x1 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 -#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7 #define SCD_MSG_SCHEDULE_PROCESS 0x3 #define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14 @@ -128,6 +127,7 @@ enum mcctrl_os_cpu_operation { struct ikc_scd_packet { int msg; int err; + void *reply; union { /* for traditional SCD_MSG_* */ struct { @@ -163,9 +163,10 @@ struct ikc_scd_packet { int eventfd_type; }; }; - char padding[12]; + char padding[8]; }; + struct mcctrl_priv { ihk_os_t os; struct program_load_desc *desc; @@ -231,7 +232,6 @@ struct mcctrl_per_proc_data { struct list_head wq_list_exact; /* These requests come from IKC IRQ handler targeting a particular thread */ ihk_spinlock_t wq_list_lock; - wait_queue_head_t wq_prepare; wait_queue_head_t wq_procfs; struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE]; @@ -342,6 +342,8 @@ struct mcctrl_usrdata { wait_queue_head_t wq_procfs; struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE]; rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE]; + struct list_head wakeup_descs_list; + spinlock_t wakeup_descs_lock; void **keys; struct sysfsm_data sysfsm_data; @@ -367,6 +369,10 @@ int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp); int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg); int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu); +/* ikc query-and-wait helper */ +int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp, + long int timeout, int *do_frees, int free_addrs_count, ...); + ihk_os_t osnum_to_os(int n); /* syscall.c */ diff --git a/kernel/host.c b/kernel/host.c index e68776c4..24f8af7c 100644 --- a/kernel/host.c +++ b/kernel/host.c @@ -597,14 +597,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c, case SCD_MSG_PREPARE_PROCESS: - if((rc = process_msg_prepare_process(packet->arg)) == 0){ - pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED; - pckt.err = 0; - } - else{ - pckt.msg = SCD_MSG_PREPARE_PROCESS_NACKED; - pckt.err = rc; - } + pckt.err = process_msg_prepare_process(packet->arg); + pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED; + pckt.reply = packet->reply; pckt.ref = packet->ref; pckt.arg = packet->arg; syscall_channel_send(resp_channel, &pckt); diff --git a/kernel/include/syscall.h b/kernel/include/syscall.h index 24a2833b..111f730e 100644 --- a/kernel/include/syscall.h +++ b/kernel/include/syscall.h @@ -30,7 +30,6 @@ #define SCD_MSG_PREPARE_PROCESS 0x1 #define SCD_MSG_PREPARE_PROCESS_ACKED 0x2 -#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7 #define SCD_MSG_SCHEDULE_PROCESS 0x3 #define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14 @@ -168,10 +167,8 @@ typedef unsigned long __cpu_set_unit; struct program_load_desc { int num_sections; - int status; int cpu; int pid; - int err; int stack_prot; int pgid; int cred[8]; @@ -241,6 +238,7 @@ enum mcctrl_os_cpu_operation { struct ikc_scd_packet { int msg; int err; + void *reply; union { /* for traditional SCD_MSG_* */ struct { @@ -276,7 +274,7 @@ struct ikc_scd_packet { int eventfd_type; }; }; - char padding[12]; + char padding[8]; }; #define IHK_SCD_REQ_THREAD_SPINNING 0