mcctrl: refactor prepare_image into new generic ikc send&wait

Many ikc messages expecting a reply use wait_event_interruptible
incorrectly, freeing memory that could still be used on the other side.

This commit implements a generic ikc send and wait helper that helps
with memory management and ownership properly:
 - if the message succeeds and a reply comes back normally, the memory
is freed by the caller as usual
 - if the wait fails (signal before the reply comes or timeout) then the
memory is set as owner by ikc and will be free when the reply comes back
later
 - if the reply never comes, the memory is freed at shutdown when
destroying ikc channels

Refs: #1076
Change-Id: I7f348d9029a6ad56ba9a50c836105ec39fa14943
This commit is contained in:
Dominique Martinet
2018-06-21 19:12:59 +09:00
committed by Masamichi Takagi
parent ec202a1ca9
commit b939ca9370
6 changed files with 165 additions and 77 deletions

View File

@ -111,10 +111,8 @@ typedef unsigned long __cpu_set_unit;
struct program_load_desc {
int num_sections;
int status;
int cpu;
int pid;
int err;
int stack_prot;
int pgid;
int cred[8];

View File

@ -107,6 +107,7 @@ static long mcexec_prepare_image(ihk_os_t os,
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd = NULL;
int num_sections;
int free_ikc_pointers = 1;
desc = kmalloc(sizeof(*desc), GFP_KERNEL);
if (!desc) {
@ -140,9 +141,9 @@ static long mcexec_prepare_image(ihk_os_t os,
goto put_and_free_out;
}
pdesc = kmalloc(sizeof(struct program_load_desc) +
sizeof(struct program_image_section)
* num_sections, GFP_KERNEL);
pdesc = kmalloc(sizeof(struct program_load_desc) +
sizeof(struct program_image_section) * num_sections,
GFP_KERNEL);
memcpy(pdesc, desc, sizeof(struct program_load_desc));
if (copy_from_user(pdesc->sections, udesc->sections,
@ -167,10 +168,10 @@ static long mcexec_prepare_image(ihk_os_t os,
ret = -EFAULT;
goto put_and_free_out;
}
envs = kmalloc(pdesc->envs_len, GFP_KERNEL);
if (copy_from_user(envs, pdesc->envs, pdesc->envs_len)) {
ret = -EFAULT;
ret = -EFAULT;
goto put_and_free_out;
}
@ -187,36 +188,24 @@ static long mcexec_prepare_image(ihk_os_t os,
dprintk("# of sections: %d\n", pdesc->num_sections);
dprintk("%p (%lx)\n", pdesc, isp.arg);
pdesc->status = 0;
mb();
ret = mcctrl_ikc_send(os, pdesc->cpu, &isp);
if(ret < 0) {
printk("%s: ERROR mcctrl_ikc_send: %d\n", __FUNCTION__, ret);
goto put_and_free_out;
}
ret = wait_event_interruptible(ppd->wq_prepare, pdesc->status);
ret = mcctrl_ikc_send_wait(os, pdesc->cpu, &isp, 0, &free_ikc_pointers,
3, pdesc, args, envs);
if (ret < 0) {
printk("%s: ERROR after wait: %d\n", __FUNCTION__, ret);
goto put_and_free_out;
}
if (pdesc->err < 0) {
ret = pdesc->err;
/* either send or remote prepare_process failed */
goto put_and_free_out;
}
/* Update rpgtable */
ppd->rpgtable = pdesc->rpgtable;
if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
sizeof(struct program_image_section) * num_sections)) {
ret = -EFAULT;
ret = -EFAULT;
goto put_and_free_out;
}
dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
__FUNCTION__, ppd->pid, ppd->rpgtable);
ret = 0;
@ -224,10 +213,12 @@ static long mcexec_prepare_image(ihk_os_t os,
put_and_free_out:
mcctrl_put_per_proc_data(ppd);
free_out:
kfree(args);
kfree(pdesc);
kfree(envs);
kfree(desc);
if (free_ikc_pointers) {
kfree(args);
kfree(pdesc);
kfree(envs);
kfree(desc);
}
return ret;
}
@ -1691,7 +1682,6 @@ int mcexec_create_per_process_data(ihk_os_t os)
INIT_LIST_HEAD(&ppd->wq_list);
INIT_LIST_HEAD(&ppd->wq_req_list);
INIT_LIST_HEAD(&ppd->wq_list_exact);
init_waitqueue_head(&ppd->wq_prepare);
init_waitqueue_head(&ppd->wq_procfs);
spin_lock_init(&ppd->wq_list_lock);
memset(&ppd->cpu_set, 0, sizeof(cpumask_t));
@ -2925,28 +2915,6 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
return -EINVAL;
}
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err)
{
struct program_load_desc *desc = phys_to_virt(arg);
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_per_proc_data *ppd = NULL;
ppd = mcctrl_get_per_proc_data(usrdata, desc->pid);
if (!ppd) {
printk("%s: ERROR: no per process data for PID %d\n",
__FUNCTION__, desc->pid);
return;
}
desc->err = err;
desc->status = 1;
mb();
wake_up_all(&ppd->wq_prepare);
mcctrl_put_per_proc_data(ppd);
}
/* Per-CPU register manipulation functions */
struct mcctrl_os_cpu_response {
int done;

View File

@ -49,7 +49,6 @@
//struct mcctrl_channel *channels;
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
void sig_done(unsigned long arg, int err);
@ -58,6 +57,119 @@ void mcctrl_os_read_write_cpu_response(ihk_os_t os,
struct ikc_scd_packet *pisp);
void mcctrl_eventfd(ihk_os_t os, struct ikc_scd_packet *pisp);
struct mcctrl_wakeup_desc {
int status;
int err;
wait_queue_head_t wq;
struct list_head chain;
int free_addrs_count;
void *free_addrs[];
};
/* Assumes usrdata->wakeup_descs_lock taken */
static void mcctrl_wakeup_desc_cleanup(ihk_os_t os,
struct mcctrl_wakeup_desc *desc)
{
int i;
list_del(&desc->chain);
for (i = 0; i < desc->free_addrs_count; i++) {
kfree(desc->free_addrs[i]);
}
}
static void mcctrl_wakeup_cb(ihk_os_t os, struct ikc_scd_packet *packet)
{
struct mcctrl_wakeup_desc *desc = packet->reply;
WRITE_ONCE(desc->err, packet->err);
/*
* Check if the other side is still waiting, and signal it we're done.
*
* Setting status needs to be done last because the other side could
* wake up opportunistically between this set and the wake_up call.
*
* If the other side is no longer waiting, free the memory that was
* left for us.
*/
if (cmpxchg(&desc->status, 0, 1)) {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
mcctrl_wakeup_desc_cleanup(os, desc);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
return;
}
wake_up_interruptible(&desc->wq);
}
int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
long int timeout, int *do_frees, int free_addrs_count, ...)
{
int ret, i;
va_list ap;
struct mcctrl_wakeup_desc *desc;
*do_frees = 1;
desc = kmalloc(sizeof(struct mcctrl_wakeup_desc) +
(free_addrs_count + 1) * sizeof(void *),
GFP_KERNEL);
if (!desc) {
pr_warn("%s: Could not allocate wakeup descriptor", __func__);
return -ENOMEM;
}
pisp->reply = desc;
va_start(ap, free_addrs_count);
for (i = 0; i < free_addrs_count; i++) {
desc->free_addrs[i] = va_arg(ap, void*);
}
desc->free_addrs[free_addrs_count] = desc;
va_end(ap);
desc->free_addrs_count = free_addrs_count + 1;
init_waitqueue_head(&desc->wq);
WRITE_ONCE(desc->err, 0);
WRITE_ONCE(desc->status, 0);
ret = mcctrl_ikc_send(os, cpu, pisp);
if (ret < 0) {
pr_warn("%s: mcctrl_ikc_send failed: %d\n", __func__, ret);
kfree(desc);
return ret;
}
if (timeout) {
ret = wait_event_interruptible_timeout(desc->wq,
desc->status, timeout);
} else {
ret = wait_event_interruptible(desc->wq, desc->status);
}
/*
* Check if wait aborted (signal..) or timed out, and notify
* the callback it will need to free things for us
*/
if (!cmpxchg(&desc->status, 0, 1)) {
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
unsigned long flags;
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_add(&desc->chain, &usrdata->wakeup_descs_list);
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
*do_frees = 0;
return ret < 0 ? ret : -ETIME;
}
ret = READ_ONCE(desc->err);
kfree(desc);
return ret;
}
/* XXX: this runs in atomic context! */
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
void *__packet, void *__os)
@ -72,11 +184,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break;
case SCD_MSG_PREPARE_PROCESS_ACKED:
mcexec_prepare_ack(__os, pisp->arg, 0);
break;
case SCD_MSG_PREPARE_PROCESS_NACKED:
mcexec_prepare_ack(__os, pisp->arg, pisp->err);
mcctrl_wakeup_cb(__os, pisp);
break;
case SCD_MSG_SYSCALL_ONESIDE:
@ -157,10 +265,15 @@ static int dummy_packet_handler(struct ihk_ikc_channel_desc *c,
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp)
{
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_usrdata *usrdata;
if (cpu < 0 || os == NULL || usrdata == NULL ||
cpu >= usrdata->num_channels || !usrdata->channels[cpu].c) {
if (cpu < 0 || os == NULL) {
return -EINVAL;
}
usrdata = ihk_host_os_get_usrdata(os);
if (usrdata == NULL || cpu >= usrdata->num_channels ||
!usrdata->channels[cpu].c) {
return -EINVAL;
}
return ihk_ikc_send(usrdata->channels[cpu].c, pisp, 0);
@ -354,6 +467,8 @@ int prepare_ikc_channels(ihk_os_t os)
mutex_init(&usrdata->part_exec.lock);
INIT_LIST_HEAD(&usrdata->part_exec.pli_list);
usrdata->part_exec.nr_processes = -1;
INIT_LIST_HEAD(&usrdata->wakeup_descs_list);
spin_lock_init(&usrdata->wakeup_descs_lock);
return 0;
@ -375,7 +490,9 @@ void __destroy_ikc_channel(ihk_os_t os, struct mcctrl_channel *pmc)
void destroy_ikc_channels(ihk_os_t os)
{
int i;
unsigned long flags;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct mcctrl_wakeup_desc *mwd_entry, *mwd_next;
if (!usrdata) {
printk("%s: WARNING: no mcctrl_usrdata found\n", __FUNCTION__);
@ -395,6 +512,12 @@ void destroy_ikc_channels(ihk_os_t os)
ihk_ikc_destroy_channel(usrdata->ikc2linux[i]);
}
}
spin_lock_irqsave(&usrdata->wakeup_descs_lock, flags);
list_for_each_entry_safe(mwd_entry, mwd_next,
&usrdata->wakeup_descs_list, chain) {
mcctrl_wakeup_desc_cleanup(os, mwd_entry);
}
spin_unlock_irqrestore(&usrdata->wakeup_descs_lock, flags);
kfree(usrdata->channels);
kfree(usrdata->ikc2linux);

View File

@ -48,7 +48,6 @@
#define SCD_MSG_PREPARE_PROCESS 0x1
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
@ -128,6 +127,7 @@ enum mcctrl_os_cpu_operation {
struct ikc_scd_packet {
int msg;
int err;
void *reply;
union {
/* for traditional SCD_MSG_* */
struct {
@ -163,9 +163,10 @@ struct ikc_scd_packet {
int eventfd_type;
};
};
char padding[12];
char padding[8];
};
struct mcctrl_priv {
ihk_os_t os;
struct program_load_desc *desc;
@ -231,7 +232,6 @@ struct mcctrl_per_proc_data {
struct list_head wq_list_exact; /* These requests come from IKC IRQ handler targeting a particular thread */
ihk_spinlock_t wq_list_lock;
wait_queue_head_t wq_prepare;
wait_queue_head_t wq_procfs;
struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
@ -342,6 +342,8 @@ struct mcctrl_usrdata {
wait_queue_head_t wq_procfs;
struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE];
struct list_head wakeup_descs_list;
spinlock_t wakeup_descs_lock;
void **keys;
struct sysfsm_data sysfsm_data;
@ -367,6 +369,10 @@ int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
/* ikc query-and-wait helper */
int mcctrl_ikc_send_wait(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp,
long int timeout, int *do_frees, int free_addrs_count, ...);
ihk_os_t osnum_to_os(int n);
/* syscall.c */

View File

@ -597,14 +597,9 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
case SCD_MSG_PREPARE_PROCESS:
if((rc = process_msg_prepare_process(packet->arg)) == 0){
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
pckt.err = 0;
}
else{
pckt.msg = SCD_MSG_PREPARE_PROCESS_NACKED;
pckt.err = rc;
}
pckt.err = process_msg_prepare_process(packet->arg);
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
pckt.reply = packet->reply;
pckt.ref = packet->ref;
pckt.arg = packet->arg;
syscall_channel_send(resp_channel, &pckt);

View File

@ -30,7 +30,6 @@
#define SCD_MSG_PREPARE_PROCESS 0x1
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
#define SCD_MSG_SCHEDULE_PROCESS 0x3
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
@ -168,10 +167,8 @@ typedef unsigned long __cpu_set_unit;
struct program_load_desc {
int num_sections;
int status;
int cpu;
int pid;
int err;
int stack_prot;
int pgid;
int cred[8];
@ -241,6 +238,7 @@ enum mcctrl_os_cpu_operation {
struct ikc_scd_packet {
int msg;
int err;
void *reply;
union {
/* for traditional SCD_MSG_* */
struct {
@ -276,7 +274,7 @@ struct ikc_scd_packet {
int eventfd_type;
};
};
char padding[12];
char padding[8];
};
#define IHK_SCD_REQ_THREAD_SPINNING 0