From 0307f6a6cc4ad35cf5dcbe9ff8e8ea41a3dd272c Mon Sep 17 00:00:00 2001
From: Balazs Gerofi
Date: Thu, 19 Feb 2015 11:13:13 -0800
Subject: [PATCH] implementation of sched_{setparam, getparam, setscheduler,
 getscheduler, get_priority_min, get_priority_max, rr_get_interval} system
 calls

---
 arch/x86/kernel/include/syscall_list.h |   7 +
 executer/kernel/syscall.c              |  67 ++++++
 kernel/include/prio.h                  |  60 ++++++
 kernel/include/process.h               |  25 +++
 kernel/process.c                       |   5 +
 kernel/syscall.c                       | 270 ++++++++++++++++++++++++-
 6 files changed, 433 insertions(+), 1 deletion(-)
 create mode 100644 kernel/include/prio.h

diff --git a/arch/x86/kernel/include/syscall_list.h b/arch/x86/kernel/include/syscall_list.h
index 23dd6aa8..4e0a7337 100644
--- a/arch/x86/kernel/include/syscall_list.h
+++ b/arch/x86/kernel/include/syscall_list.h
@@ -71,6 +71,13 @@ SYSCALL_HANDLED(128, rt_sigtimedwait)
 SYSCALL_HANDLED(129, rt_sigqueueinfo)
 SYSCALL_HANDLED(130, rt_sigsuspend)
 SYSCALL_HANDLED(131, sigaltstack)
+SYSCALL_HANDLED(142, sched_setparam)
+SYSCALL_HANDLED(143, sched_getparam)
+SYSCALL_HANDLED(144, sched_setscheduler)
+SYSCALL_HANDLED(145, sched_getscheduler)
+SYSCALL_HANDLED(146, sched_get_priority_max)
+SYSCALL_HANDLED(147, sched_get_priority_min)
+SYSCALL_HANDLED(148, sched_rr_get_interval)
 SYSCALL_HANDLED(149, mlock)
 SYSCALL_HANDLED(150, munlock)
 SYSCALL_HANDLED(158, arch_prctl)
diff --git a/executer/kernel/syscall.c b/executer/kernel/syscall.c
index af05cf98..57070c1d 100644
--- a/executer/kernel/syscall.c
+++ b/executer/kernel/syscall.c
@@ -1505,6 +1505,8 @@ fail:
     return error;
 }
 
+#define SCHED_CHECK_SAME_OWNER  0x01
+#define SCHED_CHECK_ROOT        0x02
 
 int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c,
                            struct syscall_request *sc)
@@ -1592,6 +1594,71 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
         error = writecore(os, sc->args[1], sc->args[0]);
         ret = 0;
         break;
+
+    case __NR_sched_setparam: {
+
+        switch (sc->args[0]) {
+
+            case SCHED_CHECK_SAME_OWNER: {
+                const struct cred *cred = current_cred();
+                const struct cred *pcred;
+                bool match;
+                struct task_struct *p;
+                int pid = sc->args[1];
+
+                rcu_read_lock();
+                p = pid_task(find_get_pid(pid), PIDTYPE_PID);
+                if (!p) {
+                    rcu_read_unlock();
+                    ret = -ESRCH;
+                    goto sched_setparam_out;
+                }
+                rcu_read_unlock();
+
+                rcu_read_lock();
+                pcred = __task_cred(p);
+#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
+                match = (uid_eq(cred->euid, pcred->euid) ||
+                         uid_eq(cred->euid, pcred->uid));
+#else
+                match = ((cred->euid == pcred->euid) ||
+                         (cred->euid == pcred->uid));
+#endif
+                rcu_read_unlock();
+
+                if (match) {
+                    ret = 0;
+                }
+                else {
+                    ret = -EPERM;
+                }
+
+                break;
+            }
+
+            case SCHED_CHECK_ROOT: {
+                const struct cred *cred = current_cred();
+                bool match;
+
+#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
+                match = uid_eq(cred->euid, GLOBAL_ROOT_UID);
+#else
+                match = (cred->euid == 0);
+#endif
+                if (match) {
+                    ret = 0;
+                }
+                else {
+                    ret = -EPERM;
+                }
+
+                break;
+            }
+        }
+
+sched_setparam_out:
+        break;
+    }
 
     default:
         error = -ENOSYS;
diff --git a/kernel/include/prio.h b/kernel/include/prio.h
new file mode 100644
index 00000000..d9cf5a57
--- /dev/null
+++ b/kernel/include/prio.h
@@ -0,0 +1,60 @@
+#ifndef _SCHED_PRIO_H
+#define _SCHED_PRIO_H
+
+#define MAX_NICE    19
+#define MIN_NICE    -20
+#define NICE_WIDTH  (MAX_NICE - MIN_NICE + 1)
+
+/*
+ * Priority of a process goes from 0..MAX_PRIO-1, valid RT
+ * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
+ * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
+ * values are inverted: lower p->prio value means higher priority.
+ *
+ * The MAX_USER_RT_PRIO value allows the actual maximum
+ * RT priority to be separate from the value exported to
+ * user-space. This allows kernel threads to set their
+ * priority to a value higher than any user task. Note:
+ * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
+ */
+
+#define MAX_USER_RT_PRIO    100
+#define MAX_RT_PRIO         MAX_USER_RT_PRIO
+
+#define MAX_PRIO            (MAX_RT_PRIO + NICE_WIDTH)
+#define DEFAULT_PRIO        (MAX_RT_PRIO + NICE_WIDTH / 2)
+
+/*
+ * Convert user-nice values [ -20 ... 0 ... 19 ]
+ * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+ * and back.
+ */
+#define NICE_TO_PRIO(nice)  ((nice) + DEFAULT_PRIO)
+#define PRIO_TO_NICE(prio)  ((prio) - DEFAULT_PRIO)
+
+/*
+ * 'User priority' is the nice value converted to something we
+ * can work with better when scaling various scheduler parameters,
+ * it's a [ 0 ... 39 ] range.
+ */
+#define USER_PRIO(p)        ((p)-MAX_RT_PRIO)
+#define TASK_USER_PRIO(p)   USER_PRIO((p)->static_prio)
+#define MAX_USER_PRIO       (USER_PRIO(MAX_PRIO))
+
+/*
+ * Convert nice value [19,-20] to rlimit style value [1,40].
+ */
+static inline long nice_to_rlimit(long nice)
+{
+    return (MAX_NICE - nice + 1);
+}
+
+/*
+ * Convert rlimit style value [1,40] to nice value [-20, 19].
+ */
+static inline long rlimit_to_nice(long prio)
+{
+    return (MAX_NICE - prio + 1);
+}
+
+#endif /* _SCHED_PRIO_H */
diff --git a/kernel/include/process.h b/kernel/include/process.h
index a8dc0219..c3890260 100644
--- a/kernel/include/process.h
+++ b/kernel/include/process.h
@@ -316,6 +316,29 @@ struct fork_tree_node {
 void hold_fork_tree_node(struct fork_tree_node *ftn);
 void release_fork_tree_node(struct fork_tree_node *ftn);
 
+/*
+ * Scheduling policies
+ */
+#define SCHED_NORMAL    0
+#define SCHED_FIFO      1
+#define SCHED_RR        2
+#define SCHED_BATCH     3
+/* SCHED_ISO: reserved but not implemented yet */
+#define SCHED_IDLE      5
+#define SCHED_DEADLINE  6
+
+/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
+#define SCHED_RESET_ON_FORK 0x40000000
+
+/*
+ * For the sched_{set,get}attr() calls
+ */
+#define SCHED_FLAG_RESET_ON_FORK    0x01
+
+struct sched_param {
+    int sched_priority;
+};
+
 struct process {
     int cpu_id;
 
@@ -327,6 +350,8 @@ struct process {
     // Runqueue list entry
     struct list_head sched_list;
 
+    int sched_policy;
+    struct sched_param sched_param;
 
     ihk_spinlock_t spin_sleep_lock;
     int spin_sleep;
diff --git a/kernel/process.c b/kernel/process.c
index ad2aad8f..cfb7ff74 100644
--- a/kernel/process.c
+++ b/kernel/process.c
@@ -154,6 +154,8 @@ struct process *create_process(unsigned long user_pc)
         }
     }
 
+    proc->sched_policy = SCHED_NORMAL;
+
     proc->sighandler = kmalloc(sizeof(struct sig_handler), IHK_MC_AP_NOWAIT);
     if(!proc->sighandler){
         goto err_free_process;
@@ -252,6 +254,9 @@ struct process *clone_process(struct process *org, unsigned long pc,
 
     init_fork_tree_node(proc->ftn, org->ftn, proc);
 
+    proc->sched_policy = org->sched_policy;
+    proc->sched_param.sched_priority = org->sched_param.sched_priority;
+
     /* clone signal handlers */
     if (clone_flags & CLONE_SIGHAND) {
         proc->sigstack.ss_sp = NULL;
diff --git a/kernel/syscall.c b/kernel/syscall.c
index f97c8d8a..ed0b31b1 100644
--- a/kernel/syscall.c
+++ b/kernel/syscall.c
@@ -46,6 +46,7 @@
 #include
 #include
 #include
+#include <prio.h>
 
 /* Headers taken from kitten LWK */
 #include
@@ -1364,8 +1365,9 @@ SYSCALL_DECLARE(getppid)
         if (proc->ftn->ppid_parent)
             pid = proc->ftn->ppid_parent->pid;
     } else {
-        if (proc->ftn->parent)
+        if (proc->ftn->parent) {
             pid = proc->ftn->parent->pid;
+        }
     }
     ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock);
     return pid;
@@ -3202,6 +3204,272 @@ SYSCALL_DECLARE(ptrace)
     return error;
 }
 
+/* We do not have actual scheduling classes so we just make sure we store
+ * policies and priorities in a POSIX/Linux-compliant manner */
+static int setscheduler(struct process *proc, int policy, struct sched_param *param)
+{
+    if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
+            ((param->sched_priority < 1) ||
+             (param->sched_priority > MAX_USER_RT_PRIO - 1))) {
+        return -EINVAL;
+    }
+
+    if ((policy == SCHED_NORMAL || policy == SCHED_BATCH || policy == SCHED_IDLE) &&
+            (param->sched_priority != 0)) {
+        return -EINVAL;
+    }
+
+    memcpy(&proc->sched_param, param, sizeof(*param));
+    proc->sched_policy = policy;
+
+    return 0;
+}
+
+#define SCHED_CHECK_SAME_OWNER  0x01
+#define SCHED_CHECK_ROOT        0x02
+
+SYSCALL_DECLARE(sched_setparam)
+{
+    int retval = 0;
+    int pid = (int)ihk_mc_syscall_arg0(ctx);
+    struct sched_param *uparam = (struct sched_param *)ihk_mc_syscall_arg1(ctx);
+    struct sched_param param;
+    struct process *proc = cpu_local_var(current);
+    unsigned long irqstate = 0;
+    ihk_spinlock_t *lock;
+
+    ihk_mc_user_context_t ctx1;
+    struct syscall_request request1 IHK_DMA_ALIGN;
+
+    dkprintf("sched_setparam: pid: %d, uparam: 0x%lx\n", pid, uparam);
+
+    if (!uparam || pid < 0) {
+        return -EINVAL;
+    }
+
+    if (pid == 0)
+        pid = proc->ftn->pid;
+
+    if (proc->ftn->pid != pid) {
+        proc = findthread_and_lock(pid, pid, &lock, &irqstate);
+        if (!proc) {
+            return -ESRCH;
+        }
+        process_unlock(lock, irqstate);
+
+        /* Ask Linux about ownership.. */
+        request1.number = __NR_sched_setparam;
+        request1.args[0] = SCHED_CHECK_SAME_OWNER;
+        request1.args[1] = pid;
+
+        retval = do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), 0);
+        if (retval != 0) {
+            return retval;
+        }
+    }
+
+    retval = copy_from_user(proc, &param, uparam, sizeof(param));
+    if (retval < 0) {
+        return -EFAULT;
+    }
+
+    return setscheduler(proc, proc->sched_policy, &param);
+}
+
+SYSCALL_DECLARE(sched_getparam)
+{
+    int retval = 0;
+    int pid = (int)ihk_mc_syscall_arg0(ctx);
+    struct sched_param *param = (struct sched_param *)ihk_mc_syscall_arg1(ctx);
+    struct process *proc = cpu_local_var(current);
+    unsigned long irqstate = 0;
+    ihk_spinlock_t *lock;
+
+    if (!param || pid < 0) {
+        return -EINVAL;
+    }
+
+    if (pid == 0)
+        pid = proc->ftn->pid;
+
+    if (proc->ftn->pid != pid) {
+        proc = findthread_and_lock(pid, pid, &lock, &irqstate);
+        if (!proc) {
+            return -ESRCH;
+        }
+        process_unlock(lock, irqstate);
+    }
+
+    retval = copy_to_user(proc, param, &proc->sched_param, sizeof(*param)) ? -EFAULT : 0;
+
+    return retval;
+}
+
+SYSCALL_DECLARE(sched_setscheduler)
+{
+    int retval;
+    int pid = (int)ihk_mc_syscall_arg0(ctx);
+    int policy = ihk_mc_syscall_arg1(ctx);
+    struct sched_param *uparam = (struct sched_param *)ihk_mc_syscall_arg2(ctx);
+    struct sched_param param;
+    struct process *proc = cpu_local_var(current);
+    unsigned long irqstate = 0;
+    ihk_spinlock_t *lock;
+
+    ihk_mc_user_context_t ctx1;
+    struct syscall_request request1 IHK_DMA_ALIGN;
+
+    if (!uparam || pid < 0) {
+        return -EINVAL;
+    }
+
+    if (policy != SCHED_DEADLINE &&
+            policy != SCHED_FIFO && policy != SCHED_RR &&
+            policy != SCHED_NORMAL && policy != SCHED_BATCH &&
+            policy != SCHED_IDLE) {
+        return -EINVAL;
+    }
+
+    if (policy != SCHED_NORMAL) {
+
+        /* Ask Linux about permissions */
+        request1.number = __NR_sched_setparam;
+        request1.args[0] = SCHED_CHECK_ROOT;
+
+        retval = do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), 0);
+        if (retval != 0) {
+            return retval;
+        }
+    }
+
+    retval = copy_from_user(proc, &param, uparam, sizeof(param));
+    if (retval < 0) {
+        return -EFAULT;
+    }
+
+    if (pid == 0)
+        pid = proc->ftn->pid;
+
+    if (proc->ftn->pid != pid) {
+        proc = findthread_and_lock(pid, pid, &lock, &irqstate);
+        if (!proc) {
+            return -ESRCH;
+        }
+        process_unlock(lock, irqstate);
+
+        /* Ask Linux about ownership.. */
+        request1.number = __NR_sched_setparam;
+        request1.args[0] = SCHED_CHECK_SAME_OWNER;
+        request1.args[1] = pid;
+
+        retval = do_syscall(&request1, &ctx1, ihk_mc_get_processor_id(), 0);
+        if (retval != 0) {
+            return retval;
+        }
+    }
+
+    return setscheduler(proc, policy, &param);
+}
+
+SYSCALL_DECLARE(sched_getscheduler)
+{
+    int pid = (int)ihk_mc_syscall_arg0(ctx);
+    struct process *proc = cpu_local_var(current);
+    unsigned long irqstate = 0;
+    ihk_spinlock_t *lock;
+
+    if (pid < 0) {
+        return -EINVAL;
+    }
+
+    if (pid == 0)
+        pid = proc->ftn->pid;
+
+    if (proc->ftn->pid != pid) {
+        proc = findthread_and_lock(pid, pid, &lock, &irqstate);
+        if (!proc) {
+            return -ESRCH;
+        }
+        process_unlock(lock, irqstate);
+    }
+
+    return proc->sched_policy;
+}
+
+SYSCALL_DECLARE(sched_get_priority_max)
+{
+    int ret = -EINVAL;
+    int policy = ihk_mc_syscall_arg0(ctx);
+
+    switch (policy) {
+    case SCHED_FIFO:
+    case SCHED_RR:
+        ret = MAX_USER_RT_PRIO - 1;
+        break;
+    case SCHED_DEADLINE:
+    case SCHED_NORMAL:
+    case SCHED_BATCH:
+    case SCHED_IDLE:
+        ret = 0;
+        break;
+    }
+    return ret;
+}
+
+SYSCALL_DECLARE(sched_get_priority_min)
+{
+    int ret = -EINVAL;
+    int policy = ihk_mc_syscall_arg0(ctx);
+
+    switch (policy) {
+    case SCHED_FIFO:
+    case SCHED_RR:
+        ret = 1;
+        break;
+    case SCHED_DEADLINE:
+    case SCHED_NORMAL:
+    case SCHED_BATCH:
+    case SCHED_IDLE:
+        ret = 0;
+    }
+    return ret;
+}
+
+SYSCALL_DECLARE(sched_rr_get_interval)
+{
+    int pid = ihk_mc_syscall_arg0(ctx);
+    struct timespec *utime = (struct timespec *)ihk_mc_syscall_arg1(ctx);
+    struct timespec t;
+    struct process *proc = cpu_local_var(current);
+    unsigned long irqstate = 0;
+    ihk_spinlock_t *lock;
+    int retval = 0;
+
+    if (pid < 0)
+        return -EINVAL;
+
+    if (pid == 0)
+        pid = proc->ftn->pid;
+
+    if (proc->ftn->pid != pid) {
+        proc = findthread_and_lock(pid, pid, &lock, &irqstate);
+        if (!proc) {
+            return -ESRCH;
+        }
+        process_unlock(lock, irqstate);
+    }
+
+    t.tv_sec = 0;
+    t.tv_nsec = 0;
+    if (proc->sched_policy == SCHED_RR) {
+        t.tv_nsec = 10000;
+    }
+
+    retval = copy_to_user(proc, utime, &t, sizeof(t)) ? -EFAULT : 0;
+
+    return retval;
+}
+
 #define MIN2(x,y) (x) < (y) ? (x) : (y)
 SYSCALL_DECLARE(sched_setaffinity)
 {
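
Notes (appended for review; not part of the diff):

The nice<->priority and nice<->rlimit conversions in kernel/include/prio.h are easy to sanity-check from ordinary user space. The program below restates the macros purely for illustration (it does not include the LWK header) and asserts the boundary values implied by the header comments: nice -20 maps to MAX_RT_PRIO, nice 19 to MAX_PRIO-1, and the rlimit-style range is [1,40].

/* Illustrative sanity check for the prio.h conversions; the macro
 * definitions below are copied from the patch, not included from it. */
#include <assert.h>
#include <stdio.h>

#define MAX_NICE            19
#define MIN_NICE            -20
#define NICE_WIDTH          (MAX_NICE - MIN_NICE + 1)
#define MAX_USER_RT_PRIO    100
#define MAX_RT_PRIO         MAX_USER_RT_PRIO
#define MAX_PRIO            (MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO        (MAX_RT_PRIO + NICE_WIDTH / 2)
#define NICE_TO_PRIO(nice)  ((nice) + DEFAULT_PRIO)
#define PRIO_TO_NICE(prio)  ((prio) - DEFAULT_PRIO)

static long nice_to_rlimit(long nice) { return MAX_NICE - nice + 1; }
static long rlimit_to_nice(long prio) { return MAX_NICE - prio + 1; }

int main(void)
{
    /* Static priorities of nice values span MAX_RT_PRIO..MAX_PRIO-1. */
    assert(NICE_TO_PRIO(MIN_NICE) == MAX_RT_PRIO);    /* -20 -> 100 */
    assert(NICE_TO_PRIO(MAX_NICE) == MAX_PRIO - 1);   /*  19 -> 139 */
    assert(PRIO_TO_NICE(DEFAULT_PRIO) == 0);          /* 120 ->   0 */

    /* rlimit-style values span [1,40]; the two helpers invert each other. */
    assert(nice_to_rlimit(MIN_NICE) == 40);
    assert(nice_to_rlimit(MAX_NICE) == 1);
    assert(rlimit_to_nice(nice_to_rlimit(-7)) == -7);

    printf("prio.h conversion checks passed\n");
    return 0;
}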
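A minimal user-space smoke test for the newly handled syscalls is sketched below; it is likewise not part of the patch. It assumes it runs on a process managed by this kernel: the switch to SCHED_RR goes through the Linux-side SCHED_CHECK_ROOT delegation, so it must run as root to succeed, and the reported round-robin interval is the fixed 10 us (tv_nsec = 10000) that sched_rr_get_interval returns for SCHED_RR tasks.

/* Smoke test for the sched_* syscalls added by this patch; build with a
 * normal libc and run as root inside the LWK partition. */
#include <sched.h>
#include <stdio.h>
#include <time.h>

int main(void)
{
    struct sched_param sp = { 0 };
    struct timespec ts;

    /* Valid RR/FIFO priorities are 1..MAX_USER_RT_PRIO-1, i.e. 1..99 here. */
    printf("SCHED_RR prio range: %d..%d\n",
           sched_get_priority_min(SCHED_RR),
           sched_get_priority_max(SCHED_RR));

    sp.sched_priority = 10;
    if (sched_setscheduler(0, SCHED_RR, &sp) != 0) {
        perror("sched_setscheduler");   /* expect EPERM without root */
        return 1;
    }

    /* Policy and priority are stored per-process on the LWK side. */
    printf("policy: %d (SCHED_RR is %d)\n", sched_getscheduler(0), SCHED_RR);
    if (sched_getparam(0, &sp) == 0)
        printf("priority: %d\n", sp.sched_priority);

    if (sched_rr_get_interval(0, &ts) == 0)
        printf("rr interval: %ld s %ld ns\n", (long)ts.tv_sec, ts.tv_nsec);

    return 0;
}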