Compare commits


87 Commits
0.9.0 ... 1.0.0

Author SHA1 Message Date
eb6de9d1de delete debug code 2015-11-13 15:10:14 +09:00
42c8ef6539 do_fork(): fix CLONE_PARENT_SETTID bug 2015-11-13 12:46:09 +09:00
780d4fc29b futex_wait(): support for FUTEX_CLOCK_REALTIME 2015-11-13 12:46:02 +09:00
94fcc5bb9a futex_wait: add signal check 2015-11-12 09:38:36 +09:00
e822fc47dd fix deadlock when killing subthreads 2015-11-11 23:03:43 +09:00
26492a2895 vsyscall_gettimeofday: make timeval from TSC 2015-11-11 19:45:14 +09:00
1a5ff7f535 gettimeofday: gather variables into new struct 2015-11-11 18:31:33 +09:00
4c181d7fc0 smp-x86: add support for dump analyzer 2015-11-09 16:06:55 +09:00
be78eb752e time_init: fix divide-by-zero on KVM 2015-11-06 19:31:42 +09:00
0ad7c8ac50 nanosleep: fix arguments to be delegated 2015-11-06 19:31:42 +09:00
e9458a6cd3 fix ptrace02 failure 2015-10-30 16:59:03 +09:00
9e3b0b5866 fix bug 'GDB: missing parent-child relationship'
refs #641
2015-10-30 15:06:27 +09:00
0eaa27291a thread: move clear_child_tid, etc. to main structure 2015-10-29 11:01:27 +09:00
0b07dd1b79 partially support madvise(MADV_REMOVE)
This MADV_REMOVE works only with a mapping that is
- created with shmat() and
- not sharing its memobj with other mappings.
2015-10-28 18:41:28 +09:00
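A minimal userspace sketch of what this partial support permits (illustrative
only; the constraints on the mapping are the ones stated in the commit
message above):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/mman.h>

int main(void)
{
	/* Create and attach a 1 MiB System V shared memory segment. */
	int shmid = shmget(IPC_PRIVATE, 1 << 20, IPC_CREAT | 0600);
	if (shmid < 0) { perror("shmget"); return 1; }
	char *p = shmat(shmid, NULL, 0);
	if (p == (void *)-1) { perror("shmat"); return 1; }

	memset(p, 0xa5, 1 << 20);	/* populate the pages */

	/* Punch the backing pages out of this shmat() mapping; per the
	 * commit above this only works while the memobj is not shared
	 * with other mappings. */
	if (madvise(p, 1 << 20, MADV_REMOVE))
		perror("madvise(MADV_REMOVE)");

	shmdt(p);
	shmctl(shmid, IPC_RMID, NULL);
	return 0;
}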
c25f8c7a39 support settimeofday() 2015-10-27 19:21:50 +09:00
9e53ae20d4 add memory barriers
- rmb()
- wmb()
2015-10-27 19:21:50 +09:00
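As a hedged illustration of how these two barriers pair up (the
publish/consume code below is hypothetical, not part of this change):

static struct { long a, b; } record;	/* hypothetical shared data */
static volatile int ready;

void publisher(long a, long b)
{
	record.a = a;
	record.b = b;
	wmb();		/* order the data stores before the flag store */
	ready = 1;
}

int consumer(long *a, long *b)
{
	if (!ready)
		return 0;
	rmb();		/* order the flag load before the data loads */
	*a = record.a;
	*b = record.b;
	return 1;
}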
09c9ee58d1 add 64bit atomic operations
- ihk_atomic64_t
- IHK_ATOMIC64_INIT()
- ihk_atomic64_read()
- ihk_atomic64_inc()
2015-10-27 19:21:50 +09:00
153a59a6f4 gettimeofday: avoid per-cpu data in the calculation
It is difficult to safely update other CPUs' per-cpu data from
settimeofday(), so the calculation uses global data instead.
2015-10-27 19:21:50 +09:00
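A sketch of the writer side this design implies, using the tod_data and
version fields and the helpers that appear in the vsyscall diff below (the
function name and exact body are assumptions; the real settimeofday() path
is in the suppressed kernel diff):

/* Seqlock-style update of the single global time origin: bump the
 * version to an odd value, update, bump it back to even.  Readers
 * (see calculate_time_from_tsc() below) spin while the version is odd
 * and retry if it changed across their read. */
void set_time_origin(struct timespec *new_origin)	/* name is hypothetical */
{
	ihk_atomic64_inc(&tod_data.version);	/* odd: update in progress */
	wmb();
	tod_data.origin = *new_origin;
	wmb();
	ihk_atomic64_inc(&tod_data.version);	/* even: update published */
}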
cad72a8562 set coredump bit in exit status on SIGXCPU or SIGXFSZ 2015-10-22 20:57:37 +09:00
343bfbd30a rename status field back 2015-10-22 20:26:50 +09:00
4e4f1208f7 delete unused member 2015-10-19 20:12:26 +09:00
a325a78866 refactor signal sending 2015-10-15 17:10:02 +09:00
6ae99454da delete debug print 2015-10-15 06:51:41 +09:00
04e193de13 refactor process structures 2015-10-13 23:04:08 +09:00
2ca46fabfd support reader/writer lock 2015-10-02 14:05:10 +09:00
5b737b499d fix cmpxchgq operand 2015-10-02 14:04:05 +09:00
cb4f3a4d65 take into account args/envs' offset in page
- prepare_process_ranges_args_envs()
2015-10-01 21:08:42 +09:00
51789fcd38 initialize idle_vm for page faults 2015-10-01 21:08:35 +09:00
9f50c5dc3a mcexec_wait_syscall: handle request even if signaled (reworked) 2015-09-29 19:53:40 +09:00
cd905f7ad1 Revert "mcexec_wait_syscall: handle request even if signaled"
This reverts commit d862f345be.
2015-09-29 19:52:36 +09:00
79266f6b97 x86_issue_ipi: keep interrupt disabled while issuing IPI 2015-09-29 19:10:01 +09:00
a666b69c2c make x86_issue_ipi() call wait_icr_idle() 2015-09-29 19:10:01 +09:00
47e8552eba move wait_icr_idle() before x86_issue_ipi() 2015-09-29 19:10:00 +09:00
8dd9175411 schedule: fix null pointer dereference 2015-09-29 19:10:00 +09:00
f08e0c0054 guess whether MSR_PLATFORM_INFO exists or not 2015-09-29 19:10:00 +09:00
d862f345be mcexec_wait_syscall: handle request even if signaled 2015-09-24 21:35:30 +09:00
a14768c49a kmalloc: fix missing unlock on out-of-memory path 2015-09-18 21:26:15 +09:00
56e57775e7 clone: fix error message 2015-09-18 21:26:15 +09:00
b3b752ba41 nanosleep: use copy_from_user instead of direct access 2015-09-17 21:46:32 +09:00
7b32f2f73b nanosleep: fix tscs_rem underflow issue 2015-09-17 21:46:26 +09:00
ea5a1a8693 nanosleep: update *rem whenever signaled 2015-09-17 21:44:49 +09:00
92f8fb2b2b nanosleep: use copy_to_user instead of direct access 2015-09-17 21:44:49 +09:00
a3e440414d nanosleep: cosmetic change 2015-09-17 21:44:49 +09:00
10ba03ccea mcreboot-smp-x86.sh: fix querying free irq 2015-09-17 13:19:07 +09:00
ccb7c30a05 page_fault_handler(): reenable preempt after failed PF when process is exiting 2015-09-17 10:05:32 +09:00
7dfeb8e7ce create demand-paging mapping in case of MAP_SHARED
On current McKernel, only demand-paging mappings can be shared.
Therefore, if MAP_SHARED and MAP_ANONYMOUS are specified and
anon_on_demand is disabled, mmap(2) should create a demand-paging
mapping that is entirely populated with physical pages.
2015-09-16 21:38:00 +09:00
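An illustrative userspace consequence (standard POSIX usage, not code from
this change): with anon_on_demand disabled, the mapping below is created as
a fully populated demand-paging mapping, and parent and child share it:

#include <stdio.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	int *counter = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
	                    MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (counter == MAP_FAILED) { perror("mmap"); return 1; }

	*counter = 0;
	if (fork() == 0) {	/* the child sees the same physical pages */
		*counter = 42;
		_exit(0);
	}
	wait(NULL);
	printf("counter = %d\n", *counter);	/* prints 42 */
	return 0;
}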
b1b706453f vsyscall: send SIGSEGV to the caller if syscall fails
On CentOS 7 (RHEL 7?), "errno" isn't set when vsyscall_gettimeofday
fails. In that case, vsyscall_gettimeofday sends SIGSEGV to the
caller to report the failure of the gettimeofday operation.
2015-09-16 21:37:11 +09:00
bd5708286d make sys_gettimeofday() use copy_to_user() 2015-09-16 21:26:32 +09:00
c8a13cf213 make gettimeofday ignore NULL parameter 2015-09-16 21:26:24 +09:00
5ad0a03d18 make gettimeofday handle second parameter (timezone) 2015-09-16 21:25:29 +09:00
3819eec03f cosmetic changes
- sys_gettimeofday()
2015-09-16 21:13:12 +09:00
40b8587a8a schedule(): sync CPU_FLAG_NEED_RESCHED flag with clone and migrate 2015-09-16 19:22:40 +09:00
e7b1115572 mcreboot-smp-x86.sh: introduce ihk_ikc_irq_core argument 2015-09-14 17:30:25 +09:00
e1a01803d0 disable demand paging on ANONYMOUS mappings unless anon_on_demand kernel argument is passed 2015-09-14 17:26:37 +09:00
69f4b0e1ad gettimeofday()/nanosleep(): check arguments, return on pending signal 2015-09-14 17:05:30 +09:00
0909a5bed5 fix broken tracee context when the tracee calls execve 2015-09-03 10:05:25 +09:00
9dd224385e fix tracee freeze when SIGSEGV occurs on a traced process 2015-09-01 17:37:56 +09:00
4176c59fd3 use d_path to resolve the file path 2015-08-28 13:01:34 +09:00
afeee5432f fix delayed execve when envp is NULL 2015-08-28 13:00:45 +09:00
9ae5bcf46e gettimeofday(): an implementation based on CPU invariant TSC support 2015-08-24 23:53:56 +02:00
b8f166e608 mcreboot-smp-x86.sh: handle resource allocation after unloading; mcstop+release-smp-x86.sh 2015-08-22 18:55:53 +09:00
c85a9b99e1 a couple of cosmetic changes of debug messages 2015-08-22 18:53:14 +09:00
7c816a6b73 an implementation of the Mellor-Crummey Scott (MCS) lock 2015-08-20 15:26:52 +09:00
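A usage sketch for the API this adds (mcs_lock_node_t, mcs_lock_init(),
mcs_lock_lock(), and mcs_lock_unlock() are from the arch lock header diff
below; the surrounding function is illustrative):

static mcs_lock_node_t my_lock;	/* call mcs_lock_init(&my_lock) once at boot */

void critical_update(void)
{
	mcs_lock_node_t node;	/* per-acquirer queue node, on the stack */

	mcs_lock_lock(&my_lock, &node);
	/* ... critical section: each waiter spins on its own cache-aligned
	 * node, not on the shared lock word, so handoff is FIFO and
	 * cache-line contention stays local ... */
	mcs_lock_unlock(&my_lock, &node);
}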
5a0cd3f53f ptrace_detach when exiting
refs #590
2015-08-18 18:03:09 +09:00
9fa62adfe7 execve(): stay compliant with locked context switching 2015-08-10 14:18:11 +09:00
f0ab8ec89a sched_request_migrate(): change CPU flags atomically 2015-08-10 12:45:59 +09:00
f4cc82578d check_need_resched(): no thread migration in IRQ context 2015-08-10 12:43:35 +09:00
9ba40dc0ff schedule(): hold runq lock for the entire duration of context switching
Releasing the runq lock after loading page tables but before the
actual context switch can leave execution in an inconsistent state if
the current process is descheduled from an IRQ between these two
steps. This patch holds the runq lock with IRQs disabled and makes
the context switch a single atomic operation.
2015-08-10 12:37:12 +09:00
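A condensed, illustrative shape of the fix (runq_lock, runq_irqstate, and
release_runq_lock() are real names from this change; schedule() itself is
in the suppressed kernel diff):

void schedule_sketch(void)	/* illustrative, not the actual schedule() */
{
	/* take the runq lock with IRQs disabled and keep it across the
	 * entire switch */
	cpu_local_var(runq_irqstate) =
		ihk_mc_spinlock_lock(&(cpu_local_var(runq_lock)));

	/* ... pick the next thread and load its page tables ... */

	/* the context switch happens here while the lock is still held,
	 * so an IRQ cannot deschedule the current process between the
	 * page-table load and the switch */

	/* whatever context resumes next releases the lock on its own
	 * path; a brand-new thread does so from enter_user_mode via
	 * release_runq_lock(), as shown in the diffs below */
	release_runq_lock();
}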
8d6c97ea5c schedule(): disable auto thread migration 2015-08-07 16:07:31 +09:00
386f59000a mcreboot-smp-x86.sh.in: grant real user rw permission on /dev/mcos* 2015-08-07 13:33:44 +09:00
215cd370a1 ap_init(): clean up AP boot kernel messages 2015-08-07 10:57:59 +09:00
0a0e2c04a0 support for dynamically toggling time sharing when CPU is oversubscribed 2015-08-07 08:51:50 +09:00
aa191b87d3 schedule(): use XSAVE/XRSTOR and swap floating point registers in context switch 2015-08-07 08:41:00 +09:00
d5c243571f cpu_clear_and_set(): atomic CPU mask update in migration code 2015-08-06 10:49:55 +09:00
328e69a335 schedule(): do not preempt while holding spinlocks or while in offloaded syscall 2015-08-06 10:36:13 +09:00
b77755d0f7 obtain_clone_cpuid(): always start from CPU 0 and fill in cores linearly 2015-07-28 20:20:47 +09:00
d7bae14707 TEMPORARY: schedule(): move threads when core is explicitly oversubscribed 2015-07-28 20:12:58 +09:00
4e58d08f5c schedule_timeout(): give other processes a chance in spin sleep if CPU core is oversubscribed 2015-07-28 20:06:56 +09:00
9b1e691588 fix thread migration code (i.e., sched_setaffinity())
- move migration code into the idle() process and update schedule() to
  detect when a thread has moved to another CPU, to avoid doing
  housekeeping on behalf of the original one
- start the CPU search from core 0
- keep track of nested interrupts
2015-07-24 20:09:17 +09:00
3988b0fc61 keep track of IRQ context and don't do thread migration there 2015-07-23 16:56:58 +09:00
54eb345847 settid(): prevent modifying tid after thread migration 2015-07-23 16:51:24 +09:00
bbe7aef95b fix do_signal call (missing argument) 2015-07-17 10:18:43 +09:00
1ff4cf68c2 support SA_RESTART flag and restart syscall 2015-07-16 16:33:14 +09:00
1bc84d3feb modify to copy credentials 2015-07-13 15:29:26 +09:00
f7d78c8b7d sched_getaffinity(): return EINVAL for 0 length request (fixes LTP sched_getaffinity01) 2015-07-10 11:00:43 +09:00
7647c99cc2 do_migrate(): disable IRQ while holding migq_lock to avoid deadlocking with reschedule interrupts 2015-07-09 15:23:28 +09:00
43a774fbfc sched_setaffinity(): undo target core change, avoid abort on length mismatch 2015-07-09 11:00:26 +09:00
47 changed files with 4028 additions and 2411 deletions

View File

@ -44,7 +44,8 @@ install::
;; \
smp-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \

View File

@ -39,6 +39,7 @@
#define LAPIC_ICR0 0x300
#define LAPIC_ICR2 0x310
#define LAPIC_ESR 0x280
#define LOCAL_TIMER_VECTOR 0xef
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
@ -48,6 +49,8 @@
#define APIC_DM_NMI 0x00400
#define APIC_DM_INIT 0x00500
#define APIC_DM_STARTUP 0x00600
#define APIC_DIVISOR 16
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
//#define DEBUG_PRINT_CPU
@ -64,6 +67,7 @@ void assign_processor_id(void);
void arch_delay(int);
void x86_set_warm_reset(unsigned long ip, char *first_page_va);
void x86_init_perfctr(void);
int gettime_local_support = 0;
extern int kprintf(const char *format, ...);
@ -143,6 +147,8 @@ static void init_idt(void)
reload_idt();
}
static int xsave_available = 0;
void init_fpu(void)
{
unsigned long reg;
@ -170,20 +176,24 @@ void init_fpu(void)
reg |= ((1 << 9) | (1 << 10));
if(cpuid01_ecx & (1 << 26)) {
/* XSAVE set, enable access to xcr0 */
dkprintf("init_fpu(): XSAVE available\n");
xsave_available = 1;
reg |= (1 << 18);
}
asm volatile("movq %0, %%cr4" : : "r"(reg));
kprintf("init_fpu(): SSE init: CR4 = 0x%016lX; ", reg);
dkprintf("init_fpu(): SSE init: CR4 = 0x%016lX\n", reg);
/* Set xcr0[2:1] to enable avx ops */
if(cpuid01_ecx & (1 << 28)) {
reg = xgetbv(0);
reg |= 0x6;
xsetbv(0, reg);
dkprintf("init_fpu(): AVX init: XCR0 = 0x%016lX\n", reg);
}
kprintf("XCR0 = 0x%016lX\n", reg);
/* TODO: set MSR_IA32_XSS to enable xsaves/xrstors */
#else
kprintf("init_fpu(): SSE not enabled\n");
#endif
@ -246,6 +256,23 @@ void lapic_icr_write(unsigned int h, unsigned int l)
}
void lapic_timer_enable(unsigned int clocks)
{
unsigned int lvtt_value;
lapic_write(LAPIC_TIMER_INITIAL, clocks / APIC_DIVISOR);
lapic_write(LAPIC_TIMER_DIVIDE, 3);
/* initialize periodic timer */
lvtt_value = LOCAL_TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC;
lapic_write(LAPIC_TIMER, lvtt_value);
}
void lapic_timer_disable()
{
lapic_write(LAPIC_TIMER_INITIAL, 0);
}
void print_msr(int idx)
{
int bit;
@ -276,6 +303,10 @@ void init_pstate_and_turbo(void)
uint64_t eax, ecx;
asm volatile("cpuid" : "=a" (eax), "=c" (ecx) : "a" (0x6) : "%rbx", "%rdx");
if (!(ecx & 0x01)) {
/* P-states and/or Turbo Boost are not supported. */
return;
}
/* Query and set max pstate value:
*
@ -389,7 +420,7 @@ void init_pat(void)
boot_pat_state = rdmsr(MSR_IA32_CR_PAT);
wrmsr(MSR_IA32_CR_PAT, pat);
kprintf("PAT support detected and reconfigured.\n");
dkprintf("PAT support detected and reconfigured.\n");
}
void init_lapic(void)
@ -543,6 +574,29 @@ static void check_no_execute(void)
return;
}
void init_gettime_support(void)
{
uint64_t op;
uint64_t eax;
uint64_t ebx;
uint64_t ecx;
uint64_t edx;
/* Check if Invariant TSC supported.
* Processors support for invariant TSC is indicated by
* CPUID.80000007H:EDX[8].
* See page 2498 of the Intel64 and IA-32 Architectures Software
* Developers Manual - combined */
op = 0x80000007;
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a" (op));
if (edx & (1 << 8)) {
gettime_local_support = 1;
kprintf("Invariant TSC supported.\n");
}
}
void init_cpu(void)
{
enable_page_protection_fault();
@ -569,6 +623,8 @@ void setup_x86(void)
init_cpu();
init_gettime_support();
kprintf("setup_x86 done.\n");
}
@ -604,14 +660,16 @@ void setup_x86_ap(void (*next_func)(void))
void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs);
void check_signal(unsigned long, void *, int);
extern void tlb_flush_handler(int vector);
void handle_interrupt(int vector, struct x86_user_context *regs)
{
struct ihk_mc_interrupt_handler *h;
struct cpu_local_var *v = get_this_cpu_local_var();
lapic_ack();
++v->in_interrupt;
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
@ -665,6 +723,15 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
panic("Unhandled exception");
}
}
else if (vector == LOCAL_TIMER_VECTOR) {
unsigned long irqstate;
/* Timer interrupt, enabled only on oversubscribed CPU cores,
* request reschedule */
irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
v->flags |= CPU_FLAG_NEED_RESCHED;
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
}
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
@ -678,8 +745,10 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
}
}
check_signal(0, regs);
check_signal(0, regs, 0);
check_need_resched();
--v->in_interrupt;
}
void gpe_handler(struct x86_user_context *regs)
@ -691,7 +760,7 @@ void gpe_handler(struct x86_user_context *regs)
panic("gpe_handler");
}
set_signal(SIGSEGV, regs, NULL);
check_signal(0, regs);
check_signal(0, regs, 0);
check_need_resched();
// panic("GPF");
}
@ -719,7 +788,7 @@ void debug_handler(struct x86_user_context *regs)
memset(&info, '\0', sizeof info);
info.si_code = si_code;
set_signal(SIGTRAP, regs, &info);
check_signal(0, regs);
check_signal(0, regs, 0);
check_need_resched();
}
@ -736,13 +805,25 @@ void int3_handler(struct x86_user_context *regs)
memset(&info, '\0', sizeof info);
info.si_code = TRAP_BRKPT;
set_signal(SIGTRAP, regs, &info);
check_signal(0, regs);
check_signal(0, regs, 0);
check_need_resched();
}
static void wait_icr_idle(void)
{
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
cpu_pause();
}
}
void x86_issue_ipi(unsigned int apicid, unsigned int low)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
wait_icr_idle();
lapic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
cpu_restore_interrupt(flags);
}
static void outb(uint8_t v, uint16_t port)
@ -755,13 +836,6 @@ static void set_warm_reset_vector(unsigned long ip)
x86_set_warm_reset(ip, first_page_va);
}
static void wait_icr_idle(void)
{
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
cpu_pause();
}
}
static void __x86_wakeup(int apicid, unsigned long ip)
{
int retry = 3;
@ -775,7 +849,6 @@ static void __x86_wakeup(int apicid, unsigned long ip)
/* INIT */
x86_issue_ipi(apicid,
APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
wait_icr_idle();
x86_issue_ipi(apicid,
APIC_INT_LEVELTRIG | APIC_DM_INIT);
@ -976,6 +1049,18 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
}
extern char enter_user_mode[];
/*
* Release runq_lock before entering user space.
* This is needed because schedule() holds the runq lock throughout
* the context switch and when a new process is created it starts
* execution in enter_user_mode, which in turn calls this function.
*/
void release_runq_lock(void)
{
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
cpu_local_var(runq_irqstate));
}
/*@
@ requires \valid(ctx);
@ -1168,7 +1253,6 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
{
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
wait_icr_idle();
x86_issue_ipi(cpu, vector);
return 0;
}
@ -1178,54 +1262,85 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
@ ensures proc->fp_regs == NULL;
@*/
void
release_fp_regs(struct process *proc)
release_fp_regs(struct thread *thread)
{
int pages;
if (!proc->fp_regs)
if (thread && !thread->fp_regs)
return;
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
ihk_mc_free_pages(proc->fp_regs, 1);
proc->fp_regs = NULL;
ihk_mc_free_pages(thread->fp_regs, pages);
thread->fp_regs = NULL;
}
void
save_fp_regs(struct process *proc)
save_fp_regs(struct thread *thread)
{
int pages;
if (proc->fp_regs)
return;
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
proc->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
if(!proc->fp_regs)
return;
memset(proc->fp_regs, 0, sizeof(fp_regs_struct));
// TODO: do xsave
if (!thread->fp_regs) {
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
if (!thread->fp_regs) {
kprintf("error: allocating fp_regs pages\n");
return;
}
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
}
if (xsave_available) {
unsigned int low, high;
/* Request full save of x87, SSE and AVX states */
low = 0x7;
high = 0;
asm volatile("xsave %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high)
: "memory");
dkprintf("fp_regs for TID %d saved\n", thread->tid);
}
}
void
restore_fp_regs(struct process *proc)
restore_fp_regs(struct thread *thread)
{
if (!proc->fp_regs)
if (!thread->fp_regs)
return;
// TODO: do xrstor
release_fp_regs(proc);
if (xsave_available) {
unsigned int low, high;
/* Request full restore of x87, SSE and AVX states */
low = 0x7;
high = 0;
asm volatile("xrstor %0" : : "m" (*thread->fp_regs),
"a" (low), "d" (high));
dkprintf("fp_regs for TID %d restored\n", thread->tid);
}
// XXX: why release??
//release_fp_regs(thread);
}
ihk_mc_user_context_t *lookup_user_context(struct process *proc)
ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
{
ihk_mc_user_context_t *uctx = proc->uctx;
ihk_mc_user_context_t *uctx = thread->uctx;
if ((!(proc->ftn->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE
if ((!(thread->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE
| PS_STOPPED | PS_TRACED))
&& (proc != cpu_local_var(current)))
&& (thread != cpu_local_var(current)))
|| !uctx->is_gpr_valid) {
return NULL;
}
if (!uctx->is_sr_valid) {
uctx->sr.fs_base = proc->thread.tlsblock_base;
uctx->sr.fs_base = thread->tlsblock_base;
uctx->sr.gs_base = 0;
uctx->sr.ds = 0;
uctx->sr.es = 0;
@ -1237,3 +1352,9 @@ ihk_mc_user_context_t *lookup_user_context(struct process *proc)
return uctx;
} /* lookup_user_context() */
void zero_tsc(void)
{
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
}

View File

@ -78,11 +78,11 @@ int get_prstatus_size(void)
* \brief Fill a prstatus structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs0 A pointer to a x86_regs structure.
*/
void fill_prstatus(struct note *head, struct process *proc, void *regs0)
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
{
void *name;
struct elf_prstatus64 *prstatus;
@ -160,11 +160,11 @@ int get_prpsinfo_size(void)
* \brief Fill a prpsinfo structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
{
void *name;
struct elf_prpsinfo64 *prpsinfo;
@ -176,8 +176,8 @@ void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
memcpy(name, "CORE", sizeof("CORE"));
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
prpsinfo->pr_state = proc->ftn->status;
prpsinfo->pr_pid = proc->ftn->pid;
prpsinfo->pr_state = thread->status;
prpsinfo->pr_pid = thread->proc->pid;
/*
We leave most of the fields unfilled.
@ -210,11 +210,11 @@ int get_auxv_size(void)
* \brief Fill an AUXV structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_auxv(struct note *head, struct process *proc, void *regs)
void fill_auxv(struct note *head, struct thread *thread, void *regs)
{
void *name;
void *auxv;
@ -225,7 +225,7 @@ void fill_auxv(struct note *head, struct process *proc, void *regs)
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
auxv = name + align32(sizeof("CORE"));
memcpy(auxv, proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
}
/**
@ -243,23 +243,23 @@ int get_note_size(void)
* \brief Fill the NOTE segment.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_note(void *note, struct process *proc, void *regs)
void fill_note(void *note, struct thread *thread, void *regs)
{
fill_prstatus(note, proc, regs);
fill_prstatus(note, thread, regs);
note += get_prstatus_size();
fill_prpsinfo(note, proc, regs);
fill_prpsinfo(note, thread, regs);
note += get_prpsinfo_size();
fill_auxv(note, proc, regs);
fill_auxv(note, thread, regs);
}
/**
* \brief Generate an image of the core file.
*
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
* \param coretable(out) An array of core chunks.
* \param chunks(out) Number of the entries of coretable.
@ -271,7 +271,7 @@ void fill_note(void *note, struct process *proc, void *regs)
* should be zero.
*/
int gencore(struct process *proc, void *regs,
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
{
struct coretable *ct = NULL;
@ -279,7 +279,7 @@ int gencore(struct process *proc, void *regs,
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range;
struct process_vm *vm = proc->vm;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
@ -306,7 +306,7 @@ int gencore(struct process *proc, void *regs,
unsigned long p, phys;
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
@ -326,7 +326,7 @@ int gencore(struct process *proc, void *regs,
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = proc->vm->region;
struct vm_regions region = thread->vm->region;
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
@ -364,7 +364,7 @@ int gencore(struct process *proc, void *regs,
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, proc, regs);
fill_note(note, thread, regs);
/* program header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
@ -434,7 +434,7 @@ int gencore(struct process *proc, void *regs,
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* We begin a new chunk */
@ -472,9 +472,9 @@ int gencore(struct process *proc, void *regs,
i++;
}
} else {
if ((proc->vm->region.user_start <= range->start) &&
(range->end <= proc->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start, &phys) != 0) {
dkprintf("could not convert user virtual address %lx"
"to physical address", range->start);

View File

@ -5,15 +5,20 @@
#define __HEADER_X86_COMMON_ARCH_LOCK
#include <ihk/cpu.h>
#include <ihk/atomic.h>
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
#ifdef DEBUG_SPINLOCK
#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK)
int __kprintf(const char *format, ...);
#endif
typedef int ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
#define IHK_STATIC_SPINLOCK_FUNCS
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
@ -22,7 +27,17 @@ static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
}
#define SPIN_LOCK_UNLOCKED 0
static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_lock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
@ -41,10 +56,8 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
#endif
#ifdef DEBUG_SPINLOCK
__kprintf("[%d] trying to grab lock: 0x%lX\n",
ihk_mc_get_processor_id(), lock);
#endif
preempt_disable();
asm volatile("lock; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
@ -60,36 +73,431 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
:
: "memory", "cc");
#ifdef DEBUG_SPINLOCK
__kprintf("[%d] holding lock: 0x%lX\n", ihk_mc_get_processor_id(), lock);
#endif
}
static unsigned long ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_lock(l);\
__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
ihk_mc_spinlock_lock_noirq(lock);
__ihk_mc_spinlock_lock_noirq(lock);
return flags;
}
static void ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
preempt_enable();
}
static void ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock(l, f) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock((l), (f)); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
ihk_mc_spinlock_unlock_noirq(lock);
__ihk_mc_spinlock_unlock_noirq(lock);
cpu_restore_interrupt(flags);
#ifdef DEBUG_SPINLOCK
__kprintf("[%d] released lock: 0x%lX\n", ihk_mc_get_processor_id(), lock);
}
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
typedef struct mcs_lock_node {
unsigned long locked;
struct mcs_lock_node *next;
} __attribute__((aligned(64))) mcs_lock_node_t;
static void mcs_lock_init(struct mcs_lock_node *node)
{
node->locked = 0;
node->next = NULL;
}
static void mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
(unsigned long)node);
if (pred) {
node->locked = 1;
pred->next = node;
while (node->locked != 0) {
cpu_pause();
}
}
}
static void mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
struct mcs_lock_node *old = (struct mcs_lock_node *)
atomic_cmpxchg8((unsigned long *)&lock->next,
(unsigned long)node, (unsigned long)0);
if (old == node) {
return;
}
while (node->next == NULL) {
cpu_pause();
}
}
node->next->locked = 0;
}
// reader/writer lock
typedef struct mcs_rwlock_node {
ihk_atomic_t count; // num of readers (use only common reader)
char type; // lock type
#define MCS_RWLOCK_TYPE_COMMON_READER 0
#define MCS_RWLOCK_TYPE_READER 1
#define MCS_RWLOCK_TYPE_WRITER 2
char locked; // lock
#define MCS_RWLOCK_LOCKED 1
#define MCS_RWLOCK_UNLOCKED 0
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
} __attribute__((aligned(64))) mcs_rwlock_node_t;
typedef struct mcs_rwlock_node_irqsave {
struct mcs_rwlock_node node;
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
typedef struct mcs_rwlock_lock {
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
{
ihk_atomic_set(&lock->reader.count, 0);
lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER;
lock->node = NULL;
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
#endif
static void
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_WRITER;
node->next = NULL;
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
(unsigned long)node);
if (pred) {
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
}
static void
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
{
struct mcs_rwlock_node *p;
struct mcs_rwlock_node *f = NULL;
struct mcs_rwlock_node *n;
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
for(p = &lock->reader; p->next; p = n){
n = p->next;
if(p->next->type == MCS_RWLOCK_TYPE_READER){
p->next = n->next;
if(lock->node == n){
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)n,
(unsigned long)p);
if(old != n){ // couldn't change
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
}
else if(p->next == NULL){
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
if(f){
ihk_atomic_inc(&lock->reader.count);
n->locked = MCS_RWLOCK_UNLOCKED;
}
else
f = n;
n = p;
}
if(n->next == NULL && lock->node != n){
while (n->next == NULL && lock->node != n) {
cpu_pause();
}
}
}
f->locked = MCS_RWLOCK_UNLOCKED;
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
#endif
static void
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
if (node->next == NULL) {
struct mcs_rwlock_node *old = (struct mcs_rwlock_node *)
atomic_cmpxchg8((unsigned long *)&lock->node,
(unsigned long)node, (unsigned long)0);
if (old == node) {
goto out;
}
while (node->next == NULL) {
cpu_pause();
}
}
if(node->next->type == MCS_RWLOCK_TYPE_READER){
lock->reader.next = node->next;
mcs_rwlock_unlock_readers(lock);
}
else{
node->next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
#endif
static void
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_READER;
node->next = NULL;
node->dmy1 = ihk_mc_get_processor_id();
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
(unsigned long)node);
if (pred) {
if(pred == &lock->reader){
if(ihk_atomic_inc_return(&pred->count) != 1){
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)node,
(unsigned long)pred);
if (old == node) {
goto out;
}
while (node->next == NULL) {
cpu_pause();
}
pred->next = node->next;
if(node->next->type == MCS_RWLOCK_TYPE_READER)
mcs_rwlock_unlock_readers(lock);
goto out;
}
ihk_atomic_dec(&pred->count);
}
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
else {
lock->reader.next = node;
mcs_rwlock_unlock_readers(lock);
}
out:
return;
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
#endif
static void
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
if(ihk_atomic_dec_return(&lock->reader.count))
goto out;
if (lock->reader.next == NULL) {
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)&lock->reader,
(unsigned long)0);
if (old == &lock->reader) {
goto out;
}
while (lock->reader.next == NULL) {
cpu_pause();
}
}
if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){
mcs_rwlock_unlock_readers(lock);
}
else{
lock->reader.next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
#endif
static void
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_writer_lock_noirq(lock, &node->node);
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
#endif
static void
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
__mcs_rwlock_writer_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
#endif
static void
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_reader_lock_noirq(lock, &node->node);
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
#endif
static void
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
__mcs_rwlock_reader_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
}
#endif

View File

@ -0,0 +1,28 @@
/**
* \file cpu.h
* License details are found in the file LICENSE.
* \brief
* Declare architecture-dependent types and functions to control CPU.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef ARCH_CPU_H
#define ARCH_CPU_H
#include <ihk/cpu.h>
static inline void rmb(void)
{
barrier();
}
static inline void wmb(void)
{
barrier();
}
#endif /* ARCH_CPU_H */

View File

@ -13,6 +13,10 @@
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
#define HEADER_X86_COMMON_IHK_ATOMIC_H
/***********************************************************************
* ihk_atomic_t
*/
typedef struct {
int counter;
} ihk_atomic_t;
@ -95,6 +99,30 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v))
#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v))
/***********************************************************************
* ihk_atomic64_t
*/
typedef struct {
long counter64;
} ihk_atomic64_t;
#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) }
static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
{
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
{
asm volatile ("lock incq %0" : "+m"(v->counter64));
}
/***********************************************************************
* others
*/
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
@ -112,6 +140,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
__x; \
})
static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
{
unsigned long __x = (x);
asm volatile("xchgq %0,%1"
: "=r" (__x)
: "m" (*(volatile unsigned long*)(ptr)), "0" (__x)
: "memory");
return __x;
}
#define __xchg(x, ptr, size) \
({ \
__typeof(*(ptr)) __x = (x); \
@ -150,5 +189,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
#define xchg(ptr, v) \
__xchg((v), (ptr), sizeof(*ptr))
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
unsigned long oldval,
unsigned long newval)
{
asm volatile("lock; cmpxchgq %2, %1\n"
: "=a" (oldval), "+m" (*addr)
: "r" (newval), "0" (oldval)
: "memory"
);
return oldval;
}
#endif

View File

@ -58,6 +58,7 @@
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_XSS 0xda0
#define CVAL(event, mask) \

View File

@ -26,6 +26,7 @@ SYSCALL_DELEGATED(2, open)
SYSCALL_HANDLED(3, close)
SYSCALL_DELEGATED(4, stat)
SYSCALL_DELEGATED(5, fstat)
SYSCALL_DELEGATED(7, poll)
SYSCALL_DELEGATED(8, lseek)
SYSCALL_HANDLED(9, mmap)
SYSCALL_HANDLED(10, mprotect)
@ -39,6 +40,7 @@ SYSCALL_DELEGATED(17, pread64)
SYSCALL_DELEGATED(18, pwrite64)
SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_DELEGATED(23, select)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(26, msync)
@ -48,6 +50,7 @@ SYSCALL_HANDLED(29, shmget)
SYSCALL_HANDLED(30, shmat)
SYSCALL_HANDLED(31, shmctl)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(35, nanosleep)
SYSCALL_HANDLED(39, getpid)
SYSCALL_HANDLED(56, clone)
SYSCALL_DELEGATED(57, fork)
@ -57,11 +60,14 @@ SYSCALL_HANDLED(60, exit)
SYSCALL_HANDLED(61, wait4)
SYSCALL_HANDLED(62, kill)
SYSCALL_DELEGATED(63, uname)
SYSCALL_DELEGATED(65, semop)
SYSCALL_HANDLED(67, shmdt)
SYSCALL_DELEGATED(69, msgsnd)
SYSCALL_DELEGATED(70, msgrcv)
SYSCALL_DELEGATED(72, fcntl)
SYSCALL_DELEGATED(79, getcwd)
SYSCALL_DELEGATED(89, readlink)
SYSCALL_DELEGATED(96, gettimeofday)
SYSCALL_HANDLED(96, gettimeofday)
SYSCALL_HANDLED(97, getrlimit)
SYSCALL_HANDLED(101, ptrace)
SYSCALL_HANDLED(102, getuid)
@ -99,23 +105,31 @@ SYSCALL_HANDLED(151, mlockall)
SYSCALL_HANDLED(152, munlockall)
SYSCALL_HANDLED(158, arch_prctl)
SYSCALL_HANDLED(160, setrlimit)
SYSCALL_HANDLED(164, settimeofday)
SYSCALL_HANDLED(186, gettid)
SYSCALL_DELEGATED(201, time)
SYSCALL_HANDLED(202, futex)
SYSCALL_HANDLED(203, sched_setaffinity)
SYSCALL_HANDLED(204, sched_getaffinity)
SYSCALL_DELEGATED(208, io_getevents)
SYSCALL_HANDLED(216, remap_file_pages)
SYSCALL_DELEGATED(217, getdents64)
SYSCALL_HANDLED(218, set_tid_address)
SYSCALL_DELEGATED(220, semtimedop)
SYSCALL_DELEGATED(230, clock_nanosleep)
SYSCALL_HANDLED(231, exit_group)
SYSCALL_DELEGATED(232, epoll_wait)
SYSCALL_HANDLED(234, tgkill)
SYSCALL_HANDLED(237, mbind)
SYSCALL_HANDLED(238, set_mempolicy)
SYSCALL_HANDLED(239, get_mempolicy)
SYSCALL_HANDLED(247, waitid)
SYSCALL_HANDLED(256, migrate_pages)
SYSCALL_DELEGATED(270, pselect6)
SYSCALL_DELEGATED(271, ppoll)
SYSCALL_HANDLED(273, set_robust_list)
SYSCALL_HANDLED(279, move_pages)
SYSCALL_DELEGATED(281, epoll_pwait)
SYSCALL_HANDLED(282, signalfd)
SYSCALL_HANDLED(289, signalfd4)
#ifdef DCFA_KMOD

View File

@ -206,6 +206,7 @@ x86_syscall:
.globl enter_user_mode
enter_user_mode:
callq release_runq_lock
movq $0, %rdi
movq %rsp, %rsi
call check_signal

View File

@ -2191,7 +2191,7 @@ int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t
cpsize = remain;
}
error = ihk_mc_pt_virt_to_phys(vm->page_table, from, &pa);
error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, from, &pa);
if (error) {
return error;
}
@ -2274,7 +2274,7 @@ int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
cpsize = remain;
}
error = ihk_mc_pt_virt_to_phys(vm->page_table, to, &pa);
error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
if (error) {
return error;
}
@ -2330,7 +2330,7 @@ int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t
cpsize = remain;
}
error = ihk_mc_pt_virt_to_phys(vm->page_table, to, &pa);
error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
if (error) {
kprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, to, error);
return error;

File diff suppressed because it is too large.

View File

@ -18,20 +18,93 @@
*/
#include <syscall.h>
#include <ihk/atomic.h>
#include <arch/cpu.h>
extern int vsyscall_gettimeofday(void *tv, void *tz)
extern int vsyscall_gettimeofday(struct timeval *tv, void *tz)
__attribute__ ((section (".vsyscall.gettimeofday")));
int vsyscall_gettimeofday(void *tv, void *tz)
struct tod_data_s tod_data
__attribute__ ((section(".vsyscall.gettimeofday.data"))) = {
.do_local = 0,
.version = IHK_ATOMIC64_INIT(0),
};
static inline void cpu_pause_for_vsyscall(void)
{
asm volatile ("pause" ::: "memory");
return;
} /* cpu_pause_for_vsyscall() */
static inline void calculate_time_from_tsc(struct timespec *ts)
{
long ver;
unsigned long current_tsc;
__time_t sec_delta;
long ns_delta;
for (;;) {
while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) {
/* settimeofday() is in progress */
cpu_pause_for_vsyscall();
}
rmb();
*ts = tod_data.origin;
rmb();
if (ver == ihk_atomic64_read(&tod_data.version)) {
break;
}
/* settimeofday() has intervened */
cpu_pause_for_vsyscall();
}
current_tsc = rdtsc();
sec_delta = current_tsc / tod_data.clocks_per_sec;
ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec)
/ tod_data.clocks_per_sec;
/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
ts->tv_sec += sec_delta;
ts->tv_nsec += ns_delta;
if (ts->tv_nsec >= NS_PER_SEC) {
ts->tv_nsec -= NS_PER_SEC;
++ts->tv_sec;
}
return;
} /* calculate_time_from_tsc() */
int vsyscall_gettimeofday(struct timeval *tv, void *tz)
{
int error;
struct timespec ats;
if (!tv && !tz) {
/* nothing to do */
return 0;
}
/* Do it locally if supported */
if (!tz && tod_data.do_local) {
calculate_time_from_tsc(&ats);
tv->tv_sec = ats.tv_sec;
tv->tv_usec = ats.tv_nsec / 1000;
return 0;
}
/* Otherwise syscall */
asm ("syscall" : "=a" (error)
: "a" (__NR_gettimeofday), "D" (tv), "S" (tz)
: "%rcx", "%r11", "memory");
if (error) {
*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
}
return error;
}
} /* vsyscall_gettimeofday() */
extern long vsyscall_time(void *tp)
__attribute__ ((section (".vsyscall.time")));

View File

@ -19,14 +19,19 @@ KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
mem="512M@0"
cpus=""
ihk_ikc_irq_core=0
# Get the number of CPUs on NUMA node 0
nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`
# Use the second half of the cores
let nr_cpus="$nr_cpus / 2"
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?"; exit; fi
if [ "$cpus" == "" ]; then
# Get the number of CPUs on NUMA node 0
nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`
# Use the second half of the cores
let nr_cpus="$nr_cpus / 2"
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?"; exit; fi
fi
# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
@ -42,15 +47,28 @@ fi
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
ihk_irq=""
for i in `seq 64 255`; do
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep $i`" == "" ]; then
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e '^$i$'`" == "" ]; then
ihk_irq=$i
break
fi
done
if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available"; exit; fi
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq; then echo "error: loading ihk-smp-x86"; exit; fi;
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then echo "error: loading ihk-smp-x86"; exit; fi;
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
# If loaded, but no resources allocated, get CPUs and memory
else
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
cpus_allocated=`${SBINDIR}/ihkosctl 0 query cpu`
if [ "$cpus_allocated" == "" ]; then
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
fi
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
mem_allocated=`${SBINDIR}/ihkosctl 0 query mem`
if [ "$mem_allocated" == "" ]; then
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
fi
fi
# Check for existing OS instance and destroy
@ -77,3 +95,4 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loadi
if ! ${SBINDIR}/ihkosctl 0 kargs hidos; then echo "error: setting kernel arguments"; exit; fi
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting"; exit; fi
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko"; exit; fi
if ! chown `logname` /dev/mcd* /dev/mcos*; then echo "error: chowning device files"; exit; fi

View File

@ -0,0 +1,47 @@
#!/bin/bash
# IHK SMP-x86 example McKernel unload script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2015 RIKEN AICS
#
# This is an example script for destroying McKernel and releasing IHK resources
# Note that the script does not output anything unless an error occurs.
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
mem=""
cpus=""
# No SMP module? Exit.
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit; fi
# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
fi
# Destroy all LWK instances
for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`;
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed"; exit; fi
done
# Query IHK-SMP resources and release them
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs"; exit; fi
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
mem=`${SBINDIR}/ihkconfig 0 query mem`
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory"; exit; fi
# Remove SMP module
if [ "`lsmod | grep ihk_smp_x86`" != "" ]; then
if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86"; exit; fi
fi

configure vendored
View File

@ -3816,7 +3816,7 @@ TARGET="$WITH_TARGET"
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in"
ac_config_files="$ac_config_files Makefile executer/user/Makefile executer/kernel/Makefile kernel/Makefile kernel/Makefile.build arch/x86/tools/mcreboot-attached-mic.sh arch/x86/tools/mcshutdown-attached-mic.sh arch/x86/tools/mcreboot-builtin-x86.sh arch/x86/tools/mcreboot-smp-x86.sh arch/x86/tools/mcstop+release-smp-x86.sh arch/x86/tools/mcshutdown-builtin-x86.sh arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in"
if test "x$enable_dcfa" = xyes; then :
@ -4539,6 +4539,7 @@ do
"arch/x86/tools/mcshutdown-attached-mic.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-attached-mic.sh" ;;
"arch/x86/tools/mcreboot-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-builtin-x86.sh" ;;
"arch/x86/tools/mcreboot-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot-smp-x86.sh" ;;
"arch/x86/tools/mcstop+release-smp-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcstop+release-smp-x86.sh" ;;
"arch/x86/tools/mcshutdown-builtin-x86.sh") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcshutdown-builtin-x86.sh" ;;
"arch/x86/tools/mcreboot.1") CONFIG_FILES="$CONFIG_FILES arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in" ;;
"kernel/Makefile.dcfa") CONFIG_FILES="$CONFIG_FILES kernel/Makefile.dcfa" ;;

View File

@ -167,6 +167,7 @@ AC_CONFIG_FILES([
arch/x86/tools/mcshutdown-attached-mic.sh
arch/x86/tools/mcreboot-builtin-x86.sh
arch/x86/tools/mcreboot-smp-x86.sh
arch/x86/tools/mcstop+release-smp-x86.sh
arch/x86/tools/mcshutdown-builtin-x86.sh
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
])

View File

@ -80,7 +80,11 @@ static int load_elf(struct linux_binprm *bprm
char buf[32];
int l;
int pass;
char pbuf[1024];
const char *path;
if(bprm->envc == 0)
return -ENOEXEC;
if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
return -ENOEXEC;
if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
@ -89,7 +93,11 @@ static int load_elf(struct linux_binprm *bprm
if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
return -ENOEXEC;
cp = strrchr(bprm->interp, '/');
path = d_path(&bprm->file->f_path, pbuf, 1024);
if(!path || IS_ERR(path))
path = bprm->interp;
cp = strrchr(path, '/');
if(!cp ||
!strcmp(cp, "/mcexec") ||
!strcmp(cp, "/ihkosctl") ||
@ -100,7 +108,7 @@ static int load_elf(struct linux_binprm *bprm
cnt[1] = bprm->envc;
for(pass = 0; pass < 2; pass++){
p = bprm->p;
mode = cnt[0] == 0? (cnt[1] == 0? 2: 1): 0;
mode = cnt[0] == 0? 1: 0;
if(pass == 1){
for(ep = env; ep->name; ep++){
if(ep->l)
@ -194,11 +202,11 @@ static int load_elf(struct linux_binprm *bprm
if(rc);
else if(env_mcexec_wl)
rc = !pathcheck(bprm->interp, env_mcexec_wl);
rc = !pathcheck(path, env_mcexec_wl);
else if(env_mcexec_bl)
rc = pathcheck(bprm->interp, env_mcexec_bl);
rc = pathcheck(path, env_mcexec_bl);
else
rc = pathcheck(bprm->interp, "/usr:/bin:/sbin:/opt");
rc = pathcheck(path, "/usr:/bin:/sbin:/opt");
for(ep = env; ep->name; ep++)
if(ep->val)

View File

@ -419,10 +419,10 @@ retry_alloc:
init_waitqueue_head(&wqhln->wq_syscall);
list_add_tail(&wqhln->list, &c->wq_list);
}
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
wqhln->req = 1;
wake_up(&wqhln->wq_syscall);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
return 0;
}
@ -497,7 +497,7 @@ retry_alloc:
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
if (ret) {
if (ret && !wqhln->req) {
kfree(wqhln);
return -EINTR;
}

View File

@ -183,6 +183,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
desc = malloc(sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * nhdrs);
desc->shell_path[0] = '\0';
fseek(fp, hdr.e_phoff, SEEK_SET);
j = 0;
desc->num_sections = nhdrs;
@ -1879,7 +1880,7 @@ fork_err:
}
if(ret != pid) {
fprintf(stderr, "ERROR: waiting for %lu\n", w.sr.args[0]);
fprintf(stderr, "ERROR: waiting for %lu rc=%d errno=%d\n", w.sr.args[0], ret, errno);
}
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);

View File

@ -24,18 +24,21 @@
#include <process.h>
#include <init.h>
#include <march.h>
#include <cls.h>
int num_processors = 1;
static volatile int ap_stop = 1;
extern void zero_tsc(void);
static void ap_wait(void)
{
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
while (ap_stop) {
barrier();
cpu_pause();
}
zero_tsc();
kmalloc_init();
sched_init();
@ -64,8 +67,6 @@ void ap_init(void)
ihk_mc_init_ap();
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
cpu_info = ihk_mc_get_cpu_info();
bsp_hw_id = ihk_mc_get_hardware_processor_id();
@ -74,18 +75,17 @@ void ap_init(void)
return;
}
kprintf("BSP HW ID = %d, ", bsp_hw_id);
kprintf("AP Booting :");
kprintf("BSP HW ID = %d\n", bsp_hw_id);
for (i = 0; i < cpu_info->ncpus; i++) {
if (cpu_info->hw_ids[i] == bsp_hw_id) {
continue;
}
kprintf("AP Booting: %d (HW ID: %d)\n", i, cpu_info->hw_ids[i]);
ihk_mc_boot_cpu(cpu_info->hw_ids[i], (unsigned long)ap_wait);
kprintf(" %d", cpu_info->hw_ids[i]);
num_processors++;
}
kprintf(" .. Done\n");
kprintf("AP Booting: Done\n");
}

View File

@ -23,6 +23,7 @@
extern int num_processors;
struct cpu_local_var *clv;
static int cpu_local_var_initialized = 0;
void cpu_local_var_init(void)
{
@ -33,9 +34,22 @@ void cpu_local_var_init(void)
clv = allocate_pages(z, IHK_MC_AP_CRITICAL);
memset(clv, 0, z * PAGE_SIZE);
cpu_local_var_initialized = 1;
}
struct cpu_local_var *get_cpu_local_var(int id)
{
return clv + id;
}
void preempt_enable(void)
{
if (cpu_local_var_initialized)
--cpu_local_var(no_preempt);
}
void preempt_disable(void)
{
if (cpu_local_var_initialized)
++cpu_local_var(no_preempt);
}

View File

@ -26,6 +26,7 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)

View File

@ -26,6 +26,7 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)

View File

@ -26,6 +26,7 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)

View File

@ -26,6 +26,7 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)
@ -41,9 +42,4 @@ SECTIONS
}
. = ALIGN(4096);
_end = .;
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@ -28,7 +28,7 @@ void kputs(char *buf)
int len = strlen(buf);
unsigned long flags;
flags = ihk_mc_spinlock_lock(&kmsg_lock);
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
if (len + kmsg_buf.tail > kmsg_buf.len) {
kmsg_buf.tail = 0;
@ -40,19 +40,19 @@ void kputs(char *buf)
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
kmsg_buf.tail += len;
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
}
#define KPRINTF_LOCAL_BUF_LEN 1024
unsigned long kprintf_lock(void)
{
return ihk_mc_spinlock_lock(&kmsg_lock);
return __ihk_mc_spinlock_lock(&kmsg_lock);
}
void kprintf_unlock(unsigned long irqflags)
{
ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
__ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
}
/* Caller must hold kmsg_lock! */
@ -85,7 +85,7 @@ int kprintf(const char *format, ...)
unsigned long flags;
char buf[KPRINTF_LOCAL_BUF_LEN];
flags = ihk_mc_spinlock_lock(&kmsg_lock);
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
/* Copy into the local buf */
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
@ -101,7 +101,7 @@ int kprintf(const char *format, ...)
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
kmsg_buf.tail += len;
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
return len;
}
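
kputs(), kprintf_lock()/kprintf_unlock() and kprintf() now take kmsg_lock through the double-underscore lock variants. The hunk does not show their definition; presumably the __-prefixed forms are the raw acquire/release without the preemption bookkeeping introduced in cls.c above, which a logging path needs because it must work before cpu_local_var_init() and from any context. A self-contained model of such a split (this is an assumption about the naming convention, not the tree's exact code):

    #include <stdatomic.h>

    void preempt_disable(void);          /* from the cls.c hunk above */

    typedef atomic_flag spinlock_model_t;   /* init with ATOMIC_FLAG_INIT */

    /* raw form: acquire only, usable before per-CPU data exists */
    static inline void __lock(spinlock_model_t *l)
    {
        while (atomic_flag_test_and_set_explicit(l, memory_order_acquire))
            ;                            /* spin */
    }

    /* full form: also marks the section non-preemptible */
    static inline void lock(spinlock_model_t *l)
    {
        preempt_disable();
        __lock(l);
    }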


@ -387,7 +387,7 @@ out:
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag)
{
struct process *proc = cpu_local_var(current);
struct thread *proc = cpu_local_var(current);
struct fileobj *obj = to_fileobj(memobj);
int error;
void *virt = NULL;


@ -79,6 +79,8 @@
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
extern struct sigpending *hassigpending(struct thread *thread);
int futex_cmpxchg_enabled;
/**
@ -103,7 +105,7 @@ int futex_cmpxchg_enabled;
struct futex_q {
struct plist_node list;
struct process *task;
struct thread *task;
ihk_spinlock_t *lock_ptr;
union futex_key key;
union futex_key *requeue_pi_key;
@ -243,7 +245,7 @@ static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
*/
static void wake_futex(struct futex_q *q)
{
struct process *p = q->task;
struct thread *p = q->task;
/*
* We set q->lock_ptr = NULL _before_ we wake up the task. If
@ -263,7 +265,7 @@ static void wake_futex(struct futex_q *q)
barrier();
q->lock_ptr = NULL;
sched_wakeup_process(p, PS_NORMAL);
sched_wakeup_thread(p, PS_NORMAL);
}
/*
@ -658,7 +660,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
* queue_me() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
xchg4(&(cpu_local_var(current)->ftn->status), PS_INTERRUPTIBLE);
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
queue_me(q, hb);
if (!plist_node_empty(&q->list)) {
@ -674,7 +676,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
}
/* This does not need to be serialized */
cpu_local_var(current)->ftn->status = PS_RUNNING;
cpu_local_var(current)->status = PS_RUNNING;
return time_remain;
}
@ -775,6 +777,11 @@ retry:
if (timeout && !time_remain)
goto out_put_key;
if(hassigpending(cpu_local_var(current))){
ret = -EINTR;
goto out_put_key;
}
/* RIKEN: no signals */
put_futex_key(fshared, &q.key);
goto retry;
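
The added hassigpending() check replaces the old "/* RIKEN: no signals */" behavior of unconditionally retrying: a futex wait that was neither woken nor timed out now fails with -EINTR so the caller can deliver the pending signal (see the "futex_wait: add to check signal" commit in this range). Condensed control flow, using the identifiers visible in the hunk (was_woken() is an assumed stand-in for the wakeup test):

    for (;;) {
        time_remain = futex_wait_queue_me(hb, &q, timeout);
        if (was_woken(&q))                       /* assumed predicate */
            break;                               /* normal wakeup */
        if (timeout && !time_remain)
            break;                               /* timed out */
        if (hassigpending(cpu_local_var(current))) {
            ret = -EINTR;                        /* new: signals interrupt the wait */
            break;
        }
        put_futex_key(fshared, &q.key);          /* spurious wakeup: the real code
                                                  * jumps back to retry:, which
                                                  * re-acquires the key */
    }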


@ -39,11 +39,11 @@
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
void check_mapping_for_proc(struct process *proc, unsigned long addr)
void check_mapping_for_proc(struct thread *thread, unsigned long addr)
{
unsigned long __phys;
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, (void*)addr, &__phys)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void*)addr, &__phys)) {
kprintf("check_map: no mapping for 0x%lX\n", addr);
}
else {
@ -60,7 +60,7 @@ void check_mapping_for_proc(struct process *proc, unsigned long addr)
* NOTE: if args, args_len, envs, envs_len are zero,
* the function constructs them based on the descriptor
*/
int prepare_process_ranges_args_envs(struct process *proc,
int prepare_process_ranges_args_envs(struct thread *thread,
struct program_load_desc *pn,
struct program_load_desc *p,
enum ihk_mc_pt_attribute attr,
@ -80,6 +80,10 @@ int prepare_process_ranges_args_envs(struct process *proc,
unsigned long flags;
uintptr_t interp_obase = -1;
uintptr_t interp_nbase = -1;
size_t map_size;
struct process *proc = thread->proc;
struct process_vm *vm = proc->vm;
struct address_space *as = vm->address_space;
n = p->num_sections;
@ -88,7 +92,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) {
interp_obase = pn->sections[i].vaddr;
interp_obase -= (interp_obase % pn->interp_align);
interp_nbase = proc->vm->region.map_start;
interp_nbase = vm->region.map_start;
interp_nbase = (interp_nbase + pn->interp_align - 1)
& ~(pn->interp_align - 1);
}
@ -113,7 +117,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
}
up = virt_to_phys(up_v);
if (add_process_memory_range(proc, s, e, up, flags, NULL, 0) != 0) {
if (add_process_memory_range(vm, s, e, up, flags, NULL, 0) != 0) {
ihk_mc_free_pages(up_v, range_npages);
kprintf("ERROR: adding memory range for ELF section %i\n", i);
goto err;
@ -122,14 +126,14 @@ int prepare_process_ranges_args_envs(struct process *proc,
{
void *_virt = (void *)s;
unsigned long _phys;
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(as->page_table,
_virt, &_phys)) {
kprintf("ERROR: no mapping for 0x%lX\n", _virt);
}
for (_virt = (void *)s + PAGE_SIZE;
(unsigned long)_virt < e; _virt += PAGE_SIZE) {
unsigned long __phys;
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(as->page_table,
_virt, &__phys)) {
kprintf("ERROR: no mapping for 0x%lX\n", _virt);
panic("mapping");
@ -148,23 +152,23 @@ int prepare_process_ranges_args_envs(struct process *proc,
/* TODO: Maybe we need flag */
if (pn->sections[i].interp) {
proc->vm->region.map_end = e;
vm->region.map_end = e;
}
else if (i == 0) {
proc->vm->region.text_start = s;
proc->vm->region.text_end = e;
vm->region.text_start = s;
vm->region.text_end = e;
}
else if (i == 1) {
proc->vm->region.data_start = s;
proc->vm->region.data_end = e;
vm->region.data_start = s;
vm->region.data_end = e;
}
else {
proc->vm->region.data_start =
(s < proc->vm->region.data_start ?
s : proc->vm->region.data_start);
proc->vm->region.data_end =
(e > proc->vm->region.data_end ?
e : proc->vm->region.data_end);
vm->region.data_start =
(s < vm->region.data_start ?
s : vm->region.data_start);
vm->region.data_end =
(e > vm->region.data_end ?
e : vm->region.data_end);
}
}
@ -172,17 +176,17 @@ int prepare_process_ranges_args_envs(struct process *proc,
pn->entry -= interp_obase;
pn->entry += interp_nbase;
p->entry = pn->entry;
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER,
pn->entry);
ihk_mc_modify_user_context(thread->uctx,
IHK_UCR_PROGRAM_COUNTER,
pn->entry);
}
proc->vm->region.brk_start = proc->vm->region.brk_end =
proc->vm->region.data_end;
vm->region.brk_start = vm->region.brk_end = vm->region.data_end;
/* Map, copy and update args and envs */
flags = VR_PROT_READ | VR_PROT_WRITE;
flags |= VRFLAG_PROT_TO_MAXPROT(flags);
addr = proc->vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
addr = vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT;
if((args_envs = ihk_mc_alloc_pages(ARGENV_PAGE_COUNT, IHK_MC_AP_NOWAIT)) == NULL){
@ -191,7 +195,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
}
args_envs_p = virt_to_phys(args_envs);
if(add_process_memory_range(proc, addr, e, args_envs_p,
if(add_process_memory_range(vm, addr, e, args_envs_p,
flags, NULL, 0) != 0){
ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT);
kprintf("ERROR: adding memory range for args/envs\n");
@ -205,7 +209,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
/* Only map remote address if it wasn't specified as an argument */
if (!args) {
// Map in remote physical addr of args and copy it
args_envs_npages = (p->args_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
map_size = ((uintptr_t)p->args & (PAGE_SIZE - 1)) + p->args_len;
args_envs_npages = (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
dkprintf("args_envs_npages: %d\n", args_envs_npages);
args_envs_rp = ihk_mc_map_memory(NULL,
(unsigned long)p->args, p->args_len);
@ -238,7 +243,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
/* Only map remote address if it wasn't specified as an argument */
if (!envs) {
// Map in remote physical addr of envs and copy it after args
args_envs_npages = (p->envs_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
map_size = ((uintptr_t)p->envs & (PAGE_SIZE - 1)) + p->envs_len;
args_envs_npages = (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
dkprintf("args_envs_npages: %d\n", args_envs_npages);
args_envs_rp = ihk_mc_map_memory(NULL, (unsigned long)p->envs,
p->envs_len);
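
The two map_size lines fix a truncation bug: p->args and p->envs need not be page-aligned, so the number of pages to map must cover the leading in-page offset as well as the length. For example, with 4 KiB pages, args at physical 0x1FF8 with args_len = 16 spans two pages, but the old (len + PAGE_SIZE - 1) >> PAGE_SHIFT computed one. The corrected arithmetic, self-contained:

    #define PAGE_SIZE  4096UL
    #define PAGE_SHIFT 12

    unsigned long npages_for(unsigned long paddr, unsigned long len)
    {
        /* leading in-page offset plus length, rounded up to whole pages */
        unsigned long map_size = (paddr & (PAGE_SIZE - 1)) + len;
        return (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
    }
    /* npages_for(0x1FF8, 16) == 2, while (16 + 4095) >> 12 == 1 */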
@ -302,10 +308,10 @@ int prepare_process_ranges_args_envs(struct process *proc,
dkprintf("env OK\n");
p->rprocess = (unsigned long)proc;
p->rpgtable = virt_to_phys(proc->vm->page_table);
p->rprocess = (unsigned long)thread;
p->rpgtable = virt_to_phys(as->page_table);
if (init_process_stack(proc, pn, argc, argv, envc, env) != 0) {
if (init_process_stack(thread, pn, argc, argv, envc, env) != 0) {
goto err;
}
@ -324,7 +330,9 @@ static int process_msg_prepare_process(unsigned long rphys)
unsigned long phys, sz;
struct program_load_desc *p, *pn;
int npages, n;
struct thread *thread;
struct process *proc;
struct process_vm *vm;
enum ihk_mc_pt_attribute attr;
attr = PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER;
@ -351,41 +359,44 @@ static int process_msg_prepare_process(unsigned long rphys)
memcpy_long(pn, p, sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * n);
if((proc = create_process(p->entry)) == NULL){
if((thread = create_thread(p->entry)) == NULL){
ihk_mc_free(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM;
}
proc->ftn->pid = pn->pid;
proc->ftn->pgid = pn->pgid;
proc = thread->proc;
vm = thread->vm;
proc->ftn->ruid = pn->cred[0];
proc->ftn->euid = pn->cred[1];
proc->ftn->suid = pn->cred[2];
proc->ftn->fsuid = pn->cred[3];
proc->ftn->rgid = pn->cred[4];
proc->ftn->egid = pn->cred[5];
proc->ftn->sgid = pn->cred[6];
proc->ftn->fsgid = pn->cred[7];
proc->pid = pn->pid;
proc->vm->address_space->pids[0] = pn->pid;
proc->pgid = pn->pgid;
proc->ruid = pn->cred[0];
proc->euid = pn->cred[1];
proc->suid = pn->cred[2];
proc->fsuid = pn->cred[3];
proc->rgid = pn->cred[4];
proc->egid = pn->cred[5];
proc->sgid = pn->cred[6];
proc->fsgid = pn->cred[7];
proc->vm->region.user_start = pn->user_start;
proc->vm->region.user_end = pn->user_end;
proc->vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
proc->vm->region.map_end = proc->vm->region.map_start;
vm->region.user_start = pn->user_start;
vm->region.user_end = pn->user_end;
vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
vm->region.map_end = proc->vm->region.map_start;
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
/* TODO: Clear it at the proper timing */
cpu_local_var(scp).post_idx = 0;
if (prepare_process_ranges_args_envs(proc, pn, p, attr,
if (prepare_process_ranges_args_envs(thread, pn, p, attr,
NULL, 0, NULL, 0) != 0) {
kprintf("error: preparing process ranges, args, envs, stack\n");
goto err;
}
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->ftn->pid,
proc->vm->page_table);
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
vm->address_space->page_table);
ihk_mc_free(pn);
@ -398,8 +409,7 @@ err:
ihk_mc_free(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz);
free_process_memory(proc);
destroy_process(proc);
destroy_thread(thread);
return -ENOMEM;
}
@ -473,8 +483,8 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
ihk_ikc_send(c, packet, 0);
}
extern unsigned long do_kill(int, int, int, struct siginfo *, int ptracecont);
extern void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
extern void process_procfs_request(unsigned long rarg);
extern int memcheckall();
@ -489,6 +499,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct ikc_scd_packet *packet = __packet;
struct ikc_scd_packet pckt;
int rc;
struct thread *thread;
struct process *proc;
struct mcctrl_signal {
int cond;
@ -536,13 +547,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
return -1;
}
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
proc = (struct process *)packet->arg;
thread = (struct thread *)packet->arg;
proc = thread->proc;
settid(proc, 0, cpuid, -1);
proc->ftn->status = PS_RUNNING;
runq_add_proc(proc, cpuid);
settid(thread, 0, cpuid, -1);
proc->status = PS_RUNNING;
thread->status = PS_RUNNING;
chain_thread(thread);
chain_process(proc);
runq_add_thread(thread, cpuid);
//cpu_local_var(next) = (struct process *)packet->arg;
//cpu_local_var(next) = (struct thread *)packet->arg;
return 0;
case SCD_MSG_SEND_SIGNAL:
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
@ -556,7 +571,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pckt.arg = packet->arg;
syscall_channel_send(c, &pckt);
rc = do_kill(info.pid, info.tid, info.sig, &info.info, 0);
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
return 0;
case SCD_MSG_PROCFS_REQUEST:


@ -41,12 +41,14 @@ struct cpu_local_var {
struct malloc_header free_list;
ihk_spinlock_t free_list_lock;
struct process idle;
struct fork_tree_node idle_ftn;
struct thread idle;
struct process idle_proc;
struct process_vm idle_vm;
struct address_space idle_asp;
ihk_spinlock_t runq_lock;
struct process *current;
unsigned long runq_irqstate;
struct thread *current;
struct list_head runq;
size_t runq_len;
@ -57,6 +59,7 @@ struct cpu_local_var {
struct ihk_ikc_channel_desc *syscall_channel2;
struct syscall_params scp2;
struct ikc_scd_init_param iip2;
struct resource_set *resource_set;
int status;
int fs;
@ -67,6 +70,9 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
int no_preempt;
int timer_enabled;
} __attribute__((aligned(64)));


@ -14,8 +14,18 @@
#define __HEADER_KMALLOC_H
#include <ihk/mm.h>
#include <cls.h>
#define kmalloc(size, flag) _kmalloc(size, flag, __FILE__, __LINE__)
void panic(const char *);
int kprintf(const char *format, ...);
#define kmalloc(size, flag) ({\
void *r = _kmalloc(size, flag, __FILE__, __LINE__);\
if(r == NULL){\
kprintf("kmalloc: out of memory %s:%d no_preempt=%d\n", __FILE__, __LINE__, cpu_local_var(no_preempt)); \
}\
r;\
})
#define kfree(ptr) _kfree(ptr, __FILE__, __LINE__)
#define memcheck(ptr, msg) _memcheck(ptr, msg, __FILE__, __LINE__, 0)
void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line);
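
The new kmalloc() wrapper is a GCC statement expression: the ({ ... }) block runs the allocation, logs failures with file, line and the current preemption depth, and yields r as the value of the whole expression, so kmalloc() can still be used inside an assignment or a condition. What a use of the macro expands to, in spirit:

    void *example_alloc(void)
    {
        void *p = ({
            void *r = _kmalloc(128, IHK_MC_AP_NOWAIT, __FILE__, __LINE__);
            if (r == NULL)
                kprintf("kmalloc: out of memory %s:%d no_preempt=%d\n",
                        __FILE__, __LINE__, cpu_local_var(no_preempt));
            r;      /* last expression is the value of the ({ ... }) */
        });
        return p;
    }

A failed allocation is thus logged exactly once at the call site while callers keep their usual NULL check.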


@ -31,6 +31,7 @@ enum {
/* for memobj.flags */
MF_HAS_PAGER = 0x0001,
MF_SHMDT_OK = 0x0002,
MF_IS_REMOVABLE = 0x0004,
};
struct memobj {
@ -120,6 +121,11 @@ static inline int memobj_has_pager(struct memobj *obj)
return !!(obj->flags & MF_HAS_PAGER);
}
static inline int memobj_is_removable(struct memobj *obj)
{
return !!(obj->flags & MF_IS_REMOVABLE);
}
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
struct shmid_ds;
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);


@ -66,4 +66,6 @@ static inline int page_is_multi_mapped(struct page *page)
return (ihk_atomic_read(&page->count) > 1);
}
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
#endif


@ -51,6 +51,7 @@
#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4)
#define VRFLAG_MAXPROT_TO_PROT(vrflag) (((vrflag) & VR_MAXPROT_MASK) >> 4)
// struct process.status, struct thread.status
#define PS_RUNNING 0x1
#define PS_INTERRUPTIBLE 0x2
#define PS_UNINTERRUPTIBLE 0x4
@ -58,15 +59,19 @@
#define PS_EXITED 0x10
#define PS_STOPPED 0x20
#define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */
#define PS_STOPPING 0x80
#define PS_TRACING 0x100
#define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE)
// struct process.ptrace
#define PT_TRACED 0x80 /* The process is ptraced */
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
// ptrace(2) request
#define PTRACE_TRACEME 0
#define PTRACE_PEEKTEXT 1
#define PTRACE_PEEKDATA 2
@ -95,6 +100,7 @@
#define PTRACE_GETREGSET 0x4204
#define PTRACE_SETREGSET 0x4205
// ptrace(2) options
#define PTRACE_O_TRACESYSGOOD 1
#define PTRACE_O_TRACEFORK 2
#define PTRACE_O_TRACEVFORK 4
@ -104,6 +110,7 @@
#define PTRACE_O_TRACEEXIT 0x40
#define PTRACE_O_MASK 0x7f
// ptrace(2) events
#define PTRACE_EVENT_FORK 1
#define PTRACE_EVENT_VFORK 2
#define PTRACE_EVENT_CLONE 3
@ -158,6 +165,64 @@
#include <waitq.h>
#include <futex.h>
struct resource_set;
struct process_hash;
struct thread_hash;
struct address_space;
struct process;
struct thread;
struct process_vm;
struct vm_regions;
struct vm_range;
#define HASH_SIZE 73
struct resource_set {
struct list_head list;
char *path;
struct process_hash *process_hash;
struct thread_hash *thread_hash;
struct list_head phys_mem_list;
mcs_rwlock_lock_t phys_mem_lock;
cpu_set_t cpu_set;
mcs_rwlock_lock_t cpu_set_lock;
struct process *pid1;
};
extern struct list_head resource_set_list;
extern mcs_rwlock_lock_t resource_set_lock;
struct process_hash {
struct list_head list[HASH_SIZE];
mcs_rwlock_lock_t lock[HASH_SIZE];
};
static inline int
process_hash(int pid)
{
return pid % HASH_SIZE;
}
static inline int
thread_hash(int tid)
{
return tid % HASH_SIZE;
}
struct thread_hash {
struct list_head list[HASH_SIZE];
mcs_rwlock_lock_t lock[HASH_SIZE];
};
struct address_space {
struct page_table *page_table;
int type;
#define ADDRESS_SPACE_NORMAL 1
#define ADDRESS_SPACE_PVAS 2
int nslots;
int pids[];
};
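
struct address_space ends in a C99 flexible array member, so the pids[] table is allocated inline with the struct and nslots records how many slots were reserved; process_msg_prepare_process() above fills pids[0] with the new pid. Allocation therefore sizes the trailing array explicitly, along these lines (the function name is illustrative; kmalloc and the flag are as used elsewhere in this tree):

    struct address_space *alloc_address_space(int nslots)
    {
        struct address_space *as;

        as = kmalloc(sizeof(*as) + nslots * sizeof(as->pids[0]),
                     IHK_MC_AP_NOWAIT);
        if (!as)
            return NULL;
        as->type = ADDRESS_SPACE_NORMAL;
        as->nslots = nslots;
        as->pids[0] = 0;       /* filled in later, e.g. pids[0] = pn->pid */
        return as;
    }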
struct user_fpregs_struct
{
unsigned short cwd;
@ -234,6 +299,7 @@ struct vm_range {
};
struct vm_regions {
unsigned long vm_start, vm_end;
unsigned long text_start, text_end;
unsigned long data_start, data_end;
unsigned long brk_start, brk_end;
@ -252,11 +318,12 @@ struct sigfd {
#define SFD_CLOEXEC 02000000
#define SFD_NONBLOCK 04000
struct sig_handler {
struct sig_common {
ihk_spinlock_t lock;
ihk_atomic_t use;
ihk_atomic_t use;
struct sigfd *sigfd;
struct k_sigaction action[_NSIG];
struct list_head sigpending;
};
struct sig_pending {
@ -266,27 +333,60 @@ struct sig_pending {
int ptracecont;
};
struct sig_shared {
ihk_spinlock_t lock;
ihk_atomic_t use;
struct list_head sigpending;
};
typedef void pgio_func_t(void *arg);
/* Represents a node in the process fork tree, it may exist even after the
* corresponding process exited due to references from the parent and/or
* children and is used for implementing wait/waitpid without having a
* special "init" process */
struct fork_tree_node {
ihk_spinlock_t lock;
ihk_atomic_t refcount;
int exit_status;
int status;
struct process {
struct list_head hash_list;
mcs_rwlock_lock_t update_lock; // lock for parent, status, ...?
struct process *owner;
// process vm
struct process_vm *vm;
// threads and children
struct list_head threads_list;
mcs_rwlock_lock_t threads_lock; // lock for threads_list
/* The ptracing process behaves as the parent of the ptraced process
after using PTRACE_ATTACH, except for getppid. So we save it here. */
struct process *parent;
struct process *ppid_parent;
struct list_head children_list;
struct list_head ptraced_children_list;
mcs_rwlock_lock_t children_lock; // lock for children_list and ptraced_children_list
struct list_head siblings_list; // lock parent
struct list_head ptraced_siblings_list; // lock ppid_parent
ihk_atomic_t refcount;
// process status and exit status
int status; // PS_RUNNING -> PS_EXITED -> PS_ZOMBIE
            //      |            ^  ^
            //      |            |  |
            //      v            |  |
            //  PS_STOPPING -----+  |
            //  (PS_TRACING)        |
            //      |               |
            //      v               |
            //  PS_STOPPED ---------+
            //  (PS_TRACED)
int exit_status;
/* Store exit_status for a group of threads when stopped by SIGSTOP.
exit_status can't be used because the exit_status values of individual
threads might diverge while the threads are exiting via group_exit(). */
int group_exit_status;
/* Manage ptraced processes in a separate list to make it easy to
restore the original parent-child relationship when
performing PTRACE_DETACH */
struct waitq waitpid_q;
// process info and credentials etc.
int pid;
int tid;
int pgid;
int ruid;
int euid;
@ -296,50 +396,36 @@ struct fork_tree_node {
int egid;
int sgid;
int fsgid;
struct fork_tree_node *parent;
struct list_head children;
struct list_head siblings_list;
/* The ptracing process behaves as the parent of the ptraced process
after using PTRACE_ATTACH, except for getppid. So we save it here. */
struct fork_tree_node *ppid_parent;
int execed;
int nohost;
struct rlimit rlimit[MCK_RLIM_MAX];
unsigned long saved_auxv[AUXV_LEN];
char *saved_cmdline;
long saved_cmdline_len;
/* Manage ptraced processes in a separate list to make it easy to
restore the original parent-child relationship when
performing PTRACE_DETACH */
struct list_head ptrace_children;
struct list_head ptrace_siblings_list;
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
struct waitq waitpid_q;
/* Store ptrace event message.
* PTRACE_O_xxx will store event message here.
* PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
/* Store exit_status for a group of threads when stopped by SIGSTOP.
exit_status can't be used because the exit_status values of individual
threads might diverge while the threads are exiting via group_exit(). */
int group_exit_status;
/* Store an event related to signals. For example,
it records that the process has been resumed by SIGCONT. */
int signal_flags;
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store signal sent to parent when the process terminates. */
int termsig;
/* Store ptrace event message.
PTRACE_O_xxx will store event message here.
PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
/* Store an event related to signals. For example,
it records that the process has been resumed by SIGCONT. */
int signal_flags;
/* Store signal sent to parent when the process terminates. */
int termsig;
};
void hold_fork_tree_node(struct fork_tree_node *ftn);
void release_fork_tree_node(struct fork_tree_node *ftn);
void hold_thread(struct thread *ftn);
void release_thread(struct thread *ftn);
/*
* Scheduling policies
@ -364,100 +450,114 @@ struct sched_param {
int sched_priority;
};
struct process {
struct thread {
struct list_head hash_list;
// thread info
int cpu_id;
int tid;
int status; // PS_RUNNING -> PS_EXITED
            //      |   ^
            //      v   |
            //  PS_STOPPED / PS_TRACED /
            //  PS_INTERRUPTIBLE / PS_UNINTERRUPTIBLE
            //  (each may return to PS_RUNNING)
ihk_atomic_t refcount;
// process vm
struct process_vm *vm;
// context
ihk_mc_kernel_context_t ctx;
ihk_mc_user_context_t *uctx;
// sibling
struct process *proc;
struct list_head siblings_list; // lock process
// Runqueue list entry
struct list_head sched_list;
struct list_head sched_list; // lock cls
int sched_policy;
struct sched_param sched_param;
ihk_spinlock_t spin_sleep_lock;
int spin_sleep;
struct thread {
int *clear_child_tid;
unsigned long tlsblock_base, tlsblock_limit;
} thread;
ihk_atomic_t refcount;
volatile int sigevent;
int nohost;
int execed;
int *clear_child_tid;
unsigned long tlsblock_base, tlsblock_limit;
// thread info
cpu_set_t cpu_set;
fp_regs_struct *fp_regs;
int in_syscall_offload;
// signal
struct sig_common *sigcommon;
sigset_t sigmask;
stack_t sigstack;
ihk_spinlock_t sigpendinglock;
struct list_head sigpending;
struct sig_shared *sigshared;
struct sig_handler *sighandler;
ihk_spinlock_t sigpendinglock;
volatile int sigevent;
struct rlimit rlimit[MCK_RLIM_MAX];
// gpio
pgio_func_t *pgio_fp;
void *pgio_arg;
struct fork_tree_node *ftn;
cpu_set_t cpu_set;
unsigned long saved_auxv[AUXV_LEN];
// for ptrace
unsigned long *ptrace_debugreg; /* debug registers for ptrace */
struct sig_pending *ptrace_recvsig;
struct sig_pending *ptrace_sendsig;
fp_regs_struct *fp_regs;
char *saved_cmdline;
long saved_cmdline_len;
};
struct process_vm {
ihk_atomic_t refcount;
struct page_table *page_table;
struct address_space *address_space;
struct list_head vm_range_list;
struct vm_regions region;
struct process *owner_process; /* process that reside on the same page */
struct process *proc; /* process that reside on the same page */
ihk_spinlock_t page_table_lock;
ihk_spinlock_t memory_range_lock;
ihk_spinlock_t page_table_lock;
ihk_spinlock_t memory_range_lock;
// to protect the following:
// 1. addition of process "memory range" (extend_process_region, add_process_memory_range)
// 2. addition of process page table (allocate_pages, update_process_page_table)
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
ihk_atomic_t refcount;
cpu_set_t cpu_set;
ihk_spinlock_t cpu_set_lock;
int exiting;
};
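
The net effect of this header rewrite is a three-level ownership model replacing the old process + fork_tree_node pair: struct process owns credentials, ptrace state and the threads_list; each struct thread carries the per-CPU scheduling and signal-delivery state and points back at its proc; both share a reference-counted process_vm, which in turn holds the address_space. In outline:

    /*
     * resource_set
     *   process_hash[pid % HASH_SIZE] -> struct process  (pid, creds, ptrace,
     *        |  threads_list / children_list              children, waitpid_q)
     *        v
     *   thread_hash[tid % HASH_SIZE]  -> struct thread   (tid, status, ctx,
     *        |  thread->proc points back at the process   sigmask, sched_list)
     *        v
     *   struct process_vm    (refcounted; region, vm_range_list)
     *        |
     *        v
     *   struct address_space (page_table, nslots, pids[])
     */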
struct process *create_process(unsigned long user_pc);
struct process *clone_process(struct process *org, unsigned long pc,
struct thread *create_thread(unsigned long user_pc);
struct thread *clone_thread(struct thread *org, unsigned long pc,
unsigned long sp, int clone_flags);
void destroy_process(struct process *proc);
void hold_process(struct process *proc);
void release_process(struct process *proc);
void flush_process_memory(struct process *proc);
void free_process_memory(struct process *proc);
void free_process_memory_ranges(struct process *proc);
int populate_process_memory(struct process *proc, void *start, size_t len);
void destroy_thread(struct thread *thread);
void hold_thread(struct thread *thread);
void release_thread(struct thread *thread);
void flush_process_memory(struct process_vm *vm);
void hold_process_vm(struct process_vm *vm);
void release_process_vm(struct process_vm *vm);
void hold_process(struct process *);
void release_process(struct process *);
void free_process_memory_ranges(struct process_vm *vm);
int populate_process_memory(struct process_vm *vm, void *start, size_t len);
int add_process_memory_range(struct process *process,
int add_process_memory_range(struct process_vm *vm,
unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag,
struct memobj *memobj, off_t objoff);
int remove_process_memory_range(struct process *process, unsigned long start,
int remove_process_memory_range(struct process_vm *vm, unsigned long start,
unsigned long end, int *ro_freedp);
int split_process_memory_range(struct process *process,
int split_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t addr, struct vm_range **splitp);
int join_process_memory_range(struct process *process, struct vm_range *surviving,
int join_process_memory_range(struct process_vm *vm, struct vm_range *surviving,
struct vm_range *merging);
int change_prot_process_memory_range(
struct process *process, struct vm_range *range,
struct process_vm *vm, struct vm_range *range,
unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off);
@ -476,31 +576,39 @@ int extend_up_process_memory_range(struct process_vm *vm,
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr,
uint64_t reason);
int remove_process_region(struct process *proc,
int remove_process_region(struct process_vm *vm,
unsigned long start, unsigned long end);
struct program_load_desc;
int init_process_stack(struct process *process, struct program_load_desc *pn,
int init_process_stack(struct thread *thread, struct program_load_desc *pn,
int argc, char **argv,
int envc, char **env);
unsigned long extend_process_region(struct process *proc,
unsigned long extend_process_region(struct process_vm *vm,
unsigned long start, unsigned long end,
unsigned long address, unsigned long flag);
extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
void schedule(void);
void runq_add_proc(struct process *proc, int cpu_id);
void runq_del_proc(struct process *proc, int cpu_id);
int sched_wakeup_process(struct process *proc, int valid_states);
void runq_add_thread(struct thread *thread, int cpu_id);
void runq_del_thread(struct thread *thread, int cpu_id);
int sched_wakeup_thread(struct thread *thread, int valid_states);
void sched_request_migrate(int cpu_id, struct process *proc);
void sched_request_migrate(int cpu_id, struct thread *thread);
void check_need_resched(void);
void cpu_set(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
void cpu_clear_and_set(int c_cpu, int s_cpu,
cpu_set_t *cpu_set, ihk_spinlock_t *lock);
struct process *findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate);
void process_unlock(void *savelock, unsigned long irqstate);
void release_cpuid(int cpuid);
struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock);
void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock);
struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock);
void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock);
void chain_process(struct process *);
void chain_thread(struct thread *);
void proc_init();
#endif


@ -13,9 +13,11 @@
#ifndef __HEADER_SYSCALL_H
#define __HEADER_SYSCALL_H
#include <ihk/atomic.h>
#include <ihk/context.h>
#include <ihk/memconst.h>
#include <rlimit.h>
#include <time.h>
#define NUM_SYSCALLS 255
@ -285,4 +287,15 @@ struct procfs_file {
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
};
extern void terminate(int, int);
struct tod_data_s {
int8_t do_local;
int8_t padding[7];
ihk_atomic64_t version;
unsigned long clocks_per_sec;
struct timespec origin; /* realtime when tsc=0 */
};
extern struct tod_data_s tod_data; /* residing in arch-dependent file */
#endif
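
tod_data is what lets the vsyscall compute wall-clock time without entering the kernel: origin is the realtime when the TSC was zero, clocks_per_sec the calibrated rate, and version an ihk_atomic64_t bumped by settimeofday() so readers can detect a concurrent update (the 64-bit atomics added in this range exist for this). A reader sketched against those fields, assuming seqlock convention, i.e. that settimeofday() increments version both before and after rewriting origin:

    /* sketch: torn-read-safe gettimeofday from the TSC;
     * rdtsc() as used elsewhere in this tree */
    void tod_read(struct timespec *ts)
    {
        long ver;
        unsigned long tsc, sec, nsec;

        do {
            ver  = ihk_atomic64_read(&tod_data.version);
            tsc  = rdtsc();
            sec  = tod_data.origin.tv_sec  + tsc / tod_data.clocks_per_sec;
            nsec = tod_data.origin.tv_nsec +
                   (tsc % tod_data.clocks_per_sec) * NS_PER_SEC
                       / tod_data.clocks_per_sec;
        } while ((ver & 1) ||                       /* writer mid-update */
                 ver != ihk_atomic64_read(&tod_data.version));

        ts->tv_sec  = sec + nsec / NS_PER_SEC;
        ts->tv_nsec = nsec % NS_PER_SEC;
    }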


@ -19,6 +19,8 @@
#ifndef __TIME_H
#define __TIME_H
#define NS_PER_SEC 1000000000UL
typedef long int __time_t;
/* POSIX.1b structure for a time value. This is like a `struct timeval' but


@ -36,7 +36,7 @@ struct timer {
uint64_t timeout;
struct waitq processes;
struct list_head list;
struct process *proc;
struct thread *thread;
};
uint64_t schedule_timeout(uint64_t timeout);


@ -19,7 +19,7 @@
#include <ihk/lock.h>
#include <list.h>
struct process;
struct thread;
struct waitq_entry;
typedef int (*waitq_func_t)(struct waitq_entry *wait, unsigned mode,
@ -58,7 +58,7 @@ typedef struct waitq_entry {
}
extern void waitq_init(waitq_t *waitq);
extern void waitq_init_entry(waitq_entry_t *entry, struct process *proc);
extern void waitq_init_entry(waitq_entry_t *entry, struct thread *proc);
extern int waitq_active(waitq_t *waitq);
extern void waitq_add_entry(waitq_t *waitq, waitq_entry_t *entry);
extern void waitq_add_entry_locked(waitq_t *waitq, waitq_entry_t *entry);

View File

@ -200,6 +200,34 @@ static void pc_test(void)
ed[1] - st[1], ed[2] - st[2], ed[3] - st[3]);
}
extern void ihk_mc_get_boot_time(unsigned long *tv_sec, unsigned long *tv_nsec);
extern unsigned long ihk_mc_get_ns_per_tsc(void);
static void time_init(void)
{
unsigned long tv_sec, tv_nsec;
unsigned long ns_per_kclock;
ihk_mc_get_boot_time(&tv_sec, &tv_nsec);
ns_per_kclock = ihk_mc_get_ns_per_tsc();
tod_data.origin.tv_sec = tv_sec;
tod_data.origin.tv_nsec = tv_nsec;
if (ns_per_kclock) {
tod_data.clocks_per_sec = (1000L * NS_PER_SEC) / ns_per_kclock;
}
if (!ns_per_kclock) {
gettime_local_support = 0;
}
if (gettime_local_support) {
tod_data.do_local = 1;
}
return;
}
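
Note the order of the two guards in time_init(): clocks_per_sec is derived only when ns_per_kclock is non-zero, and a zero reading (as seen on KVM guests without a calibratable TSC, per the "time_init: fix zero divide on KVM" commit) disables local time support instead of faulting. The unit conversion: ns_per_kclock is, given the formula, nanoseconds per 1000 TSC ticks, so clocks_per_sec = 1000 * NS_PER_SEC / ns_per_kclock. For a 2.5 GHz TSC, ns_per_kclock = 400 and clocks_per_sec = 1000 * 10^9 / 400 = 2.5 * 10^9, as expected.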
static void rest_init(void)
{
handler_init();
@ -212,14 +240,19 @@ static void rest_init(void)
ap_init();
cpu_local_var_init();
time_init();
kmalloc_init();
ikc_master_init();
proc_init();
sched_init();
}
int host_ikc_inited = 0;
extern int num_processors;
extern void zero_tsc(void);
static void post_init(void)
{
@ -237,7 +270,13 @@ static void post_init(void)
init_host_syscall_channel2();
ihk_mc_spinlock_init(&syscall_lock);
}
/* Zero TSC.
* All AP cores are spin-waiting for ap_start() and will zero
* their TSC immediately. */
zero_tsc();
ap_start();
create_os_procfs_files();
}
#ifdef DCFA_RUN


@ -56,6 +56,8 @@ extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
static void reserve_pages(unsigned long start, unsigned long end, int type)
{
if (start < pa_start) {
@ -171,8 +173,8 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
};
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs);
int gencore(struct process *, void *, struct coretable **, int *);
void check_signal(unsigned long, void *, int);
int gencore(struct thread *, void *, struct coretable **, int *);
void freecore(struct coretable **);
/**
@ -182,14 +184,14 @@ void freecore(struct coretable **);
* \param regs A pointer to a x86_regs structure.
*/
void coredump(struct process *proc, void *regs)
void coredump(struct thread *thread, void *regs)
{
struct syscall_request request IHK_DMA_ALIGN;
int ret;
struct coretable *coretable;
int chunks;
ret = gencore(proc, regs, &coretable, &chunks);
ret = gencore(thread, regs, &coretable, &chunks);
if (ret != 0) {
dkprintf("could not generate a core file image\n");
return;
@ -198,7 +200,7 @@ void coredump(struct process *proc, void *regs)
request.args[0] = chunks;
request.args[1] = virt_to_phys(coretable);
/* no data for now */
ret = do_syscall(&request, proc->cpu_id, proc->ftn->pid);
ret = do_syscall(&request, thread->cpu_id, thread->proc->pid);
if (ret == 0) {
kprintf("dumped core.\n");
} else {
@ -207,10 +209,10 @@ void coredump(struct process *proc, void *regs)
freecore(&coretable);
}
static void unhandled_page_fault(struct process *proc, void *fault_addr, void *regs)
static void unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
{
const uintptr_t address = (uintptr_t)fault_addr;
struct process_vm *vm = proc->vm;
struct process_vm *vm = thread->vm;
struct vm_range *range;
char found;
unsigned long irqflags;
@ -233,7 +235,7 @@ static void unhandled_page_fault(struct process *proc, void *fault_addr, void *r
found = 1;
dkprintf("address is in range, flag: 0x%X! \n",
range->flag);
ihk_mc_pt_print_pte(vm->page_table, (void*)address);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
break;
}
}
@ -364,36 +366,34 @@ void tlb_flush_handler(int vector)
static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
{
struct process *proc = cpu_local_var(current);
struct thread *thread = cpu_local_var(current);
int error;
dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n",
ihk_mc_get_processor_id(), fault_addr, reason, regs);
preempt_disable();
cpu_enable_interrupt();
error = page_fault_process_vm(proc->vm, fault_addr, reason);
error = page_fault_process_vm(thread->vm, fault_addr, reason);
if (error) {
struct siginfo info;
if (error == -ECANCELED) {
kprintf("process is exiting, terminate.\n");
dkprintf("process is exiting, terminate.\n");
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock);
proc->ftn->status = PS_ZOMBIE;
ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock);
release_fork_tree_node(proc->ftn->parent);
release_fork_tree_node(proc->ftn);
//release_process(proc);
schedule();
preempt_enable();
terminate(0, SIGSEGV);
// no return
}
kprintf("[%d]page_fault_handler(%p,%lx,%p):"
"fault vm failed. %d\n",
"fault vm failed. %d, TID: %d\n",
ihk_mc_get_processor_id(), fault_addr,
reason, regs, error);
unhandled_page_fault(proc, fault_addr, regs);
reason, regs, error, thread->tid);
unhandled_page_fault(thread, fault_addr, regs);
preempt_enable();
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
@ -402,7 +402,7 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
set_signal(SIGBUS, regs, &info);
}
else {
struct process_vm *vm = proc->vm;
struct process_vm *vm = thread->vm;
struct vm_range *range;
info.si_signo = SIGSEGV;
@ -416,11 +416,12 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
info._sifields._sigfault.si_addr = fault_addr;
set_signal(SIGSEGV, regs, &info);
}
check_signal(0, regs);
check_signal(0, regs, 0);
goto out;
}
error = 0;
preempt_enable();
out:
dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
ihk_mc_get_processor_id(), fault_addr, reason,
@ -677,6 +678,11 @@ void mem_init(void)
/* Prepare the kernel virtual map space */
virtual_allocator_init();
if (find_command_line("anon_on_demand")) {
kprintf("Demand paging on ANONYMOUS mappings enabled.\n");
anon_on_demand = 1;
}
}
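
With the new anon_on_demand boot argument (parsed from the kernel command line here and declared in page.h above with the comment "Should we take page faults on ANONYMOUS mappings?"), anonymous mappings can be populated at first touch instead of at mmap() time. The mmap-side decision presumably looks something like the following; only anon_on_demand itself comes from the diff, the flag and helper names are illustrative:

    /* sketch: mmap(MAP_ANONYMOUS) path */
    if (anon_on_demand) {
        range->flag |= VR_DEMAND_PAGING;   /* assumed flag: leave PTEs empty and
                                            * let page_fault_process_vm() allocate
                                            * on first touch */
    } else {
        premap_range_pages(vm, range);     /* assumed helper: allocate and map
                                            * every page up front */
    }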
struct location {
@ -868,12 +874,10 @@ int memcheckall()
struct alloc *ap;
int r = 0;
kprintf("memcheckall\n");
for(i = 0; i < HASHNUM; i++)
for(ap = allochash[i]; ap; ap = ap->next)
if(ap->p)
r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2);
kprintf("done\n");
return r;
}
@ -971,8 +975,11 @@ void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
>> PAGE_SHIFT;
h = allocate_pages(req_page, flag);
if(h == NULL)
if(h == NULL) {
kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag);
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
return NULL;
}
h->check = 0x5a5a5a5a;
prev->next = h;
h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2;
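
This is the "kmalloc: fix missing unlock on out-of-memory path" commit from the list: the early return NULL used to leave free_list_lock held, so the next allocation on that CPU would deadlock. A robust shape funnels every exit path through a single unlock; a sketch reusing the identifiers from the hunk (the function name is illustrative):

    void *alloc_locked(struct cpu_local_var *v, int req_page,
                       enum ihk_mc_ap_flag flag)
    {
        void *h = NULL;
        unsigned long flags = ihk_mc_spinlock_lock(&v->free_list_lock);

        h = allocate_pages(req_page, flag);
        if (!h)
            goto out;          /* the error path still reaches the unlock */
        /* ... link h into the free list ... */
    out:
        ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
        return h;
    }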

File diff suppressed because it is too large.


@ -257,14 +257,14 @@ static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, in
void process_procfs_request(unsigned long rarg)
{
unsigned long parg, pbuf;
struct process *proc = cpu_local_var(current);
struct thread *thread = cpu_local_var(current);
struct process *proc = thread->proc;
struct procfs_read *r;
struct ikc_scd_packet packet;
int rosnum, ret, pid, tid, ans = -EIO, eof = 0;
char *buf, *p;
struct ihk_ikc_channel_desc *syscall_channel;
ihk_spinlock_t *savelock;
unsigned long irqstate;
struct mcs_rwlock_node_irqsave lock;
unsigned long offset;
int count;
int npages;
@ -336,30 +336,31 @@ void process_procfs_request(unsigned long rarg)
*/
ret = sscanf(p, "%d/", &pid);
if (ret == 1) {
if (pid != cpu_local_var(current)->ftn->pid) {
if (pid != cpu_local_var(current)->proc->pid) {
/* We are not located in the proper cpu for some reason. */
dprintf("mismatched pid. We are %d, but requested pid is %d.\n",
pid, cpu_local_var(current)->pid);
tid = pid; /* main thread */
proc = findthread_and_lock(pid, tid, &savelock, &irqstate);
if (!proc) {
thread = find_thread(pid, tid, &lock);
if (!thread) {
dprintf("We cannot find the proper cpu for requested pid.\n");
goto end;
}
else if (proc->cpu_id != ihk_mc_get_processor_id()) {
else if (thread->cpu_id != ihk_mc_get_processor_id()) {
/* The target process has gone by migration. */
r->newcpu = proc->cpu_id;
dprintf("expected cpu id is %d.\n", proc->cpu_id);
process_unlock(savelock, irqstate);
r->newcpu = thread->cpu_id;
dprintf("expected cpu id is %d.\n", thread->cpu_id);
thread_unlock(thread, &lock);
ans = 0;
goto end;
}
else {
process_unlock(savelock, irqstate);
thread_unlock(thread, &lock);
/* 'proc' is not 'current' */
is_current = 0;
}
proc = thread->proc;
}
}
else if (!strcmp(p, "stat")) { /* "/proc/stat" */
@ -431,7 +432,7 @@ void process_procfs_request(unsigned long rarg)
ans = -EIO;
goto end;
}
ret = ihk_mc_pt_virt_to_phys(vm->page_table,
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
(void *)offset, &pa);
if(ret){
if(ans == 0)
@ -562,8 +563,8 @@ void process_procfs_request(unsigned long rarg)
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
while (start < end) {
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->page_table, start);
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->ftn->pid,
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->address_space->page_table, start);
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->proc->pid,
start, *_buf);
start += PAGE_SIZE;
++_buf;
@ -586,7 +587,6 @@ void process_procfs_request(unsigned long rarg)
unsigned long lockedsize = 0;
char tmp[1024];
int len;
struct fork_tree_node *ftn = proc->ftn;
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
list_for_each_entry(range, &proc->vm->vm_range_list, list) {
@ -599,8 +599,8 @@ void process_procfs_request(unsigned long rarg)
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n"
"VmLck:\t%9lu kB\n",
ftn->ruid, ftn->euid, ftn->suid, ftn->fsuid,
ftn->rgid, ftn->egid, ftn->sgid, ftn->fsgid,
proc->ruid, proc->euid, proc->suid, proc->fsuid,
proc->rgid, proc->egid, proc->sgid, proc->fsgid,
(lockedsize + 1023) >> 10);
len = strlen(tmp);
if (r->offset < len) {
@ -712,7 +712,7 @@ void process_procfs_request(unsigned long rarg)
char tmp[1024];
int len;
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
if ((thread = find_thread(pid, tid, &lock))){
dprintf("thread found! pid=%d tid=%d\n", pid, tid);
/*
* pid (comm) state ppid
@ -748,10 +748,10 @@ void process_procfs_request(unsigned long rarg)
0L, 0L, 0L, 0L, // rsslim...
0L, 0L, 0L, 0L, // kstkesp...
0L, 0L, 0L, 0L, // sigignore...
0L, 0, proc->cpu_id, 0, // cnswap...
0L, 0, thread->cpu_id, 0, // cnswap...
0, 0LL, 0L, 0L // policy...
);
process_unlock(savelock, irqstate);
thread_unlock(thread, &lock);
dprintf("tmp=%s\n", tmp);
len = strlen(tmp);


@ -33,11 +33,13 @@ static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED;
static memobj_release_func_t shmobj_release;
static memobj_ref_func_t shmobj_ref;
static memobj_get_page_func_t shmobj_get_page;
static memobj_invalidate_page_func_t shmobj_invalidate_page;
static struct memobj_ops shmobj_ops = {
.release = &shmobj_release,
.ref = &shmobj_ref,
.get_page = &shmobj_get_page,
.invalidate_page = &shmobj_invalidate_page,
};
static struct shmobj *to_shmobj(struct memobj *memobj)
@ -158,7 +160,7 @@ int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp)
error = shmobj_create(ds, &obj);
if (!error) {
obj->flags |= MF_SHMDT_OK;
obj->flags |= MF_SHMDT_OK | MF_IS_REMOVABLE;
*objp = to_shmobj(obj);
}
return error;
@ -369,3 +371,30 @@ out:
memobj, off, p2align, physp, error);
return error;
}
static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
size_t pgsize)
{
struct shmobj *obj = to_shmobj(memobj);
int error;
struct page *page;
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize);
if (!(page = phys_to_page(phys))
|| !(page = page_list_lookup(obj, page->offset))) {
error = 0;
goto out;
}
if (ihk_atomic_read(&page->count) == 1) {
if (page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
}
}
error = 0;
out:
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx):%d\n", memobj, phys, pgsize, error);
return error;
}

File diff suppressed because it is too large.


@ -41,7 +41,7 @@
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
#define LOOP_TIMEOUT 10
#define LOOP_TIMEOUT 500
struct list_head timers;
ihk_spinlock_t timers_lock;
@ -57,23 +57,25 @@ uint64_t schedule_timeout(uint64_t timeout)
{
struct waitq_entry my_wait;
struct timer my_timer;
struct process *proc = cpu_local_var(current);
struct thread *thread = cpu_local_var(current);
int irqstate;
int spin_sleep;
ihk_mc_spinlock_lock_noirq(&proc->spin_sleep_lock);
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
proc->spin_sleep = 1;
ihk_mc_spinlock_unlock_noirq(&proc->spin_sleep_lock);
spin_sleep = ++thread->spin_sleep;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
/* Spin sleep.. */
for (;;) {
uint64_t t_s = rdtsc();
uint64_t t_e;
int spin_over = 0;
ihk_mc_spinlock_lock_noirq(&proc->spin_sleep_lock);
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
/* Woken up by someone? */
if (!proc->spin_sleep) {
if (thread->spin_sleep < 1) {
t_e = rdtsc();
spin_over = 1;
@ -85,32 +87,76 @@ uint64_t schedule_timeout(uint64_t timeout)
}
}
ihk_mc_spinlock_unlock_noirq(&proc->spin_sleep_lock);
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
t_s = rdtsc();
if (!spin_over) {
t_s = rdtsc();
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
/* Give a chance to another thread (if any) in case the core is
* oversubscribed, but make sure we will be re-scheduled */
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
schedule();
xchg4(&(cpu_local_var(current)->status),
PS_INTERRUPTIBLE);
}
else {
/* Spin wait */
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
cpu_pause();
}
if (timeout < LOOP_TIMEOUT) {
timeout = 0;
spin_over = 1;
}
else {
timeout -= LOOP_TIMEOUT;
}
}
}
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
cpu_pause();
}
if (timeout < LOOP_TIMEOUT) {
timeout = 0;
spin_over = 1;
}
else {
timeout -= LOOP_TIMEOUT;
}
if (spin_over) {
dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
timeout);
/* Give a chance to another thread (if any) in case we timed out,
* but make sure we will be re-scheduled */
if (timeout == 0) {
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
int irqstate =
ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
schedule();
xchg4(&(cpu_local_var(current)->status),
PS_INTERRUPTIBLE);
}
}
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
if (spin_sleep == thread->spin_sleep) {
--thread->spin_sleep;
}
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
return timeout;
}
}
/* Init waitq and wait entry for this timer */
my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
my_timer.proc = cpu_local_var(current);
my_timer.thread = cpu_local_var(current);
waitq_init(&my_timer.processes);
waitq_init_entry(&my_wait, cpu_local_var(current));
@ -167,7 +213,7 @@ void wake_timers_loop(void)
list_del(&timer->list);
dkprintf("timers timeout occurred, waking up pid: %d\n",
timer->proc->ftn->pid);
timer->thread->proc->pid);
waitq_wakeup(&timer->processes);
}
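
The reworked schedule_timeout() does one of two things per LOOP_TIMEOUT(=500)-cycle slice: if another runnable thread sits on the local runqueue it briefly flips itself back to PS_RUNNING and calls schedule(), so an oversubscribed core still makes progress, otherwise it burns the slice in cpu_pause(). The spin_sleep field also becomes a counter, incremented on entry and decremented at the end only if unchanged, so a wakeup that lands mid-loop is not lost the way a plain boolean could be. One slice, condensed from the code above:

    /* condensed: one LOOP_TIMEOUT slice of the spin-sleep loop */
    if (v->runq_len > 1) {                  /* checked under v->runq_lock */
        xchg4(&thread->status, PS_RUNNING);
        schedule();                         /* yield the oversubscribed core */
        xchg4(&thread->status, PS_INTERRUPTIBLE);
    } else {
        while (rdtsc() - t_s < LOOP_TIMEOUT)
            cpu_pause();                    /* burn the slice */
    }
    if (timeout < LOOP_TIMEOUT) { timeout = 0; spin_over = 1; }
    else                          timeout -= LOOP_TIMEOUT;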


@ -19,7 +19,7 @@ int
default_wake_function(waitq_entry_t *entry, unsigned mode,
int flags, void *key)
{
return sched_wakeup_process(entry->private, PS_NORMAL);
return sched_wakeup_thread(entry->private, PS_NORMAL);
}
void
@ -30,7 +30,7 @@ waitq_init(waitq_t *waitq)
}
void
waitq_init_entry(waitq_entry_t *entry, struct process *proc)
waitq_init_entry(waitq_entry_t *entry, struct thread *proc)
{
entry->private = proc;
entry->func = default_wake_function;
@ -89,14 +89,14 @@ waitq_prepare_to_wait(waitq_t *waitq, waitq_entry_t *entry, int state)
ihk_mc_spinlock_lock_noirq(&waitq->lock);
if (list_empty(&entry->link))
list_add(&entry->link, &waitq->waitq);
cpu_local_var(current)->ftn->status = state;
cpu_local_var(current)->status = state;
ihk_mc_spinlock_unlock_noirq(&waitq->lock);
}
void
waitq_finish_wait(waitq_t *waitq, waitq_entry_t *entry)
{
cpu_local_var(current)->ftn->status = PS_RUNNING;
cpu_local_var(current)->status = PS_RUNNING;
waitq_remove_entry(waitq, entry);
}


@ -102,4 +102,6 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
extern unsigned int ihk_ikc_irq;
extern unsigned int ihk_ikc_irq_apicid;
extern int gettime_local_support;
#endif