Compare commits
260 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 8d21846562 | |||
| 3e1367caa1 | |||
| 02536b7724 | |||
| e28725884f | |||
| c2b3fb7236 | |||
| 2f95f7cda8 | |||
| e551aa17ed | |||
| e6d4c160cd | |||
| 9390fe5d2c | |||
| 419f5e495b | |||
| 673deadf37 | |||
| 20ea65b38c | |||
| 84665ff699 | |||
| bfbc94dfb0 | |||
| f74dcfc2a1 | |||
| 7c562d0539 | |||
| b5e4459a34 | |||
| 782122b681 | |||
| d550bced78 | |||
| a7ee3f531b | |||
| b9439947a7 | |||
| 3b60a95f13 | |||
| 82ae6d7458 | |||
| 7ebc34ddcc | |||
| bd6a2c2311 | |||
| 5fd68eae54 | |||
| f5857cfc9e | |||
| 1ce1b17a85 | |||
| a2456c3ed2 | |||
| 01d2ea1605 | |||
| 15783f09a0 | |||
| 9efd568e07 | |||
| 1a207e19c2 | |||
| 73cf93727b | |||
| 4410e702d9 | |||
| f584e2ec25 | |||
| 3aa06444f4 | |||
| c897a56c34 | |||
| 5e9957da0f | |||
| 6ff2d4abe7 | |||
| e4239f1885 | |||
| fbbaaf5b54 | |||
| 3fa3920bb3 | |||
| 45e51fcc07 | |||
| 0884e3d543 | |||
| e3c7c9b890 | |||
| f4155cc9e8 | |||
| a01ae91051 | |||
| daca522d25 | |||
| ec521feb15 | |||
| d7bc947a02 | |||
| fb84d4ef11 | |||
| 5fbeee953a | |||
| 4cefb4333f | |||
| 689da07ac6 | |||
| 76981bcc18 | |||
| 6aae35cb3d | |||
| dac6f2883e | |||
| c484f766fa | |||
| 57690479bd | |||
| d0539a9cac | |||
| 4c8f583c0c | |||
| 6118faffa9 | |||
| dad6470c60 | |||
| 46c37fc8f3 | |||
| f6908f21a8 | |||
| 01d9d9a5ba | |||
| c43d993a4d | |||
| 7d9bbecd7a | |||
| d135731398 | |||
| 5c190beb04 | |||
| fc66556f9f | |||
| 648bacc90f | |||
| dd37443fc7 | |||
| e34322702a | |||
| e12997e6a9 | |||
| fabaa806d3 | |||
| a83ad620c8 | |||
| d90900b6e6 | |||
| 6d9a88e9f4 | |||
| d0ee60f9e3 | |||
| 14ec92518e | |||
| 435e2bdeb4 | |||
| f06d8041e3 | |||
| 9b35eaca42 | |||
| 130b1f4327 | |||
| 921280f85c | |||
| d4a0b32f06 | |||
| b3bec32e99 | |||
| 2048980820 | |||
| 176f6d23a9 | |||
| 328175547f | |||
| e2e0fad849 | |||
| 397bf3f4a6 | |||
| aa77228453 | |||
| 82cb8f95ed | |||
| 3f2b4e7282 | |||
| d6784bb4a5 | |||
| 1bb948f43b | |||
| 2a1823d52c | |||
| 89943dc5ba | |||
| fceb02a44a | |||
| 7298d8e179 | |||
| 6f32544dde | |||
| 10d248b3cc | |||
| fb32120659 | |||
| 73de203c16 | |||
| 41bb2ab5e6 | |||
| a587c8f5e5 | |||
| 0c53a5ca35 | |||
| c760a01a79 | |||
| a2c29e8abf | |||
| 18add6a9bd | |||
| a083e6c2bf | |||
| a2548f5421 | |||
| 6790126a23 | |||
| 1195549f41 | |||
| b0096a2740 | |||
| a11479eba8 | |||
| 12eaea401e | |||
| 31595b7409 | |||
| 4a0682bbc1 | |||
| 932a287437 | |||
| 670741ae40 | |||
| 70b27e06ff | |||
| 4c38ddb623 | |||
| 6f00ddced6 | |||
| c0eecd63c9 | |||
| 1fd0b03e78 | |||
| 6c59de9300 | |||
| b1309a5d53 | |||
| 489cd6d1a2 | |||
| c9cc4330c8 | |||
| 604f846cd2 | |||
| e939cf6862 | |||
| 72f2e5ebe0 | |||
| bd7dddd415 | |||
| fbd9dc878b | |||
| d6c51ff997 | |||
| 86ac51157c | |||
| b73fa2b972 | |||
| 798f69bceb | |||
| e8be52a1ff | |||
| 8b5b075f4c | |||
| b214fc278a | |||
| b3ae7f46bd | |||
| 48167d3223 | |||
| d65135c040 | |||
| 1761acc4c3 | |||
| d4d93df032 | |||
| 261bddb999 | |||
| 1a3bc851af | |||
| 15f572ef9c | |||
| 81690c5b5a | |||
| 832c0f9afd | |||
| f92cac7751 | |||
| e74eb1dd51 | |||
| 8f7b9072ea | |||
| 4595aa3079 | |||
| 807d294ac4 | |||
| c947dd0d49 | |||
| d192e6c0fe | |||
| 7dbbcb362f | |||
| 593cf98015 | |||
| 8dd9f5ef3f | |||
| 0eaf058a4f | |||
| 1aac2c8e23 | |||
| 70e8dd7979 | |||
| eb0700359b | |||
| 3f16a9443e | |||
| bf0cf0a346 | |||
| 14b868907b | |||
| dbc778e4fa | |||
| 7fac03d4de | |||
| 26c0180374 | |||
| 8ebb3a4231 | |||
| f1f1ba9c8c | |||
| 6ce00b5f0f | |||
| 4ec0e02a89 | |||
| 8f9192ac36 | |||
| 80ce123ab6 | |||
| 1dc8513cd3 | |||
| b0054643c0 | |||
| 972ff73ecf | |||
| 1f8a859b47 | |||
| 2601d8a36f | |||
| a713c2fcaa | |||
| c4c5e435cc | |||
| 853b56c784 | |||
| 863a5c5e5f | |||
| ebce1cb031 | |||
| fff7744907 | |||
| 27c3ed7e96 | |||
| e2b28da32f | |||
| 2c50b716fd | |||
| 307b2b8da5 | |||
| eba2be8a35 | |||
| a997af71be | |||
| e7c37b8000 | |||
| 8c40f94aa8 | |||
| da13bd408a | |||
| c328d26b8d | |||
| 6cda6792a9 | |||
| 2d3fda1d0b | |||
| 5d43c135db | |||
| a866192db7 | |||
| c0cc6ac6db | |||
| 14c5bc08c2 | |||
| 7f01d273d0 | |||
| 137e0a799c | |||
| f214ff1b57 | |||
| 0ce698eb1f | |||
| e601248bdc | |||
| d8eeab9b89 | |||
| fdf031ac16 | |||
| 1ffe740153 | |||
| 72968d613e | |||
| 2e98f875c3 | |||
| a6cb9a6b93 | |||
| da0a91b9f7 | |||
| f093786bec | |||
| 368f155328 | |||
| 425f920013 | |||
| dbddf37579 | |||
| fa7a5ccd11 | |||
| 172bf0a389 | |||
| 9bafd166e3 | |||
| 2e31b8abd1 | |||
| a42ee00101 | |||
| f6935b0869 | |||
| 03a7763a5e | |||
| 3a2f7b0106 | |||
| 2819ec2197 | |||
| f7d81a9281 | |||
| 914faf042d | |||
| 75c6a94839 | |||
| f7b5b48266 | |||
| f9bd83c726 | |||
| edc275ce4f | |||
| d00ea61d1a | |||
| 01117e92c9 | |||
| d477096cb0 | |||
| f44ddfa3b3 | |||
| e0acd254b1 | |||
| d0507f7e9f | |||
| 0f8b2aba22 | |||
| 7e5c7445e2 | |||
| a055fb525d | |||
| 8cb72df663 | |||
| e805249651 | |||
| 06a7889e1f | |||
| 20deed09f0 | |||
| bb81f84709 | |||
| 5c1dad1660 | |||
| 7f2220b8e9 | |||
| 65dda3f24e | |||
| 544971d665 | |||
| dbddab4356 | |||
| 12eb8a9bb0 | |||
| 828a3ea57a |
13
Makefile.in
13
Makefile.in
@ -1,9 +1,11 @@
|
||||
TARGET = @TARGET@
|
||||
SBINDIR = @SBINDIR@
|
||||
ETCDIR = @ETCDIR@
|
||||
MANDIR = @MANDIR@
|
||||
|
||||
all::
|
||||
@(cd executer/kernel; make modules)
|
||||
@(cd executer/kernel/mcctrl; make modules)
|
||||
@(cd executer/kernel/mcoverlayfs; make modules)
|
||||
@(cd executer/user; make)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
@ -16,7 +18,8 @@ all::
|
||||
esac
|
||||
|
||||
install::
|
||||
@(cd executer/kernel; make install)
|
||||
@(cd executer/kernel/mcctrl; make install)
|
||||
@(cd executer/kernel/mcoverlayfs; make install)
|
||||
@(cd executer/user; make install)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
@ -46,6 +49,9 @@ install::
|
||||
mkdir -p -m 755 $(SBINDIR); \
|
||||
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
|
||||
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
|
||||
mkdir -p -m 755 $(ETCDIR); \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.service $(ETCDIR)/irqbalance_mck.service; \
|
||||
install -m 644 arch/x86/tools/irqbalance_mck.in $(ETCDIR)/irqbalance_mck.in; \
|
||||
mkdir -p -m 755 $(MANDIR)/man1; \
|
||||
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
|
||||
;; \
|
||||
@ -56,7 +62,8 @@ install::
|
||||
esac
|
||||
|
||||
clean::
|
||||
@(cd executer/kernel; make clean)
|
||||
@(cd executer/kernel/mcctrl; make clean)
|
||||
@(cd executer/kernel/mcoverlayfs; make clean)
|
||||
@(cd executer/user; make clean)
|
||||
@case "$(TARGET)" in \
|
||||
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
|
||||
|
||||
@ -10,7 +10,7 @@
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 128
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 176
|
||||
#define X86_TSS_OFFSET_SP0 4
|
||||
#define X86_CPU_LOCAL_OFFSET_SP0 \
|
||||
(X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0)
|
||||
|
||||
@ -28,9 +28,12 @@
|
||||
#include <signal.h>
|
||||
#include <process.h>
|
||||
#include <cls.h>
|
||||
#include <prctl.h>
|
||||
#include <page.h>
|
||||
|
||||
#define LAPIC_ID 0x020
|
||||
#define LAPIC_TIMER 0x320
|
||||
#define LAPIC_LVTPC 0x340
|
||||
#define LAPIC_TIMER_INITIAL 0x380
|
||||
#define LAPIC_TIMER_CURRENT 0x390
|
||||
#define LAPIC_TIMER_DIVIDE 0x3e0
|
||||
@ -40,6 +43,7 @@
|
||||
#define LAPIC_ICR2 0x310
|
||||
#define LAPIC_ESR 0x280
|
||||
#define LOCAL_TIMER_VECTOR 0xef
|
||||
#define LOCAL_PERF_VECTOR 0xf0
|
||||
|
||||
#define APIC_INT_LEVELTRIG 0x08000
|
||||
#define APIC_INT_ASSERT 0x04000
|
||||
@ -52,15 +56,30 @@
|
||||
#define APIC_DIVISOR 16
|
||||
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
|
||||
|
||||
#define APIC_BASE_MSR 0x800
|
||||
#define IA32_X2APIC_APICID 0x802
|
||||
#define IA32_X2APIC_ICR 0x830
|
||||
#define X2APIC_ENABLE (1UL << 10)
|
||||
#define NMI_VECTOR 0x02
|
||||
|
||||
//#define DEBUG_PRINT_CPU
|
||||
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
#define dkprintf kprintf
|
||||
#define ekprintf kprintf
|
||||
#else
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#define ekprintf kprintf
|
||||
#endif
|
||||
|
||||
static void *lapic_vp;
|
||||
static int x2apic;
|
||||
static void (*lapic_write)(int reg, unsigned int value);
|
||||
static unsigned int (*lapic_read)(int reg);
|
||||
static void (*lapic_icr_write)(unsigned int h, unsigned int l);
|
||||
static void (*lapic_wait_icr_idle)(void);
|
||||
void (*x86_issue_ipi)(unsigned int apicid, unsigned int low);
|
||||
int running_on_kvm(void);
|
||||
|
||||
void init_processors_local(int max_id);
|
||||
void assign_processor_id(void);
|
||||
@ -69,7 +88,9 @@ void x86_set_warm_reset(unsigned long ip, char *first_page_va);
|
||||
void x86_init_perfctr(void);
|
||||
int gettime_local_support = 0;
|
||||
|
||||
extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
|
||||
extern int kprintf(const char *format, ...);
|
||||
extern int interrupt_from_user(void *);
|
||||
|
||||
static struct idt_entry{
|
||||
uint32_t desc[4];
|
||||
@ -88,6 +109,12 @@ static uint64_t gdt[] __attribute__((aligned(16))) = {
|
||||
0x00aff3000000ffff, /* 56 : USER_DS */
|
||||
0x0000890000000067, /* 64 : TSS */
|
||||
0, /* (72: TSS) */
|
||||
0, /* 80 */
|
||||
0, /* 88 */
|
||||
0, /* 96 */
|
||||
0, /* 104 */
|
||||
0, /* 112 */
|
||||
0x0000f10000000000, /* 120 : GETCPU */
|
||||
};
|
||||
|
||||
struct tss64 tss __attribute__((aligned(16)));
|
||||
@ -123,6 +150,12 @@ extern char debug_exception[], int3_exception[];
|
||||
uint64_t boot_pat_state = 0;
|
||||
int no_turbo = 0; /* May be updated by early parsing of kargs */
|
||||
|
||||
extern int num_processors; /* kernel/ap.c */
|
||||
struct pvclock_vsyscall_time_info *pvti = NULL;
|
||||
int pvti_npages;
|
||||
static long pvti_msr = -1;
|
||||
|
||||
|
||||
static void init_idt(void)
|
||||
{
|
||||
int i;
|
||||
@ -148,6 +181,8 @@ static void init_idt(void)
|
||||
}
|
||||
|
||||
static int xsave_available = 0;
|
||||
static int xsave_size = 0;
|
||||
static uint64_t xsave_mask = 0x0;
|
||||
|
||||
void init_fpu(void)
|
||||
{
|
||||
@ -191,6 +226,26 @@ void init_fpu(void)
|
||||
xsetbv(0, reg);
|
||||
dkprintf("init_fpu(): AVX init: XCR0 = 0x%016lX\n", reg);
|
||||
}
|
||||
if(xsave_available){
|
||||
unsigned long eax;
|
||||
unsigned long ebx;
|
||||
unsigned long ecx;
|
||||
unsigned long edx;
|
||||
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx) : "a" (0x0d), "c" (0x00));
|
||||
xsave_size = ecx;
|
||||
dkprintf("init_fpu(): xsave_size = %d\n", xsave_size);
|
||||
|
||||
if ((eax & (1 << 5)) && (eax & (1 << 6)) && (eax & (1 << 7))) {
|
||||
/* Set xcr0[7:5] to enable avx-512 ops */
|
||||
reg = xgetbv(0);
|
||||
reg |= 0xe6;
|
||||
xsetbv(0, reg);
|
||||
dkprintf("init_fpu(): AVX-512 init: XCR0 = 0x%016lX\n", reg);
|
||||
}
|
||||
}
|
||||
|
||||
xsave_mask = xgetbv(0);
|
||||
dkprintf("init_fpu(): xsave_mask = 0x%016lX\n", xsave_mask);
|
||||
|
||||
/* TODO: set MSR_IA32_XSS to enable xsaves/xrstors */
|
||||
|
||||
@ -201,6 +256,17 @@ void init_fpu(void)
|
||||
asm volatile("finit");
|
||||
}
|
||||
|
||||
int
|
||||
get_xsave_size()
|
||||
{
|
||||
return xsave_size;
|
||||
}
|
||||
|
||||
uint64_t get_xsave_mask()
|
||||
{
|
||||
return xsave_mask;
|
||||
}
|
||||
|
||||
void reload_gdt(struct x86_desc_ptr *gdt_ptr)
|
||||
{
|
||||
asm volatile("pushq %1\n"
|
||||
@ -238,25 +304,39 @@ void init_gdt(void)
|
||||
reload_gdt(&gdt_desc);
|
||||
}
|
||||
|
||||
static void *lapic_vp;
|
||||
void lapic_write(int reg, unsigned int value)
|
||||
static void
|
||||
apic_write(int reg, unsigned int value)
|
||||
{
|
||||
*(volatile unsigned int *)((char *)lapic_vp + reg) = value;
|
||||
}
|
||||
|
||||
unsigned int lapic_read(int reg)
|
||||
static void
|
||||
x2apic_write(int reg, unsigned int value)
|
||||
{
|
||||
reg >>= 4;
|
||||
reg |= APIC_BASE_MSR;
|
||||
wrmsr(reg, value);
|
||||
}
|
||||
|
||||
static unsigned int
|
||||
apic_read(int reg)
|
||||
{
|
||||
return *(volatile unsigned int *)((char *)lapic_vp + reg);
|
||||
}
|
||||
|
||||
void lapic_icr_write(unsigned int h, unsigned int l)
|
||||
static unsigned int
|
||||
x2apic_read(int reg)
|
||||
{
|
||||
lapic_write(LAPIC_ICR2, (unsigned int)h);
|
||||
lapic_write(LAPIC_ICR0, l);
|
||||
unsigned long value;
|
||||
|
||||
reg >>= 4;
|
||||
reg |= APIC_BASE_MSR;
|
||||
value = rdmsr(reg);
|
||||
return (int)value;
|
||||
}
|
||||
|
||||
|
||||
void lapic_timer_enable(unsigned int clocks)
|
||||
void
|
||||
lapic_timer_enable(unsigned int clocks)
|
||||
{
|
||||
unsigned int lvtt_value;
|
||||
|
||||
@ -268,11 +348,117 @@ void lapic_timer_enable(unsigned int clocks)
|
||||
lapic_write(LAPIC_TIMER, lvtt_value);
|
||||
}
|
||||
|
||||
void lapic_timer_disable()
|
||||
void
|
||||
lapic_timer_disable()
|
||||
{
|
||||
lapic_write(LAPIC_TIMER_INITIAL, 0);
|
||||
}
|
||||
|
||||
void
|
||||
lapic_ack(void)
|
||||
{
|
||||
lapic_write(LAPIC_EOI, 0);
|
||||
}
|
||||
|
||||
static void
|
||||
x2apic_wait_icr_idle(void)
|
||||
{
|
||||
}
|
||||
|
||||
static void
|
||||
apic_wait_icr_idle(void)
|
||||
{
|
||||
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
|
||||
cpu_pause();
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
x2apic_icr_write(unsigned int low, unsigned int apicid)
|
||||
{
|
||||
wrmsr(IA32_X2APIC_ICR, (((unsigned long)apicid) << 32) | low);
|
||||
}
|
||||
|
||||
static void
|
||||
apic_icr_write(unsigned int h, unsigned int l)
|
||||
{
|
||||
lapic_write(LAPIC_ICR2, (unsigned int)h);
|
||||
lapic_write(LAPIC_ICR0, l);
|
||||
}
|
||||
|
||||
static void
|
||||
x2apic_x86_issue_ipi(unsigned int apicid, unsigned int low)
|
||||
{
|
||||
unsigned long icr = low;
|
||||
unsigned long flags;
|
||||
|
||||
ihk_mc_mb();
|
||||
flags = cpu_disable_interrupt_save();
|
||||
x2apic_icr_write(icr, apicid);
|
||||
cpu_restore_interrupt(flags);
|
||||
}
|
||||
|
||||
static void
|
||||
apic_x86_issue_ipi(unsigned int apicid, unsigned int low)
|
||||
{
|
||||
unsigned long flags;
|
||||
|
||||
flags = cpu_disable_interrupt_save();
|
||||
apic_wait_icr_idle();
|
||||
apic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
|
||||
cpu_restore_interrupt(flags);
|
||||
}
|
||||
|
||||
unsigned long
|
||||
x2apic_is_enabled()
|
||||
{
|
||||
unsigned long msr;
|
||||
|
||||
msr = rdmsr(MSR_IA32_APIC_BASE);
|
||||
|
||||
return (msr & X2APIC_ENABLE);
|
||||
}
|
||||
|
||||
void init_lapic_bsp(void)
|
||||
{
|
||||
if(x2apic_is_enabled()){
|
||||
x2apic = 1;
|
||||
lapic_write = x2apic_write;
|
||||
lapic_read = x2apic_read;
|
||||
lapic_icr_write = x2apic_icr_write;
|
||||
lapic_wait_icr_idle = x2apic_wait_icr_idle;
|
||||
x86_issue_ipi = x2apic_x86_issue_ipi;
|
||||
}
|
||||
else{
|
||||
x2apic = 0;
|
||||
lapic_write = apic_write;
|
||||
lapic_read = apic_read;
|
||||
lapic_icr_write = apic_icr_write;
|
||||
lapic_wait_icr_idle = apic_wait_icr_idle;
|
||||
x86_issue_ipi = apic_x86_issue_ipi;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
init_lapic()
|
||||
{
|
||||
if(!x2apic){
|
||||
unsigned long baseaddr;
|
||||
|
||||
/* Enable Local APIC */
|
||||
baseaddr = rdmsr(MSR_IA32_APIC_BASE);
|
||||
if (!lapic_vp) {
|
||||
lapic_vp = map_fixed_area(baseaddr & PAGE_MASK, PAGE_SIZE, 1);
|
||||
}
|
||||
baseaddr |= 0x800;
|
||||
wrmsr(MSR_IA32_APIC_BASE, baseaddr);
|
||||
}
|
||||
|
||||
lapic_write(LAPIC_SPURIOUS, 0x1ff);
|
||||
lapic_write(LAPIC_LVTPC, LOCAL_PERF_VECTOR);
|
||||
}
|
||||
|
||||
void print_msr(int idx)
|
||||
{
|
||||
int bit;
|
||||
@ -302,6 +488,8 @@ void init_pstate_and_turbo(void)
|
||||
uint64_t value;
|
||||
uint64_t eax, ecx;
|
||||
|
||||
if (running_on_kvm()) return;
|
||||
|
||||
asm volatile("cpuid" : "=a" (eax), "=c" (ecx) : "a" (0x6) : "%rbx", "%rdx");
|
||||
if (!(ecx & 0x01)) {
|
||||
/* P-states and/or Turbo Boost are not supported. */
|
||||
@ -423,26 +611,6 @@ void init_pat(void)
|
||||
dkprintf("PAT support detected and reconfigured.\n");
|
||||
}
|
||||
|
||||
void init_lapic(void)
|
||||
{
|
||||
unsigned long baseaddr;
|
||||
|
||||
/* Enable Local APIC */
|
||||
baseaddr = rdmsr(MSR_IA32_APIC_BASE);
|
||||
if (!lapic_vp) {
|
||||
lapic_vp = map_fixed_area(baseaddr & PAGE_MASK, PAGE_SIZE, 1);
|
||||
}
|
||||
baseaddr |= 0x800;
|
||||
wrmsr(MSR_IA32_APIC_BASE, baseaddr);
|
||||
|
||||
lapic_write(LAPIC_SPURIOUS, 0x1ff);
|
||||
}
|
||||
|
||||
void lapic_ack(void)
|
||||
{
|
||||
lapic_write(LAPIC_EOI, 0);
|
||||
}
|
||||
|
||||
static void set_kstack(unsigned long ptr)
|
||||
{
|
||||
struct x86_cpu_local_variables *v;
|
||||
@ -456,11 +624,17 @@ static void init_smp_processor(void)
|
||||
{
|
||||
struct x86_cpu_local_variables *v;
|
||||
unsigned long tss_addr;
|
||||
unsigned node_cpu;
|
||||
|
||||
v = get_x86_this_cpu_local();
|
||||
tss_addr = (unsigned long)&v->tss;
|
||||
|
||||
v->apic_id = lapic_read(LAPIC_ID) >> LAPIC_ID_SHIFT;
|
||||
if(x2apic_is_enabled()){
|
||||
v->apic_id = rdmsr(IA32_X2APIC_APICID);
|
||||
}
|
||||
else{
|
||||
v->apic_id = lapic_read(LAPIC_ID) >> LAPIC_ID_SHIFT;
|
||||
}
|
||||
|
||||
memcpy(v->gdt, gdt, sizeof(v->gdt));
|
||||
|
||||
@ -471,6 +645,9 @@ static void init_smp_processor(void)
|
||||
| (0x89UL << 40) | ((tss_addr & 0xff000000) << 32);
|
||||
v->gdt[GLOBAL_TSS_ENTRY + 1] = (tss_addr >> 32);
|
||||
|
||||
node_cpu = v->processor_id; /* assumes NUMA node 0 */
|
||||
v->gdt[GETCPU_ENTRY] |= node_cpu;
|
||||
|
||||
v->gdt_ptr.size = sizeof(v->gdt) - 1;
|
||||
v->gdt_ptr.address = (unsigned long)v->gdt;
|
||||
|
||||
@ -478,6 +655,11 @@ static void init_smp_processor(void)
|
||||
reload_gdt(&v->gdt_ptr);
|
||||
|
||||
set_kstack((unsigned long)get_x86_this_cpu_kstack());
|
||||
|
||||
/* MSR_IA32_TSC_AUX on KVM seems broken */
|
||||
if (running_on_kvm()) return;
|
||||
#define MSR_IA32_TSC_AUX 0xc0000103
|
||||
wrmsr(MSR_IA32_TSC_AUX, node_cpu);
|
||||
}
|
||||
|
||||
static char *trampoline_va, *first_page_va;
|
||||
@ -497,9 +679,6 @@ void ihk_mc_init_ap(void)
|
||||
kprintf("# of cpus : %d\n", cpu_info->ncpus);
|
||||
init_processors_local(cpu_info->ncpus);
|
||||
|
||||
kprintf("IKC IRQ vector: %d, IKC target CPU APIC: %d\n",
|
||||
ihk_ikc_irq, ihk_ikc_irq_apicid);
|
||||
|
||||
/* Do initialization for THIS cpu (BSP) */
|
||||
assign_processor_id();
|
||||
|
||||
@ -621,6 +800,8 @@ void setup_x86(void)
|
||||
|
||||
check_no_execute();
|
||||
|
||||
init_lapic_bsp();
|
||||
|
||||
init_cpu();
|
||||
|
||||
init_gettime_support();
|
||||
@ -671,6 +852,8 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
lapic_ack();
|
||||
++v->in_interrupt;
|
||||
|
||||
set_cputime(interrupt_from_user(regs)? 1: 2);
|
||||
|
||||
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
|
||||
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
|
||||
|
||||
@ -732,6 +915,38 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
|
||||
dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
|
||||
}
|
||||
else if (vector == LOCAL_PERF_VECTOR) {
|
||||
struct siginfo info;
|
||||
unsigned long value;
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
struct process *proc = thread->proc;
|
||||
long irqstate;
|
||||
struct mckfd *fdp;
|
||||
|
||||
lapic_write(LAPIC_LVTPC, LOCAL_PERF_VECTOR);
|
||||
|
||||
value = rdmsr(MSR_PERF_GLOBAL_STATUS);
|
||||
wrmsr(MSR_PERF_GLOBAL_OVF_CTRL, value);
|
||||
wrmsr(MSR_PERF_GLOBAL_OVF_CTRL, 0);
|
||||
|
||||
irqstate = ihk_mc_spinlock_lock(&proc->mckfd_lock);
|
||||
for(fdp = proc->mckfd; fdp; fdp = fdp->next) {
|
||||
if(fdp->sig_no > 0)
|
||||
break;
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&proc->mckfd_lock, irqstate);
|
||||
|
||||
if(fdp) {
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_signo = fdp->sig_no;
|
||||
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
|
||||
info._sifields._sigpoll.si_fd = fdp->fd;
|
||||
set_signal(fdp->sig_no, regs, &info);
|
||||
}
|
||||
else {
|
||||
set_signal(SIGIO, regs, NULL);
|
||||
}
|
||||
}
|
||||
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
|
||||
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
|
||||
|
||||
@ -745,14 +960,19 @@ void handle_interrupt(int vector, struct x86_user_context *regs)
|
||||
}
|
||||
}
|
||||
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
|
||||
--v->in_interrupt;
|
||||
}
|
||||
|
||||
void gpe_handler(struct x86_user_context *regs)
|
||||
{
|
||||
set_cputime(interrupt_from_user(regs)? 1: 2);
|
||||
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
arch_show_interrupt_context(regs);
|
||||
@ -760,8 +980,12 @@ void gpe_handler(struct x86_user_context *regs)
|
||||
panic("gpe_handler");
|
||||
}
|
||||
set_signal(SIGSEGV, regs, NULL);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
// panic("GPF");
|
||||
}
|
||||
|
||||
@ -771,6 +995,7 @@ void debug_handler(struct x86_user_context *regs)
|
||||
int si_code = 0;
|
||||
struct siginfo info;
|
||||
|
||||
set_cputime(interrupt_from_user(regs)? 1: 2);
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
kprintf("debug exception (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
@ -788,14 +1013,19 @@ void debug_handler(struct x86_user_context *regs)
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = si_code;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
}
|
||||
|
||||
void int3_handler(struct x86_user_context *regs)
|
||||
{
|
||||
struct siginfo info;
|
||||
|
||||
set_cputime(interrupt_from_user(regs)? 1: 2);
|
||||
#ifdef DEBUG_PRINT_CPU
|
||||
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
|
||||
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
|
||||
@ -805,25 +1035,67 @@ void int3_handler(struct x86_user_context *regs)
|
||||
memset(&info, '\0', sizeof info);
|
||||
info.si_code = TRAP_BRKPT;
|
||||
set_signal(SIGTRAP, regs, &info);
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
|
||||
static void wait_icr_idle(void)
|
||||
{
|
||||
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
|
||||
cpu_pause();
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
check_need_resched();
|
||||
}
|
||||
set_cputime(0);
|
||||
}
|
||||
|
||||
void x86_issue_ipi(unsigned int apicid, unsigned int low)
|
||||
void
|
||||
unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
{
|
||||
unsigned long flags;
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
|
||||
|
||||
flags = cpu_disable_interrupt_save();
|
||||
wait_icr_idle();
|
||||
lapic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
|
||||
cpu_restore_interrupt(flags);
|
||||
irqflags = kprintf_lock();
|
||||
__kprintf("Page fault for 0x%lx\n", address);
|
||||
__kprintf("%s for %s access in %s mode (reserved bit %s set), "
|
||||
"it %s an instruction fetch\n",
|
||||
(error & PF_PROT ? "protection fault" : "no page found"),
|
||||
(error & PF_WRITE ? "write" : "read"),
|
||||
(error & PF_USER ? "user" : "kernel"),
|
||||
(error & PF_RSVD ? "was" : "wasn't"),
|
||||
(error & PF_INSTR ? "was" : "wasn't"));
|
||||
|
||||
found = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= address && range->end > address) {
|
||||
found = 1;
|
||||
__kprintf("address is in range, flag: 0x%lx\n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
__kprintf("address is out of range! \n");
|
||||
}
|
||||
|
||||
kprintf_unlock(irqflags);
|
||||
|
||||
/* TODO */
|
||||
ihk_mc_debug_show_interrupt_context(regs);
|
||||
|
||||
|
||||
//dkprintf("now dump a core file\n");
|
||||
//coredump(proc, regs);
|
||||
|
||||
#ifdef DEBUG_PRINT_MEM
|
||||
{
|
||||
uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs);
|
||||
|
||||
kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n",
|
||||
sp[0], sp[1], sp[2], sp[3]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void outb(uint8_t v, uint16_t port)
|
||||
@ -852,12 +1124,12 @@ static void __x86_wakeup(int apicid, unsigned long ip)
|
||||
|
||||
x86_issue_ipi(apicid,
|
||||
APIC_INT_LEVELTRIG | APIC_DM_INIT);
|
||||
wait_icr_idle();
|
||||
lapic_wait_icr_idle();
|
||||
|
||||
while (retry--) {
|
||||
lapic_read(LAPIC_ESR);
|
||||
x86_issue_ipi(apicid, APIC_DM_STARTUP | (ip >> 12));
|
||||
wait_icr_idle();
|
||||
lapic_wait_icr_idle();
|
||||
|
||||
arch_delay(200);
|
||||
|
||||
@ -868,6 +1140,10 @@ static void __x86_wakeup(int apicid, unsigned long ip)
|
||||
|
||||
/** IHK Functions **/
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@ ensures \interrupt_disabled == 0;
|
||||
@*/
|
||||
void cpu_halt(void)
|
||||
{
|
||||
asm volatile("hlt");
|
||||
@ -1170,6 +1446,10 @@ void arch_show_extended_context(void)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*@
|
||||
@ requires \valid(reg);
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void arch_show_interrupt_context(const void *reg)
|
||||
{
|
||||
const struct x86_user_context *uctx = reg;
|
||||
@ -1258,8 +1538,8 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(proc);
|
||||
@ ensures proc->fp_regs == NULL;
|
||||
@ requires \valid(thread);
|
||||
@ ensures thread->fp_regs == NULL;
|
||||
@*/
|
||||
void
|
||||
release_fp_regs(struct thread *thread)
|
||||
@ -1269,18 +1549,23 @@ release_fp_regs(struct thread *thread)
|
||||
if (thread && !thread->fp_regs)
|
||||
return;
|
||||
|
||||
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
|
||||
pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
|
||||
dkprintf("release_fp_regs: pages=%d\n", pages);
|
||||
ihk_mc_free_pages(thread->fp_regs, pages);
|
||||
thread->fp_regs = NULL;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@*/
|
||||
void
|
||||
save_fp_regs(struct thread *thread)
|
||||
{
|
||||
int pages;
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
|
||||
pages = (xsave_size + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
|
||||
dkprintf("save_fp_regs: pages=%d\n", pages);
|
||||
thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
|
||||
|
||||
if (!thread->fp_regs) {
|
||||
@ -1289,14 +1574,15 @@ save_fp_regs(struct thread *thread)
|
||||
}
|
||||
|
||||
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
|
||||
memset(thread->fp_regs, 0, pages * PAGE_SIZE);
|
||||
}
|
||||
|
||||
if (xsave_available) {
|
||||
unsigned int low, high;
|
||||
|
||||
/* Request full save of x87, SSE and AVX states */
|
||||
low = 0x7;
|
||||
high = 0;
|
||||
/* Request full save of x87, SSE, AVX and AVX-512 states */
|
||||
low = (unsigned int)xsave_mask;
|
||||
high = (unsigned int)(xsave_mask >> 32);
|
||||
|
||||
asm volatile("xsave %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high)
|
||||
: "memory");
|
||||
@ -1305,6 +1591,10 @@ save_fp_regs(struct thread *thread)
|
||||
}
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@ assigns thread->fp_regs;
|
||||
@*/
|
||||
void
|
||||
restore_fp_regs(struct thread *thread)
|
||||
{
|
||||
@ -1314,9 +1604,9 @@ restore_fp_regs(struct thread *thread)
|
||||
if (xsave_available) {
|
||||
unsigned int low, high;
|
||||
|
||||
/* Request full restore of x87, SSE and AVX states */
|
||||
low = 0x7;
|
||||
high = 0;
|
||||
/* Request full restore of x87, SSE, AVX and AVX-512 states */
|
||||
low = (unsigned int)xsave_mask;
|
||||
high = (unsigned int)(xsave_mask >> 32);
|
||||
|
||||
asm volatile("xrstor %0" : : "m" (*thread->fp_regs),
|
||||
"a" (low), "d" (high));
|
||||
@ -1353,8 +1643,186 @@ ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
|
||||
return uctx;
|
||||
} /* lookup_user_context() */
|
||||
|
||||
|
||||
void zero_tsc(void)
|
||||
extern long do_arch_prctl(unsigned long code, unsigned long address);
|
||||
void
|
||||
ihk_mc_init_user_tlsbase(ihk_mc_user_context_t *ctx,
|
||||
unsigned long tls_base_addr)
|
||||
{
|
||||
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
|
||||
do_arch_prctl(ARCH_SET_FS, tls_base_addr);
|
||||
}
|
||||
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void init_tick(void)
|
||||
{
|
||||
dkprintf("init_tick():\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void init_delay(void)
|
||||
{
|
||||
dkprintf("init_delay():\n");
|
||||
return;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void sync_tick(void)
|
||||
{
|
||||
dkprintf("sync_tick():\n");
|
||||
return;
|
||||
}
|
||||
|
||||
static int is_pvclock_available(void)
|
||||
{
|
||||
uint32_t eax;
|
||||
uint32_t ebx;
|
||||
uint32_t ecx;
|
||||
uint32_t edx;
|
||||
|
||||
dkprintf("is_pvclock_available()\n");
|
||||
#define KVM_CPUID_SIGNATURE 0x40000000
|
||||
asm ("cpuid" : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
|
||||
: "a" (KVM_CPUID_SIGNATURE));
|
||||
if ((eax && (eax < 0x40000001))
|
||||
|| (ebx != 0x4b4d564b)
|
||||
|| (ecx != 0x564b4d56)
|
||||
|| (edx != 0x0000004d)) {
|
||||
dkprintf("is_pvclock_available(): false (not kvm)\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define KVM_CPUID_FEATURES 0x40000001
|
||||
asm ("cpuid" : "=a"(eax)
|
||||
: "a"(KVM_CPUID_FEATURES)
|
||||
: "%ebx", "%ecx", "%edx");
|
||||
#define KVM_FEATURE_CLOCKSOURCE2 3
|
||||
if (eax & (1 << KVM_FEATURE_CLOCKSOURCE2)) {
|
||||
#define MSR_KVM_SYSTEM_TIME_NEW 0x4b564d01
|
||||
pvti_msr = MSR_KVM_SYSTEM_TIME_NEW;
|
||||
dkprintf("is_pvclock_available(): true (new)\n");
|
||||
return 1;
|
||||
}
|
||||
#define KVM_FEATURE_CLOCKSOURCE 0
|
||||
else if (eax & (1 << KVM_FEATURE_CLOCKSOURCE)) {
|
||||
#define MSR_KVM_SYSTEM_TIME 0x12
|
||||
pvti_msr = MSR_KVM_SYSTEM_TIME;
|
||||
dkprintf("is_pvclock_available(): true (old)\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
dkprintf("is_pvclock_available(): false (not supported)\n");
|
||||
return 0;
|
||||
} /* is_pvclock_available() */
|
||||
|
||||
int arch_setup_pvclock(void)
|
||||
{
|
||||
size_t size;
|
||||
int npages;
|
||||
|
||||
dkprintf("arch_setup_pvclock()\n");
|
||||
if (!is_pvclock_available()) {
|
||||
dkprintf("arch_setup_pvclock(): not supported\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
size = num_processors * sizeof(*pvti);
|
||||
npages = (size + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
pvti_npages = npages;
|
||||
|
||||
pvti = allocate_pages(npages, IHK_MC_AP_NOWAIT);
|
||||
if (!pvti) {
|
||||
ekprintf("arch_setup_pvclock: allocate_pages failed.\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
memset(pvti, 0, PAGE_SIZE*npages);
|
||||
|
||||
dkprintf("arch_setup_pvclock(): ok\n");
|
||||
return 0;
|
||||
} /* arch_setup_pvclock() */
|
||||
|
||||
void arch_start_pvclock(void)
|
||||
{
|
||||
int cpu;
|
||||
intptr_t phys;
|
||||
|
||||
dkprintf("arch_start_pvclock()\n");
|
||||
if (!pvti) {
|
||||
dkprintf("arch_start_pvclock(): not supported\n");
|
||||
return;
|
||||
}
|
||||
|
||||
cpu = ihk_mc_get_processor_id();
|
||||
phys = virt_to_phys(&pvti[cpu]);
|
||||
#define KVM_SYSTEM_TIME_ENABLE 0x1
|
||||
wrmsr(pvti_msr, phys|KVM_SYSTEM_TIME_ENABLE);
|
||||
dkprintf("arch_start_pvclock(): ok\n");
|
||||
return;
|
||||
} /* arch_start_pvclock() */
|
||||
|
||||
static struct cpu_mapping *cpu_mapping = NULL;
|
||||
|
||||
int arch_get_cpu_mapping(struct cpu_mapping **buf, int *nelemsp)
|
||||
{
|
||||
int error;
|
||||
size_t size;
|
||||
int npages;
|
||||
struct cpu_mapping *mapping;
|
||||
int cpu;
|
||||
struct x86_cpu_local_variables *v;
|
||||
|
||||
if (!cpu_mapping) {
|
||||
size = sizeof(*mapping) * num_processors;
|
||||
npages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
mapping = allocate_pages(npages, IHK_MC_AP_NOWAIT);
|
||||
if (!mapping) {
|
||||
error = -ENOMEM;
|
||||
ekprintf("arch_get_cpu_mapping:allocate_pages failed. %d\n", error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
v = get_x86_cpu_local_variable(cpu);
|
||||
mapping[cpu].cpu_number = cpu;
|
||||
mapping[cpu].hw_id = v->apic_id;
|
||||
}
|
||||
|
||||
cpu_mapping = mapping;
|
||||
}
|
||||
|
||||
error = 0;
|
||||
*buf = cpu_mapping;
|
||||
*nelemsp = num_processors;
|
||||
|
||||
out:
|
||||
return error;
|
||||
} /* arch_get_cpu_mapping() */
|
||||
|
||||
#define KVM_CPUID_SIGNATURE 0x40000000

/*
 * Tell whether the hypervisor CPUID leaf carries the KVM signature.
 *
 * Executes CPUID with EAX = KVM_CPUID_SIGNATURE and compares EBX:ECX:EDX
 * with the 12-byte signature "KVMKVMKVM\0\0\0".  The signature is spelled
 * as three little-endian 32-bit words instead of reading a char array
 * through a uint32_t pointer, which violated strict aliasing and did not
 * guarantee alignment.
 *
 * \return 1 if the signature matches, 0 otherwise
 */
int running_on_kvm(void)
{
	/* "KVMK", "VMKV", "M\0\0\0" as little-endian words */
	const uint32_t sig_ebx = 0x4b4d564b;
	const uint32_t sig_ecx = 0x564b4d56;
	const uint32_t sig_edx = 0x0000004d;
	uint32_t eax;
	uint32_t ebx;
	uint32_t ecx;
	uint32_t edx;

	__asm__ __volatile__("cpuid"
			     : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
			     : "a"(KVM_CPUID_SIGNATURE));

	(void)eax;	/* only needed as an output so the register is marked written */

	if (ebx == sig_ebx && ecx == sig_ecx && edx == sig_edx) {
		return 1;
	}

	return 0;
}
|
||||
|
||||
/*** end of file ***/
|
||||
|
||||
@ -271,6 +271,17 @@ void fill_note(void *note, struct thread *thread, void *regs)
|
||||
* should be zero.
|
||||
*/
|
||||
|
||||
/*@
|
||||
@ requires \valid(thread);
|
||||
@ requires \valid(regs);
|
||||
@ requires \valid(coretable);
|
||||
@ requires \valid(chunks);
|
||||
@ behavior success:
|
||||
@ ensures \result == 0;
|
||||
@ assigns coretable;
|
||||
@ behavior failure:
|
||||
@ ensures \result == -1;
|
||||
@*/
|
||||
int gencore(struct thread *thread, void *regs,
|
||||
struct coretable **coretable, int *chunks)
|
||||
{
|
||||
@ -510,6 +521,10 @@ int gencore(struct thread *thread, void *regs,
|
||||
* \param coretable An array of core chunks.
|
||||
*/
|
||||
|
||||
/*@
|
||||
@ requires \valid(coretable);
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
void freecore(struct coretable **coretable)
|
||||
{
|
||||
struct coretable *ct = *coretable;
|
||||
|
||||
96
arch/x86/kernel/include/arch-bitops.h
Normal file
96
arch/x86/kernel/include/arch-bitops.h
Normal file
@ -0,0 +1,96 @@
|
||||
/**
|
||||
* \file arch-bitops.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
 * Bit-search helpers (fls, ffs, __ffs, ffz) and atomic set_bit/clear_bit.
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef HEADER_X86_COMMON_ARCH_BITOPS_H
|
||||
#define HEADER_X86_COMMON_ARCH_BITOPS_H
|
||||
|
||||
/**
 * fls - find last (most significant) set bit in word
 * @x: the word to search
 *
 * Returns 0 if x is 0, otherwise the 1-based position of the most
 * significant set bit (fls(1) == 1, fls(0x80000000) == 32).
 *
 * Uses the compiler builtin instead of open-coded bsr plus a branch so the
 * compiler can constant-fold and schedule it; results are unchanged.
 */
static inline int fls(int x)
{
	/* __builtin_clz(0) is undefined, so zero is handled explicitly. */
	return x ? 32 - __builtin_clz((unsigned int)x) : 0;
}
|
||||
|
||||
/**
 * ffs - find first set bit in word
 * @x: the word to search
 *
 * This is defined the same way as the libc and compiler builtin ffs
 * routines, therefore differs in spirit from the other bitops.
 *
 * ffs(value) returns 0 if value is 0 or the position of the first
 * set bit if value is nonzero. The first (least significant) bit
 * is at position 1.
 *
 * Implemented with __builtin_ctz rather than open-coded bsf plus a branch;
 * results are unchanged.
 */
static inline int ffs(int x)
{
	/* __builtin_ctz(0) is undefined, so zero is handled explicitly. */
	return x ? __builtin_ctz((unsigned int)x) + 1 : 0;
}
|
||||
|
||||
|
||||
/**
 * __ffs - find first set bit in word
 * @word: The word to search
 *
 * Returns the 0-based index of the least significant set bit.
 * Undefined if no bit exists, so code should check against 0 first.
 *
 * Implemented with __builtin_ctzl rather than open-coded bsf.
 */
static inline unsigned long __ffs(unsigned long word)
{
	return (unsigned long)__builtin_ctzl(word);
}
|
||||
|
||||
/**
 * ffz - find first zero bit in word
 * @word: The word to search
 *
 * Returns the 0-based index of the least significant clear bit.
 * Undefined if no zero exists, so code should check against ~0UL first.
 *
 * Implemented with __builtin_ctzl on the complement rather than
 * open-coded bsf.
 */
static inline unsigned long ffz(unsigned long word)
{
	return (unsigned long)__builtin_ctzl(~word);
}
|
||||
|
||||
|
||||
#define ADDR (*(volatile long *)addr)
|
||||
|
||||
/*
 * Atomically set bit nr in the bitmap starting at addr.
 *
 * Issues a locked bts on the memory word; the "memory" clobber also makes
 * the statement a compiler barrier.
 */
static inline void set_bit(int nr, volatile unsigned long *addr)
{
	__asm__ __volatile__("lock; btsl %[bit],%[word]"
			     : [word] "+m" (*(volatile long *)addr)
			     : [bit] "Ir" (nr)
			     : "memory");
}
|
||||
|
||||
/*
 * Atomically clear bit nr in the bitmap starting at addr.
 *
 * Issues a locked btr on the memory word; the "memory" clobber also makes
 * the statement a compiler barrier.
 */
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
	__asm__ __volatile__("lock; btrl %[bit],%[word]"
			     : [word] "+m" (*(volatile long *)addr)
			     : [bit] "Ir" (nr)
			     : "memory");
}
|
||||
|
||||
#endif
|
||||
67
arch/x86/kernel/include/arch-futex.h
Normal file
67
arch/x86/kernel/include/arch-futex.h
Normal file
@ -0,0 +1,67 @@
|
||||
/**
|
||||
 * \file arch-futex.h
|
||||
* Licence details are found in the file LICENSE.
|
||||
*
|
||||
* \brief
|
||||
* Futex adaptation to McKernel
|
||||
*
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
*
|
||||
*
|
||||
* HISTORY:
|
||||
*
|
||||
*/
|
||||
#ifndef _ARCH_FUTEX_H
|
||||
#define _ARCH_FUTEX_H
|
||||
#include <asm.h>
|
||||
|
||||
/*
 * Run a single caller-supplied instruction (insn) on the futex word.
 *
 * Positional operands available to insn: %0 = oldval (preloaded with
 * oparg), %1 = ret (preloaded with 0), %2 = *uaddr.
 * The fixup code at label 3 stores -EFAULT into ret and resumes at label 2;
 * _ASM_EXTABLE(1b, 3b) presumably routes a fault at label 1 there
 * (the macro comes from <asm.h> - confirm).
 * The operand names below are deliberately different from the macro
 * parameter names so the preprocessor does not rewrite them.
 */
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg)	\
	__asm__ __volatile__("1:\t" insn "\n"			\
		     "2:\t.section .fixup,\"ax\"\n"		\
		     "3:\tmov\t%[efault], %[rc]\n"		\
		     "\tjmp\t2b\n"				\
		     "\t.previous\n"				\
		     _ASM_EXTABLE(1b, 3b)			\
		     : "=r" (oldval), [rc] "=r" (ret),		\
		       "+m" (*uaddr)				\
		     : [efault] "i" (-EFAULT), "0" (oparg), "1" (0))
|
||||
|
||||
/*
 * Read-modify-write loop on the futex word built around lock cmpxchg.
 *
 * Each pass loads *uaddr into oldval (eax), copies it to the caller's
 * scratch variable `tem`, applies the caller-supplied insn, then tries to
 * install `tem` with lock cmpxchgl and retries while the compare fails.
 * Positional operands available to insn: %0 = oldval, %1 = ret,
 * %2 = *uaddr, %3 = tem, %4 = oparg.
 * The fixup code at label 4 stores -EFAULT into ret and resumes at label 3;
 * the two _ASM_EXTABLE entries presumably route faults at labels 1 and 2
 * there (the macro comes from <asm.h> - confirm).
 * The operand names below are deliberately different from the macro
 * parameter names so the preprocessor does not rewrite them.
 */
#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg)	\
	__asm__ __volatile__("1:\tmovl %[word], %[old]\n"	\
		     "\tmovl\t%[old], %[tmp]\n"			\
		     "\t" insn "\n"				\
		     "2:\tlock; cmpxchgl %[tmp], %[word]\n"	\
		     "\tjnz\t1b\n"				\
		     "3:\t.section .fixup,\"ax\"\n"		\
		     "4:\tmov\t%[efault], %[rc]\n"		\
		     "\tjmp\t3b\n"				\
		     "\t.previous\n"				\
		     _ASM_EXTABLE(1b, 4b)			\
		     _ASM_EXTABLE(2b, 4b)			\
		     : [old] "=&a" (oldval), [rc] "=&r" (ret),	\
		       [word] "+m" (*uaddr), [tmp] "=&r" (tem)	\
		     : "r" (oparg), [efault] "i" (-EFAULT), "1" (0))
|
||||
|
||||
/*
 * Atomic compare-and-exchange on a user-space futex word.
 *
 * If *uaddr equals oldval it is replaced by newval (lock cmpxchgl).
 *
 * \return the value found in *uaddr before the operation (equal to the
 *         oldval argument when the exchange happened), or -EFAULT when the
 *         fixup path runs.  With __UACCESS__ defined, -EFAULT is also
 *         returned when access_ok() rejects the address.
 *
 * The fixup code at label 3 loads -EFAULT into the result register;
 * _ASM_EXTABLE(1b, 3b) presumably routes a fault at label 1 there
 * (the macro comes from <asm.h> - confirm).
 */
static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
						int newval)
{
#ifdef __UACCESS__
	if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
		return -EFAULT;
#endif

	__asm__ __volatile__(
		"1:\tlock; cmpxchgl %[desired], %[word]\n"
		"2:\t.section .fixup, \"ax\"\n"
		"3:\tmov %[efault], %[seen]\n"
		"\tjmp 2b\n"
		"\t.previous\n"
		_ASM_EXTABLE(1b, 3b)
		: [seen] "=a" (oldval), [word] "+m" (*uaddr)
		: [efault] "i" (-EFAULT), [desired] "r" (newval), "0" (oldval)
		: "memory"
	);

	return oldval;
}
|
||||
|
||||
#endif
|
||||
@ -248,6 +248,7 @@ mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
|
||||
struct mcs_rwlock_node *p;
|
||||
struct mcs_rwlock_node *f = NULL;
|
||||
struct mcs_rwlock_node *n;
|
||||
int breakf = 0;
|
||||
|
||||
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
|
||||
for(p = &lock->reader; p->next; p = n){
|
||||
@ -268,6 +269,9 @@ mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
|
||||
}
|
||||
p->next = n->next;
|
||||
}
|
||||
else{
|
||||
breakf = 1;
|
||||
}
|
||||
}
|
||||
else if(p->next == NULL){
|
||||
while (n->next == NULL) {
|
||||
@ -282,6 +286,8 @@ mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
|
||||
else
|
||||
f = n;
|
||||
n = p;
|
||||
if(breakf)
|
||||
break;
|
||||
}
|
||||
if(n->next == NULL && lock->node != n){
|
||||
while (n->next == NULL && lock->node != n) {
|
||||
@ -340,6 +346,24 @@ __kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id());
|
||||
#else
|
||||
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
|
||||
#endif
|
||||
|
||||
static inline unsigned int
|
||||
atomic_inc_ifnot0(ihk_atomic_t *v)
|
||||
{
|
||||
unsigned int *p = (unsigned int *)(&(v)->counter);
|
||||
unsigned int old;
|
||||
unsigned int new;
|
||||
unsigned int val;
|
||||
|
||||
do{
|
||||
if(!(old = *p))
|
||||
break;
|
||||
new = old + 1;
|
||||
val = atomic_cmpxchg4(p, old, new);
|
||||
}while(val != old);
|
||||
return old;
|
||||
}
|
||||
|
||||
static void
|
||||
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
|
||||
{
|
||||
@ -356,7 +380,7 @@ __mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_n
|
||||
|
||||
if (pred) {
|
||||
if(pred == &lock->reader){
|
||||
if(ihk_atomic_inc_return(&pred->count) != 1){
|
||||
if(atomic_inc_ifnot0(&pred->count)){
|
||||
struct mcs_rwlock_node *old;
|
||||
|
||||
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
|
||||
@ -372,12 +396,12 @@ __mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_n
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
pred->next = node->next;
|
||||
if(node->next->type == MCS_RWLOCK_TYPE_READER)
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
lock->reader.next = node;
|
||||
mcs_rwlock_unlock_readers(lock);
|
||||
ihk_atomic_dec(&pred->count);
|
||||
goto out;
|
||||
}
|
||||
ihk_atomic_dec(&pred->count);
|
||||
}
|
||||
node->locked = MCS_RWLOCK_LOCKED;
|
||||
pred->next = node;
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#define USER_CS_ENTRY 6
|
||||
#define USER_DS_ENTRY 7
|
||||
#define GLOBAL_TSS_ENTRY 8
|
||||
#define GETCPU_ENTRY 15
|
||||
|
||||
#define KERNEL_CS (KERNEL_CS_ENTRY * 8)
|
||||
#define KERNEL_DS (KERNEL_DS_ENTRY * 8)
|
||||
@ -40,10 +41,12 @@
|
||||
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
|
||||
|
||||
#define USER_END 0x0000800000000000UL
|
||||
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
|
||||
#define MAP_ST_START 0xffff800000000000UL
|
||||
#define MAP_VMAP_START 0xfffff00000000000UL
|
||||
#define MAP_FIXED_START 0xffffffff70000000UL
|
||||
#define MAP_KERNEL_START 0xffffffff80000000UL
|
||||
#define STACK_TOP(region) ((region)->user_end)
|
||||
|
||||
#define MAP_VMAP_SIZE 0x0000000100000000UL
|
||||
|
||||
@ -65,6 +68,8 @@
|
||||
|
||||
#define PF_PRESENT ((pte_t)0x01) /* entry is valid */
|
||||
#define PF_WRITABLE ((pte_t)0x02)
|
||||
#define PFLX_PWT ((pte_t)0x08)
|
||||
#define PFLX_PCD ((pte_t)0x10)
|
||||
#define PF_SIZE ((pte_t)0x80) /* entry points large page */
|
||||
|
||||
#define PFL4_PRESENT ((pte_t)0x01)
|
||||
@ -74,8 +79,8 @@
|
||||
#define PFL3_PRESENT ((pte_t)0x01)
|
||||
#define PFL3_WRITABLE ((pte_t)0x02)
|
||||
#define PFL3_USER ((pte_t)0x04)
|
||||
#define PFL3_PWT ((pte_t)0x08)
|
||||
#define PFL3_PCD ((pte_t)0x10)
|
||||
#define PFL3_PWT PFLX_PWT
|
||||
#define PFL3_PCD PFLX_PCD
|
||||
#define PFL3_ACCESSED ((pte_t)0x20)
|
||||
#define PFL3_DIRTY ((pte_t)0x40)
|
||||
#define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */
|
||||
@ -86,8 +91,8 @@
|
||||
#define PFL2_PRESENT ((pte_t)0x01)
|
||||
#define PFL2_WRITABLE ((pte_t)0x02)
|
||||
#define PFL2_USER ((pte_t)0x04)
|
||||
#define PFL2_PWT ((pte_t)0x08)
|
||||
#define PFL2_PCD ((pte_t)0x10)
|
||||
#define PFL2_PWT PFLX_PWT
|
||||
#define PFL2_PCD PFLX_PCD
|
||||
#define PFL2_ACCESSED ((pte_t)0x20)
|
||||
#define PFL2_DIRTY ((pte_t)0x40)
|
||||
#define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */
|
||||
@ -98,8 +103,8 @@
|
||||
#define PFL1_PRESENT ((pte_t)0x01)
|
||||
#define PFL1_WRITABLE ((pte_t)0x02)
|
||||
#define PFL1_USER ((pte_t)0x04)
|
||||
#define PFL1_PWT ((pte_t)0x08)
|
||||
#define PFL1_PCD ((pte_t)0x10)
|
||||
#define PFL1_PWT PFLX_PWT
|
||||
#define PFL1_PCD PFLX_PCD
|
||||
#define PFL1_ACCESSED ((pte_t)0x20)
|
||||
#define PFL1_DIRTY ((pte_t)0x40)
|
||||
#define PFL1_IGNORED_11 ((pte_t)1 << 11)
|
||||
@ -152,6 +157,8 @@ enum ihk_mc_pt_attribute {
|
||||
PTATTR_WRITE_COMBINED = 0x40000,
|
||||
};
|
||||
|
||||
enum ihk_mc_pt_attribute attr_mask;
|
||||
|
||||
static inline int pte_is_null(pte_t *ptep)
|
||||
{
|
||||
return (*ptep == PTE_NULL);
|
||||
@ -207,6 +214,27 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
|
||||
return (off_t)(*ptep & PAGE_MASK);
|
||||
}
|
||||
|
||||
/*
 * Translate a page-table entry into ihk_mc_pt_attribute flags.
 *
 * Starts from the entry bits selected by the global attr_mask, then adds:
 *  - PTATTR_UNCACHABLE when both PWT and PCD are set,
 *  - PTATTR_WRITE_COMBINED when PWT is set and PCD is clear,
 *  - PTATTR_LARGEPAGE when the entry has the size bit that matches pgsize
 *    (PFL2_SIZE for PTL2_SIZE, PFL3_SIZE for PTL3_SIZE).
 */
static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize)
{
	const pte_t pte = *ptep;
	enum ihk_mc_pt_attribute result;
	int large_l2;
	int large_l3;

	result = pte & attr_mask;

	if (pte & PFLX_PWT) {
		result |= (pte & PFLX_PCD) ? PTATTR_UNCACHABLE
					   : PTATTR_WRITE_COMBINED;
	}

	large_l2 = (pgsize == PTL2_SIZE) && (pte & PFL2_SIZE);
	large_l3 = (pgsize == PTL3_SIZE) && (pte & PFL3_SIZE);
	if (large_l2 || large_l3) {
		result |= PTATTR_LARGEPAGE;
	}

	return result;
} /* pte_get_attr() */
|
||||
|
||||
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
|
||||
{
|
||||
*ptep = PTE_NULL;
|
||||
@ -290,5 +318,5 @@ extern unsigned long ap_trampoline;
|
||||
#define AP_TRAMPOLINE_SIZE 0x2000
|
||||
|
||||
/* Local is cachable */
|
||||
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)
|
||||
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
|
||||
#endif
|
||||
|
||||
18
arch/x86/kernel/include/arch/auxvec.h
Normal file
18
arch/x86/kernel/include/arch/auxvec.h
Normal file
@ -0,0 +1,18 @@
|
||||
/**
|
||||
* \file auxvec.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* Declare architecture-dependent constants for auxiliary vector
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
|
||||
* Copyright (C) 2016 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef ARCH_AUXVEC_H
|
||||
#define ARCH_AUXVEC_H
|
||||
|
||||
#define AT_SYSINFO_EHDR 33
|
||||
|
||||
#endif
|
||||
@ -25,4 +25,13 @@ static inline void wmb(void)
|
||||
barrier();
|
||||
}
|
||||
|
||||
/*
 * Read the CPU time-stamp counter.
 *
 * rdtsc returns the counter split across EDX:EAX; the two halves are
 * combined into one 64-bit value.
 *
 * Declared inline, like wmb() in this header, so that translation units
 * which include the header without calling it do not get an unused
 * static function.
 */
static inline unsigned long read_tsc(void)
{
	unsigned int low, high;

	__asm__ __volatile__("rdtsc" : "=a"(low), "=d"(high));

	return (low | ((unsigned long)high << 32));
}
|
||||
|
||||
#endif /* ARCH_CPU_H */
|
||||
|
||||
16
arch/x86/kernel/include/arch/mm.h
Normal file
16
arch/x86/kernel/include/arch/mm.h
Normal file
@ -0,0 +1,16 @@
|
||||
#ifndef __ARCH_MM_H
|
||||
#define __ARCH_MM_H
|
||||
|
||||
struct process_vm;

/*
 * No-op in this header: the body is empty.
 * Prototyped with (void) so calls with stray arguments are diagnosed.
 */
static inline void
flush_nfo_tlb(void)
{
}

/*
 * No-op in this header: the body is empty and vm is not used.
 */
static inline void
flush_nfo_tlb_mm(struct process_vm *vm)
{
}
|
||||
|
||||
#endif
|
||||
@ -27,6 +27,10 @@
|
||||
#define MAP_STACK 0x00020000
|
||||
#define MAP_HUGETLB 0x00040000
|
||||
|
||||
#define MAP_HUGE_SHIFT 26
|
||||
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
|
||||
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
|
||||
|
||||
/*
|
||||
* for mlockall()
|
||||
*/
|
||||
|
||||
@ -13,6 +13,11 @@
|
||||
#ifndef HEADER_ARCH_SHM_H
|
||||
#define HEADER_ARCH_SHM_H
|
||||
|
||||
/* shmflg */
|
||||
#define SHM_HUGE_SHIFT 26
|
||||
#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
|
||||
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
|
||||
|
||||
struct ipc_perm {
|
||||
key_t key;
|
||||
uid_t uid;
|
||||
@ -34,7 +39,8 @@ struct shmid_ds {
|
||||
pid_t shm_cpid;
|
||||
pid_t shm_lpid;
|
||||
uint64_t shm_nattch;
|
||||
uint8_t padding[16];
|
||||
uint8_t padding[12];
|
||||
int init_pgshift;
|
||||
};
|
||||
|
||||
#endif /* HEADER_ARCH_SHM_H */
|
||||
|
||||
@ -22,7 +22,7 @@
|
||||
* - 4096 : kernel stack
|
||||
*/
|
||||
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 128
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 176
|
||||
#define X86_CPU_LOCAL_OFFSET_KSTACK 16
|
||||
#define X86_CPU_LOCAL_OFFSET_USTACK 24
|
||||
|
||||
@ -39,13 +39,13 @@ struct x86_cpu_local_variables {
|
||||
struct x86_desc_ptr gdt_ptr;
|
||||
unsigned short pad[3];
|
||||
/* 48 */
|
||||
uint64_t gdt[10];
|
||||
/* 128 */
|
||||
uint64_t gdt[16];
|
||||
/* 176 */
|
||||
struct tss64 tss;
|
||||
/* 232 */
|
||||
/* 280 */
|
||||
unsigned long paniced;
|
||||
uint64_t panic_regs[21];
|
||||
/* 408 */
|
||||
/* 456 */
|
||||
} __attribute__((packed));
|
||||
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
|
||||
|
||||
@ -1,40 +1,7 @@
|
||||
#ifndef _ASM_GENERIC_ERRNO_BASE_H
|
||||
#define _ASM_GENERIC_ERRNO_BASE_H
|
||||
#ifndef _ERRNO_BASE_H
|
||||
#define _ERRNO_BASE_H
|
||||
|
||||
#define EPERM 1 /* Operation not permitted */
|
||||
#define ENOENT 2 /* No such file or directory */
|
||||
#define ESRCH 3 /* No such process */
|
||||
#define EINTR 4 /* Interrupted system call */
|
||||
#define EIO 5 /* I/O error */
|
||||
#define ENXIO 6 /* No such device or address */
|
||||
#define E2BIG 7 /* Argument list too long */
|
||||
#define ENOEXEC 8 /* Exec format error */
|
||||
#define EBADF 9 /* Bad file number */
|
||||
#define ECHILD 10 /* No child processes */
|
||||
#define EAGAIN 11 /* Try again */
|
||||
#define ENOMEM 12 /* Out of memory */
|
||||
#define EACCES 13 /* Permission denied */
|
||||
#define EFAULT 14 /* Bad address */
|
||||
#define ENOTBLK 15 /* Block device required */
|
||||
#define EBUSY 16 /* Device or resource busy */
|
||||
#define EEXIST 17 /* File exists */
|
||||
#define EXDEV 18 /* Cross-device link */
|
||||
#define ENODEV 19 /* No such device */
|
||||
#define ENOTDIR 20 /* Not a directory */
|
||||
#define EISDIR 21 /* Is a directory */
|
||||
#define EINVAL 22 /* Invalid argument */
|
||||
#define ENFILE 23 /* File table overflow */
|
||||
#define EMFILE 24 /* Too many open files */
|
||||
#define ENOTTY 25 /* Not a typewriter */
|
||||
#define ETXTBSY 26 /* Text file busy */
|
||||
#define EFBIG 27 /* File too large */
|
||||
#define ENOSPC 28 /* No space left on device */
|
||||
#define ESPIPE 29 /* Illegal seek */
|
||||
#define EROFS 30 /* Read-only file system */
|
||||
#define EMLINK 31 /* Too many links */
|
||||
#define EPIPE 32 /* Broken pipe */
|
||||
#define EDOM 33 /* Math argument out of domain of func */
|
||||
#define ERANGE 34 /* Math result not representable */
|
||||
#include <generic-errno.h>
|
||||
|
||||
#define EDEADLK 35 /* Resource deadlock would occur */
|
||||
#define ENAMETOOLONG 36 /* File name too long */
|
||||
@ -141,29 +108,4 @@
|
||||
|
||||
#define ERFKILL 132 /* Operation not possible due to RF-kill */
|
||||
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
/* Should never be seen by user programs */
|
||||
#define ERESTARTSYS 512
|
||||
#define ERESTARTNOINTR 513
|
||||
#define ERESTARTNOHAND 514 /* restart if no handler.. */
|
||||
#define ENOIOCTLCMD 515 /* No ioctl command */
|
||||
#define ERESTART_RESTARTBLOCK 516 /* restart by calling sys_restart_syscall */
|
||||
|
||||
/* Defined for the NFSv3 protocol */
|
||||
#define EBADHANDLE 521 /* Illegal NFS file handle */
|
||||
#define ENOTSYNC 522 /* Update synchronization mismatch */
|
||||
#define EBADCOOKIE 523 /* Cookie is stale */
|
||||
#define ENOTSUPP 524 /* Operation is not supported */
|
||||
#define ETOOSMALL 525 /* Buffer or request is too small */
|
||||
#define ESERVERFAULT 526 /* An untranslatable error occurred */
|
||||
#define EBADTYPE 527 /* Type not supported by server */
|
||||
#define EJUKEBOX 528 /* Request initiated, but will not complete before timeout */
|
||||
#define EIOCBQUEUED 529 /* iocb queued, will get completion event */
|
||||
#define EIOCBRETRY 530 /* iocb queued, will trigger a retry */
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
#endif
|
||||
@ -202,4 +202,17 @@ static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
|
||||
return oldval;
|
||||
}
|
||||
|
||||
/*
 * Atomic 32-bit compare-and-exchange.
 *
 * If *addr equals oldval it is replaced by newval (lock cmpxchgl).
 *
 * \return the value that was in *addr before the operation; it equals
 *         oldval exactly when the exchange took place.
 */
static inline unsigned long atomic_cmpxchg4(unsigned int *addr,
					    unsigned int oldval,
					    unsigned int newval)
{
	__asm__ __volatile__("lock; cmpxchgl %[desired], %[word]\n"
			     : [seen] "=a" (oldval), [word] "+m" (*addr)
			     : [desired] "r" (newval), "0" (oldval)
			     : "memory"
	);

	return oldval;
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -31,9 +31,5 @@ typedef int64_t off_t;
|
||||
|
||||
#define NULL ((void *)0)
|
||||
|
||||
#define BITS_PER_LONG_SHIFT 6
|
||||
#define BITS_PER_LONG (1 << BITS_PER_LONG_SHIFT)
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
17
arch/x86/kernel/include/prctl.h
Normal file
17
arch/x86/kernel/include/prctl.h
Normal file
@ -0,0 +1,17 @@
|
||||
/**
|
||||
* \file prctl.h
|
||||
* License details are found in the file LICENSE.
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef __ARCH_PRCTL_H
|
||||
#define __ARCH_PRCTL_H
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
#define ARCH_GET_GS 0x1004
|
||||
|
||||
#endif
|
||||
@ -90,10 +90,6 @@ enum __rlimit_resource
|
||||
#define RLIM_NLIMITS __RLIM_NLIMITS
|
||||
};
|
||||
|
||||
|
||||
struct rlimit {
|
||||
uint64_t rlim_cur; /* Soft limit */
|
||||
uint64_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
|
||||
};
|
||||
#include <generic-rlimit.h>
|
||||
|
||||
#endif
|
||||
@ -20,7 +20,7 @@
|
||||
* syscall_name[] only, no handler exists.
|
||||
*/
|
||||
|
||||
SYSCALL_DELEGATED(0, read)
|
||||
SYSCALL_HANDLED(0, read)
|
||||
SYSCALL_DELEGATED(1, write)
|
||||
SYSCALL_DELEGATED(2, open)
|
||||
SYSCALL_HANDLED(3, close)
|
||||
@ -35,7 +35,7 @@ SYSCALL_HANDLED(12, brk)
|
||||
SYSCALL_HANDLED(13, rt_sigaction)
|
||||
SYSCALL_HANDLED(14, rt_sigprocmask)
|
||||
SYSCALL_HANDLED(15, rt_sigreturn)
|
||||
SYSCALL_DELEGATED(16, ioctl)
|
||||
SYSCALL_HANDLED(16, ioctl)
|
||||
SYSCALL_DELEGATED(17, pread64)
|
||||
SYSCALL_DELEGATED(18, pwrite64)
|
||||
SYSCALL_DELEGATED(20, writev)
|
||||
@ -51,6 +51,8 @@ SYSCALL_HANDLED(30, shmat)
|
||||
SYSCALL_HANDLED(31, shmctl)
|
||||
SYSCALL_HANDLED(34, pause)
|
||||
SYSCALL_HANDLED(35, nanosleep)
|
||||
SYSCALL_HANDLED(36, getitimer)
|
||||
SYSCALL_HANDLED(38, setitimer)
|
||||
SYSCALL_HANDLED(39, getpid)
|
||||
SYSCALL_HANDLED(56, clone)
|
||||
SYSCALL_DELEGATED(57, fork)
|
||||
@ -64,11 +66,13 @@ SYSCALL_DELEGATED(65, semop)
|
||||
SYSCALL_HANDLED(67, shmdt)
|
||||
SYSCALL_DELEGATED(69, msgsnd)
|
||||
SYSCALL_DELEGATED(70, msgrcv)
|
||||
SYSCALL_DELEGATED(72, fcntl)
|
||||
SYSCALL_HANDLED(72, fcntl)
|
||||
SYSCALL_DELEGATED(79, getcwd)
|
||||
SYSCALL_DELEGATED(89, readlink)
|
||||
SYSCALL_HANDLED(96, gettimeofday)
|
||||
SYSCALL_HANDLED(97, getrlimit)
|
||||
SYSCALL_HANDLED(98, getrusage)
|
||||
SYSCALL_HANDLED(100, times)
|
||||
SYSCALL_HANDLED(101, ptrace)
|
||||
SYSCALL_HANDLED(102, getuid)
|
||||
SYSCALL_HANDLED(104, getgid)
|
||||
@ -107,6 +111,7 @@ SYSCALL_HANDLED(158, arch_prctl)
|
||||
SYSCALL_HANDLED(160, setrlimit)
|
||||
SYSCALL_HANDLED(164, settimeofday)
|
||||
SYSCALL_HANDLED(186, gettid)
|
||||
SYSCALL_HANDLED(200, tkill)
|
||||
SYSCALL_DELEGATED(201, time)
|
||||
SYSCALL_HANDLED(202, futex)
|
||||
SYSCALL_HANDLED(203, sched_setaffinity)
|
||||
@ -116,6 +121,7 @@ SYSCALL_HANDLED(216, remap_file_pages)
|
||||
SYSCALL_DELEGATED(217, getdents64)
|
||||
SYSCALL_HANDLED(218, set_tid_address)
|
||||
SYSCALL_DELEGATED(220, semtimedop)
|
||||
SYSCALL_HANDLED(228, clock_gettime)
|
||||
SYSCALL_DELEGATED(230, clock_nanosleep)
|
||||
SYSCALL_HANDLED(231, exit_group)
|
||||
SYSCALL_DELEGATED(232, epoll_wait)
|
||||
@ -132,6 +138,7 @@ SYSCALL_HANDLED(279, move_pages)
|
||||
SYSCALL_DELEGATED(281, epoll_pwait)
|
||||
SYSCALL_HANDLED(282, signalfd)
|
||||
SYSCALL_HANDLED(289, signalfd4)
|
||||
SYSCALL_HANDLED(298, perf_event_open)
|
||||
#ifdef DCFA_KMOD
|
||||
SYSCALL_HANDLED(303, mod_call)
|
||||
#endif
|
||||
|
||||
@ -13,7 +13,7 @@
|
||||
* 2013/?? - bgerofi + shimosawa: handle rsp correctly for nested interrupts
|
||||
*/
|
||||
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 128
|
||||
#define X86_CPU_LOCAL_OFFSET_TSS 176
|
||||
#define X86_TSS_OFFSET_SP0 4
|
||||
#define X86_CPU_LOCAL_OFFSET_SP0 \
|
||||
(X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0)
|
||||
@ -209,7 +209,9 @@ enter_user_mode:
|
||||
callq release_runq_lock
|
||||
movq $0, %rdi
|
||||
movq %rsp, %rsi
|
||||
call check_signal
|
||||
call check_signal
|
||||
movq $0, %rdi
|
||||
call set_cputime
|
||||
POP_ALL_REGS
|
||||
addq $8, %rsp
|
||||
iretq
|
||||
|
||||
@ -38,6 +38,11 @@ void init_processors_local(int max_id)
|
||||
kprintf("locals = %p\n", locals);
|
||||
}
|
||||
|
||||
/*@
|
||||
@ requires \valid(id);
|
||||
@ ensures \result == locals + (LOCALS_SPAN * id);
|
||||
@ assigns \nothing;
|
||||
@*/
|
||||
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
|
||||
{
|
||||
return (struct x86_cpu_local_variables *)
|
||||
@ -98,6 +103,10 @@ void init_boot_processor_local(void)
|
||||
}
|
||||
|
||||
/** IHK **/
|
||||
/*@
|
||||
@ ensures \result == %gs;
|
||||
@ assigns \nothing;
|
||||
*/
|
||||
int ihk_mc_get_processor_id(void)
|
||||
{
|
||||
int id;
|
||||
@ -107,6 +116,10 @@ int ihk_mc_get_processor_id(void)
|
||||
return id;
|
||||
}
|
||||
|
||||
/*@
|
||||
@ ensures \result == (locals + (LOCALS_SPAN * %gs))->apic_id;
|
||||
@ assigns \nothing;
|
||||
*/
|
||||
int ihk_mc_get_hardware_processor_id(void)
|
||||
{
|
||||
struct x86_cpu_local_variables *v = get_x86_this_cpu_local();
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@ -38,7 +38,7 @@ int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
|
||||
arch_master_channel_packet_handler = packet_handler;
|
||||
|
||||
ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
|
||||
ihk_ikc_master_channel_packet_handler);
|
||||
ihk_ikc_master_channel_packet_handler, channel);
|
||||
ihk_ikc_enable_channel(channel);
|
||||
|
||||
/* Set boot parameter */
|
||||
|
||||
@ -12,19 +12,72 @@
|
||||
#include <errno.h>
|
||||
#include <ihk/debug.h>
|
||||
#include <registers.h>
|
||||
#include <mc_perf_event.h>
|
||||
|
||||
extern unsigned int *x86_march_perfmap;
|
||||
extern int running_on_kvm(void);
|
||||
|
||||
#define X86_CR4_PCE 0x00000100
|
||||
|
||||
int perf_counters_discovered = 0;
|
||||
int X86_IA32_NUM_PERF_COUNTERS = 0;
|
||||
unsigned long X86_IA32_PERF_COUNTERS_MASK = 0;
|
||||
int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0;
|
||||
unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0;
|
||||
|
||||
void x86_init_perfctr(void)
|
||||
{
|
||||
int i = 0;
|
||||
unsigned long reg;
|
||||
unsigned long value = 0;
|
||||
uint64_t op;
|
||||
uint64_t eax;
|
||||
uint64_t ebx;
|
||||
uint64_t ecx;
|
||||
uint64_t edx;
|
||||
|
||||
/* Do not do it on KVM */
|
||||
if (running_on_kvm()) return;
|
||||
|
||||
/* Allow PMC to be read from user space */
|
||||
asm volatile("movq %%cr4, %0" : "=r"(reg));
|
||||
reg |= X86_CR4_PCE;
|
||||
asm volatile("movq %0, %%cr4" : : "r"(reg));
|
||||
|
||||
/* Detect number of supported performance counters */
|
||||
if (!perf_counters_discovered) {
|
||||
/* See Table 35.2 - Architectural MSRs in Vol 3C */
|
||||
op = 0x0a;
|
||||
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op));
|
||||
|
||||
X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
|
||||
X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1;
|
||||
|
||||
X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
|
||||
X86_IA32_FIXED_PERF_COUNTERS_MASK =
|
||||
((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) <<
|
||||
X86_IA32_BASE_FIXED_PERF_COUNTERS;
|
||||
|
||||
perf_counters_discovered = 1;
|
||||
kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n",
|
||||
X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS);
|
||||
}
|
||||
|
||||
/* Clear Fixed Counter Control */
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= 0xfffffffffffff000L;
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
|
||||
/* Clear Generic Counter Control */
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
|
||||
}
|
||||
|
||||
/* Enable PMC Control */
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= X86_IA32_PERF_COUNTERS_MASK;
|
||||
value |= X86_IA32_FIXED_PERF_COUNTERS_MASK;
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
|
||||
}
|
||||
|
||||
static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
|
||||
@ -33,20 +86,53 @@ static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (mode & PERFCTR_USER_MODE) {
|
||||
// clear mode flags
|
||||
value &= ~(3 << 16);
|
||||
|
||||
// set mode flags
|
||||
if(mode & PERFCTR_USER_MODE) {
|
||||
value |= 1 << 16;
|
||||
}
|
||||
if (mode & PERFCTR_KERNEL_MODE) {
|
||||
}
|
||||
if(mode & PERFCTR_KERNEL_MODE) {
|
||||
value |= 1 << 17;
|
||||
}
|
||||
}
|
||||
|
||||
// wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
|
||||
|
||||
value |= (1 << 22) | (1 << 18); /* EN */
|
||||
value |= (1 << 20); /* Enable overflow interrupt */
|
||||
|
||||
wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value);
|
||||
|
||||
kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
|
||||
kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
|
||||
//kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
|
||||
//kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int set_pmc_x86_direct(int counter, long val)
|
||||
{
|
||||
unsigned long cnt_bit = 0;
|
||||
|
||||
if (counter < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
val &= 0x000000ffffffffff; // 40bit Mask
|
||||
|
||||
cnt_bit = 1UL << counter;
|
||||
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
|
||||
// set generic pmc
|
||||
wrmsr(MSR_IA32_PMC0 + counter, val);
|
||||
}
|
||||
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
|
||||
// set fixed pmc
|
||||
wrmsr(MSR_IA32_FIXED_CTR0 + counter - X86_IA32_BASE_FIXED_PERF_COUNTERS, val);
|
||||
}
|
||||
else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -57,6 +143,45 @@ static int set_perfctr_x86(int counter, int event, int mask, int inv, int count,
|
||||
CVAL2(event, mask, inv, count));
|
||||
}
|
||||
|
||||
static int set_fixed_counter(int counter, int mode)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
unsigned int ctr_mask = 0xf;
|
||||
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
|
||||
unsigned int set_val = 0;
|
||||
|
||||
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
// clear specified fixed counter info
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
ctr_mask <<= counter_idx * 4;
|
||||
value &= ~ctr_mask;
|
||||
|
||||
if (mode & PERFCTR_USER_MODE) {
|
||||
set_val |= 1 << 1;
|
||||
}
|
||||
if (mode & PERFCTR_KERNEL_MODE) {
|
||||
set_val |= 1;
|
||||
}
|
||||
|
||||
set_val <<= counter_idx * 4;
|
||||
value |= set_val;
|
||||
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return set_perfctr_x86_direct(counter, mode, code);
|
||||
}
|
||||
int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
@ -78,14 +203,15 @@ extern void x86_march_perfctr_start(unsigned long counter_mask);
|
||||
|
||||
int ihk_mc_perfctr_start(unsigned long counter_mask)
|
||||
{
|
||||
unsigned int value = 0;
|
||||
unsigned long value = 0;
|
||||
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
|
||||
|
||||
#ifdef HAVE_MARCH_PERFCTR_START
|
||||
x86_march_perfctr_start(counter_mask);
|
||||
#endif
|
||||
counter_mask &= ((1 << X86_IA32_NUM_PERF_COUNTERS) - 1);
|
||||
counter_mask &= mask;
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value |= counter_mask;
|
||||
value |= counter_mask;
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
|
||||
|
||||
return 0;
|
||||
@ -93,25 +219,78 @@ int ihk_mc_perfctr_start(unsigned long counter_mask)
|
||||
|
||||
int ihk_mc_perfctr_stop(unsigned long counter_mask)
|
||||
{
|
||||
unsigned int value;
|
||||
unsigned long value;
|
||||
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
|
||||
|
||||
counter_mask &= ((1 << X86_IA32_NUM_PERF_COUNTERS) - 1);
|
||||
counter_mask &= mask;
|
||||
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
|
||||
value &= ~counter_mask;
|
||||
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
|
||||
|
||||
if(counter_mask >> 32 & 0x1) {
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= ~(0xf);
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
}
|
||||
|
||||
if(counter_mask >> 32 & 0x2) {
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= ~(0xf << 4);
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
}
|
||||
|
||||
if(counter_mask >> 32 & 0x4) {
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
value &= ~(0xf << 8);
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// init for fixed counter
|
||||
int ihk_mc_perfctr_fixed_init(int counter, int mode)
|
||||
{
|
||||
unsigned long value = 0;
|
||||
unsigned int ctr_mask = 0xf;
|
||||
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
|
||||
unsigned int set_val = 0;
|
||||
|
||||
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
// clear specified fixed counter info
|
||||
value = rdmsr(MSR_PERF_FIXED_CTRL);
|
||||
ctr_mask <<= counter_idx * 4;
|
||||
value &= ~ctr_mask;
|
||||
|
||||
if (mode & PERFCTR_USER_MODE) {
|
||||
set_val |= 1 << 1;
|
||||
}
|
||||
if (mode & PERFCTR_KERNEL_MODE) {
|
||||
set_val |= 1;
|
||||
}
|
||||
|
||||
// enable PMI on overflow
|
||||
set_val |= 1 << 3;
|
||||
|
||||
set_val <<= counter_idx * 4;
|
||||
value |= set_val;
|
||||
|
||||
wrmsr(MSR_PERF_FIXED_CTRL, value);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_reset(int counter)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return set_pmc_x86_direct(counter, 0);
|
||||
}
|
||||
|
||||
wrmsr(MSR_IA32_PMC0 + counter, 0);
|
||||
|
||||
return 0;
|
||||
/*
 * Load a value into a performance counter.
 *
 * Thin wrapper: both arguments are passed unchanged to
 * set_pmc_x86_direct() and its result is returned to the caller.
 */
int ihk_mc_perfctr_set(int counter, long val)
{
	int rc = set_pmc_x86_direct(counter, val);

	return rc;
}
|
||||
|
||||
int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
|
||||
@ -129,10 +308,87 @@ int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
|
||||
|
||||
unsigned long ihk_mc_perfctr_read(int counter)
|
||||
{
|
||||
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
|
||||
unsigned long retval = 0;
|
||||
unsigned long cnt_bit = 0;
|
||||
|
||||
if (counter < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return rdpmc(counter);
|
||||
cnt_bit = 1UL << counter;
|
||||
|
||||
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
|
||||
// read generic pmc
|
||||
retval = rdpmc(counter);
|
||||
}
|
||||
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
|
||||
// read fixed pmc
|
||||
retval = rdpmc((1 << 30) + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS));
|
||||
}
|
||||
else {
|
||||
retval = -EINVAL;
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
// read by rdmsr
|
||||
unsigned long ihk_mc_perfctr_read_msr(int counter)
|
||||
{
|
||||
unsigned int idx = 0;
|
||||
unsigned long retval = 0;
|
||||
unsigned long cnt_bit = 0;
|
||||
|
||||
if (counter < 0) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
cnt_bit = 1UL << counter;
|
||||
|
||||
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
|
||||
// read generic pmc
|
||||
idx = MSR_IA32_PMC0 + counter;
|
||||
retval = (unsigned long) rdmsr(idx);
|
||||
}
|
||||
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
|
||||
// read fixed pmc
|
||||
idx = MSR_IA32_FIXED_CTR0 + counter;
|
||||
retval = (unsigned long) rdmsr(idx);
|
||||
}
|
||||
else {
|
||||
retval = -EINVAL;
|
||||
}
|
||||
|
||||
return retval;
|
||||
}
|
||||
|
||||
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status)
|
||||
{
|
||||
int ret = -1;
|
||||
int i = 0;
|
||||
|
||||
if(*type == PERF_TYPE_HARDWARE) {
|
||||
switch(*config){
|
||||
case PERF_COUNT_HW_INSTRUCTIONS :
|
||||
*type = PERF_TYPE_RAW;
|
||||
*config = 0x5300c0;
|
||||
break;
|
||||
default :
|
||||
// Unexpected config
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else if(*type != PERF_TYPE_RAW) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// find avail generic counter
|
||||
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
|
||||
if(!(pmc_status & (1 << i))) {
|
||||
ret = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
28
arch/x86/tools/irqbalance_mck.in.in
Normal file
28
arch/x86/tools/irqbalance_mck.in.in
Normal file
@ -0,0 +1,28 @@
|
||||
# irqbalance is a daemon process that distributes interrupts across
|
||||
# CPUS on SMP systems. The default is to rebalance once every 10
|
||||
# seconds. This is the environment file that is specified to systemd via the
|
||||
# EnvironmentFile key in the service unit file (or via whatever method the init
|
||||
# system you're using has).
|
||||
#
|
||||
# ONESHOT=yes
|
||||
# after starting, wait for a minute, then look at the interrupt
|
||||
# load and balance it once; after balancing exit and do not change
|
||||
# it again.
|
||||
#IRQBALANCE_ONESHOT=
|
||||
|
||||
#
|
||||
# IRQBALANCE_BANNED_CPUS
|
||||
# 64 bit bitmask which allows you to indicate which cpu's should
|
||||
# be skipped when rebalancing irqs. Cpu numbers which have their
|
||||
# corresponding bits set to one in this mask will not have any
|
||||
# irq's assigned to them on rebalance
|
||||
#
|
||||
IRQBALANCE_BANNED_CPUS=%mask%
|
||||
|
||||
#
|
||||
# IRQBALANCE_ARGS
|
||||
# append any args here to the irqbalance daemon as documented in the man page
|
||||
#
|
||||
IRQBALANCE_ARGS=--banirq=%banirq%
|
||||
|
||||
|
||||
10
arch/x86/tools/irqbalance_mck.service.in
Normal file
10
arch/x86/tools/irqbalance_mck.service.in
Normal file
@ -0,0 +1,10 @@
|
||||
[Unit]
|
||||
Description=irqbalance daemon
|
||||
After=syslog.target
|
||||
|
||||
[Service]
|
||||
EnvironmentFile=@ETCDIR@/irqbalance_mck
|
||||
ExecStart=/usr/sbin/irqbalance --foreground $IRQBALANCE_ARGS
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
@ -13,15 +13,90 @@
|
||||
# Note that the script does not output anything unless an error occurs.
|
||||
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
BINDIR="${prefix}/bin"
|
||||
SBINDIR="${prefix}/sbin"
|
||||
ETCDIR=@ETCDIR@
|
||||
KMODDIR="${prefix}/kmod"
|
||||
KERNDIR="${prefix}/@TARGET@/kernel"
|
||||
ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@"
|
||||
|
||||
mem="512M@0"
|
||||
cpus=""
|
||||
|
||||
INTERVAL=1
|
||||
LOGMODE=0
|
||||
facility="LOG_LOCAL6"
|
||||
chown_option=`logname 2> /dev/null`
|
||||
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" -o "`systemctl status irqbalance.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
irqbalance_used="yes"
|
||||
else
|
||||
irqbalance_used="no"
|
||||
fi
|
||||
|
||||
while getopts :i:k:c:m:o:f: OPT
|
||||
do
|
||||
case ${OPT} in
|
||||
f) facility=${OPTARG}
|
||||
;;
|
||||
o) chown_option=${OPTARG}
|
||||
;;
|
||||
i) INTERVAL=${OPTARG}
|
||||
expr "${INTERVAL}" + 1 > /dev/null 2>&1
|
||||
if [ $? -ge 2 ]
|
||||
then
|
||||
echo "invalid -i value" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ ${INTERVAL} -le 0 ]
|
||||
then
|
||||
echo "invalid -i value" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
k) LOGMODE=${OPTARG}
|
||||
expr "${LOGMODE}" + 1 > /dev/null 2>&1
|
||||
if [ $? -ge 2 ]
|
||||
then
|
||||
echo "invalid -k value" >&2
|
||||
exit 1
|
||||
fi
|
||||
if [ ${LOGMODE} -lt 0 -o ${LOGMODE} -gt 2 ]
|
||||
then
|
||||
echo "invalid -k value" >&2
|
||||
exit 1
|
||||
fi
|
||||
;;
|
||||
c) cpus=${OPTARG}
|
||||
;;
|
||||
m) mem=${OPTARG}
|
||||
;;
|
||||
*) echo "invalid option -${OPT}" >&2
|
||||
exit 1
|
||||
esac
|
||||
done
|
||||
|
||||
ihk_ikc_irq_core=0
|
||||
|
||||
release=`uname -r`
|
||||
major=`echo ${release} | sed -e 's/^\([0-9]*\).*/\1/'`
|
||||
minor=`echo ${release} | sed -e 's/^[0-9]*.\([0-9]*\).*/\1/'`
|
||||
patch=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/'`
|
||||
linux_version_code=`expr \( ${major} \* 65536 \) + \( ${minor} \* 256 \) + ${patch}`
|
||||
rhel_release=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/'`
|
||||
if [ "${release}" == "${rhel_release}" ]; then rhel_release=""; fi
|
||||
enable_mcoverlay="no"
|
||||
if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
|
||||
if [ "${rhel_release}" == "" ]; then
|
||||
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
else
|
||||
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 ]; then
|
||||
enable_mcoverlay="yes"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$cpus" == "" ]; then
|
||||
# Get the number of CPUs on NUMA node 0
|
||||
@ -30,17 +105,30 @@ if [ "$cpus" == "" ]; then
|
||||
# Use the second half of the cores
|
||||
let nr_cpus="$nr_cpus / 2"
|
||||
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
|
||||
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?"; exit; fi
|
||||
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?" >&2; exit 1; fi
|
||||
fi
|
||||
|
||||
# Remove delegator if loaded
|
||||
if [ "`lsmod | grep mcctrl`" != "" ]; then
|
||||
if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
|
||||
# Remove mcoverlay if loaded
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
if [ "`lsmod | grep mcoverlay`" != "" ]; then
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_sys`" != "" ]; then umount -l /tmp/mcos/mcos0_sys; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_proc`" != "" ]; then umount -l /tmp/mcos/mcos0_proc; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos/linux_proc`" != "" ]; then umount -l /tmp/mcos/linux_proc; fi
|
||||
if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi
|
||||
if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi
|
||||
if ! rmmod mcoverlay; then echo "error: removing mcoverlay" >&2; exit 1; fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Stop irqbalance
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
systemctl stop irqbalance_mck.service 2>/dev/null
|
||||
if ! systemctl stop irqbalance.service 2>/dev/null ; then echo "error: stopping irqbalance" >&2; exit 1; fi;
|
||||
fi
|
||||
|
||||
# Load IHK if not loaded
|
||||
if [ "`lsmod | grep ihk`" == "" ]; then
|
||||
if ! insmod ${KMODDIR}/ihk.ko; then echo "error: loading ihk"; exit; fi;
|
||||
if ! insmod ${KMODDIR}/ihk.ko; then echo "error: loading ihk" >&2; exit 1; fi;
|
||||
fi
|
||||
|
||||
# Load IHK-SMP if not loaded and reserve CPUs and memory
|
||||
@ -52,47 +140,111 @@ if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available"; exit; fi
|
||||
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then echo "error: loading ihk-smp-x86"; exit; fi;
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
|
||||
if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available" >&2; exit 1; fi
|
||||
if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then echo "error: loading ihk-smp-x86" >&2; exit 1; fi;
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs" >&2; exit 1; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory" >&2; exit 1; fi
|
||||
# If loaded, but no resources allocated, get CPUs and memory
|
||||
else
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||
cpus_allocated=`${SBINDIR}/ihkosctl 0 query cpu`
|
||||
if [ "$cpus_allocated" == "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs" >&2; exit 1; fi
|
||||
fi
|
||||
|
||||
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||
mem_allocated=`${SBINDIR}/ihkosctl 0 query mem`
|
||||
if [ "$mem_allocated" == "" ]; then
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory" >&2; exit 1; fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Load mcctrl if not loaded
|
||||
if [ "`lsmod | grep mcctrl`" == "" ]; then
|
||||
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko" >&2; exit 1; fi
|
||||
fi
|
||||
|
||||
# Check for existing OS instance and destroy
|
||||
if [ -c /dev/mcos0 ]; then
|
||||
# Query CPU cores and memory of OS instance so that the same values are used as previously
|
||||
if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||
cpus=`${SBINDIR}/ihkosctl 0 query cpu`
|
||||
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||
mem=`${SBINDIR}/ihkosctl 0 query mem`
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed"; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed" >&2; fi
|
||||
else
|
||||
# Otherwise query IHK-SMP for resources
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 create; then echo "error: create"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then echo "error: assign CPUs"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then echo "error: assign memory"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loading kernel image"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 kargs hidos; then echo "error: setting kernel arguments"; exit; fi
|
||||
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting"; exit; fi
|
||||
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko"; exit; fi
|
||||
if ! chown `logname` /dev/mcd* /dev/mcos*; then echo "error: chowning device files"; exit; fi
|
||||
# Create OS instance 0, hand it the reserved CPUs and memory, load and boot
# the kernel image, then give the device files to the requested owner.
# Every failing step reports on stderr and exits non-zero.
if ! ${SBINDIR}/ihkconfig 0 create; then echo "error: create" >&2; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then echo "error: assign CPUs" >&2; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then echo "error: assign memory" >&2; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loading kernel image" >&2; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos ksyslogd=${LOGMODE}"; then echo "error: setting kernel arguments" >&2; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting" >&2; exit 1; fi
if ! chown ${chown_option} /dev/mcd* /dev/mcos*; then echo "error: chowning device files" >&2; exit 1; fi
|
||||
|
||||
if [ "$enable_mcoverlay" == "yes" ]; then
|
||||
if [ ! -e /tmp/mcos ]; then mkdir -p /tmp/mcos; fi
|
||||
if ! mount -t tmpfs tmpfs /tmp/mcos; then echo "error: mount /tmp/mcos" >&2; exit 1; fi
|
||||
if [ ! -e /tmp/mcos/linux_proc ]; then mkdir -p /tmp/mcos/linux_proc; fi
|
||||
if ! mount --bind /proc /tmp/mcos/linux_proc; then echo "error: mount /tmp/mcos/linux_proc" >&2; exit 1; fi
|
||||
if ! insmod ${KMODDIR}/mcoverlay.ko; then echo "error: inserting mcoverlay.ko" >&2; exit 1; fi
|
||||
while [ ! -e /proc/mcos0 ]
|
||||
do
|
||||
sleep 1
|
||||
done
|
||||
if [ ! -e /tmp/mcos/mcos0_proc ]; then mkdir -p /tmp/mcos/mcos0_proc; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_proc_upper ]; then mkdir -p /tmp/mcos/mcos0_proc_upper; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_proc_work ]; then mkdir -p /tmp/mcos/mcos0_proc_work; fi
|
||||
if ! mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then echo "error: mount /tmp/mcos/mcos0_proc" >&2; exit 1; fi
|
||||
mount --make-rprivate /proc
|
||||
while [ ! -e /sys/devices/virtual/mcos/mcos0/sys ]
|
||||
do
|
||||
sleep 1
|
||||
done
|
||||
if [ ! -e /tmp/mcos/mcos0_sys ]; then mkdir -p /tmp/mcos/mcos0_sys; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_sys_upper ]; then mkdir -p /tmp/mcos/mcos0_sys_upper; fi
|
||||
if [ ! -e /tmp/mcos/mcos0_sys_work ]; then mkdir -p /tmp/mcos/mcos0_sys_work; fi
|
||||
if ! mount -t mcoverlay mcoverlay -o lowerdir=/sys/devices/virtual/mcos/mcos0/sys:/sys,upperdir=/tmp/mcos/mcos0_sys_upper,workdir=/tmp/mcos/mcos0_sys_work,nocopyupw,nofscheck /tmp/mcos/mcos0_sys; then echo "error: mount /tmp/mcos/mcos0_sys" >&2; exit 1; fi
|
||||
mount --make-rprivate /sys
|
||||
for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
|
||||
rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid
|
||||
fi
|
||||
done
|
||||
for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
|
||||
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then
|
||||
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
|
||||
fi
|
||||
done
|
||||
fi
|
||||
if [ ${LOGMODE} -ne 0 ]
|
||||
then
|
||||
# mcklogd survives when McKernel isn't shut down by mcstop+release.sh
|
||||
pkill mcklogd
|
||||
SBINDIR=${SBINDIR} ${SBINDIR}/mcklogd -i ${INTERVAL} -f ${facility}
|
||||
fi
|
||||
|
||||
# Start irqbalance with CPUs and IRQ for McKernel banned
|
||||
if [ "${irqbalance_used}" == "yes" ]; then
|
||||
if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }' ; then echo "error: saving /proc/irq/*/smp_affinity" >&2; exit 1; fi;
|
||||
|
||||
# Number of CPUs as reported by lscpu
ncpus=`lscpu | grep -E '^CPU\(s\):' | awk '{print $2}'`

# Turn the CPU list in $cpus (e.g. "4-7,12") into a hex mask made of
# comma-separated 32-bit words, most significant word first.  The top word
# gets one hex digit per 4 CPUs, rounded up, and a full 8 digits when ncpus
# is a multiple of 32 (previously that case printed no digits at all).
smp_affinity_mask=`echo $cpus | ncpus=$ncpus perl -e 'while(<>){@tokens = split /,/;foreach $token (@tokens) {@nums = split /-/,$token; for($num = $nums[0]; $num <= $nums[$#nums]; $num++) {$ndx=int($num/32); $mask[$ndx] |= (1<<($num % 32))}}} $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if($j != $nint32s - 1){print ",";} $nblks = ($j != $nint32s - 1 || $ENV{'ncpus'} % 32 == 0) ? 8 : int((($ENV{'ncpus'} % 32)+3)/4); for($i = $nblks - 1;$i >= 0;$i--){ printf("%01x",($mask[$j] >> ($i*4)) & 0xf);}}'`

# For every IRQ whose current affinity overlaps the mask above, rewrite its
# smp_affinity with the inverse of the mask.  The top word of the inverse is
# trimmed to the digits needed for ncpus, or kept at 8 digits when ncpus is a
# multiple of 32.
if ! ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask perl -e '@dirs = grep { -d } glob "/proc/irq/*"; foreach $dir (@dirs) { $hit = 0; $affinity_str = `cat $dir/smp_affinity`; chomp $affinity_str; @int32strs = split /,/, $affinity_str; @int32strs_mask=split /,/, $ENV{'smp_affinity_mask'}; for($i=0;$i <= $#int32strs_mask; $i++) { $int32strs_inv[$i] = sprintf("%08x",hex($int32strs_mask[$i])^0xffffffff); if($i == 0) { $len = ($ENV{'ncpus'} % 32 == 0) ? 8 : int((($ENV{'ncpus'}%32)+3)/4); $int32strs_inv[$i] = substr($int32strs_inv[$i], -$len, $len); } } $inv = join(",", @int32strs_inv); $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if(hex($int32strs[$nint32s - 1 - $j]) & hex($int32strs_mask[$nint32s - 1 - $j])) { $hit = 1; }} if($hit == 1) { $cmd = "echo $inv > $dir/smp_affinity 2>/dev/null"; system $cmd;}}'; then echo "error: modifying /proc/irq/*/smp_affinity" >&2; exit 1; fi;
|
||||
|
||||
banirq=`cat /proc/interrupts| perl -e 'while(<>) { if(/^\s*(\d+).*IHK\-SMP\s*$/) {print $1;}}'`
|
||||
|
||||
sed "s/%mask%/$smp_affinity_mask/g" $ETCDIR/irqbalance_mck.in | sed "s/%banirq%/$banirq/g" > $ETCDIR/irqbalance_mck
|
||||
if ! systemctl link $ETCDIR/irqbalance_mck.service >/dev/null 2>/dev/null; then echo "error: linking irqbalance_mck" >&2; exit 1; fi;
|
||||
if ! systemctl start irqbalance_mck.service 2>/dev/null ; then echo "error: starting irqbalance_mck" >&2; exit 1; fi;
|
||||
# echo cpus=$cpus mask=$smp_affinity_mask banirq=$banirq
|
||||
fi
|
||||
|
||||
|
||||
@ -10,6 +10,7 @@
|
||||
prefix="@prefix@"
|
||||
BINDIR="@BINDIR@"
|
||||
SBINDIR="@SBINDIR@"
|
||||
ETCDIR=@ETCDIR@
|
||||
KMODDIR="@KMODDIR@"
|
||||
KERNDIR="@KERNDIR@"
|
||||
|
||||
@ -17,31 +18,47 @@ mem=""
|
||||
cpus=""
|
||||
|
||||
# No SMP module? Exit.
|
||||
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit; fi
|
||||
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit 0; fi
|
||||
|
||||
# Destroy all LWK instances
|
||||
if ls /dev/mcos* 1>/dev/null 2>&1; then
|
||||
for i in /dev/mcos*; do
|
||||
ind=`echo $i|cut -c10-`;
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed" >&2; exit 1; fi
|
||||
done
|
||||
fi
|
||||
|
||||
# Query IHK-SMP resources and release them
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus" >&2; exit 1; fi
|
||||
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
||||
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs" >&2; exit 1; fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory" >&2; exit 1; fi
|
||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory" >&2; exit 1; fi
|
||||
|
||||
# Remove delegator if loaded
|
||||
if [ "`lsmod | grep mcctrl`" != "" ]; then
|
||||
if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
|
||||
if ! rmmod mcctrl; then echo "error: removing mcctrl" >&2; exit 1; fi
|
||||
fi
|
||||
|
||||
# Destroy all LWK instances
|
||||
for i in /dev/mcos*; do
|
||||
ind=`echo $i|cut -c10-`;
|
||||
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed"; exit; fi
|
||||
done
|
||||
|
||||
# Query IHK-SMP resources and release them
|
||||
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
|
||||
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
|
||||
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs"; exit; fi
|
||||
|
||||
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
|
||||
mem=`${SBINDIR}/ihkconfig 0 query mem`
|
||||
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory"; exit; fi
|
||||
|
||||
# Remove SMP module
|
||||
if [ "`lsmod | grep ihk_smp_x86`" != "" ]; then
|
||||
if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86"; exit; fi
|
||||
if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86" >&2; exit 1; fi
|
||||
fi
|
||||
|
||||
# Remove core module
|
||||
if [ "`lsmod | grep -E 'ihk\s' | awk '{print $1}'`" != "" ]; then
|
||||
if ! rmmod ihk; then echo "error: removing ihk" >&2; exit 1; fi
|
||||
fi
|
||||
|
||||
# Stop mcklogd
|
||||
pkill mcklogd
|
||||
|
||||
# Start irqbalance with the original settings
|
||||
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
|
||||
if ! systemctl stop irqbalance_mck.service 2>/dev/null ; then echo "error: stopping irqbalance_mck" >&2; exit 1; fi;
|
||||
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then echo "error: disabling irqbalance_mck" >&2; exit 1; fi;
|
||||
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }' ; then echo "error: restoring /proc/irq/*/smp_affinity" >&2; exit 1; fi;
|
||||
if ! systemctl start irqbalance.service; then echo "error: starting irqbalance" >&2; exit 1; fi;
|
||||
fi
|
||||
|
||||
173
configure.ac
173
configure.ac
@ -27,10 +27,27 @@ AC_ARG_WITH([target],
|
||||
[--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]),
|
||||
[WITH_TARGET=$withval],[WITH_TARGET=yes])
|
||||
|
||||
AC_ARG_WITH([system_map],
|
||||
AS_HELP_STRING(
|
||||
[--with-system_map=path],[Path to 'System.map file', default is /boot/System.map-uname_r]),
|
||||
[WITH_SYSTEM_MAP=$withval],[WITH_SYSTEM_MAP=yes])
|
||||
|
||||
AC_ARG_ENABLE([dcfa],
|
||||
[AS_HELP_STRING(
|
||||
[--enable-dcfa],[Enable DCFA modules])],[],[enable_dcfa=no])
|
||||
|
||||
AC_ARG_ENABLE([memdump],
|
||||
AC_HELP_STRING([--enable-memdump],
|
||||
[enable dumping memory and analyzing a dump]),
|
||||
[ENABLE_MEMDUMP=$enableval],
|
||||
[ENABLE_MEMDUMP=default])
|
||||
|
||||
AC_ARG_ENABLE([mcoverlayfs],
|
||||
AC_HELP_STRING([--enable-mcoverlayfs],
|
||||
[enable mcoverlayfs implementation]),
|
||||
[ENABLE_MCOVERLAYFS=$enableval],
|
||||
[ENABLE_MCOVERLAYFS=yes])
|
||||
|
||||
case "X$WITH_KERNELSRC" in
|
||||
Xyes | Xno | X)
|
||||
WITH_KERNELSRC='/lib/modules/`uname -r`/build'
|
||||
@ -49,9 +66,26 @@ fi
|
||||
test "x$prefix" = xNONE && prefix="$ac_default_prefix"
|
||||
|
||||
case $WITH_TARGET in
|
||||
attached-mic)
|
||||
attached-mic|builtin-x86|smp-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
;;
|
||||
builtin-mic)
|
||||
ARCH=k1om
|
||||
AC_CHECK_PROG(XCC,
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[no])
|
||||
CC=$XCC
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([target $WITH_TARGET is unknwon])
|
||||
;;
|
||||
esac
|
||||
|
||||
case $WITH_TARGET in
|
||||
attached-mic)
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/attached/kernel"
|
||||
fi
|
||||
@ -69,12 +103,6 @@ case $WITH_TARGET in
|
||||
fi
|
||||
;;
|
||||
builtin-mic)
|
||||
ARCH=k1om
|
||||
AC_CHECK_PROG(XCC,
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[x86_64-$ARCH-linux-gcc],
|
||||
[no])
|
||||
CC=$XCC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/attached/kernel"
|
||||
fi
|
||||
@ -92,9 +120,6 @@ case $WITH_TARGET in
|
||||
fi
|
||||
;;
|
||||
builtin-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/attached/kernel"
|
||||
fi
|
||||
@ -112,9 +137,6 @@ case $WITH_TARGET in
|
||||
fi
|
||||
;;
|
||||
smp-x86)
|
||||
ARCH=`uname -m`
|
||||
AC_PROG_CC
|
||||
XCC=$CC
|
||||
if test "X$KERNDIR" = X; then
|
||||
KERNDIR="$prefix/smp-x86/kernel"
|
||||
fi
|
||||
@ -124,6 +146,9 @@ case $WITH_TARGET in
|
||||
if test "X$SBINDIR" = X; then
|
||||
SBINDIR="$prefix/sbin"
|
||||
fi
|
||||
if test "X$ETCDIR" = X; then
|
||||
ETCDIR="$prefix/etc"
|
||||
fi
|
||||
if test "X$KMODDIR" = X; then
|
||||
KMODDIR="$prefix/kmod"
|
||||
fi
|
||||
@ -139,6 +164,116 @@ esac
|
||||
KDIR="$WITH_KERNELSRC"
|
||||
TARGET="$WITH_TARGET"
|
||||
|
||||
MCCTRL_LINUX_SYMTAB=""
|
||||
case "X$WITH_SYSTEM_MAP" in
|
||||
Xyes | Xno | X)
|
||||
MCCTRL_LINUX_SYMTAB=""
|
||||
;;
|
||||
*)
|
||||
MCCTRL_LINUX_SYMTAB="$WITH_SYSTEM_MAP"
|
||||
;;
|
||||
esac
|
||||
|
||||
AC_MSG_CHECKING([[for System.map]])
|
||||
if test -f "$MCCTRL_LINUX_SYMTAB"; then
|
||||
MCCTRL_LINUX_SYMTAB="$MCCTRL_LINUX_SYMTAB"
|
||||
elif test -f "/boot/System.map-`uname -r`"; then
|
||||
MCCTRL_LINUX_SYMTAB="/boot/System.map-`uname -r`"
|
||||
elif test -f "$KDIR/System.map"; then
|
||||
MCCTRL_LINUX_SYMTAB="$KDIR/System.map"
|
||||
fi
|
||||
|
||||
if test "$MCCTRL_LINUX_SYMTAB" == ""; then
|
||||
AC_MSG_ERROR([could not find])
|
||||
fi
|
||||
|
||||
if test -z "`eval cat $MCCTRL_LINUX_SYMTAB`"; then
|
||||
AC_MSG_ERROR([could not read System.map file, no read permission?])
|
||||
fi
|
||||
AC_MSG_RESULT([$MCCTRL_LINUX_SYMTAB])
|
||||
|
||||
MCCTRL_LINUX_SYMTAB_CMD="cat $MCCTRL_LINUX_SYMTAB"
|
||||
|
||||
# MCCTRL_FIND_KSYM(SYMBOL, [SKIP-EXPORT-CHECK])
# ------------------------------------------------------
# Search System.map for address of the given symbol and
# do one of three things in config.h:
# If not found, leave MCCTRL_KSYM_foo undefined
# If found to be exported, "#define MCCTRL_KSYM_foo 0"
# If found not to be exported, "#define MCCTRL_KSYM_foo 0x<value>"
# When a non-empty second argument is given, the export check is skipped
# and the address is always recorded.
AC_DEFUN([MCCTRL_FIND_KSYM],[
  AC_MSG_CHECKING([[System.map for symbol $1]])
  mcctrl_addr=`eval $MCCTRL_LINUX_SYMTAB_CMD | grep " $1\$" | cut -d' ' -f1`
  # Quoted so that an empty or multi-word result cannot break the test
  if test -z "$mcctrl_addr"; then
    AC_MSG_RESULT([not found])
  else
    mcctrl_result=$mcctrl_addr
    mcctrl_addr="0x$mcctrl_addr"
    m4_ifval([$2],[],[
      if eval $MCCTRL_LINUX_SYMTAB_CMD | grep " __ksymtab_$1\$" >/dev/null; then
        mcctrl_result="exported"
        mcctrl_addr="0"
      fi
    ])
    AC_MSG_RESULT([$mcctrl_result])
    AC_DEFINE_UNQUOTED(MCCTRL_KSYM_[]$1,$mcctrl_addr,[Define to address of kernel symbol $1, or 0 if exported])
  fi
])
|
||||
|
||||
MCCTRL_FIND_KSYM([sys_mount])
|
||||
MCCTRL_FIND_KSYM([sys_unshare])
|
||||
MCCTRL_FIND_KSYM([zap_page_range])
|
||||
MCCTRL_FIND_KSYM([vdso_image_64])
|
||||
MCCTRL_FIND_KSYM([vdso_start])
|
||||
MCCTRL_FIND_KSYM([vdso_end])
|
||||
MCCTRL_FIND_KSYM([vdso_pages])
|
||||
MCCTRL_FIND_KSYM([__vvar_page])
|
||||
MCCTRL_FIND_KSYM([hpet_address])
|
||||
MCCTRL_FIND_KSYM([hv_clock])
|
||||
MCCTRL_FIND_KSYM([sys_readlink])
|
||||
|
||||
case $ENABLE_MEMDUMP in
|
||||
yes|no|auto)
|
||||
;;
|
||||
default)
|
||||
if test "x$WITH_TARGET" = "xsmp-x86" ; then
|
||||
ENABLE_MEMDUMP=auto
|
||||
else
|
||||
ENABLE_MEMDUMP=no
|
||||
fi
|
||||
;;
|
||||
*)
|
||||
AC_MSG_ERROR([unknown memdump argument: $ENABLE_MEMDUMP])
|
||||
;;
|
||||
esac
|
||||
|
||||
# Unless memdump was turned off, probe for libbfd and bfd.h.
# "yes" makes a missing dependency fatal; any other value (auto) silently
# falls back to disabling the feature.
if test "x$ENABLE_MEMDUMP" != "xno" ; then
	enableval=yes
	AC_CHECK_LIB([bfd],[bfd_init],[],[enableval=no])
	AC_CHECK_HEADER([bfd.h],[],[enableval=no])

	if test "x$ENABLE_MEMDUMP" = "xyes" && test "x$enableval" = "xno" ; then
		AC_MSG_ERROR([memdump feature needs bfd.h and libbfd a.k.a binutils-devel])
	fi
	ENABLE_MEMDUMP=$enableval
fi
|
||||
|
||||
if test "x$ENABLE_MEMDUMP" = "xyes" ; then
|
||||
AC_MSG_NOTICE([memdump feature is enabled])
|
||||
AC_DEFINE([ENABLE_MEMDUMP],[1],[whether memdump feature is enabled])
|
||||
uncomment_if_ENABLE_MEMDUMP=''
|
||||
else
|
||||
AC_MSG_NOTICE([memdump feature is disabled])
|
||||
uncomment_if_ENABLE_MEMDUMP='#'
|
||||
fi
|
||||
|
||||
if test "x$ENABLE_MCOVERLAYFS" = "xyes" ; then
|
||||
AC_DEFINE([ENABLE_MCOVERLAYFS],[1],[whether mcoverlayfs is enabled])
|
||||
AC_MSG_NOTICE([mcoverlayfs is enabled])
|
||||
else
|
||||
AC_MSG_NOTICE([mcoverlayfs is disabled])
|
||||
fi
|
||||
|
||||
AC_SUBST(CC)
|
||||
AC_SUBST(XCC)
|
||||
AC_SUBST(ARCH)
|
||||
@ -146,9 +281,11 @@ AC_SUBST(KDIR)
|
||||
AC_SUBST(TARGET)
|
||||
AC_SUBST(BINDIR)
|
||||
AC_SUBST(SBINDIR)
|
||||
AC_SUBST(ETCDIR)
|
||||
AC_SUBST(KMODDIR)
|
||||
AC_SUBST(KERNDIR)
|
||||
AC_SUBST(MANDIR)
|
||||
AC_SUBST(ENABLE_MCOVERLAYFS)
|
||||
|
||||
AC_SUBST(IHK_VERSION)
|
||||
AC_SUBST(MCKERNEL_VERSION)
|
||||
@ -156,11 +293,17 @@ AC_SUBST(DCFA_VERSION)
|
||||
AC_SUBST(IHK_RELEASE_DATE)
|
||||
AC_SUBST(MCKERNEL_RELEASE_DATE)
|
||||
AC_SUBST(DCFA_RESEASE_DATE)
|
||||
AC_SUBST(uncomment_if_ENABLE_MEMDUMP)
|
||||
|
||||
AC_CONFIG_HEADERS([executer/config.h])
|
||||
AC_CONFIG_FILES([
|
||||
Makefile
|
||||
executer/user/Makefile
|
||||
executer/kernel/Makefile
|
||||
executer/kernel/mcctrl/Makefile
|
||||
executer/kernel/mcctrl/arch/x86_64/Makefile
|
||||
executer/kernel/mcoverlayfs/Makefile
|
||||
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/Makefile
|
||||
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile
|
||||
kernel/Makefile
|
||||
kernel/Makefile.build
|
||||
arch/x86/tools/mcreboot-attached-mic.sh
|
||||
@ -170,6 +313,8 @@ AC_CONFIG_FILES([
|
||||
arch/x86/tools/mcstop+release-smp-x86.sh
|
||||
arch/x86/tools/mcshutdown-builtin-x86.sh
|
||||
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
|
||||
arch/x86/tools/irqbalance_mck.service
|
||||
arch/x86/tools/irqbalance_mck.in
|
||||
])
|
||||
|
||||
AS_IF([test "x$enable_dcfa" = xyes], [
|
||||
|
||||
91
executer/config.h.in
Normal file
91
executer/config.h.in
Normal file
@ -0,0 +1,91 @@
|
||||
/* executer/config.h.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* whether mcoverlayfs is enabled */
|
||||
#undef ENABLE_MCOVERLAYFS
|
||||
|
||||
/* whether memdump feature is enabled */
|
||||
#undef ENABLE_MEMDUMP
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the `bfd' library (-lbfd). */
|
||||
#undef HAVE_LIBBFD
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to address of kernel symbol __vvar_page, or 0 if exported */
|
||||
#undef MCCTRL_KSYM___vvar_page
|
||||
|
||||
/* Define to address of kernel symbol hpet_address, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_hpet_address
|
||||
|
||||
/* Define to address of kernel symbol hv_clock, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_hv_clock
|
||||
|
||||
/* Define to address of kernel symbol sys_mount, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_mount
|
||||
|
||||
/* Define to address of kernel symbol sys_readlink, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_readlink
|
||||
|
||||
/* Define to address of kernel symbol sys_unshare, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_sys_unshare
|
||||
|
||||
/* Define to address of kernel symbol vdso_end, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_end
|
||||
|
||||
/* Define to address of kernel symbol vdso_image_64, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_image_64
|
||||
|
||||
/* Define to address of kernel symbol vdso_pages, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_pages
|
||||
|
||||
/* Define to address of kernel symbol vdso_start, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_vdso_start
|
||||
|
||||
/* Define to address of kernel symbol zap_page_range, or 0 if exported */
|
||||
#undef MCCTRL_KSYM_zap_page_range
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
@ -48,6 +48,9 @@
|
||||
#define MCEXEC_UP_OPEN_EXEC 0x30a02912
|
||||
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
|
||||
|
||||
#define MCEXEC_UP_SYS_MOUNT 0x30a02914
|
||||
#define MCEXEC_UP_SYS_UNSHARE 0x30a02915
|
||||
|
||||
#define MCEXEC_UP_DEBUG_LOG 0x40000000
|
||||
|
||||
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0
|
||||
@ -83,6 +86,9 @@ struct program_load_desc {
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
int reloc;
|
||||
char enable_vdso;
|
||||
char padding[7];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@ -104,6 +110,13 @@ struct program_load_desc {
|
||||
};
|
||||
|
||||
struct syscall_request {
|
||||
/* TID of requesting thread */
|
||||
int rtid;
|
||||
/*
|
||||
* TID of target thread. Remote page fault response needs to designate the
|
||||
* thread that must serve the request, 0 indicates any thread from the pool
|
||||
*/
|
||||
int ttid;
|
||||
unsigned long valid;
|
||||
unsigned long number;
|
||||
unsigned long args[6];
|
||||
@ -122,8 +135,17 @@ struct syscall_load_desc {
|
||||
unsigned long size;
|
||||
};
|
||||
|
||||
#define IHK_SCD_REQ_THREAD_SPINNING 0
|
||||
#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
|
||||
#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
|
||||
|
||||
struct syscall_response {
|
||||
/* TID of the thread that requested the service */
|
||||
int ttid;
|
||||
/* TID of the mcexec thread that is serving or has served the request */
|
||||
int stid;
|
||||
unsigned long status;
|
||||
unsigned long req_thread_status;
|
||||
long ret;
|
||||
unsigned long fault_address;
|
||||
unsigned long fault_reason;
|
||||
@ -166,4 +188,16 @@ struct newprocess_desc {
|
||||
int pid;
|
||||
};
|
||||
|
||||
struct sys_mount_desc {
|
||||
char *dev_name;
|
||||
char *dir_name;
|
||||
char *type;
|
||||
unsigned long flags;
|
||||
void *data;
|
||||
};
|
||||
|
||||
struct sys_unshare_desc {
|
||||
unsigned long unshare_flags;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
@ -1,26 +0,0 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
src = @abs_srcdir@
|
||||
KMODDIR=@KMODDIR@
|
||||
BINDIR=@BINDIR@
|
||||
IHK_BASE=$(src)/../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../ihk/linux/core/Module.symvers
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcctrl.ko $(KMODDIR)
|
||||
@ -1,191 +0,0 @@
|
||||
/**
|
||||
* \file mcctrl.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* define data structure
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/11/07 hamada added <sys/resource.h> which is required by getrlimit(2)
|
||||
* 2013/10/21 nakamura exclude interpreter's segment from data region
|
||||
* 2013/10/11 nakamura mcexec: add a upper limit of the stack size
|
||||
* 2013/10/11 nakamura mcexec: add a path prefix for interpreter search
|
||||
* 2013/10/11 nakamura mcexec: add a interpreter invocation
|
||||
* 2013/10/08 nakamura add a AT_ENTRY entry to the auxiliary vector
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
* 2013/08/07 nakamura add page fault forwarding
|
||||
* 2013/07/26 shirasawa mcexec print signum or exit status
|
||||
* 2013/07/17 nakamura create more mcexec thread so that all cpu to be serviced
|
||||
* 2013/04/17 nakamura add generic system call forwarding
|
||||
*/
|
||||
#ifndef HEADER_MCCTRL_H
|
||||
#define HEADER_MCCTRL_H
|
||||
|
||||
#include <ihk/ihk_host_driver.h>
|
||||
#include <uprotocol.h>
|
||||
#include <linux/wait.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <ikc/master.h>
|
||||
|
||||
#define SCD_MSG_PREPARE_PROCESS 0x1
|
||||
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
||||
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
||||
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
||||
|
||||
#define SCD_MSG_INIT_CHANNEL 0x5
|
||||
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
|
||||
#define __NR_coredump 999
|
||||
|
||||
struct coretable {
|
||||
int len;
|
||||
unsigned long addr;
|
||||
};
|
||||
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int ref;
|
||||
int osnum;
|
||||
int pid;
|
||||
int err;
|
||||
unsigned long arg;
|
||||
};
|
||||
|
||||
struct mcctrl_priv {
|
||||
ihk_os_t os;
|
||||
struct program_load_desc *desc;
|
||||
};
|
||||
|
||||
struct ikc_scd_init_param {
|
||||
unsigned long request_page;
|
||||
unsigned long response_page;
|
||||
unsigned long doorbell_page;
|
||||
unsigned long post_page;
|
||||
};
|
||||
|
||||
struct syscall_post {
|
||||
unsigned long v[8];
|
||||
};
|
||||
|
||||
struct syscall_params {
|
||||
unsigned long request_pa;
|
||||
struct syscall_request *request_va;
|
||||
unsigned long response_rpa, response_pa;
|
||||
struct syscall_response *response_va;
|
||||
unsigned long post_pa;
|
||||
struct syscall_post *post_va;
|
||||
|
||||
unsigned long doorbell_pa;
|
||||
unsigned long *doorbell_va;
|
||||
};
|
||||
|
||||
struct wait_queue_head_list_node {
|
||||
struct list_head list;
|
||||
wait_queue_head_t wq_syscall;
|
||||
int pid;
|
||||
int req;
|
||||
};
|
||||
|
||||
struct mcctrl_channel {
|
||||
struct ihk_ikc_channel_desc *c;
|
||||
struct syscall_params param;
|
||||
struct ikc_scd_init_param init;
|
||||
void *dma_buf;
|
||||
|
||||
struct list_head wq_list;
|
||||
ihk_spinlock_t wq_list_lock;
|
||||
};
|
||||
|
||||
struct mcctrl_per_proc_data {
|
||||
struct list_head list;
|
||||
int pid;
|
||||
unsigned long rpgtable; /* per process, not per OS */
|
||||
};
|
||||
|
||||
struct mcctrl_usrdata {
|
||||
struct ihk_ikc_listen_param listen_param;
|
||||
struct ihk_ikc_listen_param listen_param2;
|
||||
ihk_os_t os;
|
||||
int num_channels;
|
||||
struct mcctrl_channel *channels;
|
||||
unsigned long *mcctrl_doorbell_va;
|
||||
unsigned long mcctrl_doorbell_pa;
|
||||
int remaining_job;
|
||||
int base_cpu;
|
||||
int job_pos;
|
||||
int mcctrl_dma_abort;
|
||||
unsigned long last_thread_exec;
|
||||
wait_queue_head_t wq_prepare;
|
||||
|
||||
struct list_head per_proc_list;
|
||||
ihk_spinlock_t per_proc_list_lock;
|
||||
void **keys;
|
||||
};
|
||||
|
||||
struct mcctrl_signal {
|
||||
int cond;
|
||||
int sig;
|
||||
int pid;
|
||||
int tid;
|
||||
char info[128];
|
||||
};
|
||||
|
||||
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
|
||||
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
|
||||
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
|
||||
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
|
||||
unsigned long *endp);
|
||||
|
||||
/* syscall.c */
|
||||
int init_peer_channel_registry(struct mcctrl_usrdata *ud);
|
||||
int register_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
|
||||
int deregister_peer_channel(struct mcctrl_usrdata *ud, void *key, struct mcctrl_channel *ch);
|
||||
struct mcctrl_channel *get_peer_channel(struct mcctrl_usrdata *ud, void *key);
|
||||
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc);
|
||||
|
||||
#define PROCFS_NAME_MAX 1000
|
||||
|
||||
struct procfs_read {
|
||||
unsigned long pbuf; /* physical address of the host buffer (request) */
|
||||
unsigned long offset; /* offset to read (request) */
|
||||
int count; /* bytes to read (request) */
|
||||
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
|
||||
int ret; /* read bytes (answer) */
|
||||
int status; /* non-zero if done (answer) */
|
||||
int newcpu; /* migrated new cpu (answer) */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
struct procfs_file {
|
||||
int status; /* status of processing (answer) */
|
||||
int mode; /* file mode (request) */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
#endif
|
||||
27
executer/kernel/mcctrl/Makefile.in
Normal file
27
executer/kernel/mcctrl/Makefile.in
Normal file
@ -0,0 +1,27 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
src = @abs_srcdir@
|
||||
KMODDIR=@KMODDIR@
|
||||
BINDIR=@BINDIR@
|
||||
IHK_BASE=$(src)/../../../../ihk
|
||||
|
||||
obj-m += mcctrl.o
|
||||
|
||||
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/linux/include/ihk/arch/$(ARCH) -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/ikc/include/ikc/arch/$(ARCH) -I$(IHK_BASE)/include -I$(IHK_BASE)/include/arch/$(ARCH) -I$(src)/../../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\" -I@abs_builddir@
|
||||
|
||||
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
|
||||
mcctrl-y += sysfs.o sysfs_files.o arch/$(ARCH)/archdeps.o
|
||||
|
||||
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../../ihk/linux/core/Module.symvers
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcctrl.ko $(KMODDIR)
|
||||
1
executer/kernel/mcctrl/arch/x86_64/Makefile.in
Normal file
1
executer/kernel/mcctrl/arch/x86_64/Makefile.in
Normal file
@ -0,0 +1 @@
|
||||
# dummy file
|
||||
192
executer/kernel/mcctrl/arch/x86_64/archdeps.c
Normal file
192
executer/kernel/mcctrl/arch/x86_64/archdeps.c
Normal file
@ -0,0 +1,192 @@
|
||||
#include <linux/version.h>
|
||||
#include "../../config.h"
|
||||
#include "../../mcctrl.h"
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_image_64
|
||||
#if MCCTRL_KSYM_vdso_image_64
|
||||
struct vdso_image *vdso_image = (void *)MCCTRL_KSYM_vdso_image_64;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_start
|
||||
#if MCCTRL_KSYM_vdso_start
|
||||
void *vdso_start = (void *)MCCTRL_KSYM_vdso_start;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_end
|
||||
#if MCCTRL_KSYM_vdso_end
|
||||
void *vdso_end = (void *)MCCTRL_KSYM_vdso_end;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_vdso_pages
|
||||
#if MCCTRL_KSYM_vdso_pages
|
||||
struct page **vdso_pages = (void *)MCCTRL_KSYM_vdso_pages;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM___vvar_page
|
||||
#if MCCTRL_KSYM___vvar_page
|
||||
void *__vvar_page = (void *)MCCTRL_KSYM___vvar_page;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
long *hpet_addressp
|
||||
#ifdef MCCTRL_KSYM_hpet_address
|
||||
#if MCCTRL_KSYM_hpet_address
|
||||
= (void *)MCCTRL_KSYM_hpet_address;
|
||||
#else
|
||||
= &hpet_address;
|
||||
#endif
|
||||
#else
|
||||
= NULL;
|
||||
#endif
|
||||
|
||||
void **hv_clockp
|
||||
#ifdef MCCTRL_KSYM_hv_clock
|
||||
#if MCCTRL_KSYM_hv_clock
|
||||
= (void *)MCCTRL_KSYM_hv_clock;
|
||||
#else
|
||||
= &hv_clock;
|
||||
#endif
|
||||
#else
|
||||
= NULL;
|
||||
#endif
|
||||
|
||||
unsigned long
|
||||
reserve_user_space_common(struct mcctrl_usrdata *usrdata, unsigned long start, unsigned long end);
|
||||
|
||||
int
|
||||
reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, unsigned long *endp)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long start = 0L;
|
||||
unsigned long end;
|
||||
|
||||
#define DESIRED_USER_END 0x800000000000
|
||||
#define GAP_FOR_MCEXEC 0x008000000000UL
|
||||
end = DESIRED_USER_END;
|
||||
down_write(¤t->mm->mmap_sem);
|
||||
vma = find_vma(current->mm, 0);
|
||||
if (vma) {
|
||||
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
#endif
|
||||
start = reserve_user_space_common(usrdata, start, end);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
up_write(¤t->mm->mmap_sem);
|
||||
#endif
|
||||
|
||||
if (IS_ERR_VALUE(start)) {
|
||||
return start;
|
||||
}
|
||||
*startp = start;
|
||||
*endp = end;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Fill in the struct vdso located at remote physical address vdso_rpa.
 *
 * The structure is mapped through ihk_device_map_memory() and
 * ihk_device_map_virtual(), populated with the physical addresses of the
 * host vDSO pages plus the VVAR, HPET and pvclock locations that apply to
 * the kernel version being built against, and then unmapped again.
 * The busy field is cleared after a write barrier as the last store.
 * NOTE(review): busy is presumably polled by the requester -- confirm.
 */
void get_vdso_info(ihk_os_t os, long vdso_rpa)
{
	ihk_device_t dev = ihk_os_to_dev(os);
	struct vdso *info;
	long pa;
	size_t bytes;
	int pg;

	pa = ihk_device_map_memory(dev, vdso_rpa, sizeof(*info));
	info = ihk_device_map_virtual(dev, pa, sizeof(*info), NULL, 0);

	/* vDSO text/data pages */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
	bytes = vdso_image->size;
	info->vdso_npages = bytes >> PAGE_SHIFT;

	/* Too many pages for the physlist: report none and finish. */
	if (info->vdso_npages > VDSO_MAXPAGES) {
		info->vdso_npages = 0;
		goto out;
	}

	for (pg = 0; pg < info->vdso_npages; pg++) {
		info->vdso_physlist[pg] = virt_to_phys(
				vdso_image->data + (pg * PAGE_SIZE));
	}
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
	bytes = vdso_end - vdso_start;
	/* Round the byte count up to a whole number of pages. */
	bytes = (bytes + PAGE_SIZE - 1) & PAGE_MASK;

	info->vdso_npages = bytes >> PAGE_SHIFT;
	/* Too many pages for the physlist: report none and finish. */
	if (info->vdso_npages > VDSO_MAXPAGES) {
		info->vdso_npages = 0;
		goto out;
	}

	for (pg = 0; pg < info->vdso_npages; pg++) {
		info->vdso_physlist[pg] = page_to_phys(vdso_pages[pg]);
	}
#endif

	/* VVAR page: the virtual address recorded depends on the kernel version */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
	info->vvar_is_global = 0;
	info->vvar_virt = (void *)(-3 * PAGE_SIZE);
	info->vvar_phys = virt_to_phys(__vvar_page);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
	info->vvar_is_global = 0;
	info->vvar_virt = (void *)(-2 * PAGE_SIZE);
	info->vvar_phys = virt_to_phys(__vvar_page);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
	info->vvar_is_global = 0;
	info->vvar_virt = (void *)(info->vdso_npages * PAGE_SIZE);
	info->vvar_phys = virt_to_phys(__vvar_page);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,1,0)
	info->vvar_is_global = 1;
	info->vvar_virt = (void *)fix_to_virt(VVAR_PAGE);
	info->vvar_phys = virt_to_phys(__vvar_page);
#endif

	/* HPET page: only recorded when an HPET address is available */
	if (hpet_addressp != NULL && *hpet_addressp != 0) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
		info->hpet_is_global = 0;
		info->hpet_virt = (void *)(-2 * PAGE_SIZE);
		info->hpet_phys = *hpet_addressp;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,17,0)
		info->hpet_is_global = 0;
		info->hpet_virt = (void *)(-1 * PAGE_SIZE);
		info->hpet_phys = *hpet_addressp;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0)
		info->hpet_is_global = 0;
		info->hpet_virt = (void *)((info->vdso_npages + 1) * PAGE_SIZE);
		info->hpet_phys = *hpet_addressp;
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
		info->hpet_is_global = 1;
		info->hpet_virt = (void *)fix_to_virt(VSYSCALL_HPET);
		info->hpet_phys = *hpet_addressp;
#endif
	}

	/* pvclock time-info table: only recorded when hv_clock is set */
	if (hv_clockp != NULL && *hv_clockp != NULL) {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,5,0)
		info->pvti_is_global = 0;
		info->pvti_virt = (void *)(-1 * PAGE_SIZE);
		info->pvti_phys = virt_to_phys(*hv_clockp);
#elif LINUX_VERSION_CODE >= KERNEL_VERSION(3,8,0)
		info->pvti_is_global = 1;
		info->pvti_virt = (void *)fix_to_virt(PVCLOCK_FIXMAP_BEGIN);
		info->pvti_phys = virt_to_phys(*hv_clockp);
#endif
	}

out:
	/* Order every store above before the store that clears busy. */
	wmb();
	info->busy = 0;

	ihk_device_unmap_virtual(dev, info, sizeof(*info));
	ihk_device_unmap_memory(dev, pa, sizeof(*info));
} /* get_vdso_info() */
|
||||
@ -45,7 +45,6 @@ static int load_elf(struct linux_binprm *bprm
|
||||
#endif
|
||||
)
|
||||
{
|
||||
char mcexec[BINPRM_BUF_SIZE];
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
|
||||
const
|
||||
#endif
|
||||
@ -60,12 +59,8 @@ static int load_elf(struct linux_binprm *bprm
|
||||
int l;
|
||||
} envdata;
|
||||
envdata env[] = {
|
||||
{.name = "MCEXEC"},
|
||||
#define env_mcexec (env[0].val)
|
||||
{.name = "MCEXEC_WL"},
|
||||
#define env_mcexec_wl (env[1].val)
|
||||
{.name = "MCEXEC_BL"},
|
||||
#define env_mcexec_bl (env[2].val)
|
||||
#define env_mcexec_wl (env[0].val)
|
||||
{.name = NULL}
|
||||
};
|
||||
envdata *ep;
|
||||
@ -120,9 +115,15 @@ static int load_elf(struct linux_binprm *bprm
|
||||
for(i = 0, st = 0; mode != 2;){
|
||||
if(st == 0){
|
||||
off = p & ~PAGE_MASK;
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,6,0)
|
||||
rc = get_user_pages_remote(current, bprm->mm,
|
||||
bprm->p, 1, 0, 1,
|
||||
&page, NULL);
|
||||
#else
|
||||
rc = get_user_pages(current, bprm->mm,
|
||||
bprm->p, 1, 0, 1,
|
||||
&page, NULL);
|
||||
#endif
|
||||
if(rc <= 0)
|
||||
return -EFAULT;
|
||||
addr = kmap_atomic(page
|
||||
@ -190,23 +191,10 @@ static int load_elf(struct linux_binprm *bprm
|
||||
}
|
||||
}
|
||||
|
||||
if(!env_mcexec || !strcmp(env_mcexec, "0") || !strcmp(env_mcexec, "off"))
|
||||
rc = 1;
|
||||
else{
|
||||
rc = 0;
|
||||
if(strchr(env_mcexec, '/') && strlen(env_mcexec) < BINPRM_BUF_SIZE)
|
||||
strcpy(mcexec, env_mcexec);
|
||||
else
|
||||
strcpy(mcexec, MCEXEC_PATH);
|
||||
}
|
||||
|
||||
if(rc);
|
||||
else if(env_mcexec_wl)
|
||||
if(env_mcexec_wl)
|
||||
rc = !pathcheck(path, env_mcexec_wl);
|
||||
else if(env_mcexec_bl)
|
||||
rc = pathcheck(path, env_mcexec_bl);
|
||||
else
|
||||
rc = pathcheck(path, "/usr:/bin:/sbin:/opt");
|
||||
rc = 1;
|
||||
|
||||
for(ep = env; ep->name; ep++)
|
||||
if(ep->val)
|
||||
@ -214,7 +202,7 @@ static int load_elf(struct linux_binprm *bprm
|
||||
if(rc)
|
||||
return -ENOEXEC;
|
||||
|
||||
file = open_exec(mcexec);
|
||||
file = open_exec(MCEXEC_PATH);
|
||||
if (IS_ERR(file))
|
||||
return -ENOEXEC;
|
||||
|
||||
@ -229,29 +217,18 @@ static int load_elf(struct linux_binprm *bprm
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
wp = mcexec;
|
||||
wp = MCEXEC_PATH;
|
||||
rc = copy_strings_kernel(1, &wp, bprm);
|
||||
if (rc){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
bprm->argc++;
|
||||
#if 1
|
||||
rc = bprm_change_interp(mcexec, bprm);
|
||||
rc = bprm_change_interp(MCEXEC_PATH, bprm);
|
||||
if (rc < 0){
|
||||
fput(file);
|
||||
return rc;
|
||||
}
|
||||
#else
|
||||
if(brpm->interp != bprm->filename)
|
||||
kfree(brpm->interp);
|
||||
kfree(brpm->filename);
|
||||
bprm->filename = bprm->interp = kstrdup(mcexec, GFP_KERNEL);
|
||||
if(!bprm->interp){
|
||||
fput(file);
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
allow_write_access(bprm->file);
|
||||
fput(bprm->file);
|
||||
@ -278,7 +255,7 @@ void __init binfmt_mcexec_init(void)
|
||||
insert_binfmt(&mcexec_format);
|
||||
}
|
||||
|
||||
void __exit binfmt_mcexec_exit(void)
|
||||
void binfmt_mcexec_exit(void)
|
||||
{
|
||||
unregister_binfmt(&mcexec_format);
|
||||
}
|
||||
@ -32,10 +32,12 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/version.h>
|
||||
#include <linux/semaphore.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include <asm/uaccess.h>
|
||||
#include <asm/delay.h>
|
||||
#include <asm/msr.h>
|
||||
#include <asm/io.h>
|
||||
#include "../../config.h"
|
||||
#include "mcctrl.h"
|
||||
|
||||
//#define DEBUG
|
||||
@ -46,6 +48,28 @@
|
||||
#define dprintk(...)
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_sys_unshare
|
||||
#if MCCTRL_KSYM_sys_unshare
|
||||
typedef int (*int_star_fn_ulong_t)(unsigned long);
|
||||
int (*mcctrl_sys_unshare)(unsigned long unshare_flags) =
|
||||
(int_star_fn_ulong_t)
|
||||
MCCTRL_KSYM_sys_unshare;
|
||||
#else // exported
|
||||
int (*mcctrl_sys_unshare)(unsigned long unshare_flags) = NULL;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef MCCTRL_KSYM_sys_mount
|
||||
#if MCCTRL_KSYM_sys_mount
|
||||
typedef int (*int_star_fn_char_char_char_ulong_void_t)(char *, char *, char *, unsigned long, void *);
|
||||
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) =
|
||||
(int_star_fn_char_char_char_ulong_void_t)
|
||||
MCCTRL_KSYM_sys_mount;
|
||||
#else // exported
|
||||
int (*mcctrl_sys_mount)(char *dev_name,char *dir_name, char *type, unsigned long flags, void *data) = NULL;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
//static DECLARE_WAIT_QUEUE_HEAD(wq_prepare);
|
||||
//extern struct mcctrl_channel *channels;
|
||||
int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu);
|
||||
@ -58,7 +82,6 @@ static long mcexec_prepare_image(ihk_os_t os,
|
||||
void *args, *envs;
|
||||
long ret = 0;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
unsigned long flags;
|
||||
struct mcctrl_per_proc_data *ppd = NULL;
|
||||
|
||||
if (copy_from_user(&desc, udesc,
|
||||
@ -101,52 +124,48 @@ static long mcexec_prepare_image(ihk_os_t os,
|
||||
}
|
||||
|
||||
pdesc->args = (void*)virt_to_phys(args);
|
||||
printk("args: 0x%lX\n", (unsigned long)pdesc->args);
|
||||
printk("argc: %d\n", *(int*)args);
|
||||
dprintk("args: 0x%lX\n", (unsigned long)pdesc->args);
|
||||
dprintk("argc: %ld\n", *(long *)args);
|
||||
pdesc->envs = (void*)virt_to_phys(envs);
|
||||
printk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
|
||||
printk("envc: %d\n", *(int*)envs);
|
||||
dprintk("envs: 0x%lX\n", (unsigned long)pdesc->envs);
|
||||
dprintk("envc: %ld\n", *(long *)envs);
|
||||
|
||||
isp.msg = SCD_MSG_PREPARE_PROCESS;
|
||||
isp.ref = pdesc->cpu;
|
||||
isp.arg = virt_to_phys(pdesc);
|
||||
|
||||
printk("# of sections: %d\n", pdesc->num_sections);
|
||||
printk("%p (%lx)\n", pdesc, isp.arg);
|
||||
dprintk("# of sections: %d\n", pdesc->num_sections);
|
||||
dprintk("%p (%lx)\n", pdesc, isp.arg);
|
||||
|
||||
pdesc->status = 0;
|
||||
mcctrl_ikc_send(os, pdesc->cpu, &isp);
|
||||
|
||||
wait_event_interruptible(usrdata->wq_prepare, pdesc->status);
|
||||
while (wait_event_interruptible(usrdata->wq_prepare, pdesc->status) != 0);
|
||||
|
||||
if(pdesc->err < 0){
|
||||
ret = pdesc->err;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
ppd = kmalloc(sizeof(*ppd), GFP_ATOMIC);
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||
if (!ppd) {
|
||||
printk("ERROR: allocating per process data\n");
|
||||
ret = -ENOMEM;
|
||||
printk("ERROR: no per process data for PID %d\n", task_tgid_vnr(current));
|
||||
ret = -EINVAL;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
ppd->pid = pdesc->pid;
|
||||
/* Update rpgtable */
|
||||
ppd->rpgtable = pdesc->rpgtable;
|
||||
|
||||
flags = ihk_ikc_spinlock_lock(&usrdata->per_proc_list_lock);
|
||||
list_add_tail(&ppd->list, &usrdata->per_proc_list);
|
||||
ihk_ikc_spinlock_unlock(&usrdata->per_proc_list_lock, flags);
|
||||
|
||||
dprintk("pid %d, rpgtable: 0x%lx added\n",
|
||||
ppd->pid, ppd->rpgtable);
|
||||
|
||||
if (copy_to_user(udesc, pdesc, sizeof(struct program_load_desc) +
|
||||
sizeof(struct program_image_section) * desc.num_sections)) {
|
||||
ret = -EFAULT;
|
||||
goto free_out;
|
||||
}
|
||||
|
||||
dprintk("%s: pid %d, rpgtable: 0x%lx added\n",
|
||||
__FUNCTION__, ppd->pid, ppd->rpgtable);
|
||||
|
||||
ret = 0;
|
||||
|
||||
free_out:
|
||||
@ -264,12 +283,15 @@ static void release_handler(ihk_os_t os, void *param)
|
||||
{
|
||||
struct handlerinfo *info = param;
|
||||
struct ikc_scd_packet isp;
|
||||
int os_ind = ihk_host_os_get_index(os);
|
||||
|
||||
memset(&isp, '\0', sizeof isp);
|
||||
isp.msg = SCD_MSG_CLEANUP_PROCESS;
|
||||
isp.pid = info->pid;
|
||||
|
||||
mcctrl_ikc_send(os, 0, &isp);
|
||||
if(os_ind >= 0)
|
||||
delete_pid_entry(os_ind, info->pid);
|
||||
kfree(param);
|
||||
}
|
||||
|
||||
@ -391,19 +413,200 @@ static long mcexec_get_cpu(ihk_os_t os)
|
||||
return info->n_cpus;
|
||||
}
|
||||
|
||||
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg)
|
||||
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
|
||||
struct mcctrl_per_proc_data *ppd)
|
||||
{
|
||||
struct mcctrl_per_proc_data *ppd_iter;
|
||||
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
|
||||
/* Check if data for this thread exists and add if not */
|
||||
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
|
||||
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
|
||||
if (ppd_iter->pid == pid) {
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
list_add_tail(&ppd->hash, &ud->per_proc_data_hash[hash]);
|
||||
|
||||
out:
|
||||
write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid)
|
||||
{
|
||||
struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
|
||||
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
|
||||
int ret = 0;
|
||||
unsigned long flags;
|
||||
|
||||
write_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
|
||||
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
|
||||
if (ppd_iter->pid == pid) {
|
||||
ppd = ppd_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ppd) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
list_del(&ppd->hash);
|
||||
|
||||
out:
|
||||
write_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
|
||||
struct mcctrl_usrdata *ud, int pid)
|
||||
{
|
||||
struct mcctrl_per_proc_data *ppd_iter, *ppd = NULL;
|
||||
int hash = (pid & MCCTRL_PER_PROC_DATA_HASH_MASK);
|
||||
unsigned long flags;
|
||||
|
||||
/* Check if data for this process exists and return it */
|
||||
read_lock_irqsave(&ud->per_proc_data_hash_lock[hash], flags);
|
||||
|
||||
list_for_each_entry(ppd_iter, &ud->per_proc_data_hash[hash], hash) {
|
||||
if (ppd_iter->pid == pid) {
|
||||
ppd = ppd_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
read_unlock_irqrestore(&ud->per_proc_data_hash_lock[hash], flags);
|
||||
return ppd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called indirectly from the IKC message handler.
|
||||
*/
|
||||
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet)
|
||||
{
|
||||
struct wait_queue_head_list_node *wqhln = NULL;
|
||||
struct wait_queue_head_list_node *wqhln_iter;
|
||||
struct wait_queue_head_list_node *wqhln_alloc = NULL;
|
||||
int pid = packet->pid;
|
||||
unsigned long flags;
|
||||
struct mcctrl_per_proc_data *ppd;
|
||||
|
||||
/* Look up per-process structure */
|
||||
ppd = mcctrl_get_per_proc_data(ud, pid);
|
||||
|
||||
if (unlikely(!ppd)) {
|
||||
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
return 0;
|
||||
}
|
||||
|
||||
dprintk("%s: (packet_handler) rtid: %d, ttid: %d, sys nr: %d\n",
|
||||
__FUNCTION__,
|
||||
packet->req.rtid,
|
||||
packet->req.ttid,
|
||||
packet->req.number);
|
||||
/*
|
||||
* Three scenarios are possible:
|
||||
* - Find the designated thread if req->ttid is specified.
|
||||
* - Find any available thread if req->ttid is zero.
|
||||
* - Add a request element if no threads are available.
|
||||
*/
|
||||
flags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||
|
||||
/* Is this a request for a specific thread? See if it's waiting */
|
||||
if (unlikely(packet->req.ttid)) {
|
||||
list_for_each_entry(wqhln_iter, &ppd->wq_list_exact, list) {
|
||||
if (packet->req.ttid != task_pid_vnr(wqhln_iter->task))
|
||||
continue;
|
||||
|
||||
/* Look up per-process wait queue head with pid */
|
||||
flags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
|
||||
if (wqhln_iter->pid == pid) {
|
||||
wqhln = wqhln_iter;
|
||||
break;
|
||||
}
|
||||
if (!wqhln) {
|
||||
printk("%s: WARNING: no target thread found for exact request??\n",
|
||||
__FUNCTION__);
|
||||
}
|
||||
}
|
||||
/* Is there any thread available? */
|
||||
else {
|
||||
list_for_each_entry(wqhln_iter, &ppd->wq_list, list) {
|
||||
if (wqhln_iter->task && !wqhln_iter->req) {
|
||||
wqhln = wqhln_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* If no match found, add request to pending request list */
|
||||
if (unlikely(!wqhln)) {
|
||||
retry_alloc:
|
||||
wqhln_alloc = kmalloc(sizeof(*wqhln), GFP_ATOMIC);
|
||||
if (!wqhln_alloc) {
|
||||
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
|
||||
goto retry_alloc;
|
||||
}
|
||||
|
||||
wqhln = wqhln_alloc;
|
||||
wqhln->req = 0;
|
||||
wqhln->task = NULL;
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
list_add_tail(&wqhln->list, &ppd->wq_req_list);
|
||||
}
|
||||
|
||||
wqhln->packet = packet;
|
||||
wqhln->req = 1;
|
||||
wake_up(&wqhln->wq_syscall);
|
||||
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called from an mcexec thread via ioctl().
|
||||
*/
|
||||
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
|
||||
{
|
||||
struct ikc_scd_packet *packet;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct wait_queue_head_list_node *wqhln = NULL;
|
||||
struct wait_queue_head_list_node *wqhln_iter;
|
||||
int ret = 0;
|
||||
unsigned long irqflags;
|
||||
struct mcctrl_per_proc_data *ppd;
|
||||
|
||||
/* Look up per-process structure */
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||
|
||||
if (unlikely(!ppd)) {
|
||||
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
|
||||
if (packet) {
|
||||
printk("%s: ERROR: packet %p is already registered for thread %d\n",
|
||||
__FUNCTION__, packet, task_pid_vnr(current));
|
||||
return -EBUSY;
|
||||
}
|
||||
|
||||
retry:
|
||||
/* Prepare per-thread wait queue head or find a valid request */
|
||||
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||
/* First see if there is a valid request already that is not yet taken */
|
||||
list_for_each_entry(wqhln_iter, &ppd->wq_req_list, list) {
|
||||
if (wqhln_iter->task == NULL && wqhln_iter->req) {
|
||||
wqhln = wqhln_iter;
|
||||
wqhln->task = current;
|
||||
list_del(&wqhln->list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!wqhln) {
|
||||
@ -414,180 +617,86 @@ retry_alloc:
|
||||
goto retry_alloc;
|
||||
}
|
||||
|
||||
wqhln->pid = pid;
|
||||
wqhln->task = current;
|
||||
wqhln->req = 0;
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
list_add_tail(&wqhln->list, &c->wq_list);
|
||||
|
||||
/* Wait for a request.. */
|
||||
list_add(&wqhln->list, &ppd->wq_list);
|
||||
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
|
||||
|
||||
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
||||
|
||||
/* Remove per-thread wait queue head */
|
||||
irqflags = ihk_ikc_spinlock_lock(&ppd->wq_list_lock);
|
||||
list_del(&wqhln->list);
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&ppd->wq_list_lock, irqflags);
|
||||
|
||||
wqhln->req = 1;
|
||||
wake_up(&wqhln->wq_syscall);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#ifndef DO_USER_MODE
|
||||
// static int remaining_job, base_cpu, job_pos;
|
||||
#endif
|
||||
|
||||
// extern int num_channels;
|
||||
// extern int mcctrl_dma_abort;
|
||||
|
||||
int mcexec_wait_syscall(ihk_os_t os, struct syscall_wait_desc *__user req)
|
||||
{
|
||||
struct syscall_wait_desc swd;
|
||||
struct mcctrl_channel *c;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct wait_queue_head_list_node *wqhln;
|
||||
struct wait_queue_head_list_node *wqhln_iter;
|
||||
int ret = 0;
|
||||
unsigned long irqflags;
|
||||
#ifndef DO_USER_MODE
|
||||
unsigned long s, w, d;
|
||||
#endif
|
||||
|
||||
//printk("mcexec_wait_syscall swd=%p req=%p size=%d\n", &swd, req, sizeof(swd.cpu));
|
||||
if (copy_from_user(&swd, req, sizeof(swd))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
if (swd.cpu >= usrdata->num_channels)
|
||||
return -EINVAL;
|
||||
|
||||
c = get_peer_channel(usrdata, current);
|
||||
if (c) {
|
||||
printk("mcexec_wait_syscall:already registered. task %p ch %p\n",
|
||||
current, c);
|
||||
return -EBUSY;
|
||||
}
|
||||
c = usrdata->channels + swd.cpu;
|
||||
|
||||
#ifdef DO_USER_MODE
|
||||
retry:
|
||||
/* Prepare per-process wait queue head */
|
||||
retry_alloc:
|
||||
wqhln = kmalloc(sizeof(*wqhln), GFP_KERNEL);
|
||||
if (!wqhln) {
|
||||
printk("WARNING: coudln't alloc wait queue head, retrying..\n");
|
||||
goto retry_alloc;
|
||||
}
|
||||
|
||||
wqhln->pid = swd.pid;
|
||||
wqhln->req = 0;
|
||||
init_waitqueue_head(&wqhln->wq_syscall);
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
/* First see if there is one wait queue already */
|
||||
list_for_each_entry(wqhln_iter, &c->wq_list, list) {
|
||||
if (wqhln_iter->pid == current->tgid) {
|
||||
kfree(wqhln);
|
||||
wqhln = wqhln_iter;
|
||||
list_del(&wqhln->list);
|
||||
break;
|
||||
}
|
||||
}
|
||||
list_add_tail(&wqhln->list, &c->wq_list);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
||||
|
||||
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
|
||||
|
||||
|
||||
/* Remove per-process wait queue head */
|
||||
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
|
||||
list_del(&wqhln->list);
|
||||
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
|
||||
if (ret && !wqhln->req) {
|
||||
kfree(wqhln);
|
||||
wqhln = NULL;
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
packet = wqhln->packet;
|
||||
kfree(wqhln);
|
||||
wqhln = NULL;
|
||||
|
||||
if (c->param.request_va->number == 61 &&
|
||||
c->param.request_va->args[0] == swd.pid) {
|
||||
dprintk("%s: tid: %d request from CPU %d\n",
|
||||
__FUNCTION__, task_pid_vnr(current), packet->ref);
|
||||
|
||||
dprintk("pid: %d, tid: %d: SC %d, swd.cpu: %d, WARNING: wait4() for self?\n",
|
||||
current->tgid,
|
||||
current->pid,
|
||||
c->param.request_va->number,
|
||||
swd.cpu);
|
||||
|
||||
return -EINTR;
|
||||
}
|
||||
|
||||
#if 1
|
||||
mb();
|
||||
if (!c->param.request_va->valid) {
|
||||
printk("mcexec_wait_syscall:stray wakeup\n");
|
||||
if (!packet->req.valid) {
|
||||
printk("%s: ERROR: stray wakeup pid: %d, tid: %d: SC %lu\n",
|
||||
__FUNCTION__,
|
||||
task_tgid_vnr(current),
|
||||
task_pid_vnr(current),
|
||||
packet->req.number);
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||
(usrdata->channels + packet->ref)->c);
|
||||
goto retry;
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
while (1) {
|
||||
c = usrdata->channels + swd.cpu;
|
||||
rdtscll(s);
|
||||
if (!usrdata->remaining_job) {
|
||||
while (!(*c->param.doorbell_va)) {
|
||||
mb();
|
||||
cpu_relax();
|
||||
rdtscll(w);
|
||||
if (w > s + 1024UL * 1024 * 1024 * 10) {
|
||||
return -EINTR;
|
||||
}
|
||||
}
|
||||
d = (*c->param.doorbell_va) - 1;
|
||||
*c->param.doorbell_va = 0;
|
||||
|
||||
if (d < 0 || d >= usrdata->num_channels) {
|
||||
d = 0;
|
||||
}
|
||||
usrdata->base_cpu = d;
|
||||
usrdata->job_pos = 0;
|
||||
usrdata->remaining_job = 1;
|
||||
} else {
|
||||
usrdata->job_pos++;
|
||||
}
|
||||
packet->req.valid = 0; /* ack */
|
||||
dprintk("%s: system call: %d, args[0]: %lu, args[1]: %lu, args[2]: %lu, "
|
||||
"args[3]: %lu, args[4]: %lu, args[5]: %lu\n",
|
||||
__FUNCTION__,
|
||||
packet->req.number,
|
||||
packet->req.args[0],
|
||||
packet->req.args[1],
|
||||
packet->req.args[2],
|
||||
packet->req.args[3],
|
||||
packet->req.args[4],
|
||||
packet->req.args[5]);
|
||||
|
||||
for (; usrdata->job_pos < usrdata->num_channels; usrdata->job_pos++) {
|
||||
if (base_cpu + job_pos >= num_channels) {
|
||||
c = usrdata->channels +
|
||||
(usrdata->base_cpu + usrdata->job_pos - usrdata->num_channels);
|
||||
} else {
|
||||
c = usrdata->channels + usrdata->base_cpu + usrdata->job_pos;
|
||||
}
|
||||
if (!c) {
|
||||
continue;
|
||||
}
|
||||
if (c->param.request_va &&
|
||||
c->param.request_va->valid) {
|
||||
#endif
|
||||
c->param.request_va->valid = 0; /* ack */
|
||||
dprintk("SC #%lx, %lx\n",
|
||||
c->param.request_va->number,
|
||||
c->param.request_va->args[0]);
|
||||
register_peer_channel(usrdata, current, c);
|
||||
if (__do_in_kernel_syscall(os, c, c->param.request_va)) {
|
||||
if (copy_to_user(&req->sr, c->param.request_va,
|
||||
sizeof(struct syscall_request))) {
|
||||
deregister_peer_channel(usrdata, current, c);
|
||||
return -EFAULT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
deregister_peer_channel(usrdata, current, c);
|
||||
#ifdef DO_USER_MODE
|
||||
goto retry;
|
||||
#endif
|
||||
#ifndef DO_USER_MODE
|
||||
if (usrdata->mcctrl_dma_abort) {
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
}
|
||||
usrdata->remaining_job = 0;
|
||||
if (mcctrl_add_per_thread_data(ppd, current, packet) < 0) {
|
||||
kprintf("%s: error adding per-thread data\n", __FUNCTION__);
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
return 0;
|
||||
|
||||
if (__do_in_kernel_syscall(os, packet)) {
|
||||
if (copy_to_user(&req->sr, &packet->req,
|
||||
sizeof(struct syscall_request))) {
|
||||
|
||||
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
|
||||
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
|
||||
return -EINVAL;
|
||||
}
|
||||
return -EFAULT;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||
(usrdata->channels + packet->ref)->c);
|
||||
|
||||
if (mcctrl_delete_per_thread_data(ppd, current) < 0) {
|
||||
kprintf("%s: error deleting per-thread data\n", __FUNCTION__);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
goto retry;
|
||||
}
|
||||
|
||||
long mcexec_pin_region(ihk_os_t os, unsigned long *__user arg)
|
||||
@ -671,113 +780,72 @@ long mcexec_load_syscall(ihk_os_t os, struct syscall_load_desc *__user arg)
|
||||
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, desc.size);
|
||||
|
||||
/*
|
||||
ihk_dma_channel_t channel;
|
||||
struct ihk_dma_request request;
|
||||
unsigned long dma_status = 0;
|
||||
|
||||
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
|
||||
if (!channel) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
memset(&request, 0, sizeof(request));
|
||||
request.src_os = os;
|
||||
request.src_phys = desc.src;
|
||||
request.dest_os = NULL;
|
||||
request.dest_phys = desc.dest;
|
||||
request.size = desc.size;
|
||||
request.notify = (void *)virt_to_phys(&dma_status);
|
||||
request.priv = (void *)1;
|
||||
|
||||
ihk_dma_request(channel, &request);
|
||||
|
||||
while (!dma_status) {
|
||||
mb();
|
||||
udelay(1);
|
||||
}
|
||||
*/
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
|
||||
{
|
||||
struct syscall_ret_desc ret;
|
||||
struct mcctrl_channel *mc;
|
||||
struct ikc_scd_packet *packet;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
#if 0
|
||||
ihk_dma_channel_t channel;
|
||||
struct ihk_dma_request request;
|
||||
|
||||
channel = ihk_device_get_dma_channel(ihk_os_to_dev(os), 0);
|
||||
if (!channel) {
|
||||
return -EINVAL;
|
||||
}
|
||||
#endif
|
||||
struct mcctrl_per_proc_data *ppd;
|
||||
|
||||
if (copy_from_user(&ret, arg, sizeof(struct syscall_ret_desc))) {
|
||||
return -EFAULT;
|
||||
}
|
||||
mc = usrdata->channels + ret.cpu;
|
||||
if (!mc) {
|
||||
|
||||
/* Look up per-process structure */
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||
if (!ppd) {
|
||||
kprintf("%s: ERROR: no per-process structure for PID %d??\n",
|
||||
__FUNCTION__, task_tgid_vnr(current));
|
||||
return -EINVAL;
|
||||
}
|
||||
deregister_peer_channel(usrdata, current, mc);
|
||||
|
||||
mc->param.response_va->ret = ret.ret;
|
||||
packet = (struct ikc_scd_packet *)mcctrl_get_per_thread_data(ppd, current);
|
||||
if (!packet) {
|
||||
kprintf("%s: ERROR: no packet registered for TID %d\n",
|
||||
__FUNCTION__, task_pid_vnr(current));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
mcctrl_delete_per_thread_data(ppd, current);
|
||||
|
||||
if (ret.size > 0) {
|
||||
/* Host => Accel. Write is fast. */
|
||||
unsigned long phys;
|
||||
void *rpm;
|
||||
|
||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest,
|
||||
ret.size);
|
||||
phys = ihk_device_map_memory(ihk_os_to_dev(os), ret.dest, ret.size);
|
||||
#ifdef CONFIG_MIC
|
||||
rpm = ioremap_wc(phys, ret.size);
|
||||
#else
|
||||
rpm = ihk_device_map_virtual(ihk_os_to_dev(os), phys,
|
||||
ret.size, NULL, 0);
|
||||
#endif
|
||||
|
||||
if (copy_from_user(rpm, (void *__user)ret.src, ret.size)) {
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
mb();
|
||||
mc->param.response_va->status = 1;
|
||||
|
||||
#ifdef CONFIG_MIC
|
||||
iounmap(rpm);
|
||||
#else
|
||||
ihk_device_unmap_virtual(ihk_os_to_dev(os), rpm, ret.size);
|
||||
#endif
|
||||
ihk_device_unmap_memory(ihk_os_to_dev(os), phys, ret.size);
|
||||
|
||||
/*
|
||||
memset(&request, 0, sizeof(request));
|
||||
request.src_os = NULL;
|
||||
request.src_phys = ret.src;
|
||||
request.dest_os = os;
|
||||
request.dest_phys = ret.dest;
|
||||
request.size = ret.size;
|
||||
request.notify_os = os;
|
||||
request.notify = (void *)mc->param.response_rpa;
|
||||
request.priv = (void *)1;
|
||||
|
||||
ihk_dma_request(channel, &request);
|
||||
*/
|
||||
} else {
|
||||
mb();
|
||||
mc->param.response_va->status = 1;
|
||||
}
|
||||
|
||||
__return_syscall(os, packet, ret.ret, task_pid_vnr(current));
|
||||
|
||||
/* Free packet */
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet,
|
||||
(usrdata->channels + packet->ref)->c);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIST_HEAD(mckernel_exec_files);
|
||||
DEFINE_SPINLOCK(mckernel_exec_file_lock);
|
||||
DEFINE_SEMAPHORE(mckernel_exec_file_lock);
|
||||
|
||||
|
||||
struct mckernel_exec_file {
|
||||
@ -834,11 +902,67 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
|
||||
struct mckernel_exec_file *mcef;
|
||||
struct mckernel_exec_file *mcef_iter;
|
||||
int retval;
|
||||
int os_ind = ihk_host_os_get_index(os);
|
||||
char *pathbuf, *fullpath;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct mcctrl_per_proc_data *ppd = NULL;
|
||||
int i;
|
||||
|
||||
if (os_ind < 0) {
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||
|
||||
if (!ppd) {
|
||||
ppd = kmalloc(sizeof(*ppd), GFP_KERNEL);
|
||||
if (!ppd) {
|
||||
printk("ERROR: allocating per process data\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
ppd->pid = task_tgid_vnr(current);
|
||||
/*
|
||||
* XXX: rpgtable will be updated in __do_in_kernel_syscall()
|
||||
* under case __NR_munmap
|
||||
*/
|
||||
INIT_LIST_HEAD(&ppd->wq_list);
|
||||
INIT_LIST_HEAD(&ppd->wq_req_list);
|
||||
INIT_LIST_HEAD(&ppd->wq_list_exact);
|
||||
spin_lock_init(&ppd->wq_list_lock);
|
||||
|
||||
for (i = 0; i < MCCTRL_PER_THREAD_DATA_HASH_SIZE; ++i) {
|
||||
INIT_LIST_HEAD(&ppd->per_thread_data_hash[i]);
|
||||
rwlock_init(&ppd->per_thread_data_hash_lock[i]);
|
||||
}
|
||||
|
||||
if (mcctrl_add_per_proc_data(usrdata, ppd->pid, ppd) < 0) {
|
||||
printk("%s: error adding per process data\n", __FUNCTION__);
|
||||
retval = EINVAL;
|
||||
goto out_free_ppd;
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Only deallocate in case of an error if we added it above */
|
||||
ppd = NULL;
|
||||
}
|
||||
|
||||
pathbuf = kmalloc(PATH_MAX, GFP_TEMPORARY);
|
||||
if (!pathbuf) {
|
||||
retval = ENOMEM;
|
||||
goto out_error_drop_ppd;
|
||||
}
|
||||
|
||||
file = open_exec(filename);
|
||||
retval = PTR_ERR(file);
|
||||
if (IS_ERR(file)) {
|
||||
goto out_return;
|
||||
goto out_error_free;
|
||||
}
|
||||
|
||||
fullpath = d_path(&file->f_path, pathbuf, PATH_MAX);
|
||||
if (IS_ERR(fullpath)) {
|
||||
retval = PTR_ERR(fullpath);
|
||||
goto out_error_free;
|
||||
}
|
||||
|
||||
mcef = kmalloc(sizeof(*mcef), GFP_KERNEL);
|
||||
@ -847,33 +971,43 @@ int mcexec_open_exec(ihk_os_t os, char * __user filename)
|
||||
goto out_put_file;
|
||||
}
|
||||
|
||||
spin_lock_irq(&mckernel_exec_file_lock);
|
||||
down(&mckernel_exec_file_lock);
|
||||
/* Find previous file (if exists) and drop it */
|
||||
list_for_each_entry(mcef_iter, &mckernel_exec_files, list) {
|
||||
if (mcef_iter->os == os && mcef_iter->pid == current->tgid) {
|
||||
if (mcef_iter->os == os && mcef_iter->pid == task_tgid_vnr(current)) {
|
||||
allow_write_access(mcef_iter->fp);
|
||||
fput(mcef_iter->fp);
|
||||
list_del(&mcef_iter->list);
|
||||
kfree(mcef_iter);
|
||||
dprintk("%d open_exec dropped previous executable \n", (int)current->tgid);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Add new exec file to the list */
|
||||
mcef->os = os;
|
||||
mcef->pid = current->tgid;
|
||||
mcef->pid = task_tgid_vnr(current);
|
||||
mcef->fp = file;
|
||||
list_add_tail(&mcef->list, &mckernel_exec_files);
|
||||
spin_unlock(&mckernel_exec_file_lock);
|
||||
|
||||
dprintk("%d open_exec and holding file: %s\n", (int)current->tgid, filename);
|
||||
/* Create /proc/self/exe entry */
|
||||
add_pid_entry(os_ind, task_tgid_vnr(current));
|
||||
proc_exe_link(os_ind, task_tgid_vnr(current), fullpath);
|
||||
up(&mckernel_exec_file_lock);
|
||||
|
||||
dprintk("%d open_exec and holding file: %s\n", (int)task_tgid_vnr(current), filename);
|
||||
|
||||
kfree(pathbuf);
|
||||
|
||||
return 0;
|
||||
|
||||
out_put_file:
|
||||
fput(file);
|
||||
|
||||
out_return:
|
||||
out_error_free:
|
||||
kfree(pathbuf);
|
||||
out_error_drop_ppd:
|
||||
if (ppd) mcctrl_delete_per_proc_data(usrdata, ppd->pid);
|
||||
out_free_ppd:
|
||||
if (ppd) kfree(ppd);
|
||||
return -retval;
|
||||
}
|
||||
|
||||
@ -882,20 +1016,43 @@ int mcexec_close_exec(ihk_os_t os)
|
||||
{
|
||||
struct mckernel_exec_file *mcef = NULL;
|
||||
int found = 0;
|
||||
int os_ind = ihk_host_os_get_index(os);
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
|
||||
struct mcctrl_per_proc_data *ppd = NULL;
|
||||
|
||||
spin_lock_irq(&mckernel_exec_file_lock);
|
||||
ppd = mcctrl_get_per_proc_data(usrdata, task_tgid_vnr(current));
|
||||
|
||||
if (ppd) {
|
||||
mcctrl_delete_per_proc_data(usrdata, ppd->pid);
|
||||
|
||||
dprintk("pid: %d, tid: %d: rpgtable for %d (0x%lx) removed\n",
|
||||
task_tgid_vnr(current), current->pid, ppd->pid, ppd->rpgtable);
|
||||
|
||||
kfree(ppd);
|
||||
}
|
||||
else {
|
||||
printk("WARNING: no per process data for pid %d ?\n",
|
||||
task_tgid_vnr(current));
|
||||
}
|
||||
|
||||
if (os_ind < 0) {
|
||||
return EINVAL;
|
||||
}
|
||||
|
||||
down(&mckernel_exec_file_lock);
|
||||
list_for_each_entry(mcef, &mckernel_exec_files, list) {
|
||||
if (mcef->os == os && mcef->pid == current->tgid) {
|
||||
if (mcef->os == os && mcef->pid == task_tgid_vnr(current)) {
|
||||
allow_write_access(mcef->fp);
|
||||
fput(mcef->fp);
|
||||
list_del(&mcef->list);
|
||||
kfree(mcef);
|
||||
found = 1;
|
||||
dprintk("%d close_exec dropped executable \n", (int)current->tgid);
|
||||
dprintk("%d close_exec dropped executable \n", (int)task_tgid_vnr(current));
|
||||
break;
|
||||
}
|
||||
}
|
||||
spin_unlock(&mckernel_exec_file_lock);
|
||||
|
||||
up(&mckernel_exec_file_lock);
|
||||
|
||||
return (found ? 0 : EINVAL);
|
||||
}
|
||||
@ -952,6 +1109,67 @@ long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __use
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Perform a mount on behalf of the calling task.
 *
 * arg: user-space pointer to a struct sys_mount_desc describing the mount
 *
 * The descriptor is copied in, a private copy of the caller's credentials
 * gets CAP_SYS_ADMIN raised in its effective set, and mcctrl_sys_mount()
 * is invoked while those credentials are in force. The previous
 * credentials are restored afterwards.
 *
 * Returns -EFAULT when the descriptor cannot be copied or when
 * MCCTRL_KSYM_sys_mount is not available, -ENOMEM when credentials cannot
 * be prepared, otherwise the result of mcctrl_sys_mount().
 */
long mcexec_sys_mount(struct sys_mount_desc *__user arg)
{
	struct sys_mount_desc mnt;
	struct cred *elevated;
	const struct cred *saved;
	int rc = -EFAULT;	/* result when sys_mount is unavailable */

	if (copy_from_user(&mnt, arg, sizeof(mnt)))
		return -EFAULT;

	elevated = prepare_creds();
	if (elevated == NULL)
		return -ENOMEM;

	cap_raise(elevated->cap_effective, CAP_SYS_ADMIN);
	saved = override_creds(elevated);

#if MCCTRL_KSYM_sys_mount
	rc = mcctrl_sys_mount(mnt.dev_name, mnt.dir_name, mnt.type,
			mnt.flags, mnt.data);
#endif

	/* Drop the temporary privileges again */
	revert_creds(saved);
	put_cred(elevated);

	return rc;
}
|
||||
|
||||
/*
 * Perform an unshare on behalf of the calling task.
 *
 * arg: user-space pointer to a struct sys_unshare_desc holding the flags
 *
 * The descriptor is copied in, a private copy of the caller's credentials
 * gets CAP_SYS_ADMIN raised in its effective set, and mcctrl_sys_unshare()
 * is invoked while those credentials are in force. The previous
 * credentials are restored afterwards.
 *
 * Returns -EFAULT when the descriptor cannot be copied or when
 * MCCTRL_KSYM_sys_unshare is not available, -ENOMEM when credentials
 * cannot be prepared, otherwise the result of mcctrl_sys_unshare().
 */
long mcexec_sys_unshare(struct sys_unshare_desc *__user arg)
{
	struct sys_unshare_desc ush;
	struct cred *elevated;
	const struct cred *saved;
	int rc = -EFAULT;	/* result when sys_unshare is unavailable */

	if (copy_from_user(&ush, arg, sizeof(ush)))
		return -EFAULT;

	elevated = prepare_creds();
	if (elevated == NULL)
		return -ENOMEM;

	cap_raise(elevated->cap_effective, CAP_SYS_ADMIN);
	saved = override_creds(elevated);

#if MCCTRL_KSYM_sys_unshare
	rc = mcctrl_sys_unshare(ush.unshare_flags);
#endif

	/* Drop the temporary privileges again */
	revert_creds(saved);
	put_cred(elevated);

	return rc;
}
|
||||
|
||||
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
||||
struct file *file)
|
||||
{
|
||||
@ -1006,6 +1224,12 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
|
||||
case MCEXEC_UP_GET_CREDV:
|
||||
return mcexec_getcredv((int *)arg);
|
||||
|
||||
case MCEXEC_UP_SYS_MOUNT:
|
||||
return mcexec_sys_mount((struct sys_mount_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_SYS_UNSHARE:
|
||||
return mcexec_sys_unshare((struct sys_unshare_desc *)arg);
|
||||
|
||||
case MCEXEC_UP_DEBUG_LOG:
|
||||
return mcexec_debug_log(os, arg);
|
||||
}
|
||||
@ -25,6 +25,7 @@
|
||||
#include <linux/fs.h>
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/device.h>
|
||||
#include "mcctrl.h"
|
||||
|
||||
#define OS_MAX_MINOR 64
|
||||
@ -67,6 +68,8 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
|
||||
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_MOUNT, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_SYS_UNSHARE, .func = mcctrl_ioctl },
|
||||
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
|
||||
};
|
||||
|
||||
@ -79,71 +82,109 @@ static struct ihk_os_user_call mcctrl_uc[OS_MAX_MINOR];
|
||||
|
||||
static ihk_os_t os[OS_MAX_MINOR];
|
||||
|
||||
static int __init mcctrl_init(void)
|
||||
ihk_os_t osnum_to_os(int n)
|
||||
{
|
||||
return os[n];
|
||||
}
|
||||
|
||||
/* OS event notifier implementation */
|
||||
int mcctrl_os_boot_notifier(int os_index)
|
||||
{
|
||||
int i;
|
||||
int rc;
|
||||
|
||||
rc = -ENOENT;
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
os[i] = ihk_host_find_os(i, NULL);
|
||||
if (os[i]) {
|
||||
printk("OS #%d found.\n", i);
|
||||
rc = 0;
|
||||
}
|
||||
}
|
||||
if(rc){
|
||||
printk("OS not found.\n");
|
||||
return rc;
|
||||
os[os_index] = ihk_host_find_os(os_index, NULL);
|
||||
if (!os[os_index]) {
|
||||
printk("mcctrl: error: OS ID %d couldn't be found\n", os_index);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
for(i = 0; i < OS_MAX_MINOR; i++){
|
||||
if (os[i]) {
|
||||
if (prepare_ikc_channels(os[i]) != 0) {
|
||||
printk("Preparing syscall channels failed.\n");
|
||||
os[i] = NULL;
|
||||
}
|
||||
}
|
||||
if (prepare_ikc_channels(os[os_index]) != 0) {
|
||||
printk("mcctrl: error: preparing IKC channels for OS %d\n", os_index);
|
||||
|
||||
os[os_index] = NULL;
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
memcpy(mcctrl_uc + os_index, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
|
||||
|
||||
rc = ihk_os_register_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||
if (rc < 0) {
|
||||
destroy_ikc_channels(os[os_index]);
|
||||
printk("mcctrl: error: registering callbacks for OS %d\n", os_index);
|
||||
|
||||
goto error_cleanup_channels;
|
||||
}
|
||||
|
||||
procfs_init(os_index);
|
||||
printk("mcctrl: OS ID %d boot event handled\n", os_index);
|
||||
|
||||
return 0;
|
||||
|
||||
error_cleanup_channels:
|
||||
destroy_ikc_channels(os[os_index]);
|
||||
|
||||
os[os_index] = NULL;
|
||||
return rc;
|
||||
}
|
||||
|
||||
int mcctrl_os_shutdown_notifier(int os_index)
|
||||
{
|
||||
sysfsm_cleanup(os[os_index]);
|
||||
free_topology_info(os[os_index]);
|
||||
ihk_os_unregister_user_call_handlers(os[os_index], mcctrl_uc + os_index);
|
||||
destroy_ikc_channels(os[os_index]);
|
||||
procfs_exit(os_index);
|
||||
|
||||
printk("mcctrl: OS ID %d shutdown event handled\n", os_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct ihk_os_notifier_ops mcctrl_os_notifier_ops = {
|
||||
.boot = mcctrl_os_boot_notifier,
|
||||
.shutdown = mcctrl_os_shutdown_notifier,
|
||||
};
|
||||
|
||||
static struct ihk_os_notifier mcctrl_os_notifier = {
|
||||
.ops = &mcctrl_os_notifier_ops,
|
||||
};
|
||||
|
||||
/*
 * Module entry point.
 *
 * Initialises the module-wide state (system call support unless
 * DO_USER_MODE is defined, the rus page hash and the mcexec binfmt) and
 * registers the OS notifier; per-OS setup is done by the notifier's boot
 * callback rather than here.
 *
 * Returns 0 on success or the non-zero value returned by
 * ihk_host_register_os_notifier(), after undoing the binfmt and page hash
 * initialisation.
 */
static int __init mcctrl_init(void)
{
	int ret;

#ifndef DO_USER_MODE
	mcctrl_syscall_init();
#endif

	rus_page_hash_init();

	binfmt_mcexec_init();

	ret = ihk_host_register_os_notifier(&mcctrl_os_notifier);
	if (ret != 0) {
		printk("mcctrl: error: registering OS notifier\n");

		/* Roll back what was set up above */
		binfmt_mcexec_exit();
		rus_page_hash_put_pages();

		return ret;
	}

	printk("mcctrl: initialized successfully.\n");
	return 0;
}
|
||||
|
||||
/*
 * Module exit point.
 *
 * Deregisters the OS notifier (a failure is only logged), then tears down
 * the mcexec binfmt and releases the pages held by the rus page hash.
 */
static void __exit mcctrl_exit(void)
{
	int rc = ihk_host_deregister_os_notifier(&mcctrl_os_notifier);

	if (rc != 0)
		printk("mcctrl: warning: failed to deregister OS notifier??\n");

	binfmt_mcexec_exit();
	rus_page_hash_put_pages();

	printk("mcctrl: unregistered.\n");
}
|
||||
|
||||
MODULE_LICENSE("GPL v2");
|
||||
@ -27,6 +27,7 @@
|
||||
#include <linux/miscdevice.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include "mcctrl.h"
|
||||
#ifdef ATTACHED_MIC
|
||||
#include <sysdeps/mic/mic/micconst.h>
|
||||
@ -40,19 +41,18 @@
|
||||
|
||||
void mcexec_prepare_ack(ihk_os_t os, unsigned long arg, int err);
|
||||
static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ihk_ikc_channel_desc *c);
|
||||
int mcexec_syscall(struct mcctrl_channel *c, int pid, unsigned long arg);
|
||||
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg);
|
||||
void procfs_delete(void *__os, int osnum, unsigned long arg);
|
||||
void procfs_answer(unsigned long arg, int err);
|
||||
int mcexec_syscall(struct mcctrl_usrdata *ud, struct ikc_scd_packet *packet);
|
||||
void sig_done(unsigned long arg, int err);
|
||||
|
||||
/* XXX: this runs in atomic context! */
|
||||
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
void *__packet, void *__os)
|
||||
{
|
||||
struct ikc_scd_packet *pisp = __packet;
|
||||
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(__os);
|
||||
int msg = pisp->msg;
|
||||
|
||||
switch (pisp->msg) {
|
||||
switch (msg) {
|
||||
case SCD_MSG_INIT_CHANNEL:
|
||||
mcctrl_ikc_init(__os, pisp->ref, pisp->arg, c);
|
||||
break;
|
||||
@ -66,15 +66,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
break;
|
||||
|
||||
case SCD_MSG_SYSCALL_ONESIDE:
|
||||
mcexec_syscall(usrdata->channels + pisp->ref, pisp->pid, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_CREATE:
|
||||
procfs_create(__os, pisp->ref, pisp->osnum, pisp->pid, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_DELETE:
|
||||
procfs_delete(__os, pisp->osnum, pisp->arg);
|
||||
mcexec_syscall(usrdata, pisp);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_ANSWER:
|
||||
@ -84,6 +76,47 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
case SCD_MSG_SEND_SIGNAL:
|
||||
sig_done(pisp->arg, pisp->err);
|
||||
break;
|
||||
|
||||
case SCD_MSG_SYSFS_REQ_CREATE:
|
||||
case SCD_MSG_SYSFS_REQ_MKDIR:
|
||||
case SCD_MSG_SYSFS_REQ_SYMLINK:
|
||||
case SCD_MSG_SYSFS_REQ_LOOKUP:
|
||||
case SCD_MSG_SYSFS_REQ_UNLINK:
|
||||
case SCD_MSG_SYSFS_REQ_SETUP:
|
||||
case SCD_MSG_SYSFS_RESP_SHOW:
|
||||
case SCD_MSG_SYSFS_RESP_STORE:
|
||||
case SCD_MSG_SYSFS_RESP_RELEASE:
|
||||
sysfsm_packet_handler(__os, pisp->msg, pisp->err,
|
||||
pisp->sysfs_arg1, pisp->sysfs_arg2);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_TID_CREATE:
|
||||
case SCD_MSG_PROCFS_TID_DELETE:
|
||||
procfsm_packet_handler(__os, pisp->msg, pisp->pid, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_GET_VDSO_INFO:
|
||||
get_vdso_info(__os, pisp->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_REPLY_GET_CPU_MAPPING:
|
||||
reply_get_cpu_mapping(pisp->arg);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk(KERN_ERR "mcctrl:syscall_packet_handler:"
|
||||
"unknown message (%d.%d.%d.%d.%d.%#lx)\n",
|
||||
pisp->msg, pisp->ref, pisp->osnum, pisp->pid,
|
||||
pisp->err, pisp->arg);
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* SCD_MSG_SYSCALL_ONESIDE holds the packet and frees is it
|
||||
* mcexec_ret_syscall(), for the rest, free it here.
|
||||
*/
|
||||
if (msg != SCD_MSG_SYSCALL_ONESIDE) {
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)__packet, c);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
@ -121,8 +154,6 @@ int mcctrl_ikc_set_recv_cpu(ihk_os_t os, int cpu)
|
||||
|
||||
ihk_ikc_channel_set_cpu(usrdata->channels[cpu].c,
|
||||
ihk_ikc_get_processor_id());
|
||||
kprintf("Setting the target to %d\n",
|
||||
ihk_ikc_get_processor_id());
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -168,12 +199,13 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih
|
||||
#endif
|
||||
|
||||
pmc->param.request_va =
|
||||
(void *)__get_free_pages(GFP_KERNEL,
|
||||
(void *)__get_free_pages(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL,
|
||||
REQUEST_SHIFT - PAGE_SHIFT);
|
||||
pmc->param.request_pa = virt_to_phys(pmc->param.request_va);
|
||||
pmc->param.doorbell_va = usrdata->mcctrl_doorbell_va;
|
||||
pmc->param.doorbell_pa = usrdata->mcctrl_doorbell_pa;
|
||||
pmc->param.post_va = (void *)__get_free_page(GFP_KERNEL);
|
||||
pmc->param.post_va = (void *)__get_free_page(in_interrupt() ?
|
||||
GFP_ATOMIC : GFP_KERNEL);
|
||||
pmc->param.post_pa = virt_to_phys(pmc->param.post_va);
|
||||
memset(pmc->param.doorbell_va, 0, PAGE_SIZE);
|
||||
memset(pmc->param.request_va, 0, PAGE_SIZE);
|
||||
@ -193,8 +225,9 @@ static void mcctrl_ikc_init(ihk_os_t os, int cpu, unsigned long rphys, struct ih
|
||||
PAGE_SIZE, NULL, 0);
|
||||
#endif
|
||||
|
||||
pmc->dma_buf = (void *)__get_free_pages(GFP_KERNEL,
|
||||
DMA_PIN_SHIFT - PAGE_SHIFT);
|
||||
pmc->dma_buf = (void *)__get_free_pages(in_interrupt() ?
|
||||
GFP_ATOMIC : GFP_KERNEL,
|
||||
DMA_PIN_SHIFT - PAGE_SHIFT);
|
||||
|
||||
rpm->request_page = pmc->param.request_pa;
|
||||
rpm->doorbell_page = pmc->param.doorbell_pa;
|
||||
@ -240,9 +273,6 @@ static int connect_handler(struct ihk_ikc_channel_info *param)
|
||||
}
|
||||
param->packet_handler = syscall_packet_handler;
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
|
||||
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
|
||||
|
||||
usrdata->channels[cpu].c = c;
|
||||
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
|
||||
|
||||
@ -261,9 +291,6 @@ static int connect_handler2(struct ihk_ikc_channel_info *param)
|
||||
|
||||
param->packet_handler = syscall_packet_handler;
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->channels[cpu].wq_list);
|
||||
spin_lock_init(&usrdata->channels[cpu].wq_list_lock);
|
||||
|
||||
usrdata->channels[cpu].c = c;
|
||||
kprintf("syscall: MC CPU %d connected. c=%p\n", cpu, c);
|
||||
|
||||
@ -290,7 +317,7 @@ int prepare_ikc_channels(ihk_os_t os)
|
||||
{
|
||||
struct ihk_cpu_info *info;
|
||||
struct mcctrl_usrdata *usrdata;
|
||||
int error;
|
||||
int i;
|
||||
|
||||
usrdata = kzalloc(sizeof(struct mcctrl_usrdata), GFP_KERNEL);
|
||||
usrdata->mcctrl_doorbell_va = (void *)__get_free_page(GFP_KERNEL);
|
||||
@ -322,14 +349,14 @@ int prepare_ikc_channels(ihk_os_t os)
|
||||
memcpy(&usrdata->listen_param2, &listen_param2, sizeof listen_param2);
|
||||
ihk_ikc_listen_port(os, &usrdata->listen_param2);
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->per_proc_list);
|
||||
spin_lock_init(&usrdata->per_proc_list_lock);
|
||||
|
||||
error = init_peer_channel_registry(usrdata);
|
||||
if (error) {
|
||||
return error;
|
||||
for (i = 0; i < MCCTRL_PER_PROC_DATA_HASH_SIZE; ++i) {
|
||||
INIT_LIST_HEAD(&usrdata->per_proc_data_hash[i]);
|
||||
rwlock_init(&usrdata->per_proc_data_hash_lock[i]);
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&usrdata->cpu_topology_list);
|
||||
INIT_LIST_HEAD(&usrdata->node_topology_list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
393
executer/kernel/mcctrl/mcctrl.h
Normal file
393
executer/kernel/mcctrl/mcctrl.h
Normal file
@ -0,0 +1,393 @@
|
||||
/**
|
||||
* \file mcctrl.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* define data structure
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
* \author Balazs Gerofi <bgerofi@riken.jp> \par
|
||||
* Copyright (C) 2012 RIKEN AICS
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
|
||||
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2013 The University of Tokyo
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
* 2013/11/07 hamada added <sys/resource.h> which is required by getrlimit(2)
|
||||
* 2013/10/21 nakamura exclude interpreter's segment from data region
|
||||
* 2013/10/11 nakamura mcexec: add a upper limit of the stack size
|
||||
* 2013/10/11 nakamura mcexec: add a path prefix for interpreter search
|
||||
* 2013/10/11 nakamura mcexec: add a interpreter invocation
|
||||
* 2013/10/08 nakamura add a AT_ENTRY entry to the auxiliary vector
|
||||
* 2013/09/02 shirasawa add terminate thread
|
||||
* 2013/08/19 shirasawa mcexec forward signal to MIC process
|
||||
* 2013/08/07 nakamura add page fault forwarding
|
||||
* 2013/07/26 shirasawa mcexec print signum or exit status
|
||||
* 2013/07/17 nakamura create more mcexec thread so that all cpu to be serviced
|
||||
* 2013/04/17 nakamura add generic system call forwarding
|
||||
*/
|
||||
#ifndef HEADER_MCCTRL_H
|
||||
#define HEADER_MCCTRL_H
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <ihk/ihk_host_driver.h>
|
||||
#include <linux/resource.h>
|
||||
#include <uprotocol.h>
|
||||
#include <linux/wait.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <ikc/master.h>
|
||||
#include <ihk/msr.h>
|
||||
#include <linux/semaphore.h>
|
||||
#include <linux/rwlock.h>
|
||||
#include <linux/threads.h>
|
||||
#include "sysfs.h"
|
||||
|
||||
#define SCD_MSG_PREPARE_PROCESS 0x1
|
||||
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
||||
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
||||
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
||||
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
|
||||
|
||||
#define SCD_MSG_INIT_CHANNEL 0x5
|
||||
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
||||
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
#define SCD_MSG_GET_VDSO_INFO 0xa
|
||||
|
||||
#define SCD_MSG_GET_CPU_MAPPING 0xc
|
||||
#define SCD_MSG_REPLY_GET_CPU_MAPPING 0xd
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
#define SCD_MSG_PROCFS_REQUEST 0x12
|
||||
#define SCD_MSG_PROCFS_ANSWER 0x13
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define SCD_MSG_SYSFS_REQ_CREATE 0x30
|
||||
/* #define SCD_MSG_SYSFS_RESP_CREATE 0x31 */
|
||||
#define SCD_MSG_SYSFS_REQ_MKDIR 0x32
|
||||
/* #define SCD_MSG_SYSFS_RESP_MKDIR 0x33 */
|
||||
#define SCD_MSG_SYSFS_REQ_SYMLINK 0x34
|
||||
/* #define SCD_MSG_SYSFS_RESP_SYMLINK 0x35 */
|
||||
#define SCD_MSG_SYSFS_REQ_LOOKUP 0x36
|
||||
/* #define SCD_MSG_SYSFS_RESP_LOOKUP 0x37 */
|
||||
#define SCD_MSG_SYSFS_REQ_UNLINK 0x38
|
||||
/* #define SCD_MSG_SYSFS_RESP_UNLINK 0x39 */
|
||||
#define SCD_MSG_SYSFS_REQ_SHOW 0x3a
|
||||
#define SCD_MSG_SYSFS_RESP_SHOW 0x3b
|
||||
#define SCD_MSG_SYSFS_REQ_STORE 0x3c
|
||||
#define SCD_MSG_SYSFS_RESP_STORE 0x3d
|
||||
#define SCD_MSG_SYSFS_REQ_RELEASE 0x3e
|
||||
#define SCD_MSG_SYSFS_RESP_RELEASE 0x3f
|
||||
#define SCD_MSG_SYSFS_REQ_SETUP 0x40
|
||||
#define SCD_MSG_SYSFS_RESP_SETUP 0x41
|
||||
/* #define SCD_MSG_SYSFS_REQ_CLEANUP 0x42 */
|
||||
/* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */
|
||||
#define SCD_MSG_PROCFS_TID_CREATE 0x44
|
||||
#define SCD_MSG_PROCFS_TID_DELETE 0x45
|
||||
|
||||
#define DMA_PIN_SHIFT 21
|
||||
|
||||
#define DO_USER_MODE
|
||||
|
||||
#define __NR_coredump 999
|
||||
|
||||
/*
 * One (length, address) pair.
 * NOTE(review): presumably one chunk of a core dump (cf. __NR_coredump);
 * confirm against the users of this table.
 */
struct coretable {
	int len;		/* length of the chunk */
	unsigned long addr;	/* address of the chunk */
};
|
||||
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int err;
|
||||
union {
|
||||
/* for traditional SCD_MSG_* */
|
||||
struct {
|
||||
int ref;
|
||||
int osnum;
|
||||
int pid;
|
||||
unsigned long arg;
|
||||
struct syscall_request req;
|
||||
unsigned long resp_pa;
|
||||
};
|
||||
|
||||
/* for SCD_MSG_SYSFS_* */
|
||||
struct {
|
||||
long sysfs_arg1;
|
||||
long sysfs_arg2;
|
||||
long sysfs_arg3;
|
||||
};
|
||||
|
||||
/* SCD_MSG_SCHEDULE_THREAD */
|
||||
struct {
|
||||
int ttid;
|
||||
};
|
||||
};
|
||||
char padding[12];
|
||||
};
|
||||
|
||||
struct mcctrl_priv {
|
||||
ihk_os_t os;
|
||||
struct program_load_desc *desc;
|
||||
};
|
||||
|
||||
/*
 * Four page addresses handed over at channel initialisation: request,
 * response, doorbell and post page.
 * NOTE(review): presumably physical addresses; confirm against the
 * channel init code.
 */
struct ikc_scd_init_param {
	unsigned long request_page;
	unsigned long response_page;
	unsigned long doorbell_page;
	unsigned long post_page;
};
|
||||
|
||||
/* Fixed-size record of eight machine words (see syscall_params.post_va). */
struct syscall_post {
	unsigned long v[8];
};
|
||||
|
||||
/*
 * Addresses of the per-channel request, response, post and doorbell
 * areas.  "_va" members are kernel virtual pointers, "_pa" members the
 * matching physical addresses (the init code derives them with
 * virt_to_phys()).
 */
struct syscall_params {
	/* request area */
	unsigned long request_pa;
	struct syscall_request *request_va;

	/* response area */
	unsigned long response_rpa;	/* NOTE(review): presumably the remote-side physical address; confirm */
	unsigned long response_pa;
	struct syscall_response *response_va;

	/* post area */
	unsigned long post_pa;
	struct syscall_post *post_va;

	/* doorbell area */
	unsigned long doorbell_pa;
	unsigned long *doorbell_va;
};
|
||||
|
||||
struct wait_queue_head_list_node {
|
||||
struct list_head list;
|
||||
wait_queue_head_t wq_syscall;
|
||||
struct task_struct *task;
|
||||
/* Denotes an exclusive wait for requester TID rtid */
|
||||
int rtid;
|
||||
int req;
|
||||
struct ikc_scd_packet *packet;
|
||||
};
|
||||
|
||||
struct mcctrl_channel {
|
||||
struct ihk_ikc_channel_desc *c;
|
||||
struct syscall_params param;
|
||||
struct ikc_scd_init_param init;
|
||||
void *dma_buf;
|
||||
};
|
||||
|
||||
struct mcctrl_per_thread_data {
|
||||
struct list_head hash;
|
||||
struct task_struct *task;
|
||||
void *data;
|
||||
};
|
||||
|
||||
#define MCCTRL_PER_THREAD_DATA_HASH_SHIFT 8
|
||||
#define MCCTRL_PER_THREAD_DATA_HASH_SIZE (1 << MCCTRL_PER_THREAD_DATA_HASH_SHIFT)
|
||||
#define MCCTRL_PER_THREAD_DATA_HASH_MASK (MCCTRL_PER_THREAD_DATA_HASH_SIZE - 1)
|
||||
|
||||
struct mcctrl_per_proc_data {
|
||||
struct list_head hash;
|
||||
int pid;
|
||||
unsigned long rpgtable; /* per process, not per OS */
|
||||
|
||||
struct list_head wq_list;
|
||||
struct list_head wq_req_list;
|
||||
struct list_head wq_list_exact;
|
||||
ihk_spinlock_t wq_list_lock;
|
||||
|
||||
struct list_head per_thread_data_hash[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
|
||||
rwlock_t per_thread_data_hash_lock[MCCTRL_PER_THREAD_DATA_HASH_SIZE];
|
||||
};
|
||||
|
||||
struct sysfsm_req {
|
||||
int busy;
|
||||
int padding;
|
||||
long lresult;
|
||||
wait_queue_head_t wq;
|
||||
};
|
||||
|
||||
struct sysfsm_data {
|
||||
size_t sysfs_bufsize;
|
||||
void *sysfs_buf;
|
||||
long sysfs_buf_rpa;
|
||||
long sysfs_buf_pa;
|
||||
struct kobject *sysfs_kobj;
|
||||
struct sysfsm_node *sysfs_root;
|
||||
struct semaphore sysfs_tree_sem;
|
||||
struct semaphore sysfs_io_sem;
|
||||
struct sysfsm_req sysfs_req;
|
||||
ihk_os_t sysfs_os;
|
||||
};
|
||||
|
||||
static inline int sysfs_inited(struct sysfsm_data *sdp)
|
||||
{
|
||||
return !!(sdp->sysfs_buf);
|
||||
} /* sysfs_inited() */
|
||||
|
||||
/* One entry of a CPU mapping table: a CPU number and its hardware id. */
struct cpu_mapping {
	int cpu_number;
	int hw_id;
};
|
||||
|
||||
struct cache_topology {
|
||||
struct ihk_cache_topology *saved;
|
||||
cpumask_t shared_cpu_map;
|
||||
|
||||
struct list_head chain;
|
||||
};
|
||||
|
||||
struct cpu_topology {
|
||||
struct cpu_mapping *cpu_mapping;
|
||||
struct ihk_cpu_topology *saved;
|
||||
cpumask_t core_siblings;
|
||||
cpumask_t thread_siblings;
|
||||
|
||||
struct list_head chain;
|
||||
struct list_head cache_list;
|
||||
};
|
||||
|
||||
struct node_topology {
|
||||
struct ihk_node_topology *saved;
|
||||
cpumask_t cpumap;
|
||||
|
||||
struct list_head chain;
|
||||
};
|
||||
|
||||
#define CPU_LONGS (((NR_CPUS) + (BITS_PER_LONG) - 1) / (BITS_PER_LONG))
|
||||
|
||||
#define MCCTRL_PER_PROC_DATA_HASH_SHIFT 7
|
||||
#define MCCTRL_PER_PROC_DATA_HASH_SIZE (1 << MCCTRL_PER_PROC_DATA_HASH_SHIFT)
|
||||
#define MCCTRL_PER_PROC_DATA_HASH_MASK (MCCTRL_PER_PROC_DATA_HASH_SIZE - 1)
|
||||
|
||||
struct mcctrl_usrdata {
|
||||
struct ihk_ikc_listen_param listen_param;
|
||||
struct ihk_ikc_listen_param listen_param2;
|
||||
ihk_os_t os;
|
||||
int num_channels;
|
||||
struct mcctrl_channel *channels;
|
||||
unsigned long *mcctrl_doorbell_va;
|
||||
unsigned long mcctrl_doorbell_pa;
|
||||
int remaining_job;
|
||||
int base_cpu;
|
||||
int job_pos;
|
||||
int mcctrl_dma_abort;
|
||||
unsigned long last_thread_exec;
|
||||
wait_queue_head_t wq_prepare;
|
||||
|
||||
struct list_head per_proc_data_hash[MCCTRL_PER_PROC_DATA_HASH_SIZE];
|
||||
rwlock_t per_proc_data_hash_lock[MCCTRL_PER_PROC_DATA_HASH_SIZE];
|
||||
|
||||
void **keys;
|
||||
struct sysfsm_data sysfsm_data;
|
||||
unsigned long cpu_online[CPU_LONGS];
|
||||
int cpu_mapping_elems;
|
||||
int padding;
|
||||
struct cpu_mapping *cpu_mapping;
|
||||
long cpu_mapping_pa;
|
||||
struct list_head cpu_topology_list;
|
||||
struct list_head node_topology_list;
|
||||
};
|
||||
|
||||
/*
 * Signal request record: condition flag, signal number, target pid/tid
 * and a 128-byte information area.
 * NOTE(review): "info" presumably holds a siginfo image; confirm.
 */
struct mcctrl_signal {
	int cond;
	int sig;
	int pid;
	int tid;
	char info[128];
};
|
||||
|
||||
int mcctrl_ikc_send(ihk_os_t os, int cpu, struct ikc_scd_packet *pisp);
|
||||
int mcctrl_ikc_send_msg(ihk_os_t os, int cpu, int msg, int ref, unsigned long arg);
|
||||
int mcctrl_ikc_is_valid_thread(ihk_os_t os, int cpu);
|
||||
|
||||
ihk_os_t osnum_to_os(int n);
|
||||
|
||||
/* syscall.c */
|
||||
int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet);
|
||||
int mcctrl_add_per_proc_data(struct mcctrl_usrdata *ud, int pid,
|
||||
struct mcctrl_per_proc_data *ppd);
|
||||
int mcctrl_delete_per_proc_data(struct mcctrl_usrdata *ud, int pid);
|
||||
inline struct mcctrl_per_proc_data *mcctrl_get_per_proc_data(
|
||||
struct mcctrl_usrdata *ud, int pid);
|
||||
|
||||
int mcctrl_add_per_thread_data(struct mcctrl_per_proc_data* ppd,
|
||||
struct task_struct *task, void *data);
|
||||
int mcctrl_delete_per_thread_data(struct mcctrl_per_proc_data* ppd,
|
||||
struct task_struct *task);
|
||||
inline struct mcctrl_per_thread_data *mcctrl_get_per_thread_data(
|
||||
struct mcctrl_per_proc_data *ppd, struct task_struct *task);
|
||||
|
||||
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
|
||||
long ret, int stid);
|
||||
|
||||
#define PROCFS_NAME_MAX 1000
|
||||
|
||||
struct procfs_read {
|
||||
unsigned long pbuf; /* physical address of the host buffer (request) */
|
||||
unsigned long offset; /* offset to read (request) */
|
||||
int count; /* bytes to read (request) */
|
||||
int eof; /* if eof is detected, 1 otherwise 0. (answer)*/
|
||||
int ret; /* read bytes (answer) */
|
||||
int status; /* non-zero if done (answer) */
|
||||
int newcpu; /* migrated new cpu (answer) */
|
||||
int readwrite; /* 0:read, 1:write */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
struct procfs_file {
|
||||
int status; /* status of processing (answer) */
|
||||
int mode; /* file mode (request) */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
void procfs_answer(unsigned int arg, int err);
|
||||
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg);
|
||||
void add_tid_entry(int osnum, int pid, int tid);
|
||||
void add_pid_entry(int osnum, int pid);
|
||||
void delete_tid_entry(int osnum, int pid, int tid);
|
||||
void delete_pid_entry(int osnum, int pid);
|
||||
void proc_exe_link(int osnum, int pid, const char *path);
|
||||
void procfs_init(int osnum);
|
||||
void procfs_exit(int osnum);
|
||||
|
||||
/* sysfs_files.c */
|
||||
void setup_sysfs_files(ihk_os_t os);
|
||||
void reply_get_cpu_mapping(long req_pa);
|
||||
void free_topology_info(ihk_os_t os);
|
||||
|
||||
/* archdep.c */
|
||||
#define VDSO_MAXPAGES 2
|
||||
struct vdso {
|
||||
long busy;
|
||||
int vdso_npages;
|
||||
char vvar_is_global;
|
||||
char hpet_is_global;
|
||||
char pvti_is_global;
|
||||
char padding;
|
||||
long vdso_physlist[VDSO_MAXPAGES];
|
||||
void *vvar_virt;
|
||||
long vvar_phys;
|
||||
void *hpet_virt;
|
||||
long hpet_phys;
|
||||
void *pvti_virt;
|
||||
long pvti_phys;
|
||||
};
|
||||
|
||||
int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp,
|
||||
unsigned long *endp);
|
||||
void get_vdso_info(ihk_os_t os, long vdso_pa);
|
||||
|
||||
struct get_cpu_mapping_req {
|
||||
int busy; /* INOUT: */
|
||||
int error; /* OUT: */
|
||||
long buf_rpa; /* OUT: physical address of struct cpu_mapping */
|
||||
int buf_elems; /* OUT: # of elements of buf */
|
||||
int padding;
|
||||
|
||||
/* work for mcctrl */
|
||||
wait_queue_head_t wq;
|
||||
};
|
||||
|
||||
#endif
|
||||
837
executer/kernel/mcctrl/procfs.c
Normal file
837
executer/kernel/mcctrl/procfs.c
Normal file
@ -0,0 +1,837 @@
|
||||
/**
|
||||
* \file procfs.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* mcctrl procfs
|
||||
* \author Naoki Hamada <nao@axe.bz> \par
|
||||
* Copyright (C) 2014 AXE, Inc.
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/resource.h>
|
||||
#include <linux/interrupt.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
#include <linux/semaphore.h>
|
||||
|
||||
//#define PROCFS_DEBUG
|
||||
|
||||
#ifdef PROCFS_DEBUG
|
||||
#define dprintk(...) printk(__VA_ARGS__)
|
||||
#else
|
||||
#define dprintk(...)
|
||||
#endif
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
|
||||
typedef uid_t kuid_t;
|
||||
typedef gid_t kgid_t;
|
||||
#endif
|
||||
|
||||
struct procfs_entry {
|
||||
char *name;
|
||||
mode_t mode;
|
||||
const struct file_operations *fops;
|
||||
};
|
||||
|
||||
#define NOD(NAME, MODE, FOP) { \
|
||||
.name = (NAME), \
|
||||
.mode = MODE, \
|
||||
.fops = FOP, \
|
||||
}
|
||||
#define PROC_DIR(NAME, MODE) \
|
||||
NOD(NAME, (S_IFDIR|(MODE)), NULL)
|
||||
#define PROC_REG(NAME, MODE, fops) \
|
||||
NOD(NAME, (S_IFREG|(MODE)), fops)
|
||||
#define PROC_TERM \
|
||||
NOD(NULL, 0, NULL)
|
||||
|
||||
static const struct procfs_entry tid_entry_stuff[];
|
||||
static const struct procfs_entry pid_entry_stuff[];
|
||||
static const struct procfs_entry base_entry_stuff[];
|
||||
static const struct file_operations mckernel_forward_ro;
|
||||
static const struct file_operations mckernel_forward;
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
|
||||
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
|
||||
size_t nbytes, loff_t *ppos);
|
||||
|
||||
/* A private data for the procfs driver. */
|
||||
struct procfs_list_entry;
|
||||
|
||||
struct procfs_list_entry {
|
||||
struct list_head list;
|
||||
struct proc_dir_entry *entry;
|
||||
struct procfs_list_entry *parent;
|
||||
struct list_head children;
|
||||
int osnum;
|
||||
char *data;
|
||||
char name[0];
|
||||
};
|
||||
|
||||
/*
|
||||
* In the procfs_file_list, McKernel procfs files are
|
||||
* listed in the manner that the leaf file is located
|
||||
* always nearer to the list top than its parent node
|
||||
* file.
|
||||
*/
|
||||
LIST_HEAD(procfs_file_list);
|
||||
DEFINE_SEMAPHORE(procfs_file_list_lock);
|
||||
|
||||
static char *
|
||||
getpath(struct procfs_list_entry *e, char *buf, int bufsize)
|
||||
{
|
||||
char *w = buf + bufsize - 1;
|
||||
|
||||
*w = '\0';
|
||||
for(;;){
|
||||
int l = strlen(e->name);
|
||||
w -= l;
|
||||
memcpy(w, e->name, l);
|
||||
e = e->parent;
|
||||
if(!e)
|
||||
return w;
|
||||
w--;
|
||||
*w = '/';
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Process SCD_MSG_PROCFS_ANSWER message.
|
||||
*
|
||||
* \param arg sent argument
|
||||
* \param err error info (redundant)
|
||||
*/
|
||||
void
|
||||
procfs_answer(unsigned int arg, int err)
|
||||
{
|
||||
dprintk("procfs: received SCD_MSG_PROCFS_ANSWER message(err = %d).\n", err);
|
||||
wake_up_interruptible(&procfsq);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
find_procfs_entry(struct procfs_list_entry *parent, const char *name)
|
||||
{
|
||||
struct list_head *list;
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
if(parent == NULL)
|
||||
list = &procfs_file_list;
|
||||
else
|
||||
list = &parent->children;
|
||||
|
||||
list_for_each_entry(e, list, list) {
|
||||
if(!strcmp(e->name, name))
|
||||
return e;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
delete_procfs_entries(struct procfs_list_entry *top)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
struct procfs_list_entry *n;
|
||||
|
||||
list_del(&top->list);
|
||||
|
||||
list_for_each_entry_safe(e, n, &top->children, list) {
|
||||
delete_procfs_entries(e);
|
||||
}
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
remove_proc_entry(top->name, top->parent? top->parent->entry: NULL);
|
||||
if(top->data)
|
||||
kfree(top->data);
|
||||
kfree(top);
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
add_procfs_entry(struct procfs_list_entry *parent, const char *name, int mode,
|
||||
kuid_t uid, kgid_t gid, const void *opaque)
|
||||
{
|
||||
struct procfs_list_entry *e = find_procfs_entry(parent, name);
|
||||
struct proc_dir_entry *pde;
|
||||
struct proc_dir_entry *parent_pde = NULL;
|
||||
int f_mode = mode & 0777;
|
||||
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
|
||||
e = kmalloc(sizeof(struct procfs_list_entry) + strlen(name) + 1,
|
||||
GFP_KERNEL);
|
||||
if(!e){
|
||||
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
memset(e, '\0', sizeof(struct procfs_list_entry));
|
||||
INIT_LIST_HEAD(&e->children);
|
||||
strcpy(e->name, name);
|
||||
|
||||
if(parent)
|
||||
parent_pde = parent->entry;
|
||||
|
||||
if (mode & S_IFDIR) {
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = proc_mkdir(name, parent_pde);
|
||||
#else
|
||||
pde = proc_mkdir_data(name, f_mode, parent_pde, e);
|
||||
#endif
|
||||
}
|
||||
else if ((mode & S_IFLNK) == S_IFLNK) {
|
||||
pde = proc_symlink(name, parent_pde, (char *)opaque);
|
||||
}
|
||||
else {
|
||||
const struct file_operations *fop;
|
||||
|
||||
if(opaque)
|
||||
fop = (const struct file_operations *)opaque;
|
||||
else if(mode & S_IWUSR)
|
||||
fop = &mckernel_forward;
|
||||
else
|
||||
fop = &mckernel_forward_ro;
|
||||
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = create_proc_entry(name, f_mode, parent_pde);
|
||||
if(pde)
|
||||
pde->proc_fops = fop;
|
||||
#else
|
||||
pde = proc_create_data(name, f_mode, parent_pde, fop, e);
|
||||
if(pde)
|
||||
proc_set_user(pde, uid, gid);
|
||||
#endif
|
||||
}
|
||||
if(!pde){
|
||||
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", name);
|
||||
kfree(e);
|
||||
return NULL;
|
||||
}
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde->uid = uid;
|
||||
pde->gid = gid;
|
||||
pde->data = e;
|
||||
#endif
|
||||
|
||||
if(parent)
|
||||
e->osnum = parent->osnum;
|
||||
e->entry = pde;
|
||||
e->parent = parent;
|
||||
list_add(&(e->list), parent? &(parent->children): &procfs_file_list);
|
||||
|
||||
return e;
|
||||
}
|
||||
|
||||
static void
|
||||
add_procfs_entries(struct procfs_list_entry *parent,
|
||||
const struct procfs_entry *entries, kuid_t uid, kgid_t gid)
|
||||
{
|
||||
const struct procfs_entry *p;
|
||||
|
||||
for(p = entries; p->name; p++){
|
||||
add_procfs_entry(parent, p->name, p->mode, uid, gid, p->fops);
|
||||
}
|
||||
}
|
||||
|
||||
static const struct cred *
|
||||
get_pid_cred(int pid)
|
||||
{
|
||||
struct task_struct *task = NULL;
|
||||
|
||||
if(pid > 0){
|
||||
task = pid_task(find_vpid(pid), PIDTYPE_PID);
|
||||
if(task){
|
||||
return __task_cred(task);
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/**
 * \brief Find the top-level "mcos<osnum>" directory node.
 *
 * \param osnum os number
 * \return the node, or NULL when it does not exist
 */
static struct procfs_list_entry *
find_base_entry(int osnum)
{
	/* "mcos" (4) + widest int "-2147483648" (11) + NUL */
	char name[16];

	snprintf(name, sizeof(name), "mcos%d", osnum);
	return find_procfs_entry(NULL, name);
}
|
||||
|
||||
/**
 * \brief Find the "mcos<osnum>/<pid>" directory node.
 *
 * \param osnum os number
 * \param pid   process id
 * \return the node, or NULL when the base or the PID directory is missing
 */
static struct procfs_list_entry *
find_pid_entry(int osnum, int pid)
{
	struct procfs_list_entry *base = find_base_entry(osnum);
	char pidname[12];	/* widest int "-2147483648" + NUL */

	if (base == NULL)
		return NULL;

	sprintf(pidname, "%d", pid);
	return find_procfs_entry(base, pidname);
}
|
||||
|
||||
/**
 * \brief Find the "mcos<osnum>/<pid>/task/<tid>" directory node.
 *
 * \param osnum os number
 * \param pid   process id
 * \param tid   thread id
 * \return the node, or NULL when any path component is missing
 */
static struct procfs_list_entry *
find_tid_entry(int osnum, int pid, int tid)
{
	struct procfs_list_entry *pid_dir;
	struct procfs_list_entry *task_dir;
	char tidname[12];	/* widest int "-2147483648" + NUL */

	pid_dir = find_pid_entry(osnum, pid);
	if (pid_dir == NULL)
		return NULL;

	task_dir = find_procfs_entry(pid_dir, "task");
	if (task_dir == NULL)
		return NULL;

	sprintf(tidname, "%d", tid);
	return find_procfs_entry(task_dir, tidname);
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
get_base_entry(int osnum)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
e = find_procfs_entry(NULL, name);
|
||||
if(!e){
|
||||
e = add_procfs_entry(NULL, name, S_IFDIR | 0555,
|
||||
uid, gid, NULL);
|
||||
e->osnum = osnum;
|
||||
}
|
||||
return e;
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
get_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
if(!(parent = find_procfs_entry(NULL, name)))
|
||||
return NULL;
|
||||
sprintf(name, "%d", pid);
|
||||
e = find_procfs_entry(parent, name);
|
||||
if(!e)
|
||||
e = add_procfs_entry(parent, name, S_IFDIR | 0555,
|
||||
uid, gid, NULL);
|
||||
return e;
|
||||
}
|
||||
|
||||
static struct procfs_list_entry *
|
||||
get_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e;
|
||||
char name[12];
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
sprintf(name, "mcos%d", osnum);
|
||||
if(!(parent = find_procfs_entry(NULL, name)))
|
||||
return NULL;
|
||||
sprintf(name, "%d", pid);
|
||||
if(!(parent = find_procfs_entry(parent, name)))
|
||||
return NULL;
|
||||
if(!(parent = find_procfs_entry(parent, "task")))
|
||||
return NULL;
|
||||
sprintf(name, "%d", tid);
|
||||
e = find_procfs_entry(parent, name);
|
||||
if(!e)
|
||||
e = add_procfs_entry(parent, name, S_IFDIR | 0555,
|
||||
uid, gid, NULL);
|
||||
return e;
|
||||
}
|
||||
|
||||
static void
|
||||
_add_tid_entry(int osnum, int pid, int tid, const struct cred *cred)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *exe;
|
||||
|
||||
parent = get_tid_entry(osnum, pid, tid);
|
||||
if(parent){
|
||||
add_procfs_entries(parent, tid_entry_stuff,
|
||||
cred->uid, cred->gid);
|
||||
exe = find_procfs_entry(parent->parent->parent, "exe");
|
||||
if(exe){
|
||||
add_procfs_entry(parent, "exe", S_IFLNK | 0777,
|
||||
cred->uid, cred->gid, exe->data);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
add_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
const struct cred *cred = get_pid_cred(pid);
|
||||
|
||||
if(!cred)
|
||||
return;
|
||||
down(&procfs_file_list_lock);
|
||||
_add_tid_entry(osnum, pid, tid, cred);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
add_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
const struct cred *cred = get_pid_cred(pid);
|
||||
|
||||
if(!cred)
|
||||
return;
|
||||
down(&procfs_file_list_lock);
|
||||
parent = get_pid_entry(osnum, pid);
|
||||
add_procfs_entries(parent, pid_entry_stuff, cred->uid, cred->gid);
|
||||
_add_tid_entry(osnum, pid, pid, cred);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
delete_tid_entry(int osnum, int pid, int tid)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
e = find_tid_entry(osnum, pid, tid);
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
delete_pid_entry(int osnum, int pid)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
e = find_pid_entry(osnum, pid);
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
void
|
||||
proc_exe_link(int osnum, int pid, const char *path)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
parent = find_pid_entry(osnum, pid);
|
||||
if(parent){
|
||||
struct procfs_list_entry *task;
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
e = add_procfs_entry(parent, "exe", S_IFLNK | 0777, uid, gid,
|
||||
path);
|
||||
e->data = kmalloc(strlen(path) + 1, GFP_KERNEL);
|
||||
strcpy(e->data, path);
|
||||
task = find_procfs_entry(parent, "task");
|
||||
list_for_each_entry(parent, &task->children, list) {
|
||||
add_procfs_entry(parent, "exe", S_IFLNK | 0777,
|
||||
uid, gid, path);
|
||||
}
|
||||
}
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Initialization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
void
|
||||
procfs_init(int osnum)
|
||||
{
|
||||
struct procfs_list_entry *parent;
|
||||
kuid_t uid = KUIDT_INIT(0);
|
||||
kgid_t gid = KGIDT_INIT(0);
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
parent = get_base_entry(osnum);
|
||||
add_procfs_entries(parent, base_entry_stuff, uid, gid);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Finalization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
void
|
||||
procfs_exit(int osnum)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
|
||||
down(&procfs_file_list_lock);
|
||||
e = find_base_entry(osnum);
|
||||
if(e)
|
||||
delete_procfs_entries(e);
|
||||
up(&procfs_file_list_lock);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief The callback function for McKernel procfs
|
||||
*
|
||||
* This function conforms to the 2) way of fs/proc/generic.c
|
||||
* from linux-2.6.39.4.
|
||||
*/
|
||||
static ssize_t
|
||||
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct inode * inode = file->f_path.dentry->d_inode;
|
||||
char *kern_buffer = NULL;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r = NULL;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
char pathbuf[PROCFS_NAME_MAX];
|
||||
char *path;
|
||||
|
||||
path = getpath(e, pathbuf, 256);
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
path, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contigous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
r->pbuf = pbuf;
|
||||
r->eof = 0;
|
||||
r->ret = -EIO; /* default */
|
||||
r->status = 0;
|
||||
r->offset = offset;
|
||||
r->count = count;
|
||||
r->readwrite = 0;
|
||||
strncpy((char *)r->fname, path, PROCFS_NAME_MAX);
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = 0;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(osnum_to_os(e->osnum), 0, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
|
||||
if (r->ret > 0) {
|
||||
if (copy_to_user(buf, kern_buffer, r->ret)) {
|
||||
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ppos += r->ret;
|
||||
}
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
if(kern_buffer)
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
if(r)
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static ssize_t
|
||||
mckernel_procfs_write(struct file *file, const char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct inode * inode = file->f_path.dentry->d_inode;
|
||||
char *kern_buffer = NULL;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r = NULL;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
char pathbuf[PROCFS_NAME_MAX];
|
||||
char *path;
|
||||
|
||||
path = getpath(e, pathbuf, 256);
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
path, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contigous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
if (copy_from_user(kern_buffer, buf, nbytes)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
dprintk("offset: %lx, count: %d, cpu: %d\n", offset, count, e->cpu);
|
||||
|
||||
r->pbuf = pbuf;
|
||||
r->eof = 0;
|
||||
r->ret = -EIO; /* default */
|
||||
r->status = 0;
|
||||
r->offset = offset;
|
||||
r->count = count;
|
||||
r->readwrite = 1;
|
||||
strncpy((char *)r->fname, path, PROCFS_NAME_MAX);
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = 0;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(osnum_to_os(e->osnum), 0, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
|
||||
if (r->ret > 0) {
|
||||
*ppos += r->ret;
|
||||
}
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
if(kern_buffer)
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
if(r)
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static loff_t
|
||||
mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
|
||||
{
|
||||
switch (orig) {
|
||||
case 0:
|
||||
file->f_pos = offset;
|
||||
break;
|
||||
case 1:
|
||||
file->f_pos += offset;
|
||||
break;
|
||||
default:
|
||||
return -EINVAL;
|
||||
}
|
||||
return file->f_pos;
|
||||
}
|
||||
|
||||
/*
 * Deferred procfs request: filled in by procfsm_packet_handler() and
 * consumed (then freed) by procfsm_work_main().
 */
struct procfs_work {
|
||||
void *os; /* OS handle, translated with ihk_host_os_get_index() */
|
||||
int msg; /* SCD_MSG_PROCFS_TID_CREATE or SCD_MSG_PROCFS_TID_DELETE */
|
||||
int pid; /* forwarded to add_tid_entry()/delete_tid_entry() */
|
||||
unsigned long arg; /* forwarded to add_tid_entry()/delete_tid_entry() */
|
||||
struct work_struct work; /* work item embedded for container_of() */
|
||||
};
|
||||
|
||||
static void procfsm_work_main(struct work_struct *work0)
|
||||
{
|
||||
struct procfs_work *work = container_of(work0, struct procfs_work, work);
|
||||
|
||||
switch (work->msg) {
|
||||
case SCD_MSG_PROCFS_TID_CREATE:
|
||||
add_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_TID_DELETE:
|
||||
delete_tid_entry(ihk_host_os_get_index(work->os), work->pid, work->arg);
|
||||
break;
|
||||
|
||||
default:
|
||||
printk("%s: unknown work: msg: %d, pid: %d, arg: %lu)\n",
|
||||
__FUNCTION__, work->msg, work->pid, work->arg);
|
||||
break;
|
||||
}
|
||||
|
||||
kfree(work);
|
||||
return;
|
||||
}
|
||||
|
||||
int procfsm_packet_handler(void *os, int msg, int pid, unsigned long arg)
|
||||
{
|
||||
struct procfs_work *work = NULL;
|
||||
|
||||
work = kzalloc(sizeof(*work), GFP_ATOMIC);
|
||||
if (!work) {
|
||||
printk("%s: kzalloc failed\n", __FUNCTION__);
|
||||
return -1;
|
||||
}
|
||||
|
||||
work->os = os;
|
||||
work->msg = msg;
|
||||
work->pid = pid;
|
||||
work->arg = arg;
|
||||
INIT_WORK(&work->work, &procfsm_work_main);
|
||||
|
||||
schedule_work(&work->work);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* File operations for read-only forwarded entries: seek and read, no write handler. */
static const struct file_operations mckernel_forward_ro = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = NULL,
|
||||
};
|
||||
|
||||
/* File operations for forwarded entries that support seek, read and write. */
static const struct file_operations mckernel_forward = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = mckernel_procfs_write,
|
||||
};
|
||||
|
||||
/*
 * Entry table terminated by PROC_TERM; only "mem" and "stat" are active,
 * the commented-out lines are entries that are not provided.
 * NOTE(review): presumably the per-thread (task/<tid>) entries - confirm
 * against the code that consumes this table.
 */
static const struct procfs_entry tid_entry_stuff[] = {
|
||||
// PROC_REG("auxv", S_IRUSR, NULL),
|
||||
// PROC_REG("clear_refs", S_IWUSR, NULL),
|
||||
// PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL),
|
||||
// PROC_REG("environ", S_IRUSR, NULL),
|
||||
// PROC_LNK("exe", mckernel_readlink),
|
||||
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
|
||||
// PROC_REG("maps", S_IRUGO, NULL),
|
||||
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL),
|
||||
// PROC_REG("pagemap", S_IRUGO, NULL),
|
||||
// PROC_REG("smaps", S_IRUGO, NULL),
|
||||
PROC_REG("stat", S_IRUGO, NULL),
|
||||
// PROC_REG("statm", S_IRUGO, NULL),
|
||||
// PROC_REG("status", S_IRUGO, NULL),
|
||||
// PROC_REG("syscall", S_IRUGO, NULL),
|
||||
// PROC_REG("wchan", S_IRUGO, NULL),
|
||||
PROC_TERM
|
||||
};
|
||||
|
||||
/*
 * Entry table terminated by PROC_TERM; contains regular files plus the
 * "task" directory. Commented-out lines are entries that are not provided.
 * NOTE(review): presumably the per-process (<pid>) entries - confirm
 * against the code that consumes this table.
 * NOTE(review): "cgroup" and "cpuset" are registered with S_IXUSR only,
 * which looks unusual for regular files - confirm this is intended.
 */
static const struct procfs_entry pid_entry_stuff[] = {
|
||||
PROC_REG("auxv", S_IRUSR, NULL),
|
||||
PROC_REG("cgroup", S_IXUSR, NULL),
|
||||
// PROC_REG("clear_refs", S_IWUSR, NULL),
|
||||
PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("comm", S_IRUGO|S_IWUSR, NULL),
|
||||
// PROC_REG("coredump_filter", S_IRUGO|S_IWUSR, NULL),
|
||||
PROC_REG("cpuset", S_IXUSR, NULL),
|
||||
// PROC_REG("environ", S_IRUSR, NULL),
|
||||
// PROC_LNK("exe", mckernel_readlink),
|
||||
// PROC_REG("limits", S_IRUSR|S_IWUSR, NULL),
|
||||
PROC_REG("maps", S_IRUGO, NULL),
|
||||
PROC_REG("mem", S_IRUSR|S_IWUSR, NULL),
|
||||
PROC_REG("pagemap", S_IRUGO, NULL),
|
||||
PROC_REG("smaps", S_IRUGO, NULL),
|
||||
// PROC_REG("stat", S_IRUGO, NULL),
|
||||
// PROC_REG("statm", S_IRUGO, NULL),
|
||||
PROC_REG("status", S_IRUGO, NULL),
|
||||
// PROC_REG("syscall", S_IRUGO, NULL),
|
||||
PROC_DIR("task", S_IRUGO|S_IXUGO),
|
||||
// PROC_REG("wchan", S_IRUGO, NULL),
|
||||
PROC_TERM
|
||||
};
|
||||
|
||||
/*
 * Entries that procfs_init() adds below the base entry of an OS instance;
 * terminated by PROC_TERM. Only "stat" is active, the commented-out lines
 * are entries that are not provided.
 */
static const struct procfs_entry base_entry_stuff[] = {
|
||||
// PROC_REG("cmdline", S_IRUGO, NULL),
|
||||
// PROC_REG("cpuinfo", S_IRUGO, NULL),
|
||||
// PROC_REG("meminfo", S_IRUGO, NULL),
|
||||
// PROC_REG("pagetypeinfo",S_IRUGO, NULL),
|
||||
// PROC_REG("softirq", S_IRUGO, NULL),
|
||||
PROC_REG("stat", S_IRUGO, NULL),
|
||||
// PROC_REG("uptime", S_IRUGO, NULL),
|
||||
// PROC_REG("version", S_IRUGO, NULL),
|
||||
// PROC_REG("vmallocinfo",S_IRUSR, NULL),
|
||||
// PROC_REG("vmstat", S_IRUGO, NULL),
|
||||
// PROC_REG("zoneinfo", S_IRUGO, NULL),
|
||||
PROC_TERM
|
||||
};
|
||||
File diff suppressed because it is too large
Load Diff
2481
executer/kernel/mcctrl/sysfs.c
Normal file
2481
executer/kernel/mcctrl/sysfs.c
Normal file
File diff suppressed because it is too large
Load Diff
73
executer/kernel/mcctrl/sysfs.h
Normal file
73
executer/kernel/mcctrl/sysfs.h
Normal file
@ -0,0 +1,73 @@
|
||||
/**
|
||||
* \file sysfs.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* sysfs framework API definitions
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2016 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef MCCTRL_SYSFS_H
|
||||
#define MCCTRL_SYSFS_H
|
||||
|
||||
#define SYSFS_PATH_MAX 1024
|
||||
|
||||
/* for sysfs_unlinkf() */
|
||||
#define SYSFS_UNLINK_KEEP_ANCESTOR 0x01
|
||||
|
||||
|
||||
/*
 * Callback table for a sysfs file created with sysfsm_createf().
 * Every callback receives the table itself and the caller-supplied
 * instance pointer.
 * NOTE(review): show/store presumably serve reads/writes of the file and
 * return the number of bytes handled or a negative errno - confirm in sysfs.c.
 */
struct sysfsm_ops {
|
||||
ssize_t (*show)(struct sysfsm_ops *ops, void *instance, void *buf,
|
||||
size_t bufsize);
|
||||
ssize_t (*store)(struct sysfsm_ops *ops, void *instance,
|
||||
const void *buf, size_t bufsize);
|
||||
void (*release)(struct sysfsm_ops *ops, void *instance);
|
||||
};
|
||||
|
||||
/*
 * Opaque reference to a sysfs object; filled in by sysfsm_mkdirf() and
 * sysfsm_lookupf() and consumed by sysfsm_symlinkf().
 */
struct sysfs_handle {
|
||||
long handle;
|
||||
};
|
||||
typedef struct sysfs_handle sysfs_handle_t;
|
||||
|
||||
/*
 * Bitmap descriptor.
 * NOTE(review): presumably the instance argument used with the
 * SYSFS_SNOOPING_OPS_pb/pbl special ops - confirm against sysfs.c.
 */
struct sysfsm_bitmap_param {
|
||||
int nbits; /* presumably the number of valid bits behind ptr - confirm */
|
||||
int padding; /* explicit padding before the pointer member */
|
||||
void *ptr;
|
||||
};
|
||||
|
||||
/* Inclusive range of small integer values reserved for special ops. */
#define SYSFS_SPECIAL_OPS_MIN ((void *)1)
#define SYSFS_SPECIAL_OPS_MAX ((void *)1000)

/* Special ops identifiers; all of them lie inside the range above. */
#define SYSFS_SNOOPING_OPS_d32 ((void *)1)
#define SYSFS_SNOOPING_OPS_d64 ((void *)2)
#define SYSFS_SNOOPING_OPS_u32 ((void *)3)
#define SYSFS_SNOOPING_OPS_u64 ((void *)4)
#define SYSFS_SNOOPING_OPS_s ((void *)5)
#define SYSFS_SNOOPING_OPS_pbl ((void *)6)
#define SYSFS_SNOOPING_OPS_pb ((void *)7)
#define SYSFS_SNOOPING_OPS_u32K ((void *)8)

/**
 * \brief Tell whether an ops pointer is a special ops identifier
 *
 * \param ops ops pointer to classify
 * \return non-zero when the pointer value lies within
 *         [SYSFS_SPECIAL_OPS_MIN, SYSFS_SPECIAL_OPS_MAX], 0 otherwise
 */
static inline int is_special_sysfs_ops(void *ops)
{
	const long value = (long)ops;

	if (value < (long)SYSFS_SPECIAL_OPS_MIN)
		return 0;

	return value <= (long)SYSFS_SPECIAL_OPS_MAX;
}
|
||||
|
||||
extern int sysfsm_createf(ihk_os_t os, struct sysfsm_ops *ops, void *instance,
|
||||
int mode, const char *fmt, ...);
|
||||
extern int sysfsm_mkdirf(ihk_os_t os, sysfs_handle_t *dirhp,
|
||||
const char *fmt, ...);
|
||||
extern int sysfsm_symlinkf(ihk_os_t os, sysfs_handle_t targeth,
|
||||
const char *fmt, ...);
|
||||
extern int sysfsm_lookupf(ihk_os_t os, sysfs_handle_t *objhp,
|
||||
const char *fmt, ...);
|
||||
extern int sysfsm_unlinkf(ihk_os_t os, int flags, const char *fmt, ...);
|
||||
|
||||
extern void sysfsm_cleanup(ihk_os_t os);
|
||||
extern void sysfsm_packet_handler(void *os, int msg, int err, long arg1,
|
||||
long arg2);
|
||||
|
||||
#endif /* MCCTRL_SYSFS_H */
|
||||
1038
executer/kernel/mcctrl/sysfs_files.c
Normal file
1038
executer/kernel/mcctrl/sysfs_files.c
Normal file
File diff suppressed because it is too large
Load Diff
88
executer/kernel/mcctrl/sysfs_msg.h
Normal file
88
executer/kernel/mcctrl/sysfs_msg.h
Normal file
@ -0,0 +1,88 @@
|
||||
/**
|
||||
* \file sysfs_msg.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* message declarations for sysfs framework
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef MCKERNEL_SYSFS_MSG_H
|
||||
#define MCKERNEL_SYSFS_MSG_H
|
||||
|
||||
#define SYSFS_PATH_MAX 1024
|
||||
|
||||
/*
 * Parameter block of a sysfs "create" request.
 * NOTE(review): the layout is presumably shared with the peer kernel -
 * do not reorder or resize members without updating both sides.
 */
struct sysfs_req_create_param {
|
||||
int mode; /* presumably the file mode of the new entry - confirm */
|
||||
int error; /* presumably the result code of the request - confirm */
|
||||
long client_ops; /* ops value; may be a SYSFS_SNOOPING_OPS_* id - TODO confirm */
|
||||
long client_instance;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding;
|
||||
int busy; /* presumably a completion flag polled by the requester - confirm */
|
||||
}; /* struct sysfs_req_create_param */
|
||||
|
||||
#define SYSFS_SPECIAL_OPS_MIN ((void *)1)
|
||||
#define SYSFS_SPECIAL_OPS_MAX ((void *)1000)
|
||||
|
||||
#define SYSFS_SNOOPING_OPS_d32 ((void *)1)
|
||||
#define SYSFS_SNOOPING_OPS_d64 ((void *)2)
|
||||
#define SYSFS_SNOOPING_OPS_u32 ((void *)3)
|
||||
#define SYSFS_SNOOPING_OPS_u64 ((void *)4)
|
||||
#define SYSFS_SNOOPING_OPS_s ((void *)5)
|
||||
#define SYSFS_SNOOPING_OPS_pbl ((void *)6)
|
||||
#define SYSFS_SNOOPING_OPS_pb ((void *)7)
|
||||
#define SYSFS_SNOOPING_OPS_u32K ((void *)8)
|
||||
|
||||
/*
 * Parameter block of a sysfs "mkdir" request.
 * NOTE(review): the layout is presumably shared with the peer kernel -
 * do not reorder or resize members without updating both sides.
 */
struct sysfs_req_mkdir_param {
|
||||
int error; /* presumably the result code of the request - confirm */
|
||||
int padding;
|
||||
long handle; /* presumably the handle of the created directory - confirm */
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy; /* presumably a completion flag polled by the requester - confirm */
|
||||
}; /* struct sysfs_req_mkdir_param */
|
||||
|
||||
/*
 * Parameter block of a sysfs "symlink" request.
 * NOTE(review): the layout is presumably shared with the peer kernel -
 * do not reorder or resize members without updating both sides.
 */
struct sysfs_req_symlink_param {
|
||||
int error; /* presumably the result code of the request - confirm */
|
||||
int padding;
|
||||
long target; /* presumably the handle of the link target - confirm */
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy; /* presumably a completion flag polled by the requester - confirm */
|
||||
}; /* struct sysfs_req_symlink_param */
|
||||
|
||||
/*
 * Parameter block of a sysfs "lookup" request.
 * NOTE(review): the layout is presumably shared with the peer kernel -
 * do not reorder or resize members without updating both sides.
 */
struct sysfs_req_lookup_param {
|
||||
int error; /* presumably the result code of the request - confirm */
|
||||
int padding;
|
||||
long handle; /* presumably the handle of the object found - confirm */
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy; /* presumably a completion flag polled by the requester - confirm */
|
||||
}; /* struct sysfs_req_lookup_param */
|
||||
|
||||
/* for sysfs_req_unlink_param.flags */
|
||||
#define SYSFS_UNLINK_KEEP_ANCESTOR 0x01
|
||||
|
||||
/*
 * Parameter block of a sysfs "unlink" request.
 * NOTE(review): the layout is presumably shared with the peer kernel -
 * do not reorder or resize members without updating both sides.
 */
struct sysfs_req_unlink_param {
|
||||
int flags; /* SYSFS_UNLINK_KEEP_ANCESTOR or 0 */
|
||||
int error; /* presumably the result code of the request - confirm */
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding;
|
||||
int busy; /* presumably a completion flag polled by the requester - confirm */
|
||||
}; /* struct sysfs_req_unlink_param */
|
||||
|
||||
/*
 * Parameter block of a sysfs "setup" request; padding3 occupies the space
 * that the path member takes in the other request blocks.
 * NOTE(review): the layout is presumably shared with the peer kernel -
 * do not reorder or resize members without updating both sides.
 */
struct sysfs_req_setup_param {
|
||||
int error; /* presumably the result code of the request - confirm */
|
||||
int padding;
|
||||
long buf_rpa; /* presumably a physical address of the transfer buffer - confirm */
|
||||
long bufsize;
|
||||
char padding3[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy; /* presumably a completion flag polled by the requester - confirm */
|
||||
}; /* struct sysfs_req_setup_param */
|
||||
|
||||
#endif /* MCKERNEL_SYSFS_MSG_H */
|
||||
40
executer/kernel/mcoverlayfs/Makefile.in
Normal file
40
executer/kernel/mcoverlayfs/Makefile.in
Normal file
@ -0,0 +1,40 @@
|
||||
ENABLE_MCOVERLAYFS=@ENABLE_MCOVERLAYFS@

# Split the release string of the running kernel into its numeric parts and
# derive LINUX_VERSION_CODE from them (major * 65536 + minor * 256 + patch).
RELEASE=$(shell uname -r)
MAJOR=$(shell echo ${RELEASE} | sed -e 's/^\([0-9]*\).*/\1/')
MINOR=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.\([0-9]*\).*/\1/')
PATCH=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/')
LINUX_VERSION_CODE=$(shell expr \( ${MAJOR} \* 65536 \) + \( ${MINOR} \* 256 \) + ${PATCH})

# RHEL_RELEASE is the number following "x.y.z-" in the release string, or
# empty when the release string has no such suffix (sed leaves it unchanged).
# The comparisons use "=" because "==" is not understood by every /bin/sh.
RHEL_RELEASE_TMP=$(shell echo ${RELEASE} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/')
RHEL_RELEASE=$(shell if [ "${RELEASE}" = "${RHEL_RELEASE_TMP}" ]; then echo ""; else echo ${RHEL_RELEASE_TMP}; fi)
BUILD_MODULE_TMP=$(shell if [ "${RHEL_RELEASE}" = "" ]; then echo "org"; else echo "rhel"; fi)

# Select the source directory matching the running kernel; "none" disables
# the build.  262144..262399 is 4.0.x, 199168 is 3.10.0.
BUILD_MODULE=none
ifeq ($(ENABLE_MCOVERLAYFS),yes)
ifeq ($(BUILD_MODULE_TMP),org)
ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -ge 262144 -a ${LINUX_VERSION_CODE} -lt 262400 ]; then echo "linux-4.0.9"; else echo "none"; fi)
endif
endif
ifeq ($(BUILD_MODULE_TMP),rhel)
ifeq ($(BUILD_MODULE),none)
BUILD_MODULE=$(shell if [ ${LINUX_VERSION_CODE} -eq 199168 -a ${RHEL_RELEASE} -eq 327 ]; then echo "linux-3.10.0-327.36.1.el7"; else echo "none"; fi)
endif
endif
endif

.PHONY: clean install modules

# Recursive invocations use $(MAKE) and "&&" so that options are passed on
# and nothing runs in the wrong directory when cd fails.
modules:
ifneq ($(BUILD_MODULE),none)
	@(cd $(BUILD_MODULE) && $(MAKE) modules)
endif

clean:
	@(cd linux-3.10.0-327.36.1.el7 && $(MAKE) clean)
	@(cd linux-4.0.9 && $(MAKE) clean)

install:
ifneq ($(BUILD_MODULE),none)
	@(cd $(BUILD_MODULE) && $(MAKE) install)
endif
|
||||
|
||||
@ -0,0 +1,21 @@
|
||||
# Kbuild makefile for the mcoverlay kernel module.
# KDIR, ARCH, KMODDIR and src are substituted by configure.
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
KMODDIR = @KMODDIR@
|
||||
src = @abs_srcdir@
|
||||
|
||||
obj-m += mcoverlay.o
|
||||
|
||||
# Objects linked into mcoverlay.ko
mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
# Build the module as an external module against the kernel tree in KDIR.
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
# Remove the build products from the current directory.
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
# Copy mcoverlay.ko into KMODDIR, creating the directory when needed.
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcoverlay.ko $(KMODDIR)
|
||||
|
||||
461
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/copy_up.c
Normal file
461
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/copy_up.c
Normal file
@ -0,0 +1,461 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/fdtable.h>
|
||||
#include <linux/ratelimit.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
|
||||
|
||||
static unsigned ovl_check_copy_up = 1;
|
||||
module_param_named(check_copy_up, ovl_check_copy_up, uint,
|
||||
S_IWUSR | S_IRUGO);
|
||||
MODULE_PARM_DESC(ovl_check_copy_up,
|
||||
"Warn on copy-up when causing process also has a R/O fd open");
|
||||
|
||||
static int ovl_check_fd(const void *data, struct file *f, unsigned fd)
|
||||
{
|
||||
const struct dentry *dentry = data;
|
||||
|
||||
if (f->f_path.dentry == dentry)
|
||||
pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
|
||||
f, fd, current->pid, current->comm);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the fds open by this process and warn if something like the following
|
||||
* scenario is about to occur:
|
||||
*
|
||||
* fd1 = open("foo", O_RDONLY);
|
||||
* fd2 = open("foo", O_RDWR);
|
||||
*/
|
||||
static void ovl_do_check_copy_up(struct dentry *dentry)
|
||||
{
|
||||
if (ovl_check_copy_up)
|
||||
iterate_fd(current->files, 0, ovl_check_fd, dentry);
|
||||
}
|
||||
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
|
||||
{
|
||||
ssize_t list_size, size, value_size = 0;
|
||||
char *buf, *name, *value = NULL;
|
||||
int uninitialized_var(error);
|
||||
|
||||
if (!old->d_inode->i_op->getxattr ||
|
||||
!new->d_inode->i_op->getxattr)
|
||||
return 0;
|
||||
|
||||
list_size = vfs_listxattr(old, NULL, 0);
|
||||
if (list_size <= 0) {
|
||||
if (list_size == -EOPNOTSUPP)
|
||||
return 0;
|
||||
return list_size;
|
||||
}
|
||||
|
||||
buf = kzalloc(list_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
list_size = vfs_listxattr(old, buf, list_size);
|
||||
if (list_size <= 0) {
|
||||
error = list_size;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
|
||||
retry:
|
||||
size = vfs_getxattr(old, name, value, value_size);
|
||||
if (size == -ERANGE)
|
||||
size = vfs_getxattr(old, name, NULL, 0);
|
||||
|
||||
if (size < 0) {
|
||||
error = size;
|
||||
break;
|
||||
}
|
||||
|
||||
if (size > value_size) {
|
||||
void *new;
|
||||
|
||||
new = krealloc(value, size, GFP_KERNEL);
|
||||
if (!new) {
|
||||
error = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
value = new;
|
||||
value_size = size;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
error = vfs_setxattr(new, name, value, size, 0);
|
||||
if (error)
|
||||
break;
|
||||
}
|
||||
kfree(value);
|
||||
out:
|
||||
kfree(buf);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Copy @len bytes of file data from @old to @new in chunks of at most
 * OVL_COPY_UP_CHUNK_SIZE using do_splice_direct().
 *
 * Returns 0 when len is 0 or the loop finished, -EINTR when a fatal signal
 * is pending, or the error from opening the files / splicing.
 */
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
{
	struct file *src;
	struct file *dst;
	loff_t src_pos = 0;
	loff_t dst_pos = 0;
	int error = 0;

	if (len == 0)
		return 0;

	src = ovl_path_open(old, O_RDONLY);
	if (IS_ERR(src))
		return PTR_ERR(src);

	dst = ovl_path_open(new, O_WRONLY);
	if (IS_ERR(dst)) {
		error = PTR_ERR(dst);
		goto out_fput;
	}

	/* FIXME: copy up sparse files efficiently */
	while (len) {
		size_t chunk = OVL_COPY_UP_CHUNK_SIZE;
		long copied;

		if (len < chunk)
			chunk = len;

		if (signal_pending_state(TASK_KILLABLE, current)) {
			error = -EINTR;
			break;
		}

		copied = do_splice_direct(src, &src_pos, dst, &dst_pos,
					  chunk, SPLICE_F_MOVE);
		if (copied <= 0) {
			error = copied;
			break;
		}
		WARN_ON(src_pos != dst_pos);

		len -= copied;
	}

	fput(dst);
out_fput:
	fput(src);
	return error;
}
|
||||
|
||||
static char *ovl_read_symlink(struct dentry *realdentry)
|
||||
{
|
||||
int res;
|
||||
char *buf;
|
||||
struct inode *inode = realdentry->d_inode;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
res = -EINVAL;
|
||||
if (!inode->i_op->readlink)
|
||||
goto err;
|
||||
|
||||
res = -ENOMEM;
|
||||
buf = (char *) __get_free_page(GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
old_fs = get_fs();
|
||||
set_fs(get_ds());
|
||||
/* The cast to a user pointer is valid due to the set_fs() */
|
||||
res = inode->i_op->readlink(realdentry,
|
||||
(char __user *)buf, PAGE_SIZE - 1);
|
||||
set_fs(old_fs);
|
||||
if (res < 0) {
|
||||
free_page((unsigned long) buf);
|
||||
goto err;
|
||||
}
|
||||
buf[res] = '\0';
|
||||
|
||||
return buf;
|
||||
|
||||
err:
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
.ia_valid =
|
||||
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
|
||||
.ia_atime = stat->atime,
|
||||
.ia_mtime = stat->mtime,
|
||||
};
|
||||
|
||||
return notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
|
||||
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!S_ISLNK(stat->mode)) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_MODE,
|
||||
.ia_mode = stat->mode,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_UID | ATTR_GID,
|
||||
.ia_uid = stat->uid,
|
||||
.ia_gid = stat->gid,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err)
|
||||
ovl_set_timestamps(upperdentry, stat);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
|
||||
struct dentry *dentry, struct path *lowerpath,
|
||||
struct kstat *stat, struct iattr *attr,
|
||||
const char *link)
|
||||
{
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry = NULL;
|
||||
struct dentry *upper = NULL;
|
||||
umode_t mode = stat->mode;
|
||||
int err;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out1;
|
||||
|
||||
/* Can't properly set mode on creation because of the umask */
|
||||
stat->mode &= S_IFMT;
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
|
||||
stat->mode = mode;
|
||||
if (err)
|
||||
goto out2;
|
||||
|
||||
if (S_ISREG(stat->mode)) {
|
||||
struct path upperpath;
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = newdentry;
|
||||
|
||||
err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
|
||||
err = ovl_copy_xattr(lowerpath->dentry, newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&newdentry->d_inode->i_mutex);
|
||||
err = ovl_set_attr(newdentry, stat);
|
||||
if (!err && attr)
|
||||
err = notify_change(newdentry, attr, NULL);
|
||||
mutex_unlock(&newdentry->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
newdentry = NULL;
|
||||
|
||||
/*
|
||||
* Non-directores become opaque when copied up.
|
||||
*/
|
||||
if (!S_ISDIR(stat->mode))
|
||||
ovl_dentry_set_opaque(dentry, true);
|
||||
out2:
|
||||
dput(upper);
|
||||
out1:
|
||||
dput(newdentry);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy up a single dentry
|
||||
*
|
||||
* Directory renames only allowed on "pure upper" (already created on
|
||||
* upper filesystem, never copied up). Directories which are on lower or
|
||||
* are merged may not be renamed. For these -EXDEV is returned and
|
||||
* userspace has to deal with it. This means, when copying up a
|
||||
* directory we can rely on it and ancestors being stable.
|
||||
*
|
||||
* Non-directory renames start with copy up of source if necessary. The
|
||||
* actual rename will only proceed once the copy up was successful. Copy
|
||||
* up uses upper parent i_mutex for exclusion. Since rename can change
|
||||
* d_parent it is possible that the copy up will lock the old parent. At
|
||||
* that point the file will have already been copied up anyway.
|
||||
*/
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
int err;
|
||||
struct kstat pstat;
|
||||
struct path parentpath;
|
||||
struct dentry *upperdir;
|
||||
struct dentry *upperdentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
char *link = NULL;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
ovl_do_check_copy_up(lowerpath->dentry);
|
||||
|
||||
ovl_path_upper(parent, &parentpath);
|
||||
upperdir = parentpath.dentry;
|
||||
|
||||
err = vfs_getattr(&parentpath, &pstat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (S_ISLNK(stat->mode)) {
|
||||
link = ovl_read_symlink(lowerpath->dentry);
|
||||
if (IS_ERR(link))
|
||||
return PTR_ERR(link);
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_free_link;
|
||||
|
||||
override_cred->fsuid = stat->uid;
|
||||
override_cred->fsgid = stat->gid;
|
||||
/*
|
||||
* CAP_SYS_ADMIN for copying up extended attributes
|
||||
* CAP_DAC_OVERRIDE for create
|
||||
* CAP_FOWNER for chmod, timestamp update
|
||||
* CAP_FSETID for chmod
|
||||
* CAP_CHOWN for chown
|
||||
* CAP_MKNOD for mknod
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
cap_raise(override_cred->cap_effective, CAP_MKNOD);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = -EIO;
|
||||
if (lock_rename(workdir, upperdir) != NULL) {
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
unlock_rename(workdir, upperdir);
|
||||
err = 0;
|
||||
/* Raced with another copy-up? Do the setattr here */
|
||||
if (attr) {
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
}
|
||||
goto out_put_cred;
|
||||
}
|
||||
|
||||
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
|
||||
stat, attr, link);
|
||||
if (!err) {
|
||||
/* Restore timestamps on parent (best effort) */
|
||||
ovl_set_timestamps(upperdir, &pstat);
|
||||
}
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_put_cred:
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
out_free_link:
|
||||
if (link)
|
||||
free_page((unsigned long) link);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_copy_up(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
while (!err) {
|
||||
struct dentry *next;
|
||||
struct dentry *parent;
|
||||
struct path lowerpath;
|
||||
struct kstat stat;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
next = dget(dentry);
|
||||
/* find the topmost dentry not yet copied up */
|
||||
for (;;) {
|
||||
parent = dget_parent(next);
|
||||
|
||||
type = ovl_path_type(parent);
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
dput(next);
|
||||
next = parent;
|
||||
}
|
||||
|
||||
ovl_path_lower(next, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (!err)
|
||||
err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
|
||||
|
||||
dput(parent);
|
||||
dput(next);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
972
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/dir.c
Normal file
972
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/dir.c
Normal file
@ -0,0 +1,972 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
dget(wdentry);
|
||||
if (S_ISDIR(wdentry->d_inode->i_mode))
|
||||
err = ovl_do_rmdir(wdir, wdentry);
|
||||
else
|
||||
err = ovl_do_unlink(wdir, wdentry);
|
||||
dput(wdentry);
|
||||
|
||||
if (err) {
|
||||
pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
|
||||
wdentry, err);
|
||||
}
|
||||
}
|
||||
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
|
||||
{
|
||||
struct dentry *temp;
|
||||
char name[20];
|
||||
|
||||
snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
|
||||
|
||||
temp = lookup_one_len(name, workdir, strlen(name));
|
||||
if (!IS_ERR(temp) && temp->d_inode) {
|
||||
pr_err("overlayfs: workdir/%s already exists\n", name);
|
||||
dput(temp);
|
||||
temp = ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* caller holds i_mutex on workdir */
|
||||
static struct dentry *ovl_whiteout(struct dentry *workdir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *whiteout;
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
|
||||
whiteout = ovl_lookup_temp(workdir, dentry);
|
||||
if (IS_ERR(whiteout))
|
||||
return whiteout;
|
||||
|
||||
err = ovl_do_whiteout(wdir, whiteout);
|
||||
if (err) {
|
||||
dput(whiteout);
|
||||
whiteout = ERR_PTR(err);
|
||||
}
|
||||
|
||||
return whiteout;
|
||||
}
|
||||
|
||||
/*
 * Create the object described by @stat (or a hard link to @hardlink when
 * that is non-NULL) at the negative dentry @newdentry inside @dir.
 *
 * @link is the target string used for symlinks; @debug is forwarded to
 * the ovl_do_*() helpers.
 *
 * Returns 0 on success, -ESTALE if @newdentry is already positive,
 * -EPERM for an unrecognised file type, -ENOENT if the helper reported
 * success but left @newdentry without an inode, or the helper's error.
 */
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
		    struct kstat *stat, const char *link,
		    struct dentry *hardlink, bool debug)
{
	int err;

	if (newdentry->d_inode)
		return -ESTALE;

	if (hardlink) {
		err = ovl_do_link(hardlink, dir, newdentry, debug);
	} else if (S_ISREG(stat->mode)) {
		err = ovl_do_create(dir, newdentry, stat->mode, debug);
	} else if (S_ISDIR(stat->mode)) {
		err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
	} else if (S_ISCHR(stat->mode) || S_ISBLK(stat->mode) ||
		   S_ISFIFO(stat->mode) || S_ISSOCK(stat->mode)) {
		err = ovl_do_mknod(dir, newdentry,
				   stat->mode, stat->rdev, debug);
	} else if (S_ISLNK(stat->mode)) {
		err = ovl_do_symlink(dir, newdentry, link, debug);
	} else {
		err = -EPERM;
	}

	/*
	 * Not quite sure if non-instantiated dentry is legal or not.
	 * VFS doesn't seem to care so check and warn here.
	 */
	if (!err && WARN_ON(!newdentry->d_inode))
		err = -ENOENT;

	return err;
}
|
||||
|
||||
static int ovl_set_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
|
||||
}
|
||||
|
||||
static void ovl_remove_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE);
|
||||
if (err) {
|
||||
pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
|
||||
upperdentry->d_name.name, err);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type type;
|
||||
struct path realpath;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, stat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
stat->dev = dentry->d_sb->s_dev;
|
||||
stat->ino = dentry->d_inode->i_ino;
|
||||
|
||||
/*
|
||||
* It's probably not worth it to count subdirs to get the
|
||||
* correct link count. nlink=1 seems to pacify 'find' and
|
||||
* other utilities.
|
||||
*/
|
||||
if (OVL_TYPE_MERGE(type))
|
||||
stat->nlink = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
|
||||
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
mutex_unlock(&udir->i_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_lock_rename_workdir(struct dentry *workdir,
|
||||
struct dentry *upperdir)
|
||||
{
|
||||
/* Workdir should not be the same as upperdir */
|
||||
if (workdir == upperdir)
|
||||
goto err;
|
||||
|
||||
/* Workdir should not be subdir of upperdir and vice versa */
|
||||
if (lock_rename(workdir, upperdir) != NULL)
|
||||
goto err_unlock;
|
||||
|
||||
return 0;
|
||||
|
||||
err_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
err:
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static struct dentry *ovl_clear_empty(struct dentry *dentry,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct path upperpath;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir;
|
||||
struct kstat stat;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return ERR_PTR(-EROFS);
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
err = vfs_getattr(&upperpath, &stat);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (!S_ISDIR(stat.mode))
|
||||
goto out_unlock;
|
||||
upper = upperpath.dentry;
|
||||
if (upper->d_parent->d_inode != udir)
|
||||
goto out_unlock;
|
||||
|
||||
opaquedir = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out_unlock;
|
||||
|
||||
err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_copy_xattr(upper, opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_set_opaque(opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&opaquedir->d_inode->i_mutex);
|
||||
err = ovl_set_attr(opaquedir, &stat);
|
||||
mutex_unlock(&opaquedir->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup_whiteouts(upper, list);
|
||||
ovl_cleanup(wdir, upper);
|
||||
unlock_rename(workdir, upperdir);
|
||||
|
||||
/* dentry's upper doesn't match now, get rid of it */
|
||||
d_drop(dentry);
|
||||
|
||||
return opaquedir;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, opaquedir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *ret = NULL;
|
||||
LIST_HEAD(list);
|
||||
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
if (err)
|
||||
ret = ERR_PTR(err);
|
||||
else {
|
||||
/*
|
||||
* If no upperdentry then skip clearing whiteouts.
|
||||
*
|
||||
* Can race with copy-up, since we don't hold the upperdir
|
||||
* mutex. Doesn't matter, since copy-up can't create a
|
||||
* non-empty directory from an empty one.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry))
|
||||
ret = ovl_clear_empty(dentry, &list);
|
||||
}
|
||||
|
||||
ovl_cache_free(&list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
|
||||
if (err)
|
||||
goto out_dput2;
|
||||
|
||||
if (S_ISDIR(stat->mode)) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper,
|
||||
RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup(wdir, upper);
|
||||
} else {
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput2:
|
||||
dput(upper);
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out_dput2;
|
||||
}
|
||||
|
||||
/*
 * Common implementation of create/mkdir/mknod/symlink/link.
 *
 * Allocates a new overlay inode, copies up the parent of @dentry, and
 * then creates the object either directly in the upper directory (when
 * @dentry is not opaque) or via ovl_create_over_whiteout() with
 * temporarily raised capabilities.
 *
 * Returns 0 on success (the inode is then attached to @dentry), -ENOMEM
 * on allocation failure, or the error of the failing step.
 */
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
			      const char *link, struct dentry *hardlink)
{
	struct kstat stat = {
		.mode = mode,
		.rdev = rdev,
	};
	struct inode *inode;
	int err;

	inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
	if (!inode)
		return -ENOMEM;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_iput;

	if (!ovl_dentry_is_opaque(dentry)) {
		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
	} else {
		const struct cred *old_cred;
		struct cred *override_cred;

		override_cred = prepare_creds();
		if (!override_cred) {
			err = -ENOMEM;
			goto out_iput;
		}

		/*
		 * CAP_SYS_ADMIN for setting opaque xattr
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		old_cred = override_creds(override_cred);

		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
					       hardlink);

		revert_creds(old_cred);
		put_cred(override_cred);
	}

	/* On success the inode is attached to @dentry; keep it. */
	if (!err)
		return 0;

out_iput:
	iput(inode);
	return err;
}
|
||||
|
||||
/*
 * Run ovl_create_or_link() for a new (non-hardlink) object, bracketed by
 * ovl_want_write() / ovl_drop_write().
 *
 * Returns the error from ovl_want_write(), else the result of
 * ovl_create_or_link().
 */
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
			     const char *link)
{
	int err = ovl_want_write(dentry);

	if (err)
		return err;

	err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
	ovl_drop_write(dentry);

	return err;
}
|
||||
|
||||
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
bool excl)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
|
||||
}
|
||||
|
||||
/*
 * ->mkdir: make a directory.  Only the permission bits (07777) of @mode
 * are kept; @dir is not used.
 */
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int dir_mode = (mode & 07777) | S_IFDIR;

	return ovl_create_object(dentry, dir_mode, 0, NULL);
}
|
||||
|
||||
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
/* Don't allow creation of "whiteout" on overlay */
|
||||
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
|
||||
return -EPERM;
|
||||
|
||||
return ovl_create_object(dentry, mode, rdev, NULL);
|
||||
}
|
||||
|
||||
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *link)
|
||||
{
|
||||
return ovl_create_object(dentry, S_IFLNK, 0, link);
|
||||
}
|
||||
|
||||
static int ovl_link(struct dentry *old, struct inode *newdir,
|
||||
struct dentry *new)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upper;
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upper = ovl_dentry_upper(old);
|
||||
err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Remove @dentry by putting a whiteout in its place in the upper
 * directory.
 *
 * For directories, emptiness is checked first; a merged or lower
 * directory is additionally replaced by an opaque one through
 * ovl_check_empty_and_clear().  A whiteout is then created in the work
 * directory and renamed over the upper entry.  When the upper entry is a
 * directory the rename is an exchange, and the displaced directory is
 * removed from the work directory afterwards.
 *
 * @dentry is unhashed once the rename has been attempted, whether or
 * not it succeeded.
 *
 * Returns 0 on success, -EROFS when there is no work directory, -ESTALE
 * when the upper entry found by lookup is not the expected one, or the
 * error of the failing step.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *whiteout;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;
	int flags = 0;

	if (WARN_ON(!workdir))
		return -EROFS;

	/* Only dereference workdir once it is known to be non-NULL. */
	wdir = workdir->d_inode;

	if (is_dir) {
		if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
			opaquedir = ovl_check_empty_and_clear(dentry);
			err = PTR_ERR(opaquedir);
			if (IS_ERR(opaquedir))
				goto out;
		} else {
			LIST_HEAD(list);

			/*
			 * When removing an empty opaque directory, then it
			 * makes no sense to replace it with an exact replica of
			 * itself.  But emptiness still needs to be checked.
			 */
			err = ovl_check_empty_dir(dentry, &list);
			ovl_cache_free(&list);
			if (err)
				goto out;
		}
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	err = PTR_ERR(upper);
	if (IS_ERR(upper))
		goto out_unlock;

	/* The entry found under the lock must be the one we expect. */
	err = -ESTALE;
	if ((opaquedir && upper != opaquedir) ||
	    (!opaquedir && ovl_dentry_upper(dentry) &&
	     upper != ovl_dentry_upper(dentry))) {
		goto out_dput_upper;
	}

	whiteout = ovl_whiteout(workdir, dentry);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto out_dput_upper;

	if (d_is_dir(upper))
		flags = RENAME_EXCHANGE;

	err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
	if (err)
		goto kill_whiteout;
	/* After an exchange the old directory sits in workdir. */
	if (flags)
		ovl_cleanup(wdir, upper);

	ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
	d_drop(dentry);
	dput(whiteout);
out_dput_upper:
	dput(upper);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	dput(opaquedir);
out:
	return err;

kill_whiteout:
	ovl_cleanup(wdir, whiteout);
	goto out_d_drop;
}
|
||||
|
||||
/*
 * Remove @dentry directly from the upper directory of its parent.
 *
 * The entry is looked up again under the parent's i_mutex and removed
 * only if it is still the upper dentry recorded for @dentry; otherwise
 * -ESTALE is returned.  @dentry is unhashed on success.
 *
 * Returns 0, -ESTALE, or the error from lookup_one_len() / vfs_rmdir() /
 * vfs_unlink().
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *upper;
	int err;

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);

	upper = lookup_one_len(dentry->d_name.name, upperdir,
			       dentry->d_name.len);
	if (IS_ERR(upper)) {
		err = PTR_ERR(upper);
		goto unlock;
	}

	if (upper != ovl_dentry_upper(dentry)) {
		err = -ESTALE;
	} else {
		err = is_dir ? vfs_rmdir(dir, upper)
			     : vfs_unlink(dir, upper, NULL);
		ovl_dentry_version_inc(dentry->d_parent);
	}
	dput(upper);

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	if (!err)
		d_drop(dentry);

unlock:
	mutex_unlock(&dir->i_mutex);

	return err;
}
|
||||
|
||||
static inline int ovl_check_sticky(struct dentry *dentry)
|
||||
{
|
||||
struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
|
||||
struct inode *inode = ovl_dentry_real(dentry)->d_inode;
|
||||
|
||||
if (check_sticky(dir, inode))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Common implementation of unlink and rmdir.
 *
 * After the sticky check and copy-up of the parent, a pure upper entry
 * is removed directly with ovl_remove_upper(); anything else goes
 * through ovl_remove_and_whiteout() with temporarily raised
 * capabilities.
 *
 * Returns 0 or the error of the failing step.
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	enum ovl_path_type type;
	int err;

	err = ovl_check_sticky(dentry);
	if (err)
		return err;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto drop_write;

	type = ovl_path_type(dentry);
	if (OVL_TYPE_PURE_UPPER(type)) {
		err = ovl_remove_upper(dentry, is_dir);
	} else {
		const struct cred *old_cred;
		struct cred *override_cred;

		override_cred = prepare_creds();
		if (!override_cred) {
			err = -ENOMEM;
			goto drop_write;
		}

		/*
		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 * CAP_FSETID for chmod of opaque dir
		 * CAP_CHOWN for chown of opaque dir
		 */
		cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(override_cred->cap_effective, CAP_FOWNER);
		cap_raise(override_cred->cap_effective, CAP_FSETID);
		cap_raise(override_cred->cap_effective, CAP_CHOWN);
		old_cred = override_creds(override_cred);

		err = ovl_remove_and_whiteout(dentry, is_dir);

		revert_creds(old_cred);
		put_cred(override_cred);
	}

drop_write:
	ovl_drop_write(dentry);
	return err;
}
|
||||
|
||||
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, false);
|
||||
}
|
||||
|
||||
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, true);
|
||||
}
|
||||
|
||||
static int ovl_rename2(struct inode *olddir, struct dentry *old,
|
||||
struct inode *newdir, struct dentry *new,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type old_type;
|
||||
enum ovl_path_type new_type;
|
||||
struct dentry *old_upperdir;
|
||||
struct dentry *new_upperdir;
|
||||
struct dentry *olddentry;
|
||||
struct dentry *newdentry;
|
||||
struct dentry *trap;
|
||||
bool old_opaque;
|
||||
bool new_opaque;
|
||||
bool new_create = false;
|
||||
bool cleanup_whiteout = false;
|
||||
bool overwrite = !(flags & RENAME_EXCHANGE);
|
||||
bool is_dir = S_ISDIR(old->d_inode->i_mode);
|
||||
bool new_is_dir = false;
|
||||
struct dentry *opaquedir = NULL;
|
||||
const struct cred *old_cred = NULL;
|
||||
struct cred *override_cred = NULL;
|
||||
|
||||
err = -EINVAL;
|
||||
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
|
||||
goto out;
|
||||
|
||||
flags &= ~RENAME_NOREPLACE;
|
||||
|
||||
err = ovl_check_sticky(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Don't copy up directory trees */
|
||||
old_type = ovl_path_type(old);
|
||||
err = -EXDEV;
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
|
||||
goto out;
|
||||
|
||||
if (new->d_inode) {
|
||||
err = ovl_check_sticky(new);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (S_ISDIR(new->d_inode->i_mode))
|
||||
new_is_dir = true;
|
||||
|
||||
new_type = ovl_path_type(new);
|
||||
err = -EXDEV;
|
||||
if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_lower(old)->d_inode ==
|
||||
ovl_dentry_lower(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_upper(old)->d_inode ==
|
||||
ovl_dentry_upper(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
if (ovl_dentry_is_opaque(new))
|
||||
new_type = __OVL_PATH_UPPER;
|
||||
else
|
||||
new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
|
||||
}
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(new->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
if (!overwrite) {
|
||||
err = ovl_copy_up(new);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
|
||||
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
}
|
||||
|
||||
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
|
||||
opaquedir = ovl_check_empty_and_clear(new);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir)) {
|
||||
opaquedir = NULL;
|
||||
goto out_revert_creds;
|
||||
}
|
||||
}
|
||||
|
||||
if (overwrite) {
|
||||
if (old_opaque) {
|
||||
if (new->d_inode || !new_opaque) {
|
||||
/* Whiteout source */
|
||||
flags |= RENAME_WHITEOUT;
|
||||
} else {
|
||||
/* Switch whiteouts */
|
||||
flags |= RENAME_EXCHANGE;
|
||||
}
|
||||
} else if (is_dir && !new->d_inode && new_opaque) {
|
||||
flags |= RENAME_EXCHANGE;
|
||||
cleanup_whiteout = true;
|
||||
}
|
||||
}
|
||||
|
||||
old_upperdir = ovl_dentry_upper(old->d_parent);
|
||||
new_upperdir = ovl_dentry_upper(new->d_parent);
|
||||
|
||||
trap = lock_rename(new_upperdir, old_upperdir);
|
||||
|
||||
|
||||
olddentry = lookup_one_len(old->d_name.name, old_upperdir,
|
||||
old->d_name.len);
|
||||
err = PTR_ERR(olddentry);
|
||||
if (IS_ERR(olddentry))
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (olddentry != ovl_dentry_upper(old))
|
||||
goto out_dput_old;
|
||||
|
||||
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
|
||||
new->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_dput_old;
|
||||
|
||||
err = -ESTALE;
|
||||
if (ovl_dentry_upper(new)) {
|
||||
if (opaquedir) {
|
||||
if (newdentry != opaquedir)
|
||||
goto out_dput;
|
||||
} else {
|
||||
if (newdentry != ovl_dentry_upper(new))
|
||||
goto out_dput;
|
||||
}
|
||||
} else {
|
||||
new_create = true;
|
||||
if (!d_is_negative(newdentry) &&
|
||||
(!new_opaque || !ovl_is_whiteout(newdentry)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (olddentry == trap)
|
||||
goto out_dput;
|
||||
if (newdentry == trap)
|
||||
goto out_dput;
|
||||
|
||||
if (is_dir && !old_opaque && new_opaque) {
|
||||
err = ovl_set_opaque(olddentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
flags);
|
||||
} else {
|
||||
/* No debug for the plain case */
|
||||
BUG_ON(flags & ~RENAME_EXCHANGE);
|
||||
err = vfs_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
NULL, flags);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
|
||||
if (old_opaque != new_opaque) {
|
||||
ovl_dentry_set_opaque(old, new_opaque);
|
||||
if (!overwrite)
|
||||
ovl_dentry_set_opaque(new, old_opaque);
|
||||
}
|
||||
|
||||
if (cleanup_whiteout)
|
||||
ovl_cleanup(old_upperdir->d_inode, newdentry);
|
||||
|
||||
ovl_dentry_version_inc(old->d_parent);
|
||||
ovl_dentry_version_inc(new->d_parent);
|
||||
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_dput_old:
|
||||
dput(olddentry);
|
||||
out_unlock:
|
||||
unlock_rename(new_upperdir, old_upperdir);
|
||||
out_revert_creds:
|
||||
if (old_opaque || new_opaque) {
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
dput(opaquedir);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* ->rename: plain rename, i.e. ovl_rename2() with no flags set. */
static int ovl_rename(struct inode *olddir, struct dentry *old,
		      struct inode *newdir, struct dentry *new)
{
	const unsigned int no_flags = 0;

	return ovl_rename2(olddir, old, newdir, new, no_flags);
}
|
||||
|
||||
const struct inode_operations_wrapper ovl_dir_inode_operations = {
|
||||
.ops = {
|
||||
.lookup = ovl_lookup,
|
||||
.mkdir = ovl_mkdir,
|
||||
.symlink = ovl_symlink,
|
||||
.unlink = ovl_unlink,
|
||||
.rmdir = ovl_rmdir,
|
||||
.rename = ovl_rename,
|
||||
.link = ovl_link,
|
||||
.setattr = ovl_setattr,
|
||||
.create = ovl_create,
|
||||
.mknod = ovl_mknod,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_dir_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
},
|
||||
.rename2 = ovl_rename2,
|
||||
};
|
||||
442
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/inode.c
Normal file
442
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/inode.c
Normal file
@ -0,0 +1,442 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
|
||||
bool no_data)
|
||||
{
|
||||
int err;
|
||||
struct dentry *parent;
|
||||
struct kstat stat;
|
||||
struct path lowerpath;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
err = ovl_copy_up(parent);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
ovl_path_lower(dentry, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
if (no_data)
|
||||
stat.size = 0;
|
||||
|
||||
err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
|
||||
|
||||
out_dput_parent:
|
||||
dput(parent);
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (!err) {
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
}
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct path realpath;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
return vfs_getattr(&realpath, stat);
|
||||
}
|
||||
|
||||
int ovl_permission(struct inode *inode, int mask)
|
||||
{
|
||||
struct ovl_entry *oe;
|
||||
struct dentry *alias = NULL;
|
||||
struct inode *realinode;
|
||||
struct dentry *realdentry;
|
||||
bool is_upper;
|
||||
int err;
|
||||
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
oe = inode->i_private;
|
||||
} else if (mask & MAY_NOT_BLOCK) {
|
||||
return -ECHILD;
|
||||
} else {
|
||||
/*
|
||||
* For non-directories find an alias and get the info
|
||||
* from there.
|
||||
*/
|
||||
alias = d_find_any_alias(inode);
|
||||
if (WARN_ON(!alias))
|
||||
return -ENOENT;
|
||||
|
||||
oe = alias->d_fsdata;
|
||||
}
|
||||
|
||||
realdentry = ovl_entry_real(oe, &is_upper);
|
||||
|
||||
/* Careful in RCU walk mode */
|
||||
realinode = ACCESS_ONCE(realdentry->d_inode);
|
||||
if (!realinode) {
|
||||
WARN_ON(!(mask & MAY_NOT_BLOCK));
|
||||
err = -ENOENT;
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (mask & MAY_WRITE) {
|
||||
umode_t mode = realinode->i_mode;
|
||||
|
||||
/*
|
||||
* Writes will always be redirected to upper layer, so
|
||||
* ignore lower layer being read-only.
|
||||
*
|
||||
* If the overlay itself is read-only then proceed
|
||||
* with the permission check, don't return EROFS.
|
||||
* This will only happen if this is the lower layer of
|
||||
* another overlayfs.
|
||||
*
|
||||
* If upper fs becomes read-only after the overlay was
|
||||
* constructed return EROFS to prevent modification of
|
||||
* upper layer.
|
||||
*/
|
||||
err = -EROFS;
|
||||
if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
|
||||
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
err = __inode_permission(realinode, mask);
|
||||
out_dput:
|
||||
dput(alias);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Per-call state passed from ovl_follow_link() to ovl_put_link() when the
 * real inode has a ->put_link operation.
 */
struct ovl_link_data {
	struct dentry *realdentry;	/* real dentry the link was followed on */
	void *cookie;			/* value returned by the real ->follow_link */
};
|
||||
|
||||
static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
void *ret;
|
||||
struct dentry *realdentry;
|
||||
struct inode *realinode;
|
||||
struct ovl_link_data *data = NULL;
|
||||
|
||||
realdentry = ovl_dentry_real(dentry);
|
||||
realinode = realdentry->d_inode;
|
||||
|
||||
if (WARN_ON(!realinode->i_op->follow_link))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
if (realinode->i_op->put_link) {
|
||||
data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
|
||||
if (!data)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
data->realdentry = realdentry;
|
||||
}
|
||||
|
||||
ret = realinode->i_op->follow_link(realdentry, nd);
|
||||
if (IS_ERR(ret)) {
|
||||
kfree(data);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (data)
|
||||
data->cookie = ret;
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
|
||||
{
|
||||
struct inode *realinode;
|
||||
struct ovl_link_data *data = c;
|
||||
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
realinode = data->realdentry->d_inode;
|
||||
realinode->i_op->put_link(data->realdentry, nd, data->cookie);
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
/*
 * readlink() on an overlay symlink: resolve the real path, update its
 * atime and forward to the real inode's ->readlink.
 *
 * Returns -EINVAL when the real inode has no ->readlink.
 */
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
{
	struct path real;
	struct inode *inode;

	ovl_path_real(dentry, &real);
	inode = real.dentry->d_inode;

	if (inode->i_op->readlink) {
		touch_atime(&real);
		return inode->i_op->readlink(real.dentry, buf, bufsiz);
	}

	return -EINVAL;
}
|
||||
|
||||
|
||||
static bool ovl_is_private_xattr(const char *name)
|
||||
{
|
||||
return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
|
||||
}
|
||||
|
||||
/*
 * Set an xattr on an overlay dentry.
 *
 * Takes write access, refuses overlay-private names with -EPERM, copies
 * the dentry up and then sets the attribute on the upper dentry.
 * Write access is dropped on every path after it was obtained.
 */
int ovl_setxattr(struct dentry *dentry, const char *name,
		 const void *value, size_t size, int flags)
{
	int ret;

	ret = ovl_want_write(dentry);
	if (ret)
		return ret;

	if (ovl_is_private_xattr(name)) {
		ret = -EPERM;
	} else {
		ret = ovl_copy_up(dentry);
		if (!ret) {
			struct dentry *upper = ovl_dentry_upper(dentry);

			ret = vfs_setxattr(upper, name, value, size, flags);
		}
	}

	ovl_drop_write(dentry);
	return ret;
}
|
||||
|
||||
static bool ovl_need_xattr_filter(struct dentry *dentry,
|
||||
enum ovl_path_type type)
|
||||
{
|
||||
if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
|
||||
return S_ISDIR(dentry->d_inode->i_mode);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * Read an xattr from the real dentry. Overlay-private names report
 * -ENODATA when filtering applies to this dentry.
 */
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
		     void *value, size_t size)
{
	struct path real;
	enum ovl_path_type type;

	type = ovl_path_real(dentry, &real);
	if (ovl_need_xattr_filter(dentry, type)) {
		if (ovl_is_private_xattr(name))
			return -ENODATA;
	}

	return vfs_getxattr(real.dentry, name, value, size);
}
|
||||
|
||||
/*
 * List xattr names of the real dentry into @list.
 *
 * When filtering applies, overlay-private names are removed in place by
 * shifting the remainder of the NUL-separated list down over them, and
 * the returned length shrinks accordingly. Errors, empty results and
 * size queries (@size == 0) are passed through unchanged.
 */
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct path real;
	enum ovl_path_type type = ovl_path_real(dentry, &real);
	ssize_t len;
	int pos = 0;

	len = vfs_listxattr(real.dentry, list, size);
	if (len <= 0 || size == 0)
		return len;
	if (!ovl_need_xattr_filter(dentry, type))
		return len;

	/* filter out private xattrs */
	while (pos < len) {
		char *entry = list + pos;
		size_t entry_len = strlen(entry) + 1;

		BUG_ON(pos + entry_len > len);

		if (!ovl_is_private_xattr(entry)) {
			pos += entry_len;
			continue;
		}

		/* drop this name; do not advance, the next name is now here */
		len -= entry_len;
		memmove(entry, entry + entry_len, len - pos);
	}

	return len;
}
|
||||
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -ENODATA;
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
if (!OVL_TYPE_UPPER(type)) {
|
||||
err = vfs_getxattr(realpath.dentry, name, NULL, 0);
|
||||
if (err < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
|
||||
err = vfs_removexattr(realpath.dentry, name);
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
|
||||
struct dentry *realdentry)
|
||||
{
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
return false;
|
||||
|
||||
if (special_file(realdentry->d_inode->i_mode))
|
||||
return false;
|
||||
|
||||
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ovl_dentry_open(struct dentry *dentry, struct file *file,
|
||||
const struct cred *cred)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type;
|
||||
bool want_write = false;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
if (!ovl_is_nocopyupw(dentry)) {
|
||||
if (ovl_open_need_copy_up(file->f_flags, type,
|
||||
realpath.dentry)) {
|
||||
want_write = true;
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_flags & O_TRUNC)
|
||||
err = ovl_copy_up_last(dentry, NULL, true);
|
||||
else
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
}
|
||||
|
||||
err = vfs_open(&realpath, file, cred);
|
||||
out_drop_write:
|
||||
if (want_write)
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct inode_operations_wrapper ovl_file_inode_operations = {
|
||||
.ops = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
},
|
||||
.dentry_open = ovl_dentry_open,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.follow_link = ovl_follow_link,
|
||||
.put_link = ovl_put_link,
|
||||
.readlink = ovl_readlink,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
|
||||
/*
 * Allocate an overlay inode on @sb for the file type encoded in @mode
 * (only the S_IFMT bits are kept) and install the matching operations.
 * Directories additionally store @oe in ->i_private.
 *
 * Returns NULL on allocation failure or for an unknown file type (the
 * latter also emits a WARN and releases the inode).
 */
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
			    struct ovl_entry *oe)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	mode &= S_IFMT;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_flags |= S_NOATIME | S_NOCMTIME;

	switch (mode) {
	case S_IFLNK:
		inode->i_op = &ovl_symlink_inode_operations;
		return inode;

	case S_IFDIR:
		inode->i_private = oe;
		inode->i_op = &ovl_dir_inode_operations.ops;
		inode->i_fop = &ovl_dir_operations;
		inode->i_flags |= S_IOPS_WRAPPER;
		return inode;

	case S_IFREG:
	case S_IFSOCK:
	case S_IFBLK:
	case S_IFCHR:
	case S_IFIFO:
		inode->i_op = &ovl_file_inode_operations.ops;
		inode->i_flags |= S_IOPS_WRAPPER;
		return inode;

	default:
		break;
	}

	WARN(1, "illegal file type: %i\n", mode);
	iput(inode);
	return NULL;
}
|
||||
@ -0,0 +1,200 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
struct ovl_entry;
|
||||
|
||||
/*
 * Bit flags describing an overlay dentry; a value of this type is a
 * combination of these bits and is tested with the OVL_TYPE_*() macros.
 */
enum ovl_path_type {
	__OVL_PATH_PURE		= (1 << 0),
	__OVL_PATH_UPPER	= (1 << 1),
	__OVL_PATH_MERGE	= (1 << 2),
};
|
||||
|
||||
/* Single-bit tests on an enum ovl_path_type value. */
#define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
#define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_PURE_UPPER(type)	((type) & __OVL_PATH_PURE)
/* True when the MERGE bit is set or the UPPER bit is clear. */
#define OVL_TYPE_MERGE_OR_LOWER(type) \
	(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))

/* Prefix of overlay-private xattr names; PRE_LEN is its length. */
#define OVL_XATTR_PRE_NAME "trusted.overlay."
#define OVL_XATTR_PRE_LEN  16
#define OVL_XATTR_OPAQUE   OVL_XATTR_PRE_NAME"opaque"
|
||||
|
||||
/* vfs_rmdir() wrapper that logs the result via pr_debug(). */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, ret);

	return ret;
}
|
||||
|
||||
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_unlink(dir, dentry, NULL);
|
||||
pr_debug("unlink(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* vfs_link() wrapper; the result is logged only when @debug is set. */
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
			      struct dentry *new_dentry, bool debug)
{
	int ret = vfs_link(old_dentry, dir, new_dentry, NULL);

	if (!debug)
		return ret;

	pr_debug("link(%pd2, %pd2) = %i\n",
		 old_dentry, new_dentry, ret);
	return ret;
}
|
||||
|
||||
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_create(dir, dentry, mode, true);
|
||||
if (debug)
|
||||
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_mkdir(dir, dentry, mode);
|
||||
if (debug)
|
||||
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t dev, bool debug)
|
||||
{
|
||||
int err = vfs_mknod(dir, dentry, mode, dev);
|
||||
if (debug) {
|
||||
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
|
||||
dentry, mode, dev, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* vfs_symlink() wrapper; the result is logged only when @debug is set. */
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
				 const char *oldname, bool debug)
{
	int ret = vfs_symlink(dir, dentry, oldname);

	if (!debug)
		return ret;

	pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, ret);
	return ret;
}
|
||||
|
||||
/*
 * vfs_setxattr() wrapper that logs the call and its result.
 *
 * The value is printed with a precision ("%.*s"), not a field width:
 * an xattr value is a @size-byte blob that need not be NUL-terminated,
 * so the print must never read past @size bytes.
 */
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	int err = vfs_setxattr(dentry, name, value, size, flags);

	pr_debug("setxattr(%pd2, \"%s\", \"%.*s\", 0x%x) = %i\n",
		 dentry, name, (int) size, (const char *) value, flags, err);
	return err;
}
|
||||
|
||||
/* vfs_removexattr() wrapper that logs the result via pr_debug(). */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	ret = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, ret);

	return ret;
}
|
||||
|
||||
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
|
||||
struct inode *newdir, struct dentry *newdentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
|
||||
olddentry, newdentry, flags);
|
||||
|
||||
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
|
||||
|
||||
if (err) {
|
||||
pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
|
||||
olddentry, newdentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* vfs_whiteout() wrapper that logs the result via pr_debug(). */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, ret);

	return ret;
}
|
||||
|
||||
bool ovl_is_nocopyupw(struct dentry *dentry);
|
||||
enum ovl_path_type ovl_path_type(struct dentry *dentry);
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry);
|
||||
void ovl_dentry_version_inc(struct dentry *dentry);
|
||||
void ovl_path_upper(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lower(struct dentry *dentry, struct path *path);
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
|
||||
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
|
||||
struct dentry *ovl_dentry_upper(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry);
|
||||
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
|
||||
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
|
||||
struct dentry *ovl_workdir(struct dentry *dentry);
|
||||
int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
bool ovl_dentry_is_opaque(struct dentry *dentry);
|
||||
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
|
||||
bool ovl_is_whiteout(struct dentry *dentry);
|
||||
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
|
||||
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags);
|
||||
struct file *ovl_path_open(struct path *path, int flags);
|
||||
|
||||
struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
|
||||
struct kstat *stat, const char *link);
|
||||
|
||||
/* readdir.c */
|
||||
extern const struct file_operations ovl_dir_operations;
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
|
||||
void ovl_cache_free(struct list_head *list);
|
||||
|
||||
/* inode.c */
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ovl_permission(struct inode *inode, int mask);
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size);
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name);
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe);
|
||||
static inline void ovl_copyattr(struct inode *from, struct inode *to)
|
||||
{
|
||||
to->i_uid = from->i_uid;
|
||||
to->i_gid = from->i_gid;
|
||||
}
|
||||
|
||||
/* dir.c */
|
||||
extern const struct inode_operations_wrapper ovl_dir_inode_operations;
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug);
|
||||
void ovl_cleanup(struct inode *dir, struct dentry *dentry);
|
||||
|
||||
/* copy_up.c */
|
||||
int ovl_copy_up(struct dentry *dentry);
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr);
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
|
||||
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
|
||||
588
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c
Normal file
588
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/readdir.c
Normal file
@ -0,0 +1,588 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
struct ovl_cache_entry {
|
||||
unsigned int len;
|
||||
unsigned int type;
|
||||
u64 ino;
|
||||
struct list_head l_node;
|
||||
struct rb_node node;
|
||||
struct ovl_cache_entry *next_maybe_whiteout;
|
||||
bool is_whiteout;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct ovl_dir_cache {
|
||||
long refcount;
|
||||
u64 version;
|
||||
struct list_head entries;
|
||||
};
|
||||
|
||||
struct dir_context {
|
||||
const filldir_t actor;
|
||||
//loff_t pos;
|
||||
};
|
||||
|
||||
struct ovl_readdir_data {
|
||||
struct dir_context ctx;
|
||||
bool is_merge;
|
||||
struct rb_root root;
|
||||
struct list_head *list;
|
||||
struct list_head middle;
|
||||
struct ovl_cache_entry *first_maybe_whiteout;
|
||||
int count;
|
||||
int err;
|
||||
};
|
||||
|
||||
struct ovl_dir_file {
|
||||
bool is_real;
|
||||
bool is_upper;
|
||||
struct ovl_dir_cache *cache;
|
||||
struct list_head *cursor;
|
||||
struct file *realfile;
|
||||
struct file *upperfile;
|
||||
};
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
|
||||
{
|
||||
return container_of(n, struct ovl_cache_entry, node);
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
|
||||
const char *name, int len)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
int cmp;
|
||||
|
||||
while (node) {
|
||||
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
|
||||
|
||||
cmp = strncmp(name, p->name, len);
|
||||
if (cmp > 0)
|
||||
node = p->node.rb_right;
|
||||
else if (cmp < 0 || len < p->len)
|
||||
node = p->node.rb_left;
|
||||
else
|
||||
return p;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
|
||||
const char *name, int len,
|
||||
u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
|
||||
|
||||
p = kmalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
memcpy(p->name, name, len);
|
||||
p->name[len] = '\0';
|
||||
p->len = len;
|
||||
p->type = d_type;
|
||||
p->ino = ino;
|
||||
p->is_whiteout = false;
|
||||
|
||||
if (d_type == DT_CHR) {
|
||||
p->next_maybe_whiteout = rdd->first_maybe_whiteout;
|
||||
rdd->first_maybe_whiteout = p;
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * Insert @name into both the by-name rb-tree and the tail of rdd->list,
 * unless an entry with that name already exists (then nothing is done).
 *
 * Returns 0 on success or when the name was already present, -ENOMEM
 * if the new entry could not be allocated.
 */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **link = &rdd->root.rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *entry;

	while (*link) {
		struct ovl_cache_entry *cur = ovl_cache_entry_from_node(*link);
		int order = strncmp(name, cur->name, len);

		parent = *link;
		if (order > 0)
			link = &cur->node.rb_right;
		else if (order < 0 || len < cur->len)
			link = &cur->node.rb_left;
		else
			return 0;	/* already present */
	}

	entry = ovl_cache_entry_new(rdd, name, len, ino, d_type);
	if (!entry)
		return -ENOMEM;

	list_add_tail(&entry->l_node, rdd->list);
	rb_link_node(&entry->node, parent, link);
	rb_insert_color(&entry->node, &rdd->root);

	return 0;
}
|
||||
|
||||
static int ovl_fill_lower(struct ovl_readdir_data *rdd,
|
||||
const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
p = ovl_cache_entry_find(&rdd->root, name, namelen);
|
||||
if (p) {
|
||||
list_move_tail(&p->l_node, &rdd->middle);
|
||||
} else {
|
||||
p = ovl_cache_entry_new(rdd, name, namelen, ino, d_type);
|
||||
if (p == NULL)
|
||||
rdd->err = -ENOMEM;
|
||||
else
|
||||
list_add_tail(&p->l_node, &rdd->middle);
|
||||
}
|
||||
|
||||
return rdd->err;
|
||||
}
|
||||
|
||||
void ovl_cache_free(struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
struct ovl_cache_entry *n;
|
||||
|
||||
list_for_each_entry_safe(p, n, list, l_node)
|
||||
kfree(p);
|
||||
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
|
||||
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
|
||||
{
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
|
||||
WARN_ON(cache->refcount <= 0);
|
||||
cache->refcount--;
|
||||
if (!cache->refcount) {
|
||||
if (ovl_dir_cache(dentry) == cache)
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_fill_merge(void *buf, const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct dir_context *ctx = buf;
|
||||
struct ovl_readdir_data *rdd =
|
||||
container_of(ctx, struct ovl_readdir_data, ctx);
|
||||
|
||||
rdd->count++;
|
||||
if (!rdd->is_merge)
|
||||
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
|
||||
else
|
||||
return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
|
||||
}
|
||||
|
||||
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
struct dentry *dentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
return -ENOMEM;
|
||||
|
||||
/*
|
||||
* CAP_DAC_OVERRIDE for lookup
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = mutex_lock_killable(&dir->d_inode->i_mutex);
|
||||
if (!err) {
|
||||
while (rdd->first_maybe_whiteout) {
|
||||
p = rdd->first_maybe_whiteout;
|
||||
rdd->first_maybe_whiteout = p->next_maybe_whiteout;
|
||||
dentry = lookup_one_len(p->name, dir, p->len);
|
||||
if (!IS_ERR(dentry)) {
|
||||
p->is_whiteout = ovl_is_whiteout(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
}
|
||||
mutex_unlock(&dir->d_inode->i_mutex);
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_dir_read(struct path *realpath,
|
||||
struct ovl_readdir_data *rdd)
|
||||
{
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
|
||||
rdd->first_maybe_whiteout = NULL;
|
||||
//rdd->ctx.pos = 0;
|
||||
do {
|
||||
rdd->count = 0;
|
||||
rdd->err = 0;
|
||||
err = vfs_readdir(realfile, rdd->ctx.actor, rdd);
|
||||
if (err >= 0)
|
||||
err = rdd->err;
|
||||
} while (!err && rdd->count);
|
||||
|
||||
if (!err && rdd->first_maybe_whiteout)
|
||||
err = ovl_check_whiteouts(realpath->dentry, rdd);
|
||||
|
||||
fput(realfile);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_dir_reset(struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
|
||||
ovl_cache_put(od, dentry);
|
||||
od->cache = NULL;
|
||||
od->cursor = NULL;
|
||||
}
|
||||
WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
|
||||
if (od->is_real && OVL_TYPE_MERGE(type))
|
||||
od->is_real = false;
|
||||
}
|
||||
|
||||
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
struct ovl_readdir_data rdd = {
|
||||
.ctx.actor = ovl_fill_merge,
|
||||
.list = list,
|
||||
.root = RB_ROOT,
|
||||
.is_merge = false,
|
||||
};
|
||||
int idx, next;
|
||||
|
||||
for (idx = 0; idx != -1; idx = next) {
|
||||
next = ovl_path_next(idx, dentry, &realpath);
|
||||
|
||||
if (next != -1) {
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
if (err)
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Insert lowest layer entries before upper ones, this
|
||||
* allows offsets to be reasonably constant
|
||||
*/
|
||||
list_add(&rdd.middle, rdd.list);
|
||||
rdd.is_merge = true;
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
list_del(&rdd.middle);
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Point od->cursor at the @pos-th node of the cache's entry list, or
 * at the list head itself when the list has fewer than @pos entries.
 */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *head = &od->cache->entries;
	struct list_head *node = head->next;
	loff_t skipped;

	for (skipped = 0; node != head && skipped < pos; skipped++)
		node = node->next;

	/* Cursor is safe since the cache is stable */
	od->cursor = node;
}
|
||||
|
||||
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_dir_cache(dentry);
|
||||
if (cache && ovl_dentry_version_get(dentry) == cache->version) {
|
||||
cache->refcount++;
|
||||
return cache;
|
||||
}
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
||||
if (!cache)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
cache->refcount = 1;
|
||||
INIT_LIST_HEAD(&cache->entries);
|
||||
|
||||
res = ovl_dir_read_merged(dentry, &cache->entries);
|
||||
if (res) {
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
cache->version = ovl_dentry_version_get(dentry);
|
||||
ovl_set_dir_cache(dentry, cache);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
/*
 * ->readdir for overlay directories.
 *
 * Non-merged directories are forwarded to the real file. Merged ones
 * are served from the cache (built on first use), skipping whiteouts;
 * f_pos counts cache entries, including skipped whiteouts. Emission
 * stops, without advancing, when @filler returns non-zero.
 */
static int ovl_readdir(struct file *file, void *buf, filldir_t filler)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;

	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		int res = vfs_readdir(od->realfile, filler, buf);

		file->f_pos = od->realfile->f_pos;
		return res;
	}

	if (!od->cache) {
		struct ovl_dir_cache *cache = ovl_cache_get(dentry);

		if (IS_ERR(cache))
			return PTR_ERR(cache);

		od->cache = cache;
		ovl_seek_cursor(od, file->f_pos);
	}

	for (; od->cursor != &od->cache->entries; file->f_pos++) {
		struct ovl_cache_entry *entry;

		entry = list_entry(od->cursor, struct ovl_cache_entry, l_node);
		if (!entry->is_whiteout &&
		    filler(buf, entry->name, entry->len, file->f_pos,
			   entry->ino, entry->type))
			break;

		od->cursor = entry->l_node.next;
	}

	return 0;
}
|
||||
|
||||
/*
 * ->llseek for overlay directories, run under the inode's i_mutex.
 *
 * Non-merged directories forward to the real file. Merged ones accept
 * only SEEK_SET and SEEK_CUR with a non-negative result; the cursor is
 * repositioned when a cache exists. Anything else yields -EINVAL.
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	struct ovl_dir_file *od = file->private_data;
	loff_t ret = -EINVAL;

	mutex_lock(&file_inode(file)->i_mutex);
	if (!file->f_pos)
		ovl_dir_reset(file);

	if (od->is_real) {
		ret = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
		goto unlock;
	}

	if (origin == SEEK_CUR)
		offset += file->f_pos;
	else if (origin != SEEK_SET)
		goto unlock;

	if (offset < 0)
		goto unlock;

	if (offset != file->f_pos) {
		file->f_pos = offset;
		if (od->cache)
			ovl_seek_cursor(od, offset);
	}
	ret = offset;

unlock:
	mutex_unlock(&file_inode(file)->i_mutex);

	return ret;
}
|
||||
|
||||
/*
 * ->fsync for overlay directories.
 *
 * Normally syncs the file opened in ovl_dir_open(). If that file was
 * not on the upper layer but the dentry now has an upper, the upper
 * directory is opened on demand, published in od->upperfile under
 * i_mutex (a concurrent winner's file is used instead and ours is
 * released), and synced.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *target = od->realfile;
	struct inode *inode;
	struct path upperpath;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (od->is_upper || !OVL_TYPE_UPPER(ovl_path_type(dentry)))
		goto sync;

	inode = file_inode(file);

	target = lockless_dereference(od->upperfile);
	if (target)
		goto sync;

	ovl_path_upper(dentry, &upperpath);
	target = ovl_path_open(&upperpath, O_RDONLY);
	smp_mb__before_spinlock();
	mutex_lock(&inode->i_mutex);
	if (od->upperfile) {
		/* somebody has beaten us to it */
		if (!IS_ERR(target))
			fput(target);
		target = od->upperfile;
	} else if (IS_ERR(target)) {
		mutex_unlock(&inode->i_mutex);
		return PTR_ERR(target);
	} else {
		od->upperfile = target;
	}
	mutex_unlock(&inode->i_mutex);

sync:
	return vfs_fsync_range(target, start, end, datasync);
}
|
||||
|
||||
static int ovl_dir_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
if (od->cache) {
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ovl_cache_put(od, file->f_path.dentry);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
fput(od->realfile);
|
||||
if (od->upperfile)
|
||||
fput(od->upperfile);
|
||||
kfree(od);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_dir_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct path realpath;
|
||||
struct file *realfile;
|
||||
struct ovl_dir_file *od;
|
||||
enum ovl_path_type type;
|
||||
|
||||
od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
|
||||
if (!od)
|
||||
return -ENOMEM;
|
||||
|
||||
type = ovl_path_real(file->f_path.dentry, &realpath);
|
||||
realfile = ovl_path_open(&realpath, file->f_flags);
|
||||
if (IS_ERR(realfile)) {
|
||||
kfree(od);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->realfile = realfile;
|
||||
od->is_real = !OVL_TYPE_MERGE(type);
|
||||
od->is_upper = OVL_TYPE_UPPER(type);
|
||||
file->private_data = od;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct file_operations ovl_dir_operations = {
|
||||
.read = generic_read_dir,
|
||||
.open = ovl_dir_open,
|
||||
.readdir = ovl_readdir,
|
||||
.llseek = ovl_dir_llseek,
|
||||
.fsync = ovl_dir_fsync,
|
||||
.release = ovl_dir_release,
|
||||
};
|
||||
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
err = ovl_dir_read_merged(dentry, list);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = 0;
|
||||
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
if (p->is_whiteout)
|
||||
continue;
|
||||
|
||||
if (p->name[0] == '.') {
|
||||
if (p->len == 1)
|
||||
continue;
|
||||
if (p->len == 2 && p->name[1] == '.')
|
||||
continue;
|
||||
}
|
||||
err = -ENOTEMPTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
struct dentry *dentry;
|
||||
|
||||
if (!p->is_whiteout)
|
||||
continue;
|
||||
|
||||
dentry = lookup_one_len(p->name, upper, p->len);
|
||||
if (IS_ERR(dentry)) {
|
||||
pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
|
||||
upper->d_name.name, p->len, p->name,
|
||||
(int) PTR_ERR(dentry));
|
||||
continue;
|
||||
}
|
||||
ovl_cleanup(upper->d_inode, dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
mutex_unlock(&upper->d_inode->i_mutex);
|
||||
}
|
||||
1203
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/super.c
Normal file
1203
executer/kernel/mcoverlayfs/linux-3.10.0-327.36.1.el7/super.c
Normal file
File diff suppressed because it is too large
Load Diff
21
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile.in
Normal file
21
executer/kernel/mcoverlayfs/linux-4.0.9/Makefile.in
Normal file
@ -0,0 +1,21 @@
|
||||
KDIR ?= @KDIR@
|
||||
ARCH ?= @ARCH@
|
||||
KMODDIR = @KMODDIR@
|
||||
src = @abs_srcdir@
|
||||
|
||||
obj-m += mcoverlay.o
|
||||
|
||||
mcoverlay-y := copy_up.o dir.o inode.o readdir.o super.o
|
||||
|
||||
.PHONY: clean install modules
|
||||
|
||||
modules:
|
||||
$(MAKE) -C $(KDIR) M=$(PWD) SUBDIRS=$(PWD) ARCH=$(ARCH) modules
|
||||
|
||||
clean:
|
||||
$(RM) .*.cmd *.mod.c *.o *.ko* Module.symvers modules.order -r .tmp*
|
||||
|
||||
install:
|
||||
mkdir -p -m 755 $(KMODDIR)
|
||||
install -m 644 mcoverlay.ko $(KMODDIR)
|
||||
|
||||
416
executer/kernel/mcoverlayfs/linux-4.0.9/copy_up.c
Normal file
416
executer/kernel/mcoverlayfs/linux-4.0.9/copy_up.c
Normal file
@ -0,0 +1,416 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/splice.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/namei.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
|
||||
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
|
||||
{
|
||||
ssize_t list_size, size;
|
||||
char *buf, *name, *value;
|
||||
int error;
|
||||
|
||||
if (!old->d_inode->i_op->getxattr ||
|
||||
!new->d_inode->i_op->getxattr)
|
||||
return 0;
|
||||
|
||||
list_size = vfs_listxattr(old, NULL, 0);
|
||||
if (list_size <= 0) {
|
||||
if (list_size == -EOPNOTSUPP)
|
||||
return 0;
|
||||
return list_size;
|
||||
}
|
||||
|
||||
buf = kzalloc(list_size, GFP_KERNEL);
|
||||
if (!buf)
|
||||
return -ENOMEM;
|
||||
|
||||
error = -ENOMEM;
|
||||
value = kmalloc(XATTR_SIZE_MAX, GFP_KERNEL);
|
||||
if (!value)
|
||||
goto out;
|
||||
|
||||
list_size = vfs_listxattr(old, buf, list_size);
|
||||
if (list_size <= 0) {
|
||||
error = list_size;
|
||||
goto out_free_value;
|
||||
}
|
||||
|
||||
for (name = buf; name < (buf + list_size); name += strlen(name) + 1) {
|
||||
size = vfs_getxattr(old, name, value, XATTR_SIZE_MAX);
|
||||
if (size <= 0) {
|
||||
error = size;
|
||||
goto out_free_value;
|
||||
}
|
||||
error = vfs_setxattr(new, name, value, size, 0);
|
||||
if (error)
|
||||
goto out_free_value;
|
||||
}
|
||||
|
||||
out_free_value:
|
||||
kfree(value);
|
||||
out:
|
||||
kfree(buf);
|
||||
return error;
|
||||
}
|
||||
|
||||
/*
 * Copy @len bytes of file data from @old to @new.
 *
 * Data is moved with do_splice_direct() in pieces of at most
 * OVL_COPY_UP_CHUNK_SIZE bytes.  Before each piece the task is checked with
 * signal_pending_state(TASK_KILLABLE, ...) and the copy stops with -EINTR
 * if that reports a pending signal.
 *
 * Returns 0, or the negative value that stopped the loop.
 */
static int ovl_copy_up_data(struct path *old, struct path *new, loff_t len)
{
	struct file *src;
	struct file *dst;
	loff_t src_pos = 0;
	loff_t dst_pos = 0;
	int error = 0;

	if (len == 0)
		return 0;

	src = ovl_path_open(old, O_RDONLY);
	if (IS_ERR(src))
		return PTR_ERR(src);

	dst = ovl_path_open(new, O_WRONLY);
	if (IS_ERR(dst)) {
		error = PTR_ERR(dst);
		goto out_put_src;
	}

	/* FIXME: copy up sparse files efficiently */
	while (len) {
		size_t chunk = OVL_COPY_UP_CHUNK_SIZE;
		long copied;

		if (len < chunk)
			chunk = len;

		if (signal_pending_state(TASK_KILLABLE, current)) {
			error = -EINTR;
			break;
		}

		copied = do_splice_direct(src, &src_pos, dst, &dst_pos,
					  chunk, SPLICE_F_MOVE);
		if (copied <= 0) {
			error = copied;
			break;
		}
		/* Both offsets are expected to advance in lock-step */
		WARN_ON(src_pos != dst_pos);

		len -= copied;
	}

	fput(dst);
out_put_src:
	fput(src);
	return error;
}
|
||||
|
||||
static char *ovl_read_symlink(struct dentry *realdentry)
|
||||
{
|
||||
int res;
|
||||
char *buf;
|
||||
struct inode *inode = realdentry->d_inode;
|
||||
mm_segment_t old_fs;
|
||||
|
||||
res = -EINVAL;
|
||||
if (!inode->i_op->readlink)
|
||||
goto err;
|
||||
|
||||
res = -ENOMEM;
|
||||
buf = (char *) __get_free_page(GFP_KERNEL);
|
||||
if (!buf)
|
||||
goto err;
|
||||
|
||||
old_fs = get_fs();
|
||||
set_fs(get_ds());
|
||||
/* The cast to a user pointer is valid due to the set_fs() */
|
||||
res = inode->i_op->readlink(realdentry,
|
||||
(char __user *)buf, PAGE_SIZE - 1);
|
||||
set_fs(old_fs);
|
||||
if (res < 0) {
|
||||
free_page((unsigned long) buf);
|
||||
goto err;
|
||||
}
|
||||
buf[res] = '\0';
|
||||
|
||||
return buf;
|
||||
|
||||
err:
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
struct iattr attr = {
|
||||
.ia_valid =
|
||||
ATTR_ATIME | ATTR_MTIME | ATTR_ATIME_SET | ATTR_MTIME_SET,
|
||||
.ia_atime = stat->atime,
|
||||
.ia_mtime = stat->mtime,
|
||||
};
|
||||
|
||||
return notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
|
||||
int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
|
||||
{
|
||||
int err = 0;
|
||||
|
||||
if (!S_ISLNK(stat->mode)) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_MODE,
|
||||
.ia_mode = stat->mode,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err) {
|
||||
struct iattr attr = {
|
||||
.ia_valid = ATTR_UID | ATTR_GID,
|
||||
.ia_uid = stat->uid,
|
||||
.ia_gid = stat->gid,
|
||||
};
|
||||
err = notify_change(upperdentry, &attr, NULL);
|
||||
}
|
||||
if (!err)
|
||||
ovl_set_timestamps(upperdentry, stat);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
|
||||
struct dentry *dentry, struct path *lowerpath,
|
||||
struct kstat *stat, struct iattr *attr,
|
||||
const char *link)
|
||||
{
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry = NULL;
|
||||
struct dentry *upper = NULL;
|
||||
umode_t mode = stat->mode;
|
||||
int err;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out1;
|
||||
|
||||
/* Can't properly set mode on creation because of the umask */
|
||||
stat->mode &= S_IFMT;
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, NULL, true);
|
||||
stat->mode = mode;
|
||||
if (err)
|
||||
goto out2;
|
||||
|
||||
if (S_ISREG(stat->mode)) {
|
||||
struct path upperpath;
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
BUG_ON(upperpath.dentry != NULL);
|
||||
upperpath.dentry = newdentry;
|
||||
|
||||
err = ovl_copy_up_data(lowerpath, &upperpath, stat->size);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
|
||||
err = ovl_copy_xattr(lowerpath->dentry, newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&newdentry->d_inode->i_mutex);
|
||||
err = ovl_set_attr(newdentry, stat);
|
||||
if (!err && attr)
|
||||
err = notify_change(newdentry, attr, NULL);
|
||||
mutex_unlock(&newdentry->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
newdentry = NULL;
|
||||
|
||||
/*
|
||||
* Non-directores become opaque when copied up.
|
||||
*/
|
||||
if (!S_ISDIR(stat->mode))
|
||||
ovl_dentry_set_opaque(dentry, true);
|
||||
out2:
|
||||
dput(upper);
|
||||
out1:
|
||||
dput(newdentry);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy up a single dentry
|
||||
*
|
||||
* Directory renames only allowed on "pure upper" (already created on
|
||||
* upper filesystem, never copied up). Directories which are on lower or
|
||||
* are merged may not be renamed. For these -EXDEV is returned and
|
||||
* userspace has to deal with it. This means, when copying up a
|
||||
* directory we can rely on it and ancestors being stable.
|
||||
*
|
||||
* Non-directory renames start with copy up of source if necessary. The
|
||||
* actual rename will only proceed once the copy up was successful. Copy
|
||||
* up uses upper parent i_mutex for exclusion. Since rename can change
|
||||
* d_parent it is possible that the copy up will lock the old parent. At
|
||||
* that point the file will have already been copied up anyway.
|
||||
*/
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
int err;
|
||||
struct kstat pstat;
|
||||
struct path parentpath;
|
||||
struct dentry *upperdir;
|
||||
struct dentry *upperdentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
char *link = NULL;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
ovl_path_upper(parent, &parentpath);
|
||||
upperdir = parentpath.dentry;
|
||||
|
||||
err = vfs_getattr(&parentpath, &pstat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
if (S_ISLNK(stat->mode)) {
|
||||
link = ovl_read_symlink(lowerpath->dentry);
|
||||
if (IS_ERR(link))
|
||||
return PTR_ERR(link);
|
||||
}
|
||||
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_free_link;
|
||||
|
||||
override_cred->fsuid = stat->uid;
|
||||
override_cred->fsgid = stat->gid;
|
||||
/*
|
||||
* CAP_SYS_ADMIN for copying up extended attributes
|
||||
* CAP_DAC_OVERRIDE for create
|
||||
* CAP_FOWNER for chmod, timestamp update
|
||||
* CAP_FSETID for chmod
|
||||
* CAP_CHOWN for chown
|
||||
* CAP_MKNOD for mknod
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
cap_raise(override_cred->cap_effective, CAP_MKNOD);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
err = -EIO;
|
||||
if (lock_rename(workdir, upperdir) != NULL) {
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
goto out_unlock;
|
||||
}
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
unlock_rename(workdir, upperdir);
|
||||
err = 0;
|
||||
/* Raced with another copy-up? Do the setattr here */
|
||||
if (attr) {
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
}
|
||||
goto out_put_cred;
|
||||
}
|
||||
|
||||
err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath,
|
||||
stat, attr, link);
|
||||
if (!err) {
|
||||
/* Restore timestamps on parent (best effort) */
|
||||
ovl_set_timestamps(upperdir, &pstat);
|
||||
}
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out_put_cred:
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
|
||||
out_free_link:
|
||||
if (link)
|
||||
free_page((unsigned long) link);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_copy_up(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = 0;
|
||||
while (!err) {
|
||||
struct dentry *next;
|
||||
struct dentry *parent;
|
||||
struct path lowerpath;
|
||||
struct kstat stat;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
next = dget(dentry);
|
||||
/* find the topmost dentry not yet copied up */
|
||||
for (;;) {
|
||||
parent = dget_parent(next);
|
||||
|
||||
type = ovl_path_type(parent);
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
break;
|
||||
|
||||
dput(next);
|
||||
next = parent;
|
||||
}
|
||||
|
||||
ovl_path_lower(next, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (!err)
|
||||
err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL);
|
||||
|
||||
dput(parent);
|
||||
dput(next);
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
951
executer/kernel/mcoverlayfs/linux-4.0.9/dir.c
Normal file
951
executer/kernel/mcoverlayfs/linux-4.0.9/dir.c
Normal file
@ -0,0 +1,951 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
/*
 * Remove @wdentry from the directory inode @wdir, using rmdir for a
 * directory and unlink for anything else.  A failure is only logged.
 * An extra reference on @wdentry is held across the removal.
 */
void ovl_cleanup(struct inode *wdir, struct dentry *wdentry)
{
	int err;

	dget(wdentry);
	err = d_is_dir(wdentry) ? ovl_do_rmdir(wdir, wdentry)
				: ovl_do_unlink(wdir, wdentry);
	dput(wdentry);

	if (!err)
		return;

	pr_err("overlayfs: cleanup of '%pd2' failed (%i)\n",
	       wdentry, err);
}
|
||||
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry)
|
||||
{
|
||||
struct dentry *temp;
|
||||
char name[20];
|
||||
|
||||
snprintf(name, sizeof(name), "#%lx", (unsigned long) dentry);
|
||||
|
||||
temp = lookup_one_len(name, workdir, strlen(name));
|
||||
if (!IS_ERR(temp) && temp->d_inode) {
|
||||
pr_err("overlayfs: workdir/%s already exists\n", name);
|
||||
dput(temp);
|
||||
temp = ERR_PTR(-EIO);
|
||||
}
|
||||
|
||||
return temp;
|
||||
}
|
||||
|
||||
/* caller holds i_mutex on workdir */
|
||||
static struct dentry *ovl_whiteout(struct dentry *workdir,
|
||||
struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *whiteout;
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
|
||||
whiteout = ovl_lookup_temp(workdir, dentry);
|
||||
if (IS_ERR(whiteout))
|
||||
return whiteout;
|
||||
|
||||
err = ovl_do_whiteout(wdir, whiteout);
|
||||
if (err) {
|
||||
dput(whiteout);
|
||||
whiteout = ERR_PTR(err);
|
||||
}
|
||||
|
||||
return whiteout;
|
||||
}
|
||||
|
||||
/*
 * Create the object for @newdentry in the directory inode @dir.
 *
 * With @hardlink set a link to it is made; otherwise the file type bits of
 * @stat->mode select create, mkdir, mknod or symlink (@link being the
 * symlink target).  Unknown types yield -EPERM.  @debug is passed through
 * to the ovl_do_*() helpers.
 *
 * Returns -ESTALE if @newdentry is already positive, -ENOENT if the helper
 * reported success but left the dentry negative, otherwise the helper's
 * result.
 */
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
		    struct kstat *stat, const char *link,
		    struct dentry *hardlink, bool debug)
{
	int err;

	if (newdentry->d_inode)
		return -ESTALE;

	if (hardlink)
		err = ovl_do_link(hardlink, dir, newdentry, debug);
	else if (S_ISREG(stat->mode))
		err = ovl_do_create(dir, newdentry, stat->mode, debug);
	else if (S_ISDIR(stat->mode))
		err = ovl_do_mkdir(dir, newdentry, stat->mode, debug);
	else if (S_ISCHR(stat->mode) || S_ISBLK(stat->mode) ||
		 S_ISFIFO(stat->mode) || S_ISSOCK(stat->mode))
		err = ovl_do_mknod(dir, newdentry,
				   stat->mode, stat->rdev, debug);
	else if (S_ISLNK(stat->mode))
		err = ovl_do_symlink(dir, newdentry, link, debug);
	else
		err = -EPERM;

	/*
	 * Not quite sure if non-instantiated dentry is legal or not.
	 * VFS doesn't seem to care so check and warn here.
	 */
	if (!err && WARN_ON(!newdentry->d_inode))
		err = -ENOENT;

	return err;
}
|
||||
|
||||
static int ovl_set_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
return ovl_do_setxattr(upperdentry, OVL_XATTR_OPAQUE, "y", 1, 0);
|
||||
}
|
||||
|
||||
static void ovl_remove_opaque(struct dentry *upperdentry)
|
||||
{
|
||||
int err;
|
||||
|
||||
err = ovl_do_removexattr(upperdentry, OVL_XATTR_OPAQUE);
|
||||
if (err) {
|
||||
pr_warn("overlayfs: failed to remove opaque from '%s' (%i)\n",
|
||||
upperdentry->d_name.name, err);
|
||||
}
|
||||
}
|
||||
|
||||
static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type type;
|
||||
struct path realpath;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
err = vfs_getattr(&realpath, stat);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
stat->dev = dentry->d_sb->s_dev;
|
||||
stat->ino = dentry->d_inode->i_ino;
|
||||
|
||||
/*
|
||||
* It's probably not worth it to count subdirs to get the
|
||||
* correct link count. nlink=1 seems to pacify 'find' and
|
||||
* other utilities.
|
||||
*/
|
||||
if (OVL_TYPE_MERGE(type))
|
||||
stat->nlink = 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_create_upper(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
mutex_lock_nested(&udir->i_mutex, I_MUTEX_PARENT);
|
||||
newdentry = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
err = ovl_create_real(udir, newdentry, stat, link, hardlink, false);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
mutex_unlock(&udir->i_mutex);
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_lock_rename_workdir(struct dentry *workdir,
|
||||
struct dentry *upperdir)
|
||||
{
|
||||
/* Workdir should not be the same as upperdir */
|
||||
if (workdir == upperdir)
|
||||
goto err;
|
||||
|
||||
/* Workdir should not be subdir of upperdir and vice versa */
|
||||
if (lock_rename(workdir, upperdir) != NULL)
|
||||
goto err_unlock;
|
||||
|
||||
return 0;
|
||||
|
||||
err_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
err:
|
||||
pr_err("overlayfs: failed to lock workdir+upperdir\n");
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static struct dentry *ovl_clear_empty(struct dentry *dentry,
|
||||
struct list_head *list)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct path upperpath;
|
||||
struct dentry *upper;
|
||||
struct dentry *opaquedir;
|
||||
struct kstat stat;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return ERR_PTR(-EROFS);
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
ovl_path_upper(dentry, &upperpath);
|
||||
err = vfs_getattr(&upperpath, &stat);
|
||||
if (err)
|
||||
goto out_unlock;
|
||||
|
||||
err = -ESTALE;
|
||||
if (!S_ISDIR(stat.mode))
|
||||
goto out_unlock;
|
||||
upper = upperpath.dentry;
|
||||
if (upper->d_parent->d_inode != udir)
|
||||
goto out_unlock;
|
||||
|
||||
opaquedir = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir))
|
||||
goto out_unlock;
|
||||
|
||||
err = ovl_create_real(wdir, opaquedir, &stat, NULL, NULL, true);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_copy_xattr(upper, opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_set_opaque(opaquedir);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
mutex_lock(&opaquedir->d_inode->i_mutex);
|
||||
err = ovl_set_attr(opaquedir, &stat);
|
||||
mutex_unlock(&opaquedir->d_inode->i_mutex);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, opaquedir, udir, upper, RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup_whiteouts(upper, list);
|
||||
ovl_cleanup(wdir, upper);
|
||||
unlock_rename(workdir, upperdir);
|
||||
|
||||
/* dentry's upper doesn't match now, get rid of it */
|
||||
d_drop(dentry);
|
||||
|
||||
return opaquedir;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, opaquedir);
|
||||
out_dput:
|
||||
dput(opaquedir);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return ERR_PTR(err);
|
||||
}
|
||||
|
||||
static struct dentry *ovl_check_empty_and_clear(struct dentry *dentry)
|
||||
{
|
||||
int err;
|
||||
struct dentry *ret = NULL;
|
||||
LIST_HEAD(list);
|
||||
|
||||
err = ovl_check_empty_dir(dentry, &list);
|
||||
if (err)
|
||||
ret = ERR_PTR(err);
|
||||
else {
|
||||
/*
|
||||
* If no upperdentry then skip clearing whiteouts.
|
||||
*
|
||||
* Can race with copy-up, since we don't hold the upperdir
|
||||
* mutex. Doesn't matter, since copy-up can't create a
|
||||
* non-empty directory from an empty one.
|
||||
*/
|
||||
if (ovl_dentry_upper(dentry))
|
||||
ret = ovl_clear_empty(dentry, &list);
|
||||
}
|
||||
|
||||
ovl_cache_free(&list);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink)
|
||||
{
|
||||
struct dentry *workdir = ovl_workdir(dentry);
|
||||
struct inode *wdir = workdir->d_inode;
|
||||
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
|
||||
struct inode *udir = upperdir->d_inode;
|
||||
struct dentry *upper;
|
||||
struct dentry *newdentry;
|
||||
int err;
|
||||
|
||||
if (WARN_ON(!workdir))
|
||||
return -EROFS;
|
||||
|
||||
err = ovl_lock_rename_workdir(workdir, upperdir);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
newdentry = ovl_lookup_temp(workdir, dentry);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
|
||||
upper = lookup_one_len(dentry->d_name.name, upperdir,
|
||||
dentry->d_name.len);
|
||||
err = PTR_ERR(upper);
|
||||
if (IS_ERR(upper))
|
||||
goto out_dput;
|
||||
|
||||
err = ovl_create_real(wdir, newdentry, stat, link, hardlink, true);
|
||||
if (err)
|
||||
goto out_dput2;
|
||||
|
||||
if (S_ISDIR(stat->mode)) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper,
|
||||
RENAME_EXCHANGE);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
|
||||
ovl_cleanup(wdir, upper);
|
||||
} else {
|
||||
err = ovl_do_rename(wdir, newdentry, udir, upper, 0);
|
||||
if (err)
|
||||
goto out_cleanup;
|
||||
}
|
||||
ovl_dentry_version_inc(dentry->d_parent);
|
||||
ovl_dentry_update(dentry, newdentry);
|
||||
ovl_copyattr(newdentry->d_inode, inode);
|
||||
d_instantiate(dentry, inode);
|
||||
newdentry = NULL;
|
||||
out_dput2:
|
||||
dput(upper);
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(workdir, upperdir);
|
||||
out:
|
||||
return err;
|
||||
|
||||
out_cleanup:
|
||||
ovl_cleanup(wdir, newdentry);
|
||||
goto out_dput2;
|
||||
}
|
||||
|
||||
/*
 * Common back end for create/mkdir/mknod/symlink/link on the overlay.
 *
 * Allocates the overlay inode, copies up the parent directory and then
 * creates the object: directly in upper when @dentry is not opaque, or via
 * ovl_create_over_whiteout() under temporarily raised capabilities when it
 * is.
 *
 * Returns 0 on success or a negative errno; on failure the new inode is
 * released.
 */
static int ovl_create_or_link(struct dentry *dentry, int mode, dev_t rdev,
			      const char *link, struct dentry *hardlink)
{
	struct kstat stat = {
		.mode = mode,
		.rdev = rdev,
	};
	struct inode *inode;
	int err;

	inode = ovl_new_inode(dentry->d_sb, mode, dentry->d_fsdata);
	if (!inode)
		return -ENOMEM;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_iput;

	if (ovl_dentry_is_opaque(dentry)) {
		const struct cred *saved_cred;
		struct cred *raised_cred = prepare_creds();

		if (!raised_cred) {
			err = -ENOMEM;
			goto out_iput;
		}

		/*
		 * CAP_SYS_ADMIN for setting opaque xattr
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 */
		cap_raise(raised_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(raised_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(raised_cred->cap_effective, CAP_FOWNER);
		saved_cred = override_creds(raised_cred);

		err = ovl_create_over_whiteout(dentry, inode, &stat, link,
					       hardlink);

		revert_creds(saved_cred);
		put_cred(raised_cred);
	} else {
		err = ovl_create_upper(dentry, inode, &stat, link, hardlink);
	}

	/* On success the inode has been attached to the dentry; keep it */
	if (!err)
		inode = NULL;
out_iput:
	iput(inode);
	return err;
}
|
||||
|
||||
/*
 * Create a new object for @dentry, bracketed by ovl_want_write() and
 * ovl_drop_write().  Returns 0 or a negative errno.
 */
static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
			     const char *link)
{
	int err = ovl_want_write(dentry);

	if (err)
		return err;

	err = ovl_create_or_link(dentry, mode, rdev, link, NULL);
	ovl_drop_write(dentry);

	return err;
}
|
||||
|
||||
static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
bool excl)
|
||||
{
|
||||
return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
|
||||
}
|
||||
|
||||
/*
 * Create a directory: the permission bits of @mode are kept and the type
 * is forced to S_IFDIR.  @dir is not used.
 */
static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
	int dir_mode = (mode & 07777) | S_IFDIR;

	return ovl_create_object(dentry, dir_mode, 0, NULL);
}
|
||||
|
||||
static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
dev_t rdev)
|
||||
{
|
||||
/* Don't allow creation of "whiteout" on overlay */
|
||||
if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
|
||||
return -EPERM;
|
||||
|
||||
return ovl_create_object(dentry, mode, rdev, NULL);
|
||||
}
|
||||
|
||||
static int ovl_symlink(struct inode *dir, struct dentry *dentry,
|
||||
const char *link)
|
||||
{
|
||||
return ovl_create_object(dentry, S_IFLNK, 0, link);
|
||||
}
|
||||
|
||||
static int ovl_link(struct dentry *old, struct inode *newdir,
|
||||
struct dentry *new)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upper;
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
upper = ovl_dentry_upper(old);
|
||||
err = ovl_create_or_link(new, upper->d_inode->i_mode, 0, NULL, upper);
|
||||
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Remove @dentry by putting a whiteout in its place in the upper directory.
 *
 * For directories the emptiness is checked first; a merged or lower
 * directory is additionally replaced by an opaque one through
 * ovl_check_empty_and_clear().  A whiteout is then created in the workdir
 * and renamed over the upper entry (exchanged with it, then the old entry
 * removed, for directories).  @dentry is unhashed once the rename lock has
 * been taken and the whiteout created, whether or not the rename succeeds.
 *
 * Returns 0 on success, -EROFS when there is no workdir, -ESTALE when the
 * upper entry is no longer in upperdir, or another negative errno.
 */
static int ovl_remove_and_whiteout(struct dentry *dentry, bool is_dir)
{
	struct dentry *workdir = ovl_workdir(dentry);
	struct inode *wdir;
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *udir = upperdir->d_inode;
	struct dentry *whiteout;
	struct dentry *upper;
	struct dentry *opaquedir = NULL;
	int err;

	if (WARN_ON(!workdir))
		return -EROFS;

	/* Only dereference workdir once it is known to be non-NULL */
	wdir = workdir->d_inode;

	if (is_dir) {
		if (OVL_TYPE_MERGE_OR_LOWER(ovl_path_type(dentry))) {
			opaquedir = ovl_check_empty_and_clear(dentry);
			err = PTR_ERR(opaquedir);
			if (IS_ERR(opaquedir))
				goto out;
		} else {
			LIST_HEAD(list);

			/*
			 * When removing an empty opaque directory, then it
			 * makes no sense to replace it with an exact replica of
			 * itself.  But emptiness still needs to be checked.
			 */
			err = ovl_check_empty_dir(dentry, &list);
			ovl_cache_free(&list);
			if (err)
				goto out;
		}
	}

	err = ovl_lock_rename_workdir(workdir, upperdir);
	if (err)
		goto out_dput;

	whiteout = ovl_whiteout(workdir, dentry);
	err = PTR_ERR(whiteout);
	if (IS_ERR(whiteout))
		goto out_unlock;

	upper = ovl_dentry_upper(dentry);
	if (!upper) {
		/* Nothing on upper yet: just move the whiteout into place */
		upper = lookup_one_len(dentry->d_name.name, upperdir,
				       dentry->d_name.len);
		err = PTR_ERR(upper);
		if (IS_ERR(upper))
			goto kill_whiteout;

		err = ovl_do_rename(wdir, whiteout, udir, upper, 0);
		dput(upper);
		if (err)
			goto kill_whiteout;
	} else {
		int flags = 0;

		/* The cleared replacement, if any, is what sits in upperdir now */
		if (opaquedir)
			upper = opaquedir;
		err = -ESTALE;
		if (upper->d_parent != upperdir)
			goto kill_whiteout;

		if (is_dir)
			flags |= RENAME_EXCHANGE;

		err = ovl_do_rename(wdir, whiteout, udir, upper, flags);
		if (err)
			goto kill_whiteout;

		/* After an exchange the directory is in the workdir; remove it */
		if (is_dir)
			ovl_cleanup(wdir, upper);
	}
	ovl_dentry_version_inc(dentry->d_parent);
out_d_drop:
	d_drop(dentry);
	dput(whiteout);
out_unlock:
	unlock_rename(workdir, upperdir);
out_dput:
	dput(opaquedir);
out:
	return err;

kill_whiteout:
	ovl_cleanup(wdir, whiteout);
	goto out_d_drop;
}
|
||||
|
||||
/*
 * Remove the upper dentry of @dentry straight from the upper directory
 * (no whiteout), under the upper directory's i_mutex.  ovl_do_remove()
 * takes this path when the entry is pure upper.
 *
 * Returns -ESTALE if the upper dentry is no longer a child of the upper
 * directory, otherwise the result of vfs_rmdir()/vfs_unlink().  @dentry is
 * unhashed in every case.
 */
static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
	struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
	struct inode *dir = upperdir->d_inode;
	struct dentry *upper = ovl_dentry_upper(dentry);
	int err = -ESTALE;

	mutex_lock_nested(&dir->i_mutex, I_MUTEX_PARENT);
	if (upper->d_parent == upperdir) {
		/* Don't let d_delete() think it can reset d_inode */
		dget(upper);
		err = is_dir ? vfs_rmdir(dir, upper)
			     : vfs_unlink(dir, upper, NULL);
		dput(upper);
		ovl_dentry_version_inc(dentry->d_parent);
	}

	/*
	 * Keeping this dentry hashed would mean having to release
	 * upperpath/lowerpath, which could only be done if we are the
	 * sole user of this dentry.  Too tricky...  Just unhash for
	 * now.
	 */
	d_drop(dentry);
	mutex_unlock(&dir->i_mutex);

	return err;
}
|
||||
|
||||
static inline int ovl_check_sticky(struct dentry *dentry)
|
||||
{
|
||||
struct inode *dir = ovl_dentry_real(dentry->d_parent)->d_inode;
|
||||
struct inode *inode = ovl_dentry_real(dentry)->d_inode;
|
||||
|
||||
if (check_sticky(dir, inode))
|
||||
return -EPERM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Common implementation of unlink and rmdir.
 *
 * After the sticky check and copy up of the parent, a pure upper entry is
 * removed directly; anything else is replaced by a whiteout under
 * temporarily raised capabilities.
 *
 * Returns 0 or a negative errno.
 */
static int ovl_do_remove(struct dentry *dentry, bool is_dir)
{
	enum ovl_path_type type;
	int err;

	err = ovl_check_sticky(dentry);
	if (err)
		return err;

	err = ovl_want_write(dentry);
	if (err)
		return err;

	err = ovl_copy_up(dentry->d_parent);
	if (err)
		goto out_drop_write;

	type = ovl_path_type(dentry);
	if (!OVL_TYPE_PURE_UPPER(type)) {
		const struct cred *saved_cred;
		struct cred *raised_cred = prepare_creds();

		if (!raised_cred) {
			err = -ENOMEM;
			goto out_drop_write;
		}

		/*
		 * CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
		 * CAP_DAC_OVERRIDE for create in workdir, rename
		 * CAP_FOWNER for removing whiteout from sticky dir
		 * CAP_FSETID for chmod of opaque dir
		 * CAP_CHOWN for chown of opaque dir
		 */
		cap_raise(raised_cred->cap_effective, CAP_SYS_ADMIN);
		cap_raise(raised_cred->cap_effective, CAP_DAC_OVERRIDE);
		cap_raise(raised_cred->cap_effective, CAP_FOWNER);
		cap_raise(raised_cred->cap_effective, CAP_FSETID);
		cap_raise(raised_cred->cap_effective, CAP_CHOWN);
		saved_cred = override_creds(raised_cred);

		err = ovl_remove_and_whiteout(dentry, is_dir);

		revert_creds(saved_cred);
		put_cred(raised_cred);
	} else {
		err = ovl_remove_upper(dentry, is_dir);
	}

out_drop_write:
	ovl_drop_write(dentry);
	return err;
}
|
||||
|
||||
static int ovl_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, false);
|
||||
}
|
||||
|
||||
static int ovl_rmdir(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
return ovl_do_remove(dentry, true);
|
||||
}
|
||||
|
||||
static int ovl_rename2(struct inode *olddir, struct dentry *old,
|
||||
struct inode *newdir, struct dentry *new,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
enum ovl_path_type old_type;
|
||||
enum ovl_path_type new_type;
|
||||
struct dentry *old_upperdir;
|
||||
struct dentry *new_upperdir;
|
||||
struct dentry *olddentry;
|
||||
struct dentry *newdentry;
|
||||
struct dentry *trap;
|
||||
bool old_opaque;
|
||||
bool new_opaque;
|
||||
bool new_create = false;
|
||||
bool cleanup_whiteout = false;
|
||||
bool overwrite = !(flags & RENAME_EXCHANGE);
|
||||
bool is_dir = d_is_dir(old);
|
||||
bool new_is_dir = false;
|
||||
struct dentry *opaquedir = NULL;
|
||||
const struct cred *old_cred = NULL;
|
||||
struct cred *override_cred = NULL;
|
||||
|
||||
err = -EINVAL;
|
||||
if (flags & ~(RENAME_EXCHANGE | RENAME_NOREPLACE))
|
||||
goto out;
|
||||
|
||||
flags &= ~RENAME_NOREPLACE;
|
||||
|
||||
err = ovl_check_sticky(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
/* Don't copy up directory trees */
|
||||
old_type = ovl_path_type(old);
|
||||
err = -EXDEV;
|
||||
if (OVL_TYPE_MERGE_OR_LOWER(old_type) && is_dir)
|
||||
goto out;
|
||||
|
||||
if (new->d_inode) {
|
||||
err = ovl_check_sticky(new);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (d_is_dir(new))
|
||||
new_is_dir = true;
|
||||
|
||||
new_type = ovl_path_type(new);
|
||||
err = -EXDEV;
|
||||
if (!overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir)
|
||||
goto out;
|
||||
|
||||
err = 0;
|
||||
if (!OVL_TYPE_UPPER(new_type) && !OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_lower(old)->d_inode ==
|
||||
ovl_dentry_lower(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
if (OVL_TYPE_UPPER(new_type) && OVL_TYPE_UPPER(old_type)) {
|
||||
if (ovl_dentry_upper(old)->d_inode ==
|
||||
ovl_dentry_upper(new)->d_inode)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
if (ovl_dentry_is_opaque(new))
|
||||
new_type = __OVL_PATH_UPPER;
|
||||
else
|
||||
new_type = __OVL_PATH_UPPER | __OVL_PATH_PURE;
|
||||
}
|
||||
|
||||
err = ovl_want_write(old);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = ovl_copy_up(old);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(new->d_parent);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
if (!overwrite) {
|
||||
err = ovl_copy_up(new);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
}
|
||||
|
||||
old_opaque = !OVL_TYPE_PURE_UPPER(old_type);
|
||||
new_opaque = !OVL_TYPE_PURE_UPPER(new_type);
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = -ENOMEM;
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred)
|
||||
goto out_drop_write;
|
||||
|
||||
/*
|
||||
* CAP_SYS_ADMIN for setting xattr on whiteout, opaque dir
|
||||
* CAP_DAC_OVERRIDE for create in workdir
|
||||
* CAP_FOWNER for removing whiteout from sticky dir
|
||||
* CAP_FSETID for chmod of opaque dir
|
||||
* CAP_CHOWN for chown of opaque dir
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_SYS_ADMIN);
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
cap_raise(override_cred->cap_effective, CAP_FOWNER);
|
||||
cap_raise(override_cred->cap_effective, CAP_FSETID);
|
||||
cap_raise(override_cred->cap_effective, CAP_CHOWN);
|
||||
old_cred = override_creds(override_cred);
|
||||
}
|
||||
|
||||
if (overwrite && OVL_TYPE_MERGE_OR_LOWER(new_type) && new_is_dir) {
|
||||
opaquedir = ovl_check_empty_and_clear(new);
|
||||
err = PTR_ERR(opaquedir);
|
||||
if (IS_ERR(opaquedir)) {
|
||||
opaquedir = NULL;
|
||||
goto out_revert_creds;
|
||||
}
|
||||
}
|
||||
|
||||
if (overwrite) {
|
||||
if (old_opaque) {
|
||||
if (new->d_inode || !new_opaque) {
|
||||
/* Whiteout source */
|
||||
flags |= RENAME_WHITEOUT;
|
||||
} else {
|
||||
/* Switch whiteouts */
|
||||
flags |= RENAME_EXCHANGE;
|
||||
}
|
||||
} else if (is_dir && !new->d_inode && new_opaque) {
|
||||
flags |= RENAME_EXCHANGE;
|
||||
cleanup_whiteout = true;
|
||||
}
|
||||
}
|
||||
|
||||
old_upperdir = ovl_dentry_upper(old->d_parent);
|
||||
new_upperdir = ovl_dentry_upper(new->d_parent);
|
||||
|
||||
trap = lock_rename(new_upperdir, old_upperdir);
|
||||
|
||||
olddentry = ovl_dentry_upper(old);
|
||||
newdentry = ovl_dentry_upper(new);
|
||||
if (newdentry) {
|
||||
if (opaquedir) {
|
||||
newdentry = opaquedir;
|
||||
opaquedir = NULL;
|
||||
} else {
|
||||
dget(newdentry);
|
||||
}
|
||||
} else {
|
||||
new_create = true;
|
||||
newdentry = lookup_one_len(new->d_name.name, new_upperdir,
|
||||
new->d_name.len);
|
||||
err = PTR_ERR(newdentry);
|
||||
if (IS_ERR(newdentry))
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
err = -ESTALE;
|
||||
if (olddentry->d_parent != old_upperdir)
|
||||
goto out_dput;
|
||||
if (newdentry->d_parent != new_upperdir)
|
||||
goto out_dput;
|
||||
if (olddentry == trap)
|
||||
goto out_dput;
|
||||
if (newdentry == trap)
|
||||
goto out_dput;
|
||||
|
||||
if (is_dir && !old_opaque && new_opaque) {
|
||||
err = ovl_set_opaque(olddentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque) {
|
||||
err = ovl_set_opaque(newdentry);
|
||||
if (err)
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (old_opaque || new_opaque) {
|
||||
err = ovl_do_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
flags);
|
||||
} else {
|
||||
/* No debug for the plain case */
|
||||
BUG_ON(flags & ~RENAME_EXCHANGE);
|
||||
err = vfs_rename(old_upperdir->d_inode, olddentry,
|
||||
new_upperdir->d_inode, newdentry,
|
||||
NULL, flags);
|
||||
}
|
||||
|
||||
if (err) {
|
||||
if (is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (is_dir && old_opaque && !new_opaque)
|
||||
ovl_remove_opaque(olddentry);
|
||||
if (!overwrite && new_is_dir && !old_opaque && new_opaque)
|
||||
ovl_remove_opaque(newdentry);
|
||||
|
||||
if (old_opaque != new_opaque) {
|
||||
ovl_dentry_set_opaque(old, new_opaque);
|
||||
if (!overwrite)
|
||||
ovl_dentry_set_opaque(new, old_opaque);
|
||||
}
|
||||
|
||||
if (cleanup_whiteout)
|
||||
ovl_cleanup(old_upperdir->d_inode, newdentry);
|
||||
|
||||
ovl_dentry_version_inc(old->d_parent);
|
||||
ovl_dentry_version_inc(new->d_parent);
|
||||
|
||||
out_dput:
|
||||
dput(newdentry);
|
||||
out_unlock:
|
||||
unlock_rename(new_upperdir, old_upperdir);
|
||||
out_revert_creds:
|
||||
if (old_opaque || new_opaque) {
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
out_drop_write:
|
||||
ovl_drop_write(old);
|
||||
out:
|
||||
dput(opaquedir);
|
||||
return err;
|
||||
}
|
||||
|
||||
const struct inode_operations ovl_dir_inode_operations = {
|
||||
.lookup = ovl_lookup,
|
||||
.mkdir = ovl_mkdir,
|
||||
.symlink = ovl_symlink,
|
||||
.unlink = ovl_unlink,
|
||||
.rmdir = ovl_rmdir,
|
||||
.rename2 = ovl_rename2,
|
||||
.link = ovl_link,
|
||||
.setattr = ovl_setattr,
|
||||
.create = ovl_create,
|
||||
.mknod = ovl_mknod,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_dir_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
438
executer/kernel/mcoverlayfs/linux-4.0.9/inode.c
Normal file
438
executer/kernel/mcoverlayfs/linux-4.0.9/inode.c
Normal file
@ -0,0 +1,438 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xattr.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr,
|
||||
bool no_data)
|
||||
{
|
||||
int err;
|
||||
struct dentry *parent;
|
||||
struct kstat stat;
|
||||
struct path lowerpath;
|
||||
|
||||
parent = dget_parent(dentry);
|
||||
err = ovl_copy_up(parent);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
ovl_path_lower(dentry, &lowerpath);
|
||||
err = vfs_getattr(&lowerpath, &stat);
|
||||
if (err)
|
||||
goto out_dput_parent;
|
||||
|
||||
if (no_data)
|
||||
stat.size = 0;
|
||||
|
||||
err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr);
|
||||
|
||||
out_dput_parent:
|
||||
dput(parent);
|
||||
return err;
|
||||
}
|
||||
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr)
|
||||
{
|
||||
int err;
|
||||
struct dentry *upperdentry;
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
upperdentry = ovl_dentry_upper(dentry);
|
||||
if (upperdentry) {
|
||||
mutex_lock(&upperdentry->d_inode->i_mutex);
|
||||
err = notify_change(upperdentry, attr, NULL);
|
||||
mutex_unlock(&upperdentry->d_inode->i_mutex);
|
||||
} else {
|
||||
err = ovl_copy_up_last(dentry, attr, false);
|
||||
}
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct path realpath;
|
||||
|
||||
ovl_path_real(dentry, &realpath);
|
||||
return vfs_getattr(&realpath, stat);
|
||||
}
|
||||
|
||||
int ovl_permission(struct inode *inode, int mask)
|
||||
{
|
||||
struct ovl_entry *oe;
|
||||
struct dentry *alias = NULL;
|
||||
struct inode *realinode;
|
||||
struct dentry *realdentry;
|
||||
bool is_upper;
|
||||
int err;
|
||||
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
oe = inode->i_private;
|
||||
} else if (mask & MAY_NOT_BLOCK) {
|
||||
return -ECHILD;
|
||||
} else {
|
||||
/*
|
||||
* For non-directories find an alias and get the info
|
||||
* from there.
|
||||
*/
|
||||
alias = d_find_any_alias(inode);
|
||||
if (WARN_ON(!alias))
|
||||
return -ENOENT;
|
||||
|
||||
oe = alias->d_fsdata;
|
||||
}
|
||||
|
||||
realdentry = ovl_entry_real(oe, &is_upper);
|
||||
|
||||
/* Careful in RCU walk mode */
|
||||
realinode = ACCESS_ONCE(realdentry->d_inode);
|
||||
if (!realinode) {
|
||||
WARN_ON(!(mask & MAY_NOT_BLOCK));
|
||||
err = -ENOENT;
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
if (mask & MAY_WRITE) {
|
||||
umode_t mode = realinode->i_mode;
|
||||
|
||||
/*
|
||||
* Writes will always be redirected to upper layer, so
|
||||
* ignore lower layer being read-only.
|
||||
*
|
||||
* If the overlay itself is read-only then proceed
|
||||
* with the permission check, don't return EROFS.
|
||||
* This will only happen if this is the lower layer of
|
||||
* another overlayfs.
|
||||
*
|
||||
* If upper fs becomes read-only after the overlay was
|
||||
* constructed return EROFS to prevent modification of
|
||||
* upper layer.
|
||||
*/
|
||||
err = -EROFS;
|
||||
if (is_upper && !IS_RDONLY(inode) && IS_RDONLY(realinode) &&
|
||||
(S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
|
||||
goto out_dput;
|
||||
}
|
||||
|
||||
err = __inode_permission(realinode, mask);
|
||||
out_dput:
|
||||
dput(alias);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
/*
 * Cookie handed from ovl_follow_link() to ovl_put_link() so that the real
 * inode's ->put_link() can be called with its own arguments.
 */
struct ovl_link_data {
	struct dentry *realdentry;	/* real dentry the link was followed on */
	void *cookie;			/* value returned by the real ->follow_link() */
};
|
||||
|
||||
static void *ovl_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
void *ret;
|
||||
struct dentry *realdentry;
|
||||
struct inode *realinode;
|
||||
|
||||
realdentry = ovl_dentry_real(dentry);
|
||||
realinode = realdentry->d_inode;
|
||||
|
||||
if (WARN_ON(!realinode->i_op->follow_link))
|
||||
return ERR_PTR(-EPERM);
|
||||
|
||||
ret = realinode->i_op->follow_link(realdentry, nd);
|
||||
if (IS_ERR(ret))
|
||||
return ret;
|
||||
|
||||
if (realinode->i_op->put_link) {
|
||||
struct ovl_link_data *data;
|
||||
|
||||
data = kmalloc(sizeof(struct ovl_link_data), GFP_KERNEL);
|
||||
if (!data) {
|
||||
realinode->i_op->put_link(realdentry, nd, ret);
|
||||
return ERR_PTR(-ENOMEM);
|
||||
}
|
||||
data->realdentry = realdentry;
|
||||
data->cookie = ret;
|
||||
|
||||
return data;
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void ovl_put_link(struct dentry *dentry, struct nameidata *nd, void *c)
|
||||
{
|
||||
struct inode *realinode;
|
||||
struct ovl_link_data *data = c;
|
||||
|
||||
if (!data)
|
||||
return;
|
||||
|
||||
realinode = data->realdentry->d_inode;
|
||||
realinode->i_op->put_link(data->realdentry, nd, data->cookie);
|
||||
kfree(data);
|
||||
}
|
||||
|
||||
/*
 * ->readlink() handler: delegates to the real inode's ->readlink() after
 * updating the access time of the real path.
 *
 * Returns -EINVAL if the real inode has no ->readlink(), otherwise the
 * result of the real ->readlink().
 */
static int ovl_readlink(struct dentry *dentry, char __user *buf, int bufsiz)
{
	struct path real;
	struct inode *realinode;

	ovl_path_real(dentry, &real);
	realinode = real.dentry->d_inode;

	if (realinode->i_op->readlink) {
		touch_atime(&real);
		return realinode->i_op->readlink(real.dentry, buf, bufsiz);
	}

	return -EINVAL;
}
|
||||
|
||||
|
||||
static bool ovl_is_private_xattr(const char *name)
|
||||
{
|
||||
return strncmp(name, OVL_XATTR_PRE_NAME, OVL_XATTR_PRE_LEN) == 0;
|
||||
}
|
||||
|
||||
/*
 * ->setxattr() handler.
 *
 * Overlay-private xattr names are refused with -EPERM.  Otherwise the
 * entry is copied up and the xattr is set on the upper dentry.
 *
 * Returns 0 on success or a negative errno.
 */
int ovl_setxattr(struct dentry *dentry, const char *name,
		 const void *value, size_t size, int flags)
{
	int err = ovl_want_write(dentry);

	if (err)
		return err;

	if (ovl_is_private_xattr(name)) {
		err = -EPERM;
	} else {
		err = ovl_copy_up(dentry);
		if (!err) {
			struct dentry *upperdentry = ovl_dentry_upper(dentry);

			err = vfs_setxattr(upperdentry, name, value, size,
					   flags);
		}
	}

	ovl_drop_write(dentry);
	return err;
}
|
||||
|
||||
static bool ovl_need_xattr_filter(struct dentry *dentry,
|
||||
enum ovl_path_type type)
|
||||
{
|
||||
if ((type & (__OVL_PATH_PURE | __OVL_PATH_UPPER)) == __OVL_PATH_UPPER)
|
||||
return S_ISDIR(dentry->d_inode->i_mode);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
 * ->getxattr() handler: reads @name from the real dentry.
 *
 * Overlay-private xattrs are reported as missing (-ENODATA) on entries
 * for which ovl_need_xattr_filter() is true.
 */
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
		     void *value, size_t size)
{
	struct path real;
	enum ovl_path_type type = ovl_path_real(dentry, &real);
	bool hidden = ovl_need_xattr_filter(dentry, type) &&
		      ovl_is_private_xattr(name);

	if (hidden)
		return -ENODATA;

	return vfs_getxattr(real.dentry, name, value, size);
}
|
||||
|
||||
/*
 * ->listxattr() handler: lists the xattrs of the real dentry, removing
 * overlay-private names on entries for which ovl_need_xattr_filter() is
 * true.
 *
 * @list: buffer receiving the NUL-separated names
 * @size: size of @list; 0 means "only report the required size"
 *
 * Returns the result of vfs_listxattr() unchanged when it is <= 0, when
 * @size is 0, or when no filtering applies; otherwise the number of bytes
 * left in @list after filtering.  Returns -EIO if the real filesystem
 * handed back a list whose last name is not NUL-terminated within the
 * reported length.
 */
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct path realpath;
	enum ovl_path_type type = ovl_path_real(dentry, &realpath);
	ssize_t res;
	size_t len;
	char *s;

	res = vfs_listxattr(realpath.dentry, list, size);
	if (res <= 0 || size == 0)
		return res;

	if (!ovl_need_xattr_filter(dentry, type))
		return res;

	/* filter out private xattrs; len is the number of unscanned bytes */
	for (s = list, len = res; len;) {
		/* Bounded scan: never read past what the real fs returned */
		size_t slen = strnlen(s, len) + 1;

		/* underlying fs providing us with a broken xattr list? */
		if (WARN_ON(slen > len))
			return -EIO;

		len -= slen;
		if (ovl_is_private_xattr(s)) {
			/* Close the gap by shifting the remaining names down */
			res -= slen;
			memmove(s, s + slen, len);
		} else {
			s += slen;
		}
	}

	return res;
}
|
||||
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type = ovl_path_real(dentry, &realpath);
|
||||
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
err = -ENODATA;
|
||||
if (ovl_need_xattr_filter(dentry, type) && ovl_is_private_xattr(name))
|
||||
goto out_drop_write;
|
||||
|
||||
if (!OVL_TYPE_UPPER(type)) {
|
||||
err = vfs_getxattr(realpath.dentry, name, NULL, 0);
|
||||
if (err < 0)
|
||||
goto out_drop_write;
|
||||
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
|
||||
err = vfs_removexattr(realpath.dentry, name);
|
||||
out_drop_write:
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static bool ovl_open_need_copy_up(int flags, enum ovl_path_type type,
|
||||
struct dentry *realdentry)
|
||||
{
|
||||
if (OVL_TYPE_UPPER(type))
|
||||
return false;
|
||||
|
||||
if (special_file(realdentry->d_inode->i_mode))
|
||||
return false;
|
||||
|
||||
if (!(OPEN_FMODE(flags) & FMODE_WRITE) && !(flags & O_TRUNC))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int ovl_dentry_open(struct dentry *dentry, struct file *file,
|
||||
const struct cred *cred)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
enum ovl_path_type type;
|
||||
bool want_write = false;
|
||||
|
||||
type = ovl_path_real(dentry, &realpath);
|
||||
if (!ovl_is_nocopyupw(dentry)) {
|
||||
if (ovl_open_need_copy_up(file->f_flags, type, realpath.dentry)) {
|
||||
want_write = true;
|
||||
err = ovl_want_write(dentry);
|
||||
if (err)
|
||||
goto out;
|
||||
|
||||
if (file->f_flags & O_TRUNC)
|
||||
err = ovl_copy_up_last(dentry, NULL, true);
|
||||
else
|
||||
err = ovl_copy_up(dentry);
|
||||
if (err)
|
||||
goto out_drop_write;
|
||||
|
||||
ovl_path_upper(dentry, &realpath);
|
||||
}
|
||||
}
|
||||
|
||||
err = vfs_open(&realpath, file, cred);
|
||||
out_drop_write:
|
||||
if (want_write)
|
||||
ovl_drop_write(dentry);
|
||||
out:
|
||||
return err;
|
||||
}
|
||||
|
||||
static const struct inode_operations ovl_file_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.permission = ovl_permission,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
.dentry_open = ovl_dentry_open,
|
||||
};
|
||||
|
||||
static const struct inode_operations ovl_symlink_inode_operations = {
|
||||
.setattr = ovl_setattr,
|
||||
.follow_link = ovl_follow_link,
|
||||
.put_link = ovl_put_link,
|
||||
.readlink = ovl_readlink,
|
||||
.getattr = ovl_getattr,
|
||||
.setxattr = ovl_setxattr,
|
||||
.getxattr = ovl_getxattr,
|
||||
.listxattr = ovl_listxattr,
|
||||
.removexattr = ovl_removexattr,
|
||||
};
|
||||
|
||||
/*
 * Allocate an overlay inode on @sb for the file type in @mode.
 *
 * Only the S_IFMT bits of @mode are kept.  Directories additionally get
 * @oe stored in i_private.  Returns the new inode, or NULL if allocation
 * fails or the file type is not recognised (the latter also WARNs).
 */
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
			    struct ovl_entry *oe)
{
	struct inode *inode = new_inode(sb);

	if (!inode)
		return NULL;

	mode &= S_IFMT;

	inode->i_ino = get_next_ino();
	inode->i_mode = mode;
	inode->i_flags |= S_NOATIME | S_NOCMTIME;

	if (mode == S_IFDIR) {
		inode->i_private = oe;
		inode->i_op = &ovl_dir_inode_operations;
		inode->i_fop = &ovl_dir_operations;
	} else if (mode == S_IFLNK) {
		inode->i_op = &ovl_symlink_inode_operations;
	} else if (mode == S_IFREG || mode == S_IFSOCK || mode == S_IFBLK ||
		   mode == S_IFCHR || mode == S_IFIFO) {
		inode->i_op = &ovl_file_inode_operations;
	} else {
		WARN(1, "illegal file type: %i\n", mode);
		iput(inode);
		inode = NULL;
	}

	return inode;
}
|
||||
200
executer/kernel/mcoverlayfs/linux-4.0.9/overlayfs.h
Normal file
200
executer/kernel/mcoverlayfs/linux-4.0.9/overlayfs.h
Normal file
@ -0,0 +1,200 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
|
||||
struct ovl_entry;
|
||||
|
||||
/*
 * Bit flags describing an overlay path; values of enum ovl_path_type are
 * combinations of these bits and are tested with the OVL_TYPE_*() macros.
 */
enum ovl_path_type {
	__OVL_PATH_PURE		= (1 << 0),
	__OVL_PATH_UPPER	= (1 << 1),
	__OVL_PATH_MERGE	= (1 << 2),
};

/* Non-zero when the corresponding flag bit is set in @type. */
#define OVL_TYPE_UPPER(type)	((type) & __OVL_PATH_UPPER)
#define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
#define OVL_TYPE_PURE_UPPER(type)	((type) & __OVL_PATH_PURE)
/* True when @type is a merge, or has no upper bit at all. */
#define OVL_TYPE_MERGE_OR_LOWER(type) \
	(OVL_TYPE_MERGE(type) || !OVL_TYPE_UPPER(type))

/* Prefix shared by all overlay-private extended attributes. */
#define OVL_XATTR_PRE_NAME "trusted.overlay."
/* Prefix length without the NUL; derived so it cannot drift from the string. */
#define OVL_XATTR_PRE_LEN  (sizeof(OVL_XATTR_PRE_NAME) - 1)
#define OVL_XATTR_OPAQUE   OVL_XATTR_PRE_NAME"opaque"
|
||||
|
||||
/* vfs_rmdir() wrapper that logs the result with pr_debug(). */
static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_rmdir(dir, dentry);
	pr_debug("rmdir(%pd2) = %i\n", dentry, ret);

	return ret;
}
|
||||
|
||||
static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
int err = vfs_unlink(dir, dentry, NULL);
|
||||
pr_debug("unlink(%pd2) = %i\n", dentry, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
/* vfs_link() wrapper; the result is logged with pr_debug() only if @debug is set. */
static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
			      struct dentry *new_dentry, bool debug)
{
	int ret;

	ret = vfs_link(old_dentry, dir, new_dentry, NULL);
	if (!debug)
		return ret;

	pr_debug("link(%pd2, %pd2) = %i\n",
		 old_dentry, new_dentry, ret);
	return ret;
}
|
||||
|
||||
static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_create(dir, dentry, mode, true);
|
||||
if (debug)
|
||||
pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, bool debug)
|
||||
{
|
||||
int err = vfs_mkdir(dir, dentry, mode);
|
||||
if (debug)
|
||||
pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
|
||||
return err;
|
||||
}
|
||||
|
||||
static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t dev, bool debug)
|
||||
{
|
||||
int err = vfs_mknod(dir, dentry, mode, dev);
|
||||
if (debug) {
|
||||
pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n",
|
||||
dentry, mode, dev, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* vfs_symlink() wrapper; the result is logged with pr_debug() only if @debug is set. */
static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
				 const char *oldname, bool debug)
{
	int ret;

	ret = vfs_symlink(dir, dentry, oldname);
	if (!debug)
		return ret;

	pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, ret);
	return ret;
}
|
||||
|
||||
/*
 * vfs_setxattr() wrapper that logs the call and its result with pr_debug().
 *
 * @value is an arbitrary @size-byte blob and is not required to be
 * NUL-terminated, so the debug print limits the read with a precision
 * ("%.*s").  A bare field width ("%*s") would only pad and would keep
 * reading until a NUL byte, possibly beyond the buffer.
 *
 * Returns the result of vfs_setxattr().
 */
static inline int ovl_do_setxattr(struct dentry *dentry, const char *name,
				  const void *value, size_t size, int flags)
{
	int err = vfs_setxattr(dentry, name, value, size, flags);

	pr_debug("setxattr(%pd2, \"%s\", \"%.*s\", 0x%x) = %i\n",
		 dentry, name, (int) size, (char *) value, flags, err);
	return err;
}
|
||||
|
||||
/* vfs_removexattr() wrapper that logs the result with pr_debug(). */
static inline int ovl_do_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	ret = vfs_removexattr(dentry, name);
	pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, ret);

	return ret;
}
|
||||
|
||||
static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
|
||||
struct inode *newdir, struct dentry *newdentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
pr_debug("rename2(%pd2, %pd2, 0x%x)\n",
|
||||
olddentry, newdentry, flags);
|
||||
|
||||
err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
|
||||
|
||||
if (err) {
|
||||
pr_debug("...rename2(%pd2, %pd2, ...) = %i\n",
|
||||
olddentry, newdentry, err);
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/* vfs_whiteout() wrapper that logs the result with pr_debug(). */
static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
{
	int ret;

	ret = vfs_whiteout(dir, dentry);
	pr_debug("whiteout(%pd2) = %i\n", dentry, ret);

	return ret;
}
|
||||
|
||||
bool ovl_is_nocopyupw(struct dentry *dentry);
|
||||
enum ovl_path_type ovl_path_type(struct dentry *dentry);
|
||||
u64 ovl_dentry_version_get(struct dentry *dentry);
|
||||
void ovl_dentry_version_inc(struct dentry *dentry);
|
||||
void ovl_path_upper(struct dentry *dentry, struct path *path);
|
||||
void ovl_path_lower(struct dentry *dentry, struct path *path);
|
||||
enum ovl_path_type ovl_path_real(struct dentry *dentry, struct path *path);
|
||||
int ovl_path_next(int idx, struct dentry *dentry, struct path *path);
|
||||
struct dentry *ovl_dentry_upper(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_lower(struct dentry *dentry);
|
||||
struct dentry *ovl_dentry_real(struct dentry *dentry);
|
||||
struct dentry *ovl_entry_real(struct ovl_entry *oe, bool *is_upper);
|
||||
struct ovl_dir_cache *ovl_dir_cache(struct dentry *dentry);
|
||||
void ovl_set_dir_cache(struct dentry *dentry, struct ovl_dir_cache *cache);
|
||||
struct dentry *ovl_workdir(struct dentry *dentry);
|
||||
int ovl_want_write(struct dentry *dentry);
|
||||
void ovl_drop_write(struct dentry *dentry);
|
||||
bool ovl_dentry_is_opaque(struct dentry *dentry);
|
||||
void ovl_dentry_set_opaque(struct dentry *dentry, bool opaque);
|
||||
bool ovl_is_whiteout(struct dentry *dentry);
|
||||
void ovl_dentry_update(struct dentry *dentry, struct dentry *upperdentry);
|
||||
struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags);
|
||||
struct file *ovl_path_open(struct path *path, int flags);
|
||||
|
||||
struct dentry *ovl_upper_create(struct dentry *upperdir, struct dentry *dentry,
|
||||
struct kstat *stat, const char *link);
|
||||
|
||||
/* readdir.c */
|
||||
extern const struct file_operations ovl_dir_operations;
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
|
||||
void ovl_cache_free(struct list_head *list);
|
||||
|
||||
/* inode.c */
|
||||
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
int ovl_permission(struct inode *inode, int mask);
|
||||
int ovl_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
ssize_t ovl_getxattr(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size);
|
||||
ssize_t ovl_listxattr(struct dentry *dentry, char *list, size_t size);
|
||||
int ovl_removexattr(struct dentry *dentry, const char *name);
|
||||
|
||||
struct inode *ovl_new_inode(struct super_block *sb, umode_t mode,
|
||||
struct ovl_entry *oe);
|
||||
static inline void ovl_copyattr(struct inode *from, struct inode *to)
|
||||
{
|
||||
to->i_uid = from->i_uid;
|
||||
to->i_gid = from->i_gid;
|
||||
}
|
||||
|
||||
/* dir.c */
|
||||
extern const struct inode_operations ovl_dir_inode_operations;
|
||||
struct dentry *ovl_lookup_temp(struct dentry *workdir, struct dentry *dentry);
|
||||
int ovl_create_real(struct inode *dir, struct dentry *newdentry,
|
||||
struct kstat *stat, const char *link,
|
||||
struct dentry *hardlink, bool debug);
|
||||
void ovl_cleanup(struct inode *dir, struct dentry *dentry);
|
||||
|
||||
/* copy_up.c */
|
||||
int ovl_copy_up(struct dentry *dentry);
|
||||
int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
|
||||
struct path *lowerpath, struct kstat *stat,
|
||||
struct iattr *attr);
|
||||
int ovl_copy_xattr(struct dentry *old, struct dentry *new);
|
||||
int ovl_set_attr(struct dentry *upper, struct kstat *stat);
|
||||
557
executer/kernel/mcoverlayfs/linux-4.0.9/readdir.c
Normal file
557
executer/kernel/mcoverlayfs/linux-4.0.9/readdir.c
Normal file
@ -0,0 +1,557 @@
|
||||
/*
|
||||
*
|
||||
* Copyright (C) 2011 Novell Inc.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 as published by
|
||||
* the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/rbtree.h>
|
||||
#include <linux/security.h>
|
||||
#include <linux/cred.h>
|
||||
#include "overlayfs.h"
|
||||
|
||||
struct ovl_cache_entry {
|
||||
unsigned int len;
|
||||
unsigned int type;
|
||||
u64 ino;
|
||||
struct list_head l_node;
|
||||
struct rb_node node;
|
||||
bool is_whiteout;
|
||||
char name[];
|
||||
};
|
||||
|
||||
struct ovl_dir_cache {
|
||||
long refcount;
|
||||
u64 version;
|
||||
struct list_head entries;
|
||||
};
|
||||
|
||||
struct ovl_readdir_data {
|
||||
struct dir_context ctx;
|
||||
bool is_merge;
|
||||
struct rb_root root;
|
||||
struct list_head *list;
|
||||
struct list_head middle;
|
||||
struct dentry *dir;
|
||||
int count;
|
||||
int err;
|
||||
};
|
||||
|
||||
struct ovl_dir_file {
|
||||
bool is_real;
|
||||
bool is_upper;
|
||||
struct ovl_dir_cache *cache;
|
||||
struct list_head *cursor;
|
||||
struct file *realfile;
|
||||
struct file *upperfile;
|
||||
};
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
|
||||
{
|
||||
return container_of(n, struct ovl_cache_entry, node);
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
|
||||
const char *name, int len)
|
||||
{
|
||||
struct rb_node *node = root->rb_node;
|
||||
int cmp;
|
||||
|
||||
while (node) {
|
||||
struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
|
||||
|
||||
cmp = strncmp(name, p->name, len);
|
||||
if (cmp > 0)
|
||||
node = p->node.rb_right;
|
||||
else if (cmp < 0 || len < p->len)
|
||||
node = p->node.rb_left;
|
||||
else
|
||||
return p;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct ovl_cache_entry *ovl_cache_entry_new(struct dentry *dir,
|
||||
const char *name, int len,
|
||||
u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
size_t size = offsetof(struct ovl_cache_entry, name[len + 1]);
|
||||
|
||||
p = kmalloc(size, GFP_KERNEL);
|
||||
if (!p)
|
||||
return NULL;
|
||||
|
||||
memcpy(p->name, name, len);
|
||||
p->name[len] = '\0';
|
||||
p->len = len;
|
||||
p->type = d_type;
|
||||
p->ino = ino;
|
||||
p->is_whiteout = false;
|
||||
|
||||
if (d_type == DT_CHR) {
|
||||
struct dentry *dentry;
|
||||
const struct cred *old_cred;
|
||||
struct cred *override_cred;
|
||||
|
||||
override_cred = prepare_creds();
|
||||
if (!override_cred) {
|
||||
kfree(p);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* CAP_DAC_OVERRIDE for lookup
|
||||
*/
|
||||
cap_raise(override_cred->cap_effective, CAP_DAC_OVERRIDE);
|
||||
old_cred = override_creds(override_cred);
|
||||
|
||||
dentry = lookup_one_len(name, dir, len);
|
||||
if (!IS_ERR(dentry)) {
|
||||
p->is_whiteout = ovl_is_whiteout(dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
revert_creds(old_cred);
|
||||
put_cred(override_cred);
|
||||
}
|
||||
return p;
|
||||
}
|
||||
|
||||
/*
 * Insert an entry for @name into @rdd's rb-tree and append it to
 * rdd->list, unless an entry with that name is already indexed.
 *
 * Returns 0 on success or if the name already exists, -ENOMEM if the new
 * entry cannot be allocated.
 */
static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
				  const char *name, int len, u64 ino,
				  unsigned int d_type)
{
	struct rb_node **link = &rdd->root.rb_node;
	struct rb_node *parent = NULL;
	struct ovl_cache_entry *entry;

	/* Walk down to the insertion point, bailing out on a duplicate */
	while (*link) {
		struct ovl_cache_entry *cur;
		int cmp;

		parent = *link;
		cur = ovl_cache_entry_from_node(*link);
		cmp = strncmp(name, cur->name, len);
		if (cmp > 0) {
			link = &cur->node.rb_right;
			continue;
		}
		if (cmp < 0 || len < cur->len) {
			link = &cur->node.rb_left;
			continue;
		}

		return 0;
	}

	entry = ovl_cache_entry_new(rdd->dir, name, len, ino, d_type);
	if (!entry)
		return -ENOMEM;

	list_add_tail(&entry->l_node, rdd->list);
	rb_link_node(&entry->node, parent, link);
	rb_insert_color(&entry->node, &rdd->root);

	return 0;
}
|
||||
|
||||
static int ovl_fill_lower(struct ovl_readdir_data *rdd,
|
||||
const char *name, int namelen,
|
||||
loff_t offset, u64 ino, unsigned int d_type)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
p = ovl_cache_entry_find(&rdd->root, name, namelen);
|
||||
if (p) {
|
||||
list_move_tail(&p->l_node, &rdd->middle);
|
||||
} else {
|
||||
p = ovl_cache_entry_new(rdd->dir, name, namelen, ino, d_type);
|
||||
if (p == NULL)
|
||||
rdd->err = -ENOMEM;
|
||||
else
|
||||
list_add_tail(&p->l_node, &rdd->middle);
|
||||
}
|
||||
|
||||
return rdd->err;
|
||||
}
|
||||
|
||||
void ovl_cache_free(struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
struct ovl_cache_entry *n;
|
||||
|
||||
list_for_each_entry_safe(p, n, list, l_node)
|
||||
kfree(p);
|
||||
|
||||
INIT_LIST_HEAD(list);
|
||||
}
|
||||
|
||||
static void ovl_cache_put(struct ovl_dir_file *od, struct dentry *dentry)
|
||||
{
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
|
||||
WARN_ON(cache->refcount <= 0);
|
||||
cache->refcount--;
|
||||
if (!cache->refcount) {
|
||||
if (ovl_dir_cache(dentry) == cache)
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * dir_context actor used by ovl_dir_read_merged().  Counts the entry in
 * rdd->count and hands it to ovl_fill_lower() when rdd->is_merge is set,
 * or to ovl_cache_entry_add_rb() otherwise.
 */
static int ovl_fill_merge(struct dir_context *ctx, const char *name,
			  int namelen, loff_t offset, u64 ino,
			  unsigned int d_type)
{
	struct ovl_readdir_data *rdd;

	rdd = container_of(ctx, struct ovl_readdir_data, ctx);
	rdd->count++;

	if (rdd->is_merge)
		return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);

	return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
}
|
||||
|
||||
static inline int ovl_dir_read(struct path *realpath,
|
||||
struct ovl_readdir_data *rdd)
|
||||
{
|
||||
struct file *realfile;
|
||||
int err;
|
||||
|
||||
realfile = ovl_path_open(realpath, O_RDONLY | O_DIRECTORY);
|
||||
if (IS_ERR(realfile))
|
||||
return PTR_ERR(realfile);
|
||||
|
||||
rdd->dir = realpath->dentry;
|
||||
rdd->ctx.pos = 0;
|
||||
do {
|
||||
rdd->count = 0;
|
||||
rdd->err = 0;
|
||||
err = iterate_dir(realfile, &rdd->ctx);
|
||||
if (err >= 0)
|
||||
err = rdd->err;
|
||||
} while (!err && rdd->count);
|
||||
fput(realfile);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ovl_dir_reset(struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct ovl_dir_cache *cache = od->cache;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
enum ovl_path_type type = ovl_path_type(dentry);
|
||||
|
||||
if (cache && ovl_dentry_version_get(dentry) != cache->version) {
|
||||
ovl_cache_put(od, dentry);
|
||||
od->cache = NULL;
|
||||
od->cursor = NULL;
|
||||
}
|
||||
WARN_ON(!od->is_real && !OVL_TYPE_MERGE(type));
|
||||
if (od->is_real && OVL_TYPE_MERGE(type))
|
||||
od->is_real = false;
|
||||
}
|
||||
|
||||
static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct path realpath;
|
||||
struct ovl_readdir_data rdd = {
|
||||
.ctx.actor = ovl_fill_merge,
|
||||
.list = list,
|
||||
.root = RB_ROOT,
|
||||
.is_merge = false,
|
||||
};
|
||||
int idx, next;
|
||||
|
||||
for (idx = 0; idx != -1; idx = next) {
|
||||
next = ovl_path_next(idx, dentry, &realpath);
|
||||
|
||||
if (next != -1) {
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
if (err)
|
||||
break;
|
||||
} else {
|
||||
/*
|
||||
* Insert lowest layer entries before upper ones, this
|
||||
* allows offsets to be reasonably constant
|
||||
*/
|
||||
list_add(&rdd.middle, rdd.list);
|
||||
rdd.is_merge = true;
|
||||
err = ovl_dir_read(&realpath, &rdd);
|
||||
list_del(&rdd.middle);
|
||||
}
|
||||
}
|
||||
return err;
|
||||
}
|
||||
|
||||
/*
 * Point od->cursor at the list node @pos steps into od->cache->entries.
 * If the list is shorter than @pos the cursor ends up on the list head.
 */
static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
{
	struct list_head *head = &od->cache->entries;
	struct list_head *node = head->next;
	loff_t skipped;

	for (skipped = 0; node != head && skipped < pos; skipped++)
		node = node->next;

	/* Cursor is safe since the cache is stable */
	od->cursor = node;
}
|
||||
|
||||
static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
|
||||
{
|
||||
int res;
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_dir_cache(dentry);
|
||||
if (cache && ovl_dentry_version_get(dentry) == cache->version) {
|
||||
cache->refcount++;
|
||||
return cache;
|
||||
}
|
||||
ovl_set_dir_cache(dentry, NULL);
|
||||
|
||||
cache = kzalloc(sizeof(struct ovl_dir_cache), GFP_KERNEL);
|
||||
if (!cache)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
cache->refcount = 1;
|
||||
INIT_LIST_HEAD(&cache->entries);
|
||||
|
||||
res = ovl_dir_read_merged(dentry, &cache->entries);
|
||||
if (res) {
|
||||
ovl_cache_free(&cache->entries);
|
||||
kfree(cache);
|
||||
return ERR_PTR(res);
|
||||
}
|
||||
|
||||
cache->version = ovl_dentry_version_get(dentry);
|
||||
ovl_set_dir_cache(dentry, cache);
|
||||
|
||||
return cache;
|
||||
}
|
||||
|
||||
static int ovl_iterate(struct file *file, struct dir_context *ctx)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
struct dentry *dentry = file->f_path.dentry;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
if (!ctx->pos)
|
||||
ovl_dir_reset(file);
|
||||
|
||||
if (od->is_real)
|
||||
return iterate_dir(od->realfile, ctx);
|
||||
|
||||
if (!od->cache) {
|
||||
struct ovl_dir_cache *cache;
|
||||
|
||||
cache = ovl_cache_get(dentry);
|
||||
if (IS_ERR(cache))
|
||||
return PTR_ERR(cache);
|
||||
|
||||
od->cache = cache;
|
||||
ovl_seek_cursor(od, ctx->pos);
|
||||
}
|
||||
|
||||
while (od->cursor != &od->cache->entries) {
|
||||
p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
|
||||
if (!p->is_whiteout)
|
||||
if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
|
||||
break;
|
||||
od->cursor = p->l_node.next;
|
||||
ctx->pos++;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * ->llseek for overlay directories, serialised by the inode's i_mutex.
 *
 * Real directories forward to vfs_llseek() on the real file and mirror its
 * position.  Merged directories accept only SEEK_SET and SEEK_CUR with a
 * non-negative result; the cache cursor is repositioned when a cache exists.
 *
 * Returns the new offset, or -EINVAL for an unsupported origin or negative
 * offset (or whatever vfs_llseek() returns for real directories).
 */
static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
{
	struct ovl_dir_file *od = file->private_data;
	struct inode *inode = file_inode(file);
	loff_t ret;

	mutex_lock(&inode->i_mutex);

	if (file->f_pos == 0)
		ovl_dir_reset(file);

	if (od->is_real) {
		ret = vfs_llseek(od->realfile, offset, origin);
		file->f_pos = od->realfile->f_pos;
		goto done;
	}

	ret = -EINVAL;

	if (origin == SEEK_CUR)
		offset += file->f_pos;
	else if (origin != SEEK_SET)
		goto done;

	if (offset < 0)
		goto done;

	if (file->f_pos != offset) {
		file->f_pos = offset;
		if (od->cache)
			ovl_seek_cursor(od, offset);
	}
	ret = offset;

done:
	mutex_unlock(&inode->i_mutex);

	return ret;
}
|
||||
|
||||
/*
 * ->fsync for overlay directories.
 *
 * Normally syncs od->realfile.  If the directory was not opened on the
 * upper layer but the dentry now has an upper type, the upper directory is
 * opened lazily, published in od->upperfile under i_mutex, and synced
 * instead.
 *
 * Returns the result of vfs_fsync_range(), or the error from opening the
 * upper directory when nobody else has installed an upper file.
 */
static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct ovl_dir_file *od = file->private_data;
	struct dentry *dentry = file->f_path.dentry;
	struct file *target = od->realfile;
	struct inode *inode;
	struct path upperpath;

	/*
	 * Need to check if we started out being a lower dir, but got copied up
	 */
	if (od->is_upper || !OVL_TYPE_UPPER(ovl_path_type(dentry)))
		goto sync;

	inode = file_inode(file);

	target = lockless_dereference(od->upperfile);
	if (target)
		goto sync;

	ovl_path_upper(dentry, &upperpath);
	target = ovl_path_open(&upperpath, O_RDONLY);
	smp_mb__before_spinlock();

	mutex_lock(&inode->i_mutex);
	if (od->upperfile) {
		/* somebody has beaten us to it */
		if (!IS_ERR(target))
			fput(target);
		target = od->upperfile;
	} else if (IS_ERR(target)) {
		mutex_unlock(&inode->i_mutex);
		return PTR_ERR(target);
	} else {
		od->upperfile = target;
	}
	mutex_unlock(&inode->i_mutex);

sync:
	return vfs_fsync_range(target, start, end, datasync);
}
|
||||
|
||||
static int ovl_dir_release(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct ovl_dir_file *od = file->private_data;
|
||||
|
||||
if (od->cache) {
|
||||
mutex_lock(&inode->i_mutex);
|
||||
ovl_cache_put(od, file->f_path.dentry);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
}
|
||||
fput(od->realfile);
|
||||
if (od->upperfile)
|
||||
fput(od->upperfile);
|
||||
kfree(od);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ovl_dir_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
struct path realpath;
|
||||
struct file *realfile;
|
||||
struct ovl_dir_file *od;
|
||||
enum ovl_path_type type;
|
||||
|
||||
od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
|
||||
if (!od)
|
||||
return -ENOMEM;
|
||||
|
||||
type = ovl_path_real(file->f_path.dentry, &realpath);
|
||||
realfile = ovl_path_open(&realpath, file->f_flags);
|
||||
if (IS_ERR(realfile)) {
|
||||
kfree(od);
|
||||
return PTR_ERR(realfile);
|
||||
}
|
||||
od->realfile = realfile;
|
||||
od->is_real = !OVL_TYPE_MERGE(type);
|
||||
od->is_upper = OVL_TYPE_UPPER(type);
|
||||
file->private_data = od;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct file_operations ovl_dir_operations = {
|
||||
.read = generic_read_dir,
|
||||
.open = ovl_dir_open,
|
||||
.iterate = ovl_iterate,
|
||||
.llseek = ovl_dir_llseek,
|
||||
.fsync = ovl_dir_fsync,
|
||||
.release = ovl_dir_release,
|
||||
};
|
||||
|
||||
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
|
||||
{
|
||||
int err;
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
err = ovl_dir_read_merged(dentry, list);
|
||||
if (err)
|
||||
return err;
|
||||
|
||||
err = 0;
|
||||
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
if (p->is_whiteout)
|
||||
continue;
|
||||
|
||||
if (p->name[0] == '.') {
|
||||
if (p->len == 1)
|
||||
continue;
|
||||
if (p->len == 2 && p->name[1] == '.')
|
||||
continue;
|
||||
}
|
||||
err = -ENOTEMPTY;
|
||||
break;
|
||||
}
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
|
||||
{
|
||||
struct ovl_cache_entry *p;
|
||||
|
||||
mutex_lock_nested(&upper->d_inode->i_mutex, I_MUTEX_CHILD);
|
||||
list_for_each_entry(p, list, l_node) {
|
||||
struct dentry *dentry;
|
||||
|
||||
if (!p->is_whiteout)
|
||||
continue;
|
||||
|
||||
dentry = lookup_one_len(p->name, upper, p->len);
|
||||
if (IS_ERR(dentry)) {
|
||||
pr_err("overlayfs: lookup '%s/%.*s' failed (%i)\n",
|
||||
upper->d_name.name, p->len, p->name,
|
||||
(int) PTR_ERR(dentry));
|
||||
continue;
|
||||
}
|
||||
ovl_cleanup(upper->d_inode, dentry);
|
||||
dput(dentry);
|
||||
}
|
||||
mutex_unlock(&upper->d_inode->i_mutex);
|
||||
}
|
||||
1094
executer/kernel/mcoverlayfs/linux-4.0.9/super.c
Normal file
1094
executer/kernel/mcoverlayfs/linux-4.0.9/super.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,488 +0,0 @@
|
||||
/**
|
||||
* \file procfs.c
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* mcctrl procfs
|
||||
* \author Naoki Hamada <nao@axe.bz> \par
|
||||
* Copyright (C) 2014 AXE, Inc.
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#include <linux/slab.h>
|
||||
#include <linux/string.h>
|
||||
#include <linux/proc_fs.h>
|
||||
#include <linux/list.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/resource.h>
|
||||
#include "mcctrl.h"
|
||||
#include <linux/version.h>
|
||||
|
||||
//#define PROCFS_DEBUG
|
||||
|
||||
#ifdef PROCFS_DEBUG
|
||||
#define dprintk(...) printk(__VA_ARGS__)
|
||||
#else
|
||||
#define dprintk(...)
|
||||
#endif
|
||||
|
||||
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
|
||||
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
|
||||
size_t nbytes, loff_t *ppos);
|
||||
|
||||
/* A private data for the procfs driver. */
|
||||
struct procfs_list_entry;
|
||||
|
||||
/* Bookkeeping record for one McKernel procfs file or directory. */
struct procfs_list_entry {
|
||||
/* Linkage on the global procfs_file_list. */
struct list_head list;
|
||||
/* The Linux proc_dir_entry created for this record. */
struct proc_dir_entry *entry;
|
||||
/* Record of the containing directory; NULL for a name without '/'. */
struct procfs_list_entry *parent;
|
||||
/* OS handle, stored by procfs_create() for the entry it was asked for. */
ihk_os_t os;
|
||||
/* OS number the entry belongs to; used to match entries on removal. */
int osnum;
|
||||
/* pid passed to procfs_create(). */
int pid;
|
||||
/* CPU that read requests are sent to; updated when a read is redirected. */
int cpu;
|
||||
/* Full name as passed at creation; the lookup key in procfs_file_list. */
char fname[PROCFS_NAME_MAX];
|
||||
};
|
||||
|
||||
/*
|
||||
 * In the procfs_file_list, mckernel procfs files are
|
||||
* listed in the manner that the leaf file is located
|
||||
* always nearer to the list top than its parent node
|
||||
* file.
|
||||
*/
|
||||
|
||||
LIST_HEAD(procfs_file_list);
|
||||
static ihk_spinlock_t procfs_file_list_lock;
|
||||
|
||||
/**
 * \brief llseek handler for McKernel procfs files.
 *
 * \param file   open file whose position is changed
 * \param offset offset to apply
 * \param orig   SEEK_SET (absolute) or SEEK_CUR (relative to f_pos)
 *
 * \return the new file position, or -EINVAL when \a orig is not supported
 *         or the resulting position would be negative (in which case
 *         f_pos is left untouched).
 */
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
	loff_t pos;

	switch (orig) {
	case SEEK_SET:
		pos = offset;
		break;
	case SEEK_CUR:
		pos = file->f_pos + offset;
		break;
	default:
		return -EINVAL;
	}

	/* A negative position is never valid; do not store it in f_pos. */
	if (pos < 0)
		return -EINVAL;

	file->f_pos = pos;
	return pos;
}
|
||||
|
||||
static const struct file_operations mckernel_procfs_file_operations = {
|
||||
.llseek = mckernel_procfs_lseek,
|
||||
.read = mckernel_procfs_read,
|
||||
.write = NULL,
|
||||
};
|
||||
|
||||
|
||||
/**
|
||||
* \brief Return specified procfs entry.
|
||||
*
|
||||
* \param p a name of the procfs file
|
||||
* \param osnum os number
|
||||
* \param mode if zero create a directory otherwise a file
|
||||
*
|
||||
* return value: NULL: Something wrong has occurred.
|
||||
* otherwise: address of the proc_dir_entry structure of the procfs file
|
||||
*
|
||||
* p should not be NULL nor terminated by "/".
|
||||
*
|
||||
* We create a procfs entry if there is not already one.
|
||||
* This process is recursive to the root of the procfs tree.
|
||||
*/
|
||||
/*
|
||||
* XXX: Two or more entries which have same name can be created.
|
||||
*
|
||||
* get_procfs_list_entry() avoids creating an entry which has already been created.
|
||||
* But, it allows creating an entry which is being created by another thread.
|
||||
*
|
||||
* This problem occurred when two requests which created files with a common
|
||||
* ancestor directory which was not explicitly created were racing.
|
||||
*/
|
||||
|
||||
static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode)
|
||||
{
|
||||
char *r;
|
||||
struct proc_dir_entry *pde = NULL;
|
||||
struct procfs_list_entry *e, *ret = NULL, *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode);
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry(e, &procfs_file_list, list) {
|
||||
if (e == NULL) {
|
||||
kprintf("ERROR: The procfs_file_list has a null entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
|
||||
/* We found the entry */
|
||||
ret = e;
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
if (ret != NULL) {
|
||||
return ret;
|
||||
}
|
||||
r = strrchr(p, '/');
|
||||
if (r != NULL) {
|
||||
/* We have non-null parent dir. */
|
||||
strncpy(name, p, r - p);
|
||||
name[r - p] = '\0';
|
||||
parent = get_procfs_list_entry(name, osnum, 0);
|
||||
if (parent == NULL) {
|
||||
/* We counld not get a parent procfs entry. Give up.*/
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
|
||||
if (ret == NULL) {
|
||||
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
|
||||
return NULL;
|
||||
}
|
||||
/* Fill the fname field of the entry */
|
||||
strncpy(ret->fname, p, PROCFS_NAME_MAX);
|
||||
|
||||
if (r != NULL) {
|
||||
strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1);
|
||||
} else {
|
||||
strncpy(name, p, PROCFS_NAME_MAX);
|
||||
}
|
||||
if (mode == 0) {
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = proc_mkdir(name, parent ? parent->entry : NULL);
|
||||
#else
|
||||
pde = proc_mkdir_data(name, 0555, parent ? parent->entry : NULL, ret);
|
||||
#endif
|
||||
} else {
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde = create_proc_entry(name, mode, parent->entry);
|
||||
if (pde)
|
||||
pde->proc_fops = &mckernel_procfs_file_operations;
|
||||
#else
|
||||
pde = proc_create_data(name, mode, parent->entry,
|
||||
&mckernel_procfs_file_operations, ret);
|
||||
#endif
|
||||
}
|
||||
if (pde == NULL) {
|
||||
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
|
||||
kfree(ret);
|
||||
return NULL;
|
||||
}
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
pde->data = ret;
|
||||
#endif
|
||||
ret->osnum = osnum;
|
||||
ret->entry = pde;
|
||||
ret->parent = parent;
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_add(&(ret->list), &procfs_file_list);
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
|
||||
dprintk("get_procfs_list_entry: %s done\n", p);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Create a procfs entry.
|
||||
*
|
||||
* \param __os (opeque) os variable
|
||||
* \param ref cpuid of the requesting mckernel process
|
||||
* \param osnum osnum of the requesting mckernel process
|
||||
* \param pid pid of the requesting mckernel process
|
||||
* \param arg sent argument
|
||||
*/
|
||||
|
||||
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
|
||||
{
|
||||
struct procfs_list_entry *e;
|
||||
ihk_device_t dev = ihk_os_to_dev(__os);
|
||||
unsigned long parg;
|
||||
struct procfs_file *f;
|
||||
int mode;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
|
||||
dprintk("procfs_create: osnum: %d, cpu: %d, pid: %d\n", osnum, ref, pid);
|
||||
|
||||
parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file));
|
||||
f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0);
|
||||
|
||||
dprintk("name: %s mode: %o\n", f->fname, f->mode);
|
||||
|
||||
strncpy(name, f->fname, PROCFS_NAME_MAX);
|
||||
mode = f->mode;
|
||||
|
||||
if (name[PROCFS_NAME_MAX - 1] != '\0') {
|
||||
printk("ERROR: procfs_creat: file name not properly terminated.\n");
|
||||
goto quit;
|
||||
}
|
||||
e = get_procfs_list_entry(name, osnum, mode);
|
||||
if (e == NULL) {
|
||||
printk("ERROR: could not create a procfs entry for %s.\n", name);
|
||||
goto quit;
|
||||
}
|
||||
|
||||
e->os = __os;
|
||||
e->cpu = ref;
|
||||
e->pid = pid;
|
||||
|
||||
quit:
|
||||
f->status = 1; /* Now the peer can free the data. */
|
||||
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
|
||||
ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file));
|
||||
dprintk("procfs_create: done\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Delete a procfs entry.
|
||||
*
|
||||
* \param __os (opaque) os variable
|
||||
* \param osnum os number
|
||||
* \param arg sent argument
|
||||
*/
|
||||
|
||||
void procfs_delete(void *__os, int osnum, unsigned long arg)
|
||||
{
|
||||
ihk_device_t dev = ihk_os_to_dev(__os);
|
||||
unsigned long parg;
|
||||
struct procfs_file *f;
|
||||
struct procfs_list_entry *e;
|
||||
struct procfs_list_entry *parent = NULL;
|
||||
char name[PROCFS_NAME_MAX];
|
||||
char *r;
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("procfs_delete: \n");
|
||||
parg = ihk_device_map_memory(dev, arg, sizeof(struct procfs_file));
|
||||
f = ihk_device_map_virtual(dev, parg, sizeof(struct procfs_file), NULL, 0);
|
||||
dprintk("fname: %s.\n", f->fname);
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry(e, &procfs_file_list, list) {
|
||||
if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
|
||||
(e->osnum == osnum)) {
|
||||
list_del(&e->list);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
parent = e->parent;
|
||||
kfree(e);
|
||||
r = strrchr(f->fname, '/');
|
||||
if (r == NULL) {
|
||||
strncpy(name, f->fname, PROCFS_NAME_MAX);
|
||||
} else {
|
||||
strncpy(name, r + 1, PROCFS_NAME_MAX);
|
||||
}
|
||||
dprintk("found and remove %s from the list.\n", name);
|
||||
remove_proc_entry(name, parent->entry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
f->status = 1; /* Now the peer can free the data. */
|
||||
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
|
||||
ihk_device_unmap_memory(dev, parg, sizeof(struct procfs_file));
|
||||
dprintk("procfs_delete: done\n");
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief Process SCD_MSG_PROCFS_ANSWER message.
|
||||
*
|
||||
* \param arg sent argument
|
||||
* \param err error info (redundant)
|
||||
*/
|
||||
|
||||
void procfs_answer(unsigned int arg, int err)
|
||||
{
|
||||
dprintk("procfs: received SCD_MSG_PROCFS_ANSWER message(err = %d).\n", err);
|
||||
wake_up_interruptible(&procfsq);
|
||||
}
|
||||
|
||||
/**
|
||||
* \brief The callback funciton for McKernel procfs
|
||||
*
|
||||
* This function conforms to the 2) way of fs/proc/generic.c
|
||||
* from linux-2.6.39.4.
|
||||
*/
|
||||
static ssize_t
|
||||
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
|
||||
loff_t *ppos)
|
||||
{
|
||||
struct inode * inode = file->f_path.dentry->d_inode;
|
||||
char *kern_buffer;
|
||||
int order = 0;
|
||||
volatile struct procfs_read *r;
|
||||
struct ikc_scd_packet isp;
|
||||
int ret, retrycount = 0;
|
||||
unsigned long pbuf;
|
||||
unsigned long count = nbytes;
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
struct proc_dir_entry *dp = PDE(inode);
|
||||
struct procfs_list_entry *e = dp->data;
|
||||
#else
|
||||
struct procfs_list_entry *e = PDE_DATA(inode);
|
||||
#endif
|
||||
loff_t offset = *ppos;
|
||||
|
||||
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
|
||||
e->fname, offset, count);
|
||||
|
||||
if (count <= 0 || offset < 0) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
while ((1 << order) < count) ++order;
|
||||
if (order > 12) {
|
||||
order -= 12;
|
||||
}
|
||||
else {
|
||||
order = 1;
|
||||
}
|
||||
|
||||
/* NOTE: we need physically contigous memory to pass through IKC */
|
||||
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
|
||||
if (!kern_buffer) {
|
||||
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
pbuf = virt_to_phys(kern_buffer);
|
||||
|
||||
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
|
||||
if (r == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
retry:
|
||||
dprintk("offset: %lx, count: %d, cpu: %d\n", offset, count, e->cpu);
|
||||
|
||||
r->pbuf = pbuf;
|
||||
r->eof = 0;
|
||||
r->ret = -EIO; /* default */
|
||||
r->status = 0;
|
||||
r->offset = offset;
|
||||
r->count = count;
|
||||
strncpy((char *)r->fname, e->fname, PROCFS_NAME_MAX);
|
||||
isp.msg = SCD_MSG_PROCFS_REQUEST;
|
||||
isp.ref = e->cpu;
|
||||
isp.arg = virt_to_phys(r);
|
||||
|
||||
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
|
||||
|
||||
if (ret < 0) {
|
||||
goto out; /* error */
|
||||
}
|
||||
|
||||
/* Wait for a reply. */
|
||||
ret = -EIO; /* default exit code */
|
||||
dprintk("now wait for a relpy\n");
|
||||
|
||||
/* Wait for the status field of the procfs_read structure set ready. */
|
||||
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
|
||||
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* Wake up and check the result. */
|
||||
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
|
||||
if ((r->ret == 0) && (r->eof != 1)) {
|
||||
/* A miss-hit caused by migration has occurred.
|
||||
* We simply retry the query with a new CPU.
|
||||
*/
|
||||
if (retrycount++ > 10) {
|
||||
kprintf("ERROR: mckernel_procfs_read: excessive retry.\n");
|
||||
goto out;
|
||||
}
|
||||
e->cpu = r->newcpu;
|
||||
dprintk("retry\n");
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (r->ret > 0) {
|
||||
if (copy_to_user(buf, kern_buffer, r->ret)) {
|
||||
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*ppos += r->ret;
|
||||
}
|
||||
ret = r->ret;
|
||||
|
||||
out:
|
||||
free_pages((uintptr_t)kern_buffer, order);
|
||||
kfree((void *)r);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
 * \brief Initialization for procfs
 *
 * Currently there is nothing to set up.
 *
 * \param osnum os number
 */

void procfs_init(int osnum)
{
	/* intentionally empty */
}
|
||||
|
||||
/**
|
||||
* \brief Finalization for procfs
|
||||
*
|
||||
* \param osnum os number
|
||||
*/
|
||||
|
||||
void procfs_exit(int osnum) {
|
||||
char buf[20], *r;
|
||||
int error;
|
||||
mm_segment_t old_fs = get_fs();
|
||||
struct kstat stat;
|
||||
struct procfs_list_entry *parent;
|
||||
struct procfs_list_entry *e, *temp = NULL;
|
||||
unsigned long irqflags;
|
||||
|
||||
dprintk("remove remaining mckernel procfs files.\n");
|
||||
|
||||
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
|
||||
list_for_each_entry_safe(e, temp, &procfs_file_list, list) {
|
||||
if (e->osnum == osnum) {
|
||||
dprintk("found entry for %s.\n", e->fname);
|
||||
list_del(&e->list);
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
|
||||
e->entry->read_proc = NULL;
|
||||
e->entry->data = NULL;
|
||||
#endif
|
||||
parent = e->parent;
|
||||
r = strrchr(e->fname, '/');
|
||||
if (r == NULL) {
|
||||
r = e->fname;
|
||||
} else {
|
||||
r += 1;
|
||||
}
|
||||
if (parent) {
|
||||
remove_proc_entry(r, parent->entry);
|
||||
}
|
||||
dprintk("free the entry\n");
|
||||
kfree(e);
|
||||
}
|
||||
dprintk("iterate it.\n");
|
||||
}
|
||||
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
|
||||
|
||||
sprintf(buf, "/proc/mcos%d", osnum);
|
||||
|
||||
set_fs(KERNEL_DS);
|
||||
error = vfs_stat (buf, &stat);
|
||||
set_fs(old_fs);
|
||||
if (error != 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
printk("procfs_exit: We have to remove unexpectedly remaining %s.\n", buf);
|
||||
|
||||
/* remove remnant of previous mcos%d */
|
||||
remove_proc_entry(buf + 6, NULL);
|
||||
}
|
||||
@ -1,13 +1,19 @@
|
||||
CC=@CC@
|
||||
BINDIR=@BINDIR@
|
||||
CFLAGS=-Wall -O -fPIE -pie
|
||||
KDIR ?= @KDIR@
|
||||
CFLAGS=-Wall -O -I.
|
||||
VPATH=@abs_srcdir@
|
||||
TARGET=mcexec
|
||||
@uncomment_if_ENABLE_MEMDUMP@TARGET+=eclair
|
||||
LIBS=@LIBS@
|
||||
|
||||
all: $(TARGET)
|
||||
|
||||
mcexec: mcexec.c
|
||||
$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -pthread -o $@ $^ $(EXTRA_OBJS)
|
||||
$(CC) -I${KDIR} $(CFLAGS) $(EXTRA_CFLAGS) -fPIE -pie -lrt -pthread -o $@ $^ $(EXTRA_OBJS)
|
||||
|
||||
eclair: eclair.c
|
||||
$(CC) $(CFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
clean:
|
||||
$(RM) $(TARGET) *.o
|
||||
@ -17,4 +23,5 @@ clean:
|
||||
install:
|
||||
mkdir -p -m 755 $(BINDIR)
|
||||
install -m 755 mcexec $(BINDIR)
|
||||
@uncomment_if_ENABLE_MEMDUMP@install -m 755 eclair $(BINDIR)
|
||||
|
||||
|
||||
1026
executer/user/eclair.c
Normal file
1026
executer/user/eclair.c
Normal file
File diff suppressed because it is too large
Load Diff
@ -59,7 +59,12 @@
|
||||
#include <semaphore.h>
|
||||
#include <signal.h>
|
||||
#include <sys/signalfd.h>
|
||||
#include <sys/mount.h>
|
||||
#include <include/generated/uapi/linux/version.h>
|
||||
#include <sys/user.h>
|
||||
#include "../include/uprotocol.h"
|
||||
#include <getopt.h>
|
||||
#include "../config.h"
|
||||
|
||||
//#define DEBUG
|
||||
|
||||
@ -96,6 +101,19 @@ int __glob_argc = -1;
|
||||
char **__glob_argv = 0;
|
||||
#endif
|
||||
|
||||
#ifdef ENABLE_MCOVERLAYFS
|
||||
#undef ENABLE_MCOVERLAYFS
|
||||
#ifndef RHEL_RELEASE_CODE
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) && LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0)
|
||||
#define ENABLE_MCOVERLAYFS 1
|
||||
#endif // LINUX_VERSION_CODE == 4.0
|
||||
#else
|
||||
#if RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(7,2)
|
||||
#define ENABLE_MCOVERLAYFS 1
|
||||
#endif // RHEL_RELEASE_CODE == 7.2
|
||||
#endif // RHEL_RELEASE_CODE
|
||||
#endif // ENABLE_MCOVERLAYFS
|
||||
|
||||
typedef unsigned char cc_t;
|
||||
typedef unsigned int speed_t;
|
||||
typedef unsigned int tcflag_t;
|
||||
@ -129,6 +147,7 @@ static char *exec_path = NULL;
|
||||
static char *altroot;
|
||||
static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK";
|
||||
static int ischild;
|
||||
static int enable_vdso = 1;
|
||||
|
||||
struct fork_sync {
|
||||
pid_t pid;
|
||||
@ -183,6 +202,8 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
|
||||
|
||||
desc = malloc(sizeof(struct program_load_desc)
|
||||
+ sizeof(struct program_image_section) * nhdrs);
|
||||
memset(desc, '\0', sizeof(struct program_load_desc)
|
||||
+ sizeof(struct program_image_section) * nhdrs);
|
||||
desc->shell_path[0] = '\0';
|
||||
fseek(fp, hdr.e_phoff, SEEK_SET);
|
||||
j = 0;
|
||||
@ -243,6 +264,8 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
|
||||
}
|
||||
desc->pid = getpid();
|
||||
desc->pgid = getpgid(0);
|
||||
if(*interp_pathp)
|
||||
desc->reloc = hdr.e_type == ET_DYN;
|
||||
desc->entry = hdr.e_entry;
|
||||
ioctl(fd, MCEXEC_UP_GET_CREDV, desc->cred);
|
||||
desc->at_phdr = load_addr + hdr.e_phoff;
|
||||
@ -365,7 +388,7 @@ struct program_load_desc *load_interp(struct program_load_desc *desc0, FILE *fp)
|
||||
|
||||
unsigned char *dma_buf;
|
||||
|
||||
int lookup_exec_path(char *filename, char *path, int max_len)
|
||||
int lookup_exec_path(char *filename, char *path, int max_len, int execvp)
|
||||
{
|
||||
int found;
|
||||
int error;
|
||||
@ -383,7 +406,21 @@ retry:
|
||||
|
||||
char *token, *string, *tofree;
|
||||
char *PATH = getenv("COKERNEL_PATH");
|
||||
if (!PATH) {
|
||||
|
||||
if (!execvp) {
|
||||
if (strlen(filename) + 1 > max_len) {
|
||||
return ENAMETOOLONG;
|
||||
}
|
||||
strcpy(path, filename);
|
||||
error = access(path, X_OK);
|
||||
if (error) {
|
||||
return errno;
|
||||
}
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!(PATH = getenv("COKERNEL_PATH"))) {
|
||||
PATH = getenv("PATH");
|
||||
}
|
||||
|
||||
@ -391,21 +428,6 @@ retry:
|
||||
return ENAMETOOLONG;
|
||||
}
|
||||
|
||||
/* See first whether file is available in current working dir */
|
||||
error = access(filename, X_OK);
|
||||
if (error == 0) {
|
||||
__dprintf("lookup_exec_path(): found %s in cwd\n", filename);
|
||||
error = snprintf(path, max_len, "%s", filename);
|
||||
|
||||
if (error < 0 || error >= max_len) {
|
||||
fprintf(stderr, "lookup_exec_path(): array too small?\n");
|
||||
return ENOMEM;
|
||||
}
|
||||
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
__dprintf("PATH: %s\n", PATH);
|
||||
|
||||
/* strsep() modifies string! */
|
||||
@ -432,6 +454,9 @@ retry:
|
||||
}
|
||||
|
||||
free(tofree);
|
||||
if(!found){
|
||||
return ENOENT;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
@ -478,7 +503,7 @@ retry:
|
||||
}
|
||||
|
||||
if ((sb.st_mode & S_IFMT) == S_IFLNK) {
|
||||
char *link_path = malloc(max_len);
|
||||
link_path = malloc(max_len);
|
||||
if (!link_path) {
|
||||
fprintf(stderr, "lookup_exec_path(): error allocating\n");
|
||||
return ENOMEM;
|
||||
@ -489,9 +514,18 @@ retry:
|
||||
fprintf(stderr, "lookup_exec_path(): error readlink\n");
|
||||
return EINVAL;
|
||||
}
|
||||
link_path[error] = '\0';
|
||||
|
||||
__dprintf("lookup_exec_path(): %s is link -> %s\n", path, link_path);
|
||||
|
||||
if(link_path[0] != '/'){
|
||||
char *t = strrchr(path, '/');
|
||||
if(t){
|
||||
t++;
|
||||
strcpy(t, link_path);
|
||||
strcpy(link_path, path);
|
||||
}
|
||||
}
|
||||
filename = link_path;
|
||||
goto retry;
|
||||
}
|
||||
@ -635,10 +669,7 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p,
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define PAGE_SIZE 4096
|
||||
#define PAGE_MASK ~((unsigned long)PAGE_SIZE - 1)
|
||||
|
||||
void transfer_image(int fd, struct program_load_desc *desc)
|
||||
int transfer_image(int fd, struct program_load_desc *desc)
|
||||
{
|
||||
struct remote_transfer pt;
|
||||
unsigned long s, e, flen, rpa;
|
||||
@ -652,13 +683,17 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
||||
+ PAGE_SIZE - 1) & PAGE_MASK;
|
||||
rpa = desc->sections[i].remote_pa;
|
||||
|
||||
fseek(fp, desc->sections[i].offset, SEEK_SET);
|
||||
if (fseek(fp, desc->sections[i].offset, SEEK_SET) != 0) {
|
||||
fprintf(stderr, "transfer_image(): error: seeking file position\n");
|
||||
return -1;
|
||||
}
|
||||
flen = desc->sections[i].filesz;
|
||||
|
||||
__dprintf("seeked to %lx | size %ld\n",
|
||||
desc->sections[i].offset, flen);
|
||||
|
||||
while (s < e) {
|
||||
memset(&pt, '\0', sizeof pt);
|
||||
pt.rphys = rpa;
|
||||
pt.userp = dma_buf;
|
||||
pt.size = PAGE_SIZE;
|
||||
@ -673,7 +708,20 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
||||
if (lr > flen) {
|
||||
lr = flen;
|
||||
}
|
||||
fread(dma_buf + l, 1, lr, fp);
|
||||
if (fread(dma_buf + l, 1, lr, fp) != lr) {
|
||||
if (ferror(fp) > 0) {
|
||||
fprintf(stderr, "transfer_image(): error: accessing file\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
else if (feof(fp) > 0) {
|
||||
fprintf(stderr, "transfer_image(): file too short?\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
else {
|
||||
/* TODO: handle smaller reads.. */
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
flen -= lr;
|
||||
}
|
||||
else if (flen > 0) {
|
||||
@ -682,7 +730,20 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
||||
} else {
|
||||
lr = flen;
|
||||
}
|
||||
fread(dma_buf, 1, lr, fp);
|
||||
if (fread(dma_buf, 1, lr, fp) != lr) {
|
||||
if (ferror(fp) > 0) {
|
||||
fprintf(stderr, "transfer_image(): error: accessing file\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
else if (feof(fp) > 0) {
|
||||
fprintf(stderr, "transfer_image(): file too short?\n");
|
||||
return -EINVAL;
|
||||
}
|
||||
else {
|
||||
/* TODO: handle smaller reads.. */
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
flen -= lr;
|
||||
}
|
||||
s += PAGE_SIZE;
|
||||
@ -698,6 +759,8 @@ void transfer_image(int fd, struct program_load_desc *desc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void print_desc(struct program_load_desc *desc)
|
||||
@ -762,7 +825,7 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat)
|
||||
}
|
||||
|
||||
/* Count full length */
|
||||
full_len = sizeof(int) + sizeof(char *); // Counter and terminating NULL
|
||||
full_len = sizeof(long) + sizeof(char *); // Counter and terminating NULL
|
||||
if (first) {
|
||||
full_len += sizeof(char *) + strlen(first) + 1;
|
||||
}
|
||||
@ -772,6 +835,8 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat)
|
||||
full_len += sizeof(char *) + strlen(strings[string_i]) + 1;
|
||||
}
|
||||
|
||||
full_len = (full_len + sizeof(long) - 1) & ~(sizeof(long) - 1);
|
||||
|
||||
_flat = (char *)malloc(full_len);
|
||||
if (!_flat) {
|
||||
return 0;
|
||||
@ -780,14 +845,14 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat)
|
||||
memset(_flat, 0, full_len);
|
||||
|
||||
/* Number of strings */
|
||||
*((int*)_flat) = nr_strings + (first ? 1 : 0);
|
||||
*((long *)_flat) = nr_strings + (first ? 1 : 0);
|
||||
|
||||
// Actual offset
|
||||
flat_offset = sizeof(int) + sizeof(char *) * (nr_strings + 1 +
|
||||
flat_offset = sizeof(long) + sizeof(char *) * (nr_strings + 1 +
|
||||
(first ? 1 : 0));
|
||||
|
||||
if (first) {
|
||||
*((char **)(_flat + sizeof(int))) = (void *)flat_offset;
|
||||
*((char **)(_flat + sizeof(long))) = (void *)flat_offset;
|
||||
memcpy(_flat + flat_offset, first, strlen(first) + 1);
|
||||
flat_offset += strlen(first) + 1;
|
||||
}
|
||||
@ -795,7 +860,7 @@ int flatten_strings(int nr_strings, char *first, char **strings, char **flat)
|
||||
for (string_i = 0; string_i < nr_strings; ++string_i) {
|
||||
|
||||
/* Fabricate the string */
|
||||
*((char **)(_flat + sizeof(int) + (string_i + (first ? 1 : 0))
|
||||
*((char **)(_flat + sizeof(long) + (string_i + (first ? 1 : 0))
|
||||
* sizeof(char *))) = (void *)flat_offset;
|
||||
memcpy(_flat + flat_offset, strings[string_i], strlen(strings[string_i]) + 1);
|
||||
flat_offset += strlen(strings[string_i]) + 1;
|
||||
@ -818,7 +883,10 @@ struct thread_data_s {
|
||||
pthread_mutex_t *lock;
|
||||
pthread_barrier_t *init_ready;
|
||||
} *thread_data;
|
||||
|
||||
int ncpu;
|
||||
int n_threads;
|
||||
|
||||
pid_t master_tid;
|
||||
|
||||
pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
|
||||
@ -829,7 +897,7 @@ static void *main_loop_thread_func(void *arg)
|
||||
struct thread_data_s *td = (struct thread_data_s *)arg;
|
||||
|
||||
td->tid = gettid();
|
||||
td->remote_tid = (int)td->tid;
|
||||
td->remote_tid = -1;
|
||||
pthread_barrier_wait(&init_ready);
|
||||
td->ret = main_loop(td->fd, td->cpu, td->lock);
|
||||
|
||||
@ -878,6 +946,7 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
|
||||
remote_tid = -1;
|
||||
}
|
||||
|
||||
memset(&sigdesc, '\0', sizeof sigdesc);
|
||||
sigdesc.cpu = cpu;
|
||||
sigdesc.pid = (int)pid;
|
||||
sigdesc.tid = remote_tid;
|
||||
@ -904,13 +973,17 @@ act_signalfd4(struct syscall_wait_desc *w)
|
||||
switch(mode){
|
||||
case 0: /* new signalfd */
|
||||
sfd = malloc(sizeof(struct sigfd));
|
||||
memset(sfd, '\0', sizeof(struct sigfd));
|
||||
tmp = w->sr.args[1];
|
||||
flags = 0;
|
||||
if(tmp & SFD_NONBLOCK)
|
||||
flags |= O_NONBLOCK;
|
||||
if(tmp & SFD_CLOEXEC)
|
||||
flags |= O_CLOEXEC;
|
||||
pipe2(sfd->sigpipe, flags);
|
||||
if (pipe2(sfd->sigpipe, flags) < 0) {
|
||||
perror("pipe2 failed:");
|
||||
return -1;
|
||||
}
|
||||
sfd->next = sigfdtop;
|
||||
sigfdtop = sfd;
|
||||
rc = sfd->sigpipe[0];
|
||||
@ -941,7 +1014,11 @@ act_signalfd4(struct syscall_wait_desc *w)
|
||||
rc = -EBADF;
|
||||
else{
|
||||
info = (struct signalfd_siginfo *)w->sr.args[2];
|
||||
write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo));
|
||||
if (write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo))
|
||||
!= sizeof(struct signalfd_siginfo)) {
|
||||
fprintf(stderr, "error: writing sigpipe\n");
|
||||
rc = -EBADF;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -1047,9 +1124,9 @@ void init_worker_threads(int fd)
|
||||
int i;
|
||||
|
||||
pthread_mutex_init(&lock, NULL);
|
||||
pthread_barrier_init(&init_ready, NULL, ncpu + 2);
|
||||
pthread_barrier_init(&init_ready, NULL, n_threads + 2);
|
||||
|
||||
for (i = 0; i <= ncpu; ++i) {
|
||||
for (i = 0; i <= n_threads; ++i) {
|
||||
int ret;
|
||||
|
||||
thread_data[i].fd = fd;
|
||||
@ -1069,6 +1146,80 @@ void init_worker_threads(int fd)
|
||||
pthread_barrier_wait(&init_ready);
|
||||
}
|
||||
|
||||
#ifdef ENABLE_MCOVERLAYFS
|
||||
#define READ_BUFSIZE 1024
|
||||
static int isunshare(void)
|
||||
{
|
||||
int err = 0;
|
||||
int ret;
|
||||
int fd;
|
||||
char proc_path[PATH_MAX];
|
||||
ssize_t len_read;
|
||||
char buf_read[READ_BUFSIZE + 1];
|
||||
char *buf_read_off;
|
||||
char *buf_find;
|
||||
char buf_cmp[READ_BUFSIZE + 1];
|
||||
char *buf_cmp_off;
|
||||
ssize_t len_copy;
|
||||
|
||||
snprintf(proc_path, sizeof(proc_path), "/proc/%d/mounts", getpid());
|
||||
fd = open(proc_path, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "Error: Failed to open %s.\n", proc_path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
buf_cmp_off = buf_cmp;
|
||||
while (1) {
|
||||
len_read = read(fd, buf_read, READ_BUFSIZE);
|
||||
if (len_read == -1) {
|
||||
fprintf(stderr, "Error: Failed to read.\n");
|
||||
err = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
buf_read_off = buf_read;
|
||||
while (1) {
|
||||
if ((len_read - (buf_read_off - buf_read)) <= 0) {
|
||||
break;
|
||||
}
|
||||
buf_find = memchr(buf_read_off, '\n',
|
||||
len_read - (buf_read_off - buf_read));
|
||||
if (buf_find) {
|
||||
len_copy = buf_find - buf_read_off;
|
||||
} else {
|
||||
len_copy = len_read - (buf_read_off - buf_read);
|
||||
}
|
||||
memcpy(buf_cmp_off, buf_read_off, len_copy);
|
||||
*(buf_cmp_off + len_copy) = '\0';
|
||||
|
||||
if (buf_find) {
|
||||
buf_read_off = buf_read_off + len_copy + 1;
|
||||
buf_cmp_off = buf_cmp;
|
||||
ret = strncmp(buf_cmp, "mcoverlay /proc ", 16);
|
||||
if (!ret) {
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
buf_read_off = buf_read_off + len_copy;
|
||||
buf_cmp_off = buf_cmp_off + len_copy;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (err == 1 || len_read == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
__dprintf("err=%d\n", err);
|
||||
return err;
|
||||
}
|
||||
#endif // ENABLE_MCOVERLAYFS
|
||||
|
||||
#define MCK_RLIMIT_AS 0
|
||||
#define MCK_RLIMIT_CORE 1
|
||||
#define MCK_RLIMIT_CPU 2
|
||||
@ -1139,6 +1290,24 @@ static int rlimits[] = {
|
||||
|
||||
char dev[64];
|
||||
|
||||
static struct option mcexec_options[] = {
|
||||
{
|
||||
.name = "disable-vdso",
|
||||
.has_arg = no_argument,
|
||||
.flag = &enable_vdso,
|
||||
.val = 0,
|
||||
},
|
||||
{
|
||||
.name = "enable-vdso",
|
||||
.has_arg = no_argument,
|
||||
.flag = &enable_vdso,
|
||||
.val = 1,
|
||||
},
|
||||
|
||||
/* end */
|
||||
{ NULL, 0, NULL, 0, },
|
||||
};
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
// int fd;
|
||||
@ -1190,12 +1359,15 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
/* Parse options ("+" denotes stop at the first non-option) */
|
||||
while ((opt = getopt(argc, argv, "+c:")) != -1) {
|
||||
while ((opt = getopt_long(argc, argv, "+c:", mcexec_options, NULL)) != -1) {
|
||||
switch (opt) {
|
||||
case 'c':
|
||||
target_core = atoi(optarg);
|
||||
break;
|
||||
|
||||
case 0: /* long opt */
|
||||
break;
|
||||
|
||||
default: /* '?' */
|
||||
print_usage(argv);
|
||||
exit(EXIT_FAILURE);
|
||||
@ -1234,7 +1406,59 @@ int main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (lookup_exec_path(argv[optind], path, sizeof(path)) != 0) {
|
||||
#ifdef ENABLE_MCOVERLAYFS
|
||||
__dprintf("mcoverlay enable\n");
|
||||
char mcos_procdir[PATH_MAX];
|
||||
char mcos_sysdir[PATH_MAX];
|
||||
|
||||
error = isunshare();
|
||||
if (error == 0) {
|
||||
struct sys_unshare_desc unshare_desc;
|
||||
struct sys_mount_desc mount_desc;
|
||||
|
||||
memset(&unshare_desc, '\0', sizeof unshare_desc);
|
||||
memset(&mount_desc, '\0', sizeof mount_desc);
|
||||
unshare_desc.unshare_flags = CLONE_NEWNS;
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_UNSHARE,
|
||||
(unsigned long)&unshare_desc) != 0) {
|
||||
fprintf(stderr, "Error: Failed to unshare. (%s)\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
sprintf(mcos_procdir, "/tmp/mcos/mcos%d_proc", mcosid);
|
||||
mount_desc.dev_name = mcos_procdir;
|
||||
mount_desc.dir_name = "/proc";
|
||||
mount_desc.type = NULL;
|
||||
mount_desc.flags = MS_BIND;
|
||||
mount_desc.data = NULL;
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_MOUNT,
|
||||
(unsigned long)&mount_desc) != 0) {
|
||||
fprintf(stderr, "Error: Failed to mount /proc. (%s)\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
sprintf(mcos_sysdir, "/tmp/mcos/mcos%d_sys", mcosid);
|
||||
mount_desc.dev_name = mcos_sysdir;
|
||||
mount_desc.dir_name = "/sys";
|
||||
mount_desc.type = NULL;
|
||||
mount_desc.flags = MS_BIND;
|
||||
mount_desc.data = NULL;
|
||||
if (ioctl(fd, MCEXEC_UP_SYS_MOUNT,
|
||||
(unsigned long)&mount_desc) != 0) {
|
||||
fprintf(stderr, "Error: Failed to mount /sys. (%s)\n",
|
||||
strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
} else if (error == -1) {
|
||||
return 1;
|
||||
}
|
||||
#else
|
||||
__dprintf("mcoverlay disable\n");
|
||||
#endif // ENABLE_MCOVERLAYFS
|
||||
|
||||
if (lookup_exec_path(argv[optind], path, sizeof(path), 1) != 0) {
|
||||
fprintf(stderr, "error: finding file: %s\n", argv[optind]);
|
||||
return 1;
|
||||
}
|
||||
@ -1246,7 +1470,7 @@ int main(int argc, char **argv)
|
||||
|
||||
/* Check whether shell script */
|
||||
if (shell) {
|
||||
if (lookup_exec_path(shell, shell_path, sizeof(shell_path)) != 0) {
|
||||
if (lookup_exec_path(shell, shell_path, sizeof(shell_path), 0) != 0) {
|
||||
fprintf(stderr, "error: finding file: %s\n", shell);
|
||||
return 1;
|
||||
}
|
||||
@ -1272,6 +1496,8 @@ int main(int argc, char **argv)
|
||||
//print_flat(args);
|
||||
|
||||
desc->cpu = target_core;
|
||||
desc->enable_vdso = enable_vdso;
|
||||
|
||||
p = getenv(rlimit_stack_envname);
|
||||
if (p) {
|
||||
errno = 0;
|
||||
@ -1306,6 +1532,19 @@ int main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
n_threads = ncpu;
|
||||
if (ncpu > 16) {
|
||||
n_threads = 16;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX: keep thread_data ncpu sized despite that there are only
|
||||
* n_threads worker threads in the pool so that signaling code
|
||||
* keeps working.
|
||||
*
|
||||
* TODO: fix signaling code to be independent of TIDs.
|
||||
* TODO: implement dynamic thread pool resizing.
|
||||
*/
|
||||
thread_data = (struct thread_data_s *)malloc(sizeof(struct thread_data_s) * (ncpu + 1));
|
||||
memset(thread_data, '\0', sizeof(struct thread_data_s) * (ncpu + 1));
|
||||
|
||||
@ -1348,7 +1587,10 @@ int main(int argc, char **argv)
|
||||
}
|
||||
|
||||
print_desc(desc);
|
||||
transfer_image(fd, desc);
|
||||
if (transfer_image(fd, desc) < 0) {
|
||||
fprintf(stderr, "error: transferring image\n");
|
||||
return -1;
|
||||
}
|
||||
fflush(stdout);
|
||||
fflush(stderr);
|
||||
|
||||
@ -1387,7 +1629,7 @@ int main(int argc, char **argv)
|
||||
return 1;
|
||||
}
|
||||
|
||||
for (i = 0; i <= ncpu; ++i) {
|
||||
for (i = 0; i <= n_threads; ++i) {
|
||||
pthread_join(thread_data[i].thread_id, NULL);
|
||||
}
|
||||
|
||||
@ -1401,6 +1643,7 @@ void do_syscall_return(int fd, int cpu,
|
||||
{
|
||||
struct syscall_ret_desc desc;
|
||||
|
||||
memset(&desc, '\0', sizeof desc);
|
||||
desc.cpu = cpu;
|
||||
desc.ret = ret;
|
||||
desc.src = src;
|
||||
@ -1417,6 +1660,7 @@ void do_syscall_load(int fd, int cpu, unsigned long dest, unsigned long src,
|
||||
{
|
||||
struct syscall_load_desc desc;
|
||||
|
||||
memset(&desc, '\0', sizeof desc);
|
||||
desc.cpu = cpu;
|
||||
desc.src = src;
|
||||
desc.dest = dest;
|
||||
@ -1447,16 +1691,14 @@ do_generic_syscall(
|
||||
}
|
||||
|
||||
static void
|
||||
kill_thread(unsigned long cpu)
|
||||
kill_thread(unsigned long tid)
|
||||
{
|
||||
if(cpu >= 0 && cpu < ncpu){
|
||||
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
|
||||
}
|
||||
else{
|
||||
int i;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < ncpu; ++i) {
|
||||
for (i = 0; i < n_threads; ++i) {
|
||||
if(thread_data[i].remote_tid == tid){
|
||||
pthread_kill(thread_data[i].thread_id, LOCALSIG);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1466,6 +1708,7 @@ static long do_strncpy_from_user(int fd, void *dest, void *src, unsigned long n)
|
||||
struct strncpy_from_user_desc desc;
|
||||
int ret;
|
||||
|
||||
memset(&desc, '\0', sizeof desc);
|
||||
desc.dest = dest;
|
||||
desc.src = src;
|
||||
desc.n = n;
|
||||
@ -1560,6 +1803,9 @@ int close_cloexec_fds(int mcos_fd)
|
||||
char *
|
||||
chgpath(char *in, char *buf)
|
||||
{
|
||||
#ifdef ENABLE_MCOVERLAYFS
|
||||
return in;
|
||||
#endif // ENABLE_MCOVERLAYFS
|
||||
char *fn = in;
|
||||
struct stat sb;
|
||||
|
||||
@ -1589,10 +1835,11 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
char *fn;
|
||||
int sig;
|
||||
int term;
|
||||
struct timeval tv;
|
||||
struct timespec tv;
|
||||
char pathbuf[PATH_MAX];
|
||||
char tmpbuf[PATH_MAX];
|
||||
|
||||
memset(&w, '\0', sizeof w);
|
||||
w.cpu = cpu;
|
||||
w.pid = getpid();
|
||||
|
||||
@ -1608,6 +1855,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
|
||||
//pthread_mutex_lock(lock);
|
||||
|
||||
thread_data[cpu].remote_tid = w.sr.rtid;
|
||||
|
||||
switch (w.sr.number) {
|
||||
case __NR_open:
|
||||
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
|
||||
@ -1628,13 +1877,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
break;
|
||||
|
||||
case __NR_futex:
|
||||
ret = gettimeofday(&tv, NULL);
|
||||
ret = clock_gettime(w.sr.args[1], &tv);
|
||||
SET_ERR(ret);
|
||||
__dprintf("gettimeofday=%016ld,%09ld\n",
|
||||
__dprintf("clock_gettime=%016ld,%09ld\n",
|
||||
tv.tv_sec,
|
||||
tv.tv_usec);
|
||||
tv.tv_nsec);
|
||||
do_syscall_return(fd, cpu, ret, 1, (unsigned long)&tv,
|
||||
w.sr.args[0], sizeof(struct timeval));
|
||||
w.sr.args[0], sizeof(struct timespec));
|
||||
break;
|
||||
|
||||
case __NR_kill: // interrupt syscall
|
||||
@ -1646,13 +1895,13 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
sig = 0;
|
||||
term = 0;
|
||||
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
|
||||
/* Drop executable file */
|
||||
if ((ret = ioctl(fd, MCEXEC_UP_CLOSE_EXEC)) != 0) {
|
||||
fprintf(stderr, "WARNING: close_exec() couldn't find exec file?\n");
|
||||
}
|
||||
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
|
||||
__dprintf("__NR_exit/__NR_exit_group: %ld (cpu_id: %d)\n",
|
||||
w.sr.args[0], cpu);
|
||||
if(w.sr.number == __NR_exit_group){
|
||||
@ -1690,17 +1939,12 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
return w.sr.args[0];
|
||||
|
||||
case __NR_mmap:
|
||||
case __NR_munmap:
|
||||
case __NR_mprotect:
|
||||
/* reserved for internal use */
|
||||
do_syscall_return(fd, cpu, -ENOSYS, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_munmap:
|
||||
ret = madvise((void *)w.sr.args[0], w.sr.args[1], MADV_DONTNEED);
|
||||
SET_ERR(ret);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
#ifdef USE_SYSCALL_MOD_CALL
|
||||
case 303:{
|
||||
__dprintf("mcexec.c,mod_cal,mod=%ld,cmd=%ld\n", w.sr.args[0], w.sr.args[1]);
|
||||
@ -1725,6 +1969,39 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
thread_data[oldcpuid].remote_tid = wtid;
|
||||
}
|
||||
|
||||
/*
|
||||
* Number of TIDs and the remote physical address where TIDs are
|
||||
* expected are passed in arg 4 and 5, respectively.
|
||||
*/
|
||||
if (w.sr.args[4] > 0) {
|
||||
struct remote_transfer trans;
|
||||
int i = 0;
|
||||
int *tids = malloc(sizeof(int) * w.sr.args[4]);
|
||||
if (!tids) {
|
||||
fprintf(stderr, "__NR_gettid(): error allocating TIDs\n");
|
||||
goto gettid_out;
|
||||
}
|
||||
|
||||
for (i = 0; i < ncpu && i < w.sr.args[4]; ++i) {
|
||||
tids[i] = thread_data[i].tid;
|
||||
}
|
||||
|
||||
for (; i < ncpu; ++i) {
|
||||
tids[i] = 0;
|
||||
}
|
||||
|
||||
trans.userp = (void*)tids;
|
||||
trans.rphys = w.sr.args[5];
|
||||
trans.size = sizeof(int) * w.sr.args[4];
|
||||
trans.direction = MCEXEC_UP_TRANSFER_TO_REMOTE;
|
||||
|
||||
if (ioctl(fd, MCEXEC_UP_TRANSFER, &trans) != 0) {
|
||||
fprintf(stderr, "__NR_gettid(): error transfering TIDs\n");
|
||||
}
|
||||
|
||||
free(tids);
|
||||
}
|
||||
gettid_out:
|
||||
do_syscall_return(fd, cpu, thread_data[newcpuid].remote_tid, 0, 0, 0, 0);
|
||||
break;
|
||||
}
|
||||
@ -1734,6 +2011,7 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
struct fork_sync_container *fsc;
|
||||
struct fork_sync_container *fp;
|
||||
struct fork_sync_container *fb;
|
||||
int flag = w.sr.args[0];
|
||||
int rc = -1;
|
||||
pid_t pid;
|
||||
|
||||
@ -1753,7 +2031,45 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
memset(fs, '\0', sizeof(struct fork_sync));
|
||||
sem_init(&fs->sem, 1, 0);
|
||||
|
||||
pid = fork();
|
||||
if(flag){
|
||||
int pipefds[2];
|
||||
|
||||
if(pipe(pipefds) == -1){
|
||||
rc = -errno;
|
||||
sem_destroy(&fs->sem);
|
||||
goto fork_err;
|
||||
}
|
||||
pid = fork();
|
||||
if(pid == 0){
|
||||
close(pipefds[0]);
|
||||
pid = fork();
|
||||
if(pid != 0){
|
||||
if (write(pipefds[1], &pid, sizeof pid) != sizeof(pid)) {
|
||||
fprintf(stderr, "error: writing pipefds\n");
|
||||
}
|
||||
exit(0);
|
||||
}
|
||||
}
|
||||
else if(pid != -1){
|
||||
int npid;
|
||||
int st;
|
||||
|
||||
close(pipefds[1]);
|
||||
if (read(pipefds[0], &npid, sizeof npid) != sizeof(npid)) {
|
||||
fprintf(stderr, "error: reading pipefds\n");
|
||||
}
|
||||
close(pipefds[0]);
|
||||
waitpid(pid, &st, 0);
|
||||
pid = npid;
|
||||
}
|
||||
else{
|
||||
rc = -errno;
|
||||
sem_destroy(&fs->sem);
|
||||
goto fork_err;
|
||||
}
|
||||
}
|
||||
else
|
||||
pid = fork();
|
||||
|
||||
switch (pid) {
|
||||
/* Error */
|
||||
@ -1781,7 +2097,6 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
|
||||
/* Reinit signals and syscall threads */
|
||||
init_sigaction();
|
||||
init_worker_threads(fd);
|
||||
|
||||
__dprintf("pid(%d): signals and syscall threads OK\n",
|
||||
getpid());
|
||||
@ -1795,6 +2110,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock)
|
||||
goto fork_child_sync_pipe;
|
||||
}
|
||||
|
||||
init_worker_threads(fd);
|
||||
|
||||
fork_child_sync_pipe:
|
||||
sem_post(&fs->sem);
|
||||
if (fs->status)
|
||||
@ -1896,15 +2213,16 @@ fork_err:
|
||||
char path[1024];
|
||||
char *filename;
|
||||
int ret;
|
||||
char *shell = NULL;
|
||||
char *shell;
|
||||
char shell_path[1024];
|
||||
|
||||
/* Load descriptor phase */
|
||||
case 1:
|
||||
|
||||
shell = NULL;
|
||||
filename = (char *)w.sr.args[1];
|
||||
|
||||
if ((ret = lookup_exec_path(filename, path, sizeof(path)))
|
||||
if ((ret = lookup_exec_path(filename, path, sizeof(path), 0))
|
||||
!= 0) {
|
||||
goto return_execve1;
|
||||
}
|
||||
@ -1918,7 +2236,7 @@ fork_err:
|
||||
/* Check whether shell script */
|
||||
if (shell) {
|
||||
if ((ret = lookup_exec_path(shell, shell_path,
|
||||
sizeof(shell_path))) != 0) {
|
||||
sizeof(shell_path), 0)) != 0) {
|
||||
fprintf(stderr, "execve(): error: finding file: %s\n", shell);
|
||||
goto return_execve1;
|
||||
}
|
||||
@ -1939,6 +2257,7 @@ fork_err:
|
||||
strcpy(desc->shell_path, shell_path);
|
||||
}
|
||||
|
||||
desc->enable_vdso = enable_vdso;
|
||||
__dprintf("execve(): load_elf_desc() for %s OK, num sections: %d\n",
|
||||
path, desc->num_sections);
|
||||
|
||||
@ -1979,6 +2298,7 @@ return_execve1:
|
||||
fprintf(stderr, "execve(): error allocating desc\n");
|
||||
goto return_execve2;
|
||||
}
|
||||
memset(desc, '\0', w.sr.args[2]);
|
||||
|
||||
/* Copy descriptor from co-kernel side */
|
||||
trans.userp = (void*)desc;
|
||||
@ -1995,7 +2315,10 @@ return_execve1:
|
||||
|
||||
__dprintf("%s", "execve(): transfer ELF desc OK\n");
|
||||
|
||||
transfer_image(fd, desc);
|
||||
if (transfer_image(fd, desc) != 0) {
|
||||
fprintf(stderr, "error: transferring image\n");
|
||||
return -1;
|
||||
}
|
||||
__dprintf("%s", "execve(): image transferred\n");
|
||||
|
||||
if (close_cloexec_fds(fd) < 0) {
|
||||
@ -2021,6 +2344,11 @@ return_execve2:
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_perf_event_open:
|
||||
ret = open("/dev/null", O_RDONLY);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_rt_sigaction:
|
||||
act_sigaction(&w);
|
||||
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
|
||||
@ -2042,6 +2370,53 @@ return_execve2:
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setresuid:
|
||||
ret = setresuid(w.sr.args[0], w.sr.args[1], w.sr.args[2]);
|
||||
if(ret == -1)
|
||||
ret = -errno;
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setreuid:
|
||||
ret = setreuid(w.sr.args[0], w.sr.args[1]);
|
||||
if(ret == -1)
|
||||
ret = -errno;
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setuid:
|
||||
ret = setuid(w.sr.args[0]);
|
||||
if(ret == -1)
|
||||
ret = -errno;
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setresgid:
|
||||
ret = setresgid(w.sr.args[0], w.sr.args[1], w.sr.args[2]);
|
||||
if(ret == -1)
|
||||
ret = -errno;
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setregid:
|
||||
ret = setregid(w.sr.args[0], w.sr.args[1]);
|
||||
if(ret == -1)
|
||||
ret = -errno;
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setgid:
|
||||
ret = setgid(w.sr.args[0]);
|
||||
if(ret == -1)
|
||||
ret = -errno;
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_setfsgid:
|
||||
ret = setfsgid(w.sr.args[0]);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_close:
|
||||
if(w.sr.args[0] == fd)
|
||||
ret = -EBADF;
|
||||
@ -2050,6 +2425,25 @@ return_execve2:
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
case __NR_readlink:
|
||||
ret = do_strncpy_from_user(fd, pathbuf, (void *)w.sr.args[0], PATH_MAX);
|
||||
if (ret >= PATH_MAX) {
|
||||
ret = -ENAMETOOLONG;
|
||||
}
|
||||
if (ret < 0) {
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
}
|
||||
|
||||
fn = chgpath(pathbuf, tmpbuf);
|
||||
|
||||
ret = readlink(fn, (char *)w.sr.args[1], w.sr.args[2]);
|
||||
__dprintf("readlink: path=%s, buf=%s, ret=%ld\n",
|
||||
fn, (char *)w.sr.args[1], ret);
|
||||
SET_ERR(ret);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
break;
|
||||
|
||||
default:
|
||||
ret = do_generic_syscall(&w);
|
||||
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
|
||||
@ -2057,6 +2451,8 @@ return_execve2:
|
||||
|
||||
}
|
||||
|
||||
thread_data[cpu].remote_tid = -1;
|
||||
|
||||
//pthread_mutex_unlock(lock);
|
||||
}
|
||||
__dprint("timed out.\n");
|
||||
|
||||
@ -3,7 +3,7 @@ OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
|
||||
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
|
||||
CFLAGS += -I$(SRC)/include -D__KERNEL__
|
||||
CFLAGS += -DKNC_MAP_MICPA $(EXTRA_CFLAGS)
|
||||
|
||||
ifeq ("$(DCFA_MODE)", "kmod")
|
||||
|
||||
@ -3,10 +3,10 @@ SRC=$(VPATH)
|
||||
IHKDIR=$(IHKBASE)/$(TARGETDIR)
|
||||
OBJS = init.o mem.o debug.o mikc.o listeners.o ap.o syscall.o cls.o host.o
|
||||
OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o
|
||||
OBJS += zeroobj.o procfs.o devobj.o sysfs.o
|
||||
DEPSRCS=$(wildcard $(SRC)/*.c)
|
||||
|
||||
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ -g
|
||||
CFLAGS += -I$(SRC)/include -D__KERNEL__ -g
|
||||
LDFLAGS += -e arch_start
|
||||
IHKOBJ = ihk/ihk.o
|
||||
|
||||
|
||||
146
kernel/ap.c
146
kernel/ap.c
@ -28,19 +28,19 @@
|
||||
|
||||
int num_processors = 1;
|
||||
static volatile int ap_stop = 1;
|
||||
extern void zero_tsc(void);
|
||||
|
||||
static void ap_wait(void)
|
||||
{
|
||||
init_tick();
|
||||
while (ap_stop) {
|
||||
barrier();
|
||||
cpu_pause();
|
||||
}
|
||||
|
||||
zero_tsc();
|
||||
sync_tick();
|
||||
|
||||
kmalloc_init();
|
||||
sched_init();
|
||||
arch_start_pvclock();
|
||||
|
||||
if (find_command_line("hidos")) {
|
||||
init_host_syscall_channel();
|
||||
@ -56,7 +56,9 @@ static void ap_wait(void)
|
||||
|
||||
void ap_start(void)
|
||||
{
|
||||
init_tick();
|
||||
ap_stop = 0;
|
||||
sync_tick();
|
||||
}
|
||||
|
||||
void ap_init(void)
|
||||
@ -66,6 +68,7 @@ void ap_init(void)
|
||||
int bsp_hw_id;
|
||||
|
||||
ihk_mc_init_ap();
|
||||
init_delay();
|
||||
|
||||
cpu_info = ihk_mc_get_cpu_info();
|
||||
bsp_hw_id = ihk_mc_get_hardware_processor_id();
|
||||
@ -89,3 +92,140 @@ void ap_init(void)
|
||||
kprintf("AP Booting: Done\n");
|
||||
}
|
||||
|
||||
#include <sysfs.h>
|
||||
#include <kmalloc.h>
|
||||
#include <string.h>
|
||||
#include <vsprintf.h>
|
||||
|
||||
/*
 * show_int() - sysfs show handler that prints a plain int.
 * @ops:      sysfs operations the handler was registered with (not used)
 * @instance: pointer to the int to print
 * @buf:      output buffer
 * @size:     size of @buf in bytes
 *
 * Formats the value as decimal followed by a newline.
 *
 * Return: the value returned by snprintf().
 */
static ssize_t
show_int(struct sysfs_ops *ops, void *instance, void *buf, size_t size)
{
	const int *valuep = instance;
	int written = snprintf(buf, size, "%d\n", *valuep);

	return written;
}/* show_int() */
|
||||
|
||||
struct sysfs_ops show_int_ops = {
|
||||
.show = &show_int,
|
||||
};
|
||||
|
||||
struct fake_cpu_info {
|
||||
int online;
|
||||
};
|
||||
|
||||
static struct fake_cpu_info *fake_cpu_infos = NULL;
|
||||
|
||||
enum fake_cpu_info_member {
|
||||
ONLINE,
|
||||
};
|
||||
|
||||
struct fake_cpu_info_ops {
|
||||
enum fake_cpu_info_member member;
|
||||
struct sysfs_ops ops;
|
||||
};
|
||||
|
||||
static ssize_t
|
||||
show_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf,
|
||||
size_t size)
|
||||
{
|
||||
struct fake_cpu_info_ops *ops
|
||||
= container_of(ops0, struct fake_cpu_info_ops, ops);
|
||||
struct fake_cpu_info *info = instance;
|
||||
ssize_t n;
|
||||
|
||||
switch (ops->member) {
|
||||
case ONLINE:
|
||||
n = snprintf(buf, size, "%d\n", info->online);
|
||||
break;
|
||||
default:
|
||||
n = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (n >= size) {
|
||||
n = -ENOSPC;
|
||||
}
|
||||
|
||||
return n;
|
||||
} /* show_fake_cpu_info() */
|
||||
|
||||
static ssize_t
|
||||
store_fake_cpu_info(struct sysfs_ops *ops0, void *instance, void *buf,
|
||||
size_t size)
|
||||
{
|
||||
struct fake_cpu_info_ops *ops
|
||||
= container_of(ops0, struct fake_cpu_info_ops, ops);
|
||||
struct fake_cpu_info *info = instance;
|
||||
ssize_t n;
|
||||
|
||||
switch (ops->member) {
|
||||
case ONLINE:
|
||||
kprintf("NYI:store_fake_cpu_info(%p,%p,%p,%ld): "
|
||||
"online %d --> \"%.*s\"\n",
|
||||
ops0, instance, buf, size, info->online,
|
||||
(int)size, buf);
|
||||
n = size;
|
||||
break;
|
||||
default:
|
||||
n = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
return n;
|
||||
} /* store_fake_cpu_info() */
|
||||
|
||||
static struct fake_cpu_info_ops show_fci_online = {
|
||||
.member = ONLINE,
|
||||
.ops.show = &show_fake_cpu_info,
|
||||
.ops.store = &store_fake_cpu_info,
|
||||
};
|
||||
|
||||
void
|
||||
cpu_sysfs_setup(void)
|
||||
{
|
||||
int error;
|
||||
int cpu;
|
||||
sysfs_handle_t targeth;
|
||||
struct fake_cpu_info *info;
|
||||
|
||||
/* sample of simple variable **********************************/
|
||||
error = sysfs_createf(&show_int_ops, &num_processors, 0444,
|
||||
"/sys/devices/system/cpu/num_processors");
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_createf(num_processors) failed\n");
|
||||
}
|
||||
|
||||
/* sample of more complex variable ****************************/
|
||||
/* setup table */
|
||||
info = kmalloc(sizeof(*info) * num_processors, IHK_MC_AP_CRITICAL);
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
info[cpu].online = 10+cpu;
|
||||
}
|
||||
fake_cpu_infos = info;
|
||||
|
||||
/* setup sysfs tree */
|
||||
for (cpu = 0; cpu < num_processors; ++cpu) {
|
||||
/* online */
|
||||
error = sysfs_createf(&show_fci_online.ops,
|
||||
&fake_cpu_infos[cpu], 0644,
|
||||
"/sys/devices/system/cpu/cpu%d/online", cpu);
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_createf failed\n");
|
||||
}
|
||||
|
||||
/* link to cpu%d */
|
||||
error = sysfs_lookupf(&targeth,
|
||||
"/sys/devices/system/cpu/cpu%d", cpu);
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_lookupf failed\n");
|
||||
}
|
||||
|
||||
error = sysfs_symlinkf(targeth, "/sys/bus/cpu/devices/cpu%d",
|
||||
cpu);
|
||||
if (error) {
|
||||
panic("cpu_sysfs_setup:sysfs_symlinkf failed\n");
|
||||
}
|
||||
}
|
||||
|
||||
return;
|
||||
} /* cpu_sysfs_setup() */
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
CC = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-gcc
|
||||
LD = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-ld
|
||||
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -m elf_k1om -T $(SRC)/config/attached-mic.lds
|
||||
LDFLAGS_MKIMAGE = -m elf_k1om
|
||||
|
||||
@ -3,6 +3,5 @@ LD = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-ld
|
||||
OBJDUMP = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-objdump
|
||||
OBJCOPY = /usr/linux-k1om-4.7/bin/x86_64-k1om-linux-objcopy
|
||||
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -m elf_k1om -T $(SRC)/config/builtin-mic.lds
|
||||
LDFLAGS_MKIMAGE = -m elf_k1om
|
||||
|
||||
@ -1,2 +1 @@
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -T $(SRC)/config/builtin-x86.lds
|
||||
|
||||
@ -1,2 +1 @@
|
||||
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
|
||||
LDFLAGS += -T $(SRC)/config/smp-x86.lds
|
||||
|
||||
129
kernel/debug.c
129
kernel/debug.c
@ -22,13 +22,44 @@ extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
|
||||
extern int sprintf(char * buf, const char *fmt, ...);
|
||||
static ihk_spinlock_t kmsg_lock;
|
||||
|
||||
static unsigned long kprintf_lock_head(void);
|
||||
static void kprintf_unlock_head(unsigned long irqflags);
|
||||
|
||||
static void kprintf_wait(int len, unsigned long *flags_head, int *slide) {
|
||||
int head, tail, buf_len, mode, adj;
|
||||
|
||||
mode = kmsg_buf.mode;
|
||||
while (1) {
|
||||
adj = 0;
|
||||
tail = kmsg_buf.tail;
|
||||
buf_len = kmsg_buf.len;
|
||||
head = kmsg_buf.head;
|
||||
if (head < tail) head += buf_len;
|
||||
if (tail + len > buf_len) adj = buf_len - tail;
|
||||
if (head > tail && head <= tail + len + adj) {
|
||||
if (mode != 1) {
|
||||
*slide = 1;
|
||||
break;
|
||||
} else {
|
||||
kprintf_unlock_head(*flags_head);
|
||||
*flags_head = kprintf_lock_head();
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* TODO: lock */
|
||||
void kputs(char *buf)
|
||||
{
|
||||
int len = strlen(buf);
|
||||
unsigned long flags;
|
||||
int slide = 0;
|
||||
unsigned long flags_tail, flags_head;
|
||||
|
||||
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
flags_tail = kprintf_lock();
|
||||
flags_head = kprintf_lock_head();
|
||||
kprintf_wait(len, &flags_head, &slide);
|
||||
|
||||
if (len + kmsg_buf.tail > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
@ -39,8 +70,12 @@ void kputs(char *buf)
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
if (slide == 1) {
|
||||
kmsg_buf.head = kmsg_buf.tail + 1;
|
||||
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
|
||||
}
|
||||
kprintf_unlock_head(flags_head);
|
||||
kprintf_unlock(flags_tail);
|
||||
}
|
||||
|
||||
#define KPRINTF_LOCAL_BUF_LEN 1024
|
||||
@ -55,44 +90,34 @@ void kprintf_unlock(unsigned long irqflags)
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
|
||||
}
|
||||
|
||||
static unsigned long kprintf_lock_head(void)
|
||||
{
|
||||
return __ihk_mc_spinlock_lock(&kmsg_buf.lock);
|
||||
}
|
||||
|
||||
static void kprintf_unlock_head(unsigned long irqflags)
|
||||
{
|
||||
__ihk_mc_spinlock_unlock(&kmsg_buf.lock, irqflags);
|
||||
}
|
||||
|
||||
/* Caller must hold kmsg_lock! */
|
||||
int __kprintf(const char *format, ...)
|
||||
{
|
||||
int len = 0;
|
||||
int slide = 0;
|
||||
va_list va;
|
||||
unsigned long flags_head;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
/* Copy into the local buf */
|
||||
va_start(va, format);
|
||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||
va_end(va);
|
||||
|
||||
/* Append to kmsg buffer */
|
||||
if (kmsg_buf.tail + len > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
}
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
int kprintf(const char *format, ...)
|
||||
{
|
||||
int len = 0;
|
||||
va_list va;
|
||||
unsigned long flags;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
|
||||
|
||||
/* Copy into the local buf */
|
||||
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
|
||||
va_start(va, format);
|
||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||
va_end(va);
|
||||
|
||||
flags_head = kprintf_lock_head();
|
||||
kprintf_wait(len, &flags_head, &slide);
|
||||
|
||||
/* Append to kmsg buffer */
|
||||
if (kmsg_buf.tail + len > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
@ -100,16 +125,58 @@ int kprintf(const char *format, ...)
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
if (slide == 1) {
|
||||
kmsg_buf.head = kmsg_buf.tail + 1;
|
||||
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
|
||||
}
|
||||
|
||||
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
|
||||
kprintf_unlock_head(flags_head);
|
||||
return len;
|
||||
}
|
||||
|
||||
int kprintf(const char *format, ...)
|
||||
{
|
||||
int len = 0;
|
||||
int slide = 0;
|
||||
va_list va;
|
||||
unsigned long flags_tail, flags_head;
|
||||
char buf[KPRINTF_LOCAL_BUF_LEN];
|
||||
|
||||
/* Copy into the local buf */
|
||||
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
|
||||
va_start(va, format);
|
||||
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
|
||||
va_end(va);
|
||||
|
||||
flags_tail = kprintf_lock();
|
||||
flags_head = kprintf_lock_head();
|
||||
kprintf_wait(len, &flags_head, &slide);
|
||||
|
||||
/* Append to kmsg buffer */
|
||||
if (kmsg_buf.tail + len > kmsg_buf.len) {
|
||||
kmsg_buf.tail = 0;
|
||||
}
|
||||
|
||||
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
|
||||
kmsg_buf.tail += len;
|
||||
if (slide == 1) {
|
||||
kmsg_buf.head = kmsg_buf.tail + 1;
|
||||
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
|
||||
}
|
||||
|
||||
kprintf_unlock_head(flags_head);
|
||||
kprintf_unlock(flags_tail);
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
void kmsg_init(void)
|
||||
void kmsg_init(int mode)
|
||||
{
|
||||
ihk_mc_spinlock_init(&kmsg_lock);
|
||||
kmsg_buf.tail = 0;
|
||||
kmsg_buf.len = sizeof(kmsg_buf.str);
|
||||
kmsg_buf.head = 0;
|
||||
kmsg_buf.mode = mode;
|
||||
ihk_mc_spinlock_init(&kmsg_buf.lock);
|
||||
memset(kmsg_buf.str, 0, kmsg_buf.len);
|
||||
}
|
||||
|
||||
@ -78,51 +78,52 @@ static struct memobj *to_memobj(struct devobj *devobj)
|
||||
/***********************************************************************
|
||||
* devobj
|
||||
*/
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp)
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
|
||||
int prot, int populate_flags)
|
||||
{
|
||||
ihk_mc_user_context_t ctx;
|
||||
struct pager_map_result result; // XXX: assumes contiguous physical
|
||||
int error;
|
||||
struct devobj *obj = NULL;
|
||||
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
|
||||
const size_t pfn_npages = (npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
|
||||
|
||||
dkprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
|
||||
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
|
||||
if (npages > MAX_PAGES_IN_DEVOBJ) {
|
||||
error = -EFBIG;
|
||||
kprintf("devobj_create(%d,%lx,%lx):too large len. %d\n", fd, len, off, error);
|
||||
goto out;
|
||||
}
|
||||
dkprintf("%s: fd: %d, len: %lu, off: %lu \n", __FUNCTION__, fd, len, off);
|
||||
|
||||
obj = kmalloc(sizeof(*obj), IHK_MC_AP_NOWAIT);
|
||||
if (!obj) {
|
||||
error = -ENOMEM;
|
||||
kprintf("devobj_create(%d,%lx,%lx):kmalloc failed. %d\n", fd, len, off, error);
|
||||
kprintf("%s: error: fd: %d, len: %lu, off: %lu kmalloc failed.\n",
|
||||
__FUNCTION__, fd, len, off);
|
||||
goto out;
|
||||
}
|
||||
memset(obj, 0, sizeof(*obj));
|
||||
|
||||
obj->pfn_table = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
||||
obj->pfn_table = ihk_mc_alloc_pages(pfn_npages, IHK_MC_AP_NOWAIT);
|
||||
if (!obj->pfn_table) {
|
||||
error = -ENOMEM;
|
||||
kprintf("devobj_create(%d,%lx,%lx):allocate_pages failed. %d\n", fd, len, off, error);
|
||||
kprintf("%s: error: fd: %d, len: %lu, off: %lu allocating PFN failed.\n",
|
||||
__FUNCTION__, fd, len, off);
|
||||
goto out;
|
||||
}
|
||||
memset(obj->pfn_table, 0, 1*PAGE_SIZE);
|
||||
memset(obj->pfn_table, 0, pfn_npages * PAGE_SIZE);
|
||||
|
||||
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_MAP;
|
||||
ihk_mc_syscall_arg1(&ctx) = fd;
|
||||
ihk_mc_syscall_arg2(&ctx) = len;
|
||||
ihk_mc_syscall_arg3(&ctx) = off;
|
||||
ihk_mc_syscall_arg4(&ctx) = virt_to_phys(&result);
|
||||
ihk_mc_syscall_arg5(&ctx) = prot | populate_flags;
|
||||
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
|
||||
kprintf("%s: error: fd: %d, len: %lu, off: %lu map failed.\n",
|
||||
__FUNCTION__, fd, len, off);
|
||||
goto out;
|
||||
}
|
||||
dkprintf("devobj_create:handle: %lx\n", result.handle);
|
||||
dkprintf("devobj_create:maxprot: %x\n", result.maxprot);
|
||||
|
||||
dkprintf("%s: fd: %d, len: %lu, off: %lu, handle: %p, maxprot: %x\n",
|
||||
__FUNCTION__, fd, len, off, result.handle, result.maxprot);
|
||||
|
||||
obj->memobj.ops = &devobj_ops;
|
||||
obj->memobj.flags = MF_HAS_PAGER;
|
||||
@ -140,11 +141,12 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
|
||||
out:
|
||||
if (obj) {
|
||||
if (obj->pfn_table) {
|
||||
free_pages(obj->pfn_table, 1);
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
kfree(obj);
|
||||
}
|
||||
dkprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
|
||||
dkprintf("%s: ret: %d, fd: %d, len: %lu, off: %lu, handle: %p, maxprot: %x \n",
|
||||
__FUNCTION__, error, fd, len, off, result.handle, result.maxprot);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -164,6 +166,8 @@ static void devobj_release(struct memobj *memobj)
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
struct devobj *free_obj = NULL;
|
||||
uintptr_t handle;
|
||||
const size_t pfn_npages =
|
||||
(obj->npages / (PAGE_SIZE / sizeof(uintptr_t))) + 1;
|
||||
|
||||
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
|
||||
|
||||
@ -192,7 +196,7 @@ static void devobj_release(struct memobj *memobj)
|
||||
}
|
||||
|
||||
if (obj->pfn_table) {
|
||||
free_pages(obj->pfn_table, 1);
|
||||
ihk_mc_free_pages(obj->pfn_table, pfn_npages);
|
||||
}
|
||||
kfree(free_obj);
|
||||
}
|
||||
@ -204,7 +208,7 @@ static void devobj_release(struct memobj *memobj)
|
||||
|
||||
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
|
||||
{
|
||||
const off_t pgoff = off >> PAGE_SHIFT;
|
||||
const off_t pgoff = off / PAGE_SIZE;
|
||||
struct devobj *obj = to_devobj(memobj);
|
||||
int error;
|
||||
uintptr_t pfn;
|
||||
@ -216,7 +220,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
|
||||
|
||||
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
|
||||
error = -EFBIG;
|
||||
kprintf("devobj_get_page(%p %lx,%lx,%d): out of range. %d\n", memobj, obj->handle, off, p2align, error);
|
||||
kprintf("%s: error: out of range: off: %lu, page off: %lu obj->npages: %d\n", __FUNCTION__, off, pgoff, obj->npages);
|
||||
goto out;
|
||||
}
|
||||
ix = pgoff - obj->pfn_pgoff;
|
||||
|
||||
@ -47,6 +47,7 @@ static memobj_get_page_func_t fileobj_get_page;
|
||||
static memobj_copy_page_func_t fileobj_copy_page;
|
||||
static memobj_flush_page_func_t fileobj_flush_page;
|
||||
static memobj_invalidate_page_func_t fileobj_invalidate_page;
|
||||
static memobj_lookup_page_func_t fileobj_lookup_page;
|
||||
|
||||
static struct memobj_ops fileobj_ops = {
|
||||
.release = &fileobj_release,
|
||||
@ -55,6 +56,7 @@ static struct memobj_ops fileobj_ops = {
|
||||
.copy_page = &fileobj_copy_page,
|
||||
.flush_page = &fileobj_flush_page,
|
||||
.invalidate_page = &fileobj_invalidate_page,
|
||||
.lookup_page = &fileobj_lookup_page,
|
||||
};
|
||||
|
||||
static struct fileobj *to_fileobj(struct memobj *memobj)
|
||||
@ -180,7 +182,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp)
|
||||
|
||||
error = syscall_generic_forwarding(__NR_mmap, &ctx);
|
||||
if (error) {
|
||||
kprintf("fileobj_create(%d):create failed. %d\n", fd, error);
|
||||
dkprintf("fileobj_create(%d):create failed. %d\n", fd, error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -609,3 +611,37 @@ out:
|
||||
memobj, phys, pgsize, error);
|
||||
return error;
|
||||
}
|
||||
|
||||
static int fileobj_lookup_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
struct fileobj *obj = to_fileobj(memobj);
|
||||
int error;
|
||||
uintptr_t phys = -1;
|
||||
struct page *page;
|
||||
|
||||
dkprintf("fileobj_lookup_page(%p,%lx,%x,%p)\n", obj, off, p2align, physp);
|
||||
|
||||
memobj_lock(&obj->memobj);
|
||||
if (p2align != PAGE_P2ALIGN) {
|
||||
error = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
page = page_list_lookup(obj, off);
|
||||
if (!page) {
|
||||
error = -ENOENT;
|
||||
dkprintf("fileobj_lookup_page(%p,%lx,%x,%p): page not found. %d\n", obj, off, p2align, physp, error);
|
||||
goto out;
|
||||
}
|
||||
phys = page_to_phys(page);
|
||||
|
||||
error = 0;
|
||||
if (physp) {
|
||||
*physp = phys;
|
||||
}
|
||||
out:
|
||||
memobj_unlock(&obj->memobj);
|
||||
dkprintf("fileobj_lookup_page(%p,%lx,%x,%p): %d %lx\n",
|
||||
obj, off, p2align, physp, error, phys);
|
||||
return error;
|
||||
}
|
||||
|
||||
@ -79,8 +79,6 @@
|
||||
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
|
||||
#endif
|
||||
|
||||
extern struct sigpending *hassigpending(struct thread *thread);
|
||||
|
||||
int futex_cmpxchg_enabled;
|
||||
|
||||
/**
|
||||
@ -153,7 +151,7 @@ static inline int match_futex(union futex_key *key1, union futex_key *key2)
|
||||
*/
|
||||
static void get_futex_key_refs(union futex_key *key)
|
||||
{
|
||||
/* RIKEN: only !fshared futexes... */
|
||||
/* RIKEN: no swapping in McKernel */
|
||||
return;
|
||||
}
|
||||
|
||||
@ -163,7 +161,7 @@ static void get_futex_key_refs(union futex_key *key)
|
||||
*/
|
||||
static void drop_futex_key_refs(union futex_key *key)
|
||||
{
|
||||
/* RIKEN: only !fshared futexes... */
|
||||
/* RIKEN: no swapping in McKernel */
|
||||
return;
|
||||
}
|
||||
/**
|
||||
@ -185,6 +183,7 @@ static int
|
||||
get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
|
||||
{
|
||||
unsigned long address = (unsigned long)uaddr;
|
||||
unsigned long phys;
|
||||
struct process_vm *mm = cpu_local_var(current)->vm;
|
||||
|
||||
/*
|
||||
@ -203,15 +202,31 @@ get_futex_key(uint32_t *uaddr, int fshared, union futex_key *key)
|
||||
* but access_ok() should be faster than find_vma()
|
||||
*/
|
||||
if (!fshared) {
|
||||
|
||||
key->private.mm = mm;
|
||||
key->private.address = address;
|
||||
get_futex_key_refs(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* RIKEN: No shared futex support... */
|
||||
return -EFAULT;
|
||||
key->both.offset |= FUT_OFF_MMSHARED;
|
||||
|
||||
retry_v2p:
|
||||
/* Just use physical address of page, McKernel does not do swapping */
|
||||
if (ihk_mc_pt_virt_to_phys(mm->address_space->page_table,
|
||||
(void *)uaddr, &phys)) {
|
||||
|
||||
/* Check if we can fault in page */
|
||||
if (page_fault_process_vm(mm, uaddr, PF_POPULATE | PF_WRITE | PF_USER)) {
|
||||
kprintf("error: get_futex_key() virt to phys translation failed\n");
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
goto retry_v2p;
|
||||
}
|
||||
key->shared.phys = (void *)phys;
|
||||
key->shared.pgoff = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
@ -234,7 +249,7 @@ static int cmpxchg_futex_value_locked(uint32_t __user *uaddr, uint32_t uval, uin
|
||||
static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
|
||||
{
|
||||
/* RIKEN: futexes are always on not swappable pages */
|
||||
*dest = *from;
|
||||
*dest = getint_user((int *)from);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -265,6 +280,7 @@ static void wake_futex(struct futex_q *q)
|
||||
barrier();
|
||||
q->lock_ptr = NULL;
|
||||
|
||||
dkprintf("wake_futex(): waking up tid %d\n", p->tid);
|
||||
sched_wakeup_thread(p, PS_NORMAL);
|
||||
}
|
||||
|
||||
@ -667,12 +683,16 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
|
||||
|
||||
/* RIKEN: use mcos timers */
|
||||
if (timeout) {
|
||||
dkprintf("futex_wait_queue_me(): tid: %d schedule_timeout()\n", cpu_local_var(current)->tid);
|
||||
time_remain = schedule_timeout(timeout);
|
||||
}
|
||||
else {
|
||||
dkprintf("futex_wait_queue_me(): tid: %d schedule()\n", cpu_local_var(current)->tid);
|
||||
schedule();
|
||||
time_remain = 0;
|
||||
}
|
||||
|
||||
dkprintf("futex_wait_queue_me(): tid: %d woken up\n", cpu_local_var(current)->tid);
|
||||
}
|
||||
|
||||
/* This does not need to be serialized */
|
||||
@ -777,10 +797,10 @@ retry:
|
||||
if (timeout && !time_remain)
|
||||
goto out_put_key;
|
||||
|
||||
if(hassigpending(cpu_local_var(current))){
|
||||
if (hassigpending(cpu_local_var(current))) {
|
||||
ret = -EINTR;
|
||||
goto out_put_key;
|
||||
}
|
||||
}
|
||||
|
||||
/* RIKEN: no signals */
|
||||
put_futex_key(fshared, &q.key);
|
||||
@ -793,17 +813,10 @@ out:
|
||||
}
|
||||
|
||||
int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
|
||||
uint32_t *uaddr2, uint32_t val2, uint32_t val3)
|
||||
uint32_t *uaddr2, uint32_t val2, uint32_t val3, int fshared)
|
||||
{
|
||||
int clockrt, ret = -ENOSYS;
|
||||
int cmd = op & FUTEX_CMD_MASK;
|
||||
int fshared = 0;
|
||||
|
||||
/* RIKEN: Assume address space private futexes.
|
||||
if (!(op & FUTEX_PRIVATE_FLAG)) {
|
||||
fshared = 1;
|
||||
}
|
||||
*/
|
||||
|
||||
clockrt = op & FUTEX_CLOCK_REALTIME;
|
||||
if (clockrt && cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI)
|
||||
@ -824,8 +837,7 @@ int futex(uint32_t *uaddr, int op, uint32_t val, uint64_t timeout,
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL, 0);
|
||||
break;
|
||||
case FUTEX_CMP_REQUEUE:
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3,
|
||||
0);
|
||||
ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, &val3, 0);
|
||||
break;
|
||||
case FUTEX_WAKE_OP:
|
||||
ret = futex_wake_op(uaddr, fshared, uaddr2, val, val2, val3);
|
||||
|
||||
210
kernel/host.c
210
kernel/host.c
@ -30,6 +30,7 @@
|
||||
#include <mman.h>
|
||||
#include <init.h>
|
||||
#include <kmalloc.h>
|
||||
#include <sysfs.h>
|
||||
|
||||
//#define DEBUG_PRINT_HOST
|
||||
|
||||
@ -84,15 +85,17 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
struct process *proc = thread->proc;
|
||||
struct process_vm *vm = proc->vm;
|
||||
struct address_space *as = vm->address_space;
|
||||
long aout_base;
|
||||
int error;
|
||||
|
||||
n = p->num_sections;
|
||||
|
||||
aout_base = (pn->reloc)? vm->region.map_end: 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
|
||||
if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) {
|
||||
interp_obase = pn->sections[i].vaddr;
|
||||
interp_obase -= (interp_obase % pn->interp_align);
|
||||
interp_nbase = vm->region.map_start;
|
||||
interp_nbase = vm->region.map_end;
|
||||
interp_nbase = (interp_nbase + pn->interp_align - 1)
|
||||
& ~(pn->interp_align - 1);
|
||||
}
|
||||
@ -102,6 +105,10 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
pn->sections[i].vaddr += interp_nbase;
|
||||
p->sections[i].vaddr = pn->sections[i].vaddr;
|
||||
}
|
||||
else{
|
||||
pn->sections[i].vaddr += aout_base;
|
||||
p->sections[i].vaddr = pn->sections[i].vaddr;
|
||||
}
|
||||
s = (pn->sections[i].vaddr) & PAGE_MASK;
|
||||
e = (pn->sections[i].vaddr + pn->sections[i].len
|
||||
+ PAGE_SIZE - 1) & PAGE_MASK;
|
||||
@ -117,7 +124,8 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
}
|
||||
|
||||
up = virt_to_phys(up_v);
|
||||
if (add_process_memory_range(vm, s, e, up, flags, NULL, 0) != 0) {
|
||||
if (add_process_memory_range(vm, s, e, up, flags, NULL, 0,
|
||||
PAGE_SHIFT) != 0) {
|
||||
ihk_mc_free_pages(up_v, range_npages);
|
||||
kprintf("ERROR: adding memory range for ELF section %i\n", i);
|
||||
goto err;
|
||||
@ -170,6 +178,10 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
(e > vm->region.data_end ?
|
||||
e : vm->region.data_end);
|
||||
}
|
||||
|
||||
if (aout_base) {
|
||||
vm->region.map_end = e;
|
||||
}
|
||||
}
|
||||
|
||||
if (interp_nbase != (uintptr_t)-1) {
|
||||
@ -181,6 +193,11 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
pn->entry);
|
||||
}
|
||||
|
||||
if (aout_base) {
|
||||
pn->at_phdr += aout_base;
|
||||
pn->at_entry += aout_base;
|
||||
}
|
||||
|
||||
vm->region.brk_start = vm->region.brk_end = vm->region.data_end;
|
||||
|
||||
/* Map, copy and update args and envs */
|
||||
@ -196,7 +213,7 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
args_envs_p = virt_to_phys(args_envs);
|
||||
|
||||
if(add_process_memory_range(vm, addr, e, args_envs_p,
|
||||
flags, NULL, 0) != 0){
|
||||
flags, NULL, 0, PAGE_SHIFT) != 0){
|
||||
ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT);
|
||||
kprintf("ERROR: adding memory range for args/envs\n");
|
||||
goto err;
|
||||
@ -227,9 +244,9 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
p->args_len = args_len;
|
||||
}
|
||||
|
||||
dkprintf("args copy, nr: %d\n", *((int*)args_envs_r));
|
||||
dkprintf("args copy, nr: %d\n", *((long *)args_envs_r));
|
||||
|
||||
memcpy_long(args_envs, args_envs_r, p->args_len + 8);
|
||||
memcpy_long(args_envs, args_envs_r, p->args_len + sizeof(long) - 1);
|
||||
|
||||
/* Only unmap remote address if it wasn't specified as an argument */
|
||||
if (!args) {
|
||||
@ -262,9 +279,9 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
p->envs_len = envs_len;
|
||||
}
|
||||
|
||||
dkprintf("envs copy, nr: %d\n", *((int*)args_envs_r));
|
||||
dkprintf("envs copy, nr: %d\n", *((long *)args_envs_r));
|
||||
|
||||
memcpy_long(args_envs + p->args_len, args_envs_r, p->envs_len + 8);
|
||||
memcpy_long(args_envs + p->args_len, args_envs_r, p->envs_len + sizeof(long) - 1);
|
||||
|
||||
/* Only map remote address if it wasn't specified as an argument */
|
||||
if (!envs) {
|
||||
@ -274,10 +291,10 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
flush_tlb();
|
||||
|
||||
// Update variables
|
||||
argc = *((int*)(args_envs));
|
||||
argc = *((long *)(args_envs));
|
||||
dkprintf("argc: %d\n", argc);
|
||||
|
||||
argv = (char **)(args_envs + (sizeof(int)));
|
||||
argv = (char **)(args_envs + (sizeof(long)));
|
||||
if(proc->saved_cmdline){
|
||||
kfree(proc->saved_cmdline);
|
||||
proc->saved_cmdline_len = 0;
|
||||
@ -294,20 +311,31 @@ int prepare_process_ranges_args_envs(struct thread *thread,
|
||||
*a = (char *)addr + (unsigned long)*a; // Process' address space!
|
||||
}
|
||||
|
||||
envc = *((int*)(args_envs + p->args_len));
|
||||
envc = *((long *)(args_envs + p->args_len));
|
||||
dkprintf("envc: %d\n", envc);
|
||||
|
||||
env = (char **)(args_envs + p->args_len + sizeof(int));
|
||||
env = (char **)(args_envs + p->args_len + sizeof(long));
|
||||
while (*env) {
|
||||
char **_env = env;
|
||||
//dkprintf("%s\n", args_envs + p->args_len + (unsigned long)*env);
|
||||
*env = (char *)addr + p->args_len + (unsigned long)*env;
|
||||
env = ++_env;
|
||||
}
|
||||
env = (char **)(args_envs + p->args_len + sizeof(int));
|
||||
env = (char **)(args_envs + p->args_len + sizeof(long));
|
||||
|
||||
dkprintf("env OK\n");
|
||||
|
||||
if (pn->enable_vdso) {
|
||||
error = arch_map_vdso(vm);
|
||||
if (error) {
|
||||
kprintf("ERROR: mapping vdso pages. %d\n", error);
|
||||
goto err;
|
||||
}
|
||||
}
|
||||
else {
|
||||
vm->vdso_addr = NULL;
|
||||
}
|
||||
|
||||
p->rprocess = (unsigned long)thread;
|
||||
p->rpgtable = virt_to_phys(as->page_table);
|
||||
|
||||
@ -348,10 +376,16 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
}
|
||||
|
||||
n = p->num_sections;
|
||||
if (n > 16) {
|
||||
kprintf("%s: ERROR: more ELF sections than 16??\n",
|
||||
__FUNCTION__);
|
||||
return -ENOMEM;
|
||||
}
|
||||
dkprintf("# of sections: %d\n", n);
|
||||
|
||||
if((pn = ihk_mc_allocate(sizeof(struct program_load_desc)
|
||||
+ sizeof(struct program_image_section) * n, IHK_MC_AP_NOWAIT)) == NULL){
|
||||
if((pn = kmalloc(sizeof(struct program_load_desc)
|
||||
+ sizeof(struct program_image_section) * n,
|
||||
IHK_MC_AP_NOWAIT)) == NULL){
|
||||
ihk_mc_unmap_virtual(p, npages, 0);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
return -ENOMEM;
|
||||
@ -360,7 +394,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
+ sizeof(struct program_image_section) * n);
|
||||
|
||||
if((thread = create_thread(p->entry)) == NULL){
|
||||
ihk_mc_free(pn);
|
||||
kfree(pn);
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
return -ENOMEM;
|
||||
@ -379,11 +413,23 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
proc->egid = pn->cred[5];
|
||||
proc->sgid = pn->cred[6];
|
||||
proc->fsgid = pn->cred[7];
|
||||
proc->termsig = SIGCHLD;
|
||||
|
||||
vm->region.user_start = pn->user_start;
|
||||
vm->region.user_end = pn->user_end;
|
||||
vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
|
||||
vm->region.map_end = proc->vm->region.map_start;
|
||||
if(vm->region.user_end > USER_END)
|
||||
vm->region.user_end = USER_END;
|
||||
if(vm->region.user_start != 0UL ||
|
||||
vm->region.user_end < TASK_UNMAPPED_BASE){
|
||||
vm->region.map_start =
|
||||
(vm->region.user_start +
|
||||
(vm->region.user_end - vm->region.user_start) / 3) &
|
||||
LARGE_PAGE_MASK;
|
||||
}
|
||||
else{
|
||||
vm->region.map_start = TASK_UNMAPPED_BASE;
|
||||
}
|
||||
vm->region.map_end = vm->region.map_start;
|
||||
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
|
||||
|
||||
/* TODO: Clear it at the proper timing */
|
||||
@ -398,7 +444,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
|
||||
vm->address_space->page_table);
|
||||
|
||||
ihk_mc_free(pn);
|
||||
kfree(pn);
|
||||
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
@ -406,7 +452,7 @@ static int process_msg_prepare_process(unsigned long rphys)
|
||||
|
||||
return 0;
|
||||
err:
|
||||
ihk_mc_free(pn);
|
||||
kfree(pn);
|
||||
ihk_mc_unmap_virtual(p, npages, 1);
|
||||
ihk_mc_unmap_memory(NULL, phys, sz);
|
||||
destroy_thread(thread);
|
||||
@ -415,7 +461,7 @@ err:
|
||||
|
||||
static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_params *lparam)
|
||||
{
|
||||
lparam->response_va = allocate_pages(RESPONSE_PAGE_COUNT, 0);
|
||||
lparam->response_va = ihk_mc_alloc_pages(RESPONSE_PAGE_COUNT, 0);
|
||||
lparam->response_pa = virt_to_phys(lparam->response_va);
|
||||
|
||||
pcp->request_page = 0;
|
||||
@ -425,7 +471,7 @@ static void process_msg_init(struct ikc_scd_init_param *pcp, struct syscall_para
|
||||
|
||||
static void process_msg_init_acked(struct ihk_ikc_channel_desc *c, unsigned long pphys)
|
||||
{
|
||||
struct ikc_scd_init_param *param = (void *)pphys;
|
||||
struct ikc_scd_init_param *param = phys_to_virt(pphys);
|
||||
struct syscall_params *lparam;
|
||||
enum ihk_mc_pt_attribute attr;
|
||||
|
||||
@ -484,21 +530,42 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
|
||||
}
|
||||
|
||||
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
|
||||
extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
|
||||
|
||||
extern void process_procfs_request(unsigned long rarg);
|
||||
extern int memcheckall();
|
||||
extern int freecheck(int runcount);
|
||||
extern int runcount;
|
||||
extern void terminate_host(int pid);
|
||||
extern void debug_log(long);
|
||||
|
||||
static void req_get_cpu_mapping(long req_rpa)
|
||||
{
|
||||
size_t mapsize;
|
||||
size_t size;
|
||||
int npages;
|
||||
long phys;
|
||||
struct get_cpu_mapping_req *req;
|
||||
struct cpu_mapping *buf;
|
||||
|
||||
size = sizeof(*req);
|
||||
mapsize = size + (req_rpa & (PAGE_SIZE - 1));
|
||||
npages = (mapsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
|
||||
phys = ihk_mc_map_memory(NULL, req_rpa, size);
|
||||
req = ihk_mc_map_virtual(phys, npages, PTATTR_WRITABLE);
|
||||
|
||||
req->error = arch_get_cpu_mapping(&buf, &req->buf_elems);
|
||||
if (!req->error) {
|
||||
req->buf_rpa = virt_to_phys(buf);
|
||||
}
|
||||
|
||||
ihk_mc_unmap_virtual(req, npages, 0);
|
||||
ihk_mc_unmap_memory(NULL, phys, size);
|
||||
return;
|
||||
} /* req_get_cpu_mapping() */
|
||||
|
||||
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
void *__packet, void *ihk_os)
|
||||
{
|
||||
struct ikc_scd_packet *packet = __packet;
|
||||
struct ikc_scd_packet pckt;
|
||||
int rc;
|
||||
struct mcs_rwlock_node_irqsave lock;
|
||||
struct thread *thread;
|
||||
struct process *proc;
|
||||
struct mcctrl_signal {
|
||||
@ -510,22 +577,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
} *sp, info;
|
||||
unsigned long pp;
|
||||
int cpuid;
|
||||
int ret = 0;
|
||||
|
||||
switch (packet->msg) {
|
||||
case SCD_MSG_INIT_CHANNEL_ACKED:
|
||||
dkprintf("SCD_MSG_INIT_CHANNEL_ACKED\n");
|
||||
process_msg_init_acked(c, packet->arg);
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_PREPARE_PROCESS:
|
||||
|
||||
if (find_command_line("memdebug")) {
|
||||
memcheckall();
|
||||
if (runcount)
|
||||
freecheck(runcount);
|
||||
runcount++;
|
||||
}
|
||||
|
||||
if((rc = process_msg_prepare_process(packet->arg)) == 0){
|
||||
pckt.msg = SCD_MSG_PREPARE_PROCESS_ACKED;
|
||||
pckt.err = 0;
|
||||
@ -538,19 +600,21 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
pckt.arg = packet->arg;
|
||||
syscall_channel_send(c, &pckt);
|
||||
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_SCHEDULE_PROCESS:
|
||||
cpuid = obtain_clone_cpuid();
|
||||
if(cpuid == -1){
|
||||
kprintf("No CPU available\n");
|
||||
return -1;
|
||||
ret = -1;
|
||||
break;
|
||||
}
|
||||
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
|
||||
thread = (struct thread *)packet->arg;
|
||||
proc = thread->proc;
|
||||
|
||||
settid(thread, 0, cpuid, -1);
|
||||
settid(thread, 0, cpuid, -1, 0, NULL);
|
||||
proc->status = PS_RUNNING;
|
||||
thread->status = PS_RUNNING;
|
||||
chain_thread(thread);
|
||||
@ -558,7 +622,29 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
runq_add_thread(thread, cpuid);
|
||||
|
||||
//cpu_local_var(next) = (struct thread *)packet->arg;
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
/*
|
||||
* Used for syscall offload reply message to explicitly schedule in
|
||||
* the waiting thread
|
||||
*/
|
||||
case SCD_MSG_WAKE_UP_SYSCALL_THREAD:
|
||||
thread = find_thread(0, packet->ttid, &lock);
|
||||
if (!thread) {
|
||||
kprintf("%s: WARNING: no thread for SCD reply? TID: %d\n",
|
||||
__FUNCTION__, packet->ttid);
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
thread_unlock(thread, &lock);
|
||||
|
||||
dkprintf("%s: SCD_MSG_WAKE_UP_SYSCALL_THREAD: waking up tid %d\n",
|
||||
__FUNCTION__, packet->ttid);
|
||||
waitq_wakeup(&thread->scd_wq);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_SEND_SIGNAL:
|
||||
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
|
||||
sp = (struct mcctrl_signal *)ihk_mc_map_virtual(pp, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
|
||||
@ -573,20 +659,56 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
|
||||
|
||||
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
|
||||
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_PROCFS_REQUEST:
|
||||
process_procfs_request(packet->arg);
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_CLEANUP_PROCESS:
|
||||
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
|
||||
terminate_host(packet->pid);
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_DEBUG_LOG:
|
||||
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
|
||||
debug_log(packet->arg);
|
||||
return 0;
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_SYSFS_REQ_SHOW:
|
||||
case SCD_MSG_SYSFS_REQ_STORE:
|
||||
case SCD_MSG_SYSFS_REQ_RELEASE:
|
||||
sysfss_packet_handler(c, packet->msg, packet->err,
|
||||
packet->sysfs_arg1, packet->sysfs_arg2,
|
||||
packet->sysfs_arg3);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
case SCD_MSG_GET_CPU_MAPPING:
|
||||
req_get_cpu_mapping(packet->arg);
|
||||
|
||||
pckt.msg = SCD_MSG_REPLY_GET_CPU_MAPPING;
|
||||
pckt.arg = packet->arg;
|
||||
syscall_channel_send(c, &pckt);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
default:
|
||||
kprintf("syscall_pakcet_handler:unknown message "
|
||||
"(%d.%d.%d.%d.%d.%#lx)\n",
|
||||
packet->msg, packet->ref, packet->osnum,
|
||||
packet->pid, packet->err, packet->arg);
|
||||
ret = 0;
|
||||
break;
|
||||
|
||||
}
|
||||
return 0;
|
||||
|
||||
ihk_ikc_release_packet((struct ihk_ikc_free_packet *)packet, c);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void init_host_syscall_channel(void)
|
||||
|
||||
@ -1,6 +1,8 @@
|
||||
#ifndef _LINUX_AUXVEC_H
|
||||
#define _LINUX_AUXVEC_H
|
||||
|
||||
#include <arch/auxvec.h>
|
||||
|
||||
/* Symbolic values for the entries in the auxiliary table
|
||||
put on the initial stack */
|
||||
#define AT_NULL 0 /* end of vector */
|
||||
|
||||
@ -19,11 +19,13 @@
|
||||
* CPU Local Storage (cls)
|
||||
*/
|
||||
|
||||
struct malloc_header {
|
||||
unsigned int check;
|
||||
struct kmalloc_header {
|
||||
unsigned int front_magic;
|
||||
unsigned int cpu_id;
|
||||
struct malloc_header *next;
|
||||
unsigned long size;
|
||||
struct list_head list;
|
||||
int size; /* The size of this chunk without the header */
|
||||
unsigned int end_magic;
|
||||
/* 32 bytes */
|
||||
};
|
||||
|
||||
#include <ihk/lock.h>
|
||||
@ -38,8 +40,9 @@ extern ihk_spinlock_t cpu_status_lock;
|
||||
|
||||
struct cpu_local_var {
|
||||
/* malloc */
|
||||
struct malloc_header free_list;
|
||||
ihk_spinlock_t free_list_lock;
|
||||
struct list_head free_list;
|
||||
struct list_head remote_free_list;
|
||||
ihk_spinlock_t remote_free_list_lock;
|
||||
|
||||
struct thread idle;
|
||||
struct process idle_proc;
|
||||
@ -73,6 +76,7 @@ struct cpu_local_var {
|
||||
int in_interrupt;
|
||||
int no_preempt;
|
||||
int timer_enabled;
|
||||
int kmalloc_initialized;
|
||||
} __attribute__((aligned(64)));
|
||||
|
||||
|
||||
|
||||
@ -99,6 +99,8 @@
|
||||
|
||||
#ifdef __KERNEL__
|
||||
|
||||
#define __user
|
||||
|
||||
/* We don't deal with uaccess at the moment, because x86 can access
|
||||
* userspace directly, we rely on glibc and the app developers.
|
||||
*/
|
||||
@ -106,42 +108,14 @@
|
||||
#include <arch/uaccess.h>
|
||||
#endif
|
||||
|
||||
#include <asm.h>
|
||||
#include <errno.h>
|
||||
|
||||
#define __user
|
||||
#include <arch-futex.h>
|
||||
|
||||
#if 0
|
||||
#include <arch/processor.h>
|
||||
#include <arch/system.h>
|
||||
#endif
|
||||
|
||||
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
|
||||
asm volatile("1:\t" insn "\n" \
|
||||
"2:\t.section .fixup,\"ax\"\n" \
|
||||
"3:\tmov\t%3, %1\n" \
|
||||
"\tjmp\t2b\n" \
|
||||
"\t.previous\n" \
|
||||
_ASM_EXTABLE(1b, 3b) \
|
||||
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
|
||||
: "i" (-EFAULT), "0" (oparg), "1" (0))
|
||||
|
||||
#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
|
||||
asm volatile("1:\tmovl %2, %0\n" \
|
||||
"\tmovl\t%0, %3\n" \
|
||||
"\t" insn "\n" \
|
||||
"2:\tlock; cmpxchgl %3, %2\n" \
|
||||
"\tjnz\t1b\n" \
|
||||
"3:\t.section .fixup,\"ax\"\n" \
|
||||
"4:\tmov\t%5, %1\n" \
|
||||
"\tjmp\t3b\n" \
|
||||
"\t.previous\n" \
|
||||
_ASM_EXTABLE(1b, 4b) \
|
||||
_ASM_EXTABLE(2b, 4b) \
|
||||
: "=&a" (oldval), "=&r" (ret), \
|
||||
"+m" (*uaddr), "=&r" (tem) \
|
||||
: "r" (oparg), "i" (-EFAULT), "1" (0))
|
||||
|
||||
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
|
||||
{
|
||||
int op = (encoded_op >> 28) & 7;
|
||||
@ -206,28 +180,6 @@ static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
|
||||
int newval)
|
||||
{
|
||||
#ifdef __UACCESS__
|
||||
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
|
||||
return -EFAULT;
|
||||
#endif
|
||||
|
||||
asm volatile("1:\tlock; cmpxchgl %3, %1\n"
|
||||
"2:\t.section .fixup, \"ax\"\n"
|
||||
"3:\tmov %2, %0\n"
|
||||
"\tjmp 2b\n"
|
||||
"\t.previous\n"
|
||||
_ASM_EXTABLE(1b, 3b)
|
||||
: "=a" (oldval), "+m" (*uaddr)
|
||||
: "i" (-EFAULT), "r" (newval), "0" (oldval)
|
||||
: "memory"
|
||||
);
|
||||
|
||||
return oldval;
|
||||
}
|
||||
|
||||
#endif // __KERNEL__
|
||||
#endif // _ASM_X86_FUTEX_H
|
||||
|
||||
@ -241,13 +193,11 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
|
||||
struct process_vm;
|
||||
|
||||
union futex_key {
|
||||
#if 0
|
||||
struct {
|
||||
unsigned long pgoff;
|
||||
struct inode *inode;
|
||||
void *phys;
|
||||
int offset;
|
||||
} shared;
|
||||
#endif
|
||||
struct {
|
||||
unsigned long address;
|
||||
struct process_vm *mm;
|
||||
@ -261,6 +211,7 @@ union futex_key {
|
||||
};
|
||||
|
||||
#define FUTEX_KEY_INIT (union futex_key) { .both = { .ptr = NULL } }
|
||||
#define FUT_OFF_MMSHARED 2
|
||||
|
||||
extern int futex_init(void);
|
||||
|
||||
@ -272,7 +223,8 @@ futex(
|
||||
uint64_t timeout,
|
||||
uint32_t __user * uaddr2,
|
||||
uint32_t val2,
|
||||
uint32_t val3
|
||||
uint32_t val3,
|
||||
int fshared
|
||||
);
|
||||
|
||||
|
||||
|
||||
23
kernel/include/generic-rlimit.h
Normal file
23
kernel/include/generic-rlimit.h
Normal file
@ -0,0 +1,23 @@
|
||||
/**
|
||||
* \file generic-rlimit.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* Kinds of resource limit
|
||||
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
|
||||
* Copyright (C) 2011 - 2012 Taku Shimosawa
|
||||
*/
|
||||
/*
|
||||
* HISTORY
|
||||
*/
|
||||
|
||||
#ifndef __GENERIC_RLIMIT_H
|
||||
#define __GENERIC_RLIMIT_H
|
||||
|
||||
typedef uint64_t rlim_t;
|
||||
|
||||
struct rlimit {
|
||||
rlim_t rlim_cur; /* Soft limit */
|
||||
rlim_t rlim_max; /* Hard limit (ceiling for rlim_cur) */
|
||||
};
|
||||
|
||||
#endif
|
||||
@ -14,7 +14,7 @@
|
||||
#define INIT_H
|
||||
|
||||
extern void arch_init(void);
|
||||
extern void kmsg_init(void);
|
||||
extern void kmsg_init(int);
|
||||
extern void mem_init(void);
|
||||
extern void ikc_master_init(void);
|
||||
extern void ap_init(void);
|
||||
@ -28,6 +28,7 @@ extern void init_host_syscall_channel(void);
|
||||
extern void init_host_syscall_channel2(void);
|
||||
extern void sched_init(void);
|
||||
extern void pc_ap_init(void);
|
||||
extern void cpu_sysfs_setup(void);
|
||||
|
||||
extern char *find_command_line(char *name);
|
||||
|
||||
|
||||
@ -32,11 +32,10 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line);
|
||||
void _kfree(void *ptr, char *file, int line);
|
||||
void *__kmalloc(int size, enum ihk_mc_ap_flag flag);
|
||||
void __kfree(void *ptr);
|
||||
void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
|
||||
void ___kfree(void *ptr);
|
||||
|
||||
int _memcheck(void *ptr, char *msg, char *file, int line, int free);
|
||||
int memcheckall();
|
||||
int freecheck(int runcount);
|
||||
void kmalloc_consolidate_free_list(void);
|
||||
|
||||
#endif
|
||||
|
||||
@ -16,6 +16,6 @@
|
||||
void kputs(char *buf);
|
||||
int kprintf(const char *format, ...);
|
||||
|
||||
void kmsg_init(void);
|
||||
void kmsg_init(int);
|
||||
|
||||
#endif
|
||||
|
||||
@ -92,7 +92,8 @@ futex(
|
||||
uint64_t timeout,
|
||||
uint32_t __user * uaddr2,
|
||||
uint32_t val2,
|
||||
uint32_t val3
|
||||
uint32_t val3,
|
||||
int fshared
|
||||
);
|
||||
|
||||
extern long
|
||||
|
||||
@ -47,6 +47,7 @@ typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, u
|
||||
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
|
||||
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
|
||||
typedef int memobj_lookup_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
|
||||
|
||||
struct memobj_ops {
|
||||
memobj_release_func_t * release;
|
||||
@ -55,6 +56,7 @@ struct memobj_ops {
|
||||
memobj_copy_page_func_t * copy_page;
|
||||
memobj_flush_page_func_t * flush_page;
|
||||
memobj_invalidate_page_func_t * invalidate_page;
|
||||
memobj_lookup_page_func_t * lookup_page;
|
||||
};
|
||||
|
||||
static inline void memobj_release(struct memobj *obj)
|
||||
@ -106,6 +108,15 @@ static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int memobj_lookup_page(struct memobj *obj, off_t off,
|
||||
int p2align, uintptr_t *physp, unsigned long *pflag)
|
||||
{
|
||||
if (obj->ops->lookup_page) {
|
||||
return (*obj->ops->lookup_page)(obj, off, p2align, physp, pflag);
|
||||
}
|
||||
return -ENXIO;
|
||||
}
|
||||
|
||||
static inline void memobj_lock(struct memobj *obj)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&obj->lock);
|
||||
@ -130,6 +141,7 @@ int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
|
||||
struct shmid_ds;
|
||||
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
|
||||
int zeroobj_create(struct memobj **objp);
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp);
|
||||
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp,
|
||||
int prot, int populate_flags);
|
||||
|
||||
#endif /* HEADER_MEMOBJ_H */
|
||||
|
||||
@ -29,6 +29,7 @@
|
||||
#define VR_IO_NOCACHE 0x100
|
||||
#define VR_REMOTE 0x200
|
||||
#define VR_WRITE_COMBINED 0x400
|
||||
#define VR_DONTFORK 0x800
|
||||
#define VR_DEMAND_PAGING 0x1000
|
||||
#define VR_PRIVATE 0x2000
|
||||
#define VR_LOCKED 0x4000
|
||||
@ -160,7 +161,9 @@
|
||||
#endif
|
||||
|
||||
#define USER_STACK_NR_PAGES 8192
|
||||
#define KERNEL_STACK_NR_PAGES 25
|
||||
#define KERNEL_STACK_NR_PAGES 32
|
||||
|
||||
#define NOPHYS ((uintptr_t)-1)
|
||||
|
||||
#include <waitq.h>
|
||||
#include <futex.h>
|
||||
@ -216,9 +219,11 @@ struct thread_hash {
|
||||
|
||||
struct address_space {
|
||||
struct page_table *page_table;
|
||||
int type;
|
||||
#define ADDRESS_SPACE_NORMAL 1
|
||||
#define ADDRESS_SPACE_PVAS 2
|
||||
void *opt;
|
||||
void (*free_cb)(struct address_space *, void *);
|
||||
ihk_atomic_t refcount;
|
||||
cpu_set_t cpu_set;
|
||||
ihk_spinlock_t cpu_set_lock;
|
||||
int nslots;
|
||||
int pids[];
|
||||
};
|
||||
@ -288,7 +293,7 @@ struct user
|
||||
unsigned long int u_debugreg [8];
|
||||
};
|
||||
|
||||
#define AUXV_LEN 16
|
||||
#define AUXV_LEN 18
|
||||
|
||||
struct vm_range {
|
||||
struct list_head list;
|
||||
@ -296,6 +301,8 @@ struct vm_range {
|
||||
unsigned long flag;
|
||||
struct memobj *memobj;
|
||||
off_t objoff;
|
||||
int pgshift; /* page size. 0 means THP */
|
||||
int padding;
|
||||
};
|
||||
|
||||
struct vm_regions {
|
||||
@ -310,18 +317,25 @@ struct vm_regions {
|
||||
|
||||
struct process_vm;
|
||||
|
||||
struct sigfd {
|
||||
struct sigfd *next;
|
||||
struct mckfd {
|
||||
struct mckfd *next;
|
||||
int fd;
|
||||
__sigset_t mask;
|
||||
int sig_no;
|
||||
long data;
|
||||
void *opt;
|
||||
long (*read_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||
int (*ioctl_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||
long (*mmap_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||
int (*close_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||
int (*fcntl_cb)(struct mckfd *, ihk_mc_user_context_t *);
|
||||
};
|
||||
|
||||
#define SFD_CLOEXEC 02000000
|
||||
#define SFD_NONBLOCK 04000
|
||||
|
||||
struct sig_common {
|
||||
ihk_spinlock_t lock;
|
||||
ihk_atomic_t use;
|
||||
struct sigfd *sigfd;
|
||||
struct k_sigaction action[_NSIG];
|
||||
struct list_head sigpending;
|
||||
};
|
||||
@ -335,13 +349,18 @@ struct sig_pending {
|
||||
|
||||
typedef void pgio_func_t(void *arg);
|
||||
|
||||
struct mcexec_tid {
|
||||
int tid;
|
||||
struct thread *thread;
|
||||
};
|
||||
|
||||
/* Represents a node in the process fork tree, it may exist even after the
|
||||
* corresponding process exited due to references from the parent and/or
|
||||
* children and is used for implementing wait/waitpid without having a
|
||||
* special "init" process */
|
||||
struct process {
|
||||
struct list_head hash_list;
|
||||
mcs_rwlock_lock_t update_lock; // lock for parent, status, ...?
|
||||
mcs_rwlock_lock_t update_lock; // lock for parent, status, cpu time...
|
||||
|
||||
// process vm
|
||||
struct process_vm *vm;
|
||||
@ -349,6 +368,9 @@ struct process {
|
||||
// threads and children
|
||||
struct list_head threads_list;
|
||||
mcs_rwlock_lock_t threads_lock; // lock for threads_list
|
||||
/* TID set of proxy process */
|
||||
struct mcexec_tid *tids;
|
||||
int nr_tids;
|
||||
|
||||
/* The ptracing process behave as the parent of the ptraced process
|
||||
after using PTRACE_ATTACH except getppid. So we save it here. */
|
||||
@ -398,6 +420,7 @@ struct process {
|
||||
int fsgid;
|
||||
int execed;
|
||||
int nohost;
|
||||
int nowait;
|
||||
struct rlimit rlimit[MCK_RLIM_MAX];
|
||||
unsigned long saved_auxv[AUXV_LEN];
|
||||
char *saved_cmdline;
|
||||
@ -422,6 +445,27 @@ struct process {
|
||||
/* Store signal sent to parent when the process terminates. */
|
||||
int termsig;
|
||||
|
||||
ihk_spinlock_t mckfd_lock;
|
||||
struct mckfd *mckfd;
|
||||
|
||||
// cpu time (summary)
|
||||
struct timespec stime;
|
||||
struct timespec utime;
|
||||
|
||||
// cpu time (children)
|
||||
struct timespec stime_children;
|
||||
struct timespec utime_children;
|
||||
|
||||
long maxrss;
|
||||
long maxrss_children;
|
||||
|
||||
// perf_event
|
||||
int perf_status;
|
||||
#define PP_NONE 0
|
||||
#define PP_RESET 1
|
||||
#define PP_COUNT 2
|
||||
#define PP_STOP 3
|
||||
struct mc_perf_event *monitoring_event;
|
||||
};
|
||||
|
||||
void hold_thread(struct thread *ftn);
|
||||
@ -509,6 +553,23 @@ struct thread {
|
||||
unsigned long *ptrace_debugreg; /* debug registers for ptrace */
|
||||
struct sig_pending *ptrace_recvsig;
|
||||
struct sig_pending *ptrace_sendsig;
|
||||
|
||||
// cpu time
|
||||
struct timespec stime;
|
||||
struct timespec utime;
|
||||
struct timespec btime;
|
||||
int times_update;
|
||||
int in_kernel;
|
||||
|
||||
// interval timers
|
||||
int itimer_enabled;
|
||||
struct itimerval itimer_virtual;
|
||||
struct itimerval itimer_prof;
|
||||
struct timespec itimer_virtual_value;
|
||||
struct timespec itimer_prof_value;
|
||||
|
||||
/* Syscall offload wait queue head */
|
||||
struct waitq scd_wq;
|
||||
};
|
||||
|
||||
struct process_vm {
|
||||
@ -516,6 +577,10 @@ struct process_vm {
|
||||
struct list_head vm_range_list;
|
||||
struct vm_regions region;
|
||||
struct process *proc; /* process that reside on the same page */
|
||||
void *opt;
|
||||
void (*free_cb)(struct process_vm *, void *);
|
||||
void *vdso_addr;
|
||||
void *vvar_addr;
|
||||
|
||||
ihk_spinlock_t page_table_lock;
|
||||
ihk_spinlock_t memory_range_lock;
|
||||
@ -526,12 +591,25 @@ struct process_vm {
|
||||
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
|
||||
|
||||
ihk_atomic_t refcount;
|
||||
cpu_set_t cpu_set;
|
||||
ihk_spinlock_t cpu_set_lock;
|
||||
int exiting;
|
||||
|
||||
long currss;
|
||||
};
|
||||
|
||||
static inline int has_cap_ipc_lock(struct thread *th)
|
||||
{
|
||||
/* CAP_IPC_LOCK (= 14) */
|
||||
return !(th->proc->euid);
|
||||
}
|
||||
|
||||
static inline int has_cap_sys_admin(struct thread *th)
|
||||
{
|
||||
/* CAP_SYS_ADMIN (= 21) */
|
||||
return !(th->proc->euid);
|
||||
}
|
||||
|
||||
void hold_address_space(struct address_space *);
|
||||
void release_address_space(struct address_space *);
|
||||
struct thread *create_thread(unsigned long user_pc);
|
||||
struct thread *clone_thread(struct thread *org, unsigned long pc,
|
||||
unsigned long sp, int clone_flags);
|
||||
@ -549,7 +627,7 @@ int populate_process_memory(struct process_vm *vm, void *start, size_t len);
|
||||
int add_process_memory_range(struct process_vm *vm,
|
||||
unsigned long start, unsigned long end,
|
||||
unsigned long phys, unsigned long flag,
|
||||
struct memobj *memobj, off_t objoff);
|
||||
struct memobj *memobj, off_t objoff, int pgshift);
|
||||
int remove_process_memory_range(struct process_vm *vm, unsigned long start,
|
||||
unsigned long end, int *ro_freedp);
|
||||
int split_process_memory_range(struct process_vm *vm,
|
||||
@ -610,5 +688,9 @@ void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock);
|
||||
void chain_process(struct process *);
|
||||
void chain_thread(struct thread *);
|
||||
void proc_init();
|
||||
void set_timer();
|
||||
struct sig_pending *hassigpending(struct thread *thread);
|
||||
void settid(struct thread *thread, int mode, int newcpuid, int oldcpuid,
|
||||
int nr_tids, int *tids);
|
||||
|
||||
#endif
|
||||
|
||||
@ -25,6 +25,7 @@ enum {
|
||||
IPC_CREAT = 01000,
|
||||
IPC_EXCL = 02000,
|
||||
|
||||
SHM_HUGETLB = 04000,
|
||||
SHM_RDONLY = 010000,
|
||||
SHM_RND = 020000,
|
||||
SHM_REMAP = 040000,
|
||||
@ -46,11 +47,14 @@ enum {
|
||||
SHM_INFO = 14,
|
||||
};
|
||||
|
||||
struct shmlock_user;
|
||||
|
||||
struct shmobj {
|
||||
struct memobj memobj; /* must be first */
|
||||
int index;
|
||||
uint8_t padding[4];
|
||||
int pgshift;
|
||||
size_t real_segsz;
|
||||
struct shmlock_user * user;
|
||||
struct shmid_ds ds;
|
||||
struct list_head page_list;
|
||||
struct list_head chain; /* shmobj_list */
|
||||
@ -75,9 +79,33 @@ struct shm_info {
|
||||
uint64_t swap_successes;
|
||||
};
|
||||
|
||||
struct shmlock_user {
|
||||
uid_t ruid;
|
||||
int padding;
|
||||
size_t locked;
|
||||
|
||||
struct list_head chain;
|
||||
};
|
||||
|
||||
extern ihk_spinlock_t shmlock_users_lock_body;
|
||||
|
||||
static inline void shmlock_users_lock(void)
|
||||
{
|
||||
ihk_mc_spinlock_lock_noirq(&shmlock_users_lock_body);
|
||||
return;
|
||||
}
|
||||
|
||||
static inline void shmlock_users_unlock(void)
|
||||
{
|
||||
ihk_mc_spinlock_unlock_noirq(&shmlock_users_lock_body);
|
||||
return;
|
||||
}
|
||||
|
||||
void shmobj_list_lock(void);
|
||||
void shmobj_list_unlock(void);
|
||||
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
|
||||
void shmobj_destroy(struct shmobj *obj);
|
||||
void shmlock_user_free(struct shmlock_user *user);
|
||||
int shmlock_user_get(uid_t ruid, struct shmlock_user **userp);
|
||||
|
||||
#endif /* HEADER_SHM_H */
|
||||
|
||||
@ -31,6 +31,7 @@
|
||||
#define SCD_MSG_PREPARE_PROCESS_ACKED 0x2
|
||||
#define SCD_MSG_PREPARE_PROCESS_NACKED 0x7
|
||||
#define SCD_MSG_SCHEDULE_PROCESS 0x3
|
||||
#define SCD_MSG_WAKE_UP_SYSCALL_THREAD 0x14
|
||||
|
||||
#define SCD_MSG_INIT_CHANNEL 0x5
|
||||
#define SCD_MSG_INIT_CHANNEL_ACKED 0x6
|
||||
@ -38,6 +39,10 @@
|
||||
#define SCD_MSG_SYSCALL_ONESIDE 0x4
|
||||
#define SCD_MSG_SEND_SIGNAL 0x8
|
||||
#define SCD_MSG_CLEANUP_PROCESS 0x9
|
||||
#define SCD_MSG_GET_VDSO_INFO 0xa
|
||||
|
||||
#define SCD_MSG_GET_CPU_MAPPING 0xc
|
||||
#define SCD_MSG_REPLY_GET_CPU_MAPPING 0xd
|
||||
|
||||
#define SCD_MSG_PROCFS_CREATE 0x10
|
||||
#define SCD_MSG_PROCFS_DELETE 0x11
|
||||
@ -46,10 +51,28 @@
|
||||
|
||||
#define SCD_MSG_DEBUG_LOG 0x20
|
||||
|
||||
#define ARCH_SET_GS 0x1001
|
||||
#define ARCH_SET_FS 0x1002
|
||||
#define ARCH_GET_FS 0x1003
|
||||
#define ARCH_GET_GS 0x1004
|
||||
#define SCD_MSG_SYSFS_REQ_CREATE 0x30
|
||||
/* #define SCD_MSG_SYSFS_RESP_CREATE 0x31 */
|
||||
#define SCD_MSG_SYSFS_REQ_MKDIR 0x32
|
||||
/* #define SCD_MSG_SYSFS_RESP_MKDIR 0x33 */
|
||||
#define SCD_MSG_SYSFS_REQ_SYMLINK 0x34
|
||||
/* #define SCD_MSG_SYSFS_RESP_SYMLINK 0x35 */
|
||||
#define SCD_MSG_SYSFS_REQ_LOOKUP 0x36
|
||||
/* #define SCD_MSG_SYSFS_RESP_LOOKUP 0x37 */
|
||||
#define SCD_MSG_SYSFS_REQ_UNLINK 0x38
|
||||
/* #define SCD_MSG_SYSFS_RESP_UNLINK 0x39 */
|
||||
#define SCD_MSG_SYSFS_REQ_SHOW 0x3a
|
||||
#define SCD_MSG_SYSFS_RESP_SHOW 0x3b
|
||||
#define SCD_MSG_SYSFS_REQ_STORE 0x3c
|
||||
#define SCD_MSG_SYSFS_RESP_STORE 0x3d
|
||||
#define SCD_MSG_SYSFS_REQ_RELEASE 0x3e
|
||||
#define SCD_MSG_SYSFS_RESP_RELEASE 0x3f
|
||||
#define SCD_MSG_SYSFS_REQ_SETUP 0x40
|
||||
#define SCD_MSG_SYSFS_RESP_SETUP 0x41
|
||||
/* #define SCD_MSG_SYSFS_REQ_CLEANUP 0x42 */
|
||||
/* #define SCD_MSG_SYSFS_RESP_CLEANUP 0x43 */
|
||||
#define SCD_MSG_PROCFS_TID_CREATE 0x44
|
||||
#define SCD_MSG_PROCFS_TID_DELETE 0x45
|
||||
|
||||
/* Cloning flags. */
|
||||
# define CSIGNAL 0x000000ff /* Signal mask to be sent at exit. */
|
||||
@ -94,14 +117,6 @@ struct user_desc {
|
||||
unsigned int useable:1;
|
||||
unsigned int lm:1;
|
||||
};
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int ref;
|
||||
int osnum;
|
||||
int pid;
|
||||
int err;
|
||||
unsigned long arg;
|
||||
};
|
||||
|
||||
struct program_image_section {
|
||||
unsigned long vaddr;
|
||||
@ -143,6 +158,9 @@ struct program_load_desc {
|
||||
int stack_prot;
|
||||
int pgid;
|
||||
int cred[8];
|
||||
int reloc;
|
||||
char enable_vdso;
|
||||
char padding[7];
|
||||
unsigned long entry;
|
||||
unsigned long user_start;
|
||||
unsigned long user_end;
|
||||
@ -171,13 +189,58 @@ struct ikc_scd_init_param {
|
||||
};
|
||||
|
||||
struct syscall_request {
|
||||
/* TID of requesting thread */
|
||||
int rtid;
|
||||
/*
|
||||
* TID of target thread. Remote page fault response needs to designate the
|
||||
* thread that must serve the request, 0 indicates any thread from the pool
|
||||
*/
|
||||
int ttid;
|
||||
unsigned long valid;
|
||||
unsigned long number;
|
||||
unsigned long args[6];
|
||||
};
|
||||
|
||||
struct ikc_scd_packet {
|
||||
int msg;
|
||||
int err;
|
||||
union {
|
||||
/* for traditional SCD_MSG_* */
|
||||
struct {
|
||||
int ref;
|
||||
int osnum;
|
||||
int pid;
|
||||
unsigned long arg;
|
||||
struct syscall_request req;
|
||||
unsigned long resp_pa;
|
||||
};
|
||||
|
||||
/* for SCD_MSG_SYSFS_* */
|
||||
struct {
|
||||
long sysfs_arg1;
|
||||
long sysfs_arg2;
|
||||
long sysfs_arg3;
|
||||
};
|
||||
|
||||
/* SCD_MSG_SCHEDULE_THREAD */
|
||||
struct {
|
||||
int ttid;
|
||||
};
|
||||
};
|
||||
char padding[12];
|
||||
};
|
||||
|
||||
#define IHK_SCD_REQ_THREAD_SPINNING 0
|
||||
#define IHK_SCD_REQ_THREAD_TO_BE_WOKEN 1
|
||||
#define IHK_SCD_REQ_THREAD_DESCHEDULED 2
|
||||
|
||||
struct syscall_response {
|
||||
/* TID of the thread that requested the service */
|
||||
int ttid;
|
||||
/* TID of the mcexec thread that is serving the request */
|
||||
int stid;
|
||||
unsigned long status;
|
||||
unsigned long req_thread_status;
|
||||
long ret;
|
||||
unsigned long fault_address;
|
||||
unsigned long fault_reason;
|
||||
@ -278,6 +341,7 @@ struct procfs_read {
|
||||
int ret; /* read bytes (answer) */
|
||||
int status; /* non-zero if done (answer) */
|
||||
int newcpu; /* migrated new cpu (answer) */
|
||||
int readwrite; /* 0:read, 1:write */
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
@ -287,6 +351,29 @@ struct procfs_file {
|
||||
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
|
||||
};
|
||||
|
||||
#define RUSAGE_SELF 0
|
||||
#define RUSAGE_CHILDREN -1
|
||||
#define RUSAGE_THREAD 1
|
||||
|
||||
struct rusage {
|
||||
struct timeval ru_utime;
|
||||
struct timeval ru_stime;
|
||||
long ru_maxrss;
|
||||
long ru_ixrss;
|
||||
long ru_idrss;
|
||||
long ru_isrss;
|
||||
long ru_minflt;
|
||||
long ru_majflt;
|
||||
long ru_nswap;
|
||||
long ru_inblock;
|
||||
long ru_oublock;
|
||||
long ru_msgsnd;
|
||||
long ru_msgrcv;
|
||||
long ru_nsignals;
|
||||
long ru_nvcsw;
|
||||
long ru_nivcsw;
|
||||
};
|
||||
|
||||
extern void terminate(int, int);
|
||||
|
||||
struct tod_data_s {
|
||||
@ -298,4 +385,50 @@ struct tod_data_s {
|
||||
};
|
||||
extern struct tod_data_s tod_data; /* residing in arch-dependent file */
|
||||
|
||||
void reset_cputime();
|
||||
void set_cputime(int mode);
|
||||
intptr_t do_mmap(intptr_t addr0, size_t len0, int prot, int flags, int fd,
|
||||
off_t off0);
|
||||
void clear_host_pte(uintptr_t addr, size_t len);
|
||||
typedef int32_t key_t;
|
||||
int do_shmget(key_t key, size_t size, int shmflg);
|
||||
struct process_vm;
|
||||
int arch_map_vdso(struct process_vm *vm); /* arch dependent */
|
||||
int arch_setup_vdso(void);
|
||||
|
||||
#define VDSO_MAXPAGES 2
|
||||
struct vdso {
|
||||
long busy;
|
||||
int vdso_npages;
|
||||
char vvar_is_global;
|
||||
char hpet_is_global;
|
||||
char pvti_is_global;
|
||||
char padding;
|
||||
long vdso_physlist[VDSO_MAXPAGES];
|
||||
void *vvar_virt;
|
||||
long vvar_phys;
|
||||
void *hpet_virt;
|
||||
long hpet_phys;
|
||||
void *pvti_virt;
|
||||
long pvti_phys;
|
||||
};
|
||||
|
||||
struct cpu_mapping {
|
||||
int cpu_number;
|
||||
int hw_id;
|
||||
};
|
||||
|
||||
struct get_cpu_mapping_req {
|
||||
int busy; /* INOUT: */
|
||||
int error; /* OUT: */
|
||||
long buf_rpa; /* OUT: physical address of struct cpu_mapping */
|
||||
int buf_elems; /* OUT: # of elements of buf */
|
||||
int padding;
|
||||
|
||||
/* work for mcctrl */
|
||||
#if 0
|
||||
wait_queue_head_t wq;
|
||||
#endif
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
71
kernel/include/sysfs.h
Normal file
71
kernel/include/sysfs.h
Normal file
@ -0,0 +1,71 @@
|
||||
/**
|
||||
* \file sysfs.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* sysfs framework API definitions
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef MCKERNEL_SYSFS_H
|
||||
#define MCKERNEL_SYSFS_H
|
||||
|
||||
#define SYSFS_PATH_MAX 1024
|
||||
|
||||
/* for sysfs_unlinkf() */
|
||||
#define SYSFS_UNLINK_KEEP_ANCESTOR 0x01
|
||||
|
||||
|
||||
struct sysfs_ops {
|
||||
ssize_t (*show)(struct sysfs_ops *ops, void *instance, void *buf,
|
||||
size_t bufsize);
|
||||
ssize_t (*store)(struct sysfs_ops *ops, void *instance, void *buf,
|
||||
size_t bufsize);
|
||||
void (*release)(struct sysfs_ops *ops, void *instance);
|
||||
};
|
||||
|
||||
struct sysfs_handle {
|
||||
long handle;
|
||||
};
|
||||
typedef struct sysfs_handle sysfs_handle_t;
|
||||
|
||||
struct sysfs_bitmap_param {
|
||||
int nbits;
|
||||
int padding;
|
||||
void *ptr;
|
||||
};
|
||||
|
||||
/*
 * Pointer values in [SYSFS_SPECIAL_OPS_MIN, SYSFS_SPECIAL_OPS_MAX] are
 * reserved: they are small integer tags rather than addresses.
 */
#define SYSFS_SPECIAL_OPS_MIN ((void *)1)
#define SYSFS_SPECIAL_OPS_MAX ((void *)1000)

/*
 * Tags inside the reserved range.
 * NOTE(review): the suffix presumably names the value format of the
 * snooped variable (d32, u64, string, bitmap, ...) -- confirm against the
 * sysfs implementation.
 */
#define SYSFS_SNOOPING_OPS_d32 ((void *)1)
#define SYSFS_SNOOPING_OPS_d64 ((void *)2)
#define SYSFS_SNOOPING_OPS_u32 ((void *)3)
#define SYSFS_SNOOPING_OPS_u64 ((void *)4)
#define SYSFS_SNOOPING_OPS_s ((void *)5)
#define SYSFS_SNOOPING_OPS_pbl ((void *)6)
#define SYSFS_SNOOPING_OPS_pb ((void *)7)
#define SYSFS_SNOOPING_OPS_u32K ((void *)8)

/*
 * is_special_sysfs_ops - test whether an ops pointer is a reserved tag.
 *
 * Returns nonzero when ops, read as a long, lies in the inclusive range
 * [SYSFS_SPECIAL_OPS_MIN, SYSFS_SPECIAL_OPS_MAX], and 0 otherwise
 * (including for NULL).
 */
static inline int is_special_sysfs_ops(void *ops)
{
	return (((long)SYSFS_SPECIAL_OPS_MIN <= (long)ops)
			&& ((long)ops <= (long)SYSFS_SPECIAL_OPS_MAX));
}
|
||||
|
||||
extern int sysfs_createf(struct sysfs_ops *ops, void *instance, int mode,
|
||||
const char *fmt, ...);
|
||||
extern int sysfs_mkdirf(sysfs_handle_t *dirhp, const char *fmt, ...);
|
||||
extern int sysfs_symlinkf(sysfs_handle_t targeth, const char *fmt, ...);
|
||||
extern int sysfs_lookupf(sysfs_handle_t *objhp, const char *fmt, ...);
|
||||
extern int sysfs_unlinkf(int flags, const char *fmt, ...);
|
||||
|
||||
extern void sysfs_init(void);
|
||||
struct ihk_ikc_channel_desc;
|
||||
extern void sysfss_packet_handler(struct ihk_ikc_channel_desc *ch, int msg,
|
||||
int error, long arg1, long arg2, long arg3);
|
||||
|
||||
#endif /* MCKERNEL_SYSFS_H */
|
||||
88
kernel/include/sysfs_msg.h
Normal file
88
kernel/include/sysfs_msg.h
Normal file
@ -0,0 +1,88 @@
|
||||
/**
|
||||
* \file sysfs_msg.h
|
||||
* License details are found in the file LICENSE.
|
||||
* \brief
|
||||
* message declarations for sysfs framework
|
||||
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
|
||||
* Copyright (C) 2015 RIKEN AICS
|
||||
*/
|
||||
/*
|
||||
* HISTORY:
|
||||
*/
|
||||
|
||||
#ifndef MCKERNEL_SYSFS_MSG_H
|
||||
#define MCKERNEL_SYSFS_MSG_H
|
||||
|
||||
#define SYSFS_PATH_MAX 1024
|
||||
|
||||
struct sysfs_req_create_param {
|
||||
int mode;
|
||||
int error;
|
||||
long client_ops;
|
||||
long client_instance;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_create_param */
|
||||
|
||||
#define SYSFS_SPECIAL_OPS_MIN ((void *)1)
|
||||
#define SYSFS_SPECIAL_OPS_MAX ((void *)1000)
|
||||
|
||||
#define SYSFS_SNOOPING_OPS_d32 ((void *)1)
|
||||
#define SYSFS_SNOOPING_OPS_d64 ((void *)2)
|
||||
#define SYSFS_SNOOPING_OPS_u32 ((void *)3)
|
||||
#define SYSFS_SNOOPING_OPS_u64 ((void *)4)
|
||||
#define SYSFS_SNOOPING_OPS_s ((void *)5)
|
||||
#define SYSFS_SNOOPING_OPS_pbl ((void *)6)
|
||||
#define SYSFS_SNOOPING_OPS_pb ((void *)7)
|
||||
#define SYSFS_SNOOPING_OPS_u32K ((void *)8)
|
||||
|
||||
struct sysfs_req_mkdir_param {
|
||||
int error;
|
||||
int padding;
|
||||
long handle;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_mkdir_param */
|
||||
|
||||
struct sysfs_req_symlink_param {
|
||||
int error;
|
||||
int padding;
|
||||
long target;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_symlink_param */
|
||||
|
||||
struct sysfs_req_lookup_param {
|
||||
int error;
|
||||
int padding;
|
||||
long handle;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_lookup_param */
|
||||
|
||||
/* for sysfs_req_unlink_param.flags */
|
||||
#define SYSFS_UNLINK_KEEP_ANCESTOR 0x01
|
||||
|
||||
struct sysfs_req_unlink_param {
|
||||
int flags;
|
||||
int error;
|
||||
char path[SYSFS_PATH_MAX];
|
||||
int padding;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_unlink_param */
|
||||
|
||||
struct sysfs_req_setup_param {
|
||||
int error;
|
||||
int padding;
|
||||
long buf_rpa;
|
||||
long bufsize;
|
||||
char padding3[SYSFS_PATH_MAX];
|
||||
int padding2;
|
||||
int busy;
|
||||
}; /* struct sysfs_req_setup_param */
|
||||
|
||||
#endif /* MCKERNEL_SYSFS_MSG_H */
|
||||
@ -20,6 +20,10 @@
|
||||
#define __TIME_H
|
||||
|
||||
#define NS_PER_SEC 1000000000UL
|
||||
#define CLOCK_REALTIME 0
|
||||
#define CLOCK_MONOTONIC 1
|
||||
#define CLOCK_PROCESS_CPUTIME_ID 2
|
||||
#define CLOCK_THREAD_CPUTIME_ID 3
|
||||
|
||||
typedef long int __time_t;
|
||||
|
||||
@ -49,5 +53,72 @@ struct timezone
|
||||
int tz_dsttime; /* Nonzero if DST is ever in effect. */
|
||||
};
|
||||
|
||||
#define ITIMER_REAL 0
|
||||
#define ITIMER_VIRTUAL 1
|
||||
#define ITIMER_PROF 2
|
||||
|
||||
struct itimerval {
|
||||
struct timeval it_interval;
|
||||
struct timeval it_value;
|
||||
};
|
||||
|
||||
/*
 * ts_add - add *bts to *ats in place.
 *
 * The two fields are summed independently, then each whole second that
 * accumulated in tv_nsec is carried into tv_sec.  Only upward carries are
 * performed; a negative tv_nsec sum is left untouched.
 */
static inline void
ts_add(struct timespec *ats, const struct timespec *bts)
{
	ats->tv_sec += bts->tv_sec;
	ats->tv_nsec += bts->tv_nsec;
	while (ats->tv_nsec >= 1000000000) {
		ats->tv_sec++;
		ats->tv_nsec -= 1000000000;
	}
}
|
||||
|
||||
/*
 * ts_sub - subtract *bts from *ats in place.
 *
 * The two fields are subtracted independently, then one second is
 * borrowed from tv_sec for as long as tv_nsec is negative.  tv_sec itself
 * may end up negative when *bts is later than *ats.
 */
static inline void
ts_sub(struct timespec *ats, const struct timespec *bts)
{
	ats->tv_sec -= bts->tv_sec;
	ats->tv_nsec -= bts->tv_nsec;
	while (ats->tv_nsec < 0) {
		ats->tv_sec--;
		ats->tv_nsec += 1000000000;
	}
}
|
||||
|
||||
/*
 * tv_add - add *bts to *ats in place (microsecond resolution).
 *
 * The two fields are summed independently, then each whole second that
 * accumulated in tv_usec is carried into tv_sec.  Only upward carries are
 * performed; a negative tv_usec sum is left untouched.
 */
static inline void
tv_add(struct timeval *ats, const struct timeval *bts)
{
	ats->tv_sec += bts->tv_sec;
	ats->tv_usec += bts->tv_usec;
	while (ats->tv_usec >= 1000000) {
		ats->tv_sec++;
		ats->tv_usec -= 1000000;
	}
}
|
||||
|
||||
/*
 * tv_sub - subtract *bts from *ats in place (microsecond resolution).
 *
 * The two fields are subtracted independently, then one second is
 * borrowed from tv_sec for as long as tv_usec is negative.
 */
static inline void
tv_sub(struct timeval *ats, const struct timeval *bts)
{
	ats->tv_sec -= bts->tv_sec;
	ats->tv_usec -= bts->tv_usec;
	while (ats->tv_usec < 0) {
		ats->tv_sec--;
		ats->tv_usec += 1000000;
	}
}
|
||||
|
||||
/*
 * tv_to_ts - convert a timeval into a timespec.
 *
 * Seconds are copied as is; microseconds are scaled by 1000 to
 * nanoseconds.  *ats is fully overwritten.
 */
static inline void
tv_to_ts(struct timespec *ats, const struct timeval *bts)
{
	ats->tv_sec = bts->tv_sec;
	ats->tv_nsec = bts->tv_usec * 1000;
}
|
||||
|
||||
/*
 * ts_to_tv - convert a timespec into a timeval.
 *
 * Seconds are copied as is; nanoseconds are divided by 1000, so any
 * sub-microsecond remainder is discarded.  *ats is fully overwritten.
 */
static inline void
ts_to_tv(struct timeval *ats, const struct timespec *bts)
{
	ats->tv_sec = bts->tv_sec;
	ats->tv_usec = bts->tv_nsec / 1000;
}
|
||||
|
||||
#endif // __TIME_H
|
||||
|
||||
|
||||
@ -30,6 +30,7 @@
|
||||
#include <init.h>
|
||||
#include <cls.h>
|
||||
#include <syscall.h>
|
||||
#include <sysfs.h>
|
||||
|
||||
//#define IOCTL_FUNC_EXTENSION
|
||||
#ifdef IOCTL_FUNC_EXTENSION
|
||||
@ -207,6 +208,7 @@ static void time_init(void)
|
||||
{
|
||||
unsigned long tv_sec, tv_nsec;
|
||||
unsigned long ns_per_kclock;
|
||||
unsigned long tsc;
|
||||
|
||||
ihk_mc_get_boot_time(&tv_sec, &tv_nsec);
|
||||
ns_per_kclock = ihk_mc_get_ns_per_tsc();
|
||||
@ -216,6 +218,15 @@ static void time_init(void)
|
||||
|
||||
if (ns_per_kclock) {
|
||||
tod_data.clocks_per_sec = (1000L * NS_PER_SEC) / ns_per_kclock;
|
||||
|
||||
tsc = rdtsc();
|
||||
tod_data.origin.tv_sec -= tsc / tod_data.clocks_per_sec;
|
||||
tod_data.origin.tv_nsec -= NS_PER_SEC * (tsc % tod_data.clocks_per_sec)
|
||||
/ tod_data.clocks_per_sec;
|
||||
if (tod_data.origin.tv_nsec < 0) {
|
||||
--tod_data.origin.tv_sec;
|
||||
tod_data.origin.tv_nsec += NS_PER_SEC;
|
||||
}
|
||||
}
|
||||
|
||||
if (!ns_per_kclock) {
|
||||
@ -250,9 +261,70 @@ static void rest_init(void)
|
||||
sched_init();
|
||||
}
|
||||
|
||||
static void setup_remote_snooping_samples(void)
|
||||
{
|
||||
static long lvalue = 0xf123456789abcde0;
|
||||
static char *svalue = "string(remote)";
|
||||
int error;
|
||||
struct sysfs_bitmap_param param;
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_d32, &lvalue, 0444, "/sys/test/remote/d32");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: d32");
|
||||
}
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_d64, &lvalue, 0444, "/sys/test/remote/d64");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: d64");
|
||||
}
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_u32, &lvalue, 0444, "/sys/test/remote/u32");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: u32");
|
||||
}
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_u64, &lvalue, 0444, "/sys/test/remote/u64");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: u64");
|
||||
}
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_s, svalue, 0444, "/sys/test/remote/s");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: s");
|
||||
}
|
||||
|
||||
param.nbits = 40;
|
||||
param.ptr = &lvalue;
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_pbl, ¶m, 0444, "/sys/test/remote/pbl");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: pbl");
|
||||
}
|
||||
|
||||
param.nbits = 40;
|
||||
param.ptr = &lvalue;
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_pb, ¶m, 0444, "/sys/test/remote/pb");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: pb");
|
||||
}
|
||||
|
||||
error = sysfs_createf(SYSFS_SNOOPING_OPS_u32K, &lvalue, 0444, "/sys/test/remote/u32K");
|
||||
if (error) {
|
||||
panic("setup_remote_snooping_samples: u32K");
|
||||
}
|
||||
|
||||
return;
|
||||
} /* setup_remote_snooping_samples() */
|
||||
|
||||
/*
 * populate_sysfs - create the initial sysfs entries.
 *
 * Runs the CPU sysfs setup first, then registers the remote-snooping sample
 * entries.
 */
static void populate_sysfs(void)
{
	cpu_sysfs_setup();
	setup_remote_snooping_samples();
} /* populate_sysfs() */
|
||||
|
||||
int host_ikc_inited = 0;
|
||||
extern int num_processors;
|
||||
extern void zero_tsc(void);
|
||||
|
||||
static void post_init(void)
|
||||
{
|
||||
@ -271,13 +343,12 @@ static void post_init(void)
|
||||
ihk_mc_spinlock_init(&syscall_lock);
|
||||
}
|
||||
|
||||
/* Zero TSC.
|
||||
* All AP cores are wait spinning for ap_start() and they will zero
|
||||
* their TSC immediatly. */
|
||||
zero_tsc();
|
||||
arch_setup_vdso();
|
||||
arch_start_pvclock();
|
||||
ap_start();
|
||||
|
||||
create_os_procfs_files();
|
||||
sysfs_init();
|
||||
populate_sysfs();
|
||||
}
|
||||
#ifdef DCFA_RUN
|
||||
extern void user_main();
|
||||
@ -290,9 +361,17 @@ extern void ibmic_cmd_init(void);
|
||||
|
||||
int main(void)
|
||||
{
|
||||
kmsg_init();
|
||||
char *ptr;
|
||||
int mode = 0;
|
||||
|
||||
kputs("MCK started.\n");
|
||||
ptr = find_command_line("ksyslogd=");
|
||||
if (ptr) {
|
||||
mode = ptr[9] - 0x30;
|
||||
if (mode < 0 || mode > 2) mode = 0;
|
||||
}
|
||||
kmsg_init(mode);
|
||||
|
||||
kputs("IHK/McKernel started.\n");
|
||||
|
||||
arch_init();
|
||||
|
||||
@ -314,7 +393,7 @@ int main(void)
|
||||
|
||||
futex_init();
|
||||
|
||||
kputs("MCK/IHK booted.\n");
|
||||
kputs("IHK/McKernel booted.\n");
|
||||
|
||||
#ifdef DCFA_KMOD
|
||||
mc_cmd_client_init();
|
||||
|
||||
@ -17,6 +17,7 @@
|
||||
#include <ihk/debug.h>
|
||||
#include <ihk/ikc.h>
|
||||
#include <ikc/master.h>
|
||||
#include <arch/cpu.h>
|
||||
|
||||
//#define DEBUG_LISTENERS
|
||||
|
||||
@ -28,16 +29,6 @@
|
||||
#define ekprintf(...) kprintf(__VA_ARGS__)
|
||||
#endif
|
||||
|
||||
static unsigned long read_tsc(void)
|
||||
{
|
||||
unsigned int low, high;
|
||||
|
||||
asm volatile("rdtsc" : "=a"(low), "=d"(high));
|
||||
|
||||
return (low | ((unsigned long)high << 32));
|
||||
}
|
||||
|
||||
|
||||
void testmem(void *v, unsigned long size)
|
||||
{
|
||||
unsigned long i, st, ed, s = 0;
|
||||
|
||||
823
kernel/mem.c
823
kernel/mem.c
@ -52,7 +52,8 @@ static struct ihk_page_allocator_desc *pa_allocator;
|
||||
static unsigned long pa_start, pa_end;
|
||||
static struct page *pa_pages;
|
||||
|
||||
extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
|
||||
extern void unhandled_page_fault(struct thread *, void *, void *);
|
||||
extern int interrupt_from_user(void *);
|
||||
|
||||
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
|
||||
|
||||
@ -155,13 +156,17 @@ void sbox_write(int offset, unsigned int value);
|
||||
|
||||
static void query_free_mem_interrupt_handler(void *priv)
|
||||
{
|
||||
#ifdef ATTACHED_MIC
|
||||
dkprintf("query free mem handler!\n");
|
||||
|
||||
int pages = ihk_pagealloc_query_free(pa_allocator);
|
||||
|
||||
dkprintf("free pages: %d\n", pages);
|
||||
kprintf("McKernel free pages: %d\n", pages);
|
||||
|
||||
if (find_command_line("memdebug")) {
|
||||
extern void kmalloc_memcheck(void);
|
||||
|
||||
kmalloc_memcheck();
|
||||
}
|
||||
|
||||
#ifdef ATTACHED_MIC
|
||||
sbox_write(SBOX_SCRATCH0, pages);
|
||||
sbox_write(SBOX_SCRATCH1, 1);
|
||||
#endif
|
||||
@ -209,61 +214,6 @@ void coredump(struct thread *thread, void *regs)
|
||||
freecore(&coretable);
|
||||
}
|
||||
|
||||
static void unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
|
||||
{
|
||||
const uintptr_t address = (uintptr_t)fault_addr;
|
||||
struct process_vm *vm = thread->vm;
|
||||
struct vm_range *range;
|
||||
char found;
|
||||
unsigned long irqflags;
|
||||
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
|
||||
|
||||
irqflags = kprintf_lock();
|
||||
dkprintf("[%d] Page fault for 0x%lX\n",
|
||||
ihk_mc_get_processor_id(), address);
|
||||
dkprintf("%s for %s access in %s mode (reserved bit %s set), "
|
||||
"it %s an instruction fetch\n",
|
||||
(error & PF_PROT ? "protection fault" : "no page found"),
|
||||
(error & PF_WRITE ? "write" : "read"),
|
||||
(error & PF_USER ? "user" : "kernel"),
|
||||
(error & PF_RSVD ? "was" : "wasn't"),
|
||||
(error & PF_INSTR ? "was" : "wasn't"));
|
||||
|
||||
found = 0;
|
||||
list_for_each_entry(range, &vm->vm_range_list, list) {
|
||||
if (range->start <= address && range->end > address) {
|
||||
found = 1;
|
||||
dkprintf("address is in range, flag: 0x%X! \n",
|
||||
range->flag);
|
||||
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
dkprintf("address is out of range! \n");
|
||||
}
|
||||
|
||||
kprintf_unlock(irqflags);
|
||||
|
||||
/* TODO */
|
||||
ihk_mc_debug_show_interrupt_context(regs);
|
||||
|
||||
|
||||
//dkprintf("now dump a core file\n");
|
||||
//coredump(proc, regs);
|
||||
|
||||
#ifdef DEBUG_PRINT_MEM
|
||||
{
|
||||
uint64_t *sp = (void *)REGS_GET_STACK_POINTER(regs);
|
||||
|
||||
kprintf("*rsp:%lx,*rsp+8:%lx,*rsp+16:%lx,*rsp+24:%lx,\n",
|
||||
sp[0], sp[1], sp[2], sp[3]);
|
||||
}
|
||||
#endif
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
void remote_flush_tlb_cpumask(struct process_vm *vm,
|
||||
unsigned long addr, int cpu_id)
|
||||
{
|
||||
@ -285,9 +235,9 @@ void remote_flush_tlb_cpumask(struct process_vm *vm,
|
||||
|
||||
/* Take a copy of the cpu set so that we don't hold the lock
|
||||
* all the way while interrupting other cores */
|
||||
ihk_mc_spinlock_lock_noirq(&vm->cpu_set_lock);
|
||||
memcpy(&_cpu_set, &vm->cpu_set, sizeof(cpu_set_t));
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->cpu_set_lock);
|
||||
ihk_mc_spinlock_lock_noirq(&vm->address_space->cpu_set_lock);
|
||||
memcpy(&_cpu_set, &vm->address_space->cpu_set, sizeof(cpu_set_t));
|
||||
ihk_mc_spinlock_unlock_noirq(&vm->address_space->cpu_set_lock);
|
||||
|
||||
dkprintf("trying to aquire flush_entry->lock flush_ind: %d\n", flush_ind);
|
||||
|
||||
@ -319,6 +269,13 @@ void remote_flush_tlb_cpumask(struct process_vm *vm,
|
||||
unsigned long tsc;
|
||||
tsc = rdtsc() + 12884901888; /* 1.2GHz =>10 sec */
|
||||
#endif
|
||||
if (flush_entry->addr) {
|
||||
flush_tlb_single(flush_entry->addr & PAGE_MASK);
|
||||
}
|
||||
/* Zero address denotes full TLB flush */
|
||||
else {
|
||||
flush_tlb();
|
||||
}
|
||||
|
||||
/* Wait for all cores */
|
||||
while (ihk_atomic_read(&flush_entry->pending) != 0) {
|
||||
@ -369,6 +326,7 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
|
||||
struct thread *thread = cpu_local_var(current);
|
||||
int error;
|
||||
|
||||
set_cputime(interrupt_from_user(regs)? 1: 2);
|
||||
dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n",
|
||||
ihk_mc_get_processor_id(), fault_addr, reason, regs);
|
||||
|
||||
@ -388,10 +346,9 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
|
||||
// no return
|
||||
}
|
||||
|
||||
kprintf("[%d]page_fault_handler(%p,%lx,%p):"
|
||||
"fault vm failed. %d, TID: %d\n",
|
||||
ihk_mc_get_processor_id(), fault_addr,
|
||||
reason, regs, error, thread->tid);
|
||||
kprintf("%s fault VM failed for TID: %d, addr: 0x%lx, "
|
||||
"reason: %d, error: %d\n", __FUNCTION__,
|
||||
thread->tid, fault_addr, reason, error);
|
||||
unhandled_page_fault(thread, fault_addr, regs);
|
||||
preempt_enable();
|
||||
memset(&info, '\0', sizeof info);
|
||||
@ -416,7 +373,10 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
|
||||
info._sifields._sigfault.si_addr = fault_addr;
|
||||
set_signal(SIGSEGV, regs, &info);
|
||||
}
|
||||
check_signal(0, regs, 0);
|
||||
if(interrupt_from_user(regs)){
|
||||
cpu_enable_interrupt();
|
||||
check_signal(0, regs, 0);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -427,6 +387,7 @@ out:
|
||||
ihk_mc_get_processor_id(), fault_addr, reason,
|
||||
regs, error);
|
||||
check_need_resched();
|
||||
set_cputime(0);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -474,8 +435,9 @@ static void page_allocator_init(void)
|
||||
|
||||
ihk_mc_reserve_arch_pages(pa_start, pa_end, reserve_pages);
|
||||
|
||||
kprintf("Available pages: %ld pages\n",
|
||||
ihk_pagealloc_count(pa_allocator));
|
||||
kprintf("Available memory: %ld bytes in %ld pages\n",
|
||||
(ihk_pagealloc_count(pa_allocator) * PAGE_SIZE),
|
||||
ihk_pagealloc_count(pa_allocator));
|
||||
|
||||
/* Notify the ihk to use my page allocator */
|
||||
ihk_mc_set_page_allocator(&allocator);
|
||||
@ -556,6 +518,9 @@ static void page_init(void)
|
||||
|
||||
static char *memdebug = NULL;
|
||||
|
||||
static void *___kmalloc(int size, enum ihk_mc_ap_flag flag);
|
||||
static void ___kfree(void *ptr);
|
||||
|
||||
void register_kmalloc(void)
|
||||
{
|
||||
if(memdebug){
|
||||
@ -685,60 +650,100 @@ void mem_init(void)
|
||||
}
|
||||
}
|
||||
|
||||
struct location {
|
||||
struct location *next;
|
||||
int line;
|
||||
int cnt;
|
||||
char file[0];
|
||||
};
|
||||
/*
 * Geometry of the kmalloc tracking hash tables: 2^SHIFT buckets, with MASK
 * used to reduce a hash value to a bucket index.
 */
#define KMALLOC_TRACK_HASH_SHIFT (8)
#define KMALLOC_TRACK_HASH_SIZE (1 << KMALLOC_TRACK_HASH_SHIFT)
#define KMALLOC_TRACK_HASH_MASK (KMALLOC_TRACK_HASH_SIZE - 1)
|
||||
|
||||
struct alloc {
|
||||
struct alloc *next;
|
||||
struct malloc_header *p;
|
||||
struct location *loc;
|
||||
int size;
|
||||
struct list_head kmalloc_track_hash[KMALLOC_TRACK_HASH_SIZE];
|
||||
ihk_spinlock_t kmalloc_track_hash_locks[KMALLOC_TRACK_HASH_SIZE];
|
||||
|
||||
struct list_head kmalloc_addr_hash[KMALLOC_TRACK_HASH_SIZE];
|
||||
ihk_spinlock_t kmalloc_addr_hash_locks[KMALLOC_TRACK_HASH_SIZE];
|
||||
|
||||
int kmalloc_track_initialized = 0;
|
||||
int kmalloc_runcount = 0;
|
||||
|
||||
struct kmalloc_track_addr_entry {
|
||||
void *addr;
|
||||
int runcount;
|
||||
struct list_head list; /* track_entry's list */
|
||||
struct kmalloc_track_entry *entry;
|
||||
struct list_head hash; /* address hash */
|
||||
};
|
||||
|
||||
#define HASHNUM 129
|
||||
struct kmalloc_track_entry {
|
||||
char *file;
|
||||
int line;
|
||||
int size;
|
||||
ihk_atomic_t alloc_count;
|
||||
struct list_head hash;
|
||||
struct list_head addr_list;
|
||||
ihk_spinlock_t addr_list_lock;
|
||||
};
|
||||
|
||||
static struct alloc *allochash[HASHNUM];
|
||||
static struct location *lochash[HASHNUM];
|
||||
static ihk_spinlock_t alloclock;
|
||||
int runcount;
|
||||
static unsigned char *page;
|
||||
static int space;
|
||||
|
||||
static void *dalloc(unsigned long size)
|
||||
void kmalloc_init(void)
|
||||
{
|
||||
void *r;
|
||||
static int pos = 0;
|
||||
unsigned long irqstate;
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
|
||||
irqstate = ihk_mc_spinlock_lock(&alloclock);
|
||||
size = (size + 7) & 0xfffffffffffffff8L;
|
||||
if (pos + size > space) {
|
||||
page = allocate_pages(1, IHK_MC_AP_NOWAIT);
|
||||
space = 4096;
|
||||
pos = 0;
|
||||
register_kmalloc();
|
||||
|
||||
INIT_LIST_HEAD(&v->free_list);
|
||||
INIT_LIST_HEAD(&v->remote_free_list);
|
||||
ihk_mc_spinlock_init(&v->remote_free_list_lock);
|
||||
|
||||
v->kmalloc_initialized = 1;
|
||||
|
||||
if (!kmalloc_track_initialized) {
|
||||
int i;
|
||||
|
||||
memdebug = find_command_line("memdebug");
|
||||
|
||||
kmalloc_track_initialized = 1;
|
||||
for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) {
|
||||
ihk_mc_spinlock_init(&kmalloc_track_hash_locks[i]);
|
||||
INIT_LIST_HEAD(&kmalloc_track_hash[i]);
|
||||
ihk_mc_spinlock_init(&kmalloc_addr_hash_locks[i]);
|
||||
INIT_LIST_HEAD(&kmalloc_addr_hash[i]);
|
||||
}
|
||||
}
|
||||
r = page + pos;
|
||||
pos += size;
|
||||
ihk_mc_spinlock_unlock(&alloclock, irqstate);
|
||||
|
||||
return r;
|
||||
}
|
||||
|
||||
/* NOTE: Hash lock must be held */
|
||||
struct kmalloc_track_entry *__kmalloc_track_find_entry(
|
||||
int size, char *file, int line)
|
||||
{
|
||||
struct kmalloc_track_entry *entry_iter, *entry = NULL;
|
||||
int hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK;
|
||||
|
||||
list_for_each_entry(entry_iter, &kmalloc_track_hash[hash], hash) {
|
||||
if (!strcmp(entry_iter->file, file) &&
|
||||
entry_iter->size == size &&
|
||||
entry_iter->line == line) {
|
||||
entry = entry_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (entry) {
|
||||
dkprintf("%s found entry %s:%d size: %d\n", __FUNCTION__,
|
||||
file, line, size);
|
||||
}
|
||||
else {
|
||||
dkprintf("%s couldn't find entry %s:%d size: %d\n", __FUNCTION__,
|
||||
file, line, size);
|
||||
}
|
||||
|
||||
return entry;
|
||||
}
|
||||
|
||||
/* Top level routines called from macro */
|
||||
void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line)
|
||||
{
|
||||
char *r = ___kmalloc(size, flag);
|
||||
struct malloc_header *h;
|
||||
unsigned long hash;
|
||||
char *t;
|
||||
struct location *lp;
|
||||
struct alloc *ap;
|
||||
unsigned long alcsize;
|
||||
unsigned long chksize;
|
||||
unsigned long irqflags;
|
||||
struct kmalloc_track_entry *entry;
|
||||
struct kmalloc_track_addr_entry *addr_entry;
|
||||
int hash, addr_hash;
|
||||
void *r = ___kmalloc(size, flag);
|
||||
|
||||
if (!memdebug)
|
||||
return r;
|
||||
@ -746,177 +751,177 @@ void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line)
|
||||
if (!r)
|
||||
return r;
|
||||
|
||||
h = ((struct malloc_header *)r) - 1;
|
||||
alcsize = h->size * sizeof(struct malloc_header);
|
||||
chksize = alcsize - size;
|
||||
memset(r + size, '\x5a', chksize);
|
||||
hash = (strlen(file) + line + size) & KMALLOC_TRACK_HASH_MASK;
|
||||
irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]);
|
||||
|
||||
for (hash = 0, t = file; *t; t++) {
|
||||
hash <<= 1;
|
||||
hash += *t;
|
||||
entry = __kmalloc_track_find_entry(size, file, line);
|
||||
|
||||
if (!entry) {
|
||||
entry = ___kmalloc(sizeof(*entry), IHK_MC_AP_NOWAIT);
|
||||
if (!entry) {
|
||||
kprintf("%s: ERROR: allocating tracking entry\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
entry->line = line;
|
||||
entry->size = size;
|
||||
ihk_atomic_set(&entry->alloc_count, 0);
|
||||
ihk_mc_spinlock_init(&entry->addr_list_lock);
|
||||
INIT_LIST_HEAD(&entry->addr_list);
|
||||
|
||||
entry->file = ___kmalloc(strlen(file) + 1, IHK_MC_AP_NOWAIT);
|
||||
if (!entry->file) {
|
||||
kprintf("%s: ERROR: allocating file string\n");
|
||||
___kfree(entry);
|
||||
ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
|
||||
goto out;
|
||||
}
|
||||
|
||||
strcpy(entry->file, file);
|
||||
entry->file[strlen(file)] = 0;
|
||||
list_add(&entry->hash, &kmalloc_track_hash[hash]);
|
||||
dkprintf("%s entry %s:%d size: %d added\n", __FUNCTION__,
|
||||
file, line, size);
|
||||
}
|
||||
hash += line;
|
||||
hash %= HASHNUM;
|
||||
for (lp = lochash[hash]; lp; lp = lp->next)
|
||||
if (lp->line == line &&
|
||||
!strcmp(lp->file, file))
|
||||
break;
|
||||
if (!lp) {
|
||||
lp = dalloc(sizeof(struct location) + strlen(file) + 1);
|
||||
memset(lp, '\0', sizeof(struct location));
|
||||
lp->line = line;
|
||||
strcpy(lp->file, file);
|
||||
do {
|
||||
lp->next = lochash[hash];
|
||||
} while (!compare_and_swap(lochash + hash, (unsigned long)lp->next, (unsigned long)lp));
|
||||
ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
|
||||
|
||||
ihk_atomic_inc(&entry->alloc_count);
|
||||
|
||||
/* Add new addr entry for this allocation entry */
|
||||
addr_entry = ___kmalloc(sizeof(*addr_entry), IHK_MC_AP_NOWAIT);
|
||||
if (!addr_entry) {
|
||||
kprintf("%s: ERROR: allocating addr entry\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
hash = (unsigned long)h % HASHNUM;
|
||||
do {
|
||||
for (ap = allochash[hash]; ap; ap = ap->next)
|
||||
if (!ap->p)
|
||||
break;
|
||||
} while (ap && !compare_and_swap(&ap->p, 0UL, (unsigned long)h));
|
||||
if (!ap) {
|
||||
ap = dalloc(sizeof(struct alloc));
|
||||
memset(ap, '\0', sizeof(struct alloc));
|
||||
ap->p = h;
|
||||
do {
|
||||
ap->next = allochash[hash];
|
||||
} while (!compare_and_swap(allochash + hash, (unsigned long)ap->next, (unsigned long)ap));
|
||||
}
|
||||
addr_entry->addr = r;
|
||||
addr_entry->runcount = kmalloc_runcount;
|
||||
addr_entry->entry = entry;
|
||||
|
||||
ap->loc = lp;
|
||||
ap->size = size;
|
||||
ap->runcount = runcount;
|
||||
irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock);
|
||||
list_add(&addr_entry->list, &entry->addr_list);
|
||||
ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags);
|
||||
|
||||
return r;
|
||||
}
|
||||
/* Add addr entry to address hash */
|
||||
addr_hash = ((unsigned long)r >> 5) & KMALLOC_TRACK_HASH_MASK;
|
||||
irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[addr_hash]);
|
||||
list_add(&addr_entry->hash, &kmalloc_addr_hash[addr_hash]);
|
||||
ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[addr_hash], irqflags);
|
||||
|
||||
int _memcheck(void *ptr, char *msg, char *file, int line, int flags)
|
||||
{
|
||||
struct malloc_header *h = ((struct malloc_header *)ptr) - 1;
|
||||
struct malloc_header *next;
|
||||
unsigned long hash = (unsigned long)h % HASHNUM;
|
||||
struct alloc *ap;
|
||||
static unsigned long check = 0x5a5a5a5a5a5a5a5aUL;
|
||||
unsigned long alcsize;
|
||||
unsigned long chksize;
|
||||
|
||||
|
||||
if (h->check != 0x5a5a5a5a) {
|
||||
int i;
|
||||
unsigned long max = 0;
|
||||
unsigned long cur = (unsigned long)h;
|
||||
struct alloc *maxap = NULL;
|
||||
|
||||
for (i = 0; i < HASHNUM; i++)
|
||||
for (ap = allochash[i]; ap; ap = ap->next)
|
||||
if ((unsigned long)ap->p < cur &&
|
||||
(unsigned long)ap->p > max) {
|
||||
max = (unsigned long)ap->p;
|
||||
maxap = ap;
|
||||
}
|
||||
|
||||
kprintf("%s: detect buffer overrun, alc=%s:%d size=%ld h=%p, s=%ld\n", msg, maxap->loc->file, maxap->loc->line, maxap->size, maxap->p, maxap->p->size);
|
||||
kprintf("broken header: h=%p next=%p size=%ld cpu_id=%d\n", h, h->next, h->size, h->cpu_id);
|
||||
}
|
||||
|
||||
for (ap = allochash[hash]; ap; ap = ap->next)
|
||||
if (ap->p == h)
|
||||
break;
|
||||
if (!ap) {
|
||||
if(file)
|
||||
kprintf("%s: address not found, %s:%d p=%p\n", msg, file, line, ptr);
|
||||
else
|
||||
kprintf("%s: address not found p=%p\n", msg, ptr);
|
||||
return 1;
|
||||
}
|
||||
|
||||
alcsize = h->size * sizeof(struct malloc_header);
|
||||
chksize = alcsize - ap->size;
|
||||
if (chksize > 8)
|
||||
chksize = 8;
|
||||
next = (struct malloc_header *)((char *)ptr + alcsize);
|
||||
|
||||
if (next->check != 0x5a5a5a5a ||
|
||||
memcmp((char *)ptr + ap->size, &check, chksize)) {
|
||||
unsigned long buf = 0x5a5a5a5a5a5a5a5aUL;
|
||||
unsigned char *p;
|
||||
unsigned char *q;
|
||||
memcpy(&buf, (char *)ptr + ap->size, chksize);
|
||||
p = (unsigned char *)&(next->check);
|
||||
q = (unsigned char *)&buf;
|
||||
|
||||
if (file)
|
||||
kprintf("%s: broken, %s:%d alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, file, line, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size);
|
||||
else
|
||||
kprintf("%s: broken, alc=%s:%d %02x%02x%02x%02x%02x%02x%02x%02x %02x%02x%02x%02x size=%ld\n", msg, ap->loc->file, ap->loc->line, q[0], q[1], q[2], q[3], q[4], q[5], q[6], q[7], p[0], p[1], p[2], p[3], ap->size);
|
||||
|
||||
|
||||
if (next->check != 0x5a5a5a5a)
|
||||
kprintf("next->HEADER: next=%p size=%ld cpu_id=%d\n", next->next, next->size, next->cpu_id);
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
if(flags & 1){
|
||||
ap->p = NULL;
|
||||
ap->loc = NULL;
|
||||
ap->size = 0;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int memcheckall()
|
||||
{
|
||||
int i;
|
||||
struct alloc *ap;
|
||||
int r = 0;
|
||||
|
||||
for(i = 0; i < HASHNUM; i++)
|
||||
for(ap = allochash[i]; ap; ap = ap->next)
|
||||
if(ap->p)
|
||||
r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2);
|
||||
return r;
|
||||
}
|
||||
|
||||
int freecheck(int runcount)
|
||||
{
|
||||
int i;
|
||||
struct alloc *ap;
|
||||
struct location *lp;
|
||||
int r = 0;
|
||||
|
||||
for (i = 0; i < HASHNUM; i++)
|
||||
for (lp = lochash[i]; lp; lp = lp->next)
|
||||
lp->cnt = 0;
|
||||
|
||||
for (i = 0; i < HASHNUM; i++)
|
||||
for (ap = allochash[i]; ap; ap = ap->next)
|
||||
if (ap->p && ap->runcount == runcount) {
|
||||
ap->loc->cnt++;
|
||||
r++;
|
||||
}
|
||||
|
||||
if (r) {
|
||||
kprintf("memory leak?\n");
|
||||
for (i = 0; i < HASHNUM; i++)
|
||||
for (lp = lochash[i]; lp; lp = lp->next)
|
||||
if (lp->cnt)
|
||||
kprintf(" alc=%s:%d cnt=%d\n", lp->file, lp->line, lp->cnt);
|
||||
}
|
||||
dkprintf("%s addr_entry %p added\n", __FUNCTION__, r);
|
||||
|
||||
out:
|
||||
return r;
|
||||
}
|
||||
|
||||
void _kfree(void *ptr, char *file, int line)
|
||||
{
|
||||
if (memdebug)
|
||||
_memcheck(ptr, "KFREE", file, line, 1);
|
||||
unsigned long irqflags;
|
||||
struct kmalloc_track_entry *entry;
|
||||
struct kmalloc_track_addr_entry *addr_entry_iter, *addr_entry = NULL;
|
||||
int hash;
|
||||
|
||||
if (!memdebug) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
hash = ((unsigned long)ptr >> 5) & KMALLOC_TRACK_HASH_MASK;
|
||||
irqflags = ihk_mc_spinlock_lock(&kmalloc_addr_hash_locks[hash]);
|
||||
list_for_each_entry(addr_entry_iter,
|
||||
&kmalloc_addr_hash[hash], hash) {
|
||||
if (addr_entry_iter->addr == ptr) {
|
||||
addr_entry = addr_entry_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (addr_entry) {
|
||||
list_del(&addr_entry->hash);
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&kmalloc_addr_hash_locks[hash], irqflags);
|
||||
|
||||
if (!addr_entry) {
|
||||
kprintf("%s: ERROR: kfree()ing invalid pointer\n", __FUNCTION__);
|
||||
panic("panic");
|
||||
}
|
||||
|
||||
entry = addr_entry->entry;
|
||||
|
||||
irqflags = ihk_mc_spinlock_lock(&entry->addr_list_lock);
|
||||
list_del(&addr_entry->list);
|
||||
ihk_mc_spinlock_unlock(&entry->addr_list_lock, irqflags);
|
||||
|
||||
dkprintf("%s addr_entry %p removed\n", __FUNCTION__, addr_entry->addr);
|
||||
___kfree(addr_entry);
|
||||
|
||||
/* Do we need to remove tracking entry as well? */
|
||||
if (!ihk_atomic_dec_and_test(&entry->alloc_count)) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
hash = (strlen(entry->file) + entry->line + entry->size) &
|
||||
KMALLOC_TRACK_HASH_MASK;
|
||||
irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[hash]);
|
||||
list_del(&entry->hash);
|
||||
ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[hash], irqflags);
|
||||
|
||||
dkprintf("%s entry %s:%d size: %d removed\n", __FUNCTION__,
|
||||
entry->file, entry->line, entry->size);
|
||||
___kfree(entry->file);
|
||||
___kfree(entry);
|
||||
|
||||
out:
|
||||
___kfree(ptr);
|
||||
}
|
||||
|
||||
void kmalloc_memcheck(void)
|
||||
{
|
||||
int i;
|
||||
unsigned long irqflags;
|
||||
struct kmalloc_track_entry *entry = NULL;
|
||||
|
||||
for (i = 0; i < KMALLOC_TRACK_HASH_SIZE; ++i) {
|
||||
irqflags = ihk_mc_spinlock_lock(&kmalloc_track_hash_locks[i]);
|
||||
list_for_each_entry(entry, &kmalloc_track_hash[i], hash) {
|
||||
struct kmalloc_track_addr_entry *addr_entry = NULL;
|
||||
int cnt = 0;
|
||||
|
||||
ihk_mc_spinlock_lock_noirq(&entry->addr_list_lock);
|
||||
list_for_each_entry(addr_entry, &entry->addr_list, list) {
|
||||
|
||||
dkprintf("%s memory leak: %p @ %s:%d size: %d runcount: %d\n",
|
||||
__FUNCTION__,
|
||||
addr_entry->addr,
|
||||
entry->file,
|
||||
entry->line,
|
||||
entry->size,
|
||||
addr_entry->runcount);
|
||||
|
||||
if (kmalloc_runcount != addr_entry->runcount)
|
||||
continue;
|
||||
|
||||
cnt++;
|
||||
}
|
||||
ihk_mc_spinlock_unlock_noirq(&entry->addr_list_lock);
|
||||
|
||||
if (!cnt)
|
||||
continue;
|
||||
|
||||
kprintf("%s memory leak: %s:%d size: %d cnt: %d, runcount: %d\n",
|
||||
__FUNCTION__,
|
||||
entry->file,
|
||||
entry->line,
|
||||
entry->size,
|
||||
cnt,
|
||||
kmalloc_runcount);
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&kmalloc_track_hash_locks[i], irqflags);
|
||||
}
|
||||
|
||||
++kmalloc_runcount;
|
||||
}
|
||||
|
||||
/* Redirection routines registered in alloc structure */
|
||||
void *__kmalloc(int size, enum ihk_mc_ap_flag flag)
|
||||
{
|
||||
return kmalloc(size, flag);
|
||||
@ -927,143 +932,199 @@ void __kfree(void *ptr)
|
||||
kfree(ptr);
|
||||
}
|
||||
|
||||
void kmalloc_init(void)
|
||||
|
||||
static void ___kmalloc_insert_chunk(struct list_head *free_list,
|
||||
struct kmalloc_header *chunk)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
struct malloc_header *h = &v->free_list;
|
||||
ihk_mc_spinlock_init(&v->free_list_lock);
|
||||
int i;
|
||||
struct kmalloc_header *chunk_iter, *next_chunk = NULL;
|
||||
|
||||
h->check = 0x5a5a5a5a;
|
||||
h->next = &v->free_list;
|
||||
h->size = 0;
|
||||
|
||||
register_kmalloc();
|
||||
|
||||
memdebug = find_command_line("memdebug");
|
||||
for (i = 0; i < HASHNUM; i++) {
|
||||
allochash[i] = NULL;
|
||||
lochash[i] = NULL;
|
||||
/* Find out where to insert */
|
||||
list_for_each_entry(chunk_iter, free_list, list) {
|
||||
if ((void *)chunk < (void *)chunk_iter) {
|
||||
next_chunk = chunk_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
page = allocate_pages(16, IHK_MC_AP_NOWAIT);
|
||||
space = 16 * 4096;
|
||||
ihk_mc_spinlock_init(&alloclock);
|
||||
|
||||
/* Add in front of next */
|
||||
if (next_chunk) {
|
||||
list_add_tail(&chunk->list, &next_chunk->list);
|
||||
}
|
||||
/* Add after the head */
|
||||
else {
|
||||
list_add(&chunk->list, free_list);
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
static void ___kmalloc_init_chunk(struct kmalloc_header *h, int size)
|
||||
{
|
||||
h->size = size;
|
||||
h->front_magic = 0x5c5c5c5c;
|
||||
h->end_magic = 0x6d6d6d6d;
|
||||
h->cpu_id = ihk_mc_get_processor_id();
|
||||
}
|
||||
|
||||
static void ___kmalloc_consolidate_list(struct list_head *list)
|
||||
{
|
||||
struct kmalloc_header *chunk_iter, *chunk, *next_chunk;
|
||||
|
||||
reiterate:
|
||||
chunk_iter = NULL;
|
||||
chunk = NULL;
|
||||
|
||||
list_for_each_entry(next_chunk, list, list) {
|
||||
|
||||
if (chunk_iter && (((void *)chunk_iter + sizeof(struct kmalloc_header)
|
||||
+ chunk_iter->size) == (void *)next_chunk)) {
|
||||
chunk = chunk_iter;
|
||||
break;
|
||||
}
|
||||
|
||||
chunk_iter = next_chunk;
|
||||
}
|
||||
|
||||
if (!chunk) {
|
||||
return;
|
||||
}
|
||||
|
||||
chunk->size += (next_chunk->size + sizeof(struct kmalloc_header));
|
||||
list_del(&next_chunk->list);
|
||||
goto reiterate;
|
||||
}
|
||||
|
||||
|
||||
void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
|
||||
void kmalloc_consolidate_free_list(void)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
struct malloc_header *h = &v->free_list, *prev, *p;
|
||||
int u, req_page;
|
||||
unsigned long flags;
|
||||
struct kmalloc_header *chunk, *tmp;
|
||||
unsigned long irqflags =
|
||||
ihk_mc_spinlock_lock(&cpu_local_var(remote_free_list_lock));
|
||||
|
||||
if (size >= PAGE_SIZE * 4) {
|
||||
/* Clean up remotely deallocated chunks */
|
||||
list_for_each_entry_safe(chunk, tmp,
|
||||
&cpu_local_var(remote_free_list), list) {
|
||||
|
||||
list_del(&chunk->list);
|
||||
___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
|
||||
}
|
||||
|
||||
/* Free list lock ensures IRQs are disabled */
|
||||
___kmalloc_consolidate_list(&cpu_local_var(free_list));
|
||||
|
||||
ihk_mc_spinlock_unlock(&cpu_local_var(remote_free_list_lock), irqflags);
|
||||
}
|
||||
|
||||
/*
 * Allocation granularity of ___kmalloc(): request sizes are rounded up to a
 * multiple of KMALLOC_MIN_SIZE.
 *
 * KMALLOC_MIN_SIZE is derived from KMALLOC_MIN_SHIFT (32 bytes). It used to be
 * built from KMALLOC_TRACK_HASH_SHIFT, which silently made the granularity
 * 256 bytes and left KMALLOC_MIN_SHIFT unused.
 */
#define KMALLOC_MIN_SHIFT (5)
#define KMALLOC_MIN_SIZE (1 << KMALLOC_MIN_SHIFT)
#define KMALLOC_MIN_MASK (KMALLOC_MIN_SIZE - 1)
|
||||
|
||||
/* Actual low-level allocation routines */
|
||||
static void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
|
||||
{
|
||||
struct kmalloc_header *chunk_iter;
|
||||
struct kmalloc_header *chunk = NULL;
|
||||
int npages;
|
||||
unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
|
||||
|
||||
/* KMALLOC_MIN_SIZE bytes aligned size. */
|
||||
if (size & KMALLOC_MIN_MASK) {
|
||||
size = ((size + KMALLOC_MIN_SIZE - 1) & ~(KMALLOC_MIN_MASK));
|
||||
}
|
||||
|
||||
chunk = NULL;
|
||||
/* Find a chunk that is big enough */
|
||||
list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) {
|
||||
if (chunk_iter->size >= size) {
|
||||
chunk = chunk_iter;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
split_and_return:
|
||||
/* Did we find one? */
|
||||
if (chunk) {
|
||||
/* Do we need to split it? Only if there is enough space for
|
||||
* another header and some actual content */
|
||||
if (chunk->size > (size + sizeof(struct kmalloc_header))) {
|
||||
struct kmalloc_header *leftover;
|
||||
|
||||
leftover = (struct kmalloc_header *)
|
||||
((void *)chunk + sizeof(struct kmalloc_header) + size);
|
||||
___kmalloc_init_chunk(leftover,
|
||||
(chunk->size - size - sizeof(struct kmalloc_header)));
|
||||
list_add(&leftover->list, &chunk->list);
|
||||
chunk->size = size;
|
||||
}
|
||||
|
||||
list_del(&chunk->list);
|
||||
cpu_restore_interrupt(kmalloc_irq_flags);
|
||||
return ((void *)chunk + sizeof(struct kmalloc_header));
|
||||
}
|
||||
|
||||
|
||||
/* Allocate new memory and add it to free list */
|
||||
npages = (size + sizeof(struct kmalloc_header) + (PAGE_SIZE - 1))
|
||||
>> PAGE_SHIFT;
|
||||
chunk = ihk_mc_alloc_pages(npages, flag);
|
||||
|
||||
if (!chunk) {
|
||||
cpu_restore_interrupt(kmalloc_irq_flags);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
u = (size + sizeof(*h) - 1) / sizeof(*h);
|
||||
___kmalloc_init_chunk(chunk,
|
||||
(npages * PAGE_SIZE - sizeof(struct kmalloc_header)));
|
||||
___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
|
||||
|
||||
flags = ihk_mc_spinlock_lock(&v->free_list_lock);
|
||||
|
||||
prev = h;
|
||||
h = h->next;
|
||||
|
||||
while (1) {
|
||||
if (h == &v->free_list) {
|
||||
req_page = ((u + 2) * sizeof(*h) + PAGE_SIZE - 1)
|
||||
>> PAGE_SHIFT;
|
||||
|
||||
h = allocate_pages(req_page, flag);
|
||||
if(h == NULL) {
|
||||
kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag);
|
||||
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
|
||||
return NULL;
|
||||
}
|
||||
h->check = 0x5a5a5a5a;
|
||||
prev->next = h;
|
||||
h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2;
|
||||
/* Guard entry */
|
||||
p = h + h->size + 1;
|
||||
p->check = 0x5a5a5a5a;
|
||||
p->next = &v->free_list;
|
||||
p->size = 0;
|
||||
h->next = p;
|
||||
}
|
||||
|
||||
if (h->size >= u) {
|
||||
if (h->size == u || h->size == u + 1) {
|
||||
prev->next = h->next;
|
||||
h->cpu_id = ihk_mc_get_processor_id();
|
||||
|
||||
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
|
||||
return h + 1;
|
||||
} else { /* Divide */
|
||||
h->size -= u + 1;
|
||||
|
||||
p = h + h->size + 1;
|
||||
p->check = 0x5a5a5a5a;
|
||||
p->size = u;
|
||||
p->cpu_id = ihk_mc_get_processor_id();
|
||||
|
||||
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
|
||||
return p + 1;
|
||||
}
|
||||
}
|
||||
prev = h;
|
||||
h = h->next;
|
||||
}
|
||||
goto split_and_return;
|
||||
}
|
||||
|
||||
void ___kfree(void *ptr)
|
||||
static void ___kfree(void *ptr)
|
||||
{
|
||||
struct malloc_header *p = (struct malloc_header *)ptr;
|
||||
struct cpu_local_var *v = get_cpu_local_var((--p)->cpu_id);
|
||||
struct malloc_header *h = &v->free_list;
|
||||
int combined = 0;
|
||||
unsigned long flags;
|
||||
struct kmalloc_header *chunk =
|
||||
(struct kmalloc_header*)(ptr - sizeof(struct kmalloc_header));
|
||||
unsigned long kmalloc_irq_flags = cpu_disable_interrupt_save();
|
||||
|
||||
flags = ihk_mc_spinlock_lock(&v->free_list_lock);
|
||||
h = h->next;
|
||||
|
||||
while ((p < h || p > h->next) && h != &v->free_list) {
|
||||
h = h->next;
|
||||
/* Sanity check */
|
||||
if (chunk->front_magic != 0x5c5c5c5c || chunk->end_magic != 0x6d6d6d6d) {
|
||||
kprintf("%s: memory corruption at address 0x%p\n", __FUNCTION__, ptr);
|
||||
panic("panic");
|
||||
}
|
||||
|
||||
if (h + h->size + 1 == p && h->size != 0) {
|
||||
combined = 1;
|
||||
h->size += p->size + 1;
|
||||
h->check = 0x5a5a5a5a;
|
||||
/* Does this chunk belong to this CPU? */
|
||||
if (chunk->cpu_id == ihk_mc_get_processor_id()) {
|
||||
|
||||
___kmalloc_insert_chunk(&cpu_local_var(free_list), chunk);
|
||||
___kmalloc_consolidate_list(&cpu_local_var(free_list));
|
||||
}
|
||||
if (h->next == p + p->size + 1 && h->next->size != 0) {
|
||||
if (combined) {
|
||||
h->check = 0x5a5a5a5a;
|
||||
h->size += h->next->size + 1;
|
||||
h->next = h->next->next;
|
||||
} else {
|
||||
p->check = 0x5a5a5a5a;
|
||||
p->size += h->next->size + 1;
|
||||
p->next = h->next->next;
|
||||
h->next = p;
|
||||
}
|
||||
} else if (!combined) {
|
||||
p->next = h->next;
|
||||
h->next = p;
|
||||
else {
|
||||
struct cpu_local_var *v = get_cpu_local_var(chunk->cpu_id);
|
||||
unsigned long irqflags;
|
||||
|
||||
irqflags = ihk_mc_spinlock_lock(&v->remote_free_list_lock);
|
||||
list_add(&chunk->list, &v->remote_free_list);
|
||||
ihk_mc_spinlock_unlock(&v->remote_free_list_lock, irqflags);
|
||||
}
|
||||
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
|
||||
|
||||
cpu_restore_interrupt(kmalloc_irq_flags);
|
||||
}
|
||||
|
||||
void print_free_list(void)
|
||||
|
||||
void ___kmalloc_print_free_list(struct list_head *list)
|
||||
{
|
||||
struct cpu_local_var *v = get_this_cpu_local_var();
|
||||
struct malloc_header *h = &v->free_list;
|
||||
struct kmalloc_header *chunk_iter;
|
||||
unsigned long irqflags = kprintf_lock();
|
||||
|
||||
h = h->next;
|
||||
|
||||
kprintf("free_list : \n");
|
||||
while (h != &v->free_list) {
|
||||
kprintf(" %p : %p, %d ->\n", h, h->next, h->size);
|
||||
h = h->next;
|
||||
__kprintf("%s: [ \n", __FUNCTION__);
|
||||
list_for_each_entry(chunk_iter, &cpu_local_var(free_list), list) {
|
||||
__kprintf("%s: 0x%lx:%d (VA PFN: %lu, off: %lu)\n", __FUNCTION__,
|
||||
(unsigned long)chunk_iter,
|
||||
chunk_iter->size,
|
||||
(unsigned long)chunk_iter >> PAGE_SHIFT,
|
||||
(unsigned long)chunk_iter % PAGE_SIZE);
|
||||
}
|
||||
kprintf("\n");
|
||||
__kprintf("%s: ] \n", __FUNCTION__);
|
||||
kprintf_unlock(irqflags);
|
||||
}
|
||||
|
||||
|
||||
759
kernel/process.c
759
kernel/process.c
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user