arch: x86 -> x86_64 and build system changes

Balazs Gerofi
2017-10-10 23:24:07 +09:00
committed by Hannes Weisbach
parent 51982de36b
commit 43ecf06e83
70 changed files with 50 additions and 56 deletions


@ -0,0 +1,36 @@
DEST=$(O)/elfboot
CFLAGS=-c -Wall -O
CFLAGS_TEST=-DTEST
all: $(DEST) $(DEST)/elfboot
$(DEST)/elfboot: $(DEST)/elfboot.bin
cp $^ $@
truncate -s $(shell expr '(' `stat -c '%s' $^` + 4095 ')' / 4096 '*' 4096) $@
$(DEST)/elfboot.bin: $(DEST)/elfboot.elf
$(OBJCOPY) -O binary $^ $@
$(DEST)/elfboot.elf: $(DEST)/head.o $(DEST)/elfboot.raw.o raw.lds
$(LD) $(LDFLAGS_RAW) -T raw.lds -o $@ $^
$(DEST)/elfboot_test: $(DEST)/elfboot.test.o $(DEST)/test_main.o
$(CC) -o $@ $^
$(DEST)/head.o: head.S
$(CC) $(CFLAGS) -o $@ $^
$(DEST)/elfboot.raw.o: elfboot.c
$(CC) $(CFLAGS) -o $@ $^
$(DEST)/elfboot.test.o: elfboot.c
$(CC) $(CFLAGS) $(CFLAGS_TEST) -o $@ $^
clean:
$(RM) $(DEST)/elfboot *.bin $(DEST)/*.elf $(DEST)/elfboot_test *.o
disas:
$(OBJDUMP) -b binary -m i386:x86-64 -D $(DEST)/elfboot.bin
$(DEST):
@mkdir -p $(DEST)
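
The truncate/expr recipe above pads elfboot to the next multiple of 4096 bytes, presumably so that whatever is appended after the boot stub starts page-aligned (head.S, shown below, passes _stack_end as the ELF image address, which suggests the kernel image is concatenated right after the padded stub). A minimal C sketch of the same round-up arithmetic, for reference only:

/* Illustrative only: the round-up computed by the expr invocation above. */
static unsigned long round_up_4k(unsigned long size)
{
	return (size + 4095) / 4096 * 4096;
}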


@ -0,0 +1,96 @@
/**
* \file elfboot.c
* License details are found in the file LICENSE.
* \brief
* Load an ELF image.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#include <elf.h>
#include "test.h"
#ifdef TEST
static void *memcpy(void *dest, void *src, unsigned long len)
{
dprintf("Copying %p to %p for %08ld bytes\n", src, dest, len);
return dest;
}
static void *memset(void *dest, int v, unsigned long len)
{
dprintf("Filling %p with %02x for %08ld bytes\n", dest, (unsigned char)v, len);
return dest;
}
#else
static void *memcpy(void *dest, void *src, unsigned long len)
{
char *d = dest, *s = src;
for ( ; len ; len--) {
*(d++) = *(s++);
}
return dest;
}
static void *memset(void *dest, int v, unsigned long len)
{
char *d = dest;
for ( ; len ; len--) {
*(d++) = (char)v;
}
return dest;
}
#endif
static void load_programs(unsigned char *image, Elf64_Phdr *hdrs, int nhdr)
{
int i;
for (i = 0; i < nhdr; i++) {
if (hdrs[i].p_type == PT_LOAD) {
dprintf("PT_LOAD : %lx: %lx - %lx (%lx)\n",
hdrs[i].p_vaddr,
hdrs[i].p_offset, hdrs[i].p_filesz,
hdrs[i].p_memsz);
memcpy((void *)hdrs[i].p_vaddr,
image + hdrs[i].p_offset,
hdrs[i].p_filesz);
if (hdrs[i].p_filesz < hdrs[i].p_memsz) {
memset((void *)hdrs[i].p_vaddr +
hdrs[i].p_filesz, 0,
hdrs[i].p_memsz - hdrs[i].p_filesz);
}
}
}
}
/*
* Return value: If success, the entry point address. Otherwise, 0.
*/
unsigned long elfboot_main(unsigned char *image)
{
Elf64_Ehdr *hdr;
hdr = (Elf64_Ehdr *)image;
if (hdr->e_ident[0] != 0x7f || hdr->e_ident[1] != 'E'
|| hdr->e_ident[2] != 'L' || hdr->e_ident[3] != 'F') {
return 0;
}
/* TODO: source and destination ranges may overlap, so copying should be more sophisticated */
if (!hdr->e_phoff || hdr->e_phentsize != sizeof(Elf64_Phdr)) {
return 0;
}
load_programs(image,
(Elf64_Phdr *)(image + hdr->e_phoff), hdr->e_phnum);
return hdr->e_entry;
}


@ -0,0 +1,34 @@
/**
* \file head.S
* License details are found in the file LICENSE.
* \brief
* Entry point calling to elfboot_main.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
.text
.globl _start
_start:
leaq _stack_end(%rip), %rsp
/* preserve arguments */
pushq %rdi
pushq %rsi
pushq %rdx
pushq %rcx
leaq _stack_end(%rip), %rdi
call elfboot_main
andq %rax, %rax
jz 1f
popq %rcx
popq %rdx
popq %rsi
popq %rdi
jmpq *%rax
1:
cli
hlt
jmp 1b
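
head.S builds no frame of its own: it points %rsp at _stack_end, saves %rdi/%rsi/%rdx/%rcx (the first four System V AMD64 argument registers), calls elfboot_main() with _stack_end as the image address, then restores the saved arguments and jumps to the returned entry point, halting forever if loading failed. A rough C equivalent, for orientation only (names are illustrative, and the real code must run without a C runtime, hence the assembly):

typedef void (*entry_fn_t)(unsigned long, unsigned long,
			   unsigned long, unsigned long);

extern unsigned long elfboot_main(unsigned char *image);
extern unsigned char _stack_end[];

static void start_sketch(unsigned long a0, unsigned long a1,
			 unsigned long a2, unsigned long a3)
{
	unsigned long entry = elfboot_main(_stack_end); /* image follows the stub */

	if (entry)
		((entry_fn_t)entry)(a0, a1, a2, a3);    /* jmpq *%rax */

	for (;;)
		;                                       /* cli; hlt loop */
}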


@ -0,0 +1,37 @@
ENTRY(_start)
PHDRS
{
text PT_LOAD;
data PT_LOAD;
}
SECTIONS
{
. = SIZEOF_HEADERS;
. = ALIGN(4096);
.text : {
*(.text)
} :text
.data : {
*(.data)
*(.data.*)
} :data
.rodata : {
*(.rodata .rodata.*)
} :data
. = ALIGN(8);
.bss : {
_bss_start = .;
*(.bss .bss.*)
_bss_end = .;
. = ALIGN(4096);
_stack_end = .;
} :data
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
}
}


@ -0,0 +1,21 @@
/**
* \file test.h
* License details are found in the file LICENSE.
* \brief
* Header file of test of loading an ELF image.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifdef TEST
#include <stdio.h>
#include <stdlib.h>
#define dprintf printf
#else
#define dprintf(...)
#endif


@ -0,0 +1,53 @@
/**
* \file test_main.c
* License details are found in the file LICENSE.
* \brief
* Test of loading an ELF file.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <sys/stat.h>
#include <sys/mman.h>
unsigned long elfboot_main(unsigned char *image);
int main(int argc, char **argv)
{
int fd;
struct stat st;
void *p;
if (argc < 2) {
fprintf(stderr, "Usage : %s (elf)\n", argv[0]);
return 1;
}
fd = open(argv[1], O_RDONLY);
if (fd < 0){
perror("open");
return 1;
}
fstat(fd, &st);
p = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (p == MAP_FAILED) {
perror("mmap");
return 2;
}
printf("read result : %lx\n", elfboot_main(p));
munmap(p, st.st_size);
close(fd);
return 0;
}


@ -0,0 +1,27 @@
DEST=$(O)/kboot
OBJS=$(DEST)/main.o $(DEST)/data.o
CFLAGS=-mno-sse -mno-mmx -mno-sse2 -mno-3dnow -mcmodel=large
$(if $(O),,$(error Specify the target directory))
$(DEST)/kboot.elf: $(DEST) $(DEST)/kernel.lds $(OBJS)
@$(LD) $(LDFLAGS) -o $@ -T $(DEST)/kernel.lds -nostdlib $(OBJS)
$(DEST)/%.o: %.c
@$(CC) $(CFLAGS) -c -o $@ -O3 $<
$(DEST)/data.o: data.S
@$(CC) -c -o $@ -O3 -DKIMAGE='"$(KIMAGE)"' $^
$(DEST)/kernel.lds: kernel.lds.S
$(if $(LOAD_PA),,$(error Specify the loading physical address))
@$(CC) -E -P -DLOAD_PA=$(LOAD_PA) -o $@ $<
$(DEST):
@mkdir -p $(DEST)
clean:
@$(RM) $(DEST)/*
.PHONY: clean

arch/x86_64/kboot/data.S

@ -0,0 +1,9 @@
#ifndef KIMAGE
#error "No kernel image is specified"
#endif
.data
.globl data_start
data_start:
.incbin KIMAGE


@ -0,0 +1,34 @@
ENTRY(main)
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
. = LOAD_PA;
_head = .;
.text : {
*(.text);
} : text
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
} :data
.rodata : {
*(.rodata .rodata.*)
} :data
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
data_end = .;
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
}
}

arch/x86_64/kboot/main.c

@ -0,0 +1,144 @@
/**
* \file main.c
* License details are found in the file LICENSE.
* \brief
* Load an ELF image placed at data_start and jump to its entry point.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#include <elf.h>
extern char data_start[], data_end[];
#define LARGE_PAGE_SIZE (1UL << 21)
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
#define MAP_ST_START 0xffff800000000000UL
#define MAP_KERNEL_START 0xffffffff80000000UL
#define PTL4_SHIFT 39
#define PTL3_SHIFT 30
#define PTL2_SHIFT 21
unsigned long page_tables[3][512] __attribute__((aligned(4096)));
static void *memcpy(void *dest, void *src, unsigned long len)
{
char *d = dest, *s = src;
for ( ; len ; len--) {
*(d++) = *(s++);
}
return dest;
}
static void *memset(void *dest, int v, unsigned long len)
{
char *d = dest;
for ( ; len ; len--) {
*(d++) = (char)v;
}
return dest;
}
void memzerol(unsigned long *p, unsigned long size)
{
unsigned long i;
size /= sizeof(unsigned long);
for (i = 0; i < size; i++) {
p[i] = 0;
}
}
static unsigned long load_programs(unsigned char *image, Elf64_Phdr *hdrs,
int nhdr, unsigned long offset)
{
int i;
unsigned long end = MAP_KERNEL_START;
for (i = 0; i < nhdr; i++) {
if (hdrs[i].p_type == PT_LOAD) {
memcpy((void *)(hdrs[i].p_vaddr - offset),
image + hdrs[i].p_offset,
hdrs[i].p_filesz);
if (hdrs[i].p_filesz < hdrs[i].p_memsz) {
memset((void *)(hdrs[i].p_vaddr +
hdrs[i].p_filesz - offset), 0,
hdrs[i].p_memsz - hdrs[i].p_filesz);
}
if (end < hdrs[i].p_vaddr + hdrs[i].p_memsz) {
end = hdrs[i].p_vaddr + hdrs[i].p_memsz;
}
}
}
return end;
}
/*
* Return value: If success, the entry point address. Otherwise, 0.
*/
unsigned long load_elf(unsigned char *image, unsigned long offset)
{
Elf64_Ehdr *hdr = (Elf64_Ehdr *)image;
if (hdr->e_ident[0] != 0x7f || hdr->e_ident[1] != 'E'
|| hdr->e_ident[2] != 'L' || hdr->e_ident[3] != 'F') {
return 0;
}
/* TODO: source and destination ranges may overlap, so copying should be more sophisticated */
if (!hdr->e_phoff || hdr->e_phentsize != sizeof(Elf64_Phdr)) {
return 0;
}
return load_programs(image,
(Elf64_Phdr *)(image + hdr->e_phoff), hdr->e_phnum,
offset);
}
void main(unsigned long param)
{
/* Assume phys == virt */
unsigned long load_address, end, *org_cr3;
unsigned long i, n;
Elf64_Ehdr *hdr;
void (*entry)(unsigned long param, unsigned long load_address);
load_address = (unsigned long)data_end;
load_address = (load_address + LARGE_PAGE_SIZE - 1) & LARGE_PAGE_MASK;
asm volatile("movq %%cr3, %0" : "=r"(org_cr3));
memzerol((unsigned long *)page_tables, sizeof(page_tables));
page_tables[0][0] = org_cr3[0];
page_tables[0][(MAP_ST_START >> PTL4_SHIFT) & 511] = org_cr3[0];
page_tables[0][(MAP_KERNEL_START >> PTL4_SHIFT) & 511] =
((unsigned long)page_tables[1]) | 3;
page_tables[1][(MAP_KERNEL_START >> PTL3_SHIFT) & 511] =
((unsigned long)page_tables[2]) | 3;
end = load_elf(data_start, MAP_KERNEL_START - load_address);
/* map 4 MB more, just in case */
n = (end - MAP_KERNEL_START + (1 << PTL2_SHIFT) - 1) >> PTL2_SHIFT;
n += 2;
for (i = 0; i < n; i++) {
page_tables[2][i] = (load_address + (i << PTL2_SHIFT)) | 0x83;
}
hdr = (Elf64_Ehdr *)data_start;
asm volatile("movq %0, %%cr3" : : "r"(page_tables) : "memory");
entry = (void *)hdr->e_entry;
entry(param, load_address);
}
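
For reference, the index arithmetic used in main() above: MAP_KERNEL_START (0xffffffff80000000) selects PML4 (PTL4) entry 511 and PDPT (PTL3) entry 510, and every PD (PTL2) entry written in the loop maps a 2 MB page with flags 0x83, i.e. present | writable | page-size. A small, standalone check of those numbers (illustrative only, not part of the boot loader):

#include <assert.h>

int main(void)
{
	unsigned long kstart = 0xffffffff80000000UL; /* MAP_KERNEL_START */

	assert(((kstart >> 39) & 511) == 511); /* PTL4 (PML4) index */
	assert(((kstart >> 30) & 511) == 510); /* PTL3 (PDPT) index */
	assert(((kstart >> 21) & 511) == 0);   /* PTL2 (PD) index   */
	assert(0x83 == (0x80 /* 2MB page */ | 0x2 /* writable */ | 0x1 /* present */));
	return 0;
}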


@ -0,0 +1,2 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o


@ -0,0 +1,57 @@
/**
* \file context.S
* License details are found in the file LICENSE.
* \brief
* Save registers of old context and load registers of new context.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#define X86_CPU_LOCAL_OFFSET_TSS 176
#define X86_TSS_OFFSET_SP0 4
#define X86_CPU_LOCAL_OFFSET_SP0 \
(X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0)
.text
.globl ihk_mc_switch_context
ihk_mc_switch_context:
/*
* rdi - ihk_mc_kernel_context_t *old_ctx
* rsi - ihk_mc_kernel_context_t *new_ctx
* rdx - void *prev
*/
pushfq
popq %rax
testq %rdi, %rdi
jz 1f /* skip saving if "old_ctx" is null. */
movq %rbp, 8(%rdi)
movq %rbx, 16(%rdi)
movq %rax, 72(%rdi) /* rflags */
movq %rsi, 24(%rdi)
movq %rdi, 32(%rdi)
movq %r12, 40(%rdi)
movq %r13, 48(%rdi)
movq %r14, 56(%rdi)
movq %r15, 64(%rdi)
movq %rsp, 0(%rdi)
1:
movq 0(%rsi), %rsp
movq 80(%rsi), %rbp
movq %rbp, %gs:(X86_CPU_LOCAL_OFFSET_SP0)
movq 64(%rsi), %r15
movq 56(%rsi), %r14
movq 48(%rsi), %r13
movq 40(%rsi), %r12
movq 32(%rsi), %rdi
movq 16(%rsi), %rbx
movq 72(%rsi), %rax
pushq %rax
popfq
movq 8(%rsi), %rbp
movq 24(%rsi), %rsi
movq %rdx,%rax
retq
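
The fixed offsets above imply the following register save area, sketched here as a C struct purely for readability (the real ihk_mc_kernel_context type is declared elsewhere in the tree):

/* Sketch of the layout implied by the offsets in ihk_mc_switch_context. */
struct kernel_context_sketch {
	unsigned long rsp;    /*  0 */
	unsigned long rbp;    /*  8 */
	unsigned long rbx;    /* 16 */
	unsigned long rsi;    /* 24 */
	unsigned long rdi;    /* 32 */
	unsigned long r12;    /* 40 */
	unsigned long r13;    /* 48 */
	unsigned long r14;    /* 56 */
	unsigned long r15;    /* 64 */
	unsigned long rflags; /* 72 */
	unsigned long sp0;    /* 80: copied into the per-CPU TSS sp0 slot on switch-in */
};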


@ -0,0 +1,59 @@
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <process.h>
#include <elfcore.h>
void arch_fill_prstatus(struct elf_prstatus64 *prstatus, struct thread *thread, void *regs0)
{
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
register unsigned long _r12 asm("r12");
register unsigned long _r13 asm("r13");
register unsigned long _r14 asm("r14");
register unsigned long _r15 asm("r15");
/*
We ignore following entries for now.
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
*/
prstatus->pr_reg[0] = _r15;
prstatus->pr_reg[1] = _r14;
prstatus->pr_reg[2] = _r13;
prstatus->pr_reg[3] = _r12;
prstatus->pr_reg[4] = regs->rbp;
prstatus->pr_reg[5] = regs->rbx;
prstatus->pr_reg[6] = regs->r11;
prstatus->pr_reg[7] = regs->r10;
prstatus->pr_reg[8] = regs->r9;
prstatus->pr_reg[9] = regs->r8;
prstatus->pr_reg[10] = regs->rax;
prstatus->pr_reg[11] = regs->rcx;
prstatus->pr_reg[12] = regs->rdx;
prstatus->pr_reg[13] = regs->rsi;
prstatus->pr_reg[14] = regs->rdi;
prstatus->pr_reg[15] = regs->rax; /* ??? */
prstatus->pr_reg[16] = regs->rip;
prstatus->pr_reg[17] = regs->cs;
prstatus->pr_reg[18] = regs->rflags;
prstatus->pr_reg[19] = regs->rsp;
prstatus->pr_reg[20] = regs->ss;
prstatus->pr_reg[21] = rdmsr(MSR_FS_BASE);
prstatus->pr_reg[22] = rdmsr(MSR_GS_BASE);
/* There is no ds, es, fs and gs. */
prstatus->pr_fpvalid = 0; /* We assume no fp */
}
#endif /* POSTK_DEBUG_ARCH_DEP_18 */

arch/x86_64/kernel/cpu.c (2143 lines)
File diff suppressed because it is too large


@ -0,0 +1,546 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#include <ihk/debug.h>
#include <kmalloc.h>
#include <cls.h>
#include <list.h>
#include <process.h>
#include <string.h>
#include <elfcore.h>
#define align32(x) ((((x) + 3) / 4) * 4)
#define alignpage(x) ((((x) + (PAGE_SIZE) - 1) / (PAGE_SIZE)) * (PAGE_SIZE))
//#define DEBUG_PRINT_GENCORE
#ifdef DEBUG_PRINT_GENCORE
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
/*
* Generate a core file image, which consists of many chunks.
* Returns an allocated table, an entry of which is a pair of the address
* of a chunk and its length.
*/
/**
* \brief Fill the elf header.
*
* \param eh An Elf64_Ehdr structure.
* \param segs Number of segments of the core file.
*/
void fill_elf_header(Elf64_Ehdr *eh, int segs)
{
eh->e_ident[EI_MAG0] = 0x7f;
eh->e_ident[EI_MAG1] = 'E';
eh->e_ident[EI_MAG2] = 'L';
eh->e_ident[EI_MAG3] = 'F';
eh->e_ident[EI_CLASS] = ELFCLASS64;
eh->e_ident[EI_DATA] = ELFDATA2LSB;
eh->e_ident[EI_VERSION] = El_VERSION;
eh->e_ident[EI_OSABI] = ELFOSABI_NONE;
eh->e_ident[EI_ABIVERSION] = El_ABIVERSION_NONE;
eh->e_type = ET_CORE;
#ifdef CONFIG_MIC
eh->e_machine = EM_K10M;
#else
eh->e_machine = EM_X86_64;
#endif
eh->e_version = EV_CURRENT;
eh->e_entry = 0; /* Do we really need this? */
eh->e_phoff = 64; /* fixed */
eh->e_shoff = 0; /* no section header */
eh->e_flags = 0;
eh->e_ehsize = 64; /* fixed */
eh->e_phentsize = 56; /* fixed */
eh->e_phnum = segs;
eh->e_shentsize = 0;
eh->e_shnum = 0;
eh->e_shstrndx = 0;
}
/**
* \brief Return the size of the prstatus entry of the NOTE segment.
*
*/
int get_prstatus_size(void)
{
return sizeof(struct note) + align32(sizeof("CORE"))
+ align32(sizeof(struct elf_prstatus64));
}
/**
* \brief Fill a prstatus structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs0 A pointer to a x86_regs structure.
*/
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
{
void *name;
struct elf_prstatus64 *prstatus;
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
register unsigned long _r12 asm("r12");
register unsigned long _r13 asm("r13");
register unsigned long _r14 asm("r14");
register unsigned long _r15 asm("r15");
head->namesz = sizeof("CORE");
head->descsz = sizeof(struct elf_prstatus64);
head->type = NT_PRSTATUS;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
prstatus = (struct elf_prstatus64 *)(name + align32(sizeof("CORE")));
/*
We ignore following entries for now.
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
*/
prstatus->pr_reg[0] = _r15;
prstatus->pr_reg[1] = _r14;
prstatus->pr_reg[2] = _r13;
prstatus->pr_reg[3] = _r12;
prstatus->pr_reg[4] = regs->rbp;
prstatus->pr_reg[5] = regs->rbx;
prstatus->pr_reg[6] = regs->r11;
prstatus->pr_reg[7] = regs->r10;
prstatus->pr_reg[8] = regs->r9;
prstatus->pr_reg[9] = regs->r8;
prstatus->pr_reg[10] = regs->rax;
prstatus->pr_reg[11] = regs->rcx;
prstatus->pr_reg[12] = regs->rdx;
prstatus->pr_reg[13] = regs->rsi;
prstatus->pr_reg[14] = regs->rdi;
prstatus->pr_reg[15] = regs->rax; /* ??? */
prstatus->pr_reg[16] = regs->rip;
prstatus->pr_reg[17] = regs->cs;
prstatus->pr_reg[18] = regs->rflags;
prstatus->pr_reg[19] = regs->rsp;
prstatus->pr_reg[20] = regs->ss;
prstatus->pr_reg[21] = rdmsr(MSR_FS_BASE);
prstatus->pr_reg[22] = rdmsr(MSR_GS_BASE);
/* There is no ds, es, fs and gs. */
prstatus->pr_fpvalid = 0; /* We assume no fp */
}
/**
* \brief Return the size of the prpsinfo entry of the NOTE segment.
*
*/
int get_prpsinfo_size(void)
{
return sizeof(struct note) + align32(sizeof("CORE"))
+ align32(sizeof(struct elf_prpsinfo64));
}
/**
* \brief Fill a prpsinfo structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
{
void *name;
struct elf_prpsinfo64 *prpsinfo;
head->namesz = sizeof("CORE");
head->descsz = sizeof(struct elf_prpsinfo64);
head->type = NT_PRPSINFO;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
prpsinfo->pr_state = thread->status;
prpsinfo->pr_pid = thread->proc->pid;
/*
We leave most of the fields unfilled.
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
*/
}
/**
* \brief Return the size of the AUXV entry of the NOTE segment.
*
*/
int get_auxv_size(void)
{
return sizeof(struct note) + align32(sizeof("CORE"))
+ sizeof(unsigned long) * AUXV_LEN;
}
/**
* \brief Fill an AUXV structure.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_auxv(struct note *head, struct thread *thread, void *regs)
{
void *name;
void *auxv;
head->namesz = sizeof("CORE");
head->descsz = sizeof(unsigned long) * AUXV_LEN;
head->type = NT_AUXV;
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
auxv = name + align32(sizeof("CORE"));
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
}
/**
* \brief Return the size of the whole NOTE segment.
*
*/
int get_note_size(void)
{
return get_prstatus_size() + get_prpsinfo_size()
+ get_auxv_size();
}
/**
* \brief Fill the NOTE segment.
*
* \param head A pointer to a note structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_note(void *note, struct thread *thread, void *regs)
{
fill_prstatus(note, thread, regs);
note += get_prstatus_size();
fill_prpsinfo(note, thread, regs);
note += get_prpsinfo_size();
fill_auxv(note, thread, regs);
}
/**
* \brief Generate an image of the core file.
*
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
* \param coretable(out) An array of core chunks.
* \param chunks(out) Number of the entires of coretable.
*
* A core chunk is represented by a pair of a physical
* address of a memory region and its size. If there is
* no corresponding physical address for a VM area
* (e.g., an unallocated demand-paging page), the address
* should be zero.
*/
/*@
@ requires \valid(thread);
@ requires \valid(regs);
@ requires \valid(coretable);
@ requires \valid(chunks);
@ behavior success:
@ ensures \result == 0;
@ assigns coretable;
@ behavior failure:
@ ensures \result == -1;
@*/
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
{
struct coretable *ct = NULL;
Elf64_Ehdr eh;
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range, *next;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
int i;
*chunks = 3; /* Elf header , header table and NOTE segment */
if (vm == NULL) {
dkprintf("no vm found.\n");
return -1;
}
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
dkprintf("start:%lx end:%lx flag:%lx objoff:%lx\n",
range->start, range->end, range->flag, range->objoff);
/* We omit reserved areas because they are only for
mckernel's internal use. */
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DONTDUMP)
continue;
/* We need a chunk for each page of a demand-paging area.
This could be optimized for space complexity, but we would
lose simplicity instead. */
if (range->flag & VR_DEMAND_PAGING) {
unsigned long p, phys;
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
if (prevzero == 1)
(*chunks)++;
(*chunks)++;
prevzero = 0;
}
}
if (prevzero == 1)
(*chunks)++;
} else {
(*chunks)++;
}
segs++;
}
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = thread->vm->region;
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
dkprintf("brk: %lx-%lx\n", region.brk_start, region.brk_end);
dkprintf("map: %lx-%lx\n", region.map_start, region.map_end);
dkprintf("stack: %lx-%lx\n", region.stack_start, region.stack_end);
dkprintf("user: %lx-%lx\n\n", region.user_start, region.user_end);
}
dkprintf("now generate a core file image\n");
offset += sizeof(eh);
fill_elf_header(&eh, segs);
/* program header table */
phsize = sizeof(Elf64_Phdr) * segs;
ph = kmalloc(phsize, IHK_MC_AP_NOWAIT);
if (ph == NULL) {
dkprintf("could not alloc a program header table.\n");
goto fail;
}
memset(ph, 0, phsize);
offset += phsize;
/* NOTE segment
* To align the next segment page-sized, we prepare a padded
* region for our NOTE segment.
*/
notesize = get_note_size();
alignednotesize = alignpage(notesize + offset) - offset;
note = kmalloc(alignednotesize, IHK_MC_AP_NOWAIT);
if (note == NULL) {
dkprintf("could not alloc NOTE for core.\n");
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, thread, regs);
/* program header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
ph[0].p_flags = 0;
ph[0].p_offset = offset;
ph[0].p_vaddr = 0;
ph[0].p_paddr = 0;
ph[0].p_filesz = notesize;
ph[0].p_memsz = notesize;
ph[0].p_align = 0;
offset += alignednotesize;
/* program header for each memory chunk */
i = 1;
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long flag = range->flag;
unsigned long size = range->end - range->start;
if (range->flag & VR_RESERVED)
continue;
ph[i].p_type = PT_LOAD;
ph[i].p_flags = ((flag & VR_PROT_READ) ? PF_R : 0)
| ((flag & VR_PROT_WRITE) ? PF_W : 0)
| ((flag & VR_PROT_EXEC) ? PF_X : 0);
ph[i].p_offset = offset;
ph[i].p_vaddr = range->start;
ph[i].p_paddr = 0;
ph[i].p_filesz = size;
ph[i].p_memsz = size;
ph[i].p_align = PAGE_SIZE;
i++;
offset += size;
}
/* coretable to send to host */
ct = kmalloc(sizeof(struct coretable) * (*chunks), IHK_MC_AP_NOWAIT);
if (!ct) {
dkprintf("could not alloc a coretable.\n");
goto fail;
}
ct[0].addr = virt_to_phys(&eh); /* ELF header */
ct[0].len = 64;
dkprintf("coretable[0]: %lx@%lx(%lx)\n", ct[0].len, ct[0].addr, &eh);
ct[1].addr = virt_to_phys(ph); /* program header table */
ct[1].len = phsize;
dkprintf("coretable[1]: %lx@%lx(%lx)\n", ct[1].len, ct[1].addr, ph);
ct[2].addr = virt_to_phys(note); /* NOTE segment */
ct[2].len = alignednotesize;
dkprintf("coretable[2]: %lx@%lx(%lx)\n", ct[2].len, ct[2].addr, note);
i = 3; /* memory segments */
next = lookup_process_memory_range(vm, 0, -1);
while ((range = next)) {
next = next_process_memory_range(vm, range);
unsigned long phys;
if (range->flag & VR_RESERVED)
continue;
if (range->flag & VR_DEMAND_PAGING) {
/* Just an ad hoc kluge. */
unsigned long p, start, phys;
int prevzero = 0;
unsigned long size = 0;
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* We begin a new chunk */
size = PAGE_SIZE;
start = p;
} else {
/* We extend the previous chunk */
size += PAGE_SIZE;
}
prevzero = 1;
} else {
if (prevzero == 1) {
/* Flush out an empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
ct[i].addr = phys;
ct[i].len = PAGE_SIZE;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, p);
i++;
prevzero = 0;
}
}
if (prevzero == 1) {
/* An empty chunk */
ct[i].addr = 0;
ct[i].len = size;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, start);
i++;
}
} else {
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start, &phys) != 0) {
dkprintf("could not convert user virtual address %lx"
" to physical address", range->start);
goto fail;
}
} else {
phys = virt_to_phys((void *)range->start);
}
ct[i].addr = phys;
ct[i].len = range->end - range->start;
dkprintf("coretable[%d]: %lx@%lx(%lx)\n", i,
ct[i].len, ct[i].addr, range->start);
i++;
}
}
*coretable = ct;
return 0;
fail:
if (ct)
kfree(ct);
if (ph)
kfree(ph);
if (note)
kfree(note);
return -1;
}
/**
* \brief Free all the allocated spaces for an image of the core file.
*
* \param coretable An array of core chunks.
*/
/*@
@ requires \valid(coretable);
@ assigns \nothing;
@*/
void freecore(struct coretable **coretable)
{
struct coretable *ct = *coretable;
kfree(phys_to_virt(ct[2].addr)); /* NOTE segment */
kfree(phys_to_virt(ct[1].addr)); /* ph */
kfree(*coretable);
}
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */
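
The coretable built by gencore() is handed to the host (see the "coretable to send to host" comment above); the code only relies on it being an array of (physical address, length) pairs. A sketch of that shape and of the resulting layout (struct coretable itself is defined in another header, so the fields here are illustrative):

/* Field names mirror the usage in gencore(); illustrative only. */
struct coretable_sketch {
	unsigned long addr; /* physical address of the chunk, 0 for a hole */
	unsigned long len;  /* chunk length in bytes */
};

/*
 * Resulting table for a core image with N memory chunks:
 *   ct[0]            ELF header (64 bytes)
 *   ct[1]            program header table
 *   ct[2]            NOTE segment, padded to the next page boundary
 *   ct[3]..ct[2+N]   memory chunks; addr == 0 marks a never-populated
 *                    demand-paging range (no physical page to dump)
 */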


@ -0,0 +1,98 @@
/**
* \file arch-bitops.h
* License details are found in the file LICENSE.
* \brief
* Find last set bit in word.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef HEADER_X86_COMMON_ARCH_BITOPS_H
#define HEADER_X86_COMMON_ARCH_BITOPS_H
#define ARCH_HAS_FAST_MULTIPLIER 1
static inline int fls(int x)
{
int r;
asm("bsrl %1,%0\n\t"
"jnz 1f\n\t"
"movl $-1,%0\n"
"1:" : "=r" (r) : "rm" (x));
return r + 1;
}
/**
* ffs - find first set bit in word
* @x: the word to search
*
* This is defined the same way as the libc and compiler builtin ffs
* routines, therefore differs in spirit from the other bitops.
*
* ffs(value) returns 0 if value is 0 or the position of the first
* set bit if value is nonzero. The first (least significant) bit
* is at position 1.
*/
static inline int ffs(int x)
{
int r;
asm("bsfl %1,%0\n\t"
"jnz 1f\n\t"
"movl $-1,%0\n"
"1:" : "=r" (r) : "rm" (x));
return r + 1;
}
/**
* __ffs - find first set bit in word
* @word: The word to search
*
* Undefined if no bit exists, so code should check against 0 first.
*/
static inline unsigned long __ffs(unsigned long word)
{
asm("bsf %1,%0"
: "=r" (word)
: "rm" (word));
return word;
}
/**
* ffz - find first zero bit in word
* @word: The word to search
*
* Undefined if no zero exists, so code should check against ~0UL first.
*/
static inline unsigned long ffz(unsigned long word)
{
asm("bsf %1,%0"
: "=r" (word)
: "r" (~word));
return word;
}
#define ADDR (*(volatile long *)addr)
static inline void set_bit(int nr, volatile unsigned long *addr)
{
asm volatile("lock; btsl %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
}
static inline void clear_bit(int nr, volatile unsigned long *addr)
{
asm volatile("lock; btrl %1,%0"
: "+m" (ADDR)
: "Ir" (nr)
: "memory");
}
#endif
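
The conventions above are worth spelling out: ffs()/fls() are 1-based and return 0 for an all-zero word, while __ffs()/ffz() are 0-based and undefined when no suitable bit exists. A tiny user-space check of the intended semantics (using compiler builtins as the reference, since the header itself is kernel-internal):

#include <assert.h>

static int ffs_ref(int x) { return __builtin_ffs(x); }
static int fls_ref(int x) { return x ? 32 - __builtin_clz((unsigned)x) : 0; }

int main(void)
{
	assert(ffs_ref(0) == 0 && fls_ref(0) == 0);
	assert(ffs_ref(0x8) == 4 && fls_ref(0x8) == 4);
	assert(ffs_ref(0xC) == 3 && fls_ref(0xC) == 4);
	return 0;
}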


@ -0,0 +1,133 @@
/**
* \file futex.h
* Licence details are found in the file LICENSE.
*
* \brief
* Futex adaptation to McKernel
*
* \author Balazs Gerofi <bgerofi@riken.jp> \par
* Copyright (C) 2012 RIKEN AICS
*
*
* HISTORY:
*
*/
#ifndef _ARCH_FUTEX_H
#define _ARCH_FUTEX_H
#include <asm.h>
#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \
asm volatile("1:\t" insn "\n" \
"2:\t.section .fixup,\"ax\"\n" \
"3:\tmov\t%3, %1\n" \
"\tjmp\t2b\n" \
"\t.previous\n" \
_ASM_EXTABLE(1b, 3b) \
: "=r" (oldval), "=r" (ret), "+m" (*uaddr) \
: "i" (-EFAULT), "0" (oparg), "1" (0))
#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \
asm volatile("1:\tmovl %2, %0\n" \
"\tmovl\t%0, %3\n" \
"\t" insn "\n" \
"2:\tlock; cmpxchgl %3, %2\n" \
"\tjnz\t1b\n" \
"3:\t.section .fixup,\"ax\"\n" \
"4:\tmov\t%5, %1\n" \
"\tjmp\t3b\n" \
"\t.previous\n" \
_ASM_EXTABLE(1b, 4b) \
_ASM_EXTABLE(2b, 4b) \
: "=&a" (oldval), "=&r" (ret), \
"+m" (*uaddr), "=&r" (tem) \
: "r" (oparg), "i" (-EFAULT), "1" (0))
static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval,
int newval)
{
#ifdef __UACCESS__
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
#endif
asm volatile("1:\tlock; cmpxchgl %3, %1\n"
"2:\t.section .fixup, \"ax\"\n"
"3:\tmov %2, %0\n"
"\tjmp 2b\n"
"\t.previous\n"
_ASM_EXTABLE(1b, 3b)
: "=a" (oldval), "+m" (*uaddr)
: "i" (-EFAULT), "r" (newval), "0" (oldval)
: "memory"
);
return oldval;
}
#ifdef POSTK_DEBUG_ARCH_DEP_8 /* arch depend hide */
static inline int futex_atomic_op_inuser(int encoded_op, int __user *uaddr)
{
int op = (encoded_op >> 28) & 7;
int cmp = (encoded_op >> 24) & 15;
int oparg = (encoded_op << 8) >> 20;
int cmparg = (encoded_op << 20) >> 20;
int oldval = 0, ret, tem;
if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28))
oparg = 1 << oparg;
#ifdef __UACCESS__
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int)))
return -EFAULT;
#endif
switch (op) {
case FUTEX_OP_SET:
__futex_atomic_op1("xchgl %0, %2", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ADD:
__futex_atomic_op1("lock; xaddl %0, %2", ret, oldval,
uaddr, oparg);
break;
case FUTEX_OP_OR:
__futex_atomic_op2("orl %4, %3", ret, oldval, uaddr, oparg);
break;
case FUTEX_OP_ANDN:
__futex_atomic_op2("andl %4, %3", ret, oldval, uaddr, ~oparg);
break;
case FUTEX_OP_XOR:
__futex_atomic_op2("xorl %4, %3", ret, oldval, uaddr, oparg);
break;
default:
ret = -ENOSYS;
}
if (!ret) {
switch (cmp) {
case FUTEX_OP_CMP_EQ:
ret = (oldval == cmparg);
break;
case FUTEX_OP_CMP_NE:
ret = (oldval != cmparg);
break;
case FUTEX_OP_CMP_LT:
ret = (oldval < cmparg);
break;
case FUTEX_OP_CMP_GE:
ret = (oldval >= cmparg);
break;
case FUTEX_OP_CMP_LE:
ret = (oldval <= cmparg);
break;
case FUTEX_OP_CMP_GT:
ret = (oldval > cmparg);
break;
default:
ret = -ENOSYS;
}
}
return ret;
}
#endif /* !POSTK_DEBUG_ARCH_DEP_8 */
#endif
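
futex_atomic_op_inuser() above decodes a packed 32-bit operation word. The packing it assumes mirrors Linux's FUTEX_OP() encoding; it is sketched here only to document the bit layout (the real constants live in the generic futex header, not in this file):

/* bits 31..28: op (bit 31 doubles as FUTEX_OP_OPARG_SHIFT)
 * bits 27..24: cmp
 * bits 23..12: oparg  (sign-extended by "(encoded_op << 8) >> 20")
 * bits 11..0 : cmparg (sign-extended by "(encoded_op << 20) >> 20") */
static inline int futex_op_pack(int op, int oparg, int cmp, int cmparg)
{
	return ((op & 0xf) << 28) | ((cmp & 0xf) << 24)
		| ((oparg & 0xfff) << 12) | (cmparg & 0xfff);
}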


@ -0,0 +1,603 @@
/*
* Excerpted from Linux 3.0: arch/x86/include/asm/spinlock.h
*/
#ifndef __HEADER_X86_COMMON_ARCH_LOCK
#define __HEADER_X86_COMMON_ARCH_LOCK
#include <ihk/cpu.h>
#include <ihk/atomic.h>
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK)
int __kprintf(const char *format, ...);
#endif
typedef int ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
#define IHK_STATIC_SPINLOCK_FUNCS
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
*lock = 0;
}
#define SPIN_LOCK_UNLOCKED 0
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_lock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
#if 0
asm volatile("lock ; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
"jmp 1b\n"
"2:"
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
#endif
preempt_disable();
asm volatile("lock; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
/* don't need lfence here, because loads are in-order */
"jmp 1b\n"
"2:"
: "+r" (inc), "+m" (*lock), "=&r" (tmp)
:
: "memory", "cc");
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_lock(l);\
__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
__ihk_mc_spinlock_lock_noirq(lock);
return flags;
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
preempt_enable();
}
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock(l, f) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock((l), (f)); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
__ihk_mc_spinlock_unlock_noirq(lock);
cpu_restore_interrupt(flags);
}
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
typedef struct mcs_lock_node {
unsigned long locked;
struct mcs_lock_node *next;
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_lock_node_t;
static void mcs_lock_init(struct mcs_lock_node *node)
{
node->locked = 0;
node->next = NULL;
}
static void __mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
(unsigned long)node);
if (pred) {
node->locked = 1;
pred->next = node;
while (node->locked != 0) {
cpu_pause();
}
}
}
static void __mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
struct mcs_lock_node *old = (struct mcs_lock_node *)
atomic_cmpxchg8((unsigned long *)&lock->next,
(unsigned long)node, (unsigned long)0);
if (old == node) {
return;
}
while (node->next == NULL) {
cpu_pause();
}
}
node->next->locked = 0;
}
static void mcs_lock_lock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
preempt_disable();
__mcs_lock_lock(lock, node);
}
static void mcs_lock_unlock_noirq(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
__mcs_lock_unlock(lock, node);
preempt_enable();
}
static void mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
node->irqsave = cpu_disable_interrupt_save();
mcs_lock_lock_noirq(lock, node);
}
static void mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
mcs_lock_unlock_noirq(lock, node);
cpu_restore_interrupt(node->irqsave);
}
#define SPINLOCK_IN_MCS_RWLOCK
// reader/writer lock
typedef struct mcs_rwlock_node {
ihk_atomic_t count; // num of readers (use only common reader)
char type; // lock type
#define MCS_RWLOCK_TYPE_COMMON_READER 0
#define MCS_RWLOCK_TYPE_READER 1
#define MCS_RWLOCK_TYPE_WRITER 2
char locked; // lock
#define MCS_RWLOCK_LOCKED 1
#define MCS_RWLOCK_UNLOCKED 0
char dmy1; // unused
char dmy2; // unused
struct mcs_rwlock_node *next;
} __attribute__((aligned(64))) mcs_rwlock_node_t;
typedef struct mcs_rwlock_node_irqsave {
#ifndef SPINLOCK_IN_MCS_RWLOCK
struct mcs_rwlock_node node;
#endif
unsigned long irqsave;
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
typedef struct mcs_rwlock_lock {
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_spinlock_t slock;
#else
struct mcs_rwlock_node reader; /* common reader lock */
struct mcs_rwlock_node *node; /* base */
#endif
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_init(&lock->slock);
#else
ihk_atomic_set(&lock->reader.count, 0);
lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER;
lock->node = NULL;
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
#endif
static void
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_lock_noirq(&lock->slock);
#else
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_WRITER;
node->next = NULL;
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
(unsigned long)node);
if (pred) {
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
#endif
}
#ifndef SPINLOCK_IN_MCS_RWLOCK
static void
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
{
struct mcs_rwlock_node *p;
struct mcs_rwlock_node *f = NULL;
struct mcs_rwlock_node *n;
int breakf = 0;
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
for(p = &lock->reader; p->next; p = n){
n = p->next;
if(p->next->type == MCS_RWLOCK_TYPE_READER){
p->next = n->next;
if(lock->node == n){
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)n,
(unsigned long)p);
if(old != n){ // couldn't change
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
else{
breakf = 1;
}
}
else if(p->next == NULL){
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
if(f){
ihk_atomic_inc(&lock->reader.count);
n->locked = MCS_RWLOCK_UNLOCKED;
}
else
f = n;
n = p;
if(breakf)
break;
}
if(n->next == NULL && lock->node != n){
while (n->next == NULL && lock->node != n) {
cpu_pause();
}
}
}
f->locked = MCS_RWLOCK_UNLOCKED;
}
#endif
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
#endif
static void
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock_noirq(&lock->slock);
#else
if (node->next == NULL) {
struct mcs_rwlock_node *old = (struct mcs_rwlock_node *)
atomic_cmpxchg8((unsigned long *)&lock->node,
(unsigned long)node, (unsigned long)0);
if (old == node) {
goto out;
}
while (node->next == NULL) {
cpu_pause();
}
}
if(node->next->type == MCS_RWLOCK_TYPE_READER){
lock->reader.next = node->next;
mcs_rwlock_unlock_readers(lock);
}
else{
node->next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
#endif
static inline unsigned int
atomic_inc_ifnot0(ihk_atomic_t *v)
{
unsigned int *p = (unsigned int *)(&(v)->counter);
unsigned int old;
unsigned int new;
unsigned int val;
do{
if(!(old = *p))
break;
new = old + 1;
val = atomic_cmpxchg4(p, old, new);
}while(val != old);
return old;
}
static void
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_lock_noirq(&lock->slock);
#else
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_READER;
node->next = NULL;
node->dmy1 = ihk_mc_get_processor_id();
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
(unsigned long)node);
if (pred) {
if(pred == &lock->reader){
if(atomic_inc_ifnot0(&pred->count)){
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)node,
(unsigned long)pred);
if (old == node) {
goto out;
}
while (node->next == NULL) {
cpu_pause();
}
node->locked = MCS_RWLOCK_LOCKED;
lock->reader.next = node;
mcs_rwlock_unlock_readers(lock);
ihk_atomic_dec(&pred->count);
goto out;
}
}
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
else {
lock->reader.next = node;
mcs_rwlock_unlock_readers(lock);
}
out:
return;
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
#endif
static void
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock_noirq(&lock->slock);
#else
if(ihk_atomic_dec_return(&lock->reader.count))
goto out;
if (lock->reader.next == NULL) {
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)&lock->reader,
(unsigned long)0);
if (old == &lock->reader) {
goto out;
}
while (lock->reader.next == NULL) {
cpu_pause();
}
}
if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){
mcs_rwlock_unlock_readers(lock);
}
else{
lock->reader.next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
#endif
static void
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
node->irqsave = ihk_mc_spinlock_lock(&lock->slock);
#else
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_writer_lock_noirq(lock, &node->node);
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
#endif
static void
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock(&lock->slock, node->irqsave);
#else
__mcs_rwlock_writer_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
#endif
static void
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
node->irqsave = ihk_mc_spinlock_lock(&lock->slock);
#else
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_reader_lock_noirq(lock, &node->node);
#endif
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
#endif
static void
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
#ifdef SPINLOCK_IN_MCS_RWLOCK
ihk_mc_spinlock_unlock(&lock->slock, node->irqsave);
#else
__mcs_rwlock_reader_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
#endif
}
#endif
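
A typical use of the plain MCS lock defined above, sketched for illustration: every contender brings its own queue node (usually on its stack), so each waiter spins on a cache line it owns rather than on the shared lock word. The lock head is just another node whose next pointer serves as the queue tail; zero-initialization is equivalent to mcs_lock_init().

static mcs_lock_node_t example_lock; /* lock head; zeroed == initialized */

static void example_critical_section(void)
{
	struct mcs_lock_node node; /* per-acquirer queue node */

	mcs_lock_lock(&example_lock, &node);   /* saves and disables interrupts, queues */
	/* ... critical section ... */
	mcs_lock_unlock(&example_lock, &node); /* hands the lock to node->next */
}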


@ -0,0 +1,334 @@
/**
* \file arch-memory.h
* License details are found in the file LICENSE.
* \brief
* Define and declare memory management macros and functions
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef __HEADER_X86_COMMON_ARCH_MEMORY_H
#define __HEADER_X86_COMMON_ARCH_MEMORY_H
#include <ihk/types.h>
#define KERNEL_CS_ENTRY 4
#define KERNEL_DS_ENTRY 5
#define USER_CS_ENTRY 6
#define USER_DS_ENTRY 7
#define GLOBAL_TSS_ENTRY 8
#define GETCPU_ENTRY 15
#define KERNEL_CS (KERNEL_CS_ENTRY * 8)
#define KERNEL_DS (KERNEL_DS_ENTRY * 8)
#define USER_CS (USER_CS_ENTRY * 8 + 3)
#define USER_DS (USER_DS_ENTRY * 8 + 3)
#define GLOBAL_TSS (GLOBAL_TSS_ENTRY * 8)
#define PAGE_SHIFT 12
#define PAGE_SIZE (1UL << PAGE_SHIFT)
#define PAGE_MASK (~((unsigned long)PAGE_SIZE - 1))
#define PAGE_P2ALIGN 0
#define LARGE_PAGE_SHIFT 21
#define LARGE_PAGE_SIZE (1UL << LARGE_PAGE_SHIFT)
#define LARGE_PAGE_MASK (~((unsigned long)LARGE_PAGE_SIZE - 1))
#define LARGE_PAGE_P2ALIGN (LARGE_PAGE_SHIFT - PAGE_SHIFT)
#define USER_END 0x0000800000000000UL
#define TASK_UNMAPPED_BASE 0x00002AAAAAA00000UL
#define MAP_ST_START 0xffff800000000000UL
#define MAP_VMAP_START 0xfffff00000000000UL
#define MAP_FIXED_START 0xffffffff70000000UL
#define MAP_KERNEL_START 0xffffffff80000000UL
#define STACK_TOP(region) ((region)->user_end)
#define MAP_VMAP_SIZE 0x0000000100000000UL
#define KERNEL_PHYS_OFFSET MAP_ST_START
#define PTL4_SHIFT 39
#define PTL4_SIZE (1UL << PTL4_SHIFT)
#define PTL3_SHIFT 30
#define PTL3_SIZE (1UL << PTL3_SHIFT)
#define PTL2_SHIFT 21
#define PTL2_SIZE (1UL << PTL2_SHIFT)
#define PTL1_SHIFT 12
#define PTL1_SIZE (1UL << PTL1_SHIFT)
#define PT_ENTRIES 512
/* mask of the physical address of the entry to the page table */
#define PT_PHYSMASK (((1UL << 52) - 1) & PAGE_MASK)
#define PF_PRESENT ((pte_t)0x01) /* entry is valid */
#define PF_WRITABLE ((pte_t)0x02)
#define PFLX_PWT ((pte_t)0x08)
#define PFLX_PCD ((pte_t)0x10)
#define PF_SIZE ((pte_t)0x80) /* entry points large page */
#define PFL4_PRESENT ((pte_t)0x01)
#define PFL4_WRITABLE ((pte_t)0x02)
#define PFL4_USER ((pte_t)0x04)
#define PFL3_PRESENT ((pte_t)0x01)
#define PFL3_WRITABLE ((pte_t)0x02)
#define PFL3_USER ((pte_t)0x04)
#define PFL3_PWT PFLX_PWT
#define PFL3_PCD PFLX_PCD
#define PFL3_ACCESSED ((pte_t)0x20)
#define PFL3_DIRTY ((pte_t)0x40)
#define PFL3_SIZE ((pte_t)0x80) /* Used in 1G page */
#define PFL3_GLOBAL ((pte_t)0x100)
#define PFL3_IGNORED_11 ((pte_t)1 << 11)
#define PFL3_FILEOFF PFL3_IGNORED_11
#define PFL2_PRESENT ((pte_t)0x01)
#define PFL2_WRITABLE ((pte_t)0x02)
#define PFL2_USER ((pte_t)0x04)
#define PFL2_PWT PFLX_PWT
#define PFL2_PCD PFLX_PCD
#define PFL2_ACCESSED ((pte_t)0x20)
#define PFL2_DIRTY ((pte_t)0x40)
#define PFL2_SIZE ((pte_t)0x80) /* Used in 2M page */
#define PFL2_GLOBAL ((pte_t)0x100)
#define PFL2_IGNORED_11 ((pte_t)1 << 11)
#define PFL2_FILEOFF PFL2_IGNORED_11
#define PFL1_PRESENT ((pte_t)0x01)
#define PFL1_WRITABLE ((pte_t)0x02)
#define PFL1_USER ((pte_t)0x04)
#define PFL1_PWT PFLX_PWT
#define PFL1_PCD PFLX_PCD
#define PFL1_ACCESSED ((pte_t)0x20)
#define PFL1_DIRTY ((pte_t)0x40)
#define PFL1_IGNORED_11 ((pte_t)1 << 11)
#define PFL1_FILEOFF PFL1_IGNORED_11
/* We allow user programs to access all the memory */
#define PFL4_KERN_ATTR (PFL4_PRESENT | PFL4_WRITABLE)
#define PFL3_KERN_ATTR (PFL3_PRESENT | PFL3_WRITABLE)
#define PFL2_KERN_ATTR (PFL2_PRESENT | PFL2_WRITABLE)
#define PFL1_KERN_ATTR (PFL1_PRESENT | PFL1_WRITABLE)
/* for the page table entry that points another page table */
#define PFL4_PDIR_ATTR (PFL4_PRESENT | PFL4_WRITABLE | PFL4_USER)
#define PFL3_PDIR_ATTR (PFL3_PRESENT | PFL3_WRITABLE | PFL3_USER)
#define PFL2_PDIR_ATTR (PFL2_PRESENT | PFL2_WRITABLE | PFL2_USER)
#define PTE_NULL ((pte_t)0)
typedef unsigned long pte_t;
/*
* pagemap kernel ABI bits
*/
#define PM_ENTRY_BYTES sizeof(uint64_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
/* For easy conversion, these values are kept identical to the architecture's PTE bits */
enum ihk_mc_pt_attribute {
PTATTR_ACTIVE = 0x01,
PTATTR_WRITABLE = 0x02,
PTATTR_USER = 0x04,
PTATTR_DIRTY = 0x40,
PTATTR_LARGEPAGE = 0x80,
PTATTR_FILEOFF = PFL2_FILEOFF,
PTATTR_NO_EXECUTE = 0x8000000000000000,
PTATTR_UNCACHABLE = 0x10000,
PTATTR_FOR_USER = 0x20000,
PTATTR_WRITE_COMBINED = 0x40000,
};
enum ihk_mc_pt_attribute attr_mask;
#ifdef POSTK_DEBUG_ARCH_DEP_12
static inline int pfn_is_write_combined(uintptr_t pfn)
{
return ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD));
}
#endif /* #ifdef POSTK_DEBUG_ARCH_DEP_12 */
static inline int pte_is_null(pte_t *ptep)
{
return (*ptep == PTE_NULL);
}
static inline int pte_is_present(pte_t *ptep)
{
return !!(*ptep & PF_PRESENT);
}
static inline int pte_is_writable(pte_t *ptep)
{
return !!(*ptep & PF_WRITABLE);
}
static inline int pte_is_dirty(pte_t *ptep, size_t pgsize)
{
switch (pgsize) {
case PTL1_SIZE: return !!(*ptep & PFL1_DIRTY);
case PTL2_SIZE: return !!(*ptep & PFL2_DIRTY);
case PTL3_SIZE: return !!(*ptep & PFL3_DIRTY);
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pte_is_dirty");
#else
return !!(*ptep & PTATTR_DIRTY);
#endif
}
}
static inline int pte_is_fileoff(pte_t *ptep, size_t pgsize)
{
switch (pgsize) {
case PTL1_SIZE: return !!(*ptep & PFL1_FILEOFF);
case PTL2_SIZE: return !!(*ptep & PFL2_FILEOFF);
case PTL3_SIZE: return !!(*ptep & PFL3_FILEOFF);
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pte_is_fileoff");
#else
return !!(*ptep & PTATTR_FILEOFF);
#endif
}
}
static inline void pte_update_phys(pte_t *ptep, unsigned long phys)
{
*ptep = (*ptep & ~PT_PHYSMASK) | (phys & PT_PHYSMASK);
}
static inline uintptr_t pte_get_phys(pte_t *ptep)
{
return (*ptep & PT_PHYSMASK);
}
static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
{
return (off_t)(*ptep & PAGE_MASK);
}
static inline enum ihk_mc_pt_attribute pte_get_attr(pte_t *ptep, size_t pgsize)
{
enum ihk_mc_pt_attribute attr;
attr = *ptep & attr_mask;
if (*ptep & PFLX_PWT) {
if (*ptep & PFLX_PCD) {
attr |= PTATTR_UNCACHABLE;
}
else {
attr |= PTATTR_WRITE_COMBINED;
}
}
if (((pgsize == PTL2_SIZE) && (*ptep & PFL2_SIZE))
|| ((pgsize == PTL3_SIZE) && (*ptep & PFL3_SIZE))) {
attr |= PTATTR_LARGEPAGE;
}
return attr;
} /* pte_get_attr() */
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
*ptep = PTE_NULL;
return;
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
uint64_t attr;
attr = ptattr & ~PAGE_MASK;
switch (pgsize) {
case PTL1_SIZE: attr |= PFL1_FILEOFF; break;
case PTL2_SIZE: attr |= PFL2_FILEOFF | PFL2_SIZE; break;
case PTL3_SIZE: attr |= PFL3_FILEOFF | PFL3_SIZE; break;
default:
#if 0 /* XXX: workaround. cannot use panic() here */
panic("pte_make_fileoff");
#else
attr |= PTATTR_FILEOFF;
#endif
break;
}
*ptep = (off & PAGE_MASK) | attr;
}
#if 0 /* XXX: workaround. cannot use panic() here */
static inline void pte_xchg(pte_t *ptep, pte_t *valp)
{
*valp = xchg(ptep, *valp);
}
#else
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
uint64_t mask;
switch (pgsize) {
default: /* fall through */
case PTL1_SIZE: mask = ~PFL1_DIRTY; break;
case PTL2_SIZE: mask = ~PFL2_DIRTY; break;
case PTL3_SIZE: mask = ~PFL3_DIRTY; break;
}
asm volatile ("lock andq %0,%1" :: "r"(mask), "m"(*ptep));
return;
}
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
uint64_t mask;
switch (pgsize) {
default: /* fall through */
case PTL1_SIZE: mask = PFL1_DIRTY; break;
case PTL2_SIZE: mask = PFL2_DIRTY; break;
case PTL3_SIZE: mask = PFL3_DIRTY; break;
}
asm volatile ("lock orq %0,%1" :: "r"(mask), "m"(*ptep));
return;
}
struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
void *early_alloc_pages(int nr_pages);
void *get_last_early_heap(void);
void flush_tlb(void);
void flush_tlb_single(unsigned long addr);
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
extern unsigned long ap_trampoline;
//#define AP_TRAMPOLINE 0x10000
#define AP_TRAMPOLINE_SIZE 0x2000
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#endif
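
A worked example of the paging constants defined above: a canonical virtual address splits into four 9-bit table indices plus a 12-bit page offset. The sketch below shows the decomposition (illustrative only):

static inline void decompose_va(unsigned long va,
				unsigned long idx[4], unsigned long *off)
{
	idx[0] = (va >> PTL4_SHIFT) & (PT_ENTRIES - 1); /* PML4 index */
	idx[1] = (va >> PTL3_SHIFT) & (PT_ENTRIES - 1); /* PDPT index */
	idx[2] = (va >> PTL2_SHIFT) & (PT_ENTRIES - 1); /* PD index   */
	idx[3] = (va >> PTL1_SHIFT) & (PT_ENTRIES - 1); /* PT index   */
	*off   = va & ~PAGE_MASK;
}

/* e.g. MAP_KERNEL_START (0xffffffff80000000) decomposes to 511/510/0/0
 * with offset 0, matching the kboot page-table setup earlier in this diff. */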


@ -0,0 +1,42 @@
#ifndef _ASM_X86_STRING_H
#define _ASM_X86_STRING_H
#define ARCH_FAST_MEMCPY
static inline void *__inline_memcpy(void *to, const void *from, size_t n)
{
unsigned long d0, d1, d2;
asm volatile("rep ; movsl\n\t"
"testb $2,%b4\n\t"
"je 1f\n\t"
"movsw\n"
"1:\ttestb $1,%b4\n\t"
"je 2f\n\t"
"movsb\n"
"2:"
: "=&c" (d0), "=&D" (d1), "=&S" (d2)
: "0" (n / 4), "q" (n), "1" ((long)to), "2" ((long)from)
: "memory");
return to;
}
#define ARCH_FAST_MEMSET
static inline void *__inline_memset(void *s, unsigned long c, size_t count)
{
int d0, d1;
asm volatile("rep ; stosl\n\t"
"testb $2,%b3\n\t"
"je 1f\n\t"
"stosw\n"
"1:\ttestb $1,%b3\n\t"
"je 2f\n\t"
"stosb\n"
"2:"
: "=&c" (d0), "=&D" (d1)
: "a" (c), "q" (count), "0" (count/4), "1" ((long)s)
: "memory");
return s;
}
#endif


@ -0,0 +1,18 @@
/**
* \file auxvec.h
* License details are found in the file LICENSE.
* \brief
* Declare architecture-dependent constants for auxiliary vector
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* Copyright (C) 2016 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef ARCH_AUXVEC_H
#define ARCH_AUXVEC_H
#define AT_SYSINFO_EHDR 33
#endif


@ -0,0 +1,37 @@
/**
* \file cpu.h
* License details are found in the file LICENSE.
* \brief
* Declare architecture-dependent types and functions to control CPU.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef ARCH_CPU_H
#define ARCH_CPU_H
#define arch_barrier() asm volatile("" : : : "memory")
static inline void rmb(void)
{
arch_barrier();
}
static inline void wmb(void)
{
arch_barrier();
}
static unsigned long read_tsc(void)
{
unsigned int low, high;
asm volatile("rdtsc" : "=a"(low), "=d"(high));
return (low | ((unsigned long)high << 32));
}
#endif /* ARCH_CPU_H */
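
read_tsc() above is the usual rdtsc wrapper. One minimal way to use it, for illustration (rdtsc is not serializing, so precise measurements would normally add a fence or cpuid around it):

static unsigned long measure_cycles(void (*fn)(void))
{
	unsigned long t0 = read_tsc();

	fn();
	return read_tsc() - t0; /* elapsed TSC ticks */
}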


@ -0,0 +1,16 @@
#ifndef __ARCH_MM_H
#define __ARCH_MM_H
struct process_vm;
static inline void
flush_nfo_tlb()
{
}
static inline void
flush_nfo_tlb_mm(struct process_vm *vm)
{
}
#endif


@ -0,0 +1,40 @@
/**
* \file mman.h
* License details are found in the file LICENSE.
* \brief
* memory management declarations
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
#ifndef HEADER_ARCH_MMAN_H
#define HEADER_ARCH_MMAN_H
/*
* mapping flags
*/
#define MAP_32BIT 0x40
#define MAP_GROWSDOWN 0x0100
#define MAP_DENYWRITE 0x0800
#define MAP_EXECUTABLE 0x1000
#define MAP_LOCKED 0x2000
#define MAP_NORESERVE 0x4000
#define MAP_POPULATE 0x8000
#define MAP_NONBLOCK 0x00010000
#define MAP_STACK 0x00020000
#define MAP_HUGETLB 0x00040000
#define MAP_HUGE_SHIFT 26
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
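/*
 * Illustrative note, not part of the original header: MAP_HUGE_* stores
 * log2 of the requested page size in the bits above MAP_HUGE_SHIFT, so
 * (21 << 26) selects 2 MiB pages and (30 << 26) selects 1 GiB pages. A
 * hypothetical mmap() request for a 2 MiB-backed anonymous mapping would
 * combine generic flags (defined in the common mman header, not here) with
 * the ones above:
 *
 *	mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	     MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_HUGE_2MB, -1, 0);
 */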
/*
* for mlockall()
*/
#define MCL_CURRENT 0x01
#define MCL_FUTURE 0x02
#endif /* HEADER_ARCH_MMAN_H */

View File

@ -0,0 +1,46 @@
/**
* \file shm.h
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
#ifndef HEADER_ARCH_SHM_H
#define HEADER_ARCH_SHM_H
/* shmflg */
#define SHM_HUGE_SHIFT 26
#define SHM_HUGE_2MB (21 << SHM_HUGE_SHIFT)
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
uint64_t shm_nattch;
uint8_t padding[12];
int init_pgshift;
};
#endif /* HEADER_ARCH_SHM_H */

View File

@ -0,0 +1,32 @@
#ifndef ARCH_RUSAGE_H_INCLUDED
#define ARCH_RUSAGE_H_INCLUDED
#define DEBUG_RUSAGE
#define IHK_OS_PGSIZE_4KB 0
#define IHK_OS_PGSIZE_2MB 1
#define IHK_OS_PGSIZE_1GB 2
extern struct rusage_global *rusage;
static inline int rusage_pgsize_to_pgtype(size_t pgsize)
{
int ret = IHK_OS_PGSIZE_4KB;
switch (pgsize) {
case PTL1_SIZE:
ret = IHK_OS_PGSIZE_4KB;
break;
case PTL2_SIZE:
ret = IHK_OS_PGSIZE_2MB;
break;
case PTL3_SIZE:
ret = IHK_OS_PGSIZE_1GB;
break;
default:
kprintf("%s: Error: Unknown pgsize=%ld\n", __FUNCTION__, pgsize);
break;
}
return ret;
}
#endif /* !defined(ARCH_RUSAGE_H_INCLUDED) */

View File

@ -0,0 +1,46 @@
/**
* \file arch/x86/kernel/include/cas.h
* License details are found in the file LICENSE.
* \brief
* compare and swap
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
*/
/*
* HISTORY:
*/
#ifndef __HEADER_X86_COMMON_CAS_H
#define __HEADER_X86_COMMON_CAS_H
// return 0:fail, 1:success
static inline int
compare_and_swap(void *addr, unsigned long olddata, unsigned long newdata)
{
unsigned long before;
asm volatile (
"lock; cmpxchgq %2,%1"
: "=a" (before), "+m" (*(unsigned long *)addr)
: "q" (newdata), "0" (olddata)
: "cc");
return before == olddata;
}
#if 0 // cmpxchg16b was not supported on k1om
// return 0:fail, 1:success
static inline int
compare_and_swap16(void *addr, void *olddata, void *newdata)
{
char rc;
asm volatile (
"lock; cmpxchg16b %0; setz %1"
: "=m" (*(long *)addr), "=q" (rc)
: "m" (*(long *)addr),
"d" (((long *)olddata)[0]), "a" (((long *)olddata)[1]),
"c" (((long *)newdata)[0]), "b" (((long *)newdata)[1])
: "memory");
return rc;
}
#endif
#endif /*__HEADER_X86_COMMON_CAS_H*/

View File

@ -0,0 +1,56 @@
/**
* \file cpulocal.h
* License details are found in the file LICENSE.
* \brief
* Declare information for individual CPUs.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef HEADER_X86_COMMON_CPULOCAL_H
#define HEADER_X86_COMMON_CPULOCAL_H
#include <types.h>
#include <registers.h>
/*
* CPU Local Page
* 0 - : struct x86_cpu_local_variables
* - 4096 : kernel stack
*/
#define X86_CPU_LOCAL_OFFSET_TSS 176
#define X86_CPU_LOCAL_OFFSET_KSTACK 16
#define X86_CPU_LOCAL_OFFSET_USTACK 24
struct x86_cpu_local_variables {
/* 0 */
unsigned long processor_id;
unsigned long apic_id;
/* 16 */
unsigned long kernel_stack;
unsigned long user_stack;
/* 32 */
struct x86_desc_ptr gdt_ptr;
unsigned short pad[3];
/* 48 */
uint64_t gdt[16];
/* 176 */
struct tss64 tss;
/* 280 */
unsigned long paniced;
uint64_t panic_regs[21];
/* 456 */
} __attribute__((packed));
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);
struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_this_cpu_kstack(void);
#endif

View File

@ -0,0 +1,59 @@
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
#ifndef __HEADER_X86_COMMON_ELF_H
#define __HEADER_X86_COMMON_ELF_H
/* NOTE segment type defined */
#define NT_X86_STATE 0x202
/* ELF target machines defined */
#define EM_K10M 181 /* Intel K10M */
#define EM_X86_64 62 /* AMD x86-64 architecture */
/* ELF header defined */
#define ELF_CLASS ELFCLASS64
#define ELF_DATA ELFDATA2LSB
#define ELF_OSABI ELFOSABI_NONE
#define ELF_ABIVERSION El_ABIVERSION_NONE
#ifdef CONFIG_MIC
#define ELF_ARCH EM_K10M
#else /* CONFIG_MIC */
#define ELF_ARCH EM_X86_64
#endif /* CONFIG_MIC */
struct user_regs64_struct
{
a8_uint64_t r15;
a8_uint64_t r14;
a8_uint64_t r13;
a8_uint64_t r12;
a8_uint64_t rbp;
a8_uint64_t rbx;
a8_uint64_t r11;
a8_uint64_t r10;
a8_uint64_t r9;
a8_uint64_t r8;
a8_uint64_t rax;
a8_uint64_t rcx;
a8_uint64_t rdx;
a8_uint64_t rsi;
a8_uint64_t rdi;
a8_uint64_t orig_rax;
a8_uint64_t rip;
a8_uint64_t cs;
a8_uint64_t eflags;
a8_uint64_t rsp;
a8_uint64_t ss;
a8_uint64_t fs_base;
a8_uint64_t gs_base;
a8_uint64_t ds;
a8_uint64_t es;
a8_uint64_t fs;
a8_uint64_t gs;
};
#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t))
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
#endif /* __HEADER_X86_COMMON_ELF_H */
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -0,0 +1,94 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/*
* Structures and definitions for ELF core file.
* Extracted from
* System V Application Binary Interface - DRAFT - 10 June 2013,
* http://www.sco.com/developers/gabi/latest/contents.html
*/
typedef uint16_t Elf64_Half;
typedef uint32_t Elf64_Word;
typedef uint64_t Elf64_Xword;
typedef uint64_t Elf64_Addr;
typedef uint64_t Elf64_Off;
#define EI_NIDENT 16
typedef struct {
unsigned char e_ident[EI_NIDENT];
Elf64_Half e_type;
Elf64_Half e_machine;
Elf64_Word e_version;
Elf64_Addr e_entry;
Elf64_Off e_phoff;
Elf64_Off e_shoff;
Elf64_Word e_flags;
Elf64_Half e_ehsize;
Elf64_Half e_phentsize;
Elf64_Half e_phnum;
Elf64_Half e_shentsize;
Elf64_Half e_shnum;
Elf64_Half e_shstrndx;
} Elf64_Ehdr;
#define EI_MAG0 0
#define EI_MAG1 1
#define EI_MAG2 2
#define EI_MAG3 3
#define EI_CLASS 4
#define EI_DATA 5
#define EI_VERSION 6
#define EI_OSABI 7
#define EI_ABIVERSION 8
#define EI_PAD 9
#define ELFMAG0 0x7f
#define ELFMAG1 'E'
#define ELFMAG2 'L'
#define ELFMAG3 'F'
#define ELFCLASS64 2 /* 64-bit object */
#define ELFDATA2LSB 1 /* LSB */
#define El_VERSION 1 /* defined to be the same as EV_CURRENT */
#define ELFOSABI_NONE 0 /* unspecified */
#define El_ABIVERSION_NONE 0 /* unspecified */
#define ET_CORE 4 /* Core file */
#define EM_X86_64 62 /* AMD x86-64 architecture */
#define EM_K10M 181 /* Intel K10M */
#define EV_CURRENT 1 /* Current version */
typedef struct {
Elf64_Word p_type;
Elf64_Word p_flags;
Elf64_Off p_offset;
Elf64_Addr p_vaddr;
Elf64_Addr p_paddr;
Elf64_Xword p_filesz;
Elf64_Xword p_memsz;
Elf64_Xword p_align;
} Elf64_Phdr;
#define PT_LOAD 1
#define PT_NOTE 4
#define PF_X 1 /* executable bit */
#define PF_W 2 /* writable bit */
#define PF_R 4 /* readable bit */
struct note {
Elf64_Word namesz;
Elf64_Word descsz;
Elf64_Word type;
/* name char[namesz] and desc[descsz] */
};
#define NT_PRSTATUS 1
#define NT_PRFRPREG 2
#define NT_PRPSINFO 3
#define NT_AUXV 6
#define NT_X86_STATE 0x202
#include "elfcoregpl.h"
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -0,0 +1,96 @@
#ifndef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
/*
* Structures and defines from GPLed file.
*/
#define pid_t int
/* From /usr/include/linux/elfcore.h of Linux */
#define ELF_PRARGSZ (80)
/* From /usr/include/linux/elfcore.h of Linux */
struct elf_siginfo
{
int si_signo;
int si_code;
int si_errno;
};
/* From bfd/hosts/x86-64linux.h of gdb. */
typedef uint64_t __attribute__ ((__aligned__ (8))) a8_uint64_t;
typedef a8_uint64_t elf_greg64_t;
struct user_regs64_struct
{
a8_uint64_t r15;
a8_uint64_t r14;
a8_uint64_t r13;
a8_uint64_t r12;
a8_uint64_t rbp;
a8_uint64_t rbx;
a8_uint64_t r11;
a8_uint64_t r10;
a8_uint64_t r9;
a8_uint64_t r8;
a8_uint64_t rax;
a8_uint64_t rcx;
a8_uint64_t rdx;
a8_uint64_t rsi;
a8_uint64_t rdi;
a8_uint64_t orig_rax;
a8_uint64_t rip;
a8_uint64_t cs;
a8_uint64_t eflags;
a8_uint64_t rsp;
a8_uint64_t ss;
a8_uint64_t fs_base;
a8_uint64_t gs_base;
a8_uint64_t ds;
a8_uint64_t es;
a8_uint64_t fs;
a8_uint64_t gs;
};
#define ELF_NGREG64 (sizeof (struct user_regs64_struct) / sizeof(elf_greg64_t))
typedef elf_greg64_t elf_gregset64_t[ELF_NGREG64];
struct prstatus64_timeval
{
a8_uint64_t tv_sec;
a8_uint64_t tv_usec;
};
struct elf_prstatus64
{
struct elf_siginfo pr_info;
short int pr_cursig;
a8_uint64_t pr_sigpend;
a8_uint64_t pr_sighold;
pid_t pr_pid;
pid_t pr_ppid;
pid_t pr_pgrp;
pid_t pr_sid;
struct prstatus64_timeval pr_utime;
struct prstatus64_timeval pr_stime;
struct prstatus64_timeval pr_cutime;
struct prstatus64_timeval pr_cstime;
elf_gregset64_t pr_reg;
int pr_fpvalid;
};
struct elf_prpsinfo64
{
char pr_state;
char pr_sname;
char pr_zomb;
char pr_nice;
a8_uint64_t pr_flag;
unsigned int pr_uid;
unsigned int pr_gid;
int pr_pid, pr_ppid, pr_pgrp, pr_sid;
char pr_fname[16];
char pr_psargs[ELF_PRARGSZ];
};
#endif /* !POSTK_DEBUG_ARCH_DEP_18 */

View File

@ -0,0 +1,111 @@
#ifndef _ERRNO_BASE_H
#define _ERRNO_BASE_H
#include <generic-errno.h>
#define EDEADLK 35 /* Resource deadlock would occur */
#define ENAMETOOLONG 36 /* File name too long */
#define ENOLCK 37 /* No record locks available */
#define ENOSYS 38 /* Function not implemented */
#define ENOTEMPTY 39 /* Directory not empty */
#define ELOOP 40 /* Too many symbolic links encountered */
#define EWOULDBLOCK EAGAIN /* Operation would block */
#define ENOMSG 42 /* No message of desired type */
#define EIDRM 43 /* Identifier removed */
#define ECHRNG 44 /* Channel number out of range */
#define EL2NSYNC 45 /* Level 2 not synchronized */
#define EL3HLT 46 /* Level 3 halted */
#define EL3RST 47 /* Level 3 reset */
#define ELNRNG 48 /* Link number out of range */
#define EUNATCH 49 /* Protocol driver not attached */
#define ENOCSI 50 /* No CSI structure available */
#define EL2HLT 51 /* Level 2 halted */
#define EBADE 52 /* Invalid exchange */
#define EBADR 53 /* Invalid request descriptor */
#define EXFULL 54 /* Exchange full */
#define ENOANO 55 /* No anode */
#define EBADRQC 56 /* Invalid request code */
#define EBADSLT 57 /* Invalid slot */
#define EDEADLOCK EDEADLK
#define EBFONT 59 /* Bad font file format */
#define ENOSTR 60 /* Device not a stream */
#define ENODATA 61 /* No data available */
#define ETIME 62 /* Timer expired */
#define ENOSR 63 /* Out of streams resources */
#define ENONET 64 /* Machine is not on the network */
#define ENOPKG 65 /* Package not installed */
#define EREMOTE 66 /* Object is remote */
#define ENOLINK 67 /* Link has been severed */
#define EADV 68 /* Advertise error */
#define ESRMNT 69 /* Srmount error */
#define ECOMM 70 /* Communication error on send */
#define EPROTO 71 /* Protocol error */
#define EMULTIHOP 72 /* Multihop attempted */
#define EDOTDOT 73 /* RFS specific error */
#define EBADMSG 74 /* Not a data message */
#define EOVERFLOW 75 /* Value too large for defined data type */
#define ENOTUNIQ 76 /* Name not unique on network */
#define EBADFD 77 /* File descriptor in bad state */
#define EREMCHG 78 /* Remote address changed */
#define ELIBACC 79 /* Can not access a needed shared library */
#define ELIBBAD 80 /* Accessing a corrupted shared library */
#define ELIBSCN 81 /* .lib section in a.out corrupted */
#define ELIBMAX 82 /* Attempting to link in too many shared libraries */
#define ELIBEXEC 83 /* Cannot exec a shared library directly */
#define EILSEQ 84 /* Illegal byte sequence */
#define ERESTART 85 /* Interrupted system call should be restarted */
#define ESTRPIPE 86 /* Streams pipe error */
#define EUSERS 87 /* Too many users */
#define ENOTSOCK 88 /* Socket operation on non-socket */
#define EDESTADDRREQ 89 /* Destination address required */
#define EMSGSIZE 90 /* Message too long */
#define EPROTOTYPE 91 /* Protocol wrong type for socket */
#define ENOPROTOOPT 92 /* Protocol not available */
#define EPROTONOSUPPORT 93 /* Protocol not supported */
#define ESOCKTNOSUPPORT 94 /* Socket type not supported */
#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */
#define EPFNOSUPPORT 96 /* Protocol family not supported */
#define EAFNOSUPPORT 97 /* Address family not supported by protocol */
#define EADDRINUSE 98 /* Address already in use */
#define EADDRNOTAVAIL 99 /* Cannot assign requested address */
#define ENETDOWN 100 /* Network is down */
#define ENETUNREACH 101 /* Network is unreachable */
#define ENETRESET 102 /* Network dropped connection because of reset */
#define ECONNABORTED 103 /* Software caused connection abort */
#define ECONNRESET 104 /* Connection reset by peer */
#define ENOBUFS 105 /* No buffer space available */
#define EISCONN 106 /* Transport endpoint is already connected */
#define ENOTCONN 107 /* Transport endpoint is not connected */
#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */
#define ETOOMANYREFS 109 /* Too many references: cannot splice */
#define ETIMEDOUT 110 /* Connection timed out */
#define ECONNREFUSED 111 /* Connection refused */
#define EHOSTDOWN 112 /* Host is down */
#define EHOSTUNREACH 113 /* No route to host */
#define EALREADY 114 /* Operation already in progress */
#define EINPROGRESS 115 /* Operation now in progress */
#define ESTALE 116 /* Stale NFS file handle */
#define EUCLEAN 117 /* Structure needs cleaning */
#define ENOTNAM 118 /* Not a XENIX named type file */
#define ENAVAIL 119 /* No XENIX semaphores available */
#define EISNAM 120 /* Is a named type file */
#define EREMOTEIO 121 /* Remote I/O error */
#define EDQUOT 122 /* Quota exceeded */
#define ENOMEDIUM 123 /* No medium found */
#define EMEDIUMTYPE 124 /* Wrong medium type */
#define ECANCELED 125 /* Operation Canceled */
#define ENOKEY 126 /* Required key not available */
#define EKEYEXPIRED 127 /* Key has expired */
#define EKEYREVOKED 128 /* Key has been revoked */
#define EKEYREJECTED 129 /* Key was rejected by service */
/* for robust mutexes */
#define EOWNERDEAD 130 /* Owner died */
#define ENOTRECOVERABLE 131 /* State not recoverable */
#define ERFKILL 132 /* Operation not possible due to RF-kill */
#endif

View File

@ -0,0 +1,12 @@
/* hwcap.h COPYRIGHT FUJITSU LIMITED 2017 */
#ifdef POSTK_DEBUG_ARCH_DEP_65
#ifndef _UAPI__ASM_HWCAP_H
#define _UAPI__ASM_HWCAP_H
static unsigned long arch_get_hwcap(void)
{
return 0;
}
#endif /* _UAPI__ASM_HWCAP_H */
#endif /* POSTK_DEBUG_ARCH_DEP_65 */

View File

@ -0,0 +1,244 @@
/**
* \file atomic.h
* License details are found in the file LICENSE.
* \brief
* Atomic memory operations.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
#define HEADER_X86_COMMON_IHK_ATOMIC_H
/***********************************************************************
* ihk_atomic_t
*/
typedef struct {
int counter;
} ihk_atomic_t;
#define IHK_ATOMIC_INIT(i) { (i) }
static inline int ihk_atomic_read(const ihk_atomic_t *v)
{
return (*(volatile int *)&(v)->counter);
}
static inline void ihk_atomic_set(ihk_atomic_t *v, int i)
{
v->counter = i;
}
static inline void ihk_atomic_add(int i, ihk_atomic_t *v)
{
asm volatile("lock addl %1,%0"
: "+m" (v->counter)
: "ir" (i));
}
static inline void ihk_atomic_sub(int i, ihk_atomic_t *v)
{
asm volatile("lock subl %1,%0"
: "+m" (v->counter)
: "ir" (i));
}
static inline void ihk_atomic_inc(ihk_atomic_t *v)
{
asm volatile("lock incl %0"
: "+m" (v->counter));
}
static inline void ihk_atomic_dec(ihk_atomic_t *v)
{
asm volatile("lock decl %0"
: "+m" (v->counter));
}
static inline int ihk_atomic_dec_and_test(ihk_atomic_t *v)
{
unsigned char c;
asm volatile("lock decl %0; sete %1"
: "+m" (v->counter), "=qm" (c)
: : "memory");
return c != 0;
}
static inline int ihk_atomic_inc_and_test(ihk_atomic_t *v)
{
unsigned char c;
asm volatile("lock incl %0; sete %1"
: "+m" (v->counter), "=qm" (c)
: : "memory");
return c != 0;
}
static inline int ihk_atomic_add_return(int i, ihk_atomic_t *v)
{
int __i;
__i = i;
asm volatile("lock xaddl %0, %1"
: "+r" (i), "+m" (v->counter)
: : "memory");
return i + __i;
}
static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
{
return ihk_atomic_add_return(-i, v);
}
#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v))
#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v))
/***********************************************************************
* ihk_atomic64_t
*/
typedef struct {
long counter64;
} ihk_atomic64_t;
#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) }
static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
{
return *(volatile long *)&(v)->counter64;
}
static inline void ihk_atomic64_set(ihk_atomic64_t *v, int i)
{
v->counter64 = i;
}
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
{
asm volatile ("lock incq %0" : "+m"(v->counter64));
}
/***********************************************************************
* others
*/
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
* but generally the primitive is invalid, *ptr is output argument. --ANK
*/
#define __xg(x) ((volatile long *)(x))
#define xchg4(ptr, x) \
({ \
int __x = (x); \
asm volatile("xchgl %k0,%1" \
: "=r" (__x) \
: "m" (*ptr), "0" (__x) \
: "memory"); \
__x; \
})
static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
{
unsigned long __x = (x);
asm volatile("xchgq %0,%1"
: "=r" (__x)
: "m" (*(volatile unsigned long*)(ptr)), "0" (__x)
: "memory");
return __x;
}
#define __xchg(x, ptr, size) \
({ \
__typeof(*(ptr)) __x = (x); \
switch (size) { \
case 1: \
asm volatile("xchgb %b0,%1" \
: "=q" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 2: \
asm volatile("xchgw %w0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 4: \
asm volatile("xchgl %k0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
case 8: \
asm volatile("xchgq %0,%1" \
: "=r" (__x) \
: "m" (*__xg(ptr)), "0" (__x) \
: "memory"); \
break; \
default: \
panic("xchg for wrong size"); \
} \
__x; \
})
#define xchg(ptr, v) \
__xchg((v), (ptr), sizeof(*ptr))
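/*
 * Illustrative note, not part of the original header: xchg() selects the
 * 1/2/4/8-byte xchg variant from sizeof(*ptr) and returns the previous
 * value. A hypothetical ownership hand-off would look like
 *
 *	old_owner = xchg(&owner, new_owner);
 *
 * where the store and the read-back of the old value happen atomically.
 */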
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
unsigned long oldval,
unsigned long newval)
{
asm volatile("lock; cmpxchgq %2, %1\n"
: "=a" (oldval), "+m" (*addr)
: "r" (newval), "0" (oldval)
: "memory"
);
return oldval;
}
static inline unsigned long atomic_cmpxchg4(unsigned int *addr,
unsigned int oldval,
unsigned int newval)
{
asm volatile("lock; cmpxchgl %2, %1\n"
: "=a" (oldval), "+m" (*addr)
: "r" (newval), "0" (oldval)
: "memory"
);
return oldval;
}
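/*
 * Illustrative sketch, not part of the original header: atomic_cmpxchg8()
 * returns the value actually observed at *addr, so a lock-free
 * read-modify-write is usually a retry loop. "target" and SOME_FLAG below
 * are hypothetical.
 *
 *	unsigned long old, new;
 *	do {
 *		old = *target;
 *		new = old | SOME_FLAG;
 *	} while (atomic_cmpxchg8(target, old, new) != old);
 */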
static inline void ihk_atomic_add_long(long i, long *v) {
asm volatile("lock addq %1,%0"
: "+m" (*v)
: "ir" (i));
}
static inline void ihk_atomic_add_ulong(long i, unsigned long *v) {
asm volatile("lock addq %1,%0"
: "+m" (*v)
: "ir" (i));
}
static inline unsigned long ihk_atomic_add_long_return(long i, long *v) {
long __i;
__i = i;
asm volatile("lock xaddq %0, %1"
: "+r" (i), "+m" (*v)
: : "memory");
return i + __i;
}
#endif

View File

@ -0,0 +1,57 @@
/**
* \file context.h
* License details are found in the file LICENSE.
* \brief
* Define types of registers consisting of context.
* Define macros to retrieve arguments of system call.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef __HEADER_X86_COMMON_CONTEXT_H
#define __HEADER_X86_COMMON_CONTEXT_H
#include <registers.h>
struct x86_kregs {
unsigned long rsp, rbp, rbx, rsi, rdi, r12, r13, r14, r15, rflags;
unsigned long rsp0;
};
typedef struct x86_kregs ihk_mc_kernel_context_t;
/* XXX: User context should contain floating point registers */
struct x86_user_context {
struct x86_sregs sr;
/* 16-byte boundary here */
uint8_t is_gpr_valid;
uint8_t is_sr_valid;
uint8_t spare_flags6;
uint8_t spare_flags5;
uint8_t spare_flags4;
uint8_t spare_flags3;
uint8_t spare_flags2;
uint8_t spare_flags1;
struct x86_basic_regs gpr; /* must be last */
/* 16-byte boundary here */
};
typedef struct x86_user_context ihk_mc_user_context_t;
#define ihk_mc_syscall_arg0(uc) (uc)->gpr.rdi
#define ihk_mc_syscall_arg1(uc) (uc)->gpr.rsi
#define ihk_mc_syscall_arg2(uc) (uc)->gpr.rdx
#define ihk_mc_syscall_arg3(uc) (uc)->gpr.r10
#define ihk_mc_syscall_arg4(uc) (uc)->gpr.r8
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
#define ihk_mc_syscall_number(uc) (uc)->gpr.orig_rax
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
#endif

View File

@ -0,0 +1,26 @@
/**
* \file ikc.h
* License details are found in the file LICENSE.
* \brief
* Declare functions to initialize Inter-Kernel Communication
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef HEADER_X86_COMMON_IHK_IKC_H
#define HEADER_X86_COMMON_IHK_IKC_H
#include <ikc/ihk.h>
#define IKC_PORT_IKC2MCKERNEL 501
#define IKC_PORT_IKC2LINUX 503
/* manycore side */
int ihk_mc_ikc_init_first(struct ihk_ikc_channel_desc *,
ihk_ikc_ph_t handler);
#endif

View File

@ -0,0 +1,43 @@
/**
* \file types.h
* License details are found in the file LICENSE.
* \brief
* typedef stdint.h-like integer types
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef X86_COMMON_TYPES_H
#define X86_COMMON_TYPES_H
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
typedef signed char int8_t;
typedef signed short int16_t;
typedef signed int int32_t;
typedef signed long long int64_t;
typedef int64_t ptrdiff_t;
typedef int64_t intptr_t;
typedef uint64_t uintptr_t;
typedef uint64_t size_t;
typedef int64_t ssize_t;
typedef int64_t off_t;
#ifdef POSTK_DEBUG_ARCH_DEP_18 /* coredump arch separation. */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
#endif /* POSTK_DEBUG_ARCH_DEP_18 */
#define NULL ((void *)0)
#endif

View File

@ -0,0 +1,17 @@
/**
* \file prctl.h
* License details are found in the file LICENSE.
*/
/*
* HISTORY
*/
#ifndef __ARCH_PRCTL_H
#define __ARCH_PRCTL_H
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003
#define ARCH_GET_GS 0x1004
#endif

View File

@ -0,0 +1,317 @@
/**
* \file registers.h
* License details are found in the file LICENSE.
* \brief
* Declare macros and functions to manipulate
* Model-Specific Registers (MSRs)
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef __HEADER_X86_COMMON_REGISTERS_H
#define __HEADER_X86_COMMON_REGISTERS_H
#include <types.h>
#define RFLAGS_CF (1 << 0)
#define RFLAGS_PF (1 << 2)
#define RFLAGS_AF (1 << 4)
#define RFLAGS_ZF (1 << 6)
#define RFLAGS_SF (1 << 7)
#define RFLAGS_TF (1 << 8)
#define RFLAGS_IF (1 << 9)
#define RFLAGS_DF (1 << 10)
#define RFLAGS_OF (1 << 11)
#define RFLAGS_IOPL (3 << 12)
#define RFLAGS_NT (1 << 14)
#define RFLAGS_RF (1 << 16)
#define RFLAGS_VM (1 << 17)
#define RFLAGS_AC (1 << 18)
#define RFLAGS_VIF (1 << 19)
#define RFLAGS_VIP (1 << 20)
#define RFLAGS_ID (1 << 21)
#define DB6_B0 (1 << 0)
#define DB6_B1 (1 << 1)
#define DB6_B2 (1 << 2)
#define DB6_B3 (1 << 3)
#define DB6_BD (1 << 13)
#define DB6_BS (1 << 14)
#define DB6_BT (1 << 15)
#define MSR_EFER 0xc0000080
#define MSR_STAR 0xc0000081
#define MSR_LSTAR 0xc0000082
#define MSR_FMASK 0xc0000084
#define MSR_FS_BASE 0xc0000100
#define MSR_GS_BASE 0xc0000101
#define MSR_IA32_APIC_BASE 0x000000001b
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_IA32_PERF_CTL 0x00000199
#define MSR_IA32_MISC_ENABLE 0x000001a0
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_XSS 0xda0
#define CVAL(event, mask) \
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
#define CVAL2(event, mask, inv, count) \
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff) | \
((inv & 1) << 23) | ((count & 0xff) << 24))
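/*
 * Illustrative note, not part of the original header: CVAL()/CVAL2() pack a
 * performance-event select value: event code in bits 0-7, unit mask in bits
 * 8-15, and the upper event nibble shifted into bits 32-35; CVAL2()
 * additionally sets the invert bit (bit 23) and the counter mask
 * (bits 24-31).
 */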
/* AMD */
#define MSR_PERF_CTL_0 0xc0010000
#define MSR_PERF_CTR_0 0xc0010004
static unsigned long xgetbv(unsigned int index)
{
unsigned int low, high;
asm volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (index));
return low | ((unsigned long)high << 32);
}
static void xsetbv(unsigned int index, unsigned long val)
{
unsigned int low, high;
low = val;
high = val >> 32;
asm volatile("xsetbv" : : "a" (low), "d" (high), "c" (index));
}
static void wrmsr(unsigned int idx, unsigned long value){
unsigned int high, low;
high = value >> 32;
low = value & 0xffffffffU;
asm volatile("wrmsr" : : "c" (idx), "a" (low), "d" (high) : "memory");
}
static unsigned long rdpmc(unsigned int counter)
{
unsigned int high, low;
asm volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (counter));
return (unsigned long)high << 32 | low;
}
static unsigned long rdmsr(unsigned int index)
{
unsigned int high, low;
asm volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (index));
return (unsigned long)high << 32 | low;
}
static unsigned long rdtsc(void)
{
unsigned int high, low;
asm volatile("rdtsc" : "=a" (low), "=d" (high));
return (unsigned long)high << 32 | low;
}
static void set_perfctl(int counter, int event, int mask)
{
unsigned long value;
value = ((unsigned long)(event & 0x700) << 32)
| (event & 0xff) | ((mask & 0xff) << 8) | (1 << 18)
| (1 << 17);
wrmsr(MSR_PERF_CTL_0 + counter, value);
}
static void start_perfctr(int counter)
{
unsigned long value;
value = rdmsr(MSR_PERF_CTL_0 + counter);
value |= (1 << 22);
wrmsr(MSR_PERF_CTL_0 + counter, value);
}
static void stop_perfctr(int counter)
{
unsigned long value;
value = rdmsr(MSR_PERF_CTL_0 + counter);
value &= ~(1 << 22);
wrmsr(MSR_PERF_CTL_0 + counter, value);
}
static void clear_perfctl(int counter)
{
wrmsr(MSR_PERF_CTL_0 + counter, 0);
}
static void set_perfctr(int counter, unsigned long value)
{
wrmsr(MSR_PERF_CTR_0 + counter, value);
}
static unsigned long read_perfctr(int counter)
{
return rdpmc(counter);
}
#define ihk_mc_mb() asm volatile("mfence" : : : "memory");
struct x86_desc_ptr {
uint16_t size;
uint64_t address;
} __attribute__((packed));
struct tss64 {
unsigned int reserved0;
unsigned long rsp0;
unsigned long rsp1;
unsigned long rsp2;
unsigned int reserved1, reserved2;
unsigned long ist[7];
unsigned int reserved3, reserved4;
unsigned short reserved5;
unsigned short iomap_address;
} __attribute__((packed));
struct x86_basic_regs {
unsigned long r15;
unsigned long r14;
unsigned long r13;
unsigned long r12;
unsigned long rbp;
unsigned long rbx;
unsigned long r11;
unsigned long r10;
unsigned long r9;
unsigned long r8;
unsigned long rax;
unsigned long rcx;
unsigned long rdx;
unsigned long rsi;
unsigned long rdi;
union {
unsigned long orig_rax; /* syscall */
unsigned long error; /* interrupts */
};
unsigned long rip;
unsigned long cs;
unsigned long rflags;
unsigned long rsp;
unsigned long ss;
};
struct x86_sregs {
unsigned long fs_base;
unsigned long gs_base;
unsigned long ds;
unsigned long es;
unsigned long fs;
unsigned long gs;
};
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
/*
* Page fault error code bits:
*
* bit 0 == 0: no page found 1: protection fault
* bit 1 == 0: read access 1: write access
* bit 2 == 0: kernel-mode access 1: user-mode access
* bit 3 == 1: use of reserved bit detected
* bit 4 == 1: fault was an instruction fetch
*
* internal use:
* bit 29 == 1: Make PF map text modified by ptrace_poketext()
* bit 30 == 1: don't use COW page to resolve page fault.
*/
enum x86_pf_error_code {
PF_PROT = 1 << 0,
PF_WRITE = 1 << 1,
PF_USER = 1 << 2,
PF_RSVD = 1 << 3,
PF_INSTR = 1 << 4,
PF_PATCH = 1 << 29,
PF_POPULATE = 1 << 30,
};
struct i387_fxsave_struct {
unsigned short cwd;
unsigned short swd;
unsigned short twd;
unsigned short fop;
union {
struct {
unsigned long rip;
unsigned long rdp;
};
struct {
unsigned int fip;
unsigned int fcs;
unsigned int foo;
unsigned int fos;
};
};
unsigned int mxcsr;
unsigned int mxcsr_mask;
unsigned int st_space[32];
unsigned int xmm_space[64];
unsigned int padding[12];
union {
unsigned int padding1[12];
unsigned int sw_reserved[12];
};
} __attribute__((aligned(16)));
struct ymmh_struct {
unsigned int ymmh_space[64];
};
struct lwp_struct {
unsigned char reserved[128];
};
struct bndreg {
unsigned long lower_bound;
unsigned long upper_bound;
} __attribute__((packed));
struct bndcsr {
unsigned long bndcfgu;
unsigned long bndstatus;
} __attribute__((packed));
struct xsave_hdr_struct {
unsigned long xstate_bv;
unsigned long xcomp_bv;
unsigned long reserved[6];
} __attribute__((packed));
struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
struct ymmh_struct ymmh;
struct lwp_struct lwp;
struct bndreg bndreg[4];
struct bndcsr bndcsr;
} __attribute__ ((packed, aligned (64)));
typedef struct xsave_struct fp_regs_struct;
#endif

View File

@ -0,0 +1,95 @@
/**
* \file rlimit.h
* License details are found in the file LICENSE.
* \brief
* Kinds of resource limit
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#ifndef __RLIMIT_H
#define __RLIMIT_H
/* Kinds of resource limit. */
enum __rlimit_resource
{
/* Per-process CPU limit, in seconds. */
RLIMIT_CPU = 0,
#define RLIMIT_CPU RLIMIT_CPU
/* Largest file that can be created, in bytes. */
RLIMIT_FSIZE = 1,
#define RLIMIT_FSIZE RLIMIT_FSIZE
/* Maximum size of data segment, in bytes. */
RLIMIT_DATA = 2,
#define RLIMIT_DATA RLIMIT_DATA
/* Maximum size of stack segment, in bytes. */
RLIMIT_STACK = 3,
#define RLIMIT_STACK RLIMIT_STACK
/* Largest core file that can be created, in bytes. */
RLIMIT_CORE = 4,
#define RLIMIT_CORE RLIMIT_CORE
/* Largest resident set size, in bytes.
This affects swapping; processes that are exceeding their
resident set size will be more likely to have physical memory
taken from them. */
__RLIMIT_RSS = 5,
#define RLIMIT_RSS __RLIMIT_RSS
/* Number of open files. */
RLIMIT_NOFILE = 7,
__RLIMIT_OFILE = RLIMIT_NOFILE, /* BSD name for same. */
#define RLIMIT_NOFILE RLIMIT_NOFILE
#define RLIMIT_OFILE __RLIMIT_OFILE
/* Address space limit. */
RLIMIT_AS = 9,
#define RLIMIT_AS RLIMIT_AS
/* Number of processes. */
__RLIMIT_NPROC = 6,
#define RLIMIT_NPROC __RLIMIT_NPROC
/* Locked-in-memory address space. */
__RLIMIT_MEMLOCK = 8,
#define RLIMIT_MEMLOCK __RLIMIT_MEMLOCK
/* Maximum number of file locks. */
__RLIMIT_LOCKS = 10,
#define RLIMIT_LOCKS __RLIMIT_LOCKS
/* Maximum number of pending signals. */
__RLIMIT_SIGPENDING = 11,
#define RLIMIT_SIGPENDING __RLIMIT_SIGPENDING
/* Maximum bytes in POSIX message queues. */
__RLIMIT_MSGQUEUE = 12,
#define RLIMIT_MSGQUEUE __RLIMIT_MSGQUEUE
/* Maximum nice priority allowed to raise to.
Nice levels 19 .. -20 correspond to 0 .. 39
values of this resource limit. */
__RLIMIT_NICE = 13,
#define RLIMIT_NICE __RLIMIT_NICE
/* Maximum realtime priority allowed for non-privileged
processes. */
__RLIMIT_RTPRIO = 14,
#define RLIMIT_RTPRIO __RLIMIT_RTPRIO
__RLIMIT_NLIMITS = 15,
__RLIM_NLIMITS = __RLIMIT_NLIMITS
#define RLIMIT_NLIMITS __RLIMIT_NLIMITS
#define RLIM_NLIMITS __RLIM_NLIMITS
};
#include <generic-rlimit.h>
#endif

View File

@ -0,0 +1,250 @@
/**
* \file arch/x86/kernel/include/signal.h
* License details are found in the file LICENSE.
* \brief
* define signal
* \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
*/
/*
* HISTORY:
* 2012/02/11 bgerofi what kind of new features have been added
*/
#ifndef __HEADER_X86_COMMON_SIGNAL_H
#define __HEADER_X86_COMMON_SIGNAL_H
#define _NSIG 64
#define _NSIG_BPW 64
#define _NSIG_WORDS (_NSIG / _NSIG_BPW)
typedef unsigned long int __sigset_t;
#define __sigmask(sig) (((__sigset_t) 1) << ((sig) - 1))
typedef struct {
__sigset_t __val[_NSIG_WORDS];
} sigset_t;
#define SIG_BLOCK 0
#define SIG_UNBLOCK 1
#define SIG_SETMASK 2
struct sigaction {
void (*sa_handler)(int);
unsigned long sa_flags;
void (*sa_restorer)(int);
sigset_t sa_mask;
};
typedef void __sig_fn_t(int);
typedef __sig_fn_t *__sig_handler_t;
#define SIG_DFL (__sig_handler_t)0
#define SIG_IGN (__sig_handler_t)1
#define SIG_ERR (__sig_handler_t)-1
#define SA_NOCLDSTOP 0x00000001U
#define SA_NOCLDWAIT 0x00000002U
#define SA_NODEFER 0x40000000U
#define SA_ONSTACK 0x08000000U
#define SA_RESETHAND 0x80000000U
#define SA_RESTART 0x10000000U
#define SA_SIGINFO 0x00000004U
struct k_sigaction {
struct sigaction sa;
};
typedef struct sigaltstack {
void *ss_sp;
int ss_flags;
size_t ss_size;
} stack_t;
#define MINSIGSTKSZ 2048
#define SS_ONSTACK 1
#define SS_DISABLE 2
typedef union sigval {
int sival_int;
void *sival_ptr;
} sigval_t;
#define __SI_MAX_SIZE 128
#define __SI_PAD_SIZE ((__SI_MAX_SIZE / sizeof (int)) - 4)
typedef struct siginfo {
int si_signo; /* Signal number. */
int si_errno; /* If non-zero, an errno value associated with
this signal, as defined in <errno.h>. */
int si_code; /* Signal code. */
#define SI_USER 0 /* sent by kill, sigsend, raise */
#define SI_KERNEL 0x80 /* sent by the kernel from somewhere */
#define SI_QUEUE -1 /* sent by sigqueue */
#define SI_TIMER __SI_CODE(__SI_TIMER,-2) /* sent by timer expiration */
#define SI_MESGQ __SI_CODE(__SI_MESGQ,-3) /* sent by real time mesq state change
*/
#define SI_ASYNCIO -4 /* sent by AIO completion */
#define SI_SIGIO -5 /* sent by queued SIGIO */
#define SI_TKILL -6 /* sent by tkill system call */
#define SI_DETHREAD -7 /* sent by execve() killing subsidiary threads */
#define ILL_ILLOPC 1 /* illegal opcode */
#define ILL_ILLOPN 2 /* illegal operand */
#define ILL_ILLADR 3 /* illegal addressing mode */
#define ILL_ILLTRP 4 /* illegal trap */
#define ILL_PRVOPC 5 /* privileged opcode */
#define ILL_PRVREG 6 /* privileged register */
#define ILL_COPROC 7 /* coprocessor error */
#define ILL_BADSTK 8 /* internal stack error */
#define FPE_INTDIV 1 /* integer divide by zero */
#define FPE_INTOVF 2 /* integer overflow */
#define FPE_FLTDIV 3 /* floating point divide by zero */
#define FPE_FLTOVF 4 /* floating point overflow */
#define FPE_FLTUND 5 /* floating point underflow */
#define FPE_FLTRES 6 /* floating point inexact result */
#define FPE_FLTINV 7 /* floating point invalid operation */
#define FPE_FLTSUB 8 /* subscript out of range */
#define SEGV_MAPERR 1 /* address not mapped to object */
#define SEGV_ACCERR 2 /* invalid permissions for mapped object */
#define BUS_ADRALN 1 /* invalid address alignment */
#define BUS_ADRERR 2 /* non-existent physical address */
#define BUS_OBJERR 3 /* object specific hardware error */
/* hardware memory error consumed on a machine check: action required */
#define BUS_MCEERR_AR 4
/* hardware memory error detected in process but not consumed: action optional */
#define BUS_MCEERR_AO 5
#define TRAP_BRKPT 1 /* process breakpoint */
#define TRAP_TRACE 2 /* process trace trap */
#define TRAP_BRANCH 3 /* process taken branch trap */
#define TRAP_HWBKPT 4 /* hardware breakpoint/watchpoint */
#define CLD_EXITED 1 /* child has exited */
#define CLD_KILLED 2 /* child was killed */
#define CLD_DUMPED 3 /* child terminated abnormally */
#define CLD_TRAPPED 4 /* traced child has trapped */
#define CLD_STOPPED 5 /* child has stopped */
#define CLD_CONTINUED 6 /* stopped child has continued */
#define POLL_IN 1 /* data input available */
#define POLL_OUT 2 /* output buffers available */
#define POLL_MSG 3 /* input message available */
#define POLL_ERR 4 /* i/o error */
#define POLL_PRI 5 /* high priority input available */
#define POLL_HUP 6 /* device disconnected */
#define SIGEV_SIGNAL 0 /* notify via signal */
#define SIGEV_NONE 1 /* other notification: meaningless */
#define SIGEV_THREAD 2 /* deliver via thread creation */
#define SIGEV_THREAD_ID 4 /* deliver to thread */
union {
int _pad[__SI_PAD_SIZE];
/* kill(). */
struct {
int si_pid;/* Sending process ID. */
int si_uid;/* Real user ID of sending process. */
} _kill;
/* POSIX.1b timers. */
struct {
int si_tid; /* Timer ID. */
int si_overrun; /* Overrun count. */
sigval_t si_sigval; /* Signal value. */
} _timer;
/* POSIX.1b signals. */
struct {
int si_pid; /* Sending process ID. */
int si_uid; /* Real user ID of sending process. */
sigval_t si_sigval; /* Signal value. */
} _rt;
/* SIGCHLD. */
struct {
int si_pid; /* Which child. */
int si_uid; /* Real user ID of sending process. */
int si_status; /* Exit value or signal. */
long si_utime;
long si_stime;
} _sigchld;
/* SIGILL, SIGFPE, SIGSEGV, SIGBUS. */
struct {
void *si_addr; /* Faulting insn/memory ref. */
} _sigfault;
/* SIGPOLL. */
struct {
long int si_band; /* Band event for SIGPOLL. */
int si_fd;
} _sigpoll;
} _sifields;
} siginfo_t;
struct signalfd_siginfo {
unsigned int ssi_signo;
int ssi_errno;
int ssi_code;
unsigned int ssi_pid;
unsigned int ssi_uid;
int ssi_fd;
unsigned int ssi_tid;
unsigned int ssi_band;
unsigned int ssi_overrun;
unsigned int ssi_trapno;
int ssi_status;
int ssi_int;
unsigned long ssi_ptr;
unsigned long ssi_utime;
unsigned long ssi_stime;
unsigned long ssi_addr;
unsigned short ssi_addr_lsb;
char __pad[46];
};
#define SIGHUP 1
#define SIGINT 2
#define SIGQUIT 3
#define SIGILL 4
#define SIGTRAP 5
#define SIGABRT 6
#define SIGIOT 6
#define SIGBUS 7
#define SIGFPE 8
#define SIGKILL 9
#define SIGUSR1 10
#define SIGSEGV 11
#define SIGUSR2 12
#define SIGPIPE 13
#define SIGALRM 14
#define SIGTERM 15
#define SIGSTKFLT 16
#define SIGCHLD 17
#define SIGCONT 18
#define SIGSTOP 19
#define SIGTSTP 20
#define SIGTTIN 21
#define SIGTTOU 22
#define SIGURG 23
#define SIGXCPU 24
#define SIGXFSZ 25
#define SIGVTALRM 26
#define SIGPROF 27
#define SIGWINCH 28
#define SIGIO 29
#define SIGPOLL SIGIO
#define SIGPWR 30
#define SIGSYS 31
#define SIGUNUSED 31
#define SIGRTMIN 32
#define PTRACE_EVENT_EXEC 4
#endif /*__HEADER_X86_COMMON_SIGNAL_H*/

View File

@ -0,0 +1,171 @@
/* syscall_list.h COPYRIGHT FUJITSU LIMITED 2017 */
/**
* \file syscall_list.h
* License details are found in the file LICENSE.
* \brief
* define system calls
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2013 Hitachi, Ltd.
*/
/*
* HISTORY:
*/
/*
* SYSCALL_HANDLED(number, name)
* defines a system call that is handled by McKernel.
* handler is defined with SYSCALL_DECLARE.
*
* SYSCALL_DELEGATED(number, name)
* defines the system call that is just delegated to the host.
* syscall_name[] only, no handler exists.
*/
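/*
 * Illustrative example, not part of the original file: under the convention
 * above, an entry such as SYSCALL_HANDLED(39, getpid) means McKernel
 * implements the handler itself (declared elsewhere via SYSCALL_DECLARE),
 * whereas SYSCALL_DELEGATED(1, write) only records the name and the call is
 * forwarded to the Linux host. The SYSCALL_DECLARE definition lives outside
 * this file and is not reproduced here.
 */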
SYSCALL_HANDLED(0, read)
SYSCALL_DELEGATED(1, write)
SYSCALL_HANDLED(2, open)
SYSCALL_HANDLED(3, close)
SYSCALL_DELEGATED(4, stat)
SYSCALL_DELEGATED(5, fstat)
SYSCALL_DELEGATED(7, poll)
SYSCALL_DELEGATED(8, lseek)
SYSCALL_HANDLED(9, mmap)
SYSCALL_HANDLED(10, mprotect)
SYSCALL_HANDLED(11, munmap)
SYSCALL_HANDLED(12, brk)
SYSCALL_HANDLED(13, rt_sigaction)
SYSCALL_HANDLED(14, rt_sigprocmask)
SYSCALL_HANDLED(15, rt_sigreturn)
SYSCALL_HANDLED(16, ioctl)
SYSCALL_DELEGATED(17, pread64)
SYSCALL_DELEGATED(18, pwrite64)
SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_DELEGATED(23, select)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(26, msync)
SYSCALL_HANDLED(27, mincore)
SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(29, shmget)
SYSCALL_HANDLED(30, shmat)
SYSCALL_HANDLED(31, shmctl)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(35, nanosleep)
SYSCALL_HANDLED(36, getitimer)
SYSCALL_HANDLED(38, setitimer)
SYSCALL_HANDLED(39, getpid)
SYSCALL_HANDLED(56, clone)
SYSCALL_DELEGATED(57, fork)
SYSCALL_HANDLED(58, vfork)
SYSCALL_HANDLED(59, execve)
SYSCALL_HANDLED(60, exit)
SYSCALL_HANDLED(61, wait4)
SYSCALL_HANDLED(62, kill)
SYSCALL_DELEGATED(63, uname)
SYSCALL_DELEGATED(65, semop)
SYSCALL_HANDLED(67, shmdt)
SYSCALL_DELEGATED(69, msgsnd)
SYSCALL_DELEGATED(70, msgrcv)
SYSCALL_HANDLED(72, fcntl)
SYSCALL_DELEGATED(79, getcwd)
SYSCALL_DELEGATED(87, unlink)
SYSCALL_DELEGATED(89, readlink)
SYSCALL_HANDLED(96, gettimeofday)
SYSCALL_HANDLED(97, getrlimit)
SYSCALL_HANDLED(98, getrusage)
SYSCALL_HANDLED(100, times)
SYSCALL_HANDLED(101, ptrace)
SYSCALL_HANDLED(102, getuid)
SYSCALL_HANDLED(104, getgid)
SYSCALL_HANDLED(105, setuid)
SYSCALL_HANDLED(106, setgid)
SYSCALL_HANDLED(107, geteuid)
SYSCALL_HANDLED(108, getegid)
SYSCALL_HANDLED(109, setpgid)
SYSCALL_HANDLED(110, getppid)
SYSCALL_DELEGATED(111, getpgrp)
SYSCALL_HANDLED(113, setreuid)
SYSCALL_HANDLED(114, setregid)
SYSCALL_HANDLED(117, setresuid)
SYSCALL_HANDLED(118, getresuid)
SYSCALL_HANDLED(119, setresgid)
SYSCALL_HANDLED(120, getresgid)
SYSCALL_HANDLED(122, setfsuid)
SYSCALL_HANDLED(123, setfsgid)
SYSCALL_HANDLED(127, rt_sigpending)
SYSCALL_HANDLED(128, rt_sigtimedwait)
SYSCALL_HANDLED(129, rt_sigqueueinfo)
SYSCALL_HANDLED(130, rt_sigsuspend)
SYSCALL_HANDLED(131, sigaltstack)
SYSCALL_HANDLED(142, sched_setparam)
SYSCALL_HANDLED(143, sched_getparam)
SYSCALL_HANDLED(144, sched_setscheduler)
SYSCALL_HANDLED(145, sched_getscheduler)
SYSCALL_HANDLED(146, sched_get_priority_max)
SYSCALL_HANDLED(147, sched_get_priority_min)
SYSCALL_HANDLED(148, sched_rr_get_interval)
SYSCALL_HANDLED(149, mlock)
SYSCALL_HANDLED(150, munlock)
SYSCALL_HANDLED(151, mlockall)
SYSCALL_HANDLED(152, munlockall)
SYSCALL_HANDLED(158, arch_prctl)
SYSCALL_HANDLED(160, setrlimit)
SYSCALL_HANDLED(164, settimeofday)
SYSCALL_HANDLED(186, gettid)
SYSCALL_HANDLED(200, tkill)
SYSCALL_DELEGATED(201, time)
SYSCALL_HANDLED(202, futex)
SYSCALL_HANDLED(203, sched_setaffinity)
SYSCALL_HANDLED(204, sched_getaffinity)
SYSCALL_DELEGATED(208, io_getevents)
SYSCALL_HANDLED(216, remap_file_pages)
SYSCALL_DELEGATED(217, getdents64)
SYSCALL_HANDLED(218, set_tid_address)
SYSCALL_DELEGATED(220, semtimedop)
SYSCALL_HANDLED(228, clock_gettime)
SYSCALL_DELEGATED(230, clock_nanosleep)
SYSCALL_HANDLED(231, exit_group)
SYSCALL_DELEGATED(232, epoll_wait)
SYSCALL_HANDLED(234, tgkill)
SYSCALL_HANDLED(237, mbind)
SYSCALL_HANDLED(238, set_mempolicy)
SYSCALL_HANDLED(239, get_mempolicy)
SYSCALL_HANDLED(247, waitid)
SYSCALL_HANDLED(256, migrate_pages)
#ifdef POSTK_DEBUG_ARCH_DEP_62 /* Absorb the difference between open and openat args. */
SYSCALL_HANDLED(257, openat)
#endif /* POSTK_DEBUG_ARCH_DEP_62 */
SYSCALL_DELEGATED(270, pselect6)
SYSCALL_DELEGATED(271, ppoll)
SYSCALL_HANDLED(273, set_robust_list)
SYSCALL_HANDLED(279, move_pages)
SYSCALL_DELEGATED(281, epoll_pwait)
SYSCALL_HANDLED(282, signalfd)
SYSCALL_HANDLED(289, signalfd4)
SYSCALL_HANDLED(298, perf_event_open)
#ifdef DCFA_KMOD
SYSCALL_HANDLED(303, mod_call)
#endif
SYSCALL_HANDLED(309, getcpu)
SYSCALL_HANDLED(310, process_vm_readv)
SYSCALL_HANDLED(311, process_vm_writev)
SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)
SYSCALL_HANDLED(604, pmc_reset)
SYSCALL_HANDLED(700, get_cpu_id)
#ifdef PROFILE_ENABLE
SYSCALL_HANDLED(__NR_profile, profile)
#endif // PROFILE_ENABLE
SYSCALL_HANDLED(730, util_migrate_inter_kernel)
SYSCALL_HANDLED(731, util_indicate_clone)
SYSCALL_HANDLED(732, get_system)
/* McKernel Specific */
SYSCALL_HANDLED(801, swapout)
SYSCALL_HANDLED(802, linux_mlock)
SYSCALL_HANDLED(803, suspend_threads)
SYSCALL_HANDLED(804, resume_threads)
SYSCALL_HANDLED(811, linux_spawn)
/**** End of File ****/

View File

@ -0,0 +1,270 @@
/**
* \file interrupt.S
* License details are found in the file LICENSE.
* \brief
* Interrupt handler entries.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*
* 2014/04 - bgerofi: save/restore rbp when entering/leaving kernel (for glibc)
* 2013/?? - bgerofi + shimosawa: handle rsp correctly for nested interrupts
*/
#define X86_CPU_LOCAL_OFFSET_TSS 176
#define X86_TSS_OFFSET_SP0 4
#define X86_CPU_LOCAL_OFFSET_SP0 \
(X86_CPU_LOCAL_OFFSET_TSS + X86_TSS_OFFSET_SP0)
#define X86_CPU_LOCAL_OFFSET_KSTACK 16
#define X86_CPU_LOCAL_OFFSET_USTACK 24
#define KERNEL_CS 32
#define KERNEL_DS 40
#define USER_CS (48 + 3)
#define USER_DS (56 + 3)
/* struct x86_user_context */
#define X86_SREGS_BASE (0)
#define X86_SREGS_SIZE 48
#define X86_FLAGS_BASE (X86_SREGS_BASE + X86_SREGS_SIZE)
#define X86_FLAGS_SIZE 8
#define X86_REGS_BASE (X86_FLAGS_BASE + X86_FLAGS_SIZE)
#define RAX_OFFSET (X86_REGS_BASE + 80)
#define ERROR_OFFSET (X86_REGS_BASE + 120)
#define RSP_OFFSET (X86_REGS_BASE + 152)
#define PUSH_ALL_REGS \
pushq %rdi; \
pushq %rsi; \
pushq %rdx; \
pushq %rcx; \
pushq %rax; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11; \
pushq %rbx; \
pushq %rbp; \
pushq %r12; \
pushq %r13; \
pushq %r14; \
pushq %r15; \
pushq $1; /* is_gpr_valid is set, and others are cleared */ \
subq $X86_FLAGS_BASE,%rsp /* for x86_sregs, etc. */
#define POP_ALL_REGS \
movq $0,X86_FLAGS_BASE(%rsp); /* clear all flags */ \
addq $X86_REGS_BASE,%rsp; /* discard x86_sregs, flags, etc. */ \
popq %r15; \
popq %r14; \
popq %r13; \
popq %r12; \
popq %rbp; \
popq %rbx; \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rax; \
popq %rcx; \
popq %rdx; \
popq %rsi; \
popq %rdi
.data
.globl generic_common_handlers
generic_common_handlers:
.text
vector=0
.rept 256
1:
cld
pushq $vector
jmp common_interrupt
.previous
.quad 1b
.text
vector=vector+1
.endr
common_interrupt:
PUSH_ALL_REGS
movq ERROR_OFFSET(%rsp), %rdi
movq %rsp, %rsi
call handle_interrupt /* Enter C code */
POP_ALL_REGS
addq $8, %rsp
iretq
.globl __page_fault_handler_address
__page_fault_handler_address:
.quad 0
.globl page_fault
page_fault:
cld
PUSH_ALL_REGS
movq %cr2, %rdi
movq ERROR_OFFSET(%rsp),%rsi
movq %rsp,%rdx
movq __page_fault_handler_address(%rip), %rax
andq %rax, %rax
jz 1f
call *%rax
POP_ALL_REGS
addq $8, %rsp
iretq
1:
jmp 1b
.globl general_protection_exception
general_protection_exception:
cld
PUSH_ALL_REGS
movq %rsp, %rdi
call gpe_handler
POP_ALL_REGS
addq $8, %rsp
iretq
.global __freeze
__freeze:
PUSH_ALL_REGS
callq freeze
POP_ALL_REGS
iretq
.globl nmi
nmi:
#define PANICED 232
#define PANIC_REGS 240
movq %rax,%gs:PANIC_REGS+0x00
movq %rsp,%gs:PANIC_REGS+0x08
movl nmi_mode(%rip),%eax
cmp $1,%rax
je 1f
cmp $2,%rax
jne 3f
1:
cld
movq %gs:PANIC_REGS+0x00,%rax
PUSH_ALL_REGS
subq $40, %rsp
movq %rsp,%gs:PANIC_REGS+0x10
movq %rsp, %rdi
call freeze_thaw
cmpq $0, %rax
jnz 2f
addq $40, %rsp
2:
POP_ALL_REGS
iretq
3:
movq %rbx,%gs:PANIC_REGS+0x08
movq %rcx,%gs:PANIC_REGS+0x10
movq %rdx,%gs:PANIC_REGS+0x18
movq %rsi,%gs:PANIC_REGS+0x20
movq %rdi,%gs:PANIC_REGS+0x28
movq %rbp,%gs:PANIC_REGS+0x30
movq 0x18(%rsp),%rax /* rsp */
movq %rax,%gs:PANIC_REGS+0x38
movq %r8, %gs:PANIC_REGS+0x40
movq %r9, %gs:PANIC_REGS+0x48
movq %r10,%gs:PANIC_REGS+0x50
movq %r11,%gs:PANIC_REGS+0x58
movq %r12,%gs:PANIC_REGS+0x60
movq %r13,%gs:PANIC_REGS+0x68
movq %r14,%gs:PANIC_REGS+0x70
movq %r15,%gs:PANIC_REGS+0x78
movq 0x00(%rsp),%rax /* rip */
movq %rax,%gs:PANIC_REGS+0x80
movq 0x10(%rsp),%rax /* rflags */
movl %eax,%gs:PANIC_REGS+0x88
movq 0x08(%rsp),%rax /* cs */
movl %eax,%gs:PANIC_REGS+0x8C
movq 0x20(%rsp),%rax /* ss */
movl %eax,%gs:PANIC_REGS+0x90
xorq %rax,%rax
movw %ds,%ax
movl %eax,%gs:PANIC_REGS+0x94
movw %es,%ax
movl %eax,%gs:PANIC_REGS+0x98
movw %fs,%ax
movl %eax,%gs:PANIC_REGS+0x9C
movw %gs,%ax
movl %eax,%gs:PANIC_REGS+0xA0
movq $1,%gs:PANICED
call ihk_mc_query_mem_areas
1:
hlt
jmp 1b
.globl x86_syscall
x86_syscall:
cld
movq %rsp, %gs:X86_CPU_LOCAL_OFFSET_USTACK
movq %gs:(X86_CPU_LOCAL_OFFSET_SP0), %rsp
pushq $(USER_DS)
pushq $0
pushq %r11
pushq $(USER_CS)
pushq %rcx
pushq %rax /* error code (= system call number) */
PUSH_ALL_REGS
movq %gs:X86_CPU_LOCAL_OFFSET_USTACK, %rcx
movq %rcx, RSP_OFFSET(%rsp)
movq RAX_OFFSET(%rsp), %rdi
movw %ss, %ax
movw %ax, %ds
movq %rsp, %rsi
callq *__x86_syscall_handler(%rip)
1:
movq %rax, RAX_OFFSET(%rsp)
POP_ALL_REGS
#ifdef USE_SYSRET
movq 32(%rsp), %rsp
sysretq
#else
addq $8, %rsp
iretq
#endif
.globl enter_user_mode
enter_user_mode:
callq release_runq_lock
movq $0, %rdi
movq %rsp, %rsi
call check_signal
call utilthr_migrate
movq $0, %rdi
call set_cputime
POP_ALL_REGS
addq $8, %rsp
iretq
.globl debug_exception
debug_exception:
cld
pushq $0 /* error */
PUSH_ALL_REGS
movq %rsp, %rdi
call debug_handler
POP_ALL_REGS
addq $8, %rsp
iretq
.globl int3_exception
int3_exception:
cld
pushq $0 /* error */
PUSH_ALL_REGS
movq %rsp, %rdi
call int3_handler
POP_ALL_REGS
addq $8, %rsp
iretq

128
arch/x86_64/kernel/local.c Normal file
View File

@ -0,0 +1,128 @@
/**
* \file local.c
* License details are found in the file LICENSE.
* \brief
* Manipulate information for individual CPUs. This information
* resides in memory.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
*/
#include <cpulocal.h>
#include <ihk/atomic.h>
#include <ihk/mm.h>
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <registers.h>
#include <string.h>
#define LOCALS_SPAN (4 * PAGE_SIZE)
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
void init_processors_local(int max_id)
{
size_t size;
size = LOCALS_SPAN * max_id;
/* Is a contiguous allocation adequate? */
locals = ihk_mc_alloc_pages(size/PAGE_SIZE, IHK_MC_AP_CRITICAL);
memset(locals, 0, size);
kprintf("locals = %p\n", locals);
}
/*@
@ requires \valid(id);
@ ensures \result == locals + (LOCALS_SPAN * id);
@ assigns \nothing;
@*/
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
{
return (struct x86_cpu_local_variables *)
((char *)locals + (LOCALS_SPAN * id));
}
static void *get_x86_cpu_local_kstack(int id)
{
return ((char *)locals + (LOCALS_SPAN * (id + 1)));
}
struct x86_cpu_local_variables *get_x86_this_cpu_local(void)
{
int id = ihk_mc_get_processor_id();
return get_x86_cpu_local_variable(id);
}
void *get_x86_this_cpu_kstack(void)
{
int id = ihk_mc_get_processor_id();
return get_x86_cpu_local_kstack(id);
}
static void set_fs_base(void *address)
{
wrmsr(MSR_FS_BASE, (unsigned long)address);
}
static void set_gs_base(void *address)
{
wrmsr(MSR_GS_BASE, (unsigned long)address);
}
static ihk_atomic_t last_processor_id = IHK_ATOMIC_INIT(-1);
void assign_processor_id(void)
{
int id;
struct x86_cpu_local_variables *v;
id = ihk_atomic_inc_return(&last_processor_id);
v = get_x86_cpu_local_variable(id);
set_gs_base(v);
v->processor_id = id;
}
void init_boot_processor_local(void)
{
static struct x86_cpu_local_variables avar;
memset(&avar, -1, sizeof(avar));
set_gs_base(&avar);
return;
}
/** IHK **/
/*@
@ ensures \result == %gs;
@ assigns \nothing;
*/
int ihk_mc_get_processor_id(void)
{
int id;
asm volatile("movl %%gs:0, %0" : "=r"(id));
return id;
}
/*@
@ ensures \result == (locals + (LOCALS_SPAN * %gs))->apic_id;
@ assigns \nothing;
*/
int ihk_mc_get_hardware_processor_id(void)
{
struct x86_cpu_local_variables *v = get_x86_this_cpu_local();
return v->apic_id;
}

50
arch/x86_64/kernel/lock.c Normal file
View File

@ -0,0 +1,50 @@
/**
* \file lock.c
* License details are found in the file LICENSE.
* \brief
* Spin lock.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#include <ihk/lock.h>
#if 0
void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
{
*lock = 0;
}
unsigned long ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
unsigned long flags;
flags = cpu_disable_interrupt_save();
asm volatile("lock ; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
"1:\t"
"cmpl %0, %2\n\t"
"je 2f\n\t"
"rep ; nop\n\t"
"movzwl %1, %2\n\t"
"jmp 1b\n"
"2:"
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
return flags;
}
void ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
cpu_restore_interrupt(flags);
}
#endif

3025
arch/x86_64/kernel/memory.c Normal file

File diff suppressed because it is too large

55
arch/x86_64/kernel/mikc.c Normal file
View File

@ -0,0 +1,55 @@
/**
* \file mikc.c
* License details are found in the file LICENSE.
* \brief
* Initialize Inter-Kernel Communication (IKC)
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#include <ihk/ikc.h>
#include <ihk/lock.h>
#include <ikc/msg.h>
#include <memory.h>
#include <string.h>
extern int num_processors;
extern void arch_set_mikc_queue(void *r, void *w);
ihk_ikc_ph_t arch_master_channel_packet_handler;
int ihk_mc_ikc_init_first_local(struct ihk_ikc_channel_desc *channel,
ihk_ikc_ph_t packet_handler)
{
struct ihk_ikc_queue_head *rq, *wq;
size_t mikc_queue_pages;
ihk_ikc_system_init(NULL);
memset(channel, 0, sizeof(struct ihk_ikc_channel_desc));
mikc_queue_pages = ((2 * num_processors * MASTER_IKCQ_PKTSIZE)
+ (PAGE_SIZE - 1)) / PAGE_SIZE;
/* Place both sides in this side */
rq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
wq = ihk_mc_alloc_pages(mikc_queue_pages, IHK_MC_AP_CRITICAL);
ihk_ikc_init_queue(rq, 0, 0,
mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
ihk_ikc_init_queue(wq, 0, 0,
mikc_queue_pages * PAGE_SIZE, MASTER_IKCQ_PKTSIZE);
arch_master_channel_packet_handler = packet_handler;
ihk_ikc_init_desc(channel, IKC_OS_HOST, 0, rq, wq,
ihk_ikc_master_channel_packet_handler, channel);
ihk_ikc_enable_channel(channel);
/* Set boot parameter */
arch_set_mikc_queue(rq, wq);
return 0;
}

View File

@ -0,0 +1,475 @@
/**
* \file perfctr.c
* License details are found in the file LICENSE.
* \brief
* Manipulate performance counter.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
#include <ihk/perfctr.h>
#include <march.h>
#include <errno.h>
#include <ihk/debug.h>
#include <registers.h>
#include <mc_perf_event.h>
extern unsigned int *x86_march_perfmap;
extern int running_on_kvm(void);
#ifdef POSTK_DEBUG_TEMP_FIX_31
int ihk_mc_perfctr_fixed_init(int counter, int mode);
#endif/*POSTK_DEBUG_TEMP_FIX_31*/
//#define PERFCTR_DEBUG
#ifdef PERFCTR_DEBUG
#define dkprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#else
#define dkprintf(...) do { } while (0)
#define ekprintf(...) do { kprintf(__VA_ARGS__); } while (0)
#endif
#define X86_CR4_PCE 0x00000100
#define PERFCTR_CHKANDJUMP(cond, msg, err) \
do { \
if(cond) { \
ekprintf("%s,"msg"\n", __FUNCTION__); \
ret = err; \
goto fn_fail; \
} \
} while(0)
int perf_counters_discovered = 0;
int X86_IA32_NUM_PERF_COUNTERS = 0;
unsigned long X86_IA32_PERF_COUNTERS_MASK = 0;
int X86_IA32_NUM_FIXED_PERF_COUNTERS = 0;
unsigned long X86_IA32_FIXED_PERF_COUNTERS_MASK = 0;
void x86_init_perfctr(void)
{
int i = 0;
unsigned long reg;
unsigned long value = 0;
uint64_t op;
uint64_t eax;
uint64_t ebx;
uint64_t ecx;
uint64_t edx;
/* Do not do it on KVM */
if (running_on_kvm()) return;
/* Allow PMC to be read from user space */
asm volatile("movq %%cr4, %0" : "=r"(reg));
reg |= X86_CR4_PCE;
asm volatile("movq %0, %%cr4" : : "r"(reg));
/* Detect number of supported performance counters */
if (!perf_counters_discovered) {
/* See Table 35.2 - Architectural MSRs in Vol 3C */
op = 0x0a;
asm volatile("cpuid" : "=a"(eax),"=b"(ebx),"=c"(ecx),"=d"(edx):"a"(op));
X86_IA32_NUM_PERF_COUNTERS = ((eax & 0xFF00) >> 8);
X86_IA32_PERF_COUNTERS_MASK = (1 << X86_IA32_NUM_PERF_COUNTERS) - 1;
X86_IA32_NUM_FIXED_PERF_COUNTERS = (edx & 0x0F);
X86_IA32_FIXED_PERF_COUNTERS_MASK =
((1UL << X86_IA32_NUM_FIXED_PERF_COUNTERS) - 1) <<
X86_IA32_BASE_FIXED_PERF_COUNTERS;
perf_counters_discovered = 1;
kprintf("X86_IA32_NUM_PERF_COUNTERS: %d, X86_IA32_NUM_FIXED_PERF_COUNTERS: %d\n",
X86_IA32_NUM_PERF_COUNTERS, X86_IA32_NUM_FIXED_PERF_COUNTERS);
}
/* Clear Fixed Counter Control */
value = rdmsr(MSR_PERF_FIXED_CTRL);
value &= 0xfffffffffffff000L;
wrmsr(MSR_PERF_FIXED_CTRL, value);
/* Clear Generic Counter Control */
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
wrmsr(MSR_IA32_PERFEVTSEL0 + i, 0);
}
/* Enable PMC Control */
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value |= X86_IA32_PERF_COUNTERS_MASK;
value |= X86_IA32_FIXED_PERF_COUNTERS_MASK;
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
}
static int set_perfctr_x86_direct(int counter, int mode, unsigned int value)
{
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
return -EINVAL;
}
// clear mode flags
value &= ~(3 << 16);
// set mode flags
if(mode & PERFCTR_USER_MODE) {
value |= 1 << 16;
}
if(mode & PERFCTR_KERNEL_MODE) {
value |= 1 << 17;
}
// wrmsr(MSR_PERF_GLOBAL_CTRL, 0);
value |= (1 << 22) | (1 << 18); /* EN */
value |= (1 << 20); /* Enable overflow interrupt */
wrmsr(MSR_IA32_PERFEVTSEL0 + counter, value);
//kprintf("wrmsr: %d <= %x\n", MSR_PERF_GLOBAL_CTRL, 0);
//kprintf("wrmsr: %d <= %x\n", MSR_IA32_PERFEVTSEL0 + counter, value);
return 0;
}
static int set_pmc_x86_direct(int counter, long val)
{
unsigned long cnt_bit = 0;
if (counter < 0) {
return -EINVAL;
}
val &= 0x000000ffffffffff; // 40bit Mask
cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
// set generic pmc
wrmsr(MSR_IA32_PMC0 + counter, val);
}
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
// set fixed pmc
wrmsr(MSR_IA32_FIXED_CTR0 + counter - X86_IA32_BASE_FIXED_PERF_COUNTERS, val);
}
else {
return -EINVAL;
}
return 0;
}
static int set_perfctr_x86(int counter, int event, int mask, int inv, int count,
int mode)
{
return set_perfctr_x86_direct(counter, mode,
CVAL2(event, mask, inv, count));
}
static int set_fixed_counter(int counter, int mode)
{
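/* IA32_FIXED_CTR_CTRL holds a 4-bit field per fixed counter:
 * bit 0 enables counting in ring 0 (kernel), bit 1 in ring 3 (user),
 * bit 3 enables PMI on overflow. */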
unsigned long value = 0;
unsigned int ctr_mask = 0xf;
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
unsigned int set_val = 0;
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
return -EINVAL;
}
// clear specified fixed counter info
value = rdmsr(MSR_PERF_FIXED_CTRL);
ctr_mask <<= counter_idx * 4;
value &= ~ctr_mask;
if (mode & PERFCTR_USER_MODE) {
set_val |= 1 << 1;
}
if (mode & PERFCTR_KERNEL_MODE) {
set_val |= 1;
}
set_val <<= counter_idx * 4;
value |= set_val;
wrmsr(MSR_PERF_FIXED_CTRL, value);
return 0;
}
#ifdef POSTK_DEBUG_TEMP_FIX_29
int ihk_mc_perfctr_init_raw(int counter, uint64_t config, int mode)
#else
int ihk_mc_perfctr_init_raw(int counter, unsigned int code, int mode)
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
{
#ifdef POSTK_DEBUG_TEMP_FIX_31
// PAPI_REF_CYC counted by fixed counter
if (counter >= X86_IA32_BASE_FIXED_PERF_COUNTERS) {
return ihk_mc_perfctr_fixed_init(counter, mode);
}
#endif /*POSTK_DEBUG_TEMP_FIX_31*/
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
return -EINVAL;
}
#ifdef POSTK_DEBUG_TEMP_FIX_29
return set_perfctr_x86_direct(counter, mode, config);
#else
return set_perfctr_x86_direct(counter, mode, code);
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
}
#ifdef POSTK_DEBUG_TEMP_FIX_29
int ihk_mc_perfctr_init(int counter, uint64_t config, int mode)
#else
int ihk_mc_perfctr_init(int counter, enum ihk_perfctr_type type, int mode)
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
{
#ifdef POSTK_DEBUG_TEMP_FIX_29
enum ihk_perfctr_type type;
switch (config) {
case PERF_COUNT_HW_CPU_CYCLES :
type = APT_TYPE_CYCLE;
break;
case PERF_COUNT_HW_INSTRUCTIONS :
type = APT_TYPE_INSTRUCTIONS;
break;
default :
// Not supported config.
type = PERFCTR_MAX_TYPE;
}
#endif /*POSTK_DEBUG_TEMP_FIX_29*/
if (counter < 0 || counter >= X86_IA32_NUM_PERF_COUNTERS) {
return -EINVAL;
}
if (type < 0 || type >= PERFCTR_MAX_TYPE) {
return -EINVAL;
}
if (!x86_march_perfmap[type]) {
return -EINVAL;
}
return set_perfctr_x86_direct(counter, mode, x86_march_perfmap[type]);
}
#ifdef HAVE_MARCH_PERFCTR_START
extern void x86_march_perfctr_start(unsigned long counter_mask);
#endif
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_start(int counter)
#else
int ihk_mc_perfctr_start(unsigned long counter_mask)
#endif /*POSTK_DEBUG_TEMP_FIX_30*/
{
int ret = 0;
unsigned long value = 0;
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
#ifdef POSTK_DEBUG_TEMP_FIX_30
unsigned long counter_mask = 1UL << counter;
#endif /*POSTK_DEBUG_TEMP_FIX_30*/
PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL);
#ifdef HAVE_MARCH_PERFCTR_START
x86_march_perfctr_start(counter_mask);
#endif
counter_mask &= mask;
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value |= counter_mask;
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
fn_exit:
return ret;
fn_fail:
goto fn_exit;
}
#ifdef POSTK_DEBUG_TEMP_FIX_30
int ihk_mc_perfctr_stop(int counter)
#else
int ihk_mc_perfctr_stop(unsigned long counter_mask)
#endif/*POSTK_DEBUG_TEMP_FIX_30*/
{
int ret = 0;
unsigned long value;
unsigned long mask = X86_IA32_PERF_COUNTERS_MASK | X86_IA32_FIXED_PERF_COUNTERS_MASK;
#ifdef POSTK_DEBUG_TEMP_FIX_30
unsigned long counter_mask = 1UL << counter;
#endif/*POSTK_DEBUG_TEMP_FIX_30*/
PERFCTR_CHKANDJUMP(counter_mask & ~mask, "counter_mask out of range", -EINVAL);
counter_mask &= mask;
value = rdmsr(MSR_PERF_GLOBAL_CTRL);
value &= ~counter_mask;
wrmsr(MSR_PERF_GLOBAL_CTRL, value);
if(counter_mask >> 32 & 0x1) {
value = rdmsr(MSR_PERF_FIXED_CTRL);
value &= ~(0xf);
wrmsr(MSR_PERF_FIXED_CTRL, value);
}
if(counter_mask >> 32 & 0x2) {
value = rdmsr(MSR_PERF_FIXED_CTRL);
value &= ~(0xf << 4);
wrmsr(MSR_PERF_FIXED_CTRL, value);
}
if(counter_mask >> 32 & 0x4) {
value = rdmsr(MSR_PERF_FIXED_CTRL);
value &= ~(0xf << 8);
wrmsr(MSR_PERF_FIXED_CTRL, value);
}
fn_exit:
return ret;
fn_fail:
goto fn_exit;
}
// init for fixed counter
int ihk_mc_perfctr_fixed_init(int counter, int mode)
{
unsigned long value = 0;
unsigned int ctr_mask = 0xf;
int counter_idx = counter - X86_IA32_BASE_FIXED_PERF_COUNTERS ;
unsigned int set_val = 0;
if (counter_idx < 0 || counter_idx >= X86_IA32_NUM_FIXED_PERF_COUNTERS) {
return -EINVAL;
}
// clear specified fixed counter info
value = rdmsr(MSR_PERF_FIXED_CTRL);
ctr_mask <<= counter_idx * 4;
value &= ~ctr_mask;
if (mode & PERFCTR_USER_MODE) {
set_val |= 1 << 1;
}
if (mode & PERFCTR_KERNEL_MODE) {
set_val |= 1;
}
// enable PMI on overflow
set_val |= 1 << 3;
set_val <<= counter_idx * 4;
value |= set_val;
wrmsr(MSR_PERF_FIXED_CTRL, value);
return 0;
}
int ihk_mc_perfctr_reset(int counter)
{
return set_pmc_x86_direct(counter, 0);
}
int ihk_mc_perfctr_set(int counter, long val)
{
return set_pmc_x86_direct(counter, val);
}
int ihk_mc_perfctr_read_mask(unsigned long counter_mask, unsigned long *value)
{
int i, j;
for (i = 0, j = 0; i < X86_IA32_NUM_PERF_COUNTERS && counter_mask;
i++, counter_mask >>= 1) {
if (counter_mask & 1) {
value[j++] = rdpmc(i);
}
}
return 0;
}
unsigned long ihk_mc_perfctr_read(int counter)
{
unsigned long retval = 0;
unsigned long cnt_bit = 0;
if (counter < 0) {
return -EINVAL;
}
cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
// read generic pmc
retval = rdpmc(counter);
}
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
// read fixed pmc (RDPMC with bit 30 set in the index selects the fixed-function counters)
retval = rdpmc((1 << 30) + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS));
}
else {
retval = -EINVAL;
}
return retval;
}
// read by rdmsr
unsigned long ihk_mc_perfctr_read_msr(int counter)
{
unsigned int idx = 0;
unsigned long retval = 0;
unsigned long cnt_bit = 0;
if (counter < 0) {
return -EINVAL;
}
cnt_bit = 1UL << counter;
if ( cnt_bit & X86_IA32_PERF_COUNTERS_MASK ) {
// read generic pmc
idx = MSR_IA32_PMC0 + counter;
retval = (unsigned long) rdmsr(idx);
}
else if ( cnt_bit & X86_IA32_FIXED_PERF_COUNTERS_MASK ) {
// read fixed pmc
idx = MSR_IA32_FIXED_CTR0 + (counter - X86_IA32_BASE_FIXED_PERF_COUNTERS);
retval = (unsigned long) rdmsr(idx);
}
else {
retval = -EINVAL;
}
return retval;
}
int ihk_mc_perfctr_alloc_counter(unsigned int *type, unsigned long *config, unsigned long pmc_status)
{
int ret = -1;
int i = 0;
if(*type == PERF_TYPE_HARDWARE) {
switch(*config){
case PERF_COUNT_HW_INSTRUCTIONS :
*type = PERF_TYPE_RAW;
*config = 0x5300c0;
break;
default :
// Unexpected config
return -1;
}
}
else if(*type != PERF_TYPE_RAW) {
return -1;
}
// find avail generic counter
for(i = 0; i < X86_IA32_NUM_PERF_COUNTERS; i++) {
if(!(pmc_status & (1 << i))) {
ret = i;
break;
}
}
return ret;
}
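
A minimal, hypothetical usage sketch of the counting API above (not part of this commit): the counter number and calling sequence are assumptions, the raw event value 0x5300c0 is the PERF_COUNT_HW_INSTRUCTIONS encoding used by ihk_mc_perfctr_alloc_counter(), and the non-POSTK_DEBUG_TEMP_FIX_29/30 signatures are assumed.

/* Hypothetical caller of the perfctr API above; illustration only. */
#include <ihk/perfctr.h>
static void perfctr_usage_example(void)
{
	int counter = 0;                      /* first generic counter, assumed free */
	unsigned long mask = 1UL << counter;
	unsigned long count;
	/* program user-mode counting of the raw instructions-retired event */
	if (ihk_mc_perfctr_init_raw(counter, 0x5300c0, PERFCTR_USER_MODE))
		return;
	ihk_mc_perfctr_reset(counter);        /* zero the counter value */
	ihk_mc_perfctr_start(mask);           /* set the bit in IA32_PERF_GLOBAL_CTRL */
	/* ... run the code to be measured ... */
	ihk_mc_perfctr_stop(mask);
	count = ihk_mc_perfctr_read(counter); /* RDPMC read of the generic counter */
	(void)count;
}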

arch/x86_64/kernel/syscall.c (new file, 1912 lines)

File diff suppressed because it is too large

View File

@ -0,0 +1,163 @@
/**
* \file trampoline.c
* License details are found in the file LICENSE.
* \brief
* (1) Set-up page table address, (2) Transition to 64-bit and paging mode,
* (3) Jump to specified address
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
*/
/*
* HISTORY
*/
#define BOOT_CS 0x10
#define BOOT_DS 0x18
#define BOOT_CS64 0x20
#define MSR_EFER 0xc0000080
#define EFER_LME (1 << 8)
.section .rodata, "a", @progbits
.code16
.globl trampoline_code_data
base = .
trampoline_code_data:
jmp cpu_start_body
.org 8
header_pgtbl:
.quad 0 /* page table address */
func_address:
.quad 0 /* load address */
arg:
.quad 0 /* next address */
stack_ptr:
.quad 0 /* initial stack */
debug:
.quad 0 /* debug area */
transit_pgtbl:
.quad 0 /* 32->64 bit table address */
cpu_start_body:
cli
wbinvd
movw %cs, %ax
movw %ax, %ds
movw %ax, %es
movw %ax, %ss
xorl %ebx, %ebx
movw %cs, %bx
shll $4, %ebx
movw $0x29, debug - base
/* Adjust GDT ptr to the 32-bit physical address */
addl %ebx, boot_gdtptr + 2 - base
addl %ebx, 2f - base
addl %ebx, start_64_vec - base
lgdtl boot_gdtptr - base
lidtl boot_idtptr - base
jmp 1f
1:
movl %cr0, %edx
orb $1, %dl
movl %edx, %cr0
ljmpl *(2f - base)
2: .long protect_start - base
.word BOOT_CS
.balign 8
.code32
protect_start:
movl $(BOOT_DS), %eax
movl %eax, %ds
movl %eax, %ss
/* Enable PAE */
movl %cr4, %eax
orl $0x20, %eax
movl %eax, %cr4
leal (stack_end - base)(%ebx), %esp
/* Load a page table */
movl (transit_pgtbl - base)(%ebx), %eax
movl %eax, %cr3
1:
/* Enable Long Mode */
movl $MSR_EFER, %ecx
movl $EFER_LME, %eax
xorl %edx, %edx
wrmsr
/* Enable Paging */
movl %cr0, %edx
orl $0x80000000, %edx
movl %edx, %cr0
ljmp *(start_64_vec - base)(%ebx)
.code64
.balign 8
start_64:
/* Okay, we are completely in the long mode ! */
/* So, use the real page table! */
movq (header_pgtbl - base)(%ebx), %rax
movq %rax, %cr3
movq (func_address - base)(%ebx), %rcx
cmpq $0, %rcx
/* If Loading IP is zero, just enter the infinite loop */
jz 3f
movq (stack_ptr - base)(%ebx), %rax
cmpq $0, %rax
jz 1f
movq %rax, %rsp
1:
/* Now, we prepare the parameters */
movq (arg - base)(%ebx), %rdi
jmp *%rcx
3:
cli
hlt
jmp 3b
boot_idtptr:
.short 0
.long 0
boot_gdtptr:
.short boot_gdt32_end - boot_gdt32
.long boot_gdt32 - base
.align 4
boot_gdt32:
.quad 0
.quad 0
.quad 0x00cf9b000000ffff
.quad 0x00cf93000000ffff
.quad 0x00af9b000000ffff
.quad 0x0000890000000067
boot_gdt32_end:
start_64_vec:
.long start_64 - base
.word BOOT_CS64, 0
stack:
.org 0x1000
stack_end:
.globl trampoline_code_data_end
trampoline_code_data_end:
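
The quadwords starting at byte offset 8 (see the .org 8 above) form a small header that this code reads at runtime and that the boot-side loader is expected to fill in beforehand. A hypothetical C view of that layout, for illustration only (the struct and its types are assumptions; the field names mirror the labels above):

#include <stdint.h>
/* Hypothetical mirror of the trampoline header above; not part of this
 * commit. The fields start at byte offset 8 from trampoline_code_data. */
struct trampoline_header {
	uint64_t header_pgtbl;   /* final page table, loaded into %cr3 in start_64 */
	uint64_t func_address;   /* entry point jumped to once in long mode */
	uint64_t arg;            /* passed to the entry point in %rdi */
	uint64_t stack_ptr;      /* initial %rsp, or 0 to keep the trampoline stack */
	uint64_t debug;          /* scratch/debug word (0x29 is stored here early) */
	uint64_t transit_pgtbl;  /* page table used for the 32->64 bit transition */
};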

View File

@ -0,0 +1,150 @@
/**
* \file vsyscall.c
* License details are found in the file LICENSE.
* \brief
* implements x86's vsyscall
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 Hitachi, Ltd.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
/*
* The .vsyscall.* sections' LMAs differ from their VMAs,
* so make sure the code here is position-independent.
*/
#include <cls.h>
#include <syscall.h>
#include <ihk/atomic.h>
#include <arch/cpu.h>
extern int vsyscall_gettimeofday(struct timeval *tv, void *tz)
__attribute__ ((section (".vsyscall.gettimeofday")));
struct tod_data_s tod_data
__attribute__ ((section(".vsyscall.gettimeofday.data"))) = {
.do_local = 0,
.version = IHK_ATOMIC64_INIT(0),
};
static inline void cpu_pause_for_vsyscall(void)
{
asm volatile ("pause" ::: "memory");
return;
} /* cpu_pause_for_vsyscall() */
static inline void calculate_time_from_tsc(struct timespec *ts)
{
long ver;
unsigned long current_tsc;
__time_t sec_delta;
long ns_delta;
for (;;) {
while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) {
/* settimeofday() is in progress */
cpu_pause_for_vsyscall();
}
rmb();
*ts = tod_data.origin;
rmb();
if (ver == ihk_atomic64_read(&tod_data.version)) {
break;
}
/* settimeofday() has intervened */
cpu_pause_for_vsyscall();
}
current_tsc = rdtsc();
sec_delta = current_tsc / tod_data.clocks_per_sec;
ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec)
/ tod_data.clocks_per_sec;
/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */
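/* (NS_PER_SEC * (current_tsc % clocks_per_sec) must fit in 64 bits:
 *  2^64 / 10^9 is about 1.845e10, i.e. clocks_per_sec below ~18.44 GHz.) */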
ts->tv_sec += sec_delta;
ts->tv_nsec += ns_delta;
if (ts->tv_nsec >= NS_PER_SEC) {
ts->tv_nsec -= NS_PER_SEC;
++ts->tv_sec;
}
return;
} /* calculate_time_from_tsc() */
int vsyscall_gettimeofday(struct timeval *tv, void *tz)
{
int error;
struct timespec ats;
if (!tv && !tz) {
/* nothing to do */
return 0;
}
/* Do it locally if supported */
if (!tz && tod_data.do_local) {
calculate_time_from_tsc(&ats);
tv->tv_sec = ats.tv_sec;
tv->tv_usec = ats.tv_nsec / 1000;
return 0;
}
/* Otherwise syscall */
asm ("syscall" : "=a" (error)
: "a" (__NR_gettimeofday), "D" (tv), "S" (tz)
: "%rcx", "%r11", "memory");
if (error) {
*(volatile int *)0 = 0; /* i.e. raise(SIGSEGV) */
}
return error;
} /* vsyscall_gettimeofday() */
extern long vsyscall_time(void *tp)
__attribute__ ((section (".vsyscall.time")));
long vsyscall_time(void *tp)
{
long t;
asm ( "syscall ;"
/*
* This vsyscall_time() cannot fail, because glibc's
* vsyscall_time() does not set errno.
*
* The only possible error is a memory access fault; the store
* below is performed in user mode so that a bad pointer raises
* SIGSEGV instead of returning an error.
*/
"test %%rdx,%%rdx;"
"jz 1f;"
"mov %%rax,(%%rdx);"
"1:"
: "=a" (t)
: "a" (__NR_time), "d" (tp), "D" (0)
: "%rcx", "%r11", "memory");
return t;
}
extern int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
__attribute__ ((section (".vsyscall.getcpu")));
int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
{
int error;
asm ("syscall" : "=a" (error)
: "a" (__NR_getcpu), "D" (cpup), "S" (nodep), "d" (tcachep)
: "%rcx", "%r11", "memory");
return error;
}

View File

@ -0,0 +1,67 @@
#!/usr/bin/expect
set INST_DIR "@prefix@"
spawn $INST_DIR/bin/eclair -d /tmp/mckernel.dump -k $INST_DIR/smp-x86/kernel/mckernel.img -i
set state "init"
set thread_id 0
expect {
"in ?? ()" {
switch -- $state {
"thread_chosen" {
set state "thread_skip"
}
"thread_bt" {
set state "thread_skip"
}
}
exp_continue
}
"(eclair) " {
switch -- $state {
"init" {
set state "threads_list"
send "info threads\r"
}
"threads_list" {
incr thread_id
set state "thread_chosen"
send "thread $thread_id\r"
}
"thread_skip" {
incr thread_id
set state "thread_chosen"
send "thread $thread_id\r"
}
"thread_chosen" {
set state "thread_bt"
send "bt\r"
}
}
exp_continue
}
"Type <return> to continue, or q <return> to quit" {
switch -- $state {
"threads_list" {
send "\r"
}
"thread_bt" {
send "\r"
}
"thread_skip" {
send "q\r"
}
}
exp_continue
}
" not known." {
expect "(eclair) " { send "quit\r" }
expect "Quit anyway? (y or n) " { send "y\r" }
exit 0
}
}

View File

@ -0,0 +1,28 @@
# irqbalance is a daemon process that distributes interrupts across
# CPUS on SMP systems. The default is to rebalance once every 10
# seconds. This is the environment file that is specified to systemd via the
# EnvironmentFile key in the service unit file (or via whatever method the init
# system you're using has).
#
# ONESHOT=yes
# after starting, wait for a minute, then look at the interrupt
# load and balance it once; after balancing exit and do not change
# it again.
#IRQBALANCE_ONESHOT=
#
# IRQBALANCE_BANNED_CPUS
# 64-bit bitmask which allows you to indicate which CPUs should
# be skipped when rebalancing IRQs. CPU numbers which have their
# corresponding bits set to one in this mask will not have any
# IRQs assigned to them on rebalance.
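# For example (hypothetical value), IRQBALANCE_BANNED_CPUS=00fc would keep
# irqbalance away from CPUs 2-7. The %mask% placeholder below is filled in
# by the McKernel boot script with the mask of the reserved CPUs.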
#
IRQBALANCE_BANNED_CPUS=%mask%
#
# IRQBALANCE_ARGS
# append any args here to the irqbalance daemon as documented in the man page
#
IRQBALANCE_ARGS=--banirq=%banirq%

View File

@ -0,0 +1,10 @@
[Unit]
Description=irqbalance daemon
After=syslog.target
[Service]
EnvironmentFile=/tmp/irqbalance_mck
ExecStart=/usr/sbin/irqbalance --foreground $IRQBALANCE_ARGS
[Install]
WantedBy=multi-user.target

View File

@ -0,0 +1,155 @@
# Overlay /proc, /sys with McKernel specific contents
#
# Revert any state that has been initialized before the error occurred.
#
if [ -z "$(declare -f error_exit)" ]; then
error_exit() {
local status=$1
case $status in
mcos_sys_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos/mcos0_sys
fi
;&
mcos_proc_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos/mcos0_proc
fi
;&
mcoverlayfs_loaded)
if [ "$enable_mcoverlay" == "yes" ]; then
rmmod mcoverlay 2>/dev/null
fi
;&
linux_proc_bind_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos/linux_proc
fi
;&
tmp_mcos_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos
fi
;&
tmp_mcos_created)
if [ "$enable_mcoverlay" == "yes" ]; then
umask $umask_old
rm -rf /tmp/mcos
fi
;&
initial)
# Nothing more to revert
;;
esac
exit 1
}
fi
# Change umask for /proc and /sys files
umask_dec=$(( 8#${umask_old} & 8#0002 ))
umask 0`printf "%o" ${umask_dec}`
if [ ! -e /tmp/mcos ]; then
mkdir -p /tmp/mcos;
fi
if ! mount -t tmpfs tmpfs /tmp/mcos; then
echo "error: mount /tmp/mcos" >&2
error_exit "tmp_mcos_created"
fi
if [ ! -e /tmp/mcos/linux_proc ]; then
mkdir -p /tmp/mcos/linux_proc;
fi
if ! mount --bind /proc /tmp/mcos/linux_proc; then
echo "error: mount /tmp/mcos/linux_proc" >&2
error_exit "tmp_mcos_mounted"
fi
if ! taskset -c 0 insmod @KMODDIR@/mcoverlay.ko 2>/dev/null; then
echo "error: inserting mcoverlay.ko" >&2
error_exit "linux_proc_bind_mounted"
fi
while [ ! -e /proc/mcos0 ]
do
sleep 0.1
done
if [ ! -e /tmp/mcos/mcos0_proc ]; then
mkdir -p /tmp/mcos/mcos0_proc;
fi
if [ ! -e /tmp/mcos/mcos0_proc_upper ]; then
mkdir -p /tmp/mcos/mcos0_proc_upper;
fi
if [ ! -e /tmp/mcos/mcos0_proc_work ]; then
mkdir -p /tmp/mcos/mcos0_proc_work;
fi
if ! mount -t mcoverlay mcoverlay -o lowerdir=/proc/mcos0:/proc,upperdir=/tmp/mcos/mcos0_proc_upper,workdir=/tmp/mcos/mcos0_proc_work,nocopyupw,nofscheck /tmp/mcos/mcos0_proc; then
echo "error: mounting /tmp/mcos/mcos0_proc" >&2
error_exit "mcoverlayfs_loaded"
fi
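# Note: with overlayfs-style lowerdir stacking the first entry (/proc/mcos0)
# takes precedence over the host /proc, so McKernel-provided entries shadow
# the Linux ones; nocopyupw and nofscheck are assumed to be mcoverlay-specific
# mount options.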
# TODO: How do we revert this in case of failure?
mount --make-rprivate /proc
while [ ! -e /sys/devices/virtual/mcos/mcos0/sys/setup_complete ]
do
sleep 0.1
done
if [ ! -e /tmp/mcos/mcos0_sys ]; then
mkdir -p /tmp/mcos/mcos0_sys;
fi
if [ ! -e /tmp/mcos/mcos0_sys_upper ]; then
mkdir -p /tmp/mcos/mcos0_sys_upper;
fi
if [ ! -e /tmp/mcos/mcos0_sys_work ]; then
mkdir -p /tmp/mcos/mcos0_sys_work;
fi
if ! mount -t mcoverlay mcoverlay -o lowerdir=/sys/devices/virtual/mcos/mcos0/sys:/sys,upperdir=/tmp/mcos/mcos0_sys_upper,workdir=/tmp/mcos/mcos0_sys_work,nocopyupw,nofscheck /tmp/mcos/mcos0_sys; then
echo "error: mount /tmp/mcos/mcos0_sys" >&2
error_exit "mcos_proc_mounted"
fi
# TODO: How do we revert this in case of failure?
mount --make-rprivate /sys
touch /tmp/mcos/mcos0_proc/mckernel
rm -rf /tmp/mcos/mcos0_sys/setup_complete
# Hide NUMA related files which are outside the LWK partition
for cpuid in `find /sys/devices/system/cpu/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid" ]; then
rm -rf /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/drivers/processor/$cpuid
else
for nodeid in `find /sys/devices/system/cpu/$cpuid/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/cpu/$cpuid/$nodeid" ]; then
rm -f /tmp/mcos/mcos0_sys/devices/system/cpu/$cpuid/$nodeid
fi
done
fi
done
for nodeid in `find /sys/devices/system/node/* -maxdepth 0 -name "node[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid" ]; then
rm -rf /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/*
rm -rf /tmp/mcos/mcos0_sys/bus/node/devices/$nodeid
else
# Delete non-existent symlinks
for cpuid in `find /sys/devices/system/node/$nodeid/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/devices/system/node/$nodeid/$cpuid" ]; then
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/$cpuid
fi
done
rm -f /tmp/mcos/mcos0_sys/devices/system/node/$nodeid/memory*
fi
done
rm -f /tmp/mcos/mcos0_sys/devices/system/node/has_*
for cpuid in `find /sys/bus/cpu/devices/* -maxdepth 0 -name "cpu[0123456789]*" -printf "%f "`; do
if [ ! -e "/sys/devices/virtual/mcos/mcos0/sys/bus/cpu/devices/$cpuid" ]; then
rm -rf /tmp/mcos/mcos0_sys/bus/cpu/devices/$cpuid
fi
done
# Restore umask
umask ${umask_old}

View File

@ -0,0 +1,13 @@
# Remove mcoverlay if loaded
if grep mcoverlay /proc/modules &>/dev/null; then
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_sys`" != "" ]; then umount -l /tmp/mcos/mcos0_sys; fi
if [ "`cat /proc/mounts | grep /tmp/mcos/mcos0_proc`" != "" ]; then umount -l /tmp/mcos/mcos0_proc; fi
if [ "`cat /proc/mounts | grep /tmp/mcos/linux_proc`" != "" ]; then umount -l /tmp/mcos/linux_proc; fi
if [ "`cat /proc/mounts | grep /tmp/mcos`" != "" ]; then umount -l /tmp/mcos; fi
if [ -e /tmp/mcos ]; then rm -rf /tmp/mcos; fi
if ! rmmod mcoverlay 2>/dev/null; then
echo "error: removing mcoverlay" >&2
exit 1
fi
fi

View File

@ -0,0 +1,112 @@
#!/bin/bash
# \file arch/x86/tools/mcreboot-attached-mic.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel boot script
# \author Masamichi Takagi <m-takagi@ab.jp.nec.com> \par
# Copyright (C) 2013 NEC Corporation
# \author Tomoki Shirasawa <tomoki.shirasawa.kk@hitachi-solutions.com> \par
# Copyright (C) 2012 - 2013 Hitachi, Ltd.
# HISTORY:
#
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
if ! lspci | grep 'Co-processor.*Intel Corporation' > /dev/null 2>&1; then
echo No Intel co-processor found. >&2
exit 1
fi
echo "removing kernel modules..." >&2
modules_were_loaded="0"
if [ "`service mpss status 2> /dev/null`" != "mpss is stopped" ]; then
modules_were_loaded="1"
sudo service mpss stop
fi
if lsmod | awk 'BEGIN{rc=1}$1 == "mic"{rc=0}END{exit(rc)}'; then
modules_were_loaded="1"
sudo service mpss unload
fi
"$SBINDIR/ihkosctl" 0 shutdown
for mod_name in mcctrl ihk_mic ihk; do
if lsmod | awk 'BEGIN{rc=1}$1 == "'"$mod_name"'"{rc=0}END{exit(rc)}'; then
modules_were_loaded="1"
echo "rmmod $mod_name" >&2
if rmmod $mod_name; then
echo "$mod_name removed succesfully" >&2
sleep 1
else
echo "ERROR: couldn't remove $mod_name" >&2
exit 1
fi
fi
done
echo "removing kernel modules done" >&2
if [ "$1" == "-u" ]; then
exit
fi
wait_time=10
if [ "$modules_were_loaded" == "1" ]; then
echo "waiting for ${wait_time} seconds: " >&2
while [ "$wait_time" != 0 ]; do
echo -n "$wait_time " >&2
sleep 1
let wait_time=(${wait_time}-1)
done
echo "" >&2
fi
if [ "$1" == "-w" ]; then
shift 1
echo "press enter to continue" >&2
read enter_press
fi
for mod_path in "$KMODDIR/ihk.ko" "$KMODDIR/ihk_mic.ko" "$KMODDIR/mcctrl.ko"; do
if insmod $mod_path; then
sleep 1
echo "$mod_path inserted succesfully" >&2
else
echo "ERROR: couldn't insert $mod_path" >&2
exit 1
fi
if [ "$mod_path" == "$KMODDIR/ihk_mic.ko" ]; then
echo "creating OS device" >&2
sleep 1
"$SBINDIR/ihkconfig" 0 create
sleep 1
fi
if [ "$mod_path" == "$KMODDIR/mcctrl.ko" ]; then
if [ $# -gt 0 ]; then
echo "setting kernel parameter to: \"$1\"" >&2
"$SBINDIR/ihkosctl" 0 kargs "$1"
sleep 1
else
echo "setting kernel parameter to: \"hidos\"" >&2
"$SBINDIR/ihkosctl" 0 kargs "hidos"
sleep 1
fi
echo "using kernel image: $KERNDIR/mckernel.img" >&2
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img"
sleep 1
echo "booting OS 0" >&2
"$SBINDIR/ihkosctl" 0 boot
sleep 1
fi
done

View File

@ -0,0 +1,46 @@
#!/bin/bash -x
# \file arch/x86/tools/mcreboot-builtin-x86.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel boot script
# \author Masamichi Takagi <masamichi.takagi@riken.jp> \par
# Copyright (C) 2014 RIKEN AICS
# HISTORY:
#
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
kill -9 `pidof mcexec`
if lsmod | grep mcctrl > /dev/null 2>&1; then
rmmod mcctrl || exit 1
fi
if lsmod | grep dcfa > /dev/null 2>&1; then
rmmod dcfa || exit 1
fi
if lsmod | grep ihk_builtin > /dev/null 2>&1; then
rmmod ihk_builtin || exit 1
fi
if lsmod | grep ihk > /dev/null 2>&1; then
rmmod ihk || exit 1
fi
insmod "$KMODDIR/ihk.ko" &&
insmod "$KMODDIR/ihk_builtin.ko" &&
"$SBINDIR/ihkconfig" 0 create &&
NCORE=`dmesg | grep -E 'SHIMOS: CPU Status:'|awk '{split($0,a," "); for (i = 1; i <= length(a); i++) { if(a[i] ~ /2/) {count++}} print count;}'`
MEM=`free -g | grep -E 'Mem:' | awk '{print int($2/4)}'`
"$SBINDIR/ihkosctl" 0 alloc "$NCORE" "$MEM"g &&
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img" &&
"$SBINDIR/ihkosctl" 0 kargs hidos osnum=0 &&
"$SBINDIR/ihkosctl" 0 boot &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
insmod "$KMODDIR/mcctrl.ko" &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
exit 0

View File

@ -0,0 +1,429 @@
#!/bin/bash
# IHK SMP-x86 example boot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2014 RIKEN AICS
#
# This is an example script for loading IHK, configuring a partition and
# booting McKernel on it. Unless specific CPUs and memory are requested,
# the script reserves half of the CPU cores and 512MB of RAM from
# NUMA node 0 when IHK is loaded for the first time.
# Otherwise, it destroys the current McKernel instance and reboots it using
# the same set of resources as it used previously.
# Note that the script does not output anything unless an error occurs.
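# A hypothetical invocation (values are examples only; see the getopts list
# below for all options) reserving CPUs 2-7 and 4G of memory on NUMA node 0
# and redirecting kmsg to syslog could look like:
#   mcreboot.sh -c 2-7 -m 4G@0 -k 1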
prefix="@prefix@"
BINDIR="${prefix}/bin"
SBINDIR="${prefix}/sbin"
ETCDIR=@ETCDIR@
KMODDIR="${prefix}/kmod"
KERNDIR="${prefix}/@TARGET@/kernel"
ENABLE_MCOVERLAYFS="@ENABLE_MCOVERLAYFS@"
mem="512M@0"
cpus=""
ikc_map=""
if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
echo "You need at least bash-4.0 to run this script." >&2
exit 1
fi
redirect_kmsg=0
mon_interval="-1"
DUMP_LEVEL=24
facility="LOG_LOCAL6"
chown_option=`logname 2> /dev/null`
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" -o "`systemctl status irqbalance.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
irqbalance_used="yes"
else
irqbalance_used="no"
fi
turbo=""
ihk_irq=""
umask_old=`umask`
while getopts :tk:c:m:o:f:r:q:i:d:e: OPT
do
case ${OPT} in
f) facility=${OPTARG}
;;
o) chown_option=${OPTARG}
;;
k) redirect_kmsg=${OPTARG}
;;
c) cpus=${OPTARG}
;;
m) mem=${OPTARG}
;;
r) ikc_map=${OPTARG}
;;
q) ihk_irq=${OPTARG}
;;
t) turbo="turbo"
;;
e) extra_kopts=${OPTARG}
;;
d) DUMP_LEVEL=${OPTARG}
;;
i) mon_interval=${OPTARG}
;;
*) echo "invalid option -${OPT}" >&2
exit 1
esac
done
# Start ihkmond
pid=`pidof ihkmond`
if [ "${pid}" != "" ]; then
sudo kill -9 ${pid} > /dev/null 2> /dev/null
fi
if [ "${redirect_kmsg}" != "0" -o "${mon_interval}" != "-1" ]; then
${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval}
fi
#
# Revert any state that has been initialized before the error occurred.
#
error_exit() {
local status=$1
case $status in
mcos_sys_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos/mcos0_sys
fi
;&
mcos_proc_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos/mcos0_proc
fi
;&
mcoverlayfs_loaded)
if [ "$enable_mcoverlay" == "yes" ]; then
rmmod mcoverlay 2>/dev/null
fi
;&
linux_proc_bind_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos/linux_proc
fi
;&
tmp_mcos_mounted)
if [ "$enable_mcoverlay" == "yes" ]; then
umount /tmp/mcos
fi
;&
tmp_mcos_created)
if [ "$enable_mcoverlay" == "yes" ]; then
umask $umask_old
rm -rf /tmp/mcos
fi
;&
os_created)
# Destroy all LWK instances
if ls /dev/mcos* 1>/dev/null 2>&1; then
for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`;
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then
echo "warning: failed to destroy LWK instance $ind" >&2
fi
done
fi
;&
mcctrl_loaded)
rmmod mcctrl 2>/dev/null || echo "warning: failed to remove mcctrl" >&2
;&
cpus_reserved)
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
if [ "${cpus}" != "" ]; then
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then
echo "warning: failed to release CPUs" >&2
fi
fi
;&
mem_reserved)
mem=`${SBINDIR}/ihkconfig 0 query mem`
if [ "${mem}" != "" ]; then
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then
echo "warning: failed to release memory" >&2
fi
fi
;&
ihk_smp_loaded)
rmmod ihk_smp_@ARCH@ 2>/dev/null || echo "warning: failed to remove ihk_smp_@ARCH@" >&2
;&
ihk_loaded)
rmmod ihk 2>/dev/null || echo "warning: failed to remove ihk" >&2
;&
irqbalance_stopped)
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
echo "warning: failed to stop irqbalance_mck" >&2
fi
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then
echo "warning: failed to disable irqbalance_mck" >&2
fi
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
fi
if ! systemctl start irqbalance.service; then
echo "warning: failed to start irqbalance" >&2;
fi
fi
;&
initial)
# Nothing more to revert
;;
esac
exit 1
}
ihk_ikc_irq_core=0
release=`uname -r`
major=`echo ${release} | sed -e 's/^\([0-9]*\).*/\1/'`
minor=`echo ${release} | sed -e 's/^[0-9]*.\([0-9]*\).*/\1/'`
patch=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.\([0-9]*\).*/\1/'`
linux_version_code=`expr \( ${major} \* 65536 \) + \( ${minor} \* 256 \) + ${patch}`
rhel_release=`echo ${release} | sed -e 's/^[0-9]*.[0-9]*.[0-9]*-\([0-9]*\).*/\1/'`
if [ "${release}" == "${rhel_release}" ]; then
rhel_release="";
fi
enable_mcoverlay="no"
if [ "${ENABLE_MCOVERLAYFS}" == "yes" ]; then
if [ "${rhel_release}" == "" ]; then
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
enable_mcoverlay="yes"
fi
if [ ${linux_version_code} -ge 263680 -a ${linux_version_code} -lt 263936 ]; then
enable_mcoverlay="yes"
fi
else
if [ ${linux_version_code} -eq 199168 -a ${rhel_release} -ge 327 -a ${rhel_release} -le 693 ]; then
enable_mcoverlay="yes"
fi
if [ ${linux_version_code} -ge 262144 -a ${linux_version_code} -lt 262400 ]; then
enable_mcoverlay="yes"
fi
fi
fi
# Figure out CPUs if not requested by user
if [ "$cpus" == "" ]; then
# Get the number of CPUs on NUMA node 0
nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`
# Use the second half of the cores
let nr_cpus="$nr_cpus / 2"
cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
if [ "$cpus" == "" ]; then
echo "error: no available CPUs on NUMA node 0?" >&2
exit 1
fi
fi
# Remove mcoverlay if loaded
if [ "$enable_mcoverlay" == "yes" ]; then
. ${SBINDIR}/mcoverlay-destroy.sh
fi
# Stop irqbalance
if [ "${irqbalance_used}" == "yes" ]; then
systemctl stop irqbalance_mck.service 2>/dev/null
if ! systemctl stop irqbalance.service 2>/dev/null ; then
echo "error: stopping irqbalance" >&2
exit 1
fi;
if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }'; then
echo "error: saving /proc/irq/*/smp_affinity" >&2
error_exit "mcos_sys_mounted"
fi;
# Prevent /proc/irq/*/smp_affinity from getting zero after offlining
# McKernel CPUs by using the following algorithm.
# if (smp_affinity & mck_cores) {
# smp_affinity = (mck_cores ^ -1);
# }
ncpus=`lscpu | grep -E '^CPU\(s\):' | awk '{print $2}'`
smp_affinity_mask=`echo $cpus | ncpus=$ncpus perl -e 'while(<>){@tokens = split /,/;foreach $token (@tokens) {@nums = split /-/,$token; for($num = $nums[0]; $num <= $nums[$#nums]; $num++) {$ndx=int($num/32); $mask[$ndx] |= (1<<($num % 32))}}} $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if($j != $nint32s - 1){print ",";} $nblks = ($j != $nint32s - 1) ? 8 : ($ENV{'ncpus'} % 32 != 0) ? int((($ENV{'ncpus'} + 3) % 32) / 4) : 8; for($i = $nblks - 1;$i >= 0;$i--){ printf("%01x",($mask[$j] >> ($i*4)) & 0xf);}}'`
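# For example (hypothetical values): with cpus=2-7 on a 16-CPU node the
# computed smp_affinity_mask is "00fc"; any IRQ whose current affinity
# intersects it is rewritten below to the complement "ff03".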
# echo cpus=$cpus ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask
if ! ncpus=$ncpus smp_affinity_mask=$smp_affinity_mask perl -e '@dirs = grep { -d } glob "/proc/irq/*"; foreach $dir (@dirs) { $hit = 0; $affinity_str = `cat $dir/smp_affinity`; chomp $affinity_str; @int32strs = split /,/, $affinity_str; @int32strs_mask=split /,/, $ENV{'smp_affinity_mask'}; for($i=0;$i <= $#int32strs_mask; $i++) { $int32strs_inv[$i] = sprintf("%08x",hex($int32strs_mask[$i])^0xffffffff); if($i == 0) { $len = int((($ENV{'ncpus'}%32)+3)/4); if($len != 0) { $int32strs_inv[$i] = substr($int32strs_inv[$i], -$len, $len); } } } $inv = join(",", @int32strs_inv); $nint32s = int(($ENV{'ncpus'}+31)/32); for($j = $nint32s - 1; $j >= 0; $j--) { if(hex($int32strs[$nint32s - 1 - $j]) & hex($int32strs_mask[$nint32s - 1 - $j])) { $hit = 1; }} if($hit == 1) { $cmd = "echo $inv > $dir/smp_affinity 2>/dev/null"; system $cmd;}}'; then
echo "error: modifying /proc/irq/*/smp_affinity" >&2
error_exit "mcos_sys_mounted"
fi
fi
# Load IHK if not loaded
if ! grep -E 'ihk\s' /proc/modules &>/dev/null; then
if ! taskset -c 0 insmod ${KMODDIR}/ihk.ko 2>/dev/null; then
echo "error: loading ihk" >&2
error_exit "irqbalance_stopped"
fi
fi
# Increase swappiness so that we have better chance to allocate memory for IHK
echo 100 > /proc/sys/vm/swappiness
# Drop Linux caches to free memory
sync && echo 3 > /proc/sys/vm/drop_caches
# Merge free memory areas into large, physically contiguous ones
echo 1 > /proc/sys/vm/compact_memory 2>/dev/null
sync
# Load IHK-SMP if not loaded and reserve CPUs and memory
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
if [ "$ihk_irq" == "" ]; then
for i in `seq 64 255`; do
if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e "^$i\$"`" == "" ]; then
ihk_irq=$i
break
fi
done
if [ "$ihk_irq" == "" ]; then
echo "error: no IRQ available" >&2
error_exit "ihk_loaded"
fi
fi
if ! taskset -c 0 insmod ${KMODDIR}/ihk-smp-@ARCH@.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core 2>/dev/null; then
echo "error: loading ihk-smp-@ARCH@" >&2
error_exit "ihk_loaded"
fi
# Offline and re-online RAM (special case for OFP SNC-4 mode)
if [ "`hostname | grep "c[0-9][0-9][0-9][0-9].ofp"`" != "" ] && [ "`cat /sys/devices/system/node/online`" == "0-7" ]; then
for i in 0 1 2 3; do
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
echo 0 > $f 2>&1 > /dev/null;
done
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
echo 1 > $f 2>&1 > /dev/null;
done
done
for i in 4 5 6 7; do
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
echo 0 > $f 2>&1 > /dev/null;
done
find /sys/devices/system/node/node$i/memory*/ -name "online" | while read f; do
echo 1 > $f 2>&1 > /dev/null;
done
done
fi
if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
echo "error: reserving memory" >&2
error_exit "ihk_smp_loaded"
fi
if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then
echo "error: reserving CPUs" >&2;
error_exit "mem_reserved"
fi
fi
# Load mcctrl if not loaded
if ! grep mcctrl /proc/modules &>/dev/null; then
if ! taskset -c 0 insmod ${KMODDIR}/mcctrl.ko 2>/dev/null; then
echo "error: inserting mcctrl.ko" >&2
error_exit "cpus_reserved"
fi
fi
# Destroy all LWK instances
if ls /dev/mcos* 1>/dev/null 2>&1; then
for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`;
# Retry when conflicting with ihkmond
nretry=0
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
sleep 0.25
nretry=$[ $nretry + 1 ]
done
if [ $nretry -eq 4 ]; then
echo "error: destroying LWK instance $ind failed" >&2
error_exit "mcctrl_loaded"
fi
done
fi
# Create OS instance
if ! ${SBINDIR}/ihkconfig 0 create; then
echo "error: creating OS instance" >&2
error_exit "mcctrl_loaded"
fi
# Assign CPUs
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then
echo "error: assign CPUs" >&2
error_exit "os_created"
fi
if [ "$ikc_map" != "" ]; then
# Specify IKC map
if ! ${SBINDIR}/ihkosctl 0 set ikc_map ${ikc_map}; then
echo "error: assign CPUs" >&2
error_exit "os_created"
fi
fi
# Assign memory
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then
echo "error: assign memory" >&2
error_exit "os_created"
fi
# Load kernel image
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
echo "error: loading kernel image: ${KERNDIR}/mckernel.img" >&2
error_exit "os_created"
fi
# Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL} $extra_kopts"; then
echo "error: setting kernel arguments" >&2
error_exit "os_created"
fi
# Boot OS instance
if ! ${SBINDIR}/ihkosctl 0 boot; then
echo "error: booting" >&2
error_exit "os_created"
fi
# Set device file ownership
if ! chown ${chown_option} /dev/mcd* /dev/mcos*; then
echo "warning: failed to chown device files" >&2
fi
# Overlay /proc, /sys with McKernel specific contents
if [ "$enable_mcoverlay" == "yes" ]; then
. ${SBINDIR}/mcoverlay-create.sh
fi
# Start irqbalance with CPUs and IRQ for McKernel banned
if [ "${irqbalance_used}" == "yes" ]; then
banirq=`cat /proc/interrupts| perl -e 'while(<>) { if(/^\s*(\d+).*IHK\-SMP\s*$/) {print $1;}}'`
sed "s/%mask%/$smp_affinity_mask/g" $ETCDIR/irqbalance_mck.in | sed "s/%banirq%/$banirq/g" > /tmp/irqbalance_mck
systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null
if ! systemctl link $ETCDIR/irqbalance_mck.service >/dev/null 2>/dev/null; then
echo "error: linking irqbalance_mck" >&2
error_exit "mcos_sys_mounted"
fi
if ! systemctl start irqbalance_mck.service 2>/dev/null ; then
echo "error: starting irqbalance_mck" >&2
error_exit "mcos_sys_mounted"
fi
# echo cpus=$cpus ncpus=$ncpus banirq=$banirq
fi
exit 0

View File

@ -0,0 +1,36 @@
.\" Man page for McKernel
.\" mcreboot
.\"
.\" Copyright (C) 2013 The University of Tokyo, Japan
.\" Written by Yutaka Ishikawa <ishikawa@is.s.u-tokyo.ac.jp>
.\"
.TH MCREBOOT 1 "@MCKERNEL_RELEASE_DATE@" "Version @MCKERNEL_VERSION@" "MCKERNEL @MCKERNEL_VERSION@"
.SH NAME
mcreboot \- a script to boot/reboot the McKernel environment
.\"
.\" ---------------------------- SYNOPSIS ----------------------------
.SH SYNOPSIS
mcreboot
.BR
.\" ---------------------------- DESCRIPTION ----------------------------
.SH DESCRIPTION
The mcreboot command, executed with root privileges, shuts down
the MPSS environment if it is running, and then initializes the
McKernel environment.
.\" ---------------------------- FILES ----------------------------
.SH FILES
.LP
.I mcctrl,
.I ihk_mic,
.I ihk
.\" ---------------------------- SEE ALSO ----------------------------
.SH SEE ALSO
ihkosctl (1)
.\" ---------------------------- AUTHORS ----------------------------
.SH AUTHORS
Copyright (C) 2013 McKernel Development Team, The University of Tokyo, Japan

View File

@ -0,0 +1,16 @@
#!/bin/bash
# \file arch/x86/tools/mcshutdown-attached-mic.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel shutdown script
#
# \author McKernel Development Team
#
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
"$SBINDIR/ihkosctl" 0 shutdown

View File

@ -0,0 +1,16 @@
#!/bin/bash
# \file arch/x86/tools/mcshutdown-attached-mic.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel shutdown script
#
# \author McKernel Development Team
#
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
"$SBINDIR/ihkosctl" 0 shutdown

View File

@ -0,0 +1,122 @@
#!/bin/bash
# IHK SMP-x86 example McKernel unload script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2015 RIKEN AICS
#
# This is an example script for destroying McKernel and releasing IHK resources
# Note that the script does not output anything unless an error occurs.
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
ETCDIR=@ETCDIR@
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
mem=""
cpus=""
irqbalance_used=""
# No SMP module? Exit.
if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then exit 0; fi
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
irqbalance_used="yes"
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
echo "warning: failed to stop irqbalance_mck" >&2
fi
if ! systemctl disable irqbalance_mck.service >/dev/null 2>/dev/null; then
echo "warning: failed to disable irqbalance_mck" >&2
fi
fi
# Destroy all LWK instances
if ls /dev/mcos* 1>/dev/null 2>&1; then
for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`;
# Retry when conflicting with ihkmond
nretry=0
until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -ge 4 ]; do
sleep 0.25
nretry=$[ $nretry + 1 ]
done
if [ $nretry -eq 4 ]; then
echo "error: destroying LWK instance $ind failed" >&2
exit 1
fi
done
fi
# Query IHK-SMP resources and release them
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then
echo "error: querying cpus" >&2
exit 1
fi
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
if [ "${cpus}" != "" ]; then
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then
echo "error: releasing CPUs" >&2
exit 1
fi
fi
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then
echo "error: querying memory" >&2
exit 1
fi
mem=`${SBINDIR}/ihkconfig 0 query mem`
if [ "${mem}" != "" ]; then
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then
echo "error: releasing memory" >&2
exit 1
fi
fi
# Remove delegator if loaded
if grep mcctrl /proc/modules &>/dev/null; then
if ! rmmod mcctrl 2>/dev/null; then
echo "error: removing mcctrl" >&2
exit 1
fi
fi
# Remove mcoverlay if loaded
. ${SBINDIR}/mcoverlay-destroy.sh
# Remove SMP module
if grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
if ! rmmod ihk_smp_@ARCH@ 2>/dev/null; then
echo "error: removing ihk_smp_@ARCH@" >&2
exit 1
fi
fi
# Remove core module
if grep -E 'ihk\s' /proc/modules &>/dev/null; then
if ! rmmod ihk 2>/dev/null; then
echo "error: removing ihk" >&2
exit 1
fi
fi
# Stop ihkmond
pid=`pidof ihkmond`
if [ "${pid}" != "" ]; then
sudo kill -9 ${pid} > /dev/null 2> /dev/null
fi
# Start irqbalance with the original settings
if [ "${irqbalance_used}" != "" ]; then
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then
echo "warning: failed to restore /proc/irq/*/smp_affinity" >&2
fi
if ! systemctl start irqbalance.service; then
echo "warning: failed to start irqbalance" >&2;
fi
fi
# Set back default swappiness
echo 60 > /proc/sys/vm/swappiness