/** * \file procfs.c * License details are found in the file LICENSE. * \brief * McKernel procfs * \author Naoki Hamada \par * Copyright (C) 2014 AXE, Inc. */ /* * HISTORY: */ #include #include #include #include #include #include #include #include #include #include #include #include #include //#define DEBUG_PRINT_PROCFS #ifdef DEBUG_PRINT_PROCFS #define dprintf(...) kprintf(__VA_ARGS__) #else #define dprintf(...) #endif extern int snprintf(char * buf, size_t size, const char *fmt, ...); extern int sprintf(char * buf, const char *fmt, ...); extern int sscanf(const char * buf, const char * fmt, ...); extern int osnum; static void procfs_thread_ctl(struct thread *thread, int msg) { struct ihk_ikc_channel_desc *syscall_channel; struct ikc_scd_packet packet; syscall_channel = cpu_local_var(syscall_channel); memset(&packet, '\0', sizeof packet); packet.arg = thread->tid; packet.msg = msg; packet.osnum = osnum; packet.ref = thread->cpu_id; packet.pid = thread->proc->pid; packet.err = 0; ihk_ikc_send(syscall_channel, &packet, 0); } void procfs_create_thread(struct thread *thread) { procfs_thread_ctl(thread, SCD_MSG_PROCFS_TID_CREATE); } void procfs_delete_thread(struct thread *thread) { procfs_thread_ctl(thread, SCD_MSG_PROCFS_TID_DELETE); } /** * \brief The callback function for mckernel procfs files. * * \param rarg returned argument */ void process_procfs_request(unsigned long rarg) { unsigned long parg, pbuf; struct thread *thread = NULL; struct process *proc = NULL; struct process_vm *vm = NULL; struct procfs_read *r; struct ikc_scd_packet packet; int rosnum, ret, pid, tid, ans = -EIO, eof = 0; char *buf, *p; struct ihk_ikc_channel_desc *syscall_channel; struct mcs_rwlock_node_irqsave lock; unsigned long offset; int count; int npages; int readwrite = 0; dprintf("process_procfs_request: invoked.\n"); syscall_channel = get_cpu_local_var(0)->syscall_channel; dprintf("rarg: %x\n", rarg); parg = ihk_mc_map_memory(NULL, rarg, sizeof(struct procfs_read)); dprintf("parg: %x\n", parg); r = ihk_mc_map_virtual(parg, 1, PTATTR_WRITABLE | PTATTR_ACTIVE); if (r == NULL) { kprintf("ERROR: process_procfs_request: got a null procfs_read structure.\n"); packet.err = -EIO; goto dataunavail; } dprintf("r: %p\n", r); dprintf("remote pbuf: %x\n", r->pbuf); pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count); dprintf("pbuf: %x\n", pbuf); count = r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1)); npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE; buf = ihk_mc_map_virtual(pbuf, npages, PTATTR_WRITABLE | PTATTR_ACTIVE); dprintf("buf: %p\n", buf); if (buf == NULL) { kprintf("ERROR: process_procfs_request: got a null buffer.\n"); packet.err = -EIO; goto bufunavail; } readwrite = r->readwrite; count = r->count; offset = r->offset; dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count); /* * check for "mcos%d/" */ ret = sscanf(r->fname, "mcos%d/", &rosnum); if (ret == 1) { if (osnum != rosnum) { kprintf("ERROR: process_procfs_request osnum mismatch " "(we are %d != requested %d)\n", osnum, rosnum); goto end; } dprintf("matched mcos%d.\n", osnum); } else { goto end; } p = strchr(r->fname, '/') + 1; /* Processing for pattern "mcos%d/xxx" files should be here. Its template is something like what follows: if (pattern matches) { get the data (at 'r->offset') and write it to 'buf' up to 'r->count' bytes. ans = written bytes; goto end; } */ /* * check for "mcos%d/PID/" */ ret = sscanf(p, "%d/", &pid); if (ret == 1) { struct mcs_rwlock_node tlock; int tids; struct thread *thread1 = NULL; proc = find_process(pid, &lock); if(proc == NULL){ kprintf("process_procfs_request: no such pid %d\n", pid); goto end; } p = strchr(p, '/') + 1; if((tids = sscanf(p, "task/%d/", &tid)) == 1){ p = strchr(p, '/') + 1; p = strchr(p, '/') + 1; } else tid = pid; mcs_rwlock_reader_lock_noirq(&proc->threads_lock, &tlock); list_for_each_entry(thread, &proc->threads_list, siblings_list){ if(thread->tid == tid) break; if(!thread1) thread1 = thread; } if(thread == NULL){ kprintf("process_procfs_request: no such tid %d-%d\n", pid, tid); if(tids){ process_unlock(proc, &lock); mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &tlock); goto end; } thread = thread1; } if(thread) hold_thread(thread); mcs_rwlock_reader_unlock_noirq(&proc->threads_lock, &tlock); hold_process(proc); vm = proc->vm; if(vm) hold_process_vm(vm); process_unlock(proc, &lock); } else if (!strcmp(p, "stat")) { /* "/proc/stat" */ extern int num_processors; /* kernel/ap.c */ char *p; size_t remain; int cpu; if (offset > 0) { ans = 0; eof = 1; goto end; } p = buf; remain = count; for (cpu = 0; cpu < num_processors; ++cpu) { size_t n; n = snprintf(p, remain, "cpu%d\n", cpu); if (n >= remain) { ans = -ENOSPC; eof = 1; goto end; } p += n; } ans = p - buf; eof = 1; goto end; } else { kprintf("unsupported procfs entry: %s\n", p); goto end; } /* * mcos%d/PID/mem * * The offset is treated as the beginning of the virtual address area * of the process. The count is the length of the area. */ if (strcmp(p, "mem") == 0) { uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER; unsigned long offset = r->offset; unsigned long left = r->count; int ret; struct page_table *pt = vm->address_space->page_table; ans = 0; if(left == 0) goto end; #if 0 if(!(proc->ptrace & PT_TRACED) || !(proc->status & (PS_STOPPED | PS_TRACED))){ ans = -EIO; goto end; } #endif if(readwrite == 0) reason = PF_POPULATE | PF_USER; while(left){ unsigned long pa; char *va; int pos = offset & (PAGE_SIZE - 1); int size = PAGE_SIZE - pos; if(size > left) size = left; ret = page_fault_process_vm(vm, (void *)offset, reason); if(ret){ if(ans == 0) ans = -EIO; goto end; } ret = ihk_mc_pt_virt_to_phys(pt, (void *)offset, &pa); if(ret){ if(ans == 0) ans = -EIO; goto end; } va = phys_to_virt(pa); if(readwrite) memcpy(va, buf + ans, size); else memcpy(buf + ans, va, size); offset += size; left -= size; ans += size; } goto end; } /* * mcos%d/PID/maps */ if (strcmp(p, "maps") == 0) { struct vm_range *range; int left = r->count - 1; /* extra 1 for terminating NULL */ int written = 0; char *_buf = buf; /* Starting from the middle of a proc file is not supported for maps */ if (offset > 0) { ans = 0; eof = 1; goto end; } ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); list_for_each_entry(range, &vm->vm_range_list, list) { int written_now; /* format is (from man proc): * address perms offset dev inode pathname * 08048000-08056000 r-xp 00000000 03:0c 64593 /usr/sbin/gpm */ written_now = snprintf(_buf, left, "%lx-%lx %s%s%s%s %lx %lx:%lx %d %s\n", range->start, range->end, range->flag & VR_PROT_READ ? "r" : "-", range->flag & VR_PROT_WRITE ? "w" : "-", range->flag & VR_PROT_EXEC ? "x" : "-", range->flag & VR_PRIVATE ? "p" : "s", /* TODO: fill in file details! */ 0UL, 0UL, 0UL, 0, "" ); left -= written_now; _buf += written_now; written += written_now; if (left == 0) { kprintf("%s(): WARNING: buffer too small to fill proc/maps\n", __FUNCTION__); break; } } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); ans = written + 1; eof = 1; goto end; } /* * mcos%d/PID/pagemap */ if (strcmp(p, "pagemap") == 0) { uint64_t *_buf = (uint64_t *)buf; uint64_t start, end; if (offset < PAGE_SIZE) { kprintf("WARNING: /proc/pagemap queried for NULL page\n"); ans = 0; goto end; } /* Check alignment */ if ((offset % sizeof(uint64_t) != 0) || (count % sizeof(uint64_t) != 0)) { ans = 0; eof = 1; goto end; } start = (offset / sizeof(uint64_t)) << PAGE_SHIFT; end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT); ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock); while (start < end) { *_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->address_space->page_table, start); dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->proc->pid, start, *_buf); start += PAGE_SIZE; ++_buf; } ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock); dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n", start, end, count); ans = count; goto end; } /* * mcos%d/PID/status */ if (strcmp(p, "status") == 0) { struct vm_range *range; unsigned long lockedsize = 0; char tmp[1024]; int len; ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock); list_for_each_entry(range, &proc->vm->vm_range_list, list) { if(range->flag & VR_LOCKED) lockedsize += range->end - range->start; } ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock); sprintf(tmp, "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n" "VmLck:\t%9lu kB\n", proc->ruid, proc->euid, proc->suid, proc->fsuid, proc->rgid, proc->egid, proc->sgid, proc->fsgid, (lockedsize + 1023) >> 10); len = strlen(tmp); if (r->offset < len) { if (r->offset + r->count < len) { ans = r->count; } else { eof = 1; ans = len; } strncpy(buf, tmp + r->offset, ans); } else if (r->offset == len) { ans = 0; eof = 1; } goto end; } /* * mcos%d/PID/auxv */ if (strcmp(p, "auxv") == 0) { unsigned int limit = AUXV_LEN * sizeof(unsigned long); unsigned int len = r->count; if (r->offset < limit) { if (limit < r->offset + r->count) { len = limit - r->offset; } memcpy((void *)buf, ((char *) proc->saved_auxv) + r->offset, len); ans = len; if (r->offset + len == limit) { eof = 1; } } else if (r->offset == limit) { ans = 0; eof = 1; } goto end; } /* * mcos%d/PID/cmdline */ if (strcmp(p, "cmdline") == 0) { unsigned int limit = proc->saved_cmdline_len; unsigned int len = r->count; if(!proc->saved_cmdline){ ans = 0; eof = 1; goto end; } if (r->offset < limit) { if (limit < r->offset + r->count) { len = limit - r->offset; } memcpy((void *)buf, ((char *) proc->saved_cmdline) + r->offset, len); ans = len; if (r->offset + len == limit) { eof = 1; } } else if (r->offset == limit) { ans = 0; eof = 1; } goto end; } /* * mcos%d/PID/taks/PID/mem * * The offset is treated as the beginning of the virtual address area * of the process. The count is the length of the area. */ if (!strcmp(p, "stat")) { char tmp[1024]; int len; /* * pid (comm) state ppid * pgrp session tty_nr tpgid * flags minflt cminflt majflt * cmajflt utime stime cutime * cstime priority nice num_threads * itrealvalue starttime vsize rss * rsslim startcode endcode startstack * kstkesp kstkeip signal blocked * sigignore sigcatch wchan nswap * cnswap exit_signal processor rt_priority * policy delayacct_blkio_ticks guest_time cguest_time */ ans = sprintf(tmp, "%d (%s) %c %d " // pid... "%d %d %d %d " // pgrp... "%u %lu %lu %lu " // flags... "%lu %lu %lu %ld " // cmajflt... "%ld %ld %ld %ld " // cstime... "%ld %llu %lu %ld " // itrealvalue... "%lu %lu %lu %lu " // rsslim... "%lu %lu %lu %lu " // kstkesp... "%lu %lu %lu %lu " // sigignore... "%lu %d %d %u " // cnswap... "%u %llu %lu %ld\n", // policy... 0, "exe", 'R', 0, // pid... 0, 0, 0, 0, // pgrp... 0, 0L, 0L, 0L, // flags... 0L, 0L, 0L, 0L, // cmajflt... 0L, 0L, 0L, 0L, // cstime... 0L, 0LL, 0L, 0L, // itrealvalue... 0L, 0L, 0L, 0L, // rsslim... 0L, 0L, 0L, 0L, // kstkesp... 0L, 0L, 0L, 0L, // sigignore... 0L, 0, thread->cpu_id, 0, // cnswap... 0, 0LL, 0L, 0L // policy... ); dprintf("tmp=%s\n", tmp); len = strlen(tmp); if (r->offset < len) { if (r->offset + r->count < len) { ans = r->count; } else { eof = 1; ans = len; } strncpy(buf, tmp + r->offset, ans); } else if (r->offset == len) { ans = 0; eof = 1; } goto end; } if(thread) kprintf("unsupported procfs entry: %d/task/%d/%s\n", pid, tid, p); else kprintf("unsupported procfs entry: %d/%s\n", pid, p); end: ihk_mc_unmap_virtual(buf, npages, 0); dprintf("ret: %d, eof: %d\n", ans, eof); r->ret = ans; r->eof = eof; r->status = 1; /* done */ packet.err = 0; bufunavail: ihk_mc_unmap_memory(NULL, pbuf, r->count); ihk_mc_unmap_virtual(r, 1, 0); dataunavail: ihk_mc_unmap_memory(NULL, parg, sizeof(struct procfs_read)); packet.msg = SCD_MSG_PROCFS_ANSWER; packet.arg = rarg; ret = ihk_ikc_send(syscall_channel, &packet, 0); if (ret < 0) { kprintf("ERROR: sending IKC msg, ret: %d\n", ret); } if(proc) release_process(proc); if(thread) release_thread(thread); if(vm) release_process_vm(vm); return; }