Redirect kmsg to /dev/log and detect hungup

1. ihkmond retrieves kmsg when the amount of kmsg exceeds the threashold and
   /dev/mcosX is deleted
2. ihkmond periodically monitors OS status change to detect hungup
This commit is contained in:
Masamichi Takagi
2017-09-01 15:31:37 +09:00
parent daa7526127
commit 5b51eb80a3
7 changed files with 138 additions and 186 deletions

View File

@ -29,8 +29,8 @@ if [ "${BASH_VERSINFO[0]}" -lt 4 ]; then
exit 1 exit 1
fi fi
INTERVAL=1 redirect_kmsg=0
LOGMODE=0 mon_interval="-1"
DUMP_LEVEL=24 DUMP_LEVEL=24
facility="LOG_LOCAL6" facility="LOG_LOCAL6"
chown_option=`logname 2> /dev/null` chown_option=`logname 2> /dev/null`
@ -44,38 +44,14 @@ fi
turbo="" turbo=""
ihk_irq="" ihk_irq=""
while getopts :ti:k:c:m:o:f:r:q:d: OPT while getopts :tk:c:m:o:f:r:q:i:d: OPT
do do
case ${OPT} in case ${OPT} in
f) facility=${OPTARG} f) facility=${OPTARG}
;; ;;
o) chown_option=${OPTARG} o) chown_option=${OPTARG}
;; ;;
i) INTERVAL=${OPTARG} k) redirect_kmsg=${OPTARG}
expr "${INTERVAL}" + 1 > /dev/null 2>&1
if [ $? -ge 2 ]
then
echo "invalid -i value" >&2
exit 1
fi
if [ ${INTERVAL} -le 0 ]
then
echo "invalid -i value" >&2
exit 1
fi
;;
k) LOGMODE=${OPTARG}
expr "${LOGMODE}" + 1 > /dev/null 2>&1
if [ $? -ge 2 ]
then
echo "invalid -k value" >&2
exit 1
fi
if [ ${LOGMODE} -lt 0 -o ${LOGMODE} -gt 2 ]
then
echo "invalid -k value" >&2
exit 1
fi
;; ;;
c) cpus=${OPTARG} c) cpus=${OPTARG}
;; ;;
@ -89,11 +65,21 @@ do
;; ;;
d) DUMP_LEVEL=${OPTARG} d) DUMP_LEVEL=${OPTARG}
;; ;;
i) mon_interval=${OPTARG}
;;
*) echo "invalid option -${OPT}" >&2 *) echo "invalid option -${OPT}" >&2
exit 1 exit 1
esac esac
done done
# Start ihkmond
pid=`pidof ihkmond`
if [ "${pid}" != "" ]; then
sudo kill -9 ${pid} > /dev/null 2> /dev/null
fi
if [ "${redirect_kmsg}" != "0" -o "${mon_interval}" != "-1" ]; then
${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval}
fi
# #
# Revert any state that has been initialized before the error occured. # Revert any state that has been initialized before the error occured.
# #
@ -248,7 +234,7 @@ if [ "${irqbalance_used}" == "yes" ]; then
if ! systemctl stop irqbalance.service 2>/dev/null ; then if ! systemctl stop irqbalance.service 2>/dev/null ; then
echo "error: stopping irqbalance" >&2 echo "error: stopping irqbalance" >&2
exit 1 exit 1
fi; fi;
if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }'; then if ! etcdir=@ETCDIR@ perl -e 'use File::Copy qw(copy); $etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "/proc/irq/*/smp_affinity"; foreach $file (@files) { $rel = substr($file, 1); $dir=substr($rel, 0, length($rel)-length("/smp_affinity")); if(0) { print "cp $file $etcdir/$rel\n";} if(system("mkdir -p $etcdir/$dir")){ exit 1;} if(!copy($file,"$etcdir/$rel")){ exit 1;} }'; then
echo "error: saving /proc/irq/*/smp_affinity" >&2 echo "error: saving /proc/irq/*/smp_affinity" >&2
@ -351,9 +337,15 @@ fi
if ls /dev/mcos* 1>/dev/null 2>&1; then if ls /dev/mcos* 1>/dev/null 2>&1; then
for i in /dev/mcos*; do for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`; ind=`echo $i|cut -c10-`;
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then # Retry when conflicting with ihkmond
echo "error: destroying LWK instance $ind failed" >&2 nretry=0
error_exit "mcctrl_loaded" until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
sleep 0.25
nretry=$[ $nretry + 1 ]
done
if [ $nretry -eq 4 ]; then
echo "error: destroying LWK instance $ind failed" >&2
error_exit "mcctrl_loaded"
fi fi
done done
fi fi
@ -391,7 +383,7 @@ if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
fi fi
# Set kernel arguments # Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos ksyslogd=${LOGMODE} $turbo dump_level=${DUMP_LEVEL}"; then if ! ${SBINDIR}/ihkosctl 0 kargs "hidos $turbo dump_level=${DUMP_LEVEL}"; then
echo "error: setting kernel arguments" >&2 echo "error: setting kernel arguments" >&2
error_exit "os_created" error_exit "os_created"
fi fi
@ -429,14 +421,3 @@ if [ "${irqbalance_used}" == "yes" ]; then
fi fi
# echo cpus=$cpus ncpus=$ncpus banirq=$banirq # echo cpus=$cpus ncpus=$ncpus banirq=$banirq
fi fi
# Start mcklogd. Note that McKernel blocks when kmsg buffer is full
# with '-k 1' until mcklogd unblocks it so starting mcklogd must preceed
# booting McKernel
if [ ${LOGMODE} -ne 0 ]; then
# Stop mcklogd which has survived McKernel shutdown because
# mcstop+release.sh is not used
pkill mcklogd
SBINDIR=${SBINDIR} ${SBINDIR}/mcklogd -i ${INTERVAL} -f ${facility}
fi

View File

@ -21,12 +21,6 @@ irqbalance_used=""
# No SMP module? Exit. # No SMP module? Exit.
if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then exit 0; fi if ! grep ihk_smp_x86 /proc/modules &>/dev/null; then exit 0; fi
# Stop mcklogd
while pgrep "mcklogd" > /dev/null 2>&1;
do
pkill -9 mcklogd
done
if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then if [ "`systemctl status irqbalance_mck.service 2> /dev/null |grep -E 'Active: active'`" != "" ]; then
irqbalance_used="yes" irqbalance_used="yes"
if ! systemctl stop irqbalance_mck.service 2>/dev/null; then if ! systemctl stop irqbalance_mck.service 2>/dev/null; then
@ -41,9 +35,15 @@ fi
if ls /dev/mcos* 1>/dev/null 2>&1; then if ls /dev/mcos* 1>/dev/null 2>&1; then
for i in /dev/mcos*; do for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`; ind=`echo $i|cut -c10-`;
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then # Retry when conflicting with ihkmond
echo "error: destroying LWK instance $ind failed" >&2 nretry=0
exit 1 until ${SBINDIR}/ihkconfig 0 destroy $ind || [ $nretry -lt 4 ]; do
sleep 0.25
nretry=$[ $nretry + 1 ]
done
if [ $nretry -eq 4 ]; then
echo "error: destroying LWK instance $ind failed" >&2
exit 1
fi fi
done done
fi fi
@ -102,6 +102,12 @@ if grep -E 'ihk\s' /proc/modules &>/dev/null; then
fi fi
fi fi
# Stop ihkmond
pid=`pidof ihkmond`
if [ "${pid}" != "" ]; then
sudo kill -9 ${pid} > /dev/null 2> /dev/null
fi
# Start irqbalance with the original settings # Start irqbalance with the original settings
if [ "${irqbalance_used}" != "" ]; then if [ "${irqbalance_used}" != "" ]; then
if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then if ! etcdir=@ETCDIR@ perl -e '$etcdir=$ENV{'etcdir'}; @files = grep { -f } glob "$etcdir/proc/irq/*/smp_affinity"; foreach $file (@files) { $dest = substr($file, length($etcdir)); if(0) {print "cp $file $dest\n";} system("cp $file $dest 2>/dev/null"); }'; then

View File

@ -122,7 +122,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
break; break;
case SCD_MSG_EVENTFD: case SCD_MSG_EVENTFD:
dkprintf(KERN_EMERG "%s: SCD_MSG_EVENTFD,pisp->eventfd_type=%d\n", __FUNCTION__, pisp->eventfd_type); dkprintf("%s: SCD_MSG_EVENTFD,pisp->eventfd_type=%d\n", __FUNCTION__, pisp->eventfd_type);
mcctrl_eventfd(__os, pisp); mcctrl_eventfd(__os, pisp);
break; break;

View File

@ -13,77 +13,22 @@
#include <stdarg.h> #include <stdarg.h>
#include <string.h> #include <string.h>
#include <kmsg.h> #include <kmsg.h>
#include <ihk/cpu.h>
#include <ihk/debug.h> #include <ihk/debug.h>
#include <ihk/lock.h> #include <ihk/lock.h>
#include <ihk/monitor.h>
#include <errno.h>
struct ihk_kmsg_buf kmsg_buf IHK_KMSG_ALIGN; struct ihk_kmsg_buf *kmsg_buf;
extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args);
extern int sprintf(char * buf, const char *fmt, ...); extern int sprintf(char * buf, const char *fmt, ...);
extern void eventfd(int type);
static ihk_spinlock_t kmsg_lock; static ihk_spinlock_t kmsg_lock;
extern char *find_command_line(char *name);
static unsigned long kprintf_lock_head(void); #define DEBUG_KMSG_USED (((unsigned int)kmsg_buf->tail - (unsigned int)kmsg_buf->head) % (unsigned int)kmsg_buf->len)
static void kprintf_unlock_head(unsigned long irqflags); #define DEBUG_KMSG_MARGIN (kmsg_buf->head == kmsg_buf->tail ? kmsg_buf->len : (((unsigned int)kmsg_buf->head - (unsigned int)kmsg_buf->tail) % (unsigned int)kmsg_buf->len))
static void kprintf_wait(int len, unsigned long *flags_head, int *slide) {
int head, tail, buf_len, mode, adj;
mode = kmsg_buf.mode;
while (1) {
adj = 0;
tail = kmsg_buf.tail;
buf_len = kmsg_buf.len;
head = kmsg_buf.head;
if (head < tail) head += buf_len;
if (tail + len > buf_len) adj = buf_len - tail;
if (head > tail && head <= tail + len + adj) {
/* When proceeding tail (producer pointer) by len would
cross head (consumer pointer) in ring-buffer */
if (mode != 1) {
*slide = 1;
break;
} else {
kprintf_unlock_head(*flags_head);
*flags_head = kprintf_lock_head();
}
} else {
break;
}
}
}
/* TODO: lock */
void kputs(char *buf)
{
int len = strlen(buf);
int slide = 0;
unsigned long flags_tail, flags_head;
flags_tail = kprintf_lock();
flags_head = kprintf_lock_head();
kprintf_wait(len, &flags_head, &slide);
if (len + kmsg_buf.tail > kmsg_buf.len) {
kmsg_buf.tail = 0;
if(len > kmsg_buf.len) {
len = kmsg_buf.len;
}
}
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
kmsg_buf.tail += len;
/* When proceeding tail (producer pointer) by len would
cross head (consumer pointer) in ring-buffer, give up
[head, tail] because the range is overwritten */
if (slide == 1) {
kmsg_buf.head = kmsg_buf.tail + 1;
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
}
kprintf_unlock_head(flags_head);
kprintf_unlock(flags_tail);
}
#define KPRINTF_LOCAL_BUF_LEN 1024
unsigned long kprintf_lock(void) unsigned long kprintf_lock(void)
{ {
@ -95,104 +40,140 @@ void kprintf_unlock(unsigned long irqflags)
__ihk_mc_spinlock_unlock(&kmsg_lock, irqflags); __ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
} }
static unsigned long kprintf_lock_head(void) #define debug_spin_lock_irqsave(lock, flags) do { \
{ flags = cpu_disable_interrupt_save(); \
return __ihk_mc_spinlock_lock(&kmsg_buf.lock); while (__sync_val_compare_and_swap(lock, 0, 1) != 0) { \
cpu_pause(); \
} \
} while (0)
#define debug_spin_unlock_irqrestore(lock, flags) do { \
*(lock) = 0; \
cpu_restore_interrupt(flags); \
} while (0)
static void memcpy_ringbuf(char* buf, int len) {
int i;
for(i = 0; i < len; i++) {
*(kmsg_buf->str + kmsg_buf->tail) = *(buf + i);
kmsg_buf->tail = (kmsg_buf->tail + 1) % kmsg_buf->len;
}
} }
static void kprintf_unlock_head(unsigned long irqflags) void kputs(char *buf)
{ {
__ihk_mc_spinlock_unlock(&kmsg_buf.lock, irqflags); int len = strlen(buf);
unsigned long flags_outer, flags_inner;
int overflow;
if (kmsg_buf == NULL) {
return;
}
flags_outer = kprintf_lock(); /* Guard from destruction */
debug_spin_lock_irqsave(&kmsg_buf->lock, flags_inner); /* For consistency */
overflow = DEBUG_KMSG_MARGIN <= len;
memcpy_ringbuf(buf, len);
if (overflow) {
kmsg_buf->head = (kmsg_buf->tail + 1) % kmsg_buf->len;
}
debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kprintf_unlock(flags_outer);
if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
}
} }
#define KPRINTF_LOCAL_BUF_LEN 1024
/* Caller must hold kmsg_lock! */ /* Caller must hold kmsg_lock! */
int __kprintf(const char *format, ...) int __kprintf(const char *format, ...)
{ {
int len = 0; int len = 0;
int slide = 0;
va_list va; va_list va;
unsigned long flags_head;
char buf[KPRINTF_LOCAL_BUF_LEN]; char buf[KPRINTF_LOCAL_BUF_LEN];
int overflow;
unsigned long flags_inner;
if (kmsg_buf == NULL) {
return -EINVAL;
}
/* Copy into the local buf */ /* Copy into the local buf */
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id()); len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
va_start(va, format); va_start(va, format);
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va); len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
va_end(va); va_end(va);
flags_head = kprintf_lock_head(); debug_spin_lock_irqsave(&kmsg_buf->lock, flags_inner);
kprintf_wait(len, &flags_head, &slide);
/* Append to kmsg buffer */ overflow = DEBUG_KMSG_MARGIN <= len;
if (kmsg_buf.tail + len > kmsg_buf.len) {
kmsg_buf.tail = 0; memcpy_ringbuf(buf, len);
if (overflow) {
kmsg_buf->head = (kmsg_buf->tail + 1) % kmsg_buf->len;
} }
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len); debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kmsg_buf.tail += len;
if (slide == 1) { if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
kmsg_buf.head = kmsg_buf.tail + 1; eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0; ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
} }
kprintf_unlock_head(flags_head);
return len; return len;
} }
int kprintf(const char *format, ...) int kprintf(const char *format, ...)
{ {
int len = 0; int len = 0;
int slide = 0;
va_list va; va_list va;
unsigned long flags_tail, flags_head; unsigned long flags_outer, flags_inner;
char buf[KPRINTF_LOCAL_BUF_LEN]; char buf[KPRINTF_LOCAL_BUF_LEN];
int overflow;
if (kmsg_buf == NULL) {
return -EINVAL;
}
/* Copy into the local buf */ /* Copy into the local buf */
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id()); len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
va_start(va, format); va_start(va, format);
len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va); len += vsnprintf(buf + len, KPRINTF_LOCAL_BUF_LEN - len - 2, format, va);
va_end(va); va_end(va);
flags_tail = kprintf_lock(); flags_outer = kprintf_lock();
flags_head = kprintf_lock_head(); debug_spin_lock_irqsave(&kmsg_buf->lock, flags_inner);
kprintf_wait(len, &flags_head, &slide);
/* Append to kmsg buffer */ overflow = DEBUG_KMSG_MARGIN <= len;
if (kmsg_buf.tail + len > kmsg_buf.len) {
kmsg_buf.tail = 0; memcpy_ringbuf(buf, len);
if (overflow) {
kmsg_buf->head = (kmsg_buf->tail + 1) % kmsg_buf->len;
} }
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len); debug_spin_unlock_irqrestore(&kmsg_buf->lock, flags_inner);
kmsg_buf.tail += len; kprintf_unlock(flags_outer);
if (slide == 1) {
kmsg_buf.head = kmsg_buf.tail + 1;
if (kmsg_buf.head >= kmsg_buf.len) kmsg_buf.head = 0;
}
kprintf_unlock_head(flags_head); if (DEBUG_KMSG_USED > IHK_KMSG_HIGH_WATER_MARK) {
kprintf_unlock(flags_tail); eventfd(IHK_OS_EVENTFD_TYPE_KMSG);
ihk_mc_delay_us(IHK_KMSG_NOTIFY_DELAY);
}
return len; return len;
} }
/* mode: void kmsg_init()
0: mcklogd is not running.
When kmsg buffer is full, writer doesn't block
and overwrites the buffer.
1: mcklogd periodically retrieves kmsg.
When kmsg buffer is full, writer blocks until
someone retrieves kmsg.
2: mcklogd periodically retrieves kmsg.
When kmsg buffer is full, writer doesn't block
and overwrites the buffer.
*/
void kmsg_init(int mode)
{ {
ihk_mc_spinlock_init(&kmsg_lock); ihk_mc_spinlock_init(&kmsg_lock);
kmsg_buf.tail = 0;
kmsg_buf.len = sizeof(kmsg_buf.str);
kmsg_buf.head = 0;
kmsg_buf.mode = mode;
ihk_mc_spinlock_init(&kmsg_buf.lock);
memset(kmsg_buf.str, 0, kmsg_buf.len);
} }

View File

@ -14,7 +14,6 @@
#define INIT_H #define INIT_H
extern void arch_init(void); extern void arch_init(void);
extern void kmsg_init(int);
extern void mem_init(void); extern void mem_init(void);
extern void ihk_ikc_master_init(void); extern void ihk_ikc_master_init(void);
extern void ap_init(void); extern void ap_init(void);

View File

@ -13,9 +13,8 @@
#ifndef KMSG_H #ifndef KMSG_H
#define KMSG_H #define KMSG_H
void kmsg_init(void);
void kputs(char *buf); void kputs(char *buf);
int kprintf(const char *format, ...); int kprintf(const char *format, ...);
void kmsg_init(int);
#endif #endif

View File

@ -50,7 +50,6 @@
#define DUMP_LEVEL_USER_UNUSED_EXCLUDE 24 #define DUMP_LEVEL_USER_UNUSED_EXCLUDE 24
extern struct ihk_kmsg_buf kmsg_buf;
extern unsigned long ihk_mc_get_ns_per_tsc(void); extern unsigned long ihk_mc_get_ns_per_tsc(void);
extern long syscall(int, ihk_mc_user_context_t *); extern long syscall(int, ihk_mc_user_context_t *);
@ -424,19 +423,6 @@ extern void ibmic_cmd_init(void);
int main(void) int main(void)
{ {
char *ptr;
int mode = 0;
ptr = find_command_line("ksyslogd=");
if (ptr) {
mode = ptr[9] - 0x30;
if (mode < 0 || mode > 2) mode = 0;
}
kmsg_init(mode);
kputs("IHK/McKernel started.\n");
ihk_set_kmsg(virt_to_phys(&kmsg_buf), IHK_KMSG_SIZE);
arch_init(); arch_init();
/* /*