Revert "Detect hang of McKernel in mcexec"

Change-Id: Ie8a0cf725f84a2f5d85da8b8fb15b30a826ddfcb
This commit is contained in:
Masamichi Takagi
2020-04-14 20:01:15 +09:00
parent 04d17dd3e9
commit 8ee1d61d0f
2 changed files with 1 additions and 106 deletions

View File

@ -15,7 +15,7 @@ set_property(TARGET libmcexec PROPERTY POSITION_INDEPENDENT_CODE ON)
add_executable(mcexec mcexec.c)
target_link_libraries(mcexec
libmcexec ihklib ${LIBRT} ${LIBNUMA} $<$<BOOL:ENABLE_QLMPI>:${LIBMPI}> pthread)
libmcexec ${LIBRT} ${LIBNUMA} $<$<BOOL:ENABLE_QLMPI>:${LIBMPI}> pthread)
target_include_directories(mcexec PUBLIC "${KERNEL_DIR}")
set_property(TARGET mcexec PROPERTY POSITION_INDEPENDENT_CODE ON)
set_property(TARGET mcexec PROPERTY LINK_FLAGS "-fPIE -pie")

View File

@ -83,8 +83,6 @@
#include <sys/un.h>
#include "../include/pmi.h"
#include "../include/qlmpi.h"
#include <ihk/ihklib.h>
#include <sys/epoll.h>
#include <sys/xattr.h>
#include "../../lib/include/list.h"
@ -1061,89 +1059,6 @@ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
/* NOTE(review): presumably barriers used to synchronize thread start-up; their users are not visible in this part of the file -- confirm. */
pthread_barrier_t init_ready;
pthread_barrier_t uti_init_ready;
/* Attributes for the watchdog thread; main() sets the detach state to PTHREAD_CREATE_DETACHED before creating the thread. */
pthread_attr_t watchdog_thread_attr;
/* Handle of the thread that main() creates to run watchdog_thread_func(). */
pthread_t watchdog_thread;
/*
 * Watchdog thread body: detects a hang of McKernel.
 *
 * Obtains an event file descriptor from
 * ihk_os_get_eventfd(0, IHK_OS_EVENTFD_TYPE_STATUS), registers it with a
 * private epoll instance and blocks (no timeout) until it becomes readable.
 * As soon as a notification has been read from it, a message is printed to
 * stderr and the whole process is terminated with EXIT_FAILURE.
 *
 * @arg: unused.
 *
 * Returns NULL, and only when setting up or waiting on the descriptors
 * fails; in that case an error is printed, both descriptors are closed and
 * the process keeps running without a watchdog.
 */
static void *watchdog_thread_func(void *arg)
{
	int evfd = -1;
	int epfd = -1;
	struct epoll_event event_in;
	struct epoll_event event_out;

	(void)arg;

	/* Failure is reported as a negative value, not necessarily -1. */
	evfd = ihk_os_get_eventfd(0, IHK_OS_EVENTFD_TYPE_STATUS);
	if (evfd < 0) {
		fprintf(stderr, "%s: Error: geteventfd failed (%d)\n",
			__func__, evfd);
		goto out;
	}

	epfd = epoll_create(1);
	if (epfd == -1) {
		/* The return value is always -1 here; errno has the cause. */
		fprintf(stderr, "%s: Error: epoll_create failed (%s)\n",
			__func__, strerror(errno));
		goto out;
	}

	memset(&event_in, 0, sizeof(event_in));
	event_in.events = EPOLLIN;
	event_in.data.fd = evfd;

	if (epoll_ctl(epfd, EPOLL_CTL_ADD, evfd, &event_in) != 0) {
		fprintf(stderr, "%s: Error: epoll_ctl failed (%s)\n",
			__func__, strerror(errno));
		goto out;
	}

	for (;;) {
		int nfd;
		uint64_t counter;
		ssize_t nread;

		nfd = epoll_wait(epfd, &event_out, 1, -1);
		if (nfd == -1) {
			/* A signal interrupted the wait: simply wait again. */
			if (errno == EINTR) {
				continue;
			}
			fprintf(stderr, "%s: Error: epoll_wait failed (%s)\n",
				__func__, strerror(errno));
			goto out;
		}

		if (nfd == 0) {
			fprintf(stderr,
				"%s: Error: epoll_wait timed out unexpectedly\n",
				__func__);
			goto out;
		}

		if (nfd > 1) {
			fprintf(stderr, "%s: Error: Too many (%d) events\n",
				__func__, nfd);
			goto out;
		}

		if (event_out.data.fd != evfd) {
			fprintf(stderr, "%s: Error: Unknown event (fd:%d)\n",
				__func__, event_out.data.fd);
			goto out;
		}

		nread = read(evfd, &counter, sizeof(counter));
		if (nread == 0) {
			fprintf(stderr, "%s: Error: read got EOF\n", __func__);
			goto out;
		}

		if (nread == -1) {
			/*
			 * Do not lose the watchdog to a signal: the descriptor
			 * is still registered, so go back and wait for it to
			 * be reported readable again.
			 */
			if (errno == EINTR) {
				continue;
			}
			fprintf(stderr, "%s: Error: read failed (%s)\n",
				__func__, strerror(errno));
			goto out;
		}

		/* A notification was delivered: McKernel is considered hung. */
		fprintf(stderr, "mcexec detected hang of McKernel\n");
		exit(EXIT_FAILURE);
	}

out:
	/* Close only descriptors that were actually obtained. */
	if (evfd >= 0) {
		close(evfd);
	}

	if (epfd >= 0) {
		close(epfd);
	}

	return NULL;
}
static void *main_loop_thread_func(void *arg)
{
struct thread_data_s *td = (struct thread_data_s *)arg;
@ -2703,26 +2618,6 @@ int main(int argc, char **argv)
init_sigaction();
/* Initialize watchdog thread which detects hang of McKernel */
if ((error = pthread_attr_init(&watchdog_thread_attr))) {
fprintf(stderr, "Error: pthread_attr_init failed (%d)\n", error);
close(fd);
return 1;
}
if ((error = pthread_attr_setdetachstate(&watchdog_thread_attr, PTHREAD_CREATE_DETACHED))) {
fprintf(stderr, "Error: pthread_attr_getdetachstate failed (%d)\n", error);
close(fd);
return 1;
}
if ((error = pthread_create(&watchdog_thread, &watchdog_thread_attr, watchdog_thread_func, NULL))) {
fprintf(stderr, "Error: pthread_create failed (%d)\n", error);
close(fd);
return 1;
}
if ((error = init_worker_threads(fd)) != 0) {
fprintf(stderr, "%s: Error: creating worker threads: %s\n",
__func__, strerror(-error));