From 8ee1d61d0f628513b4f6626c891e1f57ace18a84 Mon Sep 17 00:00:00 2001 From: Masamichi Takagi Date: Tue, 14 Apr 2020 20:01:15 +0900 Subject: [PATCH] Revert "Detect hang of McKernel in mcexec" Change-Id: Ie8a0cf725f84a2f5d85da8b8fb15b30a826ddfcb --- executer/user/CMakeLists.txt | 2 +- executer/user/mcexec.c | 105 ----------------------------------- 2 files changed, 1 insertion(+), 106 deletions(-) diff --git a/executer/user/CMakeLists.txt b/executer/user/CMakeLists.txt index de3fdd8f..c1d46936 100644 --- a/executer/user/CMakeLists.txt +++ b/executer/user/CMakeLists.txt @@ -15,7 +15,7 @@ set_property(TARGET libmcexec PROPERTY POSITION_INDEPENDENT_CODE ON) add_executable(mcexec mcexec.c) target_link_libraries(mcexec - libmcexec ihklib ${LIBRT} ${LIBNUMA} $<$:${LIBMPI}> pthread) + libmcexec ${LIBRT} ${LIBNUMA} $<$:${LIBMPI}> pthread) target_include_directories(mcexec PUBLIC "${KERNEL_DIR}") set_property(TARGET mcexec PROPERTY POSITION_INDEPENDENT_CODE ON) set_property(TARGET mcexec PROPERTY LINK_FLAGS "-fPIE -pie") diff --git a/executer/user/mcexec.c b/executer/user/mcexec.c index 37794d78..b40d7466 100644 --- a/executer/user/mcexec.c +++ b/executer/user/mcexec.c @@ -83,8 +83,6 @@ #include #include "../include/pmi.h" #include "../include/qlmpi.h" -#include -#include #include #include "../../lib/include/list.h" @@ -1061,89 +1059,6 @@ pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER; pthread_barrier_t init_ready; pthread_barrier_t uti_init_ready; -pthread_attr_t watchdog_thread_attr; -pthread_t watchdog_thread; - -/* Detects hang of McKernel */ -static void *watchdog_thread_func(void *arg) { - int ret = 0; - int evfd = -1; - int epfd = -1; - struct epoll_event event_in; - struct epoll_event event_out; - - if ((evfd = ihk_os_get_eventfd(0, IHK_OS_EVENTFD_TYPE_STATUS)) < 0) { - fprintf(stderr, "%s: Error: geteventfd failed (%d)\n", __FUNCTION__, evfd); - goto out; - } - - if ((epfd = epoll_create(1)) == -1) { - fprintf(stderr, "%s: Error: epoll_create failed (%d)\n", __FUNCTION__, epfd); - goto out; - } - - memset(&event_in, 0, sizeof(struct epoll_event)); - event_in.events = EPOLLIN; - event_in.data.fd = evfd; - if ((ret = epoll_ctl(epfd, EPOLL_CTL_ADD, evfd, &event_in)) != 0) { - fprintf(stderr, "%s: Error: epoll_ctl failed (%d)\n", __FUNCTION__, ret); - goto out; - } - - do { - int nfd; - uint64_t counter; - ssize_t nread; - - nfd = epoll_wait(epfd, &event_out, 1, -1); - if (nfd == -1) { - if (errno == EINTR) { - continue; - } - fprintf(stderr, "%s: Error: epoll_wait failed (%s)\n", __FUNCTION__, strerror(errno)); - goto out; - } - - if (nfd == 0) { - fprintf(stderr, "%s: Error: epoll_wait timed out unexpectedly\n", __FUNCTION__); - goto out; - } - - if (nfd > 1) { - fprintf(stderr, "%s: Error: Too many (%d) events\n", __FUNCTION__, nfd); - goto out; - } - - if (event_out.data.fd != evfd) { - fprintf(stderr, "%s: Error: Unknown event (fd:%d)\n", __FUNCTION__, event_out.data.fd); - goto out; - } - - nread = read(evfd, &counter, sizeof(counter)); - if (nread == 0) { - fprintf(stderr, "%s: Error: read got EOF\n", __FUNCTION__); - goto out; - } - - if (nread == -1) { - fprintf(stderr, "%s: Error: read failed (%s)\n", __FUNCTION__, strerror(errno)); - goto out; - } - - fprintf(stderr, "mcexec detected hang of McKernel\n"); - exit(EXIT_FAILURE); - } while (1); - - out: - if (evfd != -1) { - close(evfd); - } - if (epfd != -1) { - close(epfd); - } - return NULL; -} - static void *main_loop_thread_func(void *arg) { struct thread_data_s *td = (struct thread_data_s *)arg; @@ -2703,26 +2618,6 @@ int main(int argc, char **argv) init_sigaction(); - /* Initialize watchdog thread which detects hang of McKernel */ - - if ((error = pthread_attr_init(&watchdog_thread_attr))) { - fprintf(stderr, "Error: pthread_attr_init failed (%d)\n", error); - close(fd); - return 1; - } - - if ((error = pthread_attr_setdetachstate(&watchdog_thread_attr, PTHREAD_CREATE_DETACHED))) { - fprintf(stderr, "Error: pthread_attr_getdetachstate failed (%d)\n", error); - close(fd); - return 1; - } - - if ((error = pthread_create(&watchdog_thread, &watchdog_thread_attr, watchdog_thread_func, NULL))) { - fprintf(stderr, "Error: pthread_create failed (%d)\n", error); - close(fd); - return 1; - } - if ((error = init_worker_threads(fd)) != 0) { fprintf(stderr, "%s: Error: creating worker threads: %s\n", __func__, strerror(-error));