Compare commits

..

7 Commits
1.7.9 ... 1.8.0

Author SHA1 Message Date
Masamichi Takagi
1a71203872 release: 1.8.0: MAP_LOCKED and pre-populate PMIx shared memory PFNs
Change-Id: I171c87f0f49cf2f791693e397a1d94b1bc2d0440
2021-03-23 01:49:46 +00:00
Masamichi Takagi
03d99a0ed1 submodule: migrate to github.com/ihkmckernel
Change-Id: I64ee7c89e7316bb98b31833b5c15af9cf371b0ff
2021-03-23 01:12:25 +00:00
Balazs Gerofi
8fb42631f2 profile: fix infinite recursion for allocation miss event
Change-Id: I248c2abc7d02a9d9bffce20b3183724ddc8c2c1c
2021-03-21 15:26:39 +09:00
Balazs Gerofi
ba04c8a7b9 Fugaku: MAP_LOCKED and pre-populate PMIx shared memory PFNs
Change-Id: I74a0d0e50af0b6c60a6f9a4389ef3ab0534deda2
2021-03-21 15:25:15 +09:00
Masamichi Takagi
1bb8dcef05 release: 1.7.10: detect hungup via device-ioctl
Change-Id: I6531a159a44683085004ad3e90d7b4e67f51422c
2021-03-18 15:42:24 +09:00
Masamichi Takagi
ceb55d53b1 mcreboot-smp.sh: sudo ihkmond for /dev/kmsg log
Change-Id: I47aa483e6f787b8392b4b33b0fb10e4728157253
2021-03-18 06:36:33 +00:00
Masamichi Takagi
002f36c7f5 docs: add limitation about Linux kernel dump
Change-Id: Ic007f2f1915e37981955ad2160ea6614b1c36ec1
2021-03-17 21:39:07 +09:00
9 changed files with 128 additions and 9 deletions

6
.gitmodules vendored
View File

@@ -1,12 +1,12 @@
[submodule "ihk"] [submodule "ihk"]
path = ihk path = ihk
url = https://github.com/RIKEN-SysSoft/ihk.git url = https://github.com/ihkmckernel/ihk.git
[submodule "executer/user/lib/libdwarf/libdwarf"] [submodule "executer/user/lib/libdwarf/libdwarf"]
path = executer/user/lib/libdwarf/libdwarf path = executer/user/lib/libdwarf/libdwarf
url = https://github.com/bgerofi/libdwarf.git url = https://github.com/bgerofi/libdwarf.git
[submodule "executer/user/lib/syscall_intercept"] [submodule "executer/user/lib/syscall_intercept"]
path = executer/user/lib/syscall_intercept path = executer/user/lib/syscall_intercept
url = https://github.com/RIKEN-SysSoft/syscall_intercept.git url = https://github.com/ihkmckernel/syscall_intercept.git
[submodule "executer/user/lib/uti"] [submodule "executer/user/lib/uti"]
path = executer/user/lib/uti path = executer/user/lib/uti
url = https://github.com/RIKEN-SysSoft/uti.git url = https://github.com/ihkmckernel/uti.git

View File

@@ -7,7 +7,7 @@ endif (NOT CMAKE_BUILD_TYPE)
enable_language(C ASM) enable_language(C ASM)
project(mckernel C ASM) project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.9") set(MCKERNEL_VERSION "1.8.0")
# See "Fedora Packaging Guidelines -- Versioning" # See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "") set(MCKERNEL_RELEASE "")

View File

@@ -1,3 +1,52 @@
=============================================
Version 1.8.0 (Mar 23, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
N/A
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
#. profile: fix infinite recursion for allocation miss event
#. Fugaku: MAP_LOCKED and pre-populate PMIx shared memory PFNs
=============================================
Version 1.7.10 (Mar 18, 2021)
=============================================
----------------------
IHK major updates
----------------------
N/A
------------------------
IHK major bug fixes
------------------------
#. __ihk_device_detect_hungup: detect hungup via device-ioctl
----------------------
McKernel major updates
----------------------
N/A
------------------------
McKernel major bug fixes
------------------------
N/A
============================================= =============================================
Version 1.7.9 (Mar 17, 2021) Version 1.7.9 (Mar 17, 2021)
============================================= =============================================

View File

@@ -242,3 +242,5 @@ Limitations
#. procfs entry creation done by Linux work queue could starve when #. procfs entry creation done by Linux work queue could starve when
Linux CPUs are flooded with system call offloads. LTP-2019 Linux CPUs are flooded with system call offloads. LTP-2019
sendmsg02 causes this issue. sendmsg02 causes this issue.
#. Linux kernel dump files do not include the memory allocated to McKernel. This is due to issues in the implementation of the panic notifier handler.

2
ihk

Submodule ihk updated: 8b92b9d7f4...8fd23109f1

View File

@@ -136,6 +136,27 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
error = 0; error = 0;
*objp = to_memobj(obj); *objp = to_memobj(obj);
*maxprotp = result.maxprot; *maxprotp = result.maxprot;
#ifdef ENABLE_FUGAKU_HACKS
/* Pre-populate device file PFNs for PMIx shared mem */
if (!strncmp(obj->memobj.path,
"/var/opt/FJSVtcs/ple/daemonif", 29)) {
off_t offset;
uintptr_t phys;
unsigned long flag;
for (offset = 0; offset < obj->memobj.size; offset += PAGE_SIZE) {
if (devobj_get_page(&obj->memobj, offset, PAGE_P2ALIGN,
&phys, &flag, 0) < 0) {
kprintf("%s: WARNING: failed to populate offset %lu in %s\n",
__func__, offset, obj->memobj.path);
}
}
dkprintf("%s: pre-populated PFNs for %s, len: %lu\n",
__func__, obj->memobj.path, obj->memobj.size);
}
#endif
obj = NULL; obj = NULL;
out: out:
@@ -200,6 +221,10 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
uintptr_t attr; uintptr_t attr;
ihk_mc_user_context_t ctx; ihk_mc_user_context_t ctx;
int ix; int ix;
unsigned long irqstate;
#ifdef ENABLE_FUGAKU_HACKS
int page_fault_attempts = 5;
#endif
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align); dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
@@ -214,8 +239,15 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE
profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE); profile_event_add(PROFILE_page_fault_dev_file, PAGE_SIZE);
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
irqstate = ihk_mc_spinlock_lock(&obj->pfn_table_lock);
pfn = obj->pfn_table[ix]; pfn = obj->pfn_table[ix];
ihk_mc_spinlock_unlock(&obj->pfn_table_lock, irqstate);
if (!(pfn & PFN_VALID)) { if (!(pfn & PFN_VALID)) {
#ifdef ENABLE_FUGAKU_HACKS
pf_retry:
#endif
ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN; ihk_mc_syscall_arg0(&ctx) = PAGER_REQ_PFN;
ihk_mc_syscall_arg1(&ctx) = obj->handle; ihk_mc_syscall_arg1(&ctx) = obj->handle;
ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1); ihk_mc_syscall_arg2(&ctx) = off & ~(PAGE_SIZE - 1);
@@ -241,8 +273,24 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
pfn |= attr; pfn |= attr;
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn); dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
} }
#ifdef ENABLE_FUGAKU_HACKS
else if (page_fault_attempts > 0) {
kprintf("%s(): va: 0x%lx !PFN_PRESENT for offset %lu in %s, "
"page_fault_attempts: %d\n",
__func__, virt_addr, off,
memobj->path ? memobj->path : "<unknown>",
page_fault_attempts);
--page_fault_attempts;
goto pf_retry;
}
#endif
obj->pfn_table[ix] = pfn; /* Update atomically if unset */
irqstate = ihk_mc_spinlock_lock(&obj->pfn_table_lock);
if (obj->pfn_table[ix] == 0) {
obj->pfn_table[ix] = pfn;
}
ihk_mc_spinlock_unlock(&obj->pfn_table_lock, irqstate);
// Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory // Don't call memory_stat_rss_add() because devobj related pages don't reside in main memory
} }

View File

@@ -100,8 +100,13 @@ void profile_event_add(enum profile_event_type type, uint64_t tsc)
return; return;
if (!cpu_local_var(current)->profile_events) { if (!cpu_local_var(current)->profile_events) {
if (profile_alloc_events(cpu_local_var(current)) < 0) if (type == PROFILE_mpol_alloc_missed) {
return; return;
}
if (profile_alloc_events(cpu_local_var(current)) < 0) {
return;
}
} }
if (type < PROFILE_EVENT_MAX) { if (type < PROFILE_EVENT_MAX) {

View File

@@ -2219,6 +2219,8 @@ straight_out:
} }
#endif // PROFILE_ENABLE #endif // PROFILE_ENABLE
if (error == -ESRCH) { if (error == -ESRCH) {
int populate_flags = 0;
dkprintf("do_mmap:hit non VREG\n"); dkprintf("do_mmap:hit non VREG\n");
/* /*
* XXX: temporary: * XXX: temporary:
@@ -2230,8 +2232,21 @@ straight_out:
vrflags &= ~VR_MEMTYPE_MASK; vrflags &= ~VR_MEMTYPE_MASK;
vrflags |= VR_MEMTYPE_UC; vrflags |= VR_MEMTYPE_UC;
} }
#ifdef ENABLE_FUGAKU_HACKS
#ifdef ENABLE_TOFU
if (!strncmp("/var/opt/FJSVtcs/ple/daemonif/",
thread->proc->fd_path[fd], 30)) {
dkprintf("%s: MAP_POPULATE | MAP_LOCKED for %s\n",
__func__, thread->proc->fd_path[fd]);
populate_flags = (MAP_POPULATE | MAP_LOCKED);
}
#endif
#endif
error = devobj_create(fd, len, off, &memobj, &maxprot, error = devobj_create(fd, len, off, &memobj, &maxprot,
prot, (flags & (MAP_POPULATE | MAP_LOCKED))); prot,
populate_flags | (flags & (MAP_POPULATE | MAP_LOCKED)));
if (!error) { if (!error) {
#ifdef PROFILE_ENABLE #ifdef PROFILE_ENABLE

View File

@@ -119,7 +119,7 @@ if [ "${pid}" != "" ]; then
${SUDO} kill -9 ${pid} > /dev/null 2> /dev/null ${SUDO} kill -9 ${pid} > /dev/null 2> /dev/null
fi fi
if [ "${redirect_kmsg}" != "0" -o "${mon_interval}" != "-1" ]; then if [ "${redirect_kmsg}" != "0" -o "${mon_interval}" != "-1" ]; then
${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval} ${SUDO} ${SBINDIR}/ihkmond -f ${facility} -k ${redirect_kmsg} -i ${mon_interval}
fi fi
disable_irqbalance_mck() { disable_irqbalance_mck() {