Compare commits

..

28 Commits

Author SHA1 Message Date
Masamichi Takagi
d73e6a161c spec: prerelease 0.4 for testing capped best-effort memory reservation
Change-Id: Iec35ea1b7fa6b8930153461c395675f1576042ba
2020-12-29 17:12:14 +09:00
Masamichi Takagi
67334b65c3 rus_vm_fault: vmf_insert_pfn: treat VM_FAULT_NOPAGE as success
vmf_insert_pfn was added in the following commit.
https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=1c8f422059ae5da07db7406ab916203f9417e396

Refer to the following page for the meaning of VM_FAULT_NOPAGE.
https://lwn.net/Articles/242237/

Change-Id: I2b0144a20a57c74e0e2e0d2fc24281852f49b717
2020-12-29 16:31:41 +09:00
Masamichi Takagi
fe3992a3a2 cmake: add switch to turn on/off Fugaku debug modifications
To prevent "TO RESET: send SIGSTOP instead of SIGV in PF" from making
some tests expecting SIGSEGV fail.

Change-Id: I8bb111cff59fe5b0b2bf6bc652dfd2fa308321ed
2020-12-29 16:31:41 +09:00
Masamichi Takagi
5d58100c20 cmake: add switch to turn on/off Fugaku hacks
Change-Id: I2a1ac906a19c4e45ee62acdbf0bc6f77f61974f8
2020-12-29 16:31:41 +09:00
Balazs Gerofi
1b106d825c Tofu: fix phys addr calculation for contiguous pages in MBPT/BCH update
Change-Id: I70def9d02bdd7e1e969dedfc277a20df6ed2dff8
2020-12-29 16:31:41 +09:00
Balazs Gerofi
a680395093 Tofu: kmalloc cache for stag range
Change-Id: Ib5ea12c7c8cdafa7b699308c4eeb6e9ab39905c7
2020-12-29 16:31:41 +09:00
Balazs Gerofi
fd5a1c4b0a TO RESET: send SIGSTOP instead of SIGV in PF
Change-Id: I5f7e07cb89f5f38b7c631d838f0eee0a2a98e246
2020-12-29 16:31:40 +09:00
Balazs Gerofi
b3b1883ad8 eclair: turn off gdb pagination by default
Change-Id: I7758d97b90705310bc57cb9b6da6f6af436ea7fb
2020-12-29 16:31:40 +09:00
Balazs Gerofi
7145c4d383 TO RESET: stack changes
Change-Id: I325420701dfa5e9eac294be086a9d1e7326d95bc
2020-12-29 16:31:40 +09:00
Balazs Gerofi
0b82c8942b Tofu: keep track of stags per memory range
Change-Id: I033beaeee3b141dab4485dd3a2a3848eaa84e54e
2020-12-29 16:31:40 +09:00
Balazs Gerofi
75694152f0 Tofu: match page sizes to MBPT and fault PTEs if not present
Change-Id: Ia7aa92005a9941d6399063fec9a0776e73fc88fe
2020-12-29 16:31:40 +09:00
Masamichi Takagi
1cf0bd5a78 TO RESET: add debug instruments, map Linux areas for tofu
Change-Id: I09880cad3b87182cb663d414041254817c254759
2020-12-29 16:31:39 +09:00
Masamichi Takagi
25943634e9 TO RESET: do_mmap: show debug message when profile is turned on
Change-Id: I18f498f3a8660114b5e038e74179df95a645d232
2020-12-29 16:31:39 +09:00
Masamichi Takagi
72f95f92f8 TO RESET: hugefileobj: show debug messages
Change-Id: I904c811c13a59c0db74052bc92f6661a3e1b5d34
2020-12-29 16:31:39 +09:00
Masamichi Takagi
ab1014863d TO RESET: page_fault_handler: send SIGSTOP instead of SIGSEGV for debug
Change-Id: Ie281dbf43280464c8f412c8444a6861e43f28beb
2020-12-29 16:31:39 +09:00
Masamichi Takagi
4cd7051c2d TO RESET: setup_rt_frame: show debug message
Change-Id: I07d4f2dbba9bdb72f8a2892e6b5bd429b8e0aeec
2020-12-29 16:31:39 +09:00
Masamichi Takagi
d5716d3c3a TO RESET: mcctrl_get_request_os_cpu and __mcctrl_os_read_write_cpu_register: show debug messages
Change-Id: Ic8430e3fd6a814b888192233b029c942500a2dc9
2020-12-29 16:31:39 +09:00
Masamichi Takagi
2a984a12fe TO RESET: unhandled_page_fault: show instruction address
Change-Id: I29a8d30d9b3e5cfbe5e16b1faaa253e794b8fc5b
2020-12-29 16:31:38 +09:00
Masamichi Takagi
3949ab65a8 TO RESET: Add kernel argument to toggle on-demand paging for hugetlbfs map
Change-Id: Id748e0a2afc4ea59142fedb652a15b4007c5dee4
2020-12-29 16:31:33 +09:00
Masamichi Takagi
ed923ac82f TO RESET: hugefileobj: pre-allocate on mmap
Set this change to "TO RESET" because one of the Fujitsu tests fails.

Change-Id: Iddc30e8452b3d39da4975079d0c6a035e4f3dbde
2020-12-25 11:34:14 +09:00
Masamichi Takagi
191e6f7499 TO RESET: preempt_enable: check if no_preempt isn't negative
Change-Id: I1cef2077c50f3b3020870505dd065d10617f440e
2020-12-25 11:34:14 +09:00
Masamichi Takagi
4f7fd90300 TO RESET: lock: check if runq lock is held with IRQs disabled
Change-Id: I9a79ceaf9e399ad3695ed8959ca10c587591751a
2020-12-25 11:34:09 +09:00
Masamichi Takagi
8f2c8791bf TO RESET: arm64: enable interrupt on panic
Change-Id: I1ceb321de324f307fc82366b162c72f64184247b
2020-12-24 17:18:37 +09:00
Balazs Gerofi
bbfb296c26 TO RESET: mcreboot, mcstop+release.sh: add functions
Change-Id: Ic3992dc4e16b7ade00e93edbd107c64a32068c02
2020-12-24 16:53:27 +09:00
Balazs Gerofi
10b17e230c TO RESET: physical memory: free memory consistency checker
Change-Id: I15aa59bb81be4d8f2acfe8d161c8255f70f9e7d3
2020-12-24 16:53:12 +09:00
Masamichi Takagi
b268c28e7e TO RESET: mmap: ignore MAP_HUGETLB
Change-Id: Ifd50f24de0747b06d71ebba441ae2ef451f66c4d
2020-12-24 16:51:51 +09:00
Masamichi Takagi
2fa1c053d7 spec: prerelease 0.3 for testing ihk_reserve_mem and memory policy
Change-Id: I4fbcfa1f93522fd01af42d1ef13d0be075086773
2020-12-24 15:11:01 +09:00
Masamichi Takagi
530110e3a9 Tofu: fix ENABLE_TOFU switching
Change-Id: Ib33323d4b59ea8fb4f5f40dff7ea25a36773d5e2
2020-12-24 15:00:14 +09:00
41 changed files with 1124 additions and 33 deletions

View File

@@ -10,7 +10,7 @@ project(mckernel C ASM)
set(MCKERNEL_VERSION "1.7.1")
# See "Fedora Packaging Guidelines -- Versioning"
set(MCKERNEL_RELEASE "0.2")
set(MCKERNEL_RELEASE "0.4")
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake/modules)
# for rpmbuild
@@ -26,10 +26,8 @@ endif()
if (BUILD_TARGET STREQUAL "smp-x86")
set(ARCH "x86_64")
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
elseif (BUILD_TARGET STREQUAL "smp-arm64")
set(ARCH "arm64")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
endif()
include(GNUInstallDirs)
@@ -52,6 +50,40 @@ if (ENABLE_WERROR)
add_compile_options("-Werror")
endif(ENABLE_WERROR)
execute_process(COMMAND bash -c "ls -ld /proc/tofu/ 2>/dev/null | wc -l"
OUTPUT_VARIABLE PROC_TOFU OUTPUT_STRIP_TRAILING_WHITESPACE)
if(PROC_TOFU STREQUAL "1")
option(ENABLE_TOFU "Built-in tofu driver support" ON)
else()
option(ENABLE_TOFU "Built-in tofu driver support" OFF)
endif()
if(ENABLE_TOFU)
add_definitions(-DENABLE_TOFU)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_TOFU")
endif()
execute_process(COMMAND bash -c "grep $(hostname) /etc/opt/FJSVfefs/config/fefs_node1.csv 2>/dev/null | cut -d, -f2"
OUTPUT_VARIABLE FUGAKU_NODE_TYPE OUTPUT_STRIP_TRAILING_WHITESPACE)
if(FUGAKU_NODE_TYPE STREQUAL "CN")
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" ON)
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" ON)
else()
option(ENABLE_FUGAKU_HACKS "Fugaku hacks" OFF)
option(ENABLE_FUGAKU_DEBUG "Fugaku debug instrumentation" OFF)
endif()
if(ENABLE_FUGAKU_HACKS)
add_definitions(-DENABLE_FUGAKU_HACKS)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_HACKS")
endif()
if(ENABLE_FUGAKU_DEBUG)
add_definitions(-DENABLE_FUGAKU_DEBUG)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DENABLE_FUGAKU_DEBUG")
endif()
option(ENABLE_LINUX_WORK_IRQ_FOR_IKC "Use Linux work IRQ for IKC IPI" ON)
if (ENABLE_LINUX_WORK_IRQ_FOR_IKC)
set(KBUILD_C_FLAGS "${KBUILD_C_FLAGS} -DIHK_IKC_USE_LINUX_WORK_IRQ")
@@ -255,6 +287,8 @@ message("MAP_KERNEL_START: ${MAP_KERNEL_START}")
message("ENABLE_MEMDUMP: ${ENABLE_MEMDUMP}")
message("ENABLE_PERF: ${ENABLE_PERF}")
message("ENABLE_TOFU: ${ENABLE_TOFU}")
message("ENABLE_FUGAKU_HACKS: ${ENABLE_FUGAKU_HACKS}")
message("ENABLE_FUGAKU_DEBUG: ${ENABLE_FUGAKU_DEBUG}")
message("ENABLE_RUSAGE: ${ENABLE_RUSAGE}")
message("ENABLE_QLMPI: ${ENABLE_QLMPI}")
message("ENABLE_UTI: ${ENABLE_UTI}")

View File

@@ -730,6 +730,49 @@ static void show_context_stack(struct pt_regs *regs)
}
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked)
{
uintptr_t stack_top;
unsigned long irqflags = 0;
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
if (!kprintf_locked)
irqflags = kprintf_lock();
__kprintf("TID: %d, call stack (most recent first):\n",
thread->tid);
__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);
for (;;) {
extern char _head[], _end[];
uintptr_t *fp, *lr;
fp = (uintptr_t *)sp;
lr = (uintptr_t *)(sp + 8);
if ((*fp <= sp)) {
break;
}
if ((*fp > stack_top)) {
break;
}
if ((*lr < (unsigned long)_head) ||
(*lr > (unsigned long)_end)) {
break;
}
__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", *lr - 4, sp, *fp);
sp = *fp;
}
if (!kprintf_locked)
kprintf_unlock(irqflags);
}
#endif
void handle_IPI(unsigned int vector, struct pt_regs *regs)
{
struct ihk_mc_interrupt_handler *h;
@@ -791,6 +834,19 @@ void cpu_safe_halt(void)
cpu_enable_interrupt();
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
extern void __cpu_do_idle(void);
cpu_enable_interrupt();
__cpu_do_idle();
}
#endif
#if defined(CONFIG_HAS_NMI)
#include <arm-gic-v3.h>
@@ -856,6 +912,21 @@ unsigned long cpu_enable_interrupt_save(void)
return flags;
}
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void)
{
unsigned long flags;
unsigned long masked = ICC_PMR_EL1_MASKED;
asm volatile(
"mrs_s %0, " __stringify(ICC_PMR_EL1)
: "=&r" (flags)
:
: "memory");
return (flags == masked);
}
#endif
#else /* defined(CONFIG_HAS_NMI) */
/* @ref.impl arch/arm64/include/asm/irqflags.h::arch_local_irq_enable */
@@ -1377,6 +1448,14 @@ void arch_print_stack(void)
{
}
#ifdef ENABLE_FUGAKU_HACKS
unsigned long arch_get_instruction_address(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;
return regs->pc;
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct pt_regs *regs = (struct pt_regs *)reg;
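A note on the stack walker added above: it relies on the standard AArch64 frame record layout plus two sanity checks to know when to stop. A hedged sketch of the layout it assumes (the struct name is illustrative, not from the source):

/*
 * Illustrative only: one frame record as __show_context_stack() interprets it.
 * The walk continues while the saved FP stays inside the current
 * KERNEL_STACK_SIZE-aligned stack (below stack_top) and while the saved LR
 * falls inside the kernel image (_head .. _end); LR - 4 is printed as the
 * approximate call site.
 */
struct frame_record {
	uintptr_t fp;	/* caller's frame pointer, read from *(uintptr_t *)sp */
	uintptr_t lr;	/* caller's return address, read from *(uintptr_t *)(sp + 8) */
};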

View File

@@ -223,8 +223,12 @@ static int do_translation_fault(unsigned long addr,
unsigned int esr,
struct pt_regs *regs)
{
#ifdef ENABLE_TOFU
// XXX: Handle kernel space page faults for Tofu driver
//if (addr < USER_END)
#else
if (addr < USER_END)
#endif
return do_page_fault(addr, esr, regs);
do_bad_area(addr, esr, regs);
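Because of the commented-out line, the resulting flow is easy to misread; restated as plain code (the function name is illustrative), under ENABLE_TOFU every translation fault, including kernel-space addresses touched by the built-in Tofu driver, is routed to do_page_fault():

static int translation_fault_effective(unsigned long addr, unsigned int esr,
				       struct pt_regs *regs)
{
#ifdef ENABLE_TOFU
	/* kernel-space faults are handled too, for the Tofu driver */
	return do_page_fault(addr, esr, regs);
#else
	if (addr < USER_END)
		return do_page_fault(addr, esr, regs);
	do_bad_area(addr, esr, regs);
	return 0;
#endif
}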

View File

@@ -9,6 +9,9 @@
#include "affinity.h"
#include <lwk/compiler.h>
#include "config.h"
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/debug.h>
#endif
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
@@ -31,6 +34,10 @@ typedef struct {
#endif /* __AARCH64EB__ */
} __attribute__((aligned(4))) ihk_spinlock_t;
#ifdef ENABLE_FUGAKU_HACKS
extern ihk_spinlock_t *get_this_cpu_runq_lock(void);
#endif
extern void preempt_enable(void);
extern void preempt_disable(void);
@@ -98,6 +105,18 @@ static int __ihk_mc_spinlock_trylock_noirq(ihk_spinlock_t *lock)
: "memory");
success = !tmp;
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (success) {
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
}
#endif
#endif
if (!success) {
preempt_enable();
}
@@ -182,6 +201,14 @@ static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "=&r" (lockval), "=&r" (newval), "=&r" (tmp), "+Q" (*lock)
: "Q" (lock->owner), "I" (1 << TICKET_SHIFT)
: "memory");
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (get_this_cpu_runq_lock() == lock &&
!cpu_interrupt_disabled()) {
kprintf("%s: WARNING: runq lock held without IRQs disabled?\n", __func__); \
}
#endif
#endif
}
#ifdef DEBUG_SPINLOCK
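The two #if 0 blocks above sketch the same diagnostic; collected into one helper it would read roughly as follows (the helper name is illustrative, the called functions are the ones introduced elsewhere in this change set):

/*
 * Illustrative consolidation of the disabled checks above: warn when the
 * per-CPU run queue lock is acquired while interrupts are still enabled.
 */
static inline void debug_check_runq_lock_irqs(ihk_spinlock_t *lock)
{
	if (get_this_cpu_runq_lock() == lock && !cpu_interrupt_disabled()) {
		kprintf("%s: WARNING: runq lock held without IRQs disabled?\n",
			__func__);
	}
}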

View File

@@ -94,7 +94,11 @@ extern char _end[];
# define LD_TASK_UNMAPPED_BASE UL(0x0000080000000000)
# define TASK_UNMAPPED_BASE UL(0x0000100000000000)
# define USER_END UL(0x0000400000000000)
#ifdef ENABLE_TOFU
# define MAP_VMAP_START UL(0xffff7bdfffff0000)
#else
# define MAP_VMAP_START UL(0xffff780000000000)
#endif
# define MAP_VMAP_SIZE UL(0x0000000100000000)
# define MAP_FIXED_START UL(0xffff7ffffbdd0000)
# define MAP_ST_START UL(0xffff800000000000)

View File

@@ -7,6 +7,9 @@
#include <process.h>
#include <syscall.h>
#include <ihk/debug.h>
#ifdef ENABLE_FUGAKU_HACKS
#include <ihk/monitor.h>
#endif
#include <arch-timer.h>
#include <cls.h>
@@ -313,14 +316,27 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
struct cpu_local_var *v = get_this_cpu_local_var();
//unsigned long irqflags;
int do_check = 0;
#ifdef ENABLE_FUGAKU_HACKS
struct ihk_os_cpu_monitor *monitor = cpu_local_var(monitor);
++v->in_interrupt;
#endif
irqnr = gic_read_iar();
cpu_enable_nmi();
set_cputime(from_user ? CPUTIME_MODE_U2K : CPUTIME_MODE_K2K_IN);
while (irqnr != ICC_IAR1_EL1_SPURIOUS) {
if ((irqnr < 1020) || (irqnr >= 8192)) {
gic_write_eoir(irqnr);
#ifndef ENABLE_FUGAKU_HACKS
handle_IPI(irqnr, regs);
#else
/* Once paniced, only allow CPU stop and NMI IRQs */
if (monitor->status != IHK_OS_MONITOR_PANIC ||
irqnr == INTRID_CPU_STOP ||
irqnr == INTRID_MULTI_NMI) {
handle_IPI(irqnr, regs);
}
#endif
}
irqnr = gic_read_iar();
}
@@ -335,7 +351,12 @@ void handle_interrupt_gicv3(struct pt_regs *regs)
}
//ihk_mc_spinlock_unlock(&v->runq_lock, irqflags);
#ifndef ENABLE_FUGAKU_HACKS
if (do_check) {
#else
--v->in_interrupt;
if (monitor->status != IHK_OS_MONITOR_PANIC && do_check) {
#endif
check_signal(0, regs, 0);
schedule();
}
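The gating added to the IRQ loop above reduces to a single predicate; restated here for clarity (the helper name is illustrative):

/*
 * Illustrative restatement: once the monitor has recorded a panic, only the
 * CPU-stop and multi-NMI IPIs are still serviced, so the node can be halted
 * and dumped without running any other handlers.
 */
static int ipi_allowed_after_panic(struct ihk_os_cpu_monitor *monitor,
				   unsigned int irqnr)
{
	return monitor->status != IHK_OS_MONITOR_PANIC ||
	       irqnr == INTRID_CPU_STOP ||
	       irqnr == INTRID_MULTI_NMI;
}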

View File

@@ -217,11 +217,13 @@ static inline int ptl4_index(unsigned long addr)
int idx = (addr >> PTL4_SHIFT) & PTL4_INDEX_MASK;
return idx;
}
#ifdef ENABLE_TOFU
static inline int ptl3_index_linux(unsigned long addr)
{
int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK_LINUX;
return idx;
}
#endif
static inline int ptl3_index(unsigned long addr)
{
int idx = (addr >> PTL3_SHIFT) & PTL3_INDEX_MASK;
@@ -281,6 +283,7 @@ static inline pte_t* ptl4_offset(const translation_table_t* ptl4, unsigned long
return ptep;
}
#ifdef ENABLE_TOFU
static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
{
pte_t* ptep = NULL;
@@ -311,6 +314,7 @@ static inline pte_t* ptl3_offset_linux(const pte_t* l4p, unsigned long addr)
}
return ptep;
}
#endif
static inline pte_t* ptl3_offset(const pte_t* l4p, unsigned long addr)
{
@@ -991,10 +995,12 @@ static void init_normal_area(struct page_table *pt)
tt = get_translation_table(pt);
#ifdef ENABLE_TOFU
setup(tt,
arm64_st_phys_base,
arm64_st_phys_base + (1UL << 40));
return;
#endif
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); i++) {
unsigned long map_start, map_end;
@@ -1323,6 +1329,7 @@ out:
return ret;
}
#ifdef ENABLE_TOFU
int ihk_mc_linux_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,
unsigned long *phys,
@@ -1373,7 +1380,7 @@ out:
if(size) *size = lsize;
return 0;
}
#endif
int ihk_mc_pt_virt_to_phys_size(struct page_table *pt,
const void *virt,

View File

@@ -1071,6 +1071,9 @@ static int setup_rt_frame(int usig, unsigned long rc, int to_restart,
if (k->sa.sa_flags & SA_RESTORER){
regs->regs[30] = (unsigned long)k->sa.sa_restorer;
#ifdef ENABLE_FUGAKU_HACKS
kprintf("%s: SA_RESTORER: 0x%lx\n", __func__, regs->regs[30]);
#endif
} else {
regs->regs[30] = (unsigned long)VDSO_SYMBOL(thread->vm->vdso_addr, sigtramp);
}
@@ -1723,6 +1726,7 @@ SYSCALL_DECLARE(mmap)
/* check arguments */
pgsize = PAGE_SIZE;
#ifndef ENABLE_FUGAKU_HACKS
if (flags & MAP_HUGETLB) {
int hugeshift = flags & (0x3F << MAP_HUGE_SHIFT);
@@ -1763,6 +1767,11 @@ SYSCALL_DECLARE(mmap)
goto out;
}
}
#else
if (flags & MAP_HUGETLB) {
flags &= ~(MAP_HUGETLB);
}
#endif
#define VALID_DUMMY_ADDR ((region->user_start + PTL3_SIZE - 1) & ~(PTL3_SIZE - 1))
addr = (flags & MAP_FIXED)? addr0: VALID_DUMMY_ADDR;

View File

@@ -174,13 +174,14 @@ void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
arch_show_interrupt_context(regs);
#if 0
#ifdef ENABLE_TOFU
info.si_signo = SIGSTOP;
info.si_errno = 0;
#else
info.si_signo = SIGILL;
info.si_errno = 0;
info.si_code = ILL_ILLOPC;
#endif
info.si_signo = SIGSTOP;
info.si_errno = 0;
info._sifields._sigfault.si_addr = (void*)regs->pc;
arm64_notify_die("Oops - bad mode", regs, &info, 0);

View File

@@ -868,6 +868,49 @@ void show_context_stack(uintptr_t *rbp) {
return;
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked)
{
uintptr_t stack_top;
unsigned long irqflags = 0;
stack_top = ALIGN_UP(sp, (uintptr_t)KERNEL_STACK_SIZE);
if (!kprintf_locked)
irqflags = kprintf_lock();
__kprintf("TID: %d, call stack (most recent first):\n",
thread->tid);
__kprintf("PC: %016lx, SP: %016lx\n", pc, sp);
for (;;) {
extern char _head[], _end[];
uintptr_t *fp, *lr;
fp = (uintptr_t *)sp;
lr = (uintptr_t *)(sp + 8);
if ((*fp <= sp)) {
break;
}
if ((*fp > stack_top)) {
break;
}
if ((*lr < (unsigned long)_head) ||
(*lr > (unsigned long)_end)) {
break;
}
__kprintf("PC: %016lx, SP: %016lx, FP: %016lx\n", *lr - 4, sp, *fp);
sp = *fp;
}
if (!kprintf_locked)
kprintf_unlock(irqflags);
}
#endif
void interrupt_exit(struct x86_user_context *regs)
{
if (interrupt_from_user(regs)) {
@@ -1137,6 +1180,17 @@ void cpu_halt(void)
asm volatile("hlt");
}
#ifdef ENABLE_FUGAKU_HACKS
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_halt_panic(void)
{
cpu_halt();
}
#endif
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@@ -1521,6 +1575,16 @@ void arch_print_stack(void)
__print_stack(rbp, 0);
}
#ifdef ENABLE_FUGAKU_HACKS
unsigned long arch_get_instruction_address(const void *reg)
{
const struct x86_user_context *uctx = reg;
const struct x86_basic_regs *regs = &uctx->gpr;
return regs->rip;
}
#endif
/*@
@ requires \valid(reg);
@ assigns \nothing;

View File

@@ -451,4 +451,12 @@ extern unsigned long ap_trampoline;
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE)
#ifdef ENABLE_FUGAKU_HACKS
#ifndef __ASSEMBLY__
# define ALIGN_UP(x, align) ALIGN_DOWN((x) + (align) - 1, align)
# define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#endif /* !__ASSEMBLY__ */
#endif
#endif
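A quick standalone check of the helpers added above; note that they only work for power-of-two alignments (values below are arbitrary examples):

#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, align) ((x) & ~((align) - 1))
#define ALIGN_UP(x, align)   ALIGN_DOWN((x) + (align) - 1, align)

int main(void)
{
	uintptr_t sp = 0x12345;

	/* prints 0x12000 and 0x13000 for a 4 KiB alignment */
	printf("%#lx %#lx\n",
	       (unsigned long)ALIGN_DOWN(sp, 0x1000),
	       (unsigned long)ALIGN_UP(sp, 0x1000));
	return 0;
}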

View File

@@ -53,5 +53,9 @@ struct x86_cpu_local_variables *get_x86_this_cpu_local(void);
void *get_x86_cpu_local_kstack(int id);
void *get_x86_this_cpu_kstack(void);
#ifdef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#define KERNEL_STACK_SIZE LOCALS_SPAN
#endif
#endif

View File

@@ -21,7 +21,9 @@
#include <registers.h>
#include <string.h>
#ifndef ENABLE_FUGAKU_HACKS
#define LOCALS_SPAN (4 * PAGE_SIZE)
#endif
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */

View File

@@ -16,9 +16,6 @@
/* whether perf is enabled */
#cmakedefine ENABLE_PERF 1
/* whether built-in tofu driver is enabled */
#cmakedefine ENABLE_TOFU 1
/* whether qlmpi is enabled */
#cmakedefine ENABLE_QLMPI 1

View File

@@ -168,7 +168,9 @@ struct program_load_desc {
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];
@@ -215,7 +217,9 @@ struct syscall_response {
unsigned long req_thread_status;
long ret;
unsigned long fault_address;
#ifdef ENABLE_TOFU
void *pde_data;
#endif
};
struct syscall_ret_desc {

View File

@@ -28,6 +28,7 @@ void *vdso_end;
static struct vm_special_mapping (*vdso_spec)[2];
#endif
#ifdef ENABLE_TOFU
/* Tofu CQ and barrier gate release functions */
struct file_operations *mcctrl_tof_utofu_procfs_ops_cq;
int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
@@ -35,6 +36,7 @@ int (*mcctrl_tof_utofu_release_cq)(struct inode *inode,
struct file_operations *mcctrl_tof_utofu_procfs_ops_bch;
int (*mcctrl_tof_utofu_release_bch)(struct inode *inode,
struct file *filp);
#endif
int arch_symbols_init(void)
{
@@ -52,6 +54,7 @@ int arch_symbols_init(void)
return -EFAULT;
#endif
#ifdef ENABLE_TOFU
mcctrl_tof_utofu_procfs_ops_cq =
(void *)kallsyms_lookup_name("tof_utofu_procfs_ops_cq");
if (WARN_ON(!mcctrl_tof_utofu_procfs_ops_cq))
@@ -71,6 +74,7 @@ int arch_symbols_init(void)
(void *)kallsyms_lookup_name("tof_utofu_release_bch");
if (WARN_ON(!mcctrl_tof_utofu_release_bch))
return -EFAULT;
#endif
return 0;
}
@@ -360,6 +364,15 @@ int translate_rva_to_rpa(ihk_os_t os, unsigned long rpt, unsigned long rva,
// page table to translation_table.
phys = ihk_device_map_memory(ihk_os_to_dev(os), rpt, PAGE_SIZE);
#ifdef ENABLE_FUGAKU_HACKS
if (!phys) {
pr_err("%s(): ERROR: VA: 0x%lx, rpt is NULL for PID %d\n",
__func__, rva, task_tgid_vnr(current));
error = -EFAULT;
goto out;
}
#endif
tbl = ihk_device_map_virtual(ihk_os_to_dev(os), phys, PAGE_SIZE, NULL, 0);
rpa = (unsigned long)tbl->tt_pa;
@@ -448,6 +461,7 @@ out:
}
#ifdef ENABLE_TOFU
/*
* Tofu CQ and BCH release handlers
*/
@@ -549,3 +563,4 @@ int __mcctrl_tof_utofu_release_bch(struct inode *inode, struct file *filp)
return __mcctrl_tof_utofu_release_handler(inode, filp,
mcctrl_tof_utofu_release_bch);
}
#endif

View File

@@ -3582,7 +3582,11 @@ int mcctrl_get_request_os_cpu(ihk_os_t os, int *ret_cpu)
*ret_cpu = ch->send.queue->read_cpu;
ret = 0;
#ifndef ENABLE_FUGAKU_HACKS
pr_info("%s: OS: %lx, CPU: %d\n",
#else
dprintk("%s: OS: %lx, CPU: %d\n",
#endif
__func__, (unsigned long)os, *ret_cpu);
out_put_ppd:
@@ -3646,7 +3650,11 @@ int __mcctrl_os_read_write_cpu_register(ihk_os_t os, int cpu,
/* Notify caller (for future async implementation) */
atomic_set(&desc->sync, 1);
#ifndef ENABLE_FUGAKU_HACKS
dprintk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#else
printk("%s: MCCTRL_OS_CPU_%s_REGISTER: CPU: %d, addr_ext: 0x%lx, val: 0x%lx\n",
#endif
__FUNCTION__,
(op == MCCTRL_OS_CPU_READ_REGISTER ? "READ" : "WRITE"), cpu,
desc->addr_ext, desc->val);

View File

@@ -50,7 +50,9 @@ extern void procfs_exit(int);
extern void uti_attr_finalize(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
#ifdef ENABLE_TOFU
extern void mcctrl_file_to_pidfd_hash_init(void);
#endif
extern int mcctrl_os_read_cpu_register(ihk_os_t os, int cpu,
struct ihk_os_cpu_register *desc);
@@ -233,7 +235,6 @@ void (*mcctrl_zap_page_range)(struct vm_area_struct *vma,
struct inode_operations *mcctrl_hugetlbfs_inode_operations;
static int symbols_init(void)
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4,17,0)
@@ -325,7 +326,9 @@ static int __init mcctrl_init(void)
}
binfmt_mcexec_init();
#ifdef ENABLE_TOFU
mcctrl_file_to_pidfd_hash_init();
#endif
if ((ret = symbols_init()))
goto error;

View File

@@ -560,6 +560,7 @@ struct uti_futex_resp {
wait_queue_head_t wq;
};
#ifdef ENABLE_TOFU
/*
* Hash table to keep track of files and related processes
* and file descriptors.
@@ -585,3 +586,4 @@ struct mcctrl_file_to_pidfd *mcctrl_file_to_pidfd_hash_lookup(
int mcctrl_file_to_pidfd_hash_remove(struct file *filp,
ihk_os_t os, struct task_struct *group_leader, int fd);
#endif
#endif

View File

@@ -692,14 +692,20 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
vma->vm_start, vma->vm_end, pgsize, pix);
}
}
else
else {
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 18, 0)
error = vmf_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
if (error == VM_FAULT_NOPAGE) {
dprintk("%s: vmf_insert_pfn returned %d\n",
__func__, error);
error = 0;
}
#else
error = vm_insert_pfn(vma, rva+(pix*PAGE_SIZE),
pfn+pix);
#endif
}
if (error) {
pr_err("%s: vm_insert_pfn returned %d\n",
__func__, error);
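For reference, the pattern this hunk applies (treating VM_FAULT_NOPAGE from vmf_insert_pfn() as success on kernels >= 4.18) can be reduced to a minimal sketch; the wrapper name is illustrative and not part of the source:

#include <linux/mm.h>

/*
 * Minimal sketch: vmf_insert_pfn() returns a vm_fault_t, and VM_FAULT_NOPAGE
 * means the PFN was installed successfully, so it must not be treated as an
 * error by callers that expect 0-on-success semantics.
 */
static int map_one_pfn(struct vm_area_struct *vma, unsigned long addr,
		       unsigned long pfn)
{
	vm_fault_t ret = vmf_insert_pfn(vma, addr, pfn);

	if (ret == VM_FAULT_NOPAGE)
		return 0;	/* PTE installed, nothing left to do */

	return -EFAULT;		/* any other value is a real fault */
}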
@@ -1843,6 +1849,7 @@ static long pager_call(ihk_os_t os, struct syscall_request *req)
return ret;
}
#ifdef ENABLE_TOFU
struct list_head mcctrl_file_to_pidfd_hash[MCCTRL_FILE_2_PIDFD_HASH_SIZE];
spinlock_t mcctrl_file_to_pidfd_hash_lock;
@@ -1971,7 +1978,7 @@ unlock_out:
spin_unlock_irqrestore(&mcctrl_file_to_pidfd_hash_lock, irqflags);
return ret;
}
#endif
void __return_syscall(ihk_os_t os, struct ikc_scd_packet *packet,
long ret, int stid)
@@ -2458,6 +2465,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
dprintk("%s: system call: %lx\n", __FUNCTION__, sc->args[0]);
switch (sc->number) {
#ifdef ENABLE_TOFU
case __NR_close: {
struct fd f;
int fd;
@@ -2478,6 +2486,7 @@ int __do_in_kernel_syscall(ihk_os_t os, struct ikc_scd_packet *packet)
break;
}
#endif
case __NR_mmap:
ret = pager_call(os, sc);
break;

View File

@@ -1179,7 +1179,7 @@ static int start_gdb(void) {
sprintf(buf, "target remote :%d", ntohs(sin.sin_port));
execlp("gdb", "eclair", "-q", "-ex", "set prompt (eclair) ",
"-ex", buf, opt.kernel_path, NULL);
"-ex", buf, opt.kernel_path, "-ex", "set pagination off", NULL);
perror("execlp");
return 3;
}

View File

@@ -68,13 +68,13 @@
#include <sys/user.h>
#endif /* !__aarch64__ */
#include <sys/prctl.h>
#include "../../config.h"
#include "../include/uprotocol.h"
#include <ihk/ihk_host_user.h>
#include "../include/uti.h"
#include <getopt.h>
#include "archdep.h"
#include "arch_args.h"
#include "../../config.h"
#include <numa.h>
#include <numaif.h>
#include <spawn.h>
@@ -204,7 +204,9 @@ static char *mpol_bind_nodes = NULL;
static int uti_thread_rank = 0;
static int uti_use_last_cpu = 0;
static int enable_uti = 0;
#ifdef ENABLE_TOFU
static int enable_tofu = 0;
#endif
/* Partitioned execution (e.g., for MPI) */
static int nr_processes = 0;
@@ -1787,12 +1789,14 @@ static struct option mcexec_options[] = {
.flag = &enable_uti,
.val = 1,
},
#ifdef ENABLE_TOFU
{
.name = "enable-tofu",
.has_arg = no_argument,
.flag = &enable_tofu,
.val = 1,
},
#endif
{
.name = "debug-mcexec",
.has_arg = no_argument,
@@ -2816,7 +2820,9 @@ int main(int argc, char **argv)
desc->straight_map = straight_map;
desc->straight_map_threshold = straight_map_threshold;
#ifdef ENABLE_TOFU
desc->enable_tofu = enable_tofu;
#endif
/* user_start and user_end are set by this call */
if (ioctl(fd, MCEXEC_UP_PREPARE_IMAGE, (unsigned long)desc) != 0) {

ihk

Submodule ihk updated: bf40051828...675ab08a3c

View File

@@ -58,16 +58,43 @@ struct cpu_local_var *get_cpu_local_var(int id)
return clv + id;
}
#ifdef ENABLE_FUGAKU_HACKS
void __show_context_stack(struct thread *thread,
unsigned long pc, uintptr_t sp, int kprintf_locked);
#endif
void preempt_enable(void)
{
#ifndef ENABLE_FUGAKU_HACKS
if (cpu_local_var_initialized)
--cpu_local_var(no_preempt);
#else
if (cpu_local_var_initialized) {
--cpu_local_var(no_preempt);
if (cpu_local_var(no_preempt) < 0) {
//cpu_disable_interrupt();
__kprintf("%s: %d\n", __func__, cpu_local_var(no_preempt));
__kprintf("TID: %d, call stack from builtin frame (most recent first):\n",
cpu_local_var(current)->tid);
__show_context_stack(cpu_local_var(current), (uintptr_t)&preempt_enable,
(unsigned long)__builtin_frame_address(0), 1);
//arch_cpu_stop();
//cpu_halt();
#ifdef ENABLE_FUGAKU_HACKS
panic("panic: negative preemption??");
#endif
}
}
#endif
}
void preempt_disable(void)
{
if (cpu_local_var_initialized)
if (cpu_local_var_initialized) {
++cpu_local_var(no_preempt);
}
}
int add_backlog(int (*func)(void *arg), void *arg)
@@ -120,3 +147,10 @@ void do_backlog(void)
}
}
}
#ifdef ENABLE_FUGAKU_HACKS
ihk_spinlock_t *get_this_cpu_runq_lock(void)
{
return &get_this_cpu_local_var()->runq_lock;
}
#endif
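The negative-counter check added to preempt_enable() above guards a simple balance invariant; an illustrative sketch of what it protects against:

/*
 * Illustrative only: preempt_disable()/preempt_enable() must nest in matched
 * pairs, so the per-CPU no_preempt counter never drops below zero. An
 * unmatched preempt_enable() is exactly what the "negative preemption" panic
 * above catches.
 */
static void balanced_section_example(void)
{
	preempt_disable();	/* no_preempt: n -> n + 1 */
	/* ... touch per-CPU state without being migrated or preempted ... */
	preempt_enable();	/* no_preempt: n + 1 -> n */
	/* a second preempt_enable() here would drive the counter negative */
}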

View File

@@ -788,7 +788,11 @@ out_remote_pf:
syscall_channel_send(resp_channel, &pckt);
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#else
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
#endif
ret = 0;
break;

View File

@@ -85,7 +85,11 @@ static int hugefileobj_get_page(struct memobj *memobj, off_t off,
}
memset(obj->pages[pgind], 0, obj->pgsize);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#else
kprintf("%s: obj: 0x%lx, allocated page for off: %lu"
#endif
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
@@ -274,13 +278,51 @@ int hugefileobj_create(struct memobj *memobj, size_t len, off_t off,
obj->nr_pages = nr_pages;
obj->pages = pages;
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#else
kprintf("%s: obj: 0x%lx, VA: 0x%lx, page array allocated"
#endif
" for %d pages, pagesize: %lu\n",
__func__,
obj,
virt_addr,
nr_pages,
obj->pgsize);
#ifdef ENABLE_FUGAKU_HACKS
if (!hugetlbfs_on_demand) {
int pgind;
int npages;
#ifndef ENABLE_FUGAKU_HACKS
for (pgind = 0; pgind < obj->nr_pages; ++pgind) {
#else
/* Map in only the last 8 pages */
for (pgind = ((obj->nr_pages > 8) ? (obj->nr_pages - 8) : 0);
pgind < obj->nr_pages; ++pgind) {
#endif
if (obj->pages[pgind]) {
continue;
}
npages = obj->pgsize >> PAGE_SHIFT;
obj->pages[pgind] = ihk_mc_alloc_aligned_pages_user(npages,
obj->pgshift - PTL1_SHIFT,
IHK_MC_AP_NOWAIT | IHK_MC_AP_USER, 0);
if (!obj->pages[pgind]) {
kprintf("%s: error: could not allocate page for off: %lu"
", page size: %lu\n", __func__, off, obj->pgsize);
continue;
}
memset(obj->pages[pgind], 0, obj->pgsize);
dkprintf("%s: obj: 0x%lx, pre-allocated page for off: %lu"
" (ind: %d), page size: %lu\n",
__func__, obj, off, pgind, obj->pgsize);
}
}
#endif
}
obj->memobj.size = len;

View File

@@ -106,6 +106,9 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
#ifdef ENABLE_FUGAKU_HACKS
int in_page_fault;
#endif
int no_preempt;
int timer_enabled;
unsigned long nr_ctx_switches;

View File

@@ -69,4 +69,7 @@ static inline int page_is_multi_mapped(struct page *page)
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
#ifdef ENABLE_FUGAKU_HACKS
extern int hugetlbfs_on_demand;
#endif
#endif

View File

@@ -395,6 +395,9 @@ struct vm_range {
off_t objoff;
int pgshift; /* page size. 0 means THP */
int padding;
#ifdef ENABLE_TOFU
struct list_head tofu_stag_list;
#endif
void *private_data;
};
@@ -567,7 +570,9 @@ struct process {
int thp_disable;
int straight_map;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
size_t straight_map_threshold;
// perf_event
@@ -592,9 +597,11 @@ struct process {
int coredump_barrier_count, coredump_barrier_count2;
mcs_rwlock_lock_t coredump_lock; // lock for coredump
#ifdef ENABLE_TOFU
#define MAX_FD_PDE 1024
void *fd_pde_data[MAX_FD_PDE];
char *fd_path[MAX_FD_PDE];
#endif
};
/*
@@ -753,11 +760,16 @@ struct thread {
struct waitq coredump_wq;
int coredump_status;
#ifdef ENABLE_TOFU
/* Path of file being opened */
char *fd_path_in_open;
#endif
};
#define VM_RANGE_CACHE_SIZE 4
#ifdef ENABLE_TOFU
#define TOFU_STAG_HASH_SIZE 4
#endif
struct process_vm {
struct address_space *address_space;
@@ -790,6 +802,12 @@ struct process_vm {
struct vm_range *range_cache[VM_RANGE_CACHE_SIZE];
int range_cache_ind;
struct swapinfo *swapinfo;
#ifdef ENABLE_TOFU
/* Tofu STAG hash */
ihk_spinlock_t tofu_stag_lock;
struct list_head tofu_stag_hash[TOFU_STAG_HASH_SIZE];
#endif
};
static inline int has_cap_ipc_lock(struct thread *th)
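The new tofu_stag_list and tofu_stag_hash members above link elements of type struct tofu_stag_range, whose definition lives in tofu/tofu_stag_range.h and is not part of this diff. A hedged reconstruction from how its fields are used in process.c and tofu.c below:

/*
 * Reconstructed for illustration only; see tofu/tofu_stag_range.h for the
 * authoritative definition.
 */
struct tofu_stag_range {
	uintptr_t start;		/* user VA range registered under the STAG */
	uintptr_t end;
	struct tof_utofu_cq *ucq;	/* owning TNI/CQ */
	int stag;
	struct list_head list;		/* linked on vm_range->tofu_stag_list */
	struct list_head hash;		/* linked on vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE] */
};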

View File

@@ -238,7 +238,9 @@ struct program_load_desc {
int uti_use_last_cpu; /* Work-around not to share CPU with OpenMP thread */
int straight_map;
size_t straight_map_threshold;
#ifdef ENABLE_TOFU
int enable_tofu;
#endif
int nr_processes;
int process_rank;
__cpu_set_unit cpu_set[PLD_CPU_SET_SIZE];

View File

@@ -63,6 +63,9 @@ extern int interrupt_from_user(void *);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
#ifdef ENABLE_FUGAKU_HACKS
int hugetlbfs_on_demand;
#endif
int sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
static struct ihk_mc_pa_ops *pa_ops;
@@ -744,7 +747,11 @@ distance_based:
}
else {
if (i == 0)
#ifndef ENABLE_FUGAKU_HACKS
kprintf("%s: distance: CPU @ node %d failed to allocate "
#else
dkprintf("%s: distance: CPU @ node %d failed to allocate "
#endif
"%d pages from node %d\n",
__FUNCTION__,
ihk_mc_get_numa_id(),
@@ -951,6 +958,9 @@ static void query_free_mem_interrupt_handler(void *priv)
}
kprintf("McKernel free pages in total: %d\n", pages);
#ifdef ENABLE_FUGAKU_HACKS
panic("PANIC");
#endif
if (find_command_line("memdebug")) {
extern void kmalloc_memcheck(void);
@@ -1286,6 +1296,9 @@ void tlb_flush_handler(int vector)
}
#endif // PROFILE_ENABLE
}
#ifdef ENABLE_FUGAKU_HACKS
extern unsigned long arch_get_instruction_address(const void *reg);
#endif
static void unhandled_page_fault(struct thread *thread, void *fault_addr,
uint64_t reason, void *regs)
@@ -1317,6 +1330,22 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
__kprintf("address is out of range!\n");
}
#ifdef ENABLE_FUGAKU_HACKS
{
unsigned long pc = arch_get_instruction_address(regs);
range = lookup_process_memory_range(vm, pc, pc + 1);
if (range) {
__kprintf("PC: 0x%lx (%lx in %s)\n",
pc,
(range->memobj && range->memobj->flags & MF_REG_FILE) ?
pc - range->start + range->objoff :
pc - range->start,
(range->memobj && range->memobj->path) ?
range->memobj->path : "(unknown)");
}
}
#endif
kprintf_unlock(irqflags);
/* TODO */
@@ -1324,7 +1353,13 @@ static void unhandled_page_fault(struct thread *thread, void *fault_addr,
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
#ifndef ENABLE_FUGAKU_HACKS
panic("panic: kernel mode PF");
#else
kprintf("panic: kernel mode PF");
for (;;) cpu_pause();
//panic("panic: kernel mode PF");
#endif
}
//dkprintf("now dump a core file\n");
@@ -1360,6 +1395,20 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
__FUNCTION__, fault_addr, reason, regs);
preempt_disable();
#ifdef ENABLE_FUGAKU_HACKS
++cpu_local_var(in_page_fault);
if (cpu_local_var(in_page_fault) > 1) {
kprintf("%s: PF in PF??\n", __func__);
cpu_disable_interrupt();
if (!(reason & PF_USER)) {
cpu_local_var(kernel_mode_pf_regs) = regs;
panic("panic: kernel mode PF in PF");
}
while (1) {
panic("PANIC");
}
}
#endif
cpu_enable_interrupt();
@@ -1427,6 +1476,13 @@ out_linux:
reason, error);
unhandled_page_fault(thread, fault_addr, reason, regs);
preempt_enable();
#ifdef ENABLE_FUGAKU_DEBUG
kprintf("%s: sending SIGSTOP to TID: %d\n", __func__, thread->tid);
do_kill(thread, thread->proc->pid, thread->tid, SIGSTOP, NULL, 0);
goto out;
#endif
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
@@ -1455,6 +1511,9 @@ out_linux:
out_ok:
#endif
error = 0;
#ifdef ENABLE_FUGAKU_HACKS
--cpu_local_var(in_page_fault);
#endif
preempt_enable();
out:
dkprintf("%s: addr: %p, reason: %lx, regs: %p -> error: %d\n",
@@ -2041,6 +2100,13 @@ void mem_init(void)
anon_on_demand = 1;
}
#ifdef ENABLE_FUGAKU_HACKS
if (find_command_line("hugetlbfs_on_demand")) {
kprintf("Demand paging on hugetlbfs mappings enabled.\n");
hugetlbfs_on_demand = 1;
}
#endif
/* Init distance vectors */
numa_distances_init();
}

View File

@@ -36,6 +36,9 @@
#include <rusage_private.h>
#include <ihk/monitor.h>
#include <ihk/debug.h>
#ifdef ENABLE_TOFU
#include <tofu/tofu_stag_range.h>
#endif
//#define DEBUG_PRINT_PROCESS
@@ -269,6 +272,12 @@ init_process_vm(struct process *owner, struct address_space *asp, struct process
}
vm->range_cache_ind = 0;
#ifdef ENABLE_TOFU
ihk_mc_spinlock_init(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&vm->tofu_stag_hash[i]);
}
#endif
return 0;
}
@@ -955,6 +964,11 @@ int split_process_memory_range(struct process_vm *vm, struct vm_range *range,
newrange->pgshift = range->pgshift;
newrange->private_data = range->private_data;
#ifdef ENABLE_TOFU
/* TODO: figure out which entries to put on which list! */
INIT_LIST_HEAD(&newrange->tofu_stag_list);
#endif
if (range->memobj) {
memobj_ref(range->memobj);
newrange->memobj = range->memobj;
@@ -1023,6 +1037,28 @@ int join_process_memory_range(struct process_vm *vm,
if (vm->range_cache[i] == merging)
vm->range_cache[i] = surviving;
}
#ifdef ENABLE_TOFU
/* Move Tofu stag range entries */
if (vm->proc->enable_tofu) {
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry_safe(tsr, next,
&merging->tofu_stag_list, list) {
list_del(&tsr->list);
list_add_tail(&tsr->list, &surviving->tofu_stag_list);
dkprintf("%s: stag: %d @ %p:%lu moved in VM range merge\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
#endif
kfree(merging);
error = 0;
@@ -1137,6 +1173,24 @@ static int free_process_memory_range(struct process_vm *vm,
}
straight_out:
#ifdef ENABLE_TOFU
if (vm->proc->enable_tofu) {
int entries;
extern int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range);
entries = tofu_stag_range_remove_overlapping(vm, range);
if (entries > 0) {
kprintf("%s: removed %d Tofu stag entries for range 0x%lx:%lu\n",
__func__,
entries,
range->start,
range->end - range->start);
}
}
#endif
rb_erase(&range->vm_rb_node, &vm->vm_range_tree);
for (i = 0; i < VM_RANGE_CACHE_SIZE; ++i) {
if (vm->range_cache[i] == range)
@@ -1428,6 +1482,9 @@ int add_process_memory_range(struct process_vm *vm,
range->pgshift = pgshift;
range->private_data = NULL;
range->straight_start = 0;
#ifdef ENABLE_TOFU
INIT_LIST_HEAD(&range->tofu_stag_list);
#endif
rc = 0;
if (phys == NOPHYS) {
@@ -2521,6 +2578,14 @@ int init_process_stack(struct thread *thread, struct program_load_desc *pn,
__FUNCTION__, size, minsz,
ap_flag ? "(IHK_MC_AP_USER)" : "");
#ifdef ENABLE_FUGAKU_HACKS
/*
* XXX: Fugaku: Fujitsu's runtime remaps the stack
* using hugetlbfs so don't bother allocating too much here..
*/
minsz = 8*1024*1024;
#endif
stack = ihk_mc_alloc_aligned_pages_user(minsz >> PAGE_SHIFT,
USER_STACK_PAGE_P2ALIGN,
IHK_MC_AP_NOWAIT | ap_flag,

View File

@@ -204,6 +204,14 @@ long do_syscall(struct syscall_request *req, int cpu)
++thread->in_syscall_offload;
}
#ifdef ENABLE_FUGAKU_HACKS
#if 0
if (req->number == __NR_write && req->args[0] == 1) {
return req->args[2];
}
#endif
#endif
/* The current thread is the requester */
req->rtid = cpu_local_var(current)->tid;
@@ -220,7 +228,9 @@ long do_syscall(struct syscall_request *req, int cpu)
req->ttid = 0;
}
res.req_thread_status = IHK_SCD_REQ_THREAD_SPINNING;
#ifdef ENABLE_TOFU
res.pde_data = NULL;
#endif
send_syscall(req, cpu, &res);
if (req->rtid == -1) {
@@ -381,6 +391,7 @@ long do_syscall(struct syscall_request *req, int cpu)
rc = res.ret;
#ifdef ENABLE_TOFU
if ((req->number == __NR_ioctl && rc == 0) ||
(req->number == __NR_openat && rc > 0)) {
int fd = req->number == __NR_ioctl ? req->args[0] : rc;
@@ -407,6 +418,7 @@ long do_syscall(struct syscall_request *req, int cpu)
res.pde_data);
}
}
#endif
if(req->number != __NR_exit_group){
--thread->in_syscall_offload;
@@ -1378,6 +1390,15 @@ void terminate(int rc, int sig)
mcs_rwlock_writer_unlock(&proc->threads_lock, &lock);
vm = proc->vm;
#ifdef ENABLE_TOFU
if (proc->enable_tofu) {
extern void tof_utofu_finalize();
tof_utofu_finalize();
}
#endif
free_all_process_memory_range(vm);
if (proc->saved_cmdline) {
@@ -1528,7 +1549,9 @@ int process_cleanup_before_terminate(int pid)
{
struct process *proc;
struct mcs_rwlock_node_irqsave lock;
#ifdef ENABLE_TOFU
int fd;
#endif
proc = find_process(pid, &lock);
if (!proc) {
@@ -1536,10 +1559,12 @@ int process_cleanup_before_terminate(int pid)
return 0;
}
#ifdef ENABLE_TOFU
/* Clean up PDE file descriptors */
for (fd = 2; fd < MAX_FD_PDE; ++fd) {
__process_cleanup_fd(proc, fd);
}
#endif
process_unlock(proc, &lock);
return 0;
@@ -1922,6 +1947,10 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
__FUNCTION__, proc->straight_va, range->pgshift);
ptattr = arch_vrflag_to_ptattr(range->flag, PF_POPULATE, NULL);
#ifdef ENABLE_FUGAKU_HACKS
if (1) { // Un-safe mapping of covering physical range
#endif
error = ihk_mc_pt_set_range(proc->vm->address_space->page_table,
proc->vm,
(void *)range->start,
@@ -1948,6 +1977,90 @@ do_mmap(const uintptr_t addr0, const size_t len0, const int prot,
proc->straight_pa,
psize,
proc->straight_map_threshold);
#ifdef ENABLE_FUGAKU_HACKS
}
else { // Safe mapping of only LWK memory ranges
size_t max_pgsize = 0;
size_t min_pgsize = 0xFFFFFFFFFFFFFFFF;
/*
* Iterate LWK physical memory chunks and map them to their
* corresponding offset in the straight range using the largest
* suitable pages.
*/
for (i = 0; i < ihk_mc_get_nr_memory_chunks(); ++i) {
unsigned long start, end, pa;
void *va, *va_end;
size_t pgsize;
int pg2align;
ihk_mc_get_memory_chunk(i, &start, &end, NULL);
va = proc->straight_va + (start - straight_pa_start);
va_end = va + (end - start);
pa = start;
while (va < va_end) {
pgsize = (va_end - va) + 1;
retry:
error = arch_get_smaller_page_size(NULL, pgsize,
&pgsize, &pg2align);
if (error) {
ekprintf("%s: arch_get_smaller_page_size() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
/* Are virtual or physical not page aligned for this size? */
if (((unsigned long)va & (pgsize - 1)) ||
(pa & (pgsize - 1))) {
goto retry;
}
error = ihk_mc_pt_set_range(
proc->vm->address_space->page_table,
proc->vm,
va,
va + pgsize,
pa,
ptattr,
pg2align + PAGE_SHIFT,
range,
0);
if (error) {
kprintf("%s: ihk_mc_pt_set_range() failed"
" during straight mapping: %d\n",
__func__, error);
proc->straight_va = 0;
goto straight_out;
}
if (pgsize > max_pgsize)
max_pgsize = pgsize;
if (pgsize < min_pgsize)
min_pgsize = pgsize;
va += pgsize;
pa += pgsize;
}
}
region->map_end = (unsigned long)proc->straight_va +
proc->straight_len;
proc->straight_pa = straight_pa_start;
kprintf("%s: straight mapping: 0x%lx:%lu @ "
"min_pgsize: %lu, max_pgsize: %lu\n",
__FUNCTION__,
proc->straight_va,
proc->straight_len,
min_pgsize,
max_pgsize);
}
#endif
}
straight_out:
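The loop above shrinks the candidate page size until both the straight VA and the chunk's physical address are aligned to it; a standalone sketch of that selection logic (the helper name and the size list are illustrative, the real code queries arch_get_smaller_page_size() instead):

#include <stddef.h>
#include <stdint.h>

/*
 * Illustrative only: pick the largest page size that fits the remaining
 * length and aligns both the virtual and the physical address.
 */
static size_t largest_aligned_pgsize(uintptr_t va, uintptr_t pa, size_t len)
{
	static const size_t sizes[] = {
		1UL << 30, 1UL << 21, 1UL << 16, 1UL << 12	/* example sizes */
	};
	size_t i;

	for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
		if (sizes[i] <= len &&
		    !(va & (sizes[i] - 1)) && !(pa & (sizes[i] - 1)))
			return sizes[i];
	}
	return 0;	/* no supported size fits; the caller bails out */
}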
@@ -2276,8 +2389,15 @@ straight_out:
range->straight_start =
(unsigned long)proc->straight_va +
(straight_phys - proc->straight_pa);
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx\n",
__FUNCTION__, addr, len, range->straight_start);
__FUNCTION__, addr, len, range->straight_start);
#else
dkprintf("%s: range 0x%lx:%lu is straight starting at 0x%lx"
" (phys: 0x%lx)\n",
__FUNCTION__, addr, len, range->straight_start,
straight_phys);
#endif
if (!zero_at_free) {
memset((void *)phys_to_virt(straight_phys), 0, len);
}
@@ -2377,11 +2497,20 @@ out:
if (memobj) {
memobj_unref(memobj);
}
#ifndef ENABLE_FUGAKU_HACKS
dkprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#else
if (cpu_local_var(current)->profile) {
kprintf("%s: 0x%lx:%8lu, (req: 0x%lx:%lu), prot: %x, flags: %x, "
#endif
"fd: %d, off: %lu, error: %ld, addr: 0x%lx\n",
__FUNCTION__,
addr, len, addr0, len0, prot, flags,
fd, off0, error, addr);
#ifdef ENABLE_FUGAKU_HACKS
}
#endif
return !error ?
(range->straight_start ? range->straight_start : addr) :
@@ -2418,6 +2547,11 @@ SYSCALL_DECLARE(munmap)
out:
dkprintf("[%d]sys_munmap(%lx,%lx): %d\n",
ihk_mc_get_processor_id(), addr, len0, error);
#ifdef ENABLE_FUGAKU_HACKS
if (error) {
kprintf("%s: error: %d\n", __func__, error);
}
#endif
return error;
}
@@ -3978,7 +4112,9 @@ SYSCALL_DECLARE(open)
goto out;
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = pathname;
#endif
dkprintf("open(): pathname=%s\n", pathname);
if (!strncmp(pathname, XPMEM_DEV_PATH, len)) {
@@ -3987,15 +4123,21 @@ SYSCALL_DECLARE(open)
rc = syscall_generic_forwarding(__NR_open, ctx);
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = NULL;
#endif
out:
#ifdef ENABLE_TOFU
if (rc > 0 && rc < MAX_FD_PDE) {
cpu_local_var(current)->proc->fd_path[rc] = pathname;
}
else {
kfree(pathname);
}
#else
kfree(pathname);
#endif
return rc;
}
@@ -4023,7 +4165,9 @@ SYSCALL_DECLARE(openat)
goto out;
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = pathname;
#endif
dkprintf("openat(): pathname=%s\n", pathname);
if (!strncmp(pathname, XPMEM_DEV_PATH, len)) {
@@ -4032,15 +4176,21 @@ SYSCALL_DECLARE(openat)
rc = syscall_generic_forwarding(__NR_openat, ctx);
}
#ifdef ENABLE_TOFU
cpu_local_var(current)->fd_path_in_open = NULL;
#endif
out:
#ifdef ENABLE_TOFU
if (rc > 0 && rc < MAX_FD_PDE) {
cpu_local_var(current)->proc->fd_path[rc] = pathname;
}
else {
kfree(pathname);
}
#else
kfree(pathname);
#endif
return rc;
}

View File

@@ -24,6 +24,7 @@
struct kmalloc_cache_header tofu_scatterlist_cache[8];
struct kmalloc_cache_header tofu_mbpt_cache[8];
struct ihk_mc_page_cache_header tofu_mbpt_sg_pages_cache[8];
struct kmalloc_cache_header tofu_stag_range_cache[8];
typedef ihk_spinlock_t spinlock_t;
@@ -43,6 +44,124 @@ typedef void (*tof_core_signal_handler)(int, int, uint64_t, uint64_t);
#include <tofu/tofu_generated-tof_utofu_bg.h>
#include <tofu/tofu_generated-tof_utofu_mbpt.h>
#include <tofu/tofu_stag_range.h>
/*
* Tofu STAG regions list keeps track of stags in a given VM range..
* Per-process tree is protected by process' vm_range_lock.
*/
int tof_utofu_stag_range_insert(struct process_vm *vm,
struct vm_range *range,
uintptr_t start, uintptr_t end,
struct tof_utofu_cq *ucq, int stag)
{
struct tofu_stag_range *tsr; // = kmalloc(sizeof(*tsr), IHK_MC_AP_NOWAIT);
tsr = kmalloc_cache_alloc(&tofu_stag_range_cache[ihk_mc_get_numa_id()],
sizeof(*tsr));
if (!tsr) {
kprintf("%s: error: allocating tofu_stag_range\n", __func__);
return -ENOMEM;
}
tsr->start = start;
tsr->end = end;
tsr->ucq = ucq;
tsr->stag = stag;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_add_tail(&tsr->list, &range->tofu_stag_list);
list_add_tail(&tsr->hash, &vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE]);
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->ucq->tni,
tsr->ucq->cqid,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
return 0;
}
struct tofu_stag_range *tofu_stag_range_lookup_by_stag(struct process_vm *vm,
int stag)
{
struct tofu_stag_range *tsr;
struct tofu_stag_range *match = NULL;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry(tsr,
&vm->tofu_stag_hash[stag % TOFU_STAG_HASH_SIZE], hash) {
if (tsr->stag == stag) {
match = tsr;
break;
}
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
return match;
}
/* XXX: vm->tofu_stag_lock must be held */
void __tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
dkprintf("%s: stag: %d for TNI %d CQ %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->ucq->tni,
tsr->ucq->cqid,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
list_del(&tsr->list);
list_del(&tsr->hash);
//kfree(tsr);
kmalloc_cache_free(tsr);
}
void tofu_stag_range_remove(struct process_vm *vm, struct tofu_stag_range *tsr)
{
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
__tofu_stag_range_remove(vm, tsr);
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
static int tof_utofu_free_stag(struct tof_utofu_cq *ucq, int stag);
int tofu_stag_range_remove_overlapping(struct process_vm *vm,
struct vm_range *range)
{
struct tofu_stag_range *tsr, *next;
int entries = 0;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
list_for_each_entry_safe(tsr, next,
&range->tofu_stag_list, list) {
dkprintf("%s: stag: %d @ %p:%lu\n",
__func__,
tsr->stag,
tsr->start,
(unsigned long)(tsr->end - tsr->start));
linux_spin_lock(&tsr->ucq->trans.mru_lock);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock(&tsr->ucq->trans.mru_lock);
__tofu_stag_range_remove(vm, tsr);
++entries;
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
return entries;
}
#define TOF_UTOFU_VERSION TOF_UAPI_VERSION
#define TOF_UTOFU_NUM_STAG_NTYPES 3
#define TOF_UTOFU_NUM_STAG_BITS(size) ((size) + 13)
@@ -146,6 +265,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
else {
*_pgszbits = PTL2_SHIFT;
*_pgszbits = PTL1_CONT_SHIFT;
}
return 0;
}
@@ -171,7 +291,7 @@ static int tof_utofu_get_pagesize_locked(uintptr_t addr, size_t len,
}
}
#if 0
#if 1
/* Tofu only support 64kB and 2MB pages */
if (min_shift > PTL1_CONT_SHIFT)
min_shift = PTL1_CONT_SHIFT;
@@ -647,7 +767,6 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
//struct page *page;
struct process *proc = cpu_local_var(current)->proc;
uintptr_t iova = 0, va;
int ret;
unsigned long phys = 0;
/* Special case for straight mapping */
@@ -697,6 +816,8 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
}
for(va = start; va < end; va += pgsz, ix++){
size_t psize;
pte_t *ptep;
if (tof_utofu_mbpt_is_enabled(mbpt, ix)) {
/* this page is already mapped to mbpt */
@@ -715,15 +836,18 @@ static int tof_utofu_update_mbpt_entries(struct tof_utofu_cq *ucq,
// return -ENOMEM;
//}
ret = ihk_mc_pt_virt_to_phys(
cpu_local_var(current)->vm->address_space->page_table,
(void *)va, &phys);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)va, 0, NULL, &psize, NULL);
if (ret) {
raw_rc_output(ret);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, va);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
(va & (psize - 1));
//iova = tof_smmu_get_ipa_cq(ucq->tni, ucq->cqid,
// pfn_to_kaddr(page_to_pfn(page)), pgsz);
//if (iova == 0) {
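The replacement translation above composes the physical address from the page base stored in the PTE plus the in-page offset of the virtual address; a small standalone check of that arithmetic (all values are made up for illustration):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t psize = 0x200000UL;		/* 2 MiB page covering va */
	uint64_t pte_phys = 0x400200000UL;	/* what pte_get_phys(ptep) would return */
	uint64_t va = 0x20001234UL;		/* virtual address inside that page */
	uint64_t phys = (pte_phys & ~(psize - 1)) + (va & (psize - 1));

	/* prints phys = 0x400201234 */
	printf("phys = 0x%llx\n", (unsigned long long)phys);
	return 0;
}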
@@ -1012,6 +1136,7 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
size_t pgsz;
int ret = -ENOTSUPP;
unsigned long irqflags;
struct vm_range *range = NULL;
ucq = container_of(dev, struct tof_utofu_cq, common);
if(!ucq->common.enabled){
@@ -1033,7 +1158,46 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
}
readonly = (req.flags & 1) != 0;
ihk_rwspinlock_read_lock_noirq(&vm->memory_range_lock);
/* Assume smallest page size at first */
start = round_down((uintptr_t)req.va, PAGE_SIZE);
end = round_up((uintptr_t)req.va + req.len, PAGE_SIZE);
/* Find range, straight mapping special lookup */
if (vm->proc->straight_va &&
start >= (unsigned long)vm->proc->straight_va &&
end <= ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len) &&
!(start == (unsigned long)vm->proc->straight_va &&
end == ((unsigned long)vm->proc->straight_va +
vm->proc->straight_len))) {
struct vm_range *range_iter;
range_iter = lookup_process_memory_range(vm, 0, -1);
while (range_iter) {
if (range_iter->straight_start &&
start >= range_iter->straight_start &&
start < (range_iter->straight_start +
(range_iter->end - range_iter->start))) {
range = range_iter;
break;
}
range_iter = next_process_memory_range(vm, range_iter);
}
}
else {
range = lookup_process_memory_range(vm, start, end);
}
if (!range) {
ret = -EINVAL;
goto unlock_out;
}
pgszbits = PAGE_SHIFT;
if (req.flags & TOF_UTOFU_ALLOC_STAG_LPG) {
ret = tof_utofu_get_pagesize_locked((uintptr_t)req.va,
@@ -1109,6 +1273,12 @@ static int tof_utofu_ioctl_alloc_stag(struct tof_utofu_device *dev, unsigned lon
//up(&ucq->ucq_sem);
ihk_mc_spinlock_unlock_noirq(&tofu_tni_cq_lock[ucq->tni][ucq->cqid]);
if (ret == 0) {
tof_utofu_stag_range_insert(vm, range, start, end, ucq, req.stag);
}
unlock_out:
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
if(ret == 0){
@@ -1332,6 +1502,21 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
ret = tof_utofu_free_stag(ucq, stags[i]);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
{
struct tofu_stag_range *tsr;
tsr = tofu_stag_range_lookup_by_stag(
cpu_local_var(current)->vm, stags[i]);
if (tsr) {
tofu_stag_range_remove(cpu_local_var(current)->vm, tsr);
}
else {
kprintf("%s: no stag range object for %d??\n", __func__, stags[i]);
}
}
if(ret == 0){
stags[i] = -1;
}
@@ -1374,9 +1559,11 @@ static int tof_utofu_ioctl_free_stags(struct tof_utofu_device *dev, unsigned lon
void tof_utofu_release_cq(void *pde_data)
{
struct tof_utofu_cq *ucq;
int stag;
//int stag;
struct tof_utofu_device *dev;
unsigned long irqflags;
struct process_vm *vm = cpu_local_var(current)->vm;
int do_free = 1;
dev = (struct tof_utofu_device *)pde_data;
ucq = container_of(dev, struct tof_utofu_cq, common);
@@ -1384,13 +1571,43 @@ void tof_utofu_release_cq(void *pde_data)
if (!ucq->common.enabled) {
kprintf("%s: UCQ TNI %d, CQ %d is disabled\n",
__func__, ucq->tni, ucq->cqid);
return;
do_free = 0;
}
#if 0
for (stag = 0; stag < TOF_UTOFU_NUM_STAG(ucq->num_stag); stag++) {
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(ucq, stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
#endif
{
int i;
struct tofu_stag_range *tsr, *next;
ihk_mc_spinlock_lock_noirq(&vm->tofu_stag_lock);
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
if (tsr->ucq != ucq)
continue;
if (do_free) {
dkprintf("%s: removing stag %d for TNI %d CQ %d\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
linux_spin_lock_irqsave(&ucq->trans.mru_lock, irqflags);
tof_utofu_free_stag(tsr->ucq, tsr->stag);
linux_spin_unlock_irqrestore(&ucq->trans.mru_lock, irqflags);
}
else {
kprintf("%s: WARNING: could not free stag %d for TNI %d CQ %d (UCQ is disabled)\n",
__func__, tsr->stag, ucq->tni, ucq->cqid);
}
__tofu_stag_range_remove(vm, tsr);
}
}
ihk_mc_spinlock_unlock_noirq(&vm->tofu_stag_lock);
}
dkprintf("%s: UCQ (pde: %p) TNI %d, CQ %d\n",
@@ -1829,14 +2046,22 @@ static int tof_utofu_ioctl_enable_bch(struct tof_utofu_device *dev, unsigned lon
}
if (!phys) {
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
(void *)req.addr, &phys);
size_t psize;
pte_t *ptep;
if (ret) {
raw_rc_output(ret);
ptep = ihk_mc_pt_lookup_fault_pte(cpu_local_var(current)->vm,
(void *)req.addr, 0, NULL, &psize, NULL);
if (unlikely(!ptep || !pte_is_present(ptep))) {
kprintf("%s: ERROR: no valid PTE for 0x%lx\n",
__func__, req.addr);
raw_rc_output(-ENOMEM);
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
return -ENOMEM;
}
phys = (pte_get_phys(ptep) & ~(psize - 1)) +
((uint64_t)req.addr & (psize - 1));
}
ihk_rwspinlock_read_unlock_noirq(&vm->memory_range_lock);
@@ -2068,6 +2293,7 @@ void tof_utofu_init_globals(void)
memset(tofu_scatterlist_cache, 0, sizeof(tofu_scatterlist_cache));
memset(tofu_mbpt_cache, 0, sizeof(tofu_mbpt_cache));
memset(tofu_mbpt_sg_pages_cache, 0, sizeof(tofu_mbpt_sg_pages_cache));
memset(tofu_stag_range_cache, 0, sizeof(tofu_stag_range_cache));
{
int tni, cq;
@@ -2120,6 +2346,24 @@ void tof_utofu_finalize(void)
{
struct tofu_globals *tg = ihk_mc_get_tofu_globals();
/* Could be called from idle.. */
if (cpu_local_var(current)->proc->enable_tofu) {
int i;
struct process_vm *vm = cpu_local_var(current)->vm;
struct tofu_stag_range *tsr, *next;
for (i = 0; i < TOFU_STAG_HASH_SIZE; ++i) {
list_for_each_entry_safe(tsr, next,
&vm->tofu_stag_hash[i], hash) {
dkprintf("%s: WARNING: stray stag %d for TNI %d CQ %d?\n",
__func__, tsr->stag, tsr->ucq->tni, tsr->ucq->cqid);
}
}
kprintf("%s: STAG processing done\n", __func__);
}
ihk_mc_clear_kernel_range((void *)tg->linux_vmalloc_start,
(void *)tg->linux_vmalloc_end);
}

View File

@@ -22,12 +22,18 @@ void panic(const char *msg)
arch_print_stack();
#ifndef ENABLE_FUGAKU_HACKS
/* do not assume anything after this is executed */
arch_cpu_stop();
while (1) {
cpu_halt();
}
#else
while (1) {
cpu_halt_panic();
}
#endif
}
extern void arch_show_interrupt_context(const void*);

View File

@@ -23,7 +23,13 @@ extern int num_processors;
void cpu_enable_interrupt(void);
void cpu_disable_interrupt(void);
#ifdef ENABLE_FUGAKU_HACKS
int cpu_interrupt_disabled(void);
#endif
void cpu_halt(void);
#ifdef ENABLE_FUGAKU_HACKS
void cpu_halt_panic(void);
#endif
void cpu_safe_halt(void);
void cpu_restore_interrupt(unsigned long);
void cpu_pause(void);

View File

@@ -227,9 +227,11 @@ int ihk_mc_get_memory_chunk(int id,
unsigned long *start,
unsigned long *end,
int *numa_id);
#ifdef ENABLE_TOFU
int ihk_mc_get_memory_chunk_dma_addr(int id,
int tni, int cqid,
uintptr_t *dma_addr);
#endif
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);

View File

@@ -332,6 +332,23 @@ int deferred_zero_at_free = 1;
* of their corresponding memory (i.e., they are on the free memory chunk itself).
*/
#ifdef ENABLE_FUGAKU_HACKS
size_t __count_free_bytes(struct rb_root *root)
{
struct free_chunk *chunk;
struct rb_node *node;
size_t size = 0;
for (node = rb_first(root); node; node = rb_next(node)) {
chunk = container_of(node, struct free_chunk, node);
size += chunk->size;
}
return size;
}
#endif
/*
* Free pages.
* NOTE: locking must be managed by the caller.

View File

@@ -0,0 +1,99 @@
#!/bin/bash
# IHK/McKernel user privilege reboot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2019 RIKEN
#
prefix="@prefix@"
BINDIR="${prefix}/bin"
SBINDIR="${prefix}/sbin"
KERNDIR="@MCKERNELDIR@"
mem=""
cpus=""
ikc_map=""
while getopts c:m:r: OPT
do
case ${OPT} in
c) cpus=${OPTARG}
;;
m) mem=${OPTARG}
;;
r) ikc_map=${OPTARG}
;;
\?) exit 1
;;
esac
done
if [ "${ikc_map}" == "" ]; then
# Query IKC map
if ! ${SBINDIR}/ihkosctl 0 get ikc_map > /dev/null; then
echo "error: querying IKC map" >&2
exit 1
fi
ikc_map=`${SBINDIR}/ihkosctl 0 get ikc_map`
fi
# Shutdown OS
if ! ${SBINDIR}/ihkosctl 0 shutdown; then
echo "error: shuting down OS" >&2
exit 1
fi
sleep 2
# Query IHK-SMP resources and reassign
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then
echo "error: querying cpus" >&2
exit 1
fi
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
if [ "${cpus}" == "" ]; then
echo "error: querying CPUs" >&2
exit 1
fi
# Assign CPUs
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then
echo "error: assign CPUs" >&2
exit 1
fi
# Assign memory
for i in `seq 0 15`; do
if ! ${SBINDIR}/ihkosctl 0 assign mem all@${i}; then
echo "error: assign memory" >&2
exit 1
fi
done
if [ "${ikc_map}" != "" ]; then
# Set IKC map
if ! ${SBINDIR}/ihkosctl 0 set ikc_map ${ikc_map}; then
echo "error: setting IKC map" >&2
exit 1
fi
fi
# Load kernel image
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then
echo "error: loading kernel image: ${KERNDIR}/mckernel.img" >&2
exit 1
fi
# Set kernel arguments
if ! ${SBINDIR}/ihkosctl 0 kargs "hidos dump_level=24"; then
echo "error: setting kernel arguments" >&2
exit 1
fi
# Boot OS instance
if ! ${SBINDIR}/ihkosctl 0 boot; then
echo "error: booting" >&2
exit 1
fi

View File

@@ -65,8 +65,9 @@ umask_old=`umask`
idle_halt=""
allow_oversubscribe=""
time_sharing="time_sharing"
force_reserve="no"
while getopts stk:c:m:o:f:r:q:i:d:e:hOT: OPT
while getopts stk:c:m:o:f:r:q:i:d:e:hROT: OPT
do
case ${OPT} in
f) facility=${OPTARG}
@@ -97,6 +98,8 @@ do
;;
O) allow_oversubscribe="allow_oversubscribe"
;;
R) force_reserve="yes"
;;
T)
case ${OPTARG} in
1) time_sharing="time_sharing"
@@ -343,6 +346,17 @@ if ! grep ihk_smp_@ARCH@ /proc/modules &>/dev/null; then
fi
fi
if [ ${force_reserve} == "yes" ]; then
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then
echo "error: reserving memory" >&2
error_exit "ihk_smp_loaded"
fi
if ! ${SUDO} ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then
echo "error: reserving CPUs" >&2;
error_exit "mem_reserved"
fi
fi
# Load mcctrl if not loaded
if ! grep mcctrl /proc/modules &>/dev/null; then
if ! taskset -c 0 ${SUDO} insmod ${KMODDIR}/mcctrl.ko 2>/dev/null; then

View File

@@ -18,9 +18,10 @@ KERNDIR="@KERNDIR@"
mem=""
cpus=""
kill_in_use=""
dont_unload="no"
RMMOD_PATH=/sbin/rmmod
while getopts r:k OPT
while getopts r:kR OPT
do
case ${OPT} in
r)
@@ -29,6 +30,9 @@ do
k)
kill_in_use=1
;;
R)
dont_unload="yes"
;;
\?) exit 1
;;
esac
@@ -115,6 +119,10 @@ if ! sudo ${SBINDIR}/ihkconfig 0 release mem "all" > /dev/null; then
exit 1
fi
if [ "${dont_unload}" == "yes" ]; then
exit 0
fi
# Remove delegator if loaded
if grep mcctrl /proc/modules &>/dev/null; then
if ! sudo ${RMMOD_PATH} mcctrl 2>/dev/null; then