Compare commits

...

147 Commits
0.4.0 ... 0.9.0

Author SHA1 Message Date
Balazs Gerofi
a029bcac37 mcreboot-smp-x86: find unused IRQ line and pass start vector to ihk_smp_x86.ko 2015-07-07 09:07:16 +09:00
Balazs Gerofi
bd913c503b sched_setaffinity(): find an actual target core 2015-07-03 11:59:52 +09:00
Tomoki Shirasawa
e838affde8 fix to compile error on CentOS 7 2015-07-02 17:08:35 +09:00
Tomoki Shirasawa
59ee251e1c fix /proc/pid/mem, /proc/pid/status, /proc/pid/cmdline 2015-07-02 00:22:35 +09:00
Tomoki Shirasawa
fa79db3bcc fix out of tree build 2015-07-01 23:58:50 +09:00
Tomoki Shirasawa
b7c5cba361 fix to compile on CentOS 6 2015-07-01 23:57:40 +09:00
Balazs Gerofi
382614ddae pstate: use MSR_NHM_TURBO_RATIO_LIMIT as maximum single-core turbo ratio 2015-07-01 22:18:38 +09:00
Tomoki Shirasawa
aa959c6b34 temporary fix for CentOS 6.x 2015-06-30 18:19:53 +09:00
Tomoki Shirasawa
aabc3d386d support a function to execute mcexec automatically. 2015-06-30 17:47:01 +09:00
Balazs Gerofi
4ebe778ede vm->exiting: deal with exit_group() and concurrent page faults 2015-06-25 16:04:04 +09:00
Balazs Gerofi
fbb776e4fb cpu init: support for no_turbo kernel argument 2015-06-25 12:18:27 +09:00
Balazs Gerofi
41b85281a4 mcctrl: introduction of RUS page hash to handle page refcounts properly 2015-05-31 15:42:39 +09:00
Balazs Gerofi
5532e3c663 mcreboot script for new IHK SMP-x86 I/F 2015-05-26 14:41:28 +09:00
Tomoki Shirasawa
2af2b1205f temporary fix for setfsuid/setfsgid 2015-05-19 06:27:59 +09:00
Tomoki Shirasawa
7d5a68be1b add PID and GID to /proc/pid/status
add /proc/pid/cmdline

refs #445
refs #447
2015-05-18 17:45:37 +09:00
Tomoki Shirasawa
f4162dff52 some signals set siginfo.si_code 2015-04-14 15:11:36 +09:00
NAKAMURA Gou
a0d909af75 add supports for dump analyzer 2015-03-31 12:59:53 +09:00
Tomoki Shirasawa
63669b7f71 support /proc/pid/status for LTP mmap14 2015-03-28 14:20:07 +09:00
NAKAMURA Gou
4946964ed0 update copyright notices 2015-03-27 14:50:09 +09:00
Balazs Gerofi
5f19842a6a support for process_vm_readv()/process_vm_writev() 2015-03-25 19:44:56 +09:00
NAKAMURA Gou
9271d5346d add ACSL annotation to cpu.c 2015-03-25 15:54:08 +09:00
Susumu Komae
7bba05cfa4 Revise use of iov_base in ptrace_read_regset() and ptrace_write_regset(). 2015-03-20 20:33:40 +09:00
Susumu Komae
c2a1f933e8 Set tid (instead of pid) for ptrace event message of
PTRACE_EVENT_{FORK,VFORK,CLONE,VFORKDONE}.
Specify 2nd argument as pid (instead of -1) of function findthread_and_lock(),
to find tracee process in ptrace subroutines.
(gdb testsuite gdb.base/watch_thread_num.exp)
2015-03-20 13:22:00 +09:00
NAKAMURA Gou
055769254d implement mlockall()/munlockall() for LTP syscall 2015-03-19 16:46:31 +09:00
NAKAMURA Gou
786ae83380 add arch-dependent mman.h 2015-03-19 16:36:57 +09:00
NAKAMURA Gou
8c662c83be implement mincore(2) for LTP 2015-03-19 16:32:03 +09:00
NAKAMURA Gou
4698bc40c2 implement System V shared memory for LTP syscalls 2015-03-19 16:21:18 +09:00
Tomoki Shirasawa
f5d935b703 support signalfd4 step1 2015-03-18 17:35:43 +09:00
Tomoki Shirasawa
d53865ac5f change to check sequence of kill syscall, check sig num zero after uid checking 2015-03-18 12:59:05 +09:00
Tomoki Shirasawa
8934eb91a4 kill syscall check uid 2015-03-17 15:04:36 +09:00
Tomoki Shirasawa
ed6d94a358 syscall slowdown when repeat fork/exit/wait (LTP fork13) 2015-03-11 16:09:59 +09:00
NAKAMURA Gou
fa923da0e3 add host PTE cleaning to execve(). refs #377
This removes a cause of LTP gethostid01's wrong behavior.
2015-03-10 18:23:50 +09:00
Balazs Gerofi
1f8265efbc check _PAGE_PWT and _PAGE_PCD directly instead of _PAGE_CACHE_WC 2015-03-07 02:12:48 +09:00
Susumu Komae
b553de7435 supports PTRACE_GETREGSET, PTRACE_SETREGSET.
supports PTRACE_GETFPREGS, PTRACE_SETFPREGS.

refs #421
2015-03-06 19:18:32 +09:00
NAKAMURA Gou
6a82412d64 modify procfs to read inactive thread's files
However, the following files can be read only if the corresponding
thread is in active.
- /proc/<PID>/mem
- /proc/<PID>/task/<TID>/mem

refs #371
2015-03-05 21:41:24 +09:00
NAKAMURA Gou
fa29c34995 expand the size of kstack 12 KiB
When a procfs file belonging to a process which was in PS_TRACED status
was accessed, calling kprintf() from process_procfs_request() caused
stack overrun, and x86_cpu_local_variables was destroyed.
2015-03-05 20:30:33 +09:00
NAKAMURA Gou
f84b5acf79 map entire buffer to read procfs
Reading data from procfs file more than 4096 byte caused a buffer
overrun in McKernel because the buffer was always mapped in McKernel
4096 byte regardless of actual buffer size.
2015-03-05 20:30:33 +09:00
Balazs Gerofi
8b24f60861 Combine range and memobj flags before arch_vrflag_to_ptattr() 2015-03-05 16:40:14 +09:00
Balazs Gerofi
f82bb284bb Make pager and devobj debug messages optional 2015-03-05 16:03:21 +09:00
Balazs Gerofi
bf12a5c45e Introduction of write-combined memory type mappings.
Introduction of VR_WRITE_COMBINED, PTATTR_WRITE_COMBINED and modification
to the memobj's get_page() interface so that Linux communicates back mapping
flags (such as write-combined).
2015-03-05 16:03:21 +09:00
Balazs Gerofi
ea5681232e x86 Page Attribute Table (PAT) MSR support.
Reconfigure PAT to permit write-combining memory type to be assigned
on a page-by-page basis. Changes PWT and PCD bit combinations in page
table entries so that they correspond to the following format:

  PAT
  |PCD
  ||PWT
  |||
  000 WB  Write Back (WB)
  001 WC  Write Combining (WC)
  010 UC- Uncached (UC-)
  011 UC  Uncacheable (UC)
2015-03-05 16:03:20 +09:00
Tomoki Shirasawa
e6011be1af create area for to save fp regs
refs #421
2015-03-05 12:18:46 +09:00
Tomoki Shirasawa
9946ccd6b1 pipe free fork is implemented (LTP fork09) 2015-03-04 17:40:58 +09:00
NAKAMURA Gou
daec7de828 implement /proc/stat
only for sysconf(_SC_NPROCESSORS_ONLN).  This enables Intel OpenMP
runtime to arrange threads with regard for CPU topology.

refs #291
2015-03-04 15:46:53 +09:00
NAKAMURA Gou
9ad48083aa make PTRACE_POKETEXT use patch_process_vm() 2015-03-04 12:04:54 +09:00
NAKAMURA Gou
2eac58aab3 add patch_process_vm(). (in progress)
This function patches specified range of specified user space even if
the range is not writable.

refs #401
2015-03-04 12:00:51 +09:00
NAKAMURA Gou
22d8d169b6 change copy-out routines
- restrict copy_to_user() to only current process.
- add write_process_vm() to write specified process space.
2015-03-04 11:29:16 +09:00
NAKAMURA Gou
8db54c2637 make GPE on CPL0 cause panic 2015-03-04 11:29:16 +09:00
NAKAMURA Gou
063fa963c3 change copy-in routines
- restrict copy_from_user() to only current process.
- add read_process_vm() to read specified process space.
2015-03-04 11:29:15 +09:00
NAKAMURA Gou
a6488adcc1 change parameter type of ihk_mc_pt_virt_to_phys()
- add type qualifier 'const' to virtual address parameter.
  that is, change parameter 'virt' from       'void *'
                                     to 'const void *'
2015-03-04 11:29:15 +09:00
NAKAMURA Gou
2239a6b09b modify page_fault_process()
- change its argument from 'struct process *'
                        to 'struct process_vm *'.
- change its name from 'page_fault_process()'
                    to 'page_fault_process_vm()'.
- allow to resolve a fault on the process_vm of another process.
2015-03-04 11:29:15 +09:00
Susumu Komae
8c179d506a support PTRACE_ARCH_PRCTL.
refs #420
2015-03-03 14:22:57 +09:00
Susumu Komae
377341ce5f change debug output in debug/int3 handler, for struct x86_user_context. 2015-03-03 14:06:30 +09:00
Tomoki Shirasawa
8caeba7cba support PTRACE_GETSIGINFO and PTRACE_SETSIGINFO
refs #422
2015-03-03 09:54:57 +09:00
NAKAMURA Gou
1d2f5d9893 set is_gpr_valid to initial user context 2015-02-27 14:47:43 +09:00
Balazs Gerofi
e4f47df3c3 initialize pstate, turbo mode and power/performance bias MSR registers
MSR_IA32_MISC_ENABLE, MSR_IA32_PERF_CTL and MSR_IA32_ENERGY_PERF_BIAS
are responsible for performance settings, this change enables McKernel
to perform on par with Linux when running the fwq benchmark.
2015-02-27 11:29:11 +09:00
NAKAMURA Gou
4751055ee4 make ptrace(2) use lookup_user_context() 2015-02-26 17:43:10 +09:00
NAKAMURA Gou
305ebfed0e add lookup_user_context(). refs #420 2015-02-26 17:43:10 +09:00
NAKAMURA Gou
b66b950129 add x86_sregs into x86_user_context
x86_sregs contains the registers which are included in user_regs_struct
but not included in x86_basic_regs.
2015-02-26 17:43:10 +09:00
NAKAMURA Gou
4aa8ba2eef sort x86_basic_regs into user_regs_struct's order 2015-02-26 17:43:10 +09:00
NAKAMURA Gou
fab2c2aa97 wrap x86_regs with x86_user_context
and, rename x86_regs to x86_basic_regs.
2015-02-26 17:43:10 +09:00
Susumu Komae
026164eda4 fix PTRACE_ATTACH, PTRACE_DETACH, detach at tracer process terminated.
tracee process may have no parent, increment/decrement refcount.

refs #374
refs #280
2015-02-25 21:09:44 +09:00
Tomoki Shirasawa
e91d1e5b7b stack of signal handler is not 16 byte align
refs #429
2015-02-24 17:20:52 +09:00
Tomoki Shirasawa
73743eeeb0 temporary fix for waiting tracee blocked 2015-02-24 15:20:32 +09:00
Tomoki Shirasawa
c1c1fd578a modify file path of /proc files
LTP getsid02 mount06 msgctl08 msgget03 pause02 pipe07 readhead02
    swapon03 sysconf01 wait402
2015-02-24 11:33:49 +09:00
Tomoki Shirasawa
f35cc66d18 delete unused argument "ctx" from do_syscall
support waitid option "WNOWAIT"
2015-02-23 17:14:14 +09:00
Tomoki Shirasawa
d9cf1d49b1 support waitid
send SIGCHLD to parent when SIGSTOP or SIGCONT received

refs #425
refs #283
2015-02-22 20:05:30 +09:00
Balazs Gerofi
3d426ada01 use remap_pfn_range() in rus_vm_fault() for kernel versions newer than 3.0 2015-02-19 13:52:55 -08:00
Balazs Gerofi
0307f6a6cc implementation of sched_{setparam, getparam, setscheduler, getscheduler, get_priority_min, get_priority_max, rr_get_interval} system calls 2015-02-19 11:46:03 -08:00
NAKAMURA Gou
0dee04f16b move parse_args() to after arch_init()
In attached-mic, bootparam is not mapped until arch_init() is finished.
In builtin-mic and builtin-x86, virtual address of bootparam is changed
in arch_init().
2015-02-18 20:49:46 +09:00
NAKAMURA Gou
0e98e87b95 change type of kprintf_lock() to "unsigned long"
to match type of ihk_mc_spinlock_lock().
2015-02-18 20:49:46 +09:00
NAKAMURA Gou
d35e60c1a3 add init_boot_processor_local() for arch_start() 2015-02-18 20:49:46 +09:00
NAKAMURA Gou
037e17c4ed fix parsing of "osnum=" kargs 2015-02-18 16:44:14 +09:00
Susumu Komae
2baf274dac fix PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE.
allocate debug registers area, for new process.
(gdb testsuite gdb.base/inferior-died.exp)

refs #266
refs #372
2015-02-18 16:20:23 +09:00
Tomoki Shirasawa
3b04043f2a change to throw signal SIGILL to SIGSEGV when GPE 2015-02-18 14:54:49 +09:00
Tomoki Shirasawa
c0edb6fe6f add new cpu state CPU_STATUS_RESERVED 2015-02-18 13:46:08 +09:00
NAKAMURA Gou
bb137bc9bb make brk region just follow data region
This effectively reverts commit d70dd2338c.
2015-02-18 11:52:15 +09:00
NAKAMURA Gou
16af976a71 support msync() system call. refs #382
Msync(2) of this version writes only the pages which the calling process
modified. Modifications of the other processes are not written.
2015-02-18 11:52:15 +09:00
Balazs Gerofi
6485578a7f sched_yield implementation 2015-02-17 16:20:51 -08:00
Tomoki Shirasawa
d2d0fc6721 The mcexec command became executable from a command-line at the same time 2015-02-17 18:33:38 +09:00
Tomoki Shirasawa
9574a28a5f The same CPU is assigned to a different process.
refs #423
2015-02-17 18:27:46 +09:00
Susumu Komae
dbe4ec3247 support PTRACE_O_TRACECLONE and PTRACE_O_TRACEEXEC. 2015-02-17 17:00:48 +09:00
Susumu Komae
99debc548f detach traced process, when tracer process terminate.
some fixes on PTRACE_DETACH.

refs #374
refs #280
2015-02-17 16:58:29 +09:00
Susumu Komae
fa15f6b106 support PTRACE_SYSCALL.
support PTRACE_O_TRACESYSGOOD.
ptrace_report_exec() calls ptrace_report_signal().

refs #265
2015-02-17 16:56:27 +09:00
Susumu Komae
8568a73f33 traced process should stop by any signal except for SIGKILL,
even if SIG_IGN.  (LTP ptrace01)
2015-02-17 16:51:29 +09:00
Tomoki Shirasawa
8b57b2ee57 change signal handling at mcexec 2015-02-15 17:54:11 +09:00
Tomoki Shirasawa
9a36e7b84a restart waitpid if it returns with EINTR. 2015-02-13 16:00:40 +09:00
Tomoki Shirasawa
d998691425 fix setpgid(0, 0) 2015-02-13 13:51:00 +09:00
Dave van Dresser
8cdf70c500 Enable AVX extensions for processors that support it. 2015-02-12 17:51:50 -08:00
Tomoki Shirasawa
0e0bc548f6 fix mcexec SIG_IGN 2015-02-12 19:02:58 +09:00
NAKAMURA Gou
d21ae28843 add dummy NUMA system calls. refs #405
ENOSYS system call handlers for the following.
- get_mempolicy()
- mbind()
- migrate_pages()
- move_pages()
- set_mempolicy()
2015-02-10 21:16:19 +09:00
NAKAMURA Gou
a4a806bef7 support vsyscall_getcpu() vsyscall. refs #385
This version simply calls getcpu() system call, so that it's not fast.
2015-02-10 18:35:48 +09:00
NAKAMURA Gou
d30d8fe71c support getcpu() system call. refs #385
It appeared on Linux(x86) in kernel 3.1.
2015-02-10 18:35:41 +09:00
Balazs Gerofi
a5bdd41c3d procfs: check parent entry to avoid page fault in procfs_exit() 2015-01-31 22:27:13 -08:00
Susumu Komae
5f5ab34559 support PTRACE_ATTACH.
fix PTRACE_TRACEME, PTRACE_DETACH.
2015-01-30 21:02:01 +09:00
Tomoki Shirasawa
b26fa4e87c wrong send signal to sender process when kill other process group (LTP kill10)
refs #404
2015-01-29 16:14:31 +09:00
Susumu Komae
bd5f43b119 support PTRACE_SINGLESTEP.
support debug/int3 exception.
2015-01-29 15:48:05 +09:00
Susumu Komae
f97f8dbab3 support PTRACE_PEEKTEXT and PTRACE_PEEKDATA.
support PTRACE_POKETEXT and PTRACE_POKEDATA.
  now, force write anywhere.
  read-only page must copy-on-write.
2015-01-29 15:02:15 +09:00
Susumu Komae
e30946f1f0 fix PTRACE_CONT may cause error.
refs #369
2015-01-29 14:10:31 +09:00
Susumu Komae
c3ade864d9 fix PTRACE_PEEKUSER, PTRACE_POKEUSER, PTRACE_GETREGS.
support PTRACE_SETREGS.
  In struct process, add 'unsigned long *ptrace_debugreg', instead of 'struct user *userp'.
  debug registers are read/written from/to ptrace_debugreg, save/restore in schedule().
  most general registers are proc->uctx.
  fs_base is proc->thread.tlsblock_base.
  gs_base,ds,es,fs,gs and orig_rax are uncompleted.
  other members in 'struct user' are ignored, same as Linux implementation.

refs #257
refs #373
refs #263
2015-01-29 14:08:38 +09:00
bgerofi@riken.jp
9c35935671 mcexec: fix memory allocation bug that crashes CentOS7 glibc 2015-01-27 16:55:30 +09:00
Balazs Gerofi
ed33ee65b2 CentOS7 spinlock, procfs and vm_munmap support (i.e., Linux kernel 3.10) 2015-01-27 16:55:28 +09:00
Tomoki Shirasawa
d04b5a09bd PTRACE_KILL omit sched_wakeup_process return
refs #369
2015-01-27 10:55:49 +09:00
Tomoki Shirasawa
08cc31f9bf support setrlimits/getrlimits, however this fix is these syscalls only.
checking resource process must implement it separately.

refs #330
2015-01-27 10:35:58 +09:00
Tomoki Shirasawa
cf2166f830 function enter_user_mode calls check_signal.
refs #392
2015-01-16 14:28:28 +09:00
Susumu Komae
765de119dc support PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, PTRACE_O_TRACEVFORKDONE.
to start with a SIGSTOP, do not set proc->ftn->status to PS_RUNNING in __runq_add_proc().
  change vfork() set CLONE_VFORK.

refs #266
refs #267
refs #372

support PTRACE_GETEVENTMSG.
  to store ptrace event, add 'unsigned long ptrace_eventmsg;' member in struct fork_tree_node.

refs #273
2015-01-14 10:43:18 +09:00
Susumu Komae
d46110b4d9 support PTRACE_DETACH.
change getppid() to use proc->ftn->ppid_parent->pid, for ptraced process.

refs #280
2015-01-08 12:39:52 +09:00
Susumu Komae
74f0aec478 skip copy_to_user() when r->ret is negative error number in mckernel_procfs_read().
refs #370
2015-01-08 12:38:06 +09:00
Tomoki Shirasawa
e3eb7e68bc Fix need to modify ihk/cokernel/Makefile when a file has been added under mckernel/arch (Bug#365) 2014-12-26 16:05:23 +09:00
Tomoki Shirasawa
912b8a886c do_kill distinguish PTRACE_CONT from kill. 2014-12-26 15:23:11 +09:00
Balazs Gerofi bgerofi@riken.jp
e25d35a191 ihk_mc_init_ap(): cosmetics for reporting IKC, trampoline info 2014-12-25 11:05:52 +09:00
bgerofi@riken.jp
a9aad67541 IHK-SMP: boot scripts placeholder 2014-12-25 11:03:07 +09:00
Balazs Gerofi
cd6e663f48 handle VM_RESERVED (non-existing since Linux 3.7.0) and do_mmap_pgoff() (unexported since Linux 3.5.0) in mcctrl's syscall.c 2014-12-25 11:03:05 +09:00
Balazs Gerofi
5f095b3952 McKernel IHK SMP-x86 support (build system and config files) 2014-12-25 11:03:04 +09:00
bgerofi@riken.jp
811a275176 build scripts: support for separate build and source directories 2014-12-25 11:03:03 +09:00
bgerofi@riken.jp
b388f59ebd ihk_ikc_irq and ihk_ikc_irq_apicid 2014-12-25 11:03:01 +09:00
bgerofi@riken.jp
ff47261337 receive trampoline addr via parameter of arch_start() 2014-12-25 11:03:00 +09:00
Naoki Hamada
a91bf9a13d ptrace: Make PTRACE_CONT/KILL debug print separated. 2014-12-24 12:39:29 +09:00
Naoki Hamada
fcfa94cea1 ptrace: Add PTRACE_O_TRACEFORK (fake) support. 2014-12-24 12:39:13 +09:00
NAKAMURA Gou
55f7ee1526 fix a warning
| mckernel/kernel/../arch/x86/kernel/memory.c: In function '__set_pt_page':
| mckernel/kernel/../arch/x86/kernel/memory.c:367:
|     warning: 'init_pt_lock_flags' may be used uninitialized in this function
2014-12-22 17:03:32 +09:00
NAKAMURA Gou
b1b6fab7b8 fix a warning
| mckernel/kernel/host.c: In function 'syscall_packet_handler':
| mckernel/kernel/host.c:504:
|     warning: implicit declaration of function 'find_command_line'
2014-12-22 16:58:08 +09:00
NAKAMURA Gou
391886a6f1 fix a warning
| mckernel/kernel/syscall.c: In function 'do_syscall':
| mckernel/kernel/syscall.c:187:
|     warning: 'irqstate' may be used uninitialized in this function
2014-12-22 16:58:07 +09:00
NAKAMURA Gou
c810afe224 fix a warning
| mckernel/kernel/syscall.c: In function 'sys_madvise':
| mckernel/kernel/syscall.c:2108:
|     warning: 'range' may be used uninitialized in this function
2014-12-22 16:58:06 +09:00
NAKAMURA Gou
5566ed1a63 fix a warning
| mckernel/executer/kernel/control.c: In function ‘release_handler’:
| mckernel/executer/kernel/control.c:264: warning: unused variable ‘c’
2014-12-22 16:58:05 +09:00
NAKAMURA Gou
f0f91d2246 fix a warning
| mckernel/executer/kernel/control.c: In function ‘mcexec_debug_log’:
| mckernel/executer/kernel/control.c:252: warning: unused variable ‘c’
2014-12-22 16:58:04 +09:00
NAKAMURA Gou
0942bf0ce0 make dkprintf() evaluate its parameters always
Parameters of dkprintf() should be evaluated even if dkprintf() is
disabled.  Because this enables to find expression of parameter obsolete
and to avoid unnecessary compiler warnings such as "unused variable".
2014-12-22 16:58:03 +09:00
NAKAMURA Gou
9c94e90007 use ftn->tid instead of proc->tid 2014-12-22 16:58:02 +09:00
NAKAMURA Gou
a6ac906105 use ftn->pid instead of proc->pid 2014-12-22 16:58:01 +09:00
bgerofi@riken.jp
d4ba4dc8b3 introduction of mckernel_procfs_file_operations; fix /proc/self path resolution;
implementation of /proc/self/pagemap (LTP mmap12)
2014-12-15 12:46:05 +09:00
Tomoki Shirasawa
815d907ca4 setpgid return -EACCES when the child process had already performed an execve (LTP setpgid03) 2014-12-09 14:01:20 +09:00
Balazs Gerofi bgerofi@riken.jp
3c24315f91 support for /proc/mcos%d/PID/maps (without file info) (LTP mlock03) 2014-12-05 16:29:20 +09:00
Balazs Gerofi bgerofi@riken.jp
25f108bf78 mckernel_procfs_read(): fix buffer allocation, offset check and return code 2014-12-05 16:27:48 +09:00
Balazs Gerofi bgerofi@riken.jp
cc9d30efbf do_signal(): support for SIGSYS
as of POSIX.1-2001:
Signal  Value       Action  Comment
---------------------------------------------------
SIGSYS  12,31,12    Core    Bad argument to routine
2014-12-04 18:10:10 +09:00
Balazs Gerofi bgerofi@riken.jp
af83f1be64 rlimit(RLIMIT_NOFILE): return one less to make sure sync pipe can be created (LTP fork09) 2014-12-04 17:40:00 +09:00
bgerofi@riken.jp
b2cab453f1 clone(): do not allow setting CLONE_THREAD and CLONE_VM separately
XXX: When CLONE_VM is set but CLONE_THREAD is not the new thread is
meant to have its own thread group, i.e., when calling exit_group()
the cloner thread wouldn't be killed. However, this is a problem on
the Linux side because we do not invoke clone in mcexec when threads
are created. Thus, currently no support for this combination is
provided.
2014-12-04 16:55:18 +09:00
bgerofi@riken.jp
8909597499 clone(): support for handling CLONE_SIGHAND and CLONE_VM flags separately 2014-12-04 16:55:17 +09:00
bgerofi@riken.jp
86f2a9067b getppid() implementation 2014-12-04 16:55:17 +09:00
Tomoki Shirasawa
a5889fb5df sigaction check signal number (LTP sigaction02) 2014-12-04 11:31:50 +09:00
Tomoki Shirasawa
f1a86cfbd3 when host mcexec down, syscall is hung up 2014-12-04 11:17:29 +09:00
Balazs Gerofi bgerofi@riken.jp
c1cf630a94 mcexec: store full path to executable
required so that a forked process can obtain exec reference in the
Linux kernel even if executable was specified with relative path
and fork was called after changing the current working directory
2014-12-03 15:14:26 +09:00
Tomoki Shirasawa
8f30e16976 when mcexec is killed by SIGKILL, terminate mckernel process (BUG#259) 2014-11-27 16:13:52 +09:00
Masamichi Takagi
58e2e0a246 Use pidof in mcreboot script 2014-11-23 17:54:14 +09:00
Masamichi Takagi
ea02628f2b Add reboot and shutdown script for builtin-x86
It decides the number of cores for McKernel by looking into the
"SHIMOS: CPU Status:" line of dmesg. It sets the amount of memory for
McKernel to one fourth of the total memory obtained by "free -g".
2014-11-13 20:06:29 +09:00
Balazs Gerofi
89acf5c5d6 support for AT_RANDOM auxiliary entry on the process stack (needed for _dl_random in glibc) 2014-11-11 08:48:27 +09:00
Balazs Gerofi
ac8e2a0c40 handle VM_RESERVED (non-existing since Linux 3.7.0) and do_mmap_pgoff() (unexported since Linux 3.5.0) in mcctrl's syscall.c 2014-11-11 08:42:07 +09:00
Tomoki Shirasawa
ab7aa3354f repair signal implementation.
- Don't interrupt syscall with the ignored signal.
2014-11-07 07:55:30 +09:00
Tomoki Shirasawa
c4e0b84792 repair signal implementation.
- can not interrupt syscall
- can not receive SIGKILL
2014-10-31 16:34:59 +09:00
61 changed files with 9065 additions and 2438 deletions

View File

@@ -6,7 +6,7 @@ all::
@(cd executer/kernel; make modules)
@(cd executer/user; make)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic) \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make) \
;; \
*) \
@@ -19,7 +19,7 @@ install::
@(cd executer/kernel; make install)
@(cd executer/user; make install)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic) \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make install) \
;; \
*) \
@@ -27,19 +27,38 @@ install::
exit 1 \
;; \
esac
if [ "$(TARGET)" = attached-mic ]; then \
@case "$(TARGET)" in \
attached-mic) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
fi
;; \
builtin-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
smp-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
clean::
@(cd executer/kernel; make clean)
@(cd executer/user; make clean)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic) \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make clean) \
;; \
*) \

View File

@@ -0,0 +1,2 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o

View File

@@ -5,13 +5,18 @@
* Control CPU.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
* 2015/02/26: bgerofi - set pstate, turbo mode and power/perf bias MSRs
* 2015/02/12: Dave - enable AVX if supported
*/
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <ihk/mm.h>
#include <types.h>
#include <errno.h>
#include <list.h>
@@ -22,6 +27,7 @@
#include <march.h>
#include <signal.h>
#include <process.h>
#include <cls.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@@ -49,7 +55,7 @@
#ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
@@ -106,7 +112,12 @@ void reload_idt(void)
}
static struct list_head handlers[256 - 32];
extern char nmi[];
extern char page_fault[], general_protection_exception[];
extern char debug_exception[], int3_exception[];
uint64_t boot_pat_state = 0;
int no_turbo = 0; /* May be updated by early parsing of kargs */
static void init_idt(void)
{
@@ -122,15 +133,20 @@ static void init_idt(void)
set_idt_entry(i, generic_common_handlers[i]);
}
set_idt_entry(2, (uintptr_t)nmi);
set_idt_entry(13, (unsigned long)general_protection_exception);
set_idt_entry(14, (unsigned long)page_fault);
set_idt_entry_trap_gate(1, (unsigned long)debug_exception);
set_idt_entry_trap_gate(3, (unsigned long)int3_exception);
reload_idt();
}
void init_fpu(void)
{
unsigned long reg;
unsigned long cpuid01_ecx;
asm volatile("movq %%cr0, %0" : "=r"(reg));
/* Unset EM and TS flag. */
@@ -140,10 +156,36 @@ void init_fpu(void)
asm volatile("movq %0, %%cr0" : : "r"(reg));
#ifdef ENABLE_SSE
asm volatile("cpuid" : "=c" (cpuid01_ecx) : "a" (0x1) : "%rbx", "%rdx");
asm volatile("movq %%cr4, %0" : "=r"(reg));
/* Set OSFXSR flag. */
reg |= (1 << 9);
/* Cr4 flags:
OSFXSR[b9] - enables SSE instructions
OSXMMEXCPT[b10] - generate SIMD FP exception instead of invalid op
OSXSAVE[b18] - enables access to xcr0
CPUID.01H:ECX flags:
XSAVE[b26] - verify existence of extended crs/XSAVE
AVX[b28] - verify existence of AVX instructions
*/
reg |= ((1 << 9) | (1 << 10));
if(cpuid01_ecx & (1 << 26)) {
/* XSAVE set, enable access to xcr0 */
reg |= (1 << 18);
}
asm volatile("movq %0, %%cr4" : : "r"(reg));
kprintf("init_fpu(): SSE init: CR4 = 0x%016lX; ", reg);
/* Set xcr0[2:1] to enable avx ops */
if(cpuid01_ecx & (1 << 28)) {
reg = xgetbv(0);
reg |= 0x6;
xsetbv(0, reg);
}
kprintf("XCR0 = 0x%016lX\n", reg);
#else
kprintf("init_fpu(): SSE not enabled\n");
#endif
asm volatile("finit");
@@ -203,6 +245,153 @@ void lapic_icr_write(unsigned int h, unsigned int l)
lapic_write(LAPIC_ICR0, l);
}
/*
 * Debug helper: read the model-specific register `idx` with rdmsr() and
 * dump its value through __kprintf() in decimal, in hexadecimal, and as a
 * 64-column bit table (a row of bit indices 63..0 followed by a row with
 * the corresponding bit values).
 */
void print_msr(int idx)
{
	unsigned long long msr_val = rdmsr(idx);
	int pos;

	__kprintf("MSR 0x%x val (dec): %llu\n", idx, msr_val);
	__kprintf("MSR 0x%x val (hex): 0x%llx\n", idx, msr_val);

	/* Header row: bit positions, most significant first. */
	__kprintf(" ");
	pos = 63;
	while (pos >= 0) {
		__kprintf("%3d", pos);
		pos--;
	}
	__kprintf("\n");

	/* Value row: one 0/1 per column, aligned with the header row. */
	__kprintf("MSR 0x%x val (bin):", idx);
	pos = 63;
	while (pos >= 0) {
		int bit_set = (int)((msr_val >> pos) & 1);

		__kprintf("%3d", bit_set);
		pos--;
	}
	__kprintf("\n");
}
/*
 * Program the performance-related MSRs of the calling CPU:
 *  - IA32_PERF_CTL receives the target performance ratio and the turbo
 *    (IDA) engage/disengage bit;
 *  - IA32_ENERGY_PERF_BIAS is set to 0 (highest performance) when
 *    CPUID.06H:ECX[3] reports the capability.
 *
 * The global `no_turbo` selects whether turbo boost is left enabled.
 */
void init_pstate_and_turbo(void)
{
	/* IA32_PERF_CTL bit 32: 1 disengages IDA (turbo), 0 enables it. */
	const uint64_t ida_disengage = (uint64_t)1 << 32;
	uint64_t perf_ctl;
	uint64_t eax, ecx;

	asm volatile("cpuid" : "=a" (eax), "=c" (ecx) : "a" (0x6) : "%rbx", "%rdx");

	/*
	 * Default target: the base operating ratio, read from
	 * MSR_PLATFORM_INFO[15:8].  It already sits in bits 15:8, which is
	 * where IA32_PERF_CTL[15:0] expects the target state value.
	 */
	perf_ctl = rdmsr(MSR_PLATFORM_INFO);
	perf_ctl &= 0xFF00;

	/*
	 * CPUID.06H:EAX[1] indicates that opportunistic processor
	 * performance operation (IDA, i.e. turbo boost) was enabled by BIOS.
	 */
	if (eax & (1 << 1)) {
		if (no_turbo) {
			/* Supported, but requested to be turned off. */
			perf_ctl |= ida_disengage;
		}
		else {
			/*
			 * Use the low byte of MSR_NHM_TURBO_RATIO_LIMIT as
			 * the target ratio and keep turbo engaged.
			 */
			uint64_t turbo_ratio;

			turbo_ratio = rdmsr(MSR_NHM_TURBO_RATIO_LIMIT);
			turbo_ratio &= 0xFF;

			perf_ctl = turbo_ratio << 8;
			perf_ctl &= ~ida_disengage;
		}
	}

	wrmsr(MSR_IA32_PERF_CTL, perf_ctl);

	/*
	 * IA32_ENERGY_PERF_BIAS[3:0] is the power policy preference:
	 * 0 prefers highest performance, 15 prefers maximum energy saving.
	 * Only written when CPUID.06H:ECX[3] is set.
	 */
	if (ecx & (1 << 3)) {
		wrmsr(MSR_IA32_ENERGY_PERF_BIAS, 0);
	}

	//print_msr(MSR_IA32_MISC_ENABLE);
	//print_msr(MSR_IA32_PERF_CTL);
	//print_msr(MSR_IA32_ENERGY_PERF_BIAS);
}
/*
 * Memory-type codes used to build the value written to the PAT MSR
 * (see init_pat()).
 */
enum {
	PAT_UC       = 0,	/* Uncached */
	PAT_WC       = 1,	/* Write Combining */
	PAT_WT       = 4,	/* Write Through */
	PAT_WP       = 5,	/* Write Protected */
	PAT_WB       = 6,	/* Write Back (default) */
	PAT_UC_MINUS = 7,	/* UC, but can be overridden by MTRR */
};

/* Memory type PAT_<y> positioned in entry x; each entry is one byte wide. */
#define PAT(x, y) (((uint64_t)PAT_ ## y) << (8 * (x)))
/*
 * Reprogram the Page Attribute Table (PAT) MSR of the calling CPU so that
 * the PWT/PCD page-table bit combinations select WB, WC, UC- and UC.
 *
 * Does nothing except print a message when CPUID reports no PAT support.
 * On the first call that finds boot_pat_state still zero, the MSR value
 * found at entry is saved there before being overwritten.
 */
void init_pat(void)
{
	uint64_t pat;
	uint64_t eax = 0x1;	/* CPUID leaf 1; overwritten by the instruction */
	uint64_t edx;

	/*
	 * An operating system or executive can detect the availability of the
	 * PAT by executing the CPUID instruction with a value of 1 in the EAX
	 * register. Support for the PAT is indicated by the PAT flag (bit 16
	 * of the values returned to EDX register). If the PAT is supported,
	 * the operating system or executive can use the IA32_PAT MSR to program
	 * the PAT. When memory types have been assigned to entries in the PAT,
	 * software can then use the PAT-index bit (PAT) in the page-table and
	 * page-directory entries along with the PCD and PWT bits to assign memory
	 * types from the PAT to individual pages.
	 */
	/*
	 * CPUID writes EAX as well as EBX/ECX/EDX.  EAX must therefore be a
	 * read-write operand: the compiler assumes input-only operands keep
	 * their value across the asm statement.
	 */
	asm volatile("cpuid" : "+a" (eax), "=d" (edx) : : "%rbx", "%rcx");

	if (!(edx & ((uint64_t)1 << 16))) {
		kprintf("PAT not supported.\n");
		return;
	}

	/* Set PWT to Write-Combining. All other bits stay the same */
	/* (Based on Linux' settings)
	 *
	 * PTE encoding used in Linux:
	 *      PAT
	 *      |PCD
	 *      ||PWT
	 *      |||
	 *      000 WB          _PAGE_CACHE_WB
	 *      001 WC          _PAGE_CACHE_WC
	 *      010 UC-         _PAGE_CACHE_UC_MINUS
	 *      011 UC          _PAGE_CACHE_UC
	 * PAT bit unused
	 */
	pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
	      PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);

	/* Boot CPU check */
	if (!boot_pat_state)
		boot_pat_state = rdmsr(MSR_IA32_CR_PAT);

	wrmsr(MSR_IA32_CR_PAT, pat);
	kprintf("PAT support detected and reconfigured.\n");
}
void init_lapic(void)
{
unsigned long baseaddr;
@@ -262,16 +451,23 @@ static void init_smp_processor(void)
static char *trampoline_va, *first_page_va;
/*@
@ assigns trampoline_va;
@ assigns first_page_va;
@*/
void ihk_mc_init_ap(void)
{
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
trampoline_va = map_fixed_area(AP_TRAMPOLINE, AP_TRAMPOLINE_SIZE,
0);
trampoline_va = map_fixed_area(ap_trampoline, AP_TRAMPOLINE_SIZE, 0);
kprintf("Trampoline area: 0x%lx \n", ap_trampoline);
first_page_va = map_fixed_area(0, PAGE_SIZE, 0);
kprintf("# of cpus : %d\n", cpu_info->ncpus);
init_processors_local(cpu_info->ncpus);
kprintf("IKC IRQ vector: %d, IKC target CPU APIC: %d\n",
ihk_ikc_irq, ihk_ikc_irq_apicid);
/* Do initialization for THIS cpu (BSP) */
assign_processor_id();
@@ -355,6 +551,8 @@ void init_cpu(void)
init_lapic();
init_syscall();
x86_init_perfctr();
init_pstate_and_turbo();
init_pat();
}
void setup_x86(void)
@@ -409,29 +607,63 @@ void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs);
extern void tlb_flush_handler(int vector);
void handle_interrupt(int vector, struct x86_regs *regs)
void handle_interrupt(int vector, struct x86_user_context *regs)
{
struct ihk_mc_interrupt_handler *h;
lapic_ack();
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
ihk_mc_get_processor_id(), vector, regs->rip);
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
if (vector < 0 || vector > 255) {
panic("Invalid interrupt vector.");
}
else if (vector < 32) {
if (vector == 8 ||
(vector >= 10 && vector <= 15) || vector == 17) {
struct siginfo info;
switch(vector){
case 0:
memset(&info, '\0', sizeof info);
info.si_signo = SIGFPE;
info.si_code = FPE_INTDIV;
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
set_signal(SIGFPE, regs, &info);
break;
case 9:
case 16:
case 19:
set_signal(SIGFPE, regs, NULL);
break;
case 4:
case 5:
set_signal(SIGSEGV, regs, NULL);
break;
case 6:
memset(&info, '\0', sizeof info);
info.si_signo = SIGILL;
info.si_code = ILL_ILLOPN;
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
set_signal(SIGILL, regs, &info);
break;
case 10:
set_signal(SIGSEGV, regs, NULL);
break;
case 11:
case 12:
set_signal(SIGBUS, regs, NULL);
break;
case 17:
memset(&info, '\0', sizeof info);
info.si_signo = SIGBUS;
info.si_code = BUS_ADRALN;
set_signal(SIGBUS, regs, &info);
break;
default:
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
vector, regs->rflags, regs->cs, regs->rip);
} else {
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
vector, regs->rflags, regs->cs, regs->rip);
vector, regs->gpr.rflags, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
panic("Unhandled exception");
}
arch_show_interrupt_context(regs);
panic("Unhandled exception");
}
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
@@ -450,20 +682,64 @@ void handle_interrupt(int vector, struct x86_regs *regs)
check_need_resched();
}
void gpe_handler(struct x86_regs *regs)
void gpe_handler(struct x86_user_context *regs)
{
struct siginfo info;
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
regs->error, regs->cs, regs->rip);
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
memset(&info, '\0', sizeof info);
set_signal(SIGILL, regs, &info);
if ((regs->gpr.cs & 3) == 0) {
panic("gpe_handler");
}
set_signal(SIGSEGV, regs, NULL);
check_signal(0, regs);
check_need_resched();
// panic("GPF");
}
/*
 * debug_handler - C-level handler for the debug exception.
 *
 * regs: saved user context of the interrupted code.
 *
 * Reads the DB6 status register to pick the siginfo code:
 *   - DB6_BS set: clears RFLAGS_TF in the saved rflags and reports
 *     TRAP_TRACE;
 *   - any of DB6_B0..DB6_B3 set: reports TRAP_HWBKPT;
 *   - otherwise si_code stays 0.
 * SIGTRAP is then queued with set_signal(), followed by check_signal()
 * and check_need_resched().
 */
void debug_handler(struct x86_user_context *regs)
{
	struct siginfo info;
	unsigned long status;

#ifdef DEBUG_PRINT_CPU
	kprintf("debug exception (err: %lx, %lx:%lx)\n",
		regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
	arch_show_interrupt_context(regs);
#endif

	/* Zeroing first leaves si_code at 0 when no DB6 bit matches below. */
	memset(&info, '\0', sizeof info);

	asm("mov %%db6, %0" :"=r" (status));

	if (status & DB6_BS) {
		info.si_code = TRAP_TRACE;
		regs->gpr.rflags &= ~RFLAGS_TF;
	} else if (status & (DB6_B0 | DB6_B1 | DB6_B2 | DB6_B3)) {
		info.si_code = TRAP_HWBKPT;
	}

	set_signal(SIGTRAP, regs, &info);
	check_signal(0, regs);
	check_need_resched();
}
void int3_handler(struct x86_user_context *regs)
{
struct siginfo info;
#ifdef DEBUG_PRINT_CPU
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
#endif
memset(&info, '\0', sizeof info);
info.si_code = TRAP_BRKPT;
set_signal(SIGTRAP, regs, &info);
check_signal(0, regs);
check_need_resched();
}
void x86_issue_ipi(unsigned int apicid, unsigned int low)
{
lapic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
@@ -524,31 +800,65 @@ void cpu_halt(void)
asm volatile("hlt");
}
/*
 * cpu_safe_halt - enable interrupts and halt the CPU.
 *
 * Issues `sti` immediately followed by `hlt` in a single asm statement.
 * Takes no arguments and returns nothing.
 */
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_safe_halt(void)
{
asm volatile("sti; hlt");
}
/*
 * cpu_enable_interrupt - enable interrupts on the calling CPU by executing
 * `sti`.
 */
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
void cpu_enable_interrupt(void)
{
asm volatile("sti");
}
/*
 * cpu_disable_interrupt - disable interrupts on the calling CPU by
 * executing `cli`. The previous state is not saved.
 */
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled > 0;
@*/
void cpu_disable_interrupt(void)
{
asm volatile("cli");
}
/*
 * cpu_restore_interrupt - load a previously saved flags word.
 *
 * flags: value to load; it is pushed on the stack and popped with `popf`,
 *        so every flag bit that `popf` may change is taken from it, not
 *        only RFLAGS_IF.
 *
 * The "memory" clobber keeps the compiler from moving memory accesses
 * across the statement.
 */
/*@
@ assigns \nothing;
@ behavior to_enabled:
@ assumes flags & RFLAGS_IF;
@ ensures \interrupt_disabled == 0;
@ behavior to_disabled:
@ assumes !(flags & RFLAGS_IF);
@ ensures \interrupt_disabled > 0;
@*/
void cpu_restore_interrupt(unsigned long flags)
{
asm volatile("push %0; popf" : : "g"(flags) : "memory", "cc");
}
/*
 * cpu_pause - execute the `pause` instruction once.
 *
 * The "memory" clobber also makes the statement a compiler barrier, so a
 * loop calling this re-reads the memory it polls on each iteration.
 */
/*@
@ assigns \nothing;
@*/
void cpu_pause(void)
{
asm volatile("pause" ::: "memory");
}
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled > 0;
@ behavior from_enabled:
@ assumes \interrupt_disabled == 0;
@ ensures \result & RFLAGS_IF;
@ behavior from_disabled:
@ assumes \interrupt_disabled > 0;
@ ensures !(\result & RFLAGS_IF);
@*/
unsigned long cpu_disable_interrupt_save(void)
{
unsigned long flags;
@@ -558,6 +868,17 @@ unsigned long cpu_disable_interrupt_save(void)
return flags;
}
/*@
@ behavior valid_vector:
@ assumes 32 <= vector <= 255;
@ requires \valid(h);
@ assigns handlers[vector-32];
@ ensures \result == 0;
@ behavior invalid_vector:
@ assumes (vector < 32) || (255 < vector);
@ assigns \nothing;
@ ensures \result == -EINVAL;
@*/
int ihk_mc_register_interrupt_handler(int vector,
struct ihk_mc_interrupt_handler *h)
{
@@ -579,6 +900,11 @@ int ihk_mc_unregister_interrupt_handler(int vector,
extern unsigned long __page_fault_handler_address;
/*@
@ requires \valid(h);
@ assigns __page_fault_handler_address;
@ ensures __page_fault_handler_address == h;
@*/
void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
{
__page_fault_handler_address = (unsigned long)h;
@@ -588,6 +914,18 @@ extern char trampoline_code_data[], trampoline_code_data_end[];
struct page_table *get_init_page_table(void);
unsigned long get_transit_page_table(void);
/* reusable, but not reentrant */
/*@
@ requires \valid_apicid(cpuid); // valid APIC ID or not
@ requires \valid(pc);
@ requires \valid(trampoline_va);
@ requires \valid(trampoline_code_data
@ +(0..(trampoline_code_data_end - trampoline_code_data)));
@ requires \valid_physical(ap_trampoline); // valid physical address or not
@ assigns (char *)trampoline_va+(0..trampoline_code_data_end - trampoline_code_data);
@ assigns cpu_boot_status;
@ ensures cpu_boot_status != 0;
@*/
void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
{
unsigned long *p;
@@ -607,7 +945,7 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
cpu_boot_status = 0;
__x86_wakeup(cpuid, AP_TRAMPOLINE);
__x86_wakeup(cpuid, ap_trampoline);
/* XXX: Time out */
while (!cpu_boot_status) {
@@ -615,6 +953,11 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
}
}
/*@
@ requires \valid(new_ctx);
@ requires (stack_pointer == NULL) || \valid((unsigned long *)stack_pointer-1);
@ requires \valid(next_function);
@*/
void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
void *stack_pointer, void (*next_function)(void))
{
@@ -634,6 +977,15 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
extern char enter_user_mode[];
/*@
@ requires \valid(ctx);
@ requires \valid(puctx);
@ requires \valid((ihk_mc_user_context_t *)stack_pointer-1);
@ requires \valid_user(new_pc); // valid user space address or not
@ requires \valid_user(user_sp-1);
@ assigns *((ihk_mc_user_context_t *)stack_pointer-1);
@ assigns ctx->rsp0;
@*/
void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
ihk_mc_user_context_t **puctx,
void *stack_pointer, unsigned long new_pc,
@@ -649,49 +1001,95 @@ void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
*puctx = uctx;
memset(uctx, 0, sizeof(ihk_mc_user_context_t));
uctx->cs = USER_CS;
uctx->rip = new_pc;
uctx->ss = USER_DS;
uctx->rsp = user_sp;
uctx->rflags = RFLAGS_IF;
uctx->gpr.cs = USER_CS;
uctx->gpr.rip = new_pc;
uctx->gpr.ss = USER_DS;
uctx->gpr.rsp = user_sp;
uctx->gpr.rflags = RFLAGS_IF;
uctx->is_gpr_valid = 1;
ihk_mc_init_context(ctx, sp, (void (*)(void))enter_user_mode);
ctx->rsp0 = (unsigned long)stack_pointer;
}
/*@
@ behavior rsp:
@ assumes reg == IHK_UCR_STACK_POINTER;
@ requires \valid(uctx);
@ assigns uctx->gpr.rsp;
@ ensures uctx->gpr.rsp == value;
@ behavior rip:
@ assumes reg == IHK_UCR_PROGRAM_COUNTER;
@ requires \valid(uctx);
@ assigns uctx->gpr.rip;
@ ensures uctx->gpr.rip == value;
@*/
void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx,
enum ihk_mc_user_context_regtype reg,
unsigned long value)
{
if (reg == IHK_UCR_STACK_POINTER) {
uctx->rsp = value;
uctx->gpr.rsp = value;
} else if (reg == IHK_UCR_PROGRAM_COUNTER) {
uctx->rip = value;
uctx->gpr.rip = value;
}
}
void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx)
{
kprintf("CS:RIP = %04lx:%16lx\n", uctx->cs, uctx->rip);
kprintf("CS:RIP = %04lx:%16lx\n", uctx->gpr.cs, uctx->gpr.rip);
kprintf("%16lx %16lx %16lx %16lx\n%16lx %16lx %16lx\n",
uctx->rax, uctx->rbx, uctx->rcx, uctx->rdx,
uctx->rsi, uctx->rdi, uctx->rsp);
uctx->gpr.rax, uctx->gpr.rbx, uctx->gpr.rcx, uctx->gpr.rdx,
uctx->gpr.rsi, uctx->gpr.rdi, uctx->gpr.rsp);
}
/*
 * ihk_mc_set_syscall_handler - install the system call handler.
 *
 * handler: function taking (int, ihk_mc_user_context_t *) and returning
 *          long; it is stored in __x86_syscall_handler, replacing any
 *          previously stored value.
 */
/*@
@ requires \valid(handler);
@ assigns __x86_syscall_handler;
@ ensures __x86_syscall_handler == handler;
@*/
void ihk_mc_set_syscall_handler(long (*handler)(int, ihk_mc_user_context_t *))
{
__x86_syscall_handler = handler;
}
/*
 * ihk_mc_delay_us - thin wrapper that forwards `us` unchanged to
 * arch_delay().
 *
 * us: delay amount; presumably microseconds, going by the name -- confirm
 *     against arch_delay().
 */
/*@
@ assigns \nothing;
@*/
void ihk_mc_delay_us(int us)
{
arch_delay(us);
}
#define EXTENDED_ARCH_SHOW_CONTEXT
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
/*
 * arch_show_extended_context - print CR0, CR4, MSR_EFER and XCR0.
 *
 * All four values are sampled first and then printed with __kprintf().
 */
void arch_show_extended_context(void)
{
	unsigned long ctl0, ctl4;
	unsigned long efer;
	unsigned long xcr;

	/* Sample every register before emitting any output. */
	asm volatile("movq %%cr0, %0" : "=r"(ctl0));
	asm volatile("movq %%cr4, %0" : "=r"(ctl4));
	efer = rdmsr(MSR_EFER);
	xcr = xgetbv(0);

	__kprintf("\n CR0 CR4\n");
	__kprintf("%016lX %016lX\n", ctl0, ctl4);

	__kprintf(" MSR_EFER\n");
	__kprintf("%016lX\n", efer);

	__kprintf(" XCR0\n");
	__kprintf("%016lX\n", xcr);
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct x86_regs *regs = reg;
int irqflags;
const struct x86_user_context *uctx = reg;
const struct x86_basic_regs *regs = &uctx->gpr;
unsigned long irqflags;
irqflags = kprintf_lock();
@@ -711,10 +1109,22 @@ void arch_show_interrupt_context(const void *reg)
__kprintf(" CS SS RFLAGS ERROR\n");
__kprintf("%16lx %16lx %16lx %16lx\n",
regs->cs, regs->ss, regs->rflags, regs->error);
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
arch_show_extended_context();
#endif
kprintf_unlock(irqflags);
}
/*@
@ behavior fs_base:
@ assumes type == IHK_ASR_X86_FS;
@ ensures \result == 0;
@ behavior invalid_type:
@ assumes type != IHK_ASR_X86_FS;
@ ensures \result == -EINVAL;
@*/
int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
unsigned long value)
{
@@ -728,6 +1138,15 @@ int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
}
}
/*@
@ behavior fs_base:
@ assumes type == IHK_ASR_X86_FS;
@ requires \valid(value);
@ ensures \result == 0;
@ behavior invalid_type:
@ assumes type != IHK_ASR_X86_FS;
@ ensures \result == -EINVAL;
@*/
int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
unsigned long *value)
{
@@ -741,6 +1160,10 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
}
}
/*@
@ requires \valid_apicid(cpu); // valid APIC ID or not
@ ensures \result == 0;
@*/
int ihk_mc_interrupt_cpu(int cpu, int vector)
{
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
@@ -749,3 +1172,68 @@ int ihk_mc_interrupt_cpu(int cpu, int vector)
x86_issue_ipi(cpu, vector);
return 0;
}
/*
 * release_fp_regs - free the floating point save area of a process.
 *
 * proc: process whose fp_regs buffer is released.
 *
 * Does nothing when proc->fp_regs is already NULL. Otherwise the buffer is
 * returned with ihk_mc_free_pages() and the pointer is reset to NULL.
 *
 * The page count is derived from sizeof(fp_regs_struct) exactly as
 * save_fp_regs() derives it for ihk_mc_alloc_pages(), so allocation and
 * release stay in step if the structure ever grows past one page.
 */
/*@
@ requires \valid(proc);
@ ensures proc->fp_regs == NULL;
@*/
void
release_fp_regs(struct process *proc)
{
	int pages;

	if (!proc->fp_regs)
		return;

	pages = (sizeof(fp_regs_struct) + 4095) >> 12;
	ihk_mc_free_pages(proc->fp_regs, pages);
	proc->fp_regs = NULL;
}
/*
 * save_fp_regs - allocate and zero the floating point save area.
 *
 * proc: process to receive the buffer.
 *
 * Returns immediately when proc->fp_regs is already set. Otherwise enough
 * pages to hold one fp_regs_struct are requested with IHK_MC_AP_NOWAIT;
 * if the allocation fails proc->fp_regs is left NULL. The register state
 * itself is not saved yet (see the TODO).
 */
void
save_fp_regs(struct process *proc)
{
	int npages;

	if (proc->fp_regs != NULL)
		return;

	npages = (sizeof(fp_regs_struct) + 4095) >> 12;
	proc->fp_regs = ihk_mc_alloc_pages(npages, IHK_MC_AP_NOWAIT);
	if (proc->fp_regs != NULL) {
		memset(proc->fp_regs, 0, sizeof(fp_regs_struct));
		// TODO: do xsave
	}
}
/*
 * restore_fp_regs - drop the floating point save area of a process.
 *
 * proc: process whose fp_regs buffer is consumed.
 *
 * When a buffer exists it is handed to release_fp_regs(); the register
 * state is not reloaded yet (see the TODO). No-op when no buffer exists.
 */
void
restore_fp_regs(struct process *proc)
{
	if (proc->fp_regs) {
		// TODO: do xrstor
		release_fp_regs(proc);
	}
}
/*
 * lookup_user_context - return the saved user context of a process.
 *
 * proc: process to inspect.
 *
 * Returns NULL when
 *   - proc is neither the current process of this CPU nor in one of the
 *     PS_INTERRUPTIBLE / PS_UNINTERRUPTIBLE / PS_STOPPED / PS_TRACED
 *     states, or
 *   - the context's general purpose registers are not marked valid.
 * Otherwise returns proc->uctx. If the segment register part is not yet
 * marked valid it is filled in first: fs_base from
 * proc->thread.tlsblock_base, everything else zero.
 */
ihk_mc_user_context_t *lookup_user_context(struct process *proc)
{
	ihk_mc_user_context_t *uctx = proc->uctx;

	/* Checks stay in this order: uctx is only dereferenced after the
	 * state/current test has passed. */
	if (!(proc->ftn->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE
				| PS_STOPPED | PS_TRACED))
			&& (proc != cpu_local_var(current))) {
		return NULL;
	}
	if (!uctx->is_gpr_valid) {
		return NULL;
	}

	if (uctx->is_sr_valid) {
		return uctx;
	}

	uctx->sr.fs_base = proc->thread.tlsblock_base;
	uctx->sr.gs_base = 0;
	uctx->sr.ds = 0;
	uctx->sr.es = 0;
	uctx->sr.fs = 0;
	uctx->sr.gs = 0;
	uctx->is_sr_valid = 1;

	return uctx;
} /* lookup_user_context() */

View File

@@ -15,7 +15,7 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
@@ -86,7 +86,8 @@ void fill_prstatus(struct note *head, struct process *proc, void *regs0)
{
void *name;
struct elf_prstatus64 *prstatus;
struct x86_regs *regs = regs0;
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
register unsigned long _r12 asm("r12");
register unsigned long _r13 asm("r13");
register unsigned long _r14 asm("r14");

View File

@@ -5,6 +5,8 @@
* Define and declare memory management macros and functions
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -117,6 +119,25 @@
#define PTE_NULL ((pte_t)0)
typedef unsigned long pte_t;
/*
 * pagemap kernel ABI bits
 *
 * A pagemap entry is one 64-bit word:
 *   bits 63..61  status  (PM_STATUS_BITS wide; PM_PRESENT, PM_SWAP)
 *   bits 60..55  page shift (PM_PSHIFT_BITS wide)
 *   bits 54..0   page frame number
 *
 * All constants are built from unsigned 64-bit values: the status field
 * reaches bit 63, and shifting a signed value into the sign bit is
 * undefined behaviour in C.
 */
#define PM_ENTRY_BYTES (sizeof(uint64_t))
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1ULL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr) (((uint64_t) (nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1ULL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1ULL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
#define PM_PRESENT PM_STATUS(4ULL)
#define PM_SWAP PM_STATUS(2ULL)
/* For easy conversion, it is better to be the same as architecture's ones */
enum ihk_mc_pt_attribute {
PTATTR_ACTIVE = 0x01,
@@ -128,6 +149,7 @@ enum ihk_mc_pt_attribute {
PTATTR_NO_EXECUTE = 0x8000000000000000,
PTATTR_UNCACHABLE = 0x10000,
PTATTR_FOR_USER = 0x20000,
PTATTR_WRITE_COMBINED = 0x40000,
};
static inline int pte_is_null(pte_t *ptep)
@@ -185,6 +207,12 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
return (off_t)(*ptep & PAGE_MASK);
}
/*
 * pte_make_null - overwrite a page table entry with PTE_NULL.
 *
 * ptep:   entry to overwrite.
 * pgsize: not used by this implementation.
 */
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
*ptep = PTE_NULL;
return;
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
@@ -216,6 +244,36 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif
/*
 * pte_clear_dirty - atomically clear the dirty flag of a page table entry.
 *
 * ptep:   entry to modify.
 * pgsize: size of the page the entry maps; selects which level's dirty
 *         flag is cleared (PTL2_SIZE -> PFL2_DIRTY, PTL3_SIZE ->
 *         PFL3_DIRTY, PTL1_SIZE or anything else -> PFL1_DIRTY).
 *
 * The update is one `lock andq`. *ptep is declared as a read-write ("+m")
 * operand so the compiler knows the asm changes it and will not reuse a
 * value of *ptep cached before the statement.
 */
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default: /* through */
	case PTL1_SIZE: mask = ~PFL1_DIRTY; break;
	case PTL2_SIZE: mask = ~PFL2_DIRTY; break;
	case PTL3_SIZE: mask = ~PFL3_DIRTY; break;
	}

	asm volatile ("lock andq %1,%0" : "+m"(*ptep) : "r"(mask) : "cc");
	return;
}
/*
 * pte_set_dirty - atomically set the dirty flag of a page table entry.
 *
 * ptep:   entry to modify.
 * pgsize: size of the page the entry maps; selects which level's dirty
 *         flag is set (PTL2_SIZE -> PFL2_DIRTY, PTL3_SIZE -> PFL3_DIRTY,
 *         PTL1_SIZE or anything else -> PFL1_DIRTY).
 *
 * The update is one `lock orq`. *ptep is declared as a read-write ("+m")
 * operand so the compiler knows the asm changes it and will not reuse a
 * value of *ptep cached before the statement.
 */
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default: /* through */
	case PTL1_SIZE: mask = PFL1_DIRTY; break;
	case PTL2_SIZE: mask = PFL2_DIRTY; break;
	case PTL3_SIZE: mask = PFL3_DIRTY; break;
	}

	asm volatile ("lock orq %1,%0" : "+m"(*ptep) : "r"(mask) : "cc");
	return;
}
struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
@@ -227,8 +285,9 @@ void flush_tlb_single(unsigned long addr);
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
#define AP_TRAMPOLINE 0x10000
#define AP_TRAMPOLINE_SIZE 0x4000
extern unsigned long ap_trampoline;
//#define AP_TRAMPOLINE 0x10000
#define AP_TRAMPOLINE_SIZE 0x2000
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)

View File

@@ -0,0 +1,36 @@
/**
* \file mman.h
* License details are found in the file LICENSE.
* \brief
* memory management declarations
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
#ifndef HEADER_ARCH_MMAN_H
#define HEADER_ARCH_MMAN_H
/*
* mapping flags
*/
#define MAP_32BIT 0x40
#define MAP_GROWSDOWN 0x0100
#define MAP_DENYWRITE 0x0800
#define MAP_EXECUTABLE 0x1000
#define MAP_LOCKED 0x2000
#define MAP_NORESERVE 0x4000
#define MAP_POPULATE 0x8000
#define MAP_NONBLOCK 0x00010000
#define MAP_STACK 0x00020000
#define MAP_HUGETLB 0x00040000
/*
* for mlockall()
*/
#define MCL_CURRENT 0x01
#define MCL_FUTURE 0x02
#endif /* HEADER_ARCH_MMAN_H */

View File

@@ -0,0 +1,40 @@
/**
* \file shm.h
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
#ifndef HEADER_ARCH_SHM_H
#define HEADER_ARCH_SHM_H
/*
 * struct ipc_perm - ownership and permission record embedded in
 * struct shmid_ds.
 *
 * The explicit padding arrays fix the layout; NOTE(review): the layout
 * presumably has to match the ipc_perm seen by user space on x86_64 --
 * confirm before reordering or resizing any member.
 */
struct ipc_perm {
key_t key;
uid_t uid; /* presumably owner user id -- confirm */
gid_t gid; /* presumably owner group id -- confirm */
uid_t cuid; /* presumably creator user id -- confirm */
gid_t cgid; /* presumably creator group id -- confirm */
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
};
/*
 * struct shmid_ds - descriptor of one System V shared memory segment.
 *
 * Starts with the segment's struct ipc_perm and ends with 16 bytes of
 * explicit padding. NOTE(review): field meanings below follow the usual
 * shmid_ds naming and are not established by this header -- confirm.
 */
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz; /* presumably segment size in bytes */
time_t shm_atime; /* presumably last attach time */
time_t shm_dtime; /* presumably last detach time */
time_t shm_ctime; /* presumably last change time */
pid_t shm_cpid; /* presumably creator pid */
pid_t shm_lpid; /* presumably pid of last attach/detach */
uint64_t shm_nattch; /* presumably current attach count */
uint8_t padding[16];
};
#endif /* HEADER_ARCH_SHM_H */

View File

@@ -42,7 +42,10 @@ struct x86_cpu_local_variables {
uint64_t gdt[10];
/* 128 */
struct tss64 tss;
/* 232 */
unsigned long paniced;
uint64_t panic_regs[21];
/* 408 */
} __attribute__((packed));
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);

View File

@@ -22,19 +22,35 @@ struct x86_kregs {
};
typedef struct x86_kregs ihk_mc_kernel_context_t;
/* XXX: User context should contain floating point registers */
typedef struct x86_regs ihk_mc_user_context_t;
struct x86_user_context {
struct x86_sregs sr;
#define ihk_mc_syscall_arg0(uc) (uc)->rdi
#define ihk_mc_syscall_arg1(uc) (uc)->rsi
#define ihk_mc_syscall_arg2(uc) (uc)->rdx
#define ihk_mc_syscall_arg3(uc) (uc)->r10
#define ihk_mc_syscall_arg4(uc) (uc)->r8
#define ihk_mc_syscall_arg5(uc) (uc)->r9
/* 16-byte boundary here */
uint8_t is_gpr_valid;
uint8_t is_sr_valid;
uint8_t spare_flags6;
uint8_t spare_flags5;
uint8_t spare_flags4;
uint8_t spare_flags3;
uint8_t spare_flags2;
uint8_t spare_flags1;
struct x86_basic_regs gpr; /* must be last */
/* 16-byte boundary here */
};
typedef struct x86_user_context ihk_mc_user_context_t;
#define ihk_mc_syscall_ret(uc) (uc)->rax
#define ihk_mc_syscall_arg0(uc) (uc)->gpr.rdi
#define ihk_mc_syscall_arg1(uc) (uc)->gpr.rsi
#define ihk_mc_syscall_arg2(uc) (uc)->gpr.rdx
#define ihk_mc_syscall_arg3(uc) (uc)->gpr.r10
#define ihk_mc_syscall_arg4(uc) (uc)->gpr.r8
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
#define ihk_mc_syscall_pc(uc) (uc)->rip
#define ihk_mc_syscall_sp(uc) (uc)->rsp
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
#endif

View File

@@ -6,6 +6,8 @@
* Machine Specific Registers (MSR)
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -16,7 +18,31 @@
#include <types.h>
#define RFLAGS_CF (1 << 0)
#define RFLAGS_PF (1 << 2)
#define RFLAGS_AF (1 << 4)
#define RFLAGS_ZF (1 << 6)
#define RFLAGS_SF (1 << 7)
#define RFLAGS_TF (1 << 8)
#define RFLAGS_IF (1 << 9)
#define RFLAGS_DF (1 << 10)
#define RFLAGS_OF (1 << 11)
#define RFLAGS_IOPL (3 << 12)
#define RFLAGS_NT (1 << 14)
#define RFLAGS_RF (1 << 16)
#define RFLAGS_VM (1 << 17)
#define RFLAGS_AC (1 << 18)
#define RFLAGS_VIF (1 << 19)
#define RFLAGS_VIP (1 << 20)
#define RFLAGS_ID (1 << 21)
#define DB6_B0 (1 << 0)
#define DB6_B1 (1 << 1)
#define DB6_B2 (1 << 2)
#define DB6_B3 (1 << 3)
#define DB6_BD (1 << 13)
#define DB6_BS (1 << 14)
#define DB6_BT (1 << 15)
#define MSR_EFER 0xc0000080
#define MSR_STAR 0xc0000081
@@ -26,6 +52,13 @@
#define MSR_GS_BASE 0xc0000101
#define MSR_IA32_APIC_BASE 0x000000001b
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_IA32_PERF_CTL 0x00000199
#define MSR_IA32_MISC_ENABLE 0x000001a0
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_IA32_CR_PAT 0x00000277
#define CVAL(event, mask) \
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
@@ -37,6 +70,25 @@
#define MSR_PERF_CTL_0 0xc0010000
#define MSR_PERF_CTR_0 0xc0010004
/*
 * xgetbv - read an extended control register.
 *
 * index: register number, passed in ECX.
 *
 * Returns the 64-bit value assembled from EDX (upper half) and EAX
 * (lower half) as delivered by the `xgetbv` instruction.
 */
static unsigned long xgetbv(unsigned int index)
{
	unsigned int lo, hi;
	unsigned long value;

	asm volatile("xgetbv" : "=a" (lo), "=d" (hi) : "c" (index));

	value = hi;
	value <<= 32;
	value |= lo;
	return value;
}
/*
 * xsetbv - write an extended control register.
 *
 * index: register number, passed in ECX.
 * val:   64-bit value; its lower half goes to EAX and its upper half to
 *        EDX for the `xsetbv` instruction.
 */
static void xsetbv(unsigned int index, unsigned long val)
{
	unsigned int lo = (unsigned int)val;
	unsigned int hi = (unsigned int)(val >> 32);

	asm volatile("xsetbv" : : "a" (lo), "d" (hi), "c" (index));
}
static void wrmsr(unsigned int idx, unsigned long value){
unsigned int high, low;
@@ -135,10 +187,19 @@ struct tss64 {
unsigned short iomap_address;
} __attribute__((packed));
struct x86_regs {
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
unsigned long error, rip, cs, rflags, rsp, ss;
/*
 * struct x86_basic_regs - general purpose register frame saved on entry.
 *
 * Member order is the in-memory order produced by PUSH_ALL_REGS (last
 * pushed register first), followed by the error code slot and the
 * rip/cs/rflags/rsp/ss words. The assembly relies on these byte offsets
 * from the start of this struct: rax at 80, error at 120, rsp at 152
 * (RAX_OFFSET, ERROR_OFFSET, RSP_OFFSET relative to X86_REGS_BASE).
 * Do not reorder members without updating the assembly.
 */
struct x86_basic_regs {
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
unsigned long rip, cs, rflags, rsp, ss;
};
/*
 * struct x86_sregs - segment related state kept in front of the general
 * purpose registers in struct x86_user_context.
 *
 * Six unsigned longs, 48 bytes in total, which is the value the assembly
 * uses for X86_SREGS_SIZE.
 */
struct x86_sregs {
unsigned long fs_base;
unsigned long gs_base;
unsigned long ds;
unsigned long es;
unsigned long fs;
unsigned long gs;
};
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
@@ -162,7 +223,72 @@ enum x86_pf_error_code {
PF_RSVD = 1 << 3,
PF_INSTR = 1 << 4,
PF_PATCH = 1 << 29,
PF_POPULATE = 1 << 30,
};
/*
 * struct i387_fxsave_struct - first component of struct xsave_struct.
 *
 * Members add up to 512 bytes; the struct is 16-byte aligned.
 * NOTE(review): names suggest this mirrors the hardware FXSAVE image --
 * confirm against the processor manual before changing any member.
 */
struct i387_fxsave_struct {
unsigned short cwd;
unsigned short swd;
unsigned short twd;
unsigned short fop;
/* Two views of the same 16 bytes: 64-bit rip/rdp or four 32-bit words. */
union {
struct {
unsigned long rip;
unsigned long rdp;
};
struct {
unsigned int fip;
unsigned int fcs;
unsigned int foo;
unsigned int fos;
};
};
unsigned int mxcsr;
unsigned int mxcsr_mask;
unsigned int st_space[32]; /* 128 bytes */
unsigned int xmm_space[64]; /* 256 bytes */
unsigned int padding[12];
union {
unsigned int padding1[12];
unsigned int sw_reserved[12];
};
} __attribute__((aligned(16)));
/*
 * Components that follow the i387 area inside struct xsave_struct.
 * Sizes are fixed by the declarations below; NOTE(review): what each
 * area holds is inferred from its name only -- confirm against the
 * processor manual.
 */

/* 256-byte area (64 x 32-bit words). */
struct ymmh_struct {
unsigned int ymmh_space[64];
};
/* 128 reserved bytes. */
struct lwp_struct {
unsigned char reserved[128];
};
/* One lower/upper bound pair, 16 bytes, packed. */
struct bndreg {
unsigned long lower_bound;
unsigned long upper_bound;
} __attribute__((packed));
/* Bound configuration and status words, 16 bytes, packed. */
struct bndcsr {
unsigned long bndcfgu;
unsigned long bndstatus;
} __attribute__((packed));
/* 64-byte header: two bit vectors followed by six reserved words. */
struct xsave_hdr_struct {
unsigned long xstate_bv;
unsigned long xcomp_bv;
unsigned long reserved[6];
} __attribute__((packed));
/*
 * struct xsave_struct - per-process floating point / extended state
 * buffer, exposed to the rest of the kernel as fp_regs_struct.
 *
 * Packed and 64-byte aligned. release_fp_regs() and save_fp_regs() size
 * their page requests from sizeof(fp_regs_struct), so growing this
 * structure changes how many pages they use.
 */
struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
struct ymmh_struct ymmh;
struct lwp_struct lwp;
struct bndreg bndreg[4];
struct bndcsr bndcsr;
} __attribute__ ((packed, aligned (64)));
typedef struct xsave_struct fp_regs_struct;
#endif

View File

@@ -23,7 +23,7 @@
SYSCALL_DELEGATED(0, read)
SYSCALL_DELEGATED(1, write)
SYSCALL_DELEGATED(2, open)
SYSCALL_DELEGATED(3, close)
SYSCALL_HANDLED(3, close)
SYSCALL_DELEGATED(4, stat)
SYSCALL_DELEGATED(5, fstat)
SYSCALL_DELEGATED(8, lseek)
@@ -41,7 +41,12 @@ SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(26, msync)
SYSCALL_HANDLED(27, mincore)
SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(29, shmget)
SYSCALL_HANDLED(30, shmat)
SYSCALL_HANDLED(31, shmctl)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(39, getpid)
SYSCALL_HANDLED(56, clone)
@@ -52,27 +57,48 @@ SYSCALL_HANDLED(60, exit)
SYSCALL_HANDLED(61, wait4)
SYSCALL_HANDLED(62, kill)
SYSCALL_DELEGATED(63, uname)
SYSCALL_HANDLED(67, shmdt)
SYSCALL_DELEGATED(72, fcntl)
SYSCALL_DELEGATED(79, getcwd)
SYSCALL_DELEGATED(89, readlink)
SYSCALL_DELEGATED(96, gettimeofday)
SYSCALL_HANDLED(97, getrlimit)
SYSCALL_HANDLED(101, ptrace)
SYSCALL_DELEGATED(102, getuid)
SYSCALL_DELEGATED(104, getgid)
SYSCALL_DELEGATED(107, geteuid)
SYSCALL_DELEGATED(108, getegid)
SYSCALL_HANDLED(102, getuid)
SYSCALL_HANDLED(104, getgid)
SYSCALL_HANDLED(105, setuid)
SYSCALL_HANDLED(106, setgid)
SYSCALL_HANDLED(107, geteuid)
SYSCALL_HANDLED(108, getegid)
SYSCALL_HANDLED(109, setpgid)
SYSCALL_DELEGATED(110, getppid)
SYSCALL_HANDLED(110, getppid)
SYSCALL_DELEGATED(111, getpgrp)
SYSCALL_HANDLED(113, setreuid)
SYSCALL_HANDLED(114, setregid)
SYSCALL_HANDLED(117, setresuid)
SYSCALL_HANDLED(118, getresuid)
SYSCALL_HANDLED(119, setresgid)
SYSCALL_HANDLED(120, getresgid)
SYSCALL_HANDLED(122, setfsuid)
SYSCALL_HANDLED(123, setfsgid)
SYSCALL_HANDLED(127, rt_sigpending)
SYSCALL_HANDLED(128, rt_sigtimedwait)
SYSCALL_HANDLED(129, rt_sigqueueinfo)
SYSCALL_HANDLED(130, rt_sigsuspend)
SYSCALL_HANDLED(131, sigaltstack)
SYSCALL_HANDLED(142, sched_setparam)
SYSCALL_HANDLED(143, sched_getparam)
SYSCALL_HANDLED(144, sched_setscheduler)
SYSCALL_HANDLED(145, sched_getscheduler)
SYSCALL_HANDLED(146, sched_get_priority_max)
SYSCALL_HANDLED(147, sched_get_priority_min)
SYSCALL_HANDLED(148, sched_rr_get_interval)
SYSCALL_HANDLED(149, mlock)
SYSCALL_HANDLED(150, munlock)
SYSCALL_HANDLED(151, mlockall)
SYSCALL_HANDLED(152, munlockall)
SYSCALL_HANDLED(158, arch_prctl)
SYSCALL_HANDLED(160, setrlimit)
SYSCALL_HANDLED(186, gettid)
SYSCALL_DELEGATED(201, time)
SYSCALL_HANDLED(202, futex)
@@ -83,12 +109,21 @@ SYSCALL_DELEGATED(217, getdents64)
SYSCALL_HANDLED(218, set_tid_address)
SYSCALL_HANDLED(231, exit_group)
SYSCALL_HANDLED(234, tgkill)
SYSCALL_HANDLED(237, mbind)
SYSCALL_HANDLED(238, set_mempolicy)
SYSCALL_HANDLED(239, get_mempolicy)
SYSCALL_HANDLED(247, waitid)
SYSCALL_HANDLED(256, migrate_pages)
SYSCALL_HANDLED(273, set_robust_list)
SYSCALL_HANDLED(279, move_pages)
SYSCALL_HANDLED(282, signalfd)
SYSCALL_HANDLED(289, signalfd4)
#ifdef DCFA_KMOD
SYSCALL_HANDLED(303, mod_call)
#endif
SYSCALL_HANDLED(309, getcpu)
SYSCALL_HANDLED(310, process_vm_readv)
SYSCALL_HANDLED(311, process_vm_writev)
SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)

View File

@@ -24,39 +24,56 @@
#define USER_CS (48 + 3)
#define USER_DS (56 + 3)
#define PUSH_ALL_REGS \
pushq %rbp; \
pushq %rax; \
pushq %rbx; \
pushq %rcx; \
pushq %rdx; \
pushq %rsi; \
pushq %rdi; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11; \
pushq %r12; \
pushq %r13; \
pushq %r14; \
pushq %r15;
#define POP_ALL_REGS \
popq %r15; \
popq %r14; \
popq %r13; \
popq %r12; \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rdi; \
popq %rsi; \
popq %rdx; \
popq %rcx; \
popq %rbx; \
popq %rax; \
popq %rbp
/* struct x86_user_context */
#define X86_SREGS_BASE (0)
#define X86_SREGS_SIZE 48
#define X86_FLAGS_BASE (X86_SREGS_BASE + X86_SREGS_SIZE)
#define X86_FLAGS_SIZE 8
#define X86_REGS_BASE (X86_FLAGS_BASE + X86_FLAGS_SIZE)
#define RAX_OFFSET (X86_REGS_BASE + 80)
#define ERROR_OFFSET (X86_REGS_BASE + 120)
#define RSP_OFFSET (X86_REGS_BASE + 152)
/*
 * PUSH_ALL_REGS - build a struct x86_user_context on the stack.
 *
 * Expects the error code slot and the rip/cs/rflags/rsp/ss words to be on
 * the stack already. Pushes the fifteen general purpose registers so that
 * they end up in struct x86_basic_regs order, pushes one 8-byte flag word
 * with value 1, then reserves X86_FLAGS_BASE bytes below it for
 * struct x86_sregs. On exit %rsp points at the start of the context.
 */
#define PUSH_ALL_REGS \
pushq %rdi; \
pushq %rsi; \
pushq %rdx; \
pushq %rcx; \
pushq %rax; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11; \
pushq %rbx; \
pushq %rbp; \
pushq %r12; \
pushq %r13; \
pushq %r14; \
pushq %r15; \
pushq $1; /* is_gpr_valid is set, and others are cleared */ \
subq $X86_FLAGS_BASE,%rsp /* for x86_sregs, etc. */
/*
 * POP_ALL_REGS - undo PUSH_ALL_REGS.
 *
 * Zeroes the 8-byte flag word of the context, skips the x86_sregs and
 * flag areas, then pops the fifteen general purpose registers in the
 * reverse of the push order. The error code slot is left on the stack;
 * the visible callers drop it with `addq $8, %rsp` before `iretq`.
 */
#define POP_ALL_REGS \
movq $0,X86_FLAGS_BASE(%rsp); /* clear all flags */ \
addq $X86_REGS_BASE,%rsp; /* discard x86_sregs, flags, etc. */ \
popq %r15; \
popq %r14; \
popq %r13; \
popq %r12; \
popq %rbp; \
popq %rbx; \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rax; \
popq %rcx; \
popq %rdx; \
popq %rsi; \
popq %rdi
.data
.globl generic_common_handlers
generic_common_handlers:
@@ -75,7 +92,7 @@ vector=vector+1
common_interrupt:
PUSH_ALL_REGS
movq 120(%rsp), %rdi
movq ERROR_OFFSET(%rsp), %rdi
movq %rsp, %rsi
call handle_interrupt /* Enter C code */
POP_ALL_REGS
@@ -91,7 +108,7 @@ page_fault:
cld
PUSH_ALL_REGS
movq %cr2, %rdi
movq 120(%rsp),%rsi
movq ERROR_OFFSET(%rsp),%rsi
movq %rsp,%rdx
movq __page_fault_handler_address(%rip), %rax
andq %rax, %rax
@@ -113,10 +130,53 @@ general_protection_exception:
addq $8, %rsp
iretq
/*
 * nmi - NMI entry: snapshot the register state and stop this CPU.
 *
 * Every general purpose register, the interrupted rip/rflags/cs/rsp/ss
 * (read from the stack at offsets 0x00..0x20) and the ds/es/fs/gs
 * selectors are stored into the %gs-relative area starting at
 * PANIC_REGS. The flag word at PANICED is then set to 1 and the CPU
 * spins in a hlt loop; this handler never returns.
 *
 * PANICED (232) and PANIC_REGS (240) are hard-coded byte offsets; they
 * match the offset comment in front of `paniced` / `panic_regs[21]` in
 * struct x86_cpu_local_variables and must be kept in sync with it.
 * NOTE(review): assumes the %gs base points at that structure when an
 * NMI arrives -- confirm.
 */
.globl nmi
nmi:
#define PANICED 232
#define PANIC_REGS 240
movq %rax,%gs:PANIC_REGS+0x00
movq %rbx,%gs:PANIC_REGS+0x08
movq %rcx,%gs:PANIC_REGS+0x10
movq %rdx,%gs:PANIC_REGS+0x18
movq %rsi,%gs:PANIC_REGS+0x20
movq %rdi,%gs:PANIC_REGS+0x28
movq %rbp,%gs:PANIC_REGS+0x30
/* %rax has been saved above, so it is free to use as scratch from here. */
movq 0x18(%rsp),%rax /* rsp */
movq %rax,%gs:PANIC_REGS+0x38
movq %r8, %gs:PANIC_REGS+0x40
movq %r9, %gs:PANIC_REGS+0x48
movq %r10,%gs:PANIC_REGS+0x50
movq %r11,%gs:PANIC_REGS+0x58
movq %r12,%gs:PANIC_REGS+0x60
movq %r13,%gs:PANIC_REGS+0x68
movq %r14,%gs:PANIC_REGS+0x70
movq %r15,%gs:PANIC_REGS+0x78
movq 0x00(%rsp),%rax /* rip */
movq %rax,%gs:PANIC_REGS+0x80
/* From here on each value is stored as a 32-bit word. */
movq 0x10(%rsp),%rax /* rflags */
movl %eax,%gs:PANIC_REGS+0x88
movq 0x08(%rsp),%rax /* cs */
movl %eax,%gs:PANIC_REGS+0x8C
movq 0x20(%rsp),%rax /* ss */
movl %eax,%gs:PANIC_REGS+0x90
xorq %rax,%rax
movw %ds,%ax
movl %eax,%gs:PANIC_REGS+0x94
movw %es,%ax
movl %eax,%gs:PANIC_REGS+0x98
movw %fs,%ax
movl %eax,%gs:PANIC_REGS+0x9C
movw %gs,%ax
movl %eax,%gs:PANIC_REGS+0xA0
/* Publish the snapshot, then park the CPU for good. */
movq $1,%gs:PANICED
1:
hlt
jmp 1b
.globl x86_syscall
x86_syscall:
cld
movq %rsp, %gs:24
movq %rsp, %gs:X86_CPU_LOCAL_OFFSET_USTACK
movq %gs:(X86_CPU_LOCAL_OFFSET_SP0), %rsp
pushq $(USER_DS)
@@ -124,21 +184,19 @@ x86_syscall:
pushq %r11
pushq $(USER_CS)
pushq %rcx
pushq $0
movq %gs:24, %rcx
movq %rcx, 32(%rsp)
pushq %rax /* error code (= system call number) */
PUSH_ALL_REGS
movq 104(%rsp), %rdi
movq %gs:X86_CPU_LOCAL_OFFSET_USTACK, %rcx
movq %rcx, RSP_OFFSET(%rsp)
movq RAX_OFFSET(%rsp), %rdi
movw %ss, %ax
movw %ax, %ds
movq %rsp, %rsi
callq *__x86_syscall_handler(%rip)
1:
movq %rax, 104(%rsp)
movq %rax, RAX_OFFSET(%rsp)
POP_ALL_REGS
#ifdef USE_SYSRET
movq 8(%rsp), %rcx
movq 24(%rsp), %r11
movq 32(%rsp), %rsp
sysretq
#else
@@ -147,7 +205,32 @@ x86_syscall:
#endif
.globl enter_user_mode
enter_user_mode:
enter_user_mode:
movq $0, %rdi
movq %rsp, %rsi
call check_signal
POP_ALL_REGS
addq $8, %rsp
iretq
/*
 * debug_exception - entry stub that hands the debug exception to
 * debug_handler().
 *
 * Pushes a zero into the error code slot, builds the register context
 * with PUSH_ALL_REGS and passes its address in %rdi. On return the
 * context is popped, the error slot is dropped and the stub resumes the
 * interrupted code with iretq.
 */
.globl debug_exception
debug_exception:
cld
pushq $0 /* error */
PUSH_ALL_REGS
movq %rsp, %rdi
call debug_handler
POP_ALL_REGS
addq $8, %rsp
iretq
/*
 * int3_exception - entry stub that hands the int3 exception to
 * int3_handler().
 *
 * Pushes a zero into the error code slot, builds the register context
 * with PUSH_ALL_REGS and passes its address in %rdi. On return the
 * context is popped, the error slot is dropped and the stub resumes the
 * interrupted code with iretq.
 */
.globl int3_exception
int3_exception:
cld
pushq $0 /* error */
PUSH_ALL_REGS
movq %rsp, %rdi
call int3_handler
POP_ALL_REGS
addq $8, %rsp
iretq

View File

@@ -6,6 +6,8 @@
* resides in memory.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -19,13 +21,19 @@
#include <registers.h>
#include <string.h>
#define LOCALS_SPAN (4 * PAGE_SIZE)
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
void init_processors_local(int max_id)
{
size_t size;
size = LOCALS_SPAN * max_id;
/* Is contiguous allocating adequate?? */
locals = ihk_mc_alloc_pages(max_id, IHK_MC_AP_CRITICAL);
memset(locals, 0, PAGE_SIZE * max_id);
locals = ihk_mc_alloc_pages(size/PAGE_SIZE, IHK_MC_AP_CRITICAL);
memset(locals, 0, size);
kprintf("locals = %p\n", locals);
}
@@ -33,12 +41,12 @@ void init_processors_local(int max_id)
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
{
return (struct x86_cpu_local_variables *)
((char *)locals + (id << PAGE_SHIFT));
((char *)locals + (LOCALS_SPAN * id));
}
static void *get_x86_cpu_local_kstack(int id)
{
return ((char *)locals + ((id + 1) << PAGE_SHIFT));
return ((char *)locals + (LOCALS_SPAN * (id + 1)));
}
struct x86_cpu_local_variables *get_x86_this_cpu_local(void)
@@ -80,6 +88,15 @@ void assign_processor_id(void)
v->processor_id = id;
}
void init_boot_processor_local(void)
{
static struct x86_cpu_local_variables avar;
memset(&avar, -1, sizeof(avar));
set_gs_base(&avar);
return;
}
/** IHK **/
int ihk_mc_get_processor_id(void)
{

View File

@@ -5,6 +5,8 @@
* Acquire physical pages and manipulate page table entries.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -20,8 +22,9 @@
#include <list.h>
#include <process.h>
#include <page.h>
#include <cls.h>
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
static char *last_page;
@@ -263,7 +266,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
{
if (attr & PTATTR_UNCACHABLE) {
return (attr & ATTR_MASK) | PFL1_PCD | PFL1_PWT;
} else {
}
else if (attr & PTATTR_WRITE_COMBINED) {
return (attr & ATTR_MASK) | PFL1_PWT;
}
else {
return (attr & ATTR_MASK);
}
}
@@ -367,6 +374,7 @@ static int __set_pt_page(struct page_table *pt, void *virt, unsigned long phys,
unsigned long init_pt_lock_flags;
int ret = -ENOMEM;
init_pt_lock_flags = 0; /* for avoidance of warning */
if (in_kernel) {
init_pt_lock_flags = ihk_mc_spinlock_lock(&init_pt_lock);
}
@@ -494,8 +502,52 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage)
return 0;
}
/*
 * Walk the 4-level page table \a pt (init_pt when \a pt is NULL) and build
 * a pagemap word for the virtual address \a virt.
 *
 * Returns 0 when any level of the walk is not present.  Otherwise returns
 * PM_PFRAME(frame number) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT, where the
 * frame number is always expressed in PAGE_SIZE units, including for
 * addresses that are mapped by a large (PFL2_SIZE) entry.
 */
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
{
	int l4idx, l3idx, l2idx, l1idx;
	unsigned long v = (unsigned long)virt;
	unsigned long phys;
	uint64_t ret = 0;

	if (!pt) {
		pt = init_pt;
	}

	GET_VIRT_INDICES(v, l4idx, l3idx, l2idx, l1idx);

	if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
		return ret;
	}
	pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);

	if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
		return ret;
	}
	pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);

	if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
		return ret;
	}
	if ((pt->entry[l2idx] & PFL2_SIZE)) {
		/*
		 * Large page: combine the large-page base with the offset of
		 * v inside it.  The entry is masked with PT_PHYSMASK as well
		 * as LARGE_PAGE_MASK, the same way the 4KiB path below masks
		 * its entry, so that attribute bits stored above the address
		 * field cannot end up in the frame number.
		 * NOTE(review): assumes PT_PHYSMASK selects only the
		 * physical-address bits of an entry -- confirm.
		 */
		phys = (pt->entry[l2idx] & PT_PHYSMASK & LARGE_PAGE_MASK)
			+ (v & (LARGE_PAGE_SIZE - 1));
		ret = PM_PFRAME(phys >> PAGE_SHIFT);
		ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
		return ret;
	}
	pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);

	if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
		return ret;
	}

	ret = PM_PFRAME((pt->entry[l1idx] & PT_PHYSMASK) >> PAGE_SHIFT);
	ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
	return ret;
}
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
void *virt, unsigned long *phys)
const void *virt, unsigned long *phys)
{
int l4idx, l3idx, l2idx, l1idx;
unsigned long v = (unsigned long)virt;
@@ -1824,7 +1876,8 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
attr = common_vrflag_to_ptattr(flag, fault, ptep);
if ((fault & PF_PROT)
|| ((fault & PF_POPULATE) && (flag & VR_PRIVATE))) {
|| ((fault & (PF_POPULATE | PF_PATCH))
&& (flag & VR_PRIVATE))) {
attr |= PTATTR_DIRTY;
}
@@ -2043,7 +2096,7 @@ void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end,
/* Reserve Text + temporal heap */
cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
/* Reserve trampoline area to boot the second ap */
cb(AP_TRAMPOLINE, AP_TRAMPOLINE + AP_TRAMPOLINE_SIZE, 0);
cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
/* Reserve the null page */
cb(0, PAGE_SIZE, 0);
/* Micro-arch specific */
@@ -2072,9 +2125,9 @@ void *phys_to_virt(unsigned long p)
return (void *)(p + MAP_ST_START);
}
int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
int copy_from_user(void *dst, const void *src, size_t siz)
{
struct process_vm *vm = proc->vm;
struct process_vm *vm = cpu_local_var(current)->vm;
struct vm_range *range;
size_t pos;
size_t wsiz;
@@ -2101,9 +2154,62 @@ int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
return 0;
}
int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
{
struct process_vm *vm = proc->vm;
const uintptr_t ustart = (uintptr_t)usrc;
const uintptr_t uend = ustart + siz;
uint64_t reason;
uintptr_t addr;
int error;
const void *from;
void *to;
size_t remain;
size_t cpsize;
unsigned long pa;
void *va;
if ((ustart < vm->region.user_start)
|| (vm->region.user_end <= ustart)
|| ((vm->region.user_end - ustart) < siz)) {
return -EFAULT;
}
reason = PF_USER; /* page not present */
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
error = page_fault_process_vm(vm, (void *)addr, reason);
if (error) {
return error;
}
}
from = usrc;
to = kdst;
remain = siz;
while (remain > 0) {
cpsize = PAGE_SIZE - ((uintptr_t)from & (PAGE_SIZE - 1));
if (cpsize > remain) {
cpsize = remain;
}
error = ihk_mc_pt_virt_to_phys(vm->page_table, from, &pa);
if (error) {
return error;
}
va = phys_to_virt(pa);
memcpy(to, va, cpsize);
from += cpsize;
to += cpsize;
remain -= cpsize;
}
return 0;
} /* read_process_vm() */
int copy_to_user(void *dst, const void *src, size_t siz)
{
struct process_vm *vm = cpu_local_var(current)->vm;
struct vm_range *range;
size_t pos;
size_t wsiz;
@@ -2130,3 +2236,114 @@ int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
memcpy(dst, src, siz);
return 0;
}
int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
{
const uintptr_t ustart = (uintptr_t)udst;
const uintptr_t uend = ustart + siz;
uint64_t reason;
uintptr_t addr;
int error;
const void *from;
void *to;
size_t remain;
size_t cpsize;
unsigned long pa;
void *va;
if ((ustart < vm->region.user_start)
|| (vm->region.user_end <= ustart)
|| ((vm->region.user_end - ustart) < siz)) {
return -EFAULT;
}
reason = PF_POPULATE | PF_WRITE | PF_USER;
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
error = page_fault_process_vm(vm, (void *)addr, reason);
if (error) {
return error;
}
}
from = ksrc;
to = udst;
remain = siz;
while (remain > 0) {
cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
if (cpsize > remain) {
cpsize = remain;
}
error = ihk_mc_pt_virt_to_phys(vm->page_table, to, &pa);
if (error) {
return error;
}
va = phys_to_virt(pa);
memcpy(va, from, cpsize);
from += cpsize;
to += cpsize;
remain -= cpsize;
}
return 0;
} /* write_process_vm() */
int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
{
const uintptr_t ustart = (uintptr_t)udst;
const uintptr_t uend = ustart + siz;
uint64_t reason;
uintptr_t addr;
int error;
const void *from;
void *to;
size_t remain;
size_t cpsize;
unsigned long pa;
void *va;
kprintf("patch_process_vm(%p,%p,%p,%lx)\n", vm, udst, ksrc, siz);
if ((ustart < vm->region.user_start)
|| (vm->region.user_end <= ustart)
|| ((vm->region.user_end - ustart) < siz)) {
kprintf("patch_process_vm(%p,%p,%p,%lx):not in user\n", vm, udst, ksrc, siz);
return -EFAULT;
}
reason = PF_PATCH | PF_WRITE | PF_USER;
for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
error = page_fault_process_vm(vm, (void *)addr, reason);
if (error) {
kprintf("patch_process_vm(%p,%p,%p,%lx):pf(%lx):%d\n", vm, udst, ksrc, siz, addr, error);
return error;
}
}
from = ksrc;
to = udst;
remain = siz;
while (remain > 0) {
cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
if (cpsize > remain) {
cpsize = remain;
}
error = ihk_mc_pt_virt_to_phys(vm->page_table, to, &pa);
if (error) {
kprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, to, error);
return error;
}
va = phys_to_virt(pa);
memcpy(va, from, cpsize);
from += cpsize;
to += cpsize;
remain -= cpsize;
}
kprintf("patch_process_vm(%p,%p,%p,%lx):%d\n", vm, udst, ksrc, siz, 0);
return 0;
} /* patch_process_vm() */

File diff suppressed because it is too large Load Diff

View File

@@ -5,6 +5,8 @@
* implements x86's vsyscall
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 Hitachi, Ltd.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -58,3 +60,17 @@ long vsyscall_time(void *tp)
return t;
}
extern int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
__attribute__ ((section (".vsyscall.getcpu")));
int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
{
int error;
asm ("syscall" : "=a" (error)
: "a" (__NR_getcpu), "D" (cpup), "S" (nodep), "d" (tcachep)
: "%rcx", "%r11", "memory");
return error;
}

View File

@@ -0,0 +1,46 @@
#!/bin/bash -x
# \file arch/x86/tools/mcreboot-builtin-x86.sh.in
#  License details are found in the file LICENSE.
# \brief
#  mckernel boot script
# \author Masamichi Takagi <masamichi.takagi@riken.jp> \par
#  Copyright (C) 2014 RIKEN AICS
# HISTORY:
#
# Unloads any previous mcctrl/dcfa/ihk modules, reloads IHK, creates OS
# instance 0 with the CPUs reported as status 2 in dmesg and a quarter of
# the host memory, boots mckernel.img on it and finally loads mcctrl.
# Exits non-zero as soon as any step fails.

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

# Kill running mcexec instances, if there are any.  Calling kill without a
# PID only produces a usage error.
pids=`pidof mcexec`
if [ -n "$pids" ]; then
	kill -9 $pids
fi

if lsmod | grep mcctrl > /dev/null 2>&1; then
	rmmod mcctrl || exit 1
fi
if lsmod | grep dcfa > /dev/null 2>&1; then
	rmmod dcfa || exit 1
fi
if lsmod | grep ihk_builtin > /dev/null 2>&1; then
	rmmod ihk_builtin || exit 1
fi
if lsmod | grep ihk > /dev/null 2>&1; then
	rmmod ihk || exit 1
fi

# Stop here if IHK could not be loaded or the OS instance could not be
# created; the remaining steps cannot succeed without them.
insmod "$KMODDIR/ihk.ko" &&
insmod "$KMODDIR/ihk_builtin.ko" &&
"$SBINDIR/ihkconfig" 0 create || exit 1

# Number of cores whose status is reported as 2 in the SHIMOS boot message.
NCORE=`dmesg | grep -E 'SHIMOS: CPU Status:'|awk '{split($0,a," "); for (i = 1; i <= length(a); i++) { if(a[i] ~ /2/) {count++}} print count;}'`
# A quarter of the total memory, in GiB.
MEM=`free -g | grep -E 'Mem:' | awk '{print int($2/4)}'`

"$SBINDIR/ihkosctl" 0 alloc "$NCORE" "$MEM"g &&
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img" &&
"$SBINDIR/ihkosctl" 0 kargs hidos osnum=0 &&
"$SBINDIR/ihkosctl" 0 boot &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
insmod "$KMODDIR/mcctrl.ko" &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg || exit 1

exit 0

View File

@@ -0,0 +1,79 @@
#!/bin/bash

# IHK SMP-x86 example boot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
#      Copyright (C) 2014 RIKEN AICS
#
# This is an example script for loading IHK, configuring a partition and
# booting McKernel on it.
# The script reserves half of the CPU cores and 512MB of RAM from NUMA node 0
# when IHK is loaded for the first time, otherwise it destroys the current
# McKernel instance and reboots it using the same set of resources as it used
# previously.
# Note that the script does not output anything unless an error occurs.
# Every error path terminates the script with exit status 1.

prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"

mem="512M@0"

# Get the number of CPUs on NUMA node 0.
# The '#' header lines printed by "lscpu --parse" are skipped: a header line
# with fewer than four fields has an unset $4, which compares equal to 0 and
# would otherwise be counted as a CPU.
nr_cpus=`lscpu --parse | awk -F"," '!/^#/ {if ($4 == 0) print $4}' | wc -l`

# Use the second half of the cores
let nr_cpus="$nr_cpus / 2"
cpus=`lscpu --parse | awk -F"," '!/^#/ {if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?"; exit 1; fi

# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
	if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit 1; fi
fi

# Load IHK if not loaded
if [ "`lsmod | grep ihk`" == "" ]; then
	if ! insmod ${KMODDIR}/ihk.ko; then echo "error: loading ihk"; exit 1; fi;
fi

# Load IHK-SMP if not loaded and reserve CPUs and memory
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
	# Pick the first number in 64..255 that has neither a /proc/irq entry
	# nor a line in /proc/interrupts.  The comparison is a whole-line match
	# so that e.g. 64 is not considered busy just because 164 is in use.
	ihk_irq=""
	for i in `seq 64 255`; do
		if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -x $i`" == "" ]; then
			ihk_irq=$i
			break
		fi
	done
	if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available"; exit 1; fi
	if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq; then echo "error: loading ihk-smp-x86"; exit 1; fi;
	if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit 1; fi
	if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit 1; fi
fi

# Check for existing OS instance and destroy
if [ -c /dev/mcos0 ]; then
	# Query CPU cores and memory of OS instance so that the same values are used as previously
	if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then echo "error: querying cpus"; exit 1; fi
	cpus=`${SBINDIR}/ihkosctl 0 query cpu`
	if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit 1; fi
	mem=`${SBINDIR}/ihkosctl 0 query mem`

	# A failed destroy is reported but deliberately not fatal.
	if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed"; fi
else
	# Otherwise query IHK-SMP for resources
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit 1; fi
	cpus=`${SBINDIR}/ihkconfig 0 query cpu`
	if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit 1; fi
	mem=`${SBINDIR}/ihkconfig 0 query mem`
fi

if ! ${SBINDIR}/ihkconfig 0 create; then echo "error: create"; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then echo "error: assign CPUs"; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then echo "error: assign memory"; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loading kernel image"; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 kargs hidos; then echo "error: setting kernel arguments"; exit 1; fi
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting"; exit 1; fi
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko"; exit 1; fi

View File

@@ -0,0 +1,16 @@
#!/bin/bash
# \file arch/x86/tools/mcshutdown-attached-mic.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel shutdown script
#
# \author McKernel Development Team
#
# The @...@ placeholders below are filled in when configure generates the
# final script from this template.
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
# Ask ihkosctl to shut down OS instance 0.  The exit status of the script is
# the exit status of this command.
"$SBINDIR/ihkosctl" 0 shutdown

3049
configure vendored

File diff suppressed because it is too large Load Diff

View File

@@ -24,7 +24,7 @@ AC_ARG_WITH([kernelsrc],
AC_ARG_WITH([target],
AC_HELP_STRING(
[--with-target={attached-mic | builtin-mic | builtin-x86}],[target, default is attached-mic]),
[--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]),
[WITH_TARGET=$withval],[WITH_TARGET=yes])
AC_ARG_ENABLE([dcfa],
@@ -111,6 +111,26 @@ case $WITH_TARGET in
MANDIR="$prefix/attached/man"
fi
;;
smp-x86)
# Target "smp-x86": build for the architecture of the build host with the
# compiler found by AC_PROG_CC, which is also used as the cross compiler
# (XCC).
ARCH=`uname -m`
AC_PROG_CC
XCC=$CC
# Each install directory keeps a value that was set before this point and
# otherwise falls back to a location under $prefix.
if test "X$KERNDIR" = X; then
KERNDIR="$prefix/smp-x86/kernel"
fi
if test "X$BINDIR" = X; then
BINDIR="$prefix/bin"
fi
if test "X$SBINDIR" = X; then
SBINDIR="$prefix/sbin"
fi
if test "X$KMODDIR" = X; then
KMODDIR="$prefix/kmod"
fi
if test "X$MANDIR" = X; then
MANDIR="$prefix/smp-x86/man"
fi
;;
*)
	# Any other --with-target value is rejected.
	AC_MSG_ERROR([target $WITH_TARGET is unknown])
	;;
@@ -145,6 +165,9 @@ AC_CONFIG_FILES([
kernel/Makefile.build
arch/x86/tools/mcreboot-attached-mic.sh
arch/x86/tools/mcshutdown-attached-mic.sh
arch/x86/tools/mcreboot-builtin-x86.sh
arch/x86/tools/mcreboot-smp-x86.sh
arch/x86/tools/mcshutdown-builtin-x86.sh
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
])

View File

@@ -38,6 +38,9 @@
#define MCEXEC_UP_SEND_SIGNAL 0x30a02906
#define MCEXEC_UP_GET_CPU 0x30a02907
#define MCEXEC_UP_STRNCPY_FROM_USER 0x30a02908
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
#define MCEXEC_UP_GET_CRED 0x30a0290a
#define MCEXEC_UP_GET_CREDV 0x30a0290b
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
#define MCEXEC_UP_FREE_DMA 0x30a02911
@@ -45,6 +48,8 @@
#define MCEXEC_UP_OPEN_EXEC 0x30a02912
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
#define MCEXEC_UP_DEBUG_LOG 0x40000000
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0
#define MCEXEC_UP_TRANSFER_FROM_REMOTE 1
@@ -67,6 +72,7 @@ struct program_image_section {
};
#define SHELL_PATH_MAX_LEN 1024
#define MCK_RLIM_MAX 20
struct program_load_desc {
int num_sections;
@@ -76,6 +82,7 @@ struct program_load_desc {
int err;
int stack_prot;
int pgid;
int cred[8];
unsigned long entry;
unsigned long user_start;
unsigned long user_end;
@@ -90,8 +97,7 @@ struct program_load_desc {
unsigned long args_len;
char *envs;
unsigned long envs_len;
unsigned long rlimit_stack_cur;
unsigned long rlimit_stack_max;
struct rlimit rlimit[MCK_RLIM_MAX];
unsigned long interp_align;
char shell_path[SHELL_PATH_MAX_LEN];
struct program_image_section sections[0];
@@ -156,4 +162,8 @@ struct signal_desc {
char info[128];
};
/*
 * Argument of the MCEXEC_UP_NEW_PROCESS request.  mcexec_newprocess()
 * copies it from user space and records the pid for the release handler
 * of the calling file.
 */
struct newprocess_desc {
int pid;
};
#endif

View File

@@ -2,13 +2,14 @@ KDIR ?= @KDIR@
ARCH ?= @ARCH@
src = @abs_srcdir@
KMODDIR=@KMODDIR@
BINDIR=@BINDIR@
IHK_BASE=$(src)/../../../ihk
obj-m += mcctrl.o
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../ihk/linux/core/Module.symvers

View File

@@ -0,0 +1,276 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/binfmts.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/version.h>
#include "mcctrl.h"
/*
 * Test whether \a file lies below one of the directories in \a list.
 *
 * \a list is a ':'-separated sequence of directory prefixes.  Trailing '/'
 * characters of a prefix are ignored.  A prefix matches when \a file
 * starts with it and the character of \a file right after the prefix is
 * '/'.  An empty \a list matches every file.
 *
 * Returns 1 when a prefix matches, 0 otherwise.
 */
static int pathcheck(const char *file, const char *list)
{
	const char *elem = list;
	const char *end;
	int len;

	if (*list == '\0')
		return 1;

	for (;;) {
		/* The element ends at the next ':' or at the end of the list. */
		end = strchr(elem, ':');
		if (end == NULL)
			end = strchr(elem, '\0');

		/* Length of the element without its trailing slashes. */
		len = end - elem;
		while (len > 0 && elem[len - 1] == '/')
			len--;

		if (strncmp(file, elem, len) == 0 && file[len] == '/')
			return 1;

		if (*end == '\0')
			return 0;
		elem = end + 1;
	}
}
static int load_elf(struct linux_binprm *bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
, struct pt_regs *regs
#endif
)
{
char mcexec[BINPRM_BUF_SIZE];
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
const
#endif
char *wp;
char *cp;
struct file *file;
int rc;
struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
typedef struct {
char *name;
char *val;
int l;
} envdata;
envdata env[] = {
{.name = "MCEXEC"},
#define env_mcexec (env[0].val)
{.name = "MCEXEC_WL"},
#define env_mcexec_wl (env[1].val)
{.name = "MCEXEC_BL"},
#define env_mcexec_bl (env[2].val)
{.name = NULL}
};
envdata *ep;
unsigned long off = 0;
struct page *page;
char *addr = NULL;
int i;
unsigned long p;
int st;
int mode;
int cnt[2];
char buf[32];
int l;
int pass;
if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
return -ENOEXEC;
if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
return -ENOEXEC;
if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
return -ENOEXEC;
cp = strrchr(bprm->interp, '/');
if(!cp ||
!strcmp(cp, "/mcexec") ||
!strcmp(cp, "/ihkosctl") ||
!strcmp(cp, "/ihkconfig"))
return -ENOEXEC;
cnt[0] = bprm->argc;
cnt[1] = bprm->envc;
for(pass = 0; pass < 2; pass++){
p = bprm->p;
mode = cnt[0] == 0? (cnt[1] == 0? 2: 1): 0;
if(pass == 1){
for(ep = env; ep->name; ep++){
if(ep->l)
ep->val = kmalloc(ep->l, GFP_KERNEL);
}
}
ep = NULL;
l = 0;
for(i = 0, st = 0; mode != 2;){
if(st == 0){
off = p & ~PAGE_MASK;
rc = get_user_pages(current, bprm->mm,
bprm->p, 1, 0, 1,
&page, NULL);
if(rc <= 0)
return -EFAULT;
addr = kmap_atomic(page
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
, KM_USER0
#endif
);
st = 1;
}
if(addr[off]){
if(mode == 1){
if(ep){
if(pass == 1)
ep->val[l] = addr[off];
l++;
}
else if(addr[off] == '='){
if(l < 32)
buf[l] = '\0';
buf[31] = '\0';
for(ep = env; ep->name; ep++)
if(!strcmp(ep->name, buf))
break;
if(ep->name)
l = 0;
else
ep = NULL;
}
else{
if(l < 32)
buf[l] = addr[off];
l++;
}
}
}
else{
if(mode == 1 && ep){
if(pass == 0){
ep->l = l + 1;
}
else{
ep->val[l] = '\0';
}
}
ep = NULL;
l = 0;
i++;
if(i == cnt[mode]){
i = 0;
mode++;
}
}
off++;
p++;
if(off == PAGE_SIZE || mode == 2){
kunmap_atomic(addr
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
, KM_USER0
#endif
);
put_page(page);
st = 0;
}
}
}
if(!env_mcexec || !strcmp(env_mcexec, "0") || !strcmp(env_mcexec, "off"))
rc = 1;
else{
rc = 0;
if(strchr(env_mcexec, '/') && strlen(env_mcexec) < BINPRM_BUF_SIZE)
strcpy(mcexec, env_mcexec);
else
strcpy(mcexec, MCEXEC_PATH);
}
if(rc);
else if(env_mcexec_wl)
rc = !pathcheck(bprm->interp, env_mcexec_wl);
else if(env_mcexec_bl)
rc = pathcheck(bprm->interp, env_mcexec_bl);
else
rc = pathcheck(bprm->interp, "/usr:/bin:/sbin:/opt");
for(ep = env; ep->name; ep++)
if(ep->val)
kfree(ep->val);
if(rc)
return -ENOEXEC;
file = open_exec(mcexec);
if (IS_ERR(file))
return -ENOEXEC;
rc = remove_arg_zero(bprm);
if (rc){
fput(file);
return rc;
}
rc = copy_strings_kernel(1, &bprm->interp, bprm);
if (rc < 0){
fput(file);
return rc;
}
bprm->argc++;
wp = mcexec;
rc = copy_strings_kernel(1, &wp, bprm);
if (rc){
fput(file);
return rc;
}
bprm->argc++;
#if 1
rc = bprm_change_interp(mcexec, bprm);
if (rc < 0){
fput(file);
return rc;
}
#else
if(brpm->interp != bprm->filename)
kfree(brpm->interp);
kfree(brpm->filename);
bprm->filename = bprm->interp = kstrdup(mcexec, GFP_KERNEL);
if(!bprm->interp){
fput(file);
return -ENOMEM;
}
#endif
allow_write_access(bprm->file);
fput(bprm->file);
bprm->file = file;
rc = prepare_binprm(bprm);
if (rc < 0){
return rc;
}
return search_binary_handler(bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
, regs
#endif
);
}
/* Binary format descriptor whose loader is load_elf(). */
static struct linux_binfmt mcexec_format = {
.module = THIS_MODULE,
.load_binary = load_elf,
};
/* Registers mcexec_format with the kernel via insert_binfmt(). */
void __init binfmt_mcexec_init(void)
{
insert_binfmt(&mcexec_format);
}
/* Removes mcexec_format again via unregister_binfmt(). */
void __exit binfmt_mcexec_exit(void)
{
unregister_binfmt(&mcexec_format);
}

View File

@@ -31,12 +31,15 @@
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/version.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/msr.h>
#include <asm/io.h>
#include "mcctrl.h"
//#define DEBUG
#ifdef DEBUG
#define dprintk printk
#else
@@ -242,19 +245,69 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
//extern unsigned long last_thread_exec;
/*
 * Per-file context passed to release_handler(): the pid that is reported
 * in the SCD_MSG_CLEANUP_PROCESS packet.
 */
struct handlerinfo {
int pid;
};
/*
 * Handler of MCEXEC_UP_DEBUG_LOG: sends \a arg to \a os in an
 * SCD_MSG_DEBUG_LOG packet.  The result of the send is not examined; the
 * function always returns 0.
 */
static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
{
	struct ikc_scd_packet packet;

	/* Clear the whole packet before filling in the used fields. */
	memset(&packet, 0, sizeof(packet));
	packet.msg = SCD_MSG_DEBUG_LOG;
	packet.arg = arg;

	mcctrl_ikc_send(os, 0, &packet);
	return 0;
}
/*
 * Release callback registered through ihk_os_register_release_handler().
 * \a param is the struct handlerinfo allocated at registration time.  Its
 * pid is sent to \a os in an SCD_MSG_CLEANUP_PROCESS packet, after which
 * the structure is freed.
 */
static void release_handler(ihk_os_t os, void *param)
{
	struct handlerinfo *info = param;
	struct ikc_scd_packet packet;

	/* Clear the whole packet before filling in the used fields. */
	memset(&packet, 0, sizeof(packet));
	packet.msg = SCD_MSG_CLEANUP_PROCESS;
	packet.pid = info->pid;

	mcctrl_ikc_send(os, 0, &packet);
	kfree(info);
}
/*
 * Handler of MCEXEC_UP_NEW_PROCESS.
 *
 * Copies the descriptor from \a udesc and registers release_handler() on
 * \a file with a freshly allocated struct handlerinfo carrying the
 * descriptor's pid; release_handler() frees that structure.
 *
 * Returns 0 on success, -EFAULT if the descriptor cannot be read and
 * -ENOMEM if the handler context cannot be allocated.
 */
static long mcexec_newprocess(ihk_os_t os,
                 struct newprocess_desc *__user udesc,
                 struct file *file)
{
	struct newprocess_desc desc;
	struct handlerinfo *info;

	if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) {
		return -EFAULT;
	}

	info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
	if (!info) {
		/* Allocation can fail; do not dereference a NULL pointer. */
		return -ENOMEM;
	}
	info->pid = desc.pid;
	ihk_os_register_release_handler(file, release_handler, info);
	return 0;
}
static long mcexec_start_image(ihk_os_t os,
struct program_load_desc * __user udesc)
struct program_load_desc * __user udesc,
struct file *file)
{
struct program_load_desc desc;
struct ikc_scd_packet isp;
struct mcctrl_channel *c;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct handlerinfo *info;
if (copy_from_user(&desc, udesc,
sizeof(struct program_load_desc))) {
return -EFAULT;
}
info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
info->pid = desc.pid;
ihk_os_register_release_handler(file, release_handler, info);
c = usrdata->channels + desc.cpu;
mcctrl_ikc_set_recv_cpu(os, desc.cpu);
@@ -439,14 +492,15 @@ retry_alloc:
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
if (ret) {
return -EINTR;
}
/* Remove per-process wait queue head */
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
if (ret) {
kfree(wqhln);
return -EINTR;
}
kfree(wqhln);
if (c->param.request_va->number == 61 &&
@@ -723,7 +777,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
}
LIST_HEAD(mckernel_exec_files);
spinlock_t mckernel_exec_file_lock = SPIN_LOCK_UNLOCKED;
DEFINE_SPINLOCK(mckernel_exec_file_lock);
struct mckernel_exec_file {
@@ -733,6 +787,47 @@ struct mckernel_exec_file {
struct list_head list;
};
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
#define GUIDVAL(x) (x)
#else
#define GUIDVAL(x) ((x).val)
#endif
/*
 * Handler of MCEXEC_UP_GET_CRED: stores the eight credential ids of the
 * calling task as ints at the kernel mapping of the physical address
 * \a phys, in the order uid, euid, suid, fsuid, gid, egid, sgid, fsgid.
 * Always returns 0.
 *
 * NOTE(review): \a phys is the raw request argument and is written through
 * without any validation visible here -- confirm that callers can only
 * pass addresses they are entitled to.
 */
int
mcexec_getcred(unsigned long phys)
{
int *virt = phys_to_virt(phys);
virt[0] = GUIDVAL(current_uid());
virt[1] = GUIDVAL(current_euid());
virt[2] = GUIDVAL(current_suid());
virt[3] = GUIDVAL(current_fsuid());
virt[4] = GUIDVAL(current_gid());
virt[5] = GUIDVAL(current_egid());
virt[6] = GUIDVAL(current_sgid());
virt[7] = GUIDVAL(current_fsgid());
return 0;
}
/*
 * Handler of MCEXEC_UP_GET_CREDV: copies the eight credential ids of the
 * calling task to the user buffer \a virt as ints, in the order uid,
 * euid, suid, fsuid, gid, egid, sgid, fsgid.
 *
 * Returns 0 on success or -EFAULT if the user buffer cannot be written.
 */
int
mcexec_getcredv(int __user *virt)
{
	int cred[8] = {
		GUIDVAL(current_uid()),
		GUIDVAL(current_euid()),
		GUIDVAL(current_suid()),
		GUIDVAL(current_fsuid()),
		GUIDVAL(current_gid()),
		GUIDVAL(current_egid()),
		GUIDVAL(current_sgid()),
		GUIDVAL(current_fsgid()),
	};

	return copy_to_user(virt, cred, sizeof(cred)) ? -EFAULT : 0;
}
int mcexec_open_exec(ihk_os_t os, char * __user filename)
{
struct file *file;
@@ -857,7 +952,8 @@ long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __use
return 0;
}
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
struct file *file)
{
switch (req) {
case MCEXEC_UP_PREPARE_IMAGE:
@@ -867,7 +963,7 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
return mcexec_transfer_image(os, (struct remote_transfer *)arg);
case MCEXEC_UP_START_IMAGE:
return mcexec_start_image(os, (struct program_load_desc *)arg);
return mcexec_start_image(os, (struct program_load_desc *)arg, file);
case MCEXEC_UP_WAIT_SYSCALL:
return mcexec_wait_syscall(os, (struct syscall_wait_desc *)arg);
@@ -888,6 +984,10 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
return mcexec_strncpy_from_user(os,
(struct strncpy_from_user_desc *)arg);
case MCEXEC_UP_NEW_PROCESS:
return mcexec_newprocess(os, (struct newprocess_desc *)arg,
file);
case MCEXEC_UP_OPEN_EXEC:
return mcexec_open_exec(os, (char *)arg);
@@ -899,6 +999,15 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
case MCEXEC_UP_FREE_DMA:
return mcexec_free_region(os, (unsigned long *)arg);
case MCEXEC_UP_GET_CRED:
return mcexec_getcred((unsigned long)arg);
case MCEXEC_UP_GET_CREDV:
return mcexec_getcredv((int *)arg);
case MCEXEC_UP_DEBUG_LOG:
return mcexec_debug_log(os, arg);
}
return -EINVAL;
}

View File

@@ -29,7 +29,8 @@
#define OS_MAX_MINOR 64
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long);
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long,
struct file *);
extern int prepare_ikc_channels(ihk_os_t os);
extern void destroy_ikc_channels(ihk_os_t os);
#ifndef DO_USER_MODE
@@ -38,11 +39,15 @@ extern void mcctrl_syscall_init(void);
extern void procfs_init(int);
extern void procfs_exit(int);
extern void rus_page_hash_init(void);
extern void rus_page_hash_put_pages(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
unsigned long arg)
unsigned long arg, struct file *file)
{
return __mcctrl_control(os, request, arg);
return __mcctrl_control(os, request, arg, file);
}
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
@@ -55,10 +60,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
};
static struct ihk_os_user_call mcctrl_uc_proto = {
@@ -101,6 +110,8 @@ static int __init mcctrl_init(void)
mcctrl_syscall_init();
#endif
rus_page_hash_init();
for(i = 0; i < OS_MAX_MINOR; i++){
if (os[i]) {
memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
@@ -113,6 +124,8 @@ static int __init mcctrl_init(void)
}
}
binfmt_mcexec_init();
return 0;
}
@@ -120,6 +133,7 @@ static void __exit mcctrl_exit(void)
{
int i;
binfmt_mcexec_exit();
printk("mcctrl: unregistered.\n");
for(i = 0; i < OS_MAX_MINOR; i++){
if(os[i]){
@@ -128,6 +142,8 @@ static void __exit mcctrl_exit(void)
procfs_exit(i);
}
}
rus_page_hash_put_pages();
}
MODULE_LICENSE("GPL v2");

View File

@@ -48,12 +48,15 @@
#define SCD_MSG_SYSCALL_ONESIDE 0x4
#define SCD_MSG_SEND_SIGNAL 0x8
#define SCD_MSG_CLEANUP_PROCESS 0x9
#define SCD_MSG_PROCFS_CREATE 0x10
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_DEBUG_LOG 0x20
#define DMA_PIN_SHIFT 21
#define DO_USER_MODE

View File

@@ -10,12 +10,15 @@
* HISTORY:
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/resource.h>
#include "mcctrl.h"
#include <linux/version.h>
//#define PROCFS_DEBUG
@@ -26,16 +29,16 @@
#endif
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat);
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos);
/* A private data for the procfs driver. */
struct procfs_list_entry;
struct procfs_list_entry {
struct list_head list;
struct proc_dir_entry *entry;
struct proc_dir_entry *parent;
struct procfs_list_entry *parent;
ihk_os_t os;
int osnum;
int pid;
@@ -53,6 +56,28 @@ struct procfs_list_entry {
LIST_HEAD(procfs_file_list);
static ihk_spinlock_t procfs_file_list_lock;
/*
 * llseek implementation of the McKernel procfs files.
 *
 * \a orig 0 sets the file position to \a offset, \a orig 1 advances the
 * current position by \a offset; every other value is rejected with
 * -EINVAL.  Returns the new file position.
 */
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
	if (orig == 0) {
		/* absolute position */
		file->f_pos = offset;
	} else if (orig == 1) {
		/* relative to the current position */
		file->f_pos += offset;
	} else {
		return -EINVAL;
	}

	return file->f_pos;
}
/*
 * File operations installed on the procfs files created by this driver:
 * seeking and reading are supported, no write handler is provided.
 */
static const struct file_operations mckernel_procfs_file_operations = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_read,
.write = NULL,
};
/**
* \brief Return specified procfs entry.
*
@@ -71,22 +96,22 @@ static ihk_spinlock_t procfs_file_list_lock;
/*
* XXX: Two or more entries which have same name can be created.
*
* get_procfs_entry() avoids creating an entry which has already been created.
* get_procfs_list_entry() avoids creating an entry which has already been created.
* But, it allows creating an entry which is being created by another thread.
*
* This problem occurred when two requests which created files with a common
* ancestor directory which was not explicitly created were racing.
*/
static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode)
{
char *r;
struct proc_dir_entry *ret = NULL, *parent = NULL;
struct procfs_list_entry *e;
struct proc_dir_entry *pde = NULL;
struct procfs_list_entry *e, *ret = NULL, *parent = NULL;
char name[PROCFS_NAME_MAX];
unsigned long irqflags;
dprintk("get_procfs_entry: %s for osnum %d mode %o\n", p, osnum, mode);
dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode);
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
list_for_each_entry(e, &procfs_file_list, list) {
if (e == NULL) {
@@ -95,7 +120,8 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
}
if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
/* We found the entry */
ret = e->entry;
ret = e;
break;
}
}
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
@@ -107,19 +133,19 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
/* We have non-null parent dir. */
strncpy(name, p, r - p);
name[r - p] = '\0';
parent = get_procfs_entry(name, osnum, 0);
parent = get_procfs_list_entry(name, osnum, 0);
if (parent == NULL) {
/* We counld not get a parent procfs entry. Give up.*/
return NULL;
}
}
e = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
if (e == NULL) {
ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
if (ret == NULL) {
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
return NULL;
}
/* Fill the fname field of the entry */
strncpy(e->fname, p, PROCFS_NAME_MAX);
strncpy(ret->fname, p, PROCFS_NAME_MAX);
if (r != NULL) {
strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1);
@@ -127,25 +153,38 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
strncpy(name, p, PROCFS_NAME_MAX);
}
if (mode == 0) {
ret = proc_mkdir(name, parent);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
pde = proc_mkdir(name, parent ? parent->entry : NULL);
#else
pde = proc_mkdir_data(name, 0555, parent ? parent->entry : NULL, ret);
#endif
} else {
ret = create_proc_entry(name, mode, parent);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
pde = create_proc_entry(name, mode, parent->entry);
if (pde)
pde->proc_fops = &mckernel_procfs_file_operations;
#else
pde = proc_create_data(name, mode, parent->entry,
&mckernel_procfs_file_operations, ret);
#endif
}
if (ret == NULL) {
if (pde == NULL) {
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
kfree(e);
kfree(ret);
return NULL;
}
ret->data = e;
e->osnum = osnum;
e->entry = ret;
e->parent = parent;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
pde->data = ret;
#endif
ret->osnum = osnum;
ret->entry = pde;
ret->parent = parent;
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
list_add(&(e->list), &procfs_file_list);
list_add(&(ret->list), &procfs_file_list);
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
dprintk("get_procfs_entry: %s done\n", p);
dprintk("get_procfs_list_entry: %s done\n", p);
return ret;
}
@@ -161,7 +200,6 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
{
struct proc_dir_entry *entry;
struct procfs_list_entry *e;
ihk_device_t dev = ihk_os_to_dev(__os);
unsigned long parg;
@@ -183,18 +221,16 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
printk("ERROR: procfs_creat: file name not properly terminated.\n");
goto quit;
}
entry = get_procfs_entry(name, osnum, mode);
if (entry == NULL) {
e = get_procfs_list_entry(name, osnum, mode);
if (e == NULL) {
printk("ERROR: could not create a procfs entry for %s.\n", name);
goto quit;
}
e = entry->data;
e->os = __os;
e->cpu = ref;
e->pid = pid;
entry->read_proc = mckernel_procfs_read;
quit:
f->status = 1; /* Now the peer can free the data. */
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
@@ -216,7 +252,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
unsigned long parg;
struct procfs_file *f;
struct procfs_list_entry *e;
struct proc_dir_entry *parent = NULL;
struct procfs_list_entry *parent = NULL;
char name[PROCFS_NAME_MAX];
char *r;
unsigned long irqflags;
@@ -230,8 +266,10 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
(e->osnum == osnum)) {
list_del(&e->list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
e->entry->read_proc = NULL;
e->entry->data = NULL;
#endif
parent = e->parent;
kfree(e);
r = strrchr(f->fname, '/');
@@ -241,7 +279,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
strncpy(name, r + 1, PROCFS_NAME_MAX);
}
dprintk("found and remove %s from the list.\n", name);
remove_proc_entry(name, parent);
remove_proc_entry(name, parent->entry);
break;
}
}
@@ -271,27 +309,50 @@ void procfs_answer(unsigned int arg, int err)
* This function conforms to the 2) way of fs/proc/generic.c
* from linux-2.6.39.4.
*/
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat)
static ssize_t
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
loff_t *ppos)
{
struct procfs_list_entry *e = dat;
struct inode * inode = file->f_path.dentry->d_inode;
char *kern_buffer;
int order = 0;
volatile struct procfs_read *r;
struct ikc_scd_packet isp;
int ret, retrycount = 0;
unsigned long pbuf;
unsigned long count = nbytes;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
struct proc_dir_entry *dp = PDE(inode);
struct procfs_list_entry *e = dp->data;
#else
struct procfs_list_entry *e = PDE_DATA(inode);
#endif
loff_t offset = *ppos;
dprintk("mckernel_procfs_read: invoked for %s\n", e->fname);
if (count <= 0 || dat == NULL || offset < 0) {
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
e->fname, offset, count);
if (count <= 0 || offset < 0) {
return 0;
}
pbuf = virt_to_phys(buffer);
if (pbuf / PAGE_SIZE != (pbuf + count - 1) / PAGE_SIZE) {
/* Truncate the read count upto the nearest page boundary */
count = ((pbuf + count - 1) / PAGE_SIZE) * PAGE_SIZE - pbuf;
while ((1 << order) < count) ++order;
if (order > 12) {
order -= 12;
}
else {
order = 1;
}
/* NOTE: we need physically contiguous memory to pass through IKC */
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
if (!kern_buffer) {
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
return -ENOMEM;
}
pbuf = virt_to_phys(kern_buffer);
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
if (r == NULL) {
return -ENOMEM;
@@ -309,18 +370,23 @@ retry:
isp.msg = SCD_MSG_PROCFS_REQUEST;
isp.ref = e->cpu;
isp.arg = virt_to_phys(r);
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
if (ret < 0) {
goto out; /* error */
}
/* Wait for a reply. */
ret = -EIO; /* default exit code */
dprintk("now wait for a relpy\n");
/* Wait for the status field of the procfs_read structure set ready. */
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
goto out;
}
/* Wake up and check the result. */
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
if ((r->ret == 0) && (r->eof != 1)) {
@@ -335,13 +401,20 @@ retry:
dprintk("retry\n");
goto retry;
}
if (r->eof == 1) {
dprintk("reached end of file.\n");
*peof = 1;
if (r->ret > 0) {
if (copy_to_user(buf, kern_buffer, r->ret)) {
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
ret = -EFAULT;
goto out;
}
*ppos += r->ret;
}
*start = buffer;
ret = r->ret;
out:
free_pages((uintptr_t)kern_buffer, order);
kfree((void *)r);
return ret;
@@ -367,7 +440,7 @@ void procfs_exit(int osnum) {
int error;
mm_segment_t old_fs = get_fs();
struct kstat stat;
struct proc_dir_entry *parent;
struct procfs_list_entry *parent;
struct procfs_list_entry *e, *temp = NULL;
unsigned long irqflags;
@@ -378,8 +451,10 @@ void procfs_exit(int osnum) {
if (e->osnum == osnum) {
dprintk("found entry for %s.\n", e->fname);
list_del(&e->list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
e->entry->read_proc = NULL;
e->entry->data = NULL;
#endif
parent = e->parent;
r = strrchr(e->fname, '/');
if (r == NULL) {
@@ -387,7 +462,9 @@ void procfs_exit(int osnum) {
} else {
r += 1;
}
remove_proc_entry(r, parent);
if (parent) {
remove_proc_entry(r, parent->entry);
}
dprintk("free the entry\n");
kfree(e);
}

View File

@@ -13,6 +13,8 @@
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2013 The University of Tokyo
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 RIKEN AICS
*/
/*
* HISTORY:
@@ -43,6 +45,7 @@
#include <asm/delay.h>
#include <asm/io.h>
#include "mcctrl.h"
#include <linux/version.h>
#define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
@@ -319,6 +322,109 @@ out:
return error;
}
#define RUS_PAGE_HASH_SHIFT 8
#define RUS_PAGE_HASH_SIZE (1UL << RUS_PAGE_HASH_SHIFT)
#define RUS_PAGE_HASH_MASK (RUS_PAGE_HASH_SIZE - 1)
struct list_head rus_page_hash[RUS_PAGE_HASH_SIZE];
spinlock_t rus_page_hash_lock;
/*
 * One entry of rus_page_hash: bookkeeping for a single page that
 * rus_page_hash_insert() has taken a get_page() reference on.
 */
struct rus_page {
struct list_head hash; /* link in the rus_page_hash[] bucket chain */
struct page *page; /* the tracked page; also the lookup key */
int refcount; /* bumped by every rus_page_hash_insert() of this page */
int put_page; /* set to 0 on insert; NOTE(review): no reader visible in this chunk */
};
void rus_page_hash_init(void)
{
int i;
spin_lock_init(&rus_page_hash_lock);
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&rus_page_hash[i]);
}
}
/*
 * Find the hash entry that tracks @page.
 *
 * rus_page_hash_lock must be held by the caller.
 *
 * Returns the matching entry, or NULL when @page is not in the hash.
 */
struct rus_page *_rus_page_hash_lookup(struct page *page)
{
	struct list_head *bucket =
		&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK];
	struct rus_page *cur;

	list_for_each_entry(cur, bucket, hash) {
		if (cur->page == page)
			return cur;
	}

	return NULL;
}
static int rus_page_hash_insert(struct page *page)
{
int ret = 0;
struct rus_page *rp;
spin_lock(&rus_page_hash_lock);
rp = _rus_page_hash_lookup(page);
if (!rp) {
rp = kmalloc(sizeof(*rp), GFP_ATOMIC);
if (!rp) {
printk("rus_page_add_hash(): error allocating rp\n");
ret = -ENOMEM;
goto out;
}
rp->page = page;
rp->put_page = 0;
get_page(page);
rp->refcount = 0; /* Will be increased below */
list_add_tail(&rp->hash,
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK]);
}
++rp->refcount;
out:
spin_unlock(&rus_page_hash_lock);
return ret;
}
void rus_page_hash_put_pages(void)
{
int i;
struct rus_page *rp_iter;
struct rus_page *rp_iter_next;
spin_lock(&rus_page_hash_lock);
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
list_for_each_entry_safe(rp_iter, rp_iter_next,
&rus_page_hash[i], hash) {
list_del(&rp_iter->hash);
put_page(rp_iter->page);
kfree(rp_iter);
}
}
spin_unlock(&rus_page_hash_lock);
}
/*
* remap_pfn_range() may raise VM_PFN_AT_MMAP.
* VM_PFN_AT_MMAP causes the following problems.
@@ -329,6 +435,7 @@ out:
* These problems may be solved in linux-3.7.
* It uses vm_insert_pfn() until it is fixed.
*/
#define USE_VM_INSERT_PFN 1
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -409,15 +516,11 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (pfn_valid(pfn+pix)) {
page = pfn_to_page(pfn+pix);
if (!page_count(page)) {
get_page(page);
/*
* TODO:
* The pages which get_page() has been called with
* should be recorded. Because these pages have to
* be passed to put_page() before they are freed.
*/
if ((error = rus_page_hash_insert(page)) < 0) {
printk("rus_vm_fault: error hashing page??\n");
}
error = vm_insert_page(vma, rva+(pix*PAGE_SIZE), page);
if (error) {
printk("vm_insert_page: %d\n", error);
@@ -448,7 +551,11 @@ static struct vm_operations_struct rus_vmops = {
/*
 * mmap handler: marks the VMA as a non-expandable mixed map and installs
 * rus_vmops as its vm_operations. Always returns 0.
 */
static int rus_mmap(struct file *file, struct vm_area_struct *vma)
{
/* Kernels before 3.7 set VM_RESERVED; 3.7 and later set VM_DONTDUMP instead. */
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP;
#else
vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP;
#endif
vma->vm_ops = &rus_vmops;
return 0;
}
@@ -491,9 +598,18 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un
if (vma) {
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
start = do_mmap_pgoff(file, 0, end,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
#endif
up_write(&current->mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
start = vm_mmap(file, 0, end,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
#endif
revert_creds(original);
put_cred(promoted);
@@ -782,19 +898,19 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
error = vfs_fstat(fd, &st);
if (error) {
printk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
dprintk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
goto out;
}
if (!S_ISREG(st.mode)) {
error = -ESRCH;
printk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
dprintk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
goto out;
}
file = fget(fd);
if (!file) {
error = -EBADF;
printk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
dprintk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
goto out;
}
@@ -817,7 +933,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
}
if (!(maxprot & PROT_READ)) {
error = -EACCES;
printk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
dprintk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
goto out;
}
@@ -1100,7 +1216,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
struct pager_map_result *resp;
uintptr_t phys;
printk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
pager = kzalloc(sizeof(*pager), GFP_KERNEL);
if (!pager) {
error = -ENOMEM;
@@ -1128,8 +1244,17 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
down_write(&current->mm->mmap_sem);
#define ANY_WHERE 0
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff);
#endif
up_write(&current->mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
va = vm_mmap(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff << PAGE_SHIFT);
#endif
if (IS_ERR_VALUE(va)) {
printk("pager_req_map(%p,%d,%lx,%lx,%lx):do_mmap_pgoff failed. %d\n", os, fd, len, off, result_rpa, (int)va);
error = va;
@@ -1140,6 +1265,9 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
pager->map_uaddr = va;
pager->map_len = len;
pager->map_off = off;
dprintk("pager_req_map(%s): 0x%lx - 0x%lx (len: %lu)\n",
file->f_dentry->d_name.name, va, va + len, len);
phys = ihk_device_map_memory(dev, result_rpa, sizeof(*resp));
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
@@ -1158,10 +1286,11 @@ out:
if (pager) {
kfree(pager);
}
printk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
return error;
}
static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppfn_rpa)
{
const ihk_device_t dev = ihk_os_to_dev(os);
@@ -1176,7 +1305,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
uintptr_t phys;
uintptr_t *ppfn;
printk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
dprintk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
if ((off < pager->map_off) || ((pager->map_off+pager->map_len) < (off + PAGE_SIZE))) {
error = -ERANGE;
@@ -1201,6 +1330,12 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
pfn = (uintptr_t)pte_pfn(*pte) << PAGE_SHIFT;
#define PFN_PRESENT ((uintptr_t)1 << 0)
pfn |= PFN_VALID | PFN_PRESENT;
/* Check if mapping is write-combined */
if ((pte_flags(*pte) & _PAGE_PWT) &&
!(pte_flags(*pte) & _PAGE_PCD)) {
pfn |= _PAGE_PWT;
}
}
pte_unmap(pte);
}
@@ -1216,7 +1351,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
error = 0;
out:
printk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
dprintk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
return error;
}
@@ -1225,11 +1360,15 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
struct pager * const pager = (void *)handle;
int error;
printk("pager_req_unmap(%p,%lx)\n", os, handle);
dprintk("pager_req_unmap(%p,%lx)\n", os, handle);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
down_write(&current->mm->mmap_sem);
error = do_munmap(current->mm, pager->map_uaddr, pager->map_len);
up_write(&current->mm->mmap_sem);
#else
error = vm_munmap(pager->map_uaddr, pager->map_len);
#endif
if (error) {
printk("pager_req_unmap(%p,%lx):do_munmap failed. %d\n", os, handle, error);
@@ -1237,7 +1376,7 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
}
kfree(pager);
printk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
dprintk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
return error;
}
@@ -1325,9 +1464,18 @@ static int remap_user_space(uintptr_t rva, size_t len, int prot)
start = rva;
pgoff = vma->vm_pgoff + ((rva - vma->vm_start) >> PAGE_SHIFT);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
map = do_mmap_pgoff(file, start, len,
prot, MAP_FIXED|MAP_SHARED, pgoff);
#endif
up_write(&mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
map = vm_mmap(file, start, len,
prot, MAP_FIXED|MAP_SHARED, pgoff << PAGE_SHIFT);
#endif
out:
dprintk("remap_user_space(%lx,%lx,%x): %lx (%ld)\n",
rva, len, prot, (long)map, (long)map);
@@ -1469,6 +1617,8 @@ fail:
return error;
}
#define SCHED_CHECK_SAME_OWNER 0x01
#define SCHED_CHECK_ROOT 0x02
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc)
{
@@ -1556,6 +1706,71 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
error = writecore(os, sc->args[1], sc->args[0]);
ret = 0;
break;
case __NR_sched_setparam: {
	/*
	 * Credential checks performed on the Linux side.
	 * sc->args[0] selects the check:
	 *   SCHED_CHECK_SAME_OWNER - the caller's euid must equal the euid
	 *                            or uid of the task whose pid is in
	 *                            sc->args[1]
	 *   SCHED_CHECK_ROOT       - the caller's euid must be root
	 * ret becomes 0 when the check passes, -EPERM when it fails and
	 * -ESRCH when the target task does not exist. Any other selector
	 * leaves ret untouched.
	 */
	switch (sc->args[0]) {

	case SCHED_CHECK_SAME_OWNER: {
		const struct cred *cred = current_cred();
		const struct cred *pcred;
		bool match;
		struct task_struct *p;
		int pid = sc->args[1];

		/*
		 * Look the task up and read its credentials inside ONE RCU
		 * read-side section, so the task cannot go away between the
		 * lookup and __task_cred().
		 * find_vpid() is used instead of find_get_pid(): the latter
		 * takes a reference on the struct pid that would have to be
		 * dropped with put_pid(), and it never was.
		 */
		rcu_read_lock();
		p = pid_task(find_vpid(pid), PIDTYPE_PID);
		if (!p) {
			rcu_read_unlock();
			ret = -ESRCH;
			goto sched_setparam_out;
		}

		pcred = __task_cred(p);
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
		match = (uid_eq(cred->euid, pcred->euid) ||
			 uid_eq(cred->euid, pcred->uid));
#else
		match = ((cred->euid == pcred->euid) ||
			 (cred->euid == pcred->uid));
#endif
		rcu_read_unlock();

		if (match) {
			ret = 0;
		}
		else {
			ret = -EPERM;
		}

		break;
	}

	case SCHED_CHECK_ROOT: {
		const struct cred *cred = current_cred();
		bool match;

#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
		match = uid_eq(cred->euid, GLOBAL_ROOT_UID);
#else
		match = (cred->euid == 0);
#endif
		if (match) {
			ret = 0;
		}
		else {
			ret = -EPERM;
		}

		break;
	}
	}

sched_setparam_out:
	break;
}
default:
error = -ENOSYS;

View File

@@ -40,7 +40,6 @@
#include <ctype.h>
#include <sys/mman.h>
#include <asm/unistd.h>
#include "../include/uprotocol.h"
#include <sched.h>
#include <termios.h>
@@ -49,6 +48,7 @@
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/utsname.h>
#include <sys/fsuid.h>
#include <time.h>
#include <sys/time.h>
#include <signal.h>
@@ -56,7 +56,10 @@
#include <dirent.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <sys/signalfd.h>
#include "../include/uprotocol.h"
//#define DEBUG
@@ -97,6 +100,13 @@ typedef unsigned char cc_t;
typedef unsigned int speed_t;
typedef unsigned int tcflag_t;
/*
 * One emulated signalfd, kept on the singly linked list headed by sigfdtop.
 * act_signalfd4() creates the pipe, hands out sigpipe[0] as the descriptor
 * and writes signalfd_siginfo records into sigpipe[1].
 */
struct sigfd {
struct sigfd *next; /* next entry on the sigfdtop list */
int sigpipe[2]; /* [0]: read end, also the lookup key; [1]: write end */
};
struct sigfd *sigfdtop;
#ifdef NCCS
#undef NCCS
#endif
@@ -111,14 +121,29 @@ struct kernel_termios {
cc_t c_cc[NCCS]; /* control characters */
};
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid);
int main_loop(int fd, int cpu, pthread_mutex_t *lock);
static int mcosid;
static int fd;
static char *exec_path = NULL;
static char *altroot;
static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK";
static int ischild;
/*
 * Parent/child handshake state for the __NR_fork handler in main_loop().
 * It lives in a MAP_SHARED anonymous mapping so both processes see it.
 */
struct fork_sync {
pid_t pid; /* child's pid, stored by the parent after fork() */
int status; /* 0 on success, negative errno when child setup failed */
sem_t sem; /* process-shared; posted by the child, polled by the parent */
};
/*
 * List node (malloc'ed by the fork handler) that tracks one outstanding
 * fork_sync mapping; the list head is fork_sync_top.
 */
struct fork_sync_container {
struct fork_sync_container *next; /* next node on the fork_sync_top list */
struct fork_sync *fs; /* the shared mapping, or NULL/MAP_FAILED if mmap did not succeed */
};
struct fork_sync_container *fork_sync_top;
pthread_mutex_t fork_sync_mutex = PTHREAD_MUTEX_INITIALIZER;
pid_t gettid(void)
{
return syscall(SYS_gettid);
@@ -218,7 +243,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
desc->pid = getpid();
desc->pgid = getpgid(0);
desc->entry = hdr.e_entry;
ioctl(fd, MCEXEC_UP_GET_CREDV, desc->cred);
desc->at_phdr = load_addr + hdr.e_phoff;
desc->at_phent = sizeof(phdr);
desc->at_phnum = hdr.e_phnum;
@@ -546,11 +571,32 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p,
/* Drop old name if exists */
if (exec_path) {
free(exec_path);
exec_path = NULL;
}
exec_path = strdup(filename);
if (!exec_path) {
fprintf(stderr, "WARNING: strdup(filename) failed\n");
if (!strncmp("/", filename, 1)) {
exec_path = strdup(filename);
if (!exec_path) {
fprintf(stderr, "WARNING: strdup(filename) failed\n");
return ENOMEM;
}
}
else {
char *cwd = getcwd(NULL, 0);
if (!cwd) {
fprintf(stderr, "Error: getting current working dir pathname\n");
return ENOMEM;
}
exec_path = malloc(strlen(cwd) + strlen(filename) + 2);
if (!exec_path) {
fprintf(stderr, "Error: allocating exec_path\n");
return ENOMEM;
}
sprintf(exec_path, "%s/%s", cwd, filename);
free(cwd);
}
desc = load_elf(fp, &interp_path);
@@ -764,7 +810,6 @@ struct thread_data_s {
pthread_t thread_id;
int fd;
int cpu;
int mcosid;
int ret;
pid_t tid;
int terminate;
@@ -785,11 +830,13 @@ static void *main_loop_thread_func(void *arg)
td->tid = gettid();
td->remote_tid = (int)td->tid;
pthread_barrier_wait(&init_ready);
td->ret = main_loop(td->fd, td->cpu, td->lock, td->mcosid);
td->ret = main_loop(td->fd, td->cpu, td->lock);
return NULL;
}
#define LOCALSIG SIGURG
void
sendsig(int sig, siginfo_t *siginfo, void *context)
{
@@ -801,7 +848,10 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
struct signal_desc sigdesc;
if(siginfo->si_pid == pid &&
siginfo->si_signo == SIGINT)
siginfo->si_signo == LOCALSIG)
return;
if(siginfo->si_signo == SIGCHLD)
return;
for(i = 0; i < ncpu; i++){
@@ -839,6 +889,94 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
}
}
/*
 * Host-side part of the signalfd4 emulation.
 *
 * w->sr.args[0] selects the operation:
 *   0 - create: args[1] holds SFD_* flags (SFD_NONBLOCK / SFD_CLOEXEC are
 *       translated to O_NONBLOCK / O_CLOEXEC). A pipe is created, recorded
 *       on the sigfdtop list, and its read end is returned.
 *   1 - close:  args[1] is a descriptor returned by mode 0; both pipe ends
 *       are closed and the list entry is freed.
 *   2 - push:   args[1] is a descriptor returned by mode 0 and args[2]
 *       points at a struct signalfd_siginfo that is written to the pipe.
 *
 * Returns the new descriptor (mode 0) or 0 on success; -EBADF when the
 * descriptor is not on the list; -ENOMEM or -errno when creating the pipe
 * or writing to it fails. Unknown modes return 0.
 *
 * NOTE(review): sigfdtop is updated without any locking in this function;
 * confirm that callers serialise access.
 */
long
act_signalfd4(struct syscall_wait_desc *w)
{
	struct sigfd *sfd;
	struct sigfd *sb;
	int mode = w->sr.args[0];
	int flags;
	int tmp;
	int rc = 0;
	struct signalfd_siginfo *info;

	switch(mode){
	    case 0: /* new signalfd */
		sfd = malloc(sizeof(struct sigfd));
		if(!sfd){
			rc = -ENOMEM;
			break;
		}
		tmp = w->sr.args[1];
		flags = 0;
		if(tmp & SFD_NONBLOCK)
			flags |= O_NONBLOCK;
		if(tmp & SFD_CLOEXEC)
			flags |= O_CLOEXEC;
		if(pipe2(sfd->sigpipe, flags) == -1){
			/* Do not link an entry whose descriptors are garbage. */
			rc = -errno;
			free(sfd);
			break;
		}
		sfd->next = sigfdtop;
		sigfdtop = sfd;
		rc = sfd->sigpipe[0];
		break;
	    case 1: /* close signalfd */
		tmp = w->sr.args[1];
		/* sb trails sfd so the match can be unlinked */
		for(sfd = sigfdtop, sb = NULL; sfd; sb = sfd, sfd = sfd->next)
			if(sfd->sigpipe[0] == tmp)
				break;
		if(!sfd)
			rc = -EBADF;
		else{
			if(sb)
				sb->next = sfd->next;
			else
				sigfdtop = sfd->next;
			close(sfd->sigpipe[0]);
			close(sfd->sigpipe[1]);
			free(sfd);
		}
		break;
	    case 2: /* push signal */
		tmp = w->sr.args[1];
		for(sfd = sigfdtop; sfd; sfd = sfd->next)
			if(sfd->sigpipe[0] == tmp)
				break;
		if(!sfd)
			rc = -EBADF;
		else{
			info = (struct signalfd_siginfo *)w->sr.args[2];
			/* Report a lost signal record instead of dropping it silently. */
			if(write(sfd->sigpipe[1], info,
			         sizeof(struct signalfd_siginfo)) == -1)
				rc = -errno;
		}
		break;
	}
	return rc;
}
void
act_sigaction(struct syscall_wait_desc *w)
{
struct sigaction act;
int sig;
sig = w->sr.args[0];
if (sig == SIGCHLD || sig == LOCALSIG)
return;
memset(&act, '\0', sizeof act);
if (w->sr.args[1] == (unsigned long)SIG_IGN)
act.sa_handler = SIG_IGN;
else{
act.sa_sigaction = sendsig;
act.sa_flags = SA_SIGINFO;
}
sigaction(sig, &act, NULL);
}
/*
 * Mirror an rt_sigprocmask request on the host: the calling thread's
 * signal mask is replaced (SIG_SETMASK) by the mask carried in
 * w->sr.args[0], with LOCALSIG always left unblocked.
 */
void
act_sigprocmask(struct syscall_wait_desc *w)
{
	sigset_t mask;

	/* Start from an empty set, then overlay the first word with args[0]. */
	sigemptyset(&mask);
	memcpy(&mask, &w->sr.args[0], sizeof(unsigned long));

	/* LOCALSIG must stay deliverable. */
	sigdelset(&mask, LOCALSIG);

	sigprocmask(SIG_SETMASK, &mask, NULL);
}
static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
{
int n;
@@ -891,8 +1029,7 @@ void init_sigaction(void)
master_tid = gettid();
for (i = 1; i <= 64; i++) {
if (i != SIGCHLD && i != SIGCONT && i != SIGSTOP &&
i != SIGTSTP && i != SIGTTIN && i != SIGTTOU) {
if (i != SIGKILL && i != SIGSTOP && i != SIGCHLD) {
struct sigaction act;
sigaction(i, NULL, &act);
@@ -904,7 +1041,7 @@ void init_sigaction(void)
}
}
void init_worker_threads(int fd, int mcosid)
void init_worker_threads(int fd)
{
int i;
@@ -916,7 +1053,6 @@ void init_worker_threads(int fd, int mcosid)
thread_data[i].fd = fd;
thread_data[i].cpu = i;
thread_data[i].mcosid = mcosid;
thread_data[i].lock = &lock;
thread_data[i].init_ready = &init_ready;
thread_data[i].terminate = 0;
@@ -930,7 +1066,75 @@ void init_worker_threads(int fd, int mcosid)
}
pthread_barrier_wait(&init_ready);
}
}
#define MCK_RLIMIT_AS 0
#define MCK_RLIMIT_CORE 1
#define MCK_RLIMIT_CPU 2
#define MCK_RLIMIT_DATA 3
#define MCK_RLIMIT_FSIZE 4
#define MCK_RLIMIT_LOCKS 5
#define MCK_RLIMIT_MEMLOCK 6
#define MCK_RLIMIT_MSGQUEUE 7
#define MCK_RLIMIT_NICE 8
#define MCK_RLIMIT_NOFILE 9
#define MCK_RLIMIT_NPROC 10
#define MCK_RLIMIT_RSS 11
#define MCK_RLIMIT_RTPRIO 12
#define MCK_RLIMIT_RTTIME 13
#define MCK_RLIMIT_SIGPENDING 14
#define MCK_RLIMIT_STACK 15
/*
 * Flat table of (host RLIMIT_* resource, MCK_RLIMIT_* index) pairs.
 * main() walks it two ints at a time, calling getrlimit() on the first
 * value and storing the result in desc->rlimit[] at the second.
 * A pair is compiled in only when the host headers define that RLIMIT_*.
 */
static int rlimits[] = {
#ifdef RLIMIT_AS
RLIMIT_AS, MCK_RLIMIT_AS,
#endif
#ifdef RLIMIT_CORE
RLIMIT_CORE, MCK_RLIMIT_CORE,
#endif
#ifdef RLIMIT_CPU
RLIMIT_CPU, MCK_RLIMIT_CPU,
#endif
#ifdef RLIMIT_DATA
RLIMIT_DATA, MCK_RLIMIT_DATA,
#endif
#ifdef RLIMIT_FSIZE
RLIMIT_FSIZE, MCK_RLIMIT_FSIZE,
#endif
#ifdef RLIMIT_LOCKS
RLIMIT_LOCKS, MCK_RLIMIT_LOCKS,
#endif
#ifdef RLIMIT_MEMLOCK
RLIMIT_MEMLOCK, MCK_RLIMIT_MEMLOCK,
#endif
#ifdef RLIMIT_MSGQUEUE
RLIMIT_MSGQUEUE,MCK_RLIMIT_MSGQUEUE,
#endif
#ifdef RLIMIT_NICE
RLIMIT_NICE, MCK_RLIMIT_NICE,
#endif
#ifdef RLIMIT_NOFILE
RLIMIT_NOFILE, MCK_RLIMIT_NOFILE,
#endif
#ifdef RLIMIT_NPROC
RLIMIT_NPROC, MCK_RLIMIT_NPROC,
#endif
#ifdef RLIMIT_RSS
RLIMIT_RSS, MCK_RLIMIT_RSS,
#endif
#ifdef RLIMIT_RTPRIO
RLIMIT_RTPRIO, MCK_RLIMIT_RTPRIO,
#endif
#ifdef RLIMIT_RTTIME
RLIMIT_RTTIME, MCK_RLIMIT_RTTIME,
#endif
#ifdef RLIMIT_SIGPENDING
RLIMIT_SIGPENDING,MCK_RLIMIT_SIGPENDING,
#endif
#ifdef RLIMIT_STACK
RLIMIT_STACK, MCK_RLIMIT_STACK,
#endif
};
char dev[64];
@@ -952,7 +1156,6 @@ int main(int argc, char **argv)
unsigned long lcur;
unsigned long lmax;
int target_core = 0;
int mcosid = 0;
int opt;
char path[1024];
char *shell = NULL;
@@ -1056,7 +1259,9 @@ int main(int argc, char **argv)
if (shell) {
argv[optind] = path;
}
for(i = 0; i < sizeof(rlimits) / sizeof(int); i += 2)
getrlimit(rlimits[i], &desc->rlimit[rlimits[i + 1]]);
desc->envs_len = envs_len;
desc->envs = envs;
//print_flat(envs);
@@ -1091,8 +1296,8 @@ int main(int argc, char **argv)
rlim_stack.rlim_cur = lcur;
rlim_stack.rlim_max = lmax;
}
desc->rlimit_stack_cur = rlim_stack.rlim_cur;
desc->rlimit_stack_max = rlim_stack.rlim_max;
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
if(ncpu == -1){
@@ -1173,7 +1378,7 @@ int main(int argc, char **argv)
init_sigaction();
init_worker_threads(fd, mcosid);
init_worker_threads(fd);
if (ioctl(fd, MCEXEC_UP_START_IMAGE, (unsigned long)desc) != 0) {
perror("exec");
@@ -1244,13 +1449,13 @@ static void
kill_thread(unsigned long cpu)
{
if(cpu >= 0 && cpu < ncpu){
pthread_kill(thread_data[cpu].thread_id, SIGINT);
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
}
else{
int i;
for (i = 0; i < ncpu; ++i) {
pthread_kill(thread_data[i].thread_id, SIGINT);
pthread_kill(thread_data[i].thread_id, LOCALSIG);
}
}
}
@@ -1351,7 +1556,32 @@ int close_cloexec_fds(int mcos_fd)
return 0;
}
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
/*
 * Translate a path requested by the application into the host path that
 * should be opened for it.
 *
 *   /proc/self/<rest>  ->  /proc/mcos<mcosid>/<pid of this process>/<rest>
 *   /proc/<rest>       ->  /proc/mcos<mcosid>/<rest>
 *   /sys/devices/system/cpu/online
 *                      ->  /admin/fs/attached/files/sys/devices/system/cpu/online
 *
 * in:  the original path.
 * buf: scratch buffer that receives a rewritten /proc path; the write is
 *      not bounded here, so the caller must supply a large enough buffer.
 *
 * Returns the translated path (buf or a string constant) when it exists
 * according to stat(); returns in unchanged when no rule applies or the
 * translated path cannot be stat()ed.
 */
char *
chgpath(char *in, char *buf)
{
	struct stat st;
	char *mapped;

	if (strncmp(in, "/proc/self/", 11) == 0) {
		sprintf(buf, "/proc/mcos%d/%d/%s", mcosid, getpid(), in + 11);
		mapped = buf;
	} else if (strncmp(in, "/proc/", 6) == 0) {
		sprintf(buf, "/proc/mcos%d/%s", mcosid, in + 6);
		mapped = buf;
	} else if (strcmp(in, "/sys/devices/system/cpu/online") == 0) {
		mapped = "/admin/fs/attached/files/sys/devices/system/cpu/online";
	} else {
		return in;
	}

	/* Fall back to the untranslated path if the replacement is missing. */
	return (stat(mapped, &st) == -1) ? in : mapped;
}
int main_loop(int fd, int cpu, pthread_mutex_t *lock)
{
struct syscall_wait_desc w;
long ret;
@@ -1389,14 +1619,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
}
__dprintf("open: %s\n", pathbuf);
fn = pathbuf;
if(!strncmp(fn, "/proc/", 6)){
sprintf(tmpbuf, "/proc/mcos%d/%s", mcosid, fn + 6);
fn = tmpbuf;
}
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
}
fn = chgpath(pathbuf, tmpbuf);
ret = open(fn, w.sr.args[1], w.sr.args[2]);
SET_ERR(ret);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
@@ -1505,112 +1729,156 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
}
case __NR_fork: {
int child;
int sync_pipe_fd[2];
char sync_msg;
struct fork_sync *fs;
struct fork_sync_container *fsc;
struct fork_sync_container *fp;
struct fork_sync_container *fb;
int rc = -1;
pid_t pid;
if (pipe(sync_pipe_fd) != 0) {
fprintf(stderr, "fork(): error creating sync pipe\n");
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
fsc = malloc(sizeof(struct fork_sync_container));
memset(fsc, '\0', sizeof(struct fork_sync_container));
pthread_mutex_lock(&fork_sync_mutex);
fsc->next = fork_sync_top;
fork_sync_top = fsc;
pthread_mutex_unlock(&fork_sync_mutex);
fsc->fs = fs = mmap(NULL, sizeof(struct fork_sync),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if(fs == (void *)-1){
goto fork_err;
}
memset(fs, '\0', sizeof(struct fork_sync));
sem_init(&fs->sem, 1, 0);
pid = fork();
switch (pid) {
/* Error */
case -1:
fprintf(stderr, "fork(): error forking child process\n");
rc = -errno;
break;
/* Child process */
case 0: {
int i;
int ret = 1;
struct newprocess_desc npdesc;
ischild = 1;
/* Reopen device fd */
close(fd);
fd = open(dev, O_RDWR);
if (fd < 0) {
fs->status = -errno;
fprintf(stderr, "ERROR: opening %s\n", dev);
goto fork_child_sync_pipe;
}
/* Reinit signals and syscall threads */
init_sigaction();
init_worker_threads(fd);
__dprintf("pid(%d): signals and syscall threads OK\n",
getpid());
/* Hold executable also in the child process */
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
!= 0) {
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
exec_path, ret, fd);
fs->status = -errno;
goto fork_child_sync_pipe;
}
fork_child_sync_pipe:
sem_post(&fs->sem);
if (fs->status)
exit(1);
for (fp = fork_sync_top; fp;) {
fb = fp->next;
if (fp->fs)
munmap(fp->fs, sizeof(struct fork_sync));
free(fp);
fp = fb;
}
fork_sync_top = NULL;
pthread_mutex_init(&fork_sync_mutex, NULL);
npdesc.pid = getpid();
ioctl(fd, MCEXEC_UP_NEW_PROCESS, &npdesc);
/* TODO: does the forked thread run in a pthread context? */
for (i = 0; i <= ncpu; ++i) {
pthread_join(thread_data[i].thread_id, NULL);
}
return ret;
}
/* Parent */
default:
fs->pid = pid;
while ((rc = sem_trywait(&fs->sem)) == -1 && (errno == EAGAIN || errno == EINTR)) {
int st;
int wrc;
wrc = waitpid(pid, &st, WNOHANG);
if(wrc == pid) {
fs->status = -ENOMEM;
break;
}
sched_yield();
}
if (fs->status != 0) {
fprintf(stderr, "fork(): error with child process after fork\n");
rc = fs->status;
break;
}
rc = pid;
break;
}
child = fork();
switch (child) {
/* Error */
case -1:
fprintf(stderr, "fork(): error forking child process\n");
close(sync_pipe_fd[0]);
close(sync_pipe_fd[1]);
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
break;
/* Child process */
case 0: {
int i;
int ret = 1;
ischild = 1;
/* Reopen device fd */
close(fd);
fd = open(dev, O_RDWR);
if (fd < 0) {
/* TODO: tell parent something went wrong? */
fprintf(stderr, "ERROR: opening %s\n", dev);
/* Tell parent something went wrong */
sync_msg = 1;
goto fork_child_sync_pipe;
}
/* Reinit signals and syscall threads */
init_sigaction();
init_worker_threads(fd, mcosid);
__dprintf("pid(%d): signals and syscall threads OK\n",
getpid());
/* Hold executable also in the child process */
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
!= 0) {
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
exec_path, ret, fd);
goto fork_child_sync_pipe;
}
/* Tell parent everything went OK */
sync_msg = 0;
fork_child_sync_pipe:
if (write(sync_pipe_fd[1], &sync_msg, 1) != 1) {
fprintf(stderr, "ERROR: writing sync pipe\n");
goto fork_child_out;
}
ret = 0;
fork_child_out:
close(sync_pipe_fd[0]);
close(sync_pipe_fd[1]);
/* TODO: does the forked thread run in a pthread context? */
for (i = 0; i <= ncpu; ++i) {
pthread_join(thread_data[i].thread_id, NULL);
}
return ret;
}
/* Parent */
default:
if (read(sync_pipe_fd[0], &sync_msg, 1) != 1) {
fprintf(stderr, "fork(): error reading sync message\n");
child = -1;
goto sync_out;
}
if (sync_msg != 0) {
fprintf(stderr, "fork(): error with child process after fork\n");
child = -1;
goto sync_out;
}
sync_out:
close(sync_pipe_fd[0]);
close(sync_pipe_fd[1]);
do_syscall_return(fd, cpu, child, 0, 0, 0, 0);
sem_destroy(&fs->sem);
munmap(fs, sizeof(struct fork_sync));
fork_err:
pthread_mutex_lock(&fork_sync_mutex);
for(fp = fork_sync_top, fb = NULL; fp; fb = fp, fp = fp->next)
if(fp == fsc)
break;
if(fp){
if(fb)
fb->next = fsc->next;
else
fork_sync_top = fsc->next;
}
pthread_mutex_unlock(&fork_sync_mutex);
do_syscall_return(fd, cpu, rc, 0, 0, 0, 0);
break;
}
case __NR_wait4: {
int status;
int ret;
pid_t pid = w.sr.args[0];
int options = w.sr.args[2];
siginfo_t info;
int opt;
if ((ret = waitpid(pid, &status, 0)) != pid) {
opt = WEXITED | (options & WNOWAIT);
memset(&info, '\0', sizeof info);
while((ret = waitid(P_PID, pid, &info, opt)) == -1 &&
errno == EINTR);
if(ret == 0){
ret = info.si_pid;
}
if(ret != pid) {
fprintf(stderr, "ERROR: waiting for %lu\n", w.sr.args[0]);
}
@@ -1747,6 +2015,32 @@ return_execve2:
break;
}
case __NR_signalfd4:
ret = act_signalfd4(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_rt_sigaction:
act_sigaction(&w);
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
break;
case __NR_rt_sigprocmask:
act_sigprocmask(&w);
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
break;
case __NR_setfsuid:
if(w.sr.args[1] == 1){
ioctl(fd, MCEXEC_UP_GET_CRED, w.sr.args[0]);
ret = 0;
}
else{
ret = setfsuid(w.sr.args[0]);
}
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_close:
if(w.sr.args[0] == fd)
ret = -EBADF;
@@ -1756,8 +2050,8 @@ return_execve2:
break;
default:
ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
}

View File

@@ -6,7 +6,7 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o procfs.o devobj.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ -g
LDFLAGS += -e arch_start
IHKOBJ = ihk/ihk.o

View File

@@ -30,6 +30,9 @@ SECTIONS
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4

View File

@@ -30,6 +30,9 @@ SECTIONS
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4

View File

@@ -30,6 +30,9 @@ SECTIONS
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4
@@ -39,8 +42,4 @@ SECTIONS
. = ALIGN(4096);
_end = .;
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@@ -0,0 +1,2 @@
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
LDFLAGS += -T $(SRC)/config/smp-x86.lds

49
kernel/config/smp-x86.lds Normal file
View File

@@ -0,0 +1,49 @@
/*
 * Linker script for the SMP x86 kernel image.
 *
 * Two loadable program headers are declared: "text" with segment flags 5
 * (ELF PF_R | PF_X) and "data" with segment flags 7 (PF_R | PF_W | PF_X).
 */
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
/* Link address of the image; _head labels its very first byte. */
. = 0xffffffff80001000;
_head = .;
.text : {
*(.text);
} : text
/* Everything from here on is assigned to the "data" segment, starting
 * on a fresh 4 KiB boundary. */
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
} :data
/* Note: .rodata is placed in the "data" segment, not in "text". */
.rodata : {
*(.rodata .rodata.*)
} :data
/*
 * vsyscall area: starts on a 0x1000 boundary and places each entry point
 * at a fixed offset from vsyscall_page (gettimeofday at 0x000, time at
 * 0x400, getcpu at 0x800). The output section is padded up to the next
 * 4096-byte boundary and unused bytes are filled with 0xf4 (the one-byte
 * x86 HLT opcode).
 */
.vsyscall : ALIGN(0x1000) {
vsyscall_page = .;
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4
/* No program header is named for .bss in this script. */
.bss : {
*(.bss .bss.*)
}
/* _end labels the first 4 KiB-aligned address past the image. */
. = ALIGN(4096);
_end = .;
/* Input sections that are dropped from the output image. */
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@@ -45,12 +45,12 @@ void kputs(char *buf)
#define KPRINTF_LOCAL_BUF_LEN 1024
int kprintf_lock()
unsigned long kprintf_lock(void)
{
return ihk_mc_spinlock_lock(&kmsg_lock);
}
void kprintf_unlock(int irqflags)
void kprintf_unlock(unsigned long irqflags)
{
ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
}

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* memory mapped device pager client
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 RIKEN AICS
*/
/*
* HISTORY:
@@ -32,9 +33,18 @@
#include <pager.h>
#include <string.h>
#include <syscall.h>
#include <process.h>
//#define DEBUG_PRINT_DEVOBJ
#ifdef DEBUG_PRINT_DEVOBJ
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
struct devobj {
struct memobj memobj; /* must be first */
@@ -76,7 +86,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
struct devobj *obj = NULL;
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
kprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
dkprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
if (npages > MAX_PAGES_IN_DEVOBJ) {
error = -EFBIG;
@@ -111,8 +121,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
goto out;
}
kprintf("devobj_create:handle: %lx\n", result.handle);
kprintf("devobj_create:maxprot: %x\n", result.maxprot);
dkprintf("devobj_create:handle: %lx\n", result.handle);
dkprintf("devobj_create:maxprot: %x\n", result.maxprot);
obj->memobj.ops = &devobj_ops;
obj->memobj.flags = MF_HAS_PAGER;
@@ -134,7 +144,7 @@ out:
}
kfree(obj);
}
kprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
dkprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
return error;
}
@@ -142,7 +152,7 @@ static void devobj_ref(struct memobj *memobj)
{
struct devobj *obj = to_devobj(memobj);
kprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
memobj_lock(&obj->memobj);
++obj->ref;
memobj_unlock(&obj->memobj);
@@ -155,7 +165,7 @@ static void devobj_release(struct memobj *memobj)
struct devobj *free_obj = NULL;
uintptr_t handle;
kprintf("devobj_release(%p %lx)\n", obj, obj->handle);
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
memobj_lock(&obj->memobj);
--obj->ref;
@@ -187,12 +197,12 @@ static void devobj_release(struct memobj *memobj)
kfree(free_obj);
}
kprintf("devobj_release(%p %lx):free %p\n",
dkprintf("devobj_release(%p %lx):free %p\n",
obj, handle, free_obj);
return;
}
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
{
const off_t pgoff = off >> PAGE_SHIFT;
struct devobj *obj = to_devobj(memobj);
@@ -202,7 +212,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
ihk_mc_user_context_t ctx;
int ix;
kprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
error = -EFBIG;
@@ -210,7 +220,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
goto out;
}
ix = pgoff - obj->pfn_pgoff;
kprintf("ix: %ld\n", ix);
dkprintf("ix: %ld\n", ix);
memobj_lock(&obj->memobj);
pfn = obj->pfn_table[ix];
@@ -230,12 +240,20 @@ kprintf("ix: %ld\n", ix);
if (pfn & PFN_PRESENT) {
/* convert remote physical into local physical */
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
attr = pfn & ~PFN_PFN;
/* TODO: do an arch dependent PTE to mapping flag conversion
* instead of this inline check, also, we rely on having the
* same PAT config as Linux here.. */
if ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)) {
*flag |= VR_WRITE_COMBINED;
}
pfn = ihk_mc_map_memory(NULL, (pfn & PFN_PFN), PAGE_SIZE);
pfn &= PFN_PFN;
pfn |= attr;
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
}
memobj_lock(&obj->memobj);
@@ -253,6 +271,6 @@ kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->
*physp = pfn & PFN_PFN;
out:
kprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
dkprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
return error;
}

View File

@@ -26,7 +26,7 @@
#include <string.h>
#include <syscall.h>
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
static ihk_spinlock_t fileobj_list_lock = SPIN_LOCK_UNLOCKED;
@@ -46,6 +46,7 @@ static memobj_ref_func_t fileobj_ref;
static memobj_get_page_func_t fileobj_get_page;
static memobj_copy_page_func_t fileobj_copy_page;
static memobj_flush_page_func_t fileobj_flush_page;
static memobj_invalidate_page_func_t fileobj_invalidate_page;
static struct memobj_ops fileobj_ops = {
.release = &fileobj_release,
@@ -53,6 +54,7 @@ static struct memobj_ops fileobj_ops = {
.get_page = &fileobj_get_page,
.copy_page = &fileobj_copy_page,
.flush_page = &fileobj_flush_page,
.invalidate_page = &fileobj_invalidate_page,
};
static struct fileobj *to_fileobj(struct memobj *memobj)
@@ -383,7 +385,7 @@ out:
return;
}
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag)
{
struct process *proc = cpu_local_var(current);
struct fileobj *obj = to_fileobj(memobj);
@@ -577,3 +579,33 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
memobj_lock(&obj->memobj);
return 0;
}
/*
 * fileobj_invalidate_page - drop the file object's page that backs @phys.
 *
 * @memobj: memory object; converted with to_fileobj()
 * @phys:   physical address of the page to invalidate
 * @pgsize: size in bytes of the mapping; divided by PAGE_SIZE to get the
 *          page count handed to ihk_mc_free_pages()
 *
 * The page descriptor for @phys is resolved and then looked up again, by
 * its offset, in this object's page list. The backing memory is released
 * only when the page's count reads exactly 1 and page_unmap() returns
 * non-zero. Nothing is flushed here.
 *
 * Always returns 0, including when no matching page is found.
 */
static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
		size_t pgsize)
{
	struct fileobj *fobj = to_fileobj(memobj);
	struct page *pg;
	int error = 0;

	dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n",
			memobj, phys, pgsize);

	/* Resolve the descriptor first; its offset keys the list lookup. */
	pg = phys_to_page(phys);
	if (pg) {
		pg = page_list_lookup(fobj, pg->offset);
	}

	/* Short-circuit order matters: page_unmap() must only run for a
	 * page that was found and whose count is 1. */
	if (pg && (ihk_atomic_read(&pg->count) == 1) && page_unmap(pg)) {
		ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
	}

	dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n",
			memobj, phys, pgsize, error);
	return error;
}

View File

@@ -76,7 +76,7 @@
#ifdef DEBUG_PRINT_FUTEX
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
int futex_cmpxchg_enabled;

View File

@@ -28,13 +28,15 @@
#include <process.h>
#include <page.h>
#include <mman.h>
#include <init.h>
#include <kmalloc.h>
//#define DEBUG_PRINT_HOST
#ifdef DEBUG_PRINT_HOST
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
void check_mapping_for_proc(struct process *proc, unsigned long addr)
@@ -69,7 +71,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
unsigned long args_envs_p, args_envs_rp;
unsigned long s, e, up;
char **argv;
int i, n, argc, envc, args_envs_npages;
char **a;
int i, n, argc, envc, args_envs_npages, l;
char **env;
int range_npages;
void *up_v;
@@ -173,23 +176,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
pn->entry);
}
#if 1
/*
Fix for the problem where brk grows to hit .bss section
when using dynamically linked executables.
Test code resides in /home/takagi/project/mpich/src/brk_icc_mic.
This is because when using
ld.so (i.e. using shared objects), mckernel/kernel/host.c sets "brk" to
the end of .bss of ld.so (e.g. 0x21f000), and then ld.so places a
main-program after this (e.g. 0x400000), so "brk" will hit .bss
eventually.
*/
proc->vm->region.brk_start = proc->vm->region.brk_end =
(USER_END / 4) & LARGE_PAGE_MASK;
#else
proc->vm->region.brk_start = proc->vm->region.brk_end =
proc->vm->region.data_end;
#endif
/* Map, copy and update args and envs */
flags = VR_PROT_READ | VR_PROT_WRITE;
@@ -284,13 +272,21 @@ int prepare_process_ranges_args_envs(struct process *proc,
dkprintf("argc: %d\n", argc);
argv = (char **)(args_envs + (sizeof(int)));
while (*argv) {
char **_argv = argv;
dkprintf("%s\n", args_envs + (unsigned long)*argv);
*argv = (char *)addr + (unsigned long)*argv; // Process' address space!
argv = ++_argv;
if(proc->saved_cmdline){
kfree(proc->saved_cmdline);
proc->saved_cmdline_len = 0;
}
for(a = argv, l = 0; *a; a++)
l += strlen(args_envs + (unsigned long)*a) + 1;
proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT);
if(!proc->saved_cmdline)
goto err;
proc->saved_cmdline_len = l;
for(a = argv, l = 0; *a; a++){
strcpy(proc->saved_cmdline + l, args_envs + (unsigned long)*a);
l += strlen(args_envs + (unsigned long)*a) + 1;
*a = (char *)addr + (unsigned long)*a; // Process' address space!
}
argv = (char **)(args_envs + (sizeof(int)));
envc = *((int*)(args_envs + p->args_len));
dkprintf("envc: %d\n", envc);
@@ -308,7 +304,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
p->rprocess = (unsigned long)proc;
p->rpgtable = virt_to_phys(proc->vm->page_table);
if (init_process_stack(proc, pn, argc, argv, envc, env) != 0) {
goto err;
}
@@ -363,12 +359,21 @@ static int process_msg_prepare_process(unsigned long rphys)
}
proc->ftn->pid = pn->pid;
proc->ftn->pgid = pn->pgid;
proc->ftn->ruid = pn->cred[0];
proc->ftn->euid = pn->cred[1];
proc->ftn->suid = pn->cred[2];
proc->ftn->fsuid = pn->cred[3];
proc->ftn->rgid = pn->cred[4];
proc->ftn->egid = pn->cred[5];
proc->ftn->sgid = pn->cred[6];
proc->ftn->fsgid = pn->cred[7];
proc->vm->region.user_start = pn->user_start;
proc->vm->region.user_end = pn->user_end;
proc->vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
proc->vm->region.map_end = proc->vm->region.map_start;
proc->rlimit_stack.rlim_cur = pn->rlimit_stack_cur;
proc->rlimit_stack.rlim_max = pn->rlimit_stack_max;
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
/* TODO: Clear it at the proper timing */
cpu_local_var(scp).post_idx = 0;
@@ -379,7 +384,7 @@ static int process_msg_prepare_process(unsigned long rphys)
goto err;
}
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->ftn->pid,
proc->vm->page_table);
ihk_mc_free(pn);
@@ -387,6 +392,7 @@ static int process_msg_prepare_process(unsigned long rphys)
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz);
flush_tlb();
return 0;
err:
ihk_mc_free(pn);
@@ -467,13 +473,15 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
ihk_ikc_send(c, packet, 0);
}
extern unsigned long do_kill(int, int, int, struct siginfo *);
extern unsigned long do_kill(int, int, int, struct siginfo *, int ptracecont);
extern void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
extern void process_procfs_request(unsigned long rarg);
extern int memcheckall();
extern int freecheck(int runcount);
extern int runcount;
extern void terminate_host(int pid);
extern void debug_log(long);
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
void *__packet, void *ihk_os)
@@ -490,6 +498,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct siginfo info;
} *sp, info;
unsigned long pp;
int cpuid;
switch (packet->msg) {
case SCD_MSG_INIT_CHANNEL_ACKED:
@@ -521,11 +530,17 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
return 0;
case SCD_MSG_SCHEDULE_PROCESS:
cpuid = obtain_clone_cpuid();
if(cpuid == -1){
kprintf("No CPU available\n");
return -1;
}
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
proc = (struct process *)packet->arg;
settid(proc, 0, ihk_mc_get_processor_id(), -1);
runq_add_proc(proc, ihk_mc_get_processor_id());
settid(proc, 0, cpuid, -1);
proc->ftn->status = PS_RUNNING;
runq_add_proc(proc, cpuid);
//cpu_local_var(next) = (struct process *)packet->arg;
return 0;
@@ -541,12 +556,20 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pckt.arg = packet->arg;
syscall_channel_send(c, &pckt);
rc = do_kill(info.pid, info.tid, info.sig, &info.info);
rc = do_kill(info.pid, info.tid, info.sig, &info.info, 0);
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
return 0;
case SCD_MSG_PROCFS_REQUEST:
process_procfs_request(packet->arg);
return 0;
case SCD_MSG_CLEANUP_PROCESS:
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
terminate_host(packet->pid);
return 0;
case SCD_MSG_DEBUG_LOG:
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
debug_log(packet->arg);
return 0;
}
return 0;
}

View File

@@ -30,6 +30,7 @@ struct malloc_header {
#define CPU_STATUS_DISABLE (0)
#define CPU_STATUS_IDLE (1)
#define CPU_STATUS_RUNNING (2)
#define CPU_STATUS_RESERVED (3)
extern ihk_spinlock_t cpu_status_lock;
#define CPU_FLAG_NEED_RESCHED 0x1U

View File

@@ -18,11 +18,19 @@
#include <ihk/lock.h>
#include <errno.h>
#include <list.h>
#include <shm.h>
/* begin types.h */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
/* end types.h */
enum {
/* for memobj.flags */
MF_HAS_PAGER = 0x0001,
MF_SHMDT_OK = 0x0002,
};
struct memobj {
@@ -34,9 +42,10 @@ struct memobj {
typedef void memobj_release_func_t(struct memobj *obj);
typedef void memobj_ref_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
struct memobj_ops {
memobj_release_func_t * release;
@@ -44,6 +53,7 @@ struct memobj_ops {
memobj_get_page_func_t * get_page;
memobj_copy_page_func_t * copy_page;
memobj_flush_page_func_t * flush_page;
memobj_invalidate_page_func_t * invalidate_page;
};
static inline void memobj_release(struct memobj *obj)
@@ -61,10 +71,10 @@ static inline void memobj_ref(struct memobj *obj)
}
static inline int memobj_get_page(struct memobj *obj, off_t off,
int p2align, uintptr_t *physp)
int p2align, uintptr_t *physp, unsigned long *pflag)
{
if (obj->ops->get_page) {
return (*obj->ops->get_page)(obj, off, p2align, physp);
return (*obj->ops->get_page)(obj, off, p2align, physp, pflag);
}
return -ENXIO;
}
@@ -86,6 +96,15 @@ static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t p
return 0;
}
/*
 * Dispatch an invalidate request to the object's invalidate_page
 * operation. Objects that do not provide the operation are treated as a
 * successful no-op (returns 0); otherwise the operation's own return
 * value is passed through.
 */
static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
		size_t pgsize)
{
	if (!obj->ops->invalidate_page) {
		return 0;
	}

	return obj->ops->invalidate_page(obj, phys, pgsize);
}
static inline void memobj_lock(struct memobj *obj)
{
ihk_mc_spinlock_lock_noirq(&obj->lock);
@@ -102,6 +121,7 @@ static inline int memobj_has_pager(struct memobj *obj)
}
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
struct shmid_ds;
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp);
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp);

View File

@@ -5,6 +5,8 @@
* memory management declarations
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2013 Hitachi, Ltd.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -13,6 +15,8 @@
#ifndef HEADER_MMAN_H
#define HEADER_MMAN_H
#include <arch/mman.h>
/*
* memory protection
*/
@@ -32,16 +36,6 @@
#define MAP_PRIVATE 0x02
#define MAP_FIXED 0x10
#define MAP_ANONYMOUS 0x20
#define MAP_32BIT 0x40
#define MAP_GROWSDOWN 0x0100
#define MAP_DENYWRITE 0x0800
#define MAP_EXECUTABLE 0x1000
#define MAP_LOCKED 0x2000
#define MAP_NORESERVE 0x4000
#define MAP_POPULATE 0x8000
#define MAP_NONBLOCK 0x00010000
#define MAP_STACK 0x00020000
#define MAP_HUGETLB 0x00040000
/*
* memory advice
@@ -69,4 +63,11 @@
#define MREMAP_MAYMOVE 0x01
#define MREMAP_FIXED 0x02
/*
* for msync()
*/
#define MS_ASYNC 0x01
#define MS_INVALIDATE 0x02
#define MS_SYNC 0x04
#endif /* HEADER_MMAN_H */

60
kernel/include/prio.h Normal file
View File

@@ -0,0 +1,60 @@
#ifndef _SCHED_PRIO_H
#define _SCHED_PRIO_H
/*
 * Scheduling priority constants and helpers for converting between
 * nice values, static priorities and rlimit-style values.
 */

/*
 * Range of user-visible nice values. MIN_NICE is parenthesized so the
 * negative literal stays a single operand wherever the macro is expanded.
 */
#define MAX_NICE 19
#define MIN_NICE (-20)
#define NICE_WIDTH (MAX_NICE - MIN_NICE + 1)
/*
 * Priority of a process goes from 0..MAX_PRIO-1, valid RT
 * priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
 * tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
 * values are inverted: lower p->prio value means higher priority.
 *
 * The MAX_USER_RT_PRIO value allows the actual maximum
 * RT priority to be separate from the value exported to
 * user-space. This allows kernel threads to set their
 * priority to a value higher than any user task. Note:
 * MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
 */
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
/*
 * Convert user-nice values [ -20 ... 0 ... 19 ]
 * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
 * and back.
 */
#define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO)
#define PRIO_TO_NICE(prio) ((prio) - DEFAULT_PRIO)
/*
 * 'User priority' is the nice value converted to something we
 * can work with better when scaling various scheduler parameters,
 * it's a [ 0 ... 39 ] range.
 */
#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
/*
 * Convert a nice value [19 ... -20] to an rlimit-style value [1 ... 40]:
 * MAX_NICE maps to 1 and MIN_NICE maps to 40.
 */
static inline long nice_to_rlimit(long nice)
{
	return MAX_NICE - nice + 1;
}
/*
 * Convert an rlimit-style value [1 ... 40] back to a nice value
 * [19 ... -20]; this is the inverse of nice_to_rlimit().
 */
static inline long rlimit_to_nice(long prio)
{
	return MAX_NICE - prio + 1;
}
#endif /* _SCHED_PRIO_H */

View File

@@ -21,12 +21,14 @@
#include <signal.h>
#include <memobj.h>
#include <affinity.h>
#include <syscall.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
#define VR_RESERVED 0x2
#define VR_IO_NOCACHE 0x100
#define VR_REMOTE 0x200
#define VR_WRITE_COMBINED 0x400
#define VR_DEMAND_PAGING 0x1000
#define VR_PRIVATE 0x2000
#define VR_LOCKED 0x4000
@@ -61,6 +63,9 @@
#define PT_TRACED 0x80 /* The process is ptraced */
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
#define PTRACE_TRACEME 0
#define PTRACE_PEEKTEXT 1
@@ -106,6 +111,8 @@
#define PTRACE_EVENT_VFORK_DONE 5
#define PTRACE_EVENT_EXIT 6
#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state */
#define SIGNAL_STOP_STOPPED 0x1 /* The process has been stopped by SIGSTOP */
#define SIGNAL_STOP_CONTINUED 0x2 /* The process has been resumed by SIGCONT */
@@ -118,6 +125,11 @@
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
#define __WCLONE 0x80000000
/* idtype */
#define P_ALL 0
#define P_PID 1
#define P_PGID 2
/* If WIFEXITED(STATUS), the low-order 8 bits of the status. */
#define __WEXITSTATUS(status) (((status) & 0xff00) >> 8)
@@ -145,7 +157,6 @@
#include <waitq.h>
#include <futex.h>
#include <rlimit.h>
struct user_fpregs_struct
{
@@ -212,7 +223,7 @@ struct user
unsigned long int u_debugreg [8];
};
#define AUXV_LEN 14
#define AUXV_LEN 16
struct vm_range {
struct list_head list;
@@ -233,9 +244,18 @@ struct vm_regions {
struct process_vm;
struct sigfd {
struct sigfd *next;
int fd;
__sigset_t mask;
};
#define SFD_CLOEXEC 02000000
#define SFD_NONBLOCK 04000
struct sig_handler {
ihk_spinlock_t lock;
ihk_atomic_t use;
struct sigfd *sigfd;
struct k_sigaction action[_NSIG];
};
@@ -243,6 +263,7 @@ struct sig_pending {
struct list_head list;
sigset_t sigmask;
siginfo_t info;
int ptracecont;
};
struct sig_shared {
@@ -267,6 +288,14 @@ struct fork_tree_node {
int pid;
int tid;
int pgid;
int ruid;
int euid;
int suid;
int fsuid;
int rgid;
int egid;
int sgid;
int fsgid;
struct fork_tree_node *parent;
struct list_head children;
@@ -295,6 +324,12 @@ struct fork_tree_node {
*/
int ptrace;
/* Store ptrace event message.
PTRACE_O_xxx will store event message here.
PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
/* Store event related to signal. For example,
it represents that the process has been resumed by SIGCONT. */
int signal_flags;
@@ -306,6 +341,29 @@ struct fork_tree_node {
void hold_fork_tree_node(struct fork_tree_node *ftn);
void release_fork_tree_node(struct fork_tree_node *ftn);
/*
* Scheduling policies
*/
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
#define SCHED_DEADLINE 6
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000
/*
* For the sched_{set,get}attr() calls
*/
#define SCHED_FLAG_RESET_ON_FORK 0x01
struct sched_param {
int sched_priority;
};
struct process {
int cpu_id;
@@ -317,6 +375,8 @@ struct process {
// Runqueue list entry
struct list_head sched_list;
int sched_policy;
struct sched_param sched_param;
ihk_spinlock_t spin_sleep_lock;
int spin_sleep;
@@ -327,6 +387,8 @@ struct process {
} thread;
volatile int sigevent;
int nohost;
int execed;
sigset_t sigmask;
stack_t sigstack;
ihk_spinlock_t sigpendinglock;
@@ -334,7 +396,7 @@ struct process {
struct sig_shared *sigshared;
struct sig_handler *sighandler;
struct rlimit rlimit_stack;
struct rlimit rlimit[MCK_RLIM_MAX];
pgio_func_t *pgio_fp;
void *pgio_arg;
@@ -343,7 +405,12 @@ struct process {
cpu_set_t cpu_set;
unsigned long saved_auxv[AUXV_LEN];
struct user *userp;
unsigned long *ptrace_debugreg; /* debug registers for ptrace */
struct sig_pending *ptrace_recvsig;
struct sig_pending *ptrace_sendsig;
fp_regs_struct *fp_regs;
char *saved_cmdline;
long saved_cmdline_len;
};
struct process_vm {
@@ -364,6 +431,7 @@ struct process_vm {
cpu_set_t cpu_set;
ihk_spinlock_t cpu_set_lock;
int exiting;
};
@@ -393,6 +461,10 @@ int change_prot_process_memory_range(
unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off);
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end);
int invalidate_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t start, uintptr_t end);
struct vm_range *lookup_process_memory_range(
struct process_vm *vm, uintptr_t start, uintptr_t end);
struct vm_range *next_process_memory_range(
@@ -402,7 +474,8 @@ struct vm_range *previous_process_memory_range(
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend);
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr,
uint64_t reason);
int remove_process_region(struct process *proc,
unsigned long start, unsigned long end);
struct program_load_desc;
@@ -428,5 +501,6 @@ void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
struct process *findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate);
void process_unlock(void *savelock, unsigned long irqstate);
void release_cpuid(int cpuid);
#endif

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 - 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -12,38 +13,71 @@
#ifndef HEADER_SHM_H
#define HEADER_SHM_H
/* begin types.h */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
/* end types.h */
#include <list.h>
#include <memobj.h>
#include <arch/shm.h>
typedef uint64_t shmatt_t;
enum {
/* for key_t */
IPC_PRIVATE = 0,
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
/* for shmflg */
IPC_CREAT = 01000,
IPC_EXCL = 02000,
SHM_RDONLY = 010000,
SHM_RND = 020000,
SHM_REMAP = 040000,
SHM_EXEC = 0100000,
/* for shm_mode */
SHM_DEST = 01000,
SHM_LOCKED = 02000,
/* for cmd of shmctl() */
IPC_RMID = 0,
IPC_SET = 1,
IPC_STAT = 2,
IPC_INFO = 3,
SHM_LOCK = 11,
SHM_UNLOCK = 12,
SHM_STAT = 13,
SHM_INFO = 14,
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
shmatt_t shm_nattch;
uint8_t padding[16];
struct shmobj {
struct memobj memobj; /* must be first */
int index;
uint8_t padding[4];
size_t real_segsz;
struct shmid_ds ds;
struct list_head page_list;
struct list_head chain; /* shmobj_list */
};
struct shminfo {
uint64_t shmmax;
uint64_t shmmin;
uint64_t shmmni;
uint64_t shmseg;
uint64_t shmall;
uint8_t padding[32];
};
struct shm_info {
int32_t used_ids;
uint8_t padding[4];
uint64_t shm_tot;
uint64_t shm_rss;
uint64_t shm_swp;
uint64_t swap_attempts;
uint64_t swap_successes;
};
void shmobj_list_lock(void);
void shmobj_list_unlock(void);
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
void shmobj_destroy(struct shmobj *obj);
#endif /* HEADER_SHM_H */

View File

@@ -15,6 +15,7 @@
#include <ihk/context.h>
#include <ihk/memconst.h>
#include <rlimit.h>
#define NUM_SYSCALLS 255
@@ -34,12 +35,15 @@
#define SCD_MSG_SYSCALL_ONESIDE 0x4
#define SCD_MSG_SEND_SIGNAL 0x8
#define SCD_MSG_CLEANUP_PROCESS 0x9
#define SCD_MSG_PROCFS_CREATE 0x10
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_DEBUG_LOG 0x20
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003
@@ -109,6 +113,24 @@ struct program_image_section {
};
#define SHELL_PATH_MAX_LEN 1024
#define MCK_RLIM_MAX 20
#define MCK_RLIMIT_AS 0
#define MCK_RLIMIT_CORE 1
#define MCK_RLIMIT_CPU 2
#define MCK_RLIMIT_DATA 3
#define MCK_RLIMIT_FSIZE 4
#define MCK_RLIMIT_LOCKS 5
#define MCK_RLIMIT_MEMLOCK 6
#define MCK_RLIMIT_MSGQUEUE 7
#define MCK_RLIMIT_NICE 8
#define MCK_RLIMIT_NOFILE 9
#define MCK_RLIMIT_NPROC 10
#define MCK_RLIMIT_RSS 11
#define MCK_RLIMIT_RTPRIO 12
#define MCK_RLIMIT_RTTIME 13
#define MCK_RLIMIT_SIGPENDING 14
#define MCK_RLIMIT_STACK 15
struct program_load_desc {
int num_sections;
@@ -118,6 +140,7 @@ struct program_load_desc {
int err;
int stack_prot;
int pgid;
int cred[8];
unsigned long entry;
unsigned long user_start;
unsigned long user_end;
@@ -132,8 +155,7 @@ struct program_load_desc {
unsigned long args_len;
char *envs;
unsigned long envs_len;
unsigned long rlimit_stack_cur;
unsigned long rlimit_stack_max;
struct rlimit rlimit[MCK_RLIM_MAX];
unsigned long interp_align;
char shell_path[SHELL_PATH_MAX_LEN];
struct program_image_section sections[0];
@@ -217,9 +239,9 @@ struct syscall_params {
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0)
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu, int pid);
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
extern int obtain_clone_cpuid();
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);

View File

@@ -29,6 +29,7 @@
#include <process.h>
#include <init.h>
#include <cls.h>
#include <syscall.h>
//#define IOCTL_FUNC_EXTENSION
#ifdef IOCTL_FUNC_EXTENSION
@@ -40,7 +41,7 @@
#ifdef DEBUG_PRINT_INIT
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
int osnum = 0;
@@ -118,6 +119,27 @@ char *find_command_line(char *name)
return strstr(cmdline, name);
}
/*
 * Print the kernel command line and, if it contains an "osnum=" key,
 * parse the run of decimal digits that follows the key into the global
 * osnum and print the result. When the key is absent osnum is left
 * untouched and nothing further is printed.
 */
static void parse_kargs(void)
{
	char *osnum_key = "osnum=";
	char *cursor;

	kprintf("KCommand Line: %s\n", ihk_mc_get_kernel_args());

	cursor = find_command_line(osnum_key);
	if (cursor == NULL) {
		return;
	}

	/* Step over the key itself, then accumulate digits base 10. */
	cursor += strlen(osnum_key);
	for (osnum = 0; ('0' <= *cursor) && (*cursor <= '9'); ++cursor) {
		osnum = (osnum * 10) + (*cursor - '0');
	}
	kprintf("osnum: %d\n", osnum);
}
void pc_init(void)
{
int i;
@@ -134,15 +156,6 @@ void pc_init(void)
APT_TYPE_STALL, APT_TYPE_CYCLE }, // not updated for KNC
};
p = find_command_line("osnum=");
if (p != NULL) {
while (('0' <= *p) && (*p <= '9')) {
osnum *= 10;
osnum += *p++ - '0';
}
}
dkprintf("osnum: %d\n", osnum);
if (!(p = find_command_line("perfctr"))) {
dkprintf("perfctr not initialized.\n");
@@ -189,10 +202,6 @@ static void pc_test(void)
static void rest_init(void)
{
char *cmdline;
cmdline = ihk_mc_get_kernel_args();
kprintf("KCommand Line: %s\n", cmdline);
handler_init();
#ifdef USE_DMA
@@ -229,6 +238,7 @@ static void post_init(void)
ihk_mc_spinlock_init(&syscall_lock);
}
ap_start();
create_os_procfs_files();
}
#ifdef DCFA_RUN
extern void user_main();
@@ -247,6 +257,14 @@ int main(void)
arch_init();
/*
* In attached-mic,
* bootparam is not mapped until arch_init() is finished.
* In builtin-mic and builtin-x86,
* virtual address of bootparam is changed in arch_init().
*/
parse_kargs();
mem_init();
rest_init();

View File

@@ -24,7 +24,7 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif

View File

@@ -44,7 +44,7 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
@@ -198,8 +198,7 @@ void coredump(struct process *proc, void *regs)
request.args[0] = chunks;
request.args[1] = virt_to_phys(coretable);
/* no data for now */
ret = do_syscall(&request, proc->uctx,
proc->cpu_id, proc->ftn->pid);
ret = do_syscall(&request, proc->cpu_id, proc->ftn->pid);
if (ret == 0) {
kprintf("dumped core.\n");
} else {
@@ -214,8 +213,8 @@ static void unhandled_page_fault(struct process *proc, void *fault_addr, void *r
struct process_vm *vm = proc->vm;
struct vm_range *range;
char found;
int irqflags;
unsigned long error = ((struct x86_regs *)regs)->error;
unsigned long irqflags;
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
irqflags = kprintf_lock();
dkprintf("[%d] Page fault for 0x%lX\n",
@@ -371,20 +370,50 @@ static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n",
ihk_mc_get_processor_id(), fault_addr, reason, regs);
error = page_fault_process(proc, fault_addr, reason);
cpu_enable_interrupt();
error = page_fault_process_vm(proc->vm, fault_addr, reason);
if (error) {
struct siginfo info;
if (error == -ECANCELED) {
kprintf("process is exiting, terminate.\n");
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock);
proc->ftn->status = PS_ZOMBIE;
ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock);
release_fork_tree_node(proc->ftn->parent);
release_fork_tree_node(proc->ftn);
//release_process(proc);
schedule();
}
kprintf("[%d]page_fault_handler(%p,%lx,%p):"
"fault proc failed. %d\n",
"fault vm failed. %d\n",
ihk_mc_get_processor_id(), fault_addr,
reason, regs, error);
unhandled_page_fault(proc, fault_addr, regs);
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
info.si_code = BUS_ADRERR;
info._sifields._sigfault.si_addr = fault_addr;
set_signal(SIGBUS, regs, &info);
}
else {
struct process_vm *vm = proc->vm;
struct vm_range *range;
info.si_signo = SIGSEGV;
info.si_code = SEGV_MAPERR;
list_for_each_entry(range, &vm->vm_range_list, list) {
if (range->start <= (unsigned long)fault_addr && range->end > (unsigned long)fault_addr) {
info.si_code = SEGV_ACCERR;
break;
}
}
info._sifields._sigfault.si_addr = fault_addr;
set_signal(SIGSEGV, regs, &info);
}
check_signal(0, regs);

View File

@@ -38,14 +38,22 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
extern long do_arch_prctl(unsigned long code, unsigned long address);
extern long alloc_debugreg(struct process *proc);
extern void save_debugreg(unsigned long *debugreg);
extern void restore_debugreg(unsigned long *debugreg);
extern void clear_debugreg(void);
extern void clear_single_step(struct process *proc);
static void insert_vm_range_list(struct process_vm *vm,
struct vm_range *newrange);
static int copy_user_ranges(struct process *proc, struct process *org);
extern void release_fp_regs(struct process *proc);
extern void save_fp_regs(struct process *proc);
extern void restore_fp_regs(struct process *proc);
void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
int refcount_fork_tree_node(struct fork_tree_node *ftn)
@@ -98,6 +106,10 @@ void init_fork_tree_node(struct fork_tree_node *ftn,
ftn->parent = NULL;
if (parent) {
ftn->parent = parent;
ftn->pgid = parent->pgid;
ftn->ruid = parent->ruid;
ftn->euid = parent->euid;
ftn->suid = parent->suid;
}
INIT_LIST_HEAD(&ftn->children);
INIT_LIST_HEAD(&ftn->siblings_list);
@@ -125,7 +137,8 @@ static int init_process_vm(struct process *owner, struct process_vm *vm)
vm->owner_process = owner;
memset(&vm->cpu_set, 0, sizeof(cpu_set_t));
ihk_mc_spinlock_init(&vm->cpu_set_lock);
vm->exiting = 0;
return 0;
}
@@ -149,6 +162,8 @@ struct process *create_process(unsigned long user_pc)
}
}
proc->sched_policy = SCHED_NORMAL;
proc->sighandler = kmalloc(sizeof(struct sig_handler), IHK_MC_AP_NOWAIT);
if(!proc->sighandler){
goto err_free_process;
@@ -235,7 +250,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_STACK_POINTER, sp);
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER, pc);
proc->rlimit_stack = org->rlimit_stack;
memcpy(proc->rlimit, org->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
proc->sigmask = org->sigmask;
proc->ftn = kmalloc(sizeof(struct fork_tree_node), IHK_MC_AP_NOWAIT);
@@ -245,14 +260,13 @@ struct process *clone_process(struct process *org, unsigned long pc,
proc->ftn->termsig = termsig;
init_fork_tree_node(proc->ftn, (clone_flags & CLONE_VM) ? NULL : org->ftn,
proc);
init_fork_tree_node(proc->ftn, org->ftn, proc);
/* clone() */
if (clone_flags & CLONE_VM) {
ihk_atomic_inc(&org->vm->refcount);
proc->vm = org->vm;
proc->sched_policy = org->sched_policy;
proc->sched_param.sched_priority = org->sched_param.sched_priority;
/* clone signal handlers */
if (clone_flags & CLONE_SIGHAND) {
proc->sigstack.ss_sp = NULL;
proc->sigstack.ss_flags = SS_DISABLE;
proc->sigstack.ss_size = 0;
@@ -266,7 +280,7 @@ struct process *clone_process(struct process *org, unsigned long pc,
ihk_mc_spinlock_init(&proc->sigpendinglock);
INIT_LIST_HEAD(&proc->sigpending);
}
/* fork() */
/* copy signal handlers (i.e., fork()) */
else {
dkprintf("fork(): sighandler\n");
proc->sighandler = kmalloc(sizeof(struct sig_handler),
@@ -291,7 +305,15 @@ struct process *clone_process(struct process *org, unsigned long pc,
INIT_LIST_HEAD(&proc->sigshared->sigpending);
ihk_mc_spinlock_init(&proc->sigpendinglock);
INIT_LIST_HEAD(&proc->sigpending);
}
/* clone VM */
if (clone_flags & CLONE_VM) {
ihk_atomic_inc(&org->vm->refcount);
proc->vm = org->vm;
}
/* fork() */
else {
proc->vm = (struct process_vm *)(proc + 1);
dkprintf("fork(): init_process_vm()\n");
@@ -309,18 +331,18 @@ struct process *clone_process(struct process *org, unsigned long pc,
}
dkprintf("fork(): copy_user_ranges() OK\n");
/* Add proc's fork_tree_node to parent's children list */
ihk_mc_spinlock_lock_noirq(&org->ftn->lock);
list_add_tail(&proc->ftn->siblings_list, &org->ftn->children);
ihk_mc_spinlock_unlock_noirq(&org->ftn->lock);
/* We hold a reference to parent */
hold_fork_tree_node(proc->ftn->parent);
/* Parent holds a reference to us */
hold_fork_tree_node(proc->ftn);
}
/* Add thread/proc's fork_tree_node to parent's children list */
ihk_mc_spinlock_lock_noirq(&org->ftn->lock);
list_add_tail(&proc->ftn->siblings_list, &org->ftn->children);
ihk_mc_spinlock_unlock_noirq(&org->ftn->lock);
/* We hold a reference to parent */
hold_fork_tree_node(proc->ftn->parent);
/* Parent holds a reference to us */
hold_fork_tree_node(proc->ftn);
ihk_mc_spinlock_init(&proc->spin_sleep_lock);
proc->spin_sleep = 0;
@@ -345,18 +367,15 @@ int ptrace_traceme(void){
struct fork_tree_node *child, *next;
dkprintf("ptrace_traceme,pid=%d,proc->ftn->parent=%p\n", proc->ftn->pid, proc->ftn->parent);
if (proc->ftn->parent == NULL) {
if (proc->ftn->parent == NULL || proc->ftn->ptrace) {
error = -EPERM;
goto out;
}
dkprintf("ptrace_traceme,parent->pid=%d\n", proc->ftn->parent->pid);
ihk_mc_spinlock_lock_noirq(&proc->ftn->lock);
proc->ftn->ptrace = PT_TRACED | PT_TRACE_EXEC;
proc->ftn->ppid_parent = proc->ftn->parent;
ihk_mc_spinlock_lock_noirq(&proc->ftn->parent->lock);
list_for_each_entry_safe(child, next, &proc->ftn->parent->children, siblings_list) {
if(child == proc->ftn) {
@@ -368,12 +387,20 @@ int ptrace_traceme(void){
error = -EPERM;
goto out_notfound;
found:
proc->ftn->ptrace = PT_TRACED | PT_TRACE_EXEC;
proc->ftn->ppid_parent = proc->ftn->parent;
list_add_tail(&proc->ftn->ptrace_siblings_list, &proc->ftn->parent->ptrace_children);
ihk_mc_spinlock_unlock_noirq(&proc->ftn->parent->lock);
ihk_mc_spinlock_unlock_noirq(&proc->ftn->lock);
if (proc->ptrace_debugreg == NULL) {
error = alloc_debugreg(proc);
}
clear_single_step(proc);
out:
dkprintf("ptrace_traceme,returning,error=%d\n", error);
return error;
@@ -428,6 +455,15 @@ static int copy_user_ranges(struct process *proc, struct process *org)
vaddr += PAGE_SIZE;
continue;
}
if (1) {
struct page *page;
page = phys_to_page(pte_get_phys(ptep));
if (page && page_is_in_memobj(page)) {
vaddr += PAGE_SIZE;
continue;
}
}
dkprintf("copy_user_ranges(): 0x%lx PTE found\n", vaddr);
@@ -859,8 +895,8 @@ enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fa
attr |= PTATTR_NO_EXECUTE;
}
if ((flag & VR_MEMTYPE_MASK) == VR_MEMTYPE_UC) {
attr |= PTATTR_UNCACHABLE;
if (flag & VR_WRITE_COMBINED) {
attr |= PTATTR_WRITE_COMBINED;
}
return attr;
@@ -1196,6 +1232,154 @@ out:
return error;
}
/*
 * Argument bundle that sync_process_memory_range() hands to
 * sync_one_page() through visit_pte_range().
 */
struct sync_args {
/* memory object passed to memobj_flush_page() for each dirty page */
struct memobj *memobj;
};
/*
 * visit_pte_range() callback used by sync_process_memory_range().
 *
 * arg0 points to a struct sync_args. Entries that are null, that hold a
 * file offset, or that are not dirty are skipped. For any other entry the
 * dirty bit is cleared, the TLB entry for pgaddr is flushed, and the page
 * is handed to memobj_flush_page(). If that call fails the dirty bit is
 * set again so the page is not treated as clean.
 *
 * Returns 0 on success or when there was nothing to do, otherwise the
 * error reported by memobj_flush_page().
 */
static int sync_one_page(void *arg0, page_table_t pt, pte_t *ptep,
		void *pgaddr, size_t pgsize)
{
	struct sync_args *sync = arg0;
	uintptr_t paddr;
	int rc = 0;

	dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize);

	if (!pte_is_null(ptep) && !pte_is_fileoff(ptep, pgsize)
			&& pte_is_dirty(ptep, pgsize)) {
		/* clear the dirty bit before flushing the page contents */
		pte_clear_dirty(ptep, pgsize);
		flush_tlb_single((uintptr_t)pgaddr);	/* XXX: TLB flush */

		paddr = pte_get_phys(ptep);
		rc = memobj_flush_page(sync->memobj, paddr, pgsize);
		if (rc) {
			ekprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):"
					"flush failed. %d\n",
					arg0, pt, ptep, *ptep, pgaddr, pgsize, rc);
			/* the flush did not happen: keep the page marked dirty */
			pte_set_dirty(ptep, pgsize);
		}
	}

	dkprintf("sync_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize, rc);
	return rc;
}
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end)
{
int error;
struct sync_args args;
dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx)\n",
vm, range, start, end);
args.memobj = range->memobj;
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
VPTEF_SKIP_NULL, &sync_one_page, &args);
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error) {
ekprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):"
"visit failed%d\n",
vm, range, start, end, error);
goto out;
}
out:
dkprintf("sync_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
vm, range, start, end, error);
return error;
}
/*
 * Argument bundle that invalidate_process_memory_range() hands to
 * invalidate_one_page() through visit_pte_range().
 */
struct invalidate_args {
/* range being invalidated; its objoff, start and memobj fields are read */
struct vm_range *range;
};
/*
 * visit_pte_range() callback used by invalidate_process_memory_range().
 *
 * arg0 points to a struct invalidate_args. Entries that are null or that
 * already hold a file offset are left alone. For a mapped entry:
 *  - the PTE is replaced either by a null entry (when the page sits at the
 *    offset implied by its position in the range) or by a file-offset
 *    entry recording page->offset,
 *  - the TLB entry for pgaddr is flushed,
 *  - the page is unmapped (a non-zero page_unmap() result is fatal),
 *  - memobj_invalidate_page() is called on the range's memobj.
 *
 * Returns 0 on success, otherwise the error from memobj_invalidate_page().
 */
static int invalidate_one_page(void *arg0, page_table_t pt, pte_t *ptep,
		void *pgaddr, size_t pgsize)
{
	struct invalidate_args *args = arg0;
	struct vm_range *range = args->range;
	int error;
	uintptr_t phys;
	struct page *page;
	off_t linear_off;
	pte_t apte;

	dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx)\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize);

	if (pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
		error = 0;
		goto out;
	}

	phys = pte_get_phys(ptep);
	page = phys_to_page(phys);
	linear_off = range->objoff + ((uintptr_t)pgaddr - range->start);
	if (!page || (page->offset == linear_off)) {
		/*
		 * Either the page sits at its linear offset, or there is no
		 * struct page for this physical address and therefore no
		 * recorded offset to preserve. The latter case used to fall
		 * into the branch below and dereference a NULL page.
		 * NOTE(review): clearing the entry for an untracked page
		 * assumes a later fault resolves it by linear offset --
		 * confirm against the fault path.
		 */
		pte_make_null(&apte, pgsize);
	}
	else {
		/* remember the non-linear file offset in the PTE itself */
		pte_make_fileoff(page->offset, 0, pgsize, &apte);
	}
	pte_xchg(ptep, &apte);
	flush_tlb_single((uintptr_t)pgaddr);	/* XXX: TLB flush */

	if (page && page_unmap(page)) {
		panic("invalidate_one_page");
	}

	error = memobj_invalidate_page(range->memobj, phys, pgsize);
	if (error) {
		ekprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):"
				"invalidate failed. %d\n",
				arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
		goto out;
	}

	error = 0;
out:
	dkprintf("invalidate_one_page(%p,%p,%p %#lx,%p,%#lx):%d\n",
			arg0, pt, ptep, *ptep, pgaddr, pgsize, error);
	return error;
}
int invalidate_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t start, uintptr_t end)
{
int error;
struct invalidate_args args;
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx)\n",
vm, range, start, end);
args.range = range;
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
memobj_lock(range->memobj);
error = visit_pte_range(vm->page_table, (void *)start, (void *)end,
VPTEF_SKIP_NULL, &invalidate_one_page, &args);
memobj_unlock(range->memobj);
ihk_mc_spinlock_unlock_noirq(&vm->page_table_lock);
if (error) {
ekprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):"
"visit failed%d\n",
vm, range, start, end, error);
goto out;
}
out:
dkprintf("invalidate_process_memory_range(%p,%p,%#lx,%#lx):%d\n",
vm, range, start, end, error);
return error;
}
static int page_fault_process_memory_range(struct process_vm *vm, struct vm_range *range, uintptr_t fault_addr, uint64_t reason)
{
int error;
@@ -1206,12 +1390,13 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
enum ihk_mc_pt_attribute attr;
uintptr_t phys;
struct page *page = NULL;
unsigned long memobj_flag = 0;
dkprintf("page_fault_process_memory_range(%p,%lx-%lx %lx,%lx,%lx)\n", vm, range->start, range->end, range->flag, fault_addr, reason);
ihk_mc_spinlock_lock_noirq(&vm->page_table_lock);
/*****/
ptep = ihk_mc_pt_lookup_pte(vm->page_table, (void *)fault_addr, &pgaddr, &pgsize, &p2align);
if (!(reason & PF_PROT) && ptep && !pte_is_null(ptep)
if (!(reason & (PF_PROT | PF_PATCH)) && ptep && !pte_is_null(ptep)
&& !pte_is_fileoff(ptep, pgsize)) {
if (!pte_is_present(ptep)) {
error = -EFAULT;
@@ -1232,7 +1417,6 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
pgsize = PAGE_SIZE;
p2align = PAGE_P2ALIGN;
}
attr = arch_vrflag_to_ptattr(range->flag, reason, ptep);
pgaddr = (void *)(fault_addr & ~(pgsize - 1));
if (!ptep || pte_is_null(ptep) || pte_is_fileoff(ptep, pgsize)) {
if (range->memobj) {
@@ -1244,7 +1428,8 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
else {
off = pte_get_off(ptep, pgsize);
}
error = memobj_get_page(range->memobj, off, p2align, &phys);
error = memobj_get_page(range->memobj, off, p2align,
&phys, &memobj_flag);
if (error) {
if (error != -ERESTART) {
}
@@ -1270,9 +1455,16 @@ static int page_fault_process_memory_range(struct process_vm *vm, struct vm_rang
else {
phys = pte_get_phys(ptep);
}
page = phys_to_page(phys);
attr = arch_vrflag_to_ptattr(range->flag | memobj_flag, reason, ptep);
/*****/
if ((range->flag & VR_PRIVATE) && (!page || page_is_in_memobj(page) || page_is_multi_mapped(page))) {
if (((range->flag & VR_PRIVATE)
|| ((reason & PF_PATCH)
&& !(range->flag & VR_PROT_WRITE)))
&& (!page || page_is_in_memobj(page) || page_is_multi_mapped(page))) {
if (!(attr & PTATTR_DIRTY)) {
attr &= ~PTATTR_WRITABLE;
}
@@ -1324,37 +1516,41 @@ out:
return error;
}
static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64_t reason)
static int do_page_fault_process_vm(struct process_vm *vm, void *fault_addr0, uint64_t reason)
{
struct process_vm *vm = proc->vm;
int error;
const uintptr_t fault_addr = (uintptr_t)fault_addr0;
struct vm_range *range;
dkprintf("[%d]do_page_fault_process(%p,%lx,%lx)\n",
ihk_mc_get_processor_id(), proc, fault_addr0, reason);
dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx)\n",
ihk_mc_get_processor_id(), vm, fault_addr0, reason);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
if (vm->exiting) {
error = -ECANCELED;
goto out;
}
range = lookup_process_memory_range(vm, fault_addr, fault_addr+1);
if (range == NULL) {
error = -EFAULT;
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):"
"out of range. %d\n",
ihk_mc_get_processor_id(), proc,
ihk_mc_get_processor_id(), vm,
fault_addr0, reason, error);
goto out;
}
if (((range->flag & VR_PROT_MASK) == VR_PROT_NONE)
|| ((reason & PF_WRITE)
|| (((reason & PF_WRITE) && !(reason & PF_PATCH))
&& !(range->flag & VR_PROT_WRITE))
|| ((reason & PF_INSTR)
&& !(range->flag & VR_PROT_EXEC))) {
error = -EFAULT;
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):"
"access denied. %d\n",
ihk_mc_get_processor_id(), proc,
ihk_mc_get_processor_id(), vm,
fault_addr0, reason, error);
goto out;
}
@@ -1391,9 +1587,9 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
goto out;
}
if (error) {
kprintf("[%d]do_page_fault_process(%p,%lx,%lx):"
kprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx):"
"fault range failed. %d\n",
ihk_mc_get_processor_id(), proc,
ihk_mc_get_processor_id(), vm,
fault_addr0, reason, error);
goto out;
}
@@ -1401,22 +1597,19 @@ static int do_page_fault_process(struct process *proc, void *fault_addr0, uint64
error = 0;
out:
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
dkprintf("[%d]do_page_fault_process(%p,%lx,%lx): %d\n",
ihk_mc_get_processor_id(), proc, fault_addr0,
dkprintf("[%d]do_page_fault_process_vm(%p,%lx,%lx): %d\n",
ihk_mc_get_processor_id(), vm, fault_addr0,
reason, error);
return error;
}
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason)
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr, uint64_t reason)
{
int error;
if (proc != cpu_local_var(current)) {
panic("page_fault_process: other process");
}
struct process *proc = cpu_local_var(current);
for (;;) {
error = do_page_fault_process(proc, fault_addr, reason);
error = do_page_fault_process_vm(fault_vm, fault_addr, reason);
if (error != -ERESTART) {
break;
}
@@ -1445,10 +1638,11 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
int error;
unsigned long *p;
unsigned long minsz;
unsigned long at_rand;
/* create stack range */
minsz = PAGE_SIZE;
size = process->rlimit_stack.rlim_cur & PAGE_MASK;
size = process->rlimit[MCK_RLIMIT_STACK].rlim_cur & PAGE_MASK;
if (size > (USER_END / 2)) {
size = USER_END / 2;
}
@@ -1487,6 +1681,12 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
/* set up initial stack frame */
p = (unsigned long *)(stack + minsz);
s_ind = -1;
/* "random" 16 bytes on the very top */
p[s_ind--] = 0x010101011;
p[s_ind--] = 0x010101011;
at_rand = end + sizeof(unsigned long) * s_ind;
/* auxiliary vector */
/* If you add/delete entries, please increase/decrease
AUXV_LEN in include/process.h. */
@@ -1504,10 +1704,14 @@ int init_process_stack(struct process *process, struct program_load_desc *pn,
p[s_ind--] = AT_PAGESZ;
p[s_ind--] = pn->at_clktck; /* AT_CLKTCK */
p[s_ind--] = AT_CLKTCK;
p[s_ind--] = at_rand; /* AT_RANDOM */
p[s_ind--] = AT_RANDOM;
/* Save auxiliary vector for later use. */
memcpy(process->saved_auxv, &p[s_ind + 1],
sizeof(process->saved_auxv));
p[s_ind--] = 0; /* envp terminating NULL */
p[s_ind--] = 0; /* envp terminating NULL */
/* envp */
for (arg_ind = envc - 1; arg_ind > -1; --arg_ind) {
p[s_ind--] = (unsigned long)env[arg_ind];
@@ -1656,6 +1860,8 @@ void flush_process_memory(struct process *proc)
dkprintf("flush_process_memory(%p)\n", proc);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
/* Let concurrent page faults know the VM will be gone */
vm->exiting = 1;
list_for_each_entry_safe(range, next, &vm->vm_range_list, list) {
if (range->memobj) {
// XXX: temporary of temporary
@@ -1736,9 +1942,9 @@ int populate_process_memory(struct process *proc, void *start, size_t len)
end = (uintptr_t)start + len;
for (addr = (uintptr_t)start; addr < end; addr += PAGE_SIZE) {
error = page_fault_process(proc, (void *)addr, reason);
error = page_fault_process_vm(proc->vm, (void *)addr, reason);
if (error) {
ekprintf("populate_process_range:page_fault_process"
ekprintf("populate_process_range:page_fault_process_vm"
"(%p,%lx,%lx) failed %d\n",
proc, addr, reason, error);
goto out;
@@ -1788,6 +1994,21 @@ void destroy_process(struct process *proc)
list_del(&pending->list);
kfree(pending);
}
if (proc->ptrace_debugreg) {
kfree(proc->ptrace_debugreg);
}
if (proc->ptrace_recvsig) {
kfree(proc->ptrace_recvsig);
}
if (proc->ptrace_sendsig) {
kfree(proc->ptrace_sendsig);
}
if (proc->fp_regs) {
release_fp_regs(proc);
}
if (proc->saved_cmdline) {
kfree(proc->saved_cmdline);
}
ihk_mc_free_pages(proc, KERNEL_STACK_NR_PAGES);
}
@@ -1820,7 +2041,8 @@ static void idle(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
v->status = CPU_STATUS_IDLE;
if(v->status == CPU_STATUS_RUNNING)
v->status = CPU_STATUS_IDLE;
cpu_enable_interrupt();
while (1) {
@@ -1845,7 +2067,8 @@ static void idle(void)
* 4) The idle process was resumed, and halted for waiting for
* the interrupt that had already been handled.
*/
if (v->status == CPU_STATUS_IDLE) {
if (v->status == CPU_STATUS_IDLE ||
v->status == CPU_STATUS_RESERVED) {
long s;
struct process *p;
@@ -1858,7 +2081,8 @@ static void idle(void)
}
ihk_mc_spinlock_unlock(&v->runq_lock, s);
}
if (v->status == CPU_STATUS_IDLE) {
if (v->status == CPU_STATUS_IDLE ||
v->status == CPU_STATUS_RESERVED) {
cpu_safe_halt();
}
else {
@@ -2014,7 +2238,7 @@ redo:
/* No process? Run idle.. */
if (!next) {
next = &cpu_local_var(idle);
v->status = CPU_STATUS_IDLE;
v->status = v->runq_len? CPU_STATUS_RESERVED: CPU_STATUS_IDLE;
}
}
@@ -2026,7 +2250,17 @@ redo:
if (switch_ctx) {
dkprintf("[%d] schedule: %d => %d \n",
ihk_mc_get_processor_id(),
prev ? prev->tid : 0, next ? next->tid : 0);
prev ? prev->ftn->tid : 0, next ? next->ftn->tid : 0);
if (prev && prev->ptrace_debugreg) {
save_debugreg(prev->ptrace_debugreg);
if (next->ptrace_debugreg == NULL) {
clear_debugreg();
}
}
if (next->ptrace_debugreg) {
restore_debugreg(next->ptrace_debugreg);
}
ihk_mc_load_page_table(next->vm->page_table);
@@ -2061,6 +2295,13 @@ redo:
}
}
void
release_cpuid(int cpuid)
{
if (!get_cpu_local_var(cpuid)->runq_len)
get_cpu_local_var(cpuid)->status = CPU_STATUS_IDLE;
}
void check_need_resched(void)
{
struct cpu_local_var *v = get_this_cpu_local_var();
@@ -2168,11 +2409,11 @@ void __runq_add_proc(struct process *proc, int cpu_id)
list_add_tail(&proc->sched_list, &v->runq);
++v->runq_len;
proc->cpu_id = cpu_id;
proc->ftn->status = PS_RUNNING;
//proc->ftn->status = PS_RUNNING; /* not set here */
get_cpu_local_var(cpu_id)->status = CPU_STATUS_RUNNING;
dkprintf("runq_add_proc(): tid %d added to CPU[%d]'s runq\n",
proc->tid, cpu_id);
proc->ftn->tid, cpu_id);
}
void runq_add_proc(struct process *proc, int cpu_id)
@@ -2236,3 +2477,29 @@ process_unlock(void *savelock, unsigned long irqstate)
{
ihk_mc_spinlock_unlock((ihk_spinlock_t *)savelock, irqstate);
}
/*
 * Debug hook selected by arg.
 *
 * arg == 1: for every processor, take its run queue lock and print one
 * line (cpu, pid, tid, status) per queued process whose pid is positive.
 * Any other value of arg does nothing.
 */
void
debug_log(unsigned long arg)
{
	extern int num_processors;
	int cpu;

	if (arg != 1) {
		return;
	}

	for (cpu = 0; cpu < num_processors; cpu++) {
		struct cpu_local_var *clv = get_cpu_local_var(cpu);
		struct process *proc;
		unsigned long flags;

		flags = ihk_mc_spinlock_lock(&(clv->runq_lock));
		list_for_each_entry(proc, &(clv->runq), sched_list) {
			/* entries with a non-positive pid are not reported */
			if (proc->ftn->pid > 0) {
				kprintf("cpu=%d pid=%d tid=%d status=%d\n",
						cpu, proc->ftn->pid, proc->ftn->tid,
						proc->ftn->status);
			}
		}
		ihk_mc_spinlock_unlock(&(clv->runq_lock), flags);
	}
}

View File

@@ -47,6 +47,9 @@ static void create_proc_procfs_file(int pid, char *fname, int mode, int cpuid);
static void delete_proc_procfs_file(int pid, char *fname);
static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, int cpuid);
int copy_from_user(void *dst, const void *src, size_t siz);
int copy_to_user(void *dst, const void *src, size_t siz);
/**
* \brief Create all procfs files for process.
*
@@ -63,9 +66,21 @@ void create_proc_procfs_files(int pid, int cpuid)
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
@@ -116,6 +131,18 @@ void delete_proc_procfs_files(int pid)
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
delete_proc_procfs_file(pid, fname);
@@ -139,6 +166,42 @@ static void delete_proc_procfs_file(int pid, char *fname)
dprintf("delete procfs file: %s done\n", fname);
}
/**
 * \brief create a procfs file for this operating system
 * \param fname relative path name from "host:/proc".
 * \param mode permissions of the file to be created
 *
 * operate_proc_procfs_file() is written for process specific files; it is
 * reused here to create an OS specific file by passing -1 as the pid.
 * The request is issued with SCD_MSG_PROCFS_CREATE and the id of the
 * processor this function runs on (noted as the BSP by the original
 * author).
 */
static void create_os_procfs_file(char *fname, int mode)
{
	operate_proc_procfs_file(-1, fname, SCD_MSG_PROCFS_CREATE, mode,
			ihk_mc_get_processor_id());
}
/**
 * \brief create all procfs files for this operating system
 *
 * Currently this registers a single file, "mcos<osnum>/stat". The name is
 * built in a temporary buffer of PROCFS_NAME_MAX bytes; failing to
 * allocate the buffer or to fit the name into it is fatal (panic).
 */
void create_os_procfs_files(void)
{
	char *fname;
	int n;

	fname = kmalloc(PROCFS_NAME_MAX, IHK_MC_AP_CRITICAL);
	if (fname == NULL) {
		/* the allocation result was previously used unchecked */
		panic("create_os_procfs_files: kmalloc failed");
	}

	/* snprintf() returns int; a negative value signals an error */
	n = snprintf(fname, PROCFS_NAME_MAX, "mcos%d/stat", osnum);
	if ((n < 0) || (n >= PROCFS_NAME_MAX)) panic("/proc/stat");
	create_os_procfs_file(fname, 0444);

	/*
	 * The name buffer was previously leaked.
	 * NOTE(review): freeing it assumes operate_proc_procfs_file() does
	 * not keep the pointer after returning -- create_proc_procfs_files()
	 * reuses one buffer across successive calls, which suggests it does
	 * not, but confirm.
	 */
	kfree(fname);
}
/**
* \brief Create/delete a procfs file for process.
*
@@ -202,6 +265,10 @@ void process_procfs_request(unsigned long rarg)
struct ihk_ikc_channel_desc *syscall_channel;
ihk_spinlock_t *savelock;
unsigned long irqstate;
unsigned long offset;
int count;
int npages;
int is_current = 1; /* is 'proc' same as 'current'? */
dprintf("process_procfs_request: invoked.\n");
@@ -221,7 +288,9 @@ void process_procfs_request(unsigned long rarg)
dprintf("remote pbuf: %x\n", r->pbuf);
pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count);
dprintf("pbuf: %x\n", pbuf);
buf = ihk_mc_map_virtual(pbuf, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
count = r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1));
npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE;
buf = ihk_mc_map_virtual(pbuf, npages, PTATTR_WRITABLE | PTATTR_ACTIVE);
dprintf("buf: %p\n", buf);
if (buf == NULL) {
kprintf("ERROR: process_procfs_request: got a null buffer.\n");
@@ -229,6 +298,8 @@ void process_procfs_request(unsigned long rarg)
goto bufunavail;
}
count = r->count;
offset = r->offset;
dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count);
/*
@@ -270,18 +341,56 @@ void process_procfs_request(unsigned long rarg)
dprintf("mismatched pid. We are %d, but requested pid is %d.\n",
pid, cpu_local_var(current)->pid);
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
tid = pid; /* main thread */
proc = findthread_and_lock(pid, tid, &savelock, &irqstate);
if (!proc) {
dprintf("We cannot find the proper cpu for requested pid.\n");
goto end;
}
else if (proc->cpu_id != ihk_mc_get_processor_id()) {
/* The target process has gone by migration. */
r->newcpu = proc->cpu_id;
dprintf("expected cpu id is %d.\n", proc->cpu_id);
process_unlock(savelock, irqstate);
ans = 0;
} else {
dprintf("We cannot find the proper cpu for requested pid.\n");
goto end;
}
else {
process_unlock(savelock, irqstate);
/* 'proc' is not 'current' */
is_current = 0;
}
}
}
else if (!strcmp(p, "stat")) { /* "/proc/stat" */
extern int num_processors; /* kernel/ap.c */
char *p;
size_t remain;
int cpu;
if (offset > 0) {
ans = 0;
eof = 1;
goto end;
}
} else {
p = buf;
remain = count;
for (cpu = 0; cpu < num_processors; ++cpu) {
size_t n;
n = snprintf(p, remain, "cpu%d\n", cpu);
if (n >= remain) {
ans = -ENOSPC;
eof = 1;
goto end;
}
p += n;
}
ans = p - buf;
eof = 1;
goto end;
}
else {
goto end;
}
dprintf("matched PID: %d.\n", pid);
@@ -297,19 +406,215 @@ void process_procfs_request(unsigned long rarg)
struct vm_range *range;
struct process_vm *vm = proc->vm;
list_for_each_entry(range, &vm->vm_range_list, list) {
dprintf("range: %lx - %lx\n", range->start, range->end);
if ((range->start <= r->offset) &&
(r->offset < range->end)) {
unsigned int len = r->count;
if (range->end < r->offset + r->count) {
len = range->end - r->offset;
if (!is_current) {
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
unsigned long offset = r->offset;
unsigned long left = r->count;
int ret;
ans = 0;
if(left == 0)
goto end;
while(left){
unsigned long pa;
char *va;
int pos = offset & (PAGE_SIZE - 1);
int size = PAGE_SIZE - pos;
if(size > left)
size = left;
ret = page_fault_process_vm(proc->vm,
(void *)offset, reason);
if(ret){
if(ans == 0)
ans = -EIO;
goto end;
}
memcpy((void *)buf, (void *)range->start, len);
ans = len;
ret = ihk_mc_pt_virt_to_phys(vm->page_table,
(void *)offset, &pa);
if(ret){
if(ans == 0)
ans = -EIO;
goto end;
}
va = phys_to_virt(pa);
memcpy(buf + ans, va, size);
offset += size;
left -= size;
ans += size;
}
}
else{
unsigned long offset = r->offset;
unsigned long left = r->count;
unsigned long pos;
unsigned long l;
ans = 0;
list_for_each_entry(range, &vm->vm_range_list, list) {
dprintf("range: %lx - %lx\n", range->start, range->end);
while (left &&
(range->start <= offset) &&
(offset < range->end)) {
pos = offset & (PAGE_SIZE - 1);
l = PAGE_SIZE - pos;
if(l > left)
l = left;
if(copy_from_user(buf, (void *)offset, l)){
if(ans == 0)
ans = -EIO;
goto end;
}
buf += l;
ans += l;
offset += l;
left -= l;
}
}
}
goto end;
}
/*
* mcos%d/PID/maps
*/
if (strcmp(p, "maps") == 0) {
struct vm_range *range;
struct process_vm *vm = proc->vm;
int left = r->count - 1; /* extra 1 for terminating NULL */
int written = 0;
char *_buf = buf;
/* Starting from the middle of a proc file is not supported for maps */
if (offset > 0) {
ans = 0;
eof = 1;
goto end;
}
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
list_for_each_entry(range, &vm->vm_range_list, list) {
int written_now;
/* format is (from man proc):
* address perms offset dev inode pathname
* 08048000-08056000 r-xp 00000000 03:0c 64593 /usr/sbin/gpm
*/
written_now = snprintf(_buf, left,
"%lx-%lx %s%s%s%s %lx %lx:%lx %d %s\n",
range->start, range->end,
range->flag & VR_PROT_READ ? "r" : "-",
range->flag & VR_PROT_WRITE ? "w" : "-",
range->flag & VR_PROT_EXEC ? "x" : "-",
range->flag & VR_PRIVATE ? "p" : "s",
/* TODO: fill in file details! */
0UL,
0UL,
0UL,
0,
""
);
left -= written_now;
_buf += written_now;
written += written_now;
if (left == 0) {
kprintf("%s(): WARNING: buffer too small to fill proc/maps\n",
__FUNCTION__);
break;
}
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
ans = written + 1;
eof = 1;
goto end;
}
/*
* mcos%d/PID/pagemap
*/
/*
 * Fill the reply buffer with one uint64_t pagemap entry per virtual page,
 * obtained from ihk_mc_pt_virt_to_pagemap() on the process's page table.
 * The file offset selects the first entry: entry index = offset / 8,
 * virtual address = index << PAGE_SHIFT.
 */
if (strcmp(p, "pagemap") == 0) {
struct process_vm *vm = proc->vm;
uint64_t *_buf = (uint64_t *)buf;
uint64_t start, end;
/*
 * Offsets below PAGE_SIZE bytes are rejected with an empty reply.
 * Note that eof is not set on this path.
 * NOTE(review): this covers the first PAGE_SIZE / sizeof(uint64_t)
 * entries, not only the entry of the NULL page -- confirm intent.
 */
if (offset < PAGE_SIZE) {
kprintf("WARNING: /proc/pagemap queried for NULL page\n");
ans = 0;
goto end;
}
/* Check alignment */
if ((offset % sizeof(uint64_t) != 0) ||
(count % sizeof(uint64_t) != 0)) {
ans = 0;
eof = 1;
goto end;
}
/* Translate the byte range of entries into a virtual address range. */
start = (offset / sizeof(uint64_t)) << PAGE_SHIFT;
end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
/* One entry per page; count / sizeof(uint64_t) entries are stored. */
while (start < end) {
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->page_table, start);
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->ftn->pid,
start, *_buf);
start += PAGE_SIZE;
++_buf;
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
/* start has been advanced by the loop above and equals end here. */
dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n",
start, end, count);
/* The full requested byte count is reported as read. */
ans = count;
goto end;
}
/*
* mcos%d/PID/status
*/
if (strcmp(p, "status") == 0) {
	/*
	 * Build the status text (Uid, Gid and VmLck lines) in a local buffer
	 * and copy the slice [r->offset, r->offset + r->count) of it into the
	 * reply buffer.  ans is the number of bytes copied; eof is set once
	 * the end of the text has been reached.  An offset beyond the end of
	 * the text leaves ans and eof untouched.
	 */
	struct vm_range *range;
	unsigned long lockedsize = 0;
	char tmp[1024];
	int len;
	struct fork_tree_node *ftn = proc->ftn;

	/* Sum the sizes of all VR_LOCKED ranges under the range lock. */
	ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
	list_for_each_entry(range, &proc->vm->vm_range_list, list) {
		if (range->flag & VR_LOCKED) {
			lockedsize += range->end - range->start;
		}
	}
	ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);

	/* VmLck is reported in kB, rounded up. */
	snprintf(tmp, sizeof(tmp),
			"Uid:\t%d\t%d\t%d\t%d\n"
			"Gid:\t%d\t%d\t%d\t%d\n"
			"VmLck:\t%9lu kB\n",
			ftn->ruid, ftn->euid, ftn->suid, ftn->fsuid,
			ftn->rgid, ftn->egid, ftn->sgid, ftn->fsgid,
			(lockedsize + 1023) >> 10);
	len = strlen(tmp);

	if (r->offset < len) {
		if (r->offset + r->count < len) {
			ans = r->count;
		} else {
			/*
			 * Final chunk: only the bytes from the offset to the end
			 * of the text remain.  Returning len here would over-report
			 * the read size whenever the offset is non-zero.
			 */
			eof = 1;
			ans = len - r->offset;
		}
		/* Exactly ans bytes of text are available at tmp + r->offset. */
		memcpy(buf, tmp + r->offset, ans);
	} else if (r->offset == len) {
		ans = 0;
		eof = 1;
	}
	goto end;
}
@@ -335,6 +640,35 @@ void process_procfs_request(unsigned long rarg)
goto end;
}
/*
* mcos%d/PID/cmdline
*/
/*
 * Copy a slice of the command line saved in proc->saved_cmdline
 * (proc->saved_cmdline_len bytes) into the reply buffer.
 * ans is the number of bytes copied; eof is set when the slice reaches
 * the end of the saved data or when nothing was saved at all.
 * An offset beyond the end leaves ans and eof untouched.
 */
if (strcmp(p, "cmdline") == 0) {
unsigned int limit = proc->saved_cmdline_len;
unsigned int len = r->count;
/* No saved command line: report an empty file. */
if(!proc->saved_cmdline){
ans = 0;
eof = 1;
goto end;
}
if (r->offset < limit) {
/* Shorten the copy when the request runs past the saved data. */
if (limit < r->offset + r->count) {
len = limit - r->offset;
}
memcpy((void *)buf, ((char *) proc->saved_cmdline) + r->offset, len);
ans = len;
if (r->offset + len == limit) {
eof = 1;
}
} else if (r->offset == limit) {
/* Reading exactly at the end: empty read with eof. */
ans = 0;
eof = 1;
}
goto end;
}
/*
* mcos%d/PID/task/PID/mem
*
@@ -351,6 +685,9 @@ void process_procfs_request(unsigned long rarg)
struct vm_range *range;
struct process_vm *vm = proc->vm;
if (!is_current) {
goto end;
}
if (pid != tid) {
/* We are not multithreaded yet. */
goto end;
@@ -445,7 +782,7 @@ void process_procfs_request(unsigned long rarg)
*/
dprintf("could not find a matching entry for %s.\n", p);
end:
ihk_mc_unmap_virtual(buf, 1, 0);
ihk_mc_unmap_virtual(buf, npages, 0);
dprintf("ret: %d, eof: %d\n", ans, eof);
r->ret = ans;
r->eof = eof;

View File

@@ -0,0 +1,24 @@
#!/bin/sh
# Copy the file named by the first argument to the path named by the second.
# The script always exits with status 0, even when cp fails.
# The arguments are quoted so that paths containing spaces or glob
# characters are passed to cp unchanged.
cp "$1" "$2"
exit 0
#set -e
#
#O=`pwd`
#
#make -C $3/../arch/x86/kboot O=$O clean
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x101001000
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x901001000
#
#make -C $3/../arch/x86/elfboot O=$O clean
#make -C $3/../arch/x86/elfboot O=$O
#
#cat elfboot/elfboot kboot/kboot.elf > $2
#
#make -C $3/../arch/x86/kboot O=$O clean
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x201001000
#cat elfboot/elfboot kboot/kboot.elf > $2.8G

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* shared memory object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 - 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -26,12 +27,8 @@
#define ekprintf(...) kprintf(__VA_ARGS__)
#define fkprintf(...) kprintf(__VA_ARGS__)
/*
 * Shared memory object: a struct memobj extended with the state needed
 * for a shared memory segment.
 */
struct shmobj {
struct memobj memobj; /* must be first */
long ref; /* reference count; shmobj_create() starts it at 1 */
struct shmid_ds ds; /* copy of the descriptor passed to shmobj_create() */
struct list_head page_list; /* struct page entries linked through their list member */
};
static LIST_HEAD(shmobj_list_head);
static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED;
static memobj_release_func_t shmobj_release;
static memobj_ref_func_t shmobj_ref;
@@ -98,6 +95,25 @@ static struct page *page_list_first(struct shmobj *obj)
return list_first_entry(&obj->page_list, struct page, list);
}
/***********************************************************************
* shmobj_list
*/
/*
 * Take the file-scope spinlock shmobj_list_lock_body using the _noirq
 * lock primitive.  Pair every call with shmobj_list_unlock().
 */
void shmobj_list_lock(void)
{
	ihk_mc_spinlock_lock_noirq(&shmobj_list_lock_body);
}
/*
 * Release the file-scope spinlock shmobj_list_lock_body that was taken
 * by shmobj_list_lock().
 */
void shmobj_list_unlock(void)
{
	ihk_mc_spinlock_unlock_noirq(&shmobj_list_lock_body);
}
/***********************************************************************
* operations
*/
int the_seq = 0;
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
{
struct shmobj *obj = NULL;
@@ -114,8 +130,11 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
memset(obj, 0, sizeof(*obj));
obj->memobj.ops = &shmobj_ops;
obj->ref = 1;
obj->ds = *ds;
obj->ds.shm_perm.seq = the_seq++;
obj->ds.shm_nattch = 1;
obj->index = -1;
obj->real_segsz = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
page_list_init(obj);
ihk_mc_spinlock_init(&obj->memobj.lock);
@@ -127,65 +146,124 @@ out:
if (obj) {
kfree(obj);
}
dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
dkprintf("shmobj_create_indexed(%p %#lx,%p):%d %p\n",
ds, ds->shm_segsz, objp, error, *objp);
return error;
}
/*
 * Create a shared memory object through shmobj_create() and hand it back
 * as a struct shmobj.
 *
 * On success the MF_SHMDT_OK flag is set on the new object, *objp receives
 * it and 0 is returned.  On failure the error from shmobj_create() is
 * returned and *objp is left untouched.
 */
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp)
{
	struct memobj *memobj;
	int rc = shmobj_create(ds, &memobj);

	if (rc) {
		return rc;
	}

	memobj->flags |= MF_SHMDT_OK;
	*objp = to_shmobj(memobj);
	return 0;
}
/*
 * Tear down a shared memory object.
 *
 * Every page on the object's page list is removed and freed; each page is
 * expected to be in PM_MAPPED mode and to drop to a count of zero,
 * otherwise the kernel panics.
 *
 * An object without an index (index < 0) is then freed directly.  An
 * indexed object is moved onto kds_free_list and the_shm_info.used_ids is
 * decremented; after that, free-list entries whose index equals the_maxi
 * are freed one by one, lowering the_maxi each time.  The object passed in
 * may itself be freed by that trimming.
 *
 * shmobj_release() calls this between shmobj_list_lock() and
 * shmobj_list_unlock().
 */
void shmobj_destroy(struct shmobj *obj)
{
	extern struct shm_info the_shm_info;
	extern struct list_head kds_free_list;
	extern int the_maxi;

	dkprintf("shmobj_destroy(%p [%d %o])\n", obj, obj->index, obj->ds.shm_perm.mode);

	/* zap page_list */
	for (;;) {
		struct page *page;
		int count;

		page = page_list_first(obj);
		if (!page) {
			break;
		}
		page_list_remove(obj, page);

		/*
		 * NOTE(review): page->count is passed by address to
		 * ihk_atomic_sub_return() below, so it is presumably an atomic
		 * type; confirm that printing it with %d is appropriate.
		 */
		dkprintf("shmobj_destroy(%p):"
				"release page. %p %#lx %d %d\n",
				obj, page, page_to_phys(page),
				page->mode, page->count);

		/* Drop the object's reference; it must have been the last one. */
		count = ihk_atomic_sub_return(1, &page->count);
		if (!((page->mode == PM_MAPPED) && (count == 0))) {
			fkprintf("shmobj_destroy(%p): "
					"page %p phys %#lx mode %#x"
					" count %d off %#lx\n",
					obj, page,
					page_to_phys(page),
					page->mode, count,
					page->offset);
			panic("shmobj_destroy");
		}

		/* XXX:NYI: large pages */
		page->mode = PM_NONE;
		free_pages(phys_to_virt(page_to_phys(page)), 1);
	}

	if (obj->index < 0) {
		kfree(obj);
	}
	else {
		/* Move the object from its current list to the free list. */
		list_del(&obj->chain);
		--the_shm_info.used_ids;
		list_add(&obj->chain, &kds_free_list);

		/* Trim free entries sitting at the highest index. */
		for (;;) {
			struct shmobj *p;

			list_for_each_entry(p, &kds_free_list, chain) {
				if (p->index == the_maxi) {
					break;
				}
			}
			/* The cursor wrapped to the list head: no entry matched. */
			if (&p->chain == &kds_free_list) {
				break;
			}

			list_del(&p->chain);
			kfree(p);
			--the_maxi;
		}
	}
	return;
}
static void shmobj_release(struct memobj *memobj)
{
struct shmobj *obj = to_shmobj(memobj);
struct shmobj *freeobj = NULL;
long newref;
extern time_t time(void);
extern pid_t getpid(void);
dkprintf("shmobj_release(%p)\n", memobj);
memobj_lock(&obj->memobj);
--obj->ref;
if (obj->ref <= 0) {
if (obj->ref < 0) {
if (obj->index >= 0) {
obj->ds.shm_dtime = time();
obj->ds.shm_lpid = getpid();
dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch);
}
newref = --obj->ds.shm_nattch;
if (newref <= 0) {
if (newref < 0) {
fkprintf("shmobj_release(%p):ref %ld\n",
memobj, obj->ref);
memobj, newref);
panic("shmobj_release:freeing free shmobj");
}
freeobj = obj;
if (obj->ds.shm_perm.mode & SHM_DEST) {
freeobj = obj;
}
}
memobj_unlock(&obj->memobj);
if (freeobj) {
/* zap page_list */
for (;;) {
struct page *page;
int count;
page = page_list_first(obj);
if (!page) {
break;
}
page_list_remove(obj, page);
dkprintf("shmobj_release(%p):"
"release page. %p %#lx %d %d",
memobj, page, page_to_phys(page),
page->mode, page->count);
count = ihk_atomic_sub_return(1, &page->count);
if (!((page->mode == PM_MAPPED) && (count == 0))) {
fkprintf("shmobj_release(%p): "
"page %p phys %#lx mode %#x"
" count %d off %#lx\n",
memobj, page,
page_to_phys(page),
page->mode, count,
page->offset);
panic("shmobj_release");
}
/* XXX:NYI: large pages */
page->mode = PM_NONE;
free_pages(phys_to_virt(page_to_phys(page)), 1);
}
dkprintf("shmobj_release(%p):free shmobj", memobj);
kfree(freeobj);
shmobj_list_lock();
shmobj_destroy(freeobj);
shmobj_list_unlock();
}
dkprintf("shmobj_release(%p):\n", memobj);
dkprintf("shmobj_release(%p): %ld\n", memobj, newref);
return;
}
@@ -193,17 +271,23 @@ static void shmobj_ref(struct memobj *memobj)
{
struct shmobj *obj = to_shmobj(memobj);
long newref;
extern time_t time(void);
extern pid_t getpid(void);
dkprintf("shmobj_ref(%p)\n", memobj);
memobj_lock(&obj->memobj);
newref = ++obj->ref;
newref = ++obj->ds.shm_nattch;
if (obj->index >= 0) {
obj->ds.shm_atime = time();
obj->ds.shm_lpid = getpid();
}
memobj_unlock(&obj->memobj);
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
return;
}
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
uintptr_t *physp, unsigned long *pflag)
{
struct shmobj *obj = to_shmobj(memobj);
int error;
@@ -227,13 +311,13 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
memobj, off, p2align, physp, error);
goto out;
}
if (obj->ds.shm_segsz <= off) {
if (obj->real_segsz <= off) {
error = -ERANGE;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
if ((obj->real_segsz - off) < (PAGE_SIZE << p2align)) {
error = -ENOSPC;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
memobj, off, p2align, physp, error);

File diff suppressed because it is too large Load Diff

View File

@@ -38,7 +38,7 @@
#ifdef DEBUG_PRINT_TIMER
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
#define LOOP_TIMEOUT 10
@@ -167,7 +167,7 @@ void wake_timers_loop(void)
list_del(&timer->list);
dkprintf("timers timeout occurred, waking up pid: %d\n",
timer->proc->pid);
timer->proc->ftn->pid);
waitq_wakeup(&timer->processes);
}

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* read-only zeroed page object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 RIKEN AICS
*/
/*
* HISTORY:
@@ -165,7 +166,7 @@ out:
}
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
uintptr_t *physp, unsigned long *pflag)
{
int error;
struct zeroobj *obj = to_zeroobj(memobj);

View File

@@ -99,4 +99,7 @@ enum ihk_asr_type {
int ihk_mc_arch_set_special_register(enum ihk_asr_type, unsigned long value);
int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
extern unsigned int ihk_ikc_irq;
extern unsigned int ihk_ikc_irq_apicid;
#endif

View File

@@ -22,8 +22,8 @@ struct ihk_kmsg_buf {
};
extern int kprintf(const char *format, ...);
extern int kprintf_lock();
extern void kprintf_unlock(int irqflags);
extern unsigned long kprintf_lock(void);
extern void kprintf_unlock(unsigned long irqflags);
extern int __kprintf(const char *format, ...);
extern void panic(const char *msg);

View File

@@ -147,7 +147,8 @@ struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
void ihk_mc_pt_destroy(struct page_table *pt);
void ihk_mc_load_page_table(struct page_table *pt);
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
void *virt, unsigned long *phys);
const void *virt, unsigned long *phys);
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);