Compare commits

...

234 Commits
0.4.0 ... 1.0.0

Author SHA1 Message Date
Tomoki Shirasawa
eb6de9d1de delete debug code 2015-11-13 15:10:14 +09:00
Balazs Gerofi
42c8ef6539 do_fork(): fix CLONE_PARENT_SETTID bug 2015-11-13 12:46:09 +09:00
Balazs Gerofi
780d4fc29b futex_wait(): support for FUTEX_CLOCK_REALTIME 2015-11-13 12:46:02 +09:00
Tomoki Shirasawa
94fcc5bb9a futex_wait: add to check signal 2015-11-12 09:38:36 +09:00
Tomoki Shirasawa
e822fc47dd fix dead locking when kill subthreads 2015-11-11 23:03:43 +09:00
NAKAMURA Gou
26492a2895 vsyscall_gettimeofday: make timeval from TSC 2015-11-11 19:45:14 +09:00
NAKAMURA Gou
1a5ff7f535 gettimeofday: gather variables into new struct 2015-11-11 18:31:33 +09:00
NAKAMURA Gou
4c181d7fc0 smp-x86: add supports for dump analyzer 2015-11-09 16:06:55 +09:00
NAKAMURA Gou
be78eb752e time_init: fix zero divide on KVM 2015-11-06 19:31:42 +09:00
NAKAMURA Gou
0ad7c8ac50 nanosleep: fix arguments to be delegated 2015-11-06 19:31:42 +09:00
Tomoki Shirasawa
e9458a6cd3 fix ptrace02 failed 2015-10-30 16:59:03 +09:00
Tomoki Shirasawa
9e3b0b5866 bug fix 'GDB: missing parent-child relationship'
refs #641
2015-10-30 15:06:27 +09:00
Balazs Gerofi
0eaa27291a thread: move clear_child_tid, etc. to main structure 2015-10-29 11:01:27 +09:00
NAKAMURA Gou
0b07dd1b79 support madvise(MADV_REMOVE) partially
This MADV_REMOVE works with a mapping which is
- created with shmat() and
- not sharing memobj with other mappings.
2015-10-28 18:41:28 +09:00
NAKAMURA Gou
c25f8c7a39 support settimeofday() 2015-10-27 19:21:50 +09:00
NAKAMURA Gou
9e53ae20d4 add memory barriers
- rmb()
- wmb()
2015-10-27 19:21:50 +09:00
NAKAMURA Gou
09c9ee58d1 add 64bit atomic operations
- ihk_atomic64_t
- IHK_ATOMIC64_INIT()
- ihk_atomic64_read()
- ihk_atomic64_inc()
2015-10-27 19:21:50 +09:00
NAKAMURA Gou
153a59a6f4 gettimeofday: avoid per-cpu data in calculation
Because it is difficult to safely update per-cpu data of other cpus in
settimeofday().
2015-10-27 19:21:50 +09:00
Tomoki Shirasawa
cad72a8562 when SIGXCPU or SIGXFSZ, set coredump bit to exit status 2015-10-22 20:57:37 +09:00
Tomoki Shirasawa
343bfbd30a rename back status field 2015-10-22 20:26:50 +09:00
Tomoki Shirasawa
4e4f1208f7 delete unused member 2015-10-19 20:12:26 +09:00
Tomoki Shirasawa
a325a78866 refactoring to send signal 2015-10-15 17:10:02 +09:00
Tomoki Shirasawa
6ae99454da delete debug print 2015-10-15 06:51:41 +09:00
Tomoki Shirasawa
04e193de13 refactoring process structures 2015-10-13 23:04:08 +09:00
Tomoki Shirasawa
2ca46fabfd support reader/writer lock 2015-10-02 14:05:10 +09:00
Tomoki Shirasawa
5b737b499d fix cmpxchgq operand 2015-10-02 14:04:05 +09:00
NAKAMURA Gou
cb4f3a4d65 take into account args/envs' offset in page
- prepare_process_ranges_args_envs()
2015-10-01 21:08:42 +09:00
NAKAMURA Gou
51789fcd38 initialize idle_vm for page faults 2015-10-01 21:08:35 +09:00
NAKAMURA Gou
9f50c5dc3a mcexec_wait_syscall: handle request even if signaled (reworked) 2015-09-29 19:53:40 +09:00
NAKAMURA Gou
cd905f7ad1 Revert "mcexec_wait_syscall: handle request even if signaled"
This reverts commit d862f345be.
2015-09-29 19:52:36 +09:00
NAKAMURA Gou
79266f6b97 x86_issue_ipi: keep interrupt disabled while issuing IPI 2015-09-29 19:10:01 +09:00
NAKAMURA Gou
a666b69c2c make x86_issue_ipi() call wait_icr_idle() 2015-09-29 19:10:01 +09:00
NAKAMURA Gou
47e8552eba move wait_icr_idle() before x86_issue_ipi() 2015-09-29 19:10:00 +09:00
NAKAMURA Gou
8dd9175411 schedule: fix null pointer dereference 2015-09-29 19:10:00 +09:00
NAKAMURA Gou
f08e0c0054 guess whether MSR_PLATFORM_INFO exists or not 2015-09-29 19:10:00 +09:00
NAKAMURA Gou
d862f345be mcexec_wait_syscall: handle request even if signaled 2015-09-24 21:35:30 +09:00
NAKAMURA Gou
a14768c49a kmalloc: fix missing unlock on out-of-memory path 2015-09-18 21:26:15 +09:00
NAKAMURA Gou
56e57775e7 clone: fix error message 2015-09-18 21:26:15 +09:00
NAKAMURA Gou
b3b752ba41 nanosleep: use copy_from_user instead of direct access 2015-09-17 21:46:32 +09:00
NAKAMURA Gou
7b32f2f73b nanosleep: fix tscs_rem underflow issue 2015-09-17 21:46:26 +09:00
NAKAMURA Gou
ea5a1a8693 nanosleep: update *rem whenever signaled 2015-09-17 21:44:49 +09:00
NAKAMURA Gou
92f8fb2b2b nanosleep: use copy_to_user instead of direct access 2015-09-17 21:44:49 +09:00
NAKAMURA Gou
a3e440414d nanosleep: cosmetic change 2015-09-17 21:44:49 +09:00
Yoichi Umezawa
10ba03ccea mcreboot-smp-x86.sh: fix querying free irq 2015-09-17 13:19:07 +09:00
Balazs Gerofi
ccb7c30a05 page_fault_handler(): reenable preempt after failed PF when process is exiting 2015-09-17 10:05:32 +09:00
NAKAMURA Gou
7dfeb8e7ce create demand-paging mapping in case of MAP_SHARED
On current McKernel, only mappings for demand paging can be shared.
Therefore, if MAP_SHARED and MAP_ANONYMOUS are specified and
anon_on_demand is disabled, then mmap(2) should create a mapping which
is for demand paging and is entirely populated with physical pages.
2015-09-16 21:38:00 +09:00
NAKAMURA Gou
b1b706453f vsyscall: send SIGSEGV to the caller if syscall fails
On CentOS 7 (RHEL 7?), "errno" isn't set when vsyscall_gettimeofday
fails. So, in such case, vsyscall_gettimeofday send SIGSEGV to the
caller to report failure of gettimeofday operation.
2015-09-16 21:37:11 +09:00
NAKAMURA Gou
bd5708286d make sys_gettimeofday() use copy_to_user() 2015-09-16 21:26:32 +09:00
NAKAMURA Gou
c8a13cf213 make gettimeofday ignore NULL parameter 2015-09-16 21:26:24 +09:00
NAKAMURA Gou
5ad0a03d18 make gettimeofday handle second parameter (timezone) 2015-09-16 21:25:29 +09:00
NAKAMURA Gou
3819eec03f cosmetic changes
- sys_gettimeofday()
2015-09-16 21:13:12 +09:00
Balazs Gerofi
40b8587a8a schedule(): sync CPU_FLAG_NEED_RESCHED flag with clone and migrate 2015-09-16 19:22:40 +09:00
Balazs Gerofi
e7b1115572 mcreboot-smp-x86.sh: introduction of ihk_ikc_irq_core argument 2015-09-14 17:30:25 +09:00
Balazs Gerofi
e1a01803d0 disable demand paging on ANONYMOUS mappings unless anon_on_demand kernel argument is passed 2015-09-14 17:26:37 +09:00
Balazs Gerofi
69f4b0e1ad gettimeofday()/nanosleep(): check arguments, return on pending signal 2015-09-14 17:05:30 +09:00
Tomoki Shirasawa
0909a5bed5 tracee context is broken when tracee call execve 2015-09-03 10:05:25 +09:00
Tomoki Shirasawa
9dd224385e When SIGSEGV occurred on a tracee process, a tracee process freezes. 2015-09-01 17:37:56 +09:00
Tomoki Shirasawa
4176c59fd3 using d_path for solution to file path. 2015-08-28 13:01:34 +09:00
Tomoki Shirasawa
afeee5432f When envp is NULL, execve is delayed. 2015-08-28 13:00:45 +09:00
Balazs Gerofi
9ae5bcf46e gettimeofday(): an implementation based on CPU invariant TSC support 2015-08-24 23:53:56 +02:00
Balazs Gerofi
b8f166e608 mcreboot-smp-x86.sh: handle resource allocation after unloading; mcstop+release-smp-x86.sh 2015-08-22 18:55:53 +09:00
Balazs Gerofi
c85a9b99e1 a couple of cosmetic changes of debug messages 2015-08-22 18:53:14 +09:00
Balazs Gerofi
7c816a6b73 an implementation of the Mellor-Crummey Scott (MCS) lock 2015-08-20 15:26:52 +09:00
Tomoki Shirasawa
5a0cd3f53f ptrace_detach when exiting
refs #590
2015-08-18 18:03:09 +09:00
Balazs Gerofi
9fa62adfe7 execve(): stay compliant with locked context switching 2015-08-10 14:18:11 +09:00
Balazs Gerofi
f0ab8ec89a sched_request_migrate(): change CPU flags atomically 2015-08-10 12:45:59 +09:00
Balazs Gerofi
f4cc82578d check_need_resched(): no thread migration in IRQ context 2015-08-10 12:43:35 +09:00
Balazs Gerofi
9ba40dc0ff schedule(): hold runq lock for the entire duration of context switching
releasing the runq lock after loading page tables but before the actual
context switch can leave execution in an inconsistent if the current
process is descheduled from an IRQ between these two steps.
this patch holds the runq lock with IRQs disabled and makes the context
switch a single atomic operation.
2015-08-10 12:37:12 +09:00
Balazs Gerofi
8d6c97ea5c schedule(): disable auto thread migration 2015-08-07 16:07:31 +09:00
Balazs Gerofi
386f59000a mcreboot-smp-x86.sh.in: grant real user rw permission on /dev/mcos* 2015-08-07 13:33:44 +09:00
Balazs Gerofi
215cd370a1 ap_init(): clean up AP boot kernel messages 2015-08-07 10:57:59 +09:00
Balazs Gerofi
0a0e2c04a0 support for dynamically toggling time sharing when CPU is oversubscribed 2015-08-07 08:51:50 +09:00
Balazs Gerofi
aa191b87d3 schedule(): use XSAVE/XRSTOR and swap floating point registers in context switch 2015-08-07 08:41:00 +09:00
Balazs Gerofi
d5c243571f cpu_clear_and_set(): atomic CPU mask update in migration code 2015-08-06 10:49:55 +09:00
Balazs Gerofi
328e69a335 schedule(): do not preempt while holding spinlocks or while in offloaded syscall 2015-08-06 10:36:13 +09:00
Balazs Gerofi
b77755d0f7 obtain_clone_cpuid(): always start from CPU 0 and fill in cores linearily 2015-07-28 20:20:47 +09:00
Balazs Gerofi
d7bae14707 TEMPORARY: schedule(): move threads when core is explicitly oversubscribed 2015-07-28 20:12:58 +09:00
Balazs Gerofi
4e58d08f5c schedule_timeout(): give a chance to other process in spin sleep if CPU core is oversubscribed 2015-07-28 20:06:56 +09:00
Balazs Gerofi
9b1e691588 fix thread migration code (i.e., sched_setaffinity())
- moved migration code into idle() process and updated schedule() to detect
  when a thread has moved to another CPU in order to avoid doing housekeeping
  on behalf of the original one
- start CPU head from core 0
- keeps track of nested interrupts
2015-07-24 20:09:17 +09:00
Balazs Gerofi
3988b0fc61 keep track of IRQ context and don't do thread migration there 2015-07-23 16:56:58 +09:00
Balazs Gerofi
54eb345847 settid(): prevent modifying tid after thread migration 2015-07-23 16:51:24 +09:00
Tomoki Shirasawa
bbe7aef95b fix calling do_signal (argument lacked) 2015-07-17 10:18:43 +09:00
Tomoki Shirasawa
1ff4cf68c2 support SA_RESTART flag and restart syscall 2015-07-16 16:33:14 +09:00
Tomoki Shirasawa
1bc84d3feb modify to copy credentials 2015-07-13 15:29:26 +09:00
Balazs Gerofi
f7d78c8b7d sched_getaffinity(): return EINVAL for 0 length request (fixes LTP sched_getaffinity01) 2015-07-10 11:00:43 +09:00
Balazs Gerofi
7647c99cc2 do_migrate(): disable IRQ while holding migq_lock to avoid deadlocking with reschedule interrupts 2015-07-09 15:23:28 +09:00
Balazs Gerofi
43a774fbfc sched_setaffinity(): undo target core change, avoid abort on length mismatch 2015-07-09 11:00:26 +09:00
Balazs Gerofi
a029bcac37 mcreboot-smp-x86: find unused IRQ line and pass start vector to ihk_smp_x86.ko 2015-07-07 09:07:16 +09:00
Balazs Gerofi
bd913c503b sched_setaffinity(): find an actual target core 2015-07-03 11:59:52 +09:00
Tomoki Shirasawa
e838affde8 fix to compile error on CentOS 7 2015-07-02 17:08:35 +09:00
Tomoki Shirasawa
59ee251e1c fix /proc/pid/mem, /proc/pid/status, /proc/pid/cmdline 2015-07-02 00:22:35 +09:00
Tomoki Shirasawa
fa79db3bcc fix out of tree build 2015-07-01 23:58:50 +09:00
Tomoki Shirasawa
b7c5cba361 fix to compile on CentOS 6 2015-07-01 23:57:40 +09:00
Balazs Gerofi
382614ddae pstate: use MSR_NHM_TURBO_RATIO_LIMIT as maximum single-core turbo ratio 2015-07-01 22:18:38 +09:00
Tomoki Shirasawa
aa959c6b34 temporary fix for CentOS 6.x 2015-06-30 18:19:53 +09:00
Tomoki Shirasawa
aabc3d386d support a function to execute mcexec automatically. 2015-06-30 17:47:01 +09:00
Balazs Gerofi
4ebe778ede vm->exiting: deal with exit_group() and concurrent page faults 2015-06-25 16:04:04 +09:00
Balazs Gerofi
fbb776e4fb cpu init: support for no_turbo kernel argument 2015-06-25 12:18:27 +09:00
Balazs Gerofi
41b85281a4 mcctrl: introduction of RUS page hash to handle page refcounts properly 2015-05-31 15:42:39 +09:00
Balazs Gerofi
5532e3c663 mcreboot script for new IHK SMP-x86 I/F 2015-05-26 14:41:28 +09:00
Tomoki Shirasawa
2af2b1205f temporary fix for setfsuid/setfsgid 2015-05-19 06:27:59 +09:00
Tomoki Shirasawa
7d5a68be1b add PID and GID to /proc/pid/status
add /proc/pid/cmdline

refs #445
refs #447
2015-05-18 17:45:37 +09:00
Tomoki Shirasawa
f4162dff52 some signals set siginfo.si_code 2015-04-14 15:11:36 +09:00
NAKAMURA Gou
a0d909af75 add supports for dump analyzer 2015-03-31 12:59:53 +09:00
Tomoki Shirasawa
63669b7f71 support /proc/pid/status for LTP mmap14 2015-03-28 14:20:07 +09:00
NAKAMURA Gou
4946964ed0 update copyright notices 2015-03-27 14:50:09 +09:00
Balazs Gerofi
5f19842a6a support for process_vm_readv()/process_vm_writev() 2015-03-25 19:44:56 +09:00
NAKAMURA Gou
9271d5346d add ACSL annotation to cpu.c 2015-03-25 15:54:08 +09:00
Susumu Komae
7bba05cfa4 Revise use of iov_base in ptrace_read_regset() and ptrace_write_regset(). 2015-03-20 20:33:40 +09:00
Susumu Komae
c2a1f933e8 Set tid (instead of pid) for ptrace event message of
PTRACE_EVENT_{FORK,VFORK,CLONE,VFORKDONE}.
Specify 2nd argument as pid (instead of -1) of function findthread_and_lock(),
to find tracee process in ptrace subroutines.
(gdb testsuite gdb.base/watch_thread_num.exp)
2015-03-20 13:22:00 +09:00
NAKAMURA Gou
055769254d implement mlockall()/munlockall() for LTP syscall 2015-03-19 16:46:31 +09:00
NAKAMURA Gou
786ae83380 add arch-dependent mman.h 2015-03-19 16:36:57 +09:00
NAKAMURA Gou
8c662c83be implement mincore(2) for LTP 2015-03-19 16:32:03 +09:00
NAKAMURA Gou
4698bc40c2 implement System V shared memory for LTP syscalls 2015-03-19 16:21:18 +09:00
Tomoki Shirasawa
f5d935b703 support signalfd4 step1 2015-03-18 17:35:43 +09:00
Tomoki Shirasawa
d53865ac5f change to check sequence of kill syscall, check sig num zero after uid checking 2015-03-18 12:59:05 +09:00
Tomoki Shirasawa
8934eb91a4 kill syscall check uid 2015-03-17 15:04:36 +09:00
Tomoki Shirasawa
ed6d94a358 syscall slowdown when repeat fork/exit/wait (LTP fork13) 2015-03-11 16:09:59 +09:00
NAKAMURA Gou
fa923da0e3 add host PTE cleaning to execve(). refs #377
This removes a cause of LTP gethostid01's wrong behavior.
2015-03-10 18:23:50 +09:00
Balazs Gerofi
1f8265efbc check _PAGE_PWT and _PAGE_PCD directly instead of _PAGE_CACHE_WC 2015-03-07 02:12:48 +09:00
Susumu Komae
b553de7435 supports PTRACE_GETREGSET, PTRACE_SETREGSET.
supports PTRACE_GETFPREGS, PTRACE_SETFPREGS.

refs #421
2015-03-06 19:18:32 +09:00
NAKAMURA Gou
6a82412d64 modify procfs to read inactive thread's files
However, the following files can be read only if the corresponding
thread is in active.
- /proc/<PID>/mem
- /proc/<PID>/task/<TID>/mem

refs #371
2015-03-05 21:41:24 +09:00
NAKAMURA Gou
fa29c34995 expand the size of kstack 12 KiB
When a procfs file belonging to a process which was in PS_TRACED status
was accessed, calling kprintf() from process_procfs_request() caused
stack overrun, and x86_cpu_local_variables was destroyed.
2015-03-05 20:30:33 +09:00
NAKAMURA Gou
f84b5acf79 map entire buffer to read procfs
Reading data from procfs file more than 4096 byte caused a buffer
overrun in McKernel because the buffer was always mapped in McKernel
4096 byte regardless of actual buffer size.
2015-03-05 20:30:33 +09:00
Balazs Gerofi
8b24f60861 Combine range and memobj flags before arch_vrflag_to_ptattr() 2015-03-05 16:40:14 +09:00
Balazs Gerofi
f82bb284bb Make pager and devobj debug messages optional 2015-03-05 16:03:21 +09:00
Balazs Gerofi
bf12a5c45e Introduction of write-combined memory type mappings.
Introduction of VR_WRITE_COMBINED, PTATTR_WRITE_COMBINED and modification
to the memobj's get_page() interface so that Linux communicates back mapping
flags (such as write-combined).
2015-03-05 16:03:21 +09:00
Balazs Gerofi
ea5681232e x86 Page Attribute Table (PAT) MSR support.
Reconfigure PAT to permit write-combining memory type to be assigned
on a page-by-page basis. Changes PWT and PCD bit combinations in page
table entries so that they correspond to the following format:

  PAT
  |PCD
  ||PWT
  |||
  000 WB  Write Back (WB)
  001 WC  Write Combining (WC)
  010 UC- Uncached (UC-)
  011 UC  Uncacheable (UC)
2015-03-05 16:03:20 +09:00
Tomoki Shirasawa
e6011be1af create area for to save fp regs
refs #421
2015-03-05 12:18:46 +09:00
Tomoki Shirasawa
9946ccd6b1 pipe free fork is implemented (LTP fork09) 2015-03-04 17:40:58 +09:00
NAKAMURA Gou
daec7de828 implement /proc/stat
only for sysconf(_SC_NPROCESSORS_ONLN).  This enables Intel OpenMP
runtime to arrange threads with regard for CPU topology.

refs #291
2015-03-04 15:46:53 +09:00
NAKAMURA Gou
9ad48083aa make PTRACE_POKETEXT use patch_process_vm() 2015-03-04 12:04:54 +09:00
NAKAMURA Gou
2eac58aab3 add patch_process_vm(). (in progress)
This function patches specified range of specified user space even if
the range is not writable.

refs #401
2015-03-04 12:00:51 +09:00
NAKAMURA Gou
22d8d169b6 change copy-out routines
- restrict copy_to_user() to only current process.
- add write_process_vm() to write specified process space.
2015-03-04 11:29:16 +09:00
NAKAMURA Gou
8db54c2637 make GPE on CPL0 cause panic 2015-03-04 11:29:16 +09:00
NAKAMURA Gou
063fa963c3 change copy-in routines
- restrict copy_from_user() to only current process.
- add read_process_vm() to read specified process space.
2015-03-04 11:29:15 +09:00
NAKAMURA Gou
a6488adcc1 change parameter type of ihk_mc_pt_virt_to_phys()
- add type qualifier 'const' to virtual address parameter.
  that is, change parameter 'virt' from       'void *'
                                     to 'const void *'
2015-03-04 11:29:15 +09:00
NAKAMURA Gou
2239a6b09b modify page_fault_process()
- change its argument from 'struct process *'
                        to 'struct process_vm *'.
- change its name from 'page_fault_process()'
                    to 'page_fault_process_vm()'.
- allow to resolve a fault on the process_vm of another process.
2015-03-04 11:29:15 +09:00
Susumu Komae
8c179d506a support PTRACE_ARCH_PRCTL.
refs #420
2015-03-03 14:22:57 +09:00
Susumu Komae
377341ce5f change debug output in debug/int3 handler, for struct x86_user_context. 2015-03-03 14:06:30 +09:00
Tomoki Shirasawa
8caeba7cba support PTRACE_GETSIGINFO and PTRACE_SETSIGINFO
refs #422
2015-03-03 09:54:57 +09:00
NAKAMURA Gou
1d2f5d9893 set is_gpr_valid to initial user context 2015-02-27 14:47:43 +09:00
Balazs Gerofi
e4f47df3c3 initialize pstate, turbo mode and power/performance bias MSR registers
MSR_IA32_MISC_ENABLE, MSR_IA32_PERF_CTL and MSR_IA32_ENERGY_PERF_BIAS
are responsible for performance settings, this change enables McKernel
to perform on par with Linux when running the fwq benchmark.
2015-02-27 11:29:11 +09:00
NAKAMURA Gou
4751055ee4 make ptrace(2) use lookup_user_context() 2015-02-26 17:43:10 +09:00
NAKAMURA Gou
305ebfed0e add lookup_user_context(). refs #420 2015-02-26 17:43:10 +09:00
NAKAMURA Gou
b66b950129 add x86_sregs into x86_user_context
x86_sregs contains the registers which are included in user_regs_struct
but not included in x86_basic_regs.
2015-02-26 17:43:10 +09:00
NAKAMURA Gou
4aa8ba2eef sort x86_basic_regs into user_regs_struct's order 2015-02-26 17:43:10 +09:00
NAKAMURA Gou
fab2c2aa97 wrap x86_regs with x86_user_context
and, rename x86_regs to x86_basic_regs.
2015-02-26 17:43:10 +09:00
Susumu Komae
026164eda4 fix PTRACE_ATTACH, PTRACE_DETACH, detach at tracer process terminated.
tracee process may have no parent, increment/decrement refcount.

refs #374
refs #280
2015-02-25 21:09:44 +09:00
Tomoki Shirasawa
e91d1e5b7b stack of signal handler is not 16 byte align
refs #429
2015-02-24 17:20:52 +09:00
Tomoki Shirasawa
73743eeeb0 temporary fix for waiting tracee blocked 2015-02-24 15:20:32 +09:00
Tomoki Shirasawa
c1c1fd578a modify file path of /proc files
LTP getsid02 mount06 msgctl08 msgget03 pause02 pipe07 readhead02
    swapon03 sysconf01 wait402
2015-02-24 11:33:49 +09:00
Tomoki Shirasawa
f35cc66d18 delete unused argument "ctx" from do_syscall
support waitid option "WNOWAIT"
2015-02-23 17:14:14 +09:00
Tomoki Shirasawa
d9cf1d49b1 support waitid
send SIGCHLD to parent when SIGSTOP or SIGCONT received

refs #425
refs #283
2015-02-22 20:05:30 +09:00
Balazs Gerofi
3d426ada01 use remap_pfn_range() in rus_vm_fault() for kernel versions newer than 3.0 2015-02-19 13:52:55 -08:00
Balazs Gerofi
0307f6a6cc implementation of sched_{setparam, getparam, setscheduler, getscheduler, get_priority_min, get_priority_max, rr_get_interval} system calls 2015-02-19 11:46:03 -08:00
NAKAMURA Gou
0dee04f16b move parse_args() to after arch_init()
In attached-mic, bootparam is not mapped until arch_init() is finished.
In builtin-mic and builtin-x86, virtual address of bootparam is changed
in arch_init().
2015-02-18 20:49:46 +09:00
NAKAMURA Gou
0e98e87b95 change type of kprintf_lock() to "unsigned long"
to match type of ihk_mc_spinlock_lock().
2015-02-18 20:49:46 +09:00
NAKAMURA Gou
d35e60c1a3 add init_boot_processor_local() for arch_start() 2015-02-18 20:49:46 +09:00
NAKAMURA Gou
037e17c4ed fix parsing of "osnum=" kargs 2015-02-18 16:44:14 +09:00
Susumu Komae
2baf274dac fix PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK and PTRACE_O_TRACECLONE.
allocate debug registers area, for new process.
(gdb testsuite gdb.base/inferior-died.exp)

refs #266
refs #372
2015-02-18 16:20:23 +09:00
Tomoki Shirasawa
3b04043f2a change to throw signal SIGILL to SIGSEGV when GPE 2015-02-18 14:54:49 +09:00
Tomoki Shirasawa
c0edb6fe6f add new cpu state CPU_STATUS_RESERVED 2015-02-18 13:46:08 +09:00
NAKAMURA Gou
bb137bc9bb make brk region just follow data region
This effectively reverts commit d70dd2338c.
2015-02-18 11:52:15 +09:00
NAKAMURA Gou
16af976a71 support msync() system call. refs #382
Msync(2) of this version writes only the pages which the calling process
modified. Modifications of the other processes are not written.
2015-02-18 11:52:15 +09:00
Balazs Gerofi
6485578a7f sched_yield implementation 2015-02-17 16:20:51 -08:00
Tomoki Shirasawa
d2d0fc6721 The mcexec command became executable from a command-line at the same time 2015-02-17 18:33:38 +09:00
Tomoki Shirasawa
9574a28a5f The same CPU is assigned to a different process.
refs #423
2015-02-17 18:27:46 +09:00
Susumu Komae
dbe4ec3247 support PTRACE_O_TRACECLONE and PTRACE_O_TRACEEXEC. 2015-02-17 17:00:48 +09:00
Susumu Komae
99debc548f detach traced process, when tracer process terminate.
some fixes on PTRACE_DETACH.

refs #374
refs #280
2015-02-17 16:58:29 +09:00
Susumu Komae
fa15f6b106 support PTRACE_SYSCALL.
support PTRACE_O_TRACESYSGOOD.
ptrace_report_exec() calls ptrace_report_signal().

refs #265
2015-02-17 16:56:27 +09:00
Susumu Komae
8568a73f33 traced process should stop by any signal except for SIGKILL,
even if SIG_IGN.  (LTP ptrace01)
2015-02-17 16:51:29 +09:00
Tomoki Shirasawa
8b57b2ee57 change signal handling at mcexec 2015-02-15 17:54:11 +09:00
Tomoki Shirasawa
9a36e7b84a restart waitpid if it returns with EINTR. 2015-02-13 16:00:40 +09:00
Tomoki Shirasawa
d998691425 fix setpgid(0, 0) 2015-02-13 13:51:00 +09:00
Dave van Dresser
8cdf70c500 Enable AVX extensions for processors that support it. 2015-02-12 17:51:50 -08:00
Tomoki Shirasawa
0e0bc548f6 fix mcexec SIG_IGN 2015-02-12 19:02:58 +09:00
NAKAMURA Gou
d21ae28843 add dummy NUMA system calls. refs #405
ENOSYS system call handlers for the following.
- get_mempolicy()
- mbind()
- migrate_pages()
- move_pages()
- set_mempolicy()
2015-02-10 21:16:19 +09:00
NAKAMURA Gou
a4a806bef7 support vsyscall_getcpu() vsyscall. refs #385
This version simply calls getcpu() system call, so that it's not fast.
2015-02-10 18:35:48 +09:00
NAKAMURA Gou
d30d8fe71c support getcpu() system call. refs #385
It appeared on Linux(x86) in kernel 3.1.
2015-02-10 18:35:41 +09:00
Balazs Gerofi
a5bdd41c3d procfs: check parent entry to avoid page fault in procfs_exit() 2015-01-31 22:27:13 -08:00
Susumu Komae
5f5ab34559 support PTRACE_ATTACH.
fix PTRACE_TRACEME, PTRACE_DETACH.
2015-01-30 21:02:01 +09:00
Tomoki Shirasawa
b26fa4e87c wrong send signal to sender process when kill other process group (LTP kill10)
refs #404
2015-01-29 16:14:31 +09:00
Susumu Komae
bd5f43b119 support PTRACE_SINGLESTEP.
support debug/int3 exception.
2015-01-29 15:48:05 +09:00
Susumu Komae
f97f8dbab3 support PTRACE_PEEKTEXT and PTRACE_PEEKDATA.
support PTRACE_POKETEXT and PTRACE_POKEDATA.
  now, force write anywhere.
  read-only page must copy-on-write.
2015-01-29 15:02:15 +09:00
Susumu Komae
e30946f1f0 fix PTRACE_CONT may cause error.
refs #369
2015-01-29 14:10:31 +09:00
Susumu Komae
c3ade864d9 fix PTRACE_PEEKUSER, PTRACE_POKEUSER, PTRACE_GETREGS.
support PTRACE_SETREGS.
  In struct process, add 'unsigned long *ptrace_debugreg', instead of 'struct user *userp'.
  debug registers are read/written from/to ptrace_debugreg, save/restore in schedule().
  most general registers are proc->uctx.
  fs_base is proc->thread.tlsblock_base.
  gs_base,ds,es,fs,gs and orig_rax are uncompleted.
  other members in 'struct user' are ignored, same as Linux implementation.

refs #257
refs #373
refs #263
2015-01-29 14:08:38 +09:00
bgerofi@riken.jp
9c35935671 mcexec: fix memory allocation bug that crashes CentOS7 glibc 2015-01-27 16:55:30 +09:00
Balazs Gerofi
ed33ee65b2 CentOS7 spinlock, procfs and vm_munmap support (i.e., Linux kernel 3.10) 2015-01-27 16:55:28 +09:00
Tomoki Shirasawa
d04b5a09bd PTRACE_KILL omit sched_wakeup_process return
refs #369
2015-01-27 10:55:49 +09:00
Tomoki Shirasawa
08cc31f9bf support setrlimits/getrlimits, however this fix is these syscalls only.
checking resource process must implement it separately.

refs #330
2015-01-27 10:35:58 +09:00
Tomoki Shirasawa
cf2166f830 function enter_user_mode calls check_signal.
refs #392
2015-01-16 14:28:28 +09:00
Susumu Komae
765de119dc support PTRACE_O_TRACEFORK, PTRACE_O_TRACEVFORK, PTRACE_O_TRACEVFORKDONE.
to start with a SIGSTOP, do not set proc->ftn->status to PS_RUNNING in __runq_add_proc().
  change vfork() set CLONE_VFORK.

refs #266
refs #267
refs #372

support PTRACE_GETEVENTMSG.
  to store ptrace event, add 'unsigned long ptrace_eventmsg;' member in struct fork_tree_node.

refs #273
2015-01-14 10:43:18 +09:00
Susumu Komae
d46110b4d9 support PTRACE_DETACH.
change getppid() to use proc->ftn->ppid_parent->pid, for ptraced process.

refs #280
2015-01-08 12:39:52 +09:00
Susumu Komae
74f0aec478 skip copy_to_user() when r->ret is negative error number in mckernel_procfs_read().
refs #370
2015-01-08 12:38:06 +09:00
Tomoki Shirasawa
e3eb7e68bc Fix need to modify ihk/cokernel/Makefile when a file has been added under mckernel/arch (Bug#365) 2014-12-26 16:05:23 +09:00
Tomoki Shirasawa
912b8a886c do_kill distinguish PTRACE_CONT from kill. 2014-12-26 15:23:11 +09:00
Balazs Gerofi bgerofi@riken.jp
e25d35a191 ihk_mc_init_ap(): cosmetics for reporting IKC, trampoline info 2014-12-25 11:05:52 +09:00
bgerofi@riken.jp
a9aad67541 IHK-SMP: boot scripts placeholder 2014-12-25 11:03:07 +09:00
Balazs Gerofi
cd6e663f48 handle VM_RESERVED (non-existing since Linux 3.7.0) and do_mmap_pgoff() (unexported since Linux 3.5.0) in mcctrl's syscall.c 2014-12-25 11:03:05 +09:00
Balazs Gerofi
5f095b3952 McKernel IHK SMP-x86 support (build system and config files) 2014-12-25 11:03:04 +09:00
bgerofi@riken.jp
811a275176 build scripts: support for separate build and source directories 2014-12-25 11:03:03 +09:00
bgerofi@riken.jp
b388f59ebd ihk_ikc_irq and ihk_ikc_irq_apicid 2014-12-25 11:03:01 +09:00
bgerofi@riken.jp
ff47261337 receive trampoline addr via parameter of arch_start() 2014-12-25 11:03:00 +09:00
Naoki Hamada
a91bf9a13d ptrace: Make PTRACE_CONT/KILL debug print separated. 2014-12-24 12:39:29 +09:00
Naoki Hamada
fcfa94cea1 ptrace: Add PTRACE_O_TRACEFORK (fake) support. 2014-12-24 12:39:13 +09:00
NAKAMURA Gou
55f7ee1526 fix a warning
| mckernel/kernel/../arch/x86/kernel/memory.c: In function '__set_pt_page':
| mckernel/kernel/../arch/x86/kernel/memory.c:367:
|     warning: 'init_pt_lock_flags' may be used uninitialized in this function
2014-12-22 17:03:32 +09:00
NAKAMURA Gou
b1b6fab7b8 fix a warning
| mckernel/kernel/host.c: In function 'syscall_packet_handler':
| mckernel/kernel/host.c:504:
|     warning: implicit declaration of function 'find_command_line'
2014-12-22 16:58:08 +09:00
NAKAMURA Gou
391886a6f1 fix a warning
| mckernel/kernel/syscall.c: In function 'do_syscall':
| mckernel/kernel/syscall.c:187:
|     warning: 'irqstate' may be used uninitialized in this function
2014-12-22 16:58:07 +09:00
NAKAMURA Gou
c810afe224 fix a warning
| mckernel/kernel/syscall.c: In function 'sys_madvise':
| mckernel/kernel/syscall.c:2108:
|     warning: 'range' may be used uninitialized in this function
2014-12-22 16:58:06 +09:00
NAKAMURA Gou
5566ed1a63 fix a warning
| mckernel/executer/kernel/control.c: In function ‘release_handler’:
| mckernel/executer/kernel/control.c:264: warning: unused variable ‘c’
2014-12-22 16:58:05 +09:00
NAKAMURA Gou
f0f91d2246 fix a warning
| mckernel/executer/kernel/control.c: In function ‘mcexec_debug_log’:
| mckernel/executer/kernel/control.c:252: warning: unused variable ‘c’
2014-12-22 16:58:04 +09:00
NAKAMURA Gou
0942bf0ce0 make dkprintf() evaluate its parameters always
Parameters of dkprintf() should be evaluated even if dkprintf() is
disabled.  Because this enables to find expression of parameter obsolete
and to avoid unnecessary compiler warnings such as "unused variable".
2014-12-22 16:58:03 +09:00
NAKAMURA Gou
9c94e90007 use ftn->tid instead of proc->tid 2014-12-22 16:58:02 +09:00
NAKAMURA Gou
a6ac906105 use ftn->pid instead of proc->pid 2014-12-22 16:58:01 +09:00
bgerofi@riken.jp
d4ba4dc8b3 introduction of mckernel_procfs_file_operations; fix /proc/self path resolution;
implementation of /proc/self/pagemap (LTP mmap12)
2014-12-15 12:46:05 +09:00
Tomoki Shirasawa
815d907ca4 setpgid return -EACCES when the child process had already performed an execve (LTP setpgid03) 2014-12-09 14:01:20 +09:00
Balazs Gerofi bgerofi@riken.jp
3c24315f91 support for /proc/mcos%d/PID/maps (without file info) (LTP mlock03) 2014-12-05 16:29:20 +09:00
Balazs Gerofi bgerofi@riken.jp
25f108bf78 mckernel_procfs_read(): fix buffer allocation, offset check and return code 2014-12-05 16:27:48 +09:00
Balazs Gerofi bgerofi@riken.jp
cc9d30efbf do_signal(): support for SIGSYS
as of POSIX.1-2001:
Signal  Value       Action  Comment
---------------------------------------------------
SIGSYS  12,31,12    Core    Bad argument to routine
2014-12-04 18:10:10 +09:00
Balazs Gerofi bgerofi@riken.jp
af83f1be64 rlimit(RLIMIT_NOFILE): return one less to make sure sync pipe can be created (LTP fork09) 2014-12-04 17:40:00 +09:00
bgerofi@riken.jp
b2cab453f1 clone(): do not allow setting CLONE_THREAD and CLONE_VM separately
XXX: When CLONE_VM is set but CLONE_THREAD is not the new thread is
meant to have its own thread group, i.e., when calling exit_group()
the cloner thread wouldn't be killed. However, this is a problem on
the Linux side because we do not invoke clone in mcexec when threads
are created. Thus, currently no support for this combination is
provided.
2014-12-04 16:55:18 +09:00
bgerofi@riken.jp
8909597499 clone(): support for handling CLONE_SIGHAND and CLONE_VM flags separately 2014-12-04 16:55:17 +09:00
bgerofi@riken.jp
86f2a9067b getppid() implementation 2014-12-04 16:55:17 +09:00
Tomoki Shirasawa
a5889fb5df sigaction check signal number (LTP sigaction02) 2014-12-04 11:31:50 +09:00
Tomoki Shirasawa
f1a86cfbd3 when host mcexec down, syscall is hung up 2014-12-04 11:17:29 +09:00
Balazs Gerofi bgerofi@riken.jp
c1cf630a94 mcexec: store full path to executable
required so that a forked process can obtain exec reference in the
Linux kernel even if executable was specified with relative path
and fork was called after changing the current working directory
2014-12-03 15:14:26 +09:00
Tomoki Shirasawa
8f30e16976 when mcexec is killed by SIGKILL, terminate mckernel process (BUG#259) 2014-11-27 16:13:52 +09:00
Masamichi Takagi
58e2e0a246 Use pidof in mcreboot script 2014-11-23 17:54:14 +09:00
Masamichi Takagi
ea02628f2b Add reboot and shutdown script for builtin-x86
It decides the number of cores for McKernel by looking into the
"SHIMOS: CPU Status:" line of dmesg. It sets the amount of memory for
McKernel to one fourth of the total memory obtained by "free -g".
2014-11-13 20:06:29 +09:00
Balazs Gerofi
89acf5c5d6 support for AT_RANDOM auxiliary entry on the process stack (needed for _dl_random in glibc) 2014-11-11 08:48:27 +09:00
Balazs Gerofi
ac8e2a0c40 handle VM_RESERVED (non-existing since Linux 3.7.0) and do_mmap_pgoff() (unexported since Linux 3.5.0) in mcctrl's syscall.c 2014-11-11 08:42:07 +09:00
Tomoki Shirasawa
ab7aa3354f repair signal implementation.
- Don't interrupt syscall with the ignored signal.
2014-11-07 07:55:30 +09:00
Tomoki Shirasawa
c4e0b84792 repair signal implementation.
- can not interrupt syscall
- can not receive SIGKILL
2014-10-31 16:34:59 +09:00
73 changed files with 12155 additions and 3911 deletions

View File

@@ -6,7 +6,7 @@ all::
@(cd executer/kernel; make modules)
@(cd executer/user; make)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic) \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make) \
;; \
*) \
@@ -19,7 +19,7 @@ install::
@(cd executer/kernel; make install)
@(cd executer/user; make install)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic) \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make install) \
;; \
*) \
@@ -27,19 +27,39 @@ install::
exit 1 \
;; \
esac
if [ "$(TARGET)" = attached-mic ]; then \
@case "$(TARGET)" in \
attached-mic) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-attached-mic.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-attached-mic.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
fi
;; \
builtin-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-builtin-x86.sh $(SBINDIR)/mcreboot; \
install -m 755 arch/x86/tools/mcshutdown-builtin-x86.sh $(SBINDIR)/mcshutdown; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
smp-x86) \
mkdir -p -m 755 $(SBINDIR); \
install -m 755 arch/x86/tools/mcreboot-smp-x86.sh $(SBINDIR)/mcreboot.sh; \
install -m 755 arch/x86/tools/mcstop+release-smp-x86.sh $(SBINDIR)/mcstop+release.sh; \
mkdir -p -m 755 $(MANDIR)/man1; \
install -m 644 arch/x86/tools/mcreboot.1 $(MANDIR)/man1/mcreboot.1; \
;; \
*) \
echo "unknown target $(TARGET)" >&2 \
exit 1 \
;; \
esac
clean::
@(cd executer/kernel; make clean)
@(cd executer/user; make clean)
@case "$(TARGET)" in \
attached-mic | builtin-x86 | builtin-mic) \
attached-mic | builtin-x86 | builtin-mic | smp-x86) \
(cd kernel; make clean) \
;; \
*) \

View File

@@ -0,0 +1,2 @@
IHK_OBJS += cpu.o interrupt.o memory.o trampoline.o local.o context.o
IHK_OBJS += perfctr.o syscall.o vsyscall.o

View File

@@ -5,13 +5,18 @@
* Control CPU.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
* 2015/02/26: bgerofi - set pstate, turbo mode and power/perf bias MSRs
* 2015/02/12: Dave - enable AVX if supported
*/
#include <ihk/cpu.h>
#include <ihk/debug.h>
#include <ihk/mm.h>
#include <types.h>
#include <errno.h>
#include <list.h>
@@ -22,6 +27,7 @@
#include <march.h>
#include <signal.h>
#include <process.h>
#include <cls.h>
#define LAPIC_ID 0x020
#define LAPIC_TIMER 0x320
@@ -33,6 +39,7 @@
#define LAPIC_ICR0 0x300
#define LAPIC_ICR2 0x310
#define LAPIC_ESR 0x280
#define LOCAL_TIMER_VECTOR 0xef
#define APIC_INT_LEVELTRIG 0x08000
#define APIC_INT_ASSERT 0x04000
@@ -42,6 +49,8 @@
#define APIC_DM_NMI 0x00400
#define APIC_DM_INIT 0x00500
#define APIC_DM_STARTUP 0x00600
#define APIC_DIVISOR 16
#define APIC_LVT_TIMER_PERIODIC (1 << 17)
//#define DEBUG_PRINT_CPU
@@ -49,7 +58,7 @@
#ifdef DEBUG_PRINT_CPU
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
@@ -58,6 +67,7 @@ void assign_processor_id(void);
void arch_delay(int);
void x86_set_warm_reset(unsigned long ip, char *first_page_va);
void x86_init_perfctr(void);
int gettime_local_support = 0;
extern int kprintf(const char *format, ...);
@@ -106,7 +116,12 @@ void reload_idt(void)
}
static struct list_head handlers[256 - 32];
extern char nmi[];
extern char page_fault[], general_protection_exception[];
extern char debug_exception[], int3_exception[];
uint64_t boot_pat_state = 0;
int no_turbo = 0; /* May be updated by early parsing of kargs */
static void init_idt(void)
{
@@ -122,15 +137,22 @@ static void init_idt(void)
set_idt_entry(i, generic_common_handlers[i]);
}
set_idt_entry(2, (uintptr_t)nmi);
set_idt_entry(13, (unsigned long)general_protection_exception);
set_idt_entry(14, (unsigned long)page_fault);
set_idt_entry_trap_gate(1, (unsigned long)debug_exception);
set_idt_entry_trap_gate(3, (unsigned long)int3_exception);
reload_idt();
}
static int xsave_available = 0;
void init_fpu(void)
{
unsigned long reg;
unsigned long cpuid01_ecx;
asm volatile("movq %%cr0, %0" : "=r"(reg));
/* Unset EM and TS flag. */
@@ -140,10 +162,40 @@ void init_fpu(void)
asm volatile("movq %0, %%cr0" : : "r"(reg));
#ifdef ENABLE_SSE
asm volatile("cpuid" : "=c" (cpuid01_ecx) : "a" (0x1) : "%rbx", "%rdx");
asm volatile("movq %%cr4, %0" : "=r"(reg));
/* Set OSFXSR flag. */
reg |= (1 << 9);
/* Cr4 flags:
OSFXSR[b9] - enables SSE instructions
OSXMMEXCPT[b10] - generate SIMD FP exception instead of invalid op
OSXSAVE[b18] - enables access to xcr0
CPUID.01H:ECX flags:
XSAVE[b26] - verify existence of extended crs/XSAVE
AVX[b28] - verify existence of AVX instructions
*/
reg |= ((1 << 9) | (1 << 10));
if(cpuid01_ecx & (1 << 26)) {
/* XSAVE set, enable access to xcr0 */
dkprintf("init_fpu(): XSAVE available\n");
xsave_available = 1;
reg |= (1 << 18);
}
asm volatile("movq %0, %%cr4" : : "r"(reg));
dkprintf("init_fpu(): SSE init: CR4 = 0x%016lX\n", reg);
/* Set xcr0[2:1] to enable avx ops */
if(cpuid01_ecx & (1 << 28)) {
reg = xgetbv(0);
reg |= 0x6;
xsetbv(0, reg);
dkprintf("init_fpu(): AVX init: XCR0 = 0x%016lX\n", reg);
}
/* TODO: set MSR_IA32_XSS to enable xsaves/xrstors */
#else
kprintf("init_fpu(): SSE not enabled\n");
#endif
asm volatile("finit");
@@ -203,6 +255,174 @@ void lapic_icr_write(unsigned int h, unsigned int l)
lapic_write(LAPIC_ICR0, l);
}
/* Program the local APIC timer in periodic mode.
 * clocks: desired period in bus clocks; scaled down by APIC_DIVISOR
 * to match the divide configuration written below (3 == divide-by-16).
 * NOTE(review): the initial-count register is written before the divide
 * configuration and the LVT entry — confirm this ordering is intended. */
void lapic_timer_enable(unsigned int clocks)
{
unsigned int lvtt_value;
/* Initial count: each APIC tick represents APIC_DIVISOR bus clocks. */
lapic_write(LAPIC_TIMER_INITIAL, clocks / APIC_DIVISOR);
/* Divide configuration value 3 selects divide-by-16. */
lapic_write(LAPIC_TIMER_DIVIDE, 3);
/* initialize periodic timer */
lvtt_value = LOCAL_TIMER_VECTOR | APIC_LVT_TIMER_PERIODIC;
lapic_write(LAPIC_TIMER, lvtt_value);
}
/* Stop the local APIC timer on this CPU.
 * Fixed: declared with an empty parameter list `()`, which in C means
 * "unspecified parameters"; use the explicit `(void)` prototype form. */
void lapic_timer_disable(void)
{
	/* Writing 0 to the initial-count register halts the countdown. */
	lapic_write(LAPIC_TIMER_INITIAL, 0);
}
/* Dump the value of MSR `idx` in decimal, hexadecimal, and as a
 * 64-column per-bit table (bit index header row, then 0/1 row). */
void print_msr(int idx)
{
	unsigned long long value = rdmsr(idx);
	int b;

	__kprintf("MSR 0x%x val (dec): %llu\n", idx, value);
	__kprintf("MSR 0x%x val (hex): 0x%llx\n", idx, value);

	/* Header row: bit indices 63..0, three columns each. */
	__kprintf(" ");
	for (b = 63; b >= 0; --b)
		__kprintf("%3d", b);
	__kprintf("\n");

	/* Value row: one 0/1 column per bit, most significant first. */
	__kprintf("MSR 0x%x val (bin):", idx);
	for (b = 63; b >= 0; --b)
		__kprintf("%3d", (value & ((unsigned long)1 << b)) ? 1 : 0);
	__kprintf("\n");
}
/* Configure P-state, Turbo Boost (IDA) and energy/perf bias MSRs on this
 * CPU. Support is detected via CPUID.06H; the global `no_turbo` flag
 * (set from early kernel-argument parsing) decides whether turbo is
 * engaged or explicitly disengaged. */
void init_pstate_and_turbo(void)
{
uint64_t value;
uint64_t eax, ecx;
/* CPUID leaf 06H: power management feature flags in EAX/ECX. */
asm volatile("cpuid" : "=a" (eax), "=c" (ecx) : "a" (0x6) : "%rbx", "%rdx");
if (!(ecx & 0x01)) {
/* P-states and/or Turbo Boost are not supported. */
return;
}
/* Query and set max pstate value:
*
* IA32_PERF_CTL (0x199H) bit 15:0:
* Target performance State Value
*
* The base operating ratio can be read
* from MSR_PLATFORM_INFO[15:8].
*/
value = rdmsr(MSR_PLATFORM_INFO);
value &= 0xFF00;
/* Turbo boost setting:
* Bit 1 of EAX in Leaf 06H (i.e. CPUID.06H:EAX[1]) indicates opportunistic
* processor performance operation, such as IDA, has been enabled by BIOS.
*
* IA32_PERF_CTL (0x199H) bit 32: IDA (i.e., turbo boost) Engage. (R/W)
* When set to 1: disengages IDA
* When set to 0: enables IDA
*/
if ((eax & (1 << 1))) {
if (!no_turbo) {
uint64_t turbo_value;
turbo_value = rdmsr(MSR_NHM_TURBO_RATIO_LIMIT);
/* Lowest byte: max turbo ratio with one core active. */
turbo_value &= 0xFF;
/* NOTE(review): this replaces (not augments) the base ratio
 * computed above with the turbo ratio — confirm intended. */
value = turbo_value << 8;
/* Enable turbo boost */
value &= ~((uint64_t)1 << 32);
}
/* Turbo boost feature is supported, but requested to be turned off */
else {
/* Disable turbo boost */
value |= (uint64_t)1 << 32;
}
}
wrmsr(MSR_IA32_PERF_CTL, value);
/* IA32_ENERGY_PERF_BIAS (0x1B0H) bit 3:0:
* (The processor supports this capability if CPUID.06H:ECX.SETBH[bit 3] is set.)
* Power Policy Preference:
* 0 indicates preference to highest performance.
* 15 indicates preference to maximize energy saving.
*
* Set energy/perf bias to high performance
*/
if (ecx & (1 << 3)) {
wrmsr(MSR_IA32_ENERGY_PERF_BIAS, 0);
}
//print_msr(MSR_IA32_MISC_ENABLE);
//print_msr(MSR_IA32_PERF_CTL);
//print_msr(MSR_IA32_ENERGY_PERF_BIAS);
}
/* Memory-type encodings that can be programmed into the eight
 * entries of the IA32_PAT MSR. */
enum {
PAT_UC = 0, /* uncached */
PAT_WC = 1, /* Write combining */
PAT_WT = 4, /* Write Through */
PAT_WP = 5, /* Write Protected */
PAT_WB = 6, /* Write Back (default) */
PAT_UC_MINUS = 7, /* UC, but can be overridden by MTRR */
};
/* Place memory type `y` into PAT entry `x` (8 bits per entry). */
#define PAT(x, y) ((uint64_t)PAT_ ## y << ((x)*8))
/* Detect PAT support via CPUID.01H:EDX[16] and reprogram the IA32_PAT
 * MSR so page-table PWT/PCD/PAT bit combinations match Linux's PTE
 * cache-type encoding (WB/WC/UC-/UC). The firmware's original PAT value
 * is saved once, by the first CPU to run this, in boot_pat_state. */
void init_pat(void)
{
uint64_t pat;
uint64_t edx;
/*
* An operating system or executive can detect the availability of the
* PAT by executing the CPUID instruction with a value of 1 in the EAX
* register. Support for the PAT is indicated by the PAT flag (bit 16
* of the values returned to EDX register). If the PAT is supported,
* the operating system or executive can use the IA32_PAT MSR to program
* the PAT. When memory types have been assigned to entries in the PAT,
* software can then use of the PAT-index bit (PAT) in the page-table and
* page-directory entries along with the PCD and PWT bits to assign memory
* types from the PAT to individual pages.
*/
asm volatile("cpuid" : "=d" (edx) : "a" (0x1) : "%rbx", "%rcx");
if (!(edx & ((uint64_t)1 << 16))) {
kprintf("PAT not supported.\n");
return;
}
/* Set PWT to Write-Combining. All other bits stay the same */
/* (Based on Linux' settings)
*
* PTE encoding used in Linux:
* PAT
* |PCD
* ||PWT
* |||
* 000 WB _PAGE_CACHE_WB
* 001 WC _PAGE_CACHE_WC
* 010 UC- _PAGE_CACHE_UC_MINUS
* 011 UC _PAGE_CACHE_UC
* PAT bit unused
*/
pat = PAT(0, WB) | PAT(1, WC) | PAT(2, UC_MINUS) | PAT(3, UC) |
PAT(4, WB) | PAT(5, WC) | PAT(6, UC_MINUS) | PAT(7, UC);
/* Boot CPU check */
if (!boot_pat_state)
boot_pat_state = rdmsr(MSR_IA32_CR_PAT);
wrmsr(MSR_IA32_CR_PAT, pat);
dkprintf("PAT support detected and reconfigured.\n");
}
void init_lapic(void)
{
unsigned long baseaddr;
@@ -262,16 +482,23 @@ static void init_smp_processor(void)
static char *trampoline_va, *first_page_va;
/*@
@ assigns trampoline_va;
@ assigns first_page_va;
@*/
void ihk_mc_init_ap(void)
{
struct ihk_mc_cpu_info *cpu_info = ihk_mc_get_cpu_info();
trampoline_va = map_fixed_area(AP_TRAMPOLINE, AP_TRAMPOLINE_SIZE,
0);
trampoline_va = map_fixed_area(ap_trampoline, AP_TRAMPOLINE_SIZE, 0);
kprintf("Trampoline area: 0x%lx \n", ap_trampoline);
first_page_va = map_fixed_area(0, PAGE_SIZE, 0);
kprintf("# of cpus : %d\n", cpu_info->ncpus);
init_processors_local(cpu_info->ncpus);
kprintf("IKC IRQ vector: %d, IKC target CPU APIC: %d\n",
ihk_ikc_irq, ihk_ikc_irq_apicid);
/* Do initialization for THIS cpu (BSP) */
assign_processor_id();
@@ -347,6 +574,29 @@ static void check_no_execute(void)
return;
}
/* Detect invariant-TSC support and record it in the global flag
 * gettime_local_support, which enables TSC-based timekeeping. */
void init_gettime_support(void)
{
	uint64_t rax, rbx, rcx, rdx;

	/* CPUID leaf 0x80000007: EDX[8] advertises an invariant TSC
	 * (Intel64 and IA-32 Architectures Software Developer's Manual). */
	asm volatile("cpuid"
		     : "=a"(rax), "=b"(rbx), "=c"(rcx), "=d"(rdx)
		     : "a" ((uint64_t)0x80000007));
	if (rdx & (1 << 8)) {
		gettime_local_support = 1;
		kprintf("Invariant TSC supported.\n");
	}
}
void init_cpu(void)
{
enable_page_protection_fault();
@@ -355,6 +605,8 @@ void init_cpu(void)
init_lapic();
init_syscall();
x86_init_perfctr();
init_pstate_and_turbo();
init_pat();
}
void setup_x86(void)
@@ -371,6 +623,8 @@ void setup_x86(void)
init_cpu();
init_gettime_support();
kprintf("setup_x86 done.\n");
}
@@ -406,32 +660,77 @@ void setup_x86_ap(void (*next_func)(void))
void arch_show_interrupt_context(const void *reg);
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs);
void check_signal(unsigned long, void *, int);
extern void tlb_flush_handler(int vector);
void handle_interrupt(int vector, struct x86_regs *regs)
void handle_interrupt(int vector, struct x86_user_context *regs)
{
struct ihk_mc_interrupt_handler *h;
struct cpu_local_var *v = get_this_cpu_local_var();
lapic_ack();
++v->in_interrupt;
dkprintf("CPU[%d] got interrupt, vector: %d, RIP: 0x%lX\n",
ihk_mc_get_processor_id(), vector, regs->rip);
ihk_mc_get_processor_id(), vector, regs->gpr.rip);
if (vector < 0 || vector > 255) {
panic("Invalid interrupt vector.");
}
else if (vector < 32) {
if (vector == 8 ||
(vector >= 10 && vector <= 15) || vector == 17) {
struct siginfo info;
switch(vector){
case 0:
memset(&info, '\0', sizeof info);
info.si_signo = SIGFPE;
info.si_code = FPE_INTDIV;
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
set_signal(SIGFPE, regs, &info);
break;
case 9:
case 16:
case 19:
set_signal(SIGFPE, regs, NULL);
break;
case 4:
case 5:
set_signal(SIGSEGV, regs, NULL);
break;
case 6:
memset(&info, '\0', sizeof info);
info.si_signo = SIGILL;
info.si_code = ILL_ILLOPN;
info._sifields._sigfault.si_addr = (void *)regs->gpr.rip;
set_signal(SIGILL, regs, &info);
break;
case 10:
set_signal(SIGSEGV, regs, NULL);
break;
case 11:
case 12:
set_signal(SIGBUS, regs, NULL);
break;
case 17:
memset(&info, '\0', sizeof info);
info.si_signo = SIGBUS;
info.si_code = BUS_ADRALN;
set_signal(SIGBUS, regs, &info);
break;
default:
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
vector, regs->rflags, regs->cs, regs->rip);
} else {
kprintf("Exception %d, rflags: 0x%lX CS: 0x%lX, RIP: 0x%lX\n",
vector, regs->rflags, regs->cs, regs->rip);
vector, regs->gpr.rflags, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
panic("Unhandled exception");
}
arch_show_interrupt_context(regs);
panic("Unhandled exception");
}
else if (vector == LOCAL_TIMER_VECTOR) {
unsigned long irqstate;
/* Timer interrupt, enabled only on oversubscribed CPU cores,
* request reschedule */
irqstate = ihk_mc_spinlock_lock(&v->runq_lock);
v->flags |= CPU_FLAG_NEED_RESCHED;
ihk_mc_spinlock_unlock(&v->runq_lock, irqstate);
dkprintf("timer[%lu]: CPU_FLAG_NEED_RESCHED \n", rdtsc());
}
else if (vector >= IHK_TLB_FLUSH_IRQ_VECTOR_START &&
vector < IHK_TLB_FLUSH_IRQ_VECTOR_END) {
@@ -446,27 +745,85 @@ void handle_interrupt(int vector, struct x86_regs *regs)
}
}
check_signal(0, regs);
check_signal(0, regs, 0);
check_need_resched();
--v->in_interrupt;
}
void gpe_handler(struct x86_regs *regs)
void gpe_handler(struct x86_user_context *regs)
{
struct siginfo info;
kprintf("General protection fault (err: %lx, %lx:%lx)\n",
regs->error, regs->cs, regs->rip);
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
memset(&info, '\0', sizeof info);
set_signal(SIGILL, regs, &info);
check_signal(0, regs);
if ((regs->gpr.cs & 3) == 0) {
panic("gpe_handler");
}
set_signal(SIGSEGV, regs, NULL);
check_signal(0, regs, 0);
check_need_resched();
// panic("GPF");
}
/* #DB (debug exception) handler: classify the trap from DR6 and deliver
 * SIGTRAP to the current thread, then run pending signal/resched checks. */
void debug_handler(struct x86_user_context *regs)
{
unsigned long db6;
int si_code = 0;
struct siginfo info;
#ifdef DEBUG_PRINT_CPU
kprintf("debug exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
#endif
/* Read DR6 to find out why the exception fired. */
asm("mov %%db6, %0" :"=r" (db6));
if (db6 & DB6_BS) {
/* Single-step trap: clear TF so it does not immediately re-trigger. */
regs->gpr.rflags &= ~RFLAGS_TF;
si_code = TRAP_TRACE;
} else if (db6 & (DB6_B3|DB6_B2|DB6_B1|DB6_B0)) {
/* One of the four hardware breakpoints (DR0-DR3) matched. */
si_code = TRAP_HWBKPT;
}
memset(&info, '\0', sizeof info);
info.si_code = si_code;
set_signal(SIGTRAP, regs, &info);
check_signal(0, regs, 0);
check_need_resched();
}
/* #BP (int3 breakpoint) handler: deliver SIGTRAP with TRAP_BRKPT to the
 * current thread, then run pending signal/resched checks. */
void int3_handler(struct x86_user_context *regs)
{
struct siginfo info;
#ifdef DEBUG_PRINT_CPU
kprintf("int3 exception (err: %lx, %lx:%lx)\n",
regs->gpr.error, regs->gpr.cs, regs->gpr.rip);
arch_show_interrupt_context(regs);
#endif
memset(&info, '\0', sizeof info);
info.si_code = TRAP_BRKPT;
set_signal(SIGTRAP, regs, &info);
check_signal(0, regs, 0);
check_need_resched();
}
/* Busy-wait until the local APIC ICR delivery-status bit clears,
 * i.e. the previous IPI has been accepted. */
static void wait_icr_idle(void)
{
	for (;;) {
		if (!(lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY))
			break;
		cpu_pause();
	}
}
/* Send an IPI with command word `low` to the CPU with APIC id `apicid`.
 * Interrupts are disabled around the two ICR writes so the high/low
 * halves are not interleaved with another sender on this CPU. */
void x86_issue_ipi(unsigned int apicid, unsigned int low)
{
unsigned long flags;
flags = cpu_disable_interrupt_save();
/* Wait for any previous IPI to be accepted before reusing the ICR. */
wait_icr_idle();
lapic_icr_write(apicid << LAPIC_ICR_ID_SHIFT, low);
cpu_restore_interrupt(flags);
}
static void outb(uint8_t v, uint16_t port)
@@ -479,13 +836,6 @@ static void set_warm_reset_vector(unsigned long ip)
x86_set_warm_reset(ip, first_page_va);
}
static void wait_icr_idle(void)
{
while (lapic_read(LAPIC_ICR0) & APIC_ICR_BUSY) {
cpu_pause();
}
}
static void __x86_wakeup(int apicid, unsigned long ip)
{
int retry = 3;
@@ -499,7 +849,6 @@ static void __x86_wakeup(int apicid, unsigned long ip)
/* INIT */
x86_issue_ipi(apicid,
APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT);
wait_icr_idle();
x86_issue_ipi(apicid,
APIC_INT_LEVELTRIG | APIC_DM_INIT);
@@ -524,31 +873,65 @@ void cpu_halt(void)
asm volatile("hlt");
}
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
/* Enable interrupts and halt; `sti; hlt` executes atomically so a
 * pending interrupt cannot be lost between the two instructions. */
void cpu_safe_halt(void)
{
asm volatile("sti; hlt");
}
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled == 0;
@*/
/* Enable maskable interrupts on this CPU. */
void cpu_enable_interrupt(void)
{
asm volatile("sti");
}
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled > 0;
@*/
/* Disable maskable interrupts on this CPU. */
void cpu_disable_interrupt(void)
{
asm volatile("cli");
}
/*@
@ assigns \nothing;
@ behavior to_enabled:
@ assumes flags & RFLAGS_IF;
@ ensures \interrupt_disabled == 0;
@ behavior to_disabled:
@ assumes !(flags & RFLAGS_IF);
@ ensures \interrupt_disabled > 0;
@*/
/* Restore the interrupt state previously captured by
 * cpu_disable_interrupt_save() by reloading the saved RFLAGS value. */
void cpu_restore_interrupt(unsigned long flags)
{
asm volatile("push %0; popf" : : "g"(flags) : "memory", "cc");
}
/*@
@ assigns \nothing;
@*/
/* Spin-wait hint for busy loops; the `pause` instruction reduces
 * power use and bus contention while spinning. */
void cpu_pause(void)
{
asm volatile("pause" ::: "memory");
}
/*@
@ assigns \nothing;
@ ensures \interrupt_disabled > 0;
@ behavior from_enabled:
@ assumes \interrupt_disabled == 0;
@ ensures \result & RFLAGS_IF;
@ behavior from_disabled:
@ assumes \interrupt_disabled > 0;
@ ensures !(\result & RFLAGS_IF);
@*/
unsigned long cpu_disable_interrupt_save(void)
{
unsigned long flags;
@@ -558,6 +941,17 @@ unsigned long cpu_disable_interrupt_save(void)
return flags;
}
/*@
@ behavior valid_vector:
@ assumes 32 <= vector <= 255;
@ requires \valid(h);
@ assigns handlers[vector-32];
@ ensures \result == 0;
@ behavior invalid_vector:
@ assumes (vector < 32) || (255 < vector);
@ assigns \nothing;
@ ensures \result == -EINVAL;
@*/
int ihk_mc_register_interrupt_handler(int vector,
struct ihk_mc_interrupt_handler *h)
{
@@ -579,6 +973,11 @@ int ihk_mc_unregister_interrupt_handler(int vector,
extern unsigned long __page_fault_handler_address;
/*@
@ requires \valid(h);
@ assigns __page_fault_handler_address;
@ ensures __page_fault_handler_address == h;
@*/
void ihk_mc_set_page_fault_handler(void (*h)(void *, uint64_t, void *))
{
__page_fault_handler_address = (unsigned long)h;
@@ -588,6 +987,18 @@ extern char trampoline_code_data[], trampoline_code_data_end[];
struct page_table *get_init_page_table(void);
unsigned long get_transit_page_table(void);
/* reusable, but not reentrant */
/*@
@ requires \valid_apicid(cpuid); // valid APIC ID or not
@ requires \valid(pc);
@ requires \valid(trampoline_va);
@ requires \valid(trampoline_code_data
@ +(0..(trampoline_code_data_end - trampoline_code_data)));
@ requires \valid_physical(ap_trampoline); // valid physical address or not
@ assigns (char *)trampoline_va+(0..trampoline_code_data_end - trampoline_code_data);
@ assigns cpu_boot_status;
@ ensures cpu_boot_status != 0;
@*/
void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
{
unsigned long *p;
@@ -607,7 +1018,7 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
cpu_boot_status = 0;
__x86_wakeup(cpuid, AP_TRAMPOLINE);
__x86_wakeup(cpuid, ap_trampoline);
/* XXX: Time out */
while (!cpu_boot_status) {
@@ -615,6 +1026,11 @@ void ihk_mc_boot_cpu(int cpuid, unsigned long pc)
}
}
/*@
@ requires \valid(new_ctx);
@ requires (stack_pointer == NULL) || \valid((unsigned long *)stack_pointer-1);
@ requires \valid(next_function);
@*/
void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
void *stack_pointer, void (*next_function)(void))
{
@@ -633,7 +1049,28 @@ void ihk_mc_init_context(ihk_mc_kernel_context_t *new_ctx,
}
extern char enter_user_mode[];
/*
* Release runq_lock before entering user space.
* This is needed because schedule() holds the runq lock throughout
* the context switch and when a new process is created it starts
* execution in enter_user_mode, which in turn calls this function.
*/
void release_runq_lock(void)
{
/* runq_irqstate presumably holds the IRQ flags saved when the lock
 * was taken — verify against the lock site in schedule(). */
ihk_mc_spinlock_unlock(&(cpu_local_var(runq_lock)),
cpu_local_var(runq_irqstate));
}
/*@
@ requires \valid(ctx);
@ requires \valid(puctx);
@ requires \valid((ihk_mc_user_context_t *)stack_pointer-1);
@ requires \valid_user(new_pc); // valid user space address or not
@ requires \valid_user(user_sp-1);
@ assigns *((ihk_mc_user_context_t *)stack_pointer-1);
@ assigns ctx->rsp0;
@*/
void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
ihk_mc_user_context_t **puctx,
void *stack_pointer, unsigned long new_pc,
@@ -649,49 +1086,95 @@ void ihk_mc_init_user_process(ihk_mc_kernel_context_t *ctx,
*puctx = uctx;
memset(uctx, 0, sizeof(ihk_mc_user_context_t));
uctx->cs = USER_CS;
uctx->rip = new_pc;
uctx->ss = USER_DS;
uctx->rsp = user_sp;
uctx->rflags = RFLAGS_IF;
uctx->gpr.cs = USER_CS;
uctx->gpr.rip = new_pc;
uctx->gpr.ss = USER_DS;
uctx->gpr.rsp = user_sp;
uctx->gpr.rflags = RFLAGS_IF;
uctx->is_gpr_valid = 1;
ihk_mc_init_context(ctx, sp, (void (*)(void))enter_user_mode);
ctx->rsp0 = (unsigned long)stack_pointer;
}
/*@
@ behavior rsp:
@ assumes reg == IHK_UCR_STACK_POINTER;
@ requires \valid(uctx);
@ assigns uctx->gpr.rsp;
@ ensures uctx->gpr.rsp == value;
@ behavior rip:
@ assumes reg == IHK_UCR_PROGRAM_COUNTER;
@ requires \valid(uctx);
@ assigns uctx->gpr.rip;
@ ensures uctx->gpr.rip == value;
@*/
void ihk_mc_modify_user_context(ihk_mc_user_context_t *uctx,
enum ihk_mc_user_context_regtype reg,
unsigned long value)
{
if (reg == IHK_UCR_STACK_POINTER) {
uctx->rsp = value;
uctx->gpr.rsp = value;
} else if (reg == IHK_UCR_PROGRAM_COUNTER) {
uctx->rip = value;
uctx->gpr.rip = value;
}
}
void ihk_mc_print_user_context(ihk_mc_user_context_t *uctx)
{
kprintf("CS:RIP = %04lx:%16lx\n", uctx->cs, uctx->rip);
kprintf("CS:RIP = %04lx:%16lx\n", uctx->gpr.cs, uctx->gpr.rip);
kprintf("%16lx %16lx %16lx %16lx\n%16lx %16lx %16lx\n",
uctx->rax, uctx->rbx, uctx->rcx, uctx->rdx,
uctx->rsi, uctx->rdi, uctx->rsp);
uctx->gpr.rax, uctx->gpr.rbx, uctx->gpr.rcx, uctx->gpr.rdx,
uctx->gpr.rsi, uctx->gpr.rdi, uctx->gpr.rsp);
}
/*@
@ requires \valid(handler);
@ assigns __x86_syscall_handler;
@ ensures __x86_syscall_handler == handler;
@*/
void ihk_mc_set_syscall_handler(long (*handler)(int, ihk_mc_user_context_t *))
{
__x86_syscall_handler = handler;
}
/*@
@ assigns \nothing;
@*/
void ihk_mc_delay_us(int us)
{
arch_delay(us);
}
#define EXTENDED_ARCH_SHOW_CONTEXT
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
void arch_show_extended_context(void)
{
unsigned long cr0, cr4, msr, xcr0;
/* Read and print CRs, MSR_EFER, XCR0 */
asm volatile("movq %%cr0, %0" : "=r"(cr0));
asm volatile("movq %%cr4, %0" : "=r"(cr4));
msr = rdmsr(MSR_EFER);
xcr0 = xgetbv(0);
__kprintf("\n CR0 CR4\n");
__kprintf("%016lX %016lX\n", cr0, cr4);
__kprintf(" MSR_EFER\n");
__kprintf("%016lX\n", msr);
__kprintf(" XCR0\n");
__kprintf("%016lX\n", xcr0);
}
#endif
void arch_show_interrupt_context(const void *reg)
{
const struct x86_regs *regs = reg;
int irqflags;
const struct x86_user_context *uctx = reg;
const struct x86_basic_regs *regs = &uctx->gpr;
unsigned long irqflags;
irqflags = kprintf_lock();
@@ -711,10 +1194,22 @@ void arch_show_interrupt_context(const void *reg)
__kprintf(" CS SS RFLAGS ERROR\n");
__kprintf("%16lx %16lx %16lx %16lx\n",
regs->cs, regs->ss, regs->rflags, regs->error);
#ifdef EXTENDED_ARCH_SHOW_CONTEXT
arch_show_extended_context();
#endif
kprintf_unlock(irqflags);
}
/*@
@ behavior fs_base:
@ assumes type == IHK_ASR_X86_FS;
@ ensures \result == 0;
@ behavior invalid_type:
@ assumes type != IHK_ASR_X86_FS;
@ ensures \result == -EINVAL;
@*/
int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
unsigned long value)
{
@@ -728,6 +1223,15 @@ int ihk_mc_arch_set_special_register(enum ihk_asr_type type,
}
}
/*@
@ behavior fs_base:
@ assumes type == IHK_ASR_X86_FS;
@ requires \valid(value);
@ ensures \result == 0;
@ behavior invalid_type:
@ assumes type != IHK_ASR_X86_FS;
@ ensures \result == -EINVAL;
@*/
int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
unsigned long *value)
{
@@ -741,11 +1245,116 @@ int ihk_mc_arch_get_special_register(enum ihk_asr_type type,
}
}
/*@
@ requires \valid_apicid(cpu); // valid APIC ID or not
@ ensures \result == 0
@*/
int ihk_mc_interrupt_cpu(int cpu, int vector)
{
dkprintf("[%d] ihk_mc_interrupt_cpu: %d\n", ihk_mc_get_processor_id(), cpu);
wait_icr_idle();
x86_issue_ipi(cpu, vector);
return 0;
}
/*@
@ requires \valid(thread);
@ ensures thread->fp_regs == NULL;
@*/
/* Free the per-thread FP/extended-state save area, if one was allocated.
 * Fixed: the old guard `if (thread && !thread->fp_regs) return;` let a
 * NULL `thread` fall through and be dereferenced below; bail out when
 * `thread` is NULL as well. (The ACSL contract also named `proc` while
 * the parameter is `thread`; corrected.) */
void
release_fp_regs(struct thread *thread)
{
	int pages;

	if (!thread || !thread->fp_regs)
		return;

	/* Same page-count rounding as the allocation in save_fp_regs(). */
	pages = (sizeof(fp_regs_struct) + 4095) >> 12;
	ihk_mc_free_pages(thread->fp_regs, pages);
	thread->fp_regs = NULL;
}
/* Save the thread's FP/SSE/AVX state into its fp_regs area, allocating
 * the area lazily on first use. A failed allocation is logged and the
 * save is silently skipped. Only does the save when XSAVE is available
 * (xsave_available is set by init_fpu()). */
void
save_fp_regs(struct thread *thread)
{
int pages;
if (!thread->fp_regs) {
/* Round the save-area size up to whole 4 KiB pages. */
pages = (sizeof(fp_regs_struct) + 4095) >> 12;
thread->fp_regs = ihk_mc_alloc_pages(pages, IHK_MC_AP_NOWAIT);
if (!thread->fp_regs) {
kprintf("error: allocating fp_regs pages\n");
return;
}
memset(thread->fp_regs, 0, sizeof(fp_regs_struct));
}
if (xsave_available) {
unsigned int low, high;
/* Request full save of x87, SSE and AVX states */
low = 0x7;
high = 0;
/* EDX:EAX form the XSAVE requested-feature bitmap. */
asm volatile("xsave %0" : : "m" (*thread->fp_regs), "a" (low), "d" (high)
: "memory");
dkprintf("fp_regs for TID %d saved\n", thread->tid);
}
}
/* Restore the thread's FP/SSE/AVX state from its fp_regs area.
 * No-op when no state was ever saved or XSAVE is unavailable. */
void
restore_fp_regs(struct thread *thread)
{
if (!thread->fp_regs)
return;
if (xsave_available) {
unsigned int low, high;
/* Request full restore of x87, SSE and AVX states */
low = 0x7;
high = 0;
/* EDX:EAX form the XRSTOR requested-feature bitmap. */
asm volatile("xrstor %0" : : "m" (*thread->fp_regs),
"a" (low), "d" (high));
dkprintf("fp_regs for TID %d restored\n", thread->tid);
}
// XXX: why release??
//release_fp_regs(thread);
}
/* Return the thread's saved user-mode register context, or NULL when it
 * is not safely readable: the thread is neither blocked/stopped/traced
 * nor the currently running thread on this CPU, or its GPR snapshot is
 * not valid. Lazily fills in the segment-register part on first access. */
ihk_mc_user_context_t *lookup_user_context(struct thread *thread)
{
ihk_mc_user_context_t *uctx = thread->uctx;
if ((!(thread->status & (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE
| PS_STOPPED | PS_TRACED))
&& (thread != cpu_local_var(current)))
|| !uctx->is_gpr_valid) {
return NULL;
}
if (!uctx->is_sr_valid) {
/* Segment registers were never captured; synthesize defaults
 * (FS base from the thread's TLS block, everything else zero). */
uctx->sr.fs_base = thread->tlsblock_base;
uctx->sr.gs_base = 0;
uctx->sr.ds = 0;
uctx->sr.es = 0;
uctx->sr.fs = 0;
uctx->sr.gs = 0;
uctx->is_sr_valid = 1;
}
return uctx;
} /* lookup_user_context() */
/* Reset this CPU's time stamp counter to zero by writing the TSC MSR. */
void zero_tsc(void)
{
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
}

View File

@@ -15,7 +15,7 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
@@ -78,15 +78,16 @@ int get_prstatus_size(void)
* \brief Fill a prstatus structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs0 A pointer to a x86_regs structure.
*/
void fill_prstatus(struct note *head, struct process *proc, void *regs0)
void fill_prstatus(struct note *head, struct thread *thread, void *regs0)
{
void *name;
struct elf_prstatus64 *prstatus;
struct x86_regs *regs = regs0;
struct x86_user_context *uctx = regs0;
struct x86_basic_regs *regs = &uctx->gpr;
register unsigned long _r12 asm("r12");
register unsigned long _r13 asm("r13");
register unsigned long _r14 asm("r14");
@@ -159,11 +160,11 @@ int get_prpsinfo_size(void)
* \brief Fill a prpsinfo structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
void fill_prpsinfo(struct note *head, struct thread *thread, void *regs)
{
void *name;
struct elf_prpsinfo64 *prpsinfo;
@@ -175,8 +176,8 @@ void fill_prpsinfo(struct note *head, struct process *proc, void *regs)
memcpy(name, "CORE", sizeof("CORE"));
prpsinfo = (struct elf_prpsinfo64 *)(name + align32(sizeof("CORE")));
prpsinfo->pr_state = proc->ftn->status;
prpsinfo->pr_pid = proc->ftn->pid;
prpsinfo->pr_state = thread->status;
prpsinfo->pr_pid = thread->proc->pid;
/*
We leave most of the fields unfilled.
@@ -209,11 +210,11 @@ int get_auxv_size(void)
* \brief Fill an AUXV structure.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_auxv(struct note *head, struct process *proc, void *regs)
void fill_auxv(struct note *head, struct thread *thread, void *regs)
{
void *name;
void *auxv;
@@ -224,7 +225,7 @@ void fill_auxv(struct note *head, struct process *proc, void *regs)
name = (void *) (head + 1);
memcpy(name, "CORE", sizeof("CORE"));
auxv = name + align32(sizeof("CORE"));
memcpy(auxv, proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
memcpy(auxv, thread->proc->saved_auxv, sizeof(unsigned long) * AUXV_LEN);
}
/**
@@ -242,23 +243,23 @@ int get_note_size(void)
* \brief Fill the NOTE segment.
*
* \param head A pointer to a note structure.
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
*/
void fill_note(void *note, struct process *proc, void *regs)
void fill_note(void *note, struct thread *thread, void *regs)
{
fill_prstatus(note, proc, regs);
fill_prstatus(note, thread, regs);
note += get_prstatus_size();
fill_prpsinfo(note, proc, regs);
fill_prpsinfo(note, thread, regs);
note += get_prpsinfo_size();
fill_auxv(note, proc, regs);
fill_auxv(note, thread, regs);
}
/**
* \brief Generate an image of the core file.
*
* \param proc A pointer to the current process structure.
* \param thread A pointer to the current thread structure.
* \param regs A pointer to a x86_regs structure.
* \param coretable(out) An array of core chunks.
* \param chunks(out) Number of the entires of coretable.
@@ -270,7 +271,7 @@ void fill_note(void *note, struct process *proc, void *regs)
* should be zero.
*/
int gencore(struct process *proc, void *regs,
int gencore(struct thread *thread, void *regs,
struct coretable **coretable, int *chunks)
{
struct coretable *ct = NULL;
@@ -278,7 +279,7 @@ int gencore(struct process *proc, void *regs,
Elf64_Phdr *ph = NULL;
void *note = NULL;
struct vm_range *range;
struct process_vm *vm = proc->vm;
struct process_vm *vm = thread->vm;
int segs = 1; /* the first one is for NOTE */
int notesize, phsize, alignednotesize;
unsigned int offset = 0;
@@ -305,7 +306,7 @@ int gencore(struct process *proc, void *regs,
unsigned long p, phys;
int prevzero = 0;
for (p = range->start; p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
prevzero = 1;
} else {
@@ -325,7 +326,7 @@ int gencore(struct process *proc, void *regs,
dkprintf("we have %d segs and %d chunks.\n\n", segs, *chunks);
{
struct vm_regions region = proc->vm->region;
struct vm_regions region = thread->vm->region;
dkprintf("text: %lx-%lx\n", region.text_start, region.text_end);
dkprintf("data: %lx-%lx\n", region.data_start, region.data_end);
@@ -363,7 +364,7 @@ int gencore(struct process *proc, void *regs,
goto fail;
}
memset(note, 0, alignednotesize);
fill_note(note, proc, regs);
fill_note(note, thread, regs);
/* prgram header for NOTE segment is exceptional */
ph[0].p_type = PT_NOTE;
@@ -433,7 +434,7 @@ int gencore(struct process *proc, void *regs,
for (start = p = range->start;
p < range->end; p += PAGE_SIZE) {
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)p, &phys) != 0) {
if (prevzero == 0) {
/* We begin a new chunk */
@@ -471,9 +472,9 @@ int gencore(struct process *proc, void *regs,
i++;
}
} else {
if ((proc->vm->region.user_start <= range->start) &&
(range->end <= proc->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if ((thread->vm->region.user_start <= range->start) &&
(range->end <= thread->vm->region.user_end)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table,
(void *)range->start, &phys) != 0) {
dkprintf("could not convert user virtual address %lx"
"to physical address", range->start);

View File

@@ -5,15 +5,20 @@
#define __HEADER_X86_COMMON_ARCH_LOCK
#include <ihk/cpu.h>
#include <ihk/atomic.h>
//#define DEBUG_SPINLOCK
//#define DEBUG_MCS_RWLOCK
#ifdef DEBUG_SPINLOCK
#if defined(DEBUG_SPINLOCK) || defined(DEBUG_MCS_RWLOCK)
int __kprintf(const char *format, ...);
#endif
typedef int ihk_spinlock_t;
extern void preempt_enable(void);
extern void preempt_disable(void);
#define IHK_STATIC_SPINLOCK_FUNCS
static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
@@ -22,7 +27,17 @@ static void ihk_mc_spinlock_init(ihk_spinlock_t *lock)
}
#define SPIN_LOCK_UNLOCKED 0
static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_lock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_lock_noirq __ihk_mc_spinlock_lock_noirq
#endif
static void __ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
{
int inc = 0x00010000;
int tmp;
@@ -41,10 +56,8 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
: "+Q" (inc), "+m" (*lock), "=r" (tmp) : : "memory", "cc");
#endif
#ifdef DEBUG_SPINLOCK
__kprintf("[%d] trying to grab lock: 0x%lX\n",
ihk_mc_get_processor_id(), lock);
#endif
preempt_disable();
asm volatile("lock; xaddl %0, %1\n"
"movzwl %w0, %2\n\t"
"shrl $16, %0\n\t"
@@ -60,36 +73,431 @@ static void ihk_mc_spinlock_lock_noirq(ihk_spinlock_t *lock)
:
: "memory", "cc");
#ifdef DEBUG_SPINLOCK
__kprintf("[%d] holding lock: 0x%lX\n", ihk_mc_get_processor_id(), lock);
#endif
}
static unsigned long ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_lock(l) ({ unsigned long rc;\
__kprintf("[%d] call ihk_mc_spinlock_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
rc = __ihk_mc_spinlock_lock(l);\
__kprintf("[%d] ret ihk_mc_spinlock_lock\n", ihk_mc_get_processor_id()); rc;\
})
#else
#define ihk_mc_spinlock_lock __ihk_mc_spinlock_lock
#endif
/*
 * Acquire a ticket spinlock with interrupts disabled.
 *
 * Returns the pre-call interrupt state so the caller can pass it back
 * to __ihk_mc_spinlock_unlock() to restore it.
 *
 * Fix: the flattened text contained both the pre-rename call
 * (ihk_mc_spinlock_lock_noirq — which the non-debug macro expands to
 * this very function) and the renamed call, i.e. the lock was taken
 * twice and the second acquisition would spin forever on its own
 * ticket. Only the renamed call is kept.
 */
static unsigned long __ihk_mc_spinlock_lock(ihk_spinlock_t *lock)
{
	unsigned long flags;

	/* Interrupts must go off before taking the lock so an IRQ
	 * handler on this CPU cannot try to re-acquire it. */
	flags = cpu_disable_interrupt_save();
	__ihk_mc_spinlock_lock_noirq(lock);
	return flags;
}
static void ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock_noirq(l) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock_noirq(l); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock_noirq __ihk_mc_spinlock_unlock_noirq
#endif
/*
 * Release a ticket spinlock without touching the interrupt state.
 * Counterpart of __ihk_mc_spinlock_lock_noirq(), which disables
 * preemption before spinning on the ticket.
 */
static void __ihk_mc_spinlock_unlock_noirq(ihk_spinlock_t *lock)
{
/* Atomically bump the 16-bit "now serving" half of the lock word;
 * the waiter whose ticket matches the new value stops spinning. */
asm volatile ("lock incw %0" : "+m"(*lock) : : "memory", "cc");
preempt_enable(); /* pairs with preempt_disable() in the lock path */
}
static void ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
#ifdef DEBUG_SPINLOCK
#define ihk_mc_spinlock_unlock(l, f) { \
__kprintf("[%d] call ihk_mc_spinlock_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__ihk_mc_spinlock_unlock((l), (f)); \
__kprintf("[%d] ret ihk_mc_spinlock_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define ihk_mc_spinlock_unlock __ihk_mc_spinlock_unlock
#endif
/*
 * Release a ticket spinlock and restore the interrupt state previously
 * returned by __ihk_mc_spinlock_lock().
 *
 * Fix: the flattened text contained both the pre-rename unlock call
 * (which the non-debug macro expands to the same function, so the
 * ticket would be advanced twice, corrupting the lock) and an
 * unterminated "#ifdef DEBUG_SPINLOCK" fragment with no "#endif".
 * Reconstructed as the clean post-rename body.
 */
static void __ihk_mc_spinlock_unlock(ihk_spinlock_t *lock, unsigned long flags)
{
	/* Unlock first, then re-enable interrupts, mirroring the
	 * disable-then-lock order on the acquire side. */
	__ihk_mc_spinlock_unlock_noirq(lock);
	cpu_restore_interrupt(flags);
}
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
/* An implementation of the Mellor-Crummey Scott (MCS) lock */
/*
 * One queue node per (would-be) lock holder. The lock variable itself
 * is also an mcs_lock_node whose 'next' field serves as the queue tail
 * pointer. Cache-line aligned (64 bytes) so each spinner spins on its
 * own line instead of bouncing a shared one.
 */
typedef struct mcs_lock_node {
unsigned long locked; /* nonzero while queued behind a predecessor; cleared by it on handoff */
struct mcs_lock_node *next; /* successor in the waiter queue (NULL = tail) */
} __attribute__((aligned(64))) mcs_lock_node_t;
static void mcs_lock_init(struct mcs_lock_node *node)
{
node->locked = 0;
node->next = NULL;
}
/*
 * Acquire an MCS lock. 'node' is the caller-supplied queue node; it
 * must stay valid (typically on-stack) until mcs_lock_unlock() returns.
 * Appends the node to the tail atomically; if there was a predecessor,
 * spins locally on node->locked until the predecessor hands over.
 */
static void mcs_lock_lock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
struct mcs_lock_node *pred;
node->next = NULL;
node->locked = 0;
/* Atomically swap ourselves in as the new tail; the old tail (if
 * any) is our predecessor. */
pred = (struct mcs_lock_node *)xchg8((unsigned long *)&lock->next,
(unsigned long)node);
if (pred) {
/* Order matters: mark ourselves waiting BEFORE publishing the
 * node via pred->next, otherwise the predecessor could clear
 * 'locked' before we set it and the wakeup would be lost. */
node->locked = 1;
pred->next = node;
while (node->locked != 0) {
cpu_pause();
}
}
/* pred == NULL: queue was empty, lock acquired immediately. */
}
/*
 * Release an MCS lock acquired with mcs_lock_lock() using the same
 * 'node'. Hands the lock to the successor, if any.
 */
static void mcs_lock_unlock(struct mcs_lock_node *lock,
struct mcs_lock_node *node)
{
if (node->next == NULL) {
/* No successor visible: try to swing the tail from us back to
 * empty. Success means nobody was queued — done. */
struct mcs_lock_node *old = (struct mcs_lock_node *)
atomic_cmpxchg8((unsigned long *)&lock->next,
(unsigned long)node, (unsigned long)0);
if (old == node) {
return;
}
/* A new waiter swapped itself in but has not linked
 * pred->next yet; wait for the link to appear. */
while (node->next == NULL) {
cpu_pause();
}
}
/* Hand off: successor's local spin on 'locked' ends here. */
node->next->locked = 0;
}
// MCS-style reader/writer lock: waiters queue on per-CPU nodes; a
// shared "common reader" node embedded in the lock aggregates all
// concurrently active readers via its 'count'.
typedef struct mcs_rwlock_node {
ihk_atomic_t count; // number of active readers (meaningful only on the common reader node)
char type; // role of this queue node
#define MCS_RWLOCK_TYPE_COMMON_READER 0
#define MCS_RWLOCK_TYPE_READER 1
#define MCS_RWLOCK_TYPE_WRITER 2
char locked; // spin flag: set while waiting, cleared by predecessor on handoff
#define MCS_RWLOCK_LOCKED 1
#define MCS_RWLOCK_UNLOCKED 0
char dmy1; // set to the reader's CPU id in __mcs_rwlock_reader_lock_noirq (debug aid)
char dmy2; // unused padding
struct mcs_rwlock_node *next; // successor in the waiter queue
} __attribute__((aligned(64))) mcs_rwlock_node_t;
/* Queue node bundled with saved interrupt state, for the *_lock()/
 * *_unlock() (irq-saving) wrapper variants. */
typedef struct mcs_rwlock_node_irqsave {
struct mcs_rwlock_node node;
unsigned long irqsave; // interrupt flags saved by cpu_disable_interrupt_save()
} __attribute__((aligned(64))) mcs_rwlock_node_irqsave_t;
typedef struct mcs_rwlock_lock {
struct mcs_rwlock_node reader; /* common reader node shared by all active readers */
struct mcs_rwlock_node *node; /* queue tail (NULL when the lock is free) */
} __attribute__((aligned(64))) mcs_rwlock_lock_t;
/*
 * Initialize an MCS reader/writer lock: empty waiter queue, and the
 * embedded common-reader node marked as such with no active readers.
 */
static void
mcs_rwlock_init(struct mcs_rwlock_lock *lock)
{
	lock->node = NULL;                 /* nobody queued */
	lock->reader.type = MCS_RWLOCK_TYPE_COMMON_READER;
	ihk_atomic_set(&lock->reader.count, 0); /* no readers yet */
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock_noirq __mcs_rwlock_writer_lock_noirq
#endif
/*
 * Acquire the rwlock for writing (exclusive), interrupts unmanaged.
 * Disables preemption; re-enabled in __mcs_rwlock_writer_unlock_noirq().
 * 'node' must remain valid until the matching unlock.
 */
static void
__mcs_rwlock_writer_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
struct mcs_rwlock_node *pred;
preempt_disable();
/* Fill in the node BEFORE publishing it via the tail swap. */
node->type = MCS_RWLOCK_TYPE_WRITER;
node->next = NULL;
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
(unsigned long)node);
if (pred) {
/* Set 'locked' before linking into pred->next so the
 * predecessor's handoff cannot be lost. */
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
/* pred == NULL: lock was free, exclusive ownership acquired. */
}
/*
 * Release every READER node at the head of the queue: unlink each one,
 * account it in the common reader count, and wake it.
 *
 * Precondition: every call site in this file first ensures that
 * lock->reader.next points at a READER-type node, so 'f' below is
 * always set before the final dereference.
 */
static void
mcs_rwlock_unlock_readers(struct mcs_rwlock_lock *lock)
{
struct mcs_rwlock_node *p;
struct mcs_rwlock_node *f = NULL; /* first freed reader; woken last */
struct mcs_rwlock_node *n;
/* This up-front increment accounts for 'f' and also keeps the
 * count nonzero so already-running readers cannot drop it to zero
 * (and start a writer handoff) while we are still scanning. */
ihk_atomic_inc(&lock->reader.count); // protect to unlock reader
for(p = &lock->reader; p->next; p = n){
n = p->next;
if(p->next->type == MCS_RWLOCK_TYPE_READER){
/* Unlink reader n from the queue. */
p->next = n->next;
if(lock->node == n){
/* n looked like the tail: try to make p the tail. */
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)n,
(unsigned long)p);
if(old != n){ // couldn't change
/* A new waiter enqueued behind n; wait for its
 * link to become visible, then splice it in. */
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
}
else if(p->next == NULL){
/* n has a successor somewhere (it is not the tail)
 * but the link is not published yet; wait for it. */
while (n->next == NULL) {
cpu_pause();
}
p->next = n->next;
}
if(f){
/* Each additional reader bumps the shared count and
 * is released immediately. */
ihk_atomic_inc(&lock->reader.count);
n->locked = MCS_RWLOCK_UNLOCKED;
}
else
f = n;
/* Stay at the same predecessor: p->next changed. */
n = p;
}
/* NOTE(review): non-READER nodes are not released here; this
 * spin only waits until the node's successor link (or tail
 * status) is stable before the scan advances past it —
 * confirm intended queueing semantics for writers. */
if(n->next == NULL && lock->node != n){
while (n->next == NULL && lock->node != n) {
cpu_pause();
}
}
}
/* Wake the first reader last, once all bookkeeping is done. */
f->locked = MCS_RWLOCK_UNLOCKED;
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock_noirq __mcs_rwlock_writer_unlock_noirq
#endif
/*
 * Release the write lock taken with __mcs_rwlock_writer_lock_noirq()
 * (same 'node'). Hands over to the next waiter: a batch of readers via
 * mcs_rwlock_unlock_readers(), or a single writer directly.
 * Re-enables preemption (disabled by the lock path).
 */
static void
__mcs_rwlock_writer_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
if (node->next == NULL) {
/* No successor visible: try to swing the tail back to empty. */
struct mcs_rwlock_node *old = (struct mcs_rwlock_node *)
atomic_cmpxchg8((unsigned long *)&lock->node,
(unsigned long)node, (unsigned long)0);
if (old == node) {
goto out;
}
/* A waiter enqueued but has not linked itself yet. */
while (node->next == NULL) {
cpu_pause();
}
}
if(node->next->type == MCS_RWLOCK_TYPE_READER){
/* Seed the common reader queue with the first waiting reader,
 * then release the whole leading run of readers. */
lock->reader.next = node->next;
mcs_rwlock_unlock_readers(lock);
}
else{
node->next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock_noirq __mcs_rwlock_reader_lock_noirq
#endif
/*
 * Acquire the rwlock for reading (shared), interrupts unmanaged.
 * Disables preemption; re-enabled in __mcs_rwlock_reader_unlock_noirq().
 * Fast path: if readers already hold the lock (tail is the common
 * reader node with a nonzero count), join them without queueing.
 */
static void
__mcs_rwlock_reader_lock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
struct mcs_rwlock_node *pred;
preempt_disable();
node->type = MCS_RWLOCK_TYPE_READER;
node->next = NULL;
node->dmy1 = ihk_mc_get_processor_id(); /* debug: which CPU queued this node */
pred = (struct mcs_rwlock_node *)xchg8((unsigned long *)&lock->node,
(unsigned long)node);
if (pred) {
if(pred == &lock->reader){
/* Readers currently hold the lock. */
if(ihk_atomic_inc_return(&pred->count) != 1){
/* Count was nonzero: we joined the active readers.
 * Try to undo our tail swap (node -> common reader). */
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)node,
(unsigned long)pred);
if (old == node) {
goto out; /* holding as a reader, never queued */
}
/* Someone enqueued behind us; splice them onto the
 * common reader node and, if they are readers,
 * release them too. */
while (node->next == NULL) {
cpu_pause();
}
pred->next = node->next;
if(node->next->type == MCS_RWLOCK_TYPE_READER)
mcs_rwlock_unlock_readers(lock);
goto out;
}
/* Count was zero: the reader group is draining; undo the
 * increment and fall through to the slow queued path. */
ihk_atomic_dec(&pred->count);
}
/* Slow path: wait behind 'pred' like a plain MCS waiter. */
node->locked = MCS_RWLOCK_LOCKED;
pred->next = node;
while (node->locked != MCS_RWLOCK_UNLOCKED) {
cpu_pause();
}
}
else {
/* Lock was free: become the first reader and promote our node
 * into the common-reader accounting. */
lock->reader.next = node;
mcs_rwlock_unlock_readers(lock);
}
out:
return;
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock_noirq(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock_noirq %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock_noirq((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock_noirq\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock_noirq __mcs_rwlock_reader_unlock_noirq
#endif
/*
 * Drop a shared (reader) hold. Note: 'node' is unused here — readers
 * are accounted solely through the lock's common reader node.
 * The last reader out hands the lock to the next waiter (or frees it).
 * Re-enables preemption (disabled by the reader lock path).
 */
static void
__mcs_rwlock_reader_unlock_noirq(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node *node)
{
/* Not the last reader: nothing else to do. */
if(ihk_atomic_dec_return(&lock->reader.count))
goto out;
if (lock->reader.next == NULL) {
/* While readers hold the lock, the tail is the common reader
 * node; try to swing it back to "free". */
struct mcs_rwlock_node *old;
old = (struct mcs_rwlock_node *)atomic_cmpxchg8(
(unsigned long *)&lock->node,
(unsigned long)&lock->reader,
(unsigned long)0);
if (old == &lock->reader) {
goto out;
}
/* A waiter enqueued but has not published its link yet. */
while (lock->reader.next == NULL) {
cpu_pause();
}
}
if(lock->reader.next->type == MCS_RWLOCK_TYPE_READER){
mcs_rwlock_unlock_readers(lock);
}
else{
/* Next waiter is a writer: direct handoff. */
lock->reader.next->locked = MCS_RWLOCK_UNLOCKED;
}
out:
preempt_enable();
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_lock __mcs_rwlock_writer_lock
#endif
/*
 * Write-lock with interrupt save: disables interrupts (state stashed in
 * node->irqsave) BEFORE acquiring, so an IRQ on this CPU cannot
 * deadlock against the held lock.
 */
static void
__mcs_rwlock_writer_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_writer_lock_noirq(lock, &node->node);
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_writer_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_writer_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_writer_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_writer_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_writer_unlock __mcs_rwlock_writer_unlock
#endif
/*
 * Write-unlock counterpart of __mcs_rwlock_writer_lock(): release
 * first, then restore the interrupt state saved at lock time.
 */
static void
__mcs_rwlock_writer_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
__mcs_rwlock_writer_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_lock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_lock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_lock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_lock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_lock __mcs_rwlock_reader_lock
#endif
/*
 * Read-lock with interrupt save: interrupts go off (state stashed in
 * node->irqsave) before the lock is taken.
 */
static void
__mcs_rwlock_reader_lock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
node->irqsave = cpu_disable_interrupt_save();
__mcs_rwlock_reader_lock_noirq(lock, &node->node);
}
#ifdef DEBUG_MCS_RWLOCK
#define mcs_rwlock_reader_unlock(l, n) { \
__kprintf("[%d] call mcs_rwlock_reader_unlock %p %s:%d\n", ihk_mc_get_processor_id(), (l), __FILE__, __LINE__); \
__mcs_rwlock_reader_unlock((l), (n)); \
__kprintf("[%d] ret mcs_rwlock_reader_unlock\n", ihk_mc_get_processor_id()); \
}
#else
#define mcs_rwlock_reader_unlock __mcs_rwlock_reader_unlock
#endif
/*
 * Read-unlock counterpart of __mcs_rwlock_reader_lock(): release first,
 * then restore the saved interrupt state.
 */
static void
__mcs_rwlock_reader_unlock(struct mcs_rwlock_lock *lock, struct mcs_rwlock_node_irqsave *node)
{
__mcs_rwlock_reader_unlock_noirq(lock, &node->node);
cpu_restore_interrupt(node->irqsave);
}
#endif

View File

@@ -5,6 +5,8 @@
* Define and declare memory management macros and functions
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -117,6 +119,25 @@
#define PTE_NULL ((pte_t)0)
typedef unsigned long pte_t;
/*
* pagemap kernel ABI bits
*/
#define PM_ENTRY_BYTES sizeof(uint64_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
#define PM_STATUS(nr) (((nr) << PM_STATUS_OFFSET) & PM_STATUS_MASK)
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
#define PM_PSHIFT(x) (((uint64_t) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
/* For easy conversion, it is better to be the same as architecture's ones */
enum ihk_mc_pt_attribute {
PTATTR_ACTIVE = 0x01,
@@ -128,6 +149,7 @@ enum ihk_mc_pt_attribute {
PTATTR_NO_EXECUTE = 0x8000000000000000,
PTATTR_UNCACHABLE = 0x10000,
PTATTR_FOR_USER = 0x20000,
PTATTR_WRITE_COMBINED = 0x40000,
};
static inline int pte_is_null(pte_t *ptep)
@@ -185,6 +207,12 @@ static inline off_t pte_get_off(pte_t *ptep, size_t pgsize)
return (off_t)(*ptep & PAGE_MASK);
}
static inline void pte_make_null(pte_t *ptep, size_t pgsize)
{
*ptep = PTE_NULL;
return;
}
static inline void pte_make_fileoff(off_t off,
enum ihk_mc_pt_attribute ptattr, size_t pgsize, pte_t *ptep)
{
@@ -216,6 +244,36 @@ static inline void pte_xchg(pte_t *ptep, pte_t *valp)
#define pte_xchg(p,vp) do { *(vp) = xchg((p), *(vp)); } while (0)
#endif
/*
 * Atomically clear the dirty bit of the PTE for a page of 'pgsize'
 * (the dirty-bit position differs per page-table level).
 *
 * Fix: the original asm listed the modified *ptep as an input-only
 * "m" operand with no "memory"/"cc" clobber, so the compiler was free
 * to cache the PTE value across (or reorder around) the atomic RMW.
 * "+m" marks it read-modify-write and the clobbers make the update
 * ordering explicit.
 */
static inline void pte_clear_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default: /* through */
	case PTL1_SIZE: mask = ~PFL1_DIRTY; break;
	case PTL2_SIZE: mask = ~PFL2_DIRTY; break;
	case PTL3_SIZE: mask = ~PFL3_DIRTY; break;
	}
	asm volatile ("lock andq %1,%0"
		      : "+m"(*ptep)
		      : "r"(mask)
		      : "memory", "cc");
	return;
}
/*
 * Atomically set the dirty bit of the PTE for a page of 'pgsize'
 * (the dirty-bit position differs per page-table level).
 *
 * Fix: same asm-constraint defect as pte_clear_dirty — the modified
 * *ptep was an input-only "m" operand with no "memory"/"cc" clobber,
 * letting the compiler cache the PTE around the atomic RMW. Use "+m"
 * plus explicit clobbers.
 */
static inline void pte_set_dirty(pte_t *ptep, size_t pgsize)
{
	uint64_t mask;

	switch (pgsize) {
	default: /* through */
	case PTL1_SIZE: mask = PFL1_DIRTY; break;
	case PTL2_SIZE: mask = PFL2_DIRTY; break;
	case PTL3_SIZE: mask = PFL3_DIRTY; break;
	}
	asm volatile ("lock orq %1,%0"
		      : "+m"(*ptep)
		      : "r"(mask)
		      : "memory", "cc");
	return;
}
struct page_table;
void set_pte(pte_t *ppte, unsigned long phys, enum ihk_mc_pt_attribute attr);
pte_t *get_pte(struct page_table *pt, void *virt, enum ihk_mc_pt_attribute attr);
@@ -227,8 +285,9 @@ void flush_tlb_single(unsigned long addr);
void *map_fixed_area(unsigned long phys, unsigned long size, int uncachable);
#define AP_TRAMPOLINE 0x10000
#define AP_TRAMPOLINE_SIZE 0x4000
extern unsigned long ap_trampoline;
//#define AP_TRAMPOLINE 0x10000
#define AP_TRAMPOLINE_SIZE 0x2000
/* Local is cachable */
#define IHK_IKC_QUEUE_PT_ATTR (PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_UNCACHABLE)

View File

@@ -0,0 +1,28 @@
/**
* \file cpu.h
* License details are found in the file LICENSE.
* \brief
* Declare architecture-dependent types and functions to control CPU.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
*/
#ifndef ARCH_CPU_H
#define ARCH_CPU_H
#include <ihk/cpu.h>
/*
 * Read memory barrier. Implemented as a compiler barrier only —
 * NOTE(review): assumes x86 write-back memory ordering, where loads
 * are not reordered with other loads; confirm for this architecture
 * port.
 */
static inline void rmb(void)
{
barrier();
}
/*
 * Write memory barrier. Implemented as a compiler barrier only —
 * NOTE(review): assumes x86 write-back memory ordering, where stores
 * are not reordered with other stores; confirm for this architecture
 * port.
 */
static inline void wmb(void)
{
barrier();
}
#endif /* ARCH_CPU_H */

View File

@@ -0,0 +1,36 @@
/**
* \file mman.h
* License details are found in the file LICENSE.
* \brief
* memory management declarations
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
#ifndef HEADER_ARCH_MMAN_H
#define HEADER_ARCH_MMAN_H
/*
* mapping flags
*/
#define MAP_32BIT 0x40
#define MAP_GROWSDOWN 0x0100
#define MAP_DENYWRITE 0x0800
#define MAP_EXECUTABLE 0x1000
#define MAP_LOCKED 0x2000
#define MAP_NORESERVE 0x4000
#define MAP_POPULATE 0x8000
#define MAP_NONBLOCK 0x00010000
#define MAP_STACK 0x00020000
#define MAP_HUGETLB 0x00040000
/*
* for mlockall()
*/
#define MCL_CURRENT 0x01
#define MCL_FUTURE 0x02
#endif /* HEADER_ARCH_MMAN_H */

View File

@@ -0,0 +1,40 @@
/**
* \file shm.h
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
*/
#ifndef HEADER_ARCH_SHM_H
#define HEADER_ARCH_SHM_H
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
uint64_t shm_nattch;
uint8_t padding[16];
};
#endif /* HEADER_ARCH_SHM_H */

View File

@@ -42,7 +42,10 @@ struct x86_cpu_local_variables {
uint64_t gdt[10];
/* 128 */
struct tss64 tss;
/* 232 */
unsigned long paniced;
uint64_t panic_regs[21];
/* 408 */
} __attribute__((packed));
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id);

View File

@@ -13,6 +13,10 @@
#ifndef HEADER_X86_COMMON_IHK_ATOMIC_H
#define HEADER_X86_COMMON_IHK_ATOMIC_H
/***********************************************************************
* ihk_atomic_t
*/
typedef struct {
int counter;
} ihk_atomic_t;
@@ -95,6 +99,30 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
#define ihk_atomic_inc_return(v) (ihk_atomic_add_return(1, v))
#define ihk_atomic_dec_return(v) (ihk_atomic_sub_return(1, v))
/***********************************************************************
* ihk_atomic64_t
*/
typedef struct {
long counter64;
} ihk_atomic64_t;
#define IHK_ATOMIC64_INIT(i) { .counter64 = (i) }
/*
 * Read the current value of a 64-bit atomic counter.
 * The volatile-qualified access forces a fresh load from memory on
 * every call instead of allowing the compiler to reuse a cached value.
 */
static inline long ihk_atomic64_read(const ihk_atomic64_t *v)
{
	const volatile long *p = &v->counter64;

	return *p;
}
/*
 * Atomically increment a 64-bit counter (x86 "lock incq").
 */
static inline void ihk_atomic64_inc(ihk_atomic64_t *v)
{
asm volatile ("lock incq %0" : "+m"(v->counter64));
}
/***********************************************************************
* others
*/
/*
* Note: no "lock" prefix even on SMP: xchg always implies lock anyway
* Note 2: xchg has side effect, so that attribute volatile is necessary,
@@ -112,6 +140,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
__x; \
})
/*
 * Atomically exchange the 8-byte value at *ptr with 'x'; returns the
 * previous contents of *ptr. xchg with a memory operand is implicitly
 * locked on x86, so no "lock" prefix is needed.
 *
 * Fixes: the local was named "__x" — identifiers starting with a
 * double underscore are reserved for the implementation — and the
 * modified memory was an input-only "m" operand; "+m" states the
 * read-modify-write explicitly.
 */
static inline unsigned long xchg8(unsigned long *ptr, unsigned long x)
{
	unsigned long old = x;

	asm volatile("xchgq %0,%1"
		     : "=r" (old), "+m" (*(volatile unsigned long *)ptr)
		     : "0" (old)
		     : "memory");
	return old;
}
#define __xchg(x, ptr, size) \
({ \
__typeof(*(ptr)) __x = (x); \
@@ -150,5 +189,17 @@ static inline int ihk_atomic_sub_return(int i, ihk_atomic_t *v)
#define xchg(ptr, v) \
__xchg((v), (ptr), sizeof(*ptr))
/*
 * 8-byte atomic compare-and-swap: if *addr == oldval, store newval.
 * Returns the value actually observed at *addr — equal to oldval
 * exactly when the swap succeeded (cmpxchg leaves the observed value
 * in RAX, the "=a"/"0" operand).
 */
static inline unsigned long atomic_cmpxchg8(unsigned long *addr,
unsigned long oldval,
unsigned long newval)
{
asm volatile("lock; cmpxchgq %2, %1\n"
: "=a" (oldval), "+m" (*addr)
: "r" (newval), "0" (oldval)
: "memory"
);
return oldval;
}
#endif

View File

@@ -22,19 +22,35 @@ struct x86_kregs {
};
typedef struct x86_kregs ihk_mc_kernel_context_t;
/* XXX: User context should contain floating point registers */
typedef struct x86_regs ihk_mc_user_context_t;
struct x86_user_context {
struct x86_sregs sr;
#define ihk_mc_syscall_arg0(uc) (uc)->rdi
#define ihk_mc_syscall_arg1(uc) (uc)->rsi
#define ihk_mc_syscall_arg2(uc) (uc)->rdx
#define ihk_mc_syscall_arg3(uc) (uc)->r10
#define ihk_mc_syscall_arg4(uc) (uc)->r8
#define ihk_mc_syscall_arg5(uc) (uc)->r9
/* 16-byte boundary here */
uint8_t is_gpr_valid;
uint8_t is_sr_valid;
uint8_t spare_flags6;
uint8_t spare_flags5;
uint8_t spare_flags4;
uint8_t spare_flags3;
uint8_t spare_flags2;
uint8_t spare_flags1;
struct x86_basic_regs gpr; /* must be last */
/* 16-byte boundary here */
};
typedef struct x86_user_context ihk_mc_user_context_t;
#define ihk_mc_syscall_ret(uc) (uc)->rax
#define ihk_mc_syscall_arg0(uc) (uc)->gpr.rdi
#define ihk_mc_syscall_arg1(uc) (uc)->gpr.rsi
#define ihk_mc_syscall_arg2(uc) (uc)->gpr.rdx
#define ihk_mc_syscall_arg3(uc) (uc)->gpr.r10
#define ihk_mc_syscall_arg4(uc) (uc)->gpr.r8
#define ihk_mc_syscall_arg5(uc) (uc)->gpr.r9
#define ihk_mc_syscall_pc(uc) (uc)->rip
#define ihk_mc_syscall_sp(uc) (uc)->rsp
#define ihk_mc_syscall_ret(uc) (uc)->gpr.rax
#define ihk_mc_syscall_pc(uc) (uc)->gpr.rip
#define ihk_mc_syscall_sp(uc) (uc)->gpr.rsp
#endif

View File

@@ -6,6 +6,8 @@
* Machine Specific Registers (MSR)
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -16,7 +18,31 @@
#include <types.h>
#define RFLAGS_CF (1 << 0)
#define RFLAGS_PF (1 << 2)
#define RFLAGS_AF (1 << 4)
#define RFLAGS_ZF (1 << 6)
#define RFLAGS_SF (1 << 7)
#define RFLAGS_TF (1 << 8)
#define RFLAGS_IF (1 << 9)
#define RFLAGS_DF (1 << 10)
#define RFLAGS_OF (1 << 11)
#define RFLAGS_IOPL (3 << 12)
#define RFLAGS_NT (1 << 14)
#define RFLAGS_RF (1 << 16)
#define RFLAGS_VM (1 << 17)
#define RFLAGS_AC (1 << 18)
#define RFLAGS_VIF (1 << 19)
#define RFLAGS_VIP (1 << 20)
#define RFLAGS_ID (1 << 21)
#define DB6_B0 (1 << 0)
#define DB6_B1 (1 << 1)
#define DB6_B2 (1 << 2)
#define DB6_B3 (1 << 3)
#define DB6_BD (1 << 13)
#define DB6_BS (1 << 14)
#define DB6_BT (1 << 15)
#define MSR_EFER 0xc0000080
#define MSR_STAR 0xc0000081
@@ -26,6 +52,14 @@
#define MSR_GS_BASE 0xc0000101
#define MSR_IA32_APIC_BASE 0x000000001b
#define MSR_PLATFORM_INFO 0x000000ce
#define MSR_IA32_PERF_CTL 0x00000199
#define MSR_IA32_MISC_ENABLE 0x000001a0
#define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0
#define MSR_NHM_TURBO_RATIO_LIMIT 0x000001ad
#define MSR_IA32_CR_PAT 0x00000277
#define MSR_IA32_XSS 0xda0
#define CVAL(event, mask) \
((((event) & 0xf00) << 24) | ((mask) << 8) | ((event) & 0xff))
@@ -37,6 +71,25 @@
#define MSR_PERF_CTL_0 0xc0010000
#define MSR_PERF_CTR_0 0xc0010004
/*
 * Read a 64-bit extended control register (index 0 = XCR0) via the
 * XGETBV instruction; result is returned in EDX:EAX.
 *
 * Fix: made "static inline" — a plain static function defined in a
 * header triggers unused-function warnings and emits a private copy
 * in every translation unit that calls it.
 */
static inline unsigned long xgetbv(unsigned int index)
{
	unsigned int low, high;

	asm volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (index));
	return low | ((unsigned long)high << 32);
}
/*
 * Write a 64-bit extended control register via the XSETBV instruction
 * (value split across EDX:EAX). Privileged: must run at CPL 0.
 *
 * Fix: made "static inline" — a plain static function defined in a
 * header triggers unused-function warnings and emits a private copy
 * in every translation unit that calls it.
 */
static inline void xsetbv(unsigned int index, unsigned long val)
{
	unsigned int low, high;

	low = val;
	high = val >> 32;
	asm volatile("xsetbv" : : "a" (low), "d" (high), "c" (index));
}
static void wrmsr(unsigned int idx, unsigned long value){
unsigned int high, low;
@@ -135,10 +188,19 @@ struct tss64 {
unsigned short iomap_address;
} __attribute__((packed));
struct x86_regs {
unsigned long r15, r14, r13, r12, r11, r10, r9, r8;
unsigned long rdi, rsi, rdx, rcx, rbx, rax, rbp;
unsigned long error, rip, cs, rflags, rsp, ss;
struct x86_basic_regs {
unsigned long r15, r14, r13, r12, rbp, rbx, r11, r10;
unsigned long r9, r8, rax, rcx, rdx, rsi, rdi, error;
unsigned long rip, cs, rflags, rsp, ss;
};
struct x86_sregs {
unsigned long fs_base;
unsigned long gs_base;
unsigned long ds;
unsigned long es;
unsigned long fs;
unsigned long gs;
};
#define REGS_GET_STACK_POINTER(regs) (((struct x86_regs *)regs)->rsp)
@@ -162,7 +224,72 @@ enum x86_pf_error_code {
PF_RSVD = 1 << 3,
PF_INSTR = 1 << 4,
PF_PATCH = 1 << 29,
PF_POPULATE = 1 << 30,
};
struct i387_fxsave_struct {
unsigned short cwd;
unsigned short swd;
unsigned short twd;
unsigned short fop;
union {
struct {
unsigned long rip;
unsigned long rdp;
};
struct {
unsigned int fip;
unsigned int fcs;
unsigned int foo;
unsigned int fos;
};
};
unsigned int mxcsr;
unsigned int mxcsr_mask;
unsigned int st_space[32];
unsigned int xmm_space[64];
unsigned int padding[12];
union {
unsigned int padding1[12];
unsigned int sw_reserved[12];
};
} __attribute__((aligned(16)));
struct ymmh_struct {
unsigned int ymmh_space[64];
};
struct lwp_struct {
unsigned char reserved[128];
};
struct bndreg {
unsigned long lower_bound;
unsigned long upper_bound;
} __attribute__((packed));
struct bndcsr {
unsigned long bndcfgu;
unsigned long bndstatus;
} __attribute__((packed));
struct xsave_hdr_struct {
unsigned long xstate_bv;
unsigned long xcomp_bv;
unsigned long reserved[6];
} __attribute__((packed));
struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
struct ymmh_struct ymmh;
struct lwp_struct lwp;
struct bndreg bndreg[4];
struct bndcsr bndcsr;
} __attribute__ ((packed, aligned (64)));
typedef struct xsave_struct fp_regs_struct;
#endif

View File

@@ -23,9 +23,10 @@
SYSCALL_DELEGATED(0, read)
SYSCALL_DELEGATED(1, write)
SYSCALL_DELEGATED(2, open)
SYSCALL_DELEGATED(3, close)
SYSCALL_HANDLED(3, close)
SYSCALL_DELEGATED(4, stat)
SYSCALL_DELEGATED(5, fstat)
SYSCALL_DELEGATED(7, poll)
SYSCALL_DELEGATED(8, lseek)
SYSCALL_HANDLED(9, mmap)
SYSCALL_HANDLED(10, mprotect)
@@ -39,10 +40,17 @@ SYSCALL_DELEGATED(17, pread64)
SYSCALL_DELEGATED(18, pwrite64)
SYSCALL_DELEGATED(20, writev)
SYSCALL_DELEGATED(21, access)
SYSCALL_DELEGATED(23, select)
SYSCALL_HANDLED(24, sched_yield)
SYSCALL_HANDLED(25, mremap)
SYSCALL_HANDLED(26, msync)
SYSCALL_HANDLED(27, mincore)
SYSCALL_HANDLED(28, madvise)
SYSCALL_HANDLED(29, shmget)
SYSCALL_HANDLED(30, shmat)
SYSCALL_HANDLED(31, shmctl)
SYSCALL_HANDLED(34, pause)
SYSCALL_HANDLED(35, nanosleep)
SYSCALL_HANDLED(39, getpid)
SYSCALL_HANDLED(56, clone)
SYSCALL_DELEGATED(57, fork)
@@ -52,43 +60,84 @@ SYSCALL_HANDLED(60, exit)
SYSCALL_HANDLED(61, wait4)
SYSCALL_HANDLED(62, kill)
SYSCALL_DELEGATED(63, uname)
SYSCALL_DELEGATED(65, semop)
SYSCALL_HANDLED(67, shmdt)
SYSCALL_DELEGATED(69, msgsnd)
SYSCALL_DELEGATED(70, msgrcv)
SYSCALL_DELEGATED(72, fcntl)
SYSCALL_DELEGATED(79, getcwd)
SYSCALL_DELEGATED(89, readlink)
SYSCALL_DELEGATED(96, gettimeofday)
SYSCALL_HANDLED(96, gettimeofday)
SYSCALL_HANDLED(97, getrlimit)
SYSCALL_HANDLED(101, ptrace)
SYSCALL_DELEGATED(102, getuid)
SYSCALL_DELEGATED(104, getgid)
SYSCALL_DELEGATED(107, geteuid)
SYSCALL_DELEGATED(108, getegid)
SYSCALL_HANDLED(102, getuid)
SYSCALL_HANDLED(104, getgid)
SYSCALL_HANDLED(105, setuid)
SYSCALL_HANDLED(106, setgid)
SYSCALL_HANDLED(107, geteuid)
SYSCALL_HANDLED(108, getegid)
SYSCALL_HANDLED(109, setpgid)
SYSCALL_DELEGATED(110, getppid)
SYSCALL_HANDLED(110, getppid)
SYSCALL_DELEGATED(111, getpgrp)
SYSCALL_HANDLED(113, setreuid)
SYSCALL_HANDLED(114, setregid)
SYSCALL_HANDLED(117, setresuid)
SYSCALL_HANDLED(118, getresuid)
SYSCALL_HANDLED(119, setresgid)
SYSCALL_HANDLED(120, getresgid)
SYSCALL_HANDLED(122, setfsuid)
SYSCALL_HANDLED(123, setfsgid)
SYSCALL_HANDLED(127, rt_sigpending)
SYSCALL_HANDLED(128, rt_sigtimedwait)
SYSCALL_HANDLED(129, rt_sigqueueinfo)
SYSCALL_HANDLED(130, rt_sigsuspend)
SYSCALL_HANDLED(131, sigaltstack)
SYSCALL_HANDLED(142, sched_setparam)
SYSCALL_HANDLED(143, sched_getparam)
SYSCALL_HANDLED(144, sched_setscheduler)
SYSCALL_HANDLED(145, sched_getscheduler)
SYSCALL_HANDLED(146, sched_get_priority_max)
SYSCALL_HANDLED(147, sched_get_priority_min)
SYSCALL_HANDLED(148, sched_rr_get_interval)
SYSCALL_HANDLED(149, mlock)
SYSCALL_HANDLED(150, munlock)
SYSCALL_HANDLED(151, mlockall)
SYSCALL_HANDLED(152, munlockall)
SYSCALL_HANDLED(158, arch_prctl)
SYSCALL_HANDLED(160, setrlimit)
SYSCALL_HANDLED(164, settimeofday)
SYSCALL_HANDLED(186, gettid)
SYSCALL_DELEGATED(201, time)
SYSCALL_HANDLED(202, futex)
SYSCALL_HANDLED(203, sched_setaffinity)
SYSCALL_HANDLED(204, sched_getaffinity)
SYSCALL_DELEGATED(208, io_getevents)
SYSCALL_HANDLED(216, remap_file_pages)
SYSCALL_DELEGATED(217, getdents64)
SYSCALL_HANDLED(218, set_tid_address)
SYSCALL_DELEGATED(220, semtimedop)
SYSCALL_DELEGATED(230, clock_nanosleep)
SYSCALL_HANDLED(231, exit_group)
SYSCALL_DELEGATED(232, epoll_wait)
SYSCALL_HANDLED(234, tgkill)
SYSCALL_HANDLED(237, mbind)
SYSCALL_HANDLED(238, set_mempolicy)
SYSCALL_HANDLED(239, get_mempolicy)
SYSCALL_HANDLED(247, waitid)
SYSCALL_HANDLED(256, migrate_pages)
SYSCALL_DELEGATED(270, pselect6)
SYSCALL_DELEGATED(271, ppoll)
SYSCALL_HANDLED(273, set_robust_list)
SYSCALL_HANDLED(279, move_pages)
SYSCALL_DELEGATED(281, epoll_pwait)
SYSCALL_HANDLED(282, signalfd)
SYSCALL_HANDLED(289, signalfd4)
#ifdef DCFA_KMOD
SYSCALL_HANDLED(303, mod_call)
#endif
SYSCALL_HANDLED(309, getcpu)
SYSCALL_HANDLED(310, process_vm_readv)
SYSCALL_HANDLED(311, process_vm_writev)
SYSCALL_HANDLED(601, pmc_init)
SYSCALL_HANDLED(602, pmc_start)
SYSCALL_HANDLED(603, pmc_stop)

View File

@@ -24,39 +24,56 @@
#define USER_CS (48 + 3)
#define USER_DS (56 + 3)
#define PUSH_ALL_REGS \
pushq %rbp; \
pushq %rax; \
pushq %rbx; \
pushq %rcx; \
pushq %rdx; \
pushq %rsi; \
pushq %rdi; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11; \
pushq %r12; \
pushq %r13; \
pushq %r14; \
pushq %r15;
#define POP_ALL_REGS \
popq %r15; \
popq %r14; \
popq %r13; \
popq %r12; \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rdi; \
popq %rsi; \
popq %rdx; \
popq %rcx; \
popq %rbx; \
popq %rax; \
popq %rbp
/* struct x86_user_context */
#define X86_SREGS_BASE (0)
#define X86_SREGS_SIZE 48
#define X86_FLAGS_BASE (X86_SREGS_BASE + X86_SREGS_SIZE)
#define X86_FLAGS_SIZE 8
#define X86_REGS_BASE (X86_FLAGS_BASE + X86_FLAGS_SIZE)
#define RAX_OFFSET (X86_REGS_BASE + 80)
#define ERROR_OFFSET (X86_REGS_BASE + 120)
#define RSP_OFFSET (X86_REGS_BASE + 152)
#define PUSH_ALL_REGS \
pushq %rdi; \
pushq %rsi; \
pushq %rdx; \
pushq %rcx; \
pushq %rax; \
pushq %r8; \
pushq %r9; \
pushq %r10; \
pushq %r11; \
pushq %rbx; \
pushq %rbp; \
pushq %r12; \
pushq %r13; \
pushq %r14; \
pushq %r15; \
pushq $1; /* is_gpr_valid is set, and others are cleared */ \
subq $X86_FLAGS_BASE,%rsp /* for x86_sregs, etc. */
#define POP_ALL_REGS \
movq $0,X86_FLAGS_BASE(%rsp); /* clear all flags */ \
addq $X86_REGS_BASE,%rsp; /* discard x86_sregs, flags, etc. */ \
popq %r15; \
popq %r14; \
popq %r13; \
popq %r12; \
popq %rbp; \
popq %rbx; \
popq %r11; \
popq %r10; \
popq %r9; \
popq %r8; \
popq %rax; \
popq %rcx; \
popq %rdx; \
popq %rsi; \
popq %rdi
.data
.globl generic_common_handlers
generic_common_handlers:
@@ -75,7 +92,7 @@ vector=vector+1
common_interrupt:
PUSH_ALL_REGS
movq 120(%rsp), %rdi
movq ERROR_OFFSET(%rsp), %rdi
movq %rsp, %rsi
call handle_interrupt /* Enter C code */
POP_ALL_REGS
@@ -91,7 +108,7 @@ page_fault:
cld
PUSH_ALL_REGS
movq %cr2, %rdi
movq 120(%rsp),%rsi
movq ERROR_OFFSET(%rsp),%rsi
movq %rsp,%rdx
movq __page_fault_handler_address(%rip), %rax
andq %rax, %rax
@@ -113,10 +130,53 @@ general_protection_exception:
addq $8, %rsp
iretq
.globl nmi
nmi:
#define PANICED 232
#define PANIC_REGS 240
movq %rax,%gs:PANIC_REGS+0x00
movq %rbx,%gs:PANIC_REGS+0x08
movq %rcx,%gs:PANIC_REGS+0x10
movq %rdx,%gs:PANIC_REGS+0x18
movq %rsi,%gs:PANIC_REGS+0x20
movq %rdi,%gs:PANIC_REGS+0x28
movq %rbp,%gs:PANIC_REGS+0x30
movq 0x18(%rsp),%rax /* rsp */
movq %rax,%gs:PANIC_REGS+0x38
movq %r8, %gs:PANIC_REGS+0x40
movq %r9, %gs:PANIC_REGS+0x48
movq %r10,%gs:PANIC_REGS+0x50
movq %r11,%gs:PANIC_REGS+0x58
movq %r12,%gs:PANIC_REGS+0x60
movq %r13,%gs:PANIC_REGS+0x68
movq %r14,%gs:PANIC_REGS+0x70
movq %r15,%gs:PANIC_REGS+0x78
movq 0x00(%rsp),%rax /* rip */
movq %rax,%gs:PANIC_REGS+0x80
movq 0x10(%rsp),%rax /* rflags */
movl %eax,%gs:PANIC_REGS+0x88
movq 0x08(%rsp),%rax /* cs */
movl %eax,%gs:PANIC_REGS+0x8C
movq 0x20(%rsp),%rax /* ss */
movl %eax,%gs:PANIC_REGS+0x90
xorq %rax,%rax
movw %ds,%ax
movl %eax,%gs:PANIC_REGS+0x94
movw %es,%ax
movl %eax,%gs:PANIC_REGS+0x98
movw %fs,%ax
movl %eax,%gs:PANIC_REGS+0x9C
movw %gs,%ax
movl %eax,%gs:PANIC_REGS+0xA0
movq $1,%gs:PANICED
1:
hlt
jmp 1b
.globl x86_syscall
x86_syscall:
cld
movq %rsp, %gs:24
movq %rsp, %gs:X86_CPU_LOCAL_OFFSET_USTACK
movq %gs:(X86_CPU_LOCAL_OFFSET_SP0), %rsp
pushq $(USER_DS)
@@ -124,21 +184,19 @@ x86_syscall:
pushq %r11
pushq $(USER_CS)
pushq %rcx
pushq $0
movq %gs:24, %rcx
movq %rcx, 32(%rsp)
pushq %rax /* error code (= system call number) */
PUSH_ALL_REGS
movq 104(%rsp), %rdi
movq %gs:X86_CPU_LOCAL_OFFSET_USTACK, %rcx
movq %rcx, RSP_OFFSET(%rsp)
movq RAX_OFFSET(%rsp), %rdi
movw %ss, %ax
movw %ax, %ds
movq %rsp, %rsi
callq *__x86_syscall_handler(%rip)
1:
movq %rax, 104(%rsp)
movq %rax, RAX_OFFSET(%rsp)
POP_ALL_REGS
#ifdef USE_SYSRET
movq 8(%rsp), %rcx
movq 24(%rsp), %r11
movq 32(%rsp), %rsp
sysretq
#else
@@ -147,7 +205,33 @@ x86_syscall:
#endif
.globl enter_user_mode
enter_user_mode:
enter_user_mode:
callq release_runq_lock
movq $0, %rdi
movq %rsp, %rsi
call check_signal
POP_ALL_REGS
addq $8, %rsp
iretq
.globl debug_exception
debug_exception:
cld
pushq $0 /* error */
PUSH_ALL_REGS
movq %rsp, %rdi
call debug_handler
POP_ALL_REGS
addq $8, %rsp
iretq
.globl int3_exception
int3_exception:
cld
pushq $0 /* error */
PUSH_ALL_REGS
movq %rsp, %rdi
call int3_handler
POP_ALL_REGS
addq $8, %rsp
iretq

View File

@@ -6,6 +6,8 @@
* resides in memory.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -19,13 +21,19 @@
#include <registers.h>
#include <string.h>
#define LOCALS_SPAN (4 * PAGE_SIZE)
struct x86_cpu_local_variables *locals;
size_t x86_cpu_local_variables_span = LOCALS_SPAN; /* for debugger */
void init_processors_local(int max_id)
{
size_t size;
size = LOCALS_SPAN * max_id;
/* Is contiguous allocating adequate?? */
locals = ihk_mc_alloc_pages(max_id, IHK_MC_AP_CRITICAL);
memset(locals, 0, PAGE_SIZE * max_id);
locals = ihk_mc_alloc_pages(size/PAGE_SIZE, IHK_MC_AP_CRITICAL);
memset(locals, 0, size);
kprintf("locals = %p\n", locals);
}
@@ -33,12 +41,12 @@ void init_processors_local(int max_id)
struct x86_cpu_local_variables *get_x86_cpu_local_variable(int id)
{
return (struct x86_cpu_local_variables *)
((char *)locals + (id << PAGE_SHIFT));
((char *)locals + (LOCALS_SPAN * id));
}
static void *get_x86_cpu_local_kstack(int id)
{
return ((char *)locals + ((id + 1) << PAGE_SHIFT));
return ((char *)locals + (LOCALS_SPAN * (id + 1)));
}
struct x86_cpu_local_variables *get_x86_this_cpu_local(void)
@@ -80,6 +88,15 @@ void assign_processor_id(void)
v->processor_id = id;
}
/*
 * Give the boot processor a temporary per-CPU variables area so that
 * %gs-relative accesses work before init_processors_local() has
 * allocated the real per-CPU areas.
 * The area is filled with -1, presumably so that reads of
 * not-yet-initialized fields are easy to recognize — TODO confirm.
 */
void init_boot_processor_local(void)
{
	static struct x86_cpu_local_variables avar;

	memset(&avar, -1, sizeof(avar));
	set_gs_base(&avar);
	return;
}
/** IHK **/
int ihk_mc_get_processor_id(void)
{

View File

@@ -5,6 +5,8 @@
* Acquire physical pages and manipulate page table entries.
* \author Taku Shimosawa <shimosawa@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2011 - 2012 Taku Shimosawa
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY
@@ -20,8 +22,9 @@
#include <list.h>
#include <process.h>
#include <page.h>
#include <cls.h>
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
static char *last_page;
@@ -263,7 +266,11 @@ static unsigned long attr_to_l1attr(enum ihk_mc_pt_attribute attr)
{
if (attr & PTATTR_UNCACHABLE) {
return (attr & ATTR_MASK) | PFL1_PCD | PFL1_PWT;
} else {
}
else if (attr & PTATTR_WRITE_COMBINED) {
return (attr & ATTR_MASK) | PFL1_PWT;
}
else {
return (attr & ATTR_MASK);
}
}
@@ -367,6 +374,7 @@ static int __set_pt_page(struct page_table *pt, void *virt, unsigned long phys,
unsigned long init_pt_lock_flags;
int ret = -ENOMEM;
init_pt_lock_flags = 0; /* for avoidance of warning */
if (in_kernel) {
init_pt_lock_flags = ihk_mc_spinlock_lock(&init_pt_lock);
}
@@ -494,8 +502,52 @@ static int __clear_pt_page(struct page_table *pt, void *virt, int largepage)
return 0;
}
/*
 * Translate a virtual address into a Linux /proc/<pid>/pagemap-style
 * 64-bit entry (PM_PFRAME | PM_PSHIFT | PM_PRESENT).
 *
 * pt:   page table to walk; NULL means the kernel init page table.
 * virt: virtual address to translate.
 *
 * Returns 0 (PM_PRESENT not set) when any level of the walk is not
 * present; otherwise the encoded pagemap entry for the 4KB frame.
 */
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt)
{
	int l4idx, l3idx, l2idx, l1idx;
	unsigned long v = (unsigned long)virt;
	uint64_t ret = 0;	/* default: not present */

	if (!pt) {
		pt = init_pt;
	}

	GET_VIRT_INDICES(v, l4idx, l3idx, l2idx, l1idx);

	if (!(pt->entry[l4idx] & PFL4_PRESENT)) {
		return ret;
	}

	pt = phys_to_virt(pt->entry[l4idx] & PAGE_MASK);
	if (!(pt->entry[l3idx] & PFL3_PRESENT)) {
		return ret;
	}

	pt = phys_to_virt(pt->entry[l3idx] & PAGE_MASK);
	if (!(pt->entry[l2idx] & PFL2_PRESENT)) {
		return ret;
	}

	/* 2MB large page: report the 4KB frame within the large page. */
	if ((pt->entry[l2idx] & PFL2_SIZE)) {
		ret = PM_PFRAME(((pt->entry[l2idx] & LARGE_PAGE_MASK) +
					(v & (LARGE_PAGE_SIZE - 1))) >> PAGE_SHIFT);
		ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
		return ret;
	}

	pt = phys_to_virt(pt->entry[l2idx] & PAGE_MASK);
	if (!(pt->entry[l1idx] & PFL1_PRESENT)) {
		return ret;
	}

	ret = PM_PFRAME((pt->entry[l1idx] & PT_PHYSMASK) >> PAGE_SHIFT);
	ret |= PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT;
	return ret;
}
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
void *virt, unsigned long *phys)
const void *virt, unsigned long *phys)
{
int l4idx, l3idx, l2idx, l1idx;
unsigned long v = (unsigned long)virt;
@@ -1824,7 +1876,8 @@ enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t faul
attr = common_vrflag_to_ptattr(flag, fault, ptep);
if ((fault & PF_PROT)
|| ((fault & PF_POPULATE) && (flag & VR_PRIVATE))) {
|| ((fault & (PF_POPULATE | PF_PATCH))
&& (flag & VR_PRIVATE))) {
attr |= PTATTR_DIRTY;
}
@@ -2043,7 +2096,7 @@ void ihk_mc_reserve_arch_pages(unsigned long start, unsigned long end,
/* Reserve Text + temporal heap */
cb(virt_to_phys(_head), virt_to_phys(get_last_early_heap()), 0);
/* Reserve trampoline area to boot the second ap */
cb(AP_TRAMPOLINE, AP_TRAMPOLINE + AP_TRAMPOLINE_SIZE, 0);
cb(ap_trampoline, ap_trampoline + AP_TRAMPOLINE_SIZE, 0);
/* Reserve the null page */
cb(0, PAGE_SIZE, 0);
/* Micro-arch specific */
@@ -2072,9 +2125,9 @@ void *phys_to_virt(unsigned long p)
return (void *)(p + MAP_ST_START);
}
int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
int copy_from_user(void *dst, const void *src, size_t siz)
{
struct process_vm *vm = proc->vm;
struct process_vm *vm = cpu_local_var(current)->vm;
struct vm_range *range;
size_t pos;
size_t wsiz;
@@ -2101,9 +2154,62 @@ int copy_from_user(struct process *proc, void *dst, const void *src, size_t siz)
return 0;
}
int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
/*
 * Copy @siz bytes from user virtual address @usrc of @vm into the
 * kernel buffer @kdst.
 *
 * The whole source range must lie inside the user region of @vm,
 * otherwise -EFAULT is returned.  Every page of the range is first
 * faulted in (PF_USER), then the data is copied page-by-page through
 * the kernel straight mapping of the underlying physical pages.
 *
 * Returns 0 on success or a negative error code.
 *
 * Fix: dropped the stray duplicate declaration
 * "struct process_vm *vm = proc->vm;" left over from the old
 * copy_to_user() signature — it shadowed the @vm parameter and
 * referenced the no-longer-existing @proc.
 */
int read_process_vm(struct process_vm *vm, void *kdst, const void *usrc, size_t siz)
{
	const uintptr_t ustart = (uintptr_t)usrc;
	const uintptr_t uend = ustart + siz;
	uint64_t reason;
	uintptr_t addr;
	int error;
	const void *from;
	void *to;
	size_t remain;
	size_t cpsize;
	unsigned long pa;
	void *va;

	/* Reject ranges not fully contained in the user region. */
	if ((ustart < vm->region.user_start)
			|| (vm->region.user_end <= ustart)
			|| ((vm->region.user_end - ustart) < siz)) {
		return -EFAULT;
	}

	/* Fault in every page of the source range. */
	reason = PF_USER; /* page not present */
	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
		error = page_fault_process_vm(vm, (void *)addr, reason);
		if (error) {
			return error;
		}
	}

	from = usrc;
	to = kdst;
	remain = siz;
	while (remain > 0) {
		/* Copy at most up to the end of the current source page. */
		cpsize = PAGE_SIZE - ((uintptr_t)from & (PAGE_SIZE - 1));
		if (cpsize > remain) {
			cpsize = remain;
		}

		error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, from, &pa);
		if (error) {
			return error;
		}

		va = phys_to_virt(pa);
		memcpy(to, va, cpsize);

		from += cpsize;
		to += cpsize;
		remain -= cpsize;
	}

	return 0;
} /* read_process_vm() */
int copy_to_user(void *dst, const void *src, size_t siz)
{
struct process_vm *vm = cpu_local_var(current)->vm;
struct vm_range *range;
size_t pos;
size_t wsiz;
@@ -2130,3 +2236,114 @@ int copy_to_user(struct process *proc, void *dst, const void *src, size_t siz)
memcpy(dst, src, siz);
return 0;
}
/*
 * Copy @siz bytes from the kernel buffer @ksrc to user virtual
 * address @udst of @vm.
 *
 * The whole destination range must lie inside the user region of @vm,
 * otherwise -EFAULT is returned.  Every page is first faulted in with
 * a write fault (PF_POPULATE | PF_WRITE | PF_USER), then the data is
 * copied page-by-page through the kernel straight mapping.
 *
 * Returns 0 on success or a negative error code.
 */
int write_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
{
	const uintptr_t ustart = (uintptr_t)udst;
	const uintptr_t uend = ustart + siz;
	uint64_t reason;
	uintptr_t addr;
	int error;
	const void *from;
	void *to;
	size_t remain;
	size_t cpsize;
	unsigned long pa;
	void *va;

	/* Reject ranges not fully contained in the user region. */
	if ((ustart < vm->region.user_start)
			|| (vm->region.user_end <= ustart)
			|| ((vm->region.user_end - ustart) < siz)) {
		return -EFAULT;
	}

	/* Make every destination page present and writable. */
	reason = PF_POPULATE | PF_WRITE | PF_USER;
	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
		error = page_fault_process_vm(vm, (void *)addr, reason);
		if (error) {
			return error;
		}
	}

	from = ksrc;
	to = udst;
	remain = siz;
	while (remain > 0) {
		/* Copy at most up to the end of the current destination page. */
		cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
		if (cpsize > remain) {
			cpsize = remain;
		}

		error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
		if (error) {
			return error;
		}

		va = phys_to_virt(pa);
		memcpy(va, from, cpsize);

		from += cpsize;
		to += cpsize;
		remain -= cpsize;
	}

	return 0;
} /* write_process_vm() */
/*
 * Write @siz bytes from the kernel buffer @ksrc into user address
 * @udst of @vm, faulting the pages in with PF_PATCH | PF_WRITE so that
 * privately mapped pages are made writable (copy-on-write broken)
 * before being modified.
 *
 * Returns 0 on success or a negative error code (-EFAULT when the
 * range is not fully inside the user region).
 *
 * Fix: removed the unconditional entry and success-exit kprintf()
 * traces — leftover debug logging on a hot path (cf. the "delete
 * debug code" commit); the error-path kprintf()s are kept because
 * they carry diagnostic value.
 */
int patch_process_vm(struct process_vm *vm, void *udst, const void *ksrc, size_t siz)
{
	const uintptr_t ustart = (uintptr_t)udst;
	const uintptr_t uend = ustart + siz;
	uint64_t reason;
	uintptr_t addr;
	int error;
	const void *from;
	void *to;
	size_t remain;
	size_t cpsize;
	unsigned long pa;
	void *va;

	/* Reject ranges not fully contained in the user region. */
	if ((ustart < vm->region.user_start)
			|| (vm->region.user_end <= ustart)
			|| ((vm->region.user_end - ustart) < siz)) {
		kprintf("patch_process_vm(%p,%p,%p,%lx):not in user\n", vm, udst, ksrc, siz);
		return -EFAULT;
	}

	/* Fault in every page with a patch-write fault. */
	reason = PF_PATCH | PF_WRITE | PF_USER;
	for (addr = ustart & PAGE_MASK; addr < uend; addr += PAGE_SIZE) {
		error = page_fault_process_vm(vm, (void *)addr, reason);
		if (error) {
			kprintf("patch_process_vm(%p,%p,%p,%lx):pf(%lx):%d\n", vm, udst, ksrc, siz, addr, error);
			return error;
		}
	}

	from = ksrc;
	to = udst;
	remain = siz;
	while (remain > 0) {
		/* Copy at most up to the end of the current destination page. */
		cpsize = PAGE_SIZE - ((uintptr_t)to & (PAGE_SIZE - 1));
		if (cpsize > remain) {
			cpsize = remain;
		}

		error = ihk_mc_pt_virt_to_phys(vm->address_space->page_table, to, &pa);
		if (error) {
			kprintf("patch_process_vm(%p,%p,%p,%lx):v2p(%p):%d\n", vm, udst, ksrc, siz, to, error);
			return error;
		}

		va = phys_to_virt(pa);
		memcpy(va, from, cpsize);

		from += cpsize;
		to += cpsize;
		remain -= cpsize;
	}

	return 0;
} /* patch_process_vm() */

File diff suppressed because it is too large Load Diff

View File

@@ -5,6 +5,8 @@
* implements x86's vsyscall
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 Hitachi, Ltd.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -16,20 +18,93 @@
*/
#include <syscall.h>
#include <ihk/atomic.h>
#include <arch/cpu.h>
extern int vsyscall_gettimeofday(void *tv, void *tz)
extern int vsyscall_gettimeofday(struct timeval *tv, void *tz)
__attribute__ ((section (".vsyscall.gettimeofday")));
int vsyscall_gettimeofday(void *tv, void *tz)
/*
 * Time-of-day shared data, placed in the vsyscall data page so user
 * space can read it directly.
 * do_local = 0: fall back to the gettimeofday syscall until the kernel
 * enables local TSC-based calculation.  version acts as a seqlock
 * counter (odd while settimeofday() is updating the data).
 */
struct tod_data_s tod_data
	__attribute__ ((section(".vsyscall.gettimeofday.data"))) = {
	.do_local = 0,
	.version = IHK_ATOMIC64_INIT(0),
};
/*
 * CPU relax hint used while spinning on tod_data.version.
 * Defined locally, presumably because code in the vsyscall page must
 * not reference kernel helpers — verify against the linker script.
 */
static inline void cpu_pause_for_vsyscall(void)
{
	asm volatile ("pause" ::: "memory");
	return;
} /* cpu_pause_for_vsyscall() */
/*
 * Compute the current time as tod_data.origin plus the wall time the
 * current TSC value represents (so origin is the time at TSC == 0).
 *
 * tod_data.version works like a seqlock: settimeofday() keeps it odd
 * while updating, so we spin until it is even, copy the origin, and
 * retry if the version changed around the copy.
 */
static inline void calculate_time_from_tsc(struct timespec *ts)
{
	long ver;
	unsigned long current_tsc;
	__time_t sec_delta;
	long ns_delta;

	for (;;) {
		while ((ver = ihk_atomic64_read(&tod_data.version)) & 1) {
			/* settimeofday() is in progress */
			cpu_pause_for_vsyscall();
		}
		rmb();
		*ts = tod_data.origin;
		rmb();
		if (ver == ihk_atomic64_read(&tod_data.version)) {
			break;
		}
		/* settimeofday() has intervened */
		cpu_pause_for_vsyscall();
	}

	/* Convert the TSC reading into seconds + nanoseconds. */
	current_tsc = rdtsc();
	sec_delta = current_tsc / tod_data.clocks_per_sec;
	ns_delta = NS_PER_SEC * (current_tsc % tod_data.clocks_per_sec)
		/ tod_data.clocks_per_sec;
	/* calc. of ns_delta overflows if clocks_per_sec exceeds 18.44 GHz */

	ts->tv_sec += sec_delta;
	ts->tv_nsec += ns_delta;
	/* a single carry suffices: ns_delta < NS_PER_SEC */
	if (ts->tv_nsec >= NS_PER_SEC) {
		ts->tv_nsec -= NS_PER_SEC;
		++ts->tv_sec;
	}
	return;
} /* calculate_time_from_tsc() */
/*
 * vsyscall entry for gettimeofday(2), executed in user context.
 *
 * When no timezone is requested and the kernel has enabled local
 * calculation (tod_data.do_local), the time is computed entirely from
 * the TSC without entering the kernel; otherwise the real syscall is
 * issued.  A failing syscall dereferences NULL to abort the caller
 * with SIGSEGV.
 */
int vsyscall_gettimeofday(struct timeval *tv, void *tz)
{
	int error;
	struct timespec ats;

	if (!tv && !tz) {
		/* nothing to do */
		return 0;
	}

	/* Do it locally if supported */
	if (!tz && tod_data.do_local) {
		calculate_time_from_tsc(&ats);

		/* convert timespec (ns) to timeval (us) */
		tv->tv_sec = ats.tv_sec;
		tv->tv_usec = ats.tv_nsec / 1000;

		return 0;
	}

	/* Otherwise syscall */
	asm ("syscall" : "=a" (error)
			: "a" (__NR_gettimeofday), "D" (tv), "S" (tz)
			: "%rcx", "%r11", "memory");
	if (error) {
		*(int *)0 = 0; /* i.e. raise(SIGSEGV) */
	}
	return error;
}
} /* vsyscall_gettimeofday() */
extern long vsyscall_time(void *tp)
__attribute__ ((section (".vsyscall.time")));
@@ -58,3 +133,17 @@ long vsyscall_time(void *tp)
return t;
}
/*
 * vsyscall entry for getcpu(2): no local fast path, simply forwards
 * the request to the kernel via the syscall instruction.
 */
extern int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
	__attribute__ ((section (".vsyscall.getcpu")));
int vsyscall_getcpu(unsigned *cpup, unsigned *nodep, void *tcachep)
{
	int error;

	asm ("syscall" : "=a" (error)
			: "a" (__NR_getcpu), "D" (cpup), "S" (nodep), "d" (tcachep)
			: "%rcx", "%r11", "memory");
	return error;
}

View File

@@ -0,0 +1,46 @@
#!/bin/bash -x
# \file arch/x86/tools/mcreboot-builtin-x86.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel boot script
# \author Masamichi Takagi <masamichi.takagi@riken.jp> \par
# Copyright (C) 2014 RIKEN AICS
# HISTORY:
#
# Kills stray mcexec processes, unloads any stale modules, loads the
# builtin-target IHK modules, allocates CPUs and memory to OS instance
# 0, boots mckernel.img on it and finally loads the mcctrl delegator.
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
# Kill leftover mcexec processes before tearing the modules down.
kill -9 `pidof mcexec`
# Remove modules in reverse dependency order; abort on failure.
if lsmod | grep mcctrl > /dev/null 2>&1; then
	rmmod mcctrl || exit 1
fi
if lsmod | grep dcfa > /dev/null 2>&1; then
	rmmod dcfa || exit 1
fi
if lsmod | grep ihk_builtin > /dev/null 2>&1; then
	rmmod ihk_builtin || exit 1
fi
if lsmod | grep ihk > /dev/null 2>&1; then
	rmmod ihk || exit 1
fi
# Load IHK and create OS instance 0.
insmod "$KMODDIR/ihk.ko" &&
insmod "$KMODDIR/ihk_builtin.ko" &&
"$SBINDIR/ihkconfig" 0 create &&
# NCORE: count of "2" entries in the SHIMOS CPU status line
# (presumably the cores offlined for LWK use — verify).
NCORE=`dmesg | grep -E 'SHIMOS: CPU Status:'|awk '{split($0,a," "); for (i = 1; i <= length(a); i++) { if(a[i] ~ /2/) {count++}} print count;}'`
# MEM: a quarter of total system memory, in gigabytes.
MEM=`free -g | grep -E 'Mem:' | awk '{print int($2/4)}'`
"$SBINDIR/ihkosctl" 0 alloc "$NCORE" "$MEM"g &&
"$SBINDIR/ihkosctl" 0 load "$KERNDIR/mckernel.img" &&
"$SBINDIR/ihkosctl" 0 kargs hidos osnum=0 &&
"$SBINDIR/ihkosctl" 0 boot &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
insmod "$KMODDIR/mcctrl.ko" &&
sleep 1 &&
"$SBINDIR/ihkosctl" 0 kmsg &&
exit 0

View File

@@ -0,0 +1,98 @@
#!/bin/bash
# IHK SMP-x86 example boot script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2014 RIKEN AICS
#
# This is an example script for loading IHK, configuring a partition and
# booting McKernel on it.
# The script reserves half of the CPU cores and 512MB of RAM from NUMA node 0
# when IHK is loaded for the first time, otherwise it destroys the current
# McKernel instance and reboots it using the same set of resources as it used
# previously.
# Note that the script does not output anything unless an error occurs.
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
# Defaults: 512MB from NUMA node 0; CPU list computed below if empty.
mem="512M@0"
cpus=""
ihk_ikc_irq_core=0
if [ "$cpus" == "" ]; then
	# Get the number of CPUs on NUMA node 0
	nr_cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $4}' | wc -l`
	# Use the second half of the cores
	let nr_cpus="$nr_cpus / 2"
	cpus=`lscpu --parse | awk -F"," '{if ($4 == 0) print $1}' | tail -n $nr_cpus | xargs echo -n | sed 's/ /,/g'`
	if [ "$cpus" == "" ]; then echo "error: no available CPUs on NUMA node 0?"; exit; fi
fi
# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
	if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
fi
# Load IHK if not loaded
if [ "`lsmod | grep ihk`" == "" ]; then
	if ! insmod ${KMODDIR}/ihk.ko; then echo "error: loading ihk"; exit; fi;
fi
# Load IHK-SMP if not loaded and reserve CPUs and memory
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then
	# Find a free IRQ vector in 64..255 for IHK to use.
	# NOTE(review): '$i' inside the single-quoted grep pattern is NOT
	# expanded, so that grep matches the literal string "^$i$"; the
	# "! -d /proc/irq/$i" test is what actually filters — verify intent.
	ihk_irq=""
	for i in `seq 64 255`; do
		if [ ! -d /proc/irq/$i ] && [ "`cat /proc/interrupts | grep ":" | awk '{print $1}' | grep -o '[0-9]*' | grep -e '^$i$'`" == "" ]; then
			ihk_irq=$i
			break
		fi
	done
	if [ "$ihk_irq" == "" ]; then echo "error: no IRQ available"; exit; fi
	if ! insmod ${KMODDIR}/ihk-smp-x86.ko ihk_start_irq=$ihk_irq ihk_ikc_irq_core=$ihk_ikc_irq_core; then echo "error: loading ihk-smp-x86"; exit; fi;
	if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
	if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
# If loaded, but no resources allocated, get CPUs and memory
else
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
	cpus_allocated=`${SBINDIR}/ihkosctl 0 query cpu`
	if [ "$cpus_allocated" == "" ]; then
		if ! ${SBINDIR}/ihkconfig 0 reserve cpu ${cpus}; then echo "error: reserving CPUs"; exit; fi
	fi
	if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
	mem_allocated=`${SBINDIR}/ihkosctl 0 query mem`
	if [ "$mem_allocated" == "" ]; then
		if ! ${SBINDIR}/ihkconfig 0 reserve mem ${mem}; then echo "error: reserving memory"; exit; fi
	fi
fi
# Check for existing OS instance and destroy
if [ -c /dev/mcos0 ]; then
	# Query CPU cores and memory of OS instance so that the same values are used as previously
	if ! ${SBINDIR}/ihkosctl 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
	cpus=`${SBINDIR}/ihkosctl 0 query cpu`
	if ! ${SBINDIR}/ihkosctl 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
	mem=`${SBINDIR}/ihkosctl 0 query mem`
	if ! ${SBINDIR}/ihkconfig 0 destroy 0; then echo "warning: destroy failed"; fi
else
	# Otherwise query IHK-SMP for resources
	if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
	cpus=`${SBINDIR}/ihkconfig 0 query cpu`
	if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
	mem=`${SBINDIR}/ihkconfig 0 query mem`
fi
# Create, configure and boot the new OS instance, then load mcctrl.
if ! ${SBINDIR}/ihkconfig 0 create; then echo "error: create"; exit; fi
if ! ${SBINDIR}/ihkosctl 0 assign cpu ${cpus}; then echo "error: assign CPUs"; exit; fi
if ! ${SBINDIR}/ihkosctl 0 assign mem ${mem}; then echo "error: assign memory"; exit; fi
if ! ${SBINDIR}/ihkosctl 0 load ${KERNDIR}/mckernel.img; then echo "error: loading kernel image"; exit; fi
if ! ${SBINDIR}/ihkosctl 0 kargs hidos; then echo "error: setting kernel arguments"; exit; fi
if ! ${SBINDIR}/ihkosctl 0 boot; then echo "error: booting"; exit; fi
if ! insmod ${KMODDIR}/mcctrl.ko; then echo "error: inserting mcctrl.ko"; exit; fi
if ! chown `logname` /dev/mcd* /dev/mcos*; then echo "error: chowning device files"; exit; fi

View File

@@ -0,0 +1,16 @@
#!/bin/bash
# \file arch/x86/tools/mcshutdown-attached-mic.sh.in
# License details are found in the file LICENSE.
# \brief
# mckernel shutdown script
#
# \author McKernel Development Team
#
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
# Shut down McKernel OS instance 0.
"$SBINDIR/ihkosctl" 0 shutdown

View File

@@ -0,0 +1,47 @@
#!/bin/bash
# IHK SMP-x86 example McKernel unload script.
# author: Balazs Gerofi <bgerofi@riken.jp>
# Copyright (C) 2015 RIKEN AICS
#
# This is an example script for destroying McKernel and releasing IHK resources
# Note that the script does no output anything unless an error occurs.
prefix="@prefix@"
BINDIR="@BINDIR@"
SBINDIR="@SBINDIR@"
KMODDIR="@KMODDIR@"
KERNDIR="@KERNDIR@"
mem=""
cpus=""
# No SMP module? Exit.
if [ "`lsmod | grep ihk_smp_x86`" == "" ]; then exit; fi
# Remove delegator if loaded
if [ "`lsmod | grep mcctrl`" != "" ]; then
if ! rmmod mcctrl; then echo "error: removing mcctrl"; exit; fi
fi
# Destroy all LWK instances
for i in /dev/mcos*; do
ind=`echo $i|cut -c10-`;
if ! ${SBINDIR}/ihkconfig 0 destroy $ind; then echo "error: destroying LWK instance $ind failed"; exit; fi
done
# Query IHK-SMP resources and release them
if ! ${SBINDIR}/ihkconfig 0 query cpu > /dev/null; then echo "error: querying cpus"; exit; fi
cpus=`${SBINDIR}/ihkconfig 0 query cpu`
if ! ${SBINDIR}/ihkconfig 0 release cpu $cpus > /dev/null; then echo "error: releasing CPUs"; exit; fi
if ! ${SBINDIR}/ihkconfig 0 query mem > /dev/null; then echo "error: querying memory"; exit; fi
mem=`${SBINDIR}/ihkconfig 0 query mem`
if ! ${SBINDIR}/ihkconfig 0 release mem $mem > /dev/null; then echo "error: releasing memory"; exit; fi
# Remove SMP module
if [ "`lsmod | grep ihk_smp_x86`" != "" ]; then
if ! rmmod ihk_smp_x86; then echo "error: removing ihk_smp_x86"; exit; fi
fi

3050
configure vendored

File diff suppressed because it is too large Load Diff

View File

@@ -24,7 +24,7 @@ AC_ARG_WITH([kernelsrc],
AC_ARG_WITH([target],
AC_HELP_STRING(
[--with-target={attached-mic | builtin-mic | builtin-x86}],[target, default is attached-mic]),
[--with-target={attached-mic | builtin-mic | builtin-x86 | smp-x86}],[target, default is attached-mic]),
[WITH_TARGET=$withval],[WITH_TARGET=yes])
AC_ARG_ENABLE([dcfa],
@@ -111,6 +111,26 @@ case $WITH_TARGET in
MANDIR="$prefix/attached/man"
fi
;;
smp-x86)
ARCH=`uname -m`
AC_PROG_CC
XCC=$CC
if test "X$KERNDIR" = X; then
KERNDIR="$prefix/smp-x86/kernel"
fi
if test "X$BINDIR" = X; then
BINDIR="$prefix/bin"
fi
if test "X$SBINDIR" = X; then
SBINDIR="$prefix/sbin"
fi
if test "X$KMODDIR" = X; then
KMODDIR="$prefix/kmod"
fi
if test "X$MANDIR" = X; then
MANDIR="$prefix/smp-x86/man"
fi
;;
*)
AC_MSG_ERROR([target $WITH_TARGET is unknwon])
;;
@@ -145,6 +165,10 @@ AC_CONFIG_FILES([
kernel/Makefile.build
arch/x86/tools/mcreboot-attached-mic.sh
arch/x86/tools/mcshutdown-attached-mic.sh
arch/x86/tools/mcreboot-builtin-x86.sh
arch/x86/tools/mcreboot-smp-x86.sh
arch/x86/tools/mcstop+release-smp-x86.sh
arch/x86/tools/mcshutdown-builtin-x86.sh
arch/x86/tools/mcreboot.1:arch/x86/tools/mcreboot.1in
])

View File

@@ -38,6 +38,9 @@
#define MCEXEC_UP_SEND_SIGNAL 0x30a02906
#define MCEXEC_UP_GET_CPU 0x30a02907
#define MCEXEC_UP_STRNCPY_FROM_USER 0x30a02908
#define MCEXEC_UP_NEW_PROCESS 0x30a02909
#define MCEXEC_UP_GET_CRED 0x30a0290a
#define MCEXEC_UP_GET_CREDV 0x30a0290b
#define MCEXEC_UP_PREPARE_DMA 0x30a02910
#define MCEXEC_UP_FREE_DMA 0x30a02911
@@ -45,6 +48,8 @@
#define MCEXEC_UP_OPEN_EXEC 0x30a02912
#define MCEXEC_UP_CLOSE_EXEC 0x30a02913
#define MCEXEC_UP_DEBUG_LOG 0x40000000
#define MCEXEC_UP_TRANSFER_TO_REMOTE 0
#define MCEXEC_UP_TRANSFER_FROM_REMOTE 1
@@ -67,6 +72,7 @@ struct program_image_section {
};
#define SHELL_PATH_MAX_LEN 1024
#define MCK_RLIM_MAX 20
struct program_load_desc {
int num_sections;
@@ -76,6 +82,7 @@ struct program_load_desc {
int err;
int stack_prot;
int pgid;
int cred[8];
unsigned long entry;
unsigned long user_start;
unsigned long user_end;
@@ -90,8 +97,7 @@ struct program_load_desc {
unsigned long args_len;
char *envs;
unsigned long envs_len;
unsigned long rlimit_stack_cur;
unsigned long rlimit_stack_max;
struct rlimit rlimit[MCK_RLIM_MAX];
unsigned long interp_align;
char shell_path[SHELL_PATH_MAX_LEN];
struct program_image_section sections[0];
@@ -156,4 +162,8 @@ struct signal_desc {
char info[128];
};
struct newprocess_desc {
int pid;
};
#endif

View File

@@ -2,13 +2,14 @@ KDIR ?= @KDIR@
ARCH ?= @ARCH@
src = @abs_srcdir@
KMODDIR=@KMODDIR@
BINDIR=@BINDIR@
IHK_BASE=$(src)/../../../ihk
obj-m += mcctrl.o
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include
ccflags-y := -I$(IHK_BASE)/linux/include -I$(IHK_BASE)/ikc/include -I$(IHK_BASE)/include -I$(src)/../include -mcmodel=kernel -mno-red-zone -DMCEXEC_PATH=\"$(BINDIR)/mcexec\"
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o
mcctrl-y := driver.o control.o ikc.o syscall.o procfs.o binfmt_mcexec.o
KBUILD_EXTRA_SYMBOLS = @abs_builddir@/../../../ihk/linux/core/Module.symvers

View File

@@ -0,0 +1,284 @@
#include <linux/module.h>
#include <linux/string.h>
#include <linux/stat.h>
#include <linux/binfmts.h>
#include <linux/elfcore.h>
#include <linux/elf.h>
#include <linux/init.h>
#include <linux/file.h>
#include <linux/err.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/version.h>
#include "mcctrl.h"
/*
 * pathcheck(): return nonzero when @file resides under one of the
 * directory prefixes given in the colon-separated @list.
 *
 * Trailing '/' characters of each entry are ignored, and a match
 * requires a '/' immediately after the prefix, so "/usr" matches
 * "/usr/bin/ls" but not "/usrlocal/x".  An empty @list matches every
 * path, as does an entry that is empty after stripping (e.g. "/").
 */
static int pathcheck(const char *file, const char *list)
{
	const char *entry;

	/* An empty list accepts everything. */
	if (*list == '\0')
		return 1;

	entry = list;
	for (;;) {
		const char *sep;
		const char *end;
		const char *tail;
		int len;

		sep = strchr(entry, ':');
		end = sep ? sep : strchr(entry, '\0');

		/* Drop trailing slashes from the entry. */
		for (tail = end; tail > entry && tail[-1] == '/'; tail--)
			;
		len = tail - entry;

		if (!strncmp(file, entry, len) && file[len] == '/')
			return 1;

		if (!sep)
			return 0;
		entry = sep + 1;
	}
}
/*
 * binfmt handler that transparently re-executes ordinary ELF64
 * binaries under mcexec (the McKernel launcher).
 *
 * The decision is driven by environment variables of the new process,
 * read directly from the bprm argument pages:
 *   MCEXEC    - unset/"0"/"off" disables wrapping; a value containing
 *               '/' overrides the mcexec path (default MCEXEC_PATH).
 *   MCEXEC_WL - colon-separated whitelist of directory prefixes.
 *   MCEXEC_BL - colon-separated blacklist of directory prefixes.
 * With neither list set, "/usr:/bin:/sbin:/opt" acts as the whitelist.
 * mcexec itself and the ihk control tools are never wrapped.
 *
 * Returns -ENOEXEC to decline (the normal ELF loader takes over), or
 * the result of restarting the binfmt search with argv rewritten to
 * "mcexec <original-path> <argv[1..]>".
 */
static int load_elf(struct linux_binprm *bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
		, struct pt_regs *regs
#endif
)
{
	char mcexec[BINPRM_BUF_SIZE];
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,36)
	const
#endif
	char *wp;
	char *cp;
	struct file *file;
	int rc;
	struct elfhdr *elf_ex = (struct elfhdr *)bprm->buf;
	typedef struct {
		char *name;	/* env var name to look for */
		char *val;	/* value, kmalloc'ed during pass 1 */
		int l;		/* value length measured during pass 0 */
	} envdata;
	envdata env[] = {
		{.name = "MCEXEC"},
#define env_mcexec (env[0].val)
		{.name = "MCEXEC_WL"},
#define env_mcexec_wl (env[1].val)
		{.name = "MCEXEC_BL"},
#define env_mcexec_bl (env[2].val)
		{.name = NULL}
	};
	envdata *ep;
	unsigned long off = 0;
	struct page *page;
	char *addr = NULL;
	int i;
	unsigned long p;
	int st;
	int mode;	/* 0: scanning argv, 1: scanning env, 2: done */
	int cnt[2];
	char buf[32];
	int l;
	int pass;
	char pbuf[1024];
	const char *path;

	/* Handle only ELF64 executables/shared objects with an env. */
	if(bprm->envc == 0)
		return -ENOEXEC;
	if(memcmp(elf_ex->e_ident, ELFMAG, SELFMAG) != 0)
		return -ENOEXEC;
	if(elf_ex->e_type != ET_EXEC && elf_ex->e_type != ET_DYN)
		return -ENOEXEC;
	if(elf_ex->e_ident[EI_CLASS] != ELFCLASS64)
		return -ENOEXEC;

	path = d_path(&bprm->file->f_path, pbuf, 1024);
	if(!path || IS_ERR(path))
		path = bprm->interp;

	/* Never wrap mcexec itself or the ihk control tools. */
	cp = strrchr(path, '/');
	if(!cp ||
	   !strcmp(cp, "/mcexec") ||
	   !strcmp(cp, "/ihkosctl") ||
	   !strcmp(cp, "/ihkconfig"))
		return -ENOEXEC;

	/*
	 * Scan the argument pages twice: pass 0 only measures the value
	 * length of each interesting env var, pass 1 allocates the
	 * buffers and copies the values.
	 */
	cnt[0] = bprm->argc;
	cnt[1] = bprm->envc;
	for(pass = 0; pass < 2; pass++){
		p = bprm->p;
		mode = cnt[0] == 0? 1: 0;
		if(pass == 1){
			for(ep = env; ep->name; ep++){
				if(ep->l)
					ep->val = kmalloc(ep->l, GFP_KERNEL);
			}
		}
		ep = NULL;
		l = 0;
		for(i = 0, st = 0; mode != 2;){
			if(st == 0){
				off = p & ~PAGE_MASK;
				/*
				 * NOTE(review): the page is always looked up
				 * at bprm->p while the scan cursor is p —
				 * confirm this is correct once the strings
				 * span more than one page.
				 */
				rc = get_user_pages(current, bprm->mm,
						bprm->p, 1, 0, 1,
						&page, NULL);
				if(rc <= 0)
					return -EFAULT;
				addr = kmap_atomic(page
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
						, KM_USER0
#endif
						);
				st = 1;
			}
			if(addr[off]){
				if(mode == 1){
					if(ep){
						/* inside the value of a matched var */
						if(pass == 1)
							ep->val[l] = addr[off];
						l++;
					}
					else if(addr[off] == '='){
						/* end of a var name: look it up */
						if(l < 32)
							buf[l] = '\0';
						buf[31] = '\0';
						for(ep = env; ep->name; ep++)
							if(!strcmp(ep->name, buf))
								break;
						if(ep->name)
							l = 0;
						else
							ep = NULL;
					}
					else{
						/* accumulating a var name (first 31 chars) */
						if(l < 32)
							buf[l] = addr[off];
						l++;
					}
				}
			}
			else{
				/* NUL: one argv/env string ends here */
				if(mode == 1 && ep){
					if(pass == 0){
						ep->l = l + 1;
					}
					else{
						ep->val[l] = '\0';
					}
				}
				ep = NULL;
				l = 0;
				i++;
				if(i == cnt[mode]){
					i = 0;
					mode++;
				}
			}
			off++;
			p++;
			if(off == PAGE_SIZE || mode == 2){
				kunmap_atomic(addr
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0)
						, KM_USER0
#endif
						);
				put_page(page);
				st = 0;
			}
		}
	}

	/* From here on, rc != 0 means "do not wrap this binary". */
	if(!env_mcexec || !strcmp(env_mcexec, "0") || !strcmp(env_mcexec, "off"))
		rc = 1;
	else{
		rc = 0;
		/* an MCEXEC value containing '/' overrides the mcexec path */
		if(strchr(env_mcexec, '/') && strlen(env_mcexec) < BINPRM_BUF_SIZE)
			strcpy(mcexec, env_mcexec);
		else
			strcpy(mcexec, MCEXEC_PATH);
	}
	/* whitelist takes precedence over blacklist over the default list */
	if(rc);
	else if(env_mcexec_wl)
		rc = !pathcheck(path, env_mcexec_wl);
	else if(env_mcexec_bl)
		rc = pathcheck(path, env_mcexec_bl);
	else
		rc = pathcheck(path, "/usr:/bin:/sbin:/opt");
	for(ep = env; ep->val; ep++)
		;
	for(ep = env; ep->name; ep++)
		if(ep->val)
			kfree(ep->val);
	if(rc)
		return -ENOEXEC;

	file = open_exec(mcexec);
	if (IS_ERR(file))
		return -ENOEXEC;

	/* argv: drop argv[0], push the original path, then push mcexec. */
	rc = remove_arg_zero(bprm);
	if (rc){
		fput(file);
		return rc;
	}
	rc = copy_strings_kernel(1, &bprm->interp, bprm);
	if (rc < 0){
		fput(file);
		return rc;
	}
	bprm->argc++;
	wp = mcexec;
	rc = copy_strings_kernel(1, &wp, bprm);
	if (rc){
		fput(file);
		return rc;
	}
	bprm->argc++;
#if 1
	rc = bprm_change_interp(mcexec, bprm);
	if (rc < 0){
		fput(file);
		return rc;
	}
#else
	if(brpm->interp != bprm->filename)
		kfree(brpm->interp);
	kfree(brpm->filename);
	bprm->filename = bprm->interp = kstrdup(mcexec, GFP_KERNEL);
	if(!bprm->interp){
		fput(file);
		return -ENOMEM;
	}
#endif

	/* Swap in the mcexec file and restart the binfmt search. */
	allow_write_access(bprm->file);
	fput(bprm->file);
	bprm->file = file;
	rc = prepare_binprm(bprm);
	if (rc < 0){
		return rc;
	}
	return search_binary_handler(bprm
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0)
			, regs
#endif
			);
}
static struct linux_binfmt mcexec_format = {
.module = THIS_MODULE,
.load_binary = load_elf,
};
void __init binfmt_mcexec_init(void)
{
insert_binfmt(&mcexec_format);
}
void __exit binfmt_mcexec_exit(void)
{
unregister_binfmt(&mcexec_format);
}

View File

@@ -31,12 +31,15 @@
#include <linux/gfp.h>
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/version.h>
#include <asm/uaccess.h>
#include <asm/delay.h>
#include <asm/msr.h>
#include <asm/io.h>
#include "mcctrl.h"
//#define DEBUG
#ifdef DEBUG
#define dprintk printk
#else
@@ -242,19 +245,69 @@ int mcexec_transfer_image(ihk_os_t os, struct remote_transfer *__user upt)
//extern unsigned long last_thread_exec;
/* Per-open-file context handed to the release handler: the McKernel
 * PID to clean up when the controlling fd is closed. */
struct handlerinfo {
	int pid;
};
/*
 * MCEXEC_UP_DEBUG_LOG handler: forward a debug-log request to the LWK
 * over IKC channel 0.  @arg is passed through unchanged in the
 * SCD_MSG_DEBUG_LOG packet.  Always returns 0 (fire-and-forget).
 */
static long mcexec_debug_log(ihk_os_t os, unsigned long arg)
{
	struct ikc_scd_packet isp;

	memset(&isp, '\0', sizeof isp);
	isp.msg = SCD_MSG_DEBUG_LOG;
	isp.arg = arg;
	mcctrl_ikc_send(os, 0, &isp);
	return 0;
}
/*
 * File-release callback: when the controlling fd is closed, ask the
 * LWK (SCD_MSG_CLEANUP_PROCESS over IKC channel 0) to clean up the
 * McKernel process recorded in @param, then free the context.
 */
static void release_handler(ihk_os_t os, void *param)
{
	struct handlerinfo *info = param;
	struct ikc_scd_packet isp;

	memset(&isp, '\0', sizeof isp);
	isp.msg = SCD_MSG_CLEANUP_PROCESS;
	isp.pid = info->pid;

	mcctrl_ikc_send(os, 0, &isp);
	kfree(param);
}
/*
 * MCEXEC_UP_NEW_PROCESS handler: register a release handler on @file
 * so that the McKernel-side process (desc.pid) is cleaned up when the
 * controlling mcexec file descriptor is closed.
 *
 * Returns 0 on success, -EFAULT on a bad user pointer, -ENOMEM when
 * the handler context cannot be allocated.
 *
 * Fix: the kmalloc() result was dereferenced without a NULL check,
 * which would oops on allocation failure.
 */
static long mcexec_newprocess(ihk_os_t os,
		struct newprocess_desc *__user udesc,
		struct file *file)
{
	struct newprocess_desc desc;
	struct handlerinfo *info;

	if (copy_from_user(&desc, udesc, sizeof(struct newprocess_desc))) {
		return -EFAULT;
	}
	info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
	if (!info) {
		return -ENOMEM;
	}
	info->pid = desc.pid;
	ihk_os_register_release_handler(file, release_handler, info);
	return 0;
}
static long mcexec_start_image(ihk_os_t os,
struct program_load_desc * __user udesc)
struct program_load_desc * __user udesc,
struct file *file)
{
struct program_load_desc desc;
struct ikc_scd_packet isp;
struct mcctrl_channel *c;
struct mcctrl_usrdata *usrdata = ihk_host_os_get_usrdata(os);
struct handlerinfo *info;
if (copy_from_user(&desc, udesc,
sizeof(struct program_load_desc))) {
return -EFAULT;
}
info = kmalloc(sizeof(struct handlerinfo), GFP_KERNEL);
info->pid = desc.pid;
ihk_os_register_release_handler(file, release_handler, info);
c = usrdata->channels + desc.cpu;
mcctrl_ikc_set_recv_cpu(os, desc.cpu);
@@ -366,10 +419,10 @@ retry_alloc:
init_waitqueue_head(&wqhln->wq_syscall);
list_add_tail(&wqhln->list, &c->wq_list);
}
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
wqhln->req = 1;
wake_up(&wqhln->wq_syscall);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, flags);
return 0;
}
@@ -439,14 +492,15 @@ retry_alloc:
ret = wait_event_interruptible(wqhln->wq_syscall, wqhln->req);
if (ret) {
return -EINTR;
}
/* Remove per-process wait queue head */
irqflags = ihk_ikc_spinlock_lock(&c->wq_list_lock);
list_del(&wqhln->list);
ihk_ikc_spinlock_unlock(&c->wq_list_lock, irqflags);
if (ret && !wqhln->req) {
kfree(wqhln);
return -EINTR;
}
kfree(wqhln);
if (c->param.request_va->number == 61 &&
@@ -723,7 +777,7 @@ long mcexec_ret_syscall(ihk_os_t os, struct syscall_ret_desc *__user arg)
}
LIST_HEAD(mckernel_exec_files);
spinlock_t mckernel_exec_file_lock = SPIN_LOCK_UNLOCKED;
DEFINE_SPINLOCK(mckernel_exec_file_lock);
struct mckernel_exec_file {
@@ -733,6 +787,47 @@ struct mckernel_exec_file {
struct list_head list;
};
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
#define GUIDVAL(x) (x)
#else
#define GUIDVAL(x) ((x).val)
#endif
int
mcexec_getcred(unsigned long phys)
{
int *virt = phys_to_virt(phys);
virt[0] = GUIDVAL(current_uid());
virt[1] = GUIDVAL(current_euid());
virt[2] = GUIDVAL(current_suid());
virt[3] = GUIDVAL(current_fsuid());
virt[4] = GUIDVAL(current_gid());
virt[5] = GUIDVAL(current_egid());
virt[6] = GUIDVAL(current_sgid());
virt[7] = GUIDVAL(current_fsgid());
return 0;
}
int
mcexec_getcredv(int __user *virt)
{
int wk[8];
wk[0] = GUIDVAL(current_uid());
wk[1] = GUIDVAL(current_euid());
wk[2] = GUIDVAL(current_suid());
wk[3] = GUIDVAL(current_fsuid());
wk[4] = GUIDVAL(current_gid());
wk[5] = GUIDVAL(current_egid());
wk[6] = GUIDVAL(current_sgid());
wk[7] = GUIDVAL(current_fsgid());
if(copy_to_user(virt, wk, sizeof(int) * 8))
return -EFAULT;
return 0;
}
int mcexec_open_exec(ihk_os_t os, char * __user filename)
{
struct file *file;
@@ -857,7 +952,8 @@ long mcexec_strncpy_from_user(ihk_os_t os, struct strncpy_from_user_desc * __use
return 0;
}
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg,
struct file *file)
{
switch (req) {
case MCEXEC_UP_PREPARE_IMAGE:
@@ -867,7 +963,7 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
return mcexec_transfer_image(os, (struct remote_transfer *)arg);
case MCEXEC_UP_START_IMAGE:
return mcexec_start_image(os, (struct program_load_desc *)arg);
return mcexec_start_image(os, (struct program_load_desc *)arg, file);
case MCEXEC_UP_WAIT_SYSCALL:
return mcexec_wait_syscall(os, (struct syscall_wait_desc *)arg);
@@ -888,6 +984,10 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
return mcexec_strncpy_from_user(os,
(struct strncpy_from_user_desc *)arg);
case MCEXEC_UP_NEW_PROCESS:
return mcexec_newprocess(os, (struct newprocess_desc *)arg,
file);
case MCEXEC_UP_OPEN_EXEC:
return mcexec_open_exec(os, (char *)arg);
@@ -899,6 +999,15 @@ long __mcctrl_control(ihk_os_t os, unsigned int req, unsigned long arg)
case MCEXEC_UP_FREE_DMA:
return mcexec_free_region(os, (unsigned long *)arg);
case MCEXEC_UP_GET_CRED:
return mcexec_getcred((unsigned long)arg);
case MCEXEC_UP_GET_CREDV:
return mcexec_getcredv((int *)arg);
case MCEXEC_UP_DEBUG_LOG:
return mcexec_debug_log(os, arg);
}
return -EINVAL;
}

View File

@@ -29,7 +29,8 @@
#define OS_MAX_MINOR 64
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long);
extern long __mcctrl_control(ihk_os_t, unsigned int, unsigned long,
struct file *);
extern int prepare_ikc_channels(ihk_os_t os);
extern void destroy_ikc_channels(ihk_os_t os);
#ifndef DO_USER_MODE
@@ -38,11 +39,15 @@ extern void mcctrl_syscall_init(void);
extern void procfs_init(int);
extern void procfs_exit(int);
extern void rus_page_hash_init(void);
extern void rus_page_hash_put_pages(void);
extern void binfmt_mcexec_init(void);
extern void binfmt_mcexec_exit(void);
static long mcctrl_ioctl(ihk_os_t os, unsigned int request, void *priv,
unsigned long arg)
unsigned long arg, struct file *file)
{
return __mcctrl_control(os, request, arg);
return __mcctrl_control(os, request, arg, file);
}
static struct ihk_os_user_call_handler mcctrl_uchs[] = {
@@ -55,10 +60,14 @@ static struct ihk_os_user_call_handler mcctrl_uchs[] = {
{ .request = MCEXEC_UP_SEND_SIGNAL, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CPU, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_STRNCPY_FROM_USER, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_NEW_PROCESS, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_PREPARE_DMA, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_FREE_DMA, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_OPEN_EXEC, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_CLOSE_EXEC, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CRED, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_GET_CREDV, .func = mcctrl_ioctl },
{ .request = MCEXEC_UP_DEBUG_LOG, .func = mcctrl_ioctl },
};
static struct ihk_os_user_call mcctrl_uc_proto = {
@@ -101,6 +110,8 @@ static int __init mcctrl_init(void)
mcctrl_syscall_init();
#endif
rus_page_hash_init();
for(i = 0; i < OS_MAX_MINOR; i++){
if (os[i]) {
memcpy(mcctrl_uc + i, &mcctrl_uc_proto, sizeof mcctrl_uc_proto);
@@ -113,6 +124,8 @@ static int __init mcctrl_init(void)
}
}
binfmt_mcexec_init();
return 0;
}
@@ -120,6 +133,7 @@ static void __exit mcctrl_exit(void)
{
int i;
binfmt_mcexec_exit();
printk("mcctrl: unregistered.\n");
for(i = 0; i < OS_MAX_MINOR; i++){
if(os[i]){
@@ -128,6 +142,8 @@ static void __exit mcctrl_exit(void)
procfs_exit(i);
}
}
rus_page_hash_put_pages();
}
MODULE_LICENSE("GPL v2");

View File

@@ -48,12 +48,15 @@
#define SCD_MSG_SYSCALL_ONESIDE 0x4
#define SCD_MSG_SEND_SIGNAL 0x8
#define SCD_MSG_CLEANUP_PROCESS 0x9
#define SCD_MSG_PROCFS_CREATE 0x10
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_DEBUG_LOG 0x20
#define DMA_PIN_SHIFT 21
#define DO_USER_MODE

View File

@@ -10,12 +10,15 @@
* HISTORY:
*/
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/proc_fs.h>
#include <linux/list.h>
#include <linux/uaccess.h>
#include <linux/fs.h>
#include <linux/resource.h>
#include "mcctrl.h"
#include <linux/version.h>
//#define PROCFS_DEBUG
@@ -26,16 +29,16 @@
#endif
static DECLARE_WAIT_QUEUE_HEAD(procfsq);
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat);
static ssize_t mckernel_procfs_read(struct file *file, char __user *buf,
size_t nbytes, loff_t *ppos);
/* A private data for the procfs driver. */
struct procfs_list_entry;
struct procfs_list_entry {
struct list_head list;
struct proc_dir_entry *entry;
struct proc_dir_entry *parent;
struct procfs_list_entry *parent;
ihk_os_t os;
int osnum;
int pid;
@@ -53,6 +56,28 @@ struct procfs_list_entry {
LIST_HEAD(procfs_file_list);
static ihk_spinlock_t procfs_file_list_lock;
loff_t mckernel_procfs_lseek(struct file *file, loff_t offset, int orig)
{
switch (orig) {
case 0:
file->f_pos = offset;
break;
case 1:
file->f_pos += offset;
break;
default:
return -EINVAL;
}
return file->f_pos;
}
static const struct file_operations mckernel_procfs_file_operations = {
.llseek = mckernel_procfs_lseek,
.read = mckernel_procfs_read,
.write = NULL,
};
/**
* \brief Return specified procfs entry.
*
@@ -71,22 +96,22 @@ static ihk_spinlock_t procfs_file_list_lock;
/*
* XXX: Two or more entries which have same name can be created.
*
* get_procfs_entry() avoids creating an entry which has already been created.
* get_procfs_list_entry() avoids creating an entry which has already been created.
* But, it allows creating an entry which is being created by another thread.
*
* This problem occurred when two requests which created files with a common
* ancestor directory which was not explicitly created were racing.
*/
static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
static struct procfs_list_entry *get_procfs_list_entry(char *p, int osnum, int mode)
{
char *r;
struct proc_dir_entry *ret = NULL, *parent = NULL;
struct procfs_list_entry *e;
struct proc_dir_entry *pde = NULL;
struct procfs_list_entry *e, *ret = NULL, *parent = NULL;
char name[PROCFS_NAME_MAX];
unsigned long irqflags;
dprintk("get_procfs_entry: %s for osnum %d mode %o\n", p, osnum, mode);
dprintk("get_procfs_list_entry: %s for osnum %d mode %o\n", p, osnum, mode);
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
list_for_each_entry(e, &procfs_file_list, list) {
if (e == NULL) {
@@ -95,7 +120,8 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
}
if (strncmp(e->fname, p, PROCFS_NAME_MAX) == 0) {
/* We found the entry */
ret = e->entry;
ret = e;
break;
}
}
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
@@ -107,19 +133,19 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
/* We have non-null parent dir. */
strncpy(name, p, r - p);
name[r - p] = '\0';
parent = get_procfs_entry(name, osnum, 0);
parent = get_procfs_list_entry(name, osnum, 0);
if (parent == NULL) {
/* We counld not get a parent procfs entry. Give up.*/
return NULL;
}
}
e = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
if (e == NULL) {
ret = kmalloc(sizeof(struct procfs_list_entry), GFP_KERNEL);
if (ret == NULL) {
kprintf("ERROR: not enough memory to create PROCFS entry.\n");
return NULL;
}
/* Fill the fname field of the entry */
strncpy(e->fname, p, PROCFS_NAME_MAX);
strncpy(ret->fname, p, PROCFS_NAME_MAX);
if (r != NULL) {
strncpy(name, r + 1, p + PROCFS_NAME_MAX - r - 1);
@@ -127,25 +153,38 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
strncpy(name, p, PROCFS_NAME_MAX);
}
if (mode == 0) {
ret = proc_mkdir(name, parent);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
pde = proc_mkdir(name, parent ? parent->entry : NULL);
#else
pde = proc_mkdir_data(name, 0555, parent ? parent->entry : NULL, ret);
#endif
} else {
ret = create_proc_entry(name, mode, parent);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
pde = create_proc_entry(name, mode, parent->entry);
if (pde)
pde->proc_fops = &mckernel_procfs_file_operations;
#else
pde = proc_create_data(name, mode, parent->entry,
&mckernel_procfs_file_operations, ret);
#endif
}
if (ret == NULL) {
if (pde == NULL) {
kprintf("ERROR: cannot create a PROCFS entry for %s.\n", p);
kfree(e);
kfree(ret);
return NULL;
}
ret->data = e;
e->osnum = osnum;
e->entry = ret;
e->parent = parent;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
pde->data = ret;
#endif
ret->osnum = osnum;
ret->entry = pde;
ret->parent = parent;
irqflags = ihk_ikc_spinlock_lock(&procfs_file_list_lock);
list_add(&(e->list), &procfs_file_list);
list_add(&(ret->list), &procfs_file_list);
ihk_ikc_spinlock_unlock(&procfs_file_list_lock, irqflags);
dprintk("get_procfs_entry: %s done\n", p);
dprintk("get_procfs_list_entry: %s done\n", p);
return ret;
}
@@ -161,7 +200,6 @@ static struct proc_dir_entry *get_procfs_entry(char *p, int osnum, int mode)
void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
{
struct proc_dir_entry *entry;
struct procfs_list_entry *e;
ihk_device_t dev = ihk_os_to_dev(__os);
unsigned long parg;
@@ -183,18 +221,16 @@ void procfs_create(void *__os, int ref, int osnum, int pid, unsigned long arg)
printk("ERROR: procfs_creat: file name not properly terminated.\n");
goto quit;
}
entry = get_procfs_entry(name, osnum, mode);
if (entry == NULL) {
e = get_procfs_list_entry(name, osnum, mode);
if (e == NULL) {
printk("ERROR: could not create a procfs entry for %s.\n", name);
goto quit;
}
e = entry->data;
e->os = __os;
e->cpu = ref;
e->pid = pid;
entry->read_proc = mckernel_procfs_read;
quit:
f->status = 1; /* Now the peer can free the data. */
ihk_device_unmap_virtual(dev, f, sizeof(struct procfs_file));
@@ -216,7 +252,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
unsigned long parg;
struct procfs_file *f;
struct procfs_list_entry *e;
struct proc_dir_entry *parent = NULL;
struct procfs_list_entry *parent = NULL;
char name[PROCFS_NAME_MAX];
char *r;
unsigned long irqflags;
@@ -230,8 +266,10 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
if ((strncmp(e->fname, f->fname, PROCFS_NAME_MAX) == 0) &&
(e->osnum == osnum)) {
list_del(&e->list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
e->entry->read_proc = NULL;
e->entry->data = NULL;
#endif
parent = e->parent;
kfree(e);
r = strrchr(f->fname, '/');
@@ -241,7 +279,7 @@ void procfs_delete(void *__os, int osnum, unsigned long arg)
strncpy(name, r + 1, PROCFS_NAME_MAX);
}
dprintk("found and remove %s from the list.\n", name);
remove_proc_entry(name, parent);
remove_proc_entry(name, parent->entry);
break;
}
}
@@ -271,27 +309,50 @@ void procfs_answer(unsigned int arg, int err)
* This function conforms to the 2) way of fs/proc/generic.c
* from linux-2.6.39.4.
*/
int mckernel_procfs_read(char *buffer, char **start, off_t offset,
int count, int *peof, void *dat)
static ssize_t
mckernel_procfs_read(struct file *file, char __user *buf, size_t nbytes,
loff_t *ppos)
{
struct procfs_list_entry *e = dat;
struct inode * inode = file->f_path.dentry->d_inode;
char *kern_buffer;
int order = 0;
volatile struct procfs_read *r;
struct ikc_scd_packet isp;
int ret, retrycount = 0;
unsigned long pbuf;
unsigned long count = nbytes;
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
struct proc_dir_entry *dp = PDE(inode);
struct procfs_list_entry *e = dp->data;
#else
struct procfs_list_entry *e = PDE_DATA(inode);
#endif
loff_t offset = *ppos;
dprintk("mckernel_procfs_read: invoked for %s\n", e->fname);
if (count <= 0 || dat == NULL || offset < 0) {
dprintk("mckernel_procfs_read: invoked for %s, offset: %lu, count: %d\n",
e->fname, offset, count);
if (count <= 0 || offset < 0) {
return 0;
}
pbuf = virt_to_phys(buffer);
if (pbuf / PAGE_SIZE != (pbuf + count - 1) / PAGE_SIZE) {
/* Truncate the read count upto the nearest page boundary */
count = ((pbuf + count - 1) / PAGE_SIZE) * PAGE_SIZE - pbuf;
while ((1 << order) < count) ++order;
if (order > 12) {
order -= 12;
}
else {
order = 1;
}
/* NOTE: we need physically contigous memory to pass through IKC */
kern_buffer = (char *)__get_free_pages(GFP_KERNEL, order);
if (!kern_buffer) {
printk("mckernel_procfs_read(): ERROR: allocating kernel buffer\n");
return -ENOMEM;
}
pbuf = virt_to_phys(kern_buffer);
r = kmalloc(sizeof(struct procfs_read), GFP_KERNEL);
if (r == NULL) {
return -ENOMEM;
@@ -309,18 +370,23 @@ retry:
isp.msg = SCD_MSG_PROCFS_REQUEST;
isp.ref = e->cpu;
isp.arg = virt_to_phys(r);
ret = mcctrl_ikc_send(e->os, e->cpu, &isp);
if (ret < 0) {
goto out; /* error */
}
/* Wait for a reply. */
ret = -EIO; /* default exit code */
dprintk("now wait for a relpy\n");
/* Wait for the status field of the procfs_read structure set ready. */
if (wait_event_interruptible_timeout(procfsq, r->status != 0, HZ) == 0) {
kprintf("ERROR: mckernel_procfs_read: timeout (1 sec).\n");
goto out;
}
/* Wake up and check the result. */
dprintk("mckernel_procfs_read: woke up. ret: %d, eof: %d\n", r->ret, r->eof);
if ((r->ret == 0) && (r->eof != 1)) {
@@ -335,13 +401,20 @@ retry:
dprintk("retry\n");
goto retry;
}
if (r->eof == 1) {
dprintk("reached end of file.\n");
*peof = 1;
if (r->ret > 0) {
if (copy_to_user(buf, kern_buffer, r->ret)) {
kprintf("ERROR: mckernel_procfs_read: copy_to_user failed.\n");
ret = -EFAULT;
goto out;
}
*ppos += r->ret;
}
*start = buffer;
ret = r->ret;
out:
free_pages((uintptr_t)kern_buffer, order);
kfree((void *)r);
return ret;
@@ -367,7 +440,7 @@ void procfs_exit(int osnum) {
int error;
mm_segment_t old_fs = get_fs();
struct kstat stat;
struct proc_dir_entry *parent;
struct procfs_list_entry *parent;
struct procfs_list_entry *e, *temp = NULL;
unsigned long irqflags;
@@ -378,8 +451,10 @@ void procfs_exit(int osnum) {
if (e->osnum == osnum) {
dprintk("found entry for %s.\n", e->fname);
list_del(&e->list);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
e->entry->read_proc = NULL;
e->entry->data = NULL;
#endif
parent = e->parent;
r = strrchr(e->fname, '/');
if (r == NULL) {
@@ -387,7 +462,9 @@ void procfs_exit(int osnum) {
} else {
r += 1;
}
remove_proc_entry(r, parent);
if (parent) {
remove_proc_entry(r, parent->entry);
}
dprintk("free the entry\n");
kfree(e);
}

View File

@@ -13,6 +13,8 @@
* Copyright (C) 2012 - 2013 Hitachi, Ltd.
* \author Balazs Gerofi <bgerofi@is.s.u-tokyo.ac.jp> \par
* Copyright (C) 2013 The University of Tokyo
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 RIKEN AICS
*/
/*
* HISTORY:
@@ -43,6 +45,7 @@
#include <asm/delay.h>
#include <asm/io.h>
#include "mcctrl.h"
#include <linux/version.h>
#define ALIGN_WAIT_BUF(z) (((z + 63) >> 6) << 6)
@@ -319,6 +322,109 @@ out:
return error;
}
#define RUS_PAGE_HASH_SHIFT 8
#define RUS_PAGE_HASH_SIZE (1UL << RUS_PAGE_HASH_SHIFT)
#define RUS_PAGE_HASH_MASK (RUS_PAGE_HASH_SIZE - 1)
struct list_head rus_page_hash[RUS_PAGE_HASH_SIZE];
spinlock_t rus_page_hash_lock;
struct rus_page {
struct list_head hash;
struct page *page;
int refcount;
int put_page;
};
void rus_page_hash_init(void)
{
int i;
spin_lock_init(&rus_page_hash_lock);
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
INIT_LIST_HEAD(&rus_page_hash[i]);
}
}
/* rus_page_hash_lock must be held */
struct rus_page *_rus_page_hash_lookup(struct page *page)
{
struct rus_page *rp = NULL;
struct rus_page *rp_iter;
list_for_each_entry(rp_iter,
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK], hash) {
if (rp_iter->page != page)
continue;
rp = rp_iter;
break;
}
return rp;
}
static int rus_page_hash_insert(struct page *page)
{
int ret = 0;
struct rus_page *rp;
spin_lock(&rus_page_hash_lock);
rp = _rus_page_hash_lookup(page);
if (!rp) {
rp = kmalloc(sizeof(*rp), GFP_ATOMIC);
if (!rp) {
printk("rus_page_add_hash(): error allocating rp\n");
ret = -ENOMEM;
goto out;
}
rp->page = page;
rp->put_page = 0;
get_page(page);
rp->refcount = 0; /* Will be increased below */
list_add_tail(&rp->hash,
&rus_page_hash[page_to_pfn(page) & RUS_PAGE_HASH_MASK]);
}
++rp->refcount;
out:
spin_unlock(&rus_page_hash_lock);
return ret;
}
void rus_page_hash_put_pages(void)
{
int i;
struct rus_page *rp_iter;
struct rus_page *rp_iter_next;
spin_lock(&rus_page_hash_lock);
for (i = 0; i < RUS_PAGE_HASH_SIZE; ++i) {
list_for_each_entry_safe(rp_iter, rp_iter_next,
&rus_page_hash[i], hash) {
list_del(&rp_iter->hash);
put_page(rp_iter->page);
kfree(rp_iter);
}
}
spin_unlock(&rus_page_hash_lock);
}
/*
* By remap_pfn_range(), VM_PFN_AT_MMAP may be raised.
* VM_PFN_AT_MMAP cause the following problems.
@@ -329,6 +435,7 @@ out:
* These problems may be solved in linux-3.7.
* It uses vm_insert_pfn() until it is fixed.
*/
#define USE_VM_INSERT_PFN 1
static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
@@ -409,15 +516,11 @@ static int rus_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (pfn_valid(pfn+pix)) {
page = pfn_to_page(pfn+pix);
if (!page_count(page)) {
get_page(page);
/*
* TODO:
* The pages which get_page() has been called with
* should be recorded. Because these pages have to
* be passed to put_page() before they are freed.
*/
if ((error = rus_page_hash_insert(page)) < 0) {
printk("rus_vm_fault: error hashing page??\n");
}
error = vm_insert_page(vma, rva+(pix*PAGE_SIZE), page);
if (error) {
printk("vm_insert_page: %d\n", error);
@@ -448,7 +551,11 @@ static struct vm_operations_struct rus_vmops = {
static int rus_mmap(struct file *file, struct vm_area_struct *vma)
{
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0)
vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND | VM_MIXEDMAP;
#else
vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND | VM_MIXEDMAP;
#endif
vma->vm_ops = &rus_vmops;
return 0;
}
@@ -491,9 +598,18 @@ int reserve_user_space(struct mcctrl_usrdata *usrdata, unsigned long *startp, un
if (vma) {
end = (vma->vm_start - GAP_FOR_MCEXEC) & ~(GAP_FOR_MCEXEC - 1);
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
start = do_mmap_pgoff(file, 0, end,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
#endif
up_write(&current->mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
start = vm_mmap(file, 0, end,
PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, 0);
#endif
revert_creds(original);
put_cred(promoted);
@@ -782,19 +898,19 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
error = vfs_fstat(fd, &st);
if (error) {
printk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
dprintk("pager_req_create(%d,%lx):vfs_stat failed. %d\n", fd, (long)result_pa, error);
goto out;
}
if (!S_ISREG(st.mode)) {
error = -ESRCH;
printk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
dprintk("pager_req_create(%d,%lx):not VREG. %x\n", fd, (long)result_pa, st.mode);
goto out;
}
file = fget(fd);
if (!file) {
error = -EBADF;
printk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
dprintk("pager_req_create(%d,%lx):file not found. %d\n", fd, (long)result_pa, error);
goto out;
}
@@ -817,7 +933,7 @@ static int pager_req_create(ihk_os_t os, int fd, uintptr_t result_pa)
}
if (!(maxprot & PROT_READ)) {
error = -EACCES;
printk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
dprintk("pager_req_create(%d,%lx):cannot read file. %d\n", fd, (long)result_pa, error);
goto out;
}
@@ -1100,7 +1216,7 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
struct pager_map_result *resp;
uintptr_t phys;
printk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx)\n", os, fd, len, off, result_rpa);
pager = kzalloc(sizeof(*pager), GFP_KERNEL);
if (!pager) {
error = -ENOMEM;
@@ -1128,8 +1244,17 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
down_write(&current->mm->mmap_sem);
#define ANY_WHERE 0
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
va = do_mmap_pgoff(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff);
#endif
up_write(&current->mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
va = vm_mmap(file, ANY_WHERE, len, maxprot, MAP_SHARED, pgoff << PAGE_SHIFT);
#endif
if (IS_ERR_VALUE(va)) {
printk("pager_req_map(%p,%d,%lx,%lx,%lx):do_mmap_pgoff failed. %d\n", os, fd, len, off, result_rpa, (int)va);
error = va;
@@ -1140,6 +1265,9 @@ static int pager_req_map(ihk_os_t os, int fd, size_t len, off_t off, uintptr_t r
pager->map_uaddr = va;
pager->map_len = len;
pager->map_off = off;
dprintk("pager_req_map(%s): 0x%lx - 0x%lx (len: %lu)\n",
file->f_dentry->d_name.name, va, va + len, len);
phys = ihk_device_map_memory(dev, result_rpa, sizeof(*resp));
resp = ihk_device_map_virtual(dev, phys, sizeof(*resp), NULL, 0);
@@ -1158,10 +1286,11 @@ out:
if (pager) {
kfree(pager);
}
printk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
dprintk("pager_req_map(%p,%d,%lx,%lx,%lx): %d\n", os, fd, len, off, result_rpa, error);
return error;
}
static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppfn_rpa)
{
const ihk_device_t dev = ihk_os_to_dev(os);
@@ -1176,7 +1305,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
uintptr_t phys;
uintptr_t *ppfn;
printk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
dprintk("pager_req_pfn(%p,%lx,%lx)\n", os, handle, off);
if ((off < pager->map_off) || ((pager->map_off+pager->map_len) < (off + PAGE_SIZE))) {
error = -ERANGE;
@@ -1201,6 +1330,12 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
pfn = (uintptr_t)pte_pfn(*pte) << PAGE_SHIFT;
#define PFN_PRESENT ((uintptr_t)1 << 0)
pfn |= PFN_VALID | PFN_PRESENT;
/* Check if mapping is write-combined */
if ((pte_flags(*pte) & _PAGE_PWT) &&
!(pte_flags(*pte) & _PAGE_PCD)) {
pfn |= _PAGE_PWT;
}
}
pte_unmap(pte);
}
@@ -1216,7 +1351,7 @@ static int pager_req_pfn(ihk_os_t os, uintptr_t handle, off_t off, uintptr_t ppf
error = 0;
out:
printk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
dprintk("pager_req_pfn(%p,%lx,%lx): %d %lx\n", os, handle, off, error, pfn);
return error;
}
@@ -1225,11 +1360,15 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
struct pager * const pager = (void *)handle;
int error;
printk("pager_req_unmap(%p,%lx)\n", os, handle);
dprintk("pager_req_unmap(%p,%lx)\n", os, handle);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
down_write(&current->mm->mmap_sem);
error = do_munmap(current->mm, pager->map_uaddr, pager->map_len);
up_write(&current->mm->mmap_sem);
#else
error = vm_munmap(pager->map_uaddr, pager->map_len);
#endif
if (error) {
printk("pager_req_unmap(%p,%lx):do_munmap failed. %d\n", os, handle, error);
@@ -1237,7 +1376,7 @@ static int pager_req_unmap(ihk_os_t os, uintptr_t handle)
}
kfree(pager);
printk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
dprintk("pager_req_unmap(%p,%lx): %d\n", os, handle, error);
return error;
}
@@ -1325,9 +1464,18 @@ static int remap_user_space(uintptr_t rva, size_t len, int prot)
start = rva;
pgoff = vma->vm_pgoff + ((rva - vma->vm_start) >> PAGE_SHIFT);
#if LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0)
map = do_mmap_pgoff(file, start, len,
prot, MAP_FIXED|MAP_SHARED, pgoff);
#endif
up_write(&mm->mmap_sem);
#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,5,0)
map = vm_mmap(file, start, len,
prot, MAP_FIXED|MAP_SHARED, pgoff << PAGE_SHIFT);
#endif
out:
dprintk("remap_user_space(%lx,%lx,%x): %lx (%ld)\n",
rva, len, prot, (long)map, (long)map);
@@ -1469,6 +1617,8 @@ fail:
return error;
}
#define SCHED_CHECK_SAME_OWNER 0x01
#define SCHED_CHECK_ROOT 0x02
int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall_request *sc)
{
@@ -1556,6 +1706,71 @@ int __do_in_kernel_syscall(ihk_os_t os, struct mcctrl_channel *c, struct syscall
error = writecore(os, sc->args[1], sc->args[0]);
ret = 0;
break;
case __NR_sched_setparam: {
switch (sc->args[0]) {
case SCHED_CHECK_SAME_OWNER: {
const struct cred *cred = current_cred();
const struct cred *pcred;
bool match;
struct task_struct *p;
int pid = sc->args[1];
rcu_read_lock();
p = pid_task(find_get_pid(pid), PIDTYPE_PID);
if (!p) {
rcu_read_unlock();
ret = -ESRCH;
goto sched_setparam_out;
}
rcu_read_unlock();
rcu_read_lock();
pcred = __task_cred(p);
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
match = (uid_eq(cred->euid, pcred->euid) ||
uid_eq(cred->euid, pcred->uid));
#else
match = ((cred->euid == pcred->euid) ||
(cred->euid == pcred->uid));
#endif
rcu_read_unlock();
if (match) {
ret = 0;
}
else {
ret = -EPERM;
}
break;
}
case SCHED_CHECK_ROOT: {
const struct cred *cred = current_cred();
bool match;
#if LINUX_VERSION_CODE > KERNEL_VERSION(3,4,0)
match = uid_eq(cred->euid, GLOBAL_ROOT_UID);
#else
match = (cred->euid == 0);
#endif
if (match) {
ret = 0;
}
else {
ret = -EPERM;
}
break;
}
}
sched_setparam_out:
break;
}
default:
error = -ENOSYS;

View File

@@ -40,7 +40,6 @@
#include <ctype.h>
#include <sys/mman.h>
#include <asm/unistd.h>
#include "../include/uprotocol.h"
#include <sched.h>
#include <termios.h>
@@ -49,6 +48,7 @@
#include <sys/stat.h>
#include <sys/resource.h>
#include <sys/utsname.h>
#include <sys/fsuid.h>
#include <time.h>
#include <sys/time.h>
#include <signal.h>
@@ -56,7 +56,10 @@
#include <dirent.h>
#include <sys/syscall.h>
#include <pthread.h>
#include <semaphore.h>
#include <signal.h>
#include <sys/signalfd.h>
#include "../include/uprotocol.h"
//#define DEBUG
@@ -97,6 +100,13 @@ typedef unsigned char cc_t;
typedef unsigned int speed_t;
typedef unsigned int tcflag_t;
struct sigfd {
struct sigfd *next;
int sigpipe[2];
};
struct sigfd *sigfdtop;
#ifdef NCCS
#undef NCCS
#endif
@@ -111,14 +121,29 @@ struct kernel_termios {
cc_t c_cc[NCCS]; /* control characters */
};
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid);
int main_loop(int fd, int cpu, pthread_mutex_t *lock);
static int mcosid;
static int fd;
static char *exec_path = NULL;
static char *altroot;
static const char rlimit_stack_envname[] = "MCKERNEL_RLIMIT_STACK";
static int ischild;
struct fork_sync {
pid_t pid;
int status;
sem_t sem;
};
struct fork_sync_container {
struct fork_sync_container *next;
struct fork_sync *fs;
};
struct fork_sync_container *fork_sync_top;
pthread_mutex_t fork_sync_mutex = PTHREAD_MUTEX_INITIALIZER;
pid_t gettid(void)
{
return syscall(SYS_gettid);
@@ -158,6 +183,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
desc = malloc(sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * nhdrs);
desc->shell_path[0] = '\0';
fseek(fp, hdr.e_phoff, SEEK_SET);
j = 0;
desc->num_sections = nhdrs;
@@ -218,7 +244,7 @@ struct program_load_desc *load_elf(FILE *fp, char **interp_pathp)
desc->pid = getpid();
desc->pgid = getpgid(0);
desc->entry = hdr.e_entry;
ioctl(fd, MCEXEC_UP_GET_CREDV, desc->cred);
desc->at_phdr = load_addr + hdr.e_phoff;
desc->at_phent = sizeof(phdr);
desc->at_phnum = hdr.e_phnum;
@@ -546,11 +572,32 @@ int load_elf_desc(char *filename, struct program_load_desc **desc_p,
/* Drop old name if exists */
if (exec_path) {
free(exec_path);
exec_path = NULL;
}
exec_path = strdup(filename);
if (!exec_path) {
fprintf(stderr, "WARNING: strdup(filename) failed\n");
if (!strncmp("/", filename, 1)) {
exec_path = strdup(filename);
if (!exec_path) {
fprintf(stderr, "WARNING: strdup(filename) failed\n");
return ENOMEM;
}
}
else {
char *cwd = getcwd(NULL, 0);
if (!cwd) {
fprintf(stderr, "Error: getting current working dir pathname\n");
return ENOMEM;
}
exec_path = malloc(strlen(cwd) + strlen(filename) + 2);
if (!exec_path) {
fprintf(stderr, "Error: allocating exec_path\n");
return ENOMEM;
}
sprintf(exec_path, "%s/%s", cwd, filename);
free(cwd);
}
desc = load_elf(fp, &interp_path);
@@ -764,7 +811,6 @@ struct thread_data_s {
pthread_t thread_id;
int fd;
int cpu;
int mcosid;
int ret;
pid_t tid;
int terminate;
@@ -785,11 +831,13 @@ static void *main_loop_thread_func(void *arg)
td->tid = gettid();
td->remote_tid = (int)td->tid;
pthread_barrier_wait(&init_ready);
td->ret = main_loop(td->fd, td->cpu, td->lock, td->mcosid);
td->ret = main_loop(td->fd, td->cpu, td->lock);
return NULL;
}
#define LOCALSIG SIGURG
void
sendsig(int sig, siginfo_t *siginfo, void *context)
{
@@ -801,7 +849,10 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
struct signal_desc sigdesc;
if(siginfo->si_pid == pid &&
siginfo->si_signo == SIGINT)
siginfo->si_signo == LOCALSIG)
return;
if(siginfo->si_signo == SIGCHLD)
return;
for(i = 0; i < ncpu; i++){
@@ -839,6 +890,94 @@ sendsig(int sig, siginfo_t *siginfo, void *context)
}
}
long
act_signalfd4(struct syscall_wait_desc *w)
{
struct sigfd *sfd;
struct sigfd *sb;
int mode = w->sr.args[0];
int flags;
int tmp;
int rc = 0;
struct signalfd_siginfo *info;
switch(mode){
case 0: /* new signalfd */
sfd = malloc(sizeof(struct sigfd));
tmp = w->sr.args[1];
flags = 0;
if(tmp & SFD_NONBLOCK)
flags |= O_NONBLOCK;
if(tmp & SFD_CLOEXEC)
flags |= O_CLOEXEC;
pipe2(sfd->sigpipe, flags);
sfd->next = sigfdtop;
sigfdtop = sfd;
rc = sfd->sigpipe[0];
break;
case 1: /* close signalfd */
tmp = w->sr.args[1];
for(sfd = sigfdtop, sb = NULL; sfd; sb = sfd, sfd = sfd->next)
if(sfd->sigpipe[0] == tmp)
break;
if(!sfd)
rc = -EBADF;
else{
if(sb)
sb->next = sfd->next;
else
sigfdtop = sfd->next;
close(sfd->sigpipe[0]);
close(sfd->sigpipe[1]);
free(sfd);
}
break;
case 2: /* push signal */
tmp = w->sr.args[1];
for(sfd = sigfdtop; sfd; sfd = sfd->next)
if(sfd->sigpipe[0] == tmp)
break;
if(!sfd)
rc = -EBADF;
else{
info = (struct signalfd_siginfo *)w->sr.args[2];
write(sfd->sigpipe[1], info, sizeof(struct signalfd_siginfo));
}
break;
}
return rc;
}
void
act_sigaction(struct syscall_wait_desc *w)
{
struct sigaction act;
int sig;
sig = w->sr.args[0];
if (sig == SIGCHLD || sig == LOCALSIG)
return;
memset(&act, '\0', sizeof act);
if (w->sr.args[1] == (unsigned long)SIG_IGN)
act.sa_handler = SIG_IGN;
else{
act.sa_sigaction = sendsig;
act.sa_flags = SA_SIGINFO;
}
sigaction(sig, &act, NULL);
}
void
act_sigprocmask(struct syscall_wait_desc *w)
{
sigset_t set;
sigemptyset(&set);
memcpy(&set, &w->sr.args[0], sizeof(unsigned long));
sigdelset(&set, LOCALSIG);
sigprocmask(SIG_SETMASK, &set, NULL);
}
static int reduce_stack(struct rlimit *orig_rlim, char *argv[])
{
int n;
@@ -891,8 +1030,7 @@ void init_sigaction(void)
master_tid = gettid();
for (i = 1; i <= 64; i++) {
if (i != SIGCHLD && i != SIGCONT && i != SIGSTOP &&
i != SIGTSTP && i != SIGTTIN && i != SIGTTOU) {
if (i != SIGKILL && i != SIGSTOP && i != SIGCHLD) {
struct sigaction act;
sigaction(i, NULL, &act);
@@ -904,7 +1042,7 @@ void init_sigaction(void)
}
}
void init_worker_threads(int fd, int mcosid)
void init_worker_threads(int fd)
{
int i;
@@ -916,7 +1054,6 @@ void init_worker_threads(int fd, int mcosid)
thread_data[i].fd = fd;
thread_data[i].cpu = i;
thread_data[i].mcosid = mcosid;
thread_data[i].lock = &lock;
thread_data[i].init_ready = &init_ready;
thread_data[i].terminate = 0;
@@ -930,7 +1067,75 @@ void init_worker_threads(int fd, int mcosid)
}
pthread_barrier_wait(&init_ready);
}
}
#define MCK_RLIMIT_AS 0
#define MCK_RLIMIT_CORE 1
#define MCK_RLIMIT_CPU 2
#define MCK_RLIMIT_DATA 3
#define MCK_RLIMIT_FSIZE 4
#define MCK_RLIMIT_LOCKS 5
#define MCK_RLIMIT_MEMLOCK 6
#define MCK_RLIMIT_MSGQUEUE 7
#define MCK_RLIMIT_NICE 8
#define MCK_RLIMIT_NOFILE 9
#define MCK_RLIMIT_NPROC 10
#define MCK_RLIMIT_RSS 11
#define MCK_RLIMIT_RTPRIO 12
#define MCK_RLIMIT_RTTIME 13
#define MCK_RLIMIT_SIGPENDING 14
#define MCK_RLIMIT_STACK 15
static int rlimits[] = {
#ifdef RLIMIT_AS
RLIMIT_AS, MCK_RLIMIT_AS,
#endif
#ifdef RLIMIT_CORE
RLIMIT_CORE, MCK_RLIMIT_CORE,
#endif
#ifdef RLIMIT_CPU
RLIMIT_CPU, MCK_RLIMIT_CPU,
#endif
#ifdef RLIMIT_DATA
RLIMIT_DATA, MCK_RLIMIT_DATA,
#endif
#ifdef RLIMIT_FSIZE
RLIMIT_FSIZE, MCK_RLIMIT_FSIZE,
#endif
#ifdef RLIMIT_LOCKS
RLIMIT_LOCKS, MCK_RLIMIT_LOCKS,
#endif
#ifdef RLIMIT_MEMLOCK
RLIMIT_MEMLOCK, MCK_RLIMIT_MEMLOCK,
#endif
#ifdef RLIMIT_MSGQUEUE
RLIMIT_MSGQUEUE,MCK_RLIMIT_MSGQUEUE,
#endif
#ifdef RLIMIT_NICE
RLIMIT_NICE, MCK_RLIMIT_NICE,
#endif
#ifdef RLIMIT_NOFILE
RLIMIT_NOFILE, MCK_RLIMIT_NOFILE,
#endif
#ifdef RLIMIT_NPROC
RLIMIT_NPROC, MCK_RLIMIT_NPROC,
#endif
#ifdef RLIMIT_RSS
RLIMIT_RSS, MCK_RLIMIT_RSS,
#endif
#ifdef RLIMIT_RTPRIO
RLIMIT_RTPRIO, MCK_RLIMIT_RTPRIO,
#endif
#ifdef RLIMIT_RTTIME
RLIMIT_RTTIME, MCK_RLIMIT_RTTIME,
#endif
#ifdef RLIMIT_SIGPENDING
RLIMIT_SIGPENDING,MCK_RLIMIT_SIGPENDING,
#endif
#ifdef RLIMIT_STACK
RLIMIT_STACK, MCK_RLIMIT_STACK,
#endif
};
char dev[64];
@@ -952,7 +1157,6 @@ int main(int argc, char **argv)
unsigned long lcur;
unsigned long lmax;
int target_core = 0;
int mcosid = 0;
int opt;
char path[1024];
char *shell = NULL;
@@ -1056,7 +1260,9 @@ int main(int argc, char **argv)
if (shell) {
argv[optind] = path;
}
for(i = 0; i < sizeof(rlimits) / sizeof(int); i += 2)
getrlimit(rlimits[i], &desc->rlimit[rlimits[i + 1]]);
desc->envs_len = envs_len;
desc->envs = envs;
//print_flat(envs);
@@ -1091,8 +1297,8 @@ int main(int argc, char **argv)
rlim_stack.rlim_cur = lcur;
rlim_stack.rlim_max = lmax;
}
desc->rlimit_stack_cur = rlim_stack.rlim_cur;
desc->rlimit_stack_max = rlim_stack.rlim_max;
desc->rlimit[MCK_RLIMIT_STACK].rlim_cur = rlim_stack.rlim_cur;
desc->rlimit[MCK_RLIMIT_STACK].rlim_max = rlim_stack.rlim_max;
ncpu = ioctl(fd, MCEXEC_UP_GET_CPU, 0);
if(ncpu == -1){
@@ -1173,7 +1379,7 @@ int main(int argc, char **argv)
init_sigaction();
init_worker_threads(fd, mcosid);
init_worker_threads(fd);
if (ioctl(fd, MCEXEC_UP_START_IMAGE, (unsigned long)desc) != 0) {
perror("exec");
@@ -1244,13 +1450,13 @@ static void
kill_thread(unsigned long cpu)
{
if(cpu >= 0 && cpu < ncpu){
pthread_kill(thread_data[cpu].thread_id, SIGINT);
pthread_kill(thread_data[cpu].thread_id, LOCALSIG);
}
else{
int i;
for (i = 0; i < ncpu; ++i) {
pthread_kill(thread_data[i].thread_id, SIGINT);
pthread_kill(thread_data[i].thread_id, LOCALSIG);
}
}
}
@@ -1351,7 +1557,32 @@ int close_cloexec_fds(int mcos_fd)
return 0;
}
int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
char *
chgpath(char *in, char *buf)
{
char *fn = in;
struct stat sb;
if (!strncmp(fn, "/proc/self/", 11)){
sprintf(buf, "/proc/mcos%d/%d/%s", mcosid, getpid(), fn + 11);
fn = buf;
}
else if(!strncmp(fn, "/proc/", 6)){
sprintf(buf, "/proc/mcos%d/%s", mcosid, fn + 6);
fn = buf;
}
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
}
else
return in;
if(stat(fn, &sb) == -1)
return in;
return fn;
}
int main_loop(int fd, int cpu, pthread_mutex_t *lock)
{
struct syscall_wait_desc w;
long ret;
@@ -1389,14 +1620,8 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
}
__dprintf("open: %s\n", pathbuf);
fn = pathbuf;
if(!strncmp(fn, "/proc/", 6)){
sprintf(tmpbuf, "/proc/mcos%d/%s", mcosid, fn + 6);
fn = tmpbuf;
}
else if(!strcmp(fn, "/sys/devices/system/cpu/online")){
fn = "/admin/fs/attached/files/sys/devices/system/cpu/online";
}
fn = chgpath(pathbuf, tmpbuf);
ret = open(fn, w.sr.args[1], w.sr.args[2]);
SET_ERR(ret);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
@@ -1505,113 +1730,157 @@ int main_loop(int fd, int cpu, pthread_mutex_t *lock, int mcosid)
}
case __NR_fork: {
int child;
int sync_pipe_fd[2];
char sync_msg;
struct fork_sync *fs;
struct fork_sync_container *fsc;
struct fork_sync_container *fp;
struct fork_sync_container *fb;
int rc = -1;
pid_t pid;
if (pipe(sync_pipe_fd) != 0) {
fprintf(stderr, "fork(): error creating sync pipe\n");
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
fsc = malloc(sizeof(struct fork_sync_container));
memset(fsc, '\0', sizeof(struct fork_sync_container));
pthread_mutex_lock(&fork_sync_mutex);
fsc->next = fork_sync_top;
fork_sync_top = fsc;
pthread_mutex_unlock(&fork_sync_mutex);
fsc->fs = fs = mmap(NULL, sizeof(struct fork_sync),
PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_ANONYMOUS, -1, 0);
if(fs == (void *)-1){
goto fork_err;
}
memset(fs, '\0', sizeof(struct fork_sync));
sem_init(&fs->sem, 1, 0);
pid = fork();
switch (pid) {
/* Error */
case -1:
fprintf(stderr, "fork(): error forking child process\n");
rc = -errno;
break;
/* Child process */
case 0: {
int i;
int ret = 1;
struct newprocess_desc npdesc;
ischild = 1;
/* Reopen device fd */
close(fd);
fd = open(dev, O_RDWR);
if (fd < 0) {
fs->status = -errno;
fprintf(stderr, "ERROR: opening %s\n", dev);
goto fork_child_sync_pipe;
}
/* Reinit signals and syscall threads */
init_sigaction();
init_worker_threads(fd);
__dprintf("pid(%d): signals and syscall threads OK\n",
getpid());
/* Hold executable also in the child process */
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
!= 0) {
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
exec_path, ret, fd);
fs->status = -errno;
goto fork_child_sync_pipe;
}
fork_child_sync_pipe:
sem_post(&fs->sem);
if (fs->status)
exit(1);
for (fp = fork_sync_top; fp;) {
fb = fp->next;
if (fp->fs)
munmap(fp->fs, sizeof(struct fork_sync));
free(fp);
fp = fb;
}
fork_sync_top = NULL;
pthread_mutex_init(&fork_sync_mutex, NULL);
npdesc.pid = getpid();
ioctl(fd, MCEXEC_UP_NEW_PROCESS, &npdesc);
/* TODO: does the forked thread run in a pthread context? */
for (i = 0; i <= ncpu; ++i) {
pthread_join(thread_data[i].thread_id, NULL);
}
return ret;
}
/* Parent */
default:
fs->pid = pid;
while ((rc = sem_trywait(&fs->sem)) == -1 && (errno == EAGAIN || errno == EINTR)) {
int st;
int wrc;
wrc = waitpid(pid, &st, WNOHANG);
if(wrc == pid) {
fs->status = -ENOMEM;
break;
}
sched_yield();
}
if (fs->status != 0) {
fprintf(stderr, "fork(): error with child process after fork\n");
rc = fs->status;
break;
}
rc = pid;
break;
}
child = fork();
switch (child) {
/* Error */
case -1:
fprintf(stderr, "fork(): error forking child process\n");
close(sync_pipe_fd[0]);
close(sync_pipe_fd[1]);
do_syscall_return(fd, cpu, -1, 0, 0, 0, 0);
break;
/* Child process */
case 0: {
int i;
int ret = 1;
ischild = 1;
/* Reopen device fd */
close(fd);
fd = open(dev, O_RDWR);
if (fd < 0) {
/* TODO: tell parent something went wrong? */
fprintf(stderr, "ERROR: opening %s\n", dev);
/* Tell parent something went wrong */
sync_msg = 1;
goto fork_child_sync_pipe;
}
/* Reinit signals and syscall threads */
init_sigaction();
init_worker_threads(fd, mcosid);
__dprintf("pid(%d): signals and syscall threads OK\n",
getpid());
/* Hold executable also in the child process */
if ((ret = ioctl(fd, MCEXEC_UP_OPEN_EXEC, exec_path))
!= 0) {
fprintf(stderr, "Error: open_exec() fails for %s: %d (fd: %d)\n",
exec_path, ret, fd);
goto fork_child_sync_pipe;
}
/* Tell parent everything went OK */
sync_msg = 0;
fork_child_sync_pipe:
if (write(sync_pipe_fd[1], &sync_msg, 1) != 1) {
fprintf(stderr, "ERROR: writing sync pipe\n");
goto fork_child_out;
}
ret = 0;
fork_child_out:
close(sync_pipe_fd[0]);
close(sync_pipe_fd[1]);
/* TODO: does the forked thread run in a pthread context? */
for (i = 0; i <= ncpu; ++i) {
pthread_join(thread_data[i].thread_id, NULL);
}
return ret;
}
/* Parent */
default:
if (read(sync_pipe_fd[0], &sync_msg, 1) != 1) {
fprintf(stderr, "fork(): error reading sync message\n");
child = -1;
goto sync_out;
}
if (sync_msg != 0) {
fprintf(stderr, "fork(): error with child process after fork\n");
child = -1;
goto sync_out;
}
sync_out:
close(sync_pipe_fd[0]);
close(sync_pipe_fd[1]);
do_syscall_return(fd, cpu, child, 0, 0, 0, 0);
sem_destroy(&fs->sem);
munmap(fs, sizeof(struct fork_sync));
fork_err:
pthread_mutex_lock(&fork_sync_mutex);
for(fp = fork_sync_top, fb = NULL; fp; fb = fp, fp = fp->next)
if(fp == fsc)
break;
if(fp){
if(fb)
fb->next = fsc->next;
else
fork_sync_top = fsc->next;
}
pthread_mutex_unlock(&fork_sync_mutex);
do_syscall_return(fd, cpu, rc, 0, 0, 0, 0);
break;
}
case __NR_wait4: {
int status;
int ret;
pid_t pid = w.sr.args[0];
int options = w.sr.args[2];
siginfo_t info;
int opt;
if ((ret = waitpid(pid, &status, 0)) != pid) {
fprintf(stderr, "ERROR: waiting for %lu\n", w.sr.args[0]);
opt = WEXITED | (options & WNOWAIT);
memset(&info, '\0', sizeof info);
while((ret = waitid(P_PID, pid, &info, opt)) == -1 &&
errno == EINTR);
if(ret == 0){
ret = info.si_pid;
}
if(ret != pid) {
fprintf(stderr, "ERROR: waiting for %lu rc=%d errno=%d\n", w.sr.args[0], ret, errno);
}
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
@@ -1747,6 +2016,32 @@ return_execve2:
break;
}
case __NR_signalfd4:
ret = act_signalfd4(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_rt_sigaction:
act_sigaction(&w);
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
break;
case __NR_rt_sigprocmask:
act_sigprocmask(&w);
do_syscall_return(fd, cpu, 0, 0, 0, 0, 0);
break;
case __NR_setfsuid:
if(w.sr.args[1] == 1){
ioctl(fd, MCEXEC_UP_GET_CRED, w.sr.args[0]);
ret = 0;
}
else{
ret = setfsuid(w.sr.args[0]);
}
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
case __NR_close:
if(w.sr.args[0] == fd)
ret = -EBADF;
@@ -1756,8 +2051,8 @@ return_execve2:
break;
default:
ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
ret = do_generic_syscall(&w);
do_syscall_return(fd, cpu, ret, 0, 0, 0, 0);
break;
}

View File

@@ -6,7 +6,7 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o
OBJS += zeroobj.o procfs.o devobj.o
DEPSRCS=$(wildcard $(SRC)/*.c)
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__
CFLAGS += -I$(SRC)/include -mcmodel=kernel -D__KERNEL__ -g
LDFLAGS += -e arch_start
IHKOBJ = ihk/ihk.o

View File

@@ -24,18 +24,21 @@
#include <process.h>
#include <init.h>
#include <march.h>
#include <cls.h>
int num_processors = 1;
static volatile int ap_stop = 1;
extern void zero_tsc(void);
static void ap_wait(void)
{
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
while (ap_stop) {
barrier();
cpu_pause();
}
zero_tsc();
kmalloc_init();
sched_init();
@@ -64,8 +67,6 @@ void ap_init(void)
ihk_mc_init_ap();
wrmsr(MSR_IA32_TIME_STAMP_COUNTER, 0);
cpu_info = ihk_mc_get_cpu_info();
bsp_hw_id = ihk_mc_get_hardware_processor_id();
@@ -74,18 +75,17 @@ void ap_init(void)
return;
}
kprintf("BSP HW ID = %d, ", bsp_hw_id);
kprintf("AP Booting :");
kprintf("BSP HW ID = %d\n", bsp_hw_id);
for (i = 0; i < cpu_info->ncpus; i++) {
if (cpu_info->hw_ids[i] == bsp_hw_id) {
continue;
}
kprintf("AP Booting: %d (HW ID: %d)\n", i, cpu_info->hw_ids[i]);
ihk_mc_boot_cpu(cpu_info->hw_ids[i], (unsigned long)ap_wait);
kprintf(" %d", cpu_info->hw_ids[i]);
num_processors++;
}
kprintf(" .. Done\n");
kprintf("AP Booting: Done\n");
}

View File

@@ -23,6 +23,7 @@
extern int num_processors;
struct cpu_local_var *clv;
static int cpu_local_var_initialized = 0;
void cpu_local_var_init(void)
{
@@ -33,9 +34,22 @@ void cpu_local_var_init(void)
clv = allocate_pages(z, IHK_MC_AP_CRITICAL);
memset(clv, 0, z * PAGE_SIZE);
cpu_local_var_initialized = 1;
}
struct cpu_local_var *get_cpu_local_var(int id)
{
return clv + id;
}
void preempt_enable(void)
{
if (cpu_local_var_initialized)
--cpu_local_var(no_preempt);
}
void preempt_disable(void)
{
if (cpu_local_var_initialized)
++cpu_local_var(no_preempt);
}

View File

@@ -26,10 +26,14 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4

View File

@@ -26,10 +26,14 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4

View File

@@ -26,10 +26,14 @@ SECTIONS
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4
@@ -39,8 +43,4 @@ SECTIONS
. = ALIGN(4096);
_end = .;
/DISCARD/ : {
*(.eh_frame)
*(.note.gnu.build-id)
}
}

View File

@@ -0,0 +1,2 @@
CFLAGS += -mno-sse -mno-mmx -mno-sse2 -mno-3dnow
LDFLAGS += -T $(SRC)/config/smp-x86.lds

45
kernel/config/smp-x86.lds Normal file
View File

@@ -0,0 +1,45 @@
PHDRS
{
text PT_LOAD FLAGS(5);
data PT_LOAD FLAGS(7);
}
SECTIONS
{
. = 0xffffffff80001000;
_head = .;
.text : {
*(.text);
} : text
. = ALIGN(4096);
.data : {
*(.data)
*(.data.*)
} :data
.rodata : {
*(.rodata .rodata.*)
} :data
.vsyscall : ALIGN(0x1000) {
vsyscall_page = .;
. = vsyscall_page + 0x000;
*(.vsyscall.gettimeofday)
*(.vsyscall.gettimeofday.*)
. = vsyscall_page + 0x400;
*(.vsyscall.time)
. = vsyscall_page + 0x800;
*(.vsyscall.getcpu)
. = ALIGN(4096);
} : data = 0xf4
.bss : {
*(.bss .bss.*)
}
. = ALIGN(4096);
_end = .;
}

View File

@@ -28,7 +28,7 @@ void kputs(char *buf)
int len = strlen(buf);
unsigned long flags;
flags = ihk_mc_spinlock_lock(&kmsg_lock);
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
if (len + kmsg_buf.tail > kmsg_buf.len) {
kmsg_buf.tail = 0;
@@ -40,19 +40,19 @@ void kputs(char *buf)
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
kmsg_buf.tail += len;
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
}
#define KPRINTF_LOCAL_BUF_LEN 1024
int kprintf_lock()
unsigned long kprintf_lock(void)
{
return ihk_mc_spinlock_lock(&kmsg_lock);
return __ihk_mc_spinlock_lock(&kmsg_lock);
}
void kprintf_unlock(int irqflags)
void kprintf_unlock(unsigned long irqflags)
{
ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
__ihk_mc_spinlock_unlock(&kmsg_lock, irqflags);
}
/* Caller must hold kmsg_lock! */
@@ -85,7 +85,7 @@ int kprintf(const char *format, ...)
unsigned long flags;
char buf[KPRINTF_LOCAL_BUF_LEN];
flags = ihk_mc_spinlock_lock(&kmsg_lock);
flags = __ihk_mc_spinlock_lock(&kmsg_lock);
/* Copy into the local buf */
len = sprintf(buf, "[%3d]: ", ihk_mc_get_processor_id());
@@ -101,7 +101,7 @@ int kprintf(const char *format, ...)
memcpy(kmsg_buf.str + kmsg_buf.tail, buf, len);
kmsg_buf.tail += len;
ihk_mc_spinlock_unlock(&kmsg_lock, flags);
__ihk_mc_spinlock_unlock(&kmsg_lock, flags);
return len;
}

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* memory mapped device pager client
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 RIKEN AICS
*/
/*
* HISTORY:
@@ -32,9 +33,18 @@
#include <pager.h>
#include <string.h>
#include <syscall.h>
#include <process.h>
//#define DEBUG_PRINT_DEVOBJ
#ifdef DEBUG_PRINT_DEVOBJ
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
#define dkprintf(...)
#define ekprintf(...) kprintf(__VA_ARGS__)
struct devobj {
struct memobj memobj; /* must be first */
@@ -76,7 +86,7 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
struct devobj *obj = NULL;
const size_t npages = (len + PAGE_SIZE - 1) / PAGE_SIZE;
kprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
dkprintf("devobj_create(%d,%lx,%lx)\n", fd, len, off);
#define MAX_PAGES_IN_DEVOBJ (PAGE_SIZE / sizeof(uintptr_t))
if (npages > MAX_PAGES_IN_DEVOBJ) {
error = -EFBIG;
@@ -111,8 +121,8 @@ int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxp
kprintf("devobj_create(%d,%lx,%lx):map failed. %d\n", fd, len, off, error);
goto out;
}
kprintf("devobj_create:handle: %lx\n", result.handle);
kprintf("devobj_create:maxprot: %x\n", result.maxprot);
dkprintf("devobj_create:handle: %lx\n", result.handle);
dkprintf("devobj_create:maxprot: %x\n", result.maxprot);
obj->memobj.ops = &devobj_ops;
obj->memobj.flags = MF_HAS_PAGER;
@@ -134,7 +144,7 @@ out:
}
kfree(obj);
}
kprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
dkprintf("devobj_create(%d,%lx,%lx): %d %p %x%d\n", fd, len, off, error, *objp, *maxprotp);
return error;
}
@@ -142,7 +152,7 @@ static void devobj_ref(struct memobj *memobj)
{
struct devobj *obj = to_devobj(memobj);
kprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
dkprintf("devobj_ref(%p %lx):\n", obj, obj->handle);
memobj_lock(&obj->memobj);
++obj->ref;
memobj_unlock(&obj->memobj);
@@ -155,7 +165,7 @@ static void devobj_release(struct memobj *memobj)
struct devobj *free_obj = NULL;
uintptr_t handle;
kprintf("devobj_release(%p %lx)\n", obj, obj->handle);
dkprintf("devobj_release(%p %lx)\n", obj, obj->handle);
memobj_lock(&obj->memobj);
--obj->ref;
@@ -187,12 +197,12 @@ static void devobj_release(struct memobj *memobj)
kfree(free_obj);
}
kprintf("devobj_release(%p %lx):free %p\n",
dkprintf("devobj_release(%p %lx):free %p\n",
obj, handle, free_obj);
return;
}
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag)
{
const off_t pgoff = off >> PAGE_SHIFT;
struct devobj *obj = to_devobj(memobj);
@@ -202,7 +212,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
ihk_mc_user_context_t ctx;
int ix;
kprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
dkprintf("devobj_get_page(%p %lx,%lx,%d)\n", memobj, obj->handle, off, p2align);
if ((pgoff < obj->pfn_pgoff) || ((obj->pfn_pgoff + obj->npages) <= pgoff)) {
error = -EFBIG;
@@ -210,7 +220,7 @@ static int devobj_get_page(struct memobj *memobj, off_t off, int p2align, uintpt
goto out;
}
ix = pgoff - obj->pfn_pgoff;
kprintf("ix: %ld\n", ix);
dkprintf("ix: %ld\n", ix);
memobj_lock(&obj->memobj);
pfn = obj->pfn_table[ix];
@@ -230,12 +240,20 @@ kprintf("ix: %ld\n", ix);
if (pfn & PFN_PRESENT) {
/* convert remote physical into local physical */
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT before %#lx\n", memobj, obj->handle, off, p2align, pfn);
attr = pfn & ~PFN_PFN;
/* TODO: do an arch dependent PTE to mapping flag conversion
* instead of this inline check, also, we rely on having the
* same PAT config as Linux here.. */
if ((pfn & PFL1_PWT) && !(pfn & PFL1_PCD)) {
*flag |= VR_WRITE_COMBINED;
}
pfn = ihk_mc_map_memory(NULL, (pfn & PFN_PFN), PAGE_SIZE);
pfn &= PFN_PFN;
pfn |= attr;
kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
dkprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->handle, off, p2align, pfn);
}
memobj_lock(&obj->memobj);
@@ -253,6 +271,6 @@ kprintf("devobj_get_page(%p %lx,%lx,%d):PFN_PRESENT after %#lx\n", memobj, obj->
*physp = pfn & PFN_PFN;
out:
kprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
dkprintf("devobj_get_page(%p %lx,%lx,%d): %d %lx\n", memobj, obj->handle, off, p2align, error, *physp);
return error;
}

View File

@@ -26,7 +26,7 @@
#include <string.h>
#include <syscall.h>
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
static ihk_spinlock_t fileobj_list_lock = SPIN_LOCK_UNLOCKED;
@@ -46,6 +46,7 @@ static memobj_ref_func_t fileobj_ref;
static memobj_get_page_func_t fileobj_get_page;
static memobj_copy_page_func_t fileobj_copy_page;
static memobj_flush_page_func_t fileobj_flush_page;
static memobj_invalidate_page_func_t fileobj_invalidate_page;
static struct memobj_ops fileobj_ops = {
.release = &fileobj_release,
@@ -53,6 +54,7 @@ static struct memobj_ops fileobj_ops = {
.get_page = &fileobj_get_page,
.copy_page = &fileobj_copy_page,
.flush_page = &fileobj_flush_page,
.invalidate_page = &fileobj_invalidate_page,
};
static struct fileobj *to_fileobj(struct memobj *memobj)
@@ -383,9 +385,9 @@ out:
return;
}
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp)
static int fileobj_get_page(struct memobj *memobj, off_t off, int p2align, uintptr_t *physp, unsigned long *pflag)
{
struct process *proc = cpu_local_var(current);
struct thread *proc = cpu_local_var(current);
struct fileobj *obj = to_fileobj(memobj);
int error;
void *virt = NULL;
@@ -577,3 +579,33 @@ static int fileobj_flush_page(struct memobj *memobj, uintptr_t phys,
memobj_lock(&obj->memobj);
return 0;
}
static int fileobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
size_t pgsize)
{
struct fileobj *obj = to_fileobj(memobj);
int error;
struct page *page;
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx)\n",
memobj, phys, pgsize);
if (!(page = phys_to_page(phys))
|| !(page = page_list_lookup(obj, page->offset))) {
error = 0;
goto out;
}
if (ihk_atomic_read(&page->count) == 1) {
if (page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys),
pgsize/PAGE_SIZE);
}
}
error = 0;
out:
dkprintf("fileobj_invalidate_page(%p,%#lx,%#lx):%d\n",
memobj, phys, pgsize, error);
return error;
}

View File

@@ -76,9 +76,11 @@
#ifdef DEBUG_PRINT_FUTEX
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
extern struct sigpending *hassigpending(struct thread *thread);
int futex_cmpxchg_enabled;
/**
@@ -103,7 +105,7 @@ int futex_cmpxchg_enabled;
struct futex_q {
struct plist_node list;
struct process *task;
struct thread *task;
ihk_spinlock_t *lock_ptr;
union futex_key key;
union futex_key *requeue_pi_key;
@@ -243,7 +245,7 @@ static int get_futex_value_locked(uint32_t *dest, uint32_t *from)
*/
static void wake_futex(struct futex_q *q)
{
struct process *p = q->task;
struct thread *p = q->task;
/*
* We set q->lock_ptr = NULL _before_ we wake up the task. If
@@ -263,7 +265,7 @@ static void wake_futex(struct futex_q *q)
barrier();
q->lock_ptr = NULL;
sched_wakeup_process(p, PS_NORMAL);
sched_wakeup_thread(p, PS_NORMAL);
}
/*
@@ -658,7 +660,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
* queue_me() calls spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
xchg4(&(cpu_local_var(current)->ftn->status), PS_INTERRUPTIBLE);
xchg4(&(cpu_local_var(current)->status), PS_INTERRUPTIBLE);
queue_me(q, hb);
if (!plist_node_empty(&q->list)) {
@@ -674,7 +676,7 @@ static uint64_t futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q
}
/* This does not need to be serialized */
cpu_local_var(current)->ftn->status = PS_RUNNING;
cpu_local_var(current)->status = PS_RUNNING;
return time_remain;
}
@@ -775,6 +777,11 @@ retry:
if (timeout && !time_remain)
goto out_put_key;
if(hassigpending(cpu_local_var(current))){
ret = -EINTR;
goto out_put_key;
}
/* RIKEN: no signals */
put_futex_key(fshared, &q.key);
goto retry;

View File

@@ -28,20 +28,22 @@
#include <process.h>
#include <page.h>
#include <mman.h>
#include <init.h>
#include <kmalloc.h>
//#define DEBUG_PRINT_HOST
#ifdef DEBUG_PRINT_HOST
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
void check_mapping_for_proc(struct process *proc, unsigned long addr)
void check_mapping_for_proc(struct thread *thread, unsigned long addr)
{
unsigned long __phys;
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table, (void*)addr, &__phys)) {
if (ihk_mc_pt_virt_to_phys(thread->vm->address_space->page_table, (void*)addr, &__phys)) {
kprintf("check_map: no mapping for 0x%lX\n", addr);
}
else {
@@ -58,7 +60,7 @@ void check_mapping_for_proc(struct process *proc, unsigned long addr)
* NOTE: if args, args_len, envs, envs_len are zero,
* the function constructs them based on the descriptor
*/
int prepare_process_ranges_args_envs(struct process *proc,
int prepare_process_ranges_args_envs(struct thread *thread,
struct program_load_desc *pn,
struct program_load_desc *p,
enum ihk_mc_pt_attribute attr,
@@ -69,7 +71,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
unsigned long args_envs_p, args_envs_rp;
unsigned long s, e, up;
char **argv;
int i, n, argc, envc, args_envs_npages;
char **a;
int i, n, argc, envc, args_envs_npages, l;
char **env;
int range_npages;
void *up_v;
@@ -77,6 +80,10 @@ int prepare_process_ranges_args_envs(struct process *proc,
unsigned long flags;
uintptr_t interp_obase = -1;
uintptr_t interp_nbase = -1;
size_t map_size;
struct process *proc = thread->proc;
struct process_vm *vm = proc->vm;
struct address_space *as = vm->address_space;
n = p->num_sections;
@@ -85,7 +92,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
if (pn->sections[i].interp && (interp_nbase == (uintptr_t)-1)) {
interp_obase = pn->sections[i].vaddr;
interp_obase -= (interp_obase % pn->interp_align);
interp_nbase = proc->vm->region.map_start;
interp_nbase = vm->region.map_start;
interp_nbase = (interp_nbase + pn->interp_align - 1)
& ~(pn->interp_align - 1);
}
@@ -110,7 +117,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
}
up = virt_to_phys(up_v);
if (add_process_memory_range(proc, s, e, up, flags, NULL, 0) != 0) {
if (add_process_memory_range(vm, s, e, up, flags, NULL, 0) != 0) {
ihk_mc_free_pages(up_v, range_npages);
kprintf("ERROR: adding memory range for ELF section %i\n", i);
goto err;
@@ -119,14 +126,14 @@ int prepare_process_ranges_args_envs(struct process *proc,
{
void *_virt = (void *)s;
unsigned long _phys;
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(as->page_table,
_virt, &_phys)) {
kprintf("ERROR: no mapping for 0x%lX\n", _virt);
}
for (_virt = (void *)s + PAGE_SIZE;
(unsigned long)_virt < e; _virt += PAGE_SIZE) {
unsigned long __phys;
if (ihk_mc_pt_virt_to_phys(proc->vm->page_table,
if (ihk_mc_pt_virt_to_phys(as->page_table,
_virt, &__phys)) {
kprintf("ERROR: no mapping for 0x%lX\n", _virt);
panic("mapping");
@@ -145,23 +152,23 @@ int prepare_process_ranges_args_envs(struct process *proc,
/* TODO: Maybe we need flag */
if (pn->sections[i].interp) {
proc->vm->region.map_end = e;
vm->region.map_end = e;
}
else if (i == 0) {
proc->vm->region.text_start = s;
proc->vm->region.text_end = e;
vm->region.text_start = s;
vm->region.text_end = e;
}
else if (i == 1) {
proc->vm->region.data_start = s;
proc->vm->region.data_end = e;
vm->region.data_start = s;
vm->region.data_end = e;
}
else {
proc->vm->region.data_start =
(s < proc->vm->region.data_start ?
s : proc->vm->region.data_start);
proc->vm->region.data_end =
(e > proc->vm->region.data_end ?
e : proc->vm->region.data_end);
vm->region.data_start =
(s < vm->region.data_start ?
s : vm->region.data_start);
vm->region.data_end =
(e > vm->region.data_end ?
e : vm->region.data_end);
}
}
@@ -169,32 +176,17 @@ int prepare_process_ranges_args_envs(struct process *proc,
pn->entry -= interp_obase;
pn->entry += interp_nbase;
p->entry = pn->entry;
ihk_mc_modify_user_context(proc->uctx, IHK_UCR_PROGRAM_COUNTER,
pn->entry);
ihk_mc_modify_user_context(thread->uctx,
IHK_UCR_PROGRAM_COUNTER,
pn->entry);
}
#if 1
/*
Fix for the problem where brk grows to hit .bss section
when using dynamically linked executables.
Test code resides in /home/takagi/project/mpich/src/brk_icc_mic.
This is because when using
ld.so (i.e. using shared objects), mckernel/kernel/host.c sets "brk" to
the end of .bss of ld.so (e.g. 0x21f000), and then ld.so places a
main-program after this (e.g. 0x400000), so "brk" will hit .bss
eventually.
*/
proc->vm->region.brk_start = proc->vm->region.brk_end =
(USER_END / 4) & LARGE_PAGE_MASK;
#else
proc->vm->region.brk_start = proc->vm->region.brk_end =
proc->vm->region.data_end;
#endif
vm->region.brk_start = vm->region.brk_end = vm->region.data_end;
/* Map, copy and update args and envs */
flags = VR_PROT_READ | VR_PROT_WRITE;
flags |= VRFLAG_PROT_TO_MAXPROT(flags);
addr = proc->vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
addr = vm->region.map_start - PAGE_SIZE * SCD_RESERVED_COUNT;
e = addr + PAGE_SIZE * ARGENV_PAGE_COUNT;
if((args_envs = ihk_mc_alloc_pages(ARGENV_PAGE_COUNT, IHK_MC_AP_NOWAIT)) == NULL){
@@ -203,7 +195,7 @@ int prepare_process_ranges_args_envs(struct process *proc,
}
args_envs_p = virt_to_phys(args_envs);
if(add_process_memory_range(proc, addr, e, args_envs_p,
if(add_process_memory_range(vm, addr, e, args_envs_p,
flags, NULL, 0) != 0){
ihk_mc_free_pages(args_envs, ARGENV_PAGE_COUNT);
kprintf("ERROR: adding memory range for args/envs\n");
@@ -217,7 +209,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
/* Only map remote address if it wasn't specified as an argument */
if (!args) {
// Map in remote physical addr of args and copy it
args_envs_npages = (p->args_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
map_size = ((uintptr_t)p->args & (PAGE_SIZE - 1)) + p->args_len;
args_envs_npages = (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
dkprintf("args_envs_npages: %d\n", args_envs_npages);
args_envs_rp = ihk_mc_map_memory(NULL,
(unsigned long)p->args, p->args_len);
@@ -250,7 +243,8 @@ int prepare_process_ranges_args_envs(struct process *proc,
/* Only map remote address if it wasn't specified as an argument */
if (!envs) {
// Map in remote physical addr of envs and copy it after args
args_envs_npages = (p->envs_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
map_size = ((uintptr_t)p->envs & (PAGE_SIZE - 1)) + p->envs_len;
args_envs_npages = (map_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
dkprintf("args_envs_npages: %d\n", args_envs_npages);
args_envs_rp = ihk_mc_map_memory(NULL, (unsigned long)p->envs,
p->envs_len);
@@ -284,13 +278,21 @@ int prepare_process_ranges_args_envs(struct process *proc,
dkprintf("argc: %d\n", argc);
argv = (char **)(args_envs + (sizeof(int)));
while (*argv) {
char **_argv = argv;
dkprintf("%s\n", args_envs + (unsigned long)*argv);
*argv = (char *)addr + (unsigned long)*argv; // Process' address space!
argv = ++_argv;
if(proc->saved_cmdline){
kfree(proc->saved_cmdline);
proc->saved_cmdline_len = 0;
}
for(a = argv, l = 0; *a; a++)
l += strlen(args_envs + (unsigned long)*a) + 1;
proc->saved_cmdline = kmalloc(p->args_len, IHK_MC_AP_NOWAIT);
if(!proc->saved_cmdline)
goto err;
proc->saved_cmdline_len = l;
for(a = argv, l = 0; *a; a++){
strcpy(proc->saved_cmdline + l, args_envs + (unsigned long)*a);
l += strlen(args_envs + (unsigned long)*a) + 1;
*a = (char *)addr + (unsigned long)*a; // Process' address space!
}
argv = (char **)(args_envs + (sizeof(int)));
envc = *((int*)(args_envs + p->args_len));
dkprintf("envc: %d\n", envc);
@@ -306,10 +308,10 @@ int prepare_process_ranges_args_envs(struct process *proc,
dkprintf("env OK\n");
p->rprocess = (unsigned long)proc;
p->rpgtable = virt_to_phys(proc->vm->page_table);
if (init_process_stack(proc, pn, argc, argv, envc, env) != 0) {
p->rprocess = (unsigned long)thread;
p->rpgtable = virt_to_phys(as->page_table);
if (init_process_stack(thread, pn, argc, argv, envc, env) != 0) {
goto err;
}
@@ -328,7 +330,9 @@ static int process_msg_prepare_process(unsigned long rphys)
unsigned long phys, sz;
struct program_load_desc *p, *pn;
int npages, n;
struct thread *thread;
struct process *proc;
struct process_vm *vm;
enum ihk_mc_pt_attribute attr;
attr = PTATTR_NO_EXECUTE | PTATTR_WRITABLE | PTATTR_FOR_USER;
@@ -355,45 +359,57 @@ static int process_msg_prepare_process(unsigned long rphys)
memcpy_long(pn, p, sizeof(struct program_load_desc)
+ sizeof(struct program_image_section) * n);
if((proc = create_process(p->entry)) == NULL){
if((thread = create_thread(p->entry)) == NULL){
ihk_mc_free(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz);
return -ENOMEM;
}
proc->ftn->pid = pn->pid;
proc->ftn->pgid = pn->pgid;
proc->vm->region.user_start = pn->user_start;
proc->vm->region.user_end = pn->user_end;
proc->vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
proc->vm->region.map_end = proc->vm->region.map_start;
proc->rlimit_stack.rlim_cur = pn->rlimit_stack_cur;
proc->rlimit_stack.rlim_max = pn->rlimit_stack_max;
proc = thread->proc;
vm = thread->vm;
proc->pid = pn->pid;
proc->vm->address_space->pids[0] = pn->pid;
proc->pgid = pn->pgid;
proc->ruid = pn->cred[0];
proc->euid = pn->cred[1];
proc->suid = pn->cred[2];
proc->fsuid = pn->cred[3];
proc->rgid = pn->cred[4];
proc->egid = pn->cred[5];
proc->sgid = pn->cred[6];
proc->fsgid = pn->cred[7];
vm->region.user_start = pn->user_start;
vm->region.user_end = pn->user_end;
vm->region.map_start = (USER_END / 3) & LARGE_PAGE_MASK;
vm->region.map_end = proc->vm->region.map_start;
memcpy(proc->rlimit, pn->rlimit, sizeof(struct rlimit) * MCK_RLIM_MAX);
/* TODO: Clear it at the proper timing */
cpu_local_var(scp).post_idx = 0;
if (prepare_process_ranges_args_envs(proc, pn, p, attr,
if (prepare_process_ranges_args_envs(thread, pn, p, attr,
NULL, 0, NULL, 0) != 0) {
kprintf("error: preparing process ranges, args, envs, stack\n");
goto err;
}
dkprintf("new process : %p [%d] / table : %p\n", proc, proc->pid,
proc->vm->page_table);
vm->address_space->page_table);
ihk_mc_free(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz);
flush_tlb();
return 0;
err:
ihk_mc_free(pn);
ihk_mc_unmap_virtual(p, npages, 1);
ihk_mc_unmap_memory(NULL, phys, sz);
free_process_memory(proc);
destroy_process(proc);
destroy_thread(thread);
return -ENOMEM;
}
@@ -467,13 +483,15 @@ static void syscall_channel_send(struct ihk_ikc_channel_desc *c,
ihk_ikc_send(c, packet, 0);
}
extern unsigned long do_kill(int, int, int, struct siginfo *);
extern void settid(struct process *proc, int mode, int newcpuid, int oldcpuid);
extern unsigned long do_kill(struct thread *, int, int, int, struct siginfo *, int ptracecont);
extern void settid(struct thread *proc, int mode, int newcpuid, int oldcpuid);
extern void process_procfs_request(unsigned long rarg);
extern int memcheckall();
extern int freecheck(int runcount);
extern int runcount;
extern void terminate_host(int pid);
extern void debug_log(long);
static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
void *__packet, void *ihk_os)
@@ -481,6 +499,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct ikc_scd_packet *packet = __packet;
struct ikc_scd_packet pckt;
int rc;
struct thread *thread;
struct process *proc;
struct mcctrl_signal {
int cond;
@@ -490,6 +509,7 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
struct siginfo info;
} *sp, info;
unsigned long pp;
int cpuid;
switch (packet->msg) {
case SCD_MSG_INIT_CHANNEL_ACKED:
@@ -521,13 +541,23 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
return 0;
case SCD_MSG_SCHEDULE_PROCESS:
cpuid = obtain_clone_cpuid();
if(cpuid == -1){
kprintf("No CPU available\n");
return -1;
}
dkprintf("SCD_MSG_SCHEDULE_PROCESS: %lx\n", packet->arg);
proc = (struct process *)packet->arg;
thread = (struct thread *)packet->arg;
proc = thread->proc;
settid(proc, 0, ihk_mc_get_processor_id(), -1);
runq_add_proc(proc, ihk_mc_get_processor_id());
settid(thread, 0, cpuid, -1);
proc->status = PS_RUNNING;
thread->status = PS_RUNNING;
chain_thread(thread);
chain_process(proc);
runq_add_thread(thread, cpuid);
//cpu_local_var(next) = (struct process *)packet->arg;
//cpu_local_var(next) = (struct thread *)packet->arg;
return 0;
case SCD_MSG_SEND_SIGNAL:
pp = ihk_mc_map_memory(NULL, packet->arg, sizeof(struct mcctrl_signal));
@@ -541,12 +571,20 @@ static int syscall_packet_handler(struct ihk_ikc_channel_desc *c,
pckt.arg = packet->arg;
syscall_channel_send(c, &pckt);
rc = do_kill(info.pid, info.tid, info.sig, &info.info);
rc = do_kill(NULL, info.pid, info.tid, info.sig, &info.info, 0);
kprintf("SCD_MSG_SEND_SIGNAL: do_kill(pid=%d, tid=%d, sig=%d)=%d\n", info.pid, info.tid, info.sig, rc);
return 0;
case SCD_MSG_PROCFS_REQUEST:
process_procfs_request(packet->arg);
return 0;
case SCD_MSG_CLEANUP_PROCESS:
dkprintf("SCD_MSG_CLEANUP_PROCESS pid=%d\n", packet->pid);
terminate_host(packet->pid);
return 0;
case SCD_MSG_DEBUG_LOG:
dkprintf("SCD_MSG_DEBUG_LOG code=%lx\n", packet->arg);
debug_log(packet->arg);
return 0;
}
return 0;
}

View File

@@ -30,6 +30,7 @@ struct malloc_header {
#define CPU_STATUS_DISABLE (0)
#define CPU_STATUS_IDLE (1)
#define CPU_STATUS_RUNNING (2)
#define CPU_STATUS_RESERVED (3)
extern ihk_spinlock_t cpu_status_lock;
#define CPU_FLAG_NEED_RESCHED 0x1U
@@ -40,12 +41,14 @@ struct cpu_local_var {
struct malloc_header free_list;
ihk_spinlock_t free_list_lock;
struct process idle;
struct fork_tree_node idle_ftn;
struct thread idle;
struct process idle_proc;
struct process_vm idle_vm;
struct address_space idle_asp;
ihk_spinlock_t runq_lock;
struct process *current;
unsigned long runq_irqstate;
struct thread *current;
struct list_head runq;
size_t runq_len;
@@ -56,6 +59,7 @@ struct cpu_local_var {
struct ihk_ikc_channel_desc *syscall_channel2;
struct syscall_params scp2;
struct ikc_scd_init_param iip2;
struct resource_set *resource_set;
int status;
int fs;
@@ -66,6 +70,9 @@ struct cpu_local_var {
ihk_spinlock_t migq_lock;
struct list_head migq;
int in_interrupt;
int no_preempt;
int timer_enabled;
} __attribute__((aligned(64)));

View File

@@ -14,8 +14,18 @@
#define __HEADER_KMALLOC_H
#include <ihk/mm.h>
#include <cls.h>
#define kmalloc(size, flag) _kmalloc(size, flag, __FILE__, __LINE__)
void panic(const char *);
int kprintf(const char *format, ...);
#define kmalloc(size, flag) ({\
void *r = _kmalloc(size, flag, __FILE__, __LINE__);\
if(r == NULL){\
kprintf("kmalloc: out of memory %s:%d no_preempt=%d\n", __FILE__, __LINE__, cpu_local_var(no_preempt)); \
}\
r;\
})
#define kfree(ptr) _kfree(ptr, __FILE__, __LINE__)
#define memcheck(ptr, msg) _memcheck(ptr, msg, __FILE__, __LINE__, 0)
void *_kmalloc(int size, enum ihk_mc_ap_flag flag, char *file, int line);

View File

@@ -18,11 +18,20 @@
#include <ihk/lock.h>
#include <errno.h>
#include <list.h>
#include <shm.h>
/* begin types.h */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
/* end types.h */
enum {
/* for memobj.flags */
MF_HAS_PAGER = 0x0001,
MF_SHMDT_OK = 0x0002,
MF_IS_REMOVABLE = 0x0004,
};
struct memobj {
@@ -34,9 +43,10 @@ struct memobj {
typedef void memobj_release_func_t(struct memobj *obj);
typedef void memobj_ref_func_t(struct memobj *obj);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp);
typedef int memobj_get_page_func_t(struct memobj *obj, off_t off, int p2align, uintptr_t *physp, unsigned long *flag);
typedef uintptr_t memobj_copy_page_func_t(struct memobj *obj, uintptr_t orgphys, int p2align);
typedef int memobj_flush_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
typedef int memobj_invalidate_page_func_t(struct memobj *obj, uintptr_t phys, size_t pgsize);
struct memobj_ops {
memobj_release_func_t * release;
@@ -44,6 +54,7 @@ struct memobj_ops {
memobj_get_page_func_t * get_page;
memobj_copy_page_func_t * copy_page;
memobj_flush_page_func_t * flush_page;
memobj_invalidate_page_func_t * invalidate_page;
};
static inline void memobj_release(struct memobj *obj)
@@ -61,10 +72,10 @@ static inline void memobj_ref(struct memobj *obj)
}
static inline int memobj_get_page(struct memobj *obj, off_t off,
int p2align, uintptr_t *physp)
int p2align, uintptr_t *physp, unsigned long *pflag)
{
if (obj->ops->get_page) {
return (*obj->ops->get_page)(obj, off, p2align, physp);
return (*obj->ops->get_page)(obj, off, p2align, physp, pflag);
}
return -ENXIO;
}
@@ -86,6 +97,15 @@ static inline int memobj_flush_page(struct memobj *obj, uintptr_t phys, size_t p
return 0;
}
static inline int memobj_invalidate_page(struct memobj *obj, uintptr_t phys,
size_t pgsize)
{
if (obj->ops->invalidate_page) {
return (*obj->ops->invalidate_page)(obj, phys, pgsize);
}
return 0;
}
static inline void memobj_lock(struct memobj *obj)
{
ihk_mc_spinlock_lock_noirq(&obj->lock);
@@ -101,7 +121,13 @@ static inline int memobj_has_pager(struct memobj *obj)
return !!(obj->flags & MF_HAS_PAGER);
}
static inline int memobj_is_removable(struct memobj *obj)
{
return !!(obj->flags & MF_IS_REMOVABLE);
}
int fileobj_create(int fd, struct memobj **objp, int *maxprotp);
struct shmid_ds;
int shmobj_create(struct shmid_ds *ds, struct memobj **objp);
int zeroobj_create(struct memobj **objp);
int devobj_create(int fd, size_t len, off_t off, struct memobj **objp, int *maxprotp);

View File

@@ -5,6 +5,8 @@
* memory management declarations
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2013 Hitachi, Ltd.
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -13,6 +15,8 @@
#ifndef HEADER_MMAN_H
#define HEADER_MMAN_H
#include <arch/mman.h>
/*
* memory protection
*/
@@ -32,16 +36,6 @@
#define MAP_PRIVATE 0x02
#define MAP_FIXED 0x10
#define MAP_ANONYMOUS 0x20
#define MAP_32BIT 0x40
#define MAP_GROWSDOWN 0x0100
#define MAP_DENYWRITE 0x0800
#define MAP_EXECUTABLE 0x1000
#define MAP_LOCKED 0x2000
#define MAP_NORESERVE 0x4000
#define MAP_POPULATE 0x8000
#define MAP_NONBLOCK 0x00010000
#define MAP_STACK 0x00020000
#define MAP_HUGETLB 0x00040000
/*
* memory advice
@@ -69,4 +63,11 @@
#define MREMAP_MAYMOVE 0x01
#define MREMAP_FIXED 0x02
/*
* for msync()
*/
#define MS_ASYNC 0x01
#define MS_INVALIDATE 0x02
#define MS_SYNC 0x04
#endif /* HEADER_MMAN_H */

View File

@@ -66,4 +66,6 @@ static inline int page_is_multi_mapped(struct page *page)
return (ihk_atomic_read(&page->count) > 1);
}
/* Should we take page faults on ANONYMOUS mappings? */
extern int anon_on_demand;
#endif

60
kernel/include/prio.h Normal file
View File

@@ -0,0 +1,60 @@
#ifndef _SCHED_PRIO_H
#define _SCHED_PRIO_H
#define MAX_NICE 19
#define MIN_NICE -20
#define NICE_WIDTH (MAX_NICE - MIN_NICE + 1)
/*
* Priority of a process goes from 0..MAX_PRIO-1, valid RT
* priority is 0..MAX_RT_PRIO-1, and SCHED_NORMAL/SCHED_BATCH
* tasks are in the range MAX_RT_PRIO..MAX_PRIO-1. Priority
* values are inverted: lower p->prio value means higher priority.
*
* The MAX_USER_RT_PRIO value allows the actual maximum
* RT priority to be separate from the value exported to
* user-space. This allows kernel threads to set their
* priority to a value higher than any user task. Note:
* MAX_RT_PRIO must not be smaller than MAX_USER_RT_PRIO.
*/
#define MAX_USER_RT_PRIO 100
#define MAX_RT_PRIO MAX_USER_RT_PRIO
#define MAX_PRIO (MAX_RT_PRIO + NICE_WIDTH)
#define DEFAULT_PRIO (MAX_RT_PRIO + NICE_WIDTH / 2)
/*
* Convert user-nice values [ -20 ... 0 ... 19 ]
* to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
* and back.
*/
#define NICE_TO_PRIO(nice) ((nice) + DEFAULT_PRIO)
#define PRIO_TO_NICE(prio) ((prio) - DEFAULT_PRIO)
/*
* 'User priority' is the nice value converted to something we
* can work with better when scaling various scheduler parameters,
* it's a [ 0 ... 39 ] range.
*/
#define USER_PRIO(p) ((p)-MAX_RT_PRIO)
#define TASK_USER_PRIO(p) USER_PRIO((p)->static_prio)
#define MAX_USER_PRIO (USER_PRIO(MAX_PRIO))
/*
* Convert nice value [19,-20] to rlimit style value [1,40].
*/
static inline long nice_to_rlimit(long nice)
{
return (MAX_NICE - nice + 1);
}
/*
* Convert rlimit style value [1,40] to nice value [-20, 19].
*/
static inline long rlimit_to_nice(long prio)
{
return (MAX_NICE - prio + 1);
}
#endif /* _SCHED_PRIO_H */

View File

@@ -21,12 +21,14 @@
#include <signal.h>
#include <memobj.h>
#include <affinity.h>
#include <syscall.h>
#define VR_NONE 0x0
#define VR_STACK 0x1
#define VR_RESERVED 0x2
#define VR_IO_NOCACHE 0x100
#define VR_REMOTE 0x200
#define VR_WRITE_COMBINED 0x400
#define VR_DEMAND_PAGING 0x1000
#define VR_PRIVATE 0x2000
#define VR_LOCKED 0x4000
@@ -49,6 +51,7 @@
#define VRFLAG_PROT_TO_MAXPROT(vrflag) (((vrflag) & VR_PROT_MASK) << 4)
#define VRFLAG_MAXPROT_TO_PROT(vrflag) (((vrflag) & VR_MAXPROT_MASK) >> 4)
// struct process.status, struct thread.status
#define PS_RUNNING 0x1
#define PS_INTERRUPTIBLE 0x2
#define PS_UNINTERRUPTIBLE 0x4
@@ -56,12 +59,19 @@
#define PS_EXITED 0x10
#define PS_STOPPED 0x20
#define PS_TRACED 0x40 /* Set to "not running" by a ptrace related event */
#define PS_STOPPING 0x80
#define PS_TRACING 0x100
#define PS_NORMAL (PS_INTERRUPTIBLE | PS_UNINTERRUPTIBLE)
// struct process.ptrace
#define PT_TRACED 0x80 /* The process is ptraced */
#define PT_TRACE_EXEC 0x100 /* Trace execve(2) */
#define PT_TRACE_SYSCALL_ENTER 0x200 /* Trace syscall enter */
#define PT_TRACE_SYSCALL_EXIT 0x400 /* Trace syscall exit */
#define PT_TRACE_SYSCALL_MASK (PT_TRACE_SYSCALL_ENTER | PT_TRACE_SYSCALL_EXIT)
// ptrace(2) request
#define PTRACE_TRACEME 0
#define PTRACE_PEEKTEXT 1
#define PTRACE_PEEKDATA 2
@@ -90,6 +100,7 @@
#define PTRACE_GETREGSET 0x4204
#define PTRACE_SETREGSET 0x4205
// ptrace(2) options
#define PTRACE_O_TRACESYSGOOD 1
#define PTRACE_O_TRACEFORK 2
#define PTRACE_O_TRACEVFORK 4
@@ -99,6 +110,7 @@
#define PTRACE_O_TRACEEXIT 0x40
#define PTRACE_O_MASK 0x7f
// ptrace(2) events
#define PTRACE_EVENT_FORK 1
#define PTRACE_EVENT_VFORK 2
#define PTRACE_EVENT_CLONE 3
@@ -106,6 +118,8 @@
#define PTRACE_EVENT_VFORK_DONE 5
#define PTRACE_EVENT_EXIT 6
#define NT_X86_XSTATE 0x202 /* x86 XSAVE extended state */
#define SIGNAL_STOP_STOPPED 0x1 /* The process has been stopped by SIGSTOP */
#define SIGNAL_STOP_CONTINUED 0x2 /* The process has been resumed by SIGCONT */
@@ -118,6 +132,11 @@
#define WNOWAIT 0x01000000 /* Don't reap, just poll status. */
#define __WCLONE 0x80000000
/* idtype */
#define P_ALL 0
#define P_PID 1
#define P_PGID 2
/* If WIFEXITED(STATUS), the low-order 8 bits of the status. */
#define __WEXITSTATUS(status) (((status) & 0xff00) >> 8)
@@ -145,7 +164,64 @@
#include <waitq.h>
#include <futex.h>
#include <rlimit.h>
struct resource_set;
struct process_hash;
struct thread_hash;
struct address_space;
struct process;
struct thread;
struct process_vm;
struct vm_regions;
struct vm_range;
#define HASH_SIZE 73
struct resource_set {
struct list_head list;
char *path;
struct process_hash *process_hash;
struct thread_hash *thread_hash;
struct list_head phys_mem_list;
mcs_rwlock_lock_t phys_mem_lock;
cpu_set_t cpu_set;
mcs_rwlock_lock_t cpu_set_lock;
struct process *pid1;
};
extern struct list_head resource_set_list;
extern mcs_rwlock_lock_t resource_set_lock;
struct process_hash {
struct list_head list[HASH_SIZE];
mcs_rwlock_lock_t lock[HASH_SIZE];
};
static inline int
process_hash(int pid)
{
return pid % HASH_SIZE;
}
static inline int
thread_hash(int tid)
{
return tid % HASH_SIZE;
}
struct thread_hash {
struct list_head list[HASH_SIZE];
mcs_rwlock_lock_t lock[HASH_SIZE];
};
struct address_space {
struct page_table *page_table;
int type;
#define ADDRESS_SPACE_NORMAL 1
#define ADDRESS_SPACE_PVAS 2
int nslots;
int pids[];
};
struct user_fpregs_struct
{
@@ -212,7 +288,7 @@ struct user
unsigned long int u_debugreg [8];
};
#define AUXV_LEN 14
#define AUXV_LEN 16
struct vm_range {
struct list_head list;
@@ -223,6 +299,7 @@ struct vm_range {
};
struct vm_regions {
unsigned long vm_start, vm_end;
unsigned long text_start, text_end;
unsigned long data_start, data_end;
unsigned long brk_start, brk_end;
@@ -233,22 +310,27 @@ struct vm_regions {
struct process_vm;
struct sig_handler {
struct sigfd {
struct sigfd *next;
int fd;
__sigset_t mask;
};
#define SFD_CLOEXEC 02000000
#define SFD_NONBLOCK 04000
struct sig_common {
ihk_spinlock_t lock;
ihk_atomic_t use;
ihk_atomic_t use;
struct sigfd *sigfd;
struct k_sigaction action[_NSIG];
struct list_head sigpending;
};
struct sig_pending {
struct list_head list;
sigset_t sigmask;
siginfo_t info;
};
struct sig_shared {
ihk_spinlock_t lock;
ihk_atomic_t use;
struct list_head sigpending;
int ptracecont;
};
typedef void pgio_func_t(void *arg);
@@ -257,142 +339,232 @@ typedef void pgio_func_t(void *arg);
* corresponding process exited due to references from the parent and/or
* children and is used for implementing wait/waitpid without having a
* special "init" process */
struct fork_tree_node {
ihk_spinlock_t lock;
ihk_atomic_t refcount;
int exit_status;
int status;
struct process *owner;
int pid;
int tid;
int pgid;
struct fork_tree_node *parent;
struct list_head children;
struct list_head siblings_list;
/* The ptracing process behave as the parent of the ptraced process
after using PTRACE_ATTACH except getppid. So we save it here. */
struct fork_tree_node *ppid_parent;
/* Manage ptraced processes in the separate list to make it easy to
restore the orginal parent child relationship when
performing PTRACE_DETACH */
struct list_head ptrace_children;
struct list_head ptrace_siblings_list;
struct waitq waitpid_q;
/* Store exit_status for a group of threads when stopped by SIGSTOP.
exit_status can't be used because values of exit_status of threads
might divert while the threads are exiting by group_exit(). */
int group_exit_status;
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store event related to signal. For example,
it represents that the proceess has been resumed by SIGCONT. */
int signal_flags;
/* Store signal sent to parent when the process terminates. */
int termsig;
};
void hold_fork_tree_node(struct fork_tree_node *ftn);
void release_fork_tree_node(struct fork_tree_node *ftn);
struct process {
int cpu_id;
struct list_head hash_list;
mcs_rwlock_lock_t update_lock; // lock for parent, status, ...?
ihk_atomic_t refcount;
// process vm
struct process_vm *vm;
// threads and children
struct list_head threads_list;
mcs_rwlock_lock_t threads_lock; // lock for threads_list
/* The ptracing process behave as the parent of the ptraced process
after using PTRACE_ATTACH except getppid. So we save it here. */
struct process *parent;
struct process *ppid_parent;
struct list_head children_list;
struct list_head ptraced_children_list;
mcs_rwlock_lock_t children_lock; // lock for children_list and ptraced_children_list
struct list_head siblings_list; // lock parent
struct list_head ptraced_siblings_list; // lock ppid_parent
ihk_atomic_t refcount;
// process status and exit status
int status; // PS_RUNNING -> PS_EXITED -> PS_ZOMBIE
// | ^ ^
// | |---+ |
// V | |
// PS_STOPPING-)---+
// (PS_TRACING)| |
// | | |
// V +---- |
// PS_STOPPED -----+
// (PS_TRACED)
int exit_status;
/* Store exit_status for a group of threads when stopped by SIGSTOP.
exit_status can't be used because values of exit_status of threads
might divert while the threads are exiting by group_exit(). */
int group_exit_status;
/* Manage ptraced processes in the separate list to make it easy to
restore the orginal parent child relationship when
performing PTRACE_DETACH */
struct waitq waitpid_q;
// process info and credentials etc.
int pid;
int pgid;
int ruid;
int euid;
int suid;
int fsuid;
int rgid;
int egid;
int sgid;
int fsgid;
int execed;
int nohost;
struct rlimit rlimit[MCK_RLIM_MAX];
unsigned long saved_auxv[AUXV_LEN];
char *saved_cmdline;
long saved_cmdline_len;
/* Store ptrace flags.
* The lower 8 bits are PTRACE_O_xxx of the PTRACE_SETOPTIONS request.
* Other bits are for inner use of the McKernel.
*/
int ptrace;
/* Store ptrace event message.
* PTRACE_O_xxx will store event message here.
* PTRACE_GETEVENTMSG will get from here.
*/
unsigned long ptrace_eventmsg;
/* Store event related to signal. For example,
it represents that the proceess has been resumed by SIGCONT. */
int signal_flags;
/* Store signal sent to parent when the process terminates. */
int termsig;
};
void hold_thread(struct thread *ftn);
void release_thread(struct thread *ftn);
/*
* Scheduling policies
*/
#define SCHED_NORMAL 0
#define SCHED_FIFO 1
#define SCHED_RR 2
#define SCHED_BATCH 3
/* SCHED_ISO: reserved but not implemented yet */
#define SCHED_IDLE 5
#define SCHED_DEADLINE 6
/* Can be ORed in to make sure the process is reverted back to SCHED_NORMAL on fork */
#define SCHED_RESET_ON_FORK 0x40000000
/*
* For the sched_{set,get}attr() calls
*/
#define SCHED_FLAG_RESET_ON_FORK 0x01
struct sched_param {
int sched_priority;
};
struct thread {
struct list_head hash_list;
// thread info
int cpu_id;
int tid;
int status; // PS_RUNNING -> PS_EXITED
// | ^ ^
// | | |
// V | |
// PS_STOPPED------+
// PS_TRACED
// PS_INTERRPUTIBLE
// PS_UNINTERRUPTIBLE
// process vm
struct process_vm *vm;
// context
ihk_mc_kernel_context_t ctx;
ihk_mc_user_context_t *uctx;
// sibling
struct process *proc;
struct list_head siblings_list; // lock process
// Runqueue list entry
struct list_head sched_list;
struct list_head sched_list; // lock cls
int sched_policy;
struct sched_param sched_param;
ihk_spinlock_t spin_sleep_lock;
int spin_sleep;
struct thread {
int *clear_child_tid;
unsigned long tlsblock_base, tlsblock_limit;
} thread;
ihk_atomic_t refcount;
volatile int sigevent;
int *clear_child_tid;
unsigned long tlsblock_base, tlsblock_limit;
// thread info
cpu_set_t cpu_set;
fp_regs_struct *fp_regs;
int in_syscall_offload;
// signal
struct sig_common *sigcommon;
sigset_t sigmask;
stack_t sigstack;
ihk_spinlock_t sigpendinglock;
struct list_head sigpending;
struct sig_shared *sigshared;
struct sig_handler *sighandler;
ihk_spinlock_t sigpendinglock;
volatile int sigevent;
struct rlimit rlimit_stack;
// gpio
pgio_func_t *pgio_fp;
void *pgio_arg;
struct fork_tree_node *ftn;
cpu_set_t cpu_set;
unsigned long saved_auxv[AUXV_LEN];
struct user *userp;
// for ptrace
unsigned long *ptrace_debugreg; /* debug registers for ptrace */
struct sig_pending *ptrace_recvsig;
struct sig_pending *ptrace_sendsig;
};
struct process_vm {
ihk_atomic_t refcount;
struct page_table *page_table;
struct address_space *address_space;
struct list_head vm_range_list;
struct vm_regions region;
struct process *owner_process; /* process that reside on the same page */
struct process *proc; /* process that reside on the same page */
ihk_spinlock_t page_table_lock;
ihk_spinlock_t memory_range_lock;
ihk_spinlock_t page_table_lock;
ihk_spinlock_t memory_range_lock;
// to protect the followings:
// 1. addition of process "memory range" (extend_process_region, add_process_memory_range)
// 2. addition of process page table (allocate_pages, update_process_page_table)
// note that physical memory allocator (ihk_mc_alloc_pages, ihk_pagealloc_alloc)
// is protected by its own lock (see ihk/manycore/generic/page_alloc.c)
ihk_atomic_t refcount;
cpu_set_t cpu_set;
ihk_spinlock_t cpu_set_lock;
int exiting;
};
struct process *create_process(unsigned long user_pc);
struct process *clone_process(struct process *org, unsigned long pc,
struct thread *create_thread(unsigned long user_pc);
struct thread *clone_thread(struct thread *org, unsigned long pc,
unsigned long sp, int clone_flags);
void destroy_process(struct process *proc);
void hold_process(struct process *proc);
void release_process(struct process *proc);
void flush_process_memory(struct process *proc);
void free_process_memory(struct process *proc);
void free_process_memory_ranges(struct process *proc);
int populate_process_memory(struct process *proc, void *start, size_t len);
void destroy_thread(struct thread *thread);
void hold_thread(struct thread *thread);
void release_thread(struct thread *thread);
void flush_process_memory(struct process_vm *vm);
void hold_process_vm(struct process_vm *vm);
void release_process_vm(struct process_vm *vm);
void hold_process(struct process *);
void release_process(struct process *);
void free_process_memory_ranges(struct process_vm *vm);
int populate_process_memory(struct process_vm *vm, void *start, size_t len);
int add_process_memory_range(struct process *process,
int add_process_memory_range(struct process_vm *vm,
unsigned long start, unsigned long end,
unsigned long phys, unsigned long flag,
struct memobj *memobj, off_t objoff);
int remove_process_memory_range(struct process *process, unsigned long start,
int remove_process_memory_range(struct process_vm *vm, unsigned long start,
unsigned long end, int *ro_freedp);
int split_process_memory_range(struct process *process,
int split_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t addr, struct vm_range **splitp);
int join_process_memory_range(struct process *process, struct vm_range *surviving,
int join_process_memory_range(struct process_vm *vm, struct vm_range *surviving,
struct vm_range *merging);
int change_prot_process_memory_range(
struct process *process, struct vm_range *range,
struct process_vm *vm, struct vm_range *range,
unsigned long newflag);
int remap_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end, off_t off);
int sync_process_memory_range(struct process_vm *vm, struct vm_range *range,
uintptr_t start, uintptr_t end);
int invalidate_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t start, uintptr_t end);
struct vm_range *lookup_process_memory_range(
struct process_vm *vm, uintptr_t start, uintptr_t end);
struct vm_range *next_process_memory_range(
@@ -402,31 +574,41 @@ struct vm_range *previous_process_memory_range(
int extend_up_process_memory_range(struct process_vm *vm,
struct vm_range *range, uintptr_t newend);
int page_fault_process(struct process *proc, void *fault_addr, uint64_t reason);
int remove_process_region(struct process *proc,
int page_fault_process_vm(struct process_vm *fault_vm, void *fault_addr,
uint64_t reason);
int remove_process_region(struct process_vm *vm,
unsigned long start, unsigned long end);
struct program_load_desc;
int init_process_stack(struct process *process, struct program_load_desc *pn,
int init_process_stack(struct thread *thread, struct program_load_desc *pn,
int argc, char **argv,
int envc, char **env);
unsigned long extend_process_region(struct process *proc,
unsigned long extend_process_region(struct process_vm *vm,
unsigned long start, unsigned long end,
unsigned long address, unsigned long flag);
extern enum ihk_mc_pt_attribute arch_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
enum ihk_mc_pt_attribute common_vrflag_to_ptattr(unsigned long flag, uint64_t fault, pte_t *ptep);
void schedule(void);
void runq_add_proc(struct process *proc, int cpu_id);
void runq_del_proc(struct process *proc, int cpu_id);
int sched_wakeup_process(struct process *proc, int valid_states);
void runq_add_thread(struct thread *thread, int cpu_id);
void runq_del_thread(struct thread *thread, int cpu_id);
int sched_wakeup_thread(struct thread *thread, int valid_states);
void sched_request_migrate(int cpu_id, struct process *proc);
void sched_request_migrate(int cpu_id, struct thread *thread);
void check_need_resched(void);
void cpu_set(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
void cpu_clear(int cpu, cpu_set_t *cpu_set, ihk_spinlock_t *lock);
void cpu_clear_and_set(int c_cpu, int s_cpu,
cpu_set_t *cpu_set, ihk_spinlock_t *lock);
struct process *findthread_and_lock(int pid, int tid, ihk_spinlock_t **savelock, unsigned long *irqstate);
void process_unlock(void *savelock, unsigned long irqstate);
void release_cpuid(int cpuid);
struct thread *find_thread(int pid, int tid, struct mcs_rwlock_node_irqsave *lock);
void thread_unlock(struct thread *thread, struct mcs_rwlock_node_irqsave *lock);
struct process *find_process(int pid, struct mcs_rwlock_node_irqsave *lock);
void process_unlock(struct process *proc, struct mcs_rwlock_node_irqsave *lock);
void chain_process(struct process *);
void chain_thread(struct thread *);
void proc_init();
#endif

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* header file for System V shared memory
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 - 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -12,38 +13,71 @@
#ifndef HEADER_SHM_H
#define HEADER_SHM_H
/* begin types.h */
typedef int32_t key_t;
typedef uint32_t uid_t;
typedef uint32_t gid_t;
typedef int64_t time_t;
typedef int32_t pid_t;
/* end types.h */
#include <list.h>
#include <memobj.h>
#include <arch/shm.h>
typedef uint64_t shmatt_t;
enum {
/* for key_t */
IPC_PRIVATE = 0,
struct ipc_perm {
key_t key;
uid_t uid;
gid_t gid;
uid_t cuid;
gid_t cgid;
uint16_t mode;
uint8_t padding[2];
uint16_t seq;
uint8_t padding2[22];
/* for shmflg */
IPC_CREAT = 01000,
IPC_EXCL = 02000,
SHM_RDONLY = 010000,
SHM_RND = 020000,
SHM_REMAP = 040000,
SHM_EXEC = 0100000,
/* for shm_mode */
SHM_DEST = 01000,
SHM_LOCKED = 02000,
/* for cmd of shmctl() */
IPC_RMID = 0,
IPC_SET = 1,
IPC_STAT = 2,
IPC_INFO = 3,
SHM_LOCK = 11,
SHM_UNLOCK = 12,
SHM_STAT = 13,
SHM_INFO = 14,
};
struct shmid_ds {
struct ipc_perm shm_perm;
size_t shm_segsz;
time_t shm_atime;
time_t shm_dtime;
time_t shm_ctime;
pid_t shm_cpid;
pid_t shm_lpid;
shmatt_t shm_nattch;
uint8_t padding[16];
struct shmobj {
struct memobj memobj; /* must be first */
int index;
uint8_t padding[4];
size_t real_segsz;
struct shmid_ds ds;
struct list_head page_list;
struct list_head chain; /* shmobj_list */
};
struct shminfo {
uint64_t shmmax;
uint64_t shmmin;
uint64_t shmmni;
uint64_t shmseg;
uint64_t shmall;
uint8_t padding[32];
};
struct shm_info {
int32_t used_ids;
uint8_t padding[4];
uint64_t shm_tot;
uint64_t shm_rss;
uint64_t shm_swp;
uint64_t swap_attempts;
uint64_t swap_successes;
};
void shmobj_list_lock(void);
void shmobj_list_unlock(void);
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp);
void shmobj_destroy(struct shmobj *obj);
#endif /* HEADER_SHM_H */

View File

@@ -13,8 +13,11 @@
#ifndef __HEADER_SYSCALL_H
#define __HEADER_SYSCALL_H
#include <ihk/atomic.h>
#include <ihk/context.h>
#include <ihk/memconst.h>
#include <rlimit.h>
#include <time.h>
#define NUM_SYSCALLS 255
@@ -34,12 +37,15 @@
#define SCD_MSG_SYSCALL_ONESIDE 0x4
#define SCD_MSG_SEND_SIGNAL 0x8
#define SCD_MSG_CLEANUP_PROCESS 0x9
#define SCD_MSG_PROCFS_CREATE 0x10
#define SCD_MSG_PROCFS_DELETE 0x11
#define SCD_MSG_PROCFS_REQUEST 0x12
#define SCD_MSG_PROCFS_ANSWER 0x13
#define SCD_MSG_DEBUG_LOG 0x20
#define ARCH_SET_GS 0x1001
#define ARCH_SET_FS 0x1002
#define ARCH_GET_FS 0x1003
@@ -109,6 +115,24 @@ struct program_image_section {
};
#define SHELL_PATH_MAX_LEN 1024
#define MCK_RLIM_MAX 20
#define MCK_RLIMIT_AS 0
#define MCK_RLIMIT_CORE 1
#define MCK_RLIMIT_CPU 2
#define MCK_RLIMIT_DATA 3
#define MCK_RLIMIT_FSIZE 4
#define MCK_RLIMIT_LOCKS 5
#define MCK_RLIMIT_MEMLOCK 6
#define MCK_RLIMIT_MSGQUEUE 7
#define MCK_RLIMIT_NICE 8
#define MCK_RLIMIT_NOFILE 9
#define MCK_RLIMIT_NPROC 10
#define MCK_RLIMIT_RSS 11
#define MCK_RLIMIT_RTPRIO 12
#define MCK_RLIMIT_RTTIME 13
#define MCK_RLIMIT_SIGPENDING 14
#define MCK_RLIMIT_STACK 15
struct program_load_desc {
int num_sections;
@@ -118,6 +142,7 @@ struct program_load_desc {
int err;
int stack_prot;
int pgid;
int cred[8];
unsigned long entry;
unsigned long user_start;
unsigned long user_end;
@@ -132,8 +157,7 @@ struct program_load_desc {
unsigned long args_len;
char *envs;
unsigned long envs_len;
unsigned long rlimit_stack_cur;
unsigned long rlimit_stack_max;
struct rlimit rlimit[MCK_RLIM_MAX];
unsigned long interp_align;
char shell_path[SHELL_PATH_MAX_LEN];
struct program_image_section sections[0];
@@ -217,9 +241,9 @@ struct syscall_params {
SYSCALL_ARG_##a2(2); SYSCALL_ARG_##a3(3); \
SYSCALL_ARG_##a4(4); SYSCALL_ARG_##a5(5);
#define SYSCALL_FOOTER return do_syscall(&request, ctx, ihk_mc_get_processor_id(), 0)
#define SYSCALL_FOOTER return do_syscall(&request, ihk_mc_get_processor_id(), 0)
extern long do_syscall(struct syscall_request *req, ihk_mc_user_context_t *ctx, int cpu, int pid);
extern long do_syscall(struct syscall_request *req, int cpu, int pid);
extern int obtain_clone_cpuid();
extern long syscall_generic_forwarding(int n, ihk_mc_user_context_t *ctx);
@@ -263,4 +287,15 @@ struct procfs_file {
char fname[PROCFS_NAME_MAX]; /* procfs filename (request) */
};
extern void terminate(int, int);
struct tod_data_s {
int8_t do_local;
int8_t padding[7];
ihk_atomic64_t version;
unsigned long clocks_per_sec;
struct timespec origin; /* realtime when tsc=0 */
};
extern struct tod_data_s tod_data; /* residing in arch-dependent file */
#endif

View File

@@ -19,6 +19,8 @@
#ifndef __TIME_H
#define __TIME_H
#define NS_PER_SEC 1000000000UL
typedef long int __time_t;
/* POSIX.1b structure for a time value. This is like a `struct timeval' but

View File

@@ -36,7 +36,7 @@ struct timer {
uint64_t timeout;
struct waitq processes;
struct list_head list;
struct process *proc;
struct thread *thread;
};
uint64_t schedule_timeout(uint64_t timeout);

View File

@@ -19,7 +19,7 @@
#include <ihk/lock.h>
#include <list.h>
struct process;
struct thread;
struct waitq_entry;
typedef int (*waitq_func_t)(struct waitq_entry *wait, unsigned mode,
@@ -58,7 +58,7 @@ typedef struct waitq_entry {
}
extern void waitq_init(waitq_t *waitq);
extern void waitq_init_entry(waitq_entry_t *entry, struct process *proc);
extern void waitq_init_entry(waitq_entry_t *entry, struct thread *proc);
extern int waitq_active(waitq_t *waitq);
extern void waitq_add_entry(waitq_t *waitq, waitq_entry_t *entry);
extern void waitq_add_entry_locked(waitq_t *waitq, waitq_entry_t *entry);

View File

@@ -29,6 +29,7 @@
#include <process.h>
#include <init.h>
#include <cls.h>
#include <syscall.h>
//#define IOCTL_FUNC_EXTENSION
#ifdef IOCTL_FUNC_EXTENSION
@@ -40,7 +41,7 @@
#ifdef DEBUG_PRINT_INIT
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
int osnum = 0;
@@ -118,6 +119,27 @@ char *find_command_line(char *name)
return strstr(cmdline, name);
}
static void parse_kargs(void)
{
kprintf("KCommand Line: %s\n", ihk_mc_get_kernel_args());
if (1) {
char *key = "osnum=";
char *p;
p = find_command_line(key);
if (p != NULL) {
p += strlen(key);
osnum = 0;
while (('0' <= *p) && (*p <= '9')) {
osnum *= 10;
osnum += *p++ - '0';
}
kprintf("osnum: %d\n", osnum);
}
}
}
void pc_init(void)
{
int i;
@@ -134,15 +156,6 @@ void pc_init(void)
APT_TYPE_STALL, APT_TYPE_CYCLE }, // not updated for KNC
};
p = find_command_line("osnum=");
if (p != NULL) {
while (('0' <= *p) && (*p <= '9')) {
osnum *= 10;
osnum += *p++ - '0';
}
}
dkprintf("osnum: %d\n", osnum);
if (!(p = find_command_line("perfctr"))) {
dkprintf("perfctr not initialized.\n");
@@ -187,12 +200,36 @@ static void pc_test(void)
ed[1] - st[1], ed[2] - st[2], ed[3] - st[3]);
}
extern void ihk_mc_get_boot_time(unsigned long *tv_sec, unsigned long *tv_nsec);
extern unsigned long ihk_mc_get_ns_per_tsc(void);
static void time_init(void)
{
unsigned long tv_sec, tv_nsec;
unsigned long ns_per_kclock;
ihk_mc_get_boot_time(&tv_sec, &tv_nsec);
ns_per_kclock = ihk_mc_get_ns_per_tsc();
tod_data.origin.tv_sec = tv_sec;
tod_data.origin.tv_nsec = tv_nsec;
if (ns_per_kclock) {
tod_data.clocks_per_sec = (1000L * NS_PER_SEC) / ns_per_kclock;
}
if (!ns_per_kclock) {
gettime_local_support = 0;
}
if (gettime_local_support) {
tod_data.do_local = 1;
}
return;
}
static void rest_init(void)
{
char *cmdline;
cmdline = ihk_mc_get_kernel_args();
kprintf("KCommand Line: %s\n", cmdline);
handler_init();
#ifdef USE_DMA
@@ -203,14 +240,19 @@ static void rest_init(void)
ap_init();
cpu_local_var_init();
time_init();
kmalloc_init();
ikc_master_init();
proc_init();
sched_init();
}
int host_ikc_inited = 0;
extern int num_processors;
extern void zero_tsc(void);
static void post_init(void)
{
@@ -228,7 +270,14 @@ static void post_init(void)
init_host_syscall_channel2();
ihk_mc_spinlock_init(&syscall_lock);
}
/* Zero TSC.
* All AP cores are wait spinning for ap_start() and they will zero
* their TSC immediatly. */
zero_tsc();
ap_start();
create_os_procfs_files();
}
#ifdef DCFA_RUN
extern void user_main();
@@ -247,6 +296,14 @@ int main(void)
arch_init();
/*
* In attached-mic,
* bootparam is not mapped until arch_init() is finished.
* In builtin-mic and builtin-x86,
* virtual address of bootparam is changed in arch_init().
*/
parse_kargs();
mem_init();
rest_init();

View File

@@ -24,7 +24,7 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif

View File

@@ -44,7 +44,7 @@
#define dkprintf(...) kprintf(__VA_ARGS__)
#define ekprintf(...) kprintf(__VA_ARGS__)
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#define ekprintf(...) kprintf(__VA_ARGS__)
#endif
@@ -56,6 +56,8 @@ extern int ihk_mc_pt_print_pte(struct page_table *pt, void *virt);
struct tlb_flush_entry tlb_flush_vector[IHK_TLB_FLUSH_IRQ_VECTOR_SIZE];
int anon_on_demand = 0;
static void reserve_pages(unsigned long start, unsigned long end, int type)
{
if (start < pa_start) {
@@ -171,8 +173,8 @@ static struct ihk_mc_interrupt_handler query_free_mem_handler = {
};
void set_signal(int sig, void *regs, struct siginfo *info);
void check_signal(unsigned long rc, void *regs);
int gencore(struct process *, void *, struct coretable **, int *);
void check_signal(unsigned long, void *, int);
int gencore(struct thread *, void *, struct coretable **, int *);
void freecore(struct coretable **);
/**
@@ -182,14 +184,14 @@ void freecore(struct coretable **);
* \param regs A pointer to a x86_regs structure.
*/
void coredump(struct process *proc, void *regs)
void coredump(struct thread *thread, void *regs)
{
struct syscall_request request IHK_DMA_ALIGN;
int ret;
struct coretable *coretable;
int chunks;
ret = gencore(proc, regs, &coretable, &chunks);
ret = gencore(thread, regs, &coretable, &chunks);
if (ret != 0) {
dkprintf("could not generate a core file image\n");
return;
@@ -198,8 +200,7 @@ void coredump(struct process *proc, void *regs)
request.args[0] = chunks;
request.args[1] = virt_to_phys(coretable);
/* no data for now */
ret = do_syscall(&request, proc->uctx,
proc->cpu_id, proc->ftn->pid);
ret = do_syscall(&request, thread->cpu_id, thread->proc->pid);
if (ret == 0) {
kprintf("dumped core.\n");
} else {
@@ -208,14 +209,14 @@ void coredump(struct process *proc, void *regs)
freecore(&coretable);
}
static void unhandled_page_fault(struct process *proc, void *fault_addr, void *regs)
static void unhandled_page_fault(struct thread *thread, void *fault_addr, void *regs)
{
const uintptr_t address = (uintptr_t)fault_addr;
struct process_vm *vm = proc->vm;
struct process_vm *vm = thread->vm;
struct vm_range *range;
char found;
int irqflags;
unsigned long error = ((struct x86_regs *)regs)->error;
unsigned long irqflags;
unsigned long error = ((struct x86_user_context *)regs)->gpr.error;
irqflags = kprintf_lock();
dkprintf("[%d] Page fault for 0x%lX\n",
@@ -234,7 +235,7 @@ static void unhandled_page_fault(struct process *proc, void *fault_addr, void *r
found = 1;
dkprintf("address is in range, flag: 0x%X! \n",
range->flag);
ihk_mc_pt_print_pte(vm->page_table, (void*)address);
ihk_mc_pt_print_pte(vm->address_space->page_table, (void*)address);
break;
}
}
@@ -365,33 +366,62 @@ void tlb_flush_handler(int vector)
static void page_fault_handler(void *fault_addr, uint64_t reason, void *regs)
{
struct process *proc = cpu_local_var(current);
struct thread *thread = cpu_local_var(current);
int error;
dkprintf("[%d]page_fault_handler(%p,%lx,%p)\n",
ihk_mc_get_processor_id(), fault_addr, reason, regs);
error = page_fault_process(proc, fault_addr, reason);
preempt_disable();
cpu_enable_interrupt();
error = page_fault_process_vm(thread->vm, fault_addr, reason);
if (error) {
struct siginfo info;
if (error == -ECANCELED) {
dkprintf("process is exiting, terminate.\n");
preempt_enable();
terminate(0, SIGSEGV);
// no return
}
kprintf("[%d]page_fault_handler(%p,%lx,%p):"
"fault proc failed. %d\n",
"fault vm failed. %d, TID: %d\n",
ihk_mc_get_processor_id(), fault_addr,
reason, regs, error);
unhandled_page_fault(proc, fault_addr, regs);
reason, regs, error, thread->tid);
unhandled_page_fault(thread, fault_addr, regs);
preempt_enable();
memset(&info, '\0', sizeof info);
if (error == -ERANGE) {
info.si_signo = SIGBUS;
info.si_code = BUS_ADRERR;
info._sifields._sigfault.si_addr = fault_addr;
set_signal(SIGBUS, regs, &info);
}
else {
struct process_vm *vm = thread->vm;
struct vm_range *range;
info.si_signo = SIGSEGV;
info.si_code = SEGV_MAPERR;
list_for_each_entry(range, &vm->vm_range_list, list) {
if (range->start <= (unsigned long)fault_addr && range->end > (unsigned long)fault_addr) {
info.si_code = SEGV_ACCERR;
break;
}
}
info._sifields._sigfault.si_addr = fault_addr;
set_signal(SIGSEGV, regs, &info);
}
check_signal(0, regs);
check_signal(0, regs, 0);
goto out;
}
error = 0;
preempt_enable();
out:
dkprintf("[%d]page_fault_handler(%p,%lx,%p): (%d)\n",
ihk_mc_get_processor_id(), fault_addr, reason,
@@ -648,6 +678,11 @@ void mem_init(void)
/* Prepare the kernel virtual map space */
virtual_allocator_init();
if (find_command_line("anon_on_demand")) {
kprintf("Demand paging on ANONYMOUS mappings enabled.\n");
anon_on_demand = 1;
}
}
struct location {
@@ -839,12 +874,10 @@ int memcheckall()
struct alloc *ap;
int r = 0;
kprintf("memcheckall\n");
for(i = 0; i < HASHNUM; i++)
for(ap = allochash[i]; ap; ap = ap->next)
if(ap->p)
r |= _memcheck(ap->p + 1, "memcheck", NULL, 0, 2);
kprintf("done\n");
return r;
}
@@ -942,8 +975,11 @@ void *___kmalloc(int size, enum ihk_mc_ap_flag flag)
>> PAGE_SHIFT;
h = allocate_pages(req_page, flag);
if(h == NULL)
if(h == NULL) {
kprintf("kmalloc(%#x,%#x): out of memory\n", size, flag);
ihk_mc_spinlock_unlock(&v->free_list_lock, flags);
return NULL;
}
h->check = 0x5a5a5a5a;
prev->next = h;
h->size = (req_page * PAGE_SIZE) / sizeof(*h) - 2;

File diff suppressed because it is too large Load Diff

View File

@@ -47,6 +47,9 @@ static void create_proc_procfs_file(int pid, char *fname, int mode, int cpuid);
static void delete_proc_procfs_file(int pid, char *fname);
static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, int cpuid);
int copy_from_user(void *dst, const void *src, size_t siz);
int copy_to_user(void *dst, const void *src, size_t siz);
/**
* \brief Create all procfs files for process.
*
@@ -63,9 +66,21 @@ void create_proc_procfs_files(int pid, int cpuid)
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
create_proc_procfs_file(pid, fname, 0444, cpuid);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/task/%d/mem", osnum, pid, pid);
create_proc_procfs_file(pid, fname, 0400, cpuid);
@@ -116,6 +131,18 @@ void delete_proc_procfs_files(int pid)
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/mem", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/maps", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/status", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/pagemap", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/cmdline", osnum, pid);
delete_proc_procfs_file(pid, fname);
snprintf(fname, PROCFS_NAME_MAX, "mcos%d/%d/auxv", osnum, pid);
delete_proc_procfs_file(pid, fname);
@@ -139,6 +166,42 @@ static void delete_proc_procfs_file(int pid, char *fname)
dprintf("delete procfs file: %s done\n", fname);
}
/**
* \brief create a procfs file for this operating system
* \param fname relative path name from "host:/proc".
* \param mode permissions of the file to be created
*
* Though operate_proc_procfs_file() is intended to create a process
* specific file, it is reused to create a OS specific file by
* specifying -1 as the pid parameter.
*/
static void create_os_procfs_file(char *fname, int mode)
{
const pid_t pid = -1;
const int msg = SCD_MSG_PROCFS_CREATE;
const int cpuid = ihk_mc_get_processor_id(); /* i.e. BSP */
operate_proc_procfs_file(pid, fname, msg, mode, cpuid);
return;
}
/**
* \brief create all procfs files for this operating system
*/
void create_os_procfs_files(void)
{
char *fname = NULL;
size_t n;
fname = kmalloc(PROCFS_NAME_MAX, IHK_MC_AP_CRITICAL);
n = snprintf(fname, PROCFS_NAME_MAX, "mcos%d/stat", osnum);
if (n >= PROCFS_NAME_MAX) panic("/proc/stat");
create_os_procfs_file(fname, 0444);
return;
}
/**
* \brief Create/delete a procfs file for process.
*
@@ -194,14 +257,18 @@ static void operate_proc_procfs_file(int pid, char *fname, int msg, int mode, in
void process_procfs_request(unsigned long rarg)
{
unsigned long parg, pbuf;
struct process *proc = cpu_local_var(current);
struct thread *thread = cpu_local_var(current);
struct process *proc = thread->proc;
struct procfs_read *r;
struct ikc_scd_packet packet;
int rosnum, ret, pid, tid, ans = -EIO, eof = 0;
char *buf, *p;
struct ihk_ikc_channel_desc *syscall_channel;
ihk_spinlock_t *savelock;
unsigned long irqstate;
struct mcs_rwlock_node_irqsave lock;
unsigned long offset;
int count;
int npages;
int is_current = 1; /* is 'proc' same as 'current'? */
dprintf("process_procfs_request: invoked.\n");
@@ -221,7 +288,9 @@ void process_procfs_request(unsigned long rarg)
dprintf("remote pbuf: %x\n", r->pbuf);
pbuf = ihk_mc_map_memory(NULL, r->pbuf, r->count);
dprintf("pbuf: %x\n", pbuf);
buf = ihk_mc_map_virtual(pbuf, 1, PTATTR_WRITABLE | PTATTR_ACTIVE);
count = r->count + ((uintptr_t)pbuf & (PAGE_SIZE - 1));
npages = (count + (PAGE_SIZE - 1)) / PAGE_SIZE;
buf = ihk_mc_map_virtual(pbuf, npages, PTATTR_WRITABLE | PTATTR_ACTIVE);
dprintf("buf: %p\n", buf);
if (buf == NULL) {
kprintf("ERROR: process_procfs_request: got a null buffer.\n");
@@ -229,6 +298,8 @@ void process_procfs_request(unsigned long rarg)
goto bufunavail;
}
count = r->count;
offset = r->offset;
dprintf("fname: %s, offset: %lx, count:%d.\n", r->fname, r->offset, r->count);
/*
@@ -265,23 +336,62 @@ void process_procfs_request(unsigned long rarg)
*/
ret = sscanf(p, "%d/", &pid);
if (ret == 1) {
if (pid != cpu_local_var(current)->ftn->pid) {
if (pid != cpu_local_var(current)->proc->pid) {
/* We are not located in the proper cpu for some reason. */
dprintf("mismatched pid. We are %d, but requested pid is %d.\n",
pid, cpu_local_var(current)->pid);
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
/* The target process has gone by migration. */
r->newcpu = proc->cpu_id;
dprintf("expected cpu id is %d.\n", proc->cpu_id);
process_unlock(savelock, irqstate);
ans = 0;
} else {
tid = pid; /* main thread */
thread = find_thread(pid, tid, &lock);
if (!thread) {
dprintf("We cannot find the proper cpu for requested pid.\n");
goto end;
}
else if (thread->cpu_id != ihk_mc_get_processor_id()) {
/* The target process has gone by migration. */
r->newcpu = thread->cpu_id;
dprintf("expected cpu id is %d.\n", thread->cpu_id);
thread_unlock(thread, &lock);
ans = 0;
goto end;
}
else {
thread_unlock(thread, &lock);
/* 'proc' is not 'current' */
is_current = 0;
}
proc = thread->proc;
}
}
else if (!strcmp(p, "stat")) { /* "/proc/stat" */
extern int num_processors; /* kernel/ap.c */
char *p;
size_t remain;
int cpu;
if (offset > 0) {
ans = 0;
eof = 1;
goto end;
}
} else {
p = buf;
remain = count;
for (cpu = 0; cpu < num_processors; ++cpu) {
size_t n;
n = snprintf(p, remain, "cpu%d\n", cpu);
if (n >= remain) {
ans = -ENOSPC;
eof = 1;
goto end;
}
p += n;
}
ans = p - buf;
eof = 1;
goto end;
}
else {
goto end;
}
dprintf("matched PID: %d.\n", pid);
@@ -297,19 +407,214 @@ void process_procfs_request(unsigned long rarg)
struct vm_range *range;
struct process_vm *vm = proc->vm;
list_for_each_entry(range, &vm->vm_range_list, list) {
dprintf("range: %lx - %lx\n", range->start, range->end);
if ((range->start <= r->offset) &&
(r->offset < range->end)) {
unsigned int len = r->count;
if (range->end < r->offset + r->count) {
len = range->end - r->offset;
if (!is_current) {
uint64_t reason = PF_POPULATE | PF_WRITE | PF_USER;
unsigned long offset = r->offset;
unsigned long left = r->count;
int ret;
ans = 0;
if(left == 0)
goto end;
while(left){
unsigned long pa;
char *va;
int pos = offset & (PAGE_SIZE - 1);
int size = PAGE_SIZE - pos;
if(size > left)
size = left;
ret = page_fault_process_vm(proc->vm,
(void *)offset, reason);
if(ret){
if(ans == 0)
ans = -EIO;
goto end;
}
memcpy((void *)buf, (void *)range->start, len);
ans = len;
ret = ihk_mc_pt_virt_to_phys(vm->address_space->page_table,
(void *)offset, &pa);
if(ret){
if(ans == 0)
ans = -EIO;
goto end;
}
va = phys_to_virt(pa);
memcpy(buf + ans, va, size);
offset += size;
left -= size;
ans += size;
}
}
else{
unsigned long offset = r->offset;
unsigned long left = r->count;
unsigned long pos;
unsigned long l;
ans = 0;
list_for_each_entry(range, &vm->vm_range_list, list) {
dprintf("range: %lx - %lx\n", range->start, range->end);
while (left &&
(range->start <= offset) &&
(offset < range->end)) {
pos = offset & (PAGE_SIZE - 1);
l = PAGE_SIZE - pos;
if(l > left)
l = left;
if(copy_from_user(buf, (void *)offset, l)){
if(ans == 0)
ans = -EIO;
goto end;
}
buf += l;
ans += l;
offset += l;
left -= l;
}
}
}
goto end;
}
/*
* mcos%d/PID/maps
*/
if (strcmp(p, "maps") == 0) {
struct vm_range *range;
struct process_vm *vm = proc->vm;
int left = r->count - 1; /* extra 1 for terminating NULL */
int written = 0;
char *_buf = buf;
/* Starting from the middle of a proc file is not supported for maps */
if (offset > 0) {
ans = 0;
eof = 1;
goto end;
}
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
list_for_each_entry(range, &vm->vm_range_list, list) {
int written_now;
/* format is (from man proc):
* address perms offset dev inode pathname
* 08048000-08056000 r-xp 00000000 03:0c 64593 /usr/sbin/gpm
*/
written_now = snprintf(_buf, left,
"%lx-%lx %s%s%s%s %lx %lx:%lx %d %s\n",
range->start, range->end,
range->flag & VR_PROT_READ ? "r" : "-",
range->flag & VR_PROT_WRITE ? "w" : "-",
range->flag & VR_PROT_EXEC ? "x" : "-",
range->flag & VR_PRIVATE ? "p" : "s",
/* TODO: fill in file details! */
0UL,
0UL,
0UL,
0,
""
);
left -= written_now;
_buf += written_now;
written += written_now;
if (left == 0) {
kprintf("%s(): WARNING: buffer too small to fill proc/maps\n",
__FUNCTION__);
break;
}
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
ans = written + 1;
eof = 1;
goto end;
}
/*
* mcos%d/PID/pagemap
*/
if (strcmp(p, "pagemap") == 0) {
struct process_vm *vm = proc->vm;
uint64_t *_buf = (uint64_t *)buf;
uint64_t start, end;
if (offset < PAGE_SIZE) {
kprintf("WARNING: /proc/pagemap queried for NULL page\n");
ans = 0;
goto end;
}
/* Check alignment */
if ((offset % sizeof(uint64_t) != 0) ||
(count % sizeof(uint64_t) != 0)) {
ans = 0;
eof = 1;
goto end;
}
start = (offset / sizeof(uint64_t)) << PAGE_SHIFT;
end = start + ((count / sizeof(uint64_t)) << PAGE_SHIFT);
ihk_mc_spinlock_lock_noirq(&vm->memory_range_lock);
while (start < end) {
*_buf = ihk_mc_pt_virt_to_pagemap(proc->vm->address_space->page_table, start);
dprintf("PID: %d, /proc/pagemap: 0x%lx -> %lx\n", proc->proc->pid,
start, *_buf);
start += PAGE_SIZE;
++_buf;
}
ihk_mc_spinlock_unlock_noirq(&vm->memory_range_lock);
dprintf("/proc/pagemap: 0x%lx - 0x%lx, count: %d\n",
start, end, count);
ans = count;
goto end;
}
/*
* mcos%d/PID/status
*/
if (strcmp(p, "status") == 0) {
struct vm_range *range;
unsigned long lockedsize = 0;
char tmp[1024];
int len;
ihk_mc_spinlock_lock_noirq(&proc->vm->memory_range_lock);
list_for_each_entry(range, &proc->vm->vm_range_list, list) {
if(range->flag & VR_LOCKED)
lockedsize += range->end - range->start;
}
ihk_mc_spinlock_unlock_noirq(&proc->vm->memory_range_lock);
sprintf(tmp,
"Uid:\t%d\t%d\t%d\t%d\n"
"Gid:\t%d\t%d\t%d\t%d\n"
"VmLck:\t%9lu kB\n",
proc->ruid, proc->euid, proc->suid, proc->fsuid,
proc->rgid, proc->egid, proc->sgid, proc->fsgid,
(lockedsize + 1023) >> 10);
len = strlen(tmp);
if (r->offset < len) {
if (r->offset + r->count < len) {
ans = r->count;
} else {
eof = 1;
ans = len;
}
strncpy(buf, tmp + r->offset, ans);
} else if (r->offset == len) {
ans = 0;
eof = 1;
}
goto end;
}
@@ -335,6 +640,35 @@ void process_procfs_request(unsigned long rarg)
goto end;
}
/*
* mcos%d/PID/cmdline
*/
if (strcmp(p, "cmdline") == 0) {
unsigned int limit = proc->saved_cmdline_len;
unsigned int len = r->count;
if(!proc->saved_cmdline){
ans = 0;
eof = 1;
goto end;
}
if (r->offset < limit) {
if (limit < r->offset + r->count) {
len = limit - r->offset;
}
memcpy((void *)buf, ((char *) proc->saved_cmdline) + r->offset, len);
ans = len;
if (r->offset + len == limit) {
eof = 1;
}
} else if (r->offset == limit) {
ans = 0;
eof = 1;
}
goto end;
}
/*
* mcos%d/PID/taks/PID/mem
*
@@ -351,6 +685,9 @@ void process_procfs_request(unsigned long rarg)
struct vm_range *range;
struct process_vm *vm = proc->vm;
if (!is_current) {
goto end;
}
if (pid != tid) {
/* We are not multithreaded yet. */
goto end;
@@ -375,7 +712,7 @@ void process_procfs_request(unsigned long rarg)
char tmp[1024];
int len;
if ((proc = findthread_and_lock(pid, tid, &savelock, &irqstate))){
if ((thread = find_thread(pid, tid, &lock))){
dprintf("thread found! pid=%d tid=%d\n", pid, tid);
/*
* pid (comm) state ppid
@@ -411,10 +748,10 @@ void process_procfs_request(unsigned long rarg)
0L, 0L, 0L, 0L, // rsslim...
0L, 0L, 0L, 0L, // kstkesp...
0L, 0L, 0L, 0L, // sigignore...
0L, 0, proc->cpu_id, 0, // cnswap...
0L, 0, thread->cpu_id, 0, // cnswap...
0, 0LL, 0L, 0L // policy...
);
process_unlock(savelock, irqstate);
thread_unlock(thread, &lock);
dprintf("tmp=%s\n", tmp);
len = strlen(tmp);
@@ -445,7 +782,7 @@ void process_procfs_request(unsigned long rarg)
*/
dprintf("could not find a matching entry for %s.\n", p);
end:
ihk_mc_unmap_virtual(buf, 1, 0);
ihk_mc_unmap_virtual(buf, npages, 0);
dprintf("ret: %d, eof: %d\n", ans, eof);
r->ret = ans;
r->eof = eof;

View File

@@ -0,0 +1,24 @@
#!/bin/sh
cp $1 $2
exit 0
#set -e
#
#O=`pwd`
#
#make -C $3/../arch/x86/kboot O=$O clean
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x101001000
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x901001000
#
#make -C $3/../arch/x86/elfboot O=$O clean
#make -C $3/../arch/x86/elfboot O=$O
#
#cat elfboot/elfboot kboot/kboot.elf > $2
#
#make -C $3/../arch/x86/kboot O=$O clean
##make -C $3/../kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x3a001000
#make -C $3/../arch/x86/kboot O=$O KIMAGE="$O/$1" LOAD_PA=0x201001000
#cat elfboot/elfboot kboot/kboot.elf > $2.8G

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* shared memory object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 - 2015 RIKEN AICS
*/
/*
* HISTORY:
@@ -26,21 +27,19 @@
#define ekprintf(...) kprintf(__VA_ARGS__)
#define fkprintf(...) kprintf(__VA_ARGS__)
struct shmobj {
struct memobj memobj; /* must be first */
long ref;
struct shmid_ds ds;
struct list_head page_list;
};
static LIST_HEAD(shmobj_list_head);
static ihk_spinlock_t shmobj_list_lock_body = SPIN_LOCK_UNLOCKED;
static memobj_release_func_t shmobj_release;
static memobj_ref_func_t shmobj_ref;
static memobj_get_page_func_t shmobj_get_page;
static memobj_invalidate_page_func_t shmobj_invalidate_page;
static struct memobj_ops shmobj_ops = {
.release = &shmobj_release,
.ref = &shmobj_ref,
.get_page = &shmobj_get_page,
.invalidate_page = &shmobj_invalidate_page,
};
static struct shmobj *to_shmobj(struct memobj *memobj)
@@ -98,6 +97,25 @@ static struct page *page_list_first(struct shmobj *obj)
return list_first_entry(&obj->page_list, struct page, list);
}
/***********************************************************************
* shmobj_list
*/
void shmobj_list_lock(void)
{
ihk_mc_spinlock_lock_noirq(&shmobj_list_lock_body);
return;
}
void shmobj_list_unlock(void)
{
ihk_mc_spinlock_unlock_noirq(&shmobj_list_lock_body);
return;
}
/***********************************************************************
* operations
*/
int the_seq = 0;
int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
{
struct shmobj *obj = NULL;
@@ -114,8 +132,11 @@ int shmobj_create(struct shmid_ds *ds, struct memobj **objp)
memset(obj, 0, sizeof(*obj));
obj->memobj.ops = &shmobj_ops;
obj->ref = 1;
obj->ds = *ds;
obj->ds.shm_perm.seq = the_seq++;
obj->ds.shm_nattch = 1;
obj->index = -1;
obj->real_segsz = (obj->ds.shm_segsz + PAGE_SIZE - 1) & PAGE_MASK;
page_list_init(obj);
ihk_mc_spinlock_init(&obj->memobj.lock);
@@ -127,65 +148,124 @@ out:
if (obj) {
kfree(obj);
}
dkprintf("shmobj_create(%p %#lx,%p):%d %p\n",
dkprintf("shmobj_create_indexed(%p %#lx,%p):%d %p\n",
ds, ds->shm_segsz, objp, error, *objp);
return error;
}
int shmobj_create_indexed(struct shmid_ds *ds, struct shmobj **objp)
{
int error;
struct memobj *obj;
error = shmobj_create(ds, &obj);
if (!error) {
obj->flags |= MF_SHMDT_OK | MF_IS_REMOVABLE;
*objp = to_shmobj(obj);
}
return error;
}
void shmobj_destroy(struct shmobj *obj)
{
extern struct shm_info the_shm_info;
extern struct list_head kds_free_list;
extern int the_maxi;
dkprintf("shmobj_destroy(%p [%d %o])\n", obj, obj->index, obj->ds.shm_perm.mode);
/* zap page_list */
for (;;) {
struct page *page;
int count;
page = page_list_first(obj);
if (!page) {
break;
}
page_list_remove(obj, page);
dkprintf("shmobj_destroy(%p):"
"release page. %p %#lx %d %d",
obj, page, page_to_phys(page),
page->mode, page->count);
count = ihk_atomic_sub_return(1, &page->count);
if (!((page->mode == PM_MAPPED) && (count == 0))) {
fkprintf("shmobj_destroy(%p): "
"page %p phys %#lx mode %#x"
" count %d off %#lx\n",
obj, page,
page_to_phys(page),
page->mode, count,
page->offset);
panic("shmobj_release");
}
/* XXX:NYI: large pages */
page->mode = PM_NONE;
free_pages(phys_to_virt(page_to_phys(page)), 1);
}
if (obj->index < 0) {
kfree(obj);
}
else {
list_del(&obj->chain);
--the_shm_info.used_ids;
list_add(&obj->chain, &kds_free_list);
for (;;) {
struct shmobj *p;
list_for_each_entry(p, &kds_free_list, chain) {
if (p->index == the_maxi) {
break;
}
}
if (&p->chain == &kds_free_list) {
break;
}
list_del(&p->chain);
kfree(p);
--the_maxi;
}
}
return;
}
static void shmobj_release(struct memobj *memobj)
{
struct shmobj *obj = to_shmobj(memobj);
struct shmobj *freeobj = NULL;
long newref;
extern time_t time(void);
extern pid_t getpid(void);
dkprintf("shmobj_release(%p)\n", memobj);
memobj_lock(&obj->memobj);
--obj->ref;
if (obj->ref <= 0) {
if (obj->ref < 0) {
if (obj->index >= 0) {
obj->ds.shm_dtime = time();
obj->ds.shm_lpid = getpid();
dkprintf("shmobj_release:drop shm_nattach %p %d\n", obj, obj->ds.shm_nattch);
}
newref = --obj->ds.shm_nattch;
if (newref <= 0) {
if (newref < 0) {
fkprintf("shmobj_release(%p):ref %ld\n",
memobj, obj->ref);
memobj, newref);
panic("shmobj_release:freeing free shmobj");
}
freeobj = obj;
if (obj->ds.shm_perm.mode & SHM_DEST) {
freeobj = obj;
}
}
memobj_unlock(&obj->memobj);
if (freeobj) {
/* zap page_list */
for (;;) {
struct page *page;
int count;
page = page_list_first(obj);
if (!page) {
break;
}
page_list_remove(obj, page);
dkprintf("shmobj_release(%p):"
"release page. %p %#lx %d %d",
memobj, page, page_to_phys(page),
page->mode, page->count);
count = ihk_atomic_sub_return(1, &page->count);
if (!((page->mode == PM_MAPPED) && (count == 0))) {
fkprintf("shmobj_release(%p): "
"page %p phys %#lx mode %#x"
" count %d off %#lx\n",
memobj, page,
page_to_phys(page),
page->mode, count,
page->offset);
panic("shmobj_release");
}
/* XXX:NYI: large pages */
page->mode = PM_NONE;
free_pages(phys_to_virt(page_to_phys(page)), 1);
}
dkprintf("shmobj_release(%p):free shmobj", memobj);
kfree(freeobj);
shmobj_list_lock();
shmobj_destroy(freeobj);
shmobj_list_unlock();
}
dkprintf("shmobj_release(%p):\n", memobj);
dkprintf("shmobj_release(%p): %ld\n", memobj, newref);
return;
}
@@ -193,17 +273,23 @@ static void shmobj_ref(struct memobj *memobj)
{
struct shmobj *obj = to_shmobj(memobj);
long newref;
extern time_t time(void);
extern pid_t getpid(void);
dkprintf("shmobj_ref(%p)\n", memobj);
memobj_lock(&obj->memobj);
newref = ++obj->ref;
newref = ++obj->ds.shm_nattch;
if (obj->index >= 0) {
obj->ds.shm_atime = time();
obj->ds.shm_lpid = getpid();
}
memobj_unlock(&obj->memobj);
dkprintf("shmobj_ref(%p): newref %ld\n", memobj, newref);
return;
}
static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
uintptr_t *physp, unsigned long *pflag)
{
struct shmobj *obj = to_shmobj(memobj);
int error;
@@ -227,13 +313,13 @@ static int shmobj_get_page(struct memobj *memobj, off_t off, int p2align,
memobj, off, p2align, physp, error);
goto out;
}
if (obj->ds.shm_segsz <= off) {
if (obj->real_segsz <= off) {
error = -ERANGE;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):beyond the end. %d\n",
memobj, off, p2align, physp, error);
goto out;
}
if ((obj->ds.shm_segsz - off) < (PAGE_SIZE << p2align)) {
if ((obj->real_segsz - off) < (PAGE_SIZE << p2align)) {
error = -ENOSPC;
ekprintf("shmobj_get_page(%p,%#lx,%d,%p):too large. %d\n",
memobj, off, p2align, physp, error);
@@ -285,3 +371,30 @@ out:
memobj, off, p2align, physp, error);
return error;
}
static int shmobj_invalidate_page(struct memobj *memobj, uintptr_t phys,
size_t pgsize)
{
struct shmobj *obj = to_shmobj(memobj);
int error;
struct page *page;
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx)\n", memobj, phys, pgsize);
if (!(page = phys_to_page(phys))
|| !(page = page_list_lookup(obj, page->offset))) {
error = 0;
goto out;
}
if (ihk_atomic_read(&page->count) == 1) {
if (page_unmap(page)) {
ihk_mc_free_pages(phys_to_virt(phys), pgsize/PAGE_SIZE);
}
}
error = 0;
out:
dkprintf("shmobj_invalidate_page(%p,%#lx,%#lx):%d\n", memobj, phys, pgsize, error);
return error;
}

File diff suppressed because it is too large Load Diff

View File

@@ -38,10 +38,10 @@
#ifdef DEBUG_PRINT_TIMER
#define dkprintf kprintf
#else
#define dkprintf(...)
#define dkprintf(...) do { if (0) kprintf(__VA_ARGS__); } while (0)
#endif
#define LOOP_TIMEOUT 10
#define LOOP_TIMEOUT 500
struct list_head timers;
ihk_spinlock_t timers_lock;
@@ -57,23 +57,25 @@ uint64_t schedule_timeout(uint64_t timeout)
{
struct waitq_entry my_wait;
struct timer my_timer;
struct process *proc = cpu_local_var(current);
struct thread *thread = cpu_local_var(current);
int irqstate;
int spin_sleep;
ihk_mc_spinlock_lock_noirq(&proc->spin_sleep_lock);
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
dkprintf("schedule_timeout() spin sleep timeout: %lu\n", timeout);
proc->spin_sleep = 1;
ihk_mc_spinlock_unlock_noirq(&proc->spin_sleep_lock);
spin_sleep = ++thread->spin_sleep;
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
/* Spin sleep.. */
for (;;) {
uint64_t t_s = rdtsc();
uint64_t t_e;
int spin_over = 0;
ihk_mc_spinlock_lock_noirq(&proc->spin_sleep_lock);
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
/* Woken up by someone? */
if (!proc->spin_sleep) {
if (thread->spin_sleep < 1) {
t_e = rdtsc();
spin_over = 1;
@@ -85,32 +87,76 @@ uint64_t schedule_timeout(uint64_t timeout)
}
}
ihk_mc_spinlock_unlock_noirq(&proc->spin_sleep_lock);
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
t_s = rdtsc();
if (!spin_over) {
t_s = rdtsc();
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
int irqstate = ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
/* Give a chance to another thread (if any) in case the core is
* oversubscribed, but make sure we will be re-scheduled */
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
schedule();
xchg4(&(cpu_local_var(current)->status),
PS_INTERRUPTIBLE);
}
else {
/* Spin wait */
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
cpu_pause();
}
if (timeout < LOOP_TIMEOUT) {
timeout = 0;
spin_over = 1;
}
else {
timeout -= LOOP_TIMEOUT;
}
}
}
while ((rdtsc() - t_s) < LOOP_TIMEOUT) {
cpu_pause();
}
if (timeout < LOOP_TIMEOUT) {
timeout = 0;
spin_over = 1;
}
else {
timeout -= LOOP_TIMEOUT;
}
if (spin_over) {
dkprintf("schedule_timeout() spin woken up, timeout: %lu\n",
timeout);
/* Give a chance to another thread (if any) in case we timed out,
* but make sure we will be re-scheduled */
if (timeout == 0) {
int need_schedule;
struct cpu_local_var *v = get_this_cpu_local_var();
int irqstate =
ihk_mc_spinlock_lock(&(v->runq_lock));
need_schedule = v->runq_len > 1 ? 1 : 0;
ihk_mc_spinlock_unlock(&(v->runq_lock), irqstate);
if (need_schedule) {
xchg4(&(cpu_local_var(current)->status), PS_RUNNING);
schedule();
xchg4(&(cpu_local_var(current)->status),
PS_INTERRUPTIBLE);
}
}
irqstate = ihk_mc_spinlock_lock(&thread->spin_sleep_lock);
if (spin_sleep == thread->spin_sleep) {
--thread->spin_sleep;
}
ihk_mc_spinlock_unlock(&thread->spin_sleep_lock, irqstate);
return timeout;
}
}
/* Init waitq and wait entry for this timer */
my_timer.timeout = (timeout < LOOP_TIMEOUT) ? LOOP_TIMEOUT : timeout;
my_timer.proc = cpu_local_var(current);
my_timer.thread = cpu_local_var(current);
waitq_init(&my_timer.processes);
waitq_init_entry(&my_wait, cpu_local_var(current));
@@ -167,7 +213,7 @@ void wake_timers_loop(void)
list_del(&timer->list);
dkprintf("timers timeout occurred, waking up pid: %d\n",
timer->proc->pid);
timer->thread->proc->pid);
waitq_wakeup(&timer->processes);
}

View File

@@ -19,7 +19,7 @@ int
default_wake_function(waitq_entry_t *entry, unsigned mode,
int flags, void *key)
{
return sched_wakeup_process(entry->private, PS_NORMAL);
return sched_wakeup_thread(entry->private, PS_NORMAL);
}
void
@@ -30,7 +30,7 @@ waitq_init(waitq_t *waitq)
}
void
waitq_init_entry(waitq_entry_t *entry, struct process *proc)
waitq_init_entry(waitq_entry_t *entry, struct thread *proc)
{
entry->private = proc;
entry->func = default_wake_function;
@@ -89,14 +89,14 @@ waitq_prepare_to_wait(waitq_t *waitq, waitq_entry_t *entry, int state)
ihk_mc_spinlock_lock_noirq(&waitq->lock);
if (list_empty(&entry->link))
list_add(&entry->link, &waitq->waitq);
cpu_local_var(current)->ftn->status = state;
cpu_local_var(current)->status = state;
ihk_mc_spinlock_unlock_noirq(&waitq->lock);
}
void
waitq_finish_wait(waitq_t *waitq, waitq_entry_t *entry)
{
cpu_local_var(current)->ftn->status = PS_RUNNING;
cpu_local_var(current)->status = PS_RUNNING;
waitq_remove_entry(waitq, entry);
}

View File

@@ -3,7 +3,8 @@
* License details are found in the file LICENSE.
* \brief
* read-only zeroed page object
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com>
* \author Gou Nakamura <go.nakamura.yw@hitachi-solutions.com> \par
* Copyright (C) 2014 RIKEN AICS
*/
/*
* HISTORY:
@@ -165,7 +166,7 @@ out:
}
static int zeroobj_get_page(struct memobj *memobj, off_t off, int p2align,
uintptr_t *physp)
uintptr_t *physp, unsigned long *pflag)
{
int error;
struct zeroobj *obj = to_zeroobj(memobj);

View File

@@ -99,4 +99,9 @@ enum ihk_asr_type {
int ihk_mc_arch_set_special_register(enum ihk_asr_type, unsigned long value);
int ihk_mc_arch_get_special_register(enum ihk_asr_type, unsigned long *value);
extern unsigned int ihk_ikc_irq;
extern unsigned int ihk_ikc_irq_apicid;
extern int gettime_local_support;
#endif

View File

@@ -22,8 +22,8 @@ struct ihk_kmsg_buf {
};
extern int kprintf(const char *format, ...);
extern int kprintf_lock();
extern void kprintf_unlock(int irqflags);
extern unsigned long kprintf_lock(void);
extern void kprintf_unlock(unsigned long irqflags);
extern int __kprintf(const char *format, ...);
extern void panic(const char *msg);

View File

@@ -147,7 +147,8 @@ struct page_table *ihk_mc_pt_create(enum ihk_mc_ap_flag ap_flag);
void ihk_mc_pt_destroy(struct page_table *pt);
void ihk_mc_load_page_table(struct page_table *pt);
int ihk_mc_pt_virt_to_phys(struct page_table *pt,
void *virt, unsigned long *phys);
const void *virt, unsigned long *phys);
uint64_t ihk_mc_pt_virt_to_pagemap(struct page_table *pt, unsigned long virt);
void remote_flush_tlb_cpumask(struct process_vm *vm,
unsigned long addr, int cpu_id);