arm64: Scalable Vector Extension (SVE) support.
Change-Id: I3568687913f583edfaa297d5cf5ac91d319d97e9
This commit is contained in:
committed by
Masamichi Takagi
parent
dac99f708c
commit
07aa96ef95
@@ -1,4 +1,4 @@
|
||||
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2018 */
|
||||
/* fpsimd.c COPYRIGHT FUJITSU LIMITED 2016-2019 */
|
||||
#include <thread_info.h>
|
||||
#include <fpsimd.h>
|
||||
#include <cpuinfo.h>
|
||||
@@ -11,6 +11,7 @@
|
||||
#include <kmalloc.h>
|
||||
#include <debug.h>
|
||||
#include <process.h>
|
||||
#include <bitmap.h>
|
||||
|
||||
//#define DEBUG_PRINT_FPSIMD
|
||||
|
||||
@@ -21,11 +22,87 @@
|
||||
|
||||
#ifdef CONFIG_ARM64_SVE
|
||||
|
||||
/* Set of available vector lengths, as vq_to_bit(vq): */
|
||||
static DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX);
|
||||
|
||||
/* Maximum supported vector length across all CPUs (initially poisoned) */
|
||||
int sve_max_vl = -1;
|
||||
|
||||
/* Default VL for tasks that don't set it explicitly: */
|
||||
int sve_default_vl = -1;
|
||||
|
||||
/*
|
||||
* Helpers to translate bit indices in sve_vq_map to VQ values (and
|
||||
* vice versa). This allows find_next_bit() to be used to find the
|
||||
* _maximum_ VQ not exceeding a certain value.
|
||||
*/
|
||||
|
||||
static unsigned int vq_to_bit(unsigned int vq)
|
||||
{
|
||||
return SVE_VQ_MAX - vq;
|
||||
}
|
||||
|
||||
static unsigned int bit_to_vq(unsigned int bit)
|
||||
{
|
||||
if (bit >= SVE_VQ_MAX) {
|
||||
bit = SVE_VQ_MAX - 1;
|
||||
}
|
||||
return SVE_VQ_MAX - bit;
|
||||
}
|
||||
|
||||
/*
|
||||
* All vector length selection from userspace comes through here.
|
||||
* We're on a slow path, so some sanity-checks are included.
|
||||
* If things go wrong there's a bug somewhere, but try to fall back to a
|
||||
* safe choice.
|
||||
*/
|
||||
static unsigned int find_supported_vector_length(unsigned int vl)
|
||||
{
|
||||
int bit;
|
||||
int max_vl = sve_max_vl;
|
||||
|
||||
if (!sve_vl_valid(vl)) {
|
||||
vl = SVE_VL_MIN;
|
||||
}
|
||||
|
||||
if (!sve_vl_valid(max_vl)) {
|
||||
max_vl = SVE_VL_MIN;
|
||||
}
|
||||
|
||||
if (vl > max_vl) {
|
||||
vl = max_vl;
|
||||
}
|
||||
|
||||
bit = find_next_bit(sve_vq_map, SVE_VQ_MAX,
|
||||
vq_to_bit(sve_vq_from_vl(vl)));
|
||||
return sve_vl_from_vq(bit_to_vq(bit));
|
||||
}
|
||||
|
||||
/*
 * Probe which vector lengths this CPU accepts and record them in map.
 *
 * map is cleared first.  Starting from SVE_VQ_MAX, each candidate VQ is
 * requested through the LEN field of ZCR_EL1; the length actually
 * reported by sve_get_vl() is converted to a VQ and its bit is set in
 * map.  Continuing from the reported VQ skips the lengths in between.
 *
 * ZCR_EL1 is saved on entry and written back once probing is done.
 * Without that, the register is left holding the last (smallest) LEN
 * that was probed, and a caller that reads ZCR_EL1 afterwards to work
 * out the maximum vector length (as sve_setup() does after
 * sve_init_vq_map()) would see the probe's leftover value.
 */
static void sve_probe_vqs(DECLARE_BITMAP(map, SVE_VQ_MAX))
{
	unsigned int vq, vl;
	unsigned long zcr_entry;
	unsigned long zcr;

	bitmap_zero(map, SVE_VQ_MAX);

	/* Remember the register as found so it can be restored below. */
	zcr_entry = read_sysreg_s(SYS_ZCR_EL1);

	/* Keep every field except LEN; widen the mask before inverting. */
	zcr = ZCR_EL1_LEN_MASK;
	zcr = zcr_entry & ~zcr;

	for (vq = SVE_VQ_MAX; vq >= SVE_VQ_MIN; --vq) {
		/* self-syncing */
		write_sysreg_s(zcr | (vq - 1), SYS_ZCR_EL1);
		vl = sve_get_vl();
		/* skip intervening lengths */
		vq = sve_vq_from_vl(vl);
		set_bit(vq_to_bit(vq), map);
	}

	/* Undo the probing writes: put back the LEN seen on entry. */
	write_sysreg_s(zcr_entry, SYS_ZCR_EL1);
}
|
||||
|
||||
void sve_init_vq_map(void)
|
||||
{
|
||||
sve_probe_vqs(sve_vq_map);
|
||||
}
|
||||
|
||||
size_t sve_state_size(struct thread const *thread)
|
||||
{
|
||||
unsigned int vl = thread->ctx.thread->sve_vl;
|
||||
@@ -75,19 +152,7 @@ int sve_set_vector_length(struct thread *thread,
|
||||
{
|
||||
struct thread_info *ti = thread->ctx.thread;
|
||||
|
||||
BUG_ON(thread == cpu_local_var(current) && cpu_local_var(no_preempt) == 0);
|
||||
|
||||
/*
|
||||
* To avoid accidents, forbid setting for individual threads of a
|
||||
* multithreaded process. User code that knows what it's doing can
|
||||
* pass PR_SVE_SET_VL_THREAD to override this restriction:
|
||||
*/
|
||||
if (!(flags & PR_SVE_SET_VL_THREAD) && get_nr_threads(thread->proc) != 1) {
|
||||
return -EINVAL;
|
||||
}
|
||||
flags &= ~(unsigned long)PR_SVE_SET_VL_THREAD;
|
||||
|
||||
if (flags & ~(unsigned long)(PR_SVE_SET_VL_INHERIT |
|
||||
if (flags & ~(unsigned long)(PR_SVE_VL_INHERIT |
|
||||
PR_SVE_SET_VL_ONEXEC)) {
|
||||
return -EINVAL;
|
||||
}
|
||||
@@ -96,13 +161,19 @@ int sve_set_vector_length(struct thread *thread,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (vl > sve_max_vl) {
|
||||
BUG_ON(!sve_vl_valid(sve_max_vl));
|
||||
vl = sve_max_vl;
|
||||
/*
|
||||
* Clamp to the maximum vector length that VL-agnostic SVE code can
|
||||
* work with. A flag may be assigned in the future to allow setting
|
||||
* of larger vector lengths without confusing older software.
|
||||
*/
|
||||
if (vl > SVE_VL_ARCH_MAX) {
|
||||
vl = SVE_VL_ARCH_MAX;
|
||||
}
|
||||
|
||||
if (flags & (PR_SVE_SET_VL_ONEXEC |
|
||||
PR_SVE_SET_VL_INHERIT)) {
|
||||
vl = find_supported_vector_length(vl);
|
||||
|
||||
if (flags & (PR_SVE_VL_INHERIT |
|
||||
PR_SVE_SET_VL_ONEXEC)) {
|
||||
ti->sve_vl_onexec = vl;
|
||||
} else {
|
||||
/* Reset VL to system default on next exec: */
|
||||
@@ -114,39 +185,42 @@ int sve_set_vector_length(struct thread *thread,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (vl != ti->sve_vl) {
|
||||
if ((elf_hwcap & HWCAP_SVE)) {
|
||||
fp_regs_struct fp_regs;
|
||||
memset(&fp_regs, 0, sizeof(fp_regs));
|
||||
if (vl == ti->sve_vl) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* for self at prctl syscall */
|
||||
if (thread == cpu_local_var(current)) {
|
||||
save_fp_regs(thread);
|
||||
clear_fp_regs();
|
||||
thread_sve_to_fpsimd(thread, &fp_regs);
|
||||
sve_free(thread);
|
||||
if ((elf_hwcap & HWCAP_SVE)) {
|
||||
fp_regs_struct fp_regs;
|
||||
|
||||
ti->sve_vl = vl;
|
||||
memset(&fp_regs, 0, sizeof(fp_regs));
|
||||
|
||||
sve_alloc(thread);
|
||||
thread_fpsimd_to_sve(thread, &fp_regs);
|
||||
restore_fp_regs(thread);
|
||||
/* for target thread at ptrace */
|
||||
} else {
|
||||
thread_sve_to_fpsimd(thread, &fp_regs);
|
||||
sve_free(thread);
|
||||
/* for self at prctl syscall */
|
||||
if (thread == cpu_local_var(current)) {
|
||||
save_fp_regs(thread);
|
||||
clear_fp_regs();
|
||||
thread_sve_to_fpsimd(thread, &fp_regs);
|
||||
sve_free(thread);
|
||||
|
||||
ti->sve_vl = vl;
|
||||
ti->sve_vl = vl;
|
||||
|
||||
sve_alloc(thread);
|
||||
thread_fpsimd_to_sve(thread, &fp_regs);
|
||||
}
|
||||
sve_alloc(thread);
|
||||
thread_fpsimd_to_sve(thread, &fp_regs);
|
||||
restore_fp_regs(thread);
|
||||
/* for target thread at ptrace */
|
||||
} else {
|
||||
thread_sve_to_fpsimd(thread, &fp_regs);
|
||||
sve_free(thread);
|
||||
|
||||
ti->sve_vl = vl;
|
||||
|
||||
sve_alloc(thread);
|
||||
thread_fpsimd_to_sve(thread, &fp_regs);
|
||||
}
|
||||
}
|
||||
ti->sve_vl = vl;
|
||||
|
||||
out:
|
||||
ti->sve_flags = flags & PR_SVE_SET_VL_INHERIT;
|
||||
ti->sve_flags = flags & PR_SVE_VL_INHERIT;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -156,44 +230,53 @@ out:
|
||||
* Encode the current vector length and flags for return.
|
||||
* This is only required for prctl(): ptrace has separate fields
|
||||
*/
|
||||
static int sve_prctl_status(const struct thread_info *ti)
|
||||
static int sve_prctl_status(unsigned long flags)
|
||||
{
|
||||
int ret = ti->sve_vl;
|
||||
int ret;
|
||||
struct thread_info *ti = cpu_local_var(current)->ctx.thread;
|
||||
|
||||
ret |= ti->sve_flags << 16;
|
||||
if (flags & PR_SVE_SET_VL_ONEXEC) {
|
||||
ret = ti->sve_vl_onexec;
|
||||
}
|
||||
else {
|
||||
ret = ti->sve_vl;
|
||||
}
|
||||
|
||||
if (ti->sve_flags & PR_SVE_VL_INHERIT) {
|
||||
ret |= PR_SVE_VL_INHERIT;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_set_task_vl */
|
||||
int sve_set_thread_vl(struct thread *thread, const unsigned long vector_length,
|
||||
const unsigned long flags)
|
||||
int sve_set_thread_vl(unsigned long arg)
|
||||
{
|
||||
unsigned long vl, flags;
|
||||
int ret;
|
||||
|
||||
if (!(elf_hwcap & HWCAP_SVE)) {
|
||||
vl = arg & PR_SVE_VL_LEN_MASK;
|
||||
flags = arg & ~vl;
|
||||
|
||||
/* Instead of system_supports_sve() */
|
||||
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
BUG_ON(thread != cpu_local_var(current));
|
||||
|
||||
preempt_disable();
|
||||
ret = sve_set_vector_length(thread, vector_length, flags);
|
||||
preempt_enable();
|
||||
|
||||
ret = sve_set_vector_length(cpu_local_var(current), vl, flags);
|
||||
if (ret) {
|
||||
return ret;
|
||||
}
|
||||
return sve_prctl_status(thread->ctx.thread);
|
||||
return sve_prctl_status(flags);
|
||||
}
|
||||
|
||||
/* @ref.impl arch/arm64/kernel/fpsimd.c::sve_get_ti_vl */
|
||||
int sve_get_thread_vl(const struct thread *thread)
|
||||
int sve_get_thread_vl(void)
|
||||
{
|
||||
if (!(elf_hwcap & HWCAP_SVE)) {
|
||||
/* Instead of system_supports_sve() */
|
||||
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
|
||||
return -EINVAL;
|
||||
}
|
||||
return sve_prctl_status(thread->ctx.thread);
|
||||
return sve_prctl_status(0);
|
||||
}
|
||||
|
||||
void do_sve_acc(unsigned int esr, struct pt_regs *regs)
|
||||
@@ -203,25 +286,48 @@ void do_sve_acc(unsigned int esr, struct pt_regs *regs)
|
||||
panic("");
|
||||
}
|
||||
|
||||
void init_sve_vl(void)
|
||||
void sve_setup(void)
|
||||
{
|
||||
extern unsigned long ihk_param_default_vl;
|
||||
uint64_t zcr;
|
||||
|
||||
/* Instead of system_supports_sve() */
|
||||
if (unlikely(!(elf_hwcap & HWCAP_SVE))) {
|
||||
return;
|
||||
}
|
||||
|
||||
zcr = read_system_reg(SYS_ZCR_EL1);
|
||||
BUG_ON(((zcr & ZCR_EL1_LEN_MASK) + 1) * 16 > sve_max_vl);
|
||||
/* init sve_vq_map bitmap */
|
||||
sve_init_vq_map();
|
||||
|
||||
/*
|
||||
* The SVE architecture mandates support for 128-bit vectors,
|
||||
* so sve_vq_map must have at least SVE_VQ_MIN set.
|
||||
* If something went wrong, at least try to patch it up:
|
||||
*/
|
||||
if (!test_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map)) {
|
||||
set_bit(vq_to_bit(SVE_VQ_MIN), sve_vq_map);
|
||||
}
|
||||
|
||||
zcr = read_system_reg(SYS_ZCR_EL1);
|
||||
sve_max_vl = sve_vl_from_vq((zcr & ZCR_EL1_LEN_MASK) + 1);
|
||||
|
||||
/*
|
||||
* Sanity-check that the max VL we determined through CPU features
|
||||
* corresponds properly to sve_vq_map. If not, do our best:
|
||||
*/
|
||||
if (sve_max_vl != find_supported_vector_length(sve_max_vl)) {
|
||||
sve_max_vl = find_supported_vector_length(sve_max_vl);
|
||||
}
|
||||
|
||||
sve_max_vl = ((zcr & ZCR_EL1_LEN_MASK) + 1) * 16;
|
||||
sve_default_vl = ihk_param_default_vl;
|
||||
|
||||
if (sve_default_vl == 0) {
|
||||
kprintf("SVE: Getting default VL = 0 from HOST-Linux.\n");
|
||||
sve_default_vl = sve_max_vl > 64 ? 64 : sve_max_vl;
|
||||
kprintf("SVE: Using default vl(%d byte).\n", sve_default_vl);
|
||||
if (ihk_param_default_vl !=
|
||||
find_supported_vector_length(ihk_param_default_vl)) {
|
||||
kprintf("SVE: Getting unsupported default VL = %d "
|
||||
"from HOST-Linux.\n", sve_default_vl);
|
||||
sve_default_vl = find_supported_vector_length(64);
|
||||
kprintf("SVE: Using default vl(%d byte).\n",
|
||||
sve_default_vl);
|
||||
}
|
||||
|
||||
kprintf("SVE: maximum available vector length %u bytes per vector\n",
|
||||
@@ -232,7 +338,7 @@ void init_sve_vl(void)
|
||||
|
||||
#else /* CONFIG_ARM64_SVE */
|
||||
|
||||
void init_sve_vl(void)
|
||||
void sve_setup(void)
|
||||
{
|
||||
/* nothing to do. */
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user