From e64d89cd48a8a0a9ce605ecc59faeb5a673c7fe2 Mon Sep 17 00:00:00 2001 From: Dominique Martinet Date: Fri, 15 Sep 2017 11:17:38 +0900 Subject: [PATCH] hfi: bases for user_exp_rcv This implements a skeleton setup function and calls it on ioctl Many missing points: - missing pci mapping to make setup work - no clear (passed to linux, so will likely bug out) - missing locks/safe-guards Conflicts: kernel/Makefile.build.in --- kernel/Makefile.build.in | 2 +- kernel/chip.c | 115 +++++++ kernel/file_ops.c | 10 +- kernel/include/hfi1/chip.h | 58 ++++ kernel/include/hfi1/chip_registers.h | 64 ++++ kernel/include/hfi1/hfi.h | 31 +- kernel/include/hfi1/user_exp_rcv.h | 13 +- kernel/user_exp_rcv.c | 463 +++++++++++++++++++++++++++ 8 files changed, 740 insertions(+), 16 deletions(-) create mode 100644 kernel/chip.c create mode 100644 kernel/include/hfi1/chip.h create mode 100644 kernel/include/hfi1/chip_registers.h create mode 100644 kernel/user_exp_rcv.c diff --git a/kernel/Makefile.build.in b/kernel/Makefile.build.in index faf699a6..ee3002b9 100644 --- a/kernel/Makefile.build.in +++ b/kernel/Makefile.build.in @@ -8,7 +8,7 @@ OBJS += process.o copy.o waitq.o futex.o timer.o plist.o fileobj.o shmobj.o OBJS += zeroobj.o procfs.o devobj.o sysfs.o xpmem.o profile.o freeze.o OBJS += rbtree.o OBJS += pager.o -OBJS += file_ops.o user_sdma.o sdma.o +OBJS += file_ops.o user_sdma.o sdma.o user_exp_rcv.o chip.o # POSTK_DEBUG_ARCH_DEP_18 coredump arch separation. DEPSRCS=$(wildcard $(SRC)/*.c) diff --git a/kernel/chip.c b/kernel/chip.c new file mode 100644 index 00000000..73eceff9 --- /dev/null +++ b/kernel/chip.c @@ -0,0 +1,115 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license.
+ * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains all of the code that is specific to the HFI chip, + * or what we use of them. + */ + +#include +#include +#include + +/* + * index is the index into the receive array + */ +void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, + u32 type, unsigned long pa, u16 order) +{ + u64 reg; + void __iomem *base = (dd->rcvarray_wc ? dd->rcvarray_wc : + (dd->kregbase + RCV_ARRAY)); + + if (!(dd->flags & HFI1_PRESENT)) + goto done; + + if (type == PT_INVALID) { + pa = 0; + } else if (type > PT_INVALID) { + kprintf("unexpected receive array type %u for index %u, not handled\n", + type, index); + goto done; + } + +#ifdef TIDRDMA_DEBUG + hfi1_cdbg(TID, "type %s, index 0x%x, pa 0x%lx, bsize 0x%lx", + pt_name(type), index, pa, (unsigned long)order); +#endif + +#define RT_ADDR_SHIFT 12 /* 4KB kernel address boundary */ + reg = RCV_ARRAY_RT_WRITE_ENABLE_SMASK + | (u64)order << RCV_ARRAY_RT_BUF_SIZE_SHIFT + | ((pa >> RT_ADDR_SHIFT) & RCV_ARRAY_RT_ADDR_MASK) + << RCV_ARRAY_RT_ADDR_SHIFT; + writeq(reg, base + (index * 8)); + + if (type == PT_EAGER) + /* + * Eager entries are written one-by-one so we have to push them + * after we write the entry. 
+ */ + flush_wc(); +done: + return; +} + +void hfi1_clear_tids(struct hfi1_ctxtdata *rcd) +{ + struct hfi1_devdata *dd = rcd->dd; + u32 i; + +#if 0 + /* this could be optimized */ + for (i = rcd->eager_base; i < rcd->eager_base + + rcd->egrbufs.alloced; i++) + hfi1_put_tid(dd, i, PT_INVALID, 0, 0); +#endif + for (i = rcd->expected_base; + i < rcd->expected_base + rcd->expected_count; i++) + hfi1_put_tid(dd, i, PT_INVALID, 0, 0); +} + diff --git a/kernel/file_ops.c b/kernel/file_ops.c index 48fcc912..e75ee0f9 100644 --- a/kernel/file_ops.c +++ b/kernel/file_ops.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #ifdef __HFI1_ORIG__ @@ -421,9 +422,8 @@ long hfi1_file_ioctl(void *private_data, unsigned int cmd, { struct hfi1_filedata *fd = private_data; struct hfi1_ctxtdata *uctxt = fd->uctxt; -#if 0 struct hfi1_tid_info tinfo; -#endif + unsigned long addr; int ret = 0; hfi1_cdbg(IOCTL, "IOCTL recv: 0x%x", cmd); @@ -476,13 +476,13 @@ long hfi1_file_ioctl(void *private_data, unsigned int cmd, break; case HFI1_IOCTL_TID_UPDATE: -#if 0 + kprintf("%s: HFI1_IOCTL_TID_UPDATE \n", __FUNCTION__); if (copy_from_user(&tinfo, (struct hfi11_tid_info __user *)arg, sizeof(tinfo))) return -EFAULT; - ret = hfi1_user_exp_rcv_setup(fp, &tinfo); + ret = hfi1_user_exp_rcv_setup(fd, &tinfo); if (!ret) { /* * Copy the number of tidlist entries we used @@ -496,8 +496,6 @@ long hfi1_file_ioctl(void *private_data, unsigned int cmd, sizeof(tinfo.length))) ret = -EFAULT; } -#endif - kprintf("%s: HFI1_IOCTL_TID_UPDATE \n", __FUNCTION__); break; case HFI1_IOCTL_TID_FREE: diff --git a/kernel/include/hfi1/chip.h b/kernel/include/hfi1/chip.h new file mode 100644 index 00000000..86671b3d --- /dev/null +++ b/kernel/include/hfi1/chip.h @@ -0,0 +1,58 @@ +#ifndef _CHIP_H +#define _CHIP_H +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. 
+ * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * This file contains all of the defines that are specific to the HFI chip + */ + +void hfi1_put_tid(struct hfi1_devdata *dd, u32 index, + u32 type, unsigned long pa, u16 order); +void hfi1_clear_tids(struct hfi1_ctxtdata *rcd); + +#endif /* _CHIP_H */ diff --git a/kernel/include/hfi1/chip_registers.h b/kernel/include/hfi1/chip_registers.h new file mode 100644 index 00000000..ec6aa292 --- /dev/null +++ b/kernel/include/hfi1/chip_registers.h @@ -0,0 +1,64 @@ +#ifndef DEF_CHIP_REG +#define DEF_CHIP_REG + +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details.
+ * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#define CORE 0x000000000000 + + +#define RXE (CORE + 0x000001000000) + + +#define RCV_ARRAY (RXE + 0x000000200000) +#define RCV_ARRAY_CNT (RXE + 0x000000000018) +#define RCV_ARRAY_RT_ADDR_MASK 0xFFFFFFFFFull +#define RCV_ARRAY_RT_ADDR_SHIFT 0 +#define RCV_ARRAY_RT_BUF_SIZE_SHIFT 36 +#define RCV_ARRAY_RT_WRITE_ENABLE_SMASK 0x8000000000000000ull + +#endif /* DEF_CHIP_REG */ diff --git a/kernel/include/hfi1/hfi.h b/kernel/include/hfi1/hfi.h index 3a820135..3e5d32b7 100644 --- a/kernel/include/hfi1/hfi.h +++ b/kernel/include/hfi1/hfi.h @@ -192,11 +192,13 @@ struct ctxt_eager_bufs { } *rcvtids; }; +#endif /* __HFI1_ORIG__ */ struct exp_tid_set { struct list_head list; u32 count; }; +#ifdef __HFI1_ORIG__ struct tid_queue { struct list_head queue_head; /* queue head for QP TID resource waiters */ @@ -372,9 +374,26 @@ struct hfi1_ctxtdata { //TODO: Fix hfi1_ctxtdata and pport #ifndef __HFI1_ORIG__ struct hfi1_ctxtdata { + char __padding0[152]; unsigned ctxt; + char __padding1[180-156]; + /* number of RcvArray groups for this context. */ + u32 rcv_array_groups; + /* index of first eager TID entry. */ + u32 eager_base; + /* number of expected TID entries */ + u32 expected_count; + /* index of first expected TID entry. */ + u32 expected_base; + + struct exp_tid_set tid_group_list; + struct exp_tid_set tid_used_list; + struct exp_tid_set tid_full_list; + + char __padding2[440-272]; + struct hfi1_devdata *dd; }; -#endif /* __HFI1_ORIG__ */ +#endif /* !__HFI1_ORIG__ */ #ifdef __HFI1_ORIG__ /* @@ -842,12 +861,15 @@ typedef void (*opcode_handler)(struct hfi1_packet *packet); #define RHF_RCV_DONE 1 /* stop, this packet processed */ #define RHF_RCV_REPROCESS 2 /* stop. 
retain this packet */ +#endif /* __HFI1_ORIG__ */ + struct rcv_array_data { u8 group_size; u16 ngroups; u16 nctxt_extra; }; +#ifdef __HFI1_ORIG__ struct per_vl_data { u16 mtu; struct send_context *sc; @@ -1471,7 +1493,7 @@ struct hfi1_devdata { u64 gi_mask[CCE_NUM_INT_CSRS]; - char rcv_entries[6]; //struct rcv_array_data rcv_entries + struct rcv_array_data rcv_entries; u16 psxmitwait_check_rate; @@ -2086,6 +2108,8 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) lockdep_is_held(&ppd->cc_state_lock)); } +#endif /* __HFI1_ORIG__ */ + /* * values for dd->flags (_device_ related flags) */ @@ -2096,6 +2120,7 @@ struct cc_state *get_cc_state_protected(struct hfi1_pportdata *ppd) #define HFI1_HAS_SEND_DMA 0x10 /* Supports Send DMA */ #define HFI1_FORCED_FREEZE 0x80 /* driver forced freeze mode */ +#ifdef __HFI1_ORIG__ /* IB dword length mask in PBC (lower 11 bits); same for all chips */ #define HFI1_PBC_LENGTH_MASK ((1 << 11) - 1) @@ -2217,6 +2242,7 @@ const char *get_unit_name(int unit); const char *get_card_name(struct rvt_dev_info *rdi); struct pci_dev *get_pci_dev(struct rvt_dev_info *rdi); +#endif /* __HFI1_ORIG__ */ /* * Flush write combining store buffers (if present) and perform a write * barrier. @@ -2226,6 +2252,7 @@ static inline void flush_wc(void) asm volatile("sfence" : : : "memory"); } +#ifdef __HFI1_ORIG__ void handle_eflags(struct hfi1_packet *packet); int process_receive_ib(struct hfi1_packet *packet); int process_receive_bypass(struct hfi1_packet *packet); diff --git a/kernel/include/hfi1/user_exp_rcv.h b/kernel/include/hfi1/user_exp_rcv.h index 7ebc0bc0..be89c3da 100644 --- a/kernel/include/hfi1/user_exp_rcv.h +++ b/kernel/include/hfi1/user_exp_rcv.h @@ -46,9 +46,7 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* */ -#ifdef __HFI1_ORIG__ -#include "hfi.h" -#endif /* __HFI1_ORIG__ */ +#include "hfi1/hfi.h" #define EXP_TID_TIDLEN_MASK 0x7FFULL #define EXP_TID_TIDLEN_SHIFT 0 @@ -71,7 +69,6 @@ (tid) |= EXP_TID_SET(field, (value)); \ } while (0) -#ifdef __HFI1_ORIG__ struct tid_group { struct list_head list; unsigned base; @@ -147,15 +144,17 @@ static inline void tid_group_move(struct tid_group *group, tid_group_add_tail(group, s2); } +#ifdef __HFI1_ORIG__ u32 find_phys_blocks(struct page **, unsigned, struct tid_pageset *); int alloc_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); void free_ctxt_rcv_groups(struct hfi1_ctxtdata *rcd); int hfi1_user_exp_rcv_init(struct file *); int hfi1_user_exp_rcv_free(struct hfi1_filedata *); -int hfi1_user_exp_rcv_setup(struct file *, struct hfi1_tid_info *); -int hfi1_user_exp_rcv_clear(struct file *, struct hfi1_tid_info *); -int hfi1_user_exp_rcv_invalid(struct file *, struct hfi1_tid_info *); #endif /* __HFI1_ORIG__ */ +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *, struct hfi1_tid_info *); +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *, struct hfi1_tid_info *); +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *, struct hfi1_tid_info *); + #endif /* _HFI1_USER_EXP_RCV_H */ diff --git a/kernel/user_exp_rcv.c b/kernel/user_exp_rcv.c new file mode 100644 index 00000000..ffd3a899 --- /dev/null +++ b/kernel/user_exp_rcv.c @@ -0,0 +1,463 @@ +/* + * Copyright(c) 2015, 2016 Intel Corporation. + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * BSD LICENSE + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * - Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * - Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + */ + +#include +#include +#include +#include +#include + +static int program_rcvarray(struct hfi1_filedata *, unsigned long, struct tid_group *, + u32, u32 *, unsigned *); +static int set_rcvarray_entry(struct hfi1_filedata *, unsigned long, + u32, struct tid_group *, + u32); +static int unprogram_rcvarray(struct hfi1_filedata *, u32, struct tid_group **); + + +/* + * RcvArray entry allocation for Expected Receives is done by the + * following algorithm: + * + * The context keeps 3 lists of groups of RcvArray entries: + * 1. List of empty groups - tid_group_list + * This list is created during user context creation and + * contains elements which describe sets (of 8) of empty + * RcvArray entries. + * 2. List of partially used groups - tid_used_list + * This list contains sets of RcvArray entries which are + * not completely used up. Another mapping request could + * use some or all of the remaining entries. + * 3. List of full groups - tid_full_list + * This is the list where sets that are completely used + * up go. + * + * An attempt to optimize the usage of RcvArray entries is + * made by finding all sets of physically contiguous pages in a + * user's buffer. + * These physically contiguous sets are further split into + * sizes supported by the receive engine of the HFI. The + * resulting sets of pages are stored in struct tid_pageset, + * which describes the sets as: + * * .count - number of pages in this set + * * .idx - starting index into struct page ** array + * of this set + * + * From this point on, the algorithm deals with the page sets + * described above. The number of pagesets is divided by the + * RcvArray group size to produce the number of full groups + * needed. + * + * Groups from the 3 lists are manipulated using the following + * rules: + * 1. For each set of 8 pagesets, a complete group from + * tid_group_list is taken, programmed, and moved to + * the tid_full_list list. + * 2.
For all remaining pagesets: + * 2.1 If the tid_used_list is empty and the tid_group_list + * is empty, stop processing pageset and return only + * what has been programmed up to this point. + * 2.2 If the tid_used_list is empty and the tid_group_list + * is not empty, move a group from tid_group_list to + * tid_used_list. + * 2.3 For each group in tid_used_list, program as much as + * can fit into the group. If the group becomes fully + * used, move it to tid_full_list. + */ +int hfi1_user_exp_rcv_setup(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo) +{ + int ret = 0; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + unsigned tididx = 0; + uintptr_t vaddr = tinfo->vaddr; + u32 tid; + struct process_vm *vm = cpu_local_var(current)->vm; + size_t base_pgsize; + + if (!tinfo->length) + return -EINVAL; + + if (tinfo->length / PAGE_SIZE > uctxt->expected_count) { + kprintf("Expected buffer too big\n"); + return -EINVAL; + } + + /* Verify that access is OK for the user buffer */ + // TODO: iterate over vm memory ranges for write access + // return -EFAULT; + + + pte_t *ptep; + ptep = ihk_mc_pt_lookup_pte(vm->address_space->page_table, + (void*)vaddr, 0, 0, &base_pgsize, 0); + if (unlikely(!ptep || !pte_is_present(ptep))) { + kprintf("%s: ERRROR: no valid PTE for 0x%lx\n", + __FUNCTION__, vaddr); + return -EFAULT; + } + + // TODO: lock between setup/clear + + /* Simplified design: vaddr to vaddr + tinfo->length is contiguous for us + * -> only have one request, always + */ + + { + struct tid_group *grp; + /* + * If we don't have any partially used tid groups, check + * if we have empty groups. If so, take one from there and + * put in the partially used list.
+ */ + if (!uctxt->tid_used_list.count) { + if (!uctxt->tid_group_list.count) + goto unlock; + + grp = tid_group_pop(&uctxt->tid_group_list); + tid_group_add_tail(grp, &uctxt->tid_used_list); + } + + + grp = list_first_entry(&uctxt->tid_used_list.list, + struct tid_group, list); + + ret = program_rcvarray(fd, vaddr, grp, tinfo->length, &tid, + &tididx); + if (ret < 0) { + hfi1_cdbg(TID, + "Failed to program RcvArray entries %d", + ret); + ret = -EFAULT; + } else if (WARN_ON(ret == 0)) { + ret = -EFAULT; + } + } +unlock: + // TODO: check if group is full + move it to full list + if (tididx) { + // TODO: can we use spin_lock with kernel locks? + spin_lock(&fd->tid_lock); + fd->tid_used += tididx; + spin_unlock(&fd->tid_lock); + tinfo->tidcnt = tididx; + + if (copy_to_user((void __user *)(unsigned long)tinfo->tidlist, + &tid, sizeof(tid))) { + /* + * On failure to copy to the user level, we need to undo + * everything done so far so we don't leak resources. + */ + tinfo->tidlist = (unsigned long)&tid; + hfi1_user_exp_rcv_clear(fd, tinfo); + tinfo->tidlist = 0; + ret = -EFAULT; + } + } + + return ret > 0 ? 
0 : ret; +} + +int hfi1_user_exp_rcv_clear(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo) +{ +#if 0 + int ret = 0; + struct hfi1_ctxtdata *uctxt = fd->uctxt; + u32 *tidinfo; + unsigned tididx; + + tidinfo = kcalloc(tinfo->tidcnt, sizeof(*tidinfo), GFP_KERNEL); + if (!tidinfo) + return -ENOMEM; + + if (copy_from_user(tidinfo, (void __user *)(unsigned long) + tinfo->tidlist, sizeof(tidinfo[0]) * + tinfo->tidcnt)) { + ret = -EFAULT; + goto done; + } + + mutex_lock(&uctxt->exp_mutex); + for (tididx = 0; tididx < tinfo->tidcnt; tididx++) { + ret = unprogram_rcvarray(fd, tidinfo[tididx], NULL); + if (ret) { + hfi1_cdbg(TID, "Failed to unprogram rcv array %d", + ret); + break; + } + } + spin_lock(&fd->tid_lock); + fd->tid_used -= tididx; + spin_unlock(&fd->tid_lock); + tinfo->tidcnt = tididx; + mutex_unlock(&uctxt->exp_mutex); +done: + kfree(tidinfo); + return ret; +#endif + return 0; +} + +int hfi1_user_exp_rcv_invalid(struct hfi1_filedata *fd, struct hfi1_tid_info *tinfo) +{ +#if 0 + struct hfi1_ctxtdata *uctxt = fd->uctxt; + unsigned long *ev = uctxt->dd->events + + (((uctxt->ctxt - uctxt->dd->first_user_ctxt) * + HFI1_MAX_SHARED_CTXTS) + fd->subctxt); + u32 *array; + int ret = 0; + + if (!fd->invalid_tids) + return -EINVAL; + + /* + * copy_to_user() can sleep, which will leave the invalid_lock + * locked and cause the MMU notifier to be blocked on the lock + * for a long time. + * Copy the data to a local buffer so we can release the lock. + */ + array = kcalloc(uctxt->expected_count, sizeof(*array), GFP_KERNEL); + if (!array) + return -EFAULT; + + spin_lock(&fd->invalid_lock); + if (fd->invalid_tid_idx) { + memcpy(array, fd->invalid_tids, sizeof(*array) * + fd->invalid_tid_idx); + memset(fd->invalid_tids, 0, sizeof(*fd->invalid_tids) * + fd->invalid_tid_idx); + tinfo->tidcnt = fd->invalid_tid_idx; + fd->invalid_tid_idx = 0; + /* + * Reset the user flag while still holding the lock. + * Otherwise, PSM can miss events. 
+ */ + clear_bit(_HFI1_EVENT_TID_MMU_NOTIFY_BIT, ev); + } else { + tinfo->tidcnt = 0; + } + spin_unlock(&fd->invalid_lock); + + if (tinfo->tidcnt) { + if (copy_to_user((void __user *)tinfo->tidlist, + array, sizeof(*array) * tinfo->tidcnt)) + ret = -EFAULT; + } + kfree(array); + + return ret; +#endif + return 0; +} +/** + * program_rcvarray() - program an RcvArray group with receive buffers + * @fd: file data + * @vaddr: starting user virtual address + * @grp: RcvArray group + * @sets: array of struct tid_pageset holding information on physically + * contiguous chunks from the user buffer + * @start: starting index into sets array + * @count: number of struct tid_pageset's to program + * @pages: an array of struct page * for the user buffer + * @ptid: information about the programmed RcvArray entries is to be encoded. + * @tididx: starting offset into tidlist + * + * This function will program up to 'count' number of RcvArray entries from the + * group 'grp'. To make best use of write-combining writes, the function will + * perform writes to the unused RcvArray entries which will be ignored by the + * HW. Each RcvArray entry will be programmed with a physically contiguous + * buffer chunk from the user's virtual buffer. + * + * Return: + * -EINVAL if the requested count is larger than the size of the group, + * -ENOMEM or -EFAULT on error from set_rcvarray_entry(), or + * number of RcvArray entries programmed. + */ +static int program_rcvarray(struct hfi1_filedata *fd, unsigned long vaddr, + struct tid_group *grp, + u32 len, + u32 *ptid, unsigned *tididx) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + u16 idx; + u32 tidinfo = 0, rcventry; + + /* Find the first unused entry in the group */ + for (idx = 0; idx < grp->size; idx++) { + if (!(grp->map & (1 << idx))) { + break; + } + } + + int ret = 0; + + /* + * If this entry in the group is used, move to the next one. + * If we go past the end of the group, exit the loop. 
+ */ + rcv_array_wc_fill(dd, grp->base + idx); + + rcventry = grp->base + idx; + + ret = set_rcvarray_entry(fd, vaddr, rcventry, grp, + len); + if (ret) + return ret; + + tidinfo = rcventry2tidinfo(rcventry - uctxt->expected_base); + *ptid = tidinfo; + grp->used++; + grp->map |= 1 << idx++; + + return 1; +} + +static int set_rcvarray_entry(struct hfi1_filedata *fd, unsigned long vaddr, + u32 rcventry, struct tid_group *grp, + u32 len) +{ + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + u16 order = 1; +#if 0 + int ret; + struct tid_rb_node *node; + dma_addr_t phys; + + /* + * Allocate the node first so we can handle a potential + * failure before we've programmed anything. + */ + node = kzalloc(sizeof(*node) + (sizeof(struct page *) * npages), + GFP_KERNEL); + if (!node) + return -ENOMEM; + phys = pci_map_single(dd->pcidev, + __va(page_to_phys(pages[0])), + npages * PAGE_SIZE, PCI_DMA_FROMDEVICE); + if (dma_mapping_error(&dd->pcidev->dev, phys)) { + kprintf("Failed to DMA map Exp Rcv pages 0x%llx\n", + phys); + kfree(node); + return -EFAULT; + } + node->mmu.addr = vaddr; + node->mmu.len = npages * PAGE_SIZE; + node->phys = page_to_phys(pages[0]); + node->npages = npages; + node->rcventry = rcventry; + node->dma_addr = phys; + node->grp = grp; + node->freed = false; + memcpy(node->pages, pages, sizeof(struct page *) * npages); + if (!fd->handler) + ret = tid_rb_insert(fd, &node->mmu); + else + ret = hfi1_mmu_rb_insert(fd->handler, &node->mmu); + + if (ret) { + hfi1_cdbg(TID, "Failed to insert RB node %u 0x%lx, 0x%lx %d", + node->rcventry, node->mmu.addr, node->phys, ret); + pci_unmap_single(dd->pcidev, phys, npages * PAGE_SIZE, + PCI_DMA_FROMDEVICE); + kfree(node); + return -EFAULT; + } +#endif + while (len > 0) { + order++; + len >>= 1; + } + + // TODO: we probably pretty much need the real phys here, + // so we need to make that mapping. 
+ + hfi1_put_tid(dd, rcventry, PT_EXPECTED, /* phys */ 0, order); +#if 0 + trace_hfi1_exp_tid_reg(uctxt->ctxt, fd->subctxt, rcventry, npages, + node->mmu.addr, node->phys, phys); +#endif + return 0; +} + +static int unprogram_rcvarray(struct hfi1_filedata *fd, u32 tidinfo, + struct tid_group **grp) +{ +#if 0 + struct hfi1_ctxtdata *uctxt = fd->uctxt; + struct hfi1_devdata *dd = uctxt->dd; + struct tid_rb_node *node; + u8 tidctrl = EXP_TID_GET(tidinfo, CTRL); + u32 tididx = EXP_TID_GET(tidinfo, IDX) << 1, rcventry; + + if (tididx >= uctxt->expected_count) { + kprintf("Invalid RcvArray entry (%u) index for ctxt %u\n", + tididx, uctxt->ctxt); + return -EINVAL; + } + + if (tidctrl == 0x3) + return -EINVAL; + + rcventry = tididx + (tidctrl - 1); + + node = fd->entry_to_rb[rcventry]; + if (!node || node->rcventry != (uctxt->expected_base + rcventry)) + return -EBADF; + + if (grp) + *grp = node->grp; + + if (!fd->handler) + cacheless_tid_rb_remove(fd, node); + else + hfi1_mmu_rb_remove(fd->handler, &node->mmu); +#endif + return 0; +} +