From e336d401eadf84385659d6f17db08729c2a65a46 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Fri, 7 Aug 2020 12:13:34 -0400 Subject: [PATCH] adding dogfood unit test --- driver/tests/basic/Makefile | 10 +- driver/tests/basic/basic.cpp | 2 + driver/tests/demo/Makefile | 10 +- driver/tests/demo/demo.cpp | 2 + driver/tests/dogfood/Makefile | 64 ++ driver/tests/dogfood/Memcpy/hw/rtl/_hdr | 603 ---------------- .../dogfood/Memcpy/hw/rtl/cci_hello.json | 18 - .../dogfood/Memcpy/hw/rtl/cci_hello_afu.sv | 653 ------------------ .../Memcpy/hw/rtl/cci_hello_afu_working.sv | 621 ----------------- .../tests/dogfood/Memcpy/hw/rtl/sources.txt | 2 - driver/tests/dogfood/Memcpy/hw/sim/setup_ase | 11 - driver/tests/dogfood/Memcpy/sw/Makefile | 41 -- driver/tests/dogfood/Memcpy/sw/cci_hello.c | 210 ------ .../dogfood/Memcpy/sw/obj/afu_json_info.h | 13 - .../tests/dogfood/Memcpy/sw/obj/cci_hello.o | Bin 5336 -> 0 bytes driver/tests/dogfood/common.h | 14 + driver/tests/dogfood/dogfood.cpp | 264 +++++++ driver/tests/dogfood/kernel.c | 354 ++++++++++ driver/tests/dogfood/testcases.h | 555 +++++++++++++++ 19 files changed, 1267 insertions(+), 2180 deletions(-) create mode 100644 driver/tests/dogfood/Makefile delete mode 100644 driver/tests/dogfood/Memcpy/hw/rtl/_hdr delete mode 100644 driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json delete mode 100644 driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv delete mode 100644 driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv delete mode 100644 driver/tests/dogfood/Memcpy/hw/rtl/sources.txt delete mode 100755 driver/tests/dogfood/Memcpy/hw/sim/setup_ase delete mode 100644 driver/tests/dogfood/Memcpy/sw/Makefile delete mode 100644 driver/tests/dogfood/Memcpy/sw/cci_hello.c delete mode 100644 driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h delete mode 100644 driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o create mode 100644 driver/tests/dogfood/common.h create mode 100644 driver/tests/dogfood/dogfood.cpp create mode 100644 driver/tests/dogfood/kernel.c create mode 100644 driver/tests/dogfood/testcases.h diff --git a/driver/tests/basic/Makefile b/driver/tests/basic/Makefile index a3792edc..2edd71cf 100644 --- a/driver/tests/basic/Makefile +++ b/driver/tests/basic/Makefile @@ -1,6 +1,8 @@ RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops) VORTEX_RT_PATH ?= $(wildcard ../../../runtime) +OPTS ?= -n256 + VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump @@ -38,16 +40,16 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ run-fpga: $(PROJECT) - LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256 + LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-ase: $(PROJECT) - ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256 + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) - LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256 + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-simx: $(PROJECT) - LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 256 + LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 92a785f4..ee16533d 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -14,6 +14,8 @@ exit(-1); \ } while (false) +/////////////////////////////////////////////////////////////////////////////// + const char* kernel_file = "kernel.bin"; int test = -1; uint32_t count = 0; diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index e142e5fd..348e5d11 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -1,6 +1,8 @@ RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops) VORTEX_RT_PATH ?= $(wildcard ../../../runtime) +OPTS ?= -n64 + VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump @@ -36,16 +38,16 @@ $(PROJECT): $(SRCS) $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ run-fpga: $(PROJECT) - LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64 + LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-ase: $(PROJECT) - ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64 + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-rtlsim: $(PROJECT) - LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64 + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) run-simx: $(PROJECT) - LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) -n 64 + LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) .depend: $(SRCS) $(CXX) $(CXXFLAGS) -MM $^ > .depend; diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 2d5b47f8..10d0b8ae 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -14,6 +14,8 @@ exit(-1); \ } while (false) +/////////////////////////////////////////////////////////////////////////////// + const char* kernel_file = "kernel.bin"; uint32_t count = 0; diff --git a/driver/tests/dogfood/Makefile b/driver/tests/dogfood/Makefile new file mode 100644 index 00000000..72fdf50b --- /dev/null +++ b/driver/tests/dogfood/Makefile @@ -0,0 +1,64 @@ +RISCV_TOOLCHAIN_PATH ?= $(wildcard ~/dev/riscv-gnu-toolchain/drops) +VORTEX_RT_PATH ?= $(wildcard ../../../runtime) + +OPTS ?= -n64 + +VX_CC = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-gcc +VX_CXX = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-g++ +VX_DP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objdump +VX_CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy + +VX_CFLAGS += -march=rv32imf -mabi=ilp32 -O3 -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -ffreestanding -nostartfiles -Wl,--gc-sections +VX_CFLAGS += -I$(VORTEX_RT_PATH)/include + +VX_LDFLAGS += $(VORTEX_RT_PATH)/libvortexrt.a +VX_LDFLAGS += -lm + +VX_SRCS = kernel.c + +CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors + +CXXFLAGS += -I../../include + +PROJECT = dogfood + +SRCS = dogfood.cpp + +all: $(PROJECT) kernel.bin kernel.dump + +kernel.dump: kernel.elf + $(VX_DP) -D kernel.elf > kernel.dump + +kernel.bin: kernel.elf + $(VX_CP) -O binary kernel.elf kernel.bin + +kernel.elf: $(VX_SRCS) + $(VX_CC) $(VX_CFLAGS) $(VX_SRCS) $(VX_LDFLAGS) -o kernel.elf + +$(PROJECT): $(SRCS) + $(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -L../../stub -lvortex -o $@ + +run-fpga: $(PROJECT) + LD_LIBRARY_PATH=../../opae:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-ase: $(PROJECT) + ASE_LOG=0 LD_LIBRARY_PATH=../../opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-rtlsim: $(PROJECT) + LD_LIBRARY_PATH=../../rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +run-simx: $(PROJECT) + LD_LIBRARY_PATH=../../simx:$(LD_LIBRARY_PATH) ./$(PROJECT) $(OPTS) + +.depend: $(SRCS) + $(CXX) $(CXXFLAGS) -MM $^ > .depend; + +clean: + rm -rf $(PROJECT) *.o .depend + +clean-all: + rm -rf $(PROJECT) *.o *.elf *.bin *.dump .depend + +ifneq ($(MAKECMDGOALS),clean) + -include .depend +endif \ No newline at end of file diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/_hdr b/driver/tests/dogfood/Memcpy/hw/rtl/_hdr deleted file mode 100644 index 39a1dd9e..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/_hdr +++ /dev/null @@ -1,603 +0,0 @@ -// -// Copyright (c) 2017, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - - -// Read from the memory locations first and then write to the memory locations - -`include "platform_if.vh" -`include "afu_json_info.vh" - - -module ccip_std_afu - ( - // CCI-P Clocks and Resets - input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock - input logic pClkDiv2, // 200MHz - CCI-P clock domain. - input logic pClkDiv4, // 100MHz - CCI-P clock domain. - input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock ** - input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock ** - input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset - input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State - input logic pck_cp2af_error, // CCI-P Protocol Error Detected - - // Interface structures - input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port - output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port - ); - - - // - // Run the entire design at the standard CCI-P frequency (400 MHz). - // - logic clk; - assign clk = pClk; - - logic reset; - assign reset = pck_cp2af_softReset; - - logic [511:0] wr_data; - logic [511:0] rd_data; - - logic get_write_addr; - logic do_update; - logic rd_end_of_list; - logic rd_needed; - logic wr_needed; - logic [15:0] cnt_list_length; - - // ========================================================================= - // - // Register requests. - // - // ========================================================================= - - // - // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be - // registered. Here we register pck_cp2af_sRx and assign it to sRx. - // We also assign pck_af2cp_sTx to sTx here but don't register it. - // The code below never uses combinational logic to write sTx. - // - - t_if_ccip_Rx sRx; - always_ff @(posedge clk) - begin - sRx <= pck_cp2af_sRx; - end - - t_if_ccip_Tx sTx; - assign pck_af2cp_sTx = sTx; - - - // ========================================================================= - // - // CSR (MMIO) handling. - // - // ========================================================================= - - // The AFU ID is a unique ID for a given program. Here we generated - // one with the "uuidgen" program and stored it in the AFU's JSON file. - // ASE and synthesis setup scripts automatically invoke afu_json_mgr - // to extract the UUID into afu_json_info.vh. - logic [127:0] afu_id = `AFU_ACCEL_UUID; - - // - // A valid AFU must implement a device feature list, starting at MMIO - // address 0. Every entry in the feature list begins with 5 64-bit - // words: a device feature header, two AFU UUID words and two reserved - // words. - // - - // Is a CSR read request active this cycle? - logic is_csr_read; - assign is_csr_read = sRx.c0.mmioRdValid; - - // Is a CSR write request active this cycle? - logic is_csr_write; - assign is_csr_write = sRx.c0.mmioWrValid; - - // The MMIO request header is overlayed on the normal c0 memory read - // response data structure. Cast the c0Rx header to an MMIO request - // header. - t_ccip_c0_ReqMmioHdr mmio_req_hdr; - assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr); - - - // - // Implement the device feature list by responding to MMIO reads. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c2.mmioRdValid <= 1'b0; - end - else - begin - // Always respond with something for every read request - sTx.c2.mmioRdValid <= is_csr_read; - - // The unique transaction ID matches responses to requests - sTx.c2.hdr.tid <= mmio_req_hdr.tid; - - // Addresses are of 32-bit objects in MMIO space. Addresses - // of 64-bit objects are thus multiples of 2. - case (mmio_req_hdr.address) - 0: // AFU DFH (device feature header) - begin - // Here we define a trivial feature list. In this - // example, our AFU is the only entry in this list. - sTx.c2.data <= t_ccip_mmioData'(0); - // Feature type is AFU - sTx.c2.data[63:60] <= 4'h1; - // End of list (last entry in list) - sTx.c2.data[40] <= 1'b1; - end - - // AFU_ID_L - 2: sTx.c2.data <= afu_id[63:0]; - - // AFU_ID_H - 4: sTx.c2.data <= afu_id[127:64]; - - // DFH_RSVD0 - 6: sTx.c2.data <= t_ccip_mmioData'(0); - - // DFH_RSVD1 - 8: sTx.c2.data <= t_ccip_mmioData'(0); - - default: sTx.c2.data <= t_ccip_mmioData'(0); - endcase - end - end - - - // - // CSR write handling. Host software must tell the AFU the memory address - // to which it should be writing. The address is set by writing a CSR. - // - - // We use MMIO address 0 to set the memory address. The read and - // write MMIO spaces are logically separate so we are free to use - // whatever we like. This may not be good practice for cleanly - // organizing the MMIO address space, but it is legal. - logic is_mem_addr_csr_write; - assign is_mem_addr_csr_write = get_write_addr && is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(0)); - - // Memory address to which this AFU will write. - t_ccip_clAddr write_mem_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - get_write_addr <= 1'b1; - end - else if (is_mem_addr_csr_write) - begin - write_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - get_write_addr <= 1'b0; - end - end - - - // We use MMIO address 0 to set the memory address for reading data. - logic is_mem_addr_csr_read; - assign is_mem_addr_csr_read = !get_write_addr && is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(0)); - - // Memory address from which this AFU will read. - logic start_read; - t_ccip_clAddr read_mem_addr; - - //logic start_traversal = 'b0; - //t_ccip_clAddr start_traversal_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_read <= 1'b0; - end - else if (is_mem_addr_csr_read) - begin - read_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_read <= 'b1; - end - end - - - // ========================================================================= - // - // Main AFU logic - // - // ========================================================================= - - // - // States in our simple example. - // - //typedef enum logic [0:0] - typedef enum logic [1:0] - { - STATE_IDLE, - STATE_READ, - STATE_UPDATE, - STATE_WRITE - } - t_state; - - t_state state; - - // - // State machine - // - always_ff @(posedge clk) - begin - if (reset) - begin - state <= STATE_IDLE; - rd_end_of_list <= 1'b0; - end - else - begin - case (state) - STATE_IDLE: - begin - // Traversal begins when CSR 1 is written - if (start_read) - begin - state <= STATE_READ; - $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - STATE_READ: - begin - if (rd_needed) - begin - // Read data from the address and update address - state <= STATE_UPDATE; - start_read <= 'b0; - $display("AFU reading data and pointing to next read address..."); - end - end - - STATE_UPDATE: - begin - // Update the read value to be written back - if (do_update) - begin - state <= STATE_WRITE; - $display("AFU performing comutations on the read values..."); - end - end - - STATE_WRITE: - begin - // Write the updated value to the address - // Point to new address after that - // if done then point to IDLE; else read new values - if (rd_end_of_list) - begin - state <= STATE_IDLE; - $display("AFU done..."); - end - else - begin - if (wr_needed) - begin - state <= STATE_READ; - $display("AFU reading again from read address..."); - end - end - end - endcase - end - end - - - // ========================================================================= - // - // Read logic. - // - // ========================================================================= - - // - // READ REQUEST - // - - // Did a write response just arrive - logic addr_next_valid; - - // Next read address - t_ccip_clAddr addr_next; - - always_ff @(posedge clk) - begin - // Next read address is valid when we have got the write response back - // and channel is not full - //addr_next_valid <= sRx.c0TxAlmFull; - addr_next_valid <= sRx.c1.rspValid; - - // Next address is current address plus address length - // Apurve - //addr_next <= addr_next + addr_size; - addr_next <= addr_next + 0; - - // End of list reached if we have read 10 times - rd_end_of_list <= (cnt_list_length == 'h10); - end - - // - // Since back pressure may prevent an immediate read request, we must - // record whether a read is needed and hold it until the request can - // be sent to the FIU. - // - t_ccip_clAddr rd_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - rd_needed <= 1'b0; - end - else - begin - // If reads are allowed this cycle then we can safely clear - // any previously requested reads. This simple AFU has only - // one read in flight at a time since it is walking a pointer - // chain. - if (rd_needed) - begin - rd_needed <= sRx.c0TxAlmFull; - end - else - begin - // Need a read under two conditions: - // - Starting a new walk - // - A read response just arrived from a line containing - // a next pointer. - rd_needed <= (start_read || (addr_next_valid && ! rd_end_of_list)); - rd_addr <= (start_read ? read_mem_addr : addr_next); - end - end - end - - // - // Emit read requests to the FIU. - // - - // Read header defines the request to the FIU - t_cci_c0_ReqMemHdr rd_hdr; - - always_comb - begin - rd_hdr = t_cci_c0_ReqMemHdr'(0); - - // Read request type - rd_hdr.req_type = eREQ_RDLINE_I; - // Virtual address (MPF virtual addressing is enabled) - rd_hdr.address = rd_addr; - // Let the FIU pick the channel - rd_hdr.vc_sel = eVC_VA; - // Read 4 lines (the size of an entry in the list) - rd_hdr.cl_len = eCL_LEN_4; - end - - // Send read requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c0.valid <= 1'b0; - cnt_list_length <= 0; - end - else - begin - // Generate a read request when needed and the FIU isn't full - sTx.c0.valid <= (rd_needed && ! sRx.c0TxAlmFull); - sTx.c0.hdr <= rd_hdr; - - if (rd_needed && ! sRx.c0TxAlmFull) - begin - cnt_list_length <= cnt_list_length + 1; - //$display(" Reading from VA 0x%x", clAddrToByteAddr(rd_addr)); - $display("Incrementing read count..."); - end - end - end - - // - // READ RESPONSE HANDLING - // - - // - // Receive data (read responses). - // - always_ff @(posedge clk) - begin - if (reset) - begin - do_update <= 1'b0; - end - else - begin - if (state == STATE_READ) - begin - rd_data <= sRx.c0.data; - do_update <= 1'b1; - end - if (state == STATE_UPDATE) - begin - // Update the read data and put it in the write data to be written - wr_data <= rd_data + 1; - do_update <= 1'b0; - end - end - end - - - // ========================================================================= - // - // Write logic. - // - // ========================================================================= - - - // - // WRITE REQUEST - // - - // Did a write response just arrive - logic wr_addr_next_valid; - - // Next write address - t_ccip_clAddr wr_addr_next; - - always_ff @(posedge clk) - begin - // Next write address is valid when we have got the read response back - // and channel is not full - //wr_addr_next_valid <= sRx.c1TxAlmFull; - wr_addr_next_valid <= sRx.c0.rspValid; - - // Next address is current address plus address length - // Apurve - //wr_addr_next <= wr_addr_next + addr_size; - wr_addr_next <= wr_addr_next + 0; - end - - // - // Since back pressure may prevent an immediate write request, we must - // record whether a write is needed and hold it until the request can - // be sent to the FIU. - // - t_ccip_clAddr wr_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - wr_needed <= 1'b0; - end - else - begin - // If writes are allowed this cycle then we can safely clear - // any previously requested writes. This simple AFU has only - // one write in flight at a time since it is walking a pointer - // chain. - if (wr_needed) - begin - wr_needed <= sRx.c1TxAlmFull; - end - else - begin - // Need a write under two conditions: - // - Starting a new walk - // - A write response just arrived from a line containing - // a next pointer. - //wr_needed <= (start_write || (wr_addr_next_valid && ! rd_end_of_list)); - wr_needed <= (start_write || wr_addr_next_valid); - wr_addr <= (start_write ? write_mem_addr : wr_addr_next); - end - end - end - - // - // Emit write requests to the FIU. - // - - // Write header defines the request to the FIU - t_ccip_c1_ReqMemHdr wr_hdr; - - always_comb - begin - wr_hdr = t_cci_c1_ReqMemHdr'(0); - - // Write request type - wr_hdr.req_type = eREQ_RDLINE_I; - // Virtual address (MPF virtual addressing is enabled) - wr_hdr.address = wr_addr; - // Let the FIU pick the channel - wr_hdr.vc_sel = eVC_VA; - // Write 4 lines (the size of an entry in the list) - wr_hdr.cl_len = eCL_LEN_4; - // Start of packet is true (single line write) - wr_hdr.sop = 1'b1; - end - - // Send write requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c1.valid <= 1'b0; - //cnt_list_length <= 0; - end - else - begin - // Generate a write request when needed and the FIU isn't full - sTx.c1.valid <= (wr_needed && ! sRx.c1TxAlmFull); - sTx.c1.hdr <= wr_hdr; - sTx.c1.data = t_ccip_clData'(wr_data); - - //if (wr_needed && ! sRx.c1TxAlmFull) - //begin - // cnt_list_length <= cnt_list_length + 1; - // //$display(" Writing from VA 0x%x", clAddrToByteAddr(rd_addr)); - // $display("Incrementing write count..."); - //end - end - end - - // - // WRITE RESPONSE HANDLING - // - - // Apurve: Check if a signal is to be sent to read to start reading in case - // write response does not work - // - // Send data (write requests). - // - //always_ff @(posedge clk) - //begin - // if (state == STATE_WRITE) - // begin - // rd_data <= sRx.c0.data; - // end - // if (state == STATE_UPDATE) - // begin - // // Update the write data and put it in the write data to be written - // wr_data <= rd_data + 1; - // end - //end - -endmodule diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json deleted file mode 100644 index 85d7a529..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello.json +++ /dev/null @@ -1,18 +0,0 @@ -{ - "version": 1, - "afu-image": { - "power": 0, - "afu-top-interface": - { - "name": "ccip_std_afu" - }, - "accelerator-clusters": - [ - { - "name": "cci_hello", - "total-contexts": 1, - "accelerator-type-uuid": "c6aa954a-9b91-4a37-abc1-1d9f0709dcc3" - } - ] - } -} diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv deleted file mode 100644 index eaee72da..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu.sv +++ /dev/null @@ -1,653 +0,0 @@ -// -// Copyright (c) 2017, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - - -// Read from the memory locations first and then write to the memory locations - -`include "platform_if.vh" -`include "afu_json_info.vh" - - -module ccip_std_afu - ( - // CCI-P Clocks and Resets - input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock - input logic pClkDiv2, // 200MHz - CCI-P clock domain. - input logic pClkDiv4, // 100MHz - CCI-P clock domain. - input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock ** - input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock ** - input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset - input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State - input logic pck_cp2af_error, // CCI-P Protocol Error Detected - - // Interface structures - input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port - output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port - ); - - - // - // Run the entire design at the standard CCI-P frequency (400 MHz). - // - logic clk; - assign clk = pClk; - - logic reset; - assign reset = pck_cp2af_softReset; - - logic [511:0] wr_data; - logic [511:0] rd_data; - - logic do_update; - logic start_read; - logic start_write; - logic wr_addr_next_valid; - logic addr_next_valid; - logic rd_end_of_list; - logic rd_needed; - logic wr_needed; - logic read_req; - logic write_req; - logic [15:0] cnt_list_length; - t_ccip_clAddr rd_addr; - t_ccip_clAddr wr_addr; - t_ccip_clAddr addr_next; - t_ccip_clAddr wr_addr_next; - - // ========================================================================= - // - // Register requests. - // - // ========================================================================= - - // - // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be - // registered. Here we register pck_cp2af_sRx and assign it to sRx. - // We also assign pck_af2cp_sTx to sTx here but don't register it. - // The code below never uses combinational logic to write sTx. - // - - t_if_ccip_Rx sRx; - always_ff @(posedge clk) - begin - sRx <= pck_cp2af_sRx; - end - - t_if_ccip_Tx sTx; - assign pck_af2cp_sTx = sTx; - - - // ========================================================================= - // - // CSR (MMIO) handling. - // - // ========================================================================= - - // The AFU ID is a unique ID for a given program. Here we generated - // one with the "uuidgen" program and stored it in the AFU's JSON file. - // ASE and synthesis setup scripts automatically invoke afu_json_mgr - // to extract the UUID into afu_json_info.vh. - logic [127:0] afu_id = `AFU_ACCEL_UUID; - - // - // A valid AFU must implement a device feature list, starting at MMIO - // address 0. Every entry in the feature list begins with 5 64-bit - // words: a device feature header, two AFU UUID words and two reserved - // words. - // - - // Is a CSR read request active this cycle? - logic is_csr_read; - assign is_csr_read = sRx.c0.mmioRdValid; - - // Is a CSR write request active this cycle? - logic is_csr_write; - assign is_csr_write = sRx.c0.mmioWrValid; - - // The MMIO request header is overlayed on the normal c0 memory read - // response data structure. Cast the c0Rx header to an MMIO request - // header. - t_ccip_c0_ReqMmioHdr mmio_req_hdr; - assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr); - - - // - // Implement the device feature list by responding to MMIO reads. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c2.mmioRdValid <= 1'b0; - end - else - begin - // Always respond with something for every read request - sTx.c2.mmioRdValid <= is_csr_read; - - // The unique transaction ID matches responses to requests - sTx.c2.hdr.tid <= mmio_req_hdr.tid; - - // Addresses are of 32-bit objects in MMIO space. Addresses - // of 64-bit objects are thus multiples of 2. - case (mmio_req_hdr.address) - 0: // AFU DFH (device feature header) - begin - // Here we define a trivial feature list. In this - // example, our AFU is the only entry in this list. - sTx.c2.data <= t_ccip_mmioData'(0); - // Feature type is AFU - sTx.c2.data[63:60] <= 4'h1; - // End of list (last entry in list) - sTx.c2.data[40] <= 1'b1; - end - - // AFU_ID_L - 2: sTx.c2.data <= afu_id[63:0]; - - // AFU_ID_H - 4: sTx.c2.data <= afu_id[127:64]; - - // DFH_RSVD0 - 6: sTx.c2.data <= t_ccip_mmioData'(0); - - // DFH_RSVD1 - 8: sTx.c2.data <= t_ccip_mmioData'(0); - - // Updated by apurve to check fpgaReadMMIO - 10: sTx.c2.data <= t_ccip_mmioData'(start_read); - - default: sTx.c2.data <= t_ccip_mmioData'(0); - endcase - end - end - - - // - // CSR write handling. Host software must tell the AFU the memory address - // to which it should be writing. The address is set by writing a CSR. - // - - // We use MMIO address 0 to set the memory address. The read and - // write MMIO spaces are logically separate so we are free to use - // whatever we like. This may not be good practice for cleanly - // organizing the MMIO address space, but it is legal. - logic is_mem_addr_csr_write; - assign is_mem_addr_csr_write = is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(0)); - - // Memory address to which this AFU will write. - t_ccip_clAddr write_mem_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_write <= 1'b0; - end - else if (is_mem_addr_csr_write) - begin - write_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_write <= 1'b1; - //$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr)); - end - end - - - // We use MMIO address 8 to set the memory address for reading data. - logic is_mem_addr_csr_read; - assign is_mem_addr_csr_read = is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(2)); - - // Memory address from which this AFU will read. - t_ccip_clAddr read_mem_addr; - - //logic start_traversal = 'b0; - //t_ccip_clAddr start_traversal_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_read <= 1'b0; - end - else if (is_mem_addr_csr_read) - begin - read_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_read <= 1'b1; - //$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - - // ========================================================================= - // - // Main AFU logic - // - // ========================================================================= - - // - // States in our simple example. - // - //typedef enum logic [0:0] - typedef enum logic [1:0] - { - STATE_IDLE, - STATE_READ, - STATE_UPDATE, - STATE_WRITE - } - t_state; - - t_state state; - - // - // State machine - // - always_ff @(posedge clk) - begin - if (reset) - begin - state <= STATE_IDLE; - rd_end_of_list <= 1'b0; - end - else - begin - case (state) - STATE_IDLE: - begin - // Traversal begins when CSR 1 is written - if (start_read) - begin - state <= STATE_READ; - $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - STATE_READ: - begin - $display("AFU in READ..."); - $display("do_update is %d...",do_update); - $display("addr_next_valid is %d...",addr_next_valid); - $display("rd_needed is %d...",rd_needed); - if (!rd_needed && do_update) - begin - state <= STATE_UPDATE; - $display("AFU moving to UPDATE..."); - end - end - - STATE_UPDATE: - begin - // Update the read value to be written back - $display("AFU in UPDATE..."); - if (!do_update) - begin - state <= STATE_WRITE; - wr_needed <= 1'b1; - $display("AFU moving to WRITE..."); - end - end - - STATE_WRITE: - begin - // Write the updated value to the address - // Point to new address after that - // if done then point to IDLE; else read new values - $display("AFU in WRITE..."); - if (rd_end_of_list) - begin - state <= STATE_IDLE; - $display("AFU done..."); - end - else if (!wr_needed) - begin - state <= STATE_READ; - $display("AFU moving to READ from WRITE..."); - start_write <= 1'b0; - write_req <= 1'b0; - end - end - endcase - end - end - - - // ========================================================================= - // - // Read logic. - // - // ========================================================================= - - // - // READ REQUEST - // - - // Did a write response just arrive - - // Next read address - - always_ff @(posedge clk) - begin - // Next read address is valid when we have got the write response back - if (sRx.c1.rspValid) - begin - addr_next_valid <= sRx.c1.rspValid; - - //if (state == STATE_READ && !rd_needed) - //begin - // Apurve: Next address is current address plus address length - //addr_next <= addr_next + addr_size; - addr_next <= (addr_next_valid ? rd_addr + 0 : rd_addr); - - // End of list reached if we have read 5 times - rd_end_of_list <= (cnt_list_length == 'h5); - //end - end - end - - // - // Since back pressure may prevent an immediate read request, we must - // record whether a read is needed and hold it until the request can - // be sent to the FIU. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - rd_needed <= 1'b0; - end - else - begin - // If reads are allowed this cycle then we can safely clear - // any previously requested reads. This simple AFU has only - // one read in flight at a time since it is walking a pointer - // chain. - if (rd_needed) - begin - //rd_needed <= sRx.c0TxAlmFull; - //rd_needed <= (!sRx.c0TxAlmFull && !sRx.c0.rspValid); - rd_needed <= !sRx.c0.rspValid; - end - else if (state == STATE_READ) - begin - // Need a read under two conditions: - // - Starting a new walk - // - A read response just arrived from a line containing - // a next pointer. - rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list))); - rd_addr <= (start_read ? read_mem_addr : addr_next); - //$display("rd_addr is 0x%x", t_ccip_clAddr'(rd_addr)); - //$display("read mem addr is 0x%x", t_ccip_clAddr'(read_mem_addr)); - //$display("start read is %d", start_read); - end - end - end - - // - // Emit read requests to the FIU. - // - - // Read header defines the request to the FIU - t_ccip_c0_ReqMemHdr rd_hdr; - - always_comb - begin - rd_hdr = t_ccip_c0_ReqMemHdr'(0); - - // Read request type (No intention to cache) - //rd_hdr.req_type = 4'h0; - - // Virtual address (MPF virtual addressing is enabled) - rd_hdr.address = rd_addr; - - // Read over channel VA - //rd_hdr.vc_sel = 2'h0; - - // Read one cache line (64 bytes) - //rd_hdr.cl_len = 2'h0; - end - - // Send read requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c0.valid <= 1'b0; - cnt_list_length <= 0; - read_req <= 1'b0; - end - else - begin - // Generate a read request when needed and the FIU isn't full - if (state == STATE_READ) - begin - sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull && !read_req); - - if (rd_needed && !sRx.c0TxAlmFull && !read_req) - begin - sTx.c0.hdr <= rd_hdr; - cnt_list_length <= cnt_list_length + 1; - read_req <= 1'b1; - $display("Incrementing read count...%d",cnt_list_length); - $display("Read address is 0x%x...",rd_hdr.address); - addr_next_valid <= 1'b0; - // Apurve: Add something to stop read once this section has been accessed - //rd_needed <= 1'b0; - end - end - end - end - - // - // READ RESPONSE HANDLING - // - - // - // Receive data (read responses). - // - always_ff @(posedge clk) - begin - if (reset) - begin - do_update <= 1'b0; - end - else - begin - if (!do_update && sRx.c0.rspValid) - begin - rd_data <= sRx.c0.data; - do_update <= 1'b1; - $display("rd data is %d...",rd_data); - end - - if ((state == STATE_UPDATE) && (do_update == 1'b1)) - begin - // Update the read data and put it in the write data to be written - wr_data <= rd_data + 2; - do_update <= 1'b0; - read_req <= 1'b0; - $display("write data is %d...",wr_data); - - // First read done. Next reads should be from the updated addresses - start_read <= 1'b0; - end - end - end - - - // ========================================================================= - // - // Write logic. - // - // ========================================================================= - - - // - // WRITE REQUEST - // - - // Did a write response just arrive - - // Next write address - - always_ff @(posedge clk) - begin - if (sRx.c0.rspValid) - begin - // Next write address is valid when we have got the read response back - wr_addr_next_valid <= sRx.c0.rspValid; - //wr_addr_next_valid <= (!start_write && sRx.c0.rspValid); - - //if (state == STATE_WRITE && !wr_needed) - //begin - // Apurve: Next address is current address plus address length - //wr_addr_next <= wr_addr + 0; - wr_addr_next <= (wr_addr_next_valid ? wr_addr + 0 : wr_addr); - //end - end - end - - // - // Since back pressure may prevent an immediate write request, we must - // record whether a write is needed and hold it until the request can - // be sent to the FIU. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - wr_needed <= 1'b0; - end - else - begin - // If writes are allowed this cycle then we can safely clear - // any previously requested writes. This simple AFU has only - // one write in flight at a time since it is walking a pointer - // chain. - if (wr_needed) - begin - //wr_needed <= sRx.c1TxAlmFull; - //wr_needed <= (!sRx.c1TxAlmFull && !sRx.c1.rspValid); - wr_needed <= !sRx.c1.rspValid; - end - else - begin - // Need a write under two conditions: - // - Starting a new walk - // - A write response just arrived from a line containing - // a next pointer. - wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid)); - wr_addr <= (start_write ? write_mem_addr : wr_addr_next); - //$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr)); - end - end - end - - // - // Emit write requests to the FIU. - // - - // Write header defines the request to the FIU - t_ccip_c1_ReqMemHdr wr_hdr; - - always_comb - begin - wr_hdr = t_ccip_c1_ReqMemHdr'(0); - - // Write request type - //wr_hdr.req_type = 4'h0; - - // Virtual address (MPF virtual addressing is enabled) - wr_hdr.address = wr_addr; - - // Let the FIU pick the channel - //wr_hdr.vc_sel = 2'h2; - - // Write 1 cache line (64 bytes) - //wr_hdr.cl_len = 2'h0; - - // Start of packet is true (single line write) - wr_hdr.sop = 1'b1; - end - - // Send write requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c1.valid <= 1'b0; - write_req <= 1'b0; - end - else - begin - // Generate a write request when needed and the FIU isn't full - if (state == STATE_WRITE) - begin - sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull && !write_req); - if (wr_needed && !sRx.c1TxAlmFull && !write_req) - begin - sTx.c1.hdr <= wr_hdr; - sTx.c1.data <= t_ccip_clData'(wr_data); - write_req <= 1'b1; - wr_addr_next_valid <= 1'b0; - $display("Write address is 0x%x...", wr_hdr.address); - end - end - end - end - - - // - // WRITE RESPONSE HANDLING - // - - // Apurve: Check if a signal is to be sent to read to start reading in case - // write response does not work - // - // Send data (write requests). - // - //always_ff @(posedge clk) - //begin - // if (state == STATE_WRITE) - // begin - // rd_data <= sRx.c0.data; - // end - // if (state == STATE_UPDATE) - // begin - // // Update the write data and put it in the write data to be written - // wr_data <= rd_data + 1; - // end - //end - -endmodule diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv b/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv deleted file mode 100644 index 144b430e..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/cci_hello_afu_working.sv +++ /dev/null @@ -1,621 +0,0 @@ -// -// Copyright (c) 2017, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - - -// Read from the memory locations first and then write to the memory locations - -`include "platform_if.vh" -`include "afu_json_info.vh" - - -module ccip_std_afu - ( - // CCI-P Clocks and Resets - input logic pClk, // 400MHz - CCI-P clock domain. Primary interface clock - input logic pClkDiv2, // 200MHz - CCI-P clock domain. - input logic pClkDiv4, // 100MHz - CCI-P clock domain. - input logic uClk_usr, // User clock domain. Refer to clock programming guide ** Currently provides fixed 300MHz clock ** - input logic uClk_usrDiv2, // User clock domain. Half the programmed frequency ** Currently provides fixed 150MHz clock ** - input logic pck_cp2af_softReset, // CCI-P ACTIVE HIGH Soft Reset - input logic [1:0] pck_cp2af_pwrState, // CCI-P AFU Power State - input logic pck_cp2af_error, // CCI-P Protocol Error Detected - - // Interface structures - input t_if_ccip_Rx pck_cp2af_sRx, // CCI-P Rx Port - output t_if_ccip_Tx pck_af2cp_sTx // CCI-P Tx Port - ); - - - // - // Run the entire design at the standard CCI-P frequency (400 MHz). - // - logic clk; - assign clk = pClk; - - logic reset; - assign reset = pck_cp2af_softReset; - - logic [511:0] wr_data; - logic [511:0] rd_data; - - logic do_update; - logic start_read; - logic start_write; - logic wr_addr_next_valid; - logic addr_next_valid; - logic rd_end_of_list; - logic rd_needed; - logic wr_needed; - logic [15:0] cnt_list_length; - t_ccip_clAddr rd_addr; - t_ccip_clAddr wr_addr; - t_ccip_clAddr addr_next; - t_ccip_clAddr wr_addr_next; - - // ========================================================================= - // - // Register requests. - // - // ========================================================================= - - // - // The incoming pck_cp2af_sRx and outgoing pck_af2cp_sTx must both be - // registered. Here we register pck_cp2af_sRx and assign it to sRx. - // We also assign pck_af2cp_sTx to sTx here but don't register it. - // The code below never uses combinational logic to write sTx. - // - - t_if_ccip_Rx sRx; - always_ff @(posedge clk) - begin - sRx <= pck_cp2af_sRx; - end - - t_if_ccip_Tx sTx; - assign pck_af2cp_sTx = sTx; - - - // ========================================================================= - // - // CSR (MMIO) handling. - // - // ========================================================================= - - // The AFU ID is a unique ID for a given program. Here we generated - // one with the "uuidgen" program and stored it in the AFU's JSON file. - // ASE and synthesis setup scripts automatically invoke afu_json_mgr - // to extract the UUID into afu_json_info.vh. - logic [127:0] afu_id = `AFU_ACCEL_UUID; - - // - // A valid AFU must implement a device feature list, starting at MMIO - // address 0. Every entry in the feature list begins with 5 64-bit - // words: a device feature header, two AFU UUID words and two reserved - // words. - // - - // Is a CSR read request active this cycle? - logic is_csr_read; - assign is_csr_read = sRx.c0.mmioRdValid; - - // Is a CSR write request active this cycle? - logic is_csr_write; - assign is_csr_write = sRx.c0.mmioWrValid; - - // The MMIO request header is overlayed on the normal c0 memory read - // response data structure. Cast the c0Rx header to an MMIO request - // header. - t_ccip_c0_ReqMmioHdr mmio_req_hdr; - assign mmio_req_hdr = t_ccip_c0_ReqMmioHdr'(sRx.c0.hdr); - - - // - // Implement the device feature list by responding to MMIO reads. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c2.mmioRdValid <= 1'b0; - end - else - begin - // Always respond with something for every read request - sTx.c2.mmioRdValid <= is_csr_read; - - // The unique transaction ID matches responses to requests - sTx.c2.hdr.tid <= mmio_req_hdr.tid; - - // Addresses are of 32-bit objects in MMIO space. Addresses - // of 64-bit objects are thus multiples of 2. - case (mmio_req_hdr.address) - 0: // AFU DFH (device feature header) - begin - // Here we define a trivial feature list. In this - // example, our AFU is the only entry in this list. - sTx.c2.data <= t_ccip_mmioData'(0); - // Feature type is AFU - sTx.c2.data[63:60] <= 4'h1; - // End of list (last entry in list) - sTx.c2.data[40] <= 1'b1; - end - - // AFU_ID_L - 2: sTx.c2.data <= afu_id[63:0]; - - // AFU_ID_H - 4: sTx.c2.data <= afu_id[127:64]; - - // DFH_RSVD0 - 6: sTx.c2.data <= t_ccip_mmioData'(0); - - // DFH_RSVD1 - 8: sTx.c2.data <= t_ccip_mmioData'(0); - - // Updated by apurve to check fpgaReadMMIO - 10: sTx.c2.data <= t_ccip_mmioData'(start_read); - - default: sTx.c2.data <= t_ccip_mmioData'(0); - endcase - end - end - - - // - // CSR write handling. Host software must tell the AFU the memory address - // to which it should be writing. The address is set by writing a CSR. - // - - // We use MMIO address 0 to set the memory address. The read and - // write MMIO spaces are logically separate so we are free to use - // whatever we like. This may not be good practice for cleanly - // organizing the MMIO address space, but it is legal. - logic is_mem_addr_csr_write; - assign is_mem_addr_csr_write = is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(0)); - - // Memory address to which this AFU will write. - t_ccip_clAddr write_mem_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_write <= 1'b0; - end - else if (is_mem_addr_csr_write) - begin - write_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_write <= 1'b1; - //$display("Write mem address is 0x%x", t_ccip_clAddr'(write_mem_addr)); - end - end - - - // We use MMIO address 8 to set the memory address for reading data. - logic is_mem_addr_csr_read; - assign is_mem_addr_csr_read = is_csr_write && - (mmio_req_hdr.address == t_ccip_mmioAddr'(2)); - - // Memory address from which this AFU will read. - t_ccip_clAddr read_mem_addr; - - //logic start_traversal = 'b0; - //t_ccip_clAddr start_traversal_addr; - - always_ff @(posedge clk) - begin - if (reset) - begin - start_read <= 1'b0; - end - else if (is_mem_addr_csr_read) - begin - read_mem_addr <= t_ccip_clAddr'(sRx.c0.data); - start_read <= 1'b1; - //$display("Read mem address is 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - - // ========================================================================= - // - // Main AFU logic - // - // ========================================================================= - - // - // States in our simple example. - // - //typedef enum logic [0:0] - typedef enum logic [1:0] - { - STATE_IDLE, - STATE_READ, - STATE_UPDATE, - STATE_WRITE - } - t_state; - - t_state state; - - // - // State machine - // - always_ff @(posedge clk) - begin - if (reset) - begin - state <= STATE_IDLE; - rd_end_of_list <= 1'b0; - end - else - begin - case (state) - STATE_IDLE: - begin - // Traversal begins when CSR 1 is written - if (start_read) - begin - state <= STATE_READ; - $display("AFU starting traversal at 0x%x", t_ccip_clAddr'(read_mem_addr)); - end - end - - STATE_READ: - begin - $display("AFU in READ..."); - if (!rd_needed && do_update) - begin - state <= STATE_UPDATE; - $display("AFU moving to UPDATE..."); - end - end - - STATE_UPDATE: - begin - // Update the read value to be written back - $display("AFU in UPDATE..."); - if (!do_update) - begin - state <= STATE_WRITE; - wr_needed <= 1'b1; - $display("AFU moving to WRITE..."); - end - end - - STATE_WRITE: - begin - // Write the updated value to the address - // Point to new address after that - // if done then point to IDLE; else read new values - $display("AFU in WRITE..."); - if (rd_end_of_list) - begin - state <= STATE_IDLE; - $display("AFU done..."); - end - else if (!wr_needed) - begin - state <= STATE_READ; - $display("AFU moving to READ from WRITE..."); - start_write <= 1'b0; - end - end - endcase - end - end - - - // ========================================================================= - // - // Read logic. - // - // ========================================================================= - - // - // READ REQUEST - // - - // Did a write response just arrive - - // Next read address - - always_ff @(posedge clk) - begin - // Next read address is valid when we have got the write response back - addr_next_valid <= sRx.c1.rspValid; - - // Apurve: Next address is current address plus address length - //addr_next <= addr_next + addr_size; - addr_next <= rd_addr + 0; - - // End of list reached if we have read 5 times - rd_end_of_list <= (cnt_list_length == 'h5); - end - - // - // Since back pressure may prevent an immediate read request, we must - // record whether a read is needed and hold it until the request can - // be sent to the FIU. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - rd_needed <= 1'b0; - end - else - begin - // If reads are allowed this cycle then we can safely clear - // any previously requested reads. This simple AFU has only - // one read in flight at a time since it is walking a pointer - // chain. - if (rd_needed) - begin - rd_needed <= sRx.c0TxAlmFull; - end - else - begin - // Need a read under two conditions: - // - Starting a new walk - // - A read response just arrived from a line containing - // a next pointer. - rd_needed <= (start_read || (!sRx.c0TxAlmFull && (addr_next_valid && ! rd_end_of_list))); - rd_addr <= (start_read ? read_mem_addr : addr_next); - //$display("rd_addr is 0x%x", t_ccip_clAddr'(rd_addr)); - //$display("read mem addr is 0x%x", t_ccip_clAddr'(read_mem_addr)); - //$display("start read is %d", start_read); - end - end - end - - // - // Emit read requests to the FIU. - // - - // Read header defines the request to the FIU - t_ccip_c0_ReqMemHdr rd_hdr; - - always_comb - begin - rd_hdr = t_ccip_c0_ReqMemHdr'(0); - - // Read request type (No intention to cache) - //rd_hdr.req_type = 4'h0; - - // Virtual address (MPF virtual addressing is enabled) - rd_hdr.address = rd_addr; - - // Read over channel VA - //rd_hdr.vc_sel = 2'h0; - - // Read one cache line (64 bytes) - //rd_hdr.cl_len = 2'h0; - end - - // Send read requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c0.valid <= 1'b0; - cnt_list_length <= 0; - end - else - begin - // Generate a read request when needed and the FIU isn't full - if (state == STATE_READ) - begin - sTx.c0.valid <= (rd_needed && !sRx.c0TxAlmFull); - - if (rd_needed && !sRx.c0TxAlmFull) - begin - sTx.c0.hdr <= rd_hdr; - cnt_list_length <= cnt_list_length + 1; - $display("Incrementing read count...%d",cnt_list_length); - $display("Read address is 0x%x...",rd_hdr.address); - // Apurve: Add something to stop read once this section has been accessed - end - end - end - end - - // - // READ RESPONSE HANDLING - // - - // - // Receive data (read responses). - // - always_ff @(posedge clk) - begin - if (reset) - begin - do_update <= 1'b0; - end - else - begin - if (sRx.c0.rspValid) - begin - rd_data <= sRx.c0.data; - do_update <= 1'b1; - //$display("rd data is %d...",rd_data); - end - - if (state == STATE_UPDATE) - begin - // Update the read data and put it in the write data to be written - wr_data <= rd_data + 2; - do_update <= 1'b0; - $display("write data is %d...",wr_data); - - // First read done. Next reads should be from the updated addresses - start_read <= 1'b0; - end - end - end - - - // ========================================================================= - // - // Write logic. - // - // ========================================================================= - - - // - // WRITE REQUEST - // - - // Did a write response just arrive - - // Next write address - - always_ff @(posedge clk) - begin - // Next write address is valid when we have got the read response back - wr_addr_next_valid <= sRx.c0.rspValid; - - // Apurve: Next address is current address plus address length - wr_addr_next <= wr_addr + 0; - - end - - // - // Since back pressure may prevent an immediate write request, we must - // record whether a write is needed and hold it until the request can - // be sent to the FIU. - // - - always_ff @(posedge clk) - begin - if (reset) - begin - wr_needed <= 1'b0; - end - else - begin - // If writes are allowed this cycle then we can safely clear - // any previously requested writes. This simple AFU has only - // one write in flight at a time since it is walking a pointer - // chain. - if (wr_needed) - begin - wr_needed <= sRx.c1TxAlmFull; - end - else - begin - // Need a write under two conditions: - // - Starting a new walk - // - A write response just arrived from a line containing - // a next pointer. - wr_needed <= (start_write || (!sRx.c1TxAlmFull && wr_addr_next_valid)); - wr_addr <= (start_write ? write_mem_addr : wr_addr_next); - //$display("Write mem address later is 0x%x", t_ccip_clAddr'(write_mem_addr)); - end - end - end - - // - // Emit write requests to the FIU. - // - - // Write header defines the request to the FIU - t_ccip_c1_ReqMemHdr wr_hdr; - - always_comb - begin - wr_hdr = t_ccip_c1_ReqMemHdr'(0); - - // Write request type - //wr_hdr.req_type = 4'h0; - - // Virtual address (MPF virtual addressing is enabled) - wr_hdr.address = wr_addr; - - // Let the FIU pick the channel - //wr_hdr.vc_sel = 2'h2; - - // Write 1 cache line (64 bytes) - //wr_hdr.cl_len = 2'h0; - - // Start of packet is true (single line write) - wr_hdr.sop = 1'b1; - end - - // Send write requests to the FIU - always_ff @(posedge clk) - begin - if (reset) - begin - sTx.c1.valid <= 1'b0; - end - else - begin - // Generate a write request when needed and the FIU isn't full - if (state == STATE_WRITE) - begin - sTx.c1.valid <= (wr_needed && !sRx.c1TxAlmFull); - if (wr_needed && !sRx.c1TxAlmFull) - begin - sTx.c1.hdr <= wr_hdr; - sTx.c1.data <= t_ccip_clData'(wr_data); - end - end - end - end - - - // - // WRITE RESPONSE HANDLING - // - - // Apurve: Check if a signal is to be sent to read to start reading in case - // write response does not work - // - // Send data (write requests). - // - //always_ff @(posedge clk) - //begin - // if (state == STATE_WRITE) - // begin - // rd_data <= sRx.c0.data; - // end - // if (state == STATE_UPDATE) - // begin - // // Update the write data and put it in the write data to be written - // wr_data <= rd_data + 1; - // end - //end - -endmodule diff --git a/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt b/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt deleted file mode 100644 index 8a73008b..00000000 --- a/driver/tests/dogfood/Memcpy/hw/rtl/sources.txt +++ /dev/null @@ -1,2 +0,0 @@ -cci_hello.json -cci_hello_afu.sv diff --git a/driver/tests/dogfood/Memcpy/hw/sim/setup_ase b/driver/tests/dogfood/Memcpy/hw/sim/setup_ase deleted file mode 100755 index a8414ac0..00000000 --- a/driver/tests/dogfood/Memcpy/hw/sim/setup_ase +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/sh - -## -## Setup ASE environment using ../rtl/sources.txt. -## - -# Absolute path to this script -SCRIPT=$(readlink -f "$0") -SCRIPT_PATH=$(dirname "$SCRIPT") - -afu_sim_setup --sources="${SCRIPT_PATH}/../rtl/sources.txt" $@ diff --git a/driver/tests/dogfood/Memcpy/sw/Makefile b/driver/tests/dogfood/Memcpy/sw/Makefile deleted file mode 100644 index f3b66c12..00000000 --- a/driver/tests/dogfood/Memcpy/sw/Makefile +++ /dev/null @@ -1,41 +0,0 @@ -include ../../common/sw/common_include.mk - -# Primary test name -TEST = cci_hello - -# Build directory -OBJDIR = obj -CFLAGS += -I./$(OBJDIR) -CPPFLAGS += -I./$(OBJDIR) - -# Files and folders -SRCS = $(TEST).c -OBJS = $(addprefix $(OBJDIR)/,$(patsubst %.c,%.o,$(SRCS))) - -# Targets (build only $(TEST)_ase by default) -all: $(TEST) $(TEST)_ase - -# AFU info from JSON file, including AFU UUID -AFU_JSON_INFO = $(OBJDIR)/afu_json_info.h - -$(AFU_JSON_INFO): ../hw/rtl/$(TEST).json | objdir - afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ - -$(OBJS): $(AFU_JSON_INFO) - -$(TEST): $(OBJS) - $(CC) -o $@ $^ $(LDFLAGS) $(FPGA_LIBS) - -$(TEST)_ase: $(OBJS) - $(CC) -o $@ $^ $(LDFLAGS) $(ASE_LIBS) - -$(OBJDIR)/%.o: %.c | objdir - $(CC) $(CFLAGS) -c $< -o $@ - -clean: - rm -rf $(TEST) $(TEST)_ase $(OBJDIR) - -objdir: - @mkdir -p $(OBJDIR) - -.PHONY: all clean diff --git a/driver/tests/dogfood/Memcpy/sw/cci_hello.c b/driver/tests/dogfood/Memcpy/sw/cci_hello.c deleted file mode 100644 index f12d95c2..00000000 --- a/driver/tests/dogfood/Memcpy/sw/cci_hello.c +++ /dev/null @@ -1,210 +0,0 @@ -// -// Copyright (c) 2017, Intel Corporation -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are met: -// -// Redistributions of source code must retain the above copyright notice, this -// list of conditions and the following disclaimer. -// -// Redistributions in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// Neither the name of the Intel Corporation nor the names of its contributors -// may be used to endorse or promote products derived from this software -// without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE -// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR -// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF -// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN -// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) -// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE -// POSSIBILITY OF SUCH DAMAGE. - -#include -#include -#include -#include -#include -#include - -#include - -// State from the AFU's JSON file, extracted using OPAE's afu_json_mgr script -#include "afu_json_info.h" - -#define CACHELINE_BYTES 64 -#define CL(x) ((x) * CACHELINE_BYTES) - - -// -// Search for an accelerator matching the requested UUID and connect to it. -// -static fpga_handle connect_to_accel(const char *accel_uuid) -{ - fpga_properties filter = NULL; - fpga_guid guid; - fpga_token accel_token; - uint32_t num_matches; - fpga_handle accel_handle; - fpga_result r; - - // Don't print verbose messages in ASE by default - //setenv("ASE_LOG", "0", 0); - - // Set up a filter that will search for an accelerator - fpgaGetProperties(NULL, &filter); - fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR); - - // Add the desired UUID to the filter - uuid_parse(accel_uuid, guid); - fpgaPropertiesSetGUID(filter, guid); - - // Do the search across the available FPGA contexts - num_matches = 1; - fpgaEnumerate(&filter, 1, &accel_token, 1, &num_matches); - - // Not needed anymore - fpgaDestroyProperties(&filter); - - if (num_matches < 1) - { - fprintf(stderr, "Accelerator %s not found!\n", accel_uuid); - return 0; - } - - // Open accelerator - r = fpgaOpen(accel_token, &accel_handle, 0); - assert(FPGA_OK == r); - - // Done with token - fpgaDestroyToken(&accel_token); - - return accel_handle; -} - - -// -// Allocate a buffer in I/O memory, shared with the FPGA. -// -static volatile void* alloc_buffer(fpga_handle accel_handle, - ssize_t size, - uint64_t *wsid, - uint64_t *io_addr) -{ - fpga_result r; - volatile void* buf; - - r = fpgaPrepareBuffer(accel_handle, size, (void*)&buf, wsid, 0); - if (FPGA_OK != r) return NULL; - - // Get the physical address of the buffer in the accelerator - r = fpgaGetIOAddress(accel_handle, *wsid, io_addr); - assert(FPGA_OK == r); - - return buf; -} - - -int main(int argc, char *argv[]) -{ - fpga_handle accel_handle; - volatile char *buf; - volatile char *buf_r; - uint64_t wsid1; - uint64_t wsid2; - uint64_t buf_pa; - uint64_t ret_buf_pa; - uint64_t buf_rpa; - uint64_t ret_buf_rpa; - fpga_result r; - - // Find and connect to the accelerator - accel_handle = connect_to_accel(AFU_ACCEL_UUID); - - // Allocate a single page memory buffer for write - buf = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(), - &wsid1, &buf_pa); - // Allocate a single page memory buffer for read - buf_r = (volatile char*)alloc_buffer(accel_handle, 4 * getpagesize(), - &wsid2, &buf_rpa); - assert(NULL != buf); - - //// Set the low byte of the shared buffer to 0. The FPGA will write - //// a non-zero value to it. - //buf[0] = 0; - - // Set the low byte of the shared buffer buf_r to 0. The FPGA will read - // the values and write to buf address - buf[0] = 5; - buf_r[0] = 5; - - // Tell the accelerator the address of the buffer using cache line - // addresses. The accelerator will respond by writing to the buffer. - r = fpgaWriteMMIO64(accel_handle, 0, 0, buf_pa / CL(1)); - printf("Write address is %08lx\n", buf_pa); - printf("Write address div 64 is %08lx\n", buf_pa/ CL(1)); - assert(FPGA_OK == r); - - // Wait for response from FPGA. Check using fpgaReadMMIO - //r = fpgaReadMMIO64(accel_handle, 0, 0, &ret_buf_pa); - //printf("Returned write is %08lx\n", ret_buf_pa); - //assert(FPGA_OK == r); - -///////////////////// Added to check fpgaRead - // Wait for response from FPGA. Check using fpgaReadMMIO - r = fpgaReadMMIO64(accel_handle, 0, 5 * sizeof(uint64_t), &ret_buf_rpa); - printf("Returned read at 10 is %08lx\n", ret_buf_rpa); - assert(FPGA_OK == r); -/////////////////////////////////////////////// - - - // Tell the accelerator the address of the buffer using cache line - // addresses. The accelerator will read from the buffer. - // Write the address to MMIO 1 - r = fpgaWriteMMIO64(accel_handle, 0, sizeof(uint64_t), buf_rpa / CL(1)); - printf("Read address is %08lx\n", buf_rpa); - printf("Read address div64 is %08lx\n", buf_rpa / CL(1)); - assert(FPGA_OK == r); - - // Wait for response from FPGA. Check using fpgaReadMMIO - //r = fpgaReadMMIO64(accel_handle, 0, sizeof(uint64_t), &ret_buf_rpa); - //printf("Returned write is %08lx\n", ret_buf_rpa); - //assert(FPGA_OK == r); - - - - - - - - - // Update this - // Spin, waiting for the value in memory to change to something non-zero. - while (5 == buf[0]) - { - // A well-behaved program would use _mm_pause(), nanosleep() or - // equivalent to save power here. - }; - - // Print the string written by the FPGA - printf("%d\n", buf[0]); - - do { - //printf("%d\n", buf[0]); - } while (10 != buf[0]); - - // Done - fpgaReleaseBuffer(accel_handle, wsid1); - fpgaReleaseBuffer(accel_handle, wsid2); - fpgaClose(accel_handle); - - return 0; -} diff --git a/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h b/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h deleted file mode 100644 index e16a5349..00000000 --- a/driver/tests/dogfood/Memcpy/sw/obj/afu_json_info.h +++ /dev/null @@ -1,13 +0,0 @@ -// -// Generated by afu_json_mgr from ../hw/rtl/cci_hello.json -// - -#ifndef __AFU_JSON_INFO__ -#define __AFU_JSON_INFO__ - -#define AFU_ACCEL_NAME "cci_hello" -#define AFU_ACCEL_UUID "C6AA954A-9B91-4A37-ABC1-1D9F0709DCC3" -#define AFU_IMAGE_POWER 0 -#define AFU_TOP_IFC "ccip_std_afu" - -#endif // __AFU_JSON_INFO__ diff --git a/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o b/driver/tests/dogfood/Memcpy/sw/obj/cci_hello.o deleted file mode 100644 index a6d79f493c77264d59167e9a913e3aeedecb5e59..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 5336 zcmbW4e`p*<6vyY1CaLLlea%UXnZ83S2J2%|#B9OpjAS-r1CCH`8V_*+5E3P)Yr+O#aoQq$Z}r zN_tJzZg46odD%Oo%*u19RHo!PC3W>%Y%fxfmgg?wBuZ-50#m`8N?M*)QopPs&?tyA zXjYz~gQmJ>z%!3wJ5qDsAQPhDd{*SNJoCy-a*vWOvL+@Y|}ijX^ToK35yIaD5_O{83x)KkZfIuQkg5_=EN)6e@nkGqQA$Fgh&rN2qj7hLwDq+6)WFlyfdi63 zNBSFlzNY#*Uv*PUlefCgSKC%ly{%1co<>hotKVNsx`UmaQpEvjFgZjH8xd2N zv~bwa6A3Ajkg7cUqT{7Jk8F21a!hKdv#EM@Gik*1uw>|3Skg?%>&fyW%n98~wG2bZ ztcX=%I(H}@i|HX#HRGy=Tfy_uVBH}V5+BkHGO9)37u)XlKPl}F4klw}QmS{?yFJz3 znk4OdU-UM(J#~95EY*0tjb4w}1HrbifUXn0#5G>-+P=PI?J2k-9|4X9tsbvT-^TKj z1^x}ig&r43sBb^@67mMt++UDFH}yZm{GkGyzwo&NDqudb!l~AfzgI+#{ldI&8svrf zf7Xdvc2F71&jnGCt*U>!y)(Qpn+nAo-{=lAPyozyq z28E;qyt!6L@E4wIg`_wK9s^RILkW4^fxqv-@y5&-=X(eKs{{YTf#bpEi(d@;l8>>Tp zI4qbD4?a|2cypCOxeMPzXg-8s$np8+ybmE5a=e>3A4Uj4^CxWlVH+Q|@$EMLmW|_kk;nOvah}f>*8f!C zJpV5R&hw}LmOxxF|DOeY9ca*QF^+nCNAY}$F%dA4*D%iWtYG603Y_OD3%uV7OP*p} z%<}~s-vL^l&y)}cpK-LeZF&j%Tz^K;zl>tAFC6qUf}XGI2Z8f-{U~t0{|mPG_UGr0 zjaNV(JkR1nn%3%b_>t8m<&5M0NKm-`Q9;l1+#_(Fr$^wGEPg%X;{NyBxPAYR2yrg4 zIHpZ+&-0|9Uts!E4*K^5Jzv)uf%A2JF7Qeg|0~ADb^Rph?=byuHoZOnyEeWPw0!@| zFfbTacD~)lE5XP06136q_}L>s`_1%khV$>g0fF=H1L95`A2qc>s7=GFM|jK7qng{) z$4%l6YoijR)!G4R3vJi5ERJ6KKMO(4Kw`iMGYXruh+#Ad(uN`j$y zC5T^^&A`xhv3;_7tDnQXOYp8lN%JbP8UyRB`c-^MfMFNs;a|4+TSMCg1vxuDMAU|a zc{xX|-5!Sy#=O_@UKoop?Pm>LGREfLD@-VUV>Yqz{QZq%(N42<^LyK(nf>EoTfo8F zT!Zy>Hr~##Eou)lCgwlS#*YgD`2KU;3ypl|x5yT%Fd?2Fj^*R|`Qf*Rxc4~3!~S8? ouwyeZf9#97qg-JROel28f4TqVVE_OC diff --git a/driver/tests/dogfood/common.h b/driver/tests/dogfood/common.h new file mode 100644 index 00000000..da6e77d2 --- /dev/null +++ b/driver/tests/dogfood/common.h @@ -0,0 +1,14 @@ +#ifndef _COMMON_H_ +#define _COMMON_H_ + +#define KERNEL_ARG_DEV_MEM_ADDR 0x7ffff000 + +struct kernel_arg_t { + uint32_t testid; + uint32_t count; + uint32_t src0_ptr; + uint32_t src1_ptr; + uint32_t dst_ptr; +}; + +#endif \ No newline at end of file diff --git a/driver/tests/dogfood/dogfood.cpp b/driver/tests/dogfood/dogfood.cpp new file mode 100644 index 00000000..c54fcbf4 --- /dev/null +++ b/driver/tests/dogfood/dogfood.cpp @@ -0,0 +1,264 @@ +#include +#include +#include +#include +#include +#include "testcases.h" +#include "common.h" + +#define RT_CHECK(_expr) \ + do { \ + int _ret = _expr; \ + if (0 == _ret) \ + break; \ + printf("Error: '%s' returned %d!\n", #_expr, (int)_ret); \ + cleanup(); \ + exit(-1); \ + } while (false) + +/////////////////////////////////////////////////////////////////////////////// + +class TestMngr { +public: + TestMngr() { + this->add_test("iadd", new Test_IADD()); + this->add_test("imul", new Test_IMUL()); + this->add_test("idiv", new Test_IDIV()); + this->add_test("idiv-mul", new Test_IDIV_MUL()); + this->add_test("fadd", new Test_FADD()); + this->add_test("fsub", new Test_FSUB()); + this->add_test("fmul", new Test_FMUL()); + this->add_test("fmadd", new Test_FMADD()); + this->add_test("fmsub", new Test_FMSUB()); + this->add_test("fnmadd", new Test_FNMADD()); + this->add_test("fnmsub", new Test_FNMSUB()); + this->add_test("fnmadd-madd", new Test_FNMADD_MADD()); + this->add_test("fdiv", new Test_FDIV()); + this->add_test("fdiv2", new Test_FDIV2()); + this->add_test("fsqrt", new Test_FSQRT()); + this->add_test("ftoi", new Test_FTOI()); + this->add_test("ftou", new Test_FTOU()); + this->add_test("tof", new Test_ITOF()); + this->add_test("utof", new Test_UTOF()); + } + + ~TestMngr() { + for (size_t i = 0; i < _tests.size(); ++i) { + delete _tests[i]; + } + } + + const std::string& get_name(int testid) const { + return _names.at(testid); + } + + ITestCase* get_test(int testid) const { + return _tests.at(testid); + } + + void add_test(const char* name, ITestCase* test) { + _names.push_back(name); + _tests.push_back(test); + } + + size_t size() const { + return _tests.size(); + } + +private: + std::vector _names; + std::vector _tests; +}; + +/////////////////////////////////////////////////////////////////////////////// + +TestMngr testMngr; +const char* kernel_file = "kernel.bin"; +int count = 0; +int testid_s = 0; +int testid_e = (testMngr.size() - 1); + +vx_device_h device = nullptr; +vx_buffer_h arg_buf = nullptr; +vx_buffer_h src1_buf = nullptr; +vx_buffer_h src2_buf = nullptr; +vx_buffer_h dst_buf = nullptr; + +static void show_usage() { + std::cout << "Vortex Driver Test." << std::endl; + std::cout << "Usage: [-s:testid] [-e:testid] [-k: kernel] [-n words] [-h: help]" << std::endl; +} + +static void parse_args(int argc, char **argv) { + int c; + while ((c = getopt(argc, argv, "n:s:e:k:h?")) != -1) { + switch (c) { + case 'n': + count = atoi(optarg); + break; + case 's': + testid_s = atoi(optarg); + break; + case 'e': + testid_e = atoi(optarg); + break; + case 'k': + kernel_file = optarg; + break; + case 'h': + case '?': { + show_usage(); + exit(0); + } break; + default: + show_usage(); + exit(-1); + } + } +} + +void cleanup() { + if (arg_buf) { + vx_buf_release(arg_buf); + } + if (src1_buf) { + vx_buf_release(src1_buf); + } + if (src2_buf) { + vx_buf_release(src2_buf); + } + if (dst_buf) { + vx_buf_release(dst_buf); + } + if (device) { + vx_dev_close(device); + } +} + +int main(int argc, char *argv[]) { + size_t value; + kernel_arg_t kernel_arg; + + // parse command arguments + parse_args(argc, argv); + + if (count == 0) { + count = 1; + } + + std::cout << "test ids: " << testid_s << " - " << testid_e << std::endl; + std::cout << "workitem size: " << count << std::endl; + std::cout << "using kernel: " << kernel_file << std::endl; + + // open device connection + std::cout << "open device connection" << std::endl; + RT_CHECK(vx_dev_open(&device)); + + unsigned max_cores, max_warps, max_threads; + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_CORES, &max_cores)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_WARPS, &max_warps)); + RT_CHECK(vx_dev_caps(device, VX_CAPS_MAX_THREADS, &max_threads)); + + int num_points = count * max_cores * max_warps * max_threads; + size_t buf_size = num_points * sizeof(uint32_t); + + std::cout << "number of points: " << num_points << std::endl; + std::cout << "number of points: " << num_points << std::endl; + std::cout << "number of points: " << num_points << std::endl; + std::cout << "buffer size: " << buf_size << " bytes" << std::endl; + + // upload program + std::cout << "upload kernel" << std::endl; + RT_CHECK(vx_upload_kernel_file(device, kernel_file)); + + // allocate device memory + std::cout << "allocate device memory" << std::endl; + + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src0_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.src1_ptr = value; + RT_CHECK(vx_alloc_dev_mem(device, buf_size, &value)); + kernel_arg.dst_ptr = value; + + kernel_arg.count = count; + + std::cout << "dev_src0=" << std::hex << kernel_arg.src0_ptr << std::endl; + std::cout << "dev_src1=" << std::hex << kernel_arg.src1_ptr << std::endl; + std::cout << "dev_dst=" << std::hex << kernel_arg.dst_ptr << std::endl; + + // allocate shared memory + std::cout << "allocate shared memory" << std::endl; + RT_CHECK(vx_alloc_shared_mem(device, sizeof(kernel_arg_t), &arg_buf)); + RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src1_buf)); + RT_CHECK(vx_alloc_shared_mem(device, buf_size, &src2_buf)); + RT_CHECK(vx_alloc_shared_mem(device, buf_size, &dst_buf)); + + for (int t = testid_s; t <= testid_e; ++t) { + auto name = testMngr.get_name(t); + auto test = testMngr.get_test(t); + + std::cout << "Test" << t << ": " << name << std::endl; + + // upload kernel argument + std::cout << "upload kernel argument" << std::endl; + kernel_arg.testid = t; + memcpy((void*)vx_host_ptr(arg_buf), &kernel_arg, sizeof(kernel_arg_t)); + RT_CHECK(vx_copy_to_dev(arg_buf, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0)); + + // get test arguments + std::cout << "get test arguments" << std::endl; + test->setup(num_points, (void*)vx_host_ptr(src1_buf), (void*)vx_host_ptr(src2_buf)); + + // upload source buffer0 + std::cout << "upload source buffer0" << std::endl; + RT_CHECK(vx_copy_to_dev(src1_buf, kernel_arg.src0_ptr, buf_size, 0)); + + // upload source buffer1 + std::cout << "upload source buffer1" << std::endl; + RT_CHECK(vx_copy_to_dev(src2_buf, kernel_arg.src1_ptr, buf_size, 0)); + + // clear destination buffer + std::cout << "clear destination buffer" << std::endl; + for (int i = 0; i < num_points; ++i) { + ((uint32_t*)vx_host_ptr(dst_buf))[i] = 0xdeadbeef; + } + RT_CHECK(vx_copy_to_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0)); + + // start device + std::cout << "start device" << std::endl; + RT_CHECK(vx_start(device)); + + // wait for completion + std::cout << "wait for completion" << std::endl; + RT_CHECK(vx_ready_wait(device, -1)); + + // flush the destination buffer caches + std::cout << "flush the destination buffer caches" << std::endl; + RT_CHECK(vx_flush_caches(device, kernel_arg.dst_ptr, buf_size)); + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + RT_CHECK(vx_copy_from_dev(dst_buf, kernel_arg.dst_ptr, buf_size, 0)); + + // verify destination + std::cout << "verify test result" << std::endl; + int errors = test->verify(num_points, + (void*)vx_host_ptr(dst_buf), + (void*)vx_host_ptr(src1_buf), + (void*)vx_host_ptr(src2_buf)); + if (errors != 0) { + std::cout << "found " << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl << std::flush; + cleanup(); + exit(1); + } + std::cout << "PASSED!" << std::endl << std::flush; + } + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + return 0; +} \ No newline at end of file diff --git a/driver/tests/dogfood/kernel.c b/driver/tests/dogfood/kernel.c new file mode 100644 index 00000000..2fc49872 --- /dev/null +++ b/driver/tests/dogfood/kernel.c @@ -0,0 +1,354 @@ +#include +#include +#include +#include +#include "common.h" + +typedef void (*PFN_Kernel)(void* arg); + +void kernel_iadd(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + int32_t a = src0_ptr[offset+i]; + int32_t b = src1_ptr[offset+i]; + int32_t c = a + b; + dst_ptr[offset+i] = c; + } +} + +void kernel_imul(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + int32_t a = src0_ptr[offset+i]; + int32_t b = src1_ptr[offset+i]; + int32_t c = a * b; + dst_ptr[offset+i] = c; + } +} + +void kernel_idiv(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + int32_t a = src0_ptr[offset+i]; + int32_t b = src1_ptr[offset+i]; + int32_t c = a / b; + dst_ptr[offset+i] = c; + } +} + +void kernel_idiv_mul(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + int32_t* src0_ptr = (int32_t*)_arg->src0_ptr; + int32_t* src1_ptr = (int32_t*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + int32_t a = src0_ptr[offset+i]; + int32_t b = src1_ptr[offset+i]; + int32_t c = a / b; + int32_t d = a * b; + int32_t e = c + d; + dst_ptr[offset+i] = e; + } +} + +void kernel_fadd(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a + b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fsub(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a - b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fmul(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a * b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fmadd(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a * 0.5f + b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fmsub(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a * 0.5f - b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fnmadd(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = -a * 0.5f - b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fnmsub(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = -a * 0.5f + b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fnmadd_madd(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = -a * 0.25f - b; + float d = a * 0.25f + b; + float e = c + d; + dst_ptr[offset+i] = e; + } +} + +void kernel_fdiv(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a / b; + dst_ptr[offset+i] = c; + } +} + +void kernel_fdiv2(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a / b; + float d = b / a; + float e = c + d; + dst_ptr[offset+i] = e; + } +} + +void kernel_fsqrt(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = sqrt(a) + b; + dst_ptr[offset+i] = c; + } +} + +void kernel_ftoi(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + int32_t* dst_ptr = (int32_t*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a + b; + int32_t d = (int32_t)c; + dst_ptr[offset+i] = d; + } +} + +void kernel_ftou(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + uint32_t* dst_ptr = (uint32_t*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + float c = a + b; + uint32_t d = (uint32_t)c; + dst_ptr[offset+i] = d; + } +} + +void kernel_itof(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + int32_t c = (int32_t)a; + int32_t d = (int32_t)b; + int32_t e = c + d; + float f = (float)e; + dst_ptr[offset+i] = f; + } +} + +void kernel_utof(void* arg) { + struct kernel_arg_t* _arg = (struct kernel_arg_t*)(arg); + uint32_t count = _arg->count; + float* src0_ptr = (float*)_arg->src0_ptr; + float* src1_ptr = (float*)_arg->src1_ptr; + float* dst_ptr = (float*)_arg->dst_ptr; + uint32_t offset = vx_thread_gid() * count; + + for (uint32_t i = 0; i < count; ++i) { + float a = src0_ptr[offset+i]; + float b = src1_ptr[offset+i]; + uint32_t c = (uint32_t)a; + uint32_t d = (uint32_t)b; + uint32_t e = c + d; + float f = (float)e; + dst_ptr[offset+i] = f; + } +} + +static const PFN_Kernel sc_tests[] = { + kernel_iadd, + kernel_imul, + kernel_idiv, + kernel_idiv_mul, + kernel_fadd, + kernel_fsub, + kernel_fmul, + kernel_fmadd, + kernel_fmsub, + kernel_fnmadd, + kernel_fnmsub, + kernel_fnmadd_madd, + kernel_fdiv, + kernel_fdiv2, + kernel_fsqrt, + kernel_ftoi, + kernel_ftou, + kernel_itof, + kernel_utof, +}; + +void main() { + struct kernel_arg_t* arg = (struct kernel_arg_t*)KERNEL_ARG_DEV_MEM_ADDR; + int num_warps = vx_num_warps(); + int num_threads = vx_num_threads(); + vx_spawn_warps(num_warps, num_threads, sc_tests[arg->testid], arg); +} \ No newline at end of file diff --git a/driver/tests/dogfood/testcases.h b/driver/tests/dogfood/testcases.h new file mode 100644 index 00000000..e2718a82 --- /dev/null +++ b/driver/tests/dogfood/testcases.h @@ -0,0 +1,555 @@ +#pragma once + +#include +#include + +class ITestCase { +public: + ITestCase() {} + virtual ~ITestCase() {} + + virtual void setup(int n, void* src1, void* src2) = 0; + virtual int verify(int n, void* dst, const void* src1, const void* src2) = 0; +}; + +class Test_IADD : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + for (int i = 0; i < n; ++i) { + a[i] = n/2 + i; + b[i] = n/2 - i; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + auto c = (int32_t*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] + b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_IMUL : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + for (int i = 0; i < n; ++i) { + a[i] = n/2 + i; + b[i] = n/2 - i; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + auto c = (int32_t*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] * b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_IDIV : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + for (int i = 0; i < n; ++i) { + a[i] = n/2 - i; + b[i] = n/2 + i; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + auto c = (int32_t*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] / b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_IDIV_MUL : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + for (int i = 0; i < n; ++i) { + a[i] = n/2 - i; + b[i] = n/2 + i; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + auto c = (int32_t*)dst; + for (int i = 0; i < n; ++i) { + auto x = a[i] / b[i]; + auto y = a[i] * b[i]; + auto ref = x + y; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FADD : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] + b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FSUB : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] - b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FMUL : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] * b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FMADD : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] * 0.5f + b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FMSUB : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] * 0.5f - b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FNMADD : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = -a[i] * 0.5f - b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FNMSUB : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = -a[i] * 0.5f + b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FNMADD_MADD : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto x = -a[i] * 0.5f - b[i]; + auto y = a[i] * 0.5f + b[i]; + auto ref = x + y; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FDIV : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n - i) * 0.125f; + b[i] = (n + i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = a[i] / b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FDIV2 : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n - i) * 0.125f; + b[i] = (n + i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto x = a[i] / b[i]; + auto y = b[i] / a[i]; + auto ref = x + y; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FSQRT : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.125f; + b[i] = (n - i) * 0.125f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto ref = sqrt(a[i]) + b[i]; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FTOI : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.5f; + b[i] = (n - i) * 0.5f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto x = a[i] + b[i]; + auto ref = (int32_t)x; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_FTOU : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (float*)src1; + auto b = (float*)src2; + for (int i = 0; i < n; ++i) { + a[i] = (n + i) * 0.5f; + b[i] = (n - i) * 0.5f; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (float*)src1; + auto b = (float*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto x = a[i] + b[i]; + auto ref = (uint32_t)x; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_ITOF : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + for (int i = 0; i < n; ++i) { + a[i] = n/2 + i; + b[i] = n/2 - i; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (int32_t*)src1; + auto b = (int32_t*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto x = a[i] + b[i]; + auto ref = (float)x; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; + +class Test_UTOF : public ITestCase { +public: + + void setup(int n, void* src1, void* src2) override { + auto a = (uint32_t*)src1; + auto b = (uint32_t*)src2; + for (int i = 0; i < n; ++i) { + a[i] = n/2 + i; + b[i] = n/2 - i; + } + } + + int verify(int n, void* dst, const void* src1, const void* src2) override { + int errors = 0; + auto a = (uint32_t*)src1; + auto b = (uint32_t*)src2; + auto c = (float*)dst; + for (int i = 0; i < n; ++i) { + auto x = a[i] + b[i]; + auto ref = (float)x; + if (c[i] != ref) { + std::cout << "error at value " << i << ": actual 0x" << c[i] << ", expected 0x" << ref << std::endl; + ++errors; + } + } + return errors; + } +}; \ No newline at end of file