diff --git a/driver/hw/Makefile b/driver/hw/Makefile index cd91bd11..74d99109 100644 --- a/driver/hw/Makefile +++ b/driver/hw/Makefile @@ -17,6 +17,9 @@ $(BUILD_DIR)/Makefile: run-ase: cd $(BUILD_DIR) && MENT_VSIM_OPT="-dpicpppath /usr/bin/gcc" make sim +wave: + vsim -view $(BUILD_DIR)/work/vsim.wlf -do wave.do + run-fpga: # TODO diff --git a/driver/hw/ccip_std_afu.sv b/driver/hw/ccip_std_afu.sv index aaf4cd23..e9791b60 100644 --- a/driver/hw/ccip_std_afu.sv +++ b/driver/hw/ccip_std_afu.sv @@ -108,7 +108,7 @@ module ccip_std_afu #( .NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS) ) - hello_mem_afu_inst + vortex_afu_inst ( .clk (clk), .SoftReset (reset_T1), diff --git a/driver/hw/sources.txt b/driver/hw/sources.txt index 23d2cbf7..be07bef5 100644 --- a/driver/hw/sources.txt +++ b/driver/hw/sources.txt @@ -1,5 +1,7 @@ vortex_afu.json ++define+GLOBAL_BLOCK_SIZE_BYTES=64 + +incdir+. +incdir+../../rtl +incdir+../../rtl/shared_memory @@ -13,6 +15,7 @@ vortex_afu.json ../../rtl/VX_define.v ../../rtl/VX_cache/VX_cache_config.v ../../rtl/Vortex_SOC.v +../../rtl/Vortex_Cluster.v ../../rtl/Vortex.v ../../rtl/VX_front_end.v ../../rtl/VX_back_end.v diff --git a/driver/hw/vortex_afu.json b/driver/hw/vortex_afu.json index 3c9b3bb3..c8adc2e0 100644 --- a/driver/hw/vortex_afu.json +++ b/driver/hw/vortex_afu.json @@ -3,7 +3,19 @@ "afu-image": { "power": 0, "clock-frequency-high": "auto", - "clock-frequency-low": "auto", + "clock-frequency-low": "auto", + + "mmio-csr-cmd": 10, + "mmio-csr-status": 12, + "mmio-csr-io-addr": 14, + "mmio-csr-mem-addr": 16, + "mmio-csr-data-size": 18, + + "cmd-type-read": 1, + "cmd-type-write": 2, + "cmd-type-run": 3, + "cmd-type-snoop": 4, + "afu-top-interface": { "class": "ccip_std_afu_avalon_mm", diff --git a/driver/hw/vortex_afu.sv b/driver/hw/vortex_afu.sv index 0ef03275..11ae0ddb 100644 --- a/driver/hw/vortex_afu.sv +++ b/driver/hw/vortex_afu.sv @@ -1,5 +1,3 @@ -// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit 
Vortex - // Interface between CSR and FSM // All the MMIOs read/write are done from CSR and passed to the FSM for state transitions @@ -21,618 +19,565 @@ module vortex_afu #( input t_if_ccip_Rx cp2af_sRxPort, output t_if_ccip_Tx af2cp_sTxPort, - // Avalong signals for local memory access - output t_local_mem_data avs_writedata, - input t_local_mem_data avs_readdata, - output t_local_mem_addr avs_address, - input logic avs_waitrequest, - output logic avs_write, - output logic avs_read, - output t_local_mem_byte_mask avs_byteenable, - output t_local_mem_burst_cnt avs_burstcount, - input avs_readdatavalid, + // Avalon signals for local memory access + output t_local_mem_data avs_writedata, + input t_local_mem_data avs_readdata, + output t_local_mem_addr avs_address, + input logic avs_waitrequest, + output logic avs_write, + output logic avs_read, + output t_local_mem_byte_mask avs_byteenable, + output t_local_mem_burst_cnt avs_burstcount, + input avs_readdatavalid, output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select ); +localparam AVS_RD_QUEUE_SIZE = 16; +localparam VX_SNOOPING_DELAY = 300; + localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher -localparam MEM_ADDRESS = 16'h0040; // AVMM Master Address -localparam MEM_BURSTCOUNT = 16'h0042; // AVMM Master Burst Count -localparam MEM_RDWR = 16'h0044; // AVMM Master Read/Write -localparam MEM_BANK_SELECT = 16'h0064; // Memory bank selection register -localparam READY_FOR_SW_CMD = 16'h0066; // "Ready for sw cmd" register. 
S/w must poll this register before issuing a read/write command to fsm -localparam MEM_BYTEENABLE = 16'h0068; // Test byteenable -// Added by Apurve to supporead and writeChange address size to buffer's address size -localparam DATA_SIZE = 16'h0046; // MMIO set by SW to denote the size od data to read/write -localparam BUFFER_IO_ADDRESS = 16'h0048; // MMIO set by SW to denote the buffer address space +localparam CMD_TYPE_READ = `AFU_IMAGE_CMD_TYPE_READ; +localparam CMD_TYPE_WRITE = `AFU_IMAGE_CMD_TYPE_WRITE; +localparam CMD_TYPE_RUN = `AFU_IMAGE_CMD_TYPE_RUN; +localparam CMD_TYPE_SNOOP = `AFU_IMAGE_CMD_TYPE_SNOOP; + +localparam MMIO_CSR_CMD = `AFU_IMAGE_MMIO_CSR_CMD; +localparam MMIO_CSR_STATUS = `AFU_IMAGE_MMIO_CSR_STATUS; +localparam MMIO_CSR_IO_ADDR = `AFU_IMAGE_MMIO_CSR_IO_ADDR; +localparam MMIO_CSR_MEM_ADDR = `AFU_IMAGE_MMIO_CSR_MEM_ADDR; +localparam MMIO_CSR_DATA_SIZE = `AFU_IMAGE_MMIO_CSR_DATA_SIZE; logic [127:0] afu_id = `AFU_ACCEL_UUID; -// cast c0 header into ReqMmioHdr -t_ccip_c0_ReqMmioHdr mmioHdr; -assign mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); +typedef enum logic[2:0] { + STATE_IDLE, + STATE_READ, + STATE_WRITE, + STATE_RUN, + STATE_SNOOP1, + STATE_SNOOP2 +} state_t; -logic [2:0] mem_RDWR = '0; - -//-- -logic ready_for_sw_cmd; -logic run_vortex; - -logic [15:0] avm_data_size; -t_ccip_clAddr avm_write_buffer_address; -t_ccip_clAddr avm_read_buffer_address; -logic avm_read; -logic avm_write; -t_local_mem_addr avm_address; -t_local_mem_burst_cnt avm_burstcount; -t_local_mem_byte_mask avm_byteenable; - -// Vortex signals - -logic vx_reset; -logic vx_dram_req; -logic vx_dram_req_write; -logic vx_dram_req_read; -logic vx_ebreak; -logic [31:0] vx_dram_req_addr; -logic [31:0] vx_local_addr; -logic [31:0] vx_dram_req_size; -logic [31:0] vx_count; -logic vx_dram_fill_rsp; - -logic [31:0] vx_dram_req_data[15:0]; -logic [31:0] vx_dram_fill_rsp_data[15:0]; -logic vx_dram_fill_accept; -logic [31:0] vx_dram_fill_rsp_addr; -logic [31:0] 
vx_dram_expected_lat; - -// -// MMIO control threads -// -always@(posedge clk) begin - if(SoftReset) begin - af2cp_sTxPort.c2.hdr <= '0; - af2cp_sTxPort.c2.data <= '0; - af2cp_sTxPort.c2.mmioRdValid <= '0; - avm_address <= '0; - avm_read <= '0; - avm_write <= '0; - avm_burstcount <= 12'd1; - mem_RDWR <= '0; - mem_bank_select <= 1'b1; - - // Change address size to buffer's address size - avm_data_size <= '0; - avm_write_buffer_address <= '0; - avm_read_buffer_address <= '0; - run_vortex <= '0; - end - else begin - af2cp_sTxPort.c2.mmioRdValid <= 0; - avm_read <= mem_RDWR[0] & mem_RDWR[1]; //[0] enable [1] 0-WR,1-RD - avm_write <= mem_RDWR[0] & !mem_RDWR[1]; - - // Added by Apurve. Run vortex whem RDWR is 7 - run_vortex <= mem_RDWR[0] & mem_RDWR[1] & mem_RDWR[2]; - - // set the registers on MMIO write request - // these are user-defined AFU registers at offset 0x40 and 0x41 - if(cp2af_sRxPort.c0.mmioWrValid == 1) - begin - case(mmioHdr.address) - MEM_ADDRESS: avm_address <= t_local_mem_addr'(cp2af_sRxPort.c0.data); - MEM_BURSTCOUNT: avm_burstcount <= cp2af_sRxPort.c0.data[11:0]; - MEM_RDWR: mem_RDWR <= cp2af_sRxPort.c0.data[2:0]; - MEM_BANK_SELECT: mem_bank_select <= $bits(mem_bank_select)'(cp2af_sRxPort.c0.data); - // Added by Apurve to support read and write buffers. 
Change address size to buffer's address size - DATA_SIZE:avm_data_size <= cp2af_sRxPort.c0.data[15:0]; - - BUFFER_IO_ADDRESS: begin - avm_write_buffer_address <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); - avm_read_buffer_address <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); - end - endcase - end - - // serve MMIO read requests - if(cp2af_sRxPort.c0.mmioRdValid == 1) - begin - af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID - case(mmioHdr.address) - // AFU header - 16'h0000: af2cp_sTxPort.c2.data <= { - 4'b0001, // Feature type = AFU - 8'b0, // reserved - 4'b0, // afu minor revision = 0 - 7'b0, // reserved - 1'b1, // end of DFH list = 1 - 24'b0, // next DFH offset = 0 - 4'b0, // afu major revision = 0 - 12'b0 // feature ID = 0 - }; - AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low - AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi - 16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU - 16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved - MEM_ADDRESS: af2cp_sTxPort.c2.data <= 64'(avm_address); - MEM_BURSTCOUNT: af2cp_sTxPort.c2.data <= 64'(avm_burstcount); - MEM_RDWR: af2cp_sTxPort.c2.data <= {62'd0, mem_RDWR}; - READY_FOR_SW_CMD: af2cp_sTxPort.c2.data <= ready_for_sw_cmd; - MEM_BANK_SELECT: af2cp_sTxPort.c2.data <= 64'(mem_bank_select); - default: af2cp_sTxPort.c2.data <= 64'h0; - endcase - af2cp_sTxPort.c2.mmioRdValid <= 1; // post response - end else - begin - if (avm_read | avm_write | run_vortex) mem_RDWR[0] <= 0; - end - end -end - - - - - -// FSM - -// Code reused from Intel OPAE's 04_local_memory sample program with changes made to fit Vortex - -// Interface between CSR and FSM -// All the MMIOs read/write passed from csr are used for state transitions -// Read: local memory to shared buffer -// Write: shared buffer to local memory - -// To be done: -// Review the FSM and implement read/write to shared buffer -// Vortex on/off signal -// check on byteenable and burst signals - -//cp2af_sRxPort -> sRx -//af2cp_sTxPort -> sTx - 
- -typedef enum logic[3:0] { IDLE, - VX_REQ, - VX_WR_REQ, - VX_RD_REQ, - VX_RSP, - RD_REQ, - RD_RSP, - WR_REQ, - WR_RSP } state_t; - - -// Added by Apurve for shared memory space write/read -t_ccip_clAddr wr_addr; -t_ccip_clAddr rd_addr; -logic [15:0] count; -logic [15:0] count_rsp; -logic start_read; -logic start_write; -t_local_mem_addr local_address; -logic init_avs_read; - -parameter ADDRESS_MAX_BIT = 10; state_t state; -assign avs_burstcount = avm_burstcount; -t_local_mem_burst_cnt burstcount; +// Vortex signals ///////////////////////////////////////////////////////////// -assign avs_byteenable = avm_byteenable; +logic vx_dram_req_read; +logic vx_dram_req_write; +logic [31:0] vx_dram_req_addr; +logic [31:0] vx_dram_req_data[15:0]; +logic vx_dram_req_delay; -always_ff @(posedge clk) begin - if(SoftReset) begin - local_address <= '0; - avs_write <= '0; - avs_read <= '0; - state <= IDLE; - burstcount <= 1; - ready_for_sw_cmd <= 0; - count <= 0; - count_rsp <= 0; - vx_reset <= 1'b0; - vx_count <= 0; +logic vx_dram_fill_accept; +logic vx_dram_fill_rsp; +logic [31:0] vx_dram_fill_rsp_addr; +logic [31:0] vx_dram_fill_rsp_data[15:0]; + +logic vx_snp_req; +logic [31:0] vx_snp_req_addr; +logic vx_snp_req_delay; + +logic vx_ebreak; + +// AVS Queues ///////////////////////////////////////////////////////////////// + +logic avs_raq_push; +t_local_mem_addr avs_raq_din; +logic avs_raq_pop; +t_local_mem_addr avs_raq_dout; +logic avs_raq_empty; +logic avs_raq_full; + +logic avs_rdq_push; +t_local_mem_data avs_rdq_din; +logic avs_rdq_pop; +t_local_mem_data avs_rdq_dout; +logic avs_rdq_empty; +logic avs_rdq_full; + +// CSR variables ////////////////////////////////////////////////////////////// + +logic [2:0] csr_cmd; +t_ccip_clAddr csr_io_addr; +t_local_mem_addr csr_mem_addr; +logic [31:0] csr_data_size; + +// MMIO controller //////////////////////////////////////////////////////////// + +t_ccip_c0_ReqMmioHdr mmioHdr; + +always_comb +begin + mmioHdr = 
t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr); +end + +always_ff @(posedge clk) +begin + if (SoftReset) + begin + af2cp_sTxPort.c2.hdr <= 0; + af2cp_sTxPort.c2.data <= 0; + af2cp_sTxPort.c2.mmioRdValid <= 0; + csr_cmd <= 0; + csr_io_addr <= 0; + csr_mem_addr <= 0; + csr_data_size <= 0; end else begin - case(state) - IDLE: begin - ready_for_sw_cmd <= 1; - if (avm_write) begin - state <= WR_REQ; - ready_for_sw_cmd <= 0; - count <= 0; - count_rsp <= 0; - end else if (avm_read) begin - init_avs_read <= 1; - state <= RD_REQ; - ready_for_sw_cmd <= 0; - count <= 0; - count_rsp <= 0; - end else if (run_vortex) begin - state <= VX_REQ; - vx_reset <= 1'b1; - ready_for_sw_cmd <= 0; + csr_cmd <= 0; + af2cp_sTxPort.c2.mmioRdValid <= 0; + + // serve MMIO write request + if (cp2af_sRxPort.c0.mmioWrValid) + begin + case (mmioHdr.address) + MMIO_CSR_IO_ADDR: begin + csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6); + $display("%t: CSR_IO_ADDR: 0x%h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data >> 6)); end - end + MMIO_CSR_MEM_ADDR: begin + csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6); + $display("%t: CSR_MEM_ADDR: 0x%h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data >> 6)); + end + MMIO_CSR_DATA_SIZE: begin + csr_data_size <= $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6); + $display("%t: CSR_DATA_SIZE: %d", $time, $bits(csr_data_size)'((cp2af_sRxPort.c0.data + 63) >> 6)); + end + MMIO_CSR_CMD: begin + csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data); + $display("%t: CSR_CMD: %d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data)); + end + endcase + end - WR_REQ: begin //AVL MM Posted Write - af2cp_sTxPort.c0.valid <= 1'b0; - avs_write <= 0; - if (~avs_waitrequest) + // serve MMIO read requests + if (cp2af_sRxPort.c0.mmioRdValid) + begin + af2cp_sTxPort.c2.hdr.tid <= mmioHdr.tid; // copy TID + case (mmioHdr.address) + // AFU header + 16'h0000: af2cp_sTxPort.c2.data <= { + 4'b0001, // Feature type = AFU + 8'b0, // reserved + 4'b0, // afu minor 
revision = 0 + 7'b0, // reserved + 1'b1, // end of DFH list = 1 + 24'b0, // next DFH offset = 0 + 4'b0, // afu major revision = 0 + 12'b0 // feature ID = 0 + }; + AFU_ID_L: af2cp_sTxPort.c2.data <= afu_id[63:0]; // afu id low + AFU_ID_H: af2cp_sTxPort.c2.data <= afu_id[127:64]; // afu id hi + 16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU + 16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved + MMIO_CSR_STATUS: begin + $display("%t: STATUS: state=%d", $time, state); + af2cp_sTxPort.c2.data <= state; + end + default: af2cp_sTxPort.c2.data <= 64'h0; + endcase + af2cp_sTxPort.c2.mmioRdValid <= 1; // post response + end + end +end + +// COMMAND FSM //////////////////////////////////////////////////////////////// + +logic [31:0] cci_write_ctr; +logic [31:0] avs_read_ctr; +logic [31:0] avs_write_ctr; +logic [31:0] vx_snoop_ctr; +logic [31:0] vx_snoop_delay; +logic vx_reset; + +always_ff @(posedge clk) +begin + if (SoftReset) + begin + state <= STATE_IDLE; + vx_reset <= 0; + end + else begin + + vx_reset <= 0; + + case (state) + STATE_IDLE: begin + case (csr_cmd) + CMD_TYPE_READ: begin + $display("%t: CMD READ: ia=%h da=%h sz=%d", $time, csr_io_addr, csr_mem_addr, csr_data_size); + state <= STATE_READ; + end + CMD_TYPE_WRITE: begin + $display("%t: CMD WRITE: ia=%h da=%h sz=%d", $time, csr_io_addr, csr_mem_addr, csr_data_size); + state <= STATE_WRITE; + end + CMD_TYPE_RUN: begin + $display("%t: CMD START", $time); + vx_reset <= 1; + state <= STATE_RUN; + end + CMD_TYPE_SNOOP: begin + $display("%t: CMD SNOOP: da=%h sz=%d", $time, csr_mem_addr, csr_data_size); + state <= STATE_SNOOP1; + end + endcase + end + + STATE_READ: begin + if (cci_write_ctr >= csr_data_size) begin - if (count_rsp >= avm_data_size) - begin - state <= WR_RSP; - avs_write <= 0; - end + state <= STATE_IDLE; end end - WR_RSP: begin // wait for write response - avm_byteenable <= 64'hffffffffffffffff; - state <= IDLE; - end - - RD_REQ: begin // AVL MM Read non-posted - af2cp_sTxPort.c1.valid <= 1'b0; 
- if (~avs_waitrequest) begin - if (count_rsp >= avm_data_size) - begin - state <= RD_RSP; - avs_read <= 0; - end + STATE_WRITE: begin + if (avs_write_ctr >= csr_data_size) + begin + state <= STATE_IDLE; end end - RD_RSP: begin - state <= IDLE; - end - - VX_REQ: begin - vx_reset <= 1'b0; - if (vx_dram_req_write) begin - vx_count <= 0; - avs_write <= 1'b1; - state <= VX_WR_REQ; - end - - if (vx_dram_req_read) begin - vx_count <= 0; - avs_read <= 1'b1; - state <= VX_RD_REQ; - end - - if (vx_ebreak) begin - state <= VX_RSP; + STATE_RUN: begin + if (vx_ebreak) + begin + state <= STATE_IDLE; end end - VX_WR_REQ: begin - avs_write <= 1'b0; - if (vx_count >= vx_dram_req_size) - begin - state <= VX_REQ; - vx_count <= 0; + STATE_SNOOP1: begin + if (vx_snoop_delay >= VX_SNOOPING_DELAY) + begin + state <= STATE_SNOOP2; end end - VX_RD_REQ: begin - avs_read <= 1'b0; - vx_dram_fill_rsp <= 1'b0; - if (vx_count >= vx_dram_req_size) - begin - state <= VX_REQ; - vx_count <= 0; + STATE_SNOOP2: begin + if (vx_snoop_delay >= VX_SNOOPING_DELAY) + begin + state <= STATE_IDLE; end end - VX_RSP: begin - vx_count <= 0; - state <= IDLE; - end - endcase - end // end else reset -end // posedge clk + end +end +// AVS Controller ///////////////////////////////////////////////////////////// -// Vortex call - Vortex_SOC #() - vx_soc ( - .clk (clk), - .reset (vx_reset), +always_ff @(posedge clk) +begin + if (SoftReset) + begin + mem_bank_select <= 0; + avs_burstcount <= 1; + avs_byteenable <= 64'hffffffffffffffff; + avs_address <= 0; + avs_writedata <= 0; + avs_read <= 0; + avs_write <= 0; - // IO - //.io_valid[`NUMBER_CORES-1:0] (), - //.io_data [`NUMBER_CORES-1:0] (), - //.number_cores (), + avs_read_ctr <= 0; + avs_write_ctr <= 0; + end + else begin - // DRAM Dcache Req - .out_dram_req (vx_dram_req), - .out_dram_req_write (vx_dram_req_write), - .out_dram_req_read (vx_dram_req_read), - .out_dram_req_addr (vx_dram_req_addr), - .out_dram_req_size (vx_dram_req_size), - .out_dram_req_data 
(vx_dram_req_data), - .out_dram_expected_lat (vx_dram_expected_lat), + avs_read <= 0; + avs_write <= 0; - // DRAM Dcache Res - .out_dram_fill_accept (vx_dram_fill_accept), - .out_dram_fill_rsp (vx_dram_fill_rsp), - .out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr), - .out_dram_fill_rsp_data (vx_dram_fill_rsp_data), - - //.l3c_snp_req (), - //.l3c_snp_req_addr (), - //.l3c_snp_req_delay (), - - .out_ebreak (vx_ebreak) - ); - - -// Local memory read/write address -//assign avs_address = (vx_dram_req ? (vx_count ? vx_local_addr : vx_dram_req_addr) : (count ? local_address : avm_address)); -assign avs_address = (((state == VX_WR_REQ) || (state == VX_RD_REQ)) ? (vx_count ? vx_local_addr : vx_dram_req_addr) : (count ? local_address : avm_address)); - - - -// Vortex DRAM requests and responses -// Handling of read/write data and vx_dram_req_size -// Is vx_dram_fill_accept for backpressure? -always_ff @(posedge clk) begin - if (state == VX_WR_REQ) begin - if (!avs_waitrequest & (vx_count < vx_dram_req_size)) begin - avs_write <= 1'b1; - //avs_writedata <= vx_dram_req_data; - avs_writedata[31:0] = vx_dram_req_data[0]; - avs_writedata[63:32] = vx_dram_req_data[1]; - avs_writedata[95:64] = vx_dram_req_data[2]; - avs_writedata[127:96] = vx_dram_req_data[3]; - avs_writedata[159:128] = vx_dram_req_data[4]; - avs_writedata[191:160] = vx_dram_req_data[5]; - avs_writedata[223:192] = vx_dram_req_data[6]; - avs_writedata[255:224] = vx_dram_req_data[7]; - avs_writedata[287:256] = vx_dram_req_data[8]; - avs_writedata[319:288] = vx_dram_req_data[9]; - avs_writedata[351:320] = vx_dram_req_data[10]; - avs_writedata[383:352] = vx_dram_req_data[11]; - avs_writedata[415:384] = vx_dram_req_data[12]; - avs_writedata[447:416] = vx_dram_req_data[13]; - avs_writedata[479:448] = vx_dram_req_data[14]; - avs_writedata[511:480] = vx_dram_req_data[15]; - - vx_local_addr <= (vx_count ? 
vx_local_addr + 1 : vx_dram_req_addr + 1); - - // Update the count value based on the number of bytes written - vx_count <= vx_count + 64; - - if ((vx_dram_req_size - vx_count) < 64) - begin - avm_byteenable <= 64'hffffffffffffffff >> (64 - (vx_dram_req_size - vx_count)); - end else - begin - avm_byteenable <= 64'hffffffffffffffff; + case (state) + STATE_IDLE: begin + avs_read_ctr <= 0; + avs_write_ctr <= 0; end - end + STATE_READ: begin + if (!avs_raq_full + && !avs_rdq_full + && !avs_waitrequest + && avs_read_ctr < csr_data_size) + begin + avs_address <= csr_mem_addr + avs_read_ctr; + avs_read <= 1; + avs_read_ctr <= avs_read_ctr + 1; + $display("%t: AVS Rd Req: addr=%h", $time, csr_mem_addr + avs_read_ctr); + end + end + + STATE_WRITE: begin + if (cp2af_sRxPort.c0.rspValid + && avs_write_ctr < csr_data_size) + begin + avs_writedata <= cp2af_sRxPort.c0.data; + avs_address <= csr_mem_addr + avs_write_ctr; + avs_write <= 1; + avs_write_ctr <= avs_write_ctr + 1; + $display("%t: AVS Wr Req: addr=%h value=%h", $time, csr_mem_addr + avs_write_ctr, cp2af_sRxPort.c0.data[63:0]); + end + end + + STATE_RUN: begin + if (vx_dram_req_read + && !avs_waitrequest + && !avs_raq_full + && !avs_rdq_full) + begin + avs_address <= (vx_dram_req_addr >> 6); + avs_read <= 1; + $display("%t: AVS Rd Req: addr=%h", $time, vx_dram_req_addr >> 6); + end + + if (vx_dram_req_write + && !avs_waitrequest) + begin + avs_writedata <= {>>{vx_dram_req_data}}; + avs_address <= (vx_dram_req_addr >> 6); + avs_write <= 1; + $display("%t: AVS Wr Req: addr=%h value=%h", $time, vx_dram_req_addr >> 6, {vx_dram_req_data[1], vx_dram_req_data[0]}); + end + end + endcase + + if (avs_readdatavalid) + begin + $display("%t: AVS Rd Rsp: value=%h", $time, avs_readdata[63:0]); + end end end -always_ff @(posedge clk) begin - //if (SoftReset) begin - if (vx_reset) begin - vx_dram_fill_rsp <= 1'b0; - //vx_dram_fill_rsp_data <= 0; - vx_dram_fill_rsp_data[0] <= 0; - vx_dram_fill_rsp_data[1] <= 0; - 
vx_dram_fill_rsp_data[2] <= 0; - vx_dram_fill_rsp_data[3] <= 0; - vx_dram_fill_rsp_data[4] <= 0; - vx_dram_fill_rsp_data[5] <= 0; - vx_dram_fill_rsp_data[6] <= 0; - vx_dram_fill_rsp_data[7] <= 0; - vx_dram_fill_rsp_data[8] <= 0; - vx_dram_fill_rsp_data[9] <= 0; - vx_dram_fill_rsp_data[10] <= 0; - vx_dram_fill_rsp_data[11] <= 0; - vx_dram_fill_rsp_data[12] <= 0; - vx_dram_fill_rsp_data[13] <= 0; - vx_dram_fill_rsp_data[14] <= 0; - vx_dram_fill_rsp_data[15] <= 0; - end +// Vortex DRAM requests stalling +assign vx_dram_req_delay = !(avs_read || avs_write); - if (state == VX_RD_REQ) begin - if (avs_readdatavalid & vx_dram_fill_accept) begin - avs_read <= 1'b1; - vx_dram_fill_rsp <= 1'b1; - //vx_dram_fill_rsp_data <= avs_readdata; - vx_dram_fill_rsp_data[0] <= avs_readdata[31:0]; - vx_dram_fill_rsp_data[1] <= avs_readdata[63:32]; - vx_dram_fill_rsp_data[2] <= avs_readdata[95:64]; - vx_dram_fill_rsp_data[3] <= avs_readdata[127:96]; - vx_dram_fill_rsp_data[4] <= avs_readdata[159:128]; - vx_dram_fill_rsp_data[5] <= avs_readdata[191:160]; - vx_dram_fill_rsp_data[6] <= avs_readdata[223:192]; - vx_dram_fill_rsp_data[7] <= avs_readdata[255:224]; - vx_dram_fill_rsp_data[8] <= avs_readdata[287:256]; - vx_dram_fill_rsp_data[9] <= avs_readdata[319:288]; - vx_dram_fill_rsp_data[10] <= avs_readdata[351:320]; - vx_dram_fill_rsp_data[11] <= avs_readdata[383:352]; - vx_dram_fill_rsp_data[12] <= avs_readdata[415:384]; - vx_dram_fill_rsp_data[13] <= avs_readdata[447:416]; - vx_dram_fill_rsp_data[14] <= avs_readdata[479:448]; - vx_dram_fill_rsp_data[15] <= avs_readdata[511:480]; - vx_local_addr <= (vx_count ? 
vx_local_addr + 1 : vx_dram_req_addr + 1); - vx_dram_fill_rsp_addr <= vx_local_addr; - // Update the count value based on the number of bytes written - vx_count <= vx_count + 64; - - end - end -end - - - - -// Read from local memory (avs_readdata) and write to shared space -// Implement write header -always_ff @(posedge clk) begin - if (state == RD_REQ & avs_readdatavalid & !cp2af_sRxPort.c1TxAlmFull & count < avm_data_size & !avs_waitrequest & start_write) - begin - wr_addr <= (count? wr_addr + 1 : avm_write_buffer_address + 1); - local_address <= (count? local_address + 1 : avm_address + 1); - start_write <= 1'b0; - end -end - -// Write header defines the request to the FIU -t_ccip_c1_ReqMemHdr wr_hdr; - -always_comb +// Vortex DRAM fill response +always_comb begin - wr_hdr = t_ccip_c1_ReqMemHdr'(0); - - // Virtual address (MPF virtual addressing is enabled) - wr_hdr.address = (count? wr_addr: avm_write_buffer_address); - - // Start of packet is true (single line write) - wr_hdr.sop = 1'b1; + vx_dram_fill_rsp = (STATE_RUN == state) && !avs_rdq_empty && vx_dram_fill_accept; + vx_dram_fill_rsp_addr = avs_raq_dout; + {>>{vx_dram_fill_rsp_data}} = avs_rdq_dout; end -// Send write requests to the FIU -always_ff @(posedge clk) +// AVS address read request queue ///////////////////////////////////////////// + +logic cci_write_req; + +always_comb begin - if (SoftReset) - begin - af2cp_sTxPort.c1.hdr <= '0; - af2cp_sTxPort.c1.data <= '0; - af2cp_sTxPort.c1.valid <= '0; - end - - // Generate a write request when needed and the FIU isn't full - if (state == RD_REQ & avs_readdatavalid & !cp2af_sRxPort.c1TxAlmFull & count < avm_data_size & !avs_waitrequest & start_write) - begin - af2cp_sTxPort.c1.hdr <= wr_hdr; - af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_readdata); - af2cp_sTxPort.c1.valid <= 1'b1; - start_write <= 1'b0; - count <= count + 64; - end + avs_raq_pop = vx_dram_fill_rsp || cci_write_req; + avs_raq_din = avs_address; + avs_raq_push = avs_read; /* push on issued reads, not writes: each queued address is popped together with one avs_rdq read-data entry */ end -// Write
response -always_ff @(posedge clk) +VX_generic_queue_ll #( + .DATAW($bits(t_local_mem_addr)), + .SIZE(AVS_RD_QUEUE_SIZE) +) vx_rd_addr_queue ( + .clk (clk), + .reset (SoftReset), + .push (avs_raq_push), + .in_data (avs_raq_din), + .pop (avs_raq_pop), + .out_data (avs_raq_dout), + .empty (avs_raq_empty), + .full (avs_raq_full) +); + +// AVS data read response queue /////////////////////////////////////////////// + +always_comb begin - if (SoftReset) - begin - start_write <= 1'b1; - end - - // Generate a read request when needed and the FIU isn't full - if (state == RD_REQ & cp2af_sRxPort.c1.rspValid) - begin - count_rsp <= count_rsp + 64; - start_write <= 1'b1; - init_avs_read <= 1'b1; - end + avs_rdq_pop = avs_raq_pop; + avs_rdq_din = avs_readdata; + avs_rdq_push = avs_readdatavalid; end +VX_generic_queue_ll #( + .DATAW($bits(t_local_mem_data)), + .SIZE(AVS_RD_QUEUE_SIZE) +) vx_rd_data_queue ( + .clk (clk), + .reset (SoftReset), + .push (avs_rdq_push), + .in_data (avs_rdq_din), + .pop (avs_rdq_pop), + .out_data (avs_rdq_dout), + .empty (avs_rdq_empty), + .full (avs_rdq_full) +); -// avs_read control +// CCI Read Request /////////////////////////////////////////////////////////// -always_ff @(posedge clk) -begin - if (SoftReset) - begin - init_avs_read <= 1'b0; - end - - if (init_avs_read & state <= RD_REQ) - begin - avs_read <= 1'b1; - init_avs_read <= 1'b0; - end else - begin - avs_read <= 1'b0; - end -end - - - - -// Write to local memory (avs_writedata) and read from shared space -// Implement read header -always_ff @(posedge clk) begin - if (SoftReset) - begin - rd_addr <= 0; - local_address <= 0; - end - - if (state == WR_REQ & !cp2af_sRxPort.c0TxAlmFull & count < avm_data_size & !avs_waitrequest & start_read) - begin - // Read address + 1 gives address for next block. Each block is 64B - rd_addr <= (count? rd_addr + 1 : avm_read_buffer_address + 1); - local_address <= (count? 
local_address + 1 : avm_address); - start_read <= 1'b0; - end -end - -// Read header defines the request to the FIU t_ccip_c0_ReqMemHdr rd_hdr; -always_comb +logic cci_read_pending; + +always_comb begin rd_hdr = t_ccip_c0_ReqMemHdr'(0); - rd_hdr.address = (count? rd_addr : avm_read_buffer_address); + rd_hdr.address = csr_io_addr + avs_write_ctr; end -// Send read requests to the FIU -always_ff @(posedge clk) +// Send read requests to CCI +always_ff @(posedge clk) begin - if (SoftReset) + if (SoftReset) begin - af2cp_sTxPort.c0.hdr <= '0; - af2cp_sTxPort.c0.valid <= '0; - end + af2cp_sTxPort.c0.hdr <= 0; + af2cp_sTxPort.c0.valid <= 0; + cci_read_pending <= 0; + end + else begin + af2cp_sTxPort.c0.valid <= 0; - // Generate a read request when needed and the FIU isn't full - if (state == WR_REQ & !cp2af_sRxPort.c0TxAlmFull & count < avm_data_size & !avs_waitrequest & start_read) - begin - af2cp_sTxPort.c0.hdr <= rd_hdr; - af2cp_sTxPort.c0.valid <= 1'b1; - start_read <= 1'b0; - count <= count + 64; - end -end - -// Read response -always_ff @(posedge clk) -begin - if (SoftReset) - begin - start_read <= 1'b1; - avm_byteenable <= 64'hffffffffffffffff; - end - - // Generate a read request when needed and the FIU isn't full - if (state == WR_REQ & cp2af_sRxPort.c0.rspValid) - begin - if ((avm_data_size - count_rsp) < 64) + if (STATE_WRITE == state + && !cp2af_sRxPort.c0TxAlmFull // ensure read queue not full + && !avs_waitrequest // ensure AVS write queue not full + && !cci_read_pending // ensure no read pending + && avs_write_ctr < csr_data_size) // ensure not done begin - avm_byteenable <= 64'hffffffffffffffff >> (64 - (avm_data_size - count_rsp)); - end else - begin - avm_byteenable <= 64'hffffffffffffffff; + af2cp_sTxPort.c0.hdr <= rd_hdr; + af2cp_sTxPort.c0.valid <= 1; + cci_read_pending <= 1; + $display("%t: CCI Rd Req: addr=%h", $time, rd_hdr.address); end - avs_writedata <= cp2af_sRxPort.c0.data; - avs_write <= 1; - count_rsp <= count_rsp + 64; - start_read <= 1'b1; 
+ + if (cci_read_pending + && cp2af_sRxPort.c0.rspValid) + begin + $display("%t: CCI Rd Rsp: value=%h", $time, cp2af_sRxPort.c0.data[63:0]); + cci_read_pending <= 0; + end end end +// CCI Write Request ////////////////////////////////////////////////////////// + +t_ccip_c1_ReqMemHdr wr_hdr; + +logic cci_write_pending; + +always_comb +begin + cci_write_req = (STATE_READ == state) + && !avs_rdq_empty + && !cp2af_sRxPort.c1TxAlmFull + && !cci_write_pending + && cci_write_ctr < csr_data_size; + + wr_hdr = t_ccip_c1_ReqMemHdr'(0); + wr_hdr.address = csr_io_addr + cci_write_ctr; + wr_hdr.sop = 1; // single line write mode +end + +// Send write requests to CCI +always_ff @(posedge clk) +begin + if (SoftReset) + begin + af2cp_sTxPort.c1.hdr <= 0; + af2cp_sTxPort.c1.data <= 0; + af2cp_sTxPort.c1.valid <= 0; + cci_write_ctr <= 0; + cci_write_pending <= 0; + end + else begin + af2cp_sTxPort.c1.valid <= 0; + + if (STATE_IDLE == state) + begin + cci_write_ctr <= 0; + end + + if (cci_write_req) + begin + af2cp_sTxPort.c1.hdr <= wr_hdr; + af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_rdq_dout); + af2cp_sTxPort.c1.valid <= 1; + cci_write_pending <= 1; + $display("%t: CCI Wr Req: addr=%h value=%h", $time, wr_hdr.address, avs_rdq_dout[63:0]); + end + + if (cci_write_pending + && cp2af_sRxPort.c1.rspValid) + begin + cci_write_ctr <= cci_write_ctr + 1; + cci_write_pending <= 0; + $display("%t: CCI Wr Rsp", $time); + end + end +end + +// Vortex cache snooping ////////////////////////////////////////////////////// + +always_ff @(posedge clk) +begin + if (SoftReset) + begin + vx_snp_req <= 0; + vx_snoop_ctr <= 0; + vx_snoop_delay <= 0; + end + else begin + if (STATE_IDLE == state) + begin + vx_snoop_ctr <= 0; + vx_snoop_delay <= 0; + end + + vx_snp_req <= 0; + + if ((STATE_SNOOP1 == state + || STATE_SNOOP2 == state) + && vx_snoop_ctr < csr_data_size + && vx_snp_req_delay) + begin + vx_snp_req <= 1; + vx_snoop_ctr <= vx_snoop_ctr + 1; + end + + if ((vx_snoop_ctr >= csr_data_size) + && 
(vx_snoop_delay < VX_SNOOPING_DELAY)) + begin + vx_snoop_delay <= vx_snoop_delay + 1; + end + + if (vx_snoop_delay >= VX_SNOOPING_DELAY) + begin + vx_snoop_ctr <= 0; + vx_snoop_delay <= 0; + end + end +end + +// Vortex binding ///////////////////////////////////////////////////////////// + +Vortex_SOC #() vx_soc ( + .clk (clk), + .reset (SoftReset || vx_reset), + + // DRAM Req + .out_dram_req_write (vx_dram_req_write), + .out_dram_req_read (vx_dram_req_read), + .out_dram_req_addr (vx_dram_req_addr), + .out_dram_req_data (vx_dram_req_data), + .out_dram_req_delay (vx_dram_req_delay), + + // DRAM Rsp + .out_dram_fill_accept (vx_dram_fill_accept), + .out_dram_fill_rsp (vx_dram_fill_rsp), + .out_dram_fill_rsp_addr (vx_dram_fill_rsp_addr), + .out_dram_fill_rsp_data (vx_dram_fill_rsp_data), + + // Cache Snooping Req + .llc_snp_req (vx_snp_req), + .llc_snp_req_addr (vx_snp_req_addr), + .llc_snp_req_delay (vx_snp_req_delay), + + // program exit signal + .out_ebreak (vx_ebreak) +); + endmodule diff --git a/driver/sw/opae/Makefile b/driver/sw/opae/Makefile index 05694604..08397a99 100644 --- a/driver/sw/opae/Makefile +++ b/driver/sw/opae/Makefile @@ -1,7 +1,7 @@ CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors -CXXFLAGS += -I../include -I/tools/opae/1.4.0/include +CXXFLAGS += -I../include -I/tools/opae/1.4.0/include -I../../../runtime LDFLAGS += -L/tools/opae/1.4.0/lib @@ -17,6 +17,8 @@ CXXFLAGS +=-fstack-protector # Position independent code CXXFLAGS += -fPIC +CXXFLAGS += -DGLOBAL_BLOCK_SIZE_BYTES=64 + LDFLAGS += -luuid LDFLAGS += -shared @@ -50,7 +52,7 @@ $(PROJECT_ASE): $(SRCS) $(ASE_DIR) $(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $@ vortex.o: vortex.cpp $(AFU_JSON_INFO) - $(CC) $(CXXFLAGS) -c vortex.cpp -o $@ + $(CXX) $(CXXFLAGS) -c vortex.cpp -o $@ $(ASE_DIR): mkdir -p ase diff --git a/driver/sw/opae/vortex.cpp b/driver/sw/opae/vortex.cpp index 1f62dcfc..11d29ee3 100755 --- a/driver/sw/opae/vortex.cpp +++ 
b/driver/sw/opae/vortex.cpp @@ -4,35 +4,35 @@ #include #include #include - #include #include #include "vortex_afu.h" -// MMIO Address Mappings -#define MMIO_COPY_IO_ADDRESS 0X120 -#define MMIO_COPY_AVM_ADDRESS 0x100 -#define MMIO_COPY_DATA_SIZE 0X118 - -#define MMIO_CMD_TYPE 0X110 -#define MMIO_READY_FOR_CMD 0X198 - -#define MMIO_CMD_TYPE_READ 0 -#define MMIO_CMD_TYPE_WRITE 1 -#define MMIO_CMD_TYPE_START 2 -#define MMIO_CMD_TYPE_SNOOP 3 - #define CHECK_RES(_expr) \ do { \ fpga_result res = _expr; \ if (res == FPGA_OK) \ break; \ - printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \ + printf("OPAE Error: '%s' returned %d, %s!\n", \ + #_expr, (int)res, fpgaErrStr(res)); \ return -1; \ } while (false) /////////////////////////////////////////////////////////////////////////////// +#define CMD_TYPE_READ AFU_IMAGE_CMD_TYPE_READ +#define CMD_TYPE_WRITE AFU_IMAGE_CMD_TYPE_WRITE +#define CMD_TYPE_RUN AFU_IMAGE_CMD_TYPE_RUN +#define CMD_TYPE_SNOOP AFU_IMAGE_CMD_TYPE_SNOOP + +#define MMIO_CSR_CMD (AFU_IMAGE_MMIO_CSR_CMD * 4) +#define MMIO_CSR_STATUS (AFU_IMAGE_MMIO_CSR_STATUS * 4) +#define MMIO_CSR_IO_ADDR (AFU_IMAGE_MMIO_CSR_IO_ADDR * 4) +#define MMIO_CSR_MEM_ADDR (AFU_IMAGE_MMIO_CSR_MEM_ADDR * 4) +#define MMIO_CSR_DATA_SIZE (AFU_IMAGE_MMIO_CSR_DATA_SIZE * 4) + +/////////////////////////////////////////////////////////////////////////////// + typedef struct vx_device_ { fpga_handle fpga; size_t mem_allocation; @@ -42,21 +42,19 @@ typedef struct vx_buffer_ { uint64_t wsid; volatile void* host_ptr; uint64_t io_addr; - fpga_handle fpga; + vx_device_h hdevice; size_t size; } vx_buffer_t; static size_t align_size(size_t size) { - uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); + uint32_t cache_block_size = vx_dev_caps(VX_CAPS_CACHE_LINESIZE); return cache_block_size * ((size + cache_block_size - 1) / cache_block_size); } /////////////////////////////////////////////////////////////////////////////// -// Search for an accelerator matching the requested 
UUID and connect to it -// Convert this to void if required as storing the fpga_handle to params variable extern int vx_dev_open(vx_device_h* hdevice) { - fpga_properties filter = NULL; + fpga_properties filter = nullptr; fpga_result res; fpga_guid guid; fpga_token accel_token; @@ -64,11 +62,14 @@ extern int vx_dev_open(vx_device_h* hdevice) { fpga_handle accel_handle; vx_device_t* device; - if (NULL == hdevice) + if (nullptr == hdevice) return -1; + // ensure that the block size 64 + assert(64 == vx_dev_caps(VX_CAPS_CACHE_LINESIZE)); + // Set up a filter that will search for an accelerator - fpgaGetProperties(NULL, &filter); + fpgaGetProperties(nullptr, &filter); fpgaPropertiesSetObjectType(filter, FPGA_ACCELERATOR); // Add the desired UUID to the filter @@ -84,13 +85,13 @@ extern int vx_dev_open(vx_device_h* hdevice) { if (num_matches < 1) { fprintf(stderr, "Accelerator %s not found!\n", AFU_ACCEL_UUID); - return NULL; + return -1; } // Open accelerator res = fpgaOpen(accel_token, &accel_handle, 0); if (FPGA_OK != res) { - return NULL; + return -1; } // Done with token @@ -98,9 +99,9 @@ extern int vx_dev_open(vx_device_h* hdevice) { // allocate device object device = (vx_device_t*)malloc(sizeof(vx_device_t)); - if (NULL == device) { + if (nullptr == device) { fpgaClose(accel_handle); - return NULL; + return -1; } device->fpga = accel_handle; @@ -111,9 +112,8 @@ extern int vx_dev_open(vx_device_h* hdevice) { return 0; } -// Close the fpga when all the operations are done extern int vx_dev_close(vx_device_h hdevice) { - if (NULL == hdevice) + if (nullptr == hdevice) return -1; vx_device_t *device = ((vx_device_t*)hdevice); @@ -126,15 +126,15 @@ extern int vx_dev_close(vx_device_h hdevice) { } extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) { - if (NULL == hdevice - || NULL == dev_maddr + if (nullptr == hdevice + || nullptr == dev_maddr || 0 >= size) return -1; vx_device_t *device = ((vx_device_t*)hdevice); size_t asize = 
align_size(size); - auto dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE); + size_t dev_mem_size = vx_dev_caps(VX_CAPS_LOCAL_MEM_SIZE); if (device->mem_allocation + asize > dev_mem_size) return -1; @@ -151,9 +151,9 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb uint64_t io_addr; vx_buffer_t* buffer; - if (NULL == hdevice + if (nullptr == hdevice || 0 >= size - || NULL == hbuffer) + || nullptr == hbuffer) return -1; vx_device_t *device = ((vx_device_t*)hdevice); @@ -174,7 +174,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb // allocate buffer object buffer = (vx_buffer_t*)malloc(sizeof(vx_buffer_t)); - if (NULL == buffer) { + if (nullptr == buffer) { fpgaReleaseBuffer(device->fpga, wsid); return -1; } @@ -182,7 +182,7 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb buffer->wsid = wsid; buffer->host_ptr = host_ptr; buffer->io_addr = io_addr; - buffer->fpga = device->fpga; + buffer->hdevice = hdevice; buffer->size = size; *hbuffer = buffer; @@ -191,136 +191,30 @@ extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hb } extern volatile void* vx_host_ptr(vx_buffer_h hbuffer) { + if (nullptr == hbuffer) + return nullptr; + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); - if (NULL == buffer) - return NULL; return buffer->host_ptr; } extern int vx_buf_release(vx_buffer_h hbuffer) { - vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); - if (NULL == buffer) + if (nullptr == hbuffer) return -1; - fpgaReleaseBuffer(buffer->fpga, buffer->wsid); + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + vx_device_t *device = ((vx_device_t*)buffer->hdevice); + + fpgaReleaseBuffer(device->fpga, buffer->wsid); free(buffer); return 0; } -// Check if HW is ready for SW -static int ready_for_sw(fpga_handle hdevice) { - uint64_t data = 0; - struct timespec sleep_time; - -#ifdef USE_ASE - sleep_time.tv_sec = 1; - sleep_time.tv_nsec = 0; -#else - 
sleep_time.tv_sec = 0; - sleep_time.tv_nsec = 1000000; -#endif - - do { - CHECK_RES(fpgaReadMMIO64(hdevice, 0, MMIO_READY_FOR_CMD, &data)); - nanosleep(&sleep_time, NULL); - } while (data != 0x1); - - return 0; -} - -extern int vx_copy_to_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t src_offset) { - if (NULL == hbuffer - || 0 >= size) - return -1; - - vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); - - // bound checking - if (size + src_offset > buffer->size) - return -1; - - // Ensure ready for new command - if (ready_for_sw(buffer->fpga) != 0) - return -1; - - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + src_offset); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_WRITE)); - - // Wait for the write operation to finish - return ready_for_sw(buffer->fpga); -} - -extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) { - if (NULL == hbuffer - || 0 >= size) - return -1; - - vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); - - // bound checking - if (size + dest_offset > buffer->size) - return -1; - - // Ensure ready for new command - if (ready_for_sw(buffer->fpga) != 0) - return -1; - - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, buffer->io_addr + dest_offset); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_READ)); - - // Wait for the write operation to finish - return ready_for_sw(buffer->fpga); -} - -extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { - if (NULL == hbuffer - || 0 >= size) - return -1; - - vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); - - // bound 
checking - if (size + src_offset > buffer->size) - return -1; - - // Ensure ready for new command - if (ready_for_sw(buffer->fpga) != 0) - return -1; - - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_AVM_ADDRESS, dev_maddr)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_IO_ADDRESS, (buffer->io_addr + src_offset)/VX_CACHE_LINESIZE)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_COPY_DATA_SIZE, size)); - CHECK_RES(fpgaWriteMMIO64(buffer->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_SNOOP)); - - // Wait for the write operation to finish - return ready_for_sw(buffer->fpga); - return 0; -} - -extern int vx_start(vx_device_h hdevice) { - if (NULL == hdevice) - return -1; - - vx_device_t *device = ((vx_device_t*)hdevice); - - // Ensure ready for new command - if (ready_for_sw(device->fpga) != 0) - return -1; - - CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, MMIO_CMD_TYPE_START)); - - return 0; -} - extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { - if (NULL == hdevice) + if (nullptr == hdevice) return -1; vx_device_t *device = ((vx_device_t*)hdevice); @@ -328,7 +222,7 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { uint64_t data = 0; struct timespec sleep_time; -#ifdef USE_ASE +#if defined(USE_ASE) sleep_time.tv_sec = 1; sleep_time.tv_nsec = 0; #else @@ -339,13 +233,106 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { // to milliseconds long long sleep_time_ms = (sleep_time.tv_sec * 1000) + (sleep_time.tv_nsec / 1000000); - do { - CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_READY_FOR_CMD, &data)); - nanosleep(&sleep_time, NULL); - sleep_time_ms -= sleep_time_ms; - if (timeout <= sleep_time_ms) - break; - } while (data != 0x1); + for (;;) { + CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data)); + if (0 == data || 0 == timeout) + break; + nanosleep(&sleep_time, nullptr); + timeout -= sleep_time_ms; + }; + + return 0; +} + +extern int vx_copy_to_dev(vx_buffer_h hbuffer, 
size_t dev_maddr, size_t size, size_t src_offset) { + if (nullptr == hbuffer + || 0 >= size) + return -1; + + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + vx_device_t *device = ((vx_device_t*)buffer->hdevice); + + // bound checking + if (size + src_offset > buffer->size) + return -1; + + // Ensure ready for new command + if (vx_ready_wait(buffer->hdevice, -1) != 0) + return -1; + + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + src_offset)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_WRITE)); + + // Wait for the write operation to finish + if (vx_ready_wait(buffer->hdevice, -1) != 0) + return -1; + + return 0; +} + +extern int vx_copy_from_dev(vx_buffer_h hbuffer, size_t dev_maddr, size_t size, size_t dest_offset) { + if (nullptr == hbuffer + || 0 >= size) + return -1; + + vx_buffer_t* buffer = ((vx_buffer_t*)hbuffer); + vx_device_t *device = ((vx_device_t*)buffer->hdevice); + + // bound checking + if (size + dest_offset > buffer->size) + return -1; + + // Ensure ready for new command + if (vx_ready_wait(buffer->hdevice, -1) != 0) + return -1; + + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_IO_ADDR, buffer->io_addr + dest_offset)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_READ)); + + // Wait for the write operation to finish + if (vx_ready_wait(buffer->hdevice, -1) != 0) + return -1; + + return 0; +} + +extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { + if (nullptr == hdevice + || 0 >= size) + return -1; + + vx_device_t* device = ((vx_device_t*)hdevice); + + // Ensure ready for new command + if (vx_ready_wait(hdevice, -1) != 0) + return -1; + + 
CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_MEM_ADDR, dev_maddr)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA_SIZE, size)); + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_SNOOP)); + + // Wait for the write operation to finish + if (vx_ready_wait(hdevice, -1) != 0) + return -1; + + return 0; +} + +extern int vx_start(vx_device_h hdevice) { + if (nullptr == hdevice) + return -1; + + vx_device_t *device = ((vx_device_t*)hdevice); + + // Ensure ready for new command + if (vx_ready_wait(hdevice, -1) != 0) + return -1; + + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CMD, CMD_TYPE_RUN)); return 0; } \ No newline at end of file diff --git a/driver/sw/rtlsim/vortex.cpp b/driver/sw/rtlsim/vortex.cpp index df816d87..58405f0e 100644 --- a/driver/sw/rtlsim/vortex.cpp +++ b/driver/sw/rtlsim/vortex.cpp @@ -11,17 +11,6 @@ #include #include -#define PAGE_SIZE 4096 - -#define CHECK_RES(_expr) \ - do { \ - fpga_result res = _expr; \ - if (res == FPGA_OK) \ - break; \ - printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \ - return -1; \ - } while (false) - /////////////////////////////////////////////////////////////////////////////// static size_t align_size(size_t size) { @@ -197,7 +186,7 @@ private: /////////////////////////////////////////////////////////////////////////////// extern int vx_dev_open(vx_device_h* hdevice) { - if (NULL == hdevice) + if (nullptr == hdevice) return -1; *hdevice = new vx_device(); @@ -217,8 +206,8 @@ extern int vx_dev_close(vx_device_h hdevice) { } extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) { - if (NULL == hdevice - || NULL == dev_maddr + if (nullptr == hdevice + || nullptr == dev_maddr || 0 >= size) return -1; @@ -227,7 +216,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) } extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { - if (NULL == hdevice + if (nullptr == hdevice || 0 >= size) 
return -1; @@ -240,7 +229,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t dev_maddr, size_t size) { extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) { if (nullptr == hdevice || 0 >= size - || NULL == hbuffer) + || nullptr == hbuffer) return -1; vx_device *device = ((vx_device*)hdevice); diff --git a/driver/sw/simx/Makefile b/driver/sw/simx/Makefile index 8299fbc2..87eb39b0 100644 --- a/driver/sw/simx/Makefile +++ b/driver/sw/simx/Makefile @@ -1,7 +1,7 @@ CFLAGS += -std=c++11 -O3 -Wall -Wextra -pedantic -Wfatal-errors #CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -pedantic -Wfatal-errors -CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime +CFLAGS += -I../../include -I../../../../simX/include -I../../../../runtime CFLAGS += -fPIC diff --git a/driver/sw/simx/vortex.cpp b/driver/sw/simx/vortex.cpp index a6d03433..ef5a4b6d 100644 --- a/driver/sw/simx/vortex.cpp +++ b/driver/sw/simx/vortex.cpp @@ -13,15 +13,6 @@ #define PAGE_SIZE 4096 -#define CHECK_RES(_expr) \ - do { \ - fpga_result res = _expr; \ - if (res == FPGA_OK) \ - break; \ - printf("OPAE Error: '%s' returned %d!\n", #_expr, (int)res); \ - return -1; \ - } while (false) - /////////////////////////////////////////////////////////////////////////////// static size_t align_size(size_t size) { @@ -206,7 +197,7 @@ private: /////////////////////////////////////////////////////////////////////////////// extern int vx_dev_open(vx_device_h* hdevice) { - if (NULL == hdevice) + if (nullptr == hdevice) return -1; *hdevice = new vx_device(); @@ -226,8 +217,8 @@ extern int vx_dev_close(vx_device_h hdevice) { } extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) { - if (NULL == hdevice - || NULL == dev_maddr + if (nullptr == hdevice + || nullptr == dev_maddr || 0 >= size) return -1; @@ -236,7 +227,7 @@ extern int vx_alloc_dev_mem(vx_device_h hdevice, size_t size, size_t* dev_maddr) } extern int vx_flush_caches(vx_device_h 
hdevice, size_t /*dev_maddr*/, size_t size) { - if (NULL == hdevice + if (nullptr == hdevice || 0 >= size) return -1; // this functionality is not need by simX @@ -246,7 +237,7 @@ extern int vx_flush_caches(vx_device_h hdevice, size_t /*dev_maddr*/, size_t siz extern int vx_alloc_shared_mem(vx_device_h hdevice, size_t size, vx_buffer_h* hbuffer) { if (nullptr == hdevice || 0 >= size - || NULL == hbuffer) + || nullptr == hbuffer) return -1; vx_device *device = ((vx_device*)hdevice); diff --git a/driver/tests/basic/Makefile b/driver/tests/basic/Makefile index 089739a1..91e1b9c9 100644 --- a/driver/tests/basic/Makefile +++ b/driver/tests/basic/Makefile @@ -18,7 +18,7 @@ run-fpga: $(PROJECT) LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) run-ase: $(PROJECT) - LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) + LIBOPAE_LOG=1 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) run-rtlsim: $(PROJECT) LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) diff --git a/driver/tests/basic/basic b/driver/tests/basic/basic index 1a2ab35e..f3a0e08b 100755 Binary files a/driver/tests/basic/basic and b/driver/tests/basic/basic differ diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 7f5b98f4..02a3d3d9 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -1,6 +1,4 @@ -#include -#include -#include +#include #include #include @@ -9,8 +7,8 @@ static void parse_args(int argc, char **argv) { while ((c = getopt(argc, argv, "?")) != -1) { switch (c) { case '?': { - printf("Test.\n"); - printf("Usage: [-h: help]\n"); + std::cout << "Test." 
<< std::endl; + std::cout << "Usage: [-h: help]" << std::endl; exit(0); } break; default: @@ -20,12 +18,17 @@ static void parse_args(int argc, char **argv) { } uint64_t shuffle(int i, uint64_t value) { - return (value << i) | (value & ((1 << i)-1));; + //return (value << i) | (value & ((1 << i)-1));; + return 0x0badf00ddeadbeef; } -int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t value, int num_blocks) { - int err; - int num_failures = 0; +int run_test(vx_buffer_h sbuf, + vx_buffer_h dbuf, + uint32_t address, + uint64_t value, + int num_blocks) { + int ret; + int errors = 0; // write sbuf data for (int i = 0; i < 8 * num_blocks; ++i) { @@ -33,75 +36,114 @@ int run_test(vx_buffer_h sbuf, vx_buffer_h dbuf, uint32_t address, uint64_t valu } // write buffer to local memory - err = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0); - if (err != 0) - return -1; + std::cout << "write buffer to local memory" << std::endl; + ret = vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0); + if (ret != 0) + return ret; // read buffer from local memory - err = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0); - if (err != 0) - return -1; + std::cout << "read buffer from local memory" << std::endl; + ret = vx_copy_from_dev(dbuf, address, 64 * num_blocks, 0); + if (ret != 0) + return ret; // verify result + std::cout << "verify result" << std::endl; for (int i = 0; i < 8 * num_blocks; ++i) { auto curr = ((uint64_t*)vx_host_ptr(dbuf))[i]; auto ref = shuffle(i, value); if (curr != ref) { - printf("error @ %x: actual %ld, expected %ld\n", address + 64 * i, curr, ref); - ++num_failures; + std::cout << "error @ " << std::hex << (address + 64 * i) + << ": actual " << curr << ", expected " << ref << std::endl; + ++errors; } - } - return num_failures; + } + + if (errors != 0) { + std::cout << "Found " << errors << " errors!" << std::endl; + std::cout << "FAILED!" 
<< std::endl; + return 1; + } + + return 0; +} + +vx_device_h device = nullptr; +vx_buffer_h sbuf = nullptr; +vx_buffer_h dbuf = nullptr; + +void cleanup() { + if (sbuf) { + vx_buf_release(sbuf); + } + if (dbuf) { + vx_buf_release(dbuf); + } + if (device) { + vx_dev_close(device); + } } int main(int argc, char *argv[]) { - int err; - int num_failures = 0; + int ret; // parse command arguments parse_args(argc, argv); // open device connection + std::cout << "open device connection" << std::endl; vx_device_h device; - err = vx_dev_open(&device); - if (err != 0) - return -1; + ret = vx_dev_open(&device); + if (ret != 0) + return ret; // create source buffer - vx_buffer_h sbuf; - err = vx_alloc_shared_mem(device, 4096, &sbuf); - if (err != 0) { - vx_dev_close(device); - return -1; + std::cout << "create source buffer" << std::endl; + ret = vx_alloc_shared_mem(device, 4096, &sbuf); + if (ret != 0) { + cleanup(); + return ret; } // create destination buffer - vx_buffer_h dbuf; - err = vx_alloc_shared_mem(device, 4096, &dbuf); - if (err != 0) { - vx_buf_release(sbuf); - vx_dev_close(device); - return -1; + std::cout << "create destination buffer" << std::endl; + ret = vx_alloc_shared_mem(device, 4096, &dbuf); + if (ret != 0) { + cleanup(); + return ret; } // run tests - num_failures += run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1); - num_failures += run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 2); - num_failures += run_test(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4); - num_failures += run_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8); - - // releae buffers - vx_buf_release(sbuf); - vx_buf_release(dbuf); - - // close device - vx_dev_close(device); - - if (0 == num_failures) { - printf("Test PASSED\n"); - } else { - printf("Test FAILED\n"); + std::cout << "run tests" << std::endl; + ret = run_test(sbuf, dbuf, 0x10000000, 0x0badf00d00ff00ff, 1); + if (ret != 0) { + cleanup(); + return ret; } - return num_failures; + ret = run_test(sbuf, dbuf, 
0x10000000, 0x0badf00d00ff00ff, 2); + if (ret != 0) { + cleanup(); + return ret; + } + + ret = run_test(sbuf, dbuf, 0x20000000, 0xff00ff00ff00ff00, 4); + if (ret != 0) { + cleanup(); + return ret; + } + + ret = run_test(sbuf, dbuf, 0x20000000, 0x0badf00d40ff40ff, 8); + if (ret != 0) { + cleanup(); + return ret; + } + + // cleanup + std::cout << "cleanup" << std::endl; + cleanup(); + + std::cout << "Test PASSED" << std::endl; + + return 0; } diff --git a/driver/tests/demo/Makefile b/driver/tests/demo/Makefile index 48effb87..b5e3e2a9 100644 --- a/driver/tests/demo/Makefile +++ b/driver/tests/demo/Makefile @@ -46,7 +46,7 @@ run-fpga: $(PROJECT) LD_LIBRARY_PATH=../../sw/opae:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 run-ase: $(PROJECT) - LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 + LIBOPAE_LOG=1 LD_LIBRARY_PATH=../../sw/opae/ase:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 run-rtlsim: $(PROJECT) LD_LIBRARY_PATH=../../sw/rtlsim:$(LD_LIBRARY_PATH) ./$(PROJECT) -f kernel.bin -n 16 diff --git a/driver/tests/demo/demo b/driver/tests/demo/demo index 72482b23..513c7ed8 100755 Binary files a/driver/tests/demo/demo and b/driver/tests/demo/demo differ diff --git a/driver/tests/demo/demo.cpp b/driver/tests/demo/demo.cpp index 386b8ff4..3e84bbd0 100644 --- a/driver/tests/demo/demo.cpp +++ b/driver/tests/demo/demo.cpp @@ -1,6 +1,5 @@ #include #include -#include #include #include #include "common.h" @@ -40,21 +39,77 @@ static void parse_args(int argc, char **argv) { } } -vx_device_h device; -vx_buffer_h buffer; +int run_test(vx_device_h device, + vx_buffer_h buffer, + const kernel_arg_t& kernel_arg, + uint32_t buf_size, + uint32_t num_points) { + int ret; + + // start device + std::cout << "start device" << std::endl; + ret = vx_start(device); + if (ret != 0) { + return ret; + } + + // wait for completion + std::cout << "wait for completion" << std::endl; + ret = vx_ready_wait(device, -1); + if (ret != 0) { + return ret; + 
} + + // flush the destination buffer caches + std::cout << "flush the destination buffer caches" << std::endl; + ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size); + if (ret != 0) { + return ret; + } + + // download destination buffer + std::cout << "download destination buffer" << std::endl; + ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0); + if (ret != 0) { + return ret; + } + + // verify result + std::cout << "verify result" << std::endl; + { + int errors = 0; + auto buf_ptr = (int*)vx_host_ptr(buffer); + for (uint32_t i = 0; i < num_points; ++i) { + int ref = i * i; + int cur = buf_ptr[i]; + if (cur != ref) { + ++errors; + } + } + if (errors != 0) { + std::cout << "Found " << errors << " errors!" << std::endl; + std::cout << "FAILED!" << std::endl; + return 1; + } + } + + return 0; +} + +vx_device_h device = nullptr; +vx_buffer_h buffer = nullptr; void cleanup() { - if (device) { - vx_dev_close(device); - } if (buffer) { vx_buf_release(buffer); } + if (device) { + vx_dev_close(device); + } } int main(int argc, char *argv[]) { int ret; - int errors = 0; size_t value; kernel_arg_t kernel_arg; @@ -79,14 +134,14 @@ int main(int argc, char *argv[]) { std::cout << "open device connection" << std::endl; ret = vx_dev_open(&device); if (ret != 0) - return -1; + return ret; // upload program std::cout << "upload program" << std::endl; ret = vx_upload_kernel_file(device, program_file); if (ret != 0) { cleanup(); - return -1; + return ret; } // allocate device memory @@ -95,21 +150,21 @@ int main(int argc, char *argv[]) { ret = vx_alloc_dev_mem(device, buf_size, &value); if (ret != 0) { cleanup(); - return -1; + return ret; } kernel_arg.src0_ptr = value; ret = vx_alloc_dev_mem(device, buf_size, &value); if (ret != 0) { cleanup(); - return -1; + return ret; } kernel_arg.src1_ptr = value; ret = vx_alloc_dev_mem(device, buf_size, &value); if (ret != 0) { cleanup(); - return -1; + return ret; } kernel_arg.dst_ptr = value; @@ -119,7 +174,7 @@ int 
main(int argc, char *argv[]) { ret = vx_alloc_shared_mem(device, alloc_size, &buffer); if (ret != 0) { cleanup(); - return -1; + return ret; } // populate source buffer values @@ -137,13 +192,13 @@ int main(int argc, char *argv[]) { ret = vx_copy_to_dev(buffer, kernel_arg.src0_ptr, buf_size, 0); if (ret != 0) { cleanup(); - return -1; + return ret; } ret = vx_copy_to_dev(buffer, kernel_arg.src1_ptr, buf_size, 0); if (ret != 0) { cleanup(); - return -1; + return ret; } // upload kernel argument @@ -158,117 +213,29 @@ int main(int argc, char *argv[]) { ret = vx_copy_to_dev(buffer, KERNEL_ARG_DEV_MEM_ADDR, sizeof(kernel_arg_t), 0); if (ret != 0) { cleanup(); - return -1; + return ret; } } - // start device - std::cout << "start device" << std::endl; - ret = vx_start(device); + // run tests + std::cout << "run tests" << std::endl; + ret = run_test(device, buffer, kernel_arg, buf_size, num_points); if (ret != 0) { cleanup(); - return -1; + return ret; } - - // wait for completion - std::cout << "wait for completion" << std::endl; - ret = vx_ready_wait(device, -1); + + ret = run_test(device, buffer, kernel_arg, buf_size, num_points); if (ret != 0) { cleanup(); - return -1; - } - - // flush the destination buffer caches - std::cout << "flush the destination buffer caches" << std::endl; - ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size); - if (ret != 0) { - cleanup(); - return -1; - } - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0); - if (ret != 0) { - cleanup(); - return -1; - } - - // verify result - std::cout << "verify result" << std::endl; - { - auto buf_ptr = (int*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = i * i; - int cur = buf_ptr[i]; - if (cur != ref) { - ++errors; - } - } - } - - if (errors != 0) { - printf("Found %d errors!\n", errors); - printf("FAILED!\n"); - cleanup(); - return -1; - } - - // start 
device - std::cout << "start device" << std::endl; - ret = vx_start(device); - if (ret != 0) { - cleanup(); - return -1; - } - - // wait for completion - std::cout << "wait for completion" << std::endl; - ret = vx_ready_wait(device, -1); - if (ret != 0) { - cleanup(); - return -1; - } - - // flush the destination buffer caches - std::cout << "flush the destination buffer caches" << std::endl; - ret = vx_flush_caches(device, kernel_arg.dst_ptr, buf_size); - if (ret != 0) { - cleanup(); - return -1; - } - - // download destination buffer - std::cout << "download destination buffer" << std::endl; - ret = vx_copy_from_dev(buffer, kernel_arg.dst_ptr, buf_size, 0); - if (ret != 0) { - cleanup(); - return -1; - } - - // verify result - std::cout << "verify result" << std::endl; - { - auto buf_ptr = (int*)vx_host_ptr(buffer); - for (uint32_t i = 0; i < num_points; ++i) { - int ref = i * i; - int cur = buf_ptr[i]; - if (cur != ref) { - ++errors; - } - } + return ret; } // cleanup std::cout << "cleanup" << std::endl; cleanup(); - if (0 == errors) { - printf("PASSED!\n"); - } else { - printf("Found %d errors!\n", errors); - printf("FAILED!\n"); - } + std::cout << "PASSED!" << std::endl; - return errors; + return 0; } \ No newline at end of file