diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 72c383c4..d830f00a 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -215,7 +215,7 @@ extern int vx_dev_open(vx_device_h* hdevice) { #ifdef SCOPE { - int ret = vx_scope_start(accel_handle, 0); + int ret = vx_scope_start(accel_handle, 0, -1); if (ret != 0) { fpgaClose(accel_handle); return ret; @@ -235,7 +235,7 @@ extern int vx_dev_close(vx_device_h hdevice) { vx_device_t *device = ((vx_device_t*)hdevice); #ifdef SCOPE - vx_scope_stop(device->fpga, 0); + vx_scope_stop(device->fpga); #endif #ifdef DUMP_PERF_STATS diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index e46fee69..5cae4983 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -38,7 +38,7 @@ #define CMD_GET_DATA 1 #define CMD_GET_WIDTH 2 #define CMD_GET_COUNT 3 -#define CMD_SET_DELAY 4 +#define CMD_SET_START 4 #define CMD_SET_STOP 5 #define CMD_GET_OFFSET 6 @@ -58,7 +58,7 @@ static std::mutex g_timeout_mutex; static void timeout_callback(fpga_handle fpga) { std::this_thread::sleep_for(std::chrono::seconds{HANG_TIMEOUT}); - vx_scope_stop(fpga, HANG_TIMEOUT); + vx_scope_stop(fpga); fpgaClose(fpga); exit(0); } @@ -101,16 +101,21 @@ void dump_module(std::ofstream& ofs, int parent) { } } -int vx_scope_start(fpga_handle hfpga, uint64_t delay) { +int vx_scope_start(fpga_handle hfpga, uint64_t start_time, uint64_t stop_time) { if (nullptr == hfpga) return -1; - if (delay != uint64_t(-1)) { - // set start delay - uint64_t cmd_delay = ((delay << 3) | CMD_SET_DELAY); - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay)); - std::cout << "scope start delay: " << std::dec << delay << "s" << std::endl; + if (stop_time != uint64_t(-1)) { + // set stop time + uint64_t cmd_stop = ((stop_time << 3) | CMD_SET_STOP); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop)); + std::cout << "scope stop time: " << std::dec << stop_time << "s" << std::endl; } + + // start recording + uint64_t cmd_delay = ((start_time << 3) | CMD_SET_START); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay)); + std::cout << "scope start time: " << std::dec << start_time << "s" << std::endl; #ifdef HANG_TIMEOUT g_timeout_thread = std::thread(timeout_callback, hfpga); @@ -120,7 +125,7 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) { return 0; } -int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { +int vx_scope_stop(fpga_handle hfpga) { #ifdef HANG_TIMEOUT if (!g_timeout_mutex.try_lock()) return 0; @@ -128,13 +133,10 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { if (nullptr == hfpga) return -1; - - if (delay != uint64_t(-1)) { - // stop recording - uint64_t cmd_stop = ((delay << 3) | CMD_SET_STOP); - CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop)); - std::cout << "scope stop delay: " << std::dec << delay << "s" << std::endl; - } + + // forced stop + uint64_t cmd_stop = ((0 << 3) | CMD_SET_STOP); + CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop)); std::cout << "scope trace dump begin..." << std::endl; diff --git a/driver/opae/vx_scope.h b/driver/opae/vx_scope.h index a3e13455..bb8cad19 100644 --- a/driver/opae/vx_scope.h +++ b/driver/opae/vx_scope.h @@ -6,6 +6,6 @@ #define HANG_TIMEOUT (30*60) #endif -int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1); +int vx_scope_start(fpga_handle hfpga, uint64_t start_time = 0, uint64_t stop_time = -1); -int vx_scope_stop(fpga_handle hfpga, uint64_t delay = -1); \ No newline at end of file +int vx_scope_stop(fpga_handle hfpga); \ No newline at end of file diff --git a/hw/rtl/VX_scope.vh b/hw/rtl/VX_scope.vh index 27344d77..609b6ccb 100644 --- a/hw/rtl/VX_scope.vh +++ b/hw/rtl/VX_scope.vh @@ -7,7 +7,7 @@ `define SCOPE_ASSIGN(d,s) assign scope_``d = s -`define SCOPE_SIZE 4096 +`define SCOPE_SIZE 16384 `else diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index dcf19ee0..b4a5b045 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -109,10 +109,11 @@ module VX_avs_wrapper #( assign avs_address = dram_req_addr; assign avs_byteenable = dram_req_byteen; assign avs_writedata = dram_req_data; - assign dram_req_ready = !avs_waitrequest && !rsp_queue_going_full; assign avs_burstcount = avs_burstcount_r; assign avs_bankselect = avs_bankselect_r; + assign dram_req_ready = !avs_waitrequest && !rsp_queue_going_full; + assign dram_rsp_valid = !avs_rspq_empty; `ifdef DBG_PRINT_AVS diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 75b4e628..dea9b298 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -51,7 +51,7 @@ localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX; localparam CCI_RD_WINDOW_SIZE = 8; localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE; -localparam CCI_RW_QUEUE_SIZE = 1024; +localparam CCI_RW_PENDING_SIZE= 256; localparam AFU_ID_L = 16'h0002; // AFU ID Lower localparam AFU_ID_H = 16'h0004; // AFU ID Higher @@ -182,10 +182,6 @@ wire[$bits(cp2af_sRxPort.c0.hdr.mdata)-1:0] cp2af_sRxPort_c0_hdr_mdata = cp2af_s wire [2:0] cmd_type = (cp2af_sRxPort.c0.mmioWrValid && (MMIO_CMD_TYPE == mmio_hdr.address)) ? 3'(cp2af_sRxPort.c0.data) : 3'h0; -`ifdef SCOPE -reg scope_start; -`endif - // disable assertions until full reset `ifndef VERILATOR reg [$clog2(RESET_DELAY+1)-1:0] assert_delay_ctr; @@ -208,15 +204,9 @@ always @(posedge clk) begin if (reset) begin mmio_tx.mmioRdValid <= 0; mmio_tx.hdr <= 0; - `ifdef SCOPE - scope_start <= 0; - `endif end else begin mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; mmio_tx.hdr.tid <= mmio_hdr.tid; - `ifdef SCOPE - scope_start <= cp2af_sRxPort.c0.mmioWrValid; - `endif end // serve MMIO write request @@ -636,7 +626,10 @@ end wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready; wire cci_rd_req_fire = af2cp_sTxPort.c0.valid; -wire cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid; + +wire cci_rd_rsp_fire = (STATE_WRITE == state) + && cp2af_sRxPort.c0.rspValid + && (cp2af_sRxPort.c0.hdr.resp_type == eRSP_RDLINE); assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr); assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata); @@ -786,19 +779,23 @@ wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready; wire cci_dram_rd_rsp_fire = cci_dram_rsp_valid && cci_dram_rsp_ready; wire cci_wr_req_fire = cci_dram_rd_rsp_fire; -wire cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid; -wire [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes; +wire cci_wr_rsp_fire = (STATE_READ == state) + && cp2af_sRxPort.c1.rspValid + && (cp2af_sRxPort.c1.hdr.resp_type == eRSP_WRLINE); + +wire [$clog2(CCI_RW_PENDING_SIZE+1)-1:0] cci_pending_writes; wire cci_pending_writes_empty; +wire cci_pending_writes_full; VX_pending_size #( - .SIZE (CCI_RW_QUEUE_SIZE) + .SIZE (CCI_RW_PENDING_SIZE) ) cci_wr_pending_size ( .clk (clk), .reset (reset), .push (cci_wr_req_fire), .pop (cci_wr_rsp_fire), .empty (cci_pending_writes_empty), - `UNUSED_PIN (full), + .full (cci_pending_writes_full), .size (cci_pending_writes) ); `UNUSED_VAR (cci_pending_writes) @@ -806,8 +803,8 @@ VX_pending_size #( assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0); assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_r; -assign af2cp_sTxPort.c1.valid = cci_dram_rsp_valid; -assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull; +assign af2cp_sTxPort.c1.valid = cci_dram_rd_rsp_fire; +assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull && !cci_pending_writes_full; assign cmd_read_done = (0 == cci_wr_req_ctr) && cci_pending_writes_empty; @@ -934,21 +931,21 @@ Vortex #() vortex ( `SCOPE_ASSIGN (cmd_type, cmd_type); `SCOPE_ASSIGN (state, state); -`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioRdValid, cp2af_sRxPort.c0.mmioRdValid); -`SCOPE_ASSIGN (ccip_sRxPort_c0_mmioWrValid, cp2af_sRxPort.c0.mmioWrValid); +`SCOPE_ASSIGN (cci_sRxPort_c0_mmioRdValid, cp2af_sRxPort.c0.mmioRdValid); +`SCOPE_ASSIGN (cci_sRxPort_c0_mmioWrValid, cp2af_sRxPort.c0.mmioWrValid); `SCOPE_ASSIGN (mmio_hdr_address, mmio_hdr.address); `SCOPE_ASSIGN (mmio_hdr_length, mmio_hdr.length); -`SCOPE_ASSIGN (ccip_sRxPort_c0_hdr_mdata, cp2af_sRxPort.c0.hdr.mdata); -`SCOPE_ASSIGN (ccip_sRxPort_c0_rspValid, cp2af_sRxPort.c0.rspValid); -`SCOPE_ASSIGN (ccip_sRxPort_c1_rspValid, cp2af_sRxPort.c1.rspValid); -`SCOPE_ASSIGN (ccip_sTxPort_c0_valid, af2cp_sTxPort.c0.valid); -`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_address, af2cp_sTxPort.c0.hdr.address); -`SCOPE_ASSIGN (ccip_sTxPort_c0_hdr_mdata, af2cp_sTxPort.c0.hdr.mdata); -`SCOPE_ASSIGN (ccip_sTxPort_c1_valid, af2cp_sTxPort.c1.valid); -`SCOPE_ASSIGN (ccip_sTxPort_c1_hdr_address, af2cp_sTxPort.c1.hdr.address); -`SCOPE_ASSIGN (ccip_sTxPort_c2_mmioRdValid, af2cp_sTxPort.c2.mmioRdValid); -`SCOPE_ASSIGN (ccip_sRxPort_c0TxAlmFull, cp2af_sRxPort.c0TxAlmFull); -`SCOPE_ASSIGN (ccip_sRxPort_c1TxAlmFull, cp2af_sRxPort.c1TxAlmFull); +`SCOPE_ASSIGN (cci_sRxPort_c0_hdr_mdata, cp2af_sRxPort.c0.hdr.mdata); +`SCOPE_ASSIGN (cci_sRxPort_c0_rspValid, cp2af_sRxPort.c0.rspValid); +`SCOPE_ASSIGN (cci_sRxPort_c1_rspValid, cp2af_sRxPort.c1.rspValid); +`SCOPE_ASSIGN (cci_sTxPort_c0_valid, af2cp_sTxPort.c0.valid); +`SCOPE_ASSIGN (cci_sTxPort_c0_hdr_address, af2cp_sTxPort.c0.hdr.address); +`SCOPE_ASSIGN (cci_sTxPort_c0_hdr_mdata, af2cp_sTxPort.c0.hdr.mdata); +`SCOPE_ASSIGN (cci_sTxPort_c1_valid, af2cp_sTxPort.c1.valid); +`SCOPE_ASSIGN (cci_sTxPort_c1_hdr_address, af2cp_sTxPort.c1.hdr.address); +`SCOPE_ASSIGN (cci_sTxPort_c2_mmioRdValid, af2cp_sTxPort.c2.mmioRdValid); +`SCOPE_ASSIGN (cci_sRxPort_c0TxAlmFull, cp2af_sRxPort.c0TxAlmFull); +`SCOPE_ASSIGN (cci_sRxPort_c1TxAlmFull, cp2af_sRxPort.c1TxAlmFull); `SCOPE_ASSIGN (avs_address, avs_address); `SCOPE_ASSIGN (avs_waitrequest, avs_waitrequest); `SCOPE_ASSIGN (avs_write_fire, avs_write && !avs_waitrequest); @@ -957,11 +954,23 @@ Vortex #() vortex ( `SCOPE_ASSIGN (avs_burstcount, avs_burstcount); `SCOPE_ASSIGN (avs_readdatavalid, avs_readdatavalid); `SCOPE_ASSIGN (mem_bank_select, mem_bank_select); -`SCOPE_ASSIGN (ccip_dram_rd_req_ctr, cci_dram_rd_req_ctr); -`SCOPE_ASSIGN (ccip_dram_wr_req_ctr, cci_dram_wr_req_ctr); -`SCOPE_ASSIGN (ccip_rd_req_ctr, cci_rd_req_ctr); -`SCOPE_ASSIGN (ccip_rd_rsp_ctr, cci_rd_rsp_ctr); -`SCOPE_ASSIGN (ccip_wr_req_ctr, cci_wr_req_ctr); +`SCOPE_ASSIGN (cci_dram_rd_req_ctr, cci_dram_rd_req_ctr); +`SCOPE_ASSIGN (cci_dram_wr_req_ctr, cci_dram_wr_req_ctr); +`SCOPE_ASSIGN (cci_rd_req_ctr, cci_rd_req_ctr); +`SCOPE_ASSIGN (cci_rd_rsp_ctr, cci_rd_rsp_ctr); +`SCOPE_ASSIGN (cci_wr_req_ctr, cci_wr_req_ctr); +`SCOPE_ASSIGN (cci_wr_req_fire, cci_wr_req_fire); +`SCOPE_ASSIGN (cci_wr_rsp_fire, cci_wr_rsp_fire); +`SCOPE_ASSIGN (cci_rd_req_fire, cci_rd_req_fire); +`SCOPE_ASSIGN (cci_rd_rsp_fire, cci_rd_rsp_fire); +`SCOPE_ASSIGN (cci_pending_reads_full, cci_pending_reads_full); +`SCOPE_ASSIGN (cci_pending_writes_empty, cci_pending_writes_empty); +`SCOPE_ASSIGN (cci_pending_writes_full, cci_pending_writes_full); +`SCOPE_ASSIGN (afu_dram_req_fire, (dram_req_valid && dram_req_ready)); +`SCOPE_ASSIGN (afu_dram_req_addr, dram_req_addr); +`SCOPE_ASSIGN (afu_dram_req_tag, dram_req_tag); +`SCOPE_ASSIGN (afu_dram_rsp_fire, (dram_rsp_valid && dram_rsp_ready)); +`SCOPE_ASSIGN (afu_dram_rsp_tag, dram_rsp_tag); wire scope_changed = `SCOPE_TRIGGER; @@ -973,7 +982,7 @@ VX_scope #( ) scope ( .clk (clk), .reset (reset), - .start (scope_start), + .start (1'b0), .stop (1'b0), .changed (scope_changed), .data_in ({`SCOPE_DATA_LIST,`SCOPE_UPDATE_LIST}), diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index a8f45f5b..8cf69211 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -25,7 +25,7 @@ module VX_scope #( localparam CMD_GET_DATA = 3'd1; localparam CMD_GET_WIDTH = 3'd2; localparam CMD_GET_COUNT = 3'd3; - localparam CMD_SET_DELAY = 3'd4; + localparam CMD_SET_START = 3'd4; localparam CMD_SET_STOP = 3'd5; localparam CMD_GET_OFFSET= 3'd6; localparam CMD_RESERVED2 = 3'd7; @@ -48,7 +48,7 @@ module VX_scope #( reg [`LOG2UP(DATAW)-1:0] read_offset; - reg start_wait, recording, data_valid, read_delta, started, delta_flush; + reg cmd_start, started, start_wait, recording, data_valid, read_delta, delta_flush; reg [BUSW-3:0] delay_val, delay_cntr; @@ -62,18 +62,19 @@ module VX_scope #( always @(posedge clk) begin if (reset) begin get_cmd <= $bits(get_cmd)'(CMD_GET_VALID); - raddr <= 0; - waddr <= 0; + raddr <= 0; + waddr <= 0; waddr_end <= $bits(waddr)'(SIZE-1); + cmd_start <= 0; started <= 0; - start_wait <= 0; + start_wait <= 0; recording <= 0; delay_val <= 0; - delay_cntr <= 0; + delay_cntr <= 0; delta <= 0; delta_flush <= 0; prev_trigger_id <= 0; - read_offset <= 0; + read_offset <= 0; read_delta <= 0; data_valid <= 0; timestamp <= 0; @@ -88,14 +89,25 @@ module VX_scope #( CMD_GET_DATA, CMD_GET_WIDTH, CMD_GET_OFFSET, - CMD_GET_COUNT: get_cmd <= $bits(get_cmd)'(cmd_type); - CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data); - CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data); + CMD_GET_COUNT: get_cmd <= $bits(get_cmd)'(cmd_type); + CMD_SET_START: begin + delay_val <= $bits(delay_val)'(cmd_data); + cmd_start <= 1; + `ifdef DBG_PRINT_SCOPE + $display("*** scope:CMD_SET_START: delay_val=%0d", $bits(delay_val)'(cmd_data)); + `endif + end + CMD_SET_STOP: begin + waddr_end <= $bits(waddr)'(cmd_data); + `ifdef DBG_PRINT_SCOPE + $display("*** scope:CMD_SET_STOP: waddr_end=%0d", $bits(waddr)'(cmd_data)); + `endif + end default:; endcase end - if (start && !started) begin + if (!started && (start || cmd_start)) begin started <= 1; delta_flush <= 1; if (0 == delay_val) begin @@ -104,9 +116,11 @@ module VX_scope #( delta <= 0; delay_cntr <= 0; start_time <= timestamp; + `ifdef DBG_PRINT_SCOPE + $display("*** scope: recording start - start_time=%0d", timestamp); + `endif end else begin start_wait <= 1; - recording <= 0; delay_cntr <= delay_val; end end @@ -118,6 +132,9 @@ module VX_scope #( recording <= 1; delta <= 0; start_time <= timestamp; + `ifdef DBG_PRINT_SCOPE + $display("*** scope: recording start - start_time=%0d", timestamp); + `endif end end @@ -143,7 +160,10 @@ module VX_scope #( end if (stop - || (waddr == waddr_end)) begin + || (waddr >= waddr_end)) begin + `ifdef DBG_PRINT_SCOPE + $display("*** scope: recording stop - waddr=(%0d, %0d)", waddr, waddr_end); + `endif waddr <= waddr; // keep last address recording <= 0; data_valid <= 1; diff --git a/hw/scripts/scope.json b/hw/scripts/scope.json index e1abfccf..63e1e130 100644 --- a/hw/scripts/scope.json +++ b/hw/scripts/scope.json @@ -76,21 +76,21 @@ "afu": { "!cmd_type":3, "!state":3, - "?ccip_sRxPort_c0_mmioRdValid":1, - "?ccip_sRxPort_c0_mmioWrValid":1, + "?cci_sRxPort_c0_mmioRdValid":1, + "?cci_sRxPort_c0_mmioWrValid":1, "mmio_hdr_address":16, "mmio_hdr_length":2, - "ccip_sRxPort_c0_hdr_mdata":16, - "?ccip_sRxPort_c0_rspValid":1, - "?ccip_sRxPort_c1_rspValid":1, - "?ccip_sTxPort_c0_valid":1, - "ccip_sTxPort_c0_hdr_address":42, - "ccip_sTxPort_c0_hdr_mdata":16, - "?ccip_sTxPort_c1_valid":1, - "ccip_sTxPort_c1_hdr_address":42, - "ccip_sTxPort_c2_mmioRdValid":1, - "!ccip_sRxPort_c0TxAlmFull":1, - "!ccip_sRxPort_c1TxAlmFull":1, + "cci_sRxPort_c0_hdr_mdata":16, + "?cci_sRxPort_c0_rspValid":1, + "?cci_sRxPort_c1_rspValid":1, + "?cci_sTxPort_c0_valid":1, + "cci_sTxPort_c0_hdr_address":42, + "cci_sTxPort_c0_hdr_mdata":16, + "?cci_sTxPort_c1_valid":1, + "cci_sTxPort_c1_hdr_address":42, + "cci_sTxPort_c2_mmioRdValid":1, + "!cci_sRxPort_c0TxAlmFull":1, + "!cci_sRxPort_c1TxAlmFull":1, "avs_address":26, "!avs_waitrequest":1, "?avs_write_fire":1, @@ -99,11 +99,23 @@ "avs_burstcount":4, "avs_readdatavalid":1, "mem_bank_select":1, - "ccip_dram_rd_req_ctr":26, - "ccip_dram_wr_req_ctr":26, - "ccip_rd_req_ctr":26, - "ccip_rd_rsp_ctr":3, - "ccip_wr_req_ctr":26 + "cci_dram_rd_req_ctr":26, + "cci_dram_wr_req_ctr":26, + "cci_rd_req_ctr":26, + "cci_rd_rsp_ctr":3, + "cci_wr_req_ctr":26, + "?cci_wr_req_fire":1, + "?cci_wr_rsp_fire":1, + "?cci_rd_req_fire":1, + "?cci_rd_rsp_fire":1, + "!cci_pending_reads_full":1, + "!cci_pending_writes_empty":1, + "!cci_pending_writes_full": 1, + "?afu_dram_req_fire": 1, + "afu_dram_req_addr": 26, + "afu_dram_req_tag": 28, + "?afu_dram_rsp_fire": 1, + "afu_dram_rsp_tag": 28 }, "afu/vortex": { "!reset": 1,