diff --git a/.travis.yml b/.travis.yml index 0929a98f..a656a14b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -20,19 +20,19 @@ install: - export PATH=$VERILATOR_ROOT/bin:$PATH script: - - make -j + - make -j > /dev/null 2>&1 - ci/test_runtime.sh - ci/test_driver.sh - ci/test_riscv_isa.sh - ci/test_opencl.sh + - ci/blackbox.sh -run_debug + - ci/blackbox.sh -run_scope - ci/blackbox.sh -run_1c - ci/blackbox.sh -run_2c - ci/blackbox.sh -run_4c - ci/blackbox.sh -run_4c_l2 - ci/blackbox.sh -run_8c_2l2 - ci/blackbox.sh -run_16c_4l2_l3 - - ci/blackbox.sh -run_debug - - ci/blackbox.sh -run_scope after_success: # Gather code coverage diff --git a/ci/blackbox.sh b/ci/blackbox.sh index ec0ae49e..ebf30e0b 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -4,7 +4,7 @@ run_1c() { # test single core make -C driver/opae/vlsim clean - CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim + CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim } @@ -13,7 +13,7 @@ run_2c() { # test 2 cores make -C driver/opae/vlsim clean - CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0" make -C driver/opae/vlsim + CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim } @@ -22,7 +22,7 @@ run_4c() { # test 4 cores make -C driver/opae/vlsim clean - CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0" make -C driver/opae/vlsim + CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim } @@ -31,7 +31,7 @@ run_4c_l2() { # test 4 cores with L2 make -C driver/opae/vlsim clean - CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim + CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim } @@ -40,7 +40,7 @@ run_8c_2l2() { # test 8 cores with 2xL2 make -C driver/opae/vlsim clean - CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim + CONFIGS="-DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim } @@ -49,7 +49,7 @@ run_16c_4l2_l3() { # test 16 cores with L2 and L3 make -C driver/opae/vlsim clean - CONFIGS="-DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1" make -C driver/opae/vlsim + CONFIGS="-DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=1 -DL3_ENABLE=1" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/dogfood run-vlsim make -C benchmarks/opencl/sgemm run-vlsim } @@ -58,7 +58,7 @@ run_debug() { # test debug build make -C driver/opae/vlsim clean - DEBUG=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim + DEBUG=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae/vlsim > /dev/null 2>&1 make -C driver/tests/demo run-vlsim } @@ -66,7 +66,7 @@ run_scope() { # test build with scope analyzer make -C driver/opae clean - SCOPE=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae + SCOPE=1 CONFIGS="-DNUM_CLUSTERS=1 -DNUM_CORES=1" make -C driver/opae > /dev/null 2>&1 make -C driver/tests/demo run-vlsim } diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index be90f49f..dde37dc0 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -115,12 +115,12 @@ wire vx_dram_rsp_ready; reg vx_snp_req_valid; reg [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; wire vx_snp_req_invalidate = 0; -reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; +wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; wire vx_snp_req_ready; -reg vx_snp_rsp_valid; +wire vx_snp_rsp_valid; `DEBUG_BEGIN -reg [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; +wire [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; `DEBUG_END reg vx_snp_rsp_ready; @@ -135,9 +135,10 @@ wire vx_csr_io_rsp_valid; wire [31:0] vx_csr_io_rsp_data; wire vx_csr_io_rsp_ready; +wire vx_busy; + reg vx_reset; reg vx_enabled; -wire vx_busy; // CMD variables ////////////////////////////////////////////////////////////// @@ -208,127 +209,125 @@ always @(posedge clk) begin `ifndef VERILATOR $asserton; // enable assertions `endif - mmio_tx.hdr <= 0; mmio_tx.mmioRdValid <= 0; + mmio_tx.hdr <= 0; `ifdef SCOPE scope_start <= 0; `endif + end else begin + mmio_tx.mmioRdValid <= cp2af_sRxPort.c0.mmioRdValid; + mmio_tx.hdr.tid <= mmio_hdr.tid; + `ifdef SCOPE + scope_start <= cp2af_sRxPort.c0.mmioWrValid; + `endif end - else begin - mmio_tx.mmioRdValid <= 0; - // serve MMIO write request - if (cp2af_sRxPort.c0.mmioWrValid) - begin - `ifdef SCOPE - scope_start <= 1; - `endif - case (mmio_hdr.address) - MMIO_IO_ADDR: begin - cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_MEM_ADDR: begin - cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_DATA_SIZE: begin - cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CMD_TYPE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); - `endif - end - `ifdef SCOPE - MMIO_SCOPE_WRITE: begin - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)); - `endif - end - `endif - MMIO_CSR_CORE: begin - cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CSR_ADDR: begin - cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); - `endif - end - MMIO_CSR_DATA: begin - cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); - `endif - end - default: begin - `ifdef DBG_PRINT_OPAE - $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); - `endif - end - endcase - end - // serve MMIO read requests - if (cp2af_sRxPort.c0.mmioRdValid) begin - mmio_tx.hdr.tid <= mmio_hdr.tid; // copy TID - case (mmio_hdr.address) - // AFU header - 16'h0000: mmio_tx.data <= { - 4'b0001, // Feature type = AFU - 8'b0, // reserved - 4'b0, // afu minor revision = 0 - 7'b0, // reserved - 1'b1, // end of DFH list = 1 - 24'b0, // next DFH offset = 0 - 4'b0, // afu major revision = 0 - 12'b0 // feature ID = 0 - }; - AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low - AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi - 16'h0006: mmio_tx.data <= 64'h0; // next AFU - 16'h0008: mmio_tx.data <= 64'h0; // reserved - MMIO_STATUS: begin - mmio_tx.data <= 64'(state); + // serve MMIO write request + if (cp2af_sRxPort.c0.mmioWrValid) begin + case (mmio_hdr.address) + MMIO_IO_ADDR: begin + cmd_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_IO_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_ccip_clAddr'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_MEM_ADDR: begin + cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_DATA_SIZE: begin + cmd_data_size <= $bits(cmd_data_size)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_DATA_SIZE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_data_size)'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_CMD_TYPE: begin + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CMD_TYPE: addr=%0h, data=%0d", $time, mmio_hdr.address, $bits(cmd_type)'(cp2af_sRxPort.c0.data)); + `endif + end + `ifdef SCOPE + MMIO_SCOPE_WRITE: begin + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_SCOPE_WRITE: addr=%0h, data=%0h", $time, mmio_hdr.address, 64'(cp2af_sRxPort.c0.data)); + `endif + end + `endif + MMIO_CSR_CORE: begin + cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_CORE: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_CSR_ADDR: begin + cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_ADDR: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data)); + `endif + end + MMIO_CSR_DATA: begin + cmd_csr_wdata <= $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_DATA: addr=%0h, %0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); + `endif + end + default: begin `ifdef DBG_PRINT_OPAE - if (state != STATE_WIDTH'(mmio_tx.data)) begin - $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); - end - `endif - end - MMIO_CSR_READ: begin - mmio_tx.data <= 64'(cmd_csr_rdata); - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata); - `endif - end - `ifdef SCOPE - MMIO_SCOPE_READ: begin - mmio_tx.data <= cmd_scope_rdata; - `ifdef DBG_PRINT_OPAE - $display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_scope_rdata); + $display("%t: Unknown MMIO Wr: addr=%0h, data=%0h", $time, mmio_hdr.address, $bits(cmd_csr_wdata)'(cp2af_sRxPort.c0.data)); `endif + end + endcase + end + + // serve MMIO read requests + if (cp2af_sRxPort.c0.mmioRdValid) begin + case (mmio_hdr.address) + // AFU header + 16'h0000: mmio_tx.data <= { + 4'b0001, // Feature type = AFU + 8'b0, // reserved + 4'b0, // afu minor revision = 0 + 7'b0, // reserved + 1'b1, // end of DFH list = 1 + 24'b0, // next DFH offset = 0 + 4'b0, // afu major revision = 0 + 12'b0 // feature ID = 0 + }; + AFU_ID_L: mmio_tx.data <= afu_id[63:0]; // afu id low + AFU_ID_H: mmio_tx.data <= afu_id[127:64]; // afu id hi + 16'h0006: mmio_tx.data <= 64'h0; // next AFU + 16'h0008: mmio_tx.data <= 64'h0; // reserved + MMIO_STATUS: begin + mmio_tx.data <= 64'(state); + `ifdef DBG_PRINT_OPAE + if (state != STATE_WIDTH'(mmio_tx.data)) begin + $display("%t: MMIO_STATUS: addr=%0h, state=%0d", $time, mmio_hdr.address, state); end `endif - default: begin - mmio_tx.data <= 64'h0; - `ifdef DBG_PRINT_OPAE - $display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address); - `endif - end - endcase - mmio_tx.mmioRdValid <= 1; // post response - end + end + MMIO_CSR_READ: begin + mmio_tx.data <= 64'(cmd_csr_rdata); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_csr_rdata); + `endif + end + `ifdef SCOPE + MMIO_SCOPE_READ: begin + mmio_tx.data <= cmd_scope_rdata; + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_SCOPE_READ: addr=%0h, data=%0h", $time, mmio_hdr.address, cmd_scope_rdata); + `endif + end + `endif + default: begin + mmio_tx.data <= 64'h0; + `ifdef DBG_PRINT_OPAE + $display("%t: Unknown MMIO Rd: addr=%0h", $time, mmio_hdr.address); + `endif + end + endcase end end @@ -687,6 +686,7 @@ always @(posedge clk) begin cci_rd_req_enable <= 0; cci_rd_req_wait <= 0; cci_dram_wr_req_ctr <= 0; + cci_dram_wr_req_addr_unqual <= 0; end else begin if ((STATE_IDLE == state) @@ -819,6 +819,7 @@ begin cci_wr_req_ctr <= 0; cci_pending_writes <= 0; cci_dram_rd_req_ctr <= 0; + cci_dram_rd_req_addr_unqual <= 0; end else begin if ((STATE_IDLE == state) @@ -871,6 +872,8 @@ end else begin assign snp_req_size = cmd_data_size; end +assign vx_snp_req_tag = (`VX_SNP_TAG_WIDTH)'(snp_req_ctr); + assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready; assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready; @@ -886,17 +889,14 @@ always @(posedge clk) begin vx_snp_rsp_ready <= 0; snp_req_ctr <= 0; snp_rsp_ctr <= 0; - end - else begin - + end else begin if ((STATE_IDLE == state) && (CMD_CLFLUSH == cmd_type)) begin - vx_snp_req_addr <= snp_req_baseaddr; - vx_snp_req_tag <= 0; - snp_req_ctr <= 0; - snp_rsp_ctr <= snp_req_size; vx_snp_req_valid <= (snp_req_size != 0); + vx_snp_req_addr <= snp_req_baseaddr; vx_snp_rsp_ready <= (snp_req_size != 0); + snp_req_ctr <= 0; + snp_rsp_ctr <= snp_req_size; end if ((STATE_CLFLUSH == state) @@ -909,11 +909,9 @@ always @(posedge clk) begin vx_snp_rsp_ready <= 0; end - if (vx_snp_req_fire) - begin + if (vx_snp_req_fire) begin assert(snp_req_ctr < snp_req_size); vx_snp_req_addr <= vx_snp_req_addr + `VX_DRAM_ADDR_WIDTH'(1); - vx_snp_req_tag <= (`VX_SNP_TAG_WIDTH)'(snp_req_ctr_next); snp_req_ctr <= snp_req_ctr_next; `ifdef DBG_PRINT_OPAE $display("%t: AFU Snp Req: addr=%0h, tag=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), (`VX_SNP_TAG_WIDTH)'(vx_snp_req_tag), (snp_req_size - snp_req_ctr_next)); @@ -949,19 +947,19 @@ assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_ always @(posedge clk) begin if (reset) begin csr_io_req_sent <= 0; - end - else begin + end else begin if (vx_csr_io_req_valid && vx_csr_io_req_ready) begin csr_io_req_sent <= 1; end if (cmd_csr_done) begin csr_io_req_sent <= 0; end - if ((STATE_CSR_READ == state) - && vx_csr_io_rsp_ready - && vx_csr_io_rsp_valid) begin - cmd_csr_rdata <= vx_csr_io_rsp_data; - end + end + + if ((STATE_CSR_READ == state) + && vx_csr_io_rsp_ready + && vx_csr_io_rsp_valid) begin + cmd_csr_rdata <= vx_csr_io_rsp_data; end end diff --git a/hw/rtl/VX_gpr_bypass.v b/hw/rtl/VX_gpr_bypass.v index fcb61320..e96f3862 100644 --- a/hw/rtl/VX_gpr_bypass.v +++ b/hw/rtl/VX_gpr_bypass.v @@ -40,7 +40,6 @@ module VX_gpr_bypass #( delayed_push <= push; assert(!use_buffer2 || use_buffer); if (pop) begin - buffer <= buffer2; use_buffer <= use_buffer2; use_buffer2 <= 0; end @@ -48,18 +47,29 @@ module VX_gpr_bypass #( if (use_buffer) begin assert(!use_buffer2); // full! use_buffer <= 1; - if (pop) begin - buffer <= data_in; - end else begin - buffer2 <= data_in; + if (!pop) begin use_buffer2 <= 1; end end else if (!pop) begin - buffer <= data_in; use_buffer <= 1; end end end + + if (pop) begin + buffer <= buffer2; + end + if (delayed_push) begin + if (use_buffer) begin + if (pop) begin + buffer <= data_in; + end else begin + buffer2 <= data_in; + end + end else if (!pop) begin + buffer <= data_in; + end + end end assign data_out = use_buffer ? buffer : data_in; diff --git a/hw/rtl/VX_gpr_stage.v b/hw/rtl/VX_gpr_stage.v index 1b996d3d..4a1bcd0f 100644 --- a/hw/rtl/VX_gpr_stage.v +++ b/hw/rtl/VX_gpr_stage.v @@ -38,15 +38,16 @@ module VX_gpr_stage #( always @(posedge clk) begin if (reset) begin - rsp_valid <= 0; + rsp_valid <= 0; end else begin - rsp_valid <= gpr_req_if.valid; - rsp_wid <= gpr_req_if.wid; - rsp_pc <= gpr_req_if.PC; - rs1_is_zero <= (0 == gpr_req_if.rs1); - rs2_is_zero <= (0 == gpr_req_if.rs2); + rsp_valid <= gpr_req_if.valid; end - end + + rsp_wid <= gpr_req_if.wid; + rsp_pc <= gpr_req_if.PC; + rs1_is_zero <= (0 == gpr_req_if.rs1); + rs2_is_zero <= (0 == gpr_req_if.rs2); + end `ifdef EXT_F_ENABLE @@ -62,16 +63,19 @@ module VX_gpr_stage #( end else begin if (rs3_delay) begin read_rs3 <= 1; - save_rs3 <= 1; end else if (read_fire) begin read_rs3 <= 0; - end - if (save_rs3) begin - rs3_data <= rs1_data; - save_rs3 <= 0; - end + end assert(!read_rs3 || rsp_wid == gpr_req_if.wid); - end + end + + if (rs3_delay) begin + save_rs3 <= 1; + end + if (save_rs3) begin + rs3_data <= rs1_data; + save_rs3 <= 0; + end end assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)}; diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 849d7610..1f0bcf0e 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -60,23 +60,20 @@ module VX_ibuffer #( if (reset) begin size_r[i] <= 0; end else begin - if (writing) begin - if (is_slot0) begin - q_data_out[i] <= q_data_in; - end - if (!reading) begin - size_r[i] <= size_r[i] + SIZEW'(1); - end + if (writing && !reading) begin + size_r[i] <= size_r[i] + SIZEW'(1); end - if (reading) begin - if (size_r[i] != 1) begin - q_data_out[i] <= q_data_prev[i]; - end - if (!writing) begin - size_r[i] <= size_r[i] - SIZEW'(1); - end + if (reading && !writing) begin + size_r[i] <= size_r[i] - SIZEW'(1); end - end + end + + if (writing && is_slot0) begin + q_data_out[i] <= q_data_in; + end + if (reading && (size_r[i] != 1)) begin + q_data_out[i] <= q_data_prev[i]; + end end assign q_full[i] = (size_r[i] == SIZE); diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_arb.v index b398e0f6..e57879b3 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_arb.v @@ -126,14 +126,15 @@ module VX_bank_core_req_arb #( end else begin pop_mask[sel_idx] <= 1; end - end - if ((0 == q_valids_cnt_r) || pop) begin - sel_tid <= sel_idx; - sel_byteen <= q_byteen[sel_idx]; - sel_addr <= q_addr[sel_idx]; - sel_writedata <= q_writedata[sel_idx]; end end + + if ((0 == q_valids_cnt_r) || pop) begin + sel_tid <= sel_idx; + sel_byteen <= q_byteen[sel_idx]; + sel_addr <= q_addr[sel_idx]; + sel_writedata <= q_writedata[sel_idx]; + end end if (CORE_TAG_ID_BITS != 0) begin diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index cf97625e..87796ff6 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -123,8 +123,7 @@ module VX_cache_miss_resrv #( head_ptr <= 0; tail_ptr <= 0; size <= 0; - end else begin - + end else begin if (update_ready_st0) begin ready_table <= ready_table | valid_address_match; end @@ -140,7 +139,6 @@ module VX_cache_miss_resrv #( end else begin valid_table[tail_ptr] <= 1; ready_table[tail_ptr] <= enqueue_ready_st3; - addr_table[tail_ptr] <= enqueue_addr_st3; tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); size <= size + $bits(size)'(1); end @@ -159,6 +157,12 @@ module VX_cache_miss_resrv #( end end + always @(posedge clk) begin + if (enqueue_st3 && !enqueue_msrq_st3) begin + addr_table[tail_ptr] <= enqueue_addr_st3; + end + end + VX_dp_ram #( .DATAW(`MRVQ_METADATA_WIDTH), .SIZE(MRVQ_SIZE), diff --git a/hw/rtl/libs/VX_bypass_buffer.v b/hw/rtl/libs/VX_bypass_buffer.v index 843b89aa..84fcfd98 100644 --- a/hw/rtl/libs/VX_bypass_buffer.v +++ b/hw/rtl/libs/VX_bypass_buffer.v @@ -32,10 +32,13 @@ module VX_bypass_buffer #( end if (valid_in && ~ready_out) begin assert(!buffer_valid); - buffer <= data_in; buffer_valid <= 1; end end + + if (valid_in && ~ready_out) begin + buffer <= data_in; + end end assign ready_in = ready_out || !buffer_valid; diff --git a/hw/rtl/libs/VX_cam_buffer.v b/hw/rtl/libs/VX_cam_buffer.v index cb34af4c..f3ca5cfa 100644 --- a/hw/rtl/libs/VX_cam_buffer.v +++ b/hw/rtl/libs/VX_cam_buffer.v @@ -52,10 +52,6 @@ module VX_cam_buffer #( full_r <= 1'b0; write_addr_r <= ADDRW'(1'b0); end else begin - if (acquire_slot) begin - assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr); - entries[write_addr] <= write_data; - end for (integer i = 0; i < CPORTS; i++) begin if (release_slot[i]) begin assert(0 == free_slots[release_addr[i]]) else $display("%t: freed slot at port %d", $time, release_addr[i]); @@ -65,6 +61,11 @@ module VX_cam_buffer #( write_addr_r <= free_index; full_r <= ~free_valid; end + + if (acquire_slot) begin + assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr); + entries[write_addr] <= write_data; + end end for (genvar i = 0; i < RPORTS; i++) begin diff --git a/hw/rtl/libs/VX_index_queue.v b/hw/rtl/libs/VX_index_queue.v index b40aa2a0..4bdb5f9d 100644 --- a/hw/rtl/libs/VX_index_queue.v +++ b/hw/rtl/libs/VX_index_queue.v @@ -42,7 +42,6 @@ module VX_index_queue #( valid <= 0; end else begin if (enqueue) begin - entries[wr_a] <= write_data; valid[wr_a] <= 1; wr_ptr <= wr_ptr + 1; end @@ -53,6 +52,10 @@ module VX_index_queue #( valid[read_addr] <= 0; end end + + if (enqueue) begin + entries[wr_a] <= write_data; + end end assign write_addr = wr_a; diff --git a/hw/rtl/libs/VX_scope.v b/hw/rtl/libs/VX_scope.v index 8b089259..159fb9a1 100644 --- a/hw/rtl/libs/VX_scope.v +++ b/hw/rtl/libs/VX_scope.v @@ -77,6 +77,7 @@ module VX_scope #( read_delta <= 0; data_valid <= 0; timestamp <= 0; + start_time <= 0; end else begin timestamp <= timestamp + 1; @@ -177,6 +178,20 @@ module VX_scope #( end end end + + if (recording) begin + if (UPDW_ENABLE) begin + if (delta_flush + || changed + || (trigger_id != prev_trigger_id)) begin + delta_store[waddr] <= delta; + data_store[waddr] <= data_in; + end + end else begin + delta_store[waddr] <= 0; + data_store[waddr] <= data_in; + end + end end always @(*) begin diff --git a/hw/rtl/libs/VX_serial_div.v b/hw/rtl/libs/VX_serial_div.v index f3a5d09f..f1b27416 100644 --- a/hw/rtl/libs/VX_serial_div.v +++ b/hw/rtl/libs/VX_serial_div.v @@ -58,32 +58,32 @@ module VX_serial_div #( if (reset) begin cntr <= 0; is_busy <= 0; - end - else begin + end else begin if (push) begin - for (integer i = 0; i < LANES; ++i) begin - working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0}; - denom_r[i] <= denom_qual[i]; - inv_quot[i] <= (denom[i] != 0) && signed_mode && (numer[i][31] ^ denom[i][31]); - inv_rem[i] <= signed_mode && numer[i][31]; - end - tag_r <= tag_in; - cntr <= WIDTHN; + cntr <= WIDTHN; is_busy <= 1; - end - else begin - if (!done) begin - for (integer i = 0; i < LANES; ++i) begin - working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} : - {sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1}; - end - cntr <= cntr - CNTRW'(1); - end + end else if (!done) begin + cntr <= cntr - CNTRW'(1); end if (pop) begin is_busy <= 0; end end + + if (push) begin + for (integer i = 0; i < LANES; ++i) begin + working[i] <= {{WIDTHD{1'b0}}, numer_qual[i], 1'b0}; + denom_r[i] <= denom_qual[i]; + inv_quot[i] <= (denom[i] != 0) && signed_mode && (numer[i][31] ^ denom[i][31]); + inv_rem[i] <= signed_mode && numer[i][31]; + end + tag_r <= tag_in; + end else if (!done) begin + for (integer i = 0; i < LANES; ++i) begin + working[i] <= sub_result[i][WIDTHD] ? {working[i][WIDTHN+MIN_ND-1:0], 1'b0} : + {sub_result[i][WIDTHD-1:0], working[i][WIDTHN-1:0], 1'b1}; + end + end end for (genvar i = 0; i < LANES; ++i) begin diff --git a/hw/rtl/libs/VX_skid_buffer.v b/hw/rtl/libs/VX_skid_buffer.v index cfcdcc77..b42ae448 100644 --- a/hw/rtl/libs/VX_skid_buffer.v +++ b/hw/rtl/libs/VX_skid_buffer.v @@ -27,18 +27,22 @@ module VX_skid_buffer #( if (ready_out) begin use_buffer <= 0; end - if (push) begin - buffer <= data_in; - if (valid_out_r && !ready_out) begin - assert(!use_buffer); - use_buffer <= 1; - end + if (push && valid_out_r && !ready_out) begin + assert(!use_buffer); + use_buffer <= 1; end if (!valid_out_r || ready_out) begin valid_out_r <= valid_in || use_buffer; - data_out_r <= use_buffer ? buffer : data_in; end end + + if (push) begin + buffer <= data_in; + end + + if (!valid_out_r || ready_out) begin + data_out_r <= use_buffer ? buffer : data_in; + end end assign ready_in = !use_buffer;