From 8775f63ec4d75b4fde1496cbaa150cfa0a3a5076 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 24 Jan 2021 16:49:22 -0800 Subject: [PATCH] lkg build rollout with 16cores optimization on arria10 --- .gitmodules | 3 + hw/opae/Makefile | 53 ++-- hw/opae/README | 1 + hw/opae/gen_sources.sh | 3 +- hw/opae/sources_16c.txt | 2 +- hw/opae/sources_32c.txt | 12 + hw/opae/sources_64c.txt | 12 + hw/opae/sources_8c.txt | 2 +- hw/rtl/VX_cluster.v | 2 +- hw/rtl/VX_config.vh | 6 +- hw/rtl/VX_csr_io_arb.v | 3 +- hw/rtl/VX_databus_arb.v | 6 +- hw/rtl/VX_ibuffer.v | 56 ++-- hw/rtl/VX_instr_demux.v | 26 +- hw/rtl/VX_platform.vh | 5 + hw/rtl/VX_scoreboard.v | 11 +- hw/rtl/Vortex.v | 2 +- hw/rtl/afu/VX_avs_wrapper.v | 6 +- hw/rtl/afu/vortex_afu.sv | 3 +- hw/rtl/cache/VX_bank.v | 344 ++++++++++++------------- hw/rtl/cache/VX_cache.v | 90 ++++--- hw/rtl/cache/VX_cache_core_rsp_merge.v | 15 +- hw/rtl/cache/VX_fifo_queue_xt.v | 125 --------- hw/rtl/cache/VX_flush_ctrl.v | 40 +++ hw/rtl/cache/VX_miss_resrv.v | 155 ++++------- hw/rtl/cache/VX_shared_mem.v | 6 +- hw/rtl/fp_cores/VX_fp_cvt.v | 25 +- hw/rtl/fp_cores/VX_fp_div.v | 5 +- hw/rtl/fp_cores/VX_fp_sqrt.v | 3 +- hw/rtl/libs/VX_fifo_queue.v | 209 +++++++++------ hw/rtl/libs/VX_fixed_arbiter.v | 26 +- hw/rtl/libs/VX_index_buffer.v | 9 +- hw/rtl/libs/VX_matrix_arbiter.v | 8 +- hw/rtl/libs/VX_onehot_encoder.v | 73 ++++++ hw/rtl/libs/VX_onehot_encooder.v | 28 -- hw/rtl/libs/VX_priority_encoder.v | 79 ++++-- hw/rtl/libs/VX_reset_relay.v | 37 ++- hw/rtl/libs/VX_rr_arbiter.v | 3 +- hw/rtl/libs/VX_scan.v | 60 +++++ hw/rtl/libs/VX_shift_register.v | 8 +- hw/rtl/libs/VX_skid_buffer.v | 100 ++++--- hw/rtl/libs/VX_stream_arbiter.v | 3 +- hw/rtl/libs/VX_stream_demux.v | 3 +- hw/syn/quartus/.gitignore | 8 +- hw/syn/quartus/cache/Makefile | 14 +- hw/syn/quartus/core/Makefile | 15 +- hw/syn/quartus/core8/Makefile | 72 ------ hw/syn/quartus/project.sdc | 4 +- hw/syn/quartus/project.tcl | 30 +-- hw/syn/quartus/top/Makefile | 19 +- hw/syn/quartus/top1/Makefile | 19 +- hw/syn/quartus/top16/Makefile | 7 +- hw/syn/quartus/top2/Makefile | 19 +- hw/syn/quartus/top32/Makefile | 7 +- hw/syn/quartus/top8/Makefile | 7 +- 55 files changed, 1021 insertions(+), 868 deletions(-) create mode 100644 hw/opae/sources_32c.txt create mode 100644 hw/opae/sources_64c.txt delete mode 100644 hw/rtl/cache/VX_fifo_queue_xt.v create mode 100644 hw/rtl/cache/VX_flush_ctrl.v create mode 100644 hw/rtl/libs/VX_onehot_encoder.v delete mode 100644 hw/rtl/libs/VX_onehot_encooder.v create mode 100644 hw/rtl/libs/VX_scan.v delete mode 100644 hw/syn/quartus/core8/Makefile diff --git a/.gitmodules b/.gitmodules index dd60e98f..2707d726 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +1,6 @@ [submodule "hw/rtl/fp_cores/fpnew"] path = hw/rtl/fp_cores/fpnew url = https://github.com/pulp-platform/fpnew.git +[submodule "hw/rtl/libs/basejump_stl"] + path = hw/rtl/libs/basejump_stl + url = https://github.com/bespoke-silicon-group/basejump_stl.git diff --git a/hw/opae/Makefile b/hw/opae/Makefile index 7d327f08..e92628f3 100644 --- a/hw/opae/Makefile +++ b/hw/opae/Makefile @@ -10,20 +10,21 @@ endif all: ase-1c -sources.txt: - ./gen_sources.sh > sources.txt +gen_sources_a10: + ./gen_sources.sh arria10 > sources.txt -gen_sources: sources.txt +gen_sources_s10: + ./gen_sources.sh stratix10 > sources.txt -ase-1c: gen_sources setup-ase-1c +ase-1c: gen_sources_a10 setup-ase-1c make -C $(ASE_BUILD_DIR)_1c cp $(RTL_DIR)/fp_cores/altera/arria10/*.hex $(ASE_BUILD_DIR)_1c/work -ase-2c: gen_sources setup-ase-2c +ase-2c: gen_sources_a10 setup-ase-2c make -C $(ASE_BUILD_DIR)_2c cp $(RTL_DIR)/fp_cores/altera/arria10/*.hex $(ASE_BUILD_DIR)_2c/work -ase-4c: gen_sources setup-ase-4c +ase-4c: gen_sources_a10 setup-ase-4c make -C $(ASE_BUILD_DIR)_4c cp $(RTL_DIR)/fp_cores/altera/arria10/*.hex $(ASE_BUILD_DIR)_4c/work @@ -33,29 +34,35 @@ setup-ase-2c: $(ASE_BUILD_DIR)_2c/Makefile setup-ase-4c: $(ASE_BUILD_DIR)_4c/Makefile -$(ASE_BUILD_DIR)_1c/Makefile: sources.txt +$(ASE_BUILD_DIR)_1c/Makefile: afu_sim_setup -s sources_1c.txt $(ASE_BUILD_DIR)_1c -$(ASE_BUILD_DIR)_2c/Makefile: sources.txt +$(ASE_BUILD_DIR)_2c/Makefile: afu_sim_setup -s sources_2c.txt $(ASE_BUILD_DIR)_2c -$(ASE_BUILD_DIR)_4c/Makefile: sources.txt +$(ASE_BUILD_DIR)_4c/Makefile: afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c -fpga-1c: gen_sources setup-fpga-1c +fpga-1c: gen_sources_a10 setup-fpga-1c cd $(FPGA_BUILD_DIR)_1c && $(RUN_SYNTH) -fpga-2c: gen_sources setup-fpga-2c +fpga-2c: gen_sources_a10 setup-fpga-2c cd $(FPGA_BUILD_DIR)_2c && $(RUN_SYNTH) -fpga-4c: gen_sources setup-fpga-4c +fpga-4c: gen_sources_a10 setup-fpga-4c cd $(FPGA_BUILD_DIR)_4c && $(RUN_SYNTH) -fpga-8c: gen_sources setup-fpga-8c +fpga-8c: gen_sources_a10 setup-fpga-8c cd $(FPGA_BUILD_DIR)_8c && $(RUN_SYNTH) -fpga-16c: gen_sources setup-fpga-16c +fpga-16c: gen_sources_a10 setup-fpga-16c cd $(FPGA_BUILD_DIR)_16c && $(RUN_SYNTH) + +fpga-32c: gen_sources_s10 setup-fpga-32c + cd $(FPGA_BUILD_DIR)_32c && $(RUN_SYNTH) + +fpga-64c: gen_sources_s10 setup-fpga-64c + cd $(FPGA_BUILD_DIR)_64c && $(RUN_SYNTH) setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf @@ -67,6 +74,10 @@ setup-fpga-8c: $(FPGA_BUILD_DIR)_8c/build/dcp.qpf setup-fpga-16c: $(FPGA_BUILD_DIR)_16c/build/dcp.qpf +setup-fpga-32c: $(FPGA_BUILD_DIR)_32c/build/dcp.qpf + +setup-fpga-64c: $(FPGA_BUILD_DIR)_64c/build/dcp.qpf + $(FPGA_BUILD_DIR)_1c/build/dcp.qpf: afu_synth_setup -s sources_1c.txt $(FPGA_BUILD_DIR)_1c @@ -82,6 +93,12 @@ $(FPGA_BUILD_DIR)_8c/build/dcp.qpf: $(FPGA_BUILD_DIR)_16c/build/dcp.qpf: afu_synth_setup -s sources_16c.txt $(FPGA_BUILD_DIR)_16c +$(FPGA_BUILD_DIR)_32c/build/dcp.qpf: + afu_synth_setup -s sources_32c.txt $(FPGA_BUILD_DIR)_32c + +$(FPGA_BUILD_DIR)_64c/build/dcp.qpf: + afu_synth_setup -s sources_64c.txt $(FPGA_BUILD_DIR)_64c + run-ase-1c: cd $(ASE_BUILD_DIR)_1c && make sim @@ -115,5 +132,11 @@ clean-fpga-8c: clean-fpga-16c: rm -rf $(FPGA_BUILD_DIR)_16c sources.txt -clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c +clean-fpga-32c: + rm -rf $(FPGA_BUILD_DIR)_32c sources.txt + +clean-fpga-64c: + rm -rf $(FPGA_BUILD_DIR)_64c sources.txt + +clean: clean-ase-1c clean-ase-2c clean-ase-4c clean-fpga-1c clean-fpga-2c clean-fpga-4c clean-fpga-8c clean-fpga-16c clean-fpga-32c clean-fpga-64c rm sources.txt \ No newline at end of file diff --git a/hw/opae/README b/hw/opae/README index c4879b39..09565f70 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -110,6 +110,7 @@ make -C core8 clean && make -C core8 > core8/build.log 2>&1 & make -C vortex clean && make -C vortex > vortex/build.log 2>&1 & make -C top clean && make -C top > top/build.log 2>&1 & make -C top1 clean && make -C top1 > top1/build.log 2>&1 & +make -C top2 clean && make -C top2 > top2/build.log 2>&1 & make -C top8 clean && make -C top8 > top8/build.log 2>&1 & make -C top16 clean && make -C top16 > top16/build.log 2>&1 & make -C top32 clean && make -C top32 > top32/build.log 2>&1 & diff --git a/hw/opae/gen_sources.sh b/hw/opae/gen_sources.sh index 12af3164..04edf220 100755 --- a/hw/opae/gen_sources.sh +++ b/hw/opae/gen_sources.sh @@ -29,8 +29,7 @@ add_files() done } -add_dirs $rtl_dir/fp_cores/altera/arria10 -#add_dirs $rtl_dir/fp_cores/altera/stratix10 +add_dirs $rtl_dir/fp_cores/altera/$1 add_dirs $rtl_dir/libs $rtl_dir/interfaces $rtl_dir/fp_cores $rtl_dir/cache $rtl_dir $rtl_dir/afu diff --git a/hw/opae/sources_16c.txt b/hw/opae/sources_16c.txt index f82b85dc..94aeb46c 100644 --- a/hw/opae/sources_16c.txt +++ b/hw/opae/sources_16c.txt @@ -1,6 +1,6 @@ +define+NUM_CORES=4 +define+NUM_CLUSTERS=4 -+define+L3_ENABLE=1 +#+define+L3_ENABLE=1 +define+SYNTHESIS +define+QUARTUS diff --git a/hw/opae/sources_32c.txt b/hw/opae/sources_32c.txt new file mode 100644 index 00000000..e1bf6649 --- /dev/null +++ b/hw/opae/sources_32c.txt @@ -0,0 +1,12 @@ ++define+NUM_CORES=8 ++define+NUM_CLUSTERS=4 +#+define+L3_ENABLE=1 + ++define+SYNTHESIS ++define+QUARTUS +#+define+PERF_ENABLE + +vortex_afu.json +QI:vortex_afu.qsf + +C:sources.txt \ No newline at end of file diff --git a/hw/opae/sources_64c.txt b/hw/opae/sources_64c.txt new file mode 100644 index 00000000..8cc42e1b --- /dev/null +++ b/hw/opae/sources_64c.txt @@ -0,0 +1,12 @@ ++define+NUM_CORES=8 ++define+NUM_CLUSTERS=8 +#+define+L3_ENABLE=1 + ++define+SYNTHESIS ++define+QUARTUS +#+define+PERF_ENABLE + +vortex_afu.json +QI:vortex_afu.qsf + +C:sources.txt \ No newline at end of file diff --git a/hw/opae/sources_8c.txt b/hw/opae/sources_8c.txt index dc889b75..a41c281f 100644 --- a/hw/opae/sources_8c.txt +++ b/hw/opae/sources_8c.txt @@ -1,6 +1,6 @@ +define+NUM_CORES=4 +define+NUM_CLUSTERS=2 -+define+L3_ENABLE=1 +#+define+L3_ENABLE=1 +define+SYNTHESIS +define+QUARTUS diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index ce8d0dac..6d605790 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -72,7 +72,7 @@ module VX_cluster #( wire core_reset; VX_reset_relay #( - .PASSTHRU (`NUM_CORES <= 2) + .DEPTH (`NUM_CORES > 1) ) reset_relay ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 3005a78d..27b1bfb1 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -318,7 +318,7 @@ // DRAM Response Queue Size `ifndef DDRSQ_SIZE -`define DDRSQ_SIZE 4 +`define DDRSQ_SIZE `MAX(4, (`DNUM_BANKS * 2)) `endif // SM Configurable Knobs ////////////////////////////////////////////////////// @@ -382,7 +382,7 @@ // DRAM Response Queue Size `ifndef L2DRSQ_SIZE -`define L2DRSQ_SIZE 4 +`define L2DRSQ_SIZE `MAX(4, (`L2NUM_BANKS * 2)) `endif // L3cache Configurable Knobs ///////////////////////////////////////////////// @@ -419,7 +419,7 @@ // DRAM Response Queue Size `ifndef L3DRSQ_SIZE -`define L3DRSQ_SIZE 4 +`define L3DRSQ_SIZE `MAX(4, (`L3NUM_BANKS * 2)) `endif `endif diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index 50f23797..6550ad72 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -45,7 +45,8 @@ module VX_csr_io_arb ( // responses wire csr_io_rsp_ready; VX_skid_buffer #( - .DATAW (32) + .DATAW (32), + .BUFFERED (1) ) csr_io_out_buffer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_databus_arb.v b/hw/rtl/VX_databus_arb.v index 329cd4fd..c3c4934a 100644 --- a/hw/rtl/VX_databus_arb.v +++ b/hw/rtl/VX_databus_arb.v @@ -39,7 +39,8 @@ module VX_databus_arb ( && (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT)); VX_skid_buffer #( - .DATAW (REQ_DATAW) + .DATAW (REQ_DATAW), + .BUFFERED (1) ) cache_out_buffer ( .clk (clk), .reset (reset), @@ -52,7 +53,8 @@ module VX_databus_arb ( ); VX_skid_buffer #( - .DATAW (REQ_DATAW) + .DATAW (REQ_DATAW), + .BUFFERED (1) ) smem_out_buffer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_ibuffer.v b/hw/rtl/VX_ibuffer.v index 2589346d..b0457aa6 100644 --- a/hw/rtl/VX_ibuffer.v +++ b/hw/rtl/VX_ibuffer.v @@ -3,8 +3,8 @@ module VX_ibuffer #( parameter CORE_ID = 0 ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, // inputs input wire freeze, // keep current warp @@ -43,7 +43,7 @@ module VX_ibuffer #( VX_fifo_queue #( .DATAW (DATAW), .SIZE (SIZE), - .FASTRAM (1) + .BUFFERED (1) ) queue ( .clk (clk), .reset (reset), @@ -65,23 +65,20 @@ module VX_ibuffer #( empty_r[i] <= 1; sizeMany_r[i] <= 0; end else begin - if (writing && !reading) begin - empty_r[i] <= 0; - if (used_r[i] == ADDRW'(SIZE-1)) begin - full_r[i] <= 1; + if (writing) begin + if (!reading) begin + empty_r[i] <= 0; + if (used_r[i] == ADDRW'(SIZE-1)) + full_r[i] <= 1; + if (used_r[i] == 1) + sizeMany_r[i] <= 1; end - if (used_r[i] == 1) begin - sizeMany_r[i] <= 1; - end - end - if (reading && !writing) begin + end else if (reading) begin full_r[i] <= 0; - if (used_r[i] == ADDRW'(1)) begin + if (used_r[i] == ADDRW'(1)) empty_r[i] <= 1; - end - if (used_r[i] == ADDRW'(2)) begin + if (used_r[i] == ADDRW'(2)) sizeMany_r[i] <= 0; - end end used_r[i] <= used_r[i] + ADDRW'($signed(2'(writing) - 2'(reading))); end @@ -139,8 +136,8 @@ module VX_ibuffer #( deq_valid_n = (| schedule_table_n); for (integer i = 0; i < `NUM_WARPS; i++) begin if (schedule_table_n[i]) begin - deq_wid_n = `NW_BITS'(i); - deq_instr_n = q_data_out[i]; + deq_wid_n = `NW_BITS'(i); + deq_instr_n = q_data_out[i]; schedule_table_n[i] = 0; break; end @@ -168,33 +165,18 @@ module VX_ibuffer #( end deq_valid <= deq_valid_n; - deq_wid <= deq_wid_n; - deq_instr <= deq_instr_n; if (warp_added && !warp_removed) begin num_warps <= num_warps + NWARPSW'(1); end else if (warp_removed && !warp_added) begin num_warps <= num_warps - NWARPSW'(1); - end - - `ifdef VERILATOR - /*if (enq_fire || deq_fire || deq_valid) begin - $display("*** %t: cur=%b(%0d), nxt=%b(%0d), enq=%b(%0d), deq=%b(%0d), nw=%0d(%0d,%0d,%0d,%0d), sched=%b, sched_n=%b", - $time, deq_valid, deq_wid, deq_valid_n, deq_wid_n, enq_fire, ibuf_enq_if.wid, deq_fire, ibuf_deq_if.wid, num_warps, used_r[0], used_r[1], used_r[2], used_r[3], schedule_table, schedule_table_n); - end*/ - begin // verify 'num_warps' - integer nw = 0; - for (integer i = 0; i < `NUM_WARPS; i++) begin - nw += 32'(!q_empty[i]); - end - assert(nw == 32'(num_warps)) else $error("%t: error: invalid num_warps: nw=%0d, ref=%0d", $time, num_warps, nw); - assert(~deq_valid || !q_empty[deq_wid]) else $error("%t: error: invalid schedule: wid=%0d", $time, deq_wid); - assert(~deq_fire || !q_empty[deq_wid]) else $error("%t: error: invalid dequeu: wid=%0d", $time, deq_wid); end - `endif end + + deq_wid <= deq_wid_n; + deq_instr <= deq_instr_n; end - + assign ibuf_enq_if.ready = ~q_full[ibuf_enq_if.wid]; assign q_data_in = {ibuf_enq_if.tmask, ibuf_enq_if.PC, diff --git a/hw/rtl/VX_instr_demux.v b/hw/rtl/VX_instr_demux.v index ad260a18..a3e7d0bd 100644 --- a/hw/rtl/VX_instr_demux.v +++ b/hw/rtl/VX_instr_demux.v @@ -17,11 +17,13 @@ module VX_instr_demux ( VX_gpu_req_if gpu_req_if ); wire [`NT_BITS-1:0] tid; + VX_priority_encoder #( - .DATAW (`NUM_THREADS) + .N (`NUM_THREADS) ) tid_select ( - .data_in (execute_if.tmask), - .data_out (tid), + .data_in (execute_if.tmask), + .index (tid), + `UNUSED_PIN (onehot), `UNUSED_PIN (valid_out) ); @@ -36,7 +38,8 @@ module VX_instr_demux ( VX_skid_buffer #( .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)), - .NOBACKPRESSURE (1) // ALU has no back pressure + .NOBACKPRESSURE (1), // ALU has no back pressure, + .BUFFERED (0) ) alu_buffer ( .clk (clk), .reset (reset), @@ -54,7 +57,8 @@ module VX_instr_demux ( wire lsu_req_ready; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)), + .BUFFERED (0) ) lsu_buffer ( .clk (clk), .reset (reset), @@ -72,7 +76,8 @@ module VX_instr_demux ( wire csr_req_ready; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32), + .BUFFERED (0) ) csr_buffer ( .clk (clk), .reset (reset), @@ -91,7 +96,8 @@ module VX_instr_demux ( wire mul_req_ready; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)), + .BUFFERED (0) ) mul_buffer ( .clk (clk), .reset (reset), @@ -111,7 +117,8 @@ module VX_instr_demux ( wire fpu_req_ready; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)), + .BUFFERED (0) ) fpu_buffer ( .clk (clk), .reset (reset), @@ -132,7 +139,8 @@ module VX_instr_demux ( wire gpu_req_ready; VX_skid_buffer #( - .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)) + .DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)), + .BUFFERED (0) ) gpu_buffer ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_platform.vh b/hw/rtl/VX_platform.vh index 953aed86..ff5afe69 100644 --- a/hw/rtl/VX_platform.vh +++ b/hw/rtl/VX_platform.vh @@ -46,6 +46,10 @@ if (!(cond)) $error msg; \ endgenerate +`define SASSERT(cond, msg) \ + always @(posedge clk) \ + assert(cond) else $error msg; \ + `define TRACING_ON /* verilator tracing_on */ `define TRACING_OFF /* verilator tracing_off */ @@ -53,6 +57,7 @@ `define USE_FAST_BRAM (* ramstyle = "MLAB, no_rw_check" *) `define NO_RW_RAM_CHECK (* altera_attribute = "-name add_pass_through_logic_to_inferred_rams off" *) +`define DISABLE_BRAM (* ramstyle = "logic" *) /////////////////////////////////////////////////////////////////////////////// diff --git a/hw/rtl/VX_scoreboard.v b/hw/rtl/VX_scoreboard.v index eb8a43bd..a1cc2078 100644 --- a/hw/rtl/VX_scoreboard.v +++ b/hw/rtl/VX_scoreboard.v @@ -47,17 +47,18 @@ module VX_scoreboard #( end `endif - reg [31:0] stall_ctr; + reg [31:0] deadlock_ctr; + wire [31:0] deadlock_timeout = 1000 * (10 ** (`L2_ENABLE + `L3_ENABLE)); always @(posedge clk) begin if (reset) begin - stall_ctr <= 0; + deadlock_ctr <= 0; end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin - stall_ctr <= stall_ctr + 1; - assert(stall_ctr < 1000) else $error("*** %t: core%0d-stalled: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b", + deadlock_ctr <= deadlock_ctr + 1; + assert(deadlock_ctr < deadlock_timeout) else $error("*** %t: core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b", $time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb, deq_inuse_regs[ibuf_deq_if.rd], deq_inuse_regs[ibuf_deq_if.rs1], deq_inuse_regs[ibuf_deq_if.rs2], deq_inuse_regs[ibuf_deq_if.rs3]); end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin - stall_ctr <= 0; + deadlock_ctr <= 0; end end diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 47d6b964..43edeeaa 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -73,7 +73,7 @@ module Vortex ( wire cluster_reset; VX_reset_relay #( - .PASSTHRU (`NUM_CLUSTERS <= 2) + .DEPTH (`NUM_CLUSTERS > 1) ) reset_relay ( .clk (clk), .reset (reset), diff --git a/hw/rtl/afu/VX_avs_wrapper.v b/hw/rtl/afu/VX_avs_wrapper.v index 91386486..7c0860c0 100644 --- a/hw/rtl/afu/VX_avs_wrapper.v +++ b/hw/rtl/afu/VX_avs_wrapper.v @@ -74,8 +74,7 @@ module VX_avs_wrapper #( VX_fifo_queue #( .DATAW (REQ_TAGW), - .SIZE (RD_QUEUE_SIZE), - .FASTRAM (1) + .SIZE (RD_QUEUE_SIZE) ) rd_req_queue ( .clk (clk), .reset (reset), @@ -92,8 +91,7 @@ module VX_avs_wrapper #( VX_fifo_queue #( .DATAW (AVS_DATAW), - .SIZE (RD_QUEUE_SIZE), - .FASTRAM (1) + .SIZE (RD_QUEUE_SIZE) ) rd_rsp_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/afu/vortex_afu.sv b/hw/rtl/afu/vortex_afu.sv index 82769500..b70541c1 100644 --- a/hw/rtl/afu/vortex_afu.sv +++ b/hw/rtl/afu/vortex_afu.sv @@ -727,8 +727,7 @@ end VX_fifo_queue #( .DATAW (CCI_RD_RQ_DATAW), - .SIZE (CCI_RD_QUEUE_SIZE), - .FASTRAM (1) + .SIZE (CCI_RD_QUEUE_SIZE) ) cci_rd_req_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index a4e3b3b5..aa43bb5a 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -89,98 +89,65 @@ module VX_bank #( wire [31:0] debug_pc_st0, debug_pc_st1; wire [`NW_BITS-1:0] debug_wid_st0, debug_wid_st1; /* verilator lint_on UNUSED */ -`endif - - wire drsq_pop; - wire drsq_empty, drsq_empty_next; - wire [`LINE_ADDR_WIDTH-1:0] drsq_addr_next; - wire [`CACHE_LINE_WIDTH-1:0] drsq_filldata_next; - wire drsq_flush_next; - - wire drsq_push = dram_rsp_valid && dram_rsp_ready; - - wire drsq_full; - assign dram_rsp_ready = !drsq_full; - - VX_fifo_queue_xt #( - .DATAW (`LINE_ADDR_WIDTH + $bits(dram_rsp_data) + 1), - .SIZE (DRSQ_SIZE), - .FASTRAM (1) - ) dram_rsp_queue ( - .clk (clk), - .reset (reset), - .push (drsq_push), - .pop (drsq_pop), - .data_in ({dram_rsp_addr, dram_rsp_data, dram_rsp_flush}), - `UNUSED_PIN (data_out), - .empty (drsq_empty), - .data_out_next ({drsq_addr_next, drsq_filldata_next, drsq_flush_next}), - .empty_next (drsq_empty_next), - .full (drsq_full), - `UNUSED_PIN (size) - ); +`endif wire creq_pop; wire creq_full, creq_empty; - wire creq_rw_next; - wire [WORD_SIZE-1:0] creq_byteen_next; - wire [`REQS_BITS-1:0] creq_tid_next; + wire creq_rw; + wire [WORD_SIZE-1:0] creq_byteen; + wire [`REQS_BITS-1:0] creq_tid; `IGNORE_WARNINGS_BEGIN - wire [`WORD_ADDR_WIDTH-1:0] creq_addr_next_unqual; + wire [`WORD_ADDR_WIDTH-1:0] creq_addr_unqual; `IGNORE_WARNINGS_END - wire [`LINE_ADDR_WIDTH-1:0] creq_addr_next; - wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel_next; - wire [`WORD_WIDTH-1:0] creq_writeword_next; - wire [CORE_TAG_WIDTH-1:0] creq_tag_next; + wire [`LINE_ADDR_WIDTH-1:0] creq_addr; + wire [`UP(`WORD_SELECT_BITS)-1:0] creq_wsel; + wire [`WORD_WIDTH-1:0] creq_writeword; + wire [CORE_TAG_WIDTH-1:0] creq_tag; - wire creq_push = (| core_req_valid) && core_req_ready; + wire creq_push = core_req_valid && core_req_ready; assign core_req_ready = !creq_full; if (BANK_ADDR_OFFSET == 0) begin - assign creq_addr_next = `LINE_SELECT_ADDR0(creq_addr_next_unqual); + assign creq_addr = `LINE_SELECT_ADDR0(creq_addr_unqual); end else begin - assign creq_addr_next = `LINE_SELECT_ADDRX(creq_addr_next_unqual); + assign creq_addr = `LINE_SELECT_ADDRX(creq_addr_unqual); end if (`WORD_SELECT_BITS != 0) begin - assign creq_wsel_next = creq_addr_next_unqual[`WORD_SELECT_BITS-1:0]; + assign creq_wsel = creq_addr_unqual[`WORD_SELECT_BITS-1:0]; end else begin - assign creq_wsel_next = 0; + assign creq_wsel = 0; end - VX_fifo_queue_xt #( - .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), - .SIZE (CREQ_SIZE), - .FASTRAM (1) + VX_fifo_queue #( + .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH), + .SIZE (CREQ_SIZE) ) core_req_queue ( - .clk (clk), - .reset (reset), - .push (creq_push), - .pop (creq_pop), - .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}), - `UNUSED_PIN (data_out), - .empty (creq_empty), - .data_out_next({creq_tag_next, creq_tid_next, creq_rw_next, creq_byteen_next, creq_addr_next_unqual, creq_writeword_next}), - `UNUSED_PIN (empty_next), - .full (creq_full), + .clk (clk), + .reset (reset), + .push (creq_push), + .pop (creq_pop), + .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}), + .data_out ({creq_tag, creq_tid, creq_rw, creq_byteen, creq_addr_unqual, creq_writeword}), + .empty (creq_empty), + .full (creq_full), + `UNUSED_PIN (alm_empty), + `UNUSED_PIN (alm_full), `UNUSED_PIN (size) ); - wire crsq_alm_full; - wire dreq_alm_full; - wire mshr_alm_full; - - wire mshr_pop; - wire mshr_pending_unqual_st0; + wire mshr_alm_full; + wire mshr_pop; + wire mshr_push; + wire mshr_pending; wire mshr_valid; - wire mshr_valid_next; - wire [`REQS_BITS-1:0] mshr_tid_next; - wire [`LINE_ADDR_WIDTH-1:0] mshr_addr_next; - wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel_next; - wire [`WORD_WIDTH-1:0] mshr_writeword_next; - wire [`REQ_TAG_WIDTH-1:0] mshr_tag_next; - wire mshr_rw_next; - wire [WORD_SIZE-1:0] mshr_byteen_next; + wire [`REQS_BITS-1:0] mshr_tid; + wire [`LINE_ADDR_WIDTH-1:0] mshr_addr; + wire [`UP(`WORD_SELECT_BITS)-1:0] mshr_wsel; + wire [`WORD_WIDTH-1:0] mshr_writeword; + wire [`REQ_TAG_WIDTH-1:0] mshr_tag; + wire mshr_rw; + wire [WORD_SIZE-1:0] mshr_byteen; wire [`LINE_ADDR_WIDTH-1:0] addr_st0, addr_st1; wire [`UP(`WORD_SELECT_BITS)-1:0] wsel_st0, wsel_st1; @@ -194,55 +161,76 @@ module VX_bank #( wire is_mshr_st0, is_mshr_st1; wire [`CACHE_LINE_WIDTH-1:0] readdata_st1; wire miss_st0, miss_st1; + wire prev_miss_hazard_st0, prev_miss_hazard_st1; wire force_miss_st0, force_miss_st1; - wire do_writeback_st0, do_writeback_st1; wire writeen_unqual_st0, writeen_unqual_st1; - wire mshr_push_unqual_st0, mshr_push_unqual_st1; - wire dreq_push_unqual_st0, dreq_push_unqual_st1; - wire writeen_st1; - wire core_req_hit_st1; + wire incoming_fill_st0, incoming_fill_st1; wire is_flush_st0; + wire mshr_pending_st0; - wire is_mshr_miss_st1 = valid_st1 && is_mshr_st1 && (miss_st1 || force_miss_st1); + wire crsq_alm_full, crsq_push, crsq_pop; + wire dreq_alm_full, dreq_push, dreq_pop; + wire drsq_pop; + + VX_pending_size #( + .SIZE (MSHR_SIZE) + ) mshr_pending_size ( + .clk (clk), + .reset (reset), + .push (creq_pop && !creq_rw), + .pop (crsq_push), + .full (mshr_alm_full), + `UNUSED_PIN (empty), + `UNUSED_PIN (size) + ); - // determine which queue to pop next in piority order - wire mshr_pop_unqual = mshr_valid; - wire drsq_pop_unqual = !mshr_pop_unqual && !drsq_empty; + // determine which queue to pop next in priority order + wire mshr_pop_unqual = mshr_valid + && !dreq_alm_full; // ensure DRAM request queue not full (deadlock prevention) + wire drsq_pop_unqual = !mshr_pop_unqual && dram_rsp_valid; wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty; + wire is_miss_st1 = valid_st1 && !is_fill_st1 && (miss_st1 || force_miss_st1); assign mshr_pop = mshr_pop_unqual - && !crsq_alm_full // ensure core response ready - && !is_mshr_miss_st1; // do not schedule another mshr request when the previous one missed - + && !crsq_alm_full // ensure core response ready + && !(is_miss_st1 && is_mshr_st1); // do not schedule another mshr request if the previous one missed + + assign drsq_pop = drsq_pop_unqual; assign creq_pop = creq_pop_unqual - && !crsq_alm_full // ensure core response ready && !dreq_alm_full // ensure dram request ready + && !crsq_alm_full // ensure core response ready && !mshr_alm_full; // ensure mshr enqueue ready - assign valid_st0 = mshr_pop || drsq_pop || creq_pop; - assign is_mshr_st0 = mshr_pop_unqual; - assign is_fill_st0 = drsq_pop_unqual; + assign dram_rsp_ready = drsq_pop; + + // we have a miss in mshr or entering it for the current address + wire mshr_pending_sel = mshr_pending + || (is_miss_st1 && (creq_addr == addr_st1)); VX_pipe_register #( - .DATAW (`LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + 1 + WORD_SIZE + `CACHE_LINE_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH + 1), - .RESETW (0) + .DATAW (1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + 1 + WORD_SIZE + `CACHE_LINE_WIDTH + `REQS_BITS + `REQ_TAG_WIDTH + 1 + 1), + .RESETW (1) ) pipe_reg0 ( .clk (clk), .reset (reset), .enable (1'b1), .data_in ({ - mshr_valid_next ? mshr_addr_next : (!drsq_empty_next ? drsq_addr_next : creq_addr_next), - mshr_valid_next ? mshr_wsel_next : creq_wsel_next, - mshr_valid_next ? mshr_rw_next : creq_rw_next, - mshr_valid_next ? mshr_byteen_next : creq_byteen_next, - mshr_valid_next ? {`WORDS_PER_LINE{mshr_writeword_next}} : (!drsq_empty_next ? drsq_filldata_next : {`WORDS_PER_LINE{creq_writeword_next}}), - mshr_valid_next ? mshr_tid_next : creq_tid_next, - mshr_valid_next ? `REQ_TAG_WIDTH'(mshr_tag_next) : `REQ_TAG_WIDTH'(creq_tag_next), - drsq_flush_next + mshr_pop || drsq_pop || creq_pop, + mshr_pop_unqual, + drsq_pop_unqual, + mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr : creq_addr), + mshr_pop_unqual ? mshr_wsel : creq_wsel, + mshr_pop_unqual ? mshr_rw : creq_rw, + mshr_pop_unqual ? mshr_byteen : creq_byteen, + mshr_pop_unqual ? {`WORDS_PER_LINE{mshr_writeword}} : (dram_rsp_valid ? dram_rsp_data : {`WORDS_PER_LINE{creq_writeword}}), + mshr_pop_unqual ? mshr_tid : creq_tid, + mshr_pop_unqual ? `REQ_TAG_WIDTH'(mshr_tag) : `REQ_TAG_WIDTH'(creq_tag), + mshr_pending_sel, + dram_rsp_flush }), - .data_out ({addr_st0, wsel_st0, mem_rw_st0, byteen_st0, data_st0, req_tid_st0, tag_st0, is_flush_st0}) + .data_out ({valid_st0, is_mshr_st0, is_fill_st0, addr_st0, wsel_st0, mem_rw_st0, byteen_st0, data_st0, req_tid_st0, tag_st0, mshr_pending_st0, is_flush_st0}) ); `ifdef DBG_CACHE_REQ_INFO @@ -262,7 +250,7 @@ module VX_bank #( .NUM_BANKS (NUM_BANKS), .WORD_SIZE (WORD_SIZE), .BANK_ADDR_OFFSET (BANK_ADDR_OFFSET) - ) tag_access ( + ) tag_access ( .clk (clk), .reset (reset), @@ -272,54 +260,56 @@ module VX_bank #( `endif // read/Fill - .lookup (creq_pop || mshr_pop), + .lookup (valid_st0 && !is_fill_st0), .addr (addr_st0), - .fill (drsq_pop), + .fill (valid_st0 && is_fill_st0), .is_flush (is_flush_st0), .missed (miss_st0) ); // redundant fills - wire is_redundant_fill = is_fill_st0 && !miss_st0; + wire is_redundant_fill = is_fill_st0 && !miss_st0; - // we have a miss in mshr or going to it for the current address - wire mshr_pending_st0 = mshr_pending_unqual_st0 - || (valid_st1 && (miss_st1 || force_miss_st1) && (addr_st0 == addr_st1)); + // we had a miss with prior request for the current address + assign prev_miss_hazard_st0 = is_miss_st1 && (addr_st0 == addr_st1); // force miss to ensure commit order when a new request has pending previous requests to same block - assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_st0; + // also force a miss for mshr requests when previous requests got a miss + assign force_miss_st0 = (!is_fill_st0 && !is_mshr_st0 && (mshr_pending_st0 || prev_miss_hazard_st0)) + || (is_mshr_st0 && is_miss_st1 && is_mshr_st1); assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0) || (is_fill_st0 && !is_redundant_fill); - wire send_fill_req_st0 = !is_fill_st0 && miss_st0 && !mem_rw_st0; - - assign do_writeback_st0 = !is_fill_st0 && mem_rw_st0; - - assign dreq_push_unqual_st0 = send_fill_req_st0 || do_writeback_st0; - - assign mshr_push_unqual_st0 = !is_fill_st0 && !mem_rw_st0; + assign incoming_fill_st0 = dram_rsp_valid && (addr_st0 == dram_rsp_addr); VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) ) pipe_reg1 ( .clk (clk), .reset (reset), .enable (1'b1), - .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, mshr_push_unqual_st0, dreq_push_unqual_st0, do_writeback_st0, miss_st0, force_miss_st0, addr_st0, wsel_st0, data_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), - .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, mshr_push_unqual_st1, dreq_push_unqual_st1, do_writeback_st1, miss_st1, force_miss_st1, addr_st1, wsel_st1, data_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) + .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, prev_miss_hazard_st0, incoming_fill_st0, miss_st0, force_miss_st0, addr_st0, wsel_st0, data_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), + .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, prev_miss_hazard_st1, incoming_fill_st1, miss_st1, force_miss_st1, addr_st1, wsel_st1, data_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) ); - assign core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1; - - assign writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1); + wire writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1); - wire dreq_push_st1 = dreq_push_unqual_st1 && (do_writeback_st1 || !force_miss_st1); + wire crsq_push_st1 = !is_fill_st1 && !mem_rw_st1 && !miss_st1 && !force_miss_st1; - wire mshr_push_st1 = mshr_push_unqual_st1 && (miss_st1 || force_miss_st1); + wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1); - wire crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; + wire incoming_fill_qual_st1 = (dram_rsp_valid && (addr_st1 == dram_rsp_addr)) + || incoming_fill_st1; + + wire send_fill_req_st1 = !is_fill_st1 && !mem_rw_st1 && miss_st1 + && (!force_miss_st1 || (is_mshr_st1 && !prev_miss_hazard_st1)) + && !incoming_fill_qual_st1; + + wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1; + + wire dreq_push_st1 = send_fill_req_st1 || do_writeback_st1; VX_data_access #( .BANK_ID (BANK_ID), @@ -361,14 +351,15 @@ module VX_bank #( end `endif - wire incoming_fill_st1 = valid_st0 && is_fill_st0 && (addr_st1 == addr_st0); - - wire mshr_push = valid_st1 && mshr_push_st1; + assign mshr_push = valid_st1 && mshr_push_st1; wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1; // push a missed request as 'ready' if it was a forced miss that actually had a hit // or the fill request for this block is comming - wire mshr_init_ready_state = !miss_st1 || incoming_fill_st1; + wire mshr_init_ready_state = !miss_st1 || incoming_fill_qual_st1; + + // use dram rsp or core req address to lookup the mshr + wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = dram_rsp_valid ? dram_rsp_addr : creq_addr; VX_miss_resrv #( .BANK_ID (BANK_ID), @@ -379,7 +370,7 @@ module VX_bank #( .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .MSHR_SIZE (MSHR_SIZE), - .ALM_FULL (MSHR_SIZE-1), + .ALM_FULL (MSHR_SIZE-2), .CORE_TAG_WIDTH (CORE_TAG_WIDTH) ) miss_resrv ( .clk (clk), @@ -397,22 +388,20 @@ module VX_bank #( .enqueue_addr (addr_st1), .enqueue_data ({data_st1[`WORD_WIDTH-1:0], req_tid_st1, tag_st1, mem_rw_st1, byteen_st1, wsel_st1}), .enqueue_is_mshr (is_mshr_st1), - .enqueue_as_ready (mshr_init_ready_state), - .enqueue_almfull (mshr_alm_full), + .enqueue_as_ready (mshr_init_ready_state), + `UNUSED_PIN (enqueue_almfull), + `UNUSED_PIN (enqueue_full), // lookup - .lookup_ready (drsq_pop && !is_flush_st0), - .lookup_addr (addr_st0), - .lookup_match (mshr_pending_unqual_st0), + .lookup_ready (drsq_pop), + .lookup_addr (lookup_addr), + .lookup_match (mshr_pending), // schedule .schedule (mshr_pop), .schedule_valid (mshr_valid), - `UNUSED_PIN (schedule_addr), - `UNUSED_PIN (schedule_data), - .schedule_valid_next(mshr_valid_next), - .schedule_addr_next (mshr_addr_next), - .schedule_data_next ({mshr_writeword_next, mshr_tid_next, mshr_tag_next, mshr_rw_next, mshr_byteen_next, mshr_wsel_next}), + .schedule_addr (mshr_addr), + .schedule_data ({mshr_writeword, mshr_tid, mshr_tag, mshr_rw, mshr_byteen, mshr_wsel}), // dequeue .dequeue (mshr_dequeue) @@ -422,8 +411,8 @@ module VX_bank #( wire crsq_empty; - wire crsq_push = valid_st1 && crsq_push_st1; - wire crsq_pop = core_rsp_valid && core_rsp_ready; + assign crsq_push = valid_st1 && crsq_push_st1; + assign crsq_pop = core_rsp_valid && core_rsp_ready; wire [`REQS_BITS-1:0] crsq_tid_st1 = req_tid_st1; wire [CORE_TAG_WIDTH-1:0] crsq_tag_st1 = CORE_TAG_WIDTH'(tag_st1); @@ -438,19 +427,18 @@ module VX_bank #( VX_fifo_queue #( .DATAW (`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), .SIZE (CRSQ_SIZE), - .ALM_FULL (CRSQ_SIZE-1), - .BUFFERED (1), - .FASTRAM (1) + .ALM_FULL (CRSQ_SIZE-2), + .BUFFERED (1) ) core_rsp_queue ( - .clk (clk), - .reset (reset), - .push (crsq_push), - .pop (crsq_pop), - .data_in ({crsq_tid_st1, crsq_tag_st1, crsq_data_st1}), - .data_out({core_rsp_tid, core_rsp_tag, core_rsp_data}), - .empty (crsq_empty), - .alm_full(crsq_alm_full), - `UNUSED_PIN (full), + .clk (clk), + .reset (reset), + .push (crsq_push), + .pop (crsq_pop), + .data_in ({crsq_tid_st1, crsq_tag_st1, crsq_data_st1}), + .data_out ({core_rsp_tid, core_rsp_tag, core_rsp_data}), + .empty (crsq_empty), + .alm_full (crsq_alm_full), + `UNUSED_PIN (full), `UNUSED_PIN (alm_empty), `UNUSED_PIN (size) ); @@ -461,10 +449,9 @@ module VX_bank #( wire dreq_empty; - wire dreq_push = valid_st1 && dreq_push_st1 - && (do_writeback_st1 || !incoming_fill_st1); + assign dreq_push = valid_st1 && dreq_push_st1; - wire dreq_pop = dram_req_valid && dram_req_ready; + assign dreq_pop = dram_req_valid && dram_req_ready; wire writeback = WRITE_ENABLE && do_writeback_st1; @@ -487,20 +474,18 @@ module VX_bank #( VX_fifo_queue #( .DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH), .SIZE (DREQ_SIZE), - .ALM_FULL (DREQ_SIZE-1), - .BUFFERED (NUM_BANKS == 1), - .FASTRAM (1) + .ALM_FULL (DREQ_SIZE-2) ) dram_req_queue ( - .clk (clk), - .reset (reset), - .push (dreq_push), - .pop (dreq_pop), - .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), - .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), - .empty (dreq_empty), - .alm_full(dreq_alm_full), - `UNUSED_PIN (full), - `UNUSED_PIN (alm_empty), + .clk (clk), + .reset (reset), + .push (dreq_push), + .pop (dreq_pop), + .data_in ({writeback, dreq_byteen, dreq_addr, dreq_data}), + .data_out ({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), + .empty (dreq_empty), + .alm_full (dreq_alm_full), + `UNUSED_PIN (full), + `UNUSED_PIN (alm_empty), `UNUSED_PIN (size) ); @@ -510,9 +495,9 @@ module VX_bank #( `SCOPE_ASSIGN (valid_st1, valid_st1); `SCOPE_ASSIGN (is_fill_st0, is_fill_st0); `SCOPE_ASSIGN (is_mshr_st0, is_mshr_st0); - `SCOPE_ASSIGN (miss_st0, miss_st0); + `SCOPE_ASSIGN (miss_st0, miss_st0); `SCOPE_ASSIGN (force_miss_st0, force_miss_st0); - `SCOPE_ASSIGN (mshr_push, mshr_push); + `SCOPE_ASSIGN (mshr_push, mshr_push); `SCOPE_ASSIGN (crsq_alm_full, crsq_alm_full); `SCOPE_ASSIGN (dreq_alm_full, dreq_alm_full); `SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full); @@ -528,24 +513,31 @@ module VX_bank #( `ifdef DBG_PRINT_CACHE_BANK always @(posedge clk) begin - if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_st1) begin + if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_qual_st1) begin $display("%t: miss with incoming fill - addr=%0h", $time, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID)); assert(!is_mshr_st1); end if (crsq_alm_full || dreq_alm_full || mshr_alm_full) begin $display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_alm_full, dreq_alm_full, mshr_alm_full); end - if (drsq_pop) begin + if (valid_st0 && is_fill_st0) begin if (is_flush_st0) $display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID)); else $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), data_st0); end - if (creq_pop || mshr_pop) begin - if (mem_rw_st0) - $display("%t: cache%0d:%0d core-wr-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, tag_st0, req_tid_st0, byteen_st0, data_st0[`WORD_WIDTH-1:0], debug_wid_st0, debug_pc_st0); - else - $display("%t: cache%0d:%0d core-rd-req: addr=%0h, is_mshr=%b, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), is_mshr_st0, tag_st0, req_tid_st0, byteen_st0, debug_wid_st0, debug_pc_st0); + if (valid_st0 && !is_fill_st0) begin + if (is_mshr_st0) begin + if (mem_rw_st0) + $display("%t: cache%0d:%0d mshr-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), tag_st0, req_tid_st0, byteen_st0, data_st0[`WORD_WIDTH-1:0], debug_wid_st0, debug_pc_st0); + else + $display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), tag_st0, req_tid_st0, byteen_st0, debug_wid_st0, debug_pc_st0); + end else begin + if (mem_rw_st0) + $display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), tag_st0, req_tid_st0, byteen_st0, data_st0[`WORD_WIDTH-1:0], debug_wid_st0, debug_pc_st0); + else + $display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), tag_st0, req_tid_st0, byteen_st0, debug_wid_st0, debug_pc_st0); + end end if (crsq_push) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag_st1, crsq_tid_st1, crsq_data_st1, debug_wid_st1, debug_pc_st1); diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index fa4cb7de..bab3c83d 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -85,7 +85,6 @@ module VX_cache #( ); `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) - `UNUSED_VAR (dram_rsp_tag) wire [NUM_BANKS-1:0] per_bank_core_req_valid; wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid; @@ -111,6 +110,11 @@ module VX_cache #( wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready; + wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data_qual; + wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag_qual; + wire [`LINE_ADDR_WIDTH-1:0] flush_addr; + wire flush_enable; + `ifdef PERF_ENABLE wire [NUM_BANKS-1:0] perf_read_miss_per_bank; wire [NUM_BANKS-1:0] perf_write_miss_per_bank; @@ -118,24 +122,55 @@ module VX_cache #( wire [NUM_BANKS-1:0] perf_pipe_stall_per_bank; `endif - reg flush_enable; - reg [`LINE_SELECT_BITS-1:0] flush_ctr; + /////////////////////////////////////////////////////////////////////////// - always @(posedge clk) begin - if (reset || flush) begin - flush_enable <= 1; - flush_ctr <= 0; - end else begin - if (flush_enable && (& per_bank_dram_rsp_ready)) begin - if (flush_addr == ((2 ** `LINE_SELECT_BITS)-1)) begin - flush_enable <= 0; - end - flush_ctr <= flush_ctr + 1; - end - end + wire drsq_full, drsq_empty; + wire drsq_push, drsq_pop; + + assign drsq_push = dram_rsp_valid && dram_rsp_ready; + assign dram_rsp_ready = !drsq_full; + + VX_fifo_queue #( + .DATAW (DRAM_TAG_WIDTH + `CACHE_LINE_WIDTH), + .SIZE (DRSQ_SIZE) + ) dram_rsp_queue ( + .clk (clk), + .reset (reset), + .push (drsq_push), + .pop (drsq_pop), + .data_in ({dram_rsp_tag, dram_rsp_data}), + .data_out ({dram_rsp_tag_qual, dram_rsp_data_qual}), + .empty (drsq_empty), + .full (drsq_full), + `UNUSED_PIN (alm_full), + `UNUSED_PIN (alm_empty), + `UNUSED_PIN (size) + ); + + if (NUM_BANKS == 1) begin + `UNUSED_VAR (dram_rsp_tag_qual) + assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready && !flush_enable; + end else begin + assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag_qual)] && !flush_enable; end - wire [`LINE_ADDR_WIDTH-1:0] flush_addr = `LINE_ADDR_WIDTH'(flush_ctr); + /////////////////////////////////////////////////////////////////////////// + + VX_flush_ctrl #( + .CACHE_SIZE (CACHE_SIZE), + .CACHE_LINE_SIZE (CACHE_LINE_SIZE), + .NUM_BANKS (NUM_BANKS), + .WORD_SIZE (WORD_SIZE) + ) flush_ctrl ( + .clk (clk), + .reset (reset), + .flush (flush), + .addr (flush_addr), + .ready_out ((& per_bank_dram_rsp_ready)), + .valid_out (flush_enable) + ); + + /////////////////////////////////////////////////////////////////////////// VX_cache_core_req_bank_sel #( .CACHE_LINE_SIZE (CACHE_LINE_SIZE), @@ -143,8 +178,7 @@ module VX_cache #( .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), - .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET), - .BUFFERED (NUM_BANKS > 1) + .BANK_ADDR_OFFSET(BANK_ADDR_OFFSET) ) core_req_bank_sel ( .clk (clk), .reset (reset), @@ -170,13 +204,7 @@ module VX_cache #( .per_bank_core_req_ready (per_bank_core_req_ready) ); - assign dram_req_tag = dram_req_addr; - if (NUM_BANKS == 1) begin - `UNUSED_VAR (dram_rsp_tag) - assign dram_rsp_ready = per_bank_dram_rsp_ready && !flush_enable; - end else begin - assign dram_rsp_ready = per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag)] && !flush_enable; - end + /////////////////////////////////////////////////////////////////////////// for (genvar i = 0; i < NUM_BANKS; i++) begin wire curr_bank_core_req_valid; @@ -238,13 +266,13 @@ module VX_cache #( // DRAM response if (NUM_BANKS == 1) begin - assign curr_bank_dram_rsp_valid = dram_rsp_valid || flush_enable; - assign curr_bank_dram_rsp_addr = flush_enable ? flush_addr : dram_rsp_tag; + assign curr_bank_dram_rsp_valid = !drsq_empty || flush_enable; + assign curr_bank_dram_rsp_addr = flush_enable ? flush_addr : dram_rsp_tag_qual; end else begin - assign curr_bank_dram_rsp_valid = (dram_rsp_valid && (`DRAM_ADDR_BANK(dram_rsp_tag) == i)) || flush_enable; - assign curr_bank_dram_rsp_addr = flush_enable ? flush_addr : `DRAM_TO_LINE_ADDR(dram_rsp_tag); + assign curr_bank_dram_rsp_valid = (!drsq_empty && (`DRAM_ADDR_BANK(dram_rsp_tag_qual) == i)) || flush_enable; + assign curr_bank_dram_rsp_addr = flush_enable ? flush_addr : `DRAM_TO_LINE_ADDR(dram_rsp_tag_qual); end - assign curr_bank_dram_rsp_data = dram_rsp_data; + assign curr_bank_dram_rsp_data = dram_rsp_data_qual; assign curr_bank_dram_rsp_flush = flush_enable; assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready; @@ -352,6 +380,8 @@ module VX_cache #( .ready_out (dram_req_ready) ); + assign dram_req_tag = dram_req_addr; + `ifdef PERF_ENABLE // per cycle: core_reads, core_writes reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle; diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 47cfb5ec..d50b0e45 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -37,12 +37,10 @@ module VX_cache_core_rsp_merge #( if (CORE_TAG_ID_BITS != 0) begin reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual; - reg core_rsp_valid_unaual_any; wire core_rsp_ready_unqual; always @(*) begin core_rsp_valid_unqual = 0; - core_rsp_valid_unaual_any = 0; core_rsp_tag_unqual = 'x; core_rsp_data_unqual = 'x; core_rsp_bank_select = 0; @@ -55,8 +53,7 @@ module VX_cache_core_rsp_merge #( for (integer i = 0; i < NUM_BANKS; i++) begin if (per_bank_core_rsp_valid[i] - && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin - core_rsp_valid_unaual_any = 1; + && (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == core_rsp_tag_unqual[CORE_TAG_ID_BITS-1:0])) begin core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1; core_rsp_data_unqual[per_bank_core_rsp_tid[i]] = per_bank_core_rsp_data[i]; core_rsp_bank_select[i] = core_rsp_ready_unqual; @@ -66,13 +63,16 @@ module VX_cache_core_rsp_merge #( wire core_rsp_valid_out; wire [NUM_REQS-1:0] core_rsp_valid_out_mask; + + wire core_rsp_valid_any = (| per_bank_core_rsp_valid); VX_skid_buffer #( - .DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)) + .DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)), + .BUFFERED (1) ) pipe_reg ( .clk (clk), .reset (reset), - .valid_in (core_rsp_valid_unaual_any), + .valid_in (core_rsp_valid_any), .data_in ({core_rsp_valid_unqual, core_rsp_tag_unqual, core_rsp_data_unqual}), .ready_in (core_rsp_ready_unqual), .valid_out (core_rsp_valid_out), @@ -106,7 +106,8 @@ module VX_cache_core_rsp_merge #( for (genvar i = 0; i < NUM_REQS; i++) begin VX_skid_buffer #( - .DATAW (CORE_TAG_WIDTH + `WORD_WIDTH) + .DATAW (CORE_TAG_WIDTH + `WORD_WIDTH), + .BUFFERED (1) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_fifo_queue_xt.v b/hw/rtl/cache/VX_fifo_queue_xt.v deleted file mode 100644 index 02ebe786..00000000 --- a/hw/rtl/cache/VX_fifo_queue_xt.v +++ /dev/null @@ -1,125 +0,0 @@ -`include "VX_platform.vh" - -module VX_fifo_queue_xt #( - parameter DATAW = 1, - parameter SIZE = 2, - parameter ADDRW = $clog2(SIZE), - parameter SIZEW = $clog2(SIZE+1), - parameter FASTRAM = 0 -) ( - input wire clk, - input wire reset, - input wire push, - input wire pop, - input wire [DATAW-1:0] data_in, - output wire [DATAW-1:0] data_out, - output wire empty, - output wire [DATAW-1:0] data_out_next, - output wire empty_next, - output wire full, - output wire [SIZEW-1:0] size -); - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r, dout_n_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r, rd_ptr_n_r; - reg full_r; - reg empty_r, empty_n_r; - reg [ADDRW-1:0] used_r; - - always @(posedge clk) begin - if (reset) begin - full_r <= 0; - used_r <= 0; - end else begin - assert(!push || !full); - assert(!pop || !empty_r); - if (push) begin - if (!pop) begin - if (used_r == ADDRW'(SIZE-1)) - full_r <= 1; - end - end else if (pop) begin - full_r <= 0; - end - - used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop))); - end - end - - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= 0; - rd_ptr_r <= 0; - rd_ptr_n_r <= 1; - end else begin - if (push) begin - wr_ptr_r <= wr_ptr_r + ADDRW'(1); - end - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - if (SIZE > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin // (SIZE == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; - end - end - end - end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .BUFFERED (0), - .RWCHECK (1), - .FASTRAM (FASTRAM) - ) dp_ram ( - .clk(clk), - .waddr(wr_ptr_r), - .raddr(rd_ptr_n_r), - .wren(push), - .byteen(1'b1), - .rden(1'b1), - .din(data_in), - .dout(dout) - ); - - always @(*) begin - empty_n_r = empty_r; - if (reset) begin - empty_n_r = 1; - end else begin - if (push) begin - if (!pop) begin - empty_n_r = 0; - end - end else if (pop) begin - if (used_r == ADDRW'(1)) begin - empty_n_r = 1; - end - end - end - end - - always @(*) begin - dout_n_r = dout_r; - if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin - dout_n_r = data_in; - end else if (pop) begin - dout_n_r = dout; - end - end - - always @(posedge clk) begin - empty_r <= empty_n_r; - dout_r <= dout_n_r; - end - - assign data_out = dout_r; - assign data_out_next = dout_n_r; - assign empty = empty_r; - assign empty_next = empty_n_r; - assign full = full_r; - assign size = {full_r, used_r}; - -endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_flush_ctrl.v b/hw/rtl/cache/VX_flush_ctrl.v new file mode 100644 index 00000000..a67ce512 --- /dev/null +++ b/hw/rtl/cache/VX_flush_ctrl.v @@ -0,0 +1,40 @@ +`include "VX_cache_config.vh" + +module VX_flush_ctrl #( + // Size of cache in bytes + parameter CACHE_SIZE = 16384, + // Size of line inside a bank in bytes + parameter CACHE_LINE_SIZE = 1, + // Number of banks + parameter NUM_BANKS = 1, + // Size of a word in bytes + parameter WORD_SIZE = 1 +) ( + input wire clk, + input wire reset, + input wire flush, + output wire [`LINE_ADDR_WIDTH-1:0] addr, + input wire ready_out, + output wire valid_out +); + reg flush_enable; + reg [`LINE_SELECT_BITS-1:0] flush_ctr; + + always @(posedge clk) begin + if (reset || flush) begin + flush_enable <= 1; + flush_ctr <= 0; + end else begin + if (flush_enable && ready_out) begin + if (flush_ctr == ((2 ** `LINE_SELECT_BITS)-1)) begin + flush_enable <= 0; + end + flush_ctr <= flush_ctr + 1; + end + end + end + + assign addr = `LINE_ADDR_WIDTH'(flush_ctr); + assign valid_out = flush_enable; + +endmodule \ No newline at end of file diff --git a/hw/rtl/cache/VX_miss_resrv.v b/hw/rtl/cache/VX_miss_resrv.v index be5f0c0d..ebdecf5c 100644 --- a/hw/rtl/cache/VX_miss_resrv.v +++ b/hw/rtl/cache/VX_miss_resrv.v @@ -38,6 +38,7 @@ module VX_miss_resrv #( input wire [`MSHR_DATA_WIDTH-1:0] enqueue_data, input wire enqueue_is_mshr, input wire enqueue_as_ready, + output wire enqueue_full, output wire enqueue_almfull, // lookup @@ -50,9 +51,6 @@ module VX_miss_resrv #( output wire schedule_valid, output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr, output wire [`MSHR_DATA_WIDTH-1:0] schedule_data, - output wire schedule_valid_next, - output wire [`LINE_ADDR_WIDTH-1:0] schedule_addr_next, - output wire [`MSHR_DATA_WIDTH-1:0] schedule_data_next, // dequeue input wire dequeue @@ -63,16 +61,10 @@ module VX_miss_resrv #( reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] ready_table; - reg [ADDRW-1:0] schedule_ptr, schedule_n_ptr; - reg [ADDRW-1:0] restore_ptr; - reg [ADDRW-1:0] head_ptr, tail_ptr; + reg [ADDRW-1:0] head_ptr, tail_ptr; + reg [ADDRW-1:0] schedule_ptr, restore_ptr; reg [ADDRW-1:0] used_r; - reg full_r, almost_full_r; - - reg schedule_valid_r, schedule_valid_n_r; - reg [`LINE_ADDR_WIDTH-1:0] schedule_addr_r, schedule_addr_n_r; - reg [`MSHR_DATA_WIDTH-1:0] dout_r, dout_n_r; - wire [`MSHR_DATA_WIDTH-1:0] dout; + reg alm_full_r, full_r; wire [MSHR_SIZE-1:0] valid_address_match; for (genvar i = 0; i < MSHR_SIZE; i++) begin @@ -82,22 +74,22 @@ module VX_miss_resrv #( assign lookup_match = (| valid_address_match); wire push_new = enqueue && !enqueue_is_mshr; + wire restore = enqueue && enqueue_is_mshr; - wire [ADDRW-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); + wire [`LOG2UP(MSHR_SIZE)-1:0] head_ptr_n = head_ptr + $bits(head_ptr)'(1); always @(posedge clk) begin if (reset) begin - valid_table <= 0; - ready_table <= 0; - schedule_ptr <= 0; - schedule_n_ptr <= 1; - restore_ptr <= 0; - head_ptr <= 0; - tail_ptr <= 0; - used_r <= 0; - full_r <= 0; - almost_full_r <= 0; + valid_table <= 0; + ready_table <= 0; + head_ptr <= 0; + tail_ptr <= 0; + schedule_ptr <= 0; + restore_ptr <= 0; + used_r <= 0; + alm_full_r <= 0; + full_r <= 0; end else begin // WARNING: lookup should happen enqueue for ready_table's correct update @@ -106,52 +98,46 @@ module VX_miss_resrv #( ready_table <= ready_table | valid_address_match; end - if (enqueue) begin - if (enqueue_is_mshr) begin - // restore schedule, returning missed msrq entry - valid_table[restore_ptr] <= 1; - ready_table[restore_ptr] <= enqueue_as_ready; - restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); - schedule_ptr <= head_ptr; - schedule_n_ptr <= head_ptr_n; - end else begin - // push new entry - assert(!full_r); - valid_table[tail_ptr] <= 1; - ready_table[tail_ptr] <= enqueue_as_ready; - tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); - end + if (push_new) begin + // push new entry + assert(!full_r); + valid_table[tail_ptr] <= 1; + ready_table[tail_ptr] <= enqueue_as_ready; + tail_ptr <= tail_ptr + $bits(tail_ptr)'(1); + end else if (restore) begin + assert(!schedule); + // restore schedule, returning missed mshr entry + valid_table[restore_ptr] <= 1; + ready_table[restore_ptr] <= enqueue_as_ready; + restore_ptr <= restore_ptr + $bits(restore_ptr)'(1); + schedule_ptr <= head_ptr; end else if (dequeue) begin - // remove scheduled entry from buffer + // clear scheduled entry + assert(((head_ptr+$bits(head_ptr)'(1)) == schedule_ptr) + || ((head_ptr+$bits(head_ptr)'(2)) == schedule_ptr)) else $error("schedule_ptr=%0d, head_ptr=%0d", schedule_ptr, head_ptr); + valid_table[head_ptr] <= 0; head_ptr <= head_ptr_n; restore_ptr <= head_ptr_n; - valid_table[head_ptr] <= 0; end if (schedule) begin - // schedule next entry - assert(schedule_valid_r); - valid_table[schedule_ptr] <= 0; - ready_table[schedule_ptr] <= 0; - - schedule_ptr <= schedule_n_ptr; - if (MSHR_SIZE > 2) begin - schedule_n_ptr <= schedule_ptr + $bits(schedule_ptr)'(2); - end else begin // (MSHR_SIZE == 2); - schedule_n_ptr <= ~schedule_n_ptr; - end + // schedule next entry + assert(schedule_valid); + valid_table[schedule_ptr] <= 0; + ready_table[schedule_ptr] <= 0; + schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1); end if (push_new) begin if (!dequeue) begin + if (used_r == ADDRW'(ALM_FULL-1)) + alm_full_r <= 1; if (used_r == ADDRW'(MSHR_SIZE-1)) full_r <= 1; - if (used_r == ADDRW'(ALM_FULL-1)) - almost_full_r <= 1; end end else if (dequeue) begin if (used_r == ADDRW'(ALM_FULL)) - almost_full_r <= 0; + alm_full_r <= 0; full_r <= 0; end @@ -173,72 +159,33 @@ module VX_miss_resrv #( ) entries ( .clk(clk), .waddr(tail_ptr), - .raddr(schedule_n_ptr), + .raddr(schedule_ptr), .wren(push_new), .byteen(1'b1), .rden(1'b1), .din(enqueue_data), - .dout(dout) + .dout(schedule_data) ); - always @(*) begin - schedule_valid_n_r = schedule_valid_r; - if (reset) begin - schedule_valid_n_r = 0; - end else begin - if (restore) begin - schedule_valid_n_r = enqueue_as_ready; - end else if (lookup_ready) begin - schedule_valid_n_r = schedule_valid_r || (schedule_addr_r == lookup_addr); - end else if (schedule) begin - schedule_valid_n_r = ready_table[schedule_n_ptr]; - end - end - end - - always @(*) begin - schedule_addr_n_r = schedule_addr_r; - dout_n_r = dout_r; - if (restore - || (push_new && (used_r == 0 || (used_r == 1 && schedule)))) begin - schedule_addr_n_r = enqueue_addr; - dout_n_r = enqueue_data; - end else if (schedule) begin - schedule_addr_n_r = addr_table[schedule_n_ptr]; - dout_n_r = dout; - end - end - - always @(posedge clk) begin - schedule_valid_r <= schedule_valid_n_r; - schedule_addr_r <= schedule_addr_n_r; - dout_r <= dout_n_r; - end - - assign schedule_valid = schedule_valid_r; - assign schedule_addr = schedule_addr_r; - assign schedule_data = dout_r; - - assign schedule_valid_next = schedule_valid_n_r; - assign schedule_addr_next = schedule_addr_n_r; - assign schedule_data_next = dout_n_r; - - assign enqueue_almfull = almost_full_r; + assign schedule_valid = ready_table[schedule_ptr]; + assign schedule_addr = addr_table[schedule_ptr]; + assign enqueue_almfull = alm_full_r; + assign enqueue_full = full_r; `ifdef DBG_PRINT_CACHE_MSHR always @(posedge clk) begin if (lookup_ready || schedule || enqueue || dequeue) begin if (schedule) - $display("%t: cache%0d:%0d msrq-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc); + $display("%t: cache%0d:%0d mshr-schedule: addr%0d=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, schedule_ptr, `LINE_TO_BYTE_ADDR(schedule_addr, BANK_ID), deq_debug_wid, deq_debug_pc); if (enqueue) begin if (enqueue_is_mshr) - $display("%t: cache%0d:%0d msrq-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready); + $display("%t: cache%0d:%0d mshr-restore: addr%0d=%0h, ready=%b", $time, CACHE_ID, BANK_ID, restore_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready); else - $display("%t: cache%0d:%0d msrq-enq: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready, enq_debug_wid, enq_debug_pc); + $display("%t: cache%0d:%0d mshr-enqueue: addr%0d=%0h, ready=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, tail_ptr, `LINE_TO_BYTE_ADDR(enqueue_addr, BANK_ID), enqueue_as_ready, enq_debug_wid, enq_debug_pc); end if (dequeue) - $display("%t: cache%0d:%0d msrq-deq addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc); - $write("%t: cache%0d:%0d msrq-table", $time, CACHE_ID, BANK_ID); + $display("%t: cache%0d:%0d mshr-dequeue addr%0d, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, head_ptr, enq_debug_wid, enq_debug_pc); + $write("%t: cache%0d:%0d mshr-table", $time, CACHE_ID, BANK_ID); for (integer j = 0; j < MSHR_SIZE; j++) begin if (valid_table[j]) begin $write(" "); diff --git a/hw/rtl/cache/VX_shared_mem.v b/hw/rtl/cache/VX_shared_mem.v index a99239a6..b79350f5 100644 --- a/hw/rtl/cache/VX_shared_mem.v +++ b/hw/rtl/cache/VX_shared_mem.v @@ -134,8 +134,7 @@ module VX_shared_mem #( VX_fifo_queue #( .DATAW (NUM_BANKS * (1 + `REQS_BITS + 1 + WORD_SIZE + `LINE_SELECT_BITS + `WORD_WIDTH + CORE_TAG_WIDTH)), .SIZE (CREQ_SIZE), - .BUFFERED (1), - .FASTRAM (1) + .BUFFERED (1) ) core_req_queue ( .clk (clk), .reset (reset), @@ -217,8 +216,7 @@ module VX_shared_mem #( VX_fifo_queue #( .DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH), .SIZE (CRSQ_SIZE), - .BUFFERED (1), - .FASTRAM (1) + .BUFFERED (1) ) core_rsp_queue ( .clk (clk), .reset (reset), diff --git a/hw/rtl/fp_cores/VX_fp_cvt.v b/hw/rtl/fp_cores/VX_fp_cvt.v index f1d465ca..6471956e 100644 --- a/hw/rtl/fp_cores/VX_fp_cvt.v +++ b/hw/rtl/fp_cores/VX_fp_cvt.v @@ -140,6 +140,7 @@ module VX_fp_cvt #( wire signed [LANES-1:0][INT_EXP_WIDTH-1:0] destination_exp; // re-biased exponent for destination for (genvar i = 0; i < LANES; ++i) begin + `IGNORE_WARNINGS_BEGIN // Input mantissa needs to be normalized wire signed [INT_EXP_WIDTH-1:0] fp_input_exp; wire signed [INT_EXP_WIDTH-1:0] int_input_exp; @@ -153,17 +154,17 @@ module VX_fp_cvt #( // Unbias exponent and compensate for shift assign fp_input_exp = $signed(fmt_exponent_s0[i] + - INT_EXP_WIDTH'($signed({1'b0, in_a_type_s0[i].is_subnormal})) - - INT_EXP_WIDTH'($signed(EXP_BIAS)) - - INT_EXP_WIDTH'(renorm_shamt_sgn) + - INT_EXP_WIDTH'($signed(FMT_SHIFT_COMPENSATION))); + (($signed({1'b0, in_a_type_s0[i].is_subnormal}) + + $signed(FMT_SHIFT_COMPENSATION - EXP_BIAS)) - + renorm_shamt_sgn)); - assign int_input_exp = $signed(INT_EXP_WIDTH'(INT_MAN_WIDTH - 1) - INT_EXP_WIDTH'(renorm_shamt_sgn)); + assign int_input_exp = $signed(INT_MAN_WIDTH - 1 - renorm_shamt_sgn); assign input_exp[i] = is_itof_s0 ? int_input_exp : fp_input_exp; // Rebias the exponent - assign destination_exp[i] = input_exp[i] + INT_EXP_WIDTH'($signed(EXP_BIAS)); + assign destination_exp[i] = input_exp[i] + $signed(EXP_BIAS); + `IGNORE_WARNINGS_END end // Pipeline stage1 @@ -207,6 +208,7 @@ module VX_fp_cvt #( // Perform adjustments to mantissa and exponent for (genvar i = 0; i < LANES; ++i) begin always @(*) begin + `IGNORE_WARNINGS_BEGIN // Default assignment final_exp[i] = $unsigned(destination_exp_s1[i]); // take exponent as is, only look at lower bits preshift_mant[i] = 65'b0; // initialize mantissa container with zeroes @@ -221,16 +223,16 @@ module VX_fp_cvt #( // Overflow or infinities (for proper rounding) if ((destination_exp_s1[i] >= 2**EXP_BITS-1) || (~is_itof_s1 && in_a_type_s1[i].is_inf)) begin - final_exp[i] = INT_EXP_WIDTH'(2**EXP_BITS-2); // largest normal value + final_exp[i] = (2**EXP_BITS-2); // largest normal value preshift_mant[i] = ~0; // largest normal value and RS bits set of_before_round[i] = 1'b1; // Denormalize underflowing values end else if ((destination_exp_s1[i] < 1) - && (destination_exp_s1[i] >= INT_EXP_WIDTH'(-$signed(MAN_BITS)))) begin + && (destination_exp_s1[i] >= -$signed(MAN_BITS))) begin final_exp[i] = 0; // denormal result - denorm_shamt[i] = $unsigned(denorm_shamt[i] + SHAMT_BITS'(INT_EXP_WIDTH'(1) - destination_exp_s1[i])); // adjust right shifting + denorm_shamt[i] = $unsigned(denorm_shamt[i] + 1 - destination_exp_s1[i]); // adjust right shifting // Limit the shift to retain sticky bits - end else if (destination_exp_s1[i] < INT_EXP_WIDTH'(-$signed(MAN_BITS))) begin + end else if (destination_exp_s1[i] < -$signed(MAN_BITS)) begin final_exp[i] = 0; // denormal result denorm_shamt[i] = $unsigned(denorm_shamt[i] + SHAMT_BITS'(2 + MAN_BITS)); // to sticky end @@ -238,7 +240,7 @@ module VX_fp_cvt #( // By default right shift mantissa to be an integer denorm_shamt[i] = SHAMT_BITS'(MAX_INT_WIDTH-1) - SHAMT_BITS'(input_exp_s1[i]); // overflow: when converting to unsigned the range is larger by one - if (input_exp_s1[i] >= $signed(INT_EXP_WIDTH'(MAX_INT_WIDTH-1) + INT_EXP_WIDTH'(unsigned_s1))) begin + if (input_exp_s1[i] >= $signed(MAX_INT_WIDTH -1 + unsigned_s1)) begin denorm_shamt[i] = SHAMT_BITS'(1'b0); // prevent shifting of_before_round[i] = 1'b1; // underflow @@ -246,6 +248,7 @@ module VX_fp_cvt #( denorm_shamt[i] = MAX_INT_WIDTH + 1; // all bits go to the sticky end end + `IGNORE_WARNINGS_END end // Mantissa adjustment shift diff --git a/hw/rtl/fp_cores/VX_fp_div.v b/hw/rtl/fp_cores/VX_fp_div.v index 5d3eaafe..ce5330cd 100644 --- a/hw/rtl/fp_cores/VX_fp_div.v +++ b/hw/rtl/fp_cores/VX_fp_div.v @@ -29,14 +29,13 @@ module VX_fp_div #( wire stall = ~ready_out && valid_out; wire enable = ~stall; - wire _reset; - + wire _reset; VX_reset_relay reset_relay ( .clk (clk), .reset (reset), .reset_out (_reset) ); - + for (genvar i = 0; i < LANES; i++) begin `ifdef VERILATOR reg [31:0] r; diff --git a/hw/rtl/fp_cores/VX_fp_sqrt.v b/hw/rtl/fp_cores/VX_fp_sqrt.v index a00a9a37..5a8a8c39 100644 --- a/hw/rtl/fp_cores/VX_fp_sqrt.v +++ b/hw/rtl/fp_cores/VX_fp_sqrt.v @@ -28,8 +28,7 @@ module VX_fp_sqrt #( wire stall = ~ready_out && valid_out; wire enable = ~stall; - wire _reset; - + wire _reset; VX_reset_relay reset_relay ( .clk (clk), .reset (reset), diff --git a/hw/rtl/libs/VX_fifo_queue.v b/hw/rtl/libs/VX_fifo_queue.v index 4972d568..63b37060 100644 --- a/hw/rtl/libs/VX_fifo_queue.v +++ b/hw/rtl/libs/VX_fifo_queue.v @@ -91,96 +91,157 @@ module VX_fifo_queue #( if (used_r == ADDRW'(ALM_EMPTY+1)) alm_empty_r <= 1; end - used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop))); + if (SIZE > 2) begin + used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop))); + end else begin // (SIZE == 2); + `IGNORE_WARNINGS_BEGIN + used_r <= used_r ^ (push ^ pop); + `IGNORE_WARNINGS_END + end end end - if (0 == BUFFERED) begin + if (SIZE == 2) begin + + if (0 == BUFFERED) begin + + if (FASTRAM) begin + + `USE_FAST_BRAM reg [DATAW-1:0] shift_reg [SIZE]; + + always @(posedge clk) begin + if (push) begin + shift_reg[1] <= shift_reg[0]; + shift_reg[0] <= data_in; + end + end + + assign data_out = shift_reg[~used_r[0]]; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] wr_ptr_r; - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= 0; - wr_ptr_r <= 0; end else begin - wr_ptr_r <= wr_ptr_r + ADDRW'(push); - rd_ptr_r <= rd_ptr_r + ADDRW'(pop); - end + + reg [DATAW-1:0] shift_reg [SIZE]; + + always @(posedge clk) begin + if (push) begin + shift_reg[1] <= shift_reg[0]; + shift_reg[0] <= data_in; + end + end + + assign data_out = shift_reg[~used_r[0]]; + + end + + end else begin + + reg [DATAW-1:0] data_out_r; + reg [DATAW-1:0] buffer; + + always @(posedge clk) begin + if (push) begin + buffer <= data_in; + end + if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin + data_out_r <= data_in; + end else if (pop) begin + data_out_r <= buffer; + end + end + + assign data_out = data_out_r; + end - - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .BUFFERED (0), - .RWCHECK (1), - .FASTRAM (FASTRAM) - ) dp_ram ( - .clk(clk), - .waddr(wr_ptr_r), - .raddr(rd_ptr_r), - .wren(push), - .byteen(1'b1), - .rden(1'b1), - .din(data_in), - .dout(data_out) - ); - + end else begin - wire [DATAW-1:0] dout; - reg [DATAW-1:0] dout_r; - reg [ADDRW-1:0] wr_ptr_r; - reg [ADDRW-1:0] rd_ptr_r; - reg [ADDRW-1:0] rd_ptr_n_r; + if (0 == BUFFERED) begin - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= 0; - rd_ptr_r <= 0; - rd_ptr_n_r <= 1; - end else begin - if (push) begin - wr_ptr_r <= wr_ptr_r + ADDRW'(1); - end - if (pop) begin - rd_ptr_r <= rd_ptr_n_r; - if (SIZE > 2) begin - rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); - end else begin // (SIZE == 2); - rd_ptr_n_r <= ~rd_ptr_n_r; + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] wr_ptr_r; + + always @(posedge clk) begin + if (reset) begin + rd_ptr_r <= 0; + wr_ptr_r <= 0; + end else begin + wr_ptr_r <= wr_ptr_r + ADDRW'(push); + rd_ptr_r <= rd_ptr_r + ADDRW'(pop); + end + end + + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (SIZE), + .BUFFERED (0), + .RWCHECK (1), + .FASTRAM (FASTRAM) + ) dp_ram ( + .clk(clk), + .waddr(wr_ptr_r), + .raddr(rd_ptr_r), + .wren(push), + .byteen(1'b1), + .rden(1'b1), + .din(data_in), + .dout(data_out) + ); + + end else begin + + wire [DATAW-1:0] dout; + reg [DATAW-1:0] dout_r; + reg [ADDRW-1:0] wr_ptr_r; + reg [ADDRW-1:0] rd_ptr_r; + reg [ADDRW-1:0] rd_ptr_n_r; + + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= 0; + rd_ptr_r <= 0; + rd_ptr_n_r <= 1; + end else begin + if (push) begin + wr_ptr_r <= wr_ptr_r + ADDRW'(1); + end + if (pop) begin + rd_ptr_r <= rd_ptr_n_r; + if (SIZE > 2) begin + rd_ptr_n_r <= rd_ptr_r + ADDRW'(2); + end else begin // (SIZE == 2); + rd_ptr_n_r <= ~rd_ptr_n_r; + end end end end - end - VX_dp_ram #( - .DATAW (DATAW), - .SIZE (SIZE), - .BUFFERED (0), - .RWCHECK (1), - .FASTRAM (FASTRAM) - ) dp_ram ( - .clk(clk), - .waddr(wr_ptr_r), - .raddr(rd_ptr_n_r), - .wren(push), - .byteen(1'b1), - .rden(1'b1), - .din(data_in), - .dout(dout) - ); + VX_dp_ram #( + .DATAW (DATAW), + .SIZE (SIZE), + .BUFFERED (0), + .RWCHECK (1), + .FASTRAM (FASTRAM) + ) dp_ram ( + .clk(clk), + .waddr(wr_ptr_r), + .raddr(rd_ptr_n_r), + .wren(push), + .byteen(1'b1), + .rden(1'b1), + .din(data_in), + .dout(dout) + ); - always @(posedge clk) begin - if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin - dout_r <= data_in; - end else if (pop) begin - dout_r <= dout; + always @(posedge clk) begin + if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin + dout_r <= data_in; + end else if (pop) begin + dout_r <= dout; + end end - end - assign data_out = dout_r; + assign data_out = dout_r; + end end assign empty = empty_r; diff --git a/hw/rtl/libs/VX_fixed_arbiter.v b/hw/rtl/libs/VX_fixed_arbiter.v index e7cae8f1..5eb13654 100644 --- a/hw/rtl/libs/VX_fixed_arbiter.v +++ b/hw/rtl/libs/VX_fixed_arbiter.v @@ -25,26 +25,16 @@ module VX_fixed_arbiter #( assign grant_valid = requests[0]; end else begin - - reg [LOG_NUM_REQS-1:0] grant_index_r; - reg [NUM_REQS-1:0] grant_onehot_r; - always @(*) begin - grant_index_r = 'x; - grant_onehot_r = 'x; - for (integer i = 0; i < NUM_REQS; ++i) begin - if (requests[i]) begin - grant_index_r = LOG_NUM_REQS'(i); - grant_onehot_r = NUM_REQS'(0); - grant_onehot_r[i] = 1; - break; - end - end - end + VX_priority_encoder #( + .N (NUM_REQS) + ) tid_select ( + .data_in (requests), + .index (grant_index), + .onehot (grant_onehot), + .valid_out (grant_valid) + ); - assign grant_index = grant_index_r; - assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); end endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_index_buffer.v b/hw/rtl/libs/VX_index_buffer.v index c9f946ad..fa635d64 100644 --- a/hw/rtl/libs/VX_index_buffer.v +++ b/hw/rtl/libs/VX_index_buffer.v @@ -28,11 +28,12 @@ module VX_index_buffer #( wire [ADDRW-1:0] free_index; VX_priority_encoder #( - .DATAW (SIZE) + .N (SIZE) ) free_slots_encoder ( - .data_in (free_slots_n), - .data_out (free_index), - .valid_out (free_valid) + .data_in (free_slots_n), + .index (free_index), + `UNUSED_PIN (onehot), + .valid_out (free_valid) ); always @(*) begin diff --git a/hw/rtl/libs/VX_matrix_arbiter.v b/hw/rtl/libs/VX_matrix_arbiter.v index 1771a169..1476ed3d 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.v +++ b/hw/rtl/libs/VX_matrix_arbiter.v @@ -72,11 +72,11 @@ module VX_matrix_arbiter #( end VX_onehot_encoder #( - .NUM_REQS(NUM_REQS) + .N (NUM_REQS) ) encoder ( - .onehot (grant_unqual), - `UNUSED_PIN (valid), - .binary (grant_index) + .data_in (grant_unqual), + .data_out (grant_index), + `UNUSED_PIN (valid) ); assign grant_valid = (| requests); diff --git a/hw/rtl/libs/VX_onehot_encoder.v b/hw/rtl/libs/VX_onehot_encoder.v new file mode 100644 index 00000000..bace6dc4 --- /dev/null +++ b/hw/rtl/libs/VX_onehot_encoder.v @@ -0,0 +1,73 @@ +`include "VX_platform.vh" + +// Fast encoder using parallel prefix computation +// Adapter from BaseJump STL: http://bjump.org/index.html + +module VX_onehot_encoder #( + parameter N = 1, + parameter REVERSE = 0, + parameter FAST = 1 +) ( + input wire [N-1:0] data_in, + output wire [`LOG2UP(N)-1:0] data_out, + output wire valid +); + if (FAST) begin + `IGNORE_WARNINGS_BEGIN + localparam levels_lp = $clog2(N); + localparam aligned_width_lp = 1 << $clog2(N); + + wire [levels_lp:0][aligned_width_lp-1:0] addr; + wire [levels_lp:0][aligned_width_lp-1:0] v; + + // base case, also handle padding for non-power of two inputs + assign v[0] = REVERSE ? (data_in << (aligned_width_lp - N)) : ((aligned_width_lp)'(data_in)); + assign addr[0] = 'x; + + for (genvar level = 1; level < levels_lp+1; level=level+1) begin + localparam segments_lp = 2**(levels_lp-level); + localparam segment_slot_lp = aligned_width_lp/segments_lp; + localparam segment_width_lp = level; // how many bits are needed at each level + + for (genvar segment = 0; segment < segments_lp; segment=segment+1) begin + wire [1:0] vs = { + v[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)], + v[level-1][segment*segment_slot_lp] + }; + + assign v[level][segment*segment_slot_lp] = (| vs); + + if (level == 1) begin + assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = vs[!REVERSE]; + end else begin + assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = { + vs[!REVERSE], + addr[level-1][segment*segment_slot_lp+:segment_width_lp-1] | addr[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)+:segment_width_lp-1] + }; + end + end + end + + assign data_out = addr[levels_lp][`LOG2UP(N)-1:0]; + assign valid = v[levels_lp][0]; + `IGNORE_WARNINGS_END + end else begin + + reg [`LOG2UP(N)-1:0] data_out_r; + reg valid_r; + + always @(*) begin + data_out_r = 'x; + for (integer i = 0; i < N; i++) begin + if (data_in[i]) begin + data_out_r = `LOG2UP(N)'(i); + end + end + end + + assign data_out = data_out_r; + assign valid = (| data_in); + + end + +endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_onehot_encooder.v b/hw/rtl/libs/VX_onehot_encooder.v deleted file mode 100644 index e807a0a7..00000000 --- a/hw/rtl/libs/VX_onehot_encooder.v +++ /dev/null @@ -1,28 +0,0 @@ -`include "VX_platform.vh" - -module VX_onehot_encoder #( - parameter N = 6 -) ( - input wire [N-1:0] onehot, - output wire [`LOG2UP(N)-1:0] binary, - output wire valid -); - reg [`LOG2UP(N)-1:0] binary_r; - reg valid_r; - - always @(*) begin - binary_r = 'x; - valid_r = 1'b0; - for (integer i = 0; i < N; i++) begin - if (onehot[i]) begin - binary_r = `LOG2UP(N)'(i); - valid_r = 1'b1; - end - end - end - - assign binary = binary_r; - assign valid = valid_r; - -endmodule - diff --git a/hw/rtl/libs/VX_priority_encoder.v b/hw/rtl/libs/VX_priority_encoder.v index 6fa54380..d1c6c479 100644 --- a/hw/rtl/libs/VX_priority_encoder.v +++ b/hw/rtl/libs/VX_priority_encoder.v @@ -1,26 +1,73 @@ `include "VX_platform.vh" module VX_priority_encoder #( - parameter DATAW = 1, - parameter LDATAW = `LOG2UP(DATAW) + parameter N = 1, + parameter REVERSE = 0, + parameter FAST = 1, + parameter LN = `LOG2UP(N) ) ( - input wire [DATAW-1:0] data_in, - output wire [LDATAW-1:0] data_out, - output wire valid_out -); - reg [LDATAW-1:0] data_out_r; + input wire [N-1:0] data_in, + output wire [N-1:0] onehot, + output wire [LN-1:0] index, + output wire valid_out +); - always @(*) begin - data_out_r = 'x; - for (integer i = 0; i < DATAW; i++) begin - if (data_in[i]) begin - data_out_r = LDATAW'(i); - break; + if (N == 1) begin + + assign onehot = data_in; + assign index = 0; + assign valid_out = data_in; + + end else if (FAST) begin + + wire [N-1:0] scan_lo; + + VX_scan #( + .N (N), + .OP (2), + .REVERSE (REVERSE) + ) scan ( + .data_in (data_in), + .data_out (scan_lo) + ); + + if (REVERSE) begin + assign onehot = scan_lo & {1'b1, (~scan_lo[N-1:1])}; + assign valid_out = scan_lo[0]; + end else begin + assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1}; + assign valid_out = scan_lo[N-1]; + end + + VX_onehot_encoder #( + .N (N) + ) b ( + .data_in (onehot), + .data_out (index), + `UNUSED_PIN (valid) + ); + + end else begin + + reg [N-1:0] onehot_r; + reg [LN-1:0] index_r; + + always @(*) begin + index_r = 'x; + onehot_r = 0; + for (integer i = 0; i < N; i++) begin + if (data_in[i]) begin + index_r = LN'(i); + onehot_r[i] = 1'b1; + break; + end end end - end - assign data_out = data_out_r; - assign valid_out = (| data_in); + assign index = index_r; + assign onehot = onehot_r; + assign valid_out = (| data_in); + + end endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_reset_relay.v b/hw/rtl/libs/VX_reset_relay.v index 9fdbda8f..19a598ab 100644 --- a/hw/rtl/libs/VX_reset_relay.v +++ b/hw/rtl/libs/VX_reset_relay.v @@ -2,26 +2,45 @@ module VX_reset_relay #( parameter NUM_NODES = 1, - parameter PASSTHRU = 0 + parameter DEPTH = 1, + parameter ASYNC = 0 ) ( input wire clk, input wire reset, output wire [NUM_NODES-1:0] reset_out ); - if (PASSTHRU == 0) begin - reg [NUM_NODES-1:0] reset_r; - always @(posedge clk) begin - for (integer i = 0; i < NUM_NODES; ++i) begin - reset_r[i] <= reset; + if (DEPTH > 1) begin + `DISABLE_BRAM reg [NUM_NODES-1:0] reset_r [DEPTH-1:0]; + if (ASYNC) begin + always @(posedge clk or posedge reset) begin + for (integer i = DEPTH-1; i > 0; --i) + reset_r[i] <= reset_r[i-1]; + reset_r[0] <= {NUM_NODES{reset}}; + end + end else begin + always @(posedge clk) begin + for (integer i = DEPTH-1; i > 0; --i) + reset_r[i] <= reset_r[i-1]; + reset_r[0] <= {NUM_NODES{reset}}; end end + assign reset_out = reset_r[DEPTH-1]; + end else if (DEPTH == 1) begin + reg [NUM_NODES-1:0] reset_r; + if (ASYNC) begin + always @(posedge clk or posedge reset) begin + reset_r <= {NUM_NODES{reset}}; + end + end else begin + always @(posedge clk) begin + reset_r <= {NUM_NODES{reset}}; + end + end assign reset_out = reset_r; end else begin `UNUSED_VAR (clk) - for (genvar i = 0; i < NUM_NODES; ++i) begin - assign reset_out[i] = reset; - end + assign reset_out = {NUM_NODES{reset}}; end endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_rr_arbiter.v b/hw/rtl/libs/VX_rr_arbiter.v index 9c5524a1..69406011 100644 --- a/hw/rtl/libs/VX_rr_arbiter.v +++ b/hw/rtl/libs/VX_rr_arbiter.v @@ -55,7 +55,8 @@ module VX_rr_arbiter #( assign grant_index = grant_table[state]; assign grant_onehot = grant_onehot_r; - assign grant_valid = (| requests); + assign grant_valid = (| requests); + end endmodule \ No newline at end of file diff --git a/hw/rtl/libs/VX_scan.v b/hw/rtl/libs/VX_scan.v new file mode 100644 index 00000000..441dd59a --- /dev/null +++ b/hw/rtl/libs/VX_scan.v @@ -0,0 +1,60 @@ +`include "VX_platform.vh" + +// Fast Paralllel scan using Kogge-Stone style prefix tree with configurable operator +// Adapter from BaseJump STL: http://bjump.org/index.html + +module VX_scan #( + parameter N = 1, + parameter OP = 0, // 0: XOR, 1: AND, 2: OR + parameter REVERSE = 0 // 0: LO->HI, 1: HI->LO +) ( + input wire [N-1:0] data_in, + output wire [N-1:0] data_out +); +`IGNORE_WARNINGS_BEGIN + + wire [$clog2(N):0][N-1:0] t; + + // reverses bits + if (REVERSE) begin + assign t[0] = data_in; + end else begin + assign t[0] = {<<{data_in}}; + end + + // optimize for the common case of small and-scans + if ((N == 2) && (OP == 1)) begin + assign t[$clog2(N)] = {t[0][1], &t[0][1:0]}; + end else if ((N == 3) && (OP == 1)) begin + assign t[$clog2(N)] = {t[0][2], &t[0][2:1], &t[0][2:0]}; + end else if ((N == 4) && (OP == 1)) begin + assign t[$clog2(N)] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]}; + end else begin + // general case + wire [N-1:0] fill; + for (genvar i = 0; i < $clog2(N); i++) begin + wire [N-1:0] shifted = N'({fill, t[i]} >> (1< - -$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) - quartus_syn $(PROJECT) $(SYN_ARGS) - $(STAMP) fit.chg - -$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt - quartus_fit $(PROJECT) $(FIT_ARGS) - $(STAMP) asm.chg - $(STAMP) sta.chg - -$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt - quartus_asm $(PROJECT) $(ASM_ARGS) - -$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt - quartus_sta $(PROJECT) $(STA_ARGS) - -smart.log: $(PROJECT_FILES) - quartus_sh --determine_smart_action $(PROJECT) > smart.log - -# Project initialization -$(PROJECT_FILES): - quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NUM_THREADS=8" - -syn.chg: - $(STAMP) syn.chg - -fit.chg: - $(STAMP) fit.chg - -sta.chg: - $(STAMP) sta.chg - -asm.chg: - $(STAMP) asm.chg - -program: $(PROJECT).sof - quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" - -clean: - rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox diff --git a/hw/syn/quartus/project.sdc b/hw/syn/quartus/project.sdc index a8170852..46f1af78 100644 --- a/hw/syn/quartus/project.sdc +++ b/hw/syn/quartus/project.sdc @@ -1,6 +1,4 @@ -set_time_format -unit ns -decimal_places 3 - -create_clock -name {clk} -period "220 MHz" -waveform { 0.0 1.0 } [get_ports {clk}] +create_clock -name {clk} -period "220 MHz" [get_ports {clk}] derive_pll_clocks -create_base_clocks derive_clock_uncertainty diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index b080e3bf..e98654c3 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -42,21 +42,21 @@ set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name TIMEQUEST_DO_REPORT_TIMING ON set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED -set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" -set_global_assignment -name FITTER_EFFORT "STANDARD FIT" -set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" -set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM -set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON -set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 -set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 -set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" -set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON -set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON -set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON -set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON -set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON -set_global_assignment -name POWER_USE_TA_VALUE 65 -set_global_assignment -name SEED 1 +#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" +#set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 +#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 +#set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" +#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON +#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON +#set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON +#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON +#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON +#set_global_assignment -name POWER_USE_TA_VALUE 65 +#set_global_assignment -name SEED 1 switch $opts(family) { "Arria 10" { diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 981dc60b..36d350b9 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -1,13 +1,20 @@ +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 + +#FAMILY = "Stratix 10" +#DEVICE = 1SX280HN2F43E2VG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 + PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip -PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf -# Part, Family -FAMILY = "Arria 10" -DEVICE = 10AX115N3F40E2SG +RTL_DIR=../../../rtl +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) + +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Executable Configuration SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 diff --git a/hw/syn/quartus/top1/Makefile b/hw/syn/quartus/top1/Makefile index 765e7c1a..f62d0095 100644 --- a/hw/syn/quartus/top1/Makefile +++ b/hw/syn/quartus/top1/Makefile @@ -1,13 +1,20 @@ +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 + +#FAMILY = "Stratix 10" +#DEVICE = 1SX280HN2F43E2VG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 + PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip -PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf -# Part, Family -FAMILY = "Arria 10" -DEVICE = 10AX115N3F40E2SG +RTL_DIR=../../../rtl +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) + +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Executable Configuration SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 diff --git a/hw/syn/quartus/top16/Makefile b/hw/syn/quartus/top16/Makefile index 3583a832..36cc846e 100644 --- a/hw/syn/quartus/top16/Makefile +++ b/hw/syn/quartus/top16/Makefile @@ -9,8 +9,11 @@ FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;$(FPU_CORE_PATH);../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip + +RTL_DIR=../../../rtl +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) + PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Executable Configuration diff --git a/hw/syn/quartus/top2/Makefile b/hw/syn/quartus/top2/Makefile index acb95d25..9257c5bf 100644 --- a/hw/syn/quartus/top2/Makefile +++ b/hw/syn/quartus/top2/Makefile @@ -1,13 +1,20 @@ +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 + +#FAMILY = "Stratix 10" +#DEVICE = 1SX280HN2F43E2VG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 + PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip -PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf -# Part, Family -FAMILY = "Arria 10" -DEVICE = 10AX115N3F40E2SG +RTL_DIR=../../../rtl +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) + +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Executable Configuration SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 diff --git a/hw/syn/quartus/top32/Makefile b/hw/syn/quartus/top32/Makefile index 17b0b04c..e4bb9dfc 100644 --- a/hw/syn/quartus/top32/Makefile +++ b/hw/syn/quartus/top32/Makefile @@ -9,8 +9,11 @@ FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;$(FPU_CORE_PATH);../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip + +RTL_DIR=../../../rtl +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) + PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Executable Configuration diff --git a/hw/syn/quartus/top8/Makefile b/hw/syn/quartus/top8/Makefile index bdcc673d..9405eca5 100644 --- a/hw/syn/quartus/top8/Makefile +++ b/hw/syn/quartus/top8/Makefile @@ -9,8 +9,11 @@ FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;$(FPU_CORE_PATH);../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src -RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip + +RTL_DIR=../../../rtl +FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE) + PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Executable Configuration