diff --git a/hw/opae/README b/hw/opae/README index 483a93fa..8e30eec6 100644 --- a/hw/opae/README +++ b/hw/opae/README @@ -109,6 +109,7 @@ make -C vortex clean && make -C vortex > vortex/build.log 2>&1 & make -C top clean && make -C top > top/build.log 2>&1 & make -C top1 clean && make -C top1 > top1/build.log 2>&1 & make -C top8 clean && make -C top8 > top8/build.log 2>&1 & +make -C top16 clean && make -C top16 > top16/build.log 2>&1 & # How to calculate the maximum operating frequency? 200 Mhz -> period = 1/200x10^6 = 5ns diff --git a/hw/rtl/VX_databus_arb.v b/hw/rtl/VX_databus_arb.v index 9e1bd17b..3eec3de5 100644 --- a/hw/rtl/VX_databus_arb.v +++ b/hw/rtl/VX_databus_arb.v @@ -37,8 +37,7 @@ module VX_databus_arb ( && (core_req_if.addr[i] < REQ_ADDRW'(`SHARED_MEM_BASE_ADDR >> 2)); VX_skid_buffer #( - .DATAW (REQ_DATAW), - .PASSTHRU (1) + .DATAW (REQ_DATAW) ) cache_out_buffer ( .clk (clk), .reset (reset), @@ -51,8 +50,7 @@ module VX_databus_arb ( ); VX_skid_buffer #( - .DATAW (REQ_DATAW), - .PASSTHRU (1) + .DATAW (REQ_DATAW) ) smem_out_buffer ( .clk (clk), .reset (reset), @@ -85,7 +83,7 @@ module VX_databus_arb ( assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE; VX_stream_arbiter #( - .NUM_REQS (2), + .NUM_REQS ((`SM_ENABLE ? 2 : 1)), .DATAW (RSP_DATAW), .BUFFERED (0) ) rsp_arb ( diff --git a/hw/rtl/VX_lsu_unit.v b/hw/rtl/VX_lsu_unit.v index 4cd04425..34476b11 100644 --- a/hw/rtl/VX_lsu_unit.v +++ b/hw/rtl/VX_lsu_unit.v @@ -77,7 +77,8 @@ module VX_lsu_unit #( VX_pipe_register #( .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), - .RESETW (1) + .RESETW (1), + .DEPTH (0) ) req_pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 86a397fe..eab11f0b 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -251,9 +251,9 @@ module VX_mem_unit # ( .dram_req_ready (1'b0), // DRAM response - .dram_rsp_valid (0), - .dram_rsp_data (0), - .dram_rsp_tag (0), + .dram_rsp_valid (1'b0), + .dram_rsp_data ((`SCACHE_LINE_SIZE*8)'(0)), + .dram_rsp_tag (`LOG2UP(`SNUM_BANKS)'(0)), `UNUSED_PIN (dram_rsp_ready) ); diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index ef579d0e..7da1480f 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -226,16 +226,16 @@ module VX_bank #( wire force_miss_st0, force_miss_st1; wire dirty_st0; wire [CACHE_LINE_SIZE-1:0] dirtyb_st0, dirtyb_st1; - wire writeen_st0, writeen_st1; wire [`REQ_TAG_WIDTH-1:0] tag_st0, tag_st1; wire mem_rw_st0, mem_rw_st1; wire [WORD_SIZE-1:0] byteen_st0, byteen_st1; wire [`REQS_BITS-1:0] req_tid_st0, req_tid_st1; - wire core_req_hit_st0, core_req_hit_st1; wire do_writeback_st0, do_writeback_st1; - wire mshr_push_st0, mshr_push_st1; - wire crsq_push_st0, crsq_push_st1; - wire dreq_push_st0, dreq_push_st1; + wire writeen_unqual_st0, writeen_unqual_st1; + wire mshr_push_unqual_st0, mshr_push_unqual_st1; + wire dreq_push_unqual_st0, dreq_push_unqual_st1; + wire writeen_st1; + wire core_req_hit_st1; wire valid_st01; wire writeen_st01; @@ -351,24 +351,19 @@ if (DRAM_ENABLE) begin // force miss to ensure commit order when a new request has pending previous requests to same block assign force_miss_st0 = !is_mshr_st0 && !is_fill_st0 && mshr_pending_hazard_st0; - - assign core_req_hit_st0 = !is_fill_st0 && !miss_st0 && !force_miss_st0; - assign writeen_st0 = (core_req_hit_st0 && mem_rw_st0) - || (is_fill_st0 && !is_redundant_fill); + assign writeen_unqual_st0 = (!is_fill_st0 && !miss_st0 && mem_rw_st0) + || (is_fill_st0 && !is_redundant_fill); - wire send_fill_req_st0 = !is_fill_st0 && miss_st0 && !force_miss_st0 + wire send_fill_req_st0 = !is_fill_st0 && miss_st0 && !(WRITE_THROUGH && mem_rw_st0); assign do_writeback_st0 = (WRITE_THROUGH && !is_fill_st0 && mem_rw_st0) || (!WRITE_THROUGH && is_fill_st0 && dirty_st0 && !is_redundant_fill); - assign dreq_push_st0 = send_fill_req_st0 || do_writeback_st0; + assign dreq_push_unqual_st0 = send_fill_req_st0 || do_writeback_st0; - assign mshr_push_st0 = !is_fill_st0 && (miss_st0 || force_miss_st0) - && !(WRITE_THROUGH && mem_rw_st0); - - assign crsq_push_st0 = core_req_hit_st0 && !mem_rw_st0; + assign mshr_push_unqual_st0 = !is_fill_st0 && !(WRITE_THROUGH && mem_rw_st0); end else begin @@ -390,29 +385,37 @@ end else begin assign writeword_st01 = writeword_st0; assign tag_st01 = tag_st0; - assign writeen_st0 = mem_rw_st0; - assign miss_st0 = 0; - assign dirty_st0 = 0; - assign force_miss_st0 = 0; - assign readtag_st0 = 0; - assign core_req_hit_st0 = 1; - assign do_writeback_st0 = 0; - assign dreq_push_st0 = 0; - assign mshr_push_st0 = 0; - assign crsq_push_st0 = !mem_rw_st0; + assign miss_st0 = 0; + assign dirty_st0 = 0; + assign force_miss_st0 = 0; + assign readtag_st0 = 0; + assign do_writeback_st0 = 0; + assign writeen_unqual_st0 = mem_rw_st0; + assign dreq_push_unqual_st0 = 0; + assign mshr_push_unqual_st0 = 0; end VX_pipe_register #( - .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + 1 + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), + .DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_BITS) + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH + `WORD_WIDTH + `TAG_SELECT_BITS + `CACHE_LINE_WIDTH + 1 + WORD_SIZE + `REQS_BITS + `REQ_TAG_WIDTH), .RESETW (1) ) pipe_reg ( .clk (clk), .reset (reset), .enable (!pipeline_stall), - .data_in ({valid_st0, mshr_push_st0, crsq_push_st0, dreq_push_st0, do_writeback_st0, core_req_hit_st0, is_mshr_st0, writeen_st0, force_miss_st0, is_fill_st0, addr_st0, wsel_st0, dirtyb_st0, readdata_st0, writeword_st0, readtag_st0, miss_st0, filldata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), - .data_out ({valid_st1, mshr_push_st1, crsq_push_st1, dreq_push_st1, do_writeback_st1, core_req_hit_st1, is_mshr_st1, writeen_st1, force_miss_st1, is_fill_st1, addr_st1, wsel_st1, dirtyb_st1, readdata_st1, writeword_st1, readtag_st1, miss_st1, filldata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) + .data_in ({valid_st0, is_mshr_st0, is_fill_st0, writeen_unqual_st0, mshr_push_unqual_st0, dreq_push_unqual_st0, do_writeback_st0, miss_st0, force_miss_st0, addr_st0, wsel_st0, dirtyb_st0, readdata_st0, writeword_st0, readtag_st0, filldata_st0, mem_rw_st0, byteen_st0, req_tid_st0, tag_st0}), + .data_out ({valid_st1, is_mshr_st1, is_fill_st1, writeen_unqual_st1, mshr_push_unqual_st1, dreq_push_unqual_st1, do_writeback_st1, miss_st1, force_miss_st1, addr_st1, wsel_st1, dirtyb_st1, readdata_st1, writeword_st1, readtag_st1, filldata_st1, mem_rw_st1, byteen_st1, req_tid_st1, tag_st1}) ); + assign core_req_hit_st1 = !is_fill_st1 && !miss_st1 && !force_miss_st1; + + assign writeen_st1 = writeen_unqual_st1 && (is_fill_st1 || !force_miss_st1); + + wire dreq_push_st1 = dreq_push_unqual_st1 && (do_writeback_st1 || !force_miss_st1); + + wire mshr_push_st1 = mshr_push_unqual_st1 && (miss_st1 || force_miss_st1); + + wire crsq_push_st1 = core_req_hit_st1 && !mem_rw_st1; + `ifdef DBG_CACHE_REQ_INFO if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin assign {debug_pc_st01, debug_wid_st01} = tag_st01[CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS]; diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index f1024d74..eb868aee 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -4,9 +4,9 @@ module VX_cache #( parameter CACHE_ID = 0, // Size of cache in bytes - parameter CACHE_SIZE = 8092, + parameter CACHE_SIZE = 16384, // Size of line inside a bank in bytes - parameter CACHE_LINE_SIZE = 16, + parameter CACHE_LINE_SIZE = 64, // Number of banks parameter NUM_BANKS = 4, // Size of a word in bytes @@ -17,7 +17,7 @@ module VX_cache #( // Core Request Queue Size parameter CREQ_SIZE = 4, // Miss Reserv Queue Knob - parameter MSHR_SIZE = 8, + parameter MSHR_SIZE = 16, // DRAM Response Queue Size parameter DRSQ_SIZE = 4, @@ -39,7 +39,7 @@ module VX_cache #( parameter CORE_TAG_WIDTH = $clog2(MSHR_SIZE), // size of tag id in core request tag - parameter CORE_TAG_ID_BITS = 0, + parameter CORE_TAG_ID_BITS = CORE_TAG_WIDTH, // dram request tag size parameter DRAM_TAG_WIDTH = `LOG2UP(NUM_BANKS), diff --git a/hw/syn/quartus/top16/Makefile b/hw/syn/quartus/top16/Makefile new file mode 100644 index 00000000..3583a832 --- /dev/null +++ b/hw/syn/quartus/top16/Makefile @@ -0,0 +1,76 @@ +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 + +#FAMILY = "Stratix 10" +#DEVICE = 1SX280HN2F43E2VG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 + +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = vortex_afu +SRC_FILE = vortex_afu.sv +FPU_INCLUDE = ../../../rtl/fp_cores;$(FPU_CORE_PATH);../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src +RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip +PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf + +# Executable Configuration +SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 +FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on +ASM_ARGS = +STA_ARGS = --parallel --do_report_timing + +# Build targets +all: $(PROJECT).sta.rpt + +syn: $(PROJECT).syn.rpt + +fit: $(PROJECT).fit.rpt + +asm: $(PROJECT).asm.rpt + +sta: $(PROJECT).sta.rpt + +smart: smart.log + +# Target implementations +STAMP = echo done > + +$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES) + quartus_syn $(PROJECT) $(SYN_ARGS) + $(STAMP) fit.chg + +$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt + quartus_fit $(PROJECT) $(FIT_ARGS) + $(STAMP) asm.chg + $(STAMP) sta.chg + +$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt + quartus_asm $(PROJECT) $(ASM_ARGS) + +$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt + quartus_sta $(PROJECT) $(STA_ARGS) + +smart.log: $(PROJECT_FILES) + quartus_sh --determine_smart_action $(PROJECT) > smart.log + +# Project initialization +$(PROJECT_FILES): + quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4" + +syn.chg: + $(STAMP) syn.chg + +fit.chg: + $(STAMP) fit.chg + +sta.chg: + $(STAMP) sta.chg + +asm.chg: + $(STAMP) asm.chg + +program: $(PROJECT).sof + quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof" + +clean: + rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox diff --git a/hw/syn/quartus/top8/Makefile b/hw/syn/quartus/top8/Makefile index 71d1bd39..bdcc673d 100644 --- a/hw/syn/quartus/top8/Makefile +++ b/hw/syn/quartus/top8/Makefile @@ -1,18 +1,18 @@ +FAMILY = "Arria 10" +DEVICE = 10AX115N3F40E2SG +FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10 + +#FAMILY = "Stratix 10" +#DEVICE = 1SX280HN2F43E2VG +#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10 + PROJECT = vortex_afu TOP_LEVEL_ENTITY = vortex_afu SRC_FILE = vortex_afu.sv -FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/stratix10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src +FPU_INCLUDE = ../../../rtl/fp_cores;$(FPU_CORE_PATH);../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache;../../../rtl/afu;../../../rtl/afu/ccip PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf -# Part, Family - -FAMILY = "Stratix 10" -DEVICE = 1SX280HN2F43E2VG - -#FAMILY = "Arria 10" -#DEVICE = 10AX115N3F40E2SG - # Executable Configuration SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on