diff --git a/ci/blackbox.sh b/ci/blackbox.sh index 7b8f01cb..ef7a8c39 100755 --- a/ci/blackbox.sh +++ b/ci/blackbox.sh @@ -89,6 +89,14 @@ case $DRIVER in DRIVER_PATH=driver/opae DRIVER_EXTRA=vlsim ;; + asesim) + DRIVER_PATH=driver/opae + DRIVER_EXTRA=asesim + ;; + fpga) + DRIVER_PATH=driver/opae + DRIVER_EXTRA=fpga + ;; *) echo "invalid driver: $DRIVER" exit -1 diff --git a/driver/opae/Makefile b/driver/opae/Makefile index 2af34539..c0135603 100644 --- a/driver/opae/Makefile +++ b/driver/opae/Makefile @@ -36,6 +36,10 @@ ASE_DIR = ase VLSIM_DIR = vlsim +RTL_DIR=../../hw/rtl + +SCRIPT_DIR=../../hw/scripts + PROJECT = libvortex.so PROJECT_ASE = $(ASE_DIR)/libvortex.so @@ -50,7 +54,8 @@ SRCS = vortex.cpp ../common/vx_utils.cpp ifdef SCOPE CXXFLAGS += -DSCOPE SRCS += vx_scope.cpp - SET_SCOPE = SCOPE=1 + SCOPE_ENABLE = SCOPE=1 + SCOPE_H = scope-defs.h endif all: vlsim @@ -59,7 +64,16 @@ all: vlsim json: ../../hw/opae/vortex_afu.json afu_json_mgr json-info --afu-json=$^ --c-hdr=$@ -fpga: $(SRCS) +scope-defs.h: $(SCRIPT_DIR)/scope.json + $(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json + +# generate scope data +scope: scope-defs.h + +vlsim-hw: $(SCOPE_H) + $(SCOPE_ENABLE) $(MAKE) -C vlsim + +fpga: $(SRCS) $(SCOPE_H) $(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT) asesim: $(SRCS) $(ASE_DIR) @@ -68,9 +82,6 @@ asesim: $(SRCS) $(ASE_DIR) vlsim: $(SRCS) vlsim-hw $(CXX) $(CXXFLAGS) -DUSE_VLSIM $(SRCS) $(LDFLAGS) -L./vlsim $(VLSIM_LIBS) -o $(PROJECT_VLSIM) -vlsim-hw: - $(SET_SCOPE) $(MAKE) -C vlsim - vortex.o: vortex.cpp $(CXX) $(CXXFLAGS) -c vortex.cpp -o $@ diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index ebcce532..07e31069 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -40,8 +40,6 @@ TOP = vortex_afu_shim RTL_DIR=../../../hw/rtl -SCRIPT_DIR=../../../hw/scripts - SRCS = fpga.cpp opae_sim.cpp SRCS += $(RTL_DIR)/fp_cores/svdpi/float_dpi.cpp @@ -70,7 +68,6 @@ endif ifdef SCOPE VL_FLAGS += -DSCOPE CFLAGS += -DSCOPE - SCOPE_VH = $(RTL_DIR)/scope-defs.vh endif # use our OPAE shim @@ -85,14 +82,8 @@ RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip PROJECT = libopae-c-vlsim.so all: $(PROJECT) - -# generate scope data -scope: $(RTL_DIR)/scope-defs.vh - -$(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json - $(SCRIPT_DIR)/scope.py $(RTL_INCLUDE) $(CONFIGS) -cc ../scope-defs.h -vl $(RTL_DIR)/scope-defs.vh $(SCRIPT_DIR)/scope.json -$(PROJECT): $(SRCS) $(SCOPE_VH) +$(PROJECT): $(SRCS) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) make -j -C obj_dir -f V$(TOP).mk diff --git a/driver/opae/vx_scope.cpp b/driver/opae/vx_scope.cpp index 65320c0d..b40d74ee 100644 --- a/driver/opae/vx_scope.cpp +++ b/driver/opae/vx_scope.cpp @@ -57,7 +57,7 @@ static std::thread g_timeout_thread; static std::mutex g_timeout_mutex; static void timeout_callback(fpga_handle fpga) { - std::this_thread::sleep_for(std::chrono::seconds{60}); + std::this_thread::sleep_for(std::chrono::seconds{HANG_TIMEOUT}); vx_scope_stop(fpga, HANG_TIMEOUT); fpgaClose(fpga); exit(0); @@ -109,7 +109,7 @@ int vx_scope_start(fpga_handle hfpga, uint64_t delay) { // set start delay uint64_t cmd_delay = ((delay << 3) | CMD_SET_DELAY); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_delay)); - std::cout << "scope start delay: " << delay << std::endl; + std::cout << "scope start delay: " << std::dec << delay << "s" << std::endl; } #ifdef HANG_TIMEOUT @@ -133,9 +133,11 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { // stop recording uint64_t cmd_stop = ((delay << 3) | CMD_SET_STOP); CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, cmd_stop)); - std::cout << "scope stop delay: " << delay << std::endl; + std::cout << "scope stop delay: " << std::dec << delay << "s" << std::endl; } + std::cout << "scope trace dump begin..." << std::endl; + std::ofstream ofs("vx_scope.vcd"); ofs << "$version Generated by Vortex Scope $end" << std::endl; @@ -146,6 +148,8 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { dump_taps(ofs, -1); ofs << "$upscope $end" << std::endl; ofs << "enddefinitions $end" << std::endl; + + std::cout << "OK" << std::flush << std::endl; uint64_t frame_width, max_frames, data_valid, offset, delta; uint64_t timestamp = 0; @@ -163,7 +167,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { std::this_thread::sleep_for(std::chrono::seconds(1)); } while (true); - std::cout << "scope trace dump begin..." << std::endl; + std::cout << "OK" << std::flush << std::endl; // get frame width CHECK_RES(fpgaWriteMMIO64(hfpga, 0, MMIO_SCOPE_WRITE, CMD_GET_WIDTH)); @@ -235,7 +239,7 @@ int vx_scope_stop(fpga_handle hfpga, uint64_t delay) { signal_id = num_taps; if (0 == (frame_no % FRAME_FLUSH_SIZE)) { ofs << std::flush; - std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::endl; + std::cout << "*** " << frame_no << " frames, timestamp=" << timestamp << std::flush << std::endl; } } } diff --git a/hw/opae/VX_avs_wrapper.v b/hw/opae/VX_avs_wrapper.v index c24ae25a..ca814e47 100644 --- a/hw/opae/VX_avs_wrapper.v +++ b/hw/opae/VX_avs_wrapper.v @@ -1,4 +1,4 @@ -`include "VX_platform.vh" +`include "VX_define.vh" module VX_avs_wrapper #( parameter AVS_DATAW = 1, diff --git a/hw/opae/sources_1c.txt b/hw/opae/sources_1c.txt index 5c2c735f..a95fab4c 100644 --- a/hw/opae/sources_1c.txt +++ b/hw/opae/sources_1c.txt @@ -3,21 +3,21 @@ +define+SYNTHESIS +define+QUARTUS +define+FPU_FAST -#+define+SCOPE ++define+SCOPE -#+define+DBG_PRINT_CORE_ICACHE -#+define+DBG_PRINT_CORE_DCACHE -#+define+DBG_PRINT_CACHE_BANK -#+define+DBG_PRINT_CACHE_SNP -#+define+DBG_PRINT_CACHE_MSRQ -#+define+DBG_PRINT_CACHE_TAG -#+define+DBG_PRINT_CACHE_DATA -#+define+DBG_PRINT_DRAM -#+define+DBG_PRINT_PIPELINE -#+define+DBG_PRINT_OPAE -#+define+DBG_PRINT_AVS -#+define+DBG_PRINT_SCOPE -#+define+DBG_CACHE_REQ_INFO ++define+DBG_PRINT_CORE_ICACHE ++define+DBG_PRINT_CORE_DCACHE ++define+DBG_PRINT_CACHE_BANK ++define+DBG_PRINT_CACHE_SNP ++define+DBG_PRINT_CACHE_MSRQ ++define+DBG_PRINT_CACHE_TAG ++define+DBG_PRINT_CACHE_DATA ++define+DBG_PRINT_DRAM ++define+DBG_PRINT_PIPELINE ++define+DBG_PRINT_OPAE ++define+DBG_PRINT_AVS ++define+DBG_PRINT_SCOPE ++define+DBG_CACHE_REQ_INFO vortex_afu.json QI:vortex_afu.qsf diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 5ff4a768..0de63bd4 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -336,7 +336,7 @@ module VX_cluster #( .NUM_REQS (`NUM_CORES), .CREQ_SIZE (`L2CREQ_SIZE), .MSHR_SIZE (`L2MSHR_SIZE), - .DRFQ_SIZE (`L2DRFQ_SIZE), + .DRPQ_SIZE (`L2DRPQ_SIZE), .SNRQ_SIZE (`L2SNRQ_SIZE), .CWBQ_SIZE (`L2CWBQ_SIZE), .DREQ_SIZE (`L2DREQ_SIZE), diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index bc7a2558..f468e172 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -218,8 +218,8 @@ `endif // DRAM Response Queue Size -`ifndef DDRFQ_SIZE -`define DDRFQ_SIZE 4 +`ifndef DDRPQ_SIZE +`define DDRPQ_SIZE 4 `endif // Snoop Response Queue Size @@ -260,8 +260,8 @@ `endif // DRAM Response Queue Size -`ifndef IDRFQ_SIZE -`define IDRFQ_SIZE 4 +`ifndef IDRPQ_SIZE +`define IDRPQ_SIZE 4 `endif // SM Configurable Knobs ////////////////////////////////////////////////////// @@ -319,8 +319,8 @@ `endif // DRAM Response Queue Size -`ifndef L2DRFQ_SIZE -`define L2DRFQ_SIZE 4 +`ifndef L2DRPQ_SIZE +`define L2DRPQ_SIZE 4 `endif // Snoop Request Queue Size @@ -366,8 +366,8 @@ `endif // DRAM Response Queue Size -`ifndef L3DRFQ_SIZE -`define L3DRFQ_SIZE 4 +`ifndef L3DRPQ_SIZE +`define L3DRPQ_SIZE 4 `endif // Snoop Request Queue Size diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v index da8c66e7..b7f1f30e 100644 --- a/hw/rtl/VX_mem_arb.v +++ b/hw/rtl/VX_mem_arb.v @@ -32,16 +32,16 @@ module VX_mem_arb #( input wire req_ready_out, // input response - output wire [NUM_REQS-1:0] rsp_valid_out, - output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out, - output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out, - input wire [NUM_REQS-1:0] rsp_ready_out, - - // output response input wire rsp_valid_in, input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in, input wire [DATA_WIDTH-1:0] rsp_data_in, - output wire rsp_ready_in + output wire rsp_ready_in, + + // output responses + output wire [NUM_REQS-1:0] rsp_valid_out, + output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out, + output wire [NUM_REQS-1:0][DATA_WIDTH-1:0] rsp_data_out, + input wire [NUM_REQS-1:0] rsp_ready_out ); localparam DATAW = TAG_OUT_WIDTH + ADDR_WIDTH + 1 + DATA_SIZE + DATA_WIDTH; diff --git a/hw/rtl/VX_mem_unit.v b/hw/rtl/VX_mem_unit.v index 1c693262..53ce9b1b 100644 --- a/hw/rtl/VX_mem_unit.v +++ b/hw/rtl/VX_mem_unit.v @@ -91,7 +91,7 @@ module VX_mem_unit # ( .NUM_REQS (`DNUM_REQUESTS), .CREQ_SIZE (`DCREQ_SIZE), .MSHR_SIZE (`DMSHR_SIZE), - .DRFQ_SIZE (`DDRFQ_SIZE), + .DRPQ_SIZE (`DDRPQ_SIZE), .SNRQ_SIZE (`DSNRQ_SIZE), .CWBQ_SIZE (`DCWBQ_SIZE), .DREQ_SIZE (`DDREQ_SIZE), @@ -164,7 +164,7 @@ module VX_mem_unit # ( .NUM_REQS (`INUM_REQUESTS), .CREQ_SIZE (`ICREQ_SIZE), .MSHR_SIZE (`IMSHR_SIZE), - .DRFQ_SIZE (`IDRFQ_SIZE), + .DRPQ_SIZE (`IDRPQ_SIZE), .SNRQ_SIZE (1), .CWBQ_SIZE (`ICWBQ_SIZE), .DREQ_SIZE (`IDREQ_SIZE), @@ -236,7 +236,7 @@ module VX_mem_unit # ( .NUM_REQS (`SNUM_REQUESTS), .CREQ_SIZE (`SCREQ_SIZE), .MSHR_SIZE (8), - .DRFQ_SIZE (1), + .DRPQ_SIZE (1), .SNRQ_SIZE (1), .CWBQ_SIZE (`SCWBQ_SIZE), .DREQ_SIZE (1), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index cf88ed21..42fc3acc 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -339,7 +339,7 @@ module Vortex ( .NUM_REQS (`NUM_CLUSTERS), .CREQ_SIZE (`L3CREQ_SIZE), .MSHR_SIZE (`L3MSHR_SIZE), - .DRFQ_SIZE (`L3DRFQ_SIZE), + .DRPQ_SIZE (`L3DRPQ_SIZE), .SNRQ_SIZE (`L3SNRQ_SIZE), .CWBQ_SIZE (`L3CWBQ_SIZE), .DREQ_SIZE (`L3DREQ_SIZE), diff --git a/hw/rtl/cache/VX_bank.v b/hw/rtl/cache/VX_bank.v index 54b02c02..f089c6ab 100644 --- a/hw/rtl/cache/VX_bank.v +++ b/hw/rtl/cache/VX_bank.v @@ -20,7 +20,7 @@ module VX_bank #( // Miss Reserv Queue Knob parameter MSHR_SIZE = 1, // DRAM Response Queue Size - parameter DRFQ_SIZE = 1, + parameter DRPQ_SIZE = 1, // Snoop Req Queue Size parameter SNRQ_SIZE = 1, @@ -148,7 +148,7 @@ module VX_bank #( wire snrq_full; assign snp_req_ready = !snrq_full; - wire snp_req_fire = snp_req_valid && snp_req_ready; + wire snrq_push = snp_req_valid && snp_req_ready; VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), @@ -157,7 +157,7 @@ module VX_bank #( ) snp_req_queue ( .clk (clk), .reset (reset), - .push (snp_req_fire), + .push (snrq_push), .pop (snrq_pop), .data_in ({snp_req_addr, snp_req_inv, snp_req_tag}), .data_out({snrq_addr_st0, snrq_inv_st0, snrq_tag_st0}), @@ -178,41 +178,41 @@ module VX_bank #( assign snp_req_ready = 0; end - wire dfpq_pop; - wire dfpq_empty; + wire drpq_pop; + wire drpq_empty; - wire [`LINE_ADDR_WIDTH-1:0] dfpq_addr_st0; - wire [`BANK_LINE_WIDTH-1:0] dfpq_filldata_st0; + wire [`LINE_ADDR_WIDTH-1:0] drpq_addr_st0; + wire [`BANK_LINE_WIDTH-1:0] drpq_filldata_st0; - wire dram_rsp_fire = dram_rsp_valid && dram_rsp_ready; + wire drpq_push = dram_rsp_valid && dram_rsp_ready; if (DRAM_ENABLE) begin - wire dfpq_full; - assign dram_rsp_ready = !dfpq_full; + wire drpq_full; + assign dram_rsp_ready = !drpq_full; VX_generic_queue #( .DATAW(`LINE_ADDR_WIDTH + $bits(dram_rsp_data)), - .SIZE(DRFQ_SIZE), + .SIZE(DRPQ_SIZE), .BUFFERED(1) - ) dfp_queue ( + ) dram_rsp_queue ( .clk (clk), .reset (reset), - .push (dram_rsp_fire), - .pop (dfpq_pop), + .push (drpq_push), + .pop (drpq_pop), .data_in ({dram_rsp_addr, dram_rsp_data}), - .data_out({dfpq_addr_st0, dfpq_filldata_st0}), - .empty (dfpq_empty), - .full (dfpq_full), + .data_out({drpq_addr_st0, drpq_filldata_st0}), + .empty (drpq_empty), + .full (drpq_full), `UNUSED_PIN (size) ); end else begin `UNUSED_VAR (dram_rsp_valid) `UNUSED_VAR (dram_rsp_addr) `UNUSED_VAR (dram_rsp_data) - assign dfpq_empty = 1; - assign dfpq_addr_st0 = 0; - assign dfpq_filldata_st0 = 0; + assign drpq_empty = 1; + assign drpq_addr_st0 = 0; + assign drpq_filldata_st0 = 0; assign dram_rsp_ready = 0; end @@ -228,21 +228,21 @@ module VX_bank #( wire [`WORD_WIDTH-1:0] creq_writeword_st0; wire [CORE_TAG_WIDTH-1:0] creq_tag_st0; - wire core_req_fire = (| core_req_valid) && core_req_ready; + wire creq_push = (| core_req_valid) && core_req_ready; assign core_req_ready = !creq_full; - VX_bank_core_req_arb #( + VX_bank_core_req_queue #( .WORD_SIZE (WORD_SIZE), .NUM_REQS (NUM_REQS), .CREQ_SIZE (CREQ_SIZE), .CORE_TAG_WIDTH (CORE_TAG_WIDTH), .CORE_TAG_ID_BITS (CORE_TAG_ID_BITS) - ) core_req_arb ( + ) core_req_queue ( .clk (clk), .reset (reset), // Enqueue - .push (core_req_fire), + .push (creq_push), .tag_in (core_req_tag), .valids_in (core_req_valid), .rw_in (core_req_rw), @@ -343,7 +343,7 @@ module VX_bank #( wire mshr_push_stall; wire cwbq_push_stall; - wire dwbq_push_stall; + wire dreq_push_stall; wire snpq_push_stall; wire pipeline_stall; @@ -356,13 +356,13 @@ module VX_bank #( // determine which queue to pop next in piority order wire mshr_pop_unqual = mshr_valid_st0; - wire dfpq_pop_unqual = !mshr_pop_unqual && !dfpq_empty; - wire creq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !creq_empty && !mshr_going_full; - wire snrq_pop_unqual = !mshr_pop_unqual && !dfpq_pop_unqual && !creq_pop_unqual && !snrq_empty && !mshr_going_full; + wire drpq_pop_unqual = !mshr_pop_unqual && !drpq_empty; + wire creq_pop_unqual = !mshr_pop_unqual && !drpq_pop_unqual && !creq_empty && !mshr_going_full; + wire snrq_pop_unqual = !mshr_pop_unqual && !drpq_pop_unqual && !creq_pop_unqual && !snrq_empty && !mshr_going_full; assign mshr_pop = mshr_pop_unqual && !pipeline_stall && !(is_mshr_miss_st2 || is_mshr_miss_st3); // stop if previous request was a miss - assign dfpq_pop = dfpq_pop_unqual && !pipeline_stall; + assign drpq_pop = drpq_pop_unqual && !pipeline_stall; assign creq_pop = creq_pop_unqual && !pipeline_stall; assign snrq_pop = snrq_pop_unqual && !pipeline_stall; @@ -377,12 +377,12 @@ module VX_bank #( end assign is_mshr_st0 = mshr_pop_unqual; - assign is_fill_st0 = dfpq_pop_unqual; + assign is_fill_st0 = drpq_pop_unqual; - assign valid_st0 = dfpq_pop || mshr_pop || creq_pop || snrq_pop; + assign valid_st0 = drpq_pop || mshr_pop || creq_pop || snrq_pop; assign addr_st0 = mshr_pop_unqual ? mshr_addr_st0 : - dfpq_pop_unqual ? dfpq_addr_st0 : + drpq_pop_unqual ? drpq_addr_st0 : creq_pop_unqual ? creq_addr_st0[`LINE_SELECT_ADDR_RNG] : snrq_pop_unqual ? snrq_addr_st0 : 0; @@ -396,7 +396,7 @@ module VX_bank #( assign wsel_st0 = 0; end - assign writedata_st0 = dfpq_filldata_st0; + assign writedata_st0 = drpq_filldata_st0; assign inst_meta_st0 = mshr_pop_unqual ? {`REQ_TAG_WIDTH'(mshr_tag_st0), mshr_rw_st0, mshr_byteen_st0, mshr_tid_st0} : creq_pop_unqual ? {`REQ_TAG_WIDTH'(creq_tag_st0), creq_rw_st0, creq_byteen_st0, creq_tid_st0} : @@ -519,7 +519,7 @@ if (DRAM_ENABLE) begin end else begin `UNUSED_VAR (mshr_pending_hazard_unqual_st0) - `UNUSED_VAR (dram_rsp_fire) + `UNUSED_VAR (drpq_push) `UNUSED_VAR (addr_st0) assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; @@ -628,8 +628,8 @@ end wire incoming_fill_st3; // check if a matching fill request is comming - wire incoming_fill_dfp_st2 = dram_rsp_fire && (addr_st2 == dram_rsp_addr); - wire incoming_fill_st0_st2 = !dfpq_empty && (addr_st2 == dfpq_addr_st0); + wire incoming_fill_dfp_st2 = drpq_push && (addr_st2 == dram_rsp_addr); + wire incoming_fill_st0_st2 = !drpq_empty && (addr_st2 == drpq_addr_st0); wire incoming_fill_st1_st2 = is_fill_st1 && (addr_st2 == addr_st1); wire incoming_fill_st2 = incoming_fill_dfp_st2 || incoming_fill_st0_st2 @@ -678,7 +678,7 @@ end wire mshr_push = mshr_push_unqual && !cwbq_push_stall - && !dwbq_push_stall + && !dreq_push_stall && !snpq_push_stall; wire mshr_full; @@ -693,7 +693,7 @@ end wire mshr_dequeue_st3 = valid_st3 && is_mshr_st3 && !mshr_push_unqual && !pipeline_stall; // mark msrq entry that match DRAM fill as 'ready' - wire update_ready_st0 = dfpq_pop; + wire update_ready_st0 = drpq_pop; // push missed requests as 'ready' if it was a forced miss but actually had a hit // or the fill request is comming for the missed block @@ -792,7 +792,7 @@ end wire cwbq_push = cwbq_push_unqual && !cwbq_full && !mshr_push_stall - && !dwbq_push_stall + && !dreq_push_stall && !snpq_push_stall; wire cwbq_pop = core_rsp_valid && core_rsp_ready; @@ -821,62 +821,62 @@ end // Enqueue DRAM request - wire dwbq_empty, dwbq_full; + wire dreq_empty, dreq_full; - wire dwbq_push_unqual = valid_st3 && send_dwb_req_st3; + wire dreq_push_unqual = valid_st3 && send_dwb_req_st3; - assign dwbq_push_stall = dwbq_push_unqual && dwbq_full; + assign dreq_push_stall = dreq_push_unqual && dreq_full; - wire dwbq_push = dwbq_push_unqual - && !dwbq_full + wire dreq_push = dreq_push_unqual + && !dreq_full && !mshr_push_stall && !cwbq_push_stall && !snpq_push_stall; - wire dwbq_pop = dram_req_valid && dram_req_ready; + wire dreq_pop = dram_req_valid && dram_req_ready; wire writeback = WRITE_ENABLE && do_writeback_st3; - wire [`LINE_ADDR_WIDTH-1:0] dwbq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : + wire [`LINE_ADDR_WIDTH-1:0] dreq_addr = writeback ? {readtag_st3, addr_st3[`LINE_SELECT_BITS-1:0]} : addr_st3; - wire [BANK_LINE_SIZE-1:0] dwbq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; + wire [BANK_LINE_SIZE-1:0] dreq_byteen = writeback ? dirtyb_st3 : {BANK_LINE_SIZE{1'b1}}; if (DRAM_ENABLE) begin VX_generic_queue #( .DATAW(1 + BANK_LINE_SIZE + `LINE_ADDR_WIDTH + `BANK_LINE_WIDTH), .SIZE(DREQ_SIZE), .BUFFERED(1) - ) dwb_queue ( + ) dram_req_queue ( .clk (clk), .reset (reset), - .push (dwbq_push), - .pop (dwbq_pop), - .data_in ({writeback, dwbq_byteen, dwbq_addr, readdata_st3}), + .push (dreq_push), + .pop (dreq_pop), + .data_in ({writeback, dreq_byteen, dreq_addr, readdata_st3}), .data_out({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}), - .empty (dwbq_empty), - .full (dwbq_full), + .empty (dreq_empty), + .full (dreq_full), `UNUSED_PIN (size) ); end else begin - `UNUSED_VAR (dwbq_push) - `UNUSED_VAR (dwbq_pop) - `UNUSED_VAR (dwbq_addr) - `UNUSED_VAR (dwbq_byteen) + `UNUSED_VAR (dreq_push) + `UNUSED_VAR (dreq_pop) + `UNUSED_VAR (dreq_addr) + `UNUSED_VAR (dreq_byteen) `UNUSED_VAR (readtag_st3) `UNUSED_VAR (dirtyb_st3) `UNUSED_VAR (readdata_st3) `UNUSED_VAR (writeback) `UNUSED_VAR (dram_req_ready) - assign dwbq_empty = 1; - assign dwbq_full = 0; + assign dreq_empty = 1; + assign dreq_full = 0; assign dram_req_rw = 0; assign dram_req_byteen = 0; assign dram_req_addr = 0; assign dram_req_data = 0; end - assign dram_req_valid = !dwbq_empty; + assign dram_req_valid = !dreq_empty; // Enqueue snoop response @@ -890,7 +890,7 @@ end && !snpq_full && !mshr_push_stall && !cwbq_push_stall - && !dwbq_push_stall; + && !dreq_push_stall; wire snpq_pop = snp_rsp_valid && snp_rsp_ready; @@ -923,12 +923,12 @@ end end assign snp_rsp_valid = !snpq_empty - && dwbq_empty; // ensure all writebacks are sent + && dreq_empty; // ensure all writebacks are sent // bank pipeline stall assign pipeline_stall = mshr_push_stall || cwbq_push_stall - || dwbq_push_stall + || dreq_push_stall || snpq_push_stall; `SCOPE_ASSIGN (valid_st0, valid_st0); @@ -949,17 +949,17 @@ end `SCOPE_ASSIGN (addr_st3, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID)); `ifdef DBG_PRINT_CACHE_BANK - wire incoming_fill_dfp_st3 = dram_rsp_fire && (addr_st3 == dram_rsp_addr); + wire incoming_fill_dfp_st3 = drpq_push && (addr_st3 == dram_rsp_addr); always @(posedge clk) begin if (valid_st3 && miss_st3 && (incoming_fill_st3 || incoming_fill_dfp_st3)) begin $display("%t: incoming fill - addr=%0h, st3=%b, dfp=%b", $time, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), incoming_fill_st3, incoming_fill_dfp_st3); assert(!is_mshr_st3); end if (pipeline_stall) begin - $display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, cwbq_push_stall, dwbq_push_stall, snpq_push_stall); + $display("%t: cache%0d:%0d pipeline-stall: msrq=%b, cwbq=%b, dwbq=%b, snpq=%b", $time, CACHE_ID, BANK_ID, mshr_push_stall, cwbq_push_stall, dreq_push_stall, snpq_push_stall); end - if (dfpq_pop) begin - $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0); + if (drpq_pop) begin + $display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), drpq_filldata_st0); end if (creq_pop) begin if (creq_rw_st0) @@ -973,11 +973,11 @@ end if (cwbq_push) begin $display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), cwbq_tag_st3, cwbq_tid_st3, cwbq_data_st3, debug_wid_st3, debug_pc_st3); end - if (dwbq_push) begin + if (dreq_push) begin if (do_writeback_st3) - $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), readdata_st3, dirtyb_st3, debug_wid_st3, debug_pc_st3); else - $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dwbq_addr, BANK_ID), debug_wid_st3, debug_pc_st3); + $display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st3, debug_pc_st3); end if (snpq_push) begin $display("%t: cache%0d:%0d snp-rsp: addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st3, BANK_ID), snpq_tag_st3); diff --git a/hw/rtl/cache/VX_bank_core_req_arb.v b/hw/rtl/cache/VX_bank_core_req_queue.v similarity index 99% rename from hw/rtl/cache/VX_bank_core_req_arb.v rename to hw/rtl/cache/VX_bank_core_req_queue.v index 42d1b72f..4fd6ae66 100644 --- a/hw/rtl/cache/VX_bank_core_req_arb.v +++ b/hw/rtl/cache/VX_bank_core_req_queue.v @@ -1,6 +1,6 @@ `include "VX_cache_config.vh" -module VX_bank_core_req_arb #( +module VX_bank_core_req_queue #( // Size of a word in bytes parameter WORD_SIZE = 1, // Number of Word requests per cycle diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 8b7f06ad..84836ec4 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -19,7 +19,7 @@ module VX_cache #( // Miss Reserv Queue Knob parameter MSHR_SIZE = 8, // DRAM Response Queue Size - parameter DRFQ_SIZE = 4, + parameter DRPQ_SIZE = 4, // Snoop Req Queue Size parameter SNRQ_SIZE = 4, @@ -265,7 +265,7 @@ module VX_cache #( .NUM_REQS (NUM_REQS), .CREQ_SIZE (CREQ_SIZE), .MSHR_SIZE (MSHR_SIZE), - .DRFQ_SIZE (DRFQ_SIZE), + .DRPQ_SIZE (DRPQ_SIZE), .SNRQ_SIZE (SNRQ_SIZE), .CWBQ_SIZE (CWBQ_SIZE), .DREQ_SIZE (DREQ_SIZE), diff --git a/hw/rtl/cache/VX_cache_config.vh b/hw/rtl/cache/VX_cache_config.vh index f706c6bd..ff90463f 100644 --- a/hw/rtl/cache/VX_cache_config.vh +++ b/hw/rtl/cache/VX_cache_config.vh @@ -80,6 +80,6 @@ `define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))} -`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)} +`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)} `endif diff --git a/hw/rtl/cache/VX_cache_core_rsp_merge.v b/hw/rtl/cache/VX_cache_core_rsp_merge.v index 1cba1f76..316e5d14 100644 --- a/hw/rtl/cache/VX_cache_core_rsp_merge.v +++ b/hw/rtl/cache/VX_cache_core_rsp_merge.v @@ -90,8 +90,7 @@ module VX_cache_core_rsp_merge #( VX_generic_register #( .N(NUM_REQS + (NUM_REQS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)), - .R(NUM_REQS), - .PASSTHRU(NUM_BANKS < 4) + .R(NUM_REQS) ) pipe_reg ( .clk (clk), .reset (reset), diff --git a/hw/rtl/cache/VX_snp_forwarder.v b/hw/rtl/cache/VX_snp_forwarder.v index c96ef798..9ca5cbf4 100644 --- a/hw/rtl/cache/VX_snp_forwarder.v +++ b/hw/rtl/cache/VX_snp_forwarder.v @@ -195,6 +195,14 @@ module VX_snp_forwarder #( .ready_out (fwdin_ready) ); + `ifdef DBG_PRINT_CACHE_SNP + always @(posedge clk) begin + if (fwdin_valid && fwdin_ready) begin + $display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag); + end + end + `endif + end else begin `UNUSED_VAR (clk) @@ -222,9 +230,6 @@ module VX_snp_forwarder #( if (snp_fwdout_valid[0] && snp_fwdout_ready[0]) begin $display("%t: cache%0d snp-fwd-out: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, `TO_FULL_ADDR(snp_fwdout_addr[0]), snp_fwdout_inv[0], snp_fwdout_tag[0]); end - if (fwdin_valid && fwdin_ready) begin - $display("%t: cache%0d snp-fwd-in: tag=%0h", $time, CACHE_ID, fwdin_tag); - end if (snp_rsp_valid && snp_rsp_ready) begin $display("%t: cache%0d snp-fwd-rsp: addr=%0h, invalidate=%0d, tag=%0h", $time, CACHE_ID, snp_rsp_addr, snp_rsp_inv, snp_rsp_tag); end diff --git a/hw/syn/quartus/project.tcl b/hw/syn/quartus/project.tcl index 0e85bf48..c1e03adc 100644 --- a/hw/syn/quartus/project.tcl +++ b/hw/syn/quartus/project.tcl @@ -40,22 +40,22 @@ set_global_assignment -name VERILOG_MACRO NDEBUG set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name VERILOG_MACRO FPU_FAST -set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 -set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 -set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" -set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON -set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON -set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON -set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON -set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON -set_global_assignment -name POWER_USE_TA_VALUE 65 -set_global_assignment -name SEED 1 -set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON -set_global_assignment -name FITTER_EFFORT "STANDARD FIT" -set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" -set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED -set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM -set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" +#set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 +#set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 +#set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" +#set_global_assignment -name ROUTER_CLOCKING_TOPOLOGY_ANALYSIS ON +#set_global_assignment -name ROUTER_LCELL_INSERTION_AND_LOGIC_DUPLICATION ON +#set_global_assignment -name TIMEQUEST_DO_CCPP_REMOVAL ON +#set_global_assignment -name SYNTH_TIMING_DRIVEN_SYNTHESIS ON +#set_global_assignment -name TIMEQUEST_MULTICORNER_ANALYSIS ON +#set_global_assignment -name POWER_USE_TA_VALUE 65 +#set_global_assignment -name SEED 1 +#set_global_assignment -name OPTIMIZE_MULTI_CORNER_TIMING ON +#set_global_assignment -name FITTER_EFFORT "STANDARD FIT" +#set_global_assignment -name OPTIMIZE_HOLD_TIMING "ALL PATHS" +#set_global_assignment -name OPTIMIZATION_TECHNIQUE SPEED +#set_global_assignment -name ROUTER_TIMING_OPTIMIZATION_LEVEL MAXIMUM +#set_global_assignment -name OPTIMIZATION_MODE "AGGRESSIVE PERFORMANCE" set idx 0 foreach arg $q_args_orig {