diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index eba1a7c5..d6f018df 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -246,8 +246,12 @@ extern int vx_ready_wait(vx_device_h hdevice, long long timeout) { for (;;) { CHECK_RES(fpgaReadMMIO64(device->fpga, 0, MMIO_CSR_STATUS, &data)); - if (0 == data || 0 == timeout) + if (0 == data || 0 == timeout) { + if (data != 0) { + fprintf(stdout, "ready-wait timed out: status=%llu\n", (unsigned long long)data); + } break; + } nanosleep(&sleep_time, nullptr); timeout -= sleep_time_ms; }; diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 7b75973f..664af3ca 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -7,7 +7,7 @@ vortex_afu.json +define+NUM_CORES=2 +define+NUM_WARPS=4 +define+NUM_THREADS=4 -+define+L2_ENABLE=0 ++define+L2_ENABLE=1 +define+DNUM_BANKS=4 +define+INUM_BANKS=1 diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index bee56a25..1c3628e7 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -12,7 +12,7 @@ `endif `ifndef NUM_WARPS -`define NUM_WARPS 8 +`define NUM_WARPS 4 `endif `ifndef NUM_THREADS @@ -87,7 +87,7 @@ // Number of banks {1, 2, 4, 8,...} `ifndef DNUM_BANKS -`define DNUM_BANKS 8 +`define DNUM_BANKS 4 `endif // Size of a word in bytes @@ -107,12 +107,12 @@ // Miss Reserv Queue Knob `ifndef DMRVQ_SIZE -`define DMRVQ_SIZE (`NUM_WARPS*`NUM_THREADS) +`define DMRVQ_SIZE `MAX(`NUM_WARPS*`NUM_THREADS, 8) `endif // Dram Fill Rsp Queue Size `ifndef DDFPQ_SIZE -`define DDFPQ_SIZE 32 +`define DDFPQ_SIZE 16 `endif // Snoop Req Queue Size @@ -137,7 +137,7 @@ // Prefetcher `ifndef DPRFQ_SIZE -`define DPRFQ_SIZE 32 +`define DPRFQ_SIZE 16 `endif `ifndef DPRFQ_STRIDE @@ -178,12 +178,12 @@ // Miss Reserv Queue Knob `ifndef IMRVQ_SIZE -`define IMRVQ_SIZE `ICREQ_SIZE +`define IMRVQ_SIZE `MAX(`ICREQ_SIZE, 8) `endif // Dram Fill Rsp Queue Size `ifndef IDFPQ_SIZE -`define IDFPQ_SIZE 32 +`define IDFPQ_SIZE 16 `endif // Core Writeback Queue Size @@ -203,7 +203,7 @@ // Prefetcher
`ifndef IPRFQ_SIZE -`define IPRFQ_SIZE 32 +`define IPRFQ_SIZE 16 `endif `ifndef IPRFQ_STRIDE @@ -276,17 +276,17 @@ // Core Request Queue Size `ifndef L2CREQ_SIZE -`define L2CREQ_SIZE 32 +`define L2CREQ_SIZE 16 `endif // Miss Reserv Queue Knob `ifndef L2MRVQ_SIZE -`define L2MRVQ_SIZE 32 +`define L2MRVQ_SIZE `MAX(`L2CREQ_SIZE, 8) `endif // Dram Fill Rsp Queue Size `ifndef L2DFPQ_SIZE -`define L2DFPQ_SIZE 32 +`define L2DFPQ_SIZE 16 `endif // Snoop Req Queue Size @@ -311,7 +311,7 @@ // Prefetcher `ifndef L2PRFQ_SIZE -`define L2PRFQ_SIZE 32 +`define L2PRFQ_SIZE 16 `endif `ifndef L2PRFQ_STRIDE @@ -347,17 +347,17 @@ // Core Request Queue Size `ifndef L3CREQ_SIZE -`define L3CREQ_SIZE 32 +`define L3CREQ_SIZE 16 `endif // Miss Reserv Queue Knob `ifndef L3MRVQ_SIZE -`define L3MRVQ_SIZE `L3CREQ_SIZE +`define L3MRVQ_SIZE `MAX(`L3CREQ_SIZE, 8) `endif // Dram Fill Rsp Queue Size `ifndef L3DFPQ_SIZE -`define L3DFPQ_SIZE 32 +`define L3DFPQ_SIZE 16 `endif // Snoop Req Queue Size @@ -382,7 +382,7 @@ // Prefetcher `ifndef L3PRFQ_SIZE -`define L3PRFQ_SIZE 32 +`define L3PRFQ_SIZE 16 `endif `ifndef L3PRFQ_STRIDE diff --git a/hw/rtl/VX_dmem_ctrl.v b/hw/rtl/VX_dmem_ctrl.v index b7ecf5eb..c34bd055 100644 --- a/hw/rtl/VX_dmem_ctrl.v +++ b/hw/rtl/VX_dmem_ctrl.v @@ -60,13 +60,13 @@ module VX_dmem_ctrl # ( .NUM_REQUESTS (`SNUM_REQUESTS), .STAGE_1_CYCLES (`SSTAGE_1_CYCLES), .CREQ_SIZE (`SCREQ_SIZE), - .MRVQ_SIZE (1), - .DFPQ_SIZE (0), - .SNRQ_SIZE (0), + .MRVQ_SIZE (8), + .DFPQ_SIZE (1), + .SNRQ_SIZE (1), .CWBQ_SIZE (`SCWBQ_SIZE), - .DWBQ_SIZE (0), - .DFQQ_SIZE (0), - .PRFQ_SIZE (0), + .DWBQ_SIZE (1), + .DFQQ_SIZE (1), + .PRFQ_SIZE (1), .PRFQ_STRIDE (0), .SNOOP_FORWARDING (0), .DRAM_ENABLE (0), @@ -223,7 +223,7 @@ module VX_dmem_ctrl # ( .CREQ_SIZE (`ICREQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE), .DFPQ_SIZE (`IDFPQ_SIZE), - .SNRQ_SIZE (0), + .SNRQ_SIZE (1), .CWBQ_SIZE (`ICWBQ_SIZE), .DWBQ_SIZE (`IDWBQ_SIZE), .DFQQ_SIZE (`IDFQQ_SIZE), diff --git a/hw/rtl/cache/VX_cache.v b/hw/rtl/cache/VX_cache.v index 
64e31d28..59fbcd2f 100644 --- a/hw/rtl/cache/VX_cache.v +++ b/hw/rtl/cache/VX_cache.v @@ -125,13 +125,13 @@ module VX_cache #( `DEBUG_BLOCK( wire[31:0] debug_core_req_use_pc; - wire[1:0] debug_core_req_wb; - wire[2:0] debug_core_req_rmask; + wire[1:0] debug_core_req_wb; wire[4:0] debug_core_req_rd; wire[`NW_BITS-1:0] debug_core_req_warp_num; + wire[`LOG2UP(CREQ_SIZE)-1:0] debug_core_req_idx; if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin - assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rmask, debug_core_req_rd, debug_core_req_warp_num} = core_req_tag[0]; + assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0]; end ) wire [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid; diff --git a/hw/rtl/cache/VX_cache_miss_resrv.v b/hw/rtl/cache/VX_cache_miss_resrv.v index 23631381..2636b344 100644 --- a/hw/rtl/cache/VX_cache_miss_resrv.v +++ b/hw/rtl/cache/VX_cache_miss_resrv.v @@ -64,8 +64,10 @@ module VX_cache_miss_resrv #( reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size; + `STATIC_ASSERT(MRVQ_SIZE > 5, "invalid size"); + assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE)); - assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); + assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-1)); wire enqueue_possible = !miss_resrv_full; wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; diff --git a/hw/rtl/libs/VX_generic_queue.v b/hw/rtl/libs/VX_generic_queue.v index bb7f4639..5176685c 100644 --- a/hw/rtl/libs/VX_generic_queue.v +++ b/hw/rtl/libs/VX_generic_queue.v @@ -15,166 +15,150 @@ module VX_generic_queue #( output wire full, output wire [`LOG2UP(SIZE+1)-1:0] size ); - `STATIC_ASSERT(0 == SIZE || `ISPOW2(SIZE), "must be 0 or power of 2!"); + `STATIC_ASSERT(`ISPOW2(SIZE), "must be a power of 2!"); - if (SIZE == 0) begin + reg [`LOG2UP(SIZE+1)-1:0] size_r; + wire reading; + wire writing; - assign empty = 1; - assign data_out = 0; - assign full = 0; - assign size = 0; + assign
reading = pop && !empty; + assign writing = push && !full; - `UNUSED_VAR (clk) - `UNUSED_VAR (reset) - `UNUSED_VAR (push) - `UNUSED_VAR (pop) - `UNUSED_VAR (data_in) + if (SIZE == 1) begin // (SIZE == 1) + + reg [DATAW-1:0] head_r; + + always @(posedge clk) begin + if (reset) begin + head_r <= 0; + size_r <= 0; + end else begin + if (writing && !reading) begin + size_r <= 1; + end else if (reading && !writing) begin + size_r <= 0; + end + + if (writing) begin + head_r <= data_in; + end + end + end + + assign data_out = head_r; + assign empty = (size_r == 0); + assign full = (size_r != 0); + assign size = size_r; + + end else begin // (SIZE > 1) - end else begin // (SIZE > 0) - `ifdef QUEUE_FORCE_MLAB (* syn_ramstyle = "mlab" *) reg [DATAW-1:0] data [SIZE-1:0]; `else reg [DATAW-1:0] data [SIZE-1:0]; `endif - reg [`LOG2UP(SIZE+1)-1:0] size_r; - wire reading; - wire writing; + if (0 == BUFFERED_OUTPUT) begin - assign reading = pop && !empty; - assign writing = push && !full; + reg [`LOG2UP(SIZE):0] wr_ptr_r; + reg [`LOG2UP(SIZE):0] rd_ptr_r; - if (SIZE == 1) begin // (SIZE == 1) - - reg [DATAW-1:0] head_r; + wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0]; + wire [`LOG2UP(SIZE)-1:0] rd_ptr_a = rd_ptr_r[`LOG2UP(SIZE)-1:0]; always @(posedge clk) begin if (reset) begin - head_r <= 0; - size_r <= 0; + rd_ptr_r <= 0; + wr_ptr_r <= 0; + size_r <= 0; end else begin - if (writing && !reading) begin - size_r <= 1; - end else if (reading && !writing) begin - size_r <= 0; + if (writing) begin + data[wr_ptr_a] <= data_in; + wr_ptr_r <= wr_ptr_r + 1; + + if (!reading) begin + size_r <= size_r + 1; + end end - if (writing) begin - head_r <= data_in; + if (reading) begin + rd_ptr_r <= rd_ptr_r + 1; + if (!writing) begin + size_r <= size_r - 1; + end end + end + end + + assign data_out = data[rd_ptr_a]; + assign empty = (wr_ptr_r == rd_ptr_r); + assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[`LOG2UP(SIZE)] != rd_ptr_r[`LOG2UP(SIZE)]); + assign size = size_r; + + 
end else begin + + reg [DATAW-1:0] head_r; + reg [DATAW-1:0] curr_r; + reg [`LOG2UP(SIZE)-1:0] wr_ptr_r; + reg [`LOG2UP(SIZE)-1:0] rd_ptr_r; + reg [`LOG2UP(SIZE)-1:0] rd_ptr_next_r; + reg empty_r; + reg full_r; + reg bypass_r; + + always @(posedge clk) begin + if (reset) begin + wr_ptr_r <= 0; + rd_ptr_r <= 0; + rd_ptr_next_r <= 1; + empty_r <= 1; + full_r <= 0; + size_r <= 0; + end else begin + if (writing) begin + data[wr_ptr_r] <= data_in; + wr_ptr_r <= wr_ptr_r + 1; + + if (!reading) begin + empty_r <= 0; + if (size_r == SIZE-1) begin + full_r <= 1; + end + size_r <= size_r + 1; + end + end + + if (reading) begin + rd_ptr_r <= rd_ptr_next_r; + + if (SIZE > 2) begin + rd_ptr_next_r <= rd_ptr_r + 2; + end else begin // (SIZE == 2); + rd_ptr_next_r <= ~rd_ptr_next_r; + end + + if (!writing) begin + if (size_r == 1) begin + assert(rd_ptr_next_r == wr_ptr_r); + empty_r <= 1; + end; + full_r <= 0; + size_r <= size_r - 1; + end + end + + bypass_r <= writing + && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty + + curr_r <= data_in; + head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; end - end + end - assign data_out = head_r; - assign empty = (size_r == 0); - assign full = (size_r != 0); + assign data_out = bypass_r ? 
curr_r : head_r; + assign empty = empty_r; + assign full = full_r; assign size = size_r; - - end else begin // (SIZE > 1) - - if (0 == BUFFERED_OUTPUT) begin - - reg [`LOG2UP(SIZE):0] wr_ptr_r; - reg [`LOG2UP(SIZE):0] rd_ptr_r; - - wire [`LOG2UP(SIZE)-1:0] wr_ptr_a = wr_ptr_r[`LOG2UP(SIZE)-1:0]; - wire [`LOG2UP(SIZE)-1:0] rd_ptr_a = rd_ptr_r[`LOG2UP(SIZE)-1:0]; - - always @(posedge clk) begin - if (reset) begin - rd_ptr_r <= 0; - wr_ptr_r <= 0; - size_r <= 0; - end else begin - if (writing) begin - data[wr_ptr_a] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; - - if (!reading) begin - size_r <= size_r + 1; - end - end - - if (reading) begin - rd_ptr_r <= rd_ptr_r + 1; - if (!writing) begin - size_r <= size_r - 1; - end - end - end - end - - assign data_out = data[rd_ptr_a]; - assign empty = (wr_ptr_r == rd_ptr_r); - assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[`LOG2UP(SIZE)] != rd_ptr_r[`LOG2UP(SIZE)]); - assign size = size_r; - - end else begin - - reg [DATAW-1:0] head_r; - reg [DATAW-1:0] curr_r; - reg [`LOG2UP(SIZE)-1:0] wr_ptr_r; - reg [`LOG2UP(SIZE)-1:0] rd_ptr_r; - reg [`LOG2UP(SIZE)-1:0] rd_ptr_next_r; - reg empty_r; - reg full_r; - reg bypass_r; - - always @(posedge clk) begin - if (reset) begin - wr_ptr_r <= 0; - rd_ptr_r <= 0; - rd_ptr_next_r <= 1; - empty_r <= 1; - full_r <= 0; - size_r <= 0; - end else begin - if (writing) begin - data[wr_ptr_r] <= data_in; - wr_ptr_r <= wr_ptr_r + 1; - - if (!reading) begin - empty_r <= 0; - if (size_r == SIZE-1) begin - full_r <= 1; - end - size_r <= size_r + 1; - end - end - - if (reading) begin - rd_ptr_r <= rd_ptr_next_r; - - if (SIZE > 2) begin - rd_ptr_next_r <= rd_ptr_r + 2; - end else begin // (SIZE == 2); - rd_ptr_next_r <= ~rd_ptr_next_r; - end - - if (!writing) begin - if (size_r == 1) begin - assert(rd_ptr_next_r == wr_ptr_r); - empty_r <= 1; - end; - full_r <= 0; - size_r <= size_r - 1; - end - end - - bypass_r <= writing - && (empty_r || ((1 == size_r) && reading)); // empty or about to go empty - - curr_r 
<= data_in; - head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r]; - end - end - - assign data_out = bypass_r ? curr_r : head_r; - assign empty = empty_r; - assign full = full_r; - assign size = size_r; - end end end diff --git a/hw/syn/quartus/top/Makefile b/hw/syn/quartus/top/Makefile index 62b5cdd9..8feaf127 100644 --- a/hw/syn/quartus/top/Makefile +++ b/hw/syn/quartus/top/Makefile @@ -1,6 +1,6 @@ -PROJECT = Vortex_Socket -TOP_LEVEL_ENTITY = Vortex_Socket -SRC_FILE = Vortex_Socket.v +PROJECT = vortex_afu +TOP_LEVEL_ENTITY = vortex_afu +SRC_FILE = vortex_afu.sv PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Part, Family @@ -49,7 +49,7 @@ smart.log: $(PROJECT_FILES) # Project initialization $(PROJECT_FILES): - quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache" + quartus_sh -t project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc project.sdc -inc "../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/pipe_regs;../../../rtl/cache;../../../opae" syn.chg: $(STAMP) syn.chg diff --git a/hw/syn/quartus/top/vortex.sdc b/hw/syn/quartus/top/project.sdc similarity index 100% rename from hw/syn/quartus/top/vortex.sdc rename to hw/syn/quartus/top/project.sdc diff --git a/hw/syn/quartus/vortex/Makefile b/hw/syn/quartus/vortex/Makefile index 370d7320..62b5cdd9 100644 --- a/hw/syn/quartus/vortex/Makefile +++ b/hw/syn/quartus/vortex/Makefile @@ -1,6 +1,6 @@ -PROJECT = Vortex -TOP_LEVEL_ENTITY = Vortex -SRC_FILE = Vortex.v +PROJECT = Vortex_Socket +TOP_LEVEL_ENTITY = Vortex_Socket +SRC_FILE = Vortex_Socket.v PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf # Part, Family diff --git a/hw/syn/quartus/vortex/vortex.sdc b/hw/syn/quartus/vortex/project.sdc similarity index 100% rename from hw/syn/quartus/vortex/vortex.sdc 
rename to hw/syn/quartus/vortex/project.sdc diff --git a/hw/syn/quartus/vortex/timing.tcl b/hw/syn/quartus/vortex/timing.tcl index d5408ad1..411379dc 100644 --- a/hw/syn/quartus/vortex/timing.tcl +++ b/hw/syn/quartus/vortex/timing.tcl @@ -1,4 +1,4 @@ -project_open Vortex +project_open Vortex_Socket set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL @@ -6,7 +6,6 @@ create_timing_netlist read_sdc update_timing_netlist - foreach_in_collection op [get_available_operating_conditions] { set_operating_conditions $op