diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index 0db28ba0..e22a06e8 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -74,30 +74,35 @@ struct scope_signal_t { static const scope_signal_t scope_signals[] = { { 32, "icache_req_addr" }, - { 2 , "icache_req_tag" }, + { 2, "icache_req_warp_num" }, + { 2, "icache_req_tag" }, { 32, "icache_rsp_data" }, - { 2 , "icache_rsp_tag" }, + { 2, "icache_rsp_tag" }, { 32, "dcache_req_addr" }, - { 2 , "dcache_req_tag" }, + { 2, "dcache_req_warp_num" }, + { 2, "dcache_req_tag" }, { 32, "dcache_rsp_data" }, { 2 , "dcache_rsp_tag" }, + { 32, "dram_req_addr" }, { 29, "dram_req_tag" }, - { 29, "dram_rsp_tag" }, - { 2 , "icache_req_warp_num" }, - { 2 , "dcache_req_warp_num" }, + { 29, "dram_rsp_tag" }, + { 32, "snp_req_addr" }, + { 1, "snp_req_invalidate" }, + { 16, "snp_req_tag" }, + { 16, "snp_rsp_tag" }, + { 2, "decode_warp_num" }, { 32, "decode_curr_PC" }, - { 5 , "execute_rd" }, - { 2 , "execute_warp_num" }, + { 1, "decode_is_jal" }, + { 5, "decode_rs1" }, + { 5, "decode_rs2" }, + { 2, "execute_warp_num" }, + { 5, "execute_rd" }, { 32, "execute_a" }, - { 32, "execute_b" }, - { 5 , "writeback_rd" }, - { 2 , "writeback_warp_num" }, - { 32, "writeback_data" }, - { 2 , "decode_warp_num" }, - { 1 , "decode_is_jal" }, - { 5 , "decode_rs1" }, - { 5 , "decode_rs2" }, - { 2 , "writeback_wb" }, + { 32, "execute_b" }, + { 2, "writeback_warp_num" }, + { 2, "writeback_wb" }, + { 5, "writeback_rd" }, + { 32, "writeback_data" }, { 1, "icache_req_valid" }, { 1, "icache_req_ready" }, @@ -111,6 +116,10 @@ static const scope_signal_t scope_signals[] = { { 1, "dram_req_ready" }, { 1, "dram_rsp_valid" }, { 1, "dram_rsp_ready" }, + { 1, "snp_req_valid" }, + { 1, "snp_req_ready" }, + { 1, "snp_rsp_valid" }, + { 1, "snp_rsp_ready" }, { 4, "decode_valid" }, { 4, "execute_valid" }, { 4, "writeback_valid" }, diff --git a/driver/tests/basic/basic.cpp b/driver/tests/basic/basic.cpp index 510aa7b5..8b5d838b 100755 --- a/driver/tests/basic/basic.cpp +++ b/driver/tests/basic/basic.cpp @@ -66,6 +66,11 @@ int run_memcopy_test(vx_buffer_h sbuf, ((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, value); } + // clear dbuf data + for (int i = 0; i < (64 * num_blocks) / 8; ++i) { + ((uint64_t*)vx_host_ptr(dbuf))[i] = 0; + } + // write buffer to local memory std::cout << "write buffer to local memory" << std::endl; RT_CHECK(vx_copy_to_dev(sbuf, address, 64 * num_blocks, 0)); @@ -112,6 +117,11 @@ int run_kernel_test(vx_device_h device, ((uint64_t*)vx_host_ptr(sbuf))[i] = shuffle(i, seed); } + // clear dbuf data + for (int i = 0; i < (64 * num_blocks) / 8; ++i) { + ((uint64_t*)vx_host_ptr(dbuf))[i] = 0; + } + // write buffer to local memory std::cout << "write buffer to local memory" << std::endl; RT_CHECK(vx_copy_to_dev(sbuf, src_dev_addr, 64 * num_blocks, 0)); @@ -175,11 +185,11 @@ int main(int argc, char *argv[]) { RT_CHECK(vx_alloc_shared_mem(device, 4096, &dbuf)); // run tests - /*if (0 == test || -1 == test) { + if (0 == test || -1 == test) { std::cout << "run memcopy test" << std::endl; RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d00ff00ff, 1)); RT_CHECK(run_memcopy_test(sbuf, dbuf, DEV_MEM_SRC_ADDR, 0x0badf00d40ff40ff, 64)); - }*/ + } if (1 == test || -1 == test) { std::cout << "run kernel test" << std::endl; diff --git a/driver/tests/basic/kernel.bin b/driver/tests/basic/kernel.bin index 803639e3..6efd60e5 100644 Binary files a/driver/tests/basic/kernel.bin and b/driver/tests/basic/kernel.bin differ diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 277659e0..2e1182ea 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -99,7 +99,7 @@ vortex_afu.json ../rtl/VX_inst_multiplex.v ../rtl/VX_lsu_addr_gen.v ../rtl/VX_dcache_io_arb.v -../rtl/VX_dram_arb.v +../rtl/VX_mem_arb.v ../rtl/pipe_regs/VX_f_d_reg.v ../rtl/pipe_regs/VX_i_d_reg.v diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index 8aa0a4fb..1e90c7cd 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -102,6 +102,7 @@ logic vx_dram_rsp_ready; logic vx_snp_req_valid; logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr; +logic vx_snp_req_invalidate = 0; logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag; logic vx_snp_req_ready; @@ -798,26 +799,38 @@ end `SCOPE_SIGNALS_DECL `SCOPE_ASSIGN(scope_dram_req_valid, vx_dram_req_valid); +`SCOPE_ASSIGN(scope_dram_req_addr, {vx_dram_req_addr, 4'b0}); `SCOPE_ASSIGN(scope_dram_req_tag, vx_dram_req_tag); `SCOPE_ASSIGN(scope_dram_req_ready, vx_dram_req_ready); `SCOPE_ASSIGN(scope_dram_rsp_valid, vx_dram_rsp_valid); `SCOPE_ASSIGN(scope_dram_rsp_tag, vx_dram_rsp_tag); `SCOPE_ASSIGN(scope_dram_rsp_ready, vx_dram_rsp_ready); -`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 389, "oops!") +`SCOPE_ASSIGN(scope_snp_req_valid, vx_snp_req_valid); +`SCOPE_ASSIGN(scope_snp_req_addr, {vx_snp_req_addr, 4'b0}); +`SCOPE_ASSIGN(scope_snp_req_invalidate, vx_snp_req_invalidate); +`SCOPE_ASSIGN(scope_snp_req_tag, vx_snp_req_tag); +`SCOPE_ASSIGN(scope_snp_req_ready, vx_snp_req_ready); +`SCOPE_ASSIGN(scope_snp_rsp_valid, vx_snp_rsp_valid); +`SCOPE_ASSIGN(scope_snp_rsp_tag, vx_snp_rsp_tag); +`SCOPE_ASSIGN(scope_snp_rsp_ready, vx_snp_rsp_ready); + +`STATIC_ASSERT($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST}) == 490, "oops!") wire force_changed = (scope_icache_req_valid && scope_icache_req_ready) || (scope_icache_rsp_valid && scope_icache_rsp_ready) || ((| scope_dcache_req_valid) && scope_dcache_req_ready) || ((| scope_dcache_rsp_valid) && scope_dcache_rsp_ready) || (scope_dram_req_valid && scope_dram_req_ready) - || (scope_dram_rsp_valid && scope_dram_rsp_ready); + || (scope_dram_rsp_valid && scope_dram_rsp_ready) + || (scope_snp_req_valid && scope_snp_req_ready) + || (scope_snp_rsp_valid && scope_snp_rsp_ready); VX_scope #( - .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST})), - .BUSW (64), - .SIZE (4096), - .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) + .DATAW ($bits({`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST})), + .BUSW (64), + .SIZE (4096), + .UPDW ($bits({`SCOPE_SIGNALS_UPD_LIST})) ) scope ( .clk (clk), .reset (SoftReset), @@ -864,7 +877,7 @@ Vortex_Socket #() vx_socket ( // Snoop request .snp_req_valid (vx_snp_req_valid), .snp_req_addr (vx_snp_req_addr), - .snp_req_invalidate(0), + .snp_req_invalidate(vx_snp_req_invalidate), .snp_req_tag (vx_snp_req_tag), .snp_req_ready (vx_snp_req_ready), diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index e70e4e10..240112e2 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -141,6 +141,6 @@ module VX_back_end #( `SCOPE_ASSIGN(scope_writeback_wb, writeback_if.wb); `SCOPE_ASSIGN(scope_writeback_warp_num, writeback_if.warp_num); `SCOPE_ASSIGN(scope_writeback_rd, writeback_if.rd); - `SCOPE_ASSIGN(scope_writeback_data, writeback_if.data[0]); + `SCOPE_ASSIGN(scope_writeback_data, writeback_if.data[0]); endmodule \ No newline at end of file diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 1a6f54e9..526b6307 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -228,6 +228,9 @@ // Cache ID `define L2CACHE_ID (`L3_ENABLE ? 1 : 0) +// Core request tag bits +`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES)) + // DRAM request data bits `define L2DRAM_LINE_WIDTH (`L2_ENABLE ? (`L2BANK_LINE_SIZE * 8) : `DDRAM_LINE_WIDTH) @@ -251,6 +254,9 @@ // Cache ID `define L3CACHE_ID 0 +// Core request tag bits +`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS)) + // DRAM request data bits `define L3DRAM_LINE_WIDTH (`L3_ENABLE ? (`L3BANK_LINE_SIZE * 8) : `L2DRAM_LINE_WIDTH) @@ -285,30 +291,36 @@ `ifdef SCOPE `define SCOPE_SIGNALS_DATA_LIST \ scope_icache_req_addr, \ + scope_icache_req_warp_num, \ scope_icache_req_tag, \ scope_icache_rsp_data, \ scope_icache_rsp_tag, \ scope_dcache_req_addr, \ + scope_dcache_req_warp_num, \ scope_dcache_req_tag, \ scope_dcache_rsp_data, \ scope_dcache_rsp_tag, \ + scope_dram_req_addr, \ scope_dram_req_tag, \ scope_dram_rsp_tag, \ - scope_icache_req_warp_num, \ - scope_dcache_req_warp_num, \ - scope_decode_curr_PC, \ - scope_execute_rd, \ - scope_execute_warp_num, \ - scope_execute_a, \ - scope_execute_b, \ - scope_writeback_rd, \ - scope_writeback_warp_num, \ - scope_writeback_data, \ + scope_snp_req_addr, \ + scope_snp_req_invalidate, \ + scope_snp_req_tag, \ + scope_snp_rsp_tag, \ scope_decode_warp_num, \ + scope_decode_curr_PC, \ scope_decode_is_jal, \ scope_decode_rs1, \ scope_decode_rs2, \ - scope_writeback_wb, + scope_execute_warp_num, \ + scope_execute_rd, \ + scope_execute_a, \ + scope_execute_b, \ + scope_writeback_warp_num, \ + scope_writeback_wb, \ + scope_writeback_rd, \ + scope_writeback_data, + `define SCOPE_SIGNALS_UPD_LIST \ scope_icache_req_valid, \ @@ -323,6 +335,10 @@ scope_dram_req_ready, \ scope_dram_rsp_valid, \ scope_dram_rsp_ready, \ + scope_snp_req_valid, \ + scope_snp_req_ready, \ + scope_snp_rsp_valid, \ + scope_snp_rsp_ready, \ scope_decode_valid, \ scope_execute_valid, \ scope_writeback_valid, \ @@ -351,11 +367,20 @@ wire [`DCORE_TAG_WIDTH-1:0] scope_dcache_rsp_tag; \ wire scope_dcache_rsp_ready; \ wire scope_dram_req_valid; \ + wire [31:0] scope_dram_req_addr; \ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \ wire scope_dram_req_ready; \ wire scope_dram_rsp_valid; \ wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \ wire scope_dram_rsp_ready; \ + wire scope_snp_req_valid; \ + wire [31:0] scope_snp_req_addr; \ + wire scope_snp_req_invalidate; \ + wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \ + wire scope_snp_req_ready; \ + wire scope_snp_rsp_valid; \ + wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \ + wire scope_snp_rsp_ready; \ wire scope_schedule_delay; \ wire scope_memory_delay; \ wire scope_exec_delay; \ @@ -406,6 +431,7 @@ `define SCOPE_SIGNALS_DRAM_IO \ /* verilator lint_off UNDRIVEN */ \ output wire scope_dram_req_valid, \ + output wire [31:0] scope_dram_req_addr, \ output wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag, \ output wire scope_dram_req_ready, \ output wire scope_dram_rsp_valid, \ @@ -413,6 +439,18 @@ output wire scope_dram_rsp_ready, \ /* verilator lint_on UNDRIVEN */ + `define SCOPE_SIGNALS_SNP_IO \ + /* verilator lint_off UNDRIVEN */ \ + output wire scope_snp_req_valid, \ + output wire [31:0] scope_snp_req_addr, \ + output wire scope_snp_req_invalidate, \ + output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag, \ + output wire scope_snp_req_ready, \ + output wire scope_snp_rsp_valid, \ + output wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag, \ + output wire scope_snp_rsp_ready, \ + /* verilator lint_on UNDRIVEN */ + `define SCOPE_SIGNALS_CORE_IO \ /* verilator lint_off UNDRIVEN */ \ output wire scope_schedule_delay, \ @@ -438,7 +476,7 @@ output wire [1:0] scope_writeback_warp_num, \ output wire [1:0] scope_writeback_wb, \ output wire [4:0] scope_writeback_rd, \ - output wire [31:0] scope_writeback_data, \ + output wire [31:0] scope_writeback_data, /* verilator lint_on UNDRIVEN */ `define SCOPE_SIGNALS_ICACHE_ATTACH \ @@ -465,12 +503,23 @@ `define SCOPE_SIGNALS_DRAM_ATTACH \ .scope_dram_req_valid (scope_dram_req_valid), \ + .scope_dram_req_addr (scope_dram_req_addr), \ .scope_dram_req_tag (scope_dram_req_tag), \ .scope_dram_req_ready (scope_dram_req_ready), \ .scope_dram_rsp_valid (scope_dram_rsp_valid), \ .scope_dram_rsp_tag (scope_dram_rsp_tag), \ .scope_dram_rsp_ready (scope_dram_rsp_ready), + `define SCOPE_SIGNALS_SNP_ATTACH \ + .scope_snp_req_valid (scope_snp_req_valid), \ + .scope_snp_req_addr (scope_snp_req_addr), \ + .scope_snp_req_invalidate(scope_snp_req_invalidate), \ + .scope_snp_req_tag (scope_snp_req_tag), \ + .scope_snp_req_ready (scope_snp_req_ready), \ + .scope_snp_rsp_valid (scope_snp_rsp_valid), \ + .scope_snp_rsp_tag (scope_snp_rsp_tag), \ + .scope_snp_rsp_ready (scope_snp_rsp_ready), + `define SCOPE_SIGNALS_CORE_ATTACH \ .scope_schedule_delay (scope_schedule_delay), \ .scope_memory_delay (scope_memory_delay), \ diff --git a/hw/rtl/VX_dram_arb.v b/hw/rtl/VX_dram_arb.v deleted file mode 100644 index 5bf2b5a7..00000000 --- a/hw/rtl/VX_dram_arb.v +++ /dev/null @@ -1,77 +0,0 @@ -`include "VX_define.vh" - -module VX_dram_arb #( - parameter NUM_REQUESTS = 1, - parameter DRAM_LINE_SIZE = 1, - parameter CORE_TAG_WIDTH = 1, - parameter DRAM_TAG_WIDTH = 1, - - parameter DRAM_LINE_WIDTH = DRAM_LINE_SIZE * 8, - parameter DRAM_ADDR_WIDTH = 32 - `CLOG2(DRAM_LINE_SIZE) -) ( - input wire clk, - input wire reset, - - // Core request - input wire [NUM_REQUESTS-1:0] in_dram_req_valid, - input wire [NUM_REQUESTS-1:0] in_dram_req_rw, - input wire [NUM_REQUESTS-1:0][DRAM_LINE_SIZE-1:0] in_dram_req_byteen, - input wire [NUM_REQUESTS-1:0][DRAM_ADDR_WIDTH-1:0] in_dram_req_addr, - input wire [NUM_REQUESTS-1:0][DRAM_LINE_WIDTH-1:0] in_dram_req_data, - input wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] in_dram_req_tag, - output wire [NUM_REQUESTS-1:0] in_dram_req_ready, - - // Core response - output wire [NUM_REQUESTS-1:0] in_dram_rsp_valid, - output wire [NUM_REQUESTS-1:0][DRAM_LINE_WIDTH-1:0] in_dram_rsp_data, - output wire [NUM_REQUESTS-1:0][CORE_TAG_WIDTH-1:0] in_dram_rsp_tag, - input wire [NUM_REQUESTS-1:0] in_dram_rsp_ready, - - // DRAM request - output wire out_dram_req_valid, - output wire out_dram_req_rw, - output wire [DRAM_LINE_SIZE-1:0] out_dram_req_byteen, - output wire [DRAM_ADDR_WIDTH-1:0] out_dram_req_addr, - output wire [DRAM_LINE_WIDTH-1:0] out_dram_req_data, - output wire [DRAM_TAG_WIDTH-1:0] out_dram_req_tag, - input wire out_dram_req_ready, - - // DRAM response - input wire out_dram_rsp_valid, - input wire [DRAM_LINE_WIDTH-1:0] out_dram_rsp_data, - input wire [DRAM_TAG_WIDTH-1:0] out_dram_rsp_tag, - output wire out_dram_rsp_ready -); - reg [`REQS_BITS-1:0] bus_req_sel; - - always @(posedge clk) begin - if (reset) begin - bus_req_sel <= 0; - end else begin - bus_req_sel <= bus_req_sel + 1; - end - end - - assign out_dram_req_valid = in_dram_req_valid [bus_req_sel]; - assign out_dram_req_rw = in_dram_req_rw [bus_req_sel]; - assign out_dram_req_byteen= in_dram_req_byteen [bus_req_sel]; - assign out_dram_req_addr = in_dram_req_addr [bus_req_sel]; - assign out_dram_req_data = in_dram_req_data [bus_req_sel]; - assign out_dram_req_tag = {in_dram_req_tag [bus_req_sel], (`REQS_BITS)'(bus_req_sel)}; - - genvar i; - - for (i = 0; i < NUM_REQUESTS; i++) begin - assign in_dram_req_ready[i] = out_dram_req_ready && (bus_req_sel == `REQS_BITS'(i)); - end - - wire [`REQS_BITS-1:0] bus_rsp_sel = out_dram_rsp_tag[`REQS_BITS-1:0]; - - for (i = 0; i < NUM_REQUESTS; i++) begin - assign in_dram_rsp_valid[i] = out_dram_rsp_valid && (bus_rsp_sel == `REQS_BITS'(i)); - assign in_dram_rsp_data[i] = out_dram_rsp_data; - assign in_dram_rsp_tag[i] = out_dram_rsp_tag[`REQS_BITS +: CORE_TAG_WIDTH]; - end - assign out_dram_rsp_ready = in_dram_rsp_ready[bus_rsp_sel]; - -endmodule \ No newline at end of file diff --git a/hw/rtl/VX_gpr.v b/hw/rtl/VX_gpr.v index da579017..9456e056 100644 --- a/hw/rtl/VX_gpr.v +++ b/hw/rtl/VX_gpr.v @@ -18,8 +18,8 @@ module VX_gpr ( wire write_enable = write_ce && ((writeback_if.wb != 0)); - `ifndef ASIC - + `ifndef ASIC + VX_gpr_ram gpr_ram ( .we (write_enable), .clk (clk), diff --git a/hw/rtl/VX_gpr_ram.v b/hw/rtl/VX_gpr_ram.v index d89843d3..7977f839 100644 --- a/hw/rtl/VX_gpr_ram.v +++ b/hw/rtl/VX_gpr_ram.v @@ -12,24 +12,19 @@ module VX_gpr_ram ( output reg [`NUM_THREADS-1:0][31:0] q1, output reg [`NUM_THREADS-1:0][31:0] q2 ); - // Thread Byte Bit - logic [`NUM_THREADS-1:0][3:0][7:0] ram[31:0]; + reg [`NUM_THREADS-1:0][31:0] ram[31:0]; + + integer i; + + `UNUSED_VAR(reset) always @(posedge clk) begin - if (reset) begin - //-- - end else begin - if (we) begin - integer i; - for (i = 0; i < `NUM_THREADS; i++) begin - if (be[i]) begin - ram[waddr][i][0] <= wdata[i][7:0]; - ram[waddr][i][1] <= wdata[i][15:8]; - ram[waddr][i][2] <= wdata[i][23:16]; - ram[waddr][i][3] <= wdata[i][31:24]; - end + if (we) begin + for (i = 0; i < `NUM_THREADS; i++) begin + if (be[i]) begin + ram[waddr][i] <= wdata[i]; end - end + end end end diff --git a/hw/rtl/VX_gpr_wrapper.v b/hw/rtl/VX_gpr_wrapper.v index ec0f41a0..3da77564 100644 --- a/hw/rtl/VX_gpr_wrapper.v +++ b/hw/rtl/VX_gpr_wrapper.v @@ -8,7 +8,7 @@ module VX_gpr_wrapper ( VX_gpr_jal_if gpr_jal_if, output wire [`NUM_THREADS-1:0][31:0] a_reg_data, - output wire [`NUM_THREADS-1:0][31:0] b_reg_data + output wire [`NUM_THREADS-1:0][31:0] b_reg_data ); wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_a_reg_data; wire [`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] tmp_b_reg_data; diff --git a/hw/rtl/VX_mem_arb.v b/hw/rtl/VX_mem_arb.v new file mode 100644 index 00000000..ab63ceb5 --- /dev/null +++ b/hw/rtl/VX_mem_arb.v @@ -0,0 +1,100 @@ +`include "VX_define.vh" + +module VX_mem_arb #( + parameter NUM_REQUESTS = 1, + parameter WORD_SIZE = 1, + parameter TAG_IN_WIDTH = 1, + parameter TAG_OUT_WIDTH = 1, + + parameter WORD_WIDTH = WORD_SIZE * 8, + parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), + parameter REQS_BITS = `CLOG2(NUM_REQUESTS) +) ( + input wire clk, + input wire reset, + + // input requests + input wire [NUM_REQUESTS-1:0] in_mem_req_valid, + input wire [NUM_REQUESTS-1:0] in_mem_req_rw, + input wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] in_mem_req_byteen, + input wire [NUM_REQUESTS-1:0][ADDR_WIDTH-1:0] in_mem_req_addr, + input wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_mem_req_data, + input wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_mem_req_tag, + output wire [NUM_REQUESTS-1:0] in_mem_req_ready, + + // input response + output wire [NUM_REQUESTS-1:0] in_mem_rsp_valid, + output wire [NUM_REQUESTS-1:0][WORD_WIDTH-1:0] in_mem_rsp_data, + output wire [NUM_REQUESTS-1:0][TAG_IN_WIDTH-1:0] in_mem_rsp_tag, + input wire [NUM_REQUESTS-1:0] in_mem_rsp_ready, + + // output request + output wire out_mem_req_valid, + output wire out_mem_req_rw, + output wire [WORD_SIZE-1:0] out_mem_req_byteen, + output wire [ADDR_WIDTH-1:0] out_mem_req_addr, + output wire [WORD_WIDTH-1:0] out_mem_req_data, + output wire [TAG_OUT_WIDTH-1:0] out_mem_req_tag, + input wire out_mem_req_ready, + + // output response + input wire out_mem_rsp_valid, + input wire [WORD_WIDTH-1:0] out_mem_rsp_data, + input wire [TAG_OUT_WIDTH-1:0] out_mem_rsp_tag, + output wire out_mem_rsp_ready +); + if (NUM_REQUESTS == 1) begin + + `UNUSED_VAR (clk) + `UNUSED_VAR (reset) + + assign out_mem_req_valid = in_mem_req_valid; + assign out_mem_req_rw = in_mem_req_rw; + assign out_mem_req_byteen = in_mem_req_byteen; + assign out_mem_req_addr = in_mem_req_addr; + assign out_mem_req_data = in_mem_req_data; + assign out_mem_req_tag = in_mem_req_tag; + assign in_mem_req_ready = out_mem_req_ready; + + assign in_mem_rsp_valid = out_mem_rsp_valid; + assign in_mem_rsp_data = out_mem_rsp_data; + assign in_mem_rsp_tag = out_mem_rsp_tag; + assign out_mem_rsp_ready = in_mem_rsp_ready; + + end else begin + + reg [REQS_BITS-1:0] bus_req_sel; + + always @(posedge clk) begin + if (reset) begin + bus_req_sel <= 0; + end else begin + bus_req_sel <= bus_req_sel + 1; + end + end + + assign out_mem_req_valid = in_mem_req_valid [bus_req_sel]; + assign out_mem_req_rw = in_mem_req_rw [bus_req_sel]; + assign out_mem_req_byteen = in_mem_req_byteen [bus_req_sel]; + assign out_mem_req_addr = in_mem_req_addr [bus_req_sel]; + assign out_mem_req_data = in_mem_req_data [bus_req_sel]; + assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)}; + + genvar i; + + for (i = 0; i < NUM_REQUESTS; i++) begin + assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i)); + end + + wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0]; + + for (i = 0; i < NUM_REQUESTS; i++) begin + assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i)); + assign in_mem_rsp_data[i] = out_mem_rsp_data; + assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH]; + end + assign out_mem_rsp_ready = in_mem_rsp_ready[bus_rsp_sel]; + + end + +endmodule \ No newline at end of file diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index 5e2f46c2..93c5dd5b 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -56,16 +56,16 @@ module Vortex #( // I/O request output wire io_req_valid, output wire io_req_rw, - output wire[3:0] io_req_byteen, - output wire[29:0] io_req_addr, - output wire[31:0] io_req_data, - output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag, + output wire [3:0] io_req_byteen, + output wire [29:0] io_req_addr, + output wire [31:0] io_req_data, + output wire [`DCORE_TAG_WIDTH-1:0] io_req_tag, input wire io_req_ready, // I/O response input wire io_rsp_valid, - input wire[31:0] io_rsp_data, - input wire[`DCORE_TAG_WIDTH-1:0] io_rsp_tag, + input wire [31:0] io_rsp_data, + input wire [`DCORE_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, // Status diff --git a/hw/rtl/Vortex_Cluster.v b/hw/rtl/Vortex_Cluster.v index 37d0d21a..97a2f8c7 100644 --- a/hw/rtl/Vortex_Cluster.v +++ b/hw/rtl/Vortex_Cluster.v @@ -15,98 +15,100 @@ module Vortex_Cluster #( // DRAM request output wire dram_req_valid, output wire dram_req_rw, - output wire[`L2DRAM_BYTEEN_WIDTH-1:0] dram_req_byteen, - output wire[`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr, - output wire[`L2DRAM_LINE_WIDTH-1:0] dram_req_data, - output wire[`L2DRAM_TAG_WIDTH-1:0] dram_req_tag, + output wire [`L2DRAM_BYTEEN_WIDTH-1:0] dram_req_byteen, + output wire [`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr, + output wire [`L2DRAM_LINE_WIDTH-1:0] dram_req_data, + output wire [`L2DRAM_TAG_WIDTH-1:0] dram_req_tag, input wire dram_req_ready, // DRAM response input wire dram_rsp_valid, - input wire[`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data, - input wire[`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag, + input wire [`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data, + input wire [`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, // Snoop request input wire snp_req_valid, - input wire[`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire [`L2DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire snp_req_invalidate, - input wire[`L2SNP_TAG_WIDTH-1:0] snp_req_tag, + input wire [`L2SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, // Snoop response output wire snp_rsp_valid, - output wire[`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag, + output wire [`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag, input wire snp_rsp_ready, // I/O request output wire io_req_valid, output wire io_req_rw, - output wire[3:0] io_req_byteen, - output wire[29:0] io_req_addr, - output wire[31:0] io_req_data, - output wire[`DCORE_TAG_WIDTH-1:0] io_req_tag, + output wire [3:0] io_req_byteen, + output wire [29:0] io_req_addr, + output wire [31:0] io_req_data, + output wire [`DCORE_TAG_WIDTH-1:0] io_req_tag, input wire io_req_ready, // I/O response input wire io_rsp_valid, - input wire[31:0] io_rsp_data, - input wire[`DCORE_TAG_WIDTH-1:0] io_rsp_tag, + input wire [31:0] io_rsp_data, + input wire [`DCORE_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, // Status output wire busy, output wire ebreak ); - wire[`NUM_CORES-1:0] per_core_D_dram_req_valid; - wire[`NUM_CORES-1:0] per_core_D_dram_req_rw; - wire[`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] per_core_D_dram_req_byteen; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr; - wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data; - wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag; - wire[`NUM_CORES-1:0] per_core_D_dram_req_ready; + wire [`NUM_CORES-1:0] per_core_D_dram_req_valid; + wire [`NUM_CORES-1:0] per_core_D_dram_req_rw; + wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] per_core_D_dram_req_byteen; + wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_D_dram_req_addr; + wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_req_data; + wire [`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_req_tag; + wire [`NUM_CORES-1:0] per_core_D_dram_req_ready; - wire[`NUM_CORES-1:0] per_core_D_dram_rsp_valid; - wire[`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data; - wire[`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag; - wire[`NUM_CORES-1:0] per_core_D_dram_rsp_ready; + wire [`NUM_CORES-1:0] per_core_D_dram_rsp_valid; + wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_D_dram_rsp_data; + wire [`NUM_CORES-1:0][`DDRAM_TAG_WIDTH-1:0] per_core_D_dram_rsp_tag; + wire [`NUM_CORES-1:0] per_core_D_dram_rsp_ready; - wire[`NUM_CORES-1:0] per_core_I_dram_req_valid; - wire[`NUM_CORES-1:0] per_core_I_dram_req_rw; - wire[`NUM_CORES-1:0][`IDRAM_BYTEEN_WIDTH-1:0] per_core_I_dram_req_byteen; - wire[`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr; - wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data; - wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag; - wire[`NUM_CORES-1:0] per_core_I_dram_req_ready; + wire [`NUM_CORES-1:0] per_core_I_dram_req_valid; + wire [`NUM_CORES-1:0] per_core_I_dram_req_rw; + wire [`NUM_CORES-1:0][`IDRAM_BYTEEN_WIDTH-1:0] per_core_I_dram_req_byteen; + wire [`NUM_CORES-1:0][`IDRAM_ADDR_WIDTH-1:0] per_core_I_dram_req_addr; + wire [`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_req_data; + wire [`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_req_tag; + wire [`NUM_CORES-1:0] per_core_I_dram_req_ready; - wire[`NUM_CORES-1:0] per_core_I_dram_rsp_valid; - wire[`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data; - wire[`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; - wire[`NUM_CORES-1:0] per_core_I_dram_rsp_ready; + wire [`NUM_CORES-1:0] per_core_I_dram_rsp_valid; + wire [`NUM_CORES-1:0][`IDRAM_LINE_WIDTH-1:0] per_core_I_dram_rsp_data; + wire [`NUM_CORES-1:0][`IDRAM_TAG_WIDTH-1:0] per_core_I_dram_rsp_tag; + wire [`NUM_CORES-1:0] per_core_I_dram_rsp_ready; - wire[`NUM_CORES-1:0] per_core_snp_req_valid; - wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr; - wire[`NUM_CORES-1:0] per_core_snp_req_invalidate; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag; - wire[`NUM_CORES-1:0] per_core_snp_req_ready; + wire [`NUM_CORES-1:0] per_core_snp_req_valid; + wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_snp_req_addr; + wire [`NUM_CORES-1:0] per_core_snp_req_invalidate; + wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_req_tag; + wire [`NUM_CORES-1:0] per_core_snp_req_ready; - wire[`NUM_CORES-1:0] per_core_snp_rsp_valid; - wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag; - wire[`NUM_CORES-1:0] per_core_snp_rsp_ready; + wire [`NUM_CORES-1:0] per_core_snp_rsp_valid; + wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag; + wire [`NUM_CORES-1:0] per_core_snp_rsp_ready; -`IGNORE_WARNINGS_BEGIN - wire[`NUM_CORES-1:0] per_core_io_req_valid; - wire[`NUM_CORES-1:0] per_core_io_req_rw; - wire[`NUM_CORES-1:0][3:0] per_core_io_req_byteen; - wire[`NUM_CORES-1:0][29:0] per_core_io_req_addr; - wire[`NUM_CORES-1:0][31:0] per_core_io_req_data; - wire[`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_req_tag; + wire [`NUM_CORES-1:0] per_core_io_req_valid; + wire [`NUM_CORES-1:0] per_core_io_req_rw; + wire [`NUM_CORES-1:0][3:0] per_core_io_req_byteen; + wire [`NUM_CORES-1:0][29:0] per_core_io_req_addr; + wire [`NUM_CORES-1:0][31:0] per_core_io_req_data; + wire [`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_req_tag; + wire [`NUM_CORES-1:0] per_core_io_req_ready; - wire[`NUM_CORES-1:0] per_core_io_rsp_ready; -`IGNORE_WARNINGS_END + wire [`NUM_CORES-1:0] per_core_io_rsp_valid; + wire [`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_rsp_tag; + wire [`NUM_CORES-1:0][31:0] per_core_io_rsp_data; + wire [`NUM_CORES-1:0] per_core_io_rsp_ready; - wire[`NUM_CORES-1:0] per_core_busy; - wire[`NUM_CORES-1:0] per_core_ebreak; + wire [`NUM_CORES-1:0] per_core_busy; + wire [`NUM_CORES-1:0] per_core_ebreak; genvar i; for (i = 0; i < `NUM_CORES; i++) begin @@ -120,6 +122,7 @@ module Vortex_Cluster #( .clk (clk), .reset (reset), + .D_dram_req_valid (per_core_D_dram_req_valid [i]), .D_dram_req_rw (per_core_D_dram_req_rw [i]), .D_dram_req_byteen (per_core_D_dram_req_byteen [i]), @@ -160,27 +163,57 @@ module Vortex_Cluster #( .io_req_addr (per_core_io_req_addr [i]), .io_req_data (per_core_io_req_data [i]), .io_req_tag (per_core_io_req_tag [i]), - .io_req_ready (io_req_ready), + .io_req_ready (per_core_io_req_ready [i]), - .io_rsp_valid (io_rsp_valid), - .io_rsp_data (io_rsp_data), - .io_rsp_tag (io_rsp_tag), + .io_rsp_valid (per_core_io_rsp_valid [i]), + .io_rsp_data (per_core_io_rsp_data [i]), + .io_rsp_tag (per_core_io_rsp_tag [i]), .io_rsp_ready (per_core_io_rsp_ready [i]), .busy (per_core_busy [i]), .ebreak (per_core_ebreak [i]) ); - end + end - assign io_req_valid = per_core_io_req_valid[0]; - assign io_req_rw = per_core_io_req_rw[0]; - assign io_req_byteen = per_core_io_req_byteen[0]; - assign io_req_addr = per_core_io_req_addr[0]; - assign io_req_data = per_core_io_req_data[0]; - assign io_req_byteen = per_core_io_req_byteen[0]; - assign io_req_tag = per_core_io_req_tag[0]; + VX_mem_arb #( + .NUM_REQUESTS (`NUM_CLUSTERS), + .WORD_SIZE (4), + .TAG_IN_WIDTH (`DCORE_TAG_WIDTH), + .TAG_OUT_WIDTH (`L2CORE_TAG_WIDTH) + ) io_arb ( + .clk (clk), + .reset (reset), - assign io_rsp_ready = per_core_io_rsp_ready[0]; + // input requests + .in_mem_req_valid (per_core_io_req_valid), + .in_mem_req_rw (per_core_io_req_rw), + .in_mem_req_byteen (per_core_io_req_byteen), + .in_mem_req_addr (per_core_io_req_addr), + .in_mem_req_data (per_core_io_req_data), + .in_mem_req_tag (per_core_io_req_tag), + .in_mem_req_ready (per_core_io_req_ready), + + // input responses + .in_mem_rsp_valid (per_core_io_rsp_valid), + .in_mem_rsp_data (per_core_io_rsp_data), + .in_mem_rsp_tag (per_core_io_rsp_tag), + .in_mem_rsp_ready (per_core_io_rsp_ready), + + // output request + .out_mem_req_valid (io_req_valid), + .out_mem_req_rw (io_req_rw), + .out_mem_req_byteen (io_req_byteen), + .out_mem_req_addr (io_req_addr), + .out_mem_req_data (io_req_data), + .out_mem_req_tag (io_req_tag), + .out_mem_req_ready (io_req_ready), + + // output response + .out_mem_rsp_valid (io_rsp_valid), + .out_mem_rsp_tag (io_rsp_tag), + .out_mem_rsp_data (io_rsp_data), + .out_mem_rsp_ready (io_rsp_ready) + ); assign busy = (| per_core_busy); assign ebreak = (& per_core_ebreak); @@ -343,18 +376,18 @@ module Vortex_Cluster #( end else begin - wire[`L2NUM_REQUESTS-1:0] arb_core_req_valid; - wire[`L2NUM_REQUESTS-1:0] arb_core_req_rw; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] arb_core_req_byteen; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_core_req_addr; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_req_tag; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_core_req_data; - wire[`L2NUM_REQUESTS-1:0] arb_core_req_ready; + wire[`L2NUM_REQUESTS-1:0] arb_dram_req_valid; + wire[`L2NUM_REQUESTS-1:0] arb_dram_req_rw; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_BYTEEN_WIDTH-1:0] arb_dram_req_byteen; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_dram_req_addr; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_dram_req_tag; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_dram_req_data; + wire[`L2NUM_REQUESTS-1:0] arb_dram_req_ready; - wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_valid; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_core_rsp_data; - wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_core_rsp_tag; - wire[`L2NUM_REQUESTS-1:0] arb_core_rsp_ready; + wire[`L2NUM_REQUESTS-1:0] arb_dram_rsp_valid; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_LINE_WIDTH-1:0] arb_dram_rsp_data; + wire[`L2NUM_REQUESTS-1:0][`DDRAM_TAG_WIDTH-1:0] arb_dram_rsp_tag; + wire[`L2NUM_REQUESTS-1:0] arb_dram_rsp_ready; wire[`NUM_CORES-1:0] arb_snp_fwdout_valid; wire[`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] arb_snp_fwdout_addr; @@ -367,38 +400,38 @@ module Vortex_Cluster #( wire[`NUM_CORES-1:0] arb_snp_fwdin_ready; for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin - assign arb_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; - assign arb_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; + assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)]; + assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)]; - assign arb_core_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; - assign arb_core_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; + assign arb_dram_req_rw [i] = per_core_D_dram_req_rw[(i/2)]; + assign arb_dram_req_rw [i+1] = per_core_I_dram_req_rw[(i/2)]; - assign arb_core_req_byteen[i] = per_core_D_dram_req_byteen[(i/2)]; - assign arb_core_req_byteen[i+1] = per_core_I_dram_req_byteen[(i/2)]; + assign arb_dram_req_byteen[i] = per_core_D_dram_req_byteen[(i/2)]; + assign arb_dram_req_byteen[i+1] = per_core_I_dram_req_byteen[(i/2)]; - assign arb_core_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; - assign arb_core_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; + assign arb_dram_req_addr [i] = per_core_D_dram_req_addr[(i/2)]; + assign arb_dram_req_addr [i+1] = per_core_I_dram_req_addr[(i/2)]; - assign arb_core_req_data [i] = per_core_D_dram_req_data[(i/2)]; - assign arb_core_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; + assign arb_dram_req_data [i] = per_core_D_dram_req_data[(i/2)]; + assign arb_dram_req_data [i+1] = per_core_I_dram_req_data[(i/2)]; - assign arb_core_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; - assign arb_core_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; + assign arb_dram_req_tag [i] = per_core_D_dram_req_tag[(i/2)]; + assign arb_dram_req_tag [i+1] = per_core_I_dram_req_tag[(i/2)]; - assign per_core_D_dram_req_ready [(i/2)] = arb_core_req_ready[i]; - assign per_core_I_dram_req_ready [(i/2)] = arb_core_req_ready[i+1]; + assign per_core_D_dram_req_ready [(i/2)] = arb_dram_req_ready[i]; + assign per_core_I_dram_req_ready [(i/2)] = arb_dram_req_ready[i+1]; - assign per_core_D_dram_rsp_valid [(i/2)] = arb_core_rsp_valid[i]; - assign per_core_I_dram_rsp_valid [(i/2)] = arb_core_rsp_valid[i+1]; + assign per_core_D_dram_rsp_valid [(i/2)] = arb_dram_rsp_valid[i]; + assign per_core_I_dram_rsp_valid [(i/2)] = arb_dram_rsp_valid[i+1]; - assign per_core_D_dram_rsp_data [(i/2)] = arb_core_rsp_data[i]; - assign per_core_I_dram_rsp_data [(i/2)] = arb_core_rsp_data[i+1]; + assign per_core_D_dram_rsp_data [(i/2)] = arb_dram_rsp_data[i]; + assign per_core_I_dram_rsp_data [(i/2)] = arb_dram_rsp_data[i+1]; - assign per_core_D_dram_rsp_tag [(i/2)] = arb_core_rsp_tag[i]; - assign per_core_I_dram_rsp_tag [(i/2)] = arb_core_rsp_tag[i+1]; + assign per_core_D_dram_rsp_tag [(i/2)] = arb_dram_rsp_tag[i]; + assign per_core_I_dram_rsp_tag [(i/2)] = arb_dram_rsp_tag[i+1]; - assign arb_core_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; - assign arb_core_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; + assign arb_dram_rsp_ready [i] = per_core_D_dram_rsp_ready[(i/2)]; + assign arb_dram_rsp_ready [i+1] = per_core_I_dram_rsp_ready[(i/2)]; assign per_core_snp_req_valid [(i/2)] = arb_snp_fwdout_valid [(i/2)]; assign per_core_snp_req_addr [(i/2)] = arb_snp_fwdout_addr [(i/2)]; @@ -456,44 +489,44 @@ module Vortex_Cluster #( assign arb_snp_fwdin_ready = snp_rsp_ready; end - VX_dram_arb #( - .NUM_REQUESTS (`L2NUM_REQUESTS), - .DRAM_LINE_SIZE (`L2BANK_LINE_SIZE), - .CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH), - .DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH) + VX_mem_arb #( + .NUM_REQUESTS (`L2NUM_REQUESTS), + .WORD_SIZE (`L2BANK_LINE_SIZE), + .TAG_IN_WIDTH (`DDRAM_TAG_WIDTH), + .TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH) ) dram_arb ( - .clk (clk), - .reset (reset), + .clk (clk), + .reset (reset), // Core request - .in_dram_req_valid (arb_core_req_valid), - .in_dram_req_rw (arb_core_req_rw), - .in_dram_req_byteen (arb_core_req_byteen), - .in_dram_req_addr (arb_core_req_addr), - .in_dram_req_data (arb_core_req_data), - .in_dram_req_tag (arb_core_req_tag), - .in_dram_req_ready (arb_core_req_ready), + .in_mem_req_valid (arb_dram_req_valid), + .in_mem_req_rw (arb_dram_req_rw), + .in_mem_req_byteen (arb_dram_req_byteen), + .in_mem_req_addr (arb_dram_req_addr), + .in_mem_req_data (arb_dram_req_data), + .in_mem_req_tag (arb_dram_req_tag), + .in_mem_req_ready (arb_dram_req_ready), // Core response - .in_dram_rsp_valid (arb_core_rsp_valid), - .in_dram_rsp_data (arb_core_rsp_data), - .in_dram_rsp_tag (arb_core_rsp_tag), - .in_dram_rsp_ready (arb_core_rsp_ready), + .in_mem_rsp_valid (arb_dram_rsp_valid), + .in_mem_rsp_data (arb_dram_rsp_data), + .in_mem_rsp_tag (arb_dram_rsp_tag), + .in_mem_rsp_ready (arb_dram_rsp_ready), // DRAM request - .out_dram_req_valid (dram_req_valid), - .out_dram_req_rw (dram_req_rw), - .out_dram_req_byteen (dram_req_byteen), - .out_dram_req_addr (dram_req_addr), - .out_dram_req_data (dram_req_data), - .out_dram_req_tag (dram_req_tag), - .out_dram_req_ready (dram_req_ready), + .out_mem_req_valid (dram_req_valid), + .out_mem_req_rw (dram_req_rw), + .out_mem_req_byteen (dram_req_byteen), + .out_mem_req_addr (dram_req_addr), + .out_mem_req_data (dram_req_data), + .out_mem_req_tag (dram_req_tag), + .out_mem_req_ready (dram_req_ready), // DRAM response - .out_dram_rsp_valid (dram_rsp_valid), - .out_dram_rsp_tag (dram_rsp_tag), - .out_dram_rsp_data (dram_rsp_data), - .out_dram_rsp_ready (dram_rsp_ready) + .out_mem_rsp_valid (dram_rsp_valid), + .out_mem_rsp_tag (dram_rsp_tag), + .out_mem_rsp_data (dram_rsp_data), + .out_mem_rsp_ready (dram_rsp_ready) ); end diff --git a/hw/rtl/Vortex_Socket.v b/hw/rtl/Vortex_Socket.v index afb88ad2..3729b1f5 100644 --- a/hw/rtl/Vortex_Socket.v +++ b/hw/rtl/Vortex_Socket.v @@ -13,43 +13,43 @@ module Vortex_Socket ( // DRAM request output wire dram_req_valid, output wire dram_req_rw, - output wire[`VX_DRAM_BYTEEN_WIDTH-1:0] dram_req_byteen, - output wire[`VX_DRAM_ADDR_WIDTH-1:0] dram_req_addr, - output wire[`VX_DRAM_LINE_WIDTH-1:0] dram_req_data, - output wire[`VX_DRAM_TAG_WIDTH-1:0] dram_req_tag, + output wire [`VX_DRAM_BYTEEN_WIDTH-1:0] dram_req_byteen, + output wire [`VX_DRAM_ADDR_WIDTH-1:0] dram_req_addr, + output wire [`VX_DRAM_LINE_WIDTH-1:0] dram_req_data, + output wire [`VX_DRAM_TAG_WIDTH-1:0] dram_req_tag, input wire dram_req_ready, // DRAM response input wire dram_rsp_valid, - input wire[`VX_DRAM_LINE_WIDTH-1:0] dram_rsp_data, - input wire[`VX_DRAM_TAG_WIDTH-1:0] dram_rsp_tag, + input wire [`VX_DRAM_LINE_WIDTH-1:0] dram_rsp_data, + input wire [`VX_DRAM_TAG_WIDTH-1:0] dram_rsp_tag, output wire dram_rsp_ready, // Snoop request input wire snp_req_valid, - input wire[`VX_DRAM_ADDR_WIDTH-1:0] snp_req_addr, + input wire [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_addr, input wire snp_req_invalidate, - input wire[`VX_SNP_TAG_WIDTH-1:0] snp_req_tag, + input wire [`VX_SNP_TAG_WIDTH-1:0] snp_req_tag, output wire snp_req_ready, // Snoop response output wire snp_rsp_valid, - output wire[`VX_SNP_TAG_WIDTH-1:0] snp_rsp_tag, + output wire [`VX_SNP_TAG_WIDTH-1:0] snp_rsp_tag, input wire snp_rsp_ready, // I/O request output wire io_req_valid, output wire io_req_rw, - output wire[3:0] io_req_byteen, - output wire[29:0] io_req_addr, - output wire[31:0] io_req_data, - output wire[`VX_CORE_TAG_WIDTH-1:0] io_req_tag, + output wire [3:0] io_req_byteen, + output wire [29:0] io_req_addr, + output wire [31:0] io_req_data, + output wire [`VX_CORE_TAG_WIDTH-1:0] io_req_tag, input wire io_req_ready, // I/O response input wire io_rsp_valid, - input wire[31:0] io_rsp_data, - input wire[`VX_CORE_TAG_WIDTH-1:0] io_rsp_tag, + input wire [31:0] io_rsp_data, + input wire [`VX_CORE_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, // Status @@ -111,42 +111,44 @@ module Vortex_Socket ( end else begin - wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid; - wire[`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw; - wire[`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen; - wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; - wire[`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; - wire[`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag; wire l3_core_req_ready; - wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; - wire[`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; - wire[`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; - wire[`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; + wire [`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; - wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate; - wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_snp_req_addr; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_invalidate; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_ready; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; - wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; - wire[`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; - `IGNORE_WARNINGS_BEGIN - wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_valid; - wire[`NUM_CLUSTERS-1:0] per_cluster_io_req_rw; - wire[`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen; - wire[`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr; - wire[`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data; - wire[`NUM_CLUSTERS-1:0][`DCORE_TAG_WIDTH-1:0] per_cluster_io_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_valid; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw; + wire [`NUM_CLUSTERS-1:0][3:0] per_cluster_io_req_byteen; + wire [`NUM_CLUSTERS-1:0][29:0] per_cluster_io_req_addr; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_req_data; + wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready; - wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; - `IGNORE_WARNINGS_END + wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid; + wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag; + wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data; + wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; - wire[`NUM_CLUSTERS-1:0] per_cluster_busy; - wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak; + wire [`NUM_CLUSTERS-1:0] per_cluster_busy; + wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak; genvar i; for (i = 0; i < `NUM_CLUSTERS; i++) begin @@ -190,62 +192,93 @@ module Vortex_Socket ( .io_req_addr (per_cluster_io_req_addr [i]), .io_req_data (per_cluster_io_req_data [i]), .io_req_tag (per_cluster_io_req_tag [i]), - .io_req_ready (io_req_ready), + .io_req_ready (per_cluster_io_req_ready [i]), - .io_rsp_valid (io_rsp_valid), - .io_rsp_data (io_rsp_data), - .io_rsp_tag (io_rsp_tag), + .io_rsp_valid (per_cluster_io_rsp_valid [i]), + .io_rsp_data (per_cluster_io_rsp_data [i]), + .io_rsp_tag (per_cluster_io_rsp_tag [i]), .io_rsp_ready (per_cluster_io_rsp_ready [i]), .busy (per_cluster_busy [i]), .ebreak (per_cluster_ebreak [i]) ); - end + end - assign io_req_valid = per_cluster_io_req_valid[0]; - assign io_req_rw = per_cluster_io_req_rw[0]; - assign io_req_byteen = per_cluster_io_req_byteen[0]; - assign io_req_addr = per_cluster_io_req_addr[0]; - assign io_req_data = per_cluster_io_req_data[0]; - assign io_req_tag = per_cluster_io_req_tag[0]; + VX_mem_arb #( + .NUM_REQUESTS (`NUM_CLUSTERS), + .WORD_SIZE (4), + .TAG_IN_WIDTH (`L2CORE_TAG_WIDTH), + .TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH) + ) io_arb ( + .clk (clk), + .reset (reset), - assign io_rsp_ready = per_cluster_io_rsp_ready[0]; + // input requests + .in_mem_req_valid (per_cluster_io_req_valid), + .in_mem_req_rw (per_cluster_io_req_rw), + .in_mem_req_byteen (per_cluster_io_req_byteen), + .in_mem_req_addr (per_cluster_io_req_addr), + .in_mem_req_data (per_cluster_io_req_data), + .in_mem_req_tag (per_cluster_io_req_tag), + .in_mem_req_ready (per_cluster_io_req_ready), + + // input responses + .in_mem_rsp_valid (per_cluster_io_rsp_valid), + .in_mem_rsp_data (per_cluster_io_rsp_data), + .in_mem_rsp_tag (per_cluster_io_rsp_tag), + .in_mem_rsp_ready (per_cluster_io_rsp_ready), + + // output request + .out_mem_req_valid (io_req_valid), + .out_mem_req_rw (io_req_rw), + .out_mem_req_byteen (io_req_byteen), + .out_mem_req_addr (io_req_addr), + .out_mem_req_data (io_req_data), + .out_mem_req_tag (io_req_tag), + .out_mem_req_ready (io_req_ready), + + // output response + .out_mem_rsp_valid (io_rsp_valid), + .out_mem_rsp_tag (io_rsp_tag), + .out_mem_rsp_data (io_rsp_data), + .out_mem_rsp_ready (io_rsp_ready) + ); assign busy = (| per_cluster_busy); assign ebreak = (& per_cluster_ebreak); // L3 Cache /////////////////////////////////////////////////////////// - wire[`L3NUM_REQUESTS-1:0] l3_core_req_valid; - wire[`L3NUM_REQUESTS-1:0] l3_core_req_rw; - wire[`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen; - wire[`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr; - wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data; - wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag; + wire [`L3NUM_REQUESTS-1:0] l3_core_req_valid; + wire [`L3NUM_REQUESTS-1:0] l3_core_req_rw; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] l3_core_req_byteen; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_core_req_addr; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_req_data; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_req_tag; - wire[`L3NUM_REQUESTS-1:0] l3_core_rsp_valid; - wire[`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data; - wire[`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; - wire l3_core_rsp_ready; + wire [`L3NUM_REQUESTS-1:0] l3_core_rsp_valid; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_LINE_WIDTH-1:0] l3_core_rsp_data; + wire [`L3NUM_REQUESTS-1:0][`L2DRAM_TAG_WIDTH-1:0] l3_core_rsp_tag; + wire l3_core_rsp_ready; - wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid; - wire[`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr; - wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate; - wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag; - wire[`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_valid; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] l3_snp_fwdout_addr; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_invalidate; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdout_tag; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdout_ready; - wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid; - wire[`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag; - wire[`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_valid; + wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag; + wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready; for (i = 0; i < `L3NUM_REQUESTS; i++) begin // Core Request - assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i]; - assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i]; - assign l3_core_req_byteen[i] = per_cluster_dram_req_byteen[i]; - assign l3_core_req_addr [i] = per_cluster_dram_req_addr [i]; - assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i]; - assign l3_core_req_data [i] = per_cluster_dram_req_data [i]; + assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i]; + assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i]; + assign l3_core_req_byteen [i] = per_cluster_dram_req_byteen[i]; + assign l3_core_req_addr [i] = per_cluster_dram_req_addr [i]; + assign l3_core_req_tag [i] = per_cluster_dram_req_tag [i]; + assign l3_core_req_data [i] = per_cluster_dram_req_data [i]; // Core Response assign per_cluster_dram_rsp_valid [i] = l3_core_rsp_valid [i] && l3_core_rsp_ready; @@ -268,31 +301,31 @@ module Vortex_Socket ( assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready); VX_cache #( - .CACHE_ID (0), - .CACHE_SIZE (`L3CACHE_SIZE), - .BANK_LINE_SIZE (`L3BANK_LINE_SIZE), - .NUM_BANKS (`L3NUM_BANKS), - .WORD_SIZE (`L3WORD_SIZE), - .NUM_REQUESTS (`L3NUM_REQUESTS), - .STAGE_1_CYCLES (`L3STAGE_1_CYCLES), - .CREQ_SIZE (`L3CREQ_SIZE), - .MRVQ_SIZE (`L3MRVQ_SIZE), - .DFPQ_SIZE (`L3DFPQ_SIZE), - .SNRQ_SIZE (`L3SNRQ_SIZE), - .CWBQ_SIZE (`L3CWBQ_SIZE), - .DWBQ_SIZE (`L3DWBQ_SIZE), - .DFQQ_SIZE (`L3DFQQ_SIZE), - .PRFQ_SIZE (`L3PRFQ_SIZE), - .PRFQ_STRIDE (`L3PRFQ_STRIDE), - .DRAM_ENABLE (1), - .WRITE_ENABLE (1), - .SNOOP_FORWARDING (1), - .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), - .CORE_TAG_ID_BITS (0), - .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH), - .NUM_SNP_REQUESTS (`NUM_CLUSTERS), - .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), - .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) + .CACHE_ID (0), + .CACHE_SIZE (`L3CACHE_SIZE), + .BANK_LINE_SIZE (`L3BANK_LINE_SIZE), + .NUM_BANKS (`L3NUM_BANKS), + .WORD_SIZE (`L3WORD_SIZE), + .NUM_REQUESTS (`L3NUM_REQUESTS), + .STAGE_1_CYCLES (`L3STAGE_1_CYCLES), + .CREQ_SIZE (`L3CREQ_SIZE), + .MRVQ_SIZE (`L3MRVQ_SIZE), + .DFPQ_SIZE (`L3DFPQ_SIZE), + .SNRQ_SIZE (`L3SNRQ_SIZE), + .CWBQ_SIZE (`L3CWBQ_SIZE), + .DWBQ_SIZE (`L3DWBQ_SIZE), + .DFQQ_SIZE (`L3DFQQ_SIZE), + .PRFQ_SIZE (`L3PRFQ_SIZE), + .PRFQ_STRIDE (`L3PRFQ_STRIDE), + .DRAM_ENABLE (1), + .WRITE_ENABLE (1), + .SNOOP_FORWARDING (1), + .CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH), + .CORE_TAG_ID_BITS (0), + .DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH), + .NUM_SNP_REQUESTS (`NUM_CLUSTERS), + .SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH), + .SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH) ) gpu_l3cache ( .clk (clk), .reset (reset),