From 83a1695c73dec057cd5f1ce9e590d9d104c39d83 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Tue, 30 Jun 2020 18:14:06 -0700 Subject: [PATCH] OPAE CSR access --- driver/include/vortex.h | 4 +- driver/opae/vortex.cpp | 16 ++++--- driver/rtlsim/Makefile | 2 +- hw/opae/sources.txt | 4 ++ hw/opae/vortex_afu.json | 7 +-- hw/opae/vortex_afu.sv | 44 ++++++++++++++---- hw/opae/vortex_afu.vh | 7 +-- hw/rtl/VX_back_end.v | 35 +++++++------- hw/rtl/VX_cluster.v | 11 ++--- hw/rtl/VX_core.v | 50 +++++++------------- hw/rtl/VX_csr_arb.v | 68 +++++++++++++--------------- hw/rtl/VX_csr_io_arb.v | 35 +++++++------- hw/rtl/VX_define.vh | 1 + hw/rtl/VX_pipeline.v | 36 ++++++++++++--- hw/rtl/Vortex.v | 20 ++++---- hw/rtl/interfaces/VX_csr_io_req_if.v | 16 +++++++ hw/rtl/interfaces/VX_csr_io_rsp_if.v | 14 ++++++ hw/rtl/interfaces/VX_csr_req_if.v | 5 +- hw/rtl/interfaces/VX_wb_if.v | 6 +-- 19 files changed, 224 insertions(+), 157 deletions(-) create mode 100644 hw/rtl/interfaces/VX_csr_io_req_if.v create mode 100644 hw/rtl/interfaces/VX_csr_io_rsp_if.v diff --git a/driver/include/vortex.h b/driver/include/vortex.h index ded648db..d2a00a3f 100644 --- a/driver/include/vortex.h +++ b/driver/include/vortex.h @@ -58,10 +58,10 @@ int vx_start(vx_device_h hdevice); int vx_ready_wait(vx_device_h hdevice, long long timeout); // set device constant registers -int vx_csr_set(vx_device_h hdevice, int address, int value); +int vx_csr_set(vx_device_h hdevice, int core, int address, int value); // get device constant registers -int vx_csr_get(vx_device_h hdevice, int address, int* value); +int vx_csr_get(vx_device_h hdevice, int core, int address, int* value); ////////////////////////////// UTILITY FUNCIONS /////////////////////////////// diff --git a/driver/opae/vortex.cpp b/driver/opae/vortex.cpp index abb9c5b1..baac1204 100755 --- a/driver/opae/vortex.cpp +++ b/driver/opae/vortex.cpp @@ -43,6 +43,7 @@ #define MMIO_MEM_ADDR (AFU_IMAGE_MMIO_MEM_ADDR * 4) #define MMIO_DATA_SIZE 
(AFU_IMAGE_MMIO_DATA_SIZE * 4) #define MMIO_STATUS (AFU_IMAGE_MMIO_STATUS * 4) +#define MMIO_CSR_CORE (AFU_IMAGE_MMIO_CSR_CORE * 4) #define MMIO_CSR_ADDR (AFU_IMAGE_MMIO_CSR_ADDR * 4) #define MMIO_CSR_DATA (AFU_IMAGE_MMIO_CSR_DATA * 4) #define MMIO_CSR_READ (AFU_IMAGE_MMIO_CSR_READ * 4) @@ -172,10 +173,10 @@ extern int vx_dev_open(vx_device_h* hdevice) { { // Load device CAPS int ret = 0; - ret |= vx_csr_get(device, CSR_IMPL_ID, &device->implementation_id); - ret |= vx_csr_get(device, CSR_NC, &device->num_cores); - ret |= vx_csr_get(device, CSR_NW, &device->num_warps); - ret |= vx_csr_get(device, CSR_NT, &device->num_threads); + ret |= vx_csr_get(device, 0, CSR_IMPL_ID, &device->implementation_id); + ret |= vx_csr_get(device, 0, CSR_NC, &device->num_cores); + ret |= vx_csr_get(device, 0, CSR_NW, &device->num_warps); + ret |= vx_csr_get(device, 0, CSR_NT, &device->num_threads); if (ret != 0) { fpgaClose(accel_handle); return ret; @@ -467,7 +468,7 @@ extern int vx_start(vx_device_h hdevice) { } // set device constant registers -extern int vx_csr_set(vx_device_h hdevice, int address, int value) { +extern int vx_csr_set(vx_device_h hdevice, int core, int address, int value) { if (nullptr == hdevice) return -1; @@ -478,6 +479,7 @@ extern int vx_csr_set(vx_device_h hdevice, int address, int value) { return -1; // write CSR value + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_DATA, value)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_WRITE)); @@ -486,7 +488,7 @@ extern int vx_csr_set(vx_device_h hdevice, int address, int value) { } // get device constant registers -extern int vx_csr_get(vx_device_h hdevice, int address, int* value) { +extern int vx_csr_get(vx_device_h hdevice, int core, int address, int* value) { if (nullptr == hdevice || nullptr == value) return -1; @@ -496,7 +498,9 @@ extern int 
vx_csr_get(vx_device_h hdevice, int address, int* value) { if (vx_ready_wait(hdevice, -1) != 0) return -1; + // select the target core and CSR address, then issue the read command + CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_CORE, core)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CSR_ADDR, address)); CHECK_RES(fpgaWriteMMIO64(device->fpga, 0, MMIO_CMD_TYPE, CMD_CSR_READ)); diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 19b39484..82ab979b 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -21,7 +21,7 @@ DBG_FLAGS += -DDBG_CORE_REQ_INFO #CONFIGS += -DNUM_CLUSTERS=1 -DNUM_CORES=2 #DEBUG=1 -AFU=1 +#AFU=1 CFLAGS += -fPIC diff --git a/hw/opae/sources.txt b/hw/opae/sources.txt index 0263e558..e8d0480c 100644 --- a/hw/opae/sources.txt +++ b/hw/opae/sources.txt @@ -48,6 +48,8 @@ QI:vortex_afu.qsf ../rtl/interfaces/VX_cache_snp_req_if.v ../rtl/interfaces/VX_cache_snp_rsp_if.v ../rtl/interfaces/VX_csr_req_if.v +../rtl/interfaces/VX_csr_io_req_if.v +../rtl/interfaces/VX_csr_io_rsp_if.v ../rtl/interfaces/VX_exec_unit_req_if.v ../rtl/interfaces/VX_backend_req_if.v ../rtl/interfaces/VX_gpr_read_if.v @@ -90,6 +92,8 @@ QI:vortex_afu.qsf ../rtl/VX_writeback.v ../rtl/VX_csr_pipe.v ../rtl/VX_csr_data.v +../rtl/VX_csr_arb.v +../rtl/VX_csr_io_arb.v ../rtl/VX_warp_sched.v ../rtl/VX_gpr_ram.v ../rtl/VX_gpr_stage.v diff --git a/hw/opae/vortex_afu.json b/hw/opae/vortex_afu.json index f198383c..fb1e908c 100644 --- a/hw/opae/vortex_afu.json +++ b/hw/opae/vortex_afu.json @@ -19,9 +19,10 @@ "mmio-status": 18, "mmio-scope-read": 20, "mmio-scope-write": 22, - "mmio-csr-addr": 24, - "mmio-csr-data": 26, - "mmio-csr-read": 28, + "mmio-csr-core": 24, + "mmio-csr-addr": 26, + "mmio-csr-data": 28, + "mmio-csr-read": 30, "afu-top-interface": { diff --git a/hw/opae/vortex_afu.sv b/hw/opae/vortex_afu.sv index b57a3bc9..89ee968a 100644 --- a/hw/opae/vortex_afu.sv +++ b/hw/opae/vortex_afu.sv @@ -69,6 +69,7 @@ localparam MMIO_STATUS = `AFU_IMAGE_MMIO_STATUS; localparam MMIO_SCOPE_READ =
`AFU_IMAGE_MMIO_SCOPE_READ; localparam MMIO_SCOPE_WRITE = `AFU_IMAGE_MMIO_SCOPE_WRITE; +localparam MMIO_CSR_CORE = `AFU_IMAGE_MMIO_CSR_CORE; localparam MMIO_CSR_ADDR = `AFU_IMAGE_MMIO_CSR_ADDR; localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA; localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ; @@ -123,7 +124,7 @@ logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag; logic vx_snp_rsp_ready; logic vx_csr_io_req_valid; -logic [`NC_BITS-1:0] vx_csr_io_req_coreid; +logic [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid; logic [11:0] vx_csr_io_req_addr; logic vx_csr_io_req_rw; logic [31:0] vx_csr_io_req_data; @@ -167,6 +168,7 @@ logic cmd_scope_read; logic cmd_scope_write; `endif +logic [`VX_CSR_ID_WIDTH-1:0] cmd_csr_core; logic [11:0] cmd_csr_addr; logic [31:0] cmd_csr_rdata; logic [31:0] cmd_csr_wdata; @@ -238,6 +240,12 @@ begin `endif end `endif + MMIO_CSR_CORE: begin + cmd_csr_core <= $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data); + `ifdef DBG_PRINT_OPAE + $display("%t: MMIO_CSR_CORE: %0h", $time, $bits(cmd_csr_core)'(cp2af_sRxPort.c0.data)); + `endif + end MMIO_CSR_ADDR: begin cmd_csr_addr <= $bits(cmd_csr_addr)'(cp2af_sRxPort.c0.data); `ifdef DBG_PRINT_OPAE @@ -306,8 +314,7 @@ end logic cmd_read_done; logic cmd_write_done; logic cmd_clflush_done; -logic cmd_csr_read_done; -logic cmd_csr_write_done; +logic cmd_csr_done; logic cmd_run_done; always_ff @(posedge clk) @@ -395,13 +402,13 @@ begin end STATE_CSR_READ: begin - if (cmd_csr_read_done) begin + if (cmd_csr_done) begin state <= STATE_IDLE; end end STATE_CSR_WRITE: begin - if (cmd_csr_write_done) begin + if (cmd_csr_done) begin state <= STATE_IDLE; end end @@ -865,8 +872,11 @@ end // CSRs/////////////////////////////////////////////////////////////////////// -assign vx_csr_io_req_valid = (STATE_CSR_READ == state || STATE_CSR_WRITE == state); -assign vx_csr_io_req_coreid = 0; +logic csr_io_req_sent; + +assign vx_csr_io_req_valid = !csr_io_req_sent + && ((STATE_CSR_READ == state || STATE_CSR_WRITE == state)); +assign 
vx_csr_io_req_coreid = cmd_csr_core; assign vx_csr_io_req_rw = (STATE_CSR_WRITE == state); assign vx_csr_io_req_addr = cmd_csr_addr; assign vx_csr_io_req_data = cmd_csr_wdata; @@ -874,8 +884,22 @@ assign vx_csr_io_req_data = cmd_csr_wdata; assign cmd_csr_rdata = vx_csr_io_rsp_data; assign vx_csr_io_rsp_ready = 1; -assign cmd_csr_read_done = vx_csr_io_rsp_valid; -assign cmd_csr_write_done = vx_csr_io_req_ready; +assign cmd_csr_done = (STATE_CSR_WRITE == state) ? vx_csr_io_req_ready : vx_csr_io_rsp_valid; + +always_ff @(posedge clk) +begin + if (SoftReset) begin + csr_io_req_sent <= 0; + end + else begin + if (vx_csr_io_req_valid && vx_csr_io_req_ready) begin + csr_io_req_sent <= 1; + end + if (cmd_csr_done) begin + csr_io_req_sent <= 0; + end + end +end // Vortex ///////////////////////////////////////////////////////////////////// @@ -890,7 +914,7 @@ Vortex #() vortex ( `SCOPE_SIGNALS_BE_BIND .clk (clk), - .reset (vx_reset), + .reset (SoftReset | vx_reset), // DRAM request .dram_req_valid (vx_dram_req_valid), diff --git a/hw/opae/vortex_afu.vh b/hw/opae/vortex_afu.vh index 2b201bfd..910ea3af 100644 --- a/hw/opae/vortex_afu.vh +++ b/hw/opae/vortex_afu.vh @@ -20,9 +20,10 @@ `define AFU_IMAGE_CMD_MEM_WRITE 2 `define AFU_IMAGE_CMD_RUN 3 `define AFU_IMAGE_MMIO_CMD_TYPE 10 -`define AFU_IMAGE_MMIO_CSR_ADDR 24 -`define AFU_IMAGE_MMIO_CSR_DATA 26 -`define AFU_IMAGE_MMIO_CSR_READ 28 +`define AFU_IMAGE_MMIO_CSR_CORE 24 +`define AFU_IMAGE_MMIO_CSR_ADDR 26 +`define AFU_IMAGE_MMIO_CSR_DATA 28 +`define AFU_IMAGE_MMIO_CSR_READ 30 `define AFU_IMAGE_MMIO_DATA_SIZE 16 `define AFU_IMAGE_MMIO_IO_ADDR 12 `define AFU_IMAGE_MMIO_MEM_ADDR 14 diff --git a/hw/rtl/VX_back_end.v b/hw/rtl/VX_back_end.v index 00b118ee..8b284672 100644 --- a/hw/rtl/VX_back_end.v +++ b/hw/rtl/VX_back_end.v @@ -9,9 +9,8 @@ module VX_back_end #( input wire clk, input wire reset, - // IO CSR - VX_csr_req_if io_csr_req, - VX_wb_if io_csr_rsp, + VX_csr_io_req_if csr_io_req_if, + VX_csr_io_rsp_if csr_io_rsp_if, input 
wire schedule_delay, @@ -104,21 +103,23 @@ module VX_back_end #( .warp_ctl_if (warp_ctl_if) ); - VX_csr_req_if issued_csr_req(); + VX_csr_req_if issued_csr_req_if(); - VX_wb_if csr_pipe_rsp(); + VX_wb_if csr_pipe_rsp_if(); - VX_csr_arb csr_arbiter ( - .clk (clk), - .reset (reset), - .csr_pipe_stall(stall_gpr_csr), - .core_csr_req (csr_req_if), - .io_csr_req (io_csr_req), - .issued_csr_req(issued_csr_req), + VX_csr_arb csr_arb ( + .clk (clk), + .reset (reset), - .csr_pipe_rsp (csr_pipe_rsp), - .csr_wb_if (csr_wb_if), - .csr_io_rsp (io_csr_rsp) + .csr_pipe_stall (stall_gpr_csr), + + .csr_core_req_if (csr_req_if), + .csr_io_req_if (csr_io_req_if), + .issued_csr_req_if(issued_csr_req_if), + + .csr_pipe_rsp_if (csr_pipe_rsp_if), + .csr_wb_if (csr_wb_if), + .csr_io_rsp_if (csr_io_rsp_if) ); VX_csr_pipe #( @@ -127,9 +128,9 @@ module VX_back_end #( .clk (clk), .reset (reset), .no_slot_csr (no_slot_csr), - .csr_req_if (issued_csr_req), + .csr_req_if (issued_csr_req_if), .writeback_if (writeback_if), - .csr_wb_if (csr_pipe_rsp), + .csr_wb_if (csr_pipe_rsp_if), .stall_gpr_csr (stall_gpr_csr) ); diff --git a/hw/rtl/VX_cluster.v b/hw/rtl/VX_cluster.v index 9264a7a5..2633bf7d 100644 --- a/hw/rtl/VX_cluster.v +++ b/hw/rtl/VX_cluster.v @@ -123,7 +123,6 @@ module VX_cluster #( wire [`NUM_CORES-1:0] per_core_io_rsp_ready; wire [`NUM_CORES-1:0] per_core_csr_io_req_valid; - wire [`NUM_CORES-1:0][`NC_BITS-1:0] per_core_csr_io_req_coreid; wire [`NUM_CORES-1:0][11:0] per_core_csr_io_req_addr; wire [`NUM_CORES-1:0] per_core_csr_io_req_rw; wire [`NUM_CORES-1:0][31:0] per_core_csr_io_req_data; @@ -199,7 +198,7 @@ module VX_cluster #( .io_rsp_tag (per_core_io_rsp_tag [i]), .io_rsp_ready (per_core_io_rsp_ready [i]), - .csr_io_req_valid (per_core_csr_io_req_valid[i] && (per_core_csr_io_req_coreid[i] == `NC_BITS'(i))), + .csr_io_req_valid (per_core_csr_io_req_valid [i]), .csr_io_req_rw (per_core_csr_io_req_rw [i]), .csr_io_req_addr (per_core_csr_io_req_addr [i]), .csr_io_req_data 
(per_core_csr_io_req_data [i]), @@ -252,7 +251,7 @@ module VX_cluster #( .out_mem_rsp_tag (io_rsp_tag), .out_mem_rsp_data (io_rsp_data), .out_mem_rsp_ready (io_rsp_ready) - ); + ); VX_csr_io_arb #( .NUM_REQUESTS (`NUM_CORES) @@ -260,9 +259,10 @@ module VX_cluster #( .clk (clk), .reset (reset), + .request_id (csr_io_req_coreid), + // input requests - .in_csr_io_req_valid (csr_io_req_valid), - .in_csr_io_req_coreid (csr_io_req_coreid), + .in_csr_io_req_valid (csr_io_req_valid), .in_csr_io_req_addr (csr_io_req_addr), .in_csr_io_req_rw (csr_io_req_rw), .in_csr_io_req_data (csr_io_req_data), @@ -275,7 +275,6 @@ module VX_cluster #( // output request .out_csr_io_req_valid (per_core_csr_io_req_valid), - .out_csr_io_req_coreid (per_core_csr_io_req_coreid), .out_csr_io_req_addr (per_core_csr_io_req_addr), .out_csr_io_req_rw (per_core_csr_io_req_rw), .out_csr_io_req_data (per_core_csr_io_req_data), diff --git a/hw/rtl/VX_core.v b/hw/rtl/VX_core.v index 6ec07382..7d6dc7a1 100644 --- a/hw/rtl/VX_core.v +++ b/hw/rtl/VX_core.v @@ -70,46 +70,22 @@ module VX_core #( input wire [`DCORE_TAG_WIDTH-1:0] io_rsp_tag, output wire io_rsp_ready, - // CSR I/O Request + // CSR I/O request input wire csr_io_req_valid, - input wire[11:0] csr_io_req_addr, + input wire [11:0] csr_io_req_addr, input wire csr_io_req_rw, - input wire[31:0] csr_io_req_data, + input wire [31:0] csr_io_req_data, output wire csr_io_req_ready, - // CSR I/O Response + // CSR I/O response output wire csr_io_rsp_valid, - output wire[31:0] csr_io_rsp_data, + output wire [31:0] csr_io_rsp_data, input wire csr_io_rsp_ready, // Status output wire busy, output wire ebreak ); - - `UNUSED_VAR(csr_io_rsp_ready) - - // IO CSR request - VX_csr_req_if io_csr_req(); - wire temp_io_csr_req_valid = csr_io_req_valid; - assign io_csr_req.valid = {`NUM_THREADS{temp_io_csr_req_valid}}; - assign io_csr_req.is_csr = 1'b1; - assign io_csr_req.csr_address = csr_io_req_addr; - assign io_csr_req.alu_op = csr_io_req_rw ? 
`ALU_CSR_RW : `ALU_CSR_RS; - assign io_csr_req.csr_mask = csr_io_req_rw ? csr_io_req_data : 32'b0; - - VX_wb_if io_csr_rsp(); - assign csr_io_req_ready = io_csr_rsp.is_io; - assign csr_io_rsp_valid = io_csr_rsp.valid[0]; - assign csr_io_rsp_data = io_csr_rsp.data[0]; - -`IGNORE_WARNINGS_BEGIN - wire [4:0] unused_rd = io_csr_rsp.rd; - wire [1:0] unused_wb = io_csr_rsp.wb; - wire [31:0] unused_curr_PC = io_csr_rsp.curr_PC; -`IGNORE_WARNINGS_END - - // Dcache Interfaces VX_cache_dram_req_if #( .DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH), .DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH), @@ -212,10 +188,6 @@ module VX_core #( .clk(clk), .reset(reset), - // IO CSR - .io_csr_req (io_csr_req), - .io_csr_rsp (io_csr_rsp), - // Dcache core request .dcache_req_valid (core_dcache_req_if.core_req_valid), .dcache_req_rw (core_dcache_req_if.core_req_rw), @@ -246,6 +218,18 @@ module VX_core #( .icache_rsp_tag (core_icache_rsp_if.core_rsp_tag), .icache_rsp_ready (core_icache_rsp_if.core_rsp_ready), + // CSR I/O request + .csr_io_req_valid (csr_io_req_valid), + .csr_io_req_rw (csr_io_req_rw), + .csr_io_req_addr (csr_io_req_addr), + .csr_io_req_data (csr_io_req_data), + .csr_io_req_ready (csr_io_req_ready), + + // CSR I/O response + .csr_io_rsp_valid (csr_io_rsp_valid), + .csr_io_rsp_data (csr_io_rsp_data), + .csr_io_rsp_ready (csr_io_rsp_ready), + // Status .busy(busy), .ebreak(ebreak) diff --git a/hw/rtl/VX_csr_arb.v b/hw/rtl/VX_csr_arb.v index 7abe4f9a..5703d9ac 100644 --- a/hw/rtl/VX_csr_arb.v +++ b/hw/rtl/VX_csr_arb.v @@ -3,56 +3,52 @@ module VX_csr_arb ( input wire clk, input wire reset, - - input wire csr_pipe_stall, - - VX_csr_req_if core_csr_req, - VX_csr_req_if io_csr_req, - VX_csr_req_if issued_csr_req, - VX_wb_if csr_pipe_rsp, + input wire csr_pipe_stall, + + VX_csr_req_if csr_core_req_if, + VX_csr_io_req_if csr_io_req_if, + VX_csr_req_if issued_csr_req_if, + + VX_wb_if csr_pipe_rsp_if, VX_wb_if csr_wb_if, - VX_wb_if csr_io_rsp + VX_csr_io_rsp_if csr_io_rsp_if ); `UNUSED_VAR (clk) 
`UNUSED_VAR (reset) - wire pick_core = (|core_csr_req.valid); + wire pick_core = (| csr_core_req_if.valid); // Which request to pick - assign issued_csr_req.is_io = !pick_core; + assign issued_csr_req_if.is_io = !pick_core; // Mux between core and io - assign issued_csr_req.valid = pick_core ? core_csr_req.valid : io_csr_req.valid; - assign issued_csr_req.is_csr = pick_core ? core_csr_req.is_csr : io_csr_req.is_csr; - assign issued_csr_req.alu_op = pick_core ? core_csr_req.alu_op : io_csr_req.alu_op; - assign issued_csr_req.csr_address = pick_core ? core_csr_req.csr_address : io_csr_req.csr_address; - assign issued_csr_req.csr_mask = pick_core ? core_csr_req.csr_mask : io_csr_req.csr_mask; + assign issued_csr_req_if.valid = pick_core ? csr_core_req_if.valid : {`NUM_THREADS{csr_io_req_if.valid}}; + assign issued_csr_req_if.is_csr = pick_core ? csr_core_req_if.is_csr : 1'b1; + assign issued_csr_req_if.alu_op = pick_core ? csr_core_req_if.alu_op : (csr_io_req_if.rw ? `ALU_CSR_RW : `ALU_CSR_RS); + assign issued_csr_req_if.csr_address = pick_core ? csr_core_req_if.csr_address : csr_io_req_if.addr; + assign issued_csr_req_if.csr_mask = pick_core ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? 
csr_io_req_if.data : 32'b0); + + assign csr_io_req_if.ready = !(csr_pipe_stall || pick_core); // Core arguments - assign issued_csr_req.warp_num = core_csr_req.warp_num; - assign issued_csr_req.rd = core_csr_req.rd; - assign issued_csr_req.wb = core_csr_req.wb; + assign issued_csr_req_if.warp_num = csr_core_req_if.warp_num; + assign issued_csr_req_if.rd = csr_core_req_if.rd; + assign issued_csr_req_if.wb = csr_core_req_if.wb; - // Core Writeback + // Core Writeback + assign csr_wb_if.valid = csr_pipe_rsp_if.valid & {`NUM_THREADS{~csr_pipe_rsp_if.is_io}}; + assign csr_wb_if.data = csr_pipe_rsp_if.data; + assign csr_wb_if.warp_num = csr_pipe_rsp_if.warp_num; + assign csr_wb_if.rd = csr_pipe_rsp_if.rd; + assign csr_wb_if.wb = csr_pipe_rsp_if.wb; + assign csr_wb_if.curr_PC = csr_pipe_rsp_if.curr_PC; - assign csr_wb_if.valid = csr_pipe_rsp.valid & {`NUM_THREADS{~csr_pipe_rsp.is_io}}; - assign csr_wb_if.data = csr_pipe_rsp.data; - assign csr_wb_if.warp_num = csr_pipe_rsp.warp_num; - assign csr_wb_if.rd = csr_pipe_rsp.rd; - assign csr_wb_if.wb = csr_pipe_rsp.wb; - assign csr_wb_if.curr_PC = csr_pipe_rsp.curr_PC; - assign csr_wb_if.is_io = 1'b0; - - // CSR IO WB - - assign csr_io_rsp.valid = csr_pipe_rsp.valid & {`NUM_THREADS{csr_pipe_rsp.is_io}}; - assign csr_io_rsp.data = csr_pipe_rsp.data; - assign csr_io_rsp.warp_num = csr_pipe_rsp.warp_num; - assign csr_io_rsp.rd = csr_pipe_rsp.rd; - assign csr_io_rsp.wb = csr_pipe_rsp.wb; - assign csr_io_rsp.curr_PC = csr_pipe_rsp.curr_PC; - assign csr_io_rsp.is_io = !(csr_pipe_stall || pick_core); + // CSR I/O response + assign csr_io_rsp_if.valid = csr_pipe_rsp_if.valid[0] & csr_pipe_rsp_if.is_io; + assign csr_io_rsp_if.data = csr_pipe_rsp_if.data[0]; + wire x = csr_io_rsp_if.ready; + `UNUSED_VAR(x) endmodule diff --git a/hw/rtl/VX_csr_io_arb.v b/hw/rtl/VX_csr_io_arb.v index 3c5ada5e..bf0d7041 100644 --- a/hw/rtl/VX_csr_io_arb.v +++ b/hw/rtl/VX_csr_io_arb.v @@ -4,12 +4,13 @@ module VX_csr_io_arb #( parameter NUM_REQUESTS = 1, 
parameter REQS_BITS = `CLOG2(NUM_REQUESTS) ) ( - input wire clk, - input wire reset, + input wire clk, + input wire reset, + + input wire [REQS_BITS-1:0] request_id, // input requests input wire in_csr_io_req_valid, - input wire [`NC_BITS-1:0] in_csr_io_req_coreid, input wire [11:0] in_csr_io_req_addr, input wire in_csr_io_req_rw, input wire [31:0] in_csr_io_req_data, @@ -22,7 +23,6 @@ module VX_csr_io_arb #( // output request output wire [NUM_REQUESTS-1:0] out_csr_io_req_valid, - output wire [NUM_REQUESTS-1:0][`NC_BITS-1:0] out_csr_io_req_coreid, output wire [NUM_REQUESTS-1:0][11:0] out_csr_io_req_addr, output wire [NUM_REQUESTS-1:0] out_csr_io_req_rw, output wire [NUM_REQUESTS-1:0][31:0] out_csr_io_req_data, @@ -39,7 +39,6 @@ module VX_csr_io_arb #( `UNUSED_VAR (reset) assign out_csr_io_req_valid = in_csr_io_req_valid; - assign out_csr_io_req_coreid = in_csr_io_req_coreid; assign out_csr_io_req_rw = in_csr_io_req_rw; assign out_csr_io_req_addr = in_csr_io_req_addr; assign out_csr_io_req_data = in_csr_io_req_data; @@ -51,6 +50,17 @@ module VX_csr_io_arb #( end else begin + genvar i; + + for (i = 0; i < NUM_REQUESTS; i++) begin + assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i)); + assign out_csr_io_req_rw[i] = in_csr_io_req_rw; + assign out_csr_io_req_addr[i] = in_csr_io_req_addr; + assign out_csr_io_req_data[i] = in_csr_io_req_data; + end + + assign in_csr_io_req_ready = out_csr_io_req_ready[request_id]; + reg [REQS_BITS-1:0] bus_rsp_sel; VX_fixed_arbiter #( @@ -65,20 +75,11 @@ module VX_csr_io_arb #( ); assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel]; - assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel]; - assign in_csr_io_rsp_ready [bus_rsp_sel] = out_csr_io_rsp_ready; + assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel]; - genvar i; - - for (i = 0; i < NUM_REQUESTS; i++) begin - assign out_csr_io_req_valid[i] = in_csr_io_req_valid && in_csr_io_req_ready; - assign 
out_csr_io_req_coreid[i] = in_csr_io_req_coreid; - assign out_csr_io_req_rw[i] = in_csr_io_req_rw; - assign out_csr_io_req_addr[i] = in_csr_io_req_addr; - assign out_csr_io_req_data[i] = in_csr_io_req_data; + for (i = 0; i < NUM_REQUESTS; i++) begin + assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i)); end - - assign in_csr_io_req_ready = (& out_csr_io_req_ready); end diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 3ddc4dff..21118a76 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -288,6 +288,7 @@ `define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH `define VX_SNP_TAG_WIDTH `L3SNP_TAG_WIDTH `define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH +`define VX_CSR_ID_WIDTH `CLOG2(`NUM_CLUSTERS * `NUM_CORES) `define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)} diff --git a/hw/rtl/VX_pipeline.v b/hw/rtl/VX_pipeline.v index 1424e06c..83833f3d 100644 --- a/hw/rtl/VX_pipeline.v +++ b/hw/rtl/VX_pipeline.v @@ -12,10 +12,6 @@ module VX_pipeline #( input wire clk, input wire reset, - // IO CSR - VX_csr_req_if io_csr_req, - VX_wb_if io_csr_rsp, - // Dcache core request output wire [`NUM_THREADS-1:0] dcache_req_valid, output wire [`NUM_THREADS-1:0] dcache_req_rw, @@ -44,7 +40,19 @@ module VX_pipeline #( input wire icache_rsp_valid, input wire [31:0] icache_rsp_data, input wire [`ICORE_TAG_WIDTH-1:0] icache_rsp_tag, - output wire icache_rsp_ready, + output wire icache_rsp_ready, + + // CSR I/O Request + input wire csr_io_req_valid, + input wire[11:0] csr_io_req_addr, + input wire csr_io_req_rw, + input wire[31:0] csr_io_req_data, + output wire csr_io_req_ready, + + // CSR I/O Response + output wire csr_io_rsp_valid, + output wire[31:0] csr_io_rsp_data, + input wire csr_io_rsp_ready, // Status output wire busy, @@ -90,6 +98,20 @@ module VX_pipeline #( .CORE_TAG_ID_BITS(`ICORE_TAG_ID_BITS) ) core_icache_rsp_if(); + + // CSR I/O + VX_csr_io_req_if csr_io_req_if(); + assign csr_io_req_if.valid = csr_io_req_valid; + assign csr_io_req_if.rw = 
csr_io_req_rw; + assign csr_io_req_if.addr = csr_io_req_addr; + assign csr_io_req_if.data = csr_io_req_data; + assign csr_io_req_ready = csr_io_req_if.ready; + + VX_csr_io_rsp_if csr_io_rsp_if(); + assign csr_io_rsp_valid = csr_io_rsp_if.valid; + assign csr_io_rsp_data = csr_io_rsp_if.data; + assign csr_io_rsp_if.ready = csr_io_rsp_ready; + // Front-end to Back-end VX_backend_req_if bckE_req_if(); @@ -138,8 +160,8 @@ module VX_pipeline #( .clk (clk), .reset (reset), - .io_csr_req (io_csr_req), - .io_csr_rsp (io_csr_rsp), + .csr_io_req_if (csr_io_req_if), + .csr_io_rsp_if (csr_io_rsp_if), .schedule_delay (schedule_delay), .warp_ctl_if (warp_ctl_if), .bckE_req_if (bckE_req_if), diff --git a/hw/rtl/Vortex.v b/hw/rtl/Vortex.v index f3c3c56b..a606c15f 100644 --- a/hw/rtl/Vortex.v +++ b/hw/rtl/Vortex.v @@ -56,7 +56,7 @@ module Vortex ( // CSR I/O Request input wire csr_io_req_valid, - input wire [`NC_BITS-1:0] csr_io_req_coreid, + input wire [`VX_CSR_ID_WIDTH-1:0] csr_io_req_coreid, input wire [11:0] csr_io_req_addr, input wire csr_io_req_rw, input wire [31:0] csr_io_req_data, @@ -74,7 +74,7 @@ module Vortex ( if (`NUM_CLUSTERS == 1) begin VX_cluster #( - .CLUSTER_ID(`L3CACHE_ID) + .CLUSTER_ID(0) ) cluster ( `SCOPE_SIGNALS_ISTAGE_BIND `SCOPE_SIGNALS_LSU_BIND @@ -148,8 +148,8 @@ module Vortex ( wire l3_core_req_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid; - wire [`NUM_CLUSTERS-1:0][`L3DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; - wire [`NUM_CLUSTERS-1:0][`L3DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data; + wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag; wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_req_valid; @@ -176,7 +176,6 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid; - wire [`NUM_CLUSTERS-1:0][`NC_BITS-1:0] 
per_cluster_csr_io_req_coreid; wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw; wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_csr_io_req_data; @@ -189,6 +188,9 @@ module Vortex ( wire [`NUM_CLUSTERS-1:0] per_cluster_busy; wire [`NUM_CLUSTERS-1:0] per_cluster_ebreak; + wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CORES)); /* cluster index: the global core id with the per-cluster core bits shifted out */ + wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid); /* core index inside the selected cluster: low bits of the global core id */ + genvar i; for (i = 0; i < `NUM_CLUSTERS; i++) begin VX_cluster #( @@ -241,7 +243,7 @@ module Vortex ( .io_rsp_ready (per_cluster_io_rsp_ready [i]), .csr_io_req_valid (per_cluster_csr_io_req_valid[i]), - .csr_io_req_coreid (per_cluster_csr_io_req_coreid[i]), + .csr_io_req_coreid (per_cluster_csr_io_req_coreid), .csr_io_req_rw (per_cluster_csr_io_req_rw [i]), .csr_io_req_addr (per_cluster_csr_io_req_addr[i]), .csr_io_req_data (per_cluster_csr_io_req_data[i]), @@ -302,9 +304,10 @@ module Vortex ( .clk (clk), .reset (reset), + .request_id (csr_io_request_id), + // input requests - .in_csr_io_req_valid (csr_io_req_valid), - .in_csr_io_req_coreid (csr_io_req_coreid), + .in_csr_io_req_valid (csr_io_req_valid), .in_csr_io_req_addr (csr_io_req_addr), .in_csr_io_req_rw (csr_io_req_rw), .in_csr_io_req_data (csr_io_req_data), @@ -317,7 +320,6 @@ module Vortex ( // output request .out_csr_io_req_valid (per_cluster_csr_io_req_valid), - .out_csr_io_req_coreid (per_cluster_csr_io_req_coreid), .out_csr_io_req_addr (per_cluster_csr_io_req_addr), .out_csr_io_req_rw (per_cluster_csr_io_req_rw), .out_csr_io_req_data (per_cluster_csr_io_req_data), diff --git a/hw/rtl/interfaces/VX_csr_io_req_if.v b/hw/rtl/interfaces/VX_csr_io_req_if.v new file mode 100644 index 00000000..ce8d2fed --- /dev/null +++ b/hw/rtl/interfaces/VX_csr_io_req_if.v @@ -0,0 +1,16 @@ +`ifndef VX_CSR_IO_REQ_IF +`define VX_CSR_IO_REQ_IF + +`include "VX_define.vh" + +interface VX_csr_io_req_if
(); + + wire valid; + wire rw; + wire [11:0] addr; + wire [31:0] data; + wire ready; + +endinterface + +`endif diff --git a/hw/rtl/interfaces/VX_csr_io_rsp_if.v b/hw/rtl/interfaces/VX_csr_io_rsp_if.v new file mode 100644 index 00000000..7c4c8f6d --- /dev/null +++ b/hw/rtl/interfaces/VX_csr_io_rsp_if.v @@ -0,0 +1,14 @@ +`ifndef VX_CSR_IO_RSP_IF +`define VX_CSR_IO_RSP_IF + +`include "VX_define.vh" + +interface VX_csr_io_rsp_if (); + + wire valid; + wire [31:0] data; + wire ready; + +endinterface + +`endif diff --git a/hw/rtl/interfaces/VX_csr_req_if.v b/hw/rtl/interfaces/VX_csr_req_if.v index 902f3833..a661e38f 100644 --- a/hw/rtl/interfaces/VX_csr_req_if.v +++ b/hw/rtl/interfaces/VX_csr_req_if.v @@ -15,9 +15,8 @@ interface VX_csr_req_if (); wire csr_immed; wire [31:0] csr_mask; -`IGNORE_WARNINGS_BEGIN - wire is_io; -`IGNORE_WARNINGS_END + wire is_io; + endinterface `endif diff --git a/hw/rtl/interfaces/VX_wb_if.v b/hw/rtl/interfaces/VX_wb_if.v index c9e57085..96b8acef 100644 --- a/hw/rtl/interfaces/VX_wb_if.v +++ b/hw/rtl/interfaces/VX_wb_if.v @@ -10,11 +10,9 @@ interface VX_wb_if (); wire [`NW_BITS-1:0] warp_num; wire [4:0] rd; wire [1:0] wb; - wire [31:0] curr_PC; - -`IGNORE_WARNINGS_BEGIN + wire [31:0] curr_PC; wire is_io; -`IGNORE_WARNINGS_END + endinterface `endif