Merge branch 'master' into graphics

This commit is contained in:
Blaise Tine
2021-05-26 23:33:06 -07:00
120 changed files with 4269 additions and 2329 deletions

View File

@@ -1,9 +1,9 @@
.PHONY: build_config
build_config:
./scripts/gen_config.py --outv ./rtl/VX_user_config.vh --outc ./VX_config.h
build_config: ./rtl/VX_config.vh
./scripts/gen_config.py -i ./rtl/VX_config.vh -o ./VX_config.h
$(MAKE) -C simulate
clean:
rm -f ./rtl/VX_user_config.vh ./VX_config.h
rm -f ./VX_config.h
$(MAKE) -C simulate clean

View File

@@ -9,20 +9,20 @@ module VX_cluster #(
input wire clk,
input wire reset,
// DRAM request
output wire dram_req_valid,
output wire dram_req_rw,
output wire [`L2DRAM_BYTEEN_WIDTH-1:0] dram_req_byteen,
output wire [`L2DRAM_ADDR_WIDTH-1:0] dram_req_addr,
output wire [`L2DRAM_LINE_WIDTH-1:0] dram_req_data,
output wire [`L2DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`L2MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`L2MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`L2MEM_LINE_WIDTH-1:0] mem_req_data,
output wire [`L2MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// DRAM response
input wire dram_rsp_valid,
input wire [`L2DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire [`L2DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`L2MEM_LINE_WIDTH-1:0] mem_rsp_data,
input wire [`L2MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// CSR Request
input wire csr_req_valid,
@@ -42,31 +42,31 @@ module VX_cluster #(
output wire ebreak
);
wire [`NUM_CORES-1:0] per_core_dram_req_valid;
wire [`NUM_CORES-1:0] per_core_dram_req_rw;
wire [`NUM_CORES-1:0][`DDRAM_BYTEEN_WIDTH-1:0] per_core_dram_req_byteen;
wire [`NUM_CORES-1:0][`DDRAM_ADDR_WIDTH-1:0] per_core_dram_req_addr;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_req_data;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_req_tag;
wire [`NUM_CORES-1:0] per_core_dram_req_ready;
wire [`NUM_CORES-1:0] per_core_mem_req_valid;
wire [`NUM_CORES-1:0] per_core_mem_req_rw;
wire [`NUM_CORES-1:0][`DMEM_BYTEEN_WIDTH-1:0] per_core_mem_req_byteen;
wire [`NUM_CORES-1:0][`DMEM_ADDR_WIDTH-1:0] per_core_mem_req_addr;
wire [`NUM_CORES-1:0][`DMEM_LINE_WIDTH-1:0] per_core_mem_req_data;
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_req_tag;
wire [`NUM_CORES-1:0] per_core_mem_req_ready;
wire [`NUM_CORES-1:0] per_core_dram_rsp_valid;
wire [`NUM_CORES-1:0][`DDRAM_LINE_WIDTH-1:0] per_core_dram_rsp_data;
wire [`NUM_CORES-1:0][`XDRAM_TAG_WIDTH-1:0] per_core_dram_rsp_tag;
wire [`NUM_CORES-1:0] per_core_dram_rsp_ready;
wire [`NUM_CORES-1:0] per_core_mem_rsp_valid;
wire [`NUM_CORES-1:0][`DMEM_LINE_WIDTH-1:0] per_core_mem_rsp_data;
wire [`NUM_CORES-1:0][`XMEM_TAG_WIDTH-1:0] per_core_mem_rsp_tag;
wire [`NUM_CORES-1:0] per_core_mem_rsp_ready;
wire [`NUM_CORES-1:0] per_core_csr_req_valid;
wire [`NUM_CORES-1:0][11:0] per_core_csr_req_addr;
wire [`NUM_CORES-1:0] per_core_csr_req_rw;
wire [`NUM_CORES-1:0][31:0] per_core_csr_req_data;
wire [`NUM_CORES-1:0] per_core_csr_req_ready;
wire [`NUM_CORES-1:0] per_core_csr_req_valid;
wire [`NUM_CORES-1:0][11:0] per_core_csr_req_addr;
wire [`NUM_CORES-1:0] per_core_csr_req_rw;
wire [`NUM_CORES-1:0][31:0] per_core_csr_req_data;
wire [`NUM_CORES-1:0] per_core_csr_req_ready;
wire [`NUM_CORES-1:0] per_core_csr_rsp_valid;
wire [`NUM_CORES-1:0][31:0] per_core_csr_rsp_data;
wire [`NUM_CORES-1:0] per_core_csr_rsp_ready;
wire [`NUM_CORES-1:0] per_core_csr_rsp_valid;
wire [`NUM_CORES-1:0][31:0] per_core_csr_rsp_data;
wire [`NUM_CORES-1:0] per_core_csr_rsp_ready;
wire [`NUM_CORES-1:0] per_core_busy;
wire [`NUM_CORES-1:0] per_core_ebreak;
wire [`NUM_CORES-1:0] per_core_busy;
wire [`NUM_CORES-1:0] per_core_ebreak;
for (genvar i = 0; i < `NUM_CORES; i++) begin
@@ -87,18 +87,18 @@ module VX_cluster #(
.clk (clk),
.reset (core_reset),
.dram_req_valid (per_core_dram_req_valid[i]),
.dram_req_rw (per_core_dram_req_rw [i]),
.dram_req_byteen(per_core_dram_req_byteen[i]),
.dram_req_addr (per_core_dram_req_addr [i]),
.dram_req_data (per_core_dram_req_data [i]),
.dram_req_tag (per_core_dram_req_tag [i]),
.dram_req_ready (per_core_dram_req_ready[i]),
.mem_req_valid (per_core_mem_req_valid[i]),
.mem_req_rw (per_core_mem_req_rw [i]),
.mem_req_byteen (per_core_mem_req_byteen[i]),
.mem_req_addr (per_core_mem_req_addr [i]),
.mem_req_data (per_core_mem_req_data [i]),
.mem_req_tag (per_core_mem_req_tag [i]),
.mem_req_ready (per_core_mem_req_ready[i]),
.dram_rsp_valid (per_core_dram_rsp_valid[i]),
.dram_rsp_data (per_core_dram_rsp_data [i]),
.dram_rsp_tag (per_core_dram_rsp_tag [i]),
.dram_rsp_ready (per_core_dram_rsp_ready[i]),
.mem_rsp_valid (per_core_mem_rsp_valid[i]),
.mem_rsp_data (per_core_mem_rsp_data [i]),
.mem_rsp_tag (per_core_mem_rsp_tag [i]),
.mem_rsp_ready (per_core_mem_rsp_ready[i]),
.csr_req_valid (per_core_csr_req_valid [i]),
.csr_req_rw (per_core_csr_req_rw [i]),
@@ -169,12 +169,12 @@ module VX_cluster #(
.NUM_REQS (`NUM_CORES),
.CREQ_SIZE (`L2CREQ_SIZE),
.MSHR_SIZE (`L2MSHR_SIZE),
.DRSQ_SIZE (`L2DRSQ_SIZE),
.DREQ_SIZE (`L2DREQ_SIZE),
.MRSQ_SIZE (`L2MRSQ_SIZE),
.MREQ_SIZE (`L2MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`XDRAM_TAG_WIDTH),
.CORE_TAG_WIDTH (`XMEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
.MEM_TAG_WIDTH (`L2MEM_TAG_WIDTH)
) l2cache (
`SCOPE_BIND_VX_cluster_l2cache
@@ -188,78 +188,78 @@ module VX_cluster #(
`endif
// Core request
.core_req_valid (per_core_dram_req_valid),
.core_req_rw (per_core_dram_req_rw),
.core_req_byteen (per_core_dram_req_byteen),
.core_req_addr (per_core_dram_req_addr),
.core_req_data (per_core_dram_req_data),
.core_req_tag (per_core_dram_req_tag),
.core_req_ready (per_core_dram_req_ready),
.core_req_valid (per_core_mem_req_valid),
.core_req_rw (per_core_mem_req_rw),
.core_req_byteen (per_core_mem_req_byteen),
.core_req_addr (per_core_mem_req_addr),
.core_req_data (per_core_mem_req_data),
.core_req_tag (per_core_mem_req_tag),
.core_req_ready (per_core_mem_req_ready),
// Core response
.core_rsp_valid (per_core_dram_rsp_valid),
.core_rsp_data (per_core_dram_rsp_data),
.core_rsp_tag (per_core_dram_rsp_tag),
.core_rsp_ready (per_core_dram_rsp_ready),
.core_rsp_valid (per_core_mem_rsp_valid),
.core_rsp_data (per_core_mem_rsp_data),
.core_rsp_tag (per_core_mem_rsp_tag),
.core_rsp_ready (per_core_mem_rsp_ready),
// DRAM request
.dram_req_valid (dram_req_valid),
.dram_req_rw (dram_req_rw),
.dram_req_byteen (dram_req_byteen),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// Memory request
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_ready (dram_rsp_ready)
// Memory response
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_ready (mem_rsp_ready)
);
end else begin
VX_mem_arb #(
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`L2DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`XDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2DRAM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) dram_arb (
.NUM_REQS (`NUM_CORES),
.DATA_WIDTH (`L2MEM_LINE_WIDTH),
.TAG_IN_WIDTH (`XMEM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2MEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),
// Core request
.req_valid_in (per_core_dram_req_valid),
.req_rw_in (per_core_dram_req_rw),
.req_byteen_in (per_core_dram_req_byteen),
.req_addr_in (per_core_dram_req_addr),
.req_data_in (per_core_dram_req_data),
.req_tag_in (per_core_dram_req_tag),
.req_ready_in (per_core_dram_req_ready),
.req_valid_in (per_core_mem_req_valid),
.req_rw_in (per_core_mem_req_rw),
.req_byteen_in (per_core_mem_req_byteen),
.req_addr_in (per_core_mem_req_addr),
.req_data_in (per_core_mem_req_data),
.req_tag_in (per_core_mem_req_tag),
.req_ready_in (per_core_mem_req_ready),
// DRAM request
.req_valid_out (dram_req_valid),
.req_rw_out (dram_req_rw),
.req_byteen_out (dram_req_byteen),
.req_addr_out (dram_req_addr),
.req_data_out (dram_req_data),
.req_tag_out (dram_req_tag),
.req_ready_out (dram_req_ready),
// Memory request
.req_valid_out (mem_req_valid),
.req_rw_out (mem_req_rw),
.req_byteen_out (mem_req_byteen),
.req_addr_out (mem_req_addr),
.req_data_out (mem_req_data),
.req_tag_out (mem_req_tag),
.req_ready_out (mem_req_ready),
// Core response
.rsp_valid_out (per_core_dram_rsp_valid),
.rsp_data_out (per_core_dram_rsp_data),
.rsp_tag_out (per_core_dram_rsp_tag),
.rsp_ready_out (per_core_dram_rsp_ready),
.rsp_valid_out (per_core_mem_rsp_valid),
.rsp_data_out (per_core_mem_rsp_data),
.rsp_tag_out (per_core_mem_rsp_tag),
.rsp_ready_out (per_core_mem_rsp_ready),
// DRAM response
.rsp_valid_in (dram_rsp_valid),
.rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
.rsp_ready_in (dram_rsp_ready)
// Memory response
.rsp_valid_in (mem_rsp_valid),
.rsp_tag_in (mem_rsp_tag),
.rsp_data_in (mem_rsp_data),
.rsp_ready_in (mem_rsp_ready)
);
end

View File

@@ -1,8 +1,6 @@
`ifndef VX_CONFIG
`define VX_CONFIG
`include "VX_user_config.vh"
`ifndef NUM_CLUSTERS
`define NUM_CLUSTERS 1
`endif
@@ -35,8 +33,8 @@
`define SM_ENABLE 1
`endif
`ifndef GLOBAL_BLOCK_SIZE
`define GLOBAL_BLOCK_SIZE 64
`ifndef MEM_BLOCK_SIZE
`define MEM_BLOCK_SIZE 64
`endif
`ifndef L1_BLOCK_SIZE
@@ -209,14 +207,14 @@
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
// PERF: memory
`define CSR_MPM_DRAM_READS 12'hB19 // dram reads
`define CSR_MPM_DRAM_READS_H 12'hB99
`define CSR_MPM_DRAM_WRITES 12'hB1A // dram writes
`define CSR_MPM_DRAM_WRITES_H 12'hB9A
`define CSR_MPM_DRAM_ST 12'hB1B // dram request stalls
`define CSR_MPM_DRAM_ST_H 12'hB9B
`define CSR_MPM_DRAM_LAT 12'hB1C // dram latency (total)
`define CSR_MPM_DRAM_LAT_H 12'hB9C
`define CSR_MPM_MEM_READS 12'hB19 // memory reads
`define CSR_MPM_MEM_READS_H 12'hB99
`define CSR_MPM_MEM_WRITES 12'hB1A // memory writes
`define CSR_MPM_MEM_WRITES_H 12'hB9A
`define CSR_MPM_MEM_ST 12'hB1B // memory request stalls
`define CSR_MPM_MEM_ST_H 12'hB9B
`define CSR_MPM_MEM_LAT 12'hB1C // memory latency (total)
`define CSR_MPM_MEM_LAT_H 12'hB9C
// Machine Information Registers
`define CSR_MVENDORID 12'hF11
@@ -281,14 +279,14 @@
`define IMSHR_SIZE `NUM_WARPS
`endif
// DRAM Request Queue Size
`ifndef IDREQ_SIZE
`define IDREQ_SIZE 4
// Memory Request Queue Size
`ifndef IMREQ_SIZE
`define IMREQ_SIZE 4
`endif
// DRAM Response Queue Size
`ifndef IDRSQ_SIZE
`define IDRSQ_SIZE 4
// Memory Response Queue Size
`ifndef IMRSQ_SIZE
`define IMRSQ_SIZE 4
`endif
// Dcache Configurable Knobs //////////////////////////////////////////////////
@@ -318,14 +316,14 @@
`define DMSHR_SIZE `LSUQ_SIZE
`endif
// DRAM Request Queue Size
`ifndef DDREQ_SIZE
`define DDREQ_SIZE 4
// Memory Request Queue Size
`ifndef DMREQ_SIZE
`define DMREQ_SIZE 4
`endif
// DRAM Response Queue Size
`ifndef DDRSQ_SIZE
`define DDRSQ_SIZE `MAX(4, (`DNUM_BANKS * 2))
// Memory Response Queue Size
`ifndef DMRSQ_SIZE
`define DMRSQ_SIZE `MAX(4, (`DNUM_BANKS * 2))
`endif
// SM Configurable Knobs //////////////////////////////////////////////////////
@@ -372,14 +370,14 @@
`define L2MSHR_SIZE 16
`endif
// DRAM Request Queue Size
`ifndef L2DREQ_SIZE
`define L2DREQ_SIZE 4
// L2 Memory Request Queue Size
`ifndef L2MREQ_SIZE
`define L2MREQ_SIZE 4
`endif
// DRAM Response Queue Size
`ifndef L2DRSQ_SIZE
`define L2DRSQ_SIZE `MAX(4, (`L2NUM_BANKS * 2))
// L2 Memory Response Queue Size
`ifndef L2MRSQ_SIZE
`define L2MRSQ_SIZE `MAX(4, (`L2NUM_BANKS * 2))
`endif
// L3cache Configurable Knobs /////////////////////////////////////////////////
@@ -404,14 +402,14 @@
`define L3MSHR_SIZE 16
`endif
// DRAM Request Queue Size
`ifndef L3DREQ_SIZE
`define L3DREQ_SIZE 4
// L3 Memory Request Queue Size
`ifndef L3MREQ_SIZE
`define L3MREQ_SIZE 4
`endif
// DRAM Response Queue Size
`ifndef L3DRSQ_SIZE
`define L3DRSQ_SIZE `MAX(4, (`L3NUM_BANKS * 2))
// L3 Memory Response Queue Size
`ifndef L3MRSQ_SIZE
`define L3MRSQ_SIZE `MAX(4, (`L3NUM_BANKS * 2))
`endif
`endif

View File

@@ -9,20 +9,20 @@ module VX_core #(
input wire clk,
input wire reset,
// DRAM request
output wire dram_req_valid,
output wire dram_req_rw,
output wire [`DDRAM_BYTEEN_WIDTH-1:0] dram_req_byteen,
output wire [`DDRAM_ADDR_WIDTH-1:0] dram_req_addr,
output wire [`DDRAM_LINE_WIDTH-1:0] dram_req_data,
output wire [`XDRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`DMEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`DMEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`DMEM_LINE_WIDTH-1:0] mem_req_data,
output wire [`XMEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// DRAM reponse
input wire dram_rsp_valid,
input wire [`DDRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire [`XDRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`DMEM_LINE_WIDTH-1:0] mem_rsp_data,
input wire [`XMEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// CSR request
input wire csr_req_valid,
@@ -44,29 +44,29 @@ module VX_core #(
VX_perf_memsys_if perf_memsys_if();
`endif
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH(`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH(`XDRAM_TAG_WIDTH)
) dram_req_if();
VX_cache_mem_req_if #(
.MEM_LINE_WIDTH(`DMEM_LINE_WIDTH),
.MEM_ADDR_WIDTH(`DMEM_ADDR_WIDTH),
.MEM_TAG_WIDTH(`XMEM_TAG_WIDTH)
) mem_req_if();
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH(`DDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH(`XDRAM_TAG_WIDTH)
) dram_rsp_if();
VX_cache_mem_rsp_if #(
.MEM_LINE_WIDTH(`DMEM_LINE_WIDTH),
.MEM_TAG_WIDTH(`XMEM_TAG_WIDTH)
) mem_rsp_if();
assign dram_req_valid = dram_req_if.valid;
assign dram_req_rw = dram_req_if.rw;
assign dram_req_byteen= dram_req_if.byteen;
assign dram_req_addr = dram_req_if.addr;
assign dram_req_data = dram_req_if.data;
assign dram_req_tag = dram_req_if.tag;
assign dram_req_if.ready = dram_req_ready;
assign mem_req_valid = mem_req_if.valid;
assign mem_req_rw = mem_req_if.rw;
assign mem_req_byteen= mem_req_if.byteen;
assign mem_req_addr = mem_req_if.addr;
assign mem_req_data = mem_req_if.data;
assign mem_req_tag = mem_req_if.tag;
assign mem_req_if.ready = mem_req_ready;
assign dram_rsp_if.valid = dram_rsp_valid;
assign dram_rsp_if.data = dram_rsp_data;
assign dram_rsp_if.tag = dram_rsp_tag;
assign dram_rsp_ready = dram_rsp_if.ready;
assign mem_rsp_if.valid = mem_rsp_valid;
assign mem_rsp_if.data = mem_rsp_data;
assign mem_rsp_if.tag = mem_rsp_tag;
assign mem_rsp_ready = mem_rsp_if.ready;
//--
@@ -168,9 +168,9 @@ module VX_core #(
.icache_core_req_if (icache_core_req_if),
.icache_core_rsp_if (icache_core_rsp_if),
// DRAM
.dram_req_if (dram_req_if),
.dram_rsp_if (dram_rsp_if)
// Memory
.mem_req_if (mem_req_if),
.mem_rsp_if (mem_rsp_if)
);
endmodule

View File

@@ -123,61 +123,61 @@ module VX_csr_data #(
`ifdef PERF_ENABLE
// PERF: pipeline
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[43:32]);
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[43:32]);
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[43:32]);
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[43:32]);
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[43:32]);
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[43:32]);
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[43:32]);
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[`PERF_CTR_BITS-1:32]);
// PERF: icache
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[43:32]);
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[43:32]);
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[43:32]);
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[43:32]);
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[`PERF_CTR_BITS-1:32]);
// PERF: dcache
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[43:32]);
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[43:32]);
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[43:32]);
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[43:32]);
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[43:32]);
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[43:32]);
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[43:32]);
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[43:32]);
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[`PERF_CTR_BITS-1:32]);
// PERF: smem
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[43:32]);
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[43:32]);
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[43:32]);
// PERF: DRAM
`CSR_MPM_DRAM_READS : read_data_r = perf_memsys_if.dram_reads[31:0];
`CSR_MPM_DRAM_READS_H : read_data_r = 32'(perf_memsys_if.dram_reads[43:32]);
`CSR_MPM_DRAM_WRITES : read_data_r = perf_memsys_if.dram_writes[31:0];
`CSR_MPM_DRAM_WRITES_H : read_data_r = 32'(perf_memsys_if.dram_writes[43:32]);
`CSR_MPM_DRAM_ST : read_data_r = perf_memsys_if.dram_stalls[31:0];
`CSR_MPM_DRAM_ST_H : read_data_r = 32'(perf_memsys_if.dram_stalls[43:32]);
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
`CSR_MPM_DRAM_LAT_H : read_data_r = 32'(perf_memsys_if.dram_latency[43:32]);
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[`PERF_CTR_BITS-1:32]);
// PERF: memory
`CSR_MPM_MEM_READS : read_data_r = perf_memsys_if.mem_reads[31:0];
`CSR_MPM_MEM_READS_H : read_data_r = 32'(perf_memsys_if.mem_reads[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_WRITES : read_data_r = perf_memsys_if.mem_writes[31:0];
`CSR_MPM_MEM_WRITES_H : read_data_r = 32'(perf_memsys_if.mem_writes[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_ST : read_data_r = perf_memsys_if.mem_stalls[31:0];
`CSR_MPM_MEM_ST_H : read_data_r = 32'(perf_memsys_if.mem_stalls[`PERF_CTR_BITS-1:32]);
`CSR_MPM_MEM_LAT : read_data_r = perf_memsys_if.mem_latency[31:0];
`CSR_MPM_MEM_LAT_H : read_data_r = 32'(perf_memsys_if.mem_latency[`PERF_CTR_BITS-1:32]);
`endif
`CSR_SATP : read_data_r = 32'(csr_satp);
@@ -195,9 +195,9 @@ module VX_csr_data #(
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[43:32]);
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[`PERF_CTR_BITS-1:32]);
`CSR_INSTRET : read_data_r = csr_instret[31:0];
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[43:32]);
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[`PERF_CTR_BITS-1:32]);
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;

View File

@@ -30,6 +30,8 @@
`define CSR_WIDTH 12
`define PERF_CTR_BITS 44
///////////////////////////////////////////////////////////////////////////////
`define INST_LUI 7'b0110111
@@ -244,7 +246,7 @@
`define ICACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0)
// Block size in bytes
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
`define ICACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
// Word size in bytes
`define IWORD_SIZE 4
@@ -264,11 +266,11 @@
// Core request tag bits
`define ICORE_TAG_WIDTH (`DBG_CACHE_REQ_MDATAW + `ICORE_TAG_ID_BITS)
// DRAM request data bits
`define IDRAM_LINE_WIDTH (`ICACHE_LINE_SIZE * 8)
// Memory request data bits
`define IMEM_LINE_WIDTH (`ICACHE_LINE_SIZE * 8)
// DRAM byte enable bits
`define IDRAM_BYTEEN_WIDTH `ICACHE_LINE_SIZE
// Memory byte enable bits
`define IMEM_BYTEEN_WIDTH `ICACHE_LINE_SIZE
////////////////////////// Dcache Configurable Knobs //////////////////////////
@@ -276,7 +278,7 @@
`define DCACHE_ID (32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1)
// Block size in bytes
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `GLOBAL_BLOCK_SIZE)
`define DCACHE_LINE_SIZE (`L2_ENABLE ? `L1_BLOCK_SIZE : `MEM_BLOCK_SIZE)
// Word size in bytes
`define DWORD_SIZE 4
@@ -299,14 +301,14 @@
// DRAM request data bits
`define DDRAM_LINE_WIDTH (`DCACHE_LINE_SIZE * 8)
// DRAM request address bits
`define DDRAM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
// Memory request address bits
`define DMEM_ADDR_WIDTH (32 - `CLOG2(`DCACHE_LINE_SIZE))
// DRAM byte enable bits
`define DDRAM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
// Memory byte enable bits
`define DMEM_BYTEEN_WIDTH `DCACHE_LINE_SIZE
// DRAM request tag bits
`define DDRAM_TAG_WIDTH `DDRAM_ADDR_WIDTH
// Memory request tag bits
`define DMEM_TAG_WIDTH `DMEM_ADDR_WIDTH
// Core request size
`define DNUM_REQUESTS `NUM_THREADS
@@ -334,7 +336,7 @@
`define L2CACHE_ID (32'(`L3_ENABLE) + CLUSTER_ID)
// Block size in bytes
`define L2CACHE_LINE_SIZE `GLOBAL_BLOCK_SIZE
`define L2CACHE_LINE_SIZE `MEM_BLOCK_SIZE
// Word size in bytes
`define L2WORD_SIZE `DCACHE_LINE_SIZE
@@ -342,17 +344,17 @@
// Core request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))
// DRAM request data bits
`define L2DRAM_LINE_WIDTH (`L2CACHE_LINE_SIZE * 8)
// Memory request data bits
`define L2MEM_LINE_WIDTH (`L2CACHE_LINE_SIZE * 8)
// DRAM request address bits
`define L2DRAM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE))
// Memory request address bits
`define L2MEM_ADDR_WIDTH (32 - `CLOG2(`L2CACHE_LINE_SIZE))
// DRAM byte enable bits
`define L2DRAM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
// Memory byte enable bits
`define L2MEM_BYTEEN_WIDTH `L2CACHE_LINE_SIZE
// DRAM request tag bits
`define L2DRAM_TAG_WIDTH (`L2_ENABLE ? `L2DRAM_ADDR_WIDTH : (`XDRAM_TAG_WIDTH+`CLOG2(`NUM_CORES)))
// Memory request tag bits
`define L2MEM_TAG_WIDTH (`L2_ENABLE ? `L2MEM_ADDR_WIDTH : (`XMEM_TAG_WIDTH+`CLOG2(`NUM_CORES)))
////////////////////////// L3cache Configurable Knobs /////////////////////////
@@ -360,7 +362,7 @@
`define L3CACHE_ID 0
// Block size in bytes
`define L3CACHE_LINE_SIZE `GLOBAL_BLOCK_SIZE
`define L3CACHE_LINE_SIZE `MEM_BLOCK_SIZE
// Word size in bytes
`define L3WORD_SIZE `L2CACHE_LINE_SIZE
@@ -368,30 +370,30 @@
// Core request tag bits
`define L3CORE_TAG_WIDTH (`L2CORE_TAG_WIDTH + `CLOG2(`NUM_CLUSTERS))
// DRAM request data bits
`define L3DRAM_LINE_WIDTH (`L3CACHE_LINE_SIZE * 8)
// Memory request data bits
`define L3MEM_LINE_WIDTH (`L3CACHE_LINE_SIZE * 8)
// DRAM request address bits
`define L3DRAM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE))
// Memory request address bits
`define L3MEM_ADDR_WIDTH (32 - `CLOG2(`L3CACHE_LINE_SIZE))
// DRAM byte enable bits
`define L3DRAM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
// Memory byte enable bits
`define L3MEM_BYTEEN_WIDTH `L3CACHE_LINE_SIZE
// DRAM request tag bits
`define L3DRAM_TAG_WIDTH (`L3_ENABLE ? `L3DRAM_ADDR_WIDTH : (`L2DRAM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS)))
// Memory request tag bits
`define L3MEM_TAG_WIDTH (`L3_ENABLE ? `L3MEM_ADDR_WIDTH : (`L2MEM_TAG_WIDTH+`CLOG2(`NUM_CLUSTERS)))
///////////////////////////////////////////////////////////////////////////////
`define VX_DRAM_BYTEEN_WIDTH `L3DRAM_BYTEEN_WIDTH
`define VX_DRAM_ADDR_WIDTH `L3DRAM_ADDR_WIDTH
`define VX_DRAM_LINE_WIDTH `L3DRAM_LINE_WIDTH
`define VX_DRAM_TAG_WIDTH `L3DRAM_TAG_WIDTH
`define VX_MEM_BYTEEN_WIDTH `L3MEM_BYTEEN_WIDTH
`define VX_MEM_ADDR_WIDTH `L3MEM_ADDR_WIDTH
`define VX_MEM_LINE_WIDTH `L3MEM_LINE_WIDTH
`define VX_MEM_TAG_WIDTH `L3MEM_TAG_WIDTH
`define VX_CORE_TAG_WIDTH `L3CORE_TAG_WIDTH
`define VX_CSR_ID_WIDTH `LOG2UP(`NUM_CLUSTERS * `NUM_CORES)
`define TO_FULL_ADDR(x) {x, (32-$bits(x))'(0)}
`define XDRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH+`CLOG2(2))
`define XMEM_TAG_WIDTH (`DMEM_TAG_WIDTH+`CLOG2(2))
///////////////////////////////////////////////////////////////////////////////

View File

@@ -7,7 +7,6 @@ module VX_ibuffer #(
input wire reset,
// inputs
input wire freeze, // keep current warp
VX_decode_if ibuf_enq_if,
// outputs
@@ -117,18 +116,9 @@ module VX_ibuffer #(
deq_valid_n = 0;
deq_wid_n = 'x;
deq_instr_n = 'x;
schedule_table_n = 'x;
if ((0 == num_warps)
|| (1 == num_warps && deq_fire && q_alm_empty[deq_wid])) begin
deq_valid_n = enq_fire;
deq_wid_n = ibuf_enq_if.wid;
deq_instr_n = q_data_in;
end else if ((1 == num_warps) || freeze) begin
deq_valid_n = 1;
deq_wid_n = deq_wid;
deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
end else begin
schedule_table_n = 'x;
if (num_warps > 1) begin
deq_valid_n = (| schedule_table);
schedule_table_n = schedule_table;
for (integer i = 0; i < `NUM_WARPS; i++) begin
@@ -139,6 +129,14 @@ module VX_ibuffer #(
break;
end
end
end else if (1 == num_warps && !(deq_fire && q_alm_empty[deq_wid])) begin
deq_valid_n = 1;
deq_wid_n = deq_wid;
deq_instr_n = deq_fire ? q_data_prev[deq_wid] : q_data_out[deq_wid];
end else begin
deq_valid_n = enq_fire;
deq_wid_n = ibuf_enq_if.wid;
deq_instr_n = q_data_in;
end
end

View File

@@ -33,7 +33,6 @@ module VX_issue #(
) ibuffer (
.clk (clk),
.reset (reset),
.freeze (1'b0),
.ibuf_enq_if (decode_if),
.ibuf_deq_if (ibuf_deq_if)
);
@@ -121,14 +120,14 @@ module VX_issue #(
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
`ifdef PERF_ENABLE
reg [43:0] perf_ibf_stalls;
reg [43:0] perf_scb_stalls;
reg [43:0] perf_alu_stalls;
reg [43:0] perf_lsu_stalls;
reg [43:0] perf_csr_stalls;
reg [43:0] perf_gpu_stalls;
reg [`PERF_CTR_BITS-1:0] perf_ibf_stalls;
reg [`PERF_CTR_BITS-1:0] perf_scb_stalls;
reg [`PERF_CTR_BITS-1:0] perf_alu_stalls;
reg [`PERF_CTR_BITS-1:0] perf_lsu_stalls;
reg [`PERF_CTR_BITS-1:0] perf_csr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_gpu_stalls;
`ifdef EXT_F_ENABLE
reg [43:0] perf_fpu_stalls;
reg [`PERF_CTR_BITS-1:0] perf_fpu_stalls;
`endif
always @(posedge clk) begin
@@ -144,26 +143,26 @@ module VX_issue #(
`endif
end else begin
if (decode_if.valid & !decode_if.ready) begin
perf_ibf_stalls <= perf_ibf_stalls + 44'd1;
perf_ibf_stalls <= perf_ibf_stalls + `PERF_CTR_BITS'd1;
end
if (ibuf_deq_if.valid & scoreboard_delay) begin
perf_scb_stalls <= perf_scb_stalls + 44'd1;
perf_scb_stalls <= perf_scb_stalls + `PERF_CTR_BITS'd1;
end
if (alu_req_if.valid & !alu_req_if.ready) begin
perf_alu_stalls <= perf_alu_stalls + 44'd1;
perf_alu_stalls <= perf_alu_stalls + `PERF_CTR_BITS'd1;
end
if (lsu_req_if.valid & !lsu_req_if.ready) begin
perf_lsu_stalls <= perf_lsu_stalls + 44'd1;
perf_lsu_stalls <= perf_lsu_stalls + `PERF_CTR_BITS'd1;
end
if (csr_req_if.valid & !csr_req_if.ready) begin
perf_csr_stalls <= perf_csr_stalls + 44'd1;
perf_csr_stalls <= perf_csr_stalls + `PERF_CTR_BITS'd1;
end
if (gpu_req_if.valid & !gpu_req_if.ready) begin
perf_gpu_stalls <= perf_gpu_stalls + 44'd1;
perf_gpu_stalls <= perf_gpu_stalls + `PERF_CTR_BITS'd1;
end
`ifdef EXT_F_ENABLE
if (fpu_req_if.valid & !fpu_req_if.ready) begin
perf_fpu_stalls <= perf_fpu_stalls + 44'd1;
perf_fpu_stalls <= perf_fpu_stalls + `PERF_CTR_BITS'd1;
end
`endif
end

View File

@@ -44,10 +44,6 @@ module VX_lsu_unit #(
end
wire is_dup_load = lsu_req_if.wb && lsu_req_if.tmask[0] && (& addr_matches);
`IGNORE_WARNINGS_BEGIN
reg [`LSUQ_SIZE-1:0][`LSUQ_ADDR_BITS-1:0] pending_tags;
`IGNORE_WARNINGS_END
wire ready_in;
wire stall_in = ~ready_in && req_valid;
@@ -79,7 +75,7 @@ module VX_lsu_unit #(
wire [`NUM_THREADS-1:0] rsp_tmask;
reg [`NUM_THREADS-1:0] req_sent_mask;
wire sent_all_ready;
wire req_ready_all;
wire [`LSUQ_ADDR_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full;
@@ -118,13 +114,7 @@ module VX_lsu_unit #(
`UNUSED_PIN (empty)
);
always @(posedge clk) begin
if (mbuf_push) begin
pending_tags[mbuf_waddr] <= req_tag;
end
end
assign sent_all_ready = &(dcache_req_if.ready | req_sent_mask);
assign req_ready_all = &(dcache_req_if.ready | req_sent_mask | ~req_tmask);
wire [`NUM_THREADS-1:0] req_sent_dup = {{(`NUM_THREADS-1){dcache_req_fire[0] && req_is_dup}}, 1'b0};
@@ -132,19 +122,22 @@ module VX_lsu_unit #(
if (reset) begin
req_sent_mask <= 0;
end else begin
if (sent_all_ready)
if (req_ready_all)
req_sent_mask <= 0;
else
req_sent_mask <= req_sent_mask | dcache_req_fire | req_sent_dup;
end
end
wire is_req_start = (0 == req_sent_mask);
// need to hold the acquired tag index until the full request is submitted
reg [`LSUQ_ADDR_BITS-1:0] req_tag_hold;
wire [`LSUQ_ADDR_BITS-1:0] req_tag = (0 == req_sent_mask) ? mbuf_waddr : req_tag_hold;
reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag = is_req_start ? mbuf_waddr : req_tag_hold;
always @(posedge clk) begin
if (mbuf_push)
if (mbuf_push) begin
req_tag_hold <= mbuf_waddr;
end
end
wire [`NUM_THREADS-1:0] req_tmask_dup = req_tmask & {{(`NUM_THREADS-1){~req_is_dup}}, 1'b1};
@@ -160,7 +153,8 @@ module VX_lsu_unit #(
end
end
wire req_ready_dep = (req_wb && ~mbuf_full)
// ensure all dependencies for the requests are resolved
wire req_dep_ready = (req_wb && (~mbuf_full || ~is_req_start))
|| (~req_wb && st_commit_if.ready);
// DCache Request
@@ -193,7 +187,7 @@ module VX_lsu_unit #(
end
end
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask_dup & ~req_sent_mask;
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_dep_ready}} & req_tmask_dup & ~req_sent_mask;
assign dcache_req_if.rw = {`NUM_THREADS{~req_wb}};
assign dcache_req_if.addr = mem_req_addr;
assign dcache_req_if.byteen = mem_req_byteen;
@@ -205,11 +199,11 @@ module VX_lsu_unit #(
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif
assign ready_in = req_ready_dep && sent_all_ready;
assign ready_in = req_dep_ready && req_ready_all;
// send store commit
wire is_store_rsp = req_valid && ~req_wb && sent_all_ready;
wire is_store_rsp = req_valid && ~req_wb && req_ready_all;
assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid;
@@ -280,23 +274,46 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
`ifdef DBG_PRINT_CORE_DCACHE
`IGNORE_WARNINGS_BEGIN
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH:0] pending_reqs;
`IGNORE_WARNINGS_END
// Debug-only shadow table: entry j stores {tag of thread 0's request, valid bit}
// for slot j, so the queue-full trace can list the tags that are outstanding.
always @(posedge clk) begin
    if (reset) begin
        pending_reqs <= '0;
    end else begin
        // Push and pop are handled independently. Nothing visible here prevents
        // both from being asserted in the same cycle, and with an else-if chain
        // the pop would be dropped, leaving a stale valid bit in the table.
        if (mbuf_push) begin
            pending_reqs[mbuf_waddr] <= {dcache_req_if.tag[0], 1'b1};
        end
        // Kept last so that the release wins if both ever target the same slot.
        if (mbuf_pop) begin
            pending_reqs[mbuf_raddr] <= '0;
        end
    end
end
always @(posedge clk) begin
if ((| dcache_req_fire)) begin
if ((| dcache_req_if.rw))
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
else
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d, is_dup=%b",
$time, CORE_ID, req_wid, req_pc, dcache_req_fire, req_addr, dcache_req_if.tag, dcache_req_if.byteen, req_rd, req_is_dup);
if (dcache_req_if.rw[0]) begin
$write("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
`PRINT_ARRAY1D(req_addr, `NUM_THREADS);
$write(", tag=%0h, byteen=%0h, data=", dcache_req_if.tag[0], dcache_req_if.byteen);
`PRINT_ARRAY1D(dcache_req_if.data, `NUM_THREADS);
$write("\n");
end else begin
$write("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=", $time, CORE_ID, req_wid, req_pc, dcache_req_fire);
`PRINT_ARRAY1D(req_addr, `NUM_THREADS);
$write(", tag=%0h, byteen=%0h, rd=%0d, is_dup=%b\n", dcache_req_if.tag[0], dcache_req_if.byteen, req_rd, req_is_dup);
end
end
if (dcache_rsp_fire) begin
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h, is_dup=%b",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data, rsp_is_dup);
$write("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd);
`PRINT_ARRAY1D(dcache_rsp_if.data, `NUM_THREADS);
$write(", is_dup=%b\n", rsp_is_dup);
end
if (mbuf_full) begin
$write("%t: D$%0d queue-full:", $time, CORE_ID);
$write("%t: *** D$%0d queue-full:", $time, CORE_ID);
for (integer j = 0; j < `LSUQ_SIZE; j++) begin
$write(" tag%0d=%0h", j, pending_tags[j]);
if (pending_reqs[j][0]) begin
$write(" %0d->%0h", j, pending_reqs[j][1 +: `DCORE_TAG_WIDTH]);
end
end
$write("\n");
end

View File

@@ -20,25 +20,25 @@ module VX_mem_unit # (
VX_icache_core_req_if icache_core_req_if,
VX_icache_core_rsp_if icache_core_rsp_if,
// DRAM
VX_cache_dram_req_if dram_req_if,
VX_cache_dram_rsp_if dram_rsp_if
// Memory
VX_cache_mem_req_if mem_req_if,
VX_cache_mem_rsp_if mem_rsp_if
);
`ifdef PERF_ENABLE
VX_perf_cache_if perf_icache_if(), perf_dcache_if(), perf_smem_if();
`endif
VX_cache_dram_req_if #(
.DRAM_LINE_WIDTH (`DDRAM_LINE_WIDTH),
.DRAM_ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
) dcache_dram_req_if(), icache_dram_req_if();
VX_cache_mem_req_if #(
.MEM_LINE_WIDTH (`DMEM_LINE_WIDTH),
.MEM_ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH)
) dcache_mem_req_if(), icache_mem_req_if();
VX_cache_dram_rsp_if #(
.DRAM_LINE_WIDTH (`DDRAM_LINE_WIDTH),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
) dcache_dram_rsp_if(), icache_dram_rsp_if();
VX_cache_mem_rsp_if #(
.MEM_LINE_WIDTH (`DMEM_LINE_WIDTH),
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH)
) dcache_mem_rsp_if(), icache_mem_rsp_if();
VX_dcache_core_req_if #(
.LANES (`DNUM_REQUESTS),
@@ -96,12 +96,12 @@ module VX_mem_unit # (
.NUM_REQS (1),
.CREQ_SIZE (`ICREQ_SIZE),
.MSHR_SIZE (`IMSHR_SIZE),
.DRSQ_SIZE (`IDRSQ_SIZE),
.DREQ_SIZE (`IDREQ_SIZE),
.MRSQ_SIZE (`IMRSQ_SIZE),
.MREQ_SIZE (`IMREQ_SIZE),
.WRITE_ENABLE (0),
.CORE_TAG_WIDTH (`ICORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH)
) icache (
`SCOPE_BIND_VX_mem_unit_icache
@@ -129,20 +129,20 @@ module VX_mem_unit # (
.perf_cache_if (perf_icache_if),
`endif
// DRAM Req
.dram_req_valid (icache_dram_req_if.valid),
.dram_req_rw (icache_dram_req_if.rw),
.dram_req_byteen (icache_dram_req_if.byteen),
.dram_req_addr (icache_dram_req_if.addr),
.dram_req_data (icache_dram_req_if.data),
.dram_req_tag (icache_dram_req_if.tag),
.dram_req_ready (icache_dram_req_if.ready),
// Memory Request
.mem_req_valid (icache_mem_req_if.valid),
.mem_req_rw (icache_mem_req_if.rw),
.mem_req_byteen (icache_mem_req_if.byteen),
.mem_req_addr (icache_mem_req_if.addr),
.mem_req_data (icache_mem_req_if.data),
.mem_req_tag (icache_mem_req_if.tag),
.mem_req_ready (icache_mem_req_if.ready),
// DRAM response
.dram_rsp_valid (icache_dram_rsp_if.valid),
.dram_rsp_data (icache_dram_rsp_if.data),
.dram_rsp_tag (icache_dram_rsp_if.tag),
.dram_rsp_ready (icache_dram_rsp_if.ready)
// Memory response
.mem_rsp_valid (icache_mem_rsp_if.valid),
.mem_rsp_data (icache_mem_rsp_if.data),
.mem_rsp_tag (icache_mem_rsp_if.tag),
.mem_rsp_ready (icache_mem_rsp_if.ready)
);
VX_cache #(
@@ -155,12 +155,12 @@ module VX_mem_unit # (
.NUM_REQS (`DNUM_REQUESTS),
.CREQ_SIZE (`DCREQ_SIZE),
.MSHR_SIZE (`DMSHR_SIZE),
.DRSQ_SIZE (`DDRSQ_SIZE),
.DREQ_SIZE (`DDREQ_SIZE),
.MRSQ_SIZE (`DMRSQ_SIZE),
.MREQ_SIZE (`DMREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH)
.MEM_TAG_WIDTH (`DMEM_TAG_WIDTH)
) dcache (
`SCOPE_BIND_VX_mem_unit_dcache
@@ -188,20 +188,20 @@ module VX_mem_unit # (
.perf_cache_if (perf_dcache_if),
`endif
// DRAM request
.dram_req_valid (dcache_dram_req_if.valid),
.dram_req_rw (dcache_dram_req_if.rw),
.dram_req_byteen (dcache_dram_req_if.byteen),
.dram_req_addr (dcache_dram_req_if.addr),
.dram_req_data (dcache_dram_req_if.data),
.dram_req_tag (dcache_dram_req_if.tag),
.dram_req_ready (dcache_dram_req_if.ready),
// Memory request
.mem_req_valid (dcache_mem_req_if.valid),
.mem_req_rw (dcache_mem_req_if.rw),
.mem_req_byteen (dcache_mem_req_if.byteen),
.mem_req_addr (dcache_mem_req_if.addr),
.mem_req_data (dcache_mem_req_if.data),
.mem_req_tag (dcache_mem_req_if.tag),
.mem_req_ready (dcache_mem_req_if.ready),
// DRAM response
.dram_rsp_valid (dcache_dram_rsp_if.valid),
.dram_rsp_data (dcache_dram_rsp_if.data),
.dram_rsp_tag (dcache_dram_rsp_if.tag),
.dram_rsp_ready (dcache_dram_rsp_if.ready)
// Memory response
.mem_rsp_valid (dcache_mem_rsp_if.valid),
.mem_rsp_data (dcache_mem_rsp_if.data),
.mem_rsp_tag (dcache_mem_rsp_if.tag),
.mem_rsp_ready (dcache_mem_rsp_if.ready)
);
if (`SM_ENABLE) begin
@@ -252,45 +252,45 @@ module VX_mem_unit # (
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH (`DDRAM_LINE_WIDTH),
.ADDR_WIDTH (`DDRAM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DDRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`XDRAM_TAG_WIDTH),
.DATA_WIDTH (`DMEM_LINE_WIDTH),
.ADDR_WIDTH (`DMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (`DMEM_TAG_WIDTH),
.TAG_OUT_WIDTH (`XMEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (0)
) dram_arb (
) mem_arb (
.clk (clk),
.reset (reset),
// Source request
.req_valid_in ({dcache_dram_req_if.valid, icache_dram_req_if.valid}),
.req_rw_in ({dcache_dram_req_if.rw, icache_dram_req_if.rw}),
.req_byteen_in ({dcache_dram_req_if.byteen, icache_dram_req_if.byteen}),
.req_addr_in ({dcache_dram_req_if.addr, icache_dram_req_if.addr}),
.req_data_in ({dcache_dram_req_if.data, icache_dram_req_if.data}),
.req_tag_in ({dcache_dram_req_if.tag, icache_dram_req_if.tag}),
.req_ready_in ({dcache_dram_req_if.ready, icache_dram_req_if.ready}),
.req_valid_in ({dcache_mem_req_if.valid, icache_mem_req_if.valid}),
.req_rw_in ({dcache_mem_req_if.rw, icache_mem_req_if.rw}),
.req_byteen_in ({dcache_mem_req_if.byteen, icache_mem_req_if.byteen}),
.req_addr_in ({dcache_mem_req_if.addr, icache_mem_req_if.addr}),
.req_data_in ({dcache_mem_req_if.data, icache_mem_req_if.data}),
.req_tag_in ({dcache_mem_req_if.tag, icache_mem_req_if.tag}),
.req_ready_in ({dcache_mem_req_if.ready, icache_mem_req_if.ready}),
// DRAM request
.req_valid_out (dram_req_if.valid),
.req_rw_out (dram_req_if.rw),
.req_byteen_out (dram_req_if.byteen),
.req_addr_out (dram_req_if.addr),
.req_data_out (dram_req_if.data),
.req_tag_out (dram_req_if.tag),
.req_ready_out (dram_req_if.ready),
// Memory request
.req_valid_out (mem_req_if.valid),
.req_rw_out (mem_req_if.rw),
.req_byteen_out (mem_req_if.byteen),
.req_addr_out (mem_req_if.addr),
.req_data_out (mem_req_if.data),
.req_tag_out (mem_req_if.tag),
.req_ready_out (mem_req_if.ready),
// Source response
.rsp_valid_out ({dcache_dram_rsp_if.valid, icache_dram_rsp_if.valid}),
.rsp_data_out ({dcache_dram_rsp_if.data, icache_dram_rsp_if.data}),
.rsp_tag_out ({dcache_dram_rsp_if.tag, icache_dram_rsp_if.tag}),
.rsp_ready_out ({dcache_dram_rsp_if.ready, icache_dram_rsp_if.ready}),
.rsp_valid_out ({dcache_mem_rsp_if.valid, icache_mem_rsp_if.valid}),
.rsp_data_out ({dcache_mem_rsp_if.data, icache_mem_rsp_if.data}),
.rsp_tag_out ({dcache_mem_rsp_if.tag, icache_mem_rsp_if.tag}),
.rsp_ready_out ({dcache_mem_rsp_if.ready, icache_mem_rsp_if.ready}),
// DRAM response
.rsp_valid_in (dram_rsp_if.valid),
.rsp_tag_in (dram_rsp_if.tag),
.rsp_data_in (dram_rsp_if.data),
.rsp_ready_in (dram_rsp_if.ready)
// Memory response
.rsp_valid_in (mem_rsp_if.valid),
.rsp_tag_in (mem_rsp_if.tag),
.rsp_data_in (mem_rsp_if.data),
.rsp_ready_in (mem_rsp_if.ready)
);
`ifdef PERF_ENABLE
@@ -319,47 +319,47 @@ end else begin
assign perf_memsys_if.smem_bank_stalls = 0;
end
reg [43:0] perf_dram_lat_per_cycle;
reg [`PERF_CTR_BITS-1:0] perf_mem_lat_per_cycle;
always @(posedge clk) begin
if (reset) begin
perf_dram_lat_per_cycle <= 0;
perf_mem_lat_per_cycle <= 0;
end else begin
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle +
44'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
2'((dram_rsp_if.valid && dram_rsp_if.ready) && !(dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready))));
perf_mem_lat_per_cycle <= perf_mem_lat_per_cycle +
`PERF_CTR_BITS'($signed(2'((mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready) && !(mem_rsp_if.valid && mem_rsp_if.ready)) -
2'((mem_rsp_if.valid && mem_rsp_if.ready) && !(mem_req_if.valid && !mem_req_if.rw && mem_req_if.ready))));
end
end
reg [43:0] perf_dram_reads;
reg [43:0] perf_dram_writes;
reg [43:0] perf_dram_lat;
reg [43:0] perf_dram_stalls;
reg [`PERF_CTR_BITS-1:0] perf_mem_reads;
reg [`PERF_CTR_BITS-1:0] perf_mem_writes;
reg [`PERF_CTR_BITS-1:0] perf_mem_lat;
reg [`PERF_CTR_BITS-1:0] perf_mem_stalls;
always @(posedge clk) begin
if (reset) begin
perf_dram_reads <= 0;
perf_dram_writes <= 0;
perf_dram_lat <= 0;
perf_dram_stalls <= 0;
perf_mem_reads <= 0;
perf_mem_writes <= 0;
perf_mem_lat <= 0;
perf_mem_stalls <= 0;
end else begin
if (dram_req_if.valid && dram_req_if.ready && !dram_req_if.rw) begin
perf_dram_reads <= perf_dram_reads + 44'd1;
if (mem_req_if.valid && mem_req_if.ready && !mem_req_if.rw) begin
perf_mem_reads <= perf_mem_reads + `PERF_CTR_BITS'd1;
end
if (dram_req_if.valid && dram_req_if.ready && dram_req_if.rw) begin
perf_dram_writes <= perf_dram_writes + 44'd1;
if (mem_req_if.valid && mem_req_if.ready && mem_req_if.rw) begin
perf_mem_writes <= perf_mem_writes + `PERF_CTR_BITS'd1;
end
if (dram_req_if.valid && !dram_req_if.ready) begin
perf_dram_stalls <= perf_dram_stalls + 44'd1;
if (mem_req_if.valid && !mem_req_if.ready) begin
perf_mem_stalls <= perf_mem_stalls + `PERF_CTR_BITS'd1;
end
perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
perf_mem_lat <= perf_mem_lat + perf_mem_lat_per_cycle;
end
end
assign perf_memsys_if.dram_reads = perf_dram_reads;
assign perf_memsys_if.dram_writes = perf_dram_writes;
assign perf_memsys_if.dram_latency = perf_dram_lat;
assign perf_memsys_if.dram_stalls = perf_dram_stalls;
assign perf_memsys_if.mem_reads = perf_mem_reads;
assign perf_memsys_if.mem_writes = perf_mem_writes;
assign perf_memsys_if.mem_latency = perf_mem_lat;
assign perf_memsys_if.mem_stalls = perf_mem_stalls;
`endif
endmodule

View File

@@ -70,6 +70,8 @@
// Number of bits needed to index x entries, never less than 1.
`define LOG2UP(x) (((x) > 1) ? $clog2(x) : 1)
// True when x is a non-zero power of two.
`define ISPOW2(x) (((x) != 0) && (0 == ((x) & ((x) - 1))))
// Absolute value of x interpreted as signed.
// NOTE(review): the expansion ends with ';', so this macro cannot be nested
// inside a larger expression; the semicolon is kept because existing call
// sites may rely on it -- confirm before removing.
`define ABS(x) (($signed(x) < 0) ? (-$signed(x)) : (x));
// Smaller / larger of two values. Every use of an argument is parenthesized
// so that expression arguments (e.g. a ? b : c, a | b) keep their meaning.
`define MIN(x, y) (((x) < (y)) ? (x) : (y))
`define MAX(x, y) (((x) > (y)) ? (x) : (y))

View File

@@ -31,7 +31,7 @@ module VX_scoreboard #(
if (release_reg) begin
inuse_regs[writeback_if.wid][writeback_if.rd] <= 0;
assert(inuse_regs[writeback_if.wid][writeback_if.rd] != 0)
else $error("*** %t: core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
else $error("%t: *** core%0d: invalid writeback register: wid=%0d, PC=%0h, rd=%0d",
$time, CORE_ID, writeback_if.wid, writeback_if.PC, writeback_if.rd);
end
end
@@ -40,7 +40,7 @@ module VX_scoreboard #(
`ifdef DBG_PRINT_PIPELINE
always @(posedge clk) begin
if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
$display("%t: core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$display("%t: *** core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
deq_inuse_regs[ibuf_deq_if.rd], deq_inuse_regs[ibuf_deq_if.rs1], deq_inuse_regs[ibuf_deq_if.rs2], deq_inuse_regs[ibuf_deq_if.rs3]);
end
@@ -54,7 +54,7 @@ module VX_scoreboard #(
deadlock_ctr <= 0;
end else if (ibuf_deq_if.valid && ~ibuf_deq_if.ready) begin
deadlock_ctr <= deadlock_ctr + 1;
assert(deadlock_ctr < deadlock_timeout) else $error("*** %t: core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
assert(deadlock_ctr < deadlock_timeout) else $error("%t: *** core%0d-deadlock: wid=%0d, PC=%0h, rd=%0d, wb=%0d, inuse=%b%b%b%b",
$time, CORE_ID, ibuf_deq_if.wid, ibuf_deq_if.PC, ibuf_deq_if.rd, ibuf_deq_if.wb,
deq_inuse_regs[ibuf_deq_if.rd], deq_inuse_regs[ibuf_deq_if.rs1], deq_inuse_regs[ibuf_deq_if.rs2], deq_inuse_regs[ibuf_deq_if.rs3]);
end else if (ibuf_deq_if.valid && ibuf_deq_if.ready) begin

View File

@@ -34,7 +34,7 @@ module VX_smem_arb (
wire is_smem_addr_in, is_smem_addr_out;
// select shared memory bus
assign is_smem_addr_in = core_req_if.valid[i] && `SM_ENABLE
assign is_smem_addr_in = `SM_ENABLE
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] >= (32-SMEM_ASHIFT)'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> SMEM_ASHIFT))
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
@@ -51,13 +51,13 @@ module VX_smem_arb (
.ready_out (cache_req_ready_out)
);
if (`SM_ENABLE ) begin
if (`SM_ENABLE) begin
assign cache_req_if.valid[i] = cache_req_valid_out && ~is_smem_addr_out;
assign smem_req_if.valid[i] = cache_req_valid_out && is_smem_addr_out;
assign cache_req_ready_out = is_smem_addr_out ? smem_req_if.ready[i] : cache_req_if.ready[i];
assign smem_req_if.addr[i] = cache_req_if.addr[i];
assign smem_req_if.rw[i] = cache_req_if.rw[i];
assign smem_req_if.rw[i] = cache_req_if.rw[i];
assign smem_req_if.byteen[i] = cache_req_if.byteen[i];
assign smem_req_if.data[i] = cache_req_if.data[i];
assign smem_req_if.tag[i] = cache_req_if.tag[i];

View File

@@ -7,20 +7,20 @@ module Vortex (
input wire clk,
input wire reset,
// DRAM request
output wire dram_req_valid,
output wire dram_req_rw,
output wire [`VX_DRAM_BYTEEN_WIDTH-1:0] dram_req_byteen,
output wire [`VX_DRAM_ADDR_WIDTH-1:0] dram_req_addr,
output wire [`VX_DRAM_LINE_WIDTH-1:0] dram_req_data,
output wire [`VX_DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [`VX_MEM_BYTEEN_WIDTH-1:0] mem_req_byteen,
output wire [`VX_MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`VX_MEM_LINE_WIDTH-1:0] mem_req_data,
output wire [`VX_MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// DRAM response
input wire dram_rsp_valid,
input wire [`VX_DRAM_LINE_WIDTH-1:0] dram_rsp_data,
input wire [`VX_DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`VX_MEM_LINE_WIDTH-1:0] mem_rsp_data,
input wire [`VX_MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// CSR Request
input wire csr_req_valid,
@@ -40,18 +40,18 @@ module Vortex (
output wire ebreak
);
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_BYTEEN_WIDTH-1:0] per_cluster_dram_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_ADDR_WIDTH-1:0] per_cluster_dram_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_req_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_rw;
wire [`NUM_CLUSTERS-1:0][`L2MEM_BYTEEN_WIDTH-1:0] per_cluster_mem_req_byteen;
wire [`NUM_CLUSTERS-1:0][`L2MEM_ADDR_WIDTH-1:0] per_cluster_mem_req_addr;
wire [`NUM_CLUSTERS-1:0][`L2MEM_LINE_WIDTH-1:0] per_cluster_mem_req_data;
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_LINE_WIDTH-1:0] per_cluster_dram_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2DRAM_TAG_WIDTH-1:0] per_cluster_dram_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_dram_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2MEM_LINE_WIDTH-1:0] per_cluster_mem_rsp_data;
wire [`NUM_CLUSTERS-1:0][`L2MEM_TAG_WIDTH-1:0] per_cluster_mem_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_mem_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_req_addr;
@@ -88,18 +88,18 @@ module Vortex (
.clk (clk),
.reset (cluster_reset),
.dram_req_valid (per_cluster_dram_req_valid [i]),
.dram_req_rw (per_cluster_dram_req_rw [i]),
.dram_req_byteen(per_cluster_dram_req_byteen[i]),
.dram_req_addr (per_cluster_dram_req_addr [i]),
.dram_req_data (per_cluster_dram_req_data [i]),
.dram_req_tag (per_cluster_dram_req_tag [i]),
.dram_req_ready (per_cluster_dram_req_ready [i]),
.mem_req_valid (per_cluster_mem_req_valid [i]),
.mem_req_rw (per_cluster_mem_req_rw [i]),
.mem_req_byteen (per_cluster_mem_req_byteen[i]),
.mem_req_addr (per_cluster_mem_req_addr [i]),
.mem_req_data (per_cluster_mem_req_data [i]),
.mem_req_tag (per_cluster_mem_req_tag [i]),
.mem_req_ready (per_cluster_mem_req_ready [i]),
.dram_rsp_valid (per_cluster_dram_rsp_valid [i]),
.dram_rsp_data (per_cluster_dram_rsp_data [i]),
.dram_rsp_tag (per_cluster_dram_rsp_tag [i]),
.dram_rsp_ready (per_cluster_dram_rsp_ready [i]),
.mem_rsp_valid (per_cluster_mem_rsp_valid [i]),
.mem_rsp_data (per_cluster_mem_rsp_data [i]),
.mem_rsp_tag (per_cluster_mem_rsp_tag [i]),
.mem_rsp_ready (per_cluster_mem_rsp_ready [i]),
.csr_req_valid (per_cluster_csr_req_valid [i]),
.csr_req_coreid (csr_core_id),
@@ -171,12 +171,12 @@ module Vortex (
.NUM_REQS (`NUM_CLUSTERS),
.CREQ_SIZE (`L3CREQ_SIZE),
.MSHR_SIZE (`L3MSHR_SIZE),
.DRSQ_SIZE (`L3DRSQ_SIZE),
.DREQ_SIZE (`L3DREQ_SIZE),
.MRSQ_SIZE (`L3MRSQ_SIZE),
.MREQ_SIZE (`L3MREQ_SIZE),
.WRITE_ENABLE (1),
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
.CORE_TAG_WIDTH (`L2MEM_TAG_WIDTH),
.CORE_TAG_ID_BITS (0),
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
.MEM_TAG_WIDTH (`L3MEM_TAG_WIDTH)
) l3cache (
`SCOPE_BIND_Vortex_l3cache
@@ -190,105 +190,105 @@ module Vortex (
`endif
// Core request
.core_req_valid (per_cluster_dram_req_valid),
.core_req_rw (per_cluster_dram_req_rw),
.core_req_byteen (per_cluster_dram_req_byteen),
.core_req_addr (per_cluster_dram_req_addr),
.core_req_data (per_cluster_dram_req_data),
.core_req_tag (per_cluster_dram_req_tag),
.core_req_ready (per_cluster_dram_req_ready),
.core_req_valid (per_cluster_mem_req_valid),
.core_req_rw (per_cluster_mem_req_rw),
.core_req_byteen (per_cluster_mem_req_byteen),
.core_req_addr (per_cluster_mem_req_addr),
.core_req_data (per_cluster_mem_req_data),
.core_req_tag (per_cluster_mem_req_tag),
.core_req_ready (per_cluster_mem_req_ready),
// Core response
.core_rsp_valid (per_cluster_dram_rsp_valid),
.core_rsp_data (per_cluster_dram_rsp_data),
.core_rsp_tag (per_cluster_dram_rsp_tag),
.core_rsp_ready (per_cluster_dram_rsp_ready),
.core_rsp_valid (per_cluster_mem_rsp_valid),
.core_rsp_data (per_cluster_mem_rsp_data),
.core_rsp_tag (per_cluster_mem_rsp_tag),
.core_rsp_ready (per_cluster_mem_rsp_ready),
// DRAM request
.dram_req_valid (dram_req_valid),
.dram_req_rw (dram_req_rw),
.dram_req_byteen (dram_req_byteen),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// Memory request
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready)
// Memory response
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready)
);
end else begin
VX_mem_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3DRAM_LINE_WIDTH),
.TAG_IN_WIDTH (`L2DRAM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3DRAM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) dram_arb (
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (`L3MEM_LINE_WIDTH),
.TAG_IN_WIDTH (`L2MEM_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3MEM_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (1)
) mem_arb (
.clk (clk),
.reset (reset),
// Core request
.req_valid_in (per_cluster_dram_req_valid),
.req_rw_in (per_cluster_dram_req_rw),
.req_byteen_in (per_cluster_dram_req_byteen),
.req_addr_in (per_cluster_dram_req_addr),
.req_data_in (per_cluster_dram_req_data),
.req_tag_in (per_cluster_dram_req_tag),
.req_ready_in (per_cluster_dram_req_ready),
.req_valid_in (per_cluster_mem_req_valid),
.req_rw_in (per_cluster_mem_req_rw),
.req_byteen_in (per_cluster_mem_req_byteen),
.req_addr_in (per_cluster_mem_req_addr),
.req_data_in (per_cluster_mem_req_data),
.req_tag_in (per_cluster_mem_req_tag),
.req_ready_in (per_cluster_mem_req_ready),
// DRAM request
.req_valid_out (dram_req_valid),
.req_rw_out (dram_req_rw),
.req_byteen_out (dram_req_byteen),
.req_addr_out (dram_req_addr),
.req_data_out (dram_req_data),
.req_tag_out (dram_req_tag),
.req_ready_out (dram_req_ready),
// Memory request
.req_valid_out (mem_req_valid),
.req_rw_out (mem_req_rw),
.req_byteen_out (mem_req_byteen),
.req_addr_out (mem_req_addr),
.req_data_out (mem_req_data),
.req_tag_out (mem_req_tag),
.req_ready_out (mem_req_ready),
// Core response
.rsp_valid_out (per_cluster_dram_rsp_valid),
.rsp_data_out (per_cluster_dram_rsp_data),
.rsp_tag_out (per_cluster_dram_rsp_tag),
.rsp_ready_out (per_cluster_dram_rsp_ready),
.rsp_valid_out (per_cluster_mem_rsp_valid),
.rsp_data_out (per_cluster_mem_rsp_data),
.rsp_tag_out (per_cluster_mem_rsp_tag),
.rsp_ready_out (per_cluster_mem_rsp_ready),
// DRAM response
.rsp_valid_in (dram_rsp_valid),
.rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
.rsp_ready_in (dram_rsp_ready)
// Memory response
.rsp_valid_in (mem_rsp_valid),
.rsp_tag_in (mem_rsp_tag),
.rsp_data_in (mem_rsp_data),
.rsp_ready_in (mem_rsp_ready)
);
end
`SCOPE_ASSIGN (reset, reset);
`SCOPE_ASSIGN (dram_req_fire, dram_req_valid && dram_req_ready);
`SCOPE_ASSIGN (dram_req_addr, `TO_FULL_ADDR(dram_req_addr));
`SCOPE_ASSIGN (dram_req_rw, dram_req_rw);
`SCOPE_ASSIGN (dram_req_byteen, dram_req_byteen);
`SCOPE_ASSIGN (dram_req_data, dram_req_data);
`SCOPE_ASSIGN (dram_req_tag, dram_req_tag);
`SCOPE_ASSIGN (dram_rsp_fire, dram_rsp_valid && dram_rsp_ready);
`SCOPE_ASSIGN (dram_rsp_data, dram_rsp_data);
`SCOPE_ASSIGN (dram_rsp_tag, dram_rsp_tag);
`SCOPE_ASSIGN (mem_req_fire, mem_req_valid && mem_req_ready);
`SCOPE_ASSIGN (mem_req_addr, `TO_FULL_ADDR(mem_req_addr));
`SCOPE_ASSIGN (mem_req_rw, mem_req_rw);
`SCOPE_ASSIGN (mem_req_byteen, mem_req_byteen);
`SCOPE_ASSIGN (mem_req_data, mem_req_data);
`SCOPE_ASSIGN (mem_req_tag, mem_req_tag);
`SCOPE_ASSIGN (mem_rsp_fire, mem_rsp_valid && mem_rsp_ready);
`SCOPE_ASSIGN (mem_rsp_data, mem_rsp_data);
`SCOPE_ASSIGN (mem_rsp_tag, mem_rsp_tag);
`SCOPE_ASSIGN (busy, busy);
`ifdef DBG_PRINT_DRAM
`ifdef DBG_PRINT_MEM
always @(posedge clk) begin
if (dram_req_valid && dram_req_ready) begin
if (dram_req_rw)
$display("%t: DRAM Wr Req: addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen, dram_req_data);
if (mem_req_valid && mem_req_ready) begin
if (mem_req_rw)
$display("%t: MEM Wr Req: addr=%0h, tag=%0h, byteen=%0h data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen, mem_req_data);
else
$display("%t: DRAM Rd Req: addr=%0h, tag=%0h, byteen=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_tag, dram_req_byteen);
$display("%t: MEM Rd Req: addr=%0h, tag=%0h, byteen=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_tag, mem_req_byteen);
end
if (dram_rsp_valid && dram_rsp_ready) begin
$display("%t: DRAM Rsp: tag=%0h, data=%0h", $time, dram_rsp_tag, dram_rsp_data);
if (mem_rsp_valid && mem_rsp_ready) begin
$display("%t: MEM Rsp: tag=%0h, data=%0h", $time, mem_rsp_tag, mem_rsp_data);
end
end
`endif

View File

@@ -1,133 +1,166 @@
`include "VX_define.vh"
module VX_avs_wrapper #(
parameter AVS_DATAW = 1,
parameter AVS_ADDRW = 1,
parameter AVS_BURSTW = 1,
parameter AVS_BANKS = 1,
parameter REQ_TAGW = 1,
parameter RD_QUEUE_SIZE = 1,
parameter NUM_BANKS = 1,
parameter AVS_DATA_WIDTH = 1,
parameter AVS_ADDR_WIDTH = 1,
parameter AVS_BURST_WIDTH = 1,
parameter AVS_BANKS = 1,
parameter REQ_TAG_WIDTH = 1,
parameter RD_QUEUE_SIZE = 1,
parameter AVS_BYTEENW = (AVS_DATAW / 8),
parameter RD_QUEUE_ADDRW= $clog2(RD_QUEUE_SIZE+1),
parameter AVS_BANKS_BITS= $clog2(AVS_BANKS)
parameter AVS_BYTEENW = (AVS_DATA_WIDTH / 8),
parameter RD_QUEUE_ADDR_WIDTH = $clog2(RD_QUEUE_SIZE+1),
parameter AVS_BANKS_BITS = $clog2(AVS_BANKS)
) (
input wire clk,
input wire reset,
input wire clk,
input wire reset,
// Memory request
input wire mem_req_valid,
input wire mem_req_rw,
input wire [AVS_BYTEENW-1:0] mem_req_byteen,
input wire [AVS_ADDR_WIDTH-1:0] mem_req_addr,
input wire [AVS_DATA_WIDTH-1:0] mem_req_data,
input wire [REQ_TAG_WIDTH-1:0] mem_req_tag,
output wire mem_req_ready,
// Memory response
output wire mem_rsp_valid,
output wire [AVS_DATA_WIDTH-1:0] mem_rsp_data,
output wire [REQ_TAG_WIDTH-1:0] mem_rsp_tag,
input wire mem_rsp_ready,
// AVS bus
output wire [AVS_DATAW-1:0] avs_writedata,
input wire [AVS_DATAW-1:0] avs_readdata,
output wire [AVS_ADDRW-1:0] avs_address,
input wire avs_waitrequest,
output wire avs_write,
output wire avs_read,
output wire [AVS_BYTEENW-1:0] avs_byteenable,
output wire [AVS_BURSTW-1:0] avs_burstcount,
input avs_readdatavalid,
output wire [AVS_BANKS_BITS-1:0] avs_bankselect,
// DRAM request
input wire dram_req_valid,
input wire dram_req_rw,
input wire [AVS_BYTEENW-1:0] dram_req_byteen,
input wire [AVS_ADDRW-1:0] dram_req_addr,
input wire [AVS_DATAW-1:0] dram_req_data,
input wire [REQ_TAGW-1:0] dram_req_tag,
output wire dram_req_ready,
// DRAM response
output wire dram_rsp_valid,
output wire [AVS_DATAW-1:0] dram_rsp_data,
output wire [REQ_TAGW-1:0] dram_rsp_tag,
input wire dram_rsp_ready
output wire [AVS_DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [AVS_DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS],
output wire [AVS_BYTEENW-1:0] avs_byteenable [NUM_BANKS],
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
input avs_readdatavalid [NUM_BANKS]
);
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
reg [AVS_BURSTW-1:0] avs_burstcount_r;
wire avs_reqq_push = dram_req_valid && dram_req_ready && !dram_req_rw;
wire avs_reqq_pop = dram_rsp_valid && dram_rsp_ready;
localparam BANK_ADDRW = `LOG2UP(NUM_BANKS);
wire avs_rspq_push = avs_readdatavalid;
wire avs_rspq_pop = avs_reqq_pop;
wire avs_rspq_empty;
wire rsp_queue_going_full;
wire [RD_QUEUE_ADDRW-1:0] rsp_queue_size;
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.push (avs_reqq_push),
.pop (avs_rspq_pop),
`UNUSED_PIN (empty),
.full (rsp_queue_going_full),
.size (rsp_queue_size)
);
`UNUSED_VAR (rsp_queue_size)
always @(posedge clk) begin
avs_burstcount_r <= 1;
avs_bankselect_r <= 0;
end
// Requests handling
VX_fifo_queue #(
.DATAW (REQ_TAGW),
.SIZE (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_reqq_push),
.pop (avs_reqq_pop),
.data_in (dram_req_tag),
.data_out (dram_rsp_tag),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
// Per-bank request bookkeeping (one bit / one entry per AVS bank).
wire [NUM_BANKS-1:0] avs_reqq_push, avs_reqq_pop, avs_reqq_ready;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_data_out;
// Target bank: the low BANK_ADDRW bits of the request address when there are
// at least two banks, otherwise always bank 0.
wire [BANK_ADDRW-1:0] req_bank_sel = (NUM_BANKS >= 2) ? mem_req_addr[BANK_ADDRW-1:0] : '0;
for (genvar i = 0; i < NUM_BANKS; i++) begin
// A bank accepts a request when its pending counter is not full and the
// bank is not asserting waitrequest.
assign avs_reqq_ready[i] = !req_queue_going_full[i] && !avs_waitrequest[i];
// Only reads (rw == 0) addressed to this bank are recorded; writes are not pushed.
assign avs_reqq_push[i] = mem_req_valid && !mem_req_rw && avs_reqq_ready[i] && (req_bank_sel == i);
end
// Per bank: a pending-request counter plus a FIFO holding the tags of reads
// that have been pushed but not yet popped.
for (genvar i = 0; i < NUM_BANKS; i++) begin
// Counts accepted reads (push) against popped responses (pop); its 'full'
// output gates avs_reqq_ready / avs_read / avs_write for this bank.
// NOTE(review): exact full/size semantics live in VX_pending_size -- confirm there.
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.push (avs_reqq_push[i]),
.pop (avs_reqq_pop[i]),
.full (req_queue_going_full[i]),
.size (req_queue_size[i]),
`UNUSED_PIN (empty)
);
// NOTE(review): this marks the whole req_queue_size array on every loop
// iteration, not just element i.
`UNUSED_VAR (req_queue_size)
// Stores the request tag of each pushed read; the head entry is exposed on
// avs_reqq_data_out[i] and is popped by the same avs_reqq_pop[i] that pops
// the pending counter.
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_reqq_push[i]),
.pop (avs_reqq_pop[i]),
.data_in (mem_req_tag),
.data_out (avs_reqq_data_out[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
// Drive each bank's AVS command signals from the single incoming request.
for (genvar i = 0; i < NUM_BANKS; i++) begin
// read/write strobes are raised only on the selected bank and only while
// that bank's pending counter is not full; waitrequest is not part of
// these terms (it is folded into mem_req_ready instead).
assign avs_read[i] = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
assign avs_write[i] = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
// Address, byte enables and write data are broadcast unchanged to all banks.
// NOTE(review): the full mem_req_addr, including the bits used for
// req_bank_sel, is forwarded to every bank -- confirm this matches the
// platform's per-bank address map.
assign avs_address[i] = mem_req_addr;
assign avs_byteenable[i] = mem_req_byteen;
assign avs_writedata[i] = mem_req_data;
// Burst count is fixed at 1.
assign avs_burstcount[i] = AVS_BURST_WIDTH'(1);
end
// The requester sees the ready of whichever bank its address selects.
assign mem_req_ready = avs_reqq_ready[req_bank_sel];
// Responses handling
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
wire [NUM_BANKS-1:0] avs_rspq_empty;
for (genvar i = 0; i < NUM_BANKS; i++) begin
VX_fifo_queue #(
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_readdatavalid[i]),
.pop (avs_reqq_pop[i]),
.data_in (avs_readdata[i]),
.data_out (avs_rspq_data_out[i]),
.empty (avs_rspq_empty[i]),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign rsp_arb_valid_in[i] = !avs_rspq_empty[i];
assign rsp_arb_data_in[i] = {avs_rspq_data_out[i], avs_reqq_data_out[i]};
assign avs_reqq_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (AVS_DATA_WIDTH + REQ_TAG_WIDTH),
.BUFFERED (NUM_BANKS > 2)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),
.valid_out (mem_rsp_valid),
.data_out ({mem_rsp_data, mem_rsp_tag}),
.ready_out (mem_rsp_ready)
);
VX_fifo_queue #(
.DATAW (AVS_DATAW),
.SIZE (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_rspq_push),
.pop (avs_rspq_pop),
.data_in (avs_readdata),
.data_out (dram_rsp_data),
.empty (avs_rspq_empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
assign avs_read = dram_req_valid && !dram_req_rw && !rsp_queue_going_full;
assign avs_write = dram_req_valid && dram_req_rw && !rsp_queue_going_full;
assign avs_address = dram_req_addr;
assign avs_byteenable = dram_req_byteen;
assign avs_writedata = dram_req_data;
assign avs_burstcount = avs_burstcount_r;
assign avs_bankselect = avs_bankselect_r;
assign dram_req_ready = !avs_waitrequest && !rsp_queue_going_full;
assign dram_rsp_valid = !avs_rspq_empty;
`ifdef DBG_PRINT_AVS
always @(posedge clk) begin
if (dram_req_valid && dram_req_ready) begin
if (dram_req_rw)
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, dram_req_data);
if (mem_req_valid && mem_req_ready) begin
if (mem_req_rw)
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data);
else
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(dram_req_addr), dram_req_byteen, dram_req_tag, rsp_queue_size);
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size);
end
if (dram_rsp_valid && dram_rsp_ready) begin
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, dram_rsp_tag, dram_rsp_data, rsp_queue_size);
if (mem_rsp_valid && mem_rsp_ready) begin
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, req_queue_size);
end
end
`endif

178
hw/rtl/afu/VX_to_mem.v Normal file
View File

@@ -0,0 +1,178 @@
`include "VX_define.vh"
// VX_to_mem: adapts a memory request/response interface between a source
// that uses SRC_DATA_WIDTH-bit lines and a destination that uses
// DST_DATA_WIDTH-bit lines. Three cases are generated:
//   * DST wider than SRC   : the source line is placed into the matching
//                            slot of the destination line; the slot index
//                            travels in the low bits of the request tag and
//                            is used to pick the slot out of the response.
//   * DST narrower than SRC: every source request is issued as P consecutive
//                            destination requests and the P response beats
//                            are gathered back into one source line.
//   * same width           : signals are passed through, with only address
//                            and tag width adjustment.
module VX_to_mem #(
    parameter SRC_DATA_WIDTH = 1,   // source line width in bits
    parameter SRC_ADDR_WIDTH = 1,   // source (line) address width in bits
    parameter DST_DATA_WIDTH = 1,   // destination line width in bits
    parameter DST_ADDR_WIDTH = 1,   // destination (line) address width in bits
    parameter SRC_TAG_WIDTH  = 1,   // source request tag width in bits
    parameter DST_TAG_WIDTH  = 1,   // destination request tag width in bits
    parameter SRC_DATA_SIZE  = (SRC_DATA_WIDTH / 8), // source line size in bytes (byte-enable width)
    parameter DST_DATA_SIZE  = (DST_DATA_WIDTH / 8)  // destination line size in bytes (byte-enable width)
) (
    input wire                      clk,
    input wire                      reset,

    // Request coming from the source side
    input wire                      mem_req_valid_in,
    input wire [SRC_ADDR_WIDTH-1:0] mem_req_addr_in,
    input wire                      mem_req_rw_in,
    input wire [SRC_DATA_SIZE-1:0]  mem_req_byteen_in,
    input wire [SRC_DATA_WIDTH-1:0] mem_req_data_in,
    input wire [SRC_TAG_WIDTH-1:0]  mem_req_tag_in,
    output wire                     mem_req_ready_in,

    // Request forwarded to the destination side
    output wire                     mem_req_valid_out,
    output wire [DST_ADDR_WIDTH-1:0] mem_req_addr_out,
    output wire                     mem_req_rw_out,
    output wire [DST_DATA_SIZE-1:0] mem_req_byteen_out,
    output wire [DST_DATA_WIDTH-1:0] mem_req_data_out,
    output wire [DST_TAG_WIDTH-1:0] mem_req_tag_out,
    input wire                      mem_req_ready_out,

    // Response coming from the destination side
    input wire                      mem_rsp_valid_in,
    input wire [DST_DATA_WIDTH-1:0] mem_rsp_data_in,
    input wire [DST_TAG_WIDTH-1:0]  mem_rsp_tag_in,
    output wire                     mem_rsp_ready_in,

    // Response forwarded to the source side
    output wire                     mem_rsp_valid_out,
    output wire [SRC_DATA_WIDTH-1:0] mem_rsp_data_out,
    output wire [SRC_TAG_WIDTH-1:0] mem_rsp_tag_out,
    input wire                      mem_rsp_ready_out
);
    `STATIC_ASSERT ((DST_TAG_WIDTH >= SRC_TAG_WIDTH), ("oops!"))

    localparam DST_LDATAW = $clog2(DST_DATA_WIDTH);
    localparam SRC_LDATAW = $clog2(SRC_DATA_WIDTH);
    localparam D = `ABS(DST_LDATAW - SRC_LDATAW);   // log2 of the width ratio
    localparam P = 2**D;                            // number of narrow lines per wide line

    // In the widening case the destination tag carries {source tag, slot index},
    // so it needs D extra bits; otherwise the tag would be truncated on the way
    // out and the response-side part-select would fall outside the tag.
    `STATIC_ASSERT (((DST_LDATAW <= SRC_LDATAW) || (DST_TAG_WIDTH >= (SRC_TAG_WIDTH + D))), ("invalid parameter: DST_TAG_WIDTH must be at least SRC_TAG_WIDTH + log2(DST_DATA_WIDTH / SRC_DATA_WIDTH)"))

    `UNUSED_VAR (mem_rsp_tag_in)

    if (DST_LDATAW > SRC_LDATAW) begin

        // ---- destination line is wider than the source line ----

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        // slot of the source line inside the destination line
        wire [D-1:0] req_idx = mem_req_addr_in[D-1:0];
        // slot index recovered from the low bits of the response tag
        wire [D-1:0] rsp_idx = mem_rsp_tag_in[D-1:0];

        // source address with the slot bits removed
        wire [SRC_ADDR_WIDTH-D-1:0] mem_req_addr_in_qual = mem_req_addr_in[SRC_ADDR_WIDTH-1:D];

        // destination response line viewed as P source-sized slots
        wire [P-1:0][SRC_DATA_WIDTH-1:0] mem_rsp_data_in_w = mem_rsp_data_in;

        // resize the address to the destination address width
        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH - D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH - D)) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        // shift byte enables and data into the selected slot
        assign mem_req_byteen_out = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3));
        assign mem_req_data_out   = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW);
        // remember the slot index in the low bits of the tag
        assign mem_req_tag_out    = DST_TAG_WIDTH'({mem_req_tag_in, req_idx});
        assign mem_req_ready_in   = mem_req_ready_out;

        assign mem_rsp_valid_out  = mem_rsp_valid_in;
        assign mem_rsp_data_out   = mem_rsp_data_in_w[rsp_idx];
        // strip the slot index to recover the source tag
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in[SRC_TAG_WIDTH+D-1:D]);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end else if (DST_LDATAW < SRC_LDATAW) begin

        // ---- destination line is narrower than the source line ----

        // beat counters; D bits wide, so they wrap to 0 after P beats
        reg [D-1:0] req_ctr, rsp_ctr;

        // response beats gathered so far (_r) and including the current beat (_n)
        reg [P-1:0][DST_DATA_WIDTH-1:0] mem_rsp_data_out_r, mem_rsp_data_out_n;

        wire mem_req_out_fire = mem_req_valid_out && mem_req_ready_out;
        wire mem_rsp_in_fire  = mem_rsp_valid_in && mem_rsp_ready_in;

        // source request line viewed as P destination-sized pieces
        wire [P-1:0][DST_DATA_WIDTH-1:0] mem_req_data_in_w  = mem_req_data_in;
        wire [P-1:0][DST_DATA_SIZE-1:0] mem_req_byteen_in_w = mem_req_byteen_in;

        // merge the incoming beat into the slot selected by rsp_ctr
        always @(*) begin
            mem_rsp_data_out_n = mem_rsp_data_out_r;
            mem_rsp_data_out_n[rsp_ctr] = mem_rsp_data_in;
        end

        always @(posedge clk) begin
            if (reset) begin
                req_ctr <= 0;
                rsp_ctr <= 0;
            end else begin
                if (mem_req_out_fire) begin
                    req_ctr <= req_ctr + 1;
                end
                if (mem_rsp_in_fire) begin
                    rsp_ctr <= rsp_ctr + 1;
                    mem_rsp_data_out_r <= mem_rsp_data_out_n;
                end
            end
        end

        // tag of the most recently accepted response beat
        reg [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_r;
        wire [DST_TAG_WIDTH-1:0] mem_rsp_tag_in_w;

        always @(posedge clk) begin
            if (mem_rsp_in_fire) begin
                mem_rsp_tag_in_r <= mem_rsp_tag_in;
            end
        end

        assign mem_rsp_tag_in_w = (rsp_ctr != 0) ? mem_rsp_tag_in_r : mem_rsp_tag_in;

        // All beats gathered into one source response must carry the same tag.
        // The comparison is only meaningful while a response beat is actually
        // being accepted, so it is gated by the handshake; otherwise the check
        // would also run on whatever value the tag bus holds while idle.
        `RUNTIME_ASSERT((!mem_rsp_in_fire || (mem_rsp_tag_in_w == mem_rsp_tag_in)), ("oops!"))

        // destination address = source address extended with the beat index
        wire [SRC_ADDR_WIDTH+D-1:0] mem_req_addr_in_qual = {mem_req_addr_in, req_ctr};

        // resize the address to the destination address width
        if (DST_ADDR_WIDTH < (SRC_ADDR_WIDTH + D)) begin
            `UNUSED_VAR (mem_req_addr_in_qual)
            assign mem_req_addr_out = mem_req_addr_in_qual[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > (SRC_ADDR_WIDTH + D)) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in_qual);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in_qual;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        assign mem_req_byteen_out = mem_req_byteen_in_w[req_ctr];
        assign mem_req_data_out   = mem_req_data_in_w[req_ctr];
        assign mem_req_tag_out    = DST_TAG_WIDTH'(mem_req_tag_in);
        // the source request is only consumed once its last piece is accepted
        assign mem_req_ready_in   = mem_req_ready_out && (req_ctr == (P-1));

        // the source response is only presented on the last beat
        assign mem_rsp_valid_out  = mem_rsp_valid_in && (rsp_ctr == (P-1));
        assign mem_rsp_data_out   = mem_rsp_data_out_n;
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end else begin

        // ---- same line width: pass-through with address/tag resizing ----

        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)

        if (DST_ADDR_WIDTH < SRC_ADDR_WIDTH) begin
            `UNUSED_VAR (mem_req_addr_in)
            assign mem_req_addr_out = mem_req_addr_in[DST_ADDR_WIDTH-1:0];
        end else if (DST_ADDR_WIDTH > SRC_ADDR_WIDTH) begin
            assign mem_req_addr_out = DST_ADDR_WIDTH'(mem_req_addr_in);
        end else begin
            assign mem_req_addr_out = mem_req_addr_in;
        end

        assign mem_req_valid_out  = mem_req_valid_in;
        assign mem_req_rw_out     = mem_req_rw_in;
        assign mem_req_byteen_out = mem_req_byteen_in;
        assign mem_req_data_out   = mem_req_data_in;
        assign mem_req_tag_out    = DST_TAG_WIDTH'(mem_req_tag_in);
        assign mem_req_ready_in   = mem_req_ready_out;

        assign mem_rsp_valid_out  = mem_rsp_valid_in;
        assign mem_rsp_data_out   = mem_rsp_data_in;
        assign mem_rsp_tag_out    = SRC_TAG_WIDTH'(mem_rsp_tag_in);
        assign mem_rsp_ready_in   = mem_rsp_ready_out;

    end

endmodule

View File

@@ -77,30 +77,28 @@ module ccip_std_afu #(
// User AFU goes here
// ====================================================================
//
// vortex_afu depends on CCI-P and local memory being in the same
// clock domain. This is accomplished by choosing a common clock
// in the AFU's JSON description. The platform instantiates clock-
// crossing shims automatically, as needed.
//
t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS];
logic avs_waitrequest [NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid [NUM_LOCAL_MEM_BANKS];
t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
logic avs_write [NUM_LOCAL_MEM_BANKS];
logic avs_read [NUM_LOCAL_MEM_BANKS];
//
// Memory banks are used very simply here. Only one bank is active at
// a time, selected by mem_bank_select. mem_bank_select is set
// by a CSR from the host.
//
t_local_mem_byte_mask avs_byteenable;
logic avs_waitrequest;
t_local_mem_data avs_readdata;
logic avs_readdatavalid;
t_local_mem_burst_cnt avs_burstcount;
t_local_mem_data avs_writedata;
t_local_mem_addr avs_address;
logic avs_write;
logic avs_read;
// choose which memory bank to test
logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
assign local_mem[b].burstcount = avs_burstcount[b];
assign local_mem[b].writedata = avs_writedata[b];
assign local_mem[b].address = avs_address[b];
assign local_mem[b].byteenable = avs_byteenable[b];
assign local_mem[b].write = avs_write[b];
assign local_mem[b].read = avs_read[b];
assign avs_waitrequest[b] = local_mem[b].waitrequest;
assign avs_readdata[b] = local_mem[b].readdata;
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
end
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
@@ -108,6 +106,9 @@ module ccip_std_afu #(
.clk (clk),
.reset (reset_T1),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0),
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
@@ -116,52 +117,7 @@ module ccip_std_afu #(
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.mem_bank_select (mem_bank_select),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0)
);
//
// Export the local memory interface signals as vectors so that bank
// selection can use array syntax.
//
logic avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];
genvar b;
generate
for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1)
begin : lmb
always_comb
begin
// Local memory to AFU signals
avs_waitrequest_v[b] = local_mem[b].waitrequest;
avs_readdata_v[b] = local_mem[b].readdata;
avs_readdatavalid_v[b] = local_mem[b].readdatavalid;
// Replicate address and write data to all banks. Only
// the request signals have to be bank-specific.
local_mem[b].burstcount = avs_burstcount;
local_mem[b].writedata = avs_writedata;
local_mem[b].address = avs_address;
local_mem[b].byteenable = avs_byteenable;
// Request a write to this bank?
local_mem[b].write = avs_write &&
($bits(mem_bank_select)'(b) == mem_bank_select);
// Request a read from this bank?
local_mem[b].read = avs_read &&
($bits(mem_bank_select)'(b) == mem_bank_select);
end
end
endgenerate
assign avs_waitrequest = avs_waitrequest_v[mem_bank_select];
assign avs_readdata = avs_readdata_v[mem_bank_select];
assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];
.avs_readdatavalid (avs_readdatavalid)
);
endmodule

View File

@@ -1,13 +1,18 @@
`include "VX_define.vh"
`ifndef NOPAE
`include "afu_json_info.vh"
`else
`include "VX_platform.vh"
`ifdef NOPAE
`IGNORE_WARNINGS_BEGIN
`include "vortex_afu.vh"
`IGNORE_WARNINGS_END
`else
`include "afu_json_info.vh"
`endif
/* verilator lint_off IMPORTSTAR */
import ccip_if_pkg::*;
import local_mem_cfg_pkg::*;
/* verilator lint_on IMPORTSTAR */
/* verilator lint_on IMPORTSTAR */
`include "VX_define.vh"
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2
@@ -21,30 +26,32 @@ module vortex_afu #(
output t_if_ccip_Tx af2cp_sTxPort,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address,
input logic avs_waitrequest,
output logic avs_write,
output logic avs_read,
output t_local_mem_byte_mask avs_byteenable,
output t_local_mem_burst_cnt avs_burstcount,
input avs_readdatavalid,
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
output t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS],
input t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS],
output t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS],
input logic avs_waitrequest [NUM_LOCAL_MEM_BANKS],
output logic avs_write [NUM_LOCAL_MEM_BANKS],
output logic avs_read [NUM_LOCAL_MEM_BANKS],
output t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS],
output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS],
input avs_readdatavalid [NUM_LOCAL_MEM_BANKS]
);
localparam RESET_DELAY = 3;
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH);
localparam LMEM_LINE_WIDTH = $bits(t_local_mem_data);
localparam LMEM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam LMEM_BURST_CTRW = $bits(t_local_mem_burst_cnt);
localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH);
localparam VX_DRAM_LINE_IDX = (DRAM_LINE_LW - VX_DRAM_LINE_LW);
localparam CCI_LINE_WIDTH = $bits(t_ccip_clData);
localparam CCI_LINE_SIZE = CCI_LINE_WIDTH / 8;
localparam CCI_ADDR_WIDTH = 32 - $clog2(CCI_LINE_WIDTH / 8);
localparam AVS_RD_QUEUE_SIZE = 16;
localparam AVS_REQ_TAGW = `VX_DRAM_TAG_WIDTH + VX_DRAM_LINE_IDX;
localparam AVS_REQ_TAGW_VX = `MAX(`VX_MEM_TAG_WIDTH, `VX_MEM_TAG_WIDTH + $clog2(LMEM_LINE_WIDTH) - $clog2(`VX_MEM_LINE_WIDTH));
localparam AVS_REQ_TAGW_CCI = `MAX(CCI_ADDR_WIDTH, CCI_ADDR_WIDTH + $clog2(LMEM_LINE_WIDTH) - $clog2(CCI_LINE_WIDTH));
localparam AVS_REQ_TAGW = `MAX(AVS_REQ_TAGW_VX, AVS_REQ_TAGW_CCI);
localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
@@ -74,7 +81,7 @@ localparam MMIO_CSR_DATA = `AFU_IMAGE_MMIO_CSR_DATA;
localparam MMIO_CSR_READ = `AFU_IMAGE_MMIO_CSR_READ;
localparam CCI_RD_RQ_TAGW = $clog2(CCI_RD_WINDOW_SIZE);
localparam CCI_RD_RQ_DATAW = $bits(t_ccip_clData) + CCI_RD_RQ_TAGW;
localparam CCI_RD_RQ_DATAW = CCI_LINE_WIDTH + CCI_RD_RQ_TAGW;
localparam STATE_IDLE = 0;
localparam STATE_READ = 1;
@@ -96,18 +103,18 @@ reg [STATE_WIDTH-1:0] state;
// Vortex ports ///////////////////////////////////////////////////////////////
wire vx_dram_req_valid;
wire vx_dram_req_rw;
wire [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
wire [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
wire vx_dram_req_ready;
wire vx_mem_req_valid;
wire vx_mem_req_rw;
wire [`VX_MEM_BYTEEN_WIDTH-1:0] vx_mem_req_byteen;
wire [`VX_MEM_ADDR_WIDTH-1:0] vx_mem_req_addr;
wire [`VX_MEM_LINE_WIDTH-1:0] vx_mem_req_data;
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_req_tag;
wire vx_mem_req_ready;
wire vx_dram_rsp_valid;
wire [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
wire [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
wire vx_dram_rsp_ready;
wire vx_mem_rsp_valid;
wire [`VX_MEM_LINE_WIDTH-1:0] vx_mem_rsp_data;
wire [`VX_MEM_TAG_WIDTH-1:0] vx_mem_rsp_tag;
wire vx_mem_rsp_ready;
wire vx_csr_io_req_valid;
wire [`VX_CSR_ID_WIDTH-1:0] vx_csr_io_req_coreid;
@@ -123,13 +130,13 @@ wire vx_csr_io_rsp_ready;
wire vx_busy;
reg vx_reset;
reg vx_dram_en;
reg vx_mem_en;
// CMD variables //////////////////////////////////////////////////////////////
t_ccip_clAddr cmd_io_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [DRAM_ADDR_WIDTH-1:0] cmd_data_size;
reg [CCI_ADDR_WIDTH-1:0] cmd_mem_addr;
reg [CCI_ADDR_WIDTH-1:0] cmd_data_size;
`ifdef SCOPE
wire [63:0] cmd_scope_rdata;
@@ -216,9 +223,9 @@ always @(posedge clk) begin
`endif
end
MMIO_MEM_ADDR: begin
cmd_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
cmd_mem_addr <= $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, t_local_mem_addr'(cp2af_sRxPort.c0.data));
$display("%t: MMIO_MEM_ADDR: addr=%0h, data=0x%0h", $time, mmio_hdr.address, $bits(cmd_mem_addr)'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_DATA_SIZE: begin
@@ -335,7 +342,7 @@ always @(posedge clk) begin
if (reset) begin
state <= STATE_IDLE;
vx_reset <= 0;
vx_dram_en <= 0;
vx_mem_en <= 0;
end else begin
case (state)
STATE_IDLE: begin
@@ -399,14 +406,14 @@ always @(posedge clk) begin
// vortex reset cycles
if (vx_reset_ctr == $bits(vx_reset_ctr)'(RESET_DELAY)) begin
vx_reset <= 0;
vx_dram_en <= 1;
vx_mem_en <= 1;
state <= STATE_RUN;
end
end
STATE_RUN: begin
if (cmd_run_done) begin
vx_dram_en <= 0;
vx_mem_en <= 0;
state <= STATE_IDLE;
`ifdef DBG_PRINT_OPAE
$display("%t: STATE IDLE", $time);
@@ -442,187 +449,251 @@ end
// AVS Controller /////////////////////////////////////////////////////////////
wire dram_req_valid;
wire dram_req_rw;
t_local_mem_byte_mask dram_req_byteen;
t_local_mem_addr dram_req_addr;
t_local_mem_data dram_req_data;
wire [AVS_REQ_TAGW:0] dram_req_tag;
wire dram_req_ready;
wire dram_rsp_valid;
t_local_mem_data dram_rsp_data;
wire [AVS_REQ_TAGW:0] dram_rsp_tag;
wire dram_rsp_ready;
wire cci_dram_req_valid;
wire cci_dram_req_rw;
t_local_mem_byte_mask cci_dram_req_byteen;
t_local_mem_addr cci_dram_req_addr;
t_local_mem_data cci_dram_req_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_req_tag;
wire cci_dram_req_ready;
wire cci_dram_rsp_valid;
t_local_mem_data cci_dram_rsp_data;
wire [AVS_REQ_TAGW-1:0] cci_dram_rsp_tag;
wire cci_dram_rsp_ready;
wire vx_dram_req_valid_qual;
t_local_mem_addr vx_dram_req_addr_qual;
t_local_mem_byte_mask vx_dram_req_byteen_qual;
t_local_mem_data vx_dram_req_data_qual;
wire [AVS_REQ_TAGW-1:0] vx_dram_req_tag_qual;
wire [(1 << VX_DRAM_LINE_IDX)-1:0][`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data_unqual;
wire [AVS_REQ_TAGW-1:0] vx_dram_rsp_tag_unqual;
wire cci_dram_rd_req_valid, cci_dram_wr_req_valid;
wire [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
wire cci_mem_rd_req_valid;
wire cci_mem_wr_req_valid;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_dout;
//--
wire cci_mem_req_valid;
wire cci_mem_req_rw;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_addr;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_req_tag;
wire cci_mem_req_ready;
assign cci_dram_req_valid = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_valid : cci_dram_rd_req_valid;
assign cci_dram_req_addr = (CMD_MEM_WRITE == state) ? cci_dram_wr_req_addr : cci_dram_rd_req_addr;
assign cci_dram_req_rw = (CMD_MEM_WRITE == state);
assign cci_dram_req_byteen = {64{1'b1}};
assign cci_dram_req_data = cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW];
assign cci_dram_req_tag = AVS_REQ_TAGW'(0);
`UNUSED_VAR (cci_dram_rsp_tag)
wire cci_mem_rsp_valid;
wire [CCI_LINE_WIDTH-1:0] cci_mem_rsp_data;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_rsp_tag;
wire cci_mem_rsp_ready;
//--
assign vx_dram_req_valid_qual = vx_dram_req_valid && vx_dram_en;
wire cci_mem_req_arb_valid;
wire cci_mem_req_arb_rw;
t_local_mem_byte_mask cci_mem_req_arb_byteen;
t_local_mem_addr cci_mem_req_arb_addr;
t_local_mem_data cci_mem_req_arb_data;
wire [AVS_REQ_TAGW-1:0] cci_mem_req_arb_tag;
wire cci_mem_req_arb_ready;
assign vx_dram_req_addr_qual = vx_dram_req_addr[`VX_DRAM_ADDR_WIDTH-1:`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH];
wire cci_mem_rsp_arb_valid;
t_local_mem_data cci_mem_rsp_arb_data;
wire [AVS_REQ_TAGW-1:0] cci_mem_rsp_arb_tag;
wire cci_mem_rsp_arb_ready;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_req_idx = vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0];
wire [VX_DRAM_LINE_IDX-1:0] vx_dram_rsp_idx = vx_dram_rsp_tag_unqual[VX_DRAM_LINE_IDX-1:0];
assign vx_dram_req_byteen_qual = 64'(vx_dram_req_byteen) << (6'(vx_dram_req_addr[VX_DRAM_LINE_IDX-1:0]) << (VX_DRAM_LINE_LW-3));
assign vx_dram_req_data_qual = DRAM_LINE_WIDTH'(vx_dram_req_data) << ((DRAM_LINE_LW'(vx_dram_req_idx)) << VX_DRAM_LINE_LW);
assign vx_dram_req_tag_qual = {vx_dram_req_tag, vx_dram_req_idx};
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual[vx_dram_rsp_idx];
end else begin
assign vx_dram_req_byteen_qual = vx_dram_req_byteen;
assign vx_dram_req_tag_qual = vx_dram_req_tag;
assign vx_dram_req_data_qual = vx_dram_req_data;
assign vx_dram_rsp_data = vx_dram_rsp_data_unqual;
end
VX_to_mem #(
.SRC_DATA_WIDTH (CCI_LINE_WIDTH),
.DST_DATA_WIDTH (LMEM_LINE_WIDTH),
.SRC_ADDR_WIDTH (CCI_ADDR_WIDTH),
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.SRC_TAG_WIDTH (CCI_ADDR_WIDTH),
.DST_TAG_WIDTH (AVS_REQ_TAGW)
) cci_to_mem (
.clk (clk),
.reset (reset),
assign vx_dram_rsp_tag = vx_dram_rsp_tag_unqual[`VX_DRAM_TAG_WIDTH+VX_DRAM_LINE_IDX-1:VX_DRAM_LINE_IDX];
.mem_req_valid_in (cci_mem_req_valid),
.mem_req_addr_in (cci_mem_req_addr),
.mem_req_rw_in (cci_mem_req_rw),
.mem_req_byteen_in ({CCI_LINE_SIZE{1'b1}}),
.mem_req_data_in (cci_rdq_dout[CCI_RD_RQ_DATAW-1:CCI_RD_RQ_TAGW]),
.mem_req_tag_in (cci_mem_req_tag),
.mem_req_ready_in (cci_mem_req_ready),
.mem_req_valid_out (cci_mem_req_arb_valid),
.mem_req_addr_out (cci_mem_req_arb_addr),
.mem_req_rw_out (cci_mem_req_arb_rw),
.mem_req_byteen_out (cci_mem_req_arb_byteen),
.mem_req_data_out (cci_mem_req_arb_data),
.mem_req_tag_out (cci_mem_req_arb_tag),
.mem_req_ready_out (cci_mem_req_arb_ready),
.mem_rsp_valid_in (cci_mem_rsp_arb_valid),
.mem_rsp_data_in (cci_mem_rsp_arb_data),
.mem_rsp_tag_in (cci_mem_rsp_arb_tag),
.mem_rsp_ready_in (cci_mem_rsp_arb_ready),
.mem_rsp_valid_out (cci_mem_rsp_valid),
.mem_rsp_data_out (cci_mem_rsp_data),
.mem_rsp_tag_out (cci_mem_rsp_tag),
.mem_rsp_ready_out (cci_mem_rsp_ready)
);
//--
wire vx_mem_req_arb_valid;
wire vx_mem_req_arb_rw;
t_local_mem_byte_mask vx_mem_req_arb_byteen;
t_local_mem_addr vx_mem_req_arb_addr;
t_local_mem_data vx_mem_req_arb_data;
wire [AVS_REQ_TAGW-1:0] vx_mem_req_arb_tag;
wire vx_mem_req_arb_ready;
wire vx_mem_rsp_arb_valid;
t_local_mem_data vx_mem_rsp_arb_data;
wire [AVS_REQ_TAGW-1:0] vx_mem_rsp_arb_tag;
wire vx_mem_rsp_arb_ready;
VX_to_mem #(
.SRC_DATA_WIDTH (`VX_MEM_LINE_WIDTH),
.DST_DATA_WIDTH (LMEM_LINE_WIDTH),
.SRC_ADDR_WIDTH (`VX_MEM_ADDR_WIDTH),
.DST_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.SRC_TAG_WIDTH (`VX_MEM_TAG_WIDTH),
.DST_TAG_WIDTH (AVS_REQ_TAGW)
) vx_to_mem (
.clk (clk),
.reset (reset),
.mem_req_valid_in (vx_mem_req_valid && vx_mem_en),
.mem_req_addr_in (vx_mem_req_addr),
.mem_req_rw_in (vx_mem_req_rw),
.mem_req_byteen_in (vx_mem_req_byteen),
.mem_req_data_in (vx_mem_req_data),
.mem_req_tag_in (vx_mem_req_tag),
.mem_req_ready_in (vx_mem_req_ready),
.mem_req_valid_out (vx_mem_req_arb_valid),
.mem_req_addr_out (vx_mem_req_arb_addr),
.mem_req_rw_out (vx_mem_req_arb_rw),
.mem_req_byteen_out (vx_mem_req_arb_byteen),
.mem_req_data_out (vx_mem_req_arb_data),
.mem_req_tag_out (vx_mem_req_arb_tag),
.mem_req_ready_out (vx_mem_req_arb_ready),
.mem_rsp_valid_in (vx_mem_rsp_arb_valid),
.mem_rsp_data_in (vx_mem_rsp_arb_data),
.mem_rsp_tag_in (vx_mem_rsp_arb_tag),
.mem_rsp_ready_in (vx_mem_rsp_arb_ready),
.mem_rsp_valid_out (vx_mem_rsp_valid),
.mem_rsp_data_out (vx_mem_rsp_data),
.mem_rsp_tag_out (vx_mem_rsp_tag),
.mem_rsp_ready_out (vx_mem_rsp_ready)
);
//--
wire mem_req_valid;
wire mem_req_rw;
t_local_mem_byte_mask mem_req_byteen;
t_local_mem_addr mem_req_addr;
t_local_mem_data mem_req_data;
wire [AVS_REQ_TAGW:0] mem_req_tag;
wire mem_req_ready;
wire mem_rsp_valid;
t_local_mem_data mem_rsp_data;
wire [AVS_REQ_TAGW:0] mem_rsp_tag;
wire mem_rsp_ready;
VX_mem_arb #(
.NUM_REQS (2),
.DATA_WIDTH ($bits(t_local_mem_data)),
.ADDR_WIDTH ($bits(t_local_mem_addr)),
.DATA_WIDTH (LMEM_LINE_WIDTH),
.ADDR_WIDTH (LMEM_ADDR_WIDTH),
.TAG_IN_WIDTH (AVS_REQ_TAGW),
.TAG_OUT_WIDTH (AVS_REQ_TAGW+1)
) dram_arb (
) mem_arb (
.clk (clk),
.reset (reset),
// Source request
.req_valid_in ({cci_dram_req_valid, vx_dram_req_valid_qual}),
.req_rw_in ({cci_dram_req_rw, vx_dram_req_rw}),
.req_byteen_in ({cci_dram_req_byteen, vx_dram_req_byteen_qual}),
.req_addr_in ({cci_dram_req_addr, vx_dram_req_addr_qual}),
.req_data_in ({cci_dram_req_data, vx_dram_req_data_qual}),
.req_tag_in ({cci_dram_req_tag, vx_dram_req_tag_qual}),
.req_ready_in ({cci_dram_req_ready, vx_dram_req_ready}),
.req_valid_in ({cci_mem_req_arb_valid, vx_mem_req_arb_valid}),
.req_rw_in ({cci_mem_req_arb_rw, vx_mem_req_arb_rw}),
.req_byteen_in ({cci_mem_req_arb_byteen, vx_mem_req_arb_byteen}),
.req_addr_in ({cci_mem_req_arb_addr, vx_mem_req_arb_addr}),
.req_data_in ({cci_mem_req_arb_data, vx_mem_req_arb_data}),
.req_tag_in ({cci_mem_req_arb_tag, vx_mem_req_arb_tag}),
.req_ready_in ({cci_mem_req_arb_ready, vx_mem_req_arb_ready}),
// DRAM request
.req_valid_out (dram_req_valid),
.req_rw_out (dram_req_rw),
.req_byteen_out (dram_req_byteen),
.req_addr_out (dram_req_addr),
.req_data_out (dram_req_data),
.req_tag_out (dram_req_tag),
.req_ready_out (dram_req_ready),
// Memory request
.req_valid_out (mem_req_valid),
.req_rw_out (mem_req_rw),
.req_byteen_out (mem_req_byteen),
.req_addr_out (mem_req_addr),
.req_data_out (mem_req_data),
.req_tag_out (mem_req_tag),
.req_ready_out (mem_req_ready),
// Source response
.rsp_valid_out ({cci_dram_rsp_valid, vx_dram_rsp_valid}),
.rsp_data_out ({cci_dram_rsp_data, vx_dram_rsp_data_unqual}),
.rsp_tag_out ({cci_dram_rsp_tag, vx_dram_rsp_tag_unqual}),
.rsp_ready_out ({cci_dram_rsp_ready, vx_dram_rsp_ready}),
.rsp_valid_out ({cci_mem_rsp_arb_valid, vx_mem_rsp_arb_valid}),
.rsp_data_out ({cci_mem_rsp_arb_data, vx_mem_rsp_arb_data}),
.rsp_tag_out ({cci_mem_rsp_arb_tag, vx_mem_rsp_arb_tag}),
.rsp_ready_out ({cci_mem_rsp_arb_ready, vx_mem_rsp_arb_ready}),
// DRAM response
.rsp_valid_in (dram_rsp_valid),
.rsp_tag_in (dram_rsp_tag),
.rsp_data_in (dram_rsp_data),
.rsp_ready_in (dram_rsp_ready)
// Memory response
.rsp_valid_in (mem_rsp_valid),
.rsp_tag_in (mem_rsp_tag),
.rsp_data_in (mem_rsp_data),
.rsp_ready_in (mem_rsp_ready)
);
//--
VX_avs_wrapper #(
.AVS_DATAW ($bits(t_local_mem_data)),
.AVS_ADDRW ($bits(t_local_mem_addr)),
.AVS_BURSTW ($bits(t_local_mem_burst_cnt)),
.AVS_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAGW (AVS_REQ_TAGW+1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.AVS_DATA_WIDTH (LMEM_LINE_WIDTH),
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.AVS_BURST_WIDTH (LMEM_BURST_CTRW),
.AVS_BANKS (NUM_LOCAL_MEM_BANKS),
.REQ_TAG_WIDTH (AVS_REQ_TAGW + 1),
.RD_QUEUE_SIZE (AVS_RD_QUEUE_SIZE)
) avs_wrapper (
.clk (clk),
.reset (reset),
.clk (clk),
.reset (reset),
// Memory request
.mem_req_valid (mem_req_valid),
.mem_req_rw (mem_req_rw),
.mem_req_byteen (mem_req_byteen),
.mem_req_addr (mem_req_addr),
.mem_req_data (mem_req_data),
.mem_req_tag (mem_req_tag),
.mem_req_ready (mem_req_ready),
// Memory response
.mem_rsp_valid (mem_rsp_valid),
.mem_rsp_data (mem_rsp_data),
.mem_rsp_tag (mem_rsp_tag),
.mem_rsp_ready (mem_rsp_ready),
// AVS bus
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
.avs_waitrequest (avs_waitrequest),
.avs_write (avs_write),
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.avs_bankselect (mem_bank_select),
// DRAM request
.dram_req_valid (dram_req_valid),
.dram_req_rw (dram_req_rw),
.dram_req_byteen (dram_req_byteen),
.dram_req_addr (dram_req_addr),
.dram_req_data (dram_req_data),
.dram_req_tag (dram_req_tag),
.dram_req_ready (dram_req_ready),
// DRAM response
.dram_rsp_valid (dram_rsp_valid),
.dram_rsp_data (dram_rsp_data),
.dram_rsp_tag (dram_rsp_tag),
.dram_rsp_ready (dram_rsp_ready)
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
.avs_waitrequest (avs_waitrequest),
.avs_write (avs_write),
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid(avs_readdatavalid)
);
// CCI-P Read Request ///////////////////////////////////////////////////////////
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [DRAM_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_wr_req_addr_unqual;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag, cci_rd_rsp_tag;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_wr_req_addr_unqual;
reg [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr;
wire [CCI_ADDR_WIDTH-1:0] cci_rd_req_ctr_next;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_req_tag;
wire [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_tag;
reg [CCI_RD_RQ_TAGW-1:0] cci_rd_rsp_ctr;
t_ccip_clAddr cci_rd_req_addr;
reg cci_rd_req_enable, cci_rd_req_wait;
wire cci_rd_req_fire;
t_ccip_clAddr cci_rd_req_addr;
reg cci_rd_req_valid, cci_rd_req_wait;
wire cci_rdq_push, cci_rdq_pop;
wire [CCI_RD_RQ_DATAW-1:0] cci_rdq_din;
wire cci_rdq_empty;
always @(*) begin
af2cp_sTxPort.c0.valid = cci_rd_req_fire;
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
af2cp_sTxPort.c0.hdr.mdata = t_ccip_mdata'(cci_rd_req_tag);
end
wire cci_dram_wr_req_fire = cci_dram_wr_req_valid && cci_dram_req_ready;
wire cci_rd_req_fire = af2cp_sTxPort.c0.valid;
wire cci_mem_wr_req_fire = cci_mem_wr_req_valid && cci_mem_req_ready;
wire cci_rd_rsp_fire = (STATE_WRITE == state)
&& cp2af_sRxPort.c0.rspValid
@@ -631,10 +702,8 @@ wire cci_rd_rsp_fire = (STATE_WRITE == state)
assign cci_rd_req_tag = CCI_RD_RQ_TAGW'(cci_rd_req_ctr);
assign cci_rd_rsp_tag = CCI_RD_RQ_TAGW'(cp2af_sRxPort.c0.hdr.mdata);
assign cci_rd_req_ctr_next = cci_rd_req_ctr + DRAM_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_rdq_pop = cci_dram_wr_req_fire;
assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_pop = cci_mem_wr_req_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, cci_rd_rsp_tag};
wire [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads;
@@ -646,79 +715,80 @@ VX_pending_size #(
.reset (reset),
.push (cci_rd_req_fire),
.pop (cci_rdq_pop),
`UNUSED_PIN (empty),
.full (cci_pending_reads_full),
.size (cci_pending_reads)
.size (cci_pending_reads),
`UNUSED_PIN (empty)
);
`UNUSED_VAR (cci_pending_reads)
assign cci_dram_wr_req_valid = !cci_rdq_empty;
assign cci_rd_req_ctr_next = cci_rd_req_ctr + CCI_ADDR_WIDTH'(cci_rd_req_fire ? 1 : 0);
assign cci_dram_wr_req_addr = cci_dram_wr_req_addr_unqual + (DRAM_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
assign cci_rd_req_fire = cci_rd_req_valid && !(cci_rd_req_wait || cci_pending_reads_full);
assign cci_mem_wr_req_valid = !cci_rdq_empty;
assign cci_mem_wr_req_addr = cci_mem_wr_req_addr_unqual + (CCI_ADDR_WIDTH'(CCI_RD_RQ_TAGW'(cci_rdq_dout)));
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && !cci_rd_req_wait;
assign cmd_write_done = (cci_dram_wr_req_ctr == cmd_data_size);
assign cmd_write_done = (cci_mem_wr_req_ctr == cmd_data_size);
// Send read requests to CCI
always @(posedge clk) begin
if (reset) begin
cci_rd_req_addr <= 0;
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
cci_rd_req_enable <= 0;
cci_rd_req_wait <= 0;
cci_dram_wr_req_ctr <= 0;
cci_dram_wr_req_addr_unqual <= 0;
end
else begin
cci_rd_req_valid <= 0;
cci_rd_req_wait <= 0;
end else begin
if ((STATE_IDLE == state)
&& (CMD_MEM_WRITE == cmd_type)) begin
cci_rd_req_addr <= cmd_io_addr;
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
cci_rd_req_enable <= (cmd_data_size != 0);
cci_rd_req_wait <= 0;
cci_dram_wr_req_ctr <= 0;
cci_dram_wr_req_addr_unqual <= cmd_mem_addr;
cci_rd_req_valid <= (cmd_data_size != 0);
cci_rd_req_wait <= 0;
end
cci_rd_req_enable <= (STATE_WRITE == state)
&& (cci_rd_req_ctr_next != cmd_data_size)
&& !cci_pending_reads_full
&& !cp2af_sRxPort.c0TxAlmFull;
cci_rd_req_valid <= (STATE_WRITE == state)
&& (cci_rd_req_ctr_next != cmd_data_size)
&& !cp2af_sRxPort.c0TxAlmFull;
if (cci_rd_req_fire) begin
cci_rd_req_addr <= cci_rd_req_addr + 1;
cci_rd_req_ctr <= cci_rd_req_ctr_next;
if (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr_next), cci_pending_reads);
`endif
if (cci_rd_req_fire && (cci_rd_req_tag == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
cci_rd_req_wait <= 1; // end current request batch
end
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
if (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
`endif
end
if (cci_rd_rsp_fire && (cci_rd_rsp_ctr == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1))) begin
cci_rd_req_wait <= 0; // begin new request batch
end
end
/*if (cci_rdq_pop) begin
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads);
`endif
end*/
if ((STATE_IDLE == state)
&& (CMD_MEM_WRITE == cmd_type)) begin
cci_rd_req_addr <= cmd_io_addr;
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
cci_mem_wr_req_ctr <= 0;
cci_mem_wr_req_addr_unqual <= cmd_mem_addr;
end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr_unqual <= cci_dram_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_dram_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? DRAM_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : DRAM_ADDR_WIDTH'(0));
cci_dram_wr_req_ctr <= cci_dram_wr_req_ctr + DRAM_ADDR_WIDTH'(1);
end
if (cci_rd_req_fire) begin
cci_rd_req_addr <= cci_rd_req_addr + 1;
cci_rd_req_ctr <= cci_rd_req_ctr + 1;
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Req: addr=%0h, tag=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, cci_rd_req_tag, (cmd_data_size - cci_rd_req_ctr - 1), cci_pending_reads);
`endif
end
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + CCI_RD_RQ_TAGW'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d, data=%0h", $time, cci_rd_rsp_tag, cci_rd_rsp_ctr, cp2af_sRxPort.c0.data);
`endif
end
if (cci_rdq_pop) begin
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads);
`endif
end
if (cci_mem_wr_req_fire) begin
cci_mem_wr_req_addr_unqual <= cci_mem_wr_req_addr_unqual + ((CCI_RD_RQ_TAGW'(cci_mem_wr_req_ctr) == CCI_RD_RQ_TAGW'(CCI_RD_WINDOW_SIZE-1)) ? CCI_ADDR_WIDTH'(CCI_RD_WINDOW_SIZE) : CCI_ADDR_WIDTH'(0));
cci_mem_wr_req_ctr <= cci_mem_wr_req_ctr + CCI_ADDR_WIDTH'(1);
end
end
@@ -761,22 +831,24 @@ VX_fifo_queue #(
// CCI-P Write Request //////////////////////////////////////////////////////////
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr_r;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_ctr;
reg [CCI_ADDR_WIDTH-1:0] cci_mem_rd_req_addr;
reg [CCI_ADDR_WIDTH-1:0] cci_wr_req_ctr;
reg cci_wr_req_fire;
t_ccip_clAddr cci_wr_req_addr;
t_ccip_clData cci_wr_req_data;
always @(*) begin
af2cp_sTxPort.c1.valid = cci_wr_req_fire;
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
af2cp_sTxPort.c1.data = t_ccip_clData'(cci_dram_rsp_data);
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
af2cp_sTxPort.c1.data = cci_wr_req_data;
end
wire cci_dram_rd_req_fire = cci_dram_rd_req_valid && cci_dram_req_ready;
wire cci_dram_rd_rsp_fire = cci_dram_rsp_valid && cci_dram_rsp_ready;
wire cci_wr_req_fire = cci_dram_rd_rsp_fire;
wire cci_mem_rd_req_fire = cci_mem_rd_req_valid && cci_mem_req_ready;
wire cci_mem_rd_rsp_fire = cci_mem_rsp_valid && cci_mem_rsp_ready;
wire cci_wr_rsp_fire = (STATE_READ == state)
&& cp2af_sRxPort.c1.rspValid
@@ -785,12 +857,13 @@ wire cci_wr_rsp_fire = (STATE_READ == state)
wire [$clog2(CCI_RW_PENDING_SIZE+1)-1:0] cci_pending_writes;
wire cci_pending_writes_empty;
wire cci_pending_writes_full;
VX_pending_size #(
.SIZE (CCI_RW_PENDING_SIZE)
) cci_wr_pending_size (
.clk (clk),
.reset (reset),
.push (cci_wr_req_fire),
.push (cci_mem_rd_rsp_fire),
.pop (cci_wr_rsp_fire),
.empty (cci_pending_writes_empty),
.full (cci_pending_writes_full),
@@ -798,54 +871,61 @@ VX_pending_size #(
);
`UNUSED_VAR (cci_pending_writes)
assign cci_dram_rd_req_valid = (cci_dram_rd_req_ctr != 0);
assign cci_dram_rd_req_addr = cci_dram_rd_req_addr_r;
// Issue local-memory read requests only while in STATE_READ and only until the
// request counter reaches cmd_data_size (the counter advances by one for every
// accepted request).
assign cci_mem_rd_req_valid = (STATE_READ == state)
&& (cci_mem_rd_req_ctr != cmd_data_size);
assign af2cp_sTxPort.c1.valid = cci_dram_rd_rsp_fire;
assign cci_dram_rsp_ready = !cp2af_sRxPort.c1TxAlmFull && !cci_pending_writes_full;
// Accept a memory read response only when the CCI c1 Tx channel is not
// signalling almost-full and the pending-writes tracker is not full.
assign cci_mem_rsp_ready = !cp2af_sRxPort.c1TxAlmFull
&& !cci_pending_writes_full;
assign cmd_read_done = (0 == cci_wr_req_ctr) && cci_pending_writes_empty;
// The read command is complete once the remaining write-request counter has
// counted down to zero and the pending-writes tracker reports empty
// (presumably meaning every issued CCI write has been acknowledged - confirm
// against VX_pending_size).
assign cmd_read_done = (0 == cci_wr_req_ctr)
&& cci_pending_writes_empty;
// Send write requests to CCI
always @(posedge clk)
begin
if (reset) begin
cci_wr_req_addr <= 0;
cci_wr_req_ctr <= 0;
cci_dram_rd_req_ctr <= 0;
cci_dram_rd_req_addr_r <= 0;
cci_wr_req_fire <= 0;
end else begin
cci_wr_req_fire <= cci_mem_rd_rsp_fire;
end
else begin
if ((STATE_IDLE == state)
&& (CMD_MEM_READ == cmd_type)) begin
cci_wr_req_addr <= cmd_io_addr;
cci_wr_req_ctr <= cmd_data_size;
cci_dram_rd_req_ctr <= cmd_data_size;
cci_dram_rd_req_addr_r <= cmd_mem_addr;
end
if ((STATE_IDLE == state)
&& (CMD_MEM_READ == cmd_type)) begin
cci_mem_rd_req_ctr <= 0;
cci_mem_rd_req_addr <= cmd_mem_addr;
cci_wr_req_ctr <= cmd_data_size;
end
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_addr <= cci_wr_req_addr + t_ccip_clAddr'(1);
cci_wr_req_ctr <= cci_wr_req_ctr - DRAM_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
`endif
end
if (cci_mem_rd_req_fire) begin
cci_mem_rd_req_addr <= cci_mem_rd_req_addr + CCI_ADDR_WIDTH'(1);
cci_mem_rd_req_ctr <= cci_mem_rd_req_ctr + CCI_ADDR_WIDTH'(1);
end
/*`ifdef DBG_PRINT_OPAE
if (cci_wr_rsp_fire) begin
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes);
end
`endif*/
cci_wr_req_addr <= cmd_io_addr + t_ccip_clAddr'(cci_mem_rsp_tag);
cci_wr_req_data <= t_ccip_clData'(cci_mem_rsp_data);
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr_r <= cci_dram_rd_req_addr_r + DRAM_ADDR_WIDTH'(1);
cci_dram_rd_req_ctr <= cci_dram_rd_req_ctr - DRAM_ADDR_WIDTH'(1);
end
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_ctr <= cci_wr_req_ctr - CCI_ADDR_WIDTH'(1);
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d, data=%0h", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes, af2cp_sTxPort.c1.data);
`endif
end
if (cci_wr_rsp_fire) begin
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes);
`endif
end
end
// CCI -> local memory request mux.
// The write path (host data copied into local memory) and the read path
// (local memory copied back to the host) share one memory request port.
// The direction is chosen from the AFU state: writes are issued only in
// STATE_WRITE, and every other state selects the read-side signals.
//
// NOTE: the selector must be compared against a STATE_* constant. `state`
// holds a STATE_* value, whereas CMD_MEM_WRITE belongs to the `cmd_type`
// encoding; comparing the two only works if both encodings happen to share
// the same numeric value.
assign cci_mem_req_rw = (STATE_WRITE == state);
assign cci_mem_req_valid = cci_mem_req_rw ? cci_mem_wr_req_valid : cci_mem_rd_req_valid;
assign cci_mem_req_addr = cci_mem_req_rw ? cci_mem_wr_req_addr : cci_mem_rd_req_addr;
// The request tag is the running request counter of the selected direction.
assign cci_mem_req_tag = cci_mem_req_rw ? cci_mem_wr_req_ctr : cci_mem_rd_req_ctr;
// CSRs ///////////////////////////////////////////////////////////////////////
reg csr_io_req_sent;
@@ -890,20 +970,20 @@ Vortex #() vortex (
.clk (clk),
.reset (reset | vx_reset),
// DRAM request
.dram_req_valid (vx_dram_req_valid),
.dram_req_rw (vx_dram_req_rw),
.dram_req_byteen(vx_dram_req_byteen),
.dram_req_addr (vx_dram_req_addr),
.dram_req_data (vx_dram_req_data),
.dram_req_tag (vx_dram_req_tag),
.dram_req_ready (vx_dram_req_ready),
// Memory request
.mem_req_valid (vx_mem_req_valid),
.mem_req_rw (vx_mem_req_rw),
.mem_req_byteen (vx_mem_req_byteen),
.mem_req_addr (vx_mem_req_addr),
.mem_req_data (vx_mem_req_data),
.mem_req_tag (vx_mem_req_tag),
.mem_req_ready (vx_mem_req_ready),
// DRAM response
.dram_rsp_valid (vx_dram_rsp_valid),
.dram_rsp_data (vx_dram_rsp_data),
.dram_rsp_tag (vx_dram_rsp_tag),
.dram_rsp_ready (vx_dram_rsp_ready),
// Memory response
.mem_rsp_valid (vx_mem_rsp_valid),
.mem_rsp_data (vx_mem_rsp_data),
.mem_rsp_tag (vx_mem_rsp_tag),
.mem_rsp_ready (vx_mem_rsp_ready),
// CSR Request
.csr_req_valid (vx_csr_io_req_valid),
@@ -944,16 +1024,15 @@ Vortex #() vortex (
`SCOPE_ASSIGN (cci_sTxPort_c2_mmioRdValid, af2cp_sTxPort.c2.mmioRdValid);
`SCOPE_ASSIGN (cci_sRxPort_c0TxAlmFull, cp2af_sRxPort.c0TxAlmFull);
`SCOPE_ASSIGN (cci_sRxPort_c1TxAlmFull, cp2af_sRxPort.c1TxAlmFull);
`SCOPE_ASSIGN (avs_address, avs_address);
`SCOPE_ASSIGN (avs_waitrequest, avs_waitrequest);
`SCOPE_ASSIGN (avs_write_fire, avs_write && !avs_waitrequest);
`SCOPE_ASSIGN (avs_read_fire, avs_read && !avs_waitrequest);
`SCOPE_ASSIGN (avs_byteenable, avs_byteenable);
`SCOPE_ASSIGN (avs_burstcount, avs_burstcount);
`SCOPE_ASSIGN (avs_readdatavalid, avs_readdatavalid);
`SCOPE_ASSIGN (mem_bank_select, mem_bank_select);
`SCOPE_ASSIGN (cci_dram_rd_req_ctr, cci_dram_rd_req_ctr);
`SCOPE_ASSIGN (cci_dram_wr_req_ctr, cci_dram_wr_req_ctr);
`SCOPE_ASSIGN (avs_address, avs_address[0]);
`SCOPE_ASSIGN (avs_waitrequest, avs_waitrequest[0]);
`SCOPE_ASSIGN (avs_write_fire, avs_write[0] && !avs_waitrequest[0]);
`SCOPE_ASSIGN (avs_read_fire, avs_read[0] && !avs_waitrequest[0]);
`SCOPE_ASSIGN (avs_byteenable, avs_byteenable[0]);
`SCOPE_ASSIGN (avs_burstcount, avs_burstcount[0]);
`SCOPE_ASSIGN (avs_readdatavalid, avs_readdatavalid[0]);
`SCOPE_ASSIGN (cci_mem_rd_req_ctr, cci_mem_rd_req_ctr);
`SCOPE_ASSIGN (cci_mem_wr_req_ctr, cci_mem_wr_req_ctr);
`SCOPE_ASSIGN (cci_rd_req_ctr, cci_rd_req_ctr);
`SCOPE_ASSIGN (cci_rd_rsp_ctr, cci_rd_rsp_ctr);
`SCOPE_ASSIGN (cci_wr_req_ctr, cci_wr_req_ctr);
@@ -964,11 +1043,11 @@ Vortex #() vortex (
`SCOPE_ASSIGN (cci_pending_reads_full, cci_pending_reads_full);
`SCOPE_ASSIGN (cci_pending_writes_empty, cci_pending_writes_empty);
`SCOPE_ASSIGN (cci_pending_writes_full, cci_pending_writes_full);
`SCOPE_ASSIGN (afu_dram_req_fire, (dram_req_valid && dram_req_ready));
`SCOPE_ASSIGN (afu_dram_req_addr, dram_req_addr);
`SCOPE_ASSIGN (afu_dram_req_tag, dram_req_tag);
`SCOPE_ASSIGN (afu_dram_rsp_fire, (dram_rsp_valid && dram_rsp_ready));
`SCOPE_ASSIGN (afu_dram_rsp_tag, dram_rsp_tag);
`SCOPE_ASSIGN (afu_mem_req_fire, (mem_req_valid && mem_req_ready));
`SCOPE_ASSIGN (afu_mem_req_addr, mem_req_addr);
`SCOPE_ASSIGN (afu_mem_req_tag, mem_req_tag);
`SCOPE_ASSIGN (afu_mem_rsp_fire, (mem_rsp_valid && mem_rsp_ready));
`SCOPE_ASSIGN (afu_mem_rsp_tag, mem_rsp_tag);
wire scope_changed = `SCOPE_TRIGGER;

View File

@@ -1,18 +1,27 @@
`ifndef __VORTEX_AFU__
`define __VORTEX_AFU__
`IGNORE_WARNINGS_BEGIN
`include "ccip_if_pkg.sv"
`IGNORE_WARNINGS_END
`define PLATFORM_PROVIDES_LOCAL_MEMORY
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
`IGNORE_WARNINGS_BEGIN
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
`define PLATFORM_PARAM_LOCAL_MEMORY_BANKS 2
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_ADDR_WIDTH 26
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_DATA_WIDTH 512
`endif
`ifndef PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH
`define PLATFORM_PARAM_LOCAL_MEMORY_BURST_CNT_WIDTH 4
`endif
`include "local_mem_cfg_pkg.sv"
`IGNORE_WARNINGS_END
`define AFU_ACCEL_NAME "vortex_afu"
`define AFU_ACCEL_UUID 128'h35f9452b_25c2_434c_93d5_6f8c60db361c

168
hw/rtl/cache/VX_bank.v vendored
View File

@@ -22,8 +22,8 @@ module VX_bank #(
parameter CREQ_SIZE = 1,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 1,
// DRAM Request Queue Size
parameter DREQ_SIZE = 1,
// Memory Request Queue Size
parameter MREQ_SIZE = 1,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
@@ -35,10 +35,7 @@ module VX_bank #(
parameter CORE_TAG_ID_BITS = 0,
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
// in-order DRAN
parameter IN_ORDER_DRAM = 0
parameter BANK_ADDR_OFFSET = 0
) (
`SCOPE_IO_VX_bank
@@ -71,19 +68,19 @@ module VX_bank #(
output wire [CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire core_rsp_ready,
// DRAM request
output wire dram_req_valid,
output wire dram_req_rw,
output wire [CACHE_LINE_SIZE-1:0] dram_req_byteen,
output wire [`LINE_ADDR_WIDTH-1:0] dram_req_addr,
output wire [`CACHE_LINE_WIDTH-1:0] dram_req_data,
input wire dram_req_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen,
output wire [`LINE_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data,
input wire mem_req_ready,
// DRAM response
input wire dram_rsp_valid,
input wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr,
input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data,
output wire dram_rsp_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`LINE_ADDR_WIDTH-1:0] mem_rsp_addr,
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
output wire mem_rsp_ready,
// flush
input wire flush_enable,
@@ -93,10 +90,10 @@ module VX_bank #(
`UNUSED_PARAM (CORE_TAG_ID_BITS)
`ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */
`IGNORE_WARNINGS_BEGIN
wire [31:0] debug_pc_sel, debug_pc_st0, debug_pc_st1;
wire [`NW_BITS-1:0] debug_wid_sel, debug_wid_st0, debug_wid_st1;
/* verilator lint_on UNUSED */
`IGNORE_WARNINGS_END
`endif
wire creq_pop;
@@ -167,8 +164,8 @@ module VX_bank #(
wire is_flush_st0;
wire crsq_in_valid, crsq_in_ready, crsq_in_stall;
wire dreq_alm_full;
wire drsq_pop;
wire mreq_alm_full;
wire mrsq_pop;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
@@ -186,24 +183,24 @@ module VX_bank #(
// determine which queue to pop next in priority order
wire mshr_pop_unqual = mshr_valid
&& !dreq_alm_full; // ensure DRAM request queue not full (deadlock prevention)
wire drsq_pop_unqual = !mshr_pop_unqual && dram_rsp_valid;
wire creq_pop_unqual = !mshr_pop_unqual && !drsq_pop_unqual && !creq_empty && !flush_enable;
&& !mreq_alm_full; // ensure memory request queue not full (deadlock prevention)
wire mrsq_pop_unqual = !mshr_pop_unqual && mem_rsp_valid;
wire creq_pop_unqual = !mshr_pop_unqual && !mrsq_pop_unqual && !creq_empty && !flush_enable;
wire is_miss_st1 = valid_st1 && (miss_st1 || force_miss_st1);
assign mshr_pop = mshr_pop_unqual
&& !(!IN_ORDER_DRAM && is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
&& !(is_miss_st1 && is_mshr_st1) // do not schedule another mshr request if the previous one missed
&& !crsq_in_stall; // ensure core response ready
assign drsq_pop = drsq_pop_unqual
assign mrsq_pop = mrsq_pop_unqual
&& !crsq_in_stall; // ensure core response ready
assign creq_pop = creq_pop_unqual
&& !dreq_alm_full // ensure dram request ready
&& !mreq_alm_full // ensure memory request ready
&& !mshr_alm_full // ensure mshr enqueue ready
&& !crsq_in_stall; // ensure core response ready
assign dram_rsp_ready = drsq_pop;
assign mem_rsp_ready = mrsq_pop;
// we have a miss in mshr or entering it for the current address
wire mshr_pending_sel = mshr_pending
@@ -237,15 +234,7 @@ module VX_bank #(
end else begin
assign creq_line_data = creq_data;
end
wire [`LINE_ADDR_WIDTH-1:0] dram_rsp_addr_qual;
if (IN_ORDER_DRAM) begin
`UNUSED_VAR (dram_rsp_addr)
assign dram_rsp_addr_qual = mshr_addr;
end else begin
assign dram_rsp_addr_qual = dram_rsp_addr;
end
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH + 1 + 1),
.RESETW (1)
@@ -254,13 +243,13 @@ module VX_bank #(
.reset (reset),
.enable (!crsq_in_stall),
.data_in ({
flush_enable || mshr_pop || drsq_pop || creq_pop,
flush_enable || mshr_pop || mrsq_pop || creq_pop,
flush_enable,
mshr_pop_unqual,
drsq_pop_unqual || flush_enable,
mrsq_pop_unqual || flush_enable,
mshr_pop_unqual ? 1'b0 : creq_rw,
mshr_pop_unqual ? mshr_addr : (dram_rsp_valid ? dram_rsp_addr_qual : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
dram_rsp_valid ? dram_rsp_data : creq_line_data,
mshr_pop_unqual ? mshr_addr : (mem_rsp_valid ? mem_rsp_addr : (flush_enable ? `LINE_ADDR_WIDTH'(flush_addr) : creq_addr)),
mem_rsp_valid ? mem_rsp_data : creq_line_data,
mshr_pop_unqual ? mshr_wsel : creq_wsel,
mshr_pop_unqual ? mshr_byteen : creq_byteen,
mshr_pop_unqual ? mshr_tid : creq_tid,
@@ -307,7 +296,7 @@ module VX_bank #(
);
// redundant fills
wire is_redundant_fill_st0 = !IN_ORDER_DRAM && is_fill_st0 && tag_match_st0;
wire is_redundant_fill_st0 = is_fill_st0 && tag_match_st0;
// we had a miss with prior request for the current address
assign prev_miss_dep_st0 = is_miss_st1 && (addr_st0 == addr_st1);
@@ -322,9 +311,9 @@ module VX_bank #(
assign writeen_unqual_st0 = (WRITE_ENABLE && !is_fill_st0 && tag_match_st0 && mem_rw_st0)
|| (is_fill_st0 && !is_redundant_fill_st0);
assign incoming_fill_st0 = dram_rsp_valid && (addr_st0 == dram_rsp_addr_qual);
assign incoming_fill_st0 = mem_rsp_valid && (addr_st0 == mem_rsp_addr);
assign fill_req_unqual_st0 = !mem_rw_st0 && (!force_miss_st0 || (!IN_ORDER_DRAM && is_mshr_st0 && !prev_miss_dep_st0));
assign fill_req_unqual_st0 = !mem_rw_st0 && (!force_miss_st0 || (is_mshr_st0 && !prev_miss_dep_st0));
VX_pipe_register #(
.DATAW (1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH + (`UP(`WORD_SELECT_BITS) + WORD_SIZE + `REQS_BITS + 1) * NUM_PORTS + CORE_TAG_WIDTH),
@@ -351,12 +340,12 @@ module VX_bank #(
wire mshr_push_st1 = !is_fill_st1 && !mem_rw_st1 && (miss_st1 || force_miss_st1);
wire incoming_fill_qual_st1 = (dram_rsp_valid && (addr_st1 == dram_rsp_addr_qual))
wire incoming_fill_qual_st1 = (mem_rsp_valid && (addr_st1 == mem_rsp_addr))
|| incoming_fill_st1;
wire do_writeback_st1 = !is_fill_st1 && mem_rw_st1;
wire dreq_push_st1 = (miss_st1 && fill_req_unqual_st1 && !incoming_fill_qual_st1)
wire mreq_push_st1 = (miss_st1 && fill_req_unqual_st1 && !incoming_fill_qual_st1)
|| do_writeback_st1;
wire [`WORDS_PER_LINE-1:0][WORD_SIZE-1:0] line_byteen_st1;
@@ -408,15 +397,14 @@ module VX_bank #(
assign mshr_push = valid_st1 && mshr_push_st1;
wire mshr_dequeue = valid_st1 && is_mshr_st1 && !mshr_push_st1 && crsq_in_ready;
wire mshr_restore = !IN_ORDER_DRAM && is_mshr_st1;
`RUNTIME_ASSERT(!IN_ORDER_DRAM || !(mshr_push && mshr_restore), ("Oops!"))
wire mshr_restore = is_mshr_st1;
// push a missed request as 'ready' if it was a forced miss that actually had a hit
// or the fill request for this block is coming
wire mshr_init_ready_state = !miss_st1 || incoming_fill_qual_st1;
// use dram rsp or core req address to lookup the mshr
wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = dram_rsp_valid ? dram_rsp_addr_qual : creq_addr;
// use memory rsp or core req address to lookup the mshr
wire [`LINE_ADDR_WIDTH-1:0] lookup_addr = mem_rsp_valid ? mem_rsp_addr : creq_addr;
VX_miss_resrv #(
.BANK_ID (BANK_ID),
@@ -450,7 +438,7 @@ module VX_bank #(
`UNUSED_PIN (enqueue_full),
// lookup
.lookup_ready (drsq_pop),
.lookup_ready (mrsq_pop),
.lookup_addr (lookup_addr),
.lookup_match (mshr_pending),
@@ -500,41 +488,41 @@ module VX_bank #(
.ready_out (core_rsp_ready)
);
// Enqueue DRAM request
// Enqueue memory request
wire [CACHE_LINE_SIZE-1:0] dreq_byteen;
wire [`LINE_ADDR_WIDTH-1:0] dreq_addr;
wire [`CACHE_LINE_WIDTH-1:0] dreq_data;
wire dreq_push, dreq_pop, dreq_empty, dreq_rw;
wire [CACHE_LINE_SIZE-1:0] mreq_byteen;
wire [`LINE_ADDR_WIDTH-1:0] mreq_addr;
wire [`CACHE_LINE_WIDTH-1:0] mreq_data;
wire mreq_push, mreq_pop, mreq_empty, mreq_rw;
assign dreq_push = valid_st1 && dreq_push_st1;
assign mreq_push = valid_st1 && mreq_push_st1;
assign dreq_pop = dram_req_valid && dram_req_ready;
assign mreq_pop = mem_req_valid && mem_req_ready;
assign dreq_rw = WRITE_ENABLE && do_writeback_st1;
assign dreq_byteen = dreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
assign dreq_addr = addr_st1;
assign dreq_data = wdata_st1;
assign mreq_rw = WRITE_ENABLE && do_writeback_st1;
assign mreq_byteen = mreq_rw ? line_byteen_st1 : {CACHE_LINE_SIZE{1'b1}};
assign mreq_addr = addr_st1;
assign mreq_data = wdata_st1;
VX_fifo_queue #(
.DATAW (1 + CACHE_LINE_SIZE + `LINE_ADDR_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (DREQ_SIZE),
.ALM_FULL (DREQ_SIZE-2)
) dram_req_queue (
.SIZE (MREQ_SIZE),
.ALM_FULL (MREQ_SIZE-2)
) mem_req_queue (
.clk (clk),
.reset (reset),
.push (dreq_push),
.pop (dreq_pop),
.data_in ({dreq_rw, dreq_byteen, dreq_addr, dreq_data}),
.data_out ({dram_req_rw, dram_req_byteen, dram_req_addr, dram_req_data}),
.empty (dreq_empty),
.alm_full (dreq_alm_full),
.push (mreq_push),
.pop (mreq_pop),
.data_in ({mreq_rw, mreq_byteen, mreq_addr, mreq_data}),
.data_out ({mem_req_rw, mem_req_byteen, mem_req_addr, mem_req_data}),
.empty (mreq_empty),
.alm_full (mreq_alm_full),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
assign dram_req_valid = !dreq_empty;
assign mem_req_valid = !mreq_empty;
`SCOPE_ASSIGN (valid_st0, valid_st0);
`SCOPE_ASSIGN (valid_st1, valid_st1);
@@ -544,7 +532,7 @@ module VX_bank #(
`SCOPE_ASSIGN (force_miss_st0, force_miss_st0);
`SCOPE_ASSIGN (mshr_push, mshr_push);
`SCOPE_ASSIGN (crsq_in_stall, crsq_in_stall);
`SCOPE_ASSIGN (dreq_alm_full, dreq_alm_full);
`SCOPE_ASSIGN (mreq_alm_full, mreq_alm_full);
`SCOPE_ASSIGN (mshr_alm_full, mshr_alm_full);
`SCOPE_ASSIGN (addr_st0, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID));
`SCOPE_ASSIGN (addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
@@ -552,45 +540,45 @@ module VX_bank #(
`ifdef PERF_ENABLE
assign perf_read_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && !mem_rw_st1;
assign perf_write_misses = valid_st1 && !is_fill_st1 && !is_mshr_st1 && miss_st1 && mem_rw_st1;
assign perf_pipe_stalls = crsq_in_stall || dreq_alm_full || mshr_alm_full;
assign perf_pipe_stalls = crsq_in_stall || mreq_alm_full || mshr_alm_full;
assign perf_mshr_stalls = mshr_alm_full;
`endif
`ifdef DBG_PRINT_CACHE_BANK
always @(posedge clk) begin
/*if (valid_st1 && pmask_st1 == {NUM_PORTS{1'b1}}) begin
$display("%t: cache%0d:%0d full bank multi-porting - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
end*/
/*if (crsq_in_fire && (NUM_PORTS > 1) && $countones(crsq_pmask) > 1) begin
$display("%t: *** cache%0d:%0d multi-port-out: pmask=%b, addr=%0h, tag=%0h", $time, CACHE_ID, BANK_ID, crsq_pmask, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag);
end */
if (valid_st1 && !is_fill_st1 && miss_st1 && incoming_fill_qual_st1) begin
$display("%t: cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
$display("%t: *** cache%0d:%0d miss with incoming fill - addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
assert(!is_mshr_st1);
end
if (crsq_in_stall || dreq_alm_full || mshr_alm_full) begin
$display("%t: cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, dreq_alm_full, mshr_alm_full);
if (crsq_in_stall || mreq_alm_full || mshr_alm_full) begin
$display("%t: *** cache%0d:%0d pipeline-stall: cwbq=%b, dwbq=%b, mshr=%b", $time, CACHE_ID, BANK_ID, crsq_in_stall, mreq_alm_full, mshr_alm_full);
end
if (flush_enable) begin
$display("%t: cache%0d:%0d flush: addr=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(flush_addr, BANK_ID));
end
if (drsq_pop) begin
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dram_rsp_addr_qual, BANK_ID), dram_rsp_data);
if (mrsq_pop) begin
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mem_rsp_addr, BANK_ID), mem_rsp_data);
end
if (mshr_pop) begin
$display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel);
$display("%t: cache%0d:%0d mshr-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mshr_addr, BANK_ID), mshr_tag, mshr_pmask, mshr_tid, mshr_byteen, debug_wid_sel, debug_pc_sel);
end
if (creq_pop) begin
if (creq_rw)
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, creq_data, debug_wid_sel, debug_pc_sel);
else
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, pmask=%b, tid=%0d, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(creq_addr, BANK_ID), creq_tag, creq_pmask, creq_tid, creq_byteen, debug_wid_sel, debug_pc_sel);
end
if (crsq_in_fire) begin
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%0b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
$display("%t: cache%0d:%0d core-rsp: addr=%0h, tag=%0h, pmask=%b, tid=%0d, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID), crsq_tag, crsq_pmask, crsq_tid, crsq_data, debug_wid_st1, debug_pc_st1);
end
if (dreq_push) begin
if (mreq_push) begin
if (do_writeback_st1)
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), dreq_data, dreq_byteen, debug_wid_st1, debug_pc_st1);
$display("%t: cache%0d:%0d writeback: addr=%0h, data=%0h, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), mreq_data, mreq_byteen, debug_wid_st1, debug_pc_st1);
else
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(dreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1);
$display("%t: cache%0d:%0d fill-req: addr=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(mreq_addr, BANK_ID), debug_wid_st1, debug_pc_st1);
end
end
`endif

View File

@@ -21,10 +21,10 @@ module VX_cache #(
parameter CREQ_SIZE = 4,
// Miss Reserv Queue Knob
parameter MSHR_SIZE = 8,
// DRAM Response Queue Size
parameter DRSQ_SIZE = 4,
// DRAM Request Queue Size
parameter DREQ_SIZE = 4,
// Memory Response Queue Size
parameter MRSQ_SIZE = 4,
// Memory Request Queue Size
parameter MREQ_SIZE = 4,
// Enable cache writeable
parameter WRITE_ENABLE = 1,
@@ -35,22 +35,17 @@ module VX_cache #(
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = CORE_TAG_WIDTH,
// dram request tag size
parameter DRAM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)),
// Memory request tag size
parameter MEM_TAG_WIDTH = (32 - $clog2(CACHE_LINE_SIZE)),
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0,
// in-order DRAN
parameter IN_ORDER_DRAM = 0
parameter BANK_ADDR_OFFSET = 0
) (
`SCOPE_IO_VX_cache
input wire clk,
input wire reset,
input wire flush,
// Core request
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0] core_req_rw,
@@ -66,29 +61,32 @@ module VX_cache #(
output wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_rsp_tag,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready,
// Memory request
output wire mem_req_valid,
output wire mem_req_rw,
output wire [CACHE_LINE_SIZE-1:0] mem_req_byteen,
output wire [`MEM_ADDR_WIDTH-1:0] mem_req_addr,
output wire [`CACHE_LINE_WIDTH-1:0] mem_req_data,
output wire [MEM_TAG_WIDTH-1:0] mem_req_tag,
input wire mem_req_ready,
// Memory response
input wire mem_rsp_valid,
input wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data,
input wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag,
output wire mem_rsp_ready,
// PERF
`ifdef PERF_ENABLE
VX_perf_cache_if perf_cache_if,
`endif
// DRAM request
output wire dram_req_valid,
output wire dram_req_rw,
output wire [CACHE_LINE_SIZE-1:0] dram_req_byteen,
output wire [`DRAM_ADDR_WIDTH-1:0] dram_req_addr,
output wire [`CACHE_LINE_WIDTH-1:0] dram_req_data,
output wire [DRAM_TAG_WIDTH-1:0] dram_req_tag,
input wire dram_req_ready,
// DRAM response
input wire dram_rsp_valid,
input wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data,
input wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag,
output wire dram_rsp_ready
// device flush
input wire flush
);
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
wire [NUM_BANKS-1:0][NUM_PORTS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][`UP(`WORD_SELECT_BITS)-1:0] per_bank_core_req_wsel;
wire [NUM_BANKS-1:0][NUM_PORTS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
@@ -106,17 +104,17 @@ module VX_cache #(
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_rsp_tag;
wire [NUM_BANKS-1:0] per_bank_core_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_dram_req_valid;
wire [NUM_BANKS-1:0] per_bank_dram_req_rw;
wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_dram_req_byteen;
wire [NUM_BANKS-1:0][`DRAM_ADDR_WIDTH-1:0] per_bank_dram_req_addr;
wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_dram_req_data;
wire [NUM_BANKS-1:0] per_bank_dram_req_ready;
wire [NUM_BANKS-1:0] per_bank_mem_req_valid;
wire [NUM_BANKS-1:0] per_bank_mem_req_rw;
wire [NUM_BANKS-1:0][CACHE_LINE_SIZE-1:0] per_bank_mem_req_byteen;
wire [NUM_BANKS-1:0][`MEM_ADDR_WIDTH-1:0] per_bank_mem_req_addr;
wire [NUM_BANKS-1:0][`CACHE_LINE_WIDTH-1:0] per_bank_mem_req_data;
wire [NUM_BANKS-1:0] per_bank_mem_req_ready;
wire [NUM_BANKS-1:0] per_bank_dram_rsp_ready;
wire [NUM_BANKS-1:0] per_bank_mem_rsp_ready;
wire [`CACHE_LINE_WIDTH-1:0] dram_rsp_data_qual;
wire [DRAM_TAG_WIDTH-1:0] dram_rsp_tag_qual;
wire [`CACHE_LINE_WIDTH-1:0] mem_rsp_data_qual;
wire [MEM_TAG_WIDTH-1:0] mem_rsp_tag_qual;
wire [`LINE_SELECT_BITS-1:0] flush_addr;
wire flush_enable;
@@ -129,35 +127,35 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
wire drsq_full, drsq_empty;
wire drsq_push, drsq_pop;
wire mrsq_full, mrsq_empty;
wire mrsq_push, mrsq_pop;
assign drsq_push = dram_rsp_valid && dram_rsp_ready;
assign dram_rsp_ready = !drsq_full;
assign mrsq_push = mem_rsp_valid && mem_rsp_ready;
assign mem_rsp_ready = !mrsq_full;
VX_fifo_queue #(
.DATAW (DRAM_TAG_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (DRSQ_SIZE),
.DATAW (MEM_TAG_WIDTH + `CACHE_LINE_WIDTH),
.SIZE (MRSQ_SIZE),
.BUFFERED (1)
) dram_rsp_queue (
) mem_rsp_queue (
.clk (clk),
.reset (reset),
.push (drsq_push),
.pop (drsq_pop),
.data_in ({dram_rsp_tag, dram_rsp_data}),
.data_out ({dram_rsp_tag_qual, dram_rsp_data_qual}),
.empty (drsq_empty),
.full (drsq_full),
.push (mrsq_push),
.pop (mrsq_pop),
.data_in ({mem_rsp_tag, mem_rsp_data}),
.data_out ({mem_rsp_tag_qual, mem_rsp_data_qual}),
.empty (mrsq_empty),
.full (mrsq_full),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (size)
);
if (NUM_BANKS == 1) begin
`UNUSED_VAR (dram_rsp_tag_qual)
assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready;
`UNUSED_VAR (mem_rsp_tag_qual)
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready;
end else begin
assign drsq_pop = !drsq_empty && per_bank_dram_rsp_ready[`DRAM_ADDR_BANK(dram_rsp_tag_qual)];
assign mrsq_pop = !mrsq_empty && per_bank_mem_rsp_ready[`MEM_ADDR_BANK(mem_rsp_tag_qual)];
end
///////////////////////////////////////////////////////////////////////////
@@ -176,6 +174,7 @@ module VX_cache #(
///////////////////////////////////////////////////////////////////////////
VX_cache_core_req_bank_sel #(
.CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (CACHE_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
@@ -227,17 +226,17 @@ module VX_cache #(
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_dram_req_valid;
wire curr_bank_dram_req_rw;
wire [CACHE_LINE_SIZE-1:0] curr_bank_dram_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr;
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_dram_req_data;
wire curr_bank_dram_req_ready;
wire curr_bank_mem_req_valid;
wire curr_bank_mem_req_rw;
wire [CACHE_LINE_SIZE-1:0] curr_bank_mem_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_req_addr;
wire[`CACHE_LINE_WIDTH-1:0] curr_bank_mem_req_data;
wire curr_bank_mem_req_ready;
wire curr_bank_dram_rsp_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr;
wire [`CACHE_LINE_WIDTH-1:0] curr_bank_dram_rsp_data;
wire curr_bank_dram_rsp_ready;
wire curr_bank_mem_rsp_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_mem_rsp_addr;
wire [`CACHE_LINE_WIDTH-1:0] curr_bank_mem_rsp_data;
wire curr_bank_mem_rsp_ready;
// Core Req
assign curr_bank_core_req_valid = per_bank_core_req_valid[i];
@@ -258,28 +257,28 @@ module VX_cache #(
assign per_bank_core_rsp_tag [i] = curr_bank_core_rsp_tag;
assign per_bank_core_rsp_data [i] = curr_bank_core_rsp_data;
// DRAM request
assign per_bank_dram_req_valid[i] = curr_bank_dram_req_valid;
assign per_bank_dram_req_rw[i] = curr_bank_dram_req_rw;
assign per_bank_dram_req_byteen[i] = curr_bank_dram_req_byteen;
// Memory request
assign per_bank_mem_req_valid[i] = curr_bank_mem_req_valid;
assign per_bank_mem_req_rw[i] = curr_bank_mem_req_rw;
assign per_bank_mem_req_byteen[i] = curr_bank_mem_req_byteen;
if (NUM_BANKS == 1) begin
assign per_bank_dram_req_addr[i] = curr_bank_dram_req_addr;
assign per_bank_mem_req_addr[i] = curr_bank_mem_req_addr;
end else begin
assign per_bank_dram_req_addr[i] = `LINE_TO_DRAM_ADDR(curr_bank_dram_req_addr, i);
assign per_bank_mem_req_addr[i] = `LINE_TO_MEM_ADDR(curr_bank_mem_req_addr, i);
end
assign per_bank_dram_req_data[i] = curr_bank_dram_req_data;
assign curr_bank_dram_req_ready = per_bank_dram_req_ready[i];
assign per_bank_mem_req_data[i] = curr_bank_mem_req_data;
assign curr_bank_mem_req_ready = per_bank_mem_req_ready[i];
// DRAM response
// Memory response
if (NUM_BANKS == 1) begin
assign curr_bank_dram_rsp_valid = !drsq_empty;
assign curr_bank_dram_rsp_addr = dram_rsp_tag_qual;
assign curr_bank_mem_rsp_valid = !mrsq_empty;
assign curr_bank_mem_rsp_addr = mem_rsp_tag_qual;
end else begin
assign curr_bank_dram_rsp_valid = !drsq_empty && (`DRAM_ADDR_BANK(dram_rsp_tag_qual) == i);
assign curr_bank_dram_rsp_addr = `DRAM_TO_LINE_ADDR(dram_rsp_tag_qual);
assign curr_bank_mem_rsp_valid = !mrsq_empty && (`MEM_ADDR_BANK(mem_rsp_tag_qual) == i);
assign curr_bank_mem_rsp_addr = `MEM_TO_LINE_ADDR(mem_rsp_tag_qual);
end
assign curr_bank_dram_rsp_data = dram_rsp_data_qual;
assign per_bank_dram_rsp_ready[i] = curr_bank_dram_rsp_ready;
assign curr_bank_mem_rsp_data = mem_rsp_data_qual;
assign per_bank_mem_rsp_ready[i] = curr_bank_mem_rsp_ready;
VX_bank #(
.BANK_ID (i),
@@ -292,12 +291,11 @@ module VX_cache #(
.NUM_REQS (NUM_REQS),
.CREQ_SIZE (CREQ_SIZE),
.MSHR_SIZE (MSHR_SIZE),
.DREQ_SIZE (DREQ_SIZE),
.MREQ_SIZE (MREQ_SIZE),
.WRITE_ENABLE (WRITE_ENABLE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET),
.IN_ORDER_DRAM (IN_ORDER_DRAM)
.BANK_ADDR_OFFSET (BANK_ADDR_OFFSET)
) bank (
`SCOPE_BIND_VX_cache_bank(i)
@@ -330,19 +328,19 @@ module VX_cache #(
.core_rsp_tag (curr_bank_core_rsp_tag),
.core_rsp_ready (curr_bank_core_rsp_ready),
// DRAM request
.dram_req_valid (curr_bank_dram_req_valid),
.dram_req_rw (curr_bank_dram_req_rw),
.dram_req_byteen (curr_bank_dram_req_byteen),
.dram_req_addr (curr_bank_dram_req_addr),
.dram_req_data (curr_bank_dram_req_data),
.dram_req_ready (curr_bank_dram_req_ready),
// Memory request
.mem_req_valid (curr_bank_mem_req_valid),
.mem_req_rw (curr_bank_mem_req_rw),
.mem_req_byteen (curr_bank_mem_req_byteen),
.mem_req_addr (curr_bank_mem_req_addr),
.mem_req_data (curr_bank_mem_req_data),
.mem_req_ready (curr_bank_mem_req_ready),
// DRAM response
.dram_rsp_valid (curr_bank_dram_rsp_valid),
.dram_rsp_addr (curr_bank_dram_rsp_addr),
.dram_rsp_data (curr_bank_dram_rsp_data),
.dram_rsp_ready (curr_bank_dram_rsp_ready),
// Memory response
.mem_rsp_valid (curr_bank_mem_rsp_valid),
.mem_rsp_addr (curr_bank_mem_rsp_addr),
.mem_rsp_data (curr_bank_mem_rsp_data),
.mem_rsp_ready (curr_bank_mem_rsp_ready),
// flush
.flush_enable (flush_enable),
@@ -351,6 +349,7 @@ module VX_cache #(
end
VX_cache_core_rsp_merge #(
.CACHE_ID (CACHE_ID),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (NUM_PORTS),
.WORD_SIZE (WORD_SIZE),
@@ -372,27 +371,27 @@ module VX_cache #(
.core_rsp_ready (core_rsp_ready)
);
wire [NUM_BANKS-1:0][(`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
wire [NUM_BANKS-1:0][(`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH)-1:0] data_in;
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign data_in[i] = {per_bank_dram_req_addr[i], per_bank_dram_req_rw[i], per_bank_dram_req_byteen[i], per_bank_dram_req_data[i]};
assign data_in[i] = {per_bank_mem_req_addr[i], per_bank_mem_req_rw[i], per_bank_mem_req_byteen[i], per_bank_mem_req_data[i]};
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (`DRAM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.DATAW (`MEM_ADDR_WIDTH + 1 + CACHE_LINE_SIZE + `CACHE_LINE_WIDTH),
.BUFFERED (1)
) dram_req_arb (
) mem_req_arb (
.clk (clk),
.reset (reset),
.valid_in (per_bank_dram_req_valid),
.valid_in (per_bank_mem_req_valid),
.data_in (data_in),
.ready_in (per_bank_dram_req_ready),
.valid_out (dram_req_valid),
.data_out ({dram_req_addr, dram_req_rw, dram_req_byteen, dram_req_data}),
.ready_out (dram_req_ready)
.ready_in (per_bank_mem_req_ready),
.valid_out (mem_req_valid),
.data_out ({mem_req_addr, mem_req_rw, mem_req_byteen, mem_req_data}),
.ready_out (mem_req_ready)
);
assign dram_req_tag = dram_req_addr;
assign mem_req_tag = mem_req_addr;
`ifdef PERF_ENABLE
// per cycle: core_reads, core_writes
@@ -420,13 +419,13 @@ module VX_cache #(
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
reg [43:0] perf_core_reads;
reg [43:0] perf_core_writes;
reg [43:0] perf_read_misses;
reg [43:0] perf_write_misses;
reg [43:0] perf_mshr_stalls;
reg [43:0] perf_pipe_stalls;
reg [43:0] perf_crsp_stalls;
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
reg [`PERF_CTR_BITS-1:0] perf_read_misses;
reg [`PERF_CTR_BITS-1:0] perf_write_misses;
reg [`PERF_CTR_BITS-1:0] perf_mshr_stalls;
reg [`PERF_CTR_BITS-1:0] perf_pipe_stalls;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
@@ -438,13 +437,13 @@ module VX_cache #(
perf_pipe_stalls <= 0;
perf_crsp_stalls <= 0;
end else begin
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
perf_read_misses <= perf_read_misses + 44'(perf_read_miss_per_cycle);
perf_write_misses <= perf_write_misses+ 44'(perf_write_miss_per_cycle);
perf_mshr_stalls <= perf_mshr_stalls + 44'(perf_mshr_stall_per_cycle);
perf_pipe_stalls <= perf_pipe_stalls + 44'(perf_pipe_stall_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
perf_read_misses <= perf_read_misses + `PERF_CTR_BITS'(perf_read_miss_per_cycle);
perf_write_misses <= perf_write_misses+ `PERF_CTR_BITS'(perf_write_miss_per_cycle);
perf_mshr_stalls <= perf_mshr_stalls + `PERF_CTR_BITS'(perf_mshr_stall_per_cycle);
perf_pipe_stalls <= perf_pipe_stalls + `PERF_CTR_BITS'(perf_pipe_stall_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end

View File

@@ -1,6 +1,8 @@
`include "VX_cache_define.vh"
module VX_cache_core_req_bank_sel #(
parameter CACHE_ID = 0,
// Size of line inside a bank in bytes
parameter CACHE_LINE_SIZE = 64,
// Size of a word in bytes
@@ -22,7 +24,7 @@ module VX_cache_core_req_bank_sel #(
input wire reset,
`ifdef PERF_ENABLE
output wire [43:0] bank_stalls,
output wire [`PERF_CTR_BITS-1:0] bank_stalls,
`endif
input wire [NUM_REQS-1:0] core_req_valid,
@@ -43,6 +45,7 @@ module VX_cache_core_req_bank_sel #(
output wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag,
input wire [`BANK_READY_COUNT-1:0] per_bank_core_req_ready
);
`UNUSED_PARAM (CACHE_ID)
`STATIC_ASSERT (NUM_REQS >= NUM_BANKS, ("invalid number of banks"));
`UNUSED_VAR (clk)
@@ -148,7 +151,7 @@ module VX_cache_core_req_bank_sel #(
end
end
end
end else begin
always @(*) begin
@@ -303,13 +306,13 @@ module VX_cache_core_req_bank_sel #(
end
end
reg [43:0] bank_stalls_r;
reg [`PERF_CTR_BITS-1:0] bank_stalls_r;
always @(posedge clk) begin
if (reset) begin
bank_stalls_r <= 0;
end else begin
bank_stalls_r <= bank_stalls_r + 44'($countones(core_req_sel_r & ~core_req_ready));
bank_stalls_r <= bank_stalls_r + `PERF_CTR_BITS'($countones(core_req_sel_r & ~core_req_ready));
end
end

View File

@@ -1,6 +1,8 @@
`include "VX_cache_define.vh"
module VX_cache_core_rsp_merge #(
parameter CACHE_ID = 0,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Number of banks
@@ -31,6 +33,8 @@ module VX_cache_core_rsp_merge #(
output wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_rsp_ready
);
`UNUSED_PARAM (CACHE_ID)
if (NUM_BANKS > 1) begin
reg [NUM_REQS-1:0] core_rsp_valid_unqual;
@@ -39,6 +43,10 @@ module VX_cache_core_rsp_merge #(
if (CORE_TAG_ID_BITS != 0) begin
// The core response bus handles a single tag at a time.
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
reg [CORE_TAG_WIDTH-1:0] core_rsp_tag_unqual;
wire core_rsp_ready_unqual;

View File

@@ -21,8 +21,8 @@
`define WORDS_PER_LINE (CACHE_LINE_SIZE / WORD_SIZE)
`define WORD_ADDR_WIDTH (32-`CLOG2(WORD_SIZE))
`define DRAM_ADDR_WIDTH (32-`CLOG2(CACHE_LINE_SIZE))
`define LINE_ADDR_WIDTH (`DRAM_ADDR_WIDTH-`BANK_SELECT_BITS)
`define MEM_ADDR_WIDTH (32-`CLOG2(CACHE_LINE_SIZE))
`define LINE_ADDR_WIDTH (`MEM_ADDR_WIDTH-`BANK_SELECT_BITS)
// Word select
`define WORD_SELECT_BITS `CLOG2(`WORDS_PER_LINE)
@@ -59,11 +59,11 @@
`define BANK_READY_COUNT ((SHARED_BANK_READY != 0) ? 1 : NUM_BANKS)
`define DRAM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET]
`define MEM_ADDR_BANK(x) x[`BANK_SELECT_BITS+BANK_ADDR_OFFSET-1 : BANK_ADDR_OFFSET]
`define DRAM_TO_LINE_ADDR(x) x[`DRAM_ADDR_WIDTH-1 : `BANK_SELECT_BITS]
`define MEM_TO_LINE_ADDR(x) x[`MEM_ADDR_WIDTH-1 : `BANK_SELECT_BITS]
`define LINE_TO_DRAM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define LINE_TO_MEM_ADDR(x, i) {x, `BANK_SELECT_BITS'(i)}
`define LINE_TO_BYTE_ADDR(x, i) {x, (32-$bits(x))'(i << (32-$bits(x)-`BANK_SELECT_BITS))}

View File

@@ -4,25 +4,25 @@ module VX_shared_mem #(
parameter CACHE_ID = 0,
// Size of cache in bytes
parameter CACHE_SIZE = 16384,
parameter CACHE_SIZE = (1024*16),
// Number of banks
parameter NUM_BANKS = 4,
parameter NUM_BANKS = 2,
// Size of a word in bytes
parameter WORD_SIZE = 4,
// Number of Word requests per cycle
parameter NUM_REQS = NUM_BANKS,
parameter NUM_REQS = 4,
// Core Request Queue Size
parameter CREQ_SIZE = 4,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
parameter CREQ_SIZE = 8,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0,
parameter CORE_TAG_ID_BITS = 8,
// core request tag size
parameter CORE_TAG_WIDTH = (2 + CORE_TAG_ID_BITS),
// bank offset from beginning of index range
parameter BANK_ADDR_OFFSET = 0
parameter BANK_ADDR_OFFSET = `CLOG2(256)
) (
input wire clk,
input wire reset,
@@ -54,13 +54,6 @@ module VX_shared_mem #(
localparam CACHE_LINE_SIZE = WORD_SIZE;
`ifdef DBG_CACHE_REQ_INFO
/* verilator lint_off UNUSED */
wire [31:0] debug_pc_st0;
wire [`NW_BITS-1:0] debug_wid_st0;
/* verilator lint_on UNUSED */
`endif
wire [NUM_BANKS-1:0] per_bank_core_req_valid_unqual;
wire [NUM_BANKS-1:0] per_bank_core_req_rw_unqual;
wire [NUM_BANKS-1:0][`LINE_ADDR_WIDTH-1:0] per_bank_core_req_addr_unqual;
@@ -71,6 +64,7 @@ module VX_shared_mem #(
wire per_bank_core_req_ready_unqual;
VX_cache_core_req_bank_sel #(
.CACHE_ID (CACHE_ID),
.CACHE_LINE_SIZE (WORD_SIZE),
.NUM_BANKS (NUM_BANKS),
.NUM_PORTS (1),
@@ -108,20 +102,26 @@ module VX_shared_mem #(
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr;
wire [NUM_BANKS-1:0][WORD_SIZE-1:0] per_bank_core_req_byteen;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_req_data;
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][CORE_TAG_WIDTH-1:0] per_bank_core_req_tag;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire creq_push, creq_pop, creq_empty, creq_full;
wire crsq_in_ready;
wire crsq_in_fire_last;
wire [NUM_BANKS-1:0] per_bank_rsp_valid = per_bank_core_req_valid & ~per_bank_core_req_rw;
wire core_req_has_read = (| per_bank_rsp_valid);
assign creq_push = (| core_req_valid) && !creq_full;
assign creq_pop = ~creq_empty && crsq_in_ready;
assign creq_push = (| core_req_valid) && ~creq_full;
assign creq_pop = (~creq_empty && ~core_req_has_read)
|| crsq_in_fire_last;
assign per_bank_core_req_ready_unqual = ~creq_full;
wire [NUM_REQS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
wire [NUM_BANKS-1:0][`LINE_SELECT_BITS-1:0] per_bank_core_req_addr_qual;
`UNUSED_VAR (per_bank_core_req_addr_unqual)
for (genvar i = 0; i < NUM_REQS; i++) begin
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign per_bank_core_req_addr_qual[i] = per_bank_core_req_addr_unqual[i][`LINE_SELECT_BITS-1:0];
end
@@ -155,9 +155,14 @@ module VX_shared_mem #(
`UNUSED_PIN (size)
);
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
wire [NUM_BANKS-1:0][`WORD_WIDTH-1:0] per_bank_core_rsp_data;
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire wren = per_bank_core_req_rw[i]
&& per_bank_core_req_valid[i]
&& creq_pop;
VX_sp_ram #(
.DATAW (`WORD_WIDTH),
.SIZE (`LINES_PER_BANK),
@@ -166,13 +171,41 @@ module VX_shared_mem #(
) data (
.clk (clk),
.addr (per_bank_core_req_addr[i]),
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i]),
.wren (wren),
.byteen (per_bank_core_req_byteen[i]),
.rden (1'b1),
.din (per_bank_core_req_data[i]),
.dout (per_bank_core_rsp_data[i])
);
end
// The core response bus handles a single tag at a time.
// We first need to select the current tag to process,
// then send all bank responses for that tag as a batch
wire crsq_in_valid, crsq_in_ready;
reg [NUM_BANKS-1:0] bank_rsp_sel, bank_rsp_sel_r;
wire [NUM_BANKS-1:0] bank_rsp_sel_n = bank_rsp_sel | bank_rsp_sel_r;
wire crsq_in_fire = crsq_in_valid && crsq_in_ready;
assign crsq_in_fire_last = crsq_in_fire && (bank_rsp_sel_n == per_bank_rsp_valid);
// Sequential update of bank_rsp_sel, the mask of banks whose read response
// has already been sent for the current request.
// Each time a response batch is accepted (crsq_in_fire), the banks served in
// that batch are merged in through bank_rsp_sel_n. Once the merged mask equals
// per_bank_rsp_valid (every bank with a pending read response is covered),
// the mask is cleared so the next request starts from scratch.
always @(posedge clk) begin
if (reset) begin
bank_rsp_sel <= 0;
end else begin
if (crsq_in_fire) begin
// last batch for this request: all read responses have been sent
if (bank_rsp_sel_n == per_bank_rsp_valid) begin
bank_rsp_sel <= 0;
end else begin
// more batches to go: remember the banks served so far
bank_rsp_sel <= bank_rsp_sel_n;
end
end
end
end
reg [NUM_REQS-1:0] core_rsp_valids_in;
reg [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_rsp_data_in;
@@ -180,31 +213,30 @@ module VX_shared_mem #(
always @(*) begin
core_rsp_valids_in = 0;
core_rsp_data_in = 'x;
core_rsp_data_in = 'x;
core_rsp_tag_in = 'x;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_r = 0;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_rsp_valid[i] && ~bank_rsp_sel[i]) begin
core_rsp_tag_in = per_bank_core_req_tag[i];
end
end
end
`ifdef DBG_CACHE_REQ_INFO
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0, debug_wid_st0} = core_rsp_tag_in[`CACHE_REQ_INFO_RNG];
end else begin
assign {debug_pc_st0, debug_wid_st0} = 0;
for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_req_valid[i]
&& (core_rsp_tag_in[CORE_TAG_ID_BITS-1:0] == per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
core_rsp_valids_in[per_bank_core_req_tid[i]] = 1;
core_rsp_data_in[per_bank_core_req_tid[i]] = per_bank_core_rsp_data[i];
bank_rsp_sel_r[i] = 1;
end
end
end
`endif
wire [NUM_REQS-1:0] core_rsp_valids_out;
wire core_rsp_valid_out;
wire core_rsp_rw = | (per_bank_core_req_valid & per_bank_core_req_rw);
wire crsq_in_valid = ~creq_empty && ~core_rsp_rw;
assign crsq_in_valid = ~creq_empty && core_req_has_read;
VX_skid_buffer #(
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH)
@@ -221,16 +253,82 @@ module VX_shared_mem #(
assign core_rsp_valid = core_rsp_valids_out & {NUM_REQS{core_rsp_valid_out}};
`ifdef DBG_CACHE_REQ_INFO
`IGNORE_WARNINGS_BEGIN
wire [NUM_BANKS-1:0][31:0] debug_pc_st0, debug_pc_st1;
wire [NUM_BANKS-1:0][`NW_BITS-1:0] debug_wid_st0, debug_wid_st1;
`IGNORE_WARNINGS_END
for (genvar i = 0; i < NUM_BANKS; ++i) begin
if (CORE_TAG_WIDTH != CORE_TAG_ID_BITS && CORE_TAG_ID_BITS != 0) begin
assign {debug_pc_st0[i], debug_wid_st0[i]} = per_bank_core_req_tag_unqual[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
assign {debug_pc_st1[i], debug_wid_st1[i]} = per_bank_core_req_tag[i][CORE_TAG_WIDTH-1:CORE_TAG_ID_BITS];
end else begin
assign {debug_pc_st0[i], debug_wid_st0[i]} = 0;
assign {debug_pc_st1[i], debug_wid_st1[i]} = 0;
end
end
`endif
`ifdef DBG_PRINT_CACHE_BANK
reg is_multi_tag_req;
`IGNORE_WARNINGS_BEGIN
reg [CORE_TAG_WIDTH-1:0] core_req_tag_sel;
`IGNORE_WARNINGS_END
// Debug-only combinational check: raises is_multi_tag_req when the valid
// banks of the current request do not all carry the same tag ID.
always @(*) begin
// Pick a reference tag. The scan runs from the highest bank index down, so
// the last assignment (the lowest-indexed valid bank) is the one that sticks.
core_req_tag_sel ='x;
for (integer i = NUM_BANKS-1; i >= 0; --i) begin
if (per_bank_core_req_valid[i]) begin
core_req_tag_sel = per_bank_core_req_tag[i];
end
end
// Compare the low CORE_TAG_ID_BITS of every valid bank's tag against the
// reference; the flag is only raised while creq_empty is low.
is_multi_tag_req = 0;
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]
&& (core_req_tag_sel[CORE_TAG_ID_BITS-1:0] != per_bank_core_req_tag[i][CORE_TAG_ID_BITS-1:0])) begin
is_multi_tag_req = !creq_empty;
end
end
end
always @(posedge clk) begin
if (!crsq_in_ready) begin
$display("%t: cache%0d pipeline-stall", $time, CACHE_ID);
$display("%t: *** cache%0d pipeline-stall", $time, CACHE_ID);
end
if (is_multi_tag_req) begin
$display("%t: *** cache%0d multi-tag request!", $time, CACHE_ID);
end
if (creq_push) begin
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid_unqual[i]) begin
if (per_bank_core_req_rw_unqual[i]) begin
$display("%t: cache%0d:%0d core-wr-req: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i], per_bank_core_req_data_unqual[i],
debug_wid_st0[i], debug_pc_st0[i]);
end else begin
$display("%t: cache%0d:%0d core-rd-req: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr_unqual[i], per_bank_core_req_tag_unqual[i], per_bank_core_req_byteen_unqual[i],
debug_wid_st0[i], debug_pc_st0[i]);
end
end
end
end
if (creq_pop) begin
if (core_rsp_rw)
$display("%t: cache%0d core-wr-req: tmask=%0b, addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, per_bank_core_req_valid, per_bank_core_req_addr, per_bank_core_req_tag, per_bank_core_req_byteen, per_bank_core_req_data, debug_wid_st0, debug_pc_st0);
else
$display("%t: cache%0d core-rd-req: tmask=%0b, addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, per_bank_core_req_valid, per_bank_core_req_addr, per_bank_core_req_tag, per_bank_core_req_byteen, per_bank_core_rsp_data, debug_wid_st0, debug_pc_st0);
for (integer i = 0; i < NUM_BANKS; ++i) begin
if (per_bank_core_req_valid[i]) begin
if (per_bank_core_req_rw[i]) begin
$display("%t: cache%0d:%0d core-wr-rsp: addr=%0h, tag=%0h, byteen=%b, data=%0h, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i], per_bank_core_req_data[i],
debug_wid_st1[i], debug_pc_st1[i]);
end else begin
$display("%t: cache%0d:%0d core-rd-rsp: addr=%0h, tag=%0h, byteen=%b, wid=%0d, PC=%0h",
$time, CACHE_ID, i, per_bank_core_req_addr[i], per_bank_core_req_tag[i], per_bank_core_req_byteen[i],
debug_wid_st1[i], debug_pc_st1[i]);
end
end
end
end
end
`endif
@@ -249,9 +347,9 @@ module VX_shared_mem #(
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
end
reg [43:0] perf_core_reads;
reg [43:0] perf_core_writes;
reg [43:0] perf_crsp_stalls;
reg [`PERF_CTR_BITS-1:0] perf_core_reads;
reg [`PERF_CTR_BITS-1:0] perf_core_writes;
reg [`PERF_CTR_BITS-1:0] perf_crsp_stalls;
always @(posedge clk) begin
if (reset) begin
@@ -259,9 +357,9 @@ module VX_shared_mem #(
perf_core_writes <= 0;
perf_crsp_stalls <= 0;
end else begin
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
perf_core_reads <= perf_core_reads + `PERF_CTR_BITS'(perf_core_reads_per_cycle);
perf_core_writes <= perf_core_writes + `PERF_CTR_BITS'(perf_core_writes_per_cycle);
perf_crsp_stalls <= perf_crsp_stalls + `PERF_CTR_BITS'(perf_crsp_stall_per_cycle);
end
end

View File

@@ -1,22 +0,0 @@
// Include guard: the interface below is declared at most once per compilation.
`ifndef VX_CACHE_DRAM_REQ_IF
`define VX_CACHE_DRAM_REQ_IF
`include "../cache/VX_cache_define.vh"
// Signal bundle for a cache DRAM request channel.
// Parameters:
//   DRAM_LINE_WIDTH - width in bits of data; byteen is DRAM_LINE_WIDTH/8 bits wide
//   DRAM_ADDR_WIDTH - width in bits of addr
//   DRAM_TAG_WIDTH  - width in bits of tag
// NOTE(review): valid/ready presumably form a handshake and rw presumably
// selects write vs. read - confirm against the modules that bind this interface.
interface VX_cache_dram_req_if #(
parameter DRAM_LINE_WIDTH = 1,
parameter DRAM_ADDR_WIDTH = 1,
parameter DRAM_TAG_WIDTH = 1
) ();
wire valid;
wire rw;
wire [(DRAM_LINE_WIDTH/8)-1:0] byteen;
wire [DRAM_ADDR_WIDTH-1:0] addr;
wire [DRAM_LINE_WIDTH-1:0] data;
wire [DRAM_TAG_WIDTH-1:0] tag;
wire ready;
endinterface
`endif

View File

@@ -1,18 +0,0 @@
// Include guard: the interface below is declared at most once per compilation.
`ifndef VX_CACHE_DRAM_RSP_IF
`define VX_CACHE_DRAM_RSP_IF
`include "../cache/VX_cache_define.vh"
// Signal bundle for a cache DRAM response channel.
// Parameters:
//   DRAM_LINE_WIDTH - width in bits of data
//   DRAM_TAG_WIDTH  - width in bits of tag
// NOTE(review): valid/ready presumably form a handshake - confirm against the
// modules that bind this interface.
interface VX_cache_dram_rsp_if #(
parameter DRAM_LINE_WIDTH = 1,
parameter DRAM_TAG_WIDTH = 1
) ();
wire valid;
wire [DRAM_LINE_WIDTH-1:0] data;
wire [DRAM_TAG_WIDTH-1:0] tag;
wire ready;
endinterface
`endif

View File

@@ -0,0 +1,23 @@
// Include guard: the interface below is declared at most once per compilation.
`ifndef VX_CACHE_MEM_REQ_IF
`define VX_CACHE_MEM_REQ_IF
`include "../cache/VX_cache_config.vh"
// Signal bundle for a cache memory request channel.
// Parameters:
//   MEM_LINE_WIDTH - width in bits of data
//   MEM_ADDR_WIDTH - width in bits of addr
//   MEM_TAG_WIDTH  - width in bits of tag
//   MEM_LINE_SIZE  - width in bits of byteen; defaults to MEM_LINE_WIDTH / 8
//                    (one enable bit per data byte)
// NOTE(review): valid/ready presumably form a handshake and rw presumably
// selects write vs. read - confirm against the modules that bind this interface.
interface VX_cache_mem_req_if #(
parameter MEM_LINE_WIDTH = 1,
parameter MEM_ADDR_WIDTH = 1,
parameter MEM_TAG_WIDTH = 1,
parameter MEM_LINE_SIZE = MEM_LINE_WIDTH / 8
) ();
wire valid;
wire rw;
wire [MEM_LINE_SIZE-1:0] byteen;
wire [MEM_ADDR_WIDTH-1:0] addr;
wire [MEM_LINE_WIDTH-1:0] data;
wire [MEM_TAG_WIDTH-1:0] tag;
wire ready;
endinterface
`endif

View File

@@ -0,0 +1,18 @@
// Include guard: the interface below is declared at most once per compilation.
`ifndef VX_CACHE_MEM_RSP_IF
`define VX_CACHE_MEM_RSP_IF
`include "../cache/VX_cache_config.vh"
// Signal bundle for a cache memory response channel.
// Parameters:
//   MEM_LINE_WIDTH - width in bits of data
//   MEM_TAG_WIDTH  - width in bits of tag
// NOTE(review): valid/ready presumably form a handshake - confirm against the
// modules that bind this interface.
interface VX_cache_mem_rsp_if #(
parameter MEM_LINE_WIDTH = 1,
parameter MEM_TAG_WIDTH = 1
) ();
wire valid;
wire [MEM_LINE_WIDTH-1:0] data;
wire [MEM_TAG_WIDTH-1:0] tag;
wire ready;
endinterface
`endif

View File

@@ -5,14 +5,14 @@
// Bundle of per-cache performance signals: read/write counts, read/write
// miss counts, and bank, MSHR, pipeline and core-response stall counts.
// NOTE(review): the producer and consumer of these wires are not visible
// here - presumably they carry the cache's running perf counters; confirm.
interface VX_perf_cache_if ();
wire [43:0] reads;
wire [43:0] writes;
wire [43:0] read_misses;
wire [43:0] write_misses;
wire [43:0] bank_stalls;
wire [43:0] mshr_stalls;
wire [43:0] pipe_stalls;
wire [43:0] crsp_stalls;
wire [`PERF_CTR_BITS-1:0] reads;
wire [`PERF_CTR_BITS-1:0] writes;
wire [`PERF_CTR_BITS-1:0] read_misses;
wire [`PERF_CTR_BITS-1:0] write_misses;
wire [`PERF_CTR_BITS-1:0] bank_stalls;
wire [`PERF_CTR_BITS-1:0] mshr_stalls;
wire [`PERF_CTR_BITS-1:0] pipe_stalls;
wire [`PERF_CTR_BITS-1:0] crsp_stalls;
endinterface

View File

@@ -5,28 +5,28 @@
// Bundle of memory-system performance signals, grouped by name prefix:
// icache_*, dcache_*, smem_* and the external memory group (dram_* / mem_*).
// NOTE(review): the producer and consumer of these wires are not visible
// here - presumably they aggregate the per-cache perf counters; confirm.
interface VX_perf_memsys_if ();
wire [43:0] icache_reads;
wire [43:0] icache_read_misses;
wire [43:0] icache_pipe_stalls;
wire [43:0] icache_crsp_stalls;
wire [`PERF_CTR_BITS-1:0] icache_reads;
wire [`PERF_CTR_BITS-1:0] icache_read_misses;
wire [`PERF_CTR_BITS-1:0] icache_pipe_stalls;
wire [`PERF_CTR_BITS-1:0] icache_crsp_stalls;
wire [43:0] dcache_reads;
wire [43:0] dcache_writes;
wire [43:0] dcache_read_misses;
wire [43:0] dcache_write_misses;
wire [43:0] dcache_bank_stalls;
wire [43:0] dcache_mshr_stalls;
wire [43:0] dcache_pipe_stalls;
wire [43:0] dcache_crsp_stalls;
wire [`PERF_CTR_BITS-1:0] dcache_reads;
wire [`PERF_CTR_BITS-1:0] dcache_writes;
wire [`PERF_CTR_BITS-1:0] dcache_read_misses;
wire [`PERF_CTR_BITS-1:0] dcache_write_misses;
wire [`PERF_CTR_BITS-1:0] dcache_bank_stalls;
wire [`PERF_CTR_BITS-1:0] dcache_mshr_stalls;
wire [`PERF_CTR_BITS-1:0] dcache_pipe_stalls;
wire [`PERF_CTR_BITS-1:0] dcache_crsp_stalls;
wire [43:0] smem_reads;
wire [43:0] smem_writes;
wire [43:0] smem_bank_stalls;
wire [`PERF_CTR_BITS-1:0] smem_reads;
wire [`PERF_CTR_BITS-1:0] smem_writes;
wire [`PERF_CTR_BITS-1:0] smem_bank_stalls;
wire [43:0] dram_reads;
wire [43:0] dram_writes;
wire [43:0] dram_stalls;
wire [43:0] dram_latency;
wire [`PERF_CTR_BITS-1:0] mem_reads;
wire [`PERF_CTR_BITS-1:0] mem_writes;
wire [`PERF_CTR_BITS-1:0] mem_stalls;
wire [`PERF_CTR_BITS-1:0] mem_latency;
endinterface

View File

@@ -4,14 +4,14 @@
`include "VX_define.vh"
// Bundle of pipeline stall performance signals, one per name prefix
// (ibf, scb, lsu, csr, alu, gpu); the fpu entry exists only when
// EXT_F_ENABLE is defined.
// NOTE(review): the producer and consumer of these wires are not visible
// here - presumably each carries a running stall count; confirm.
interface VX_perf_pipeline_if ();
wire [43:0] ibf_stalls;
wire [43:0] scb_stalls;
wire [43:0] lsu_stalls;
wire [43:0] csr_stalls;
wire [43:0] alu_stalls;
wire [43:0] gpu_stalls;
wire [`PERF_CTR_BITS-1:0] ibf_stalls;
wire [`PERF_CTR_BITS-1:0] scb_stalls;
wire [`PERF_CTR_BITS-1:0] lsu_stalls;
wire [`PERF_CTR_BITS-1:0] csr_stalls;
wire [`PERF_CTR_BITS-1:0] alu_stalls;
wire [`PERF_CTR_BITS-1:0] gpu_stalls;
`ifdef EXT_F_ENABLE
wire [43:0] fpu_stalls;
wire [`PERF_CTR_BITS-1:0] fpu_stalls;
`endif
endinterface

View File

@@ -94,13 +94,13 @@ module VX_scope #(
delay_val <= $bits(delay_val)'(cmd_data);
cmd_start <= 1;
`ifdef DBG_PRINT_SCOPE
$display("*** scope:CMD_SET_START: delay_val=%0d", $bits(delay_val)'(cmd_data));
$display("%t: *** scope: CMD_SET_START: delay_val=%0d", $time, $bits(delay_val)'(cmd_data));
`endif
end
CMD_SET_STOP: begin
waddr_end <= $bits(waddr)'(cmd_data);
`ifdef DBG_PRINT_SCOPE
$display("*** scope:CMD_SET_STOP: waddr_end=%0d", $bits(waddr)'(cmd_data));
$display("%t: *** scope: CMD_SET_STOP: waddr_end=%0d", $time, $bits(waddr)'(cmd_data));
`endif
end
default:;
@@ -117,7 +117,7 @@ module VX_scope #(
delay_cntr <= 0;
start_time <= timestamp;
`ifdef DBG_PRINT_SCOPE
$display("*** scope: recording start - start_time=%0d", timestamp);
$display("%t: *** scope: recording start - start_time=%0d", $time, timestamp);
`endif
end else begin
start_wait <= 1;
@@ -133,7 +133,7 @@ module VX_scope #(
delta <= 0;
start_time <= timestamp;
`ifdef DBG_PRINT_SCOPE
$display("*** scope: recording start - start_time=%0d", timestamp);
$display("%t: *** scope: recording start - start_time=%0d", $time, timestamp);
`endif
end
end
@@ -162,7 +162,7 @@ module VX_scope #(
if (stop
|| (waddr >= waddr_end)) begin
`ifdef DBG_PRINT_SCOPE
$display("*** scope: recording stop - waddr=(%0d, %0d)", waddr, waddr_end);
$display("%t: *** scope: recording stop - waddr=(%0d, %0d)", $time, waddr, waddr_end);
`endif
waddr <= waddr; // keep last address
recording <= 0;

View File

@@ -2,6 +2,7 @@
# coding=utf-8
from __future__ import print_function
import sys
import os
import os.path as path
import re
@@ -10,55 +11,19 @@ from datetime import datetime
script_dir = path.dirname(path.realpath(__file__))
defines = {}
for k, v in os.environ.items():
if k.upper().startswith('V_'):
defines[k[2:]] = v
print('Custom params:', ', '.join(['='.join(x) for x in defines.items()]))
parser = argparse.ArgumentParser()
parser.add_argument('--outc', default='none', help='Output C header')
parser.add_argument('--outv', default='none', help='Output Verilog header')
parser.add_argument('-i', "--input", default='none', help='Verilog header')
parser.add_argument('-o', "--output", default='none', help='C header')
args = parser.parse_args()
if args.outc == 'none' and args.outv == 'none':
print('Warning: not emitting any files. Specify arguments')
if args.outv != 'none':
with open(args.outv, 'w') as f:
print('''
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at {date}
`ifndef VX_USER_CONFIG
`define VX_USER_CONFIG
'''[1:].format(date=datetime.now()), file=f)
for k, v in defines.items():
print('`define {} {}'.format(k, v), file=f)
print('\n`endif', file=f)
if args.outc != 'none':
with open(args.outc, 'w') as f:
print('''
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at {date}
#ifndef VX_USER_CONFIG
#define VX_USER_CONFIG
'''[1:].format(date=datetime.now()), file=f)
for k, v in defines.items():
print('#define {} {}'.format(k, v), file=f)
print('\n#endif', file=f)
# Both the Verilog input header (-i) and the C output header (-o) are required.
if args.input == 'none' or args.output == 'none':
    # Report on stderr and exit with a non-zero status: a bare sys.exit()
    # returns 0, which would make a failed invocation look like success to
    # make and other callers.
    print('Error: invalid arguments', file=sys.stderr)
    sys.exit(1)
translation_rules = [
# preprocessor directives
(re.compile(r'^\s*`include .*$'), r''),
(re.compile(r'`include\s+.*$'), r''),
(re.compile(r'`ifdef'), r'#ifdef'),
(re.compile(r'`ifndef'), r'#ifndef'),
(re.compile(r'`elif'), r'#elif'),
@@ -75,25 +40,24 @@ translation_rules = [
(re.compile(r"\d+'h([\da-fA-F]+)"), r'0x\1')
]
if args.outc != 'none':
with open(args.outc, 'a') as f:
print('''
with open(args.output, 'w') as f:
print('''
// auto-generated by gen_config.py. DO NOT EDIT
// Generated at {date}
// Translated from VX_config.vh:
'''[1:].format(date=datetime.now()), file=f)
with open(path.join(script_dir, '../rtl/VX_config.vh'), 'r') as r:
lineno = 0
for line in r:
for pat, repl in translation_rules:
match = pat.search(line)
if match:
line = re.sub(pat, repl, line)
#print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
f.write(line)
lineno = lineno + 1
print('''
with open(args.input, 'r') as r:
lineno = 0
for line in r:
for pat, repl in translation_rules:
match = pat.search(line)
if match:
line = re.sub(pat, repl, line)
#print("*** match @" + str(lineno) + ": " + match.group() + " => " + line)
f.write(line)
lineno = lineno + 1
print('''
'''[1:], file=f)

View File

@@ -97,9 +97,8 @@
"avs_byteenable":64,
"avs_burstcount":4,
"avs_readdatavalid":1,
"mem_bank_select":1,
"cci_dram_rd_req_ctr":26,
"cci_dram_wr_req_ctr":26,
"cci_mem_rd_req_ctr":26,
"cci_mem_wr_req_ctr":26,
"cci_rd_req_ctr":26,
"cci_rd_rsp_ctr":3,
"cci_wr_req_ctr":26,
@@ -110,23 +109,23 @@
"!cci_pending_reads_full":1,
"!cci_pending_writes_empty":1,
"!cci_pending_writes_full": 1,
"?afu_dram_req_fire": 1,
"afu_dram_req_addr": 26,
"afu_dram_req_tag": 28,
"?afu_dram_rsp_fire": 1,
"afu_dram_rsp_tag": 28
"?afu_mem_req_fire": 1,
"afu_mem_req_addr": 26,
"afu_mem_req_tag": 28,
"?afu_mem_rsp_fire": 1,
"afu_mem_rsp_tag": 28
},
"afu/vortex": {
"!reset": 1,
"?dram_req_fire": 1,
"dram_req_addr": 32,
"dram_req_rw": 1,
"dram_req_byteen":"`VX_DRAM_BYTEEN_WIDTH",
"dram_req_data":"`VX_DRAM_LINE_WIDTH",
"dram_req_tag":"`VX_DRAM_TAG_WIDTH",
"?dram_rsp_fire": 1,
"dram_rsp_data":"`VX_DRAM_LINE_WIDTH",
"dram_rsp_tag":"`VX_DRAM_TAG_WIDTH",
"?mem_req_fire": 1,
"mem_req_addr": 32,
"mem_req_rw": 1,
"mem_req_byteen":"`VX_MEM_BYTEEN_WIDTH",
"mem_req_data":"`VX_MEM_LINE_WIDTH",
"mem_req_tag":"`VX_MEM_TAG_WIDTH",
"?mem_rsp_fire": 1,
"mem_rsp_data":"`VX_MEM_LINE_WIDTH",
"mem_rsp_tag":"`VX_MEM_TAG_WIDTH",
"busy": 1
},
"afu/vortex/cluster/core/pipeline/fetch/icache_stage": {
@@ -207,7 +206,7 @@
"force_miss_st0": 1,
"mshr_push": 1,
"?crsq_in_stall": 1,
"?dreq_alm_full": 1,
"?mreq_alm_full": 1,
"?mshr_alm_full": 1
}
}

View File

@@ -1,7 +1,7 @@
CFLAGS += -std=c++11 -O2 -Wall -Wextra -Wfatal-errors
#CFLAGS += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors
CFLAGS += -Wno-aligned-new -Wno-maybe-uninitialized
CFLAGS += -Wno-maybe-uninitialized
CFLAGS += -I../..
@@ -13,7 +13,7 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
@@ -22,11 +22,11 @@ DBG_PRINT_FLAGS += -DDBG_PRINT_TEX
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
SINGLECORE = -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
#MULTICORE ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
#MULTICORE = -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
#MULTICORE = -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
MULTICORE = -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
SINGLECORE += $(CONFIGS)
MULTICORE += $(CONFIGS)
@@ -44,15 +44,16 @@ SRCS = simulator.cpp testbench.cpp
SRCS += $(DPI_DIR)/util_dpi.cpp $(DPI_DIR)/float_dpi.cpp
VL_FLAGS += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VL_FLAGS += -Wno-DECLFILENAME
VL_FLAGS += -Wno-DECLFILENAME -Wno-REDEFMACRO
VL_FLAGS += --x-initial unique --x-assign unique
VL_FLAGS += verilator.vlt
VL_FLAGS += --exe $(SRCS) $(RTL_INCLUDE)
VL_FLAGS += --cc Vortex.v --top-module $(TOP)
# Use FPNEW PFU core
VL_FLAGS += -DFPU_FPNEW
# FPU backend
FPU_CORE ?= FPU_FPNEW
VL_FLAGS += -D$(FPU_CORE)
DBG_FLAGS += -DVCD_OUTPUT

View File

@@ -5,10 +5,23 @@
#define RESET_DELAY 4
#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 24
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16
#define ENABLE_MEM_STALLS
#ifndef MEM_LATENCY
#define MEM_LATENCY 24
#endif
#ifndef MEM_RQ_SIZE
#define MEM_RQ_SIZE 16
#endif
#ifndef MEM_STALLS_MODULO
#define MEM_STALLS_MODULO 16
#endif
#ifndef VERILATOR_RESET_VALUE
#define VERILATOR_RESET_VALUE 2
#endif
#define VL_WDATA_GETW(lwp, i, n, w) \
VL_SEL_IWII(0, n * w, 0, 0, lwp, i * w, w)
@@ -21,7 +34,7 @@ double sc_time_stamp() {
Simulator::Simulator() {
// force random values for uninitialized signals
Verilated::randReset(2);
Verilated::randReset(VERILATOR_RESET_VALUE);
Verilated::randSeed(50);
// Turn off assertion before reset
@@ -56,19 +69,19 @@ Simulator::~Simulator() {
void Simulator::attach_ram(RAM* ram) {
ram_ = ram;
dram_rsp_vec_.clear();
mem_rsp_vec_.clear();
}
void Simulator::reset() {
print_bufs_.clear();
dram_rsp_vec_.clear();
mem_rsp_vec_.clear();
dram_rsp_active_ = false;
mem_rsp_active_ = false;
csr_req_active_ = false;
csr_rsp_value_ = nullptr;
vortex_->dram_rsp_valid = 0;
vortex_->dram_req_ready = 0;
vortex_->mem_rsp_valid = 0;
vortex_->mem_req_ready = 0;
//vortex_->io_req_ready = 0;
//vortex_->io_rsp_valid = 0;
vortex_->csr_req_valid = 0;
@@ -94,13 +107,13 @@ void Simulator::step() {
vortex_->clk = 0;
this->eval();
dram_rsp_ready_ = vortex_->dram_rsp_ready;
mem_rsp_ready_ = vortex_->mem_rsp_ready;
csr_req_ready_ = vortex_->csr_req_ready;
vortex_->clk = 1;
this->eval();
this->eval_dram_bus();
this->eval_mem_bus();
this->eval_io_bus();
this->eval_csr_bus();
@@ -117,83 +130,83 @@ void Simulator::eval() {
++timestamp;
}
void Simulator::eval_dram_bus() {
void Simulator::eval_mem_bus() {
if (ram_ == nullptr) {
vortex_->dram_req_ready = 0;
vortex_->mem_req_ready = 0;
return;
}
// update DRAM responses schedule
for (auto& rsp : dram_rsp_vec_) {
// update memory responses schedule
for (auto& rsp : mem_rsp_vec_) {
if (rsp.cycles_left > 0)
rsp.cycles_left -= 1;
}
// schedule DRAM responses in FIFO order
std::list<dram_req_t>::iterator dram_rsp_it(dram_rsp_vec_.end());
if (!dram_rsp_vec_.empty()
&& (0 == dram_rsp_vec_.begin()->cycles_left)) {
dram_rsp_it = dram_rsp_vec_.begin();
// schedule memory responses in FIFO order
std::list<mem_req_t>::iterator mem_rsp_it(mem_rsp_vec_.end());
if (!mem_rsp_vec_.empty()
&& (0 == mem_rsp_vec_.begin()->cycles_left)) {
mem_rsp_it = mem_rsp_vec_.begin();
}
// send DRAM response
if (dram_rsp_active_
&& vortex_->dram_rsp_valid && dram_rsp_ready_) {
dram_rsp_active_ = false;
// send memory response
if (mem_rsp_active_
&& vortex_->mem_rsp_valid && mem_rsp_ready_) {
mem_rsp_active_ = false;
}
if (!dram_rsp_active_) {
if (dram_rsp_it != dram_rsp_vec_.end()) {
vortex_->dram_rsp_valid = 1;
memcpy((uint8_t*)vortex_->dram_rsp_data, dram_rsp_it->block.data(), GLOBAL_BLOCK_SIZE);
vortex_->dram_rsp_tag = dram_rsp_it->tag;
dram_rsp_vec_.erase(dram_rsp_it);
dram_rsp_active_ = true;
if (!mem_rsp_active_) {
if (mem_rsp_it != mem_rsp_vec_.end()) {
vortex_->mem_rsp_valid = 1;
memcpy((uint8_t*)vortex_->mem_rsp_data, mem_rsp_it->block.data(), MEM_BLOCK_SIZE);
vortex_->mem_rsp_tag = mem_rsp_it->tag;
mem_rsp_vec_.erase(mem_rsp_it);
mem_rsp_active_ = true;
} else {
vortex_->dram_rsp_valid = 0;
vortex_->mem_rsp_valid = 0;
}
}
// handle DRAM stalls
bool dram_stalled = false;
#ifdef ENABLE_DRAM_STALLS
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
dram_stalled = true;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) {
dram_stalled = true;
if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process DRAM requests
if (!dram_stalled) {
if (vortex_->dram_req_valid) {
if (vortex_->dram_req_rw) {
uint64_t byteen = vortex_->dram_req_byteen;
unsigned base_addr = (vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(vortex_->dram_req_data);
for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) {
// process memory requests
if (!mem_stalled) {
if (vortex_->mem_req_valid) {
if (vortex_->mem_req_rw) {
uint64_t byteen = vortex_->mem_req_byteen;
unsigned base_addr = (vortex_->mem_req_addr * MEM_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(vortex_->mem_req_data);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
} else {
dram_req_t dram_req;
dram_req.tag = vortex_->dram_req_tag;
dram_req.addr = vortex_->dram_req_addr;
ram_->read(vortex_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.block.data());
dram_req.cycles_left = DRAM_LATENCY;
for (auto& rsp : dram_rsp_vec_) {
if (dram_req.addr == rsp.addr) {
dram_req.cycles_left = rsp.cycles_left;
mem_req_t mem_req;
mem_req.tag = vortex_->mem_req_tag;
mem_req.addr = vortex_->mem_req_addr;
ram_->read(vortex_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.block.data());
mem_req.cycles_left = MEM_LATENCY;
for (auto& rsp : mem_rsp_vec_) {
if (mem_req.addr == rsp.addr) {
mem_req.cycles_left = rsp.cycles_left;
break;
}
}
dram_rsp_vec_.emplace_back(dram_req);
mem_rsp_vec_.emplace_back(mem_req);
}
}
}
vortex_->dram_req_ready = !dram_stalled;
vortex_->mem_req_ready = !mem_stalled;
}
void Simulator::eval_io_bus() {

View File

@@ -48,23 +48,23 @@ private:
typedef struct {
int cycles_left;
std::array<uint8_t, GLOBAL_BLOCK_SIZE> block;
std::array<uint8_t, MEM_BLOCK_SIZE> block;
uint32_t addr;
uint32_t tag;
} dram_req_t;
} mem_req_t;
std::unordered_map<int, std::stringstream> print_bufs_;
void eval();
void eval_dram_bus();
void eval_mem_bus();
void eval_io_bus();
void eval_csr_bus();
std::list<dram_req_t> dram_rsp_vec_;
bool dram_rsp_active_;
std::list<mem_req_t> mem_rsp_vec_;
bool mem_rsp_active_;
bool dram_rsp_ready_;
bool mem_rsp_ready_;
bool csr_req_ready_;
bool csr_req_active_;
uint32_t* csr_rsp_value_;

View File

@@ -1,32 +1,114 @@
ASE_BUILD_DIR=build_ase
FPGA_BUILD_DIR=build_fpga
DEVICE_FAMILY ?= arria10
ASE_BUILD_DIR ?= build_ase_$(DEVICE_FAMILY)
FPGA_BUILD_DIR ?= build_fpga_$(DEVICE_FAMILY)
RTL_DIR=../../rtl
ifeq (, $(shell which qsub-synth))
ifeq ($(shell which qsub-synth),)
RUN_SYNTH=$(OPAE_PLATFORM_ROOT)/bin/run.sh > build.log 2>&1 &
else
RUN_SYNTH=qsub-synth
endif
# control RTL debug print states
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSHR
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
DBG_PRINT_FLAGS += -DDBG_PRINT_MEM
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
DBG_FLAGS += $(DBG_PRINT_FLAGS)
DBG_FLAGS += -DDBG_CACHE_REQ_INFO
CONFIG1 := -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG2 := -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG4 := -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG8 := -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG16 := -DNUM_CLUSTERS=4 -DNUM_CORES=4 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG32 := -DNUM_CLUSTERS=4 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
CONFIG64 := -DNUM_CLUSTERS=8 -DNUM_CORES=8 -DL2_ENABLE=0 -DL3_ENABLE=0 $(CONFIGS)
FPU_INCLUDE = -I$(RTL_DIR)/fp_cores -I$(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)
RTL_INCLUDE = -I$(RTL_DIR)/libs -I$(RTL_DIR)/interfaces -I$(RTL_DIR)/cache $(FPU_INCLUDE) -I$(RTL_DIR) -I$(RTL_DIR)/afu
CFLAGS += $(RTL_INCLUDE)
# Debugging
ifdef DEBUG
CFLAGS += $(DBG_FLAGS)
else
CFLAGS += -DNDEBUG
endif
# Enable scope analyzer
ifdef SCOPE
CFLAGS += -DSCOPE
endif
# Enable perf counters
ifdef PERF
CFLAGS += -DPERF_ENABLE
endif
all: ase-1c
gen_sources_a10:
./gen_sources.sh arria10 > sources.txt
$(ASE_BUILD_DIR)_1c/Makefile:
afu_sim_setup -s setup.cfg $(ASE_BUILD_DIR)_1c
gen_sources_s10:
./gen_sources.sh stratix10 > sources.txt
$(ASE_BUILD_DIR)_2c/Makefile:
afu_sim_setup -s setup.cfg $(ASE_BUILD_DIR)_2c
ase-1c: gen_sources_a10 setup-ase-1c
make -C $(ASE_BUILD_DIR)_1c
cp $(RTL_DIR)/fp_cores/altera/arria10/*.hex $(ASE_BUILD_DIR)_1c/work
$(ASE_BUILD_DIR)_4c/Makefile:
afu_sim_setup -s setup.cfg $(ASE_BUILD_DIR)_4c
ase-2c: gen_sources_a10 setup-ase-2c
make -C $(ASE_BUILD_DIR)_2c
cp $(RTL_DIR)/fp_cores/altera/arria10/*.hex $(ASE_BUILD_DIR)_2c/work
$(FPGA_BUILD_DIR)_1c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_1c
ase-4c: gen_sources_a10 setup-ase-4c
make -C $(ASE_BUILD_DIR)_4c
cp $(RTL_DIR)/fp_cores/altera/arria10/*.hex $(ASE_BUILD_DIR)_4c/work
$(FPGA_BUILD_DIR)_2c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_2c
$(FPGA_BUILD_DIR)_4c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_4c
$(FPGA_BUILD_DIR)_8c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_8c
$(FPGA_BUILD_DIR)_16c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_16c
$(FPGA_BUILD_DIR)_32c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_32c
$(FPGA_BUILD_DIR)_64c/build/dcp.qpf:
afu_synth_setup -s setup.cfg $(FPGA_BUILD_DIR)_64c
gen-sources-1c:
./gen_sources.sh $(CFLAGS) $(CONFIG1) > sources.txt
gen-sources-2c:
./gen_sources.sh $(CFLAGS) $(CONFIG2) > sources.txt
gen-sources-4c:
./gen_sources.sh $(CFLAGS) $(CONFIG4) > sources.txt
gen-sources-8c:
./gen_sources.sh $(CFLAGS) $(CONFIG8) > sources.txt
gen-sources-16c:
./gen_sources.sh $(CFLAGS) $(CONFIG16) > sources.txt
gen-sources-32c:
./gen_sources.sh $(CFLAGS) $(CONFIG32) > sources.txt
gen-sources-64c:
./gen_sources.sh $(CFLAGS) $(CONFIG64) > sources.txt
# setup
setup-ase-1c: $(ASE_BUILD_DIR)_1c/Makefile
@@ -34,36 +116,6 @@ setup-ase-2c: $(ASE_BUILD_DIR)_2c/Makefile
setup-ase-4c: $(ASE_BUILD_DIR)_4c/Makefile
$(ASE_BUILD_DIR)_1c/Makefile:
afu_sim_setup -s sources_1c.txt $(ASE_BUILD_DIR)_1c
$(ASE_BUILD_DIR)_2c/Makefile:
afu_sim_setup -s sources_2c.txt $(ASE_BUILD_DIR)_2c
$(ASE_BUILD_DIR)_4c/Makefile:
afu_sim_setup -s sources_4c.txt $(ASE_BUILD_DIR)_4c
fpga-1c: gen_sources_a10 setup-fpga-1c
cd $(FPGA_BUILD_DIR)_1c && $(RUN_SYNTH)
fpga-2c: gen_sources_a10 setup-fpga-2c
cd $(FPGA_BUILD_DIR)_2c && $(RUN_SYNTH)
fpga-4c: gen_sources_a10 setup-fpga-4c
cd $(FPGA_BUILD_DIR)_4c && $(RUN_SYNTH)
fpga-8c: gen_sources_a10 setup-fpga-8c
cd $(FPGA_BUILD_DIR)_8c && $(RUN_SYNTH)
fpga-16c: gen_sources_a10 setup-fpga-16c
cd $(FPGA_BUILD_DIR)_16c && $(RUN_SYNTH)
fpga-32c: gen_sources_s10 setup-fpga-32c
cd $(FPGA_BUILD_DIR)_32c && $(RUN_SYNTH)
fpga-64c: gen_sources_s10 setup-fpga-64c
cd $(FPGA_BUILD_DIR)_64c && $(RUN_SYNTH)
setup-fpga-1c: $(FPGA_BUILD_DIR)_1c/build/dcp.qpf
setup-fpga-2c: $(FPGA_BUILD_DIR)_2c/build/dcp.qpf
@@ -78,35 +130,42 @@ setup-fpga-32c: $(FPGA_BUILD_DIR)_32c/build/dcp.qpf
setup-fpga-64c: $(FPGA_BUILD_DIR)_64c/build/dcp.qpf
$(FPGA_BUILD_DIR)_1c/build/dcp.qpf:
afu_synth_setup -s sources_1c.txt $(FPGA_BUILD_DIR)_1c
# build
$(FPGA_BUILD_DIR)_2c/build/dcp.qpf:
afu_synth_setup -s sources_2c.txt $(FPGA_BUILD_DIR)_2c
ase-1c: gen-sources-1c setup-ase-1c
make -C $(ASE_BUILD_DIR)_1c
cp $(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)/*.hex $(ASE_BUILD_DIR)_1c/work
$(FPGA_BUILD_DIR)_4c/build/dcp.qpf:
afu_synth_setup -s sources_4c.txt $(FPGA_BUILD_DIR)_4c
ase-2c: gen-sources-2c setup-ase-2c
make -C $(ASE_BUILD_DIR)_2c
cp $(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)/*.hex $(ASE_BUILD_DIR)_2c/work
$(FPGA_BUILD_DIR)_8c/build/dcp.qpf:
afu_synth_setup -s sources_8c.txt $(FPGA_BUILD_DIR)_8c
ase-4c: gen-sources-4c setup-ase-4c
make -C $(ASE_BUILD_DIR)_4c
cp $(RTL_DIR)/fp_cores/altera/$(DEVICE_FAMILY)/*.hex $(ASE_BUILD_DIR)_4c/work
$(FPGA_BUILD_DIR)_16c/build/dcp.qpf:
afu_synth_setup -s sources_16c.txt $(FPGA_BUILD_DIR)_16c
fpga-1c: gen-sources-1c setup-fpga-1c
cd $(FPGA_BUILD_DIR)_1c && $(RUN_SYNTH)
$(FPGA_BUILD_DIR)_32c/build/dcp.qpf:
afu_synth_setup -s sources_32c.txt $(FPGA_BUILD_DIR)_32c
fpga-2c: gen-sources-2c setup-fpga-2c
cd $(FPGA_BUILD_DIR)_2c && $(RUN_SYNTH)
$(FPGA_BUILD_DIR)_64c/build/dcp.qpf:
afu_synth_setup -s sources_64c.txt $(FPGA_BUILD_DIR)_64c
fpga-4c: gen-sources-4c setup-fpga-4c
cd $(FPGA_BUILD_DIR)_4c && $(RUN_SYNTH)
run-ase-1c:
cd $(ASE_BUILD_DIR)_1c && make sim
fpga-8c: gen-sources-8c setup-fpga-8c
cd $(FPGA_BUILD_DIR)_8c && $(RUN_SYNTH)
run-ase-2c:
cd $(ASE_BUILD_DIR)_2c && make sim
fpga-16c: gen-sources-16c setup-fpga-16c
cd $(FPGA_BUILD_DIR)_16c && $(RUN_SYNTH)
run-ase-4c:
cd $(ASE_BUILD_DIR)_4c && make sim
fpga-32c: gen-sources-32c setup-fpga-32c
cd $(FPGA_BUILD_DIR)_32c && $(RUN_SYNTH)
fpga-64c: gen-sources-64c setup-fpga-64c
cd $(FPGA_BUILD_DIR)_64c && $(RUN_SYNTH)
# cleanup
clean-ase-1c:
rm -rf $(ASE_BUILD_DIR)_1c sources.txt

View File

@@ -44,6 +44,9 @@ fpgaconf vortex_afu.gbs
# If this reports multiple ports, use --bus with fpgaconf (the bus info can be found with "fpgainfo port")
fpgaconf --bus 0xaf vortex_afu.gbs
# get portid
fpgainfo port
# Running the Test case
cd /driver/tests/basic
make run-fpga
@@ -60,11 +63,13 @@ qsub-sim
make ase
# tests
./run_ase.sh build_ase_1c ../../../driver/tests/basic/basic -n16
./run_ase.sh build_ase_1c ../../../driver/tests/demo/demo -n16
./run_ase.sh build_ase_1c ../../../driver/tests/dogfood/dogfood -n16
./run_ase.sh build_ase_1c ../../../benchmarks/opencl/vecadd/vecadd
./run_ase.sh build_ase_1c ../../../benchmarks/opencl/sgemm/sgemm -n4
./run_ase.sh build_ase_arria10_1c ../../../driver/tests/basic/basic -n1 -t0
./run_ase.sh build_ase_arria10_1c ../../../driver/tests/basic/basic -n1 -t1
./run_ase.sh build_ase_arria10_1c ../../../driver/tests/basic/basic -n16
./run_ase.sh build_ase_arria10_1c ../../../driver/tests/demo/demo -n16
./run_ase.sh build_ase_arria10_1c ../../../driver/tests/dogfood/dogfood -n16
./run_ase.sh build_ase_arria10_1c ../../../benchmarks/opencl/vecadd/vecadd
./run_ase.sh build_ase_arria10_1c ../../../benchmarks/opencl/sgemm/sgemm -n4
# modify "vsim_run.tcl" to dump VCD trace
vcd file trace.vcd
@@ -75,17 +80,10 @@ run -all
tar -zcvf output_files_1c.tar.gz `find ./build_fpga_1c -type f \( -iname \*.rpt -o -iname \*.txt -o -iname \*summary -o -iname \*.log \)`
# compress VCD trace
tar -zcvf vortex.vcd.tar.gz ./build_ase_1c/work/vortex.vcd
tar -zcvf trace.vcd.tar.gz obj_dir/trace.vcd
tar -zcvf trace.fst.tar.gz trace.fst run.log
tar -zcvf run.log.tar.gz run.log
tar -cvjf vortex.vcd.tar.bz2 build_ase_1c/work/vortex.vcd
tar -zcvf vortex.vcd.tar.gz build_ase_1c/work/vortex.vcd
tar -zcvf run.log.tar.gz build_ase_1c/work/run.log
tar -zcvf vx_scope.vcd.tar.gz vx_scope.vcd
tar -cvjf vx_scope.vcd.tar.bz2 vx_scope.vcd
tar -cvjf trace.fst.tar.bz2 trace.fst run.log
tar -cvjf trace.vcd.tar.bz2 trace.vcd run.log
tar -cvjf trace.vcd.tar.bz2 build_ase_arria10_1c/work/run.log build_ase_arria10_1c/work/trace.vcd
# decompress VCD trace
tar -zxvf vortex.vcd.tar.gz
@@ -95,15 +93,4 @@ tar -xvf vortex.vcd.tar.bz2
lsof +D build_ase_1c
# kick off synthesis
make -C unittest clean && make -C unittest > unittest/build.log 2>&1 &
make -C pipeline clean && make -C pipeline > pipeline/build.log 2>&1 &
make -C cache clean && make -C cache > cache/build.log 2>&1 &
make -C core clean && make -C core > core/build.log 2>&1 &
make -C vortex clean && make -C vortex > vortex/build.log 2>&1 &
make -C top1 clean && make -C top1 > top1/build.log 2>&1 &
make -C top2 clean && make -C top2 > top2/build.log 2>&1 &
make -C top4 clean && make -C top4 > top4/build.log 2>&1 &
make -C top8 clean && make -C top8 > top8/build.log 2>&1 &
make -C top16 clean && make -C top16 > top16/build.log 2>&1 &
make -C top32 clean && make -C top32 > top32/build.log 2>&1 &
make -C top64 clean && make -C top64 > top64/build.log 2>&1 &
make core

View File

@@ -1,39 +1,46 @@
#!/bin/bash
rtl_dir="../../rtl"
exclude_list="VX_fpu_fpnew.v"
file_list=""
macros=()
includes=()
add_dirs()
{
for dir in $*; do
echo "+incdir+$dir"
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f); do
exclude=0
for fe in $exclude_list; do
if [[ $file =~ $fe ]]; then
exclude=1
fi
done
if [[ $exclude == 0 ]]; then
file_list="$file_list $file"
# parse command arguments
while getopts D:I:h flag
do
case "${flag}" in
D) macros+=( ${OPTARG} );;
I) includes+=( ${OPTARG} );;
h) echo "Usage: [-D macro] [-I include] [-h help]"
exit 0
;;
\?)
echo "Invalid option: -$OPTARG" 1>&2
exit 1
;;
esac
done
# dump macros
for value in ${macros[@]}; do
echo "+define+$value"
done
# dump include directories
for dir in ${includes[@]}; do
echo "+incdir+$dir"
done
# dump source files
for dir in ${includes[@]}; do
for file in $(find $dir -maxdepth 1 -name '*.v' -o -name '*.sv' -type f); do
exclude=0
for fe in $exclude_list; do
if [[ $file =~ $fe ]]; then
exclude=1
fi
done
if [[ $exclude == 0 ]]; then
echo $file
fi
done
}
add_files()
{
for file in $*; do
file_list="$file_list $file"
done
}
add_dirs $rtl_dir/fp_cores/altera/$1
add_dirs $rtl_dir/libs $rtl_dir/interfaces $rtl_dir/fp_cores $rtl_dir/cache $rtl_dir/tex_unit $rtl_dir $rtl_dir/afu
# dump file list
for file in $file_list; do
echo $file
done

View File

@@ -1,8 +1,5 @@
+define+NUM_CORES=2
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
QI:vortex_afu.qsf

View File

@@ -1,12 +0,0 @@
+define+NUM_CORES=4
+define+NUM_CLUSTERS=4
#+define+L3_ENABLE=1
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu16.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -1,24 +0,0 @@
+define+NUM_CORES=1
+define+SYNTHESIS
+define+QUARTUS
#+define+SCOPE
#+define+PERF_ENABLE
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_MSHR
#+define+DBG_PRINT_CACHE_TAG
#+define+DBG_PRINT_CACHE_DATA
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_PIPELINE
#+define+DBG_PRINT_OPAE
#+define+DBG_PRINT_AVS
#+define+DBG_PRINT_SCOPE
#+define+DBG_CACHE_REQ_INFO
vortex_afu.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -1,14 +0,0 @@
+define+NUM_CORES=8
+define+NUM_CLUSTERS=4
#+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -1,10 +0,0 @@
+define+NUM_CORES=4
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -1,14 +0,0 @@
+define+NUM_CORES=8
+define+NUM_CLUSTERS=8
#+define+L3_ENABLE=1
+define+GLOBAL_BLOCK_SIZE=16
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -1,12 +0,0 @@
+define+NUM_CORES=4
+define+NUM_CLUSTERS=2
#+define+L3_ENABLE=1
+define+SYNTHESIS
+define+QUARTUS
#+define+PERF_ENABLE
vortex_afu8.json
QI:vortex_afu.qsf
C:sources.txt

View File

@@ -1,6 +1,9 @@
/unittest/*
!/unittest/Makefile
/smem/*
!/smem/Makefile
/cache/*
!/cache/Makefile

View File

@@ -1,37 +1,68 @@
.PHONY: unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64
BUILDIR ?= build
.PHONY: unittest pipeline smem cache core vortex top1 top2 top4 top8 top16 top32 top64
unittest:
$(MAKE) -C unittest clean && $(MAKE) -C unittest > unittest/build.log 2>&1 &
mkdir -p unittest/$(BUILDIR)
cp unittest/Makefile unittest/$(BUILDIR)
# Clean and rebuild in the background inside the per-build directory; the log is written to unittest/$(BUILDIR)/build.log, matching the other targets.
$(MAKE) -C unittest/$(BUILDIR) clean && $(MAKE) -C unittest/$(BUILDIR) > unittest/$(BUILDIR)/build.log 2>&1 &
pipeline:
$(MAKE) -C pipeline clean && $(MAKE) -C pipeline > pipeline/build.log 2>&1 &
mkdir -p pipeline/$(BUILDIR)
cp pipeline/Makefile pipeline/$(BUILDIR)
$(MAKE) -C pipeline/$(BUILDIR) clean && $(MAKE) -C pipeline/$(BUILDIR) > pipeline/$(BUILDIR)/build.log 2>&1 &
smem:
mkdir -p smem/$(BUILDIR)
cp smem/Makefile smem/$(BUILDIR)
$(MAKE) -C smem/$(BUILDIR) clean && $(MAKE) -C smem/$(BUILDIR) > smem/$(BUILDIR)/build.log 2>&1 &
cache:
$(MAKE) -C cache clean && $(MAKE) -C cache > cache/build.log 2>&1 &
mkdir -p cache/$(BUILDIR)
cp cache/Makefile cache/$(BUILDIR)
$(MAKE) -C cache/$(BUILDIR) clean && $(MAKE) -C cache/$(BUILDIR) > cache/$(BUILDIR)/build.log 2>&1 &
core:
$(MAKE) -C core clean && $(MAKE) -C core > core/build.log 2>&1 &
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
$(MAKE) -C core/$(BUILDIR) clean && $(MAKE) -C core/$(BUILDIR) > core/$(BUILDIR)/build.log 2>&1 &
vortex:
$(MAKE) -C vortex clean && $(MAKE) -C vortex > vortex/build.log 2>&1 &
mkdir -p vortex/$(BUILDIR)
cp vortex/Makefile vortex/$(BUILDIR)
$(MAKE) -C vortex/$(BUILDIR) clean && $(MAKE) -C vortex/$(BUILDIR) > vortex/$(BUILDIR)/build.log 2>&1 &
top1:
$(MAKE) -C top1 clean && $(MAKE) -C top1 > top1/build.log 2>&1 &
mkdir -p top1/$(BUILDIR)
cp top1/Makefile top1/$(BUILDIR)
$(MAKE) -C top1/$(BUILDIR) clean && $(MAKE) -C top1/$(BUILDIR) > top1/$(BUILDIR)/build.log 2>&1 &
top2:
$(MAKE) -C top2 clean && $(MAKE) -C top2 > top2/build.log 2>&1 &
mkdir -p top2/$(BUILDIR)
cp top2/Makefile top2/$(BUILDIR)
$(MAKE) -C top2/$(BUILDIR) clean && $(MAKE) -C top2/$(BUILDIR) > top2/$(BUILDIR)/build.log 2>&1 &
top4:
$(MAKE) -C top4 clean && $(MAKE) -C top4 > top4/build.log 2>&1 &
mkdir -p top4/$(BUILDIR)
cp top4/Makefile top4/$(BUILDIR)
$(MAKE) -C top4/$(BUILDIR) clean && $(MAKE) -C top4/$(BUILDIR) > top4/$(BUILDIR)/build.log 2>&1 &
top8:
$(MAKE) -C top8 clean && $(MAKE) -C top8 > top8/build.log 2>&1 &
mkdir -p top8/$(BUILDIR)
cp top8/Makefile top8/$(BUILDIR)
$(MAKE) -C top8/$(BUILDIR) clean && $(MAKE) -C top8/$(BUILDIR) > top8/$(BUILDIR)/build.log 2>&1 &
top16:
$(MAKE) -C top16 clean && $(MAKE) -C top16 > top16/build.log 2>&1 &
mkdir -p top16/$(BUILDIR)
cp top16/Makefile top16/$(BUILDIR)
# Clean and rebuild in the background inside the per-build directory; the log is written to top16/$(BUILDIR)/build.log, matching the other targets.
$(MAKE) -C top16/$(BUILDIR) clean && $(MAKE) -C top16/$(BUILDIR) > top16/$(BUILDIR)/build.log 2>&1 &
top32:
$(MAKE) -C top32 clean && $(MAKE) -C top32 > top32/build.log 2>&1 &
mkdir -p top32/$(BUILDIR)
cp top32/Makefile top32/$(BUILDIR)
$(MAKE) -C top32/$(BUILDIR) clean && $(MAKE) -C top32/$(BUILDIR) > top32/$(BUILDIR)/build.log 2>&1 &
top64:
$(MAKE) -C top64 clean && $(MAKE) -C top64 > top64/build.log 2>&1 &
mkdir -p top64/$(BUILDIR)
cp top64/Makefile top64/$(BUILDIR)
$(MAKE) -C top64/$(BUILDIR) clean && $(MAKE) -C top64/$(BUILDIR) > top64/$(BUILDIR)/build.log 2>&1 &

View File

@@ -1,14 +1,12 @@
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
PROJECT = VX_cache
TOP_LEVEL_ENTITY = VX_cache
SRC_FILE = VX_cache.v
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
RTL_DIR=../../../rtl
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
@@ -53,7 +51,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,13 +1,17 @@
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.v
RTL_DIR = ../../../../rtl
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(RTL_DIR)/fp_cores/altera/arria10;$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -55,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,22 +1,22 @@
# Part, Family
PROJECT = VX_pipeline
TOP_LEVEL_ENTITY = VX_pipeline
SRC_FILE = VX_pipeline.v
# This Makefile's recipes reference ../../project.tcl and ../../timing-html.tcl, i.e. it runs from a build
# subdirectory one level below the project directory, so the RTL tree is four levels up (same as the
# cache/core/smem Makefiles).
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
PROJECT = Core
TOP_LEVEL_ENTITY = VX_core
SRC_FILE = VX_core.v
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(RTL_DIR)/fp_cores/altera/arria10;$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE);$(TEX_INCLUDE)
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg
@@ -74,7 +74,7 @@ asm.chg:
$(STAMP) asm.chg
timing: $(PROJECT_FILES)
quartus_sh -t ../timing-html.tcl -project $(PROJECT)
quartus_sh -t ../../timing-html.tcl -project $(PROJECT)
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"

72
hw/syn/quartus/smem/Makefile Executable file
View File

@@ -0,0 +1,72 @@
PROJECT = VX_shared_mem
TOP_LEVEL_ENTITY = VX_shared_mem
SRC_FILE = VX_shared_mem.v
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
ASM_ARGS =
STA_ARGS = --parallel --do_report_timing
# Build targets
all: $(PROJECT).sta.rpt
syn: $(PROJECT).syn.rpt
fit: $(PROJECT).fit.rpt
asm: $(PROJECT).asm.rpt
sta: $(PROJECT).sta.rpt
smart: smart.log
# Target implementations
STAMP = echo done >
$(PROJECT).syn.rpt: smart.log syn.chg $(SOURCE_FILES)
quartus_syn $(PROJECT) $(SYN_ARGS)
$(STAMP) fit.chg
$(PROJECT).fit.rpt: smart.log fit.chg $(PROJECT).syn.rpt
quartus_fit $(PROJECT) $(FIT_ARGS)
$(STAMP) asm.chg
$(STAMP) sta.chg
$(PROJECT).asm.rpt: smart.log asm.chg $(PROJECT).fit.rpt
quartus_asm $(PROJECT) $(ASM_ARGS)
$(PROJECT).sta.rpt: smart.log sta.chg $(PROJECT).fit.rpt
quartus_sta $(PROJECT) $(STA_ARGS)
smart.log: $(PROJECT_FILES)
quartus_sh --determine_smart_action $(PROJECT) > smart.log
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src $(SRC_FILE) -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg
fit.chg:
$(STAMP) fit.chg
sta.chg:
$(STAMP) sta.chg
asm.chg:
$(STAMP) asm.chg
program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "$(PROJECT).sof"
clean:
rm -rf bin *.rpt *.chg *.qsf *.qpf *.qws *.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db tmp-clearbox

View File

@@ -1,16 +1,16 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
# Semicolon-separated include search path passed to project.tcl via -inc.
# The AFU directories are derived from RTL_DIR (instead of a hard-coded relative path) so they stay
# valid when the directory the Makefile runs from changes depth.
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(RTL_DIR)/afu;$(RTL_DIR)/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=1"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=1"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
# Semicolon-separated include search path passed to project.tcl via -inc.
# The AFU directories are derived from RTL_DIR (instead of a hard-coded relative path) so they stay
# valid when the directory the Makefile runs from changes depth.
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(RTL_DIR)/afu;$(RTL_DIR)/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=4"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=$(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=2"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=2"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
#FAMILY = "Arria 10"
#DEVICE = 10AX115N3F40E2SG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
FAMILY = "Stratix 10"
DEVICE = 1SX280HN2F43E2VG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../rtl
#FAMILY = "Arria 10"
#DEVICE = 10AX115N3F40E2SG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
FAMILY = "Stratix 10"
DEVICE = 1SX280HN2F43E2VG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=8"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=8"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
#FAMILY = "Arria 10"
#DEVICE = 10AX115N3F40E2SG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
FAMILY = "Stratix 10"
DEVICE = 1SX280HN2F43E2VG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR=../../../../rtl
#FAMILY = "Arria 10"
#DEVICE = 10AX115N3F40E2SG
#FPU_CORE_PATH=$(RTL_DIR)/fp_cores/altera/arria10
FAMILY = "Stratix 10"
DEVICE = 1SX280HN2F43E2VG
FPU_CORE_PATH=$(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=8" -set "NUM_CLUSTERS=8"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = vortex_afu
TOP_LEVEL_ENTITY = vortex_afu
SRC_FILE = vortex_afu.sv
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;../../../rtl/afu;../../../rtl/afu/ccip;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=2"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)" -set "NOPAE" -set "NUM_CORES=4" -set "NUM_CLUSTERS=2"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,13 +1,19 @@
PROJECT = Unittest
TOP_LEVEL_ENTITY = VX_cache_core_req_bank_sel
SRC_FILE = VX_cache_core_req_bank_sel.v
FPU_INCLUDE = ../../../rtl/fp_cores;../../../rtl/fp_cores/altera/arria10;../../../rtl/fp_cores/fpnew/src;../../../rtl/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;../../../rtl/fp_cores/fpnew/src/common_cells/include;../../../rtl/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfaces;../../../rtl/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
RTL_DIR = ../../../../rtl
# Part, Family
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
RTL_INCLUDE = $(FPU_INCLUDE);$(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on
@@ -51,7 +57,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg

View File

@@ -1,16 +1,16 @@
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH=../../../rtl/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH=../../../rtl/fp_cores/altera/stratix10
PROJECT = Vortex
TOP_LEVEL_ENTITY = Vortex
SRC_FILE = Vortex.sv
RTL_DIR = ../../../../rtl
FAMILY = "Arria 10"
DEVICE = 10AX115N3F40E2SG
FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/arria10
#FAMILY = "Stratix 10"
#DEVICE = 1SX280HN2F43E2VG
#FPU_CORE_PATH = $(RTL_DIR)/fp_cores/altera/stratix10
RTL_DIR=../../../rtl
FPU_INCLUDE = $(RTL_DIR)/fp_cores;$(FPU_CORE_PATH);$(RTL_DIR)/fp_cores/fpnew/src;$(RTL_DIR)/fp_cores/fpnew/src/fpu_div_sqrt_mvp/hdl;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/include;$(RTL_DIR)/fp_cores/fpnew/src/common_cells/src
TEX_INCLUDE = $(RTL_DIR)/tex_unit
RTL_INCLUDE = $(RTL_DIR);$(RTL_DIR)/libs;$(RTL_DIR)/interfaces;$(RTL_DIR)/cache;$(FPU_INCLUDE);$(TEX_INCLUDE)
@@ -59,7 +59,7 @@ smart.log: $(PROJECT_FILES)
# Project initialization
$(PROJECT_FILES):
quartus_sh -t ../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../project.sdc -inc "$(RTL_INCLUDE)"
quartus_sh -t ../../project.tcl -project $(PROJECT) -family $(FAMILY) -device $(DEVICE) -top $(TOP_LEVEL_ENTITY) -src "$(SRC_FILE)" -sdc ../../project.sdc -inc "$(RTL_INCLUDE)"
syn.chg:
$(STAMP) syn.chg

View File

@@ -2,7 +2,21 @@ TOP = VX_cache
PARAMS += -DCACHE_SIZE=4096 -DWORD_SIZE=4 -DCACHE_LINE_SIZE=16 -DNUM_BANKS=4 -DCREQ_SIZE=4 -DMRVQ_SIZE=16 -DDFPQ_SIZE=16 -DSNRQ_SIZE=16 -DCWBQ_SIZE=4 -DDWBQ_SIZE=4 -DFQQ_SIZE=4
INCLUDE = -I../../rtl/ -I../../rtl/libs -I../../rtl/cache
# control RTL debug print states
DBG_PRINT_FLAGS = -DDBG_PRINT_CORE_ICACHE \
-DDBG_PRINT_CORE_DCACHE \
-DDBG_PRINT_CACHE_BANK \
-DDBG_PRINT_CACHE_SNP \
-DDBG_PRINT_CACHE_MSHR \
-DDBG_PRINT_CACHE_TAG \
-DDBG_PRINT_CACHE_DATA \
-DDBG_PRINT_MEM \
-DDBG_PRINT_OPAE \
-DDBG_PRINT_AVS
#DBG_PRINT=$(DBG_PRINT_FLAGS)
INCLUDE = -I../../rtl/ -I../../rtl/cache -I../../rtl/libs
SRCS = cachesim.cpp testbench.cpp

View File

@@ -18,7 +18,7 @@ CacheSim::CacheSim() {
ram_ = nullptr;
cache_ = new VVX_cache();
dram_rsp_active_ = false;
mem_rsp_active_ = false;
snp_req_active_ = false;
//#ifdef VCD_OUTPUT
@@ -39,7 +39,7 @@ CacheSim::~CacheSim() {
void CacheSim::attach_ram(RAM* ram) {
ram_ = ram;
dram_rsp_vec_.clear();
mem_rsp_vec_.clear();
}
void CacheSim::reset() {
@@ -52,7 +52,7 @@ void CacheSim::reset() {
cache_->reset = 0;
this->step();
dram_rsp_vec_.clear();
mem_rsp_vec_.clear();
//clear req and rsp vecs
}
@@ -66,10 +66,10 @@ void CacheSim::step() {
cache_->clk = 1;
this->eval();
//handle core and dram reqs and rsps
//handle core and memory reqs and rsps
this->eval_reqs();
this->eval_rsps();
this->eval_dram_bus();
this->eval_mem_bus();
timestamp++;
}
@@ -104,7 +104,7 @@ void CacheSim::run(){
}
stalls--;
if (stalls == 20){
//stall_dram();
//stall_mem();
//send_snoop_req();
stalls--;
}
@@ -168,8 +168,8 @@ void CacheSim::eval_rsps(){
}
}
void CacheSim::stall_dram(){
cache_->dram_req_ready = 0;
void CacheSim::stall_mem(){
cache_->mem_req_ready = 0;
}
void CacheSim::send_snoop_req(){
@@ -179,81 +179,81 @@ void CacheSim::send_snoop_req(){
cache_->snp_req_tag = 0xff; */
}
void CacheSim::eval_dram_bus() {
void CacheSim::eval_mem_bus() {
if (ram_ == nullptr) {
cache_->dram_req_ready = 0;
cache_->mem_req_ready = 0;
return;
}
// schedule DRAM responses
// schedule memory responses
int dequeue_index = -1;
for (int i = 0; i < dram_rsp_vec_.size(); i++) {
if (dram_rsp_vec_[i].cycles_left > 0) {
dram_rsp_vec_[i].cycles_left -= 1;
for (int i = 0; i < mem_rsp_vec_.size(); i++) {
if (mem_rsp_vec_[i].cycles_left > 0) {
mem_rsp_vec_[i].cycles_left -= 1;
}
if ((dequeue_index == -1)
&& (dram_rsp_vec_[i].cycles_left == 0)) {
&& (mem_rsp_vec_[i].cycles_left == 0)) {
dequeue_index = i;
}
}
// send DRAM response
if (dram_rsp_active_
&& cache_->dram_rsp_valid
&& cache_->dram_rsp_ready) {
dram_rsp_active_ = false;
// send memory response
if (mem_rsp_active_
&& cache_->mem_rsp_valid
&& cache_->mem_rsp_ready) {
mem_rsp_active_ = false;
}
if (!dram_rsp_active_) {
if (!mem_rsp_active_) {
if (dequeue_index != -1) { //time to respond to the request
cache_->dram_rsp_valid = 1;
cache_->mem_rsp_valid = 1;
//copy data from the rsp queue to the cache module
memcpy((uint8_t*)cache_->dram_rsp_data, dram_rsp_vec_[dequeue_index].data, GLOBAL_BLOCK_SIZE);
memcpy((uint8_t*)cache_->mem_rsp_data, mem_rsp_vec_[dequeue_index].data, MEM_BLOCK_SIZE);
cache_->dram_rsp_tag = dram_rsp_vec_[dequeue_index].tag;
free(dram_rsp_vec_[dequeue_index].data); //take data out of the queue
dram_rsp_vec_.erase(dram_rsp_vec_.begin() + dequeue_index);
dram_rsp_active_ = true;
cache_->mem_rsp_tag = mem_rsp_vec_[dequeue_index].tag;
free(mem_rsp_vec_[dequeue_index].data); //take data out of the queue
mem_rsp_vec_.erase(mem_rsp_vec_.begin() + dequeue_index);
mem_rsp_active_ = true;
} else {
cache_->dram_rsp_valid = 0;
cache_->mem_rsp_valid = 0;
}
}
// handle DRAM stalls
bool dram_stalled = false;
#ifdef ENABLE_DRAM_STALLS
if (0 == ((timestamp/2) % DRAM_STALLS_MODULO)) {
dram_stalled = true;
// handle memory stalls
bool mem_stalled = false;
#ifdef ENABLE_MEM_STALLS
if (0 == ((timestamp/2) % MEM_STALLS_MODULO)) {
mem_stalled = true;
} else
if (dram_rsp_vec_.size() >= DRAM_RQ_SIZE) {
dram_stalled = true;
if (mem_rsp_vec_.size() >= MEM_RQ_SIZE) {
mem_stalled = true;
}
#endif
// process DRAM requests
if (!dram_stalled) {
if (cache_->dram_req_valid) {
if (cache_->dram_req_rw) { //write = 1
uint64_t byteen = cache_->dram_req_byteen;
unsigned base_addr = (cache_->dram_req_addr * GLOBAL_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(cache_->dram_req_data);
for (int i = 0; i < GLOBAL_BLOCK_SIZE; i++) {
// process memory requests
if (!mem_stalled) {
if (cache_->mem_req_valid) {
if (cache_->mem_req_rw) { //write = 1
uint64_t byteen = cache_->mem_req_byteen;
unsigned base_addr = (cache_->mem_req_addr * MEM_BLOCK_SIZE);
uint8_t* data = (uint8_t*)(cache_->mem_req_data);
for (int i = 0; i < MEM_BLOCK_SIZE; i++) {
if ((byteen >> i) & 0x1) {
(*ram_)[base_addr + i] = data[i];
}
}
} else {
dram_req_t dram_req;
dram_req.cycles_left = DRAM_LATENCY;
dram_req.data = (uint8_t*)malloc(GLOBAL_BLOCK_SIZE);
dram_req.tag = cache_->dram_req_tag;
ram_->read(cache_->dram_req_addr * GLOBAL_BLOCK_SIZE, GLOBAL_BLOCK_SIZE, dram_req.data);
dram_rsp_vec_.push_back(dram_req);
mem_req_t mem_req;
mem_req.cycles_left = MEM_LATENCY;
mem_req.data = (uint8_t*)malloc(MEM_BLOCK_SIZE);
mem_req.tag = cache_->mem_req_tag;
ram_->read(cache_->mem_req_addr * MEM_BLOCK_SIZE, MEM_BLOCK_SIZE, mem_req.data);
mem_rsp_vec_.push_back(mem_req);
}
}
}
cache_->dram_req_ready = ~dram_stalled;
cache_->mem_req_ready = ~mem_stalled;
}
bool CacheSim::assert_equal(unsigned int* data, unsigned int tag){
@@ -302,19 +302,19 @@ void CacheSim::get_core_rsp(){
std::cout << std::hex << "core_rsp_tag: " << cache_->core_rsp_tag << std::endl;
}
void CacheSim::get_dram_req(){
std::cout << std::hex << "dram_req_valid: " << cache_->dram_req_valid << std::endl;
std::cout << std::hex << "dram_req_rw: " << cache_->dram_req_rw << std::endl;
std::cout << std::hex << "dram_req_byteen: " << cache_->dram_req_byteen << std::endl;
std::cout << std::hex << "dram_req_addr: " << cache_->dram_req_addr << std::endl;
std::cout << std::hex << "dram_req_data: " << cache_->dram_req_data << std::endl;
std::cout << std::hex << "dram_req_tag: " << cache_->dram_req_tag << std::endl;
void CacheSim::get_mem_req(){
std::cout << std::hex << "mem_req_valid: " << cache_->mem_req_valid << std::endl;
std::cout << std::hex << "mem_req_rw: " << cache_->mem_req_rw << std::endl;
std::cout << std::hex << "mem_req_byteen: " << cache_->mem_req_byteen << std::endl;
std::cout << std::hex << "mem_req_addr: " << cache_->mem_req_addr << std::endl;
std::cout << std::hex << "mem_req_data: " << cache_->mem_req_data << std::endl;
std::cout << std::hex << "mem_req_tag: " << cache_->mem_req_tag << std::endl;
}
void CacheSim::get_dram_rsp(){
std::cout << std::hex << "dram_rsp_valid: " << cache_->dram_rsp_valid << std::endl;
std::cout << std::hex << "dram_rsp_data: " << cache_->dram_rsp_data << std::endl;
std::cout << std::hex << "dram_rsp_tag: " << cache_->dram_rsp_tag << std::endl;
std::cout << std::hex << "dram_rsp_ready: " << cache_->dram_rsp_ready << std::endl;
void CacheSim::get_mem_rsp(){
std::cout << std::hex << "mem_rsp_valid: " << cache_->mem_rsp_valid << std::endl;
std::cout << std::hex << "mem_rsp_data: " << cache_->mem_rsp_data << std::endl;
std::cout << std::hex << "mem_rsp_tag: " << cache_->mem_rsp_tag << std::endl;
std::cout << std::hex << "mem_rsp_ready: " << cache_->mem_rsp_ready << std::endl;
}

View File

@@ -14,17 +14,17 @@
#include <vector>
#include <queue>
#define ENABLE_DRAM_STALLS
#define DRAM_LATENCY 100
#define DRAM_RQ_SIZE 16
#define DRAM_STALLS_MODULO 16
#define GLOBAL_BLOCK_SIZE 16
#define ENABLE_MEM_STALLS
#define MEM_LATENCY 100
#define MEM_RQ_SIZE 16
#define MEM_STALLS_MODULO 16
#define MEM_BLOCK_SIZE 16
typedef struct {
int cycles_left;
uint8_t *data;
unsigned tag;
} dram_req_t;
} mem_req_t;
typedef struct {
char valid;
@@ -52,7 +52,7 @@ public:
//req/rsp
void send_req(core_req_t *req);
void clear_req();
void stall_dram();
void stall_mem();
void send_snoop_req();
void send_snp_fwd_in();
@@ -60,12 +60,12 @@ public:
bool assert_equal(unsigned int* data, unsigned int tag);
//debug funcs
void get_dram_req();
void get_mem_req();
void get_core_req(unsigned int (&rsp)[4]);
void get_core_rsp();
bool get_core_req_ready();
bool get_core_rsp_ready();
void get_dram_rsp();
void get_mem_rsp();
void display_miss();
private:
@@ -73,12 +73,12 @@ private:
void eval();
void eval_reqs();
void eval_rsps();
void eval_dram_bus();
void eval_mem_bus();
std::queue<core_req_t*> core_req_vec_;
std::vector<dram_req_t> dram_rsp_vec_;
std::vector<mem_req_t> mem_rsp_vec_;
std::map<unsigned int, unsigned int*> core_rsp_vec_;
int dram_rsp_active_;
int mem_rsp_active_;
uint32_t snp_req_active_;
uint32_t snp_req_size_;

View File

@@ -175,7 +175,7 @@ int FLUSH(CacheSim *sim){
int BACK_PRESSURE(CacheSim *sim){
//happens whenever the core is stalled or DRAM is stalled
//happens whenever the core is stalled or memory is stalled
unsigned int addr[4] = {0x12222222, 0xabbbbbbb, 0xcddddddd, 0xe4444444};
unsigned int data[4] = {0xffffffff, 0x11111111, 0x22222222, 0x33333333};
unsigned int rsp[4] = {0,0,0,0};