adding support for multi-banks memory bus

This commit is contained in:
Blaise Tine
2021-05-04 07:32:03 -07:00
parent bdbf99c5b0
commit bde6a69ea0
11 changed files with 276 additions and 477 deletions

View File

@@ -1,6 +1,7 @@
`include "VX_define.vh"
module VX_avs_wrapper #(
parameter NUM_BANKS = 1,
parameter AVS_DATA_WIDTH = 1,
parameter AVS_ADDR_WIDTH = 1,
parameter AVS_BURST_WIDTH = 1,
@@ -31,103 +32,141 @@ module VX_avs_wrapper #(
input wire mem_rsp_ready,
// AVS bus
output wire [AVS_DATA_WIDTH-1:0] avs_writedata,
input wire [AVS_DATA_WIDTH-1:0] avs_readdata,
output wire [AVS_ADDR_WIDTH-1:0] avs_address,
input wire avs_waitrequest,
output wire avs_write,
output wire avs_read,
output wire [AVS_BYTEENW-1:0] avs_byteenable,
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount,
input avs_readdatavalid,
output wire [AVS_BANKS_BITS-1:0] avs_bankselect
output wire [AVS_DATA_WIDTH-1:0] avs_writedata [NUM_BANKS],
input wire [AVS_DATA_WIDTH-1:0] avs_readdata [NUM_BANKS],
output wire [AVS_ADDR_WIDTH-1:0] avs_address [NUM_BANKS],
input wire avs_waitrequest [NUM_BANKS],
output wire avs_write [NUM_BANKS],
output wire avs_read [NUM_BANKS],
output wire [AVS_BYTEENW-1:0] avs_byteenable [NUM_BANKS],
output wire [AVS_BURST_WIDTH-1:0] avs_burstcount [NUM_BANKS],
input avs_readdatavalid [NUM_BANKS]
);
reg [AVS_BANKS_BITS-1:0] avs_bankselect_r;
reg [AVS_BURST_WIDTH-1:0] avs_burstcount_r;
wire avs_reqq_push = mem_req_valid && mem_req_ready && !mem_req_rw;
wire avs_reqq_pop = mem_rsp_valid && mem_rsp_ready;
localparam BANK_ADDRW = $clog2(NUM_BANKS);
wire avs_rspq_push = avs_readdatavalid;
wire avs_rspq_pop = avs_reqq_pop;
wire avs_rspq_empty;
// Requests handling
wire rsp_queue_going_full;
wire [RD_QUEUE_ADDR_WIDTH-1:0] rsp_queue_size;
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.push (avs_reqq_push),
.pop (avs_rspq_pop),
`UNUSED_PIN (empty),
.full (rsp_queue_going_full),
.size (rsp_queue_size)
);
`UNUSED_VAR (rsp_queue_size)
always @(posedge clk) begin
avs_burstcount_r <= 1;
avs_bankselect_r <= 0;
end
reg [AVS_BURST_WIDTH-1:0] avs_burstcount_r;
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_reqq_push),
.pop (avs_reqq_pop),
.data_in (mem_req_tag),
.data_out (mem_rsp_tag),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
wire [NUM_BANKS-1:0] avs_reqq_pop;
wire [NUM_BANKS-1:0] req_queue_going_full;
wire [NUM_BANKS-1:0][RD_QUEUE_ADDR_WIDTH-1:0] req_queue_size;
wire [NUM_BANKS-1:0][REQ_TAG_WIDTH-1:0] avs_reqq_data_out;
wire [BANK_ADDRW-1:0] req_bank_sel = mem_req_addr [BANK_ADDRW-1:0];
wire avs_reqq_push = mem_req_valid && !mem_req_rw && mem_req_ready;
for (genvar i = 0; i < NUM_BANKS; i++) begin
VX_pending_size #(
.SIZE (RD_QUEUE_SIZE)
) pending_size (
.clk (clk),
.reset (reset),
.push (avs_reqq_push && (req_bank_sel == i)),
.pop (avs_reqq_pop[i]),
`UNUSED_PIN (empty),
.full (req_queue_going_full[i]),
.size (req_queue_size[i])
);
`UNUSED_VAR (req_queue_size)
always @(posedge clk) begin
avs_burstcount_r <= 1;
end
VX_fifo_queue #(
.DATAW (REQ_TAG_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_req_queue (
.clk (clk),
.reset (reset),
.push (avs_reqq_push && (req_bank_sel == i)),
.pop (avs_reqq_pop[i]),
.data_in (mem_req_tag),
.data_out (avs_reqq_data_out[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign avs_read[i] = mem_req_valid && !mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
assign avs_write[i] = mem_req_valid && mem_req_rw && !req_queue_going_full[i] && (req_bank_sel == i);
assign avs_address[i] = mem_req_addr;
assign avs_byteenable[i] = mem_req_byteen;
assign avs_writedata[i] = mem_req_data;
assign avs_burstcount[i] = avs_burstcount_r;
end
assign mem_req_ready = !(avs_waitrequest[req_bank_sel] || req_queue_going_full[req_bank_sel]);
// Responses handling
wire [NUM_BANKS-1:0] rsp_arb_valid_in;
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH+REQ_TAG_WIDTH-1:0] rsp_arb_data_in;
wire [NUM_BANKS-1:0] rsp_arb_ready_in;
wire [NUM_BANKS-1:0][AVS_DATA_WIDTH-1:0] avs_rspq_data_out;
wire [NUM_BANKS-1:0] avs_rspq_empty;
for (genvar i = 0; i < NUM_BANKS; i++) begin
VX_fifo_queue #(
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_readdatavalid[i]),
.pop (avs_reqq_pop[i]),
.data_in (avs_readdata[i]),
.data_out (avs_rspq_data_out[i]),
.empty (avs_rspq_empty[i]),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
assign rsp_arb_valid_in[i] = !avs_rspq_empty[i];
assign rsp_arb_data_in[i] = {avs_rspq_data_out[i], avs_reqq_data_out[i]};
assign avs_reqq_pop[i] = rsp_arb_valid_in[i] && rsp_arb_ready_in[i];
end
VX_stream_arbiter #(
.NUM_REQS (NUM_BANKS),
.DATAW (AVS_DATA_WIDTH+REQ_TAG_WIDTH),
.BUFFERED (0)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_arb_valid_in),
.data_in (rsp_arb_data_in),
.ready_in (rsp_arb_ready_in),
.valid_out (mem_rsp_valid),
.data_out ({mem_rsp_data, mem_rsp_tag}),
.ready_out (mem_rsp_ready)
);
VX_fifo_queue #(
.DATAW (AVS_DATA_WIDTH),
.SIZE (RD_QUEUE_SIZE)
) rd_rsp_queue (
.clk (clk),
.reset (reset),
.push (avs_rspq_push),
.pop (avs_rspq_pop),
.data_in (avs_readdata),
.data_out (mem_rsp_data),
.empty (avs_rspq_empty),
`UNUSED_PIN (full),
`UNUSED_PIN (alm_empty),
`UNUSED_PIN (alm_full),
`UNUSED_PIN (size)
);
assign avs_read = mem_req_valid && !mem_req_rw && !rsp_queue_going_full;
assign avs_write = mem_req_valid && mem_req_rw && !rsp_queue_going_full;
assign avs_address = mem_req_addr;
assign avs_byteenable = mem_req_byteen;
assign avs_writedata = mem_req_data;
assign avs_burstcount = avs_burstcount_r;
assign avs_bankselect = avs_bankselect_r;
assign mem_req_ready = !avs_waitrequest && !rsp_queue_going_full;
assign mem_rsp_valid = !avs_rspq_empty;
`ifdef DBG_PRINT_AVS
always @(posedge clk) begin
if (mem_req_valid && mem_req_ready) begin
if (mem_req_rw)
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, mem_req_data);
else
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, rsp_queue_size);
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `TO_FULL_ADDR(mem_req_addr), mem_req_byteen, mem_req_tag, req_queue_size);
end
if (mem_rsp_valid && mem_rsp_ready) begin
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, rsp_queue_size);
$display("%t: AVS Rd Rsp: tag=%0h, data=%0h, pending=%0d", $time, mem_rsp_tag, mem_rsp_data, req_queue_size);
end
end
`endif

View File

@@ -77,30 +77,28 @@ module ccip_std_afu #(
// User AFU goes here
// ====================================================================
//
// vortex_afu depends on CCI-P and local memory being in the same
// clock domain. This is accomplished by choosing a common clock
// in the AFU's JSON description. The platform instantiates clock-
// crossing shims automatically, as needed.
//
t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS];
logic avs_waitrequest [NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid [NUM_LOCAL_MEM_BANKS];
t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS];
t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS];
logic avs_write [NUM_LOCAL_MEM_BANKS];
logic avs_read [NUM_LOCAL_MEM_BANKS];
//
// Memory banks are used very simply here. Only bank is active at
// a time, selected by mem_bank_select. mem_bank_select is set
// by a CSR from the host.
//
t_local_mem_byte_mask avs_byteenable;
logic avs_waitrequest;
t_local_mem_data avs_readdata;
logic avs_readdatavalid;
t_local_mem_burst_cnt avs_burstcount;
t_local_mem_data avs_writedata;
t_local_mem_addr avs_address;
logic avs_write;
logic avs_read;
// choose which memory bank to test
logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select;
for (genvar b = 0; b < NUM_LOCAL_MEM_BANKS; b++) begin
assign local_mem[b].burstcount = avs_burstcount[b];
assign local_mem[b].writedata = avs_writedata[b];
assign local_mem[b].address = avs_address[b];
assign local_mem[b].byteenable = avs_byteenable[b];
assign local_mem[b].write = avs_write[b];
assign local_mem[b].read = avs_read[b];
assign avs_waitrequest[b] = local_mem[b].waitrequest;
assign avs_readdata[b] = local_mem[b].readdata;
assign avs_readdatavalid[b] = local_mem[b].readdatavalid;
end
vortex_afu #(
.NUM_LOCAL_MEM_BANKS(NUM_LOCAL_MEM_BANKS)
@@ -108,6 +106,9 @@ module ccip_std_afu #(
.clk (clk),
.reset (reset_T1),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0),
.avs_writedata (avs_writedata),
.avs_readdata (avs_readdata),
.avs_address (avs_address),
@@ -116,52 +117,7 @@ module ccip_std_afu #(
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid (avs_readdatavalid),
.mem_bank_select (mem_bank_select),
.cp2af_sRxPort (cp2af_sRx_T1),
.af2cp_sTxPort (af2cp_sTx_T0)
);
//
// Export the local memory interface signals as vectors so that bank
// selection can use array syntax.
//
logic avs_waitrequest_v[NUM_LOCAL_MEM_BANKS];
t_local_mem_data avs_readdata_v[NUM_LOCAL_MEM_BANKS];
logic avs_readdatavalid_v[NUM_LOCAL_MEM_BANKS];
genvar b;
generate
for (b = 0; b < NUM_LOCAL_MEM_BANKS; b = b + 1)
begin : lmb
always_comb
begin
// Local memory to AFU signals
avs_waitrequest_v[b] = local_mem[b].waitrequest;
avs_readdata_v[b] = local_mem[b].readdata;
avs_readdatavalid_v[b] = local_mem[b].readdatavalid;
// Replicate address and write data to all banks. Only
// the request signals have to be bank-specific.
local_mem[b].burstcount = avs_burstcount;
local_mem[b].writedata = avs_writedata;
local_mem[b].address = avs_address;
local_mem[b].byteenable = avs_byteenable;
// Request a write to this bank?
local_mem[b].write = avs_write &&
($bits(mem_bank_select)'(b) == mem_bank_select);
// Request a read from this bank?
local_mem[b].read = avs_read &&
($bits(mem_bank_select)'(b) == mem_bank_select);
end
end
endgenerate
assign avs_waitrequest = avs_waitrequest_v[mem_bank_select];
assign avs_readdata = avs_readdata_v[mem_bank_select];
assign avs_readdatavalid = avs_readdatavalid_v[mem_bank_select];
.avs_readdatavalid (avs_readdatavalid)
);
endmodule

View File

@@ -26,17 +26,15 @@ module vortex_afu #(
output t_if_ccip_Tx af2cp_sTxPort,
// Avalon signals for local memory access
output t_local_mem_data avs_writedata,
input t_local_mem_data avs_readdata,
output t_local_mem_addr avs_address,
input logic avs_waitrequest,
output logic avs_write,
output logic avs_read,
output t_local_mem_byte_mask avs_byteenable,
output t_local_mem_burst_cnt avs_burstcount,
input avs_readdatavalid,
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
output t_local_mem_data avs_writedata [NUM_LOCAL_MEM_BANKS],
input t_local_mem_data avs_readdata [NUM_LOCAL_MEM_BANKS],
output t_local_mem_addr avs_address [NUM_LOCAL_MEM_BANKS],
input logic avs_waitrequest [NUM_LOCAL_MEM_BANKS],
output logic avs_write [NUM_LOCAL_MEM_BANKS],
output logic avs_read [NUM_LOCAL_MEM_BANKS],
output t_local_mem_byte_mask avs_byteenable [NUM_LOCAL_MEM_BANKS],
output t_local_mem_burst_cnt avs_burstcount [NUM_LOCAL_MEM_BANKS],
input avs_readdatavalid [NUM_LOCAL_MEM_BANKS]
);
localparam RESET_DELAY = 3;
@@ -636,6 +634,7 @@ VX_mem_arb #(
//--
VX_avs_wrapper #(
.NUM_BANKS (NUM_LOCAL_MEM_BANKS),
.AVS_DATA_WIDTH (LMEM_LINE_WIDTH),
.AVS_ADDR_WIDTH (LMEM_ADDR_WIDTH),
.AVS_BURST_WIDTH (LMEM_BURST_CTRW),
@@ -670,8 +669,7 @@ VX_avs_wrapper #(
.avs_read (avs_read),
.avs_byteenable (avs_byteenable),
.avs_burstcount (avs_burstcount),
.avs_readdatavalid(avs_readdatavalid),
.avs_bankselect (mem_bank_select)
.avs_readdatavalid(avs_readdatavalid)
);
// CCI-P Read Request ///////////////////////////////////////////////////////////

View File

@@ -1,19 +0,0 @@
`include "VX_platform.vh"
module VX_tex_mgr (
input wire clk,
input wire reset
);
//--
endmodule

View File

@@ -1,50 +0,0 @@
`include "VX_platform.vh"
module VX_tex_unit #(
parameter TADDRW = 32,
parameter MADDRW = 32,
parameter DATAW = 32,
parameter MAXWTW = 8,
parameter MAXHTW = 8,
parameter MAXFTW = 2,
parameter MAXFMW = 1,
parameter MAXAMW = 2,
parameter TAGW = 16,
parameter NUMCRQS = 32
) (
input wire clk,
input wire reset,
// Texture Request
input wire tex_req_valid,
input wire [TADDRW-1:0] tex_req_u,
input wire [TADDRW-1:0] tex_req_v,
input wire [MADDRW-1:0] tex_req_addr,
input wire [MAXWTW-1:0] tex_req_width,
input wire [MAXHTW-1:0] tex_req_height,
input wire [MAXFTW-1:0] tex_req_format,
input wire [MAXFMW-1:0] tex_req_filter,
input wire [MAXAMW-1:0] tex_req_clamp,
input wire [TAGW-1:0] tex_req_tag,
output wire tex_req_ready,
// Texture Response
output wire tex_rsp_valid,
output wire [TAGW-1:0] tex_rsp_tag,
input wire [DATAW-1:0] tex_rsp_data,
input wire tex_rsp_ready,
// Cache Request
output wire [NUMCRQS-1:0] cache_req_valids,
output wire [NUMCRQS-1:0][MADDRW-1:0] cache_req_addrs,
input wire cache_req_ready,
// Cache Response
input wire cache_rsp_valid,
input wire [MADDRW-1:0] cache_rsp_addr,
input wire [DATAW-1:0] cache_rsp_data,
output wire cache_rsp_ready
);
endmodule

View File

@@ -1,6 +1,6 @@
ASE_BUILD_DIR ?= build_ase
FPGA_BUILD_DIR ?= build_fpga
DEVICE_FAMILY ?= arria10
ASE_BUILD_DIR ?= build_ase_$(DEVICE_FAMILY)
FPGA_BUILD_DIR ?= build_fpga_$(DEVICE_FAMILY)
RTL_DIR=../../rtl
ifeq ($(shell which qsub-synth),)

View File

@@ -3,18 +3,18 @@ BUILDIR ?= build
.PHONY: unittest pipeline cache core vortex top1 top2 top4 top8 top16 top32 top64
unittest:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p unittest/$(BUILDIR)
cp core/Makefile unittest/$(BUILDIR)
$(MAKE) -C unittest/$(BUILDIR) clean && $(MAKE) -C unittest/$(BUILDIR) > unittest//$(BUILDIR)build.log 2>&1 &
pipeline:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p pipeline/$(BUILDIR)
cp core/Makefile pipeline/$(BUILDIR)
$(MAKE) -C pipeline/$(BUILDIR) clean && $(MAKE) -C pipeline/$(BUILDIR) > pipeline/$(BUILDIR)/build.log 2>&1 &
cache:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p cache/$(BUILDIR)
cp core/Makefile cache/$(BUILDIR)
$(MAKE) -C cache/$(BUILDIR) clean && $(MAKE) -C cache/$(BUILDIR) > cache/$(BUILDIR)/build.log 2>&1 &
core:
@@ -23,41 +23,41 @@ core:
$(MAKE) -C core/$(BUILDIR) clean && $(MAKE) -C core/$(BUILDIR) > core/$(BUILDIR)/build.log 2>&1 &
vortex:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p vortex/$(BUILDIR)
cp core/Makefile vortex/$(BUILDIR)
$(MAKE) -C vortex/$(BUILDIR) clean && $(MAKE) -C vortex/$(BUILDIR) > vortex/$(BUILDIR)/build.log 2>&1 &
top1:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top1/$(BUILDIR)
cp core/Makefile top1/$(BUILDIR)
$(MAKE) -C top1/$(BUILDIR) clean && $(MAKE) -C top1/$(BUILDIR) > top1/$(BUILDIR)/build.log 2>&1 &
top2:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top2/$(BUILDIR)
cp core/Makefile top2/$(BUILDIR)
$(MAKE) -C top2/$(BUILDIR) clean && $(MAKE) -C top2/$(BUILDIR) > top2/$(BUILDIR)/build.log 2>&1 &
top4:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top4/$(BUILDIR)
cp core/Makefile top4/$(BUILDIR)
$(MAKE) -C top4/$(BUILDIR) clean && $(MAKE) -C top4/$(BUILDIR) > top4/$(BUILDIR)/build.log 2>&1 &
top8:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top8/$(BUILDIR)
cp core/Makefile top8/$(BUILDIR)
$(MAKE) -C top8/$(BUILDIR) clean && $(MAKE) -C top8/$(BUILDIR) > top8/$(BUILDIR)/build.log 2>&1 &
top16:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top16/$(BUILDIR)
cp core/Makefile top16/$(BUILDIR)
$(MAKE) -C top16/$(BUILDIR) clean && $(MAKE) -C top16/$(BUILDIR) > top16/$(BUILDIR)build.log 2>&1 &
top32:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top32/$(BUILDIR)
cp core/Makefile top32/$(BUILDIR)
$(MAKE) -C top32/$(BUILDIR) clean && $(MAKE) -C top32/$(BUILDIR) > top32/$(BUILDIR)/build.log 2>&1 &
top64:
mkdir -p core/$(BUILDIR)
cp core/Makefile core/$(BUILDIR)
mkdir -p top64/$(BUILDIR)
cp core/Makefile top64/$(BUILDIR)
$(MAKE) -C top64/$(BUILDIR) clean && $(MAKE) -C top64/$(BUILDIR) > top64/$(BUILDIR)/build.log 2>&1 &