opae rtl fixes

This commit is contained in:
Blaise Tine
2020-05-31 14:51:42 -07:00
parent 6a3b237054
commit 16d5a8a09c
24 changed files with 547 additions and 403 deletions

View File

@@ -2,7 +2,7 @@
ASE_BUILD_DIR=build_ase
FPGA_BUILD_DIR=build_fpga
all: ase ase-1c fpga fpga-1c
all: ase
ase: setup-ase
make -C $(ASE_BUILD_DIR)

View File

@@ -51,12 +51,14 @@ make run-fpga
#
## ASE build instructions
#
source /export/fpga/bin/setup-fpga-env fpga-pac-a10
# Acquire a server node for running ASE simulations
qsub-sim
# build
make ase
# tests
./run_ase.sh build_ase ../../driver/tests/basic/basic
./run_ase.sh build_ase ../../driver/tests/demo/demo

View File

@@ -1,9 +1,7 @@
vortex_afu.json
+define+GLOBAL_BLOCK_SIZE=64
+define+DCACHE_SIZE=2048
+define+ICACHE_SIZE=1024
+define+DCACHE_SIZE=4096
+define+ICACHE_SIZE=2048
+define+SCACHE_SIZE=1024
+define+NUM_CORES=2
@@ -11,20 +9,20 @@ vortex_afu.json
+define+NUM_THREADS=4
+define+DNUM_BANKS=4
+define+INUM_BANKS=2
+define+INUM_BANKS=1
+define+SNUM_BANKS=4
+define+DDFPQ_SIZE=16
+define+IDFPQ_SIZE=16
+define+SDFPQ_SIZE=0
#+define+DBG_PRINT_CORE_ICACHE
#+define+DBG_PRINT_CORE_DCACHE
#+define+DBG_PRINT_CACHE_BANK
#+define+DBG_PRINT_CACHE_SNP
#+define+DBG_PRINT_CACHE_MSRQ
#+define+DBG_PRINT_DRAM
#+define+DBG_PRINT_OPAE
+define+DBG_PRINT_CORE_ICACHE
+define+DBG_PRINT_CORE_DCACHE
+define+DBG_PRINT_CACHE_BANK
+define+DBG_PRINT_CACHE_SNP
+define+DBG_PRINT_CACHE_MSRQ
+define+DBG_PRINT_DRAM
+define+DBG_PRINT_OPAE
+incdir+.
+incdir+../rtl
@@ -81,6 +79,7 @@ vortex_afu.json
../rtl/libs/VX_generic_priority_encoder.v
../rtl/libs/VX_priority_encoder.v
../rtl/libs/VX_generic_queue.v
../rtl/libs/VX_indexable_queue.v
../rtl/libs/VX_countones.v
../rtl/Vortex_Socket.v

View File

@@ -3,7 +3,7 @@ import local_mem_cfg_pkg::*;
`include "afu_json_info.vh"
`include "VX_define.vh"
`define DRAM_TO_BYTE_ADDR(x) {x, 6'b0}
`define VX_TO_DRAM_ADDR(x) x[`VX_DRAM_ADDR_WIDTH-1:(`VX_DRAM_ADDR_WIDTH-DRAM_ADDR_WIDTH)]
module vortex_afu #(
parameter NUM_LOCAL_MEM_BANKS = 2
@@ -30,17 +30,17 @@ module vortex_afu #(
output logic [$clog2(NUM_LOCAL_MEM_BANKS)-1:0] mem_bank_select
);
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
localparam DRAM_TAG_WIDTH = `L3DRAM_TAG_WIDTH;
localparam DRAM_ADDR_WIDTH = $bits(t_local_mem_addr);
localparam DRAM_LINE_WIDTH = $bits(t_local_mem_data);
`STATIC_ASSERT(DRAM_ADDR_WIDTH == `L3DRAM_ADDR_WIDTH, "invalid vortex dram bus!")
`STATIC_ASSERT(DRAM_LINE_WIDTH == `L3DRAM_LINE_WIDTH, "invalid vortex dram bus!")
localparam DRAM_LINE_LW = $clog2(DRAM_LINE_WIDTH);
localparam VX_DRAM_LINE_LW = $clog2(`VX_DRAM_LINE_WIDTH);
localparam AVS_RD_QUEUE_SIZE = 16;
localparam CCI_RD_WINDOW_SIZE = 8;
localparam CCI_RD_QUEUE_SIZE = 2 * CCI_RD_WINDOW_SIZE;
localparam CCI_RW_QUEUE_SIZE = 1024;
localparam AFU_ID_L = 16'h0002; // AFU ID Lower
localparam AFU_ID_H = 16'h0004; // AFU ID Higher
@@ -67,32 +67,33 @@ typedef enum logic[3:0] {
STATE_CLFLUSH
} state_t;
typedef logic [`LOG2UP(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
typedef logic [$clog2(CCI_RD_WINDOW_SIZE)-1:0] t_cci_rdq_tag;
typedef logic [$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:0] t_cci_rdq_data;
state_t state;
// Vortex ports ///////////////////////////////////////////////////////////////
logic vx_dram_req_read;
logic vx_dram_req_write;
logic [DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
logic [DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_valid;
logic vx_dram_req_rw;
logic [`VX_DRAM_BYTEEN_WIDTH-1:0] vx_dram_req_byteen;
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_dram_req_addr;
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_req_data;
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_req_tag;
logic vx_dram_req_ready;
logic vx_dram_rsp_valid;
logic [DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
logic [DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic [`VX_DRAM_LINE_WIDTH-1:0] vx_dram_rsp_data;
logic [`VX_DRAM_TAG_WIDTH-1:0] vx_dram_rsp_tag;
logic vx_dram_rsp_ready;
logic vx_snp_req_valid;
logic [DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
logic [0:0] vx_snp_req_tag;
logic [`VX_DRAM_ADDR_WIDTH-1:0] vx_snp_req_addr;
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_req_tag;
logic vx_snp_req_ready;
logic vx_snp_rsp_valid;
logic [0:0] vx_snp_rsp_addr;
logic [`VX_SNP_TAG_WIDTH-1:0] vx_snp_rsp_tag;
logic vx_snp_rsp_ready;
logic vx_busy;
@@ -100,14 +101,11 @@ logic vx_busy;
// AVS Queues /////////////////////////////////////////////////////////////////
logic avs_rtq_push;
logic [DRAM_TAG_WIDTH-1:0] avs_rtq_din;
logic avs_rtq_pop;
logic [DRAM_TAG_WIDTH-1:0] avs_rtq_dout;
logic avs_rtq_empty;
logic avs_rtq_full;
logic avs_rdq_push;
t_local_mem_data avs_rdq_din;
logic avs_rdq_pop;
t_local_mem_data avs_rdq_dout;
logic avs_rdq_empty;
@@ -118,16 +116,11 @@ logic avs_rdq_full;
logic [2:0] csr_cmd;
t_ccip_clAddr csr_io_addr;
t_local_mem_addr csr_mem_addr;
logic [DRAM_ADDR_WIDTH-1:0] csr_data_size;
t_ccip_clAddr csr_data_size;
// MMIO controller ////////////////////////////////////////////////////////////
t_ccip_c0_ReqMmioHdr mmioHdr;
always_comb
begin
mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
end
// View of the CCI-P channel-0 RX header cast to the MMIO request header type.
// NOTE: an initializer on a variable declaration is evaluated only once at
// time zero; a continuous assignment is required so that mmioHdr follows
// cp2af_sRxPort.c0.hdr on every change.
t_ccip_c0_ReqMmioHdr mmioHdr;
assign mmioHdr = t_ccip_c0_ReqMmioHdr'(cp2af_sRxPort.c0.hdr);
always_ff @(posedge clk)
begin
@@ -151,27 +144,27 @@ begin
case (mmioHdr.address)
MMIO_CSR_IO_ADDR: begin
csr_io_addr <= t_ccip_clAddr'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_IO_ADDR: 0x%0h", $time, t_ccip_clAddr'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_CSR_MEM_ADDR: begin
csr_mem_addr <= t_local_mem_addr'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_MEM_ADDR: 0x%0h", $time, t_local_mem_addr'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_CSR_DATA_SIZE: begin
csr_data_size <= $bits(csr_data_size)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data));
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_DATA_SIZE: %0d", $time, $bits(csr_data_size)'(cp2af_sRxPort.c0.data));
`endif
end
MMIO_CSR_CMD: begin
csr_cmd <= $bits(csr_cmd)'(cp2af_sRxPort.c0.data);
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: CSR_CMD: %0d", $time, $bits(csr_cmd)'(cp2af_sRxPort.c0.data));
`endif
end
default: begin
// user-defined CSRs
@@ -202,11 +195,11 @@ begin
16'h0006: af2cp_sTxPort.c2.data <= 64'h0; // next AFU
16'h0008: af2cp_sTxPort.c2.data <= 64'h0; // reserved
MMIO_CSR_STATUS: begin
`ifdef DBG_PRINT_OPAE
if (state != af2cp_sTxPort.c2.data) begin
$display("%t: STATUS: state=%0d", $time, state);
end
`endif
`ifdef DBG_PRINT_OPAE
if (state != af2cp_sTxPort.c2.data) begin
$display("%t: STATUS: state=%0d", $time, state);
end
`endif
af2cp_sTxPort.c2.data <= state;
end
default: af2cp_sTxPort.c2.data <= 64'h0;
@@ -218,20 +211,16 @@ end
// COMMAND FSM ////////////////////////////////////////////////////////////////
logic [DRAM_ADDR_WIDTH-1:0] cci_write_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_read_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_write_ctr;
t_ccip_clAddr cci_wr_req_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_rd_req_ctr;
logic [DRAM_ADDR_WIDTH-1:0] avs_wr_req_ctr;
logic vx_reset;
logic cmd_read_done;
logic cmd_write_done;
logic cmd_run_done;
logic cmd_clflush_done;
always_comb
begin
cmd_run_done = !vx_busy;
end
// The RUN command is complete once vx_busy is deasserted.
// NOTE: `logic x = expr;` is a one-time initialization, not combinational
// logic, so the value is driven with a continuous assignment instead.
logic cmd_run_done;
assign cmd_run_done = !vx_busy;
always_ff @(posedge clk)
begin
@@ -247,28 +236,28 @@ begin
STATE_IDLE: begin
case (csr_cmd)
CMD_TYPE_READ: begin
`ifdef DBG_PRINT_OPAE
$display("%t: STATE READ: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: STATE READ: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
`endif
state <= STATE_READ;
end
CMD_TYPE_WRITE: begin
`ifdef DBG_PRINT_OPAE
$display("%t: STATE WRITE: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: STATE WRITE: ia=%0h da=%0h sz=%0d", $time, csr_io_addr, csr_mem_addr, csr_data_size);
`endif
state <= STATE_WRITE;
end
CMD_TYPE_RUN: begin
`ifdef DBG_PRINT_OPAE
$display("%t: STATE START", $time);
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: STATE START", $time);
`endif
vx_reset <= 1;
state <= STATE_START;
end
CMD_TYPE_CLFLUSH: begin
`ifdef DBG_PRINT_OPAE
$display("%t: STATE CFLUSH: da=%0h sz=%0d", $time, csr_mem_addr, csr_data_size);
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: STATE CFLUSH: da=%0h sz=%0d", $time, csr_mem_addr, csr_data_size);
`endif
state <= STATE_CLFLUSH;
end
endcase
@@ -311,116 +300,132 @@ end
logic vortex_enabled;
logic cci_rdq_empty;
t_cci_rdq_data cci_rdq_dout;
logic cci_rdq_pop;
logic cci_dram_req_read_fire;
logic cci_dram_req_write_fire;
logic vx_dram_req_read_fire;
logic vx_dram_req_write_fire;
logic vx_dram_rsp_fire;
logic [`LOG2UP(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next;
t_ccip_clAddr next_avs_address;
always_comb
begin
vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
logic cci_dram_rd_req_fire;
logic cci_dram_wr_req_fire;
logic vx_dram_rd_req_fire;
logic vx_dram_wr_req_fire;
logic vx_dram_rd_rsp_fire;
next_avs_address = csr_mem_addr + {avs_write_ctr[DRAM_ADDR_WIDTH-1:$bits(t_cci_rdq_tag)], t_cci_rdq_tag'(cci_rdq_dout)};
t_local_mem_byte_mask vx_dram_req_byteen_;
logic [$clog2(AVS_RD_QUEUE_SIZE+1)-1:0] avs_pending_reads, avs_pending_reads_next;
logic [DRAM_LINE_LW-1:0] vx_dram_req_offset, vx_dram_rsp_offset;
logic [DRAM_ADDR_WIDTH-1:0] cci_dram_rd_req_addr, cci_dram_wr_req_addr;
cci_rdq_pop = (state == STATE_WRITE
&& !cci_rdq_empty
&& !avs_waitrequest
&& avs_write_ctr < csr_data_size);
logic cci_dram_rd_req_enable, cci_dram_wr_req_enable;
logic vx_dram_req_enable, vx_dram_rd_req_enable, vx_dram_wr_req_enable;
cci_dram_req_read_fire = (state == STATE_READ)
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE)
&& !avs_waitrequest
&& avs_read_ctr < csr_data_size;
assign vortex_enabled = (STATE_RUN == state) || (STATE_CLFLUSH == state);
cci_dram_req_write_fire = (state == STATE_WRITE)
&& cci_rdq_pop;
assign cci_dram_rd_req_enable = (state == STATE_READ)
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE)
&& (avs_rd_req_ctr != 0);
vx_dram_req_read_fire = vx_dram_req_read && vx_dram_req_ready;
assign cci_dram_wr_req_enable = (state == STATE_WRITE)
&& !cci_rdq_empty
&& (avs_wr_req_ctr != 0);
vx_dram_req_write_fire = vx_dram_req_write && vx_dram_req_ready;
assign vx_dram_req_enable = vortex_enabled && (avs_pending_reads < AVS_RD_QUEUE_SIZE);
assign vx_dram_rd_req_enable = vx_dram_req_enable && vx_dram_req_valid && ~vx_dram_req_rw;
assign vx_dram_wr_req_enable = vx_dram_req_enable && vx_dram_req_valid && vx_dram_req_rw;
vx_dram_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
assign cci_dram_rd_req_fire = cci_dram_rd_req_enable && ~avs_waitrequest;
assign cci_dram_wr_req_fire = cci_dram_wr_req_enable && ~avs_waitrequest;
if ((cci_dram_req_read_fire || vx_dram_req_read_fire)
&& ~avs_rdq_pop) begin
avs_pending_reads_next = avs_pending_reads + 1;
end else
if (~(cci_dram_req_read_fire || vx_dram_req_read_fire)
&& avs_rdq_pop) begin
avs_pending_reads_next = avs_pending_reads - 1;
end else begin
avs_pending_reads_next = avs_pending_reads;
end
assign vx_dram_rd_req_fire = vx_dram_rd_req_enable && ~avs_waitrequest;
assign vx_dram_wr_req_fire = vx_dram_wr_req_enable && ~avs_waitrequest;
cmd_write_done = (avs_write_ctr >= csr_data_size);
assign vx_dram_rd_rsp_fire = vx_dram_rsp_valid && vx_dram_rsp_ready;
// Next value of the outstanding AVS read counter:
//   +1 when a read request (CCI- or Vortex-originated) fires and no response
//      is popped from the read-data queue in the same cycle,
//   -1 when a response is popped and no read request fires,
//   unchanged otherwise (both or neither happened).
// The counter is selected inside each ternary arm because `+` binds tighter
// than `?:`; writing `pending + cond ? 1 : ...` evaluates `(pending + cond)`
// as the condition and never yields pending +/- 1.
assign avs_pending_reads_next =
    ((cci_dram_rd_req_fire || vx_dram_rd_req_fire) && ~avs_rdq_pop)  ? (avs_pending_reads + 1) :
    (~(cci_dram_rd_req_fire || vx_dram_rd_req_fire) && avs_rdq_pop)  ? (avs_pending_reads - 1) :
                                                                       avs_pending_reads;
assign cmd_write_done = (0 == avs_wr_req_ctr);
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_req_offset = {{VX_DRAM_LINE_LW{1'b0}}, vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]} << VX_DRAM_LINE_LW;
assign vx_dram_req_byteen_ = vx_dram_req_byteen << ({(VX_DRAM_LINE_LW - 3)'(0), vx_dram_req_addr[(DRAM_LINE_LW-VX_DRAM_LINE_LW)-1:0]} << (VX_DRAM_LINE_LW - 3));
end else begin
assign vx_dram_req_offset = 0;
assign vx_dram_req_byteen_ = 64'hffffffffffffffff;
end
// AVS request multiplexer: selects address, byte-enable and write data for the
// local-memory port based on the current FSM state.
//   STATE_READ  : address from the CCI read-side address counter, all bytes enabled
//   STATE_WRITE : address from the CCI write-side address counter, all bytes
//                 enabled, data taken from the upper bits of the CCI read queue
//                 entry (the low bits hold the t_cci_rdq_tag)
//   otherwise   : Vortex DRAM request (address converted with VX_TO_DRAM_ADDR,
//                 data shifted by vx_dram_req_offset, byte-enable from
//                 vx_dram_req_byteen_)
// The case labels use the state_t enumerators, matching every other comparison
// against `state` in this module, instead of the CMD_TYPE_* CSR command codes.
// NOTE(review): assumes CMD_TYPE_READ/WRITE were only working because their
// encodings coincide with STATE_READ/WRITE — confirm against the definitions.
always_comb
begin
case (state)
STATE_READ: avs_address = cci_dram_rd_req_addr;
STATE_WRITE: avs_address = cci_dram_wr_req_addr;
default: avs_address = `VX_TO_DRAM_ADDR(vx_dram_req_addr);
endcase
case (state)
STATE_READ: avs_byteenable = 64'hffffffffffffffff;
STATE_WRITE: avs_byteenable = 64'hffffffffffffffff;
default: avs_byteenable = vx_dram_req_byteen_;
endcase
case (state)
STATE_WRITE: avs_writedata = cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
default: avs_writedata = vx_dram_req_data << vx_dram_req_offset;
endcase
end
assign avs_read = cci_dram_rd_req_enable || vx_dram_rd_req_enable;
assign avs_write = cci_dram_wr_req_enable || vx_dram_wr_req_enable;
always_ff @(posedge clk)
begin
if (SoftReset)
begin
mem_bank_select <= 0;
avs_burstcount <= 1;
avs_byteenable <= 64'hffffffffffffffff;
avs_read <= 0;
avs_write <= 0;
avs_read_ctr <= 0;
avs_write_ctr <= 0;
avs_pending_reads <= 0;
mem_bank_select <= 0;
avs_burstcount <= 1;
avs_rd_req_ctr <= 0;
avs_wr_req_ctr <= 0;
avs_pending_reads <= 0;
cci_dram_rd_req_addr <= 0;
cci_dram_wr_req_addr <= 0;
end
else begin
avs_read <= 0;
avs_write <= 0;
if (state == STATE_IDLE) begin
avs_read_ctr <= 0;
avs_write_ctr <= 0;
end
if (cci_dram_req_read_fire) begin
avs_address <= csr_mem_addr + avs_read_ctr;
avs_read_ctr <= avs_read_ctr + 1;
avs_read <= 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(csr_mem_addr + avs_read_ctr), avs_pending_reads);
`endif
end
if (cci_dram_req_write_fire) begin
avs_writedata <= cci_rdq_dout[$bits(t_ccip_clData) + $bits(t_cci_rdq_tag)-1:$bits(t_cci_rdq_tag)];
avs_address <= next_avs_address;
avs_write_ctr <= avs_write_ctr + 1;
avs_write <= 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h (%0d/%0d)", $time, `DRAM_TO_BYTE_ADDR(next_avs_address), avs_write_ctr + 1, csr_data_size);
`endif
end
if (vx_dram_req_read_fire) begin
avs_address <= vx_dram_req_addr;
avs_read <= 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_dram_req_addr), avs_pending_reads);
`endif
end
if (vx_dram_req_write_fire) begin
avs_address <= vx_dram_req_addr;
avs_writedata <= vx_dram_req_data;
avs_write <= 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h", $time, `DRAM_TO_BYTE_ADDR(vx_dram_req_addr));
`endif
end
if (state == STATE_IDLE) begin
if (CMD_TYPE_READ == csr_cmd) begin
cci_dram_rd_req_addr <= csr_mem_addr;
avs_rd_req_ctr <= csr_data_size;
end
else if (CMD_TYPE_WRITE == csr_cmd) begin
cci_dram_wr_req_addr <= csr_mem_addr;
avs_wr_req_ctr <= csr_data_size;
end
end
if (cci_dram_rd_req_fire) begin
cci_dram_rd_req_addr <= cci_dram_rd_req_addr + 1;
avs_rd_req_ctr <= avs_rd_req_ctr - 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), (avs_rd_req_ctr - 1), avs_pending_reads_next);
`endif
end
if (cci_dram_wr_req_fire) begin
cci_dram_wr_req_addr <= ((cci_dram_wr_req_addr + 1) & ~(CCI_RD_WINDOW_SIZE-1)) | t_cci_rdq_tag'(cci_rdq_dout);
avs_wr_req_ctr <= avs_wr_req_ctr - 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AVS Wr Req: addr=%0h, data=%0h, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_writedata, (avs_wr_req_ctr - 1));
`endif
end
`ifdef DBG_PRINT_OPAE
if (vx_dram_rd_req_fire) begin
$display("%t: AVS Rd Req: addr=%0h, byteen=%0h, tag=%0h, pending=%0d", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_pending_reads_next);
end
if (vx_dram_wr_req_fire) begin
$display("%t: AVS Wr Req: addr=%0h, byteen=%0h, tag=%0h, data=%0h", $time, `DRAM_TO_BYTE_ADDR(avs_address), avs_byteenable, vx_dram_req_tag, avs_writedata);
end
if (avs_readdatavalid) begin
$display("%t: AVS Rd Rsp: pending=%0d", $time, avs_pending_reads_next);
$display("%t: AVS Rd Rsp: data=%0h, pending=%0d", $time, avs_readdata, avs_pending_reads_next);
end
`endif
@@ -430,55 +435,42 @@ end
// Vortex DRAM requests
always_comb
begin
vx_dram_req_ready = vortex_enabled
&& !avs_waitrequest
&& (avs_pending_reads < AVS_RD_QUEUE_SIZE);
end
assign vx_dram_req_ready = vx_dram_req_enable && !avs_waitrequest;
// Vortex DRAM fill response
always_comb
begin
vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
vx_dram_rsp_tag = avs_rtq_dout;
vx_dram_rsp_data = avs_rdq_dout;
assign vx_dram_rsp_valid = vortex_enabled && !avs_rdq_empty;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign vx_dram_rsp_data = (avs_rdq_dout >> vx_dram_rsp_offset);
end else begin
assign vx_dram_rsp_data = avs_rdq_dout;
end
// AVS address read request queue /////////////////////////////////////////////
logic cci_wr_req;
always_comb
begin
avs_rtq_push = vx_dram_req_read_fire;
avs_rtq_din = vx_dram_req_tag;
avs_rtq_pop = vx_dram_rsp_fire;
end
assign avs_rtq_push = vx_dram_rd_req_fire;
assign avs_rtq_pop = vx_dram_rd_rsp_fire;
VX_generic_queue #(
.DATAW(DRAM_TAG_WIDTH),
.DATAW(`VX_DRAM_TAG_WIDTH + DRAM_LINE_LW),
.SIZE(AVS_RD_QUEUE_SIZE)
) avs_rd_req_queue (
.clk (clk),
.reset (SoftReset),
.push (avs_rtq_push),
.data_in (avs_rtq_din),
.data_in ({vx_dram_req_tag, vx_dram_req_offset}),
.pop (avs_rtq_pop),
.data_out (avs_rtq_dout),
.data_out ({vx_dram_rsp_tag, vx_dram_rsp_offset}),
.empty (avs_rtq_empty),
.full (avs_rtq_full)
);
// AVS data read response queue ///////////////////////////////////////////////
always_comb
begin
avs_rdq_push = avs_readdatavalid;
avs_rdq_din = avs_readdata;
avs_rdq_pop = vx_dram_rsp_fire || cci_wr_req;
end
logic cci_wr_req_fire;
assign avs_rdq_push = avs_readdatavalid;
assign avs_rdq_pop = vx_dram_rd_rsp_fire || cci_wr_req_fire;
VX_generic_queue #(
.DATAW(DRAM_LINE_WIDTH),
@@ -487,81 +479,102 @@ VX_generic_queue #(
.clk (clk),
.reset (SoftReset),
.push (avs_rdq_push),
.data_in (avs_rdq_din),
.data_in (avs_readdata),
.pop (avs_rdq_pop),
.data_out (avs_rdq_dout),
.empty (avs_rdq_empty),
.full (avs_rdq_full)
);
// CCI Read Request ///////////////////////////////////////////////////////////
// CCI-P Read Request ///////////////////////////////////////////////////////////
t_ccip_c0_ReqMemHdr cci_read_hdr;
logic [$clog2(CCI_RD_QUEUE_SIZE+1)-1:0] cci_pending_reads, cci_pending_reads_next;
t_ccip_clAddr cci_rd_req_addr, cci_rd_req_ctr, cci_rd_req_ctr_next;
t_cci_rdq_tag cci_rd_rsp_ctr;
logic [DRAM_ADDR_WIDTH-1:0] cci_read_ctr;
t_cci_rdq_tag cci_rdq_ctr;
logic cci_rd_req_fire, cci_rd_rsp_fire;
logic cci_rd_req_enable, cci_rd_req_wait;
logic cci_rdq_full;
logic cci_rdq_push;
logic cci_rdq_full, cci_rdq_push, cci_rdq_pop;
t_cci_rdq_data cci_rdq_din;
logic cci_read_wait;
always_comb
begin
cci_read_hdr = t_ccip_c0_ReqMemHdr'(0);
cci_read_hdr.address = csr_io_addr + cci_read_ctr;
cci_read_hdr.mdata = t_cci_rdq_tag'(cci_read_ctr);
cci_rdq_push = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
always_comb begin
af2cp_sTxPort.c0.hdr = t_ccip_c0_ReqMemHdr'(0);
af2cp_sTxPort.c0.hdr.address = cci_rd_req_addr;
af2cp_sTxPort.c0.hdr.mdata = t_cci_rdq_tag'(cci_rd_req_ctr);
end
assign cci_rd_req_fire = af2cp_sTxPort.c0.valid && !cp2af_sRxPort.c0TxAlmFull;
assign cci_rd_rsp_fire = (STATE_WRITE == state) && cp2af_sRxPort.c0.rspValid;
assign cci_rd_req_ctr_next = cci_rd_req_ctr + (cci_rd_req_fire ? 1 : 0);
assign cci_rdq_pop = cci_dram_wr_req_fire;
assign cci_rdq_push = cci_rd_rsp_fire;
assign cci_rdq_din = {cp2af_sRxPort.c0.data, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata)};
// Next value of the CCI read-queue occupancy counter:
//   +1 when a CCI read request fires and no entry is popped this cycle,
//   -1 when an entry is popped and no CCI read request fires,
//   unchanged otherwise.
// The counter is selected inside each ternary arm because `+` binds tighter
// than `?:`; `pending + cond ? 1 : ...` would use `(pending + cond)` as the
// condition instead of adding the selected delta.
assign cci_pending_reads_next =
    (cci_rd_req_fire && ~cci_rdq_pop)  ? (cci_pending_reads + 1) :
    (~cci_rd_req_fire && cci_rdq_pop)  ? (cci_pending_reads - 1) :
                                         cci_pending_reads;
assign af2cp_sTxPort.c0.valid = cci_rd_req_enable && ~cci_rd_req_wait;
// Send read requests to CCI
always_ff @(posedge clk)
begin
if (SoftReset) begin
af2cp_sTxPort.c0.hdr <= 0;
af2cp_sTxPort.c0.valid <= 0;
cci_read_ctr <= 0;
cci_rdq_ctr <= 0;
cci_read_wait <= 0;
cci_rd_req_addr <= 0;
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
cci_pending_reads <= 0;
cci_rd_req_enable <= 0;
cci_rd_req_wait <= 0;
end
else begin
af2cp_sTxPort.c0.valid <= 0;
if (STATE_IDLE == state) begin
cci_read_ctr <= 0;
cci_rdq_ctr <= 0;
cci_read_wait <= 0;
if ((STATE_IDLE == state)
&& (CMD_TYPE_WRITE == csr_cmd)) begin
cci_rd_req_addr <= csr_io_addr;
cci_rd_req_ctr <= 0;
cci_rd_rsp_ctr <= 0;
cci_pending_reads <= 0;
cci_rd_req_enable <= (csr_data_size != 0);
cci_rd_req_wait <= 0;
end
if (STATE_WRITE == state
&& !cp2af_sRxPort.c0TxAlmFull // ensure read queue not full
&& !cci_rdq_full // ensure destination queue not full
&& !cci_read_wait // ensure the last batch has arrived
&& cci_read_ctr < csr_data_size) // ensure not done
begin
af2cp_sTxPort.c0.hdr <= cci_read_hdr;
af2cp_sTxPort.c0.valid <= 1;
cci_read_ctr <= cci_read_ctr + 1;
if (t_cci_rdq_tag'(cci_read_ctr) == (CCI_RD_WINDOW_SIZE-1)) begin
cci_read_wait <= 1; // end current request batch
cci_rd_req_enable <= (STATE_WRITE == state)
&& (cci_rd_req_ctr_next < csr_data_size)
&& (cci_pending_reads_next < CCI_RD_QUEUE_SIZE);
if (cci_rd_req_fire) begin
cci_rd_req_addr <= cci_rd_req_addr + 1;
cci_rd_req_ctr <= cci_rd_req_ctr_next;
if (t_cci_rdq_tag'(cci_rd_req_ctr) == (CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 1; // end current request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Req: addr=%0h, ctr=%0d", $time, `DRAM_TO_BYTE_ADDR(cci_read_hdr.address), cci_read_ctr);
`endif
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_rd_req_addr, (csr_data_size - cci_rd_req_ctr_next), cci_pending_reads_next);
`endif
end
if (cci_rdq_push) begin
cci_rdq_ctr <= cci_rdq_ctr + 1;
if (cci_rdq_ctr == (CCI_RD_WINDOW_SIZE-1)) begin
cci_read_wait <= 0; // restart new request batch
if (cci_rd_rsp_fire) begin
cci_rd_rsp_ctr <= cci_rd_rsp_ctr + 1;
if (cci_rd_rsp_ctr == (CCI_RD_WINDOW_SIZE-1)) begin
cci_rd_req_wait <= 0; // restart new request batch
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rdq_ctr);
`endif
end
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Rsp: idx=%0d, ctr=%0d", $time, t_cci_rdq_tag'(cp2af_sRxPort.c0.hdr.mdata), cci_rd_rsp_ctr);
`endif
end
if (cci_rdq_pop) begin
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Rd Queue Pop: pending=%0d", $time, cci_pending_reads_next);
`endif
end
cci_pending_reads <= cci_pending_reads_next;
end
end
@@ -579,67 +592,65 @@ VX_generic_queue #(
.full (cci_rdq_full)
);
// CCI Write Request //////////////////////////////////////////////////////////
// CCI-P Write Request //////////////////////////////////////////////////////////
t_ccip_c1_ReqMemHdr cci_write_hdr;
logic [$clog2(CCI_RW_QUEUE_SIZE+1)-1:0] cci_pending_writes, cci_pending_writes_next;
t_ccip_clAddr cci_wr_req_addr;
logic cci_wr_req_enable, cci_wr_rsp_fire;
logic [DRAM_ADDR_WIDTH:0] cci_pending_writes, cci_pending_writes_next;
always_comb begin
af2cp_sTxPort.c1.hdr = t_ccip_c1_ReqMemHdr'(0);
af2cp_sTxPort.c1.hdr.address = cci_wr_req_addr;
af2cp_sTxPort.c1.hdr.sop = 1; // single line write mode
af2cp_sTxPort.c1.data = t_ccip_clData'(avs_rdq_dout);
end
always_comb
begin
cci_wr_req = (STATE_READ == state)
&& !avs_rdq_empty
&& !cp2af_sRxPort.c1TxAlmFull
&& (cci_write_ctr < csr_data_size);
assign cci_wr_req_fire = af2cp_sTxPort.c1.valid && !cp2af_sRxPort.c1TxAlmFull;
assign cci_wr_rsp_fire = (STATE_READ == state) && cp2af_sRxPort.c1.rspValid;
if (cci_wr_req && ~cp2af_sRxPort.c1.rspValid) begin
cci_pending_writes_next = cci_pending_writes + 1;
end else
if (~cci_wr_req && cp2af_sRxPort.c1.rspValid) begin
cci_pending_writes_next = cci_pending_writes - 1;
end else begin
cci_pending_writes_next = cci_pending_writes;
end
// Next value of the outstanding CCI write counter:
//   +1 when a CCI write request fires and no write response arrives this cycle,
//   -1 when a write response arrives and no write request fires,
//   unchanged otherwise.
// The counter is selected inside each ternary arm because `+` binds tighter
// than `?:`; `pending + cond ? 1 : ...` would use `(pending + cond)` as the
// condition, so the counter would never track pending +/- 1 and
// cmd_read_done (which waits for the counter to reach 0) would be unreliable.
assign cci_pending_writes_next =
    (cci_wr_req_fire && ~cci_wr_rsp_fire)  ? (cci_pending_writes + 1) :
    (~cci_wr_req_fire && cci_wr_rsp_fire)  ? (cci_pending_writes - 1) :
                                             cci_pending_writes;
cci_write_hdr = t_ccip_c1_ReqMemHdr'(0);
cci_write_hdr.address = csr_io_addr + cci_write_ctr;
cci_write_hdr.sop = 1; // single line write mode
assign cmd_read_done = (0 == cci_wr_req_ctr) && (0 == cci_pending_writes);
cmd_read_done = (cci_write_ctr >= csr_data_size) && (0 == cci_pending_writes);
end
assign af2cp_sTxPort.c1.valid = cci_wr_req_enable && ~avs_rdq_empty;
// Send write requests to CCI
always_ff @(posedge clk)
begin
if (SoftReset) begin
af2cp_sTxPort.c1.hdr <= 0;
af2cp_sTxPort.c1.data <= 0;
af2cp_sTxPort.c1.valid <= 0;
cci_write_ctr <= 0;
cci_pending_writes <= 0;
cci_wr_req_addr <= 0;
cci_wr_req_ctr <= 0;
cci_wr_req_enable <= 0;
cci_pending_writes <= 0;
end
else begin
af2cp_sTxPort.c1.valid <= 0;
if ((STATE_IDLE == state)
&& (CMD_TYPE_READ == csr_cmd)) begin
cci_wr_req_addr <= csr_io_addr;
cci_wr_req_ctr <= csr_data_size;
cci_pending_writes <= 0;
end
if (STATE_IDLE == state) begin
cci_write_ctr <= 0;
end
if (cci_wr_req) begin
af2cp_sTxPort.c1.hdr <= cci_write_hdr;
af2cp_sTxPort.c1.data <= t_ccip_clData'(avs_rdq_dout);
af2cp_sTxPort.c1.valid <= 1;
cci_write_ctr <= cci_write_ctr + 1;
`ifdef DBG_PRINT_OPAE
$display("%t: CCI Wr Req: addr=%0h (%0d/%0d)", $time, `DRAM_TO_BYTE_ADDR(cci_write_hdr.address), cci_write_ctr + 1, csr_data_size);
`endif
end
cci_wr_req_enable <= (STATE_READ == state)
&& (cci_pending_writes_next < CCI_RW_QUEUE_SIZE);
if (cci_wr_req_fire) begin
assert(cci_wr_req_ctr != 0);
cci_wr_req_addr <= cci_wr_req_addr + 1;
cci_wr_req_ctr <= cci_wr_req_ctr - 1;
`ifdef DBG_PRINT_OPAE
if (cp2af_sRxPort.c1.rspValid) begin
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next);
end
$display("%t: CCI Wr Req: addr=%0h, rem=%0d, pending=%0d", $time, cci_wr_req_addr, (cci_wr_req_ctr - 1), cci_pending_writes_next);
`endif
end
`ifdef DBG_PRINT_OPAE
if (cci_wr_rsp_fire) begin
$display("%t: CCI Wr Rsp: pending=%0d", $time, cci_pending_writes_next);
end
`endif
cci_pending_writes <= cci_pending_writes_next;
end
@@ -647,49 +658,72 @@ end
// Vortex cache snooping //////////////////////////////////////////////////////
logic [DRAM_ADDR_WIDTH-1:0] snp_req_ctr;
logic [DRAM_ADDR_WIDTH-1:0] snp_rsp_ctr;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_size;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_baseaddr;
logic [`VX_DRAM_ADDR_WIDTH-1:0] snp_req_ctr, snp_rsp_ctr;
logic vx_snp_rsp_fire;
logic vx_snp_req_fire, vx_snp_rsp_fire;
always_comb
begin
cmd_clflush_done = (snp_rsp_ctr >= csr_data_size);
vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
if (`VX_DRAM_LINE_WIDTH != DRAM_LINE_WIDTH) begin
assign snp_req_baseaddr = {csr_mem_addr, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
assign snp_req_size = {csr_data_size, (`VX_DRAM_ADDR_WIDTH - DRAM_ADDR_WIDTH)'(0)};
end else begin
assign snp_req_baseaddr = csr_mem_addr;
assign snp_req_size = csr_data_size;
end
assign vx_snp_req_fire = vx_snp_req_valid && vx_snp_req_ready;
assign vx_snp_rsp_fire = vx_snp_rsp_valid && vx_snp_rsp_ready;
assign cmd_clflush_done = (0 == snp_rsp_ctr);
always_ff @(posedge clk)
begin
if (SoftReset) begin
vx_snp_req_valid <= 0;
vx_snp_req_addr <= 0;
vx_snp_req_tag <= 0;
vx_snp_rsp_ready <= 0;
snp_req_ctr <= 0;
snp_rsp_ctr <= 0;
end
else begin
if (STATE_IDLE == state) begin
snp_req_ctr <= 0;
snp_rsp_ctr <= 0;
vx_snp_rsp_ready <= 0;
end
vx_snp_req_valid <= 0;
if ((STATE_CLFLUSH == state)
&& (snp_req_ctr < csr_data_size)
&& vx_snp_req_ready)
begin
vx_snp_req_addr <= csr_mem_addr + snp_req_ctr;
snp_req_ctr <= snp_req_ctr + 1;
vx_snp_req_valid <= 1;
vx_snp_rsp_ready <= 1;
if ((STATE_IDLE == state)
&& (CMD_TYPE_CLFLUSH == csr_cmd)) begin
vx_snp_req_addr <= snp_req_baseaddr;
snp_req_ctr <= snp_req_size;
snp_rsp_ctr <= snp_req_size;
vx_snp_req_valid <= (snp_req_size != 0);
vx_snp_rsp_ready <= (snp_req_size != 0);
end
if ((STATE_CLFLUSH == state)
&& (0 == snp_rsp_ctr)) begin
vx_snp_rsp_ready <= 0;
end
if ((STATE_CLFLUSH == state)
&& (0 == snp_req_ctr)) begin
vx_snp_req_valid <= 0;
end
if (vx_snp_req_fire)
begin
vx_snp_req_addr <= vx_snp_req_addr + 1;
vx_snp_req_tag <= snp_req_ctr[`VX_SNP_TAG_WIDTH-1:0];
snp_req_ctr <= snp_req_ctr - 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Req: addr=%0h, tag=%0d, rem=%0d", $time, `DRAM_TO_BYTE_ADDR(vx_snp_req_addr), vx_snp_req_tag, (snp_req_ctr - 1));
`endif
end
if ((STATE_CLFLUSH == state)
&& (snp_rsp_ctr < csr_data_size)
&& vx_snp_rsp_fire) begin
snp_rsp_ctr <= snp_rsp_ctr + 1;
assert(snp_rsp_ctr != 0);
snp_rsp_ctr <= snp_rsp_ctr - 1;
`ifdef DBG_PRINT_OPAE
$display("%t: AFU Snp Rsp: tag=%0d, rem=%0d", $time, vx_snp_rsp_tag, (snp_rsp_ctr - 1));
`endif
end
end
end
@@ -701,8 +735,9 @@ Vortex_Socket #() vx_socket (
.reset (vx_reset),
// DRAM request
.dram_req_write (vx_dram_req_write),
.dram_req_read (vx_dram_req_read),
.dram_req_valid (vx_dram_req_valid),
.dram_req_rw (vx_dram_req_rw),
.dram_req_byteen (vx_dram_req_byteen),
.dram_req_addr (vx_dram_req_addr),
.dram_req_data (vx_dram_req_data),
.dram_req_tag (vx_dram_req_tag),
@@ -726,18 +761,18 @@ Vortex_Socket #() vx_socket (
.snp_rsp_ready (vx_snp_rsp_ready),
// I/O request
.io_req_read (),
.io_req_write (),
.io_req_valid (),
.io_req_rw (),
.io_req_byteen (),
.io_req_addr (),
.io_req_data (),
.io_req_byteen (),
.io_req_data (),
.io_req_tag (),
.io_req_ready (1'b1),
.io_req_ready (1),
// I/O response
.io_rsp_valid (1'b0),
.io_rsp_data (32'b0),
.io_rsp_tag (`DCORE_TAG_WIDTH'(0)),
.io_rsp_valid (0),
.io_rsp_data (0),
.io_rsp_tag (0),
.io_rsp_ready (),
// status