allowing partial cache request submissions, io bus support broken

This commit is contained in:
Blaise Tine
2020-12-21 03:53:13 -08:00
parent 4bbd7bf408
commit 4b7d871d62
24 changed files with 342 additions and 968 deletions

View File

@@ -36,22 +36,7 @@ module VX_cluster #(
output wire [`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request
output wire [`NUM_THREADS-1:0] io_req_valid,
output wire io_req_rw,
output wire [`NUM_THREADS-1:0][3:0] io_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] io_req_addr,
output wire [`NUM_THREADS-1:0][31:0] io_req_data,
output wire [`L2CORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire [31:0] io_rsp_data,
input wire [`L2CORE_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// CSR I/O Request
// CSR Request
input wire csr_io_req_valid,
input wire [`NC_BITS-1:0] csr_io_req_coreid,
input wire [11:0] csr_io_req_addr,
@@ -59,7 +44,7 @@ module VX_cluster #(
input wire [31:0] csr_io_req_data,
output wire csr_io_req_ready,
// CSR I/O Response
// CSR Response
output wire csr_io_rsp_valid,
output wire [31:0] csr_io_rsp_data,
input wire csr_io_rsp_ready,
@@ -91,19 +76,6 @@ module VX_cluster #(
wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag;
wire [`NUM_CORES-1:0] per_core_snp_rsp_ready;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0] per_core_io_req_valid;
wire [`NUM_CORES-1:0] per_core_io_req_rw;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0][3:0] per_core_io_req_byteen;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0][29:0] per_core_io_req_addr;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0][31:0] per_core_io_req_data;
wire [`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_req_tag;
wire [`NUM_CORES-1:0] per_core_io_req_ready;
wire [`NUM_CORES-1:0] per_core_io_rsp_valid;
wire [`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_rsp_tag;
wire [`NUM_CORES-1:0][31:0] per_core_io_rsp_data;
wire [`NUM_CORES-1:0] per_core_io_rsp_ready;
wire [`NUM_CORES-1:0] per_core_csr_io_req_valid;
wire [`NUM_CORES-1:0][11:0] per_core_csr_io_req_addr;
wire [`NUM_CORES-1:0] per_core_csr_io_req_rw;
@@ -149,19 +121,6 @@ module VX_cluster #(
.snp_rsp_tag (per_core_snp_rsp_tag [i]),
.snp_rsp_ready (per_core_snp_rsp_ready [i]),
.io_req_valid (per_core_io_req_valid [i]),
.io_req_rw (per_core_io_req_rw [i]),
.io_req_byteen (per_core_io_req_byteen [i]),
.io_req_addr (per_core_io_req_addr [i]),
.io_req_data (per_core_io_req_data [i]),
.io_req_tag (per_core_io_req_tag [i]),
.io_req_ready (per_core_io_req_ready [i]),
.io_rsp_valid (per_core_io_rsp_valid [i]),
.io_rsp_data (per_core_io_rsp_data [i]),
.io_rsp_tag (per_core_io_rsp_tag [i]),
.io_rsp_ready (per_core_io_rsp_ready [i]),
.csr_io_req_valid (per_core_csr_io_req_valid[i]),
.csr_io_req_rw (per_core_csr_io_req_rw [i]),
.csr_io_req_addr (per_core_csr_io_req_addr [i]),
@@ -175,49 +134,7 @@ module VX_cluster #(
.busy (per_core_busy [i]),
.ebreak (per_core_ebreak [i])
);
end
VX_databus_arb #(
.NUM_REQS (`NUM_CORES),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2CORE_TAG_WIDTH),
.BUFFERED_REQ (`NUM_CORES >= 4),
.BUFFERED_RSP (1)
) io_arb (
.clk (clk),
.reset (reset),
// input requests
.req_valid_in (per_core_io_req_valid),
.req_rw_in (per_core_io_req_rw),
.req_byteen_in (per_core_io_req_byteen),
.req_addr_in (per_core_io_req_addr),
.req_data_in (per_core_io_req_data),
.req_tag_in (per_core_io_req_tag),
.req_ready_in (per_core_io_req_ready),
// output request
.req_valid_out (io_req_valid),
.req_rw_out (io_req_rw),
.req_byteen_out (io_req_byteen),
.req_addr_out (io_req_addr),
.req_data_out (io_req_data),
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input response
.rsp_valid_in (io_rsp_valid),
.rsp_tag_in (io_rsp_tag),
.rsp_data_in (io_rsp_data),
.rsp_ready_in (io_rsp_ready),
// output responses
.rsp_valid_out (per_core_io_rsp_valid),
.rsp_data_out (per_core_io_rsp_data),
.rsp_tag_out (per_core_io_rsp_tag),
.rsp_ready_out (per_core_io_rsp_ready)
);
end
VX_csr_io_arb #(
.NUM_REQS (`NUM_CORES),

View File

@@ -289,7 +289,7 @@
// Size of cache in bytes
`ifndef DCACHE_SIZE
`define DCACHE_SIZE 4096
`define DCACHE_SIZE 8192
`endif
// Number of banks
@@ -336,7 +336,7 @@
// Size of cache in bytes
`ifndef SMEM_SIZE
`define SMEM_SIZE 2048
`define SMEM_SIZE 4096
`endif
// Number of banks

View File

@@ -35,21 +35,6 @@ module VX_core #(
output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
// I/O request
output wire [`NUM_THREADS-1:0] io_req_valid,
output wire io_req_rw,
output wire [`NUM_THREADS-1:0][3:0] io_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] io_req_addr,
output wire [`NUM_THREADS-1:0][31:0] io_req_data,
output wire [`DCORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire [31:0] io_rsp_data,
input wire [`DCORE_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// CSR I/O request
input wire csr_io_req_valid,
input wire [11:0] csr_io_req_addr,
@@ -117,35 +102,6 @@ module VX_core #(
//--
VX_cache_core_req_if #(
.NUM_REQS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
) io_req_if();
VX_cache_core_rsp_if #(
.NUM_REQS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
) io_rsp_if();
assign io_req_valid = io_req_if.valid;
assign io_req_rw = io_req_if.rw;
assign io_req_byteen = io_req_if.byteen;
assign io_req_addr = io_req_if.addr;
assign io_req_data = io_req_if.data;
assign io_req_tag = io_req_if.tag;
assign io_req_if.ready = io_req_ready;
assign io_rsp_if.valid = {{(`NUM_THREADS-1){1'b0}}, io_rsp_valid};
assign io_rsp_if.data[0] = io_rsp_data;
assign io_rsp_if.tag = io_rsp_tag;
assign io_rsp_ready = io_rsp_if.ready;
//--
VX_cache_core_req_if #(
.NUM_REQS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
@@ -259,11 +215,7 @@ module VX_core #(
// DRAM
.dram_req_if (dram_req_if),
.dram_rsp_if (dram_rsp_if),
// I/O
.io_req_if (io_req_if),
.io_rsp_if (io_rsp_if)
.dram_rsp_if (dram_rsp_if)
);
endmodule

View File

@@ -1,125 +1,107 @@
`include "VX_define.vh"
module VX_databus_arb #(
parameter NUM_REQS = 1,
parameter WORD_SIZE = 1,
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0,
module VX_databus_arb (
input wire clk,
input wire reset,
parameter WORD_WIDTH = WORD_SIZE * 8,
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE),
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) (
input wire clk,
input wire reset,
// input request
VX_cache_core_req_if core_req_if,
// input requests
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0] req_valid_in,
input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] req_tag_in,
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] req_addr_in,
input wire [NUM_REQS-1:0] req_rw_in,
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] req_byteen_in,
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] req_data_in,
output wire [NUM_REQS-1:0] req_ready_in,
// output requests
VX_cache_core_req_if cache_req_if,
VX_cache_core_req_if smem_req_if,
// output request
output wire [`NUM_THREADS-1:0] req_valid_out,
output wire [TAG_OUT_WIDTH-1:0] req_tag_out,
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] req_addr_out,
output wire req_rw_out,
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] req_byteen_out,
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] req_data_out,
input wire req_ready_out,
// input responses
VX_cache_core_rsp_if cache_rsp_if,
VX_cache_core_rsp_if smem_rsp_if,
// input response
input wire rsp_valid_in,
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
input wire [WORD_WIDTH-1:0] rsp_data_in,
output wire rsp_ready_in,
// output responses
output wire [NUM_REQS-1:0] rsp_valid_out,
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQS-1:0] rsp_ready_out
// output response
VX_cache_core_rsp_if core_rsp_if
);
localparam REQ_DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH);
localparam RSP_DATAW = TAG_IN_WIDTH + WORD_WIDTH;
localparam REQ_ADDRW = 32 - `CLOG2(`DWORD_SIZE);
localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
if (NUM_REQS > 1) begin
//
// handle requests
//
wire [NUM_REQS-1:0] valids;
wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
wire [`NUM_THREADS-1:0] req_tmask_out;
wire req_valid_out_unqual;
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
for (genvar i = 0; i < NUM_REQS; i++) begin
assign valids[i] = (| req_valid_in[i]);
assign data_in[i] = {req_valid_in[i], {req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
wire cache_req_ready_in;
wire smem_req_ready_in;
VX_stream_arbiter #(
.NUM_REQS (NUM_REQS),
.DATAW (REQ_DATAW),
.BUFFERED (BUFFERED_REQ)
) req_arb (
// select shared memory bus
wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
&& (core_req_if.addr[i] >= REQ_ADDRW'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> 2))
&& (core_req_if.addr[i] < REQ_ADDRW'(`SHARED_MEM_BASE_ADDR >> 2));
VX_skid_buffer #(
.DATAW (REQ_DATAW),
.PASSTHRU (1)
) cache_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (valids),
.data_in (data_in),
.ready_in (req_ready_in),
.valid_out (req_valid_out_unqual),
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}),
.ready_out (req_ready_out)
.valid_in (core_req_if.valid[i] && !is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (cache_req_ready_in),
.valid_out (cache_req_if.valid[i]),
.data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (cache_req_if.ready[i])
);
assign req_valid_out = {`NUM_THREADS{req_valid_out_unqual}} & req_tmask_out;
///////////////////////////////////////////////////////////////////////
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
end
VX_stream_demux #(
.NUM_REQS (NUM_REQS),
.DATAW (RSP_DATAW),
.BUFFERED (BUFFERED_RSP)
) rsp_demux (
VX_skid_buffer #(
.DATAW (REQ_DATAW),
.PASSTHRU (1)
) smem_out_buffer (
.clk (clk),
.reset (reset),
.sel (rsp_sel),
.valid_in (rsp_valid_in),
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}),
.ready_in (rsp_ready_in),
.valid_out (rsp_valid_out),
.data_out (rsp_merged_data_out),
.ready_out (rsp_ready_out)
.valid_in (core_req_if.valid[i] && is_smem_addr),
.data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (smem_req_ready_in),
.valid_out (smem_req_if.valid[i]),
.data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
.ready_out (smem_req_if.ready[i])
);
end else begin
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign req_valid_out = req_valid_in;
assign req_tag_out = req_tag_in;
assign req_addr_out = req_addr_in;
assign req_rw_out = req_rw_in;
assign req_byteen_out = req_byteen_in;
assign req_data_out = req_data_in;
assign req_ready_in = req_ready_out;
assign rsp_valid_out = rsp_valid_in;
assign rsp_tag_out = rsp_tag_in;
assign rsp_data_out = rsp_data_in;
assign rsp_ready_in = rsp_ready_out;
assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
end
//
// handle responses
//
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
wire [1:0] rsp_valid_in;
wire [1:0] rsp_ready_in;
wire core_rsp_valid;
wire [`NUM_THREADS-1:0] core_rsp_tmask;
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag};
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag};
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
VX_stream_arbiter #(
.NUM_REQS (2),
.DATAW (RSP_DATAW),
.BUFFERED (0)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready)
);
assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1];
assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}};
endmodule

View File

@@ -1,156 +0,0 @@
`include "VX_define.vh"
module VX_dcache_arb (
input wire clk,
input wire reset,
// input request
VX_cache_core_req_if core_req_if,
// output requests
VX_cache_core_req_if cache_req_if,
VX_cache_core_req_if smem_req_if,
VX_cache_core_req_if io_req_if,
// input responses
VX_cache_core_rsp_if cache_rsp_if,
VX_cache_core_rsp_if smem_rsp_if,
VX_cache_core_rsp_if io_rsp_if,
// output response
VX_cache_core_rsp_if core_rsp_if
);
localparam REQ_ADDRW = 32 - `CLOG2(`DWORD_SIZE);
localparam REQ_DATAW = `NUM_THREADS + 1 + `NUM_THREADS * `DWORD_SIZE + `NUM_THREADS * REQ_ADDRW + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
//
// select request
//
// select shared memory bus
wire is_smem_addr = core_req_if.valid[0] && `SM_ENABLE
&& (core_req_if.addr[0] >= REQ_ADDRW'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> 2))
&& (core_req_if.addr[0] < REQ_ADDRW'(`SHARED_MEM_BASE_ADDR >> 2));
// select io bus
wire is_io_addr = core_req_if.valid[0]
&& (core_req_if.addr[0] >= REQ_ADDRW'(`IO_BUS_BASE_ADDR >> 2));
wire cache_req_valid_out;
wire [`NUM_THREADS-1:0] cache_req_tmask;
wire cache_req_ready_in;
wire smem_req_valid_out;
wire [`NUM_THREADS-1:0] smem_req_tmask;
wire smem_req_ready_in;
wire io_req_valid_out;
wire [`NUM_THREADS-1:0] io_req_tmask;
wire io_req_ready_in;
reg [2:0] req_select;
reg req_ready;
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) cache_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_select[0]),
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
.ready_in (cache_req_ready_in),
.valid_out (cache_req_valid_out),
.data_out ({cache_req_tmask, cache_req_if.addr, cache_req_if.rw, cache_req_if.byteen, cache_req_if.data, cache_req_if.tag}),
.ready_out (cache_req_if.ready)
);
assign cache_req_if.valid = cache_req_tmask & {`NUM_THREADS{cache_req_valid_out}};
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) smem_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_select[1]),
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
.ready_in (smem_req_ready_in),
.valid_out (smem_req_valid_out),
.data_out ({smem_req_tmask, smem_req_if.addr, smem_req_if.rw, smem_req_if.byteen, smem_req_if.data, smem_req_if.tag}),
.ready_out (smem_req_if.ready)
);
assign smem_req_if.valid = smem_req_tmask & {`NUM_THREADS{smem_req_valid_out}};
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) io_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_select[2]),
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
.ready_in (io_req_ready_in),
.valid_out (io_req_valid_out),
.data_out ({io_req_tmask, io_req_if.addr, io_req_if.rw, io_req_if.byteen, io_req_if.data, io_req_if.tag}),
.ready_out (io_req_if.ready)
);
assign io_req_if.valid = io_req_tmask & {`NUM_THREADS{io_req_valid_out}};
always @(*) begin
req_select = 0;
if (is_smem_addr) begin
req_select[1] = 1;
req_ready = smem_req_ready_in;
end else if (is_io_addr) begin
req_select[2] = 1;
req_ready = io_req_ready_in;
end else begin
req_select[0] = 1;
req_ready = cache_req_ready_in;
end
end
assign core_req_if.ready = req_ready;
//
// select response
//
wire [2:0][RSP_DATAW-1:0] rsp_data_in;
wire [2:0] rsp_valid_in;
wire [2:0] rsp_ready_in;
wire core_rsp_valid;
wire [`NUM_THREADS-1:0] core_rsp_tmask;
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag};
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag};
assign rsp_data_in[2] = {io_rsp_if.valid, io_rsp_if.data, io_rsp_if.tag};
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
assign rsp_valid_in[2] = (| io_rsp_if.valid);
VX_stream_arbiter #(
.NUM_REQS (3),
.DATAW (RSP_DATAW),
.BUFFERED (1)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready)
);
assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1];
assign io_rsp_if.ready = rsp_ready_in[2];
assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}};
endmodule

View File

@@ -39,7 +39,7 @@ module VX_fpu_unit #(
wire fpuq_push = fpu_req_if.valid && fpu_req_if.ready;
wire fpuq_pop = valid_out && ready_out;
VX_cam_buffer #(
VX_index_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`FPUQ_SIZE),
.FASTRAM (1)

View File

@@ -72,7 +72,8 @@ module VX_lsu_unit #(
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags;
`IGNORE_WARNINGS_END
wire stall_in;
wire ready_in;
wire stall_in = ~ready_in & req_valid;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
@@ -86,79 +87,98 @@ module VX_lsu_unit #(
.data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
);
// Can accept new request?
assign lsu_req_if.ready = ~stall_in;
wire [`NW_BITS-1:0] rsp_wid;
wire [31:0] rsp_pc;
wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb;
wire [`NUM_THREADS-1:0][1:0] rsp_offset;
wire [1:0] rsp_sext;
reg [`NUM_THREADS-1:0][31:0] rsp_data;
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
reg [`NUM_THREADS-1:0] req_sent_mask, rsp_rem_mask_n;
wire req_sent_all;
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask;
wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag;
wire lsuq_full;
wire mbuf_push = (| dcache_req_if.valid) && (| dcache_req_if.ready)
&& (0 == req_sent_mask) // first submission only
&& (0 == req_rw); // loads only
wire lsuq_push = (| dcache_req_if.valid) && dcache_req_if.ready
&& (0 == req_rw); // loads only
wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire lsuq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
wire mbuf_pop = mbuf_pop_part && (0 == rsp_rem_mask_n);
assign rsp_tag = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0];
assign mbuf_raddr = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0];
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask[rsp_tag] & ~dcache_rsp_if.valid;
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_n);
VX_cam_buffer #(
VX_index_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
.SIZE (`LSUQ_SIZE),
.FASTRAM (1)
) req_metadata_buf (
.clk (clk),
.reset (reset),
.write_addr (req_tag),
.acquire_slot (lsuq_push),
.read_addr (rsp_tag),
.write_addr (mbuf_waddr),
.acquire_slot (mbuf_push),
.read_addr (mbuf_raddr),
.write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}),
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_offset, rsp_sext}),
.release_addr (rsp_tag),
.release_slot (lsuq_pop),
.full (lsuq_full)
.release_addr (mbuf_raddr),
.release_slot (mbuf_pop),
.full (mbuf_full)
);
assign req_sent_all = ((dcache_req_if.ready | req_sent_mask) & req_tmask) == req_tmask;
always @(posedge clk) begin
if (lsuq_push) begin
mem_rsp_mask[req_tag] <= req_tmask;
pending_tags[req_tag] <= dcache_req_if.tag;
if (reset) begin
req_sent_mask <= 0;
end else begin
if (req_sent_all)
req_sent_mask <= 0;
else
req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready);
end
end
// need to hold the acquired tag index until the full request is submitted
reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag = (0 == req_sent_mask) ? mbuf_waddr : req_tag_hold;
always @(posedge clk) begin
if (mbuf_push)
req_tag_hold <= mbuf_waddr;
end
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
always @(posedge clk) begin
if (mbuf_push) begin
rsp_rem_mask[mbuf_waddr] <= req_tmask;
pending_tags[mbuf_waddr] <= dcache_req_if.tag[0];
end
if (lsuq_pop_part) begin
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n;
if (mbuf_pop_part) begin
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
end
end
wire load_req_stall = req_valid && !req_rw && lsuq_full;
wire store_req_stall = req_valid && req_rw && !st_commit_if.ready;
wire req_ready_dep = (!req_rw && !mbuf_full) || (req_rw && st_commit_if.ready);
// Core Request
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && !load_req_stall && !store_req_stall}} & req_tmask;
assign dcache_req_if.rw = req_rw;
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & ~req_sent_mask;
assign dcache_req_if.rw = {`NUM_THREADS{req_rw}};
assign dcache_req_if.byteen = req_byteen;
assign dcache_req_if.addr = req_addr;
assign dcache_req_if.data = req_data;
`ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag};
assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_rd, req_wid, req_tag}}};
`else
assign dcache_req_if.tag = req_tag;
assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif
assign stall_in = ~dcache_req_if.ready
|| load_req_stall
|| store_req_stall;
// Can accept new request?
assign lsu_req_if.ready = ~stall_in;
assign ready_in = req_ready_dep && req_sent_all;
// Core Response
for (genvar i = 0; i < `NUM_THREADS; i++) begin
@@ -174,7 +194,7 @@ module VX_lsu_unit #(
// send store commit
wire is_store_rsp = req_valid && req_rw && dcache_req_if.ready;
wire is_store_rsp = req_valid && req_rw && req_sent_all;
assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid;
@@ -206,7 +226,7 @@ module VX_lsu_unit #(
assign dcache_rsp_if.ready = ~load_rsp_stall;
// scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}});
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready);
`SCOPE_ASSIGN (dcache_req_wid, req_wid);
`SCOPE_ASSIGN (dcache_req_pc, req_pc);
`SCOPE_ASSIGN (dcache_req_addr, req_address);
@@ -216,23 +236,23 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN (dcache_req_tag, req_tag);
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}});
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
`SCOPE_ASSIGN (dcache_rsp_tag, rsp_tag);
`SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
`ifdef DBG_PRINT_CORE_DCACHE
always @(posedge clk) begin
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
if (dcache_req_if.rw)
if ((| dcache_req_if.valid) && (|dcache_req_if.ready)) begin
if (dcache_req_if.rw[0])
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
else
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d",
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, dcache_req_if.byteen, req_rd);
$time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, req_rd);
end
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data);
end
if (lsuq_full) begin
if (mbuf_full) begin
$write("%t: D$%0d queue-full:", $time, CORE_ID);
for (integer j = 0; j < `LSUQ_SIZE; j++) begin
$write(" tag%0d=%0h", j, pending_tags[j]);

View File

@@ -26,11 +26,7 @@ module VX_mem_unit # (
// DRAM
VX_cache_dram_req_if dram_req_if,
VX_cache_dram_rsp_if dram_rsp_if,
// I/O
VX_cache_core_req_if io_req_if,
VX_cache_core_rsp_if io_rsp_if
VX_cache_dram_rsp_if dram_rsp_if
);
`ifdef PERF_ENABLE
@@ -76,19 +72,17 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS)
) smem_rsp_if();
VX_dcache_arb dcache_arb (
.clk (clk),
.reset (reset),
VX_databus_arb databus_arb (
.clk (clk),
.reset (reset),
.core_req_if (core_dcache_req_if),
.cache_req_if (dcache_req_if),
.smem_req_if (smem_req_if),
.io_req_if (io_req_if),
.core_req_if (core_dcache_req_if),
.cache_req_if (dcache_req_if),
.smem_req_if (smem_req_if),
.cache_rsp_if (dcache_rsp_if),
.smem_rsp_if (smem_rsp_if),
.io_rsp_if (io_rsp_if),
.core_rsp_if (core_dcache_rsp_if)
.cache_rsp_if (dcache_rsp_if),
.smem_rsp_if (smem_rsp_if),
.core_rsp_if (core_dcache_rsp_if)
);
VX_cache #(

View File

@@ -31,7 +31,7 @@ module VX_mul_unit #(
wire mulq_push = mul_req_if.valid && mul_req_if.ready;
wire mulq_pop = valid_out && ready_out;
VX_cam_buffer #(
VX_index_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`MULQ_SIZE),
.FASTRAM (1)

View File

@@ -11,12 +11,12 @@ module VX_pipeline #(
// Dcache core request
output wire [`NUM_THREADS-1:0] dcache_req_valid,
output wire dcache_req_rw,
output wire [`NUM_THREADS-1:0] dcache_req_rw,
output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr,
output wire [`NUM_THREADS-1:0][31:0] dcache_req_data,
output wire [`DCORE_TAG_WIDTH-1:0] dcache_req_tag,
input wire dcache_req_ready,
output wire [`NUM_THREADS-1:0][`DCORE_TAG_WIDTH-1:0] dcache_req_tag,
input wire [`NUM_THREADS-1:0] dcache_req_ready,
// Dcache core reponse
input wire [`NUM_THREADS-1:0] dcache_rsp_valid,

View File

@@ -32,24 +32,9 @@ module Vortex (
// Snoop response
output wire snp_rsp_valid,
output wire [`VX_SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready,
input wire snp_rsp_ready,
// I/O request
output wire [`NUM_THREADS-1:0] io_req_valid,
output wire io_req_rw,
output wire [`NUM_THREADS-1:0][3:0] io_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] io_req_addr,
output wire [`NUM_THREADS-1:0][31:0] io_req_data,
output wire [`VX_CORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire [31:0] io_rsp_data,
input wire [`VX_CORE_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// CSR I/O Request
// CSR Request
input wire csr_io_req_valid,
input wire [`VX_CSR_ID_WIDTH-1:0] csr_io_req_coreid,
input wire [11:0] csr_io_req_addr,
@@ -57,7 +42,7 @@ module Vortex (
input wire [31:0] csr_io_req_data,
output wire csr_io_req_ready,
// CSR I/O Response
// CSR Response
output wire csr_io_rsp_valid,
output wire [31:0] csr_io_rsp_data,
input wire csr_io_rsp_ready,
@@ -89,19 +74,6 @@ module Vortex (
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0] per_cluster_io_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][3:0] per_cluster_io_req_byteen;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][29:0] per_cluster_io_req_addr;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][31:0] per_cluster_io_req_data;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
@@ -150,19 +122,6 @@ module Vortex (
.snp_rsp_tag (per_cluster_snp_rsp_tag [i]),
.snp_rsp_ready (per_cluster_snp_rsp_ready [i]),
.io_req_valid (per_cluster_io_req_valid [i]),
.io_req_rw (per_cluster_io_req_rw [i]),
.io_req_byteen (per_cluster_io_req_byteen [i]),
.io_req_addr (per_cluster_io_req_addr [i]),
.io_req_data (per_cluster_io_req_data [i]),
.io_req_tag (per_cluster_io_req_tag [i]),
.io_req_ready (per_cluster_io_req_ready [i]),
.io_rsp_valid (per_cluster_io_rsp_valid [i]),
.io_rsp_data (per_cluster_io_rsp_data [i]),
.io_rsp_tag (per_cluster_io_rsp_tag [i]),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.csr_io_req_valid (per_cluster_csr_io_req_valid[i]),
.csr_io_req_coreid (csr_io_core_id),
.csr_io_req_rw (per_cluster_csr_io_req_rw [i]),
@@ -179,48 +138,6 @@ module Vortex (
);
end
VX_databus_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`L2CORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
) io_arb (
.clk (clk),
.reset (reset),
// input requests
.req_valid_in (per_cluster_io_req_valid),
.req_rw_in (per_cluster_io_req_rw),
.req_byteen_in (per_cluster_io_req_byteen),
.req_addr_in (per_cluster_io_req_addr),
.req_data_in (per_cluster_io_req_data),
.req_tag_in (per_cluster_io_req_tag),
.req_ready_in (per_cluster_io_req_ready),
// output request
.req_valid_out (io_req_valid),
.req_rw_out (io_req_rw),
.req_byteen_out (io_req_byteen),
.req_addr_out (io_req_addr),
.req_data_out (io_req_data),
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input response
.rsp_valid_in (io_rsp_valid),
.rsp_tag_in (io_rsp_tag),
.rsp_data_in (io_rsp_data),
.rsp_ready_in (io_rsp_ready),
// output responses
.rsp_valid_out (per_cluster_io_rsp_valid),
.rsp_data_out (per_cluster_io_rsp_data),
.rsp_tag_out (per_cluster_io_rsp_tag),
.rsp_ready_out (per_cluster_io_rsp_ready)
);
VX_csr_io_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32),

View File

@@ -999,21 +999,6 @@ Vortex #() vortex (
.snp_rsp_tag (vx_snp_rsp_tag),
.snp_rsp_ready (vx_snp_rsp_ready),
// I/O request
`UNUSED_PIN (io_req_valid),
`UNUSED_PIN (io_req_rw),
`UNUSED_PIN (io_req_byteen),
`UNUSED_PIN (io_req_addr),
`UNUSED_PIN (io_req_data),
`UNUSED_PIN (io_req_tag),
.io_req_ready (1'b1),
// I/O response
.io_rsp_valid (1'b0),
.io_rsp_data (0),
.io_rsp_tag (0),
`UNUSED_PIN (io_rsp_ready),
// CSR I/O Request
.csr_io_req_valid (vx_csr_io_req_valid),
.csr_io_req_coreid(vx_csr_io_req_coreid),

View File

@@ -55,13 +55,14 @@ module VX_bank #(
input wire reset,
// Core Request
input wire [NUM_REQS-1:0] core_req_valid,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
input wire core_req_valid,
input wire [`REQS_BITS-1:0] core_req_tid,
input wire core_req_rw,
input wire [WORD_SIZE-1:0] core_req_byteen,
input wire [`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [`WORD_WIDTH-1:0] core_req_data,
input wire [CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
// Core Response
output wire core_rsp_valid,
@@ -229,37 +230,21 @@ module VX_bank #(
wire creq_push = (| core_req_valid) && core_req_ready;
assign core_req_ready = !creq_full;
VX_bank_core_req_queue #(
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CREQ_SIZE (CREQ_SIZE),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
VX_generic_queue #(
.DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH),
.SIZE (CREQ_SIZE),
.BUFFERED (1),
.FASTRAM (1)
) core_req_queue (
.clk (clk),
.reset (reset),
// Enqueue
.push (creq_push),
.tag_in (core_req_tag),
.valids_in (core_req_valid),
.rw_in (core_req_rw),
.byteen_in (core_req_byteen),
.addr_in (core_req_addr),
.wdata_in (core_req_data),
// Dequeue
.pop (creq_pop),
.tag_out (creq_tag_st0),
.tid_out (creq_tid_st0),
.rw_out (creq_rw_st0),
.byteen_out (creq_byteen_st0),
.addr_out (creq_addr_st0),
.wdata_out (creq_writeword_st0),
// States
.empty (creq_empty),
.full (creq_full)
.clk (clk),
.reset (reset),
.push (creq_push),
.pop (creq_pop),
.data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}),
.data_out({creq_tag_st0, creq_tid_st0, creq_rw_st0, creq_byteen_st0, creq_addr_st0, creq_writeword_st0}),
.empty (creq_empty),
.full (creq_full),
`UNUSED_PIN (size)
);
reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size;

View File

@@ -1,215 +0,0 @@
`include "VX_cache_config.vh"
module VX_bank_core_req_queue #(
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Core Request Queue Size
parameter CREQ_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
input wire clk,
input wire reset,
// Enqueue
input wire push,
input wire [NUM_REQS-1:0] valids_in,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in,
input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen_in,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] wdata_in,
// Dequeue
input wire pop,
output wire [CORE_TAG_WIDTH-1:0] tag_out,
output wire [`WORD_ADDR_WIDTH-1:0] addr_out,
output wire rw_out,
output wire [WORD_SIZE-1:0] byteen_out,
output wire [`WORD_WIDTH-1:0] wdata_out,
output wire [`REQS_BITS-1:0] tid_out,
// States
output wire empty,
output wire full
);
wire [NUM_REQS-1:0] q_valids;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] q_tag;
wire [`CORE_REQ_TAG_COUNT-1:0] q_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] q_byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] q_wdata;
wire q_push;
wire q_pop;
wire q_empty;
wire q_full;
always @(*) begin
assert(!push || (| valids_in));
assert(!push || !full);
assert(!pop || !empty);
end
VX_generic_queue #(
.DATAW ($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(wdata_in)),
.SIZE (CREQ_SIZE),
.BUFFERED (1),
.FASTRAM (1)
) req_queue (
.clk (clk),
.reset (reset),
.push (q_push),
.pop (q_pop),
.data_in ({valids_in, tag_in, addr_in, rw_in, byteen_in, wdata_in}),
.data_out ({q_valids, q_tag, q_addr, q_rw, q_byteen, q_wdata}),
.empty (q_empty),
.full (q_full),
`UNUSED_PIN (size)
);
if (NUM_REQS > 1) begin
reg [`REQS_BITS-1:0] sel_idx, sel_idx_r;
reg [CORE_TAG_WIDTH-1:0] sel_tag, sel_tag_r;
reg [`WORD_ADDR_WIDTH-1:0] sel_addr, sel_addr_r;
reg sel_rw, sel_rw_r;
reg [WORD_SIZE-1:0] sel_byteen, sel_byteen_r;
reg [`WORD_WIDTH-1:0] sel_wdata, sel_wdata_r;
reg [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_r;
wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_n;
wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt;
reg [NUM_REQS-1:0] pop_mask;
reg fast_track;
wire fast_track_n;
reg req_eop; // request end of packet
reg empty_r;
assign q_push = push;
assign q_pop = pop && req_eop;
wire [NUM_REQS-1:0] requests = q_valids & ~pop_mask;
always @(*) begin
sel_idx = 0;
sel_tag = 'x;
sel_addr = 'x;
sel_rw = 'x;
sel_byteen = 'x;
sel_wdata = 'x;
for (integer i = 0; i < NUM_REQS; i++) begin
if (requests[i]) begin
sel_idx = `REQS_BITS'(i);
sel_addr = q_addr[i];
if (0 == CORE_TAG_ID_BITS) begin
sel_tag = q_tag[i];
sel_rw = q_rw[i];
end
sel_byteen = q_byteen[i];
sel_wdata = q_wdata[i];
break;
end
end
end
VX_countones #(
.N(NUM_REQS)
) counter (
.valids (q_valids),
.count (q_valids_cnt)
);
assign fast_track_n = (!q_empty && (empty_r || (pop && fast_track))) ? 0 :
pop ? (q_valids_cnt_r == 2) :
fast_track;
assign q_valids_cnt_n = (!q_empty && (empty_r || (pop && fast_track))) ? q_valids_cnt :
pop ? (q_valids_cnt_r - 1) :
q_valids_cnt_r;
always @(posedge clk) begin
if (reset) begin
pop_mask <= 0;
fast_track <= 0;
q_valids_cnt_r <= 0;
req_eop <= 0;
empty_r <= 1;
end else begin
if (!q_empty
&& (empty_r || (pop && fast_track))) begin
pop_mask <= (NUM_REQS'(1) << sel_idx);
end else if (pop) begin
if (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) begin
pop_mask <= 0;
end else begin
pop_mask[sel_idx] <= 1;
end
end
q_valids_cnt_r <= q_valids_cnt_n;
fast_track <= fast_track_n;
req_eop <= (q_valids_cnt_n == 1 || q_valids_cnt_n == 2) && !fast_track_n;
empty_r <= (0 == q_valids_cnt_n);
end
if (empty_r || pop) begin
sel_idx_r <= sel_idx;
sel_byteen_r <= sel_byteen;
sel_addr_r <= sel_addr;
sel_wdata_r <= sel_wdata;
end
end
if (CORE_TAG_ID_BITS != 0) begin
`UNUSED_VAR (sel_tag)
`UNUSED_VAR (sel_rw)
always @(posedge clk) begin
if (empty_r || pop) begin
sel_tag_r <= q_tag;
sel_rw_r <= q_rw;
end
end
end else begin
always @(posedge clk) begin
if (empty_r || pop) begin
sel_tag_r <= sel_tag;
sel_rw_r <= sel_rw;
end
end
end
assign tag_out = sel_tag_r;
assign addr_out = sel_addr_r;
assign rw_out = sel_rw_r;
assign byteen_out = sel_byteen_r;
assign wdata_out = sel_wdata_r;
assign tid_out = sel_idx_r;
assign full = q_full;
assign empty = empty_r;
end else begin
`UNUSED_VAR (q_valids)
assign q_push = push;
assign q_pop = pop;
assign tag_out = q_tag;
assign addr_out = q_addr;
assign rw_out = q_rw;
assign byteen_out = q_byteen;
assign wdata_out = q_wdata;
assign tid_out = 0;
assign empty = q_empty;
assign full = q_full;
end
endmodule

View File

@@ -58,12 +58,12 @@ module VX_cache #(
// Core request
input wire [NUM_REQS-1:0] core_req_valid,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw,
input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready,
input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire [NUM_REQS-1:0] core_req_ready,
// Core response
output wire [NUM_REQS-1:0] core_rsp_valid,
@@ -108,8 +108,8 @@ module VX_cache #(
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid;
wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
@@ -155,14 +155,14 @@ module VX_cache #(
.BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS),
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
.NUM_REQS (NUM_REQS)
) cache_core_req_bank_sel (
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
.core_req_ready (core_req_ready),
.per_bank_valid (per_bank_valid),
.per_bank_ready (per_bank_core_req_ready)
.core_req_valid (core_req_valid),
.core_req_addr (core_req_addr),
.core_req_ready (core_req_ready),
.per_bank_valid (per_bank_core_req_valid),
.per_bank_tid (per_bank_core_req_tid),
.per_bank_ready (per_bank_core_req_ready)
);
assign dram_req_tag = dram_req_addr;
@@ -173,51 +173,53 @@ module VX_cache #(
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQS-1:0] curr_bank_core_req_valid;
wire [`CORE_REQ_TAG_COUNT-1:0] curr_bank_core_req_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire curr_bank_core_req_ready;
wire curr_bank_core_req_valid;
wire [`REQS_BITS-1:0] curr_bank_core_req_tid;
wire curr_bank_core_req_rw;
wire [WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire [`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire curr_bank_core_req_ready;
wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready;
wire curr_bank_dram_req_valid;
wire curr_bank_dram_req_rw;
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data;
wire curr_bank_dram_req_ready;
wire curr_bank_dram_req_valid;
wire curr_bank_dram_req_rw;
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data;
wire curr_bank_dram_req_ready;
wire curr_bank_dram_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr;
wire curr_bank_dram_rsp_ready;
wire curr_bank_dram_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr;
wire curr_bank_dram_rsp_ready;
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_inv;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_inv;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready;
wire curr_bank_snp_rsp_valid;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready;
wire curr_bank_snp_rsp_valid;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready;
wire curr_bank_miss;
wire curr_bank_miss;
// Core Req
assign curr_bank_core_req_valid = per_bank_valid[i];
assign curr_bank_core_req_addr = core_req_addr;
assign curr_bank_core_req_rw = core_req_rw;
assign curr_bank_core_req_byteen = core_req_byteen;
assign curr_bank_core_req_data = core_req_data;
assign curr_bank_core_req_tag = core_req_tag;
assign curr_bank_core_req_valid = per_bank_core_req_valid[i];
assign curr_bank_core_req_tid = per_bank_core_req_tid[i];
assign curr_bank_core_req_addr = core_req_addr[per_bank_core_req_tid[i]];
assign curr_bank_core_req_rw = core_req_rw[per_bank_core_req_tid[i]];
assign curr_bank_core_req_byteen = core_req_byteen[per_bank_core_req_tid[i]];
assign curr_bank_core_req_data = core_req_data[per_bank_core_req_tid[i]];
assign curr_bank_core_req_tag = core_req_tag[per_bank_core_req_tid[i]];
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
// Core WB
@@ -298,6 +300,7 @@ module VX_cache #(
.reset (reset),
// Core request
.core_req_valid (curr_bank_core_req_valid),
.core_req_tid (curr_bank_core_req_tid),
.core_req_rw (curr_bank_core_req_rw),
.core_req_byteen (curr_bank_core_req_byteen),
.core_req_addr (curr_bank_core_req_addr),

View File

@@ -2,77 +2,64 @@
module VX_cache_core_req_bank_sel #(
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 1,
parameter BANK_LINE_SIZE = 1,
// Size of a word in bytes
parameter WORD_SIZE = 1,
parameter WORD_SIZE = 1,
// Number of banks
parameter NUM_BANKS = 1,
parameter NUM_BANKS = 1,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 1
parameter NUM_REQS = 1
) (
input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready,
output wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid,
input wire [NUM_BANKS-1:0] per_bank_ready
output wire [NUM_REQS-1:0] core_req_ready,
output wire [NUM_BANKS-1:0] per_bank_valid,
output wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_tid,
input wire [NUM_BANKS-1:0] per_bank_ready
);
if (NUM_BANKS > 1) begin
if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_tid_r;
reg [NUM_REQS-1:0] core_req_ready_r;
wire [NUM_REQS-1:0][`BANK_BITS-1:0] core_req_bid;
reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r;
for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_bid[i] = core_req_addr[i][`BANK_SELECT_ADDR_RNG];
end
always @(*) begin
per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
per_bank_valid_r = 0;
per_bank_tid_r = 'x;
for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
per_bank_valid_r[core_req_bid[i]] = 1;
per_bank_tid_r[core_req_bid[i]] = `REQS_BITS'(i);
end
end
end
end
if (CORE_TAG_ID_BITS != 0) begin
reg [NUM_BANKS-1:0] per_bank_ready_other, per_bank_ready_ignore;
always @(*) begin
per_bank_ready_other = {NUM_BANKS{1'b1}};
per_bank_ready_ignore = {NUM_BANKS{1'b1}};
for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j) begin
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
always @(*) begin
core_req_ready_r = 0;
for (integer j = 0; j < NUM_BANKS; ++j) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
if (core_req_valid[i] && (core_req_bid[i] == `BANK_BITS'(j))) begin
core_req_ready_r[i] = per_bank_ready[j];
break;
end
end
end
for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] && per_bank_ready_other[i];
end
end
assign core_req_ready[0] = & (per_bank_ready | per_bank_ready_ignore);
end else begin
assign per_bank_valid = per_bank_valid_r;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign core_req_ready[i] = per_bank_ready[core_req_addr[i][`BANK_SELECT_ADDR_RNG]];
end
end
assign per_bank_valid = per_bank_valid_r;
assign per_bank_tid = per_bank_tid_r;
assign core_req_ready = core_req_ready_r;
end else begin
`UNUSED_VAR (core_req_valid)
`UNUSED_VAR (core_req_addr)
assign per_bank_valid = core_req_valid;
assign per_bank_tid = 0;
assign core_req_ready[0] = per_bank_ready;
end

View File

@@ -106,7 +106,6 @@ module VX_cache_core_rsp_merge #(
end
for (genvar i = 0; i < NUM_REQS; i++) begin
assign stall[i] = ~core_rsp_ready[i] && core_rsp_valid[i];
VX_generic_register #(

View File

@@ -57,7 +57,7 @@ module VX_miss_resrv #(
output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data_st0,
input wire dequeue_st3
);
reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
`USE_FAST_BRAM reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
reg [MSHR_SIZE-1:0] valid_table;
reg [MSHR_SIZE-1:0] ready_table;

View File

@@ -68,7 +68,7 @@ module VX_snp_forwarder #(
wire sfq_acquire = snp_req_valid && snp_req_ready;
wire sfq_release = snp_rsp_valid_unqual && snp_rsp_ready_unqual;
VX_cam_buffer #(
VX_index_buffer #(
.DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH),
.SIZE (SREQ_SIZE),
.FASTRAM (1)

View File

@@ -5,15 +5,15 @@ Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fdiv
The new component name is acl_s10_fdiv
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 539, DSPs 5, RAMBits 32768, RAMBlocks 3
The pipeline depth of the block is 15 cycle(s)
Deployment FPGA Stratix10
Estimated resources LUTs 681, DSPs 5, RAMBits 32768, RAMBlocks 3
The pipeline depth of the block is 25 cycle(s)
@@start
@name FPDiv@
@latency 15@
@LUT 539@
@latency 25@
@LUT 681@
@DSP 5@
@RAMBits 32768@
@RAMBlockUsage 3@
@@ -34,15 +34,15 @@ Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_fsqrt
The new component name is acl_s10_fsqrt
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 271, DSPs 3, RAMBits 15872, RAMBlocks 3
The pipeline depth of the block is 10 cycle(s)
Deployment FPGA Stratix10
Estimated resources LUTs 349, DSPs 3, RAMBits 15872, RAMBlocks 3
The pipeline depth of the block is 17 cycle(s)
@@start
@name FPSqrt@
@latency 10@
@LUT 271@
@latency 17@
@LUT 349@
@DSP 3@
@RAMBits 15872@
@RAMBlockUsage 3@
@@ -62,15 +62,15 @@ Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_ftoi
The new component name is acl_s10_ftoi
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 327, DSPs 0, RAMBits 0, RAMBlocks 0
Deployment FPGA Stratix10
Estimated resources LUTs 344, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPToFXP@
@latency 3@
@LUT 327@
@LUT 344@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@@ -90,15 +90,15 @@ Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_ftou
The new component name is acl_s10_ftou
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 287, DSPs 0, RAMBits 0, RAMBlocks 0
Deployment FPGA Stratix10
Estimated resources LUTs 272, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s)
@@start
@name FPToFXP@
@latency 3@
@LUT 287@
@LUT 272@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@@ -118,15 +118,15 @@ Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_itof
The new component name is acl_s10_itof
Frequency 250MHz
Deployment FPGA Arria10
Estimated resources LUTs 397, DSPs 0, RAMBits 0, RAMBlocks 0
Deployment FPGA Stratix10
Estimated resources LUTs 362, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s)
@@start
@name FXPToFP@
@latency 7@
@LUT 397@
@LUT 362@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@
@@ -146,15 +146,15 @@ Generation context:
HardFP is enabled enabling set to true
Faithful rounding constraint detected
Will not generate valid and channel signals
The new component name is acl_utof
The new component name is acl_s10_utof
Frequency 300MHz
Deployment FPGA Arria10
Estimated resources LUTs 363, DSPs 0, RAMBits 0, RAMBlocks 0
Deployment FPGA Stratix10
Estimated resources LUTs 310, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s)
@@start
@name FXPToFP@
@latency 7@
@LUT 363@
@LUT 310@
@DSP 0@
@RAMBits 0@
@RAMBlockUsage 0@

View File

@@ -2,7 +2,7 @@
CMD_POLY_EVAL_PATH=$QUARTUS_HOME/dspba/backend/linux64
OPTIONS="-target Arria10 -lang verilog -enableHardFP 1 -printMachineReadable -faithfulRounding -noChanValid -enable -speedgrade 2"
OPTIONS="-target Stratix10 -lang verilog -enableHardFP 1 -printMachineReadable -faithfulRounding -noChanValid -enable -speedgrade 2"
export LD_LIBRARY_PATH=$CMD_POLY_EVAL_PATH:$LD_LIBRARY_PATH
@@ -14,12 +14,12 @@ FBITS="f$(($EXP_BITS + $MAN_BITS + 1))"
echo Generating IP cores for $FBITS
{
$CMD -name acl_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
$CMD -name acl_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS
$CMD -name acl_ftoi -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 1
$CMD -name acl_ftou -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 0
$CMD -name acl_itof -frequency 250 FXPToFP 32 0 1 $EXP_BITS $MAN_BITS
$CMD -name acl_utof -frequency 300 FXPToFP 32 0 0 $EXP_BITS $MAN_BITS
$CMD -name acl_s10_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
$CMD -name acl_s10_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS
$CMD -name acl_s10_ftoi -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 1
$CMD -name acl_s10_ftou -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 0
$CMD -name acl_s10_itof -frequency 250 FXPToFP 32 0 1 $EXP_BITS $MAN_BITS
$CMD -name acl_s10_utof -frequency 300 FXPToFP 32 0 0 $EXP_BITS $MAN_BITS
} > acl_gen.log 2>&1
#cp $QUARTUS_HOME/dspba/backend/Libraries/sv/base/dspba_library_ver.sv .

View File

@@ -10,13 +10,13 @@ interface VX_cache_core_req_if #(
parameter CORE_TAG_ID_BITS = 0
) ();
wire [NUM_REQS-1:0] valid;
wire [`CORE_REQ_TAG_COUNT-1:0] rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag;
wire ready;
wire [NUM_REQS-1:0] valid;
wire [NUM_REQS-1:0] rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] tag;
wire [NUM_REQS-1:0] ready;
endinterface

View File

@@ -1,6 +1,6 @@
`include "VX_platform.vh"
module VX_cam_buffer #(
module VX_index_buffer #(
parameter DATAW = 1,
parameter SIZE = 1,
parameter FASTRAM = 0,
@@ -48,16 +48,18 @@ module VX_cam_buffer #(
always @(posedge clk) begin
if (reset) begin
free_slots <= {SIZE{1'b1}};
full_r <= 1'b0;
write_addr_r <= ADDRW'(1'b0);
free_slots <= {SIZE{1'b1}};
full_r <= 1'b0;
end else begin
if (release_slot) begin
assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr);
end
free_slots <= free_slots_n;
write_addr_r <= free_index;
full_r <= ~free_valid;
if (acquire_slot || full_r) begin
write_addr_r <= free_index;
end
free_slots <= free_slots_n;
full_r <= ~free_valid;
end
end

View File

@@ -6,10 +6,12 @@ RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfa
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family
FAMILY = "Arria 10"
#DEVICE = 1SX280HN2F43E2VG
DEVICE = 10AX115N3F40E2SG
FAMILY = "Stratix 10"
DEVICE = 1SX280HN2F43E2VG
#FAMILY = "Arria 10"
#DEVICE = 10AX115N3F40E2SG
# Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1