Allow partial cache request submissions (I/O bus support is broken)

This commit is contained in:
Blaise Tine
2020-12-21 03:53:13 -08:00
parent 4bbd7bf408
commit 4b7d871d62
24 changed files with 342 additions and 968 deletions

View File

@@ -36,22 +36,7 @@ module VX_cluster #(
output wire [`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag, output wire [`L2SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready, input wire snp_rsp_ready,
// I/O request // CSR Request
output wire [`NUM_THREADS-1:0] io_req_valid,
output wire io_req_rw,
output wire [`NUM_THREADS-1:0][3:0] io_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] io_req_addr,
output wire [`NUM_THREADS-1:0][31:0] io_req_data,
output wire [`L2CORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire [31:0] io_rsp_data,
input wire [`L2CORE_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// CSR I/O Request
input wire csr_io_req_valid, input wire csr_io_req_valid,
input wire [`NC_BITS-1:0] csr_io_req_coreid, input wire [`NC_BITS-1:0] csr_io_req_coreid,
input wire [11:0] csr_io_req_addr, input wire [11:0] csr_io_req_addr,
@@ -59,7 +44,7 @@ module VX_cluster #(
input wire [31:0] csr_io_req_data, input wire [31:0] csr_io_req_data,
output wire csr_io_req_ready, output wire csr_io_req_ready,
// CSR I/O Response // CSR Response
output wire csr_io_rsp_valid, output wire csr_io_rsp_valid,
output wire [31:0] csr_io_rsp_data, output wire [31:0] csr_io_rsp_data,
input wire csr_io_rsp_ready, input wire csr_io_rsp_ready,
@@ -91,19 +76,6 @@ module VX_cluster #(
wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag; wire [`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] per_core_snp_rsp_tag;
wire [`NUM_CORES-1:0] per_core_snp_rsp_ready; wire [`NUM_CORES-1:0] per_core_snp_rsp_ready;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0] per_core_io_req_valid;
wire [`NUM_CORES-1:0] per_core_io_req_rw;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0][3:0] per_core_io_req_byteen;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0][29:0] per_core_io_req_addr;
wire [`NUM_CORES-1:0][`NUM_THREADS-1:0][31:0] per_core_io_req_data;
wire [`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_req_tag;
wire [`NUM_CORES-1:0] per_core_io_req_ready;
wire [`NUM_CORES-1:0] per_core_io_rsp_valid;
wire [`NUM_CORES-1:0][`DCORE_TAG_WIDTH-1:0] per_core_io_rsp_tag;
wire [`NUM_CORES-1:0][31:0] per_core_io_rsp_data;
wire [`NUM_CORES-1:0] per_core_io_rsp_ready;
wire [`NUM_CORES-1:0] per_core_csr_io_req_valid; wire [`NUM_CORES-1:0] per_core_csr_io_req_valid;
wire [`NUM_CORES-1:0][11:0] per_core_csr_io_req_addr; wire [`NUM_CORES-1:0][11:0] per_core_csr_io_req_addr;
wire [`NUM_CORES-1:0] per_core_csr_io_req_rw; wire [`NUM_CORES-1:0] per_core_csr_io_req_rw;
@@ -149,19 +121,6 @@ module VX_cluster #(
.snp_rsp_tag (per_core_snp_rsp_tag [i]), .snp_rsp_tag (per_core_snp_rsp_tag [i]),
.snp_rsp_ready (per_core_snp_rsp_ready [i]), .snp_rsp_ready (per_core_snp_rsp_ready [i]),
.io_req_valid (per_core_io_req_valid [i]),
.io_req_rw (per_core_io_req_rw [i]),
.io_req_byteen (per_core_io_req_byteen [i]),
.io_req_addr (per_core_io_req_addr [i]),
.io_req_data (per_core_io_req_data [i]),
.io_req_tag (per_core_io_req_tag [i]),
.io_req_ready (per_core_io_req_ready [i]),
.io_rsp_valid (per_core_io_rsp_valid [i]),
.io_rsp_data (per_core_io_rsp_data [i]),
.io_rsp_tag (per_core_io_rsp_tag [i]),
.io_rsp_ready (per_core_io_rsp_ready [i]),
.csr_io_req_valid (per_core_csr_io_req_valid[i]), .csr_io_req_valid (per_core_csr_io_req_valid[i]),
.csr_io_req_rw (per_core_csr_io_req_rw [i]), .csr_io_req_rw (per_core_csr_io_req_rw [i]),
.csr_io_req_addr (per_core_csr_io_req_addr [i]), .csr_io_req_addr (per_core_csr_io_req_addr [i]),
@@ -177,48 +136,6 @@ module VX_cluster #(
); );
end end
VX_databus_arb #(
.NUM_REQS (`NUM_CORES),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`DCORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`L2CORE_TAG_WIDTH),
.BUFFERED_REQ (`NUM_CORES >= 4),
.BUFFERED_RSP (1)
) io_arb (
.clk (clk),
.reset (reset),
// input requests
.req_valid_in (per_core_io_req_valid),
.req_rw_in (per_core_io_req_rw),
.req_byteen_in (per_core_io_req_byteen),
.req_addr_in (per_core_io_req_addr),
.req_data_in (per_core_io_req_data),
.req_tag_in (per_core_io_req_tag),
.req_ready_in (per_core_io_req_ready),
// output request
.req_valid_out (io_req_valid),
.req_rw_out (io_req_rw),
.req_byteen_out (io_req_byteen),
.req_addr_out (io_req_addr),
.req_data_out (io_req_data),
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input response
.rsp_valid_in (io_rsp_valid),
.rsp_tag_in (io_rsp_tag),
.rsp_data_in (io_rsp_data),
.rsp_ready_in (io_rsp_ready),
// output responses
.rsp_valid_out (per_core_io_rsp_valid),
.rsp_data_out (per_core_io_rsp_data),
.rsp_tag_out (per_core_io_rsp_tag),
.rsp_ready_out (per_core_io_rsp_ready)
);
VX_csr_io_arb #( VX_csr_io_arb #(
.NUM_REQS (`NUM_CORES), .NUM_REQS (`NUM_CORES),
.DATA_WIDTH (32), .DATA_WIDTH (32),

View File

@@ -289,7 +289,7 @@
// Size of cache in bytes // Size of cache in bytes
`ifndef DCACHE_SIZE `ifndef DCACHE_SIZE
`define DCACHE_SIZE 4096 `define DCACHE_SIZE 8192
`endif `endif
// Number of banks // Number of banks
@@ -336,7 +336,7 @@
// Size of cache in bytes // Size of cache in bytes
`ifndef SMEM_SIZE `ifndef SMEM_SIZE
`define SMEM_SIZE 2048 `define SMEM_SIZE 4096
`endif `endif
// Number of banks // Number of banks

View File

@@ -35,21 +35,6 @@ module VX_core #(
output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag, output wire [`DSNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready, input wire snp_rsp_ready,
// I/O request
output wire [`NUM_THREADS-1:0] io_req_valid,
output wire io_req_rw,
output wire [`NUM_THREADS-1:0][3:0] io_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] io_req_addr,
output wire [`NUM_THREADS-1:0][31:0] io_req_data,
output wire [`DCORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire [31:0] io_rsp_data,
input wire [`DCORE_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// CSR I/O request // CSR I/O request
input wire csr_io_req_valid, input wire csr_io_req_valid,
input wire [11:0] csr_io_req_addr, input wire [11:0] csr_io_req_addr,
@@ -117,35 +102,6 @@ module VX_core #(
//-- //--
VX_cache_core_req_if #(
.NUM_REQS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
) io_req_if();
VX_cache_core_rsp_if #(
.NUM_REQS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE),
.CORE_TAG_WIDTH(`DCORE_TAG_WIDTH),
.CORE_TAG_ID_BITS(`DCORE_TAG_ID_BITS)
) io_rsp_if();
assign io_req_valid = io_req_if.valid;
assign io_req_rw = io_req_if.rw;
assign io_req_byteen = io_req_if.byteen;
assign io_req_addr = io_req_if.addr;
assign io_req_data = io_req_if.data;
assign io_req_tag = io_req_if.tag;
assign io_req_if.ready = io_req_ready;
assign io_rsp_if.valid = {{(`NUM_THREADS-1){1'b0}}, io_rsp_valid};
assign io_rsp_if.data[0] = io_rsp_data;
assign io_rsp_if.tag = io_rsp_tag;
assign io_rsp_ready = io_rsp_if.ready;
//--
VX_cache_core_req_if #( VX_cache_core_req_if #(
.NUM_REQS(`DNUM_REQUESTS), .NUM_REQS(`DNUM_REQUESTS),
.WORD_SIZE(`DWORD_SIZE), .WORD_SIZE(`DWORD_SIZE),
@@ -259,11 +215,7 @@ module VX_core #(
// DRAM // DRAM
.dram_req_if (dram_req_if), .dram_req_if (dram_req_if),
.dram_rsp_if (dram_rsp_if), .dram_rsp_if (dram_rsp_if)
// I/O
.io_req_if (io_req_if),
.io_rsp_if (io_rsp_if)
); );
endmodule endmodule

View File

@@ -1,125 +1,107 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_databus_arb #( module VX_databus_arb (
parameter NUM_REQS = 1, input wire clk,
parameter WORD_SIZE = 1, input wire reset,
parameter TAG_IN_WIDTH = 1,
parameter TAG_OUT_WIDTH = 1,
parameter BUFFERED_REQ = 0,
parameter BUFFERED_RSP = 0,
parameter WORD_WIDTH = WORD_SIZE * 8, // input request
parameter ADDR_WIDTH = 32 - `CLOG2(WORD_SIZE), VX_cache_core_req_if core_req_if,
parameter LOG_NUM_REQS = `CLOG2(NUM_REQS)
) (
input wire clk,
input wire reset,
// input requests // output requests
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0] req_valid_in, VX_cache_core_req_if cache_req_if,
input wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] req_tag_in, VX_cache_core_req_if smem_req_if,
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][ADDR_WIDTH-1:0] req_addr_in,
input wire [NUM_REQS-1:0] req_rw_in,
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][WORD_SIZE-1:0] req_byteen_in,
input wire [NUM_REQS-1:0][`NUM_THREADS-1:0][WORD_WIDTH-1:0] req_data_in,
output wire [NUM_REQS-1:0] req_ready_in,
// output request // input responses
output wire [`NUM_THREADS-1:0] req_valid_out, VX_cache_core_rsp_if cache_rsp_if,
output wire [TAG_OUT_WIDTH-1:0] req_tag_out, VX_cache_core_rsp_if smem_rsp_if,
output wire [`NUM_THREADS-1:0][ADDR_WIDTH-1:0] req_addr_out,
output wire req_rw_out,
output wire [`NUM_THREADS-1:0][WORD_SIZE-1:0] req_byteen_out,
output wire [`NUM_THREADS-1:0][WORD_WIDTH-1:0] req_data_out,
input wire req_ready_out,
// input response // output response
input wire rsp_valid_in, VX_cache_core_rsp_if core_rsp_if
input wire [TAG_OUT_WIDTH-1:0] rsp_tag_in,
input wire [WORD_WIDTH-1:0] rsp_data_in,
output wire rsp_ready_in,
// output responses
output wire [NUM_REQS-1:0] rsp_valid_out,
output wire [NUM_REQS-1:0][TAG_IN_WIDTH-1:0] rsp_tag_out,
output wire [NUM_REQS-1:0][WORD_WIDTH-1:0] rsp_data_out,
input wire [NUM_REQS-1:0] rsp_ready_out
); );
localparam REQ_DATAW = `NUM_THREADS + TAG_OUT_WIDTH + (`NUM_THREADS * ADDR_WIDTH) + 1 + (`NUM_THREADS * WORD_SIZE) + (`NUM_THREADS * WORD_WIDTH); localparam REQ_ADDRW = 32 - `CLOG2(`DWORD_SIZE);
localparam RSP_DATAW = TAG_IN_WIDTH + WORD_WIDTH; localparam REQ_DATAW = REQ_ADDRW + 1 + `DWORD_SIZE + (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
if (NUM_REQS > 1) begin //
// handle requests
//
wire [NUM_REQS-1:0] valids; for (genvar i = 0; i < `NUM_THREADS; ++i) begin
wire [NUM_REQS-1:0][REQ_DATAW-1:0] data_in;
wire [`NUM_THREADS-1:0] req_tmask_out;
wire req_valid_out_unqual;
for (genvar i = 0; i < NUM_REQS; i++) begin wire cache_req_ready_in;
assign valids[i] = (| req_valid_in[i]); wire smem_req_ready_in;
assign data_in[i] = {req_valid_in[i], {req_tag_in[i], LOG_NUM_REQS'(i)}, req_addr_in[i], req_rw_in[i], req_byteen_in[i], req_data_in[i]};
end
VX_stream_arbiter #( // select shared memory bus
.NUM_REQS (NUM_REQS), wire is_smem_addr = core_req_if.valid[i] && `SM_ENABLE
.DATAW (REQ_DATAW), && (core_req_if.addr[i] >= REQ_ADDRW'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> 2))
.BUFFERED (BUFFERED_REQ) && (core_req_if.addr[i] < REQ_ADDRW'(`SHARED_MEM_BASE_ADDR >> 2));
) req_arb (
VX_skid_buffer #(
.DATAW (REQ_DATAW),
.PASSTHRU (1)
) cache_out_buffer (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.valid_in (valids), .valid_in (core_req_if.valid[i] && !is_smem_addr),
.data_in (data_in), .data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.ready_in (req_ready_in), .ready_in (cache_req_ready_in),
.valid_out (req_valid_out_unqual), .valid_out (cache_req_if.valid[i]),
.data_out ({req_tmask_out, req_tag_out, req_addr_out, req_rw_out, req_byteen_out, req_data_out}), .data_out ({cache_req_if.addr[i], cache_req_if.rw[i], cache_req_if.byteen[i], cache_req_if.data[i], cache_req_if.tag[i]}),
.ready_out (req_ready_out) .ready_out (cache_req_if.ready[i])
); );
assign req_valid_out = {`NUM_THREADS{req_valid_out_unqual}} & req_tmask_out; VX_skid_buffer #(
.DATAW (REQ_DATAW),
/////////////////////////////////////////////////////////////////////// .PASSTHRU (1)
) smem_out_buffer (
wire [LOG_NUM_REQS-1:0] rsp_sel = rsp_tag_in[LOG_NUM_REQS-1:0];
wire [NUM_REQS-1:0][RSP_DATAW-1:0] rsp_merged_data_out;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign {rsp_tag_out[i], rsp_data_out[i]} = rsp_merged_data_out[i];
end
VX_stream_demux #(
.NUM_REQS (NUM_REQS),
.DATAW (RSP_DATAW),
.BUFFERED (BUFFERED_RSP)
) rsp_demux (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.sel (rsp_sel), .valid_in (core_req_if.valid[i] && is_smem_addr),
.valid_in (rsp_valid_in), .data_in ({core_req_if.addr[i], core_req_if.rw[i], core_req_if.byteen[i], core_req_if.data[i], core_req_if.tag[i]}),
.data_in ({rsp_tag_in[LOG_NUM_REQS +: TAG_IN_WIDTH], rsp_data_in}), .ready_in (smem_req_ready_in),
.ready_in (rsp_ready_in), .valid_out (smem_req_if.valid[i]),
.valid_out (rsp_valid_out), .data_out ({smem_req_if.addr[i], smem_req_if.rw[i], smem_req_if.byteen[i], smem_req_if.data[i], smem_req_if.tag[i]}),
.data_out (rsp_merged_data_out), .ready_out (smem_req_if.ready[i])
.ready_out (rsp_ready_out)
); );
end else begin assign core_req_if.ready[i] = is_smem_addr ? smem_req_ready_in : cache_req_ready_in;
`UNUSED_VAR (clk)
`UNUSED_VAR (reset)
assign req_valid_out = req_valid_in;
assign req_tag_out = req_tag_in;
assign req_addr_out = req_addr_in;
assign req_rw_out = req_rw_in;
assign req_byteen_out = req_byteen_in;
assign req_data_out = req_data_in;
assign req_ready_in = req_ready_out;
assign rsp_valid_out = rsp_valid_in;
assign rsp_tag_out = rsp_tag_in;
assign rsp_data_out = rsp_data_in;
assign rsp_ready_in = rsp_ready_out;
end end
//
// handle responses
//
wire [1:0][RSP_DATAW-1:0] rsp_data_in;
wire [1:0] rsp_valid_in;
wire [1:0] rsp_ready_in;
wire core_rsp_valid;
wire [`NUM_THREADS-1:0] core_rsp_tmask;
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag};
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag};
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
VX_stream_arbiter #(
.NUM_REQS (2),
.DATAW (RSP_DATAW),
.BUFFERED (0)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready)
);
assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1];
assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}};
endmodule endmodule

View File

@@ -1,156 +0,0 @@
`include "VX_define.vh"
// VX_dcache_arb: routes a core data-memory request to exactly one of three
// downstream buses (data cache, shared memory, I/O) and merges the three
// response streams back into a single core response stream.
// The bus is chosen from thread 0's address only; the whole multi-thread
// request (all lanes) is forwarded to the chosen bus as one unit.
module VX_dcache_arb (
input wire clk,
input wire reset,
// input request
VX_cache_core_req_if core_req_if,
// output requests
VX_cache_core_req_if cache_req_if,
VX_cache_core_req_if smem_req_if,
VX_cache_core_req_if io_req_if,
// input responses
VX_cache_core_rsp_if cache_rsp_if,
VX_cache_core_rsp_if smem_rsp_if,
VX_cache_core_rsp_if io_rsp_if,
// output response
VX_cache_core_rsp_if core_rsp_if
);
// Width of a word address: 32-bit byte address minus the byte-offset bits.
localparam REQ_ADDRW = 32 - `CLOG2(`DWORD_SIZE);
// Packed request width: thread mask + rw (counted as 1 bit) + per-thread byteen
// + per-thread address + per-thread data + tag.
// NOTE(review): assumes core_req_if.rw is 1 bit and core_req_if.tag is
// `DCORE_TAG_WIDTH bits wide; the interface declaration is not visible here - confirm.
localparam REQ_DATAW = `NUM_THREADS + 1 + `NUM_THREADS * `DWORD_SIZE + `NUM_THREADS * REQ_ADDRW + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
// Packed response width: thread mask + per-thread data + tag.
localparam RSP_DATAW = `NUM_THREADS + `NUM_THREADS * (`DWORD_SIZE*8) + `DCORE_TAG_WIDTH;
//
// select request
//
// select shared memory bus
// True when thread 0 is valid, shared memory is enabled, and thread 0's word
// address lies in [(SHARED_MEM_BASE_ADDR - SMEM_SIZE) >> 2, SHARED_MEM_BASE_ADDR >> 2).
wire is_smem_addr = core_req_if.valid[0] && `SM_ENABLE
&& (core_req_if.addr[0] >= REQ_ADDRW'((`SHARED_MEM_BASE_ADDR - `SMEM_SIZE) >> 2))
&& (core_req_if.addr[0] < REQ_ADDRW'(`SHARED_MEM_BASE_ADDR >> 2));
// select io bus
// True when thread 0 is valid and its word address is at or above IO_BUS_BASE_ADDR >> 2.
wire is_io_addr = core_req_if.valid[0]
&& (core_req_if.addr[0] >= REQ_ADDRW'(`IO_BUS_BASE_ADDR >> 2));
// Per-bus buffer outputs: a single valid bit plus the buffered thread mask,
// and the ready signal each buffer presents back to the core side.
wire cache_req_valid_out;
wire [`NUM_THREADS-1:0] cache_req_tmask;
wire cache_req_ready_in;
wire smem_req_valid_out;
wire [`NUM_THREADS-1:0] smem_req_tmask;
wire smem_req_ready_in;
wire io_req_valid_out;
wire [`NUM_THREADS-1:0] io_req_tmask;
wire io_req_ready_in;
// req_select bit 0 = cache, bit 1 = shared memory, bit 2 = I/O (see the always block below).
reg [2:0] req_select;
// Ready of whichever buffer is currently selected; driven back to the core.
reg req_ready;
// Cache path: the buffer receives the whole packed request, with the per-thread
// valid bits carried in the data as a thread mask.
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) cache_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_select[0]),
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
.ready_in (cache_req_ready_in),
.valid_out (cache_req_valid_out),
.data_out ({cache_req_tmask, cache_req_if.addr, cache_req_if.rw, cache_req_if.byteen, cache_req_if.data, cache_req_if.tag}),
.ready_out (cache_req_if.ready)
);
// Re-expand the buffered thread mask into per-thread valid bits.
assign cache_req_if.valid = cache_req_tmask & {`NUM_THREADS{cache_req_valid_out}};
// Shared memory path: same packing as the cache path.
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) smem_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_select[1]),
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
.ready_in (smem_req_ready_in),
.valid_out (smem_req_valid_out),
.data_out ({smem_req_tmask, smem_req_if.addr, smem_req_if.rw, smem_req_if.byteen, smem_req_if.data, smem_req_if.tag}),
.ready_out (smem_req_if.ready)
);
assign smem_req_if.valid = smem_req_tmask & {`NUM_THREADS{smem_req_valid_out}};
// I/O path: same packing as the cache path.
VX_skid_buffer #(
.DATAW (REQ_DATAW)
) io_out_buffer (
.clk (clk),
.reset (reset),
.valid_in (req_select[2]),
.data_in ({core_req_if.valid, core_req_if.addr, core_req_if.rw, core_req_if.byteen, core_req_if.data, core_req_if.tag}),
.ready_in (io_req_ready_in),
.valid_out (io_req_valid_out),
.data_out ({io_req_tmask, io_req_if.addr, io_req_if.rw, io_req_if.byteen, io_req_if.data, io_req_if.tag}),
.ready_out (io_req_if.ready)
);
assign io_req_if.valid = io_req_tmask & {`NUM_THREADS{io_req_valid_out}};
// Bus selection with fixed priority: shared memory, then I/O, then cache.
// Exactly one req_select bit is set, and req_ready mirrors that buffer's ready_in.
// NOTE(review): the cache branch is the default, so req_select[0] is asserted
// whenever neither address check matches - including when thread 0 is not
// valid (both checks are gated on core_req_if.valid[0]) or when no thread is
// valid at all. In that case cache_out_buffer is offered an entry whose thread
// mask may be all zeros; whether that is harmless depends on VX_skid_buffer,
// which is not visible here - confirm.
always @(*) begin
req_select = 0;
if (is_smem_addr) begin
req_select[1] = 1;
req_ready = smem_req_ready_in;
end else if (is_io_addr) begin
req_select[2] = 1;
req_ready = io_req_ready_in;
end else begin
req_select[0] = 1;
req_ready = cache_req_ready_in;
end
end
assign core_req_if.ready = req_ready;
//
// select response
//
// Index 0 = cache, 1 = shared memory, 2 = I/O, matching the request side.
wire [2:0][RSP_DATAW-1:0] rsp_data_in;
wire [2:0] rsp_valid_in;
wire [2:0] rsp_ready_in;
wire core_rsp_valid;
wire [`NUM_THREADS-1:0] core_rsp_tmask;
// Each response is packed as {per-thread valid mask, data, tag}.
assign rsp_data_in[0] = {cache_rsp_if.valid, cache_rsp_if.data, cache_rsp_if.tag};
assign rsp_data_in[1] = {smem_rsp_if.valid, smem_rsp_if.data, smem_rsp_if.tag};
assign rsp_data_in[2] = {io_rsp_if.valid, io_rsp_if.data, io_rsp_if.tag};
// A source is offered to the arbiter when any of its threads has a valid
// response; the shared memory source is additionally masked by SM_ENABLE.
assign rsp_valid_in[0] = (| cache_rsp_if.valid);
assign rsp_valid_in[1] = (| smem_rsp_if.valid) & `SM_ENABLE;
assign rsp_valid_in[2] = (| io_rsp_if.valid);
// Merge the three response sources into the single core response stream.
VX_stream_arbiter #(
.NUM_REQS (3),
.DATAW (RSP_DATAW),
.BUFFERED (1)
) rsp_arb (
.clk (clk),
.reset (reset),
.valid_in (rsp_valid_in),
.data_in (rsp_data_in),
.ready_in (rsp_ready_in),
.valid_out (core_rsp_valid),
.data_out ({core_rsp_tmask, core_rsp_if.data, core_rsp_if.tag}),
.ready_out (core_rsp_if.ready)
);
// Each source's ready comes from its arbiter input port.
assign cache_rsp_if.ready = rsp_ready_in[0];
assign smem_rsp_if.ready = rsp_ready_in[1];
assign io_rsp_if.ready = rsp_ready_in[2];
// Re-expand the arbitrated thread mask into per-thread valid bits.
assign core_rsp_if.valid = core_rsp_tmask & {`NUM_THREADS{core_rsp_valid}};
endmodule

View File

@@ -39,7 +39,7 @@ module VX_fpu_unit #(
wire fpuq_push = fpu_req_if.valid && fpu_req_if.ready; wire fpuq_push = fpu_req_if.valid && fpu_req_if.ready;
wire fpuq_pop = valid_out && ready_out; wire fpuq_pop = valid_out && ready_out;
VX_cam_buffer #( VX_index_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`FPUQ_SIZE), .SIZE (`FPUQ_SIZE),
.FASTRAM (1) .FASTRAM (1)

View File

@@ -72,7 +72,8 @@ module VX_lsu_unit #(
reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags; reg [`LSUQ_SIZE-1:0][`DCORE_TAG_WIDTH-1:0] pending_tags;
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
wire stall_in; wire ready_in;
wire stall_in = ~ready_in & req_valid;
VX_generic_register #( VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))), .N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32))),
@@ -86,79 +87,98 @@ module VX_lsu_unit #(
.data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data}) .data_out ({req_valid, req_wid, req_tmask, req_pc, req_rw, req_rd, req_wb, req_address, req_sext, req_addr, req_offset, req_byteen, req_data})
); );
// Can accept new request?
assign lsu_req_if.ready = ~stall_in;
wire [`NW_BITS-1:0] rsp_wid; wire [`NW_BITS-1:0] rsp_wid;
wire [31:0] rsp_pc; wire [31:0] rsp_pc;
wire [`NR_BITS-1:0] rsp_rd; wire [`NR_BITS-1:0] rsp_rd;
wire rsp_wb; wire rsp_wb;
wire [`NUM_THREADS-1:0][1:0] rsp_offset; wire [`NUM_THREADS-1:0][1:0] rsp_offset;
wire [1:0] rsp_sext; wire [1:0] rsp_sext;
reg [`NUM_THREADS-1:0][31:0] rsp_data; reg [`NUM_THREADS-1:0][31:0] rsp_data;
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
reg [`NUM_THREADS-1:0] req_sent_mask, rsp_rem_mask_n;
wire req_sent_all;
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask; wire [`DCORE_TAG_ID_BITS-1:0] mbuf_waddr, mbuf_raddr;
wire mbuf_full;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag, rsp_tag; wire mbuf_push = (| dcache_req_if.valid) && (| dcache_req_if.ready)
wire lsuq_full; && (0 == req_sent_mask) // first submission only
&& (0 == req_rw); // loads only
wire lsuq_push = (| dcache_req_if.valid) && dcache_req_if.ready wire mbuf_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
&& (0 == req_rw); // loads only
wire lsuq_pop_part = (| dcache_rsp_if.valid) && dcache_rsp_if.ready; wire mbuf_pop = mbuf_pop_part && (0 == rsp_rem_mask_n);
assign rsp_tag = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0]; assign mbuf_raddr = dcache_rsp_if.tag[0][`DCORE_TAG_ID_BITS-1:0];
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask[rsp_tag] & ~dcache_rsp_if.valid; VX_index_buffer #(
wire lsuq_pop = lsuq_pop_part && (0 == mem_rsp_mask_n);
VX_cam_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
.SIZE (`LSUQ_SIZE), .SIZE (`LSUQ_SIZE),
.FASTRAM (1) .FASTRAM (1)
) req_metadata_buf ( ) req_metadata_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.write_addr (req_tag), .write_addr (mbuf_waddr),
.acquire_slot (lsuq_push), .acquire_slot (mbuf_push),
.read_addr (rsp_tag), .read_addr (mbuf_raddr),
.write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}), .write_data ({req_wid, req_pc, req_rd, req_wb, req_offset, req_sext}),
.read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_offset, rsp_sext}), .read_data ({rsp_wid, rsp_pc, rsp_rd, rsp_wb, rsp_offset, rsp_sext}),
.release_addr (rsp_tag), .release_addr (mbuf_raddr),
.release_slot (lsuq_pop), .release_slot (mbuf_pop),
.full (lsuq_full) .full (mbuf_full)
); );
assign req_sent_all = ((dcache_req_if.ready | req_sent_mask) & req_tmask) == req_tmask;
always @(posedge clk) begin always @(posedge clk) begin
if (lsuq_push) begin if (reset) begin
mem_rsp_mask[req_tag] <= req_tmask; req_sent_mask <= 0;
pending_tags[req_tag] <= dcache_req_if.tag; end else begin
end if (req_sent_all)
if (lsuq_pop_part) begin req_sent_mask <= 0;
mem_rsp_mask[rsp_tag] <= mem_rsp_mask_n; else
req_sent_mask <= req_sent_mask | (dcache_req_if.valid & dcache_req_if.ready);
end end
end end
wire load_req_stall = req_valid && !req_rw && lsuq_full; // need to hold the acquired tag index until the full request is submitted
wire store_req_stall = req_valid && req_rw && !st_commit_if.ready; reg [`DCORE_TAG_ID_BITS-1:0] req_tag_hold;
wire [`DCORE_TAG_ID_BITS-1:0] req_tag = (0 == req_sent_mask) ? mbuf_waddr : req_tag_hold;
always @(posedge clk) begin
if (mbuf_push)
req_tag_hold <= mbuf_waddr;
end
assign rsp_rem_mask_n = rsp_rem_mask[mbuf_raddr] & ~dcache_rsp_if.valid;
always @(posedge clk) begin
if (mbuf_push) begin
rsp_rem_mask[mbuf_waddr] <= req_tmask;
pending_tags[mbuf_waddr] <= dcache_req_if.tag[0];
end
if (mbuf_pop_part) begin
rsp_rem_mask[mbuf_raddr] <= rsp_rem_mask_n;
end
end
wire req_ready_dep = (!req_rw && !mbuf_full) || (req_rw && st_commit_if.ready);
// Core Request // Core Request
assign dcache_req_if.valid = {`NUM_THREADS{req_valid && !load_req_stall && !store_req_stall}} & req_tmask; assign dcache_req_if.valid = {`NUM_THREADS{req_valid && req_ready_dep}} & req_tmask & ~req_sent_mask;
assign dcache_req_if.rw = req_rw; assign dcache_req_if.rw = {`NUM_THREADS{req_rw}};
assign dcache_req_if.byteen = req_byteen; assign dcache_req_if.byteen = req_byteen;
assign dcache_req_if.addr = req_addr; assign dcache_req_if.addr = req_addr;
assign dcache_req_if.data = req_data; assign dcache_req_if.data = req_data;
`ifdef DBG_CACHE_REQ_INFO `ifdef DBG_CACHE_REQ_INFO
assign dcache_req_if.tag = {req_pc, req_rd, req_wid, req_tag}; assign dcache_req_if.tag = {`NUM_THREADS{{req_pc, req_rd, req_wid, req_tag}}};
`else `else
assign dcache_req_if.tag = req_tag; assign dcache_req_if.tag = {`NUM_THREADS{req_tag}};
`endif `endif
assign stall_in = ~dcache_req_if.ready assign ready_in = req_ready_dep && req_sent_all;
|| load_req_stall
|| store_req_stall;
// Can accept new request?
assign lsu_req_if.ready = ~stall_in;
// Core Response // Core Response
for (genvar i = 0; i < `NUM_THREADS; i++) begin for (genvar i = 0; i < `NUM_THREADS; i++) begin
@@ -174,7 +194,7 @@ module VX_lsu_unit #(
// send store commit // send store commit
wire is_store_rsp = req_valid && req_rw && dcache_req_if.ready; wire is_store_rsp = req_valid && req_rw && req_sent_all;
assign st_commit_if.valid = is_store_rsp; assign st_commit_if.valid = is_store_rsp;
assign st_commit_if.wid = req_wid; assign st_commit_if.wid = req_wid;
@@ -206,7 +226,7 @@ module VX_lsu_unit #(
assign dcache_rsp_if.ready = ~load_rsp_stall; assign dcache_rsp_if.ready = ~load_rsp_stall;
// scope registration // scope registration
`SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & {`NUM_THREADS{dcache_req_if.ready}}); `SCOPE_ASSIGN (dcache_req_fire, dcache_req_if.valid & dcache_req_if.ready);
`SCOPE_ASSIGN (dcache_req_wid, req_wid); `SCOPE_ASSIGN (dcache_req_wid, req_wid);
`SCOPE_ASSIGN (dcache_req_pc, req_pc); `SCOPE_ASSIGN (dcache_req_pc, req_pc);
`SCOPE_ASSIGN (dcache_req_addr, req_address); `SCOPE_ASSIGN (dcache_req_addr, req_address);
@@ -216,23 +236,23 @@ module VX_lsu_unit #(
`SCOPE_ASSIGN (dcache_req_tag, req_tag); `SCOPE_ASSIGN (dcache_req_tag, req_tag);
`SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}}); `SCOPE_ASSIGN (dcache_rsp_fire, dcache_rsp_if.valid & {`NUM_THREADS{dcache_rsp_if.ready}});
`SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data); `SCOPE_ASSIGN (dcache_rsp_data, dcache_rsp_if.data);
`SCOPE_ASSIGN (dcache_rsp_tag, rsp_tag); `SCOPE_ASSIGN (dcache_rsp_tag, mbuf_raddr);
`ifdef DBG_PRINT_CORE_DCACHE `ifdef DBG_PRINT_CORE_DCACHE
always @(posedge clk) begin always @(posedge clk) begin
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin if ((| dcache_req_if.valid) && (|dcache_req_if.ready)) begin
if (dcache_req_if.rw) if (dcache_req_if.rw[0])
$display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h", $display("%t: D$%0d Wr Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, data=%0h",
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data); $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, dcache_req_if.data);
else else
$display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d", $display("%t: D$%0d Rd Req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, byteen=%0h, rd=%0d",
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, dcache_req_if.byteen, req_rd); $time, CORE_ID, req_wid, req_pc, (dcache_req_if.valid & dcache_req_if.ready), req_address, dcache_req_if.tag, dcache_req_if.byteen, req_rd);
end end
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
$display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h", $display("%t: D$%0d Rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
$time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data); $time, CORE_ID, dcache_rsp_if.valid, rsp_wid, rsp_pc, dcache_rsp_if.tag, rsp_rd, dcache_rsp_if.data);
end end
if (lsuq_full) begin if (mbuf_full) begin
$write("%t: D$%0d queue-full:", $time, CORE_ID); $write("%t: D$%0d queue-full:", $time, CORE_ID);
for (integer j = 0; j < `LSUQ_SIZE; j++) begin for (integer j = 0; j < `LSUQ_SIZE; j++) begin
$write(" tag%0d=%0h", j, pending_tags[j]); $write(" tag%0d=%0h", j, pending_tags[j]);

View File

@@ -26,11 +26,7 @@ module VX_mem_unit # (
// DRAM // DRAM
VX_cache_dram_req_if dram_req_if, VX_cache_dram_req_if dram_req_if,
VX_cache_dram_rsp_if dram_rsp_if, VX_cache_dram_rsp_if dram_rsp_if
// I/O
VX_cache_core_req_if io_req_if,
VX_cache_core_rsp_if io_rsp_if
); );
`ifdef PERF_ENABLE `ifdef PERF_ENABLE
@@ -76,19 +72,17 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS) .CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS)
) smem_rsp_if(); ) smem_rsp_if();
VX_dcache_arb dcache_arb ( VX_databus_arb databus_arb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.core_req_if (core_dcache_req_if), .core_req_if (core_dcache_req_if),
.cache_req_if (dcache_req_if), .cache_req_if (dcache_req_if),
.smem_req_if (smem_req_if), .smem_req_if (smem_req_if),
.io_req_if (io_req_if),
.cache_rsp_if (dcache_rsp_if), .cache_rsp_if (dcache_rsp_if),
.smem_rsp_if (smem_rsp_if), .smem_rsp_if (smem_rsp_if),
.io_rsp_if (io_rsp_if), .core_rsp_if (core_dcache_rsp_if)
.core_rsp_if (core_dcache_rsp_if)
); );
VX_cache #( VX_cache #(

View File

@@ -31,7 +31,7 @@ module VX_mul_unit #(
wire mulq_push = mul_req_if.valid && mul_req_if.ready; wire mulq_push = mul_req_if.valid && mul_req_if.ready;
wire mulq_pop = valid_out && ready_out; wire mulq_pop = valid_out && ready_out;
VX_cam_buffer #( VX_index_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`MULQ_SIZE), .SIZE (`MULQ_SIZE),
.FASTRAM (1) .FASTRAM (1)

View File

@@ -11,12 +11,12 @@ module VX_pipeline #(
// Dcache core request // Dcache core request
output wire [`NUM_THREADS-1:0] dcache_req_valid, output wire [`NUM_THREADS-1:0] dcache_req_valid,
output wire dcache_req_rw, output wire [`NUM_THREADS-1:0] dcache_req_rw,
output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen, output wire [`NUM_THREADS-1:0][3:0] dcache_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr, output wire [`NUM_THREADS-1:0][29:0] dcache_req_addr,
output wire [`NUM_THREADS-1:0][31:0] dcache_req_data, output wire [`NUM_THREADS-1:0][31:0] dcache_req_data,
output wire [`DCORE_TAG_WIDTH-1:0] dcache_req_tag, output wire [`NUM_THREADS-1:0][`DCORE_TAG_WIDTH-1:0] dcache_req_tag,
input wire dcache_req_ready, input wire [`NUM_THREADS-1:0] dcache_req_ready,
// Dcache core response // Dcache core response
input wire [`NUM_THREADS-1:0] dcache_rsp_valid, input wire [`NUM_THREADS-1:0] dcache_rsp_valid,

View File

@@ -34,22 +34,7 @@ module Vortex (
output wire [`VX_SNP_TAG_WIDTH-1:0] snp_rsp_tag, output wire [`VX_SNP_TAG_WIDTH-1:0] snp_rsp_tag,
input wire snp_rsp_ready, input wire snp_rsp_ready,
// I/O request // CSR Request
output wire [`NUM_THREADS-1:0] io_req_valid,
output wire io_req_rw,
output wire [`NUM_THREADS-1:0][3:0] io_req_byteen,
output wire [`NUM_THREADS-1:0][29:0] io_req_addr,
output wire [`NUM_THREADS-1:0][31:0] io_req_data,
output wire [`VX_CORE_TAG_WIDTH-1:0] io_req_tag,
input wire io_req_ready,
// I/O response
input wire io_rsp_valid,
input wire [31:0] io_rsp_data,
input wire [`VX_CORE_TAG_WIDTH-1:0] io_rsp_tag,
output wire io_rsp_ready,
// CSR I/O Request
input wire csr_io_req_valid, input wire csr_io_req_valid,
input wire [`VX_CSR_ID_WIDTH-1:0] csr_io_req_coreid, input wire [`VX_CSR_ID_WIDTH-1:0] csr_io_req_coreid,
input wire [11:0] csr_io_req_addr, input wire [11:0] csr_io_req_addr,
@@ -57,7 +42,7 @@ module Vortex (
input wire [31:0] csr_io_req_data, input wire [31:0] csr_io_req_data,
output wire csr_io_req_ready, output wire csr_io_req_ready,
// CSR I/O Response // CSR Response
output wire csr_io_rsp_valid, output wire csr_io_rsp_valid,
output wire [31:0] csr_io_rsp_data, output wire [31:0] csr_io_rsp_data,
input wire csr_io_rsp_ready, input wire csr_io_rsp_ready,
@@ -89,19 +74,6 @@ module Vortex (
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag; wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] per_cluster_snp_rsp_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready; wire [`NUM_CLUSTERS-1:0] per_cluster_snp_rsp_ready;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0] per_cluster_io_req_valid;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_rw;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][3:0] per_cluster_io_req_byteen;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][29:0] per_cluster_io_req_addr;
wire [`NUM_CLUSTERS-1:0][`NUM_THREADS-1:0][31:0] per_cluster_io_req_data;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_req_tag;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_req_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_valid;
wire [`NUM_CLUSTERS-1:0][`L2CORE_TAG_WIDTH-1:0] per_cluster_io_rsp_tag;
wire [`NUM_CLUSTERS-1:0][31:0] per_cluster_io_rsp_data;
wire [`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_valid;
wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr; wire [`NUM_CLUSTERS-1:0][11:0] per_cluster_csr_io_req_addr;
wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw; wire [`NUM_CLUSTERS-1:0] per_cluster_csr_io_req_rw;
@@ -150,19 +122,6 @@ module Vortex (
.snp_rsp_tag (per_cluster_snp_rsp_tag [i]), .snp_rsp_tag (per_cluster_snp_rsp_tag [i]),
.snp_rsp_ready (per_cluster_snp_rsp_ready [i]), .snp_rsp_ready (per_cluster_snp_rsp_ready [i]),
.io_req_valid (per_cluster_io_req_valid [i]),
.io_req_rw (per_cluster_io_req_rw [i]),
.io_req_byteen (per_cluster_io_req_byteen [i]),
.io_req_addr (per_cluster_io_req_addr [i]),
.io_req_data (per_cluster_io_req_data [i]),
.io_req_tag (per_cluster_io_req_tag [i]),
.io_req_ready (per_cluster_io_req_ready [i]),
.io_rsp_valid (per_cluster_io_rsp_valid [i]),
.io_rsp_data (per_cluster_io_rsp_data [i]),
.io_rsp_tag (per_cluster_io_rsp_tag [i]),
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
.csr_io_req_valid (per_cluster_csr_io_req_valid[i]), .csr_io_req_valid (per_cluster_csr_io_req_valid[i]),
.csr_io_req_coreid (csr_io_core_id), .csr_io_req_coreid (csr_io_core_id),
.csr_io_req_rw (per_cluster_csr_io_req_rw [i]), .csr_io_req_rw (per_cluster_csr_io_req_rw [i]),
@@ -179,48 +138,6 @@ module Vortex (
); );
end end
VX_databus_arb #(
.NUM_REQS (`NUM_CLUSTERS),
.WORD_SIZE (4),
.TAG_IN_WIDTH (`L2CORE_TAG_WIDTH),
.TAG_OUT_WIDTH (`L3CORE_TAG_WIDTH),
.BUFFERED_REQ (1),
.BUFFERED_RSP (`NUM_CLUSTERS >= 4)
) io_arb (
.clk (clk),
.reset (reset),
// input requests
.req_valid_in (per_cluster_io_req_valid),
.req_rw_in (per_cluster_io_req_rw),
.req_byteen_in (per_cluster_io_req_byteen),
.req_addr_in (per_cluster_io_req_addr),
.req_data_in (per_cluster_io_req_data),
.req_tag_in (per_cluster_io_req_tag),
.req_ready_in (per_cluster_io_req_ready),
// output request
.req_valid_out (io_req_valid),
.req_rw_out (io_req_rw),
.req_byteen_out (io_req_byteen),
.req_addr_out (io_req_addr),
.req_data_out (io_req_data),
.req_tag_out (io_req_tag),
.req_ready_out (io_req_ready),
// input response
.rsp_valid_in (io_rsp_valid),
.rsp_tag_in (io_rsp_tag),
.rsp_data_in (io_rsp_data),
.rsp_ready_in (io_rsp_ready),
// output responses
.rsp_valid_out (per_cluster_io_rsp_valid),
.rsp_data_out (per_cluster_io_rsp_data),
.rsp_tag_out (per_cluster_io_rsp_tag),
.rsp_ready_out (per_cluster_io_rsp_ready)
);
VX_csr_io_arb #( VX_csr_io_arb #(
.NUM_REQS (`NUM_CLUSTERS), .NUM_REQS (`NUM_CLUSTERS),
.DATA_WIDTH (32), .DATA_WIDTH (32),

View File

@@ -999,21 +999,6 @@ Vortex #() vortex (
.snp_rsp_tag (vx_snp_rsp_tag), .snp_rsp_tag (vx_snp_rsp_tag),
.snp_rsp_ready (vx_snp_rsp_ready), .snp_rsp_ready (vx_snp_rsp_ready),
// I/O request
`UNUSED_PIN (io_req_valid),
`UNUSED_PIN (io_req_rw),
`UNUSED_PIN (io_req_byteen),
`UNUSED_PIN (io_req_addr),
`UNUSED_PIN (io_req_data),
`UNUSED_PIN (io_req_tag),
.io_req_ready (1'b1),
// I/O response
.io_rsp_valid (1'b0),
.io_rsp_data (0),
.io_rsp_tag (0),
`UNUSED_PIN (io_rsp_ready),
// CSR I/O Request // CSR I/O Request
.csr_io_req_valid (vx_csr_io_req_valid), .csr_io_req_valid (vx_csr_io_req_valid),
.csr_io_req_coreid(vx_csr_io_req_coreid), .csr_io_req_coreid(vx_csr_io_req_coreid),

View File

@@ -55,13 +55,14 @@ module VX_bank #(
input wire reset, input wire reset,
// Core Request // Core Request
input wire [NUM_REQS-1:0] core_req_valid, input wire core_req_valid,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw, input wire [`REQS_BITS-1:0] core_req_tid,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire core_req_rw,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, input wire [WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [`WORD_WIDTH-1:0] core_req_data,
output wire core_req_ready, input wire [CORE_TAG_WIDTH-1:0] core_req_tag,
output wire core_req_ready,
// Core Response // Core Response
output wire core_rsp_valid, output wire core_rsp_valid,
@@ -229,37 +230,21 @@ module VX_bank #(
wire creq_push = (| core_req_valid) && core_req_ready; wire creq_push = (| core_req_valid) && core_req_ready;
assign core_req_ready = !creq_full; assign core_req_ready = !creq_full;
VX_bank_core_req_queue #( VX_generic_queue #(
.WORD_SIZE (WORD_SIZE), .DATAW (CORE_TAG_WIDTH + `REQS_BITS + 1 + WORD_SIZE + `WORD_ADDR_WIDTH + `WORD_WIDTH),
.NUM_REQS (NUM_REQS), .SIZE (CREQ_SIZE),
.CREQ_SIZE (CREQ_SIZE), .BUFFERED (1),
.CORE_TAG_WIDTH (CORE_TAG_WIDTH), .FASTRAM (1)
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) core_req_queue ( ) core_req_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (creq_push),
// Enqueue .pop (creq_pop),
.push (creq_push), .data_in ({core_req_tag, core_req_tid, core_req_rw, core_req_byteen, core_req_addr, core_req_data}),
.tag_in (core_req_tag), .data_out({creq_tag_st0, creq_tid_st0, creq_rw_st0, creq_byteen_st0, creq_addr_st0, creq_writeword_st0}),
.valids_in (core_req_valid), .empty (creq_empty),
.rw_in (core_req_rw), .full (creq_full),
.byteen_in (core_req_byteen), `UNUSED_PIN (size)
.addr_in (core_req_addr),
.wdata_in (core_req_data),
// Dequeue
.pop (creq_pop),
.tag_out (creq_tag_st0),
.tid_out (creq_tid_st0),
.rw_out (creq_rw_st0),
.byteen_out (creq_byteen_st0),
.addr_out (creq_addr_st0),
.wdata_out (creq_writeword_st0),
// States
.empty (creq_empty),
.full (creq_full)
); );
reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size; reg [$clog2(MSHR_SIZE+1)-1:0] mshr_pending_size;

View File

@@ -1,215 +0,0 @@
`include "VX_cache_config.vh"
// Per-bank core request queue.
//
// Each push stores one bundle of up to NUM_REQS word requests (valid mask,
// tags, addresses, rw flags, byte enables and write data) as a single entry
// of a VX_generic_queue.
//
// - NUM_REQS > 1: the valid requests of the head bundle are presented on the
//   dequeue port one at a time through output registers; tid_out carries the
//   index of the request currently presented. The underlying queue entry is
//   only popped when 'pop' coincides with the internal end-of-packet flag.
// - NUM_REQS == 1: the queue outputs are forwarded directly and tid_out is 0.
module VX_bank_core_req_queue #(
// Size of a word in bytes
parameter WORD_SIZE = 1,
// Number of Word requests per cycle
parameter NUM_REQS = 1,
// Core Request Queue Size
parameter CREQ_SIZE = 1,
// core request tag size
parameter CORE_TAG_WIDTH = 1,
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 0
) (
input wire clk,
input wire reset,
// Enqueue
// push: store the whole input bundle as one queue entry
// valids_in: per-request valid mask of the bundle
input wire push,
input wire [NUM_REQS-1:0] valids_in,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag_in,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr_in,
input wire [`CORE_REQ_TAG_COUNT-1:0] rw_in,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen_in,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] wdata_in,
// Dequeue
// pop: consume the single request currently presented on the outputs
// tid_out: index (within the bundle) of the request being presented
input wire pop,
output wire [CORE_TAG_WIDTH-1:0] tag_out,
output wire [`WORD_ADDR_WIDTH-1:0] addr_out,
output wire rw_out,
output wire [WORD_SIZE-1:0] byteen_out,
output wire [`WORD_WIDTH-1:0] wdata_out,
output wire [`REQS_BITS-1:0] tid_out,
// States
output wire empty,
output wire full
);
// Fields of the bundle at the head of the underlying queue
wire [NUM_REQS-1:0] q_valids;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] q_tag;
wire [`CORE_REQ_TAG_COUNT-1:0] q_rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] q_byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] q_addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] q_wdata;
// Control/status of the underlying queue (driven per NUM_REQS branch below)
wire q_push;
wire q_pop;
wire q_empty;
wire q_full;
// Interface checks: a push must carry at least one valid request and must
// not happen while full; a pop must not happen while empty.
always @(*) begin
assert(!push || (| valids_in));
assert(!push || !full);
assert(!pop || !empty);
end
// Bundle storage: one entry holds all fields of one pushed bundle.
// data_in and data_out use the same field order so the unpacking matches.
VX_generic_queue #(
.DATAW ($bits(valids_in) + $bits(tag_in) + $bits(addr_in) + $bits(rw_in) + $bits(byteen_in) + $bits(wdata_in)),
.SIZE (CREQ_SIZE),
.BUFFERED (1),
.FASTRAM (1)
) req_queue (
.clk (clk),
.reset (reset),
.push (q_push),
.pop (q_pop),
.data_in ({valids_in, tag_in, addr_in, rw_in, byteen_in, wdata_in}),
.data_out ({q_valids, q_tag, q_addr, q_rw, q_byteen, q_wdata}),
.empty (q_empty),
.full (q_full),
`UNUSED_PIN (size)
);
if (NUM_REQS > 1) begin
// sel_*   : combinational selection out of the head bundle
// sel_*_r : registered copies that drive the dequeue outputs
reg [`REQS_BITS-1:0] sel_idx, sel_idx_r;
reg [CORE_TAG_WIDTH-1:0] sel_tag, sel_tag_r;
reg [`WORD_ADDR_WIDTH-1:0] sel_addr, sel_addr_r;
reg sel_rw, sel_rw_r;
reg [WORD_SIZE-1:0] sel_byteen, sel_byteen_r;
reg [`WORD_WIDTH-1:0] sel_wdata, sel_wdata_r;
// q_valids_cnt   : count reported by VX_countones for q_valids
//                  (presumably the number of set bits - confirm against VX_countones)
// q_valids_cnt_r : registered count, decremented on each pop
// q_valids_cnt_n : next value of q_valids_cnt_r
reg [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_r;
wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt_n;
wire [$clog2(NUM_REQS+1)-1:0] q_valids_cnt;
// Requests of the head bundle that are excluded from selection
reg [NUM_REQS-1:0] pop_mask;
reg fast_track;
wire fast_track_n;
reg req_eop; // request end of packet
// Registered empty status exported on 'empty'; set when the next count is zero
reg empty_r;
assign q_push = push;
// The queue entry is only released when a pop coincides with end of packet
assign q_pop = pop && req_eop;
// Valid requests of the head bundle not yet masked out
wire [NUM_REQS-1:0] requests = q_valids & ~pop_mask;
// Pick the lowest-indexed pending request. With no pending request the index
// defaults to 0 and the payload to 'x. Tag and rw are picked per request only
// when CORE_TAG_ID_BITS == 0; otherwise sel_tag/sel_rw stay 'x and are unused.
always @(*) begin
sel_idx = 0;
sel_tag = 'x;
sel_addr = 'x;
sel_rw = 'x;
sel_byteen = 'x;
sel_wdata = 'x;
for (integer i = 0; i < NUM_REQS; i++) begin
if (requests[i]) begin
sel_idx = `REQS_BITS'(i);
sel_addr = q_addr[i];
if (0 == CORE_TAG_ID_BITS) begin
sel_tag = q_tag[i];
sel_rw = q_rw[i];
end
sel_byteen = q_byteen[i];
sel_wdata = q_wdata[i];
break;
end
end
end
VX_countones #(
.N(NUM_REQS)
) counter (
.valids (q_valids),
.count (q_valids_cnt)
);
// Next-state logic. The "load" condition
//   !q_empty && (empty_r || (pop && fast_track))
// reloads the count from the head bundle and clears fast_track. Otherwise a
// pop decrements the count and sets fast_track when the registered count is 2;
// with no pop both hold their value.
assign fast_track_n = (!q_empty && (empty_r || (pop && fast_track))) ? 0 :
pop ? (q_valids_cnt_r == 2) :
fast_track;
assign q_valids_cnt_n = (!q_empty && (empty_r || (pop && fast_track))) ? q_valids_cnt :
pop ? (q_valids_cnt_r - 1) :
q_valids_cnt_r;
always @(posedge clk) begin
if (reset) begin
pop_mask <= 0;
fast_track <= 0;
q_valids_cnt_r <= 0;
req_eop <= 0;
empty_r <= 1;
end else begin
// pop_mask update: on load, mask only the request being latched now;
// on a pop with registered count 1 or 2, clear the mask; on any other
// pop, additionally mask the request being latched now.
if (!q_empty
&& (empty_r || (pop && fast_track))) begin
pop_mask <= (NUM_REQS'(1) << sel_idx);
end else if (pop) begin
if (q_valids_cnt_r == 1 || q_valids_cnt_r == 2) begin
pop_mask <= 0;
end else begin
pop_mask[sel_idx] <= 1;
end
end
q_valids_cnt_r <= q_valids_cnt_n;
fast_track <= fast_track_n;
// End of packet: next count is 1 or 2 and fast_track will not be set
req_eop <= (q_valids_cnt_n == 1 || q_valids_cnt_n == 2) && !fast_track_n;
empty_r <= (0 == q_valids_cnt_n);
end
// Output registers are not reset; they latch the current selection
// whenever the output stage is empty or is being popped.
if (empty_r || pop) begin
sel_idx_r <= sel_idx;
sel_byteen_r <= sel_byteen;
sel_addr_r <= sel_addr;
sel_wdata_r <= sel_wdata;
end
end
if (CORE_TAG_ID_BITS != 0) begin
// Tag and rw are latched from the whole q_tag / q_rw vectors rather than
// per request. NOTE(review): assumes `CORE_REQ_TAG_COUNT is 1 in this
// configuration so the widths match - confirm in VX_cache_config.vh.
`UNUSED_VAR (sel_tag)
`UNUSED_VAR (sel_rw)
always @(posedge clk) begin
if (empty_r || pop) begin
sel_tag_r <= q_tag;
sel_rw_r <= q_rw;
end
end
end else begin
// Per-request tag and rw: latch the values picked by the selection logic
always @(posedge clk) begin
if (empty_r || pop) begin
sel_tag_r <= sel_tag;
sel_rw_r <= sel_rw;
end
end
end
// Dequeue port is driven from the registered selection
assign tag_out = sel_tag_r;
assign addr_out = sel_addr_r;
assign rw_out = sel_rw_r;
assign byteen_out = sel_byteen_r;
assign wdata_out = sel_wdata_r;
assign tid_out = sel_idx_r;
assign full = q_full;
assign empty = empty_r;
end else begin
// Single request per bundle: pass the underlying queue straight through
`UNUSED_VAR (q_valids)
assign q_push = push;
assign q_pop = pop;
assign tag_out = q_tag;
assign addr_out = q_addr;
assign rw_out = q_rw;
assign byteen_out = q_byteen;
assign wdata_out = q_wdata;
assign tid_out = 0;
assign empty = q_empty;
assign full = q_full;
end
endmodule

View File

@@ -58,12 +58,12 @@ module VX_cache #(
// Core request // Core request
input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_valid,
input wire [`CORE_REQ_TAG_COUNT-1:0] core_req_rw, input wire [NUM_REQS-1:0] core_req_rw,
input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen, input wire [NUM_REQS-1:0][WORD_SIZE-1:0] core_req_byteen,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data, input wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] core_req_data,
input wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] core_req_tag, input wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] core_req_tag,
output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready, output wire [NUM_REQS-1:0] core_req_ready,
// Core response // Core response
output wire [NUM_REQS-1:0] core_rsp_valid, output wire [NUM_REQS-1:0] core_rsp_valid,
@@ -108,8 +108,8 @@ module VX_cache #(
`STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value")) `STATIC_ASSERT(NUM_BANKS <= NUM_REQS, ("invalid value"))
wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid; wire [NUM_BANKS-1:0] per_bank_core_req_valid;
wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_core_req_tid;
wire [NUM_BANKS-1:0] per_bank_core_req_ready; wire [NUM_BANKS-1:0] per_bank_core_req_ready;
wire [NUM_BANKS-1:0] per_bank_core_rsp_valid; wire [NUM_BANKS-1:0] per_bank_core_rsp_valid;
@@ -155,14 +155,14 @@ module VX_cache #(
.BANK_LINE_SIZE (BANK_LINE_SIZE), .BANK_LINE_SIZE (BANK_LINE_SIZE),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
.WORD_SIZE (WORD_SIZE), .WORD_SIZE (WORD_SIZE),
.NUM_REQS (NUM_REQS), .NUM_REQS (NUM_REQS)
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
) cache_core_req_bank_sel ( ) cache_core_req_bank_sel (
.core_req_valid (core_req_valid), .core_req_valid (core_req_valid),
.core_req_addr (core_req_addr), .core_req_addr (core_req_addr),
.core_req_ready (core_req_ready), .core_req_ready (core_req_ready),
.per_bank_valid (per_bank_valid), .per_bank_valid (per_bank_core_req_valid),
.per_bank_ready (per_bank_core_req_ready) .per_bank_tid (per_bank_core_req_tid),
.per_bank_ready (per_bank_core_req_ready)
); );
assign dram_req_tag = dram_req_addr; assign dram_req_tag = dram_req_addr;
@@ -173,51 +173,53 @@ module VX_cache #(
end end
for (genvar i = 0; i < NUM_BANKS; i++) begin for (genvar i = 0; i < NUM_BANKS; i++) begin
wire [NUM_REQS-1:0] curr_bank_core_req_valid; wire curr_bank_core_req_valid;
wire [`CORE_REQ_TAG_COUNT-1:0] curr_bank_core_req_rw; wire [`REQS_BITS-1:0] curr_bank_core_req_tid;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen; wire curr_bank_core_req_rw;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr; wire [WORD_SIZE-1:0] curr_bank_core_req_byteen;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag; wire [`WORD_ADDR_WIDTH-1:0] curr_bank_core_req_addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] curr_bank_core_req_data; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_req_tag;
wire curr_bank_core_req_ready; wire [`WORD_WIDTH-1:0] curr_bank_core_req_data;
wire curr_bank_core_req_ready;
wire curr_bank_core_rsp_valid; wire curr_bank_core_rsp_valid;
wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid; wire [`REQS_BITS-1:0] curr_bank_core_rsp_tid;
wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data; wire [`WORD_WIDTH-1:0] curr_bank_core_rsp_data;
wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag; wire [CORE_TAG_WIDTH-1:0] curr_bank_core_rsp_tag;
wire curr_bank_core_rsp_ready; wire curr_bank_core_rsp_ready;
wire curr_bank_dram_req_valid; wire curr_bank_dram_req_valid;
wire curr_bank_dram_req_rw; wire curr_bank_dram_req_rw;
wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen; wire [BANK_LINE_SIZE-1:0] curr_bank_dram_req_byteen;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_req_addr;
wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data; wire[`BANK_LINE_WIDTH-1:0] curr_bank_dram_req_data;
wire curr_bank_dram_req_ready; wire curr_bank_dram_req_ready;
wire curr_bank_dram_rsp_valid; wire curr_bank_dram_rsp_valid;
wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data; wire [`BANK_LINE_WIDTH-1:0] curr_bank_dram_rsp_data;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_dram_rsp_addr;
wire curr_bank_dram_rsp_ready; wire curr_bank_dram_rsp_ready;
wire curr_bank_snp_req_valid; wire curr_bank_snp_req_valid;
wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr; wire [`LINE_ADDR_WIDTH-1:0] curr_bank_snp_req_addr;
wire curr_bank_snp_req_inv; wire curr_bank_snp_req_inv;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag; wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_req_tag;
wire curr_bank_snp_req_ready; wire curr_bank_snp_req_ready;
wire curr_bank_snp_rsp_valid; wire curr_bank_snp_rsp_valid;
wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag; wire [SNP_TAG_WIDTH-1:0] curr_bank_snp_rsp_tag;
wire curr_bank_snp_rsp_ready; wire curr_bank_snp_rsp_ready;
wire curr_bank_miss; wire curr_bank_miss;
// Core Req // Core Req
assign curr_bank_core_req_valid = per_bank_valid[i]; assign curr_bank_core_req_valid = per_bank_core_req_valid[i];
assign curr_bank_core_req_addr = core_req_addr; assign curr_bank_core_req_tid = per_bank_core_req_tid[i];
assign curr_bank_core_req_rw = core_req_rw; assign curr_bank_core_req_addr = core_req_addr[per_bank_core_req_tid[i]];
assign curr_bank_core_req_byteen = core_req_byteen; assign curr_bank_core_req_rw = core_req_rw[per_bank_core_req_tid[i]];
assign curr_bank_core_req_data = core_req_data; assign curr_bank_core_req_byteen = core_req_byteen[per_bank_core_req_tid[i]];
assign curr_bank_core_req_tag = core_req_tag; assign curr_bank_core_req_data = core_req_data[per_bank_core_req_tid[i]];
assign curr_bank_core_req_tag = core_req_tag[per_bank_core_req_tid[i]];
assign per_bank_core_req_ready[i] = curr_bank_core_req_ready; assign per_bank_core_req_ready[i] = curr_bank_core_req_ready;
// Core WB // Core WB
@@ -298,6 +300,7 @@ module VX_cache #(
.reset (reset), .reset (reset),
// Core request // Core request
.core_req_valid (curr_bank_core_req_valid), .core_req_valid (curr_bank_core_req_valid),
.core_req_tid (curr_bank_core_req_tid),
.core_req_rw (curr_bank_core_req_rw), .core_req_rw (curr_bank_core_req_rw),
.core_req_byteen (curr_bank_core_req_byteen), .core_req_byteen (curr_bank_core_req_byteen),
.core_req_addr (curr_bank_core_req_addr), .core_req_addr (curr_bank_core_req_addr),

View File

@@ -2,77 +2,64 @@
module VX_cache_core_req_bank_sel #( module VX_cache_core_req_bank_sel #(
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 1, parameter BANK_LINE_SIZE = 1,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE = 1, parameter WORD_SIZE = 1,
// Number of banks // Number of banks
parameter NUM_BANKS = 1, parameter NUM_BANKS = 1,
// Number of Word requests per cycle // Number of Word requests per cycle
parameter NUM_REQS = 1, parameter NUM_REQS = 1
// size of tag id in core request tag
parameter CORE_TAG_ID_BITS = 1
) ( ) (
input wire [NUM_REQS-1:0] core_req_valid, input wire [NUM_REQS-1:0] core_req_valid,
input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr, input wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] core_req_addr,
output wire [`CORE_REQ_TAG_COUNT-1:0] core_req_ready, output wire [NUM_REQS-1:0] core_req_ready,
output wire [NUM_BANKS-1:0] per_bank_valid,
output wire [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid, output wire [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_tid,
input wire [NUM_BANKS-1:0] per_bank_ready input wire [NUM_BANKS-1:0] per_bank_ready
); );
if (NUM_BANKS > 1) begin if (NUM_BANKS > 1) begin
reg [NUM_BANKS-1:0] per_bank_valid_r;
reg [NUM_BANKS-1:0][`REQS_BITS-1:0] per_bank_tid_r;
reg [NUM_REQS-1:0] core_req_ready_r;
wire [NUM_REQS-1:0][`BANK_BITS-1:0] core_req_bid;
reg [NUM_BANKS-1:0][NUM_REQS-1:0] per_bank_valid_r; for (genvar i = 0; i < NUM_REQS; ++i) begin
assign core_req_bid[i] = core_req_addr[i][`BANK_SELECT_ADDR_RNG];
end
always @(*) begin always @(*) begin
per_bank_valid_r = 0; per_bank_valid_r = 0;
for (integer i = 0; i < NUM_REQS; i++) begin per_bank_tid_r = 'x;
per_bank_valid_r[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i]; for (integer i = NUM_REQS-1; i >= 0; --i) begin
if (core_req_valid[i]) begin
per_bank_valid_r[core_req_bid[i]] = 1;
per_bank_tid_r[core_req_bid[i]] = `REQS_BITS'(i);
end
end end
end end
if (CORE_TAG_ID_BITS != 0) begin always @(*) begin
core_req_ready_r = 0;
reg [NUM_BANKS-1:0] per_bank_ready_other, per_bank_ready_ignore; for (integer j = 0; j < NUM_BANKS; ++j) begin
for (integer i = 0; i < NUM_REQS; ++i) begin
always @(*) begin if (core_req_valid[i] && (core_req_bid[i] == `BANK_BITS'(j))) begin
per_bank_ready_other = {NUM_BANKS{1'b1}}; core_req_ready_r[i] = per_bank_ready[j];
per_bank_ready_ignore = {NUM_BANKS{1'b1}}; break;
for (integer i = 0; i < NUM_REQS; i++) begin
per_bank_ready_ignore[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 1'b0;
end
for (integer i = 0; i < NUM_BANKS; i++) begin
for (integer j = 0; j < NUM_BANKS; j++) begin
if (i != j) begin
per_bank_ready_other[i] &= (per_bank_ready[j] | per_bank_ready_ignore[j]);
end
end end
end end
end end
for (genvar i = 0; i < NUM_BANKS; i++) begin
for (genvar j = 0; j < NUM_REQS; j++) begin
assign per_bank_valid[i][j] = per_bank_valid_r[i][j] && per_bank_ready_other[i];
end
end
assign core_req_ready[0] = & (per_bank_ready | per_bank_ready_ignore);
end else begin
assign per_bank_valid = per_bank_valid_r;
for (genvar i = 0; i < NUM_REQS; i++) begin
assign core_req_ready[i] = per_bank_ready[core_req_addr[i][`BANK_SELECT_ADDR_RNG]];
end
end end
assign per_bank_valid = per_bank_valid_r;
assign per_bank_tid = per_bank_tid_r;
assign core_req_ready = core_req_ready_r;
end else begin end else begin
`UNUSED_VAR (core_req_valid)
`UNUSED_VAR (core_req_addr) `UNUSED_VAR (core_req_addr)
assign per_bank_valid = core_req_valid; assign per_bank_valid = core_req_valid;
assign per_bank_tid = 0;
assign core_req_ready[0] = per_bank_ready; assign core_req_ready[0] = per_bank_ready;
end end

View File

@@ -106,7 +106,6 @@ module VX_cache_core_rsp_merge #(
end end
for (genvar i = 0; i < NUM_REQS; i++) begin for (genvar i = 0; i < NUM_REQS; i++) begin
assign stall[i] = ~core_rsp_ready[i] && core_rsp_valid[i]; assign stall[i] = ~core_rsp_ready[i] && core_rsp_valid[i];
VX_generic_register #( VX_generic_register #(

View File

@@ -57,7 +57,7 @@ module VX_miss_resrv #(
output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data_st0, output wire [`MSHR_DATA_WIDTH-1:0] dequeue_data_st0,
input wire dequeue_st3 input wire dequeue_st3
); );
reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0]; `USE_FAST_BRAM reg [`LINE_ADDR_WIDTH-1:0] addr_table [MSHR_SIZE-1:0];
reg [MSHR_SIZE-1:0] valid_table; reg [MSHR_SIZE-1:0] valid_table;
reg [MSHR_SIZE-1:0] ready_table; reg [MSHR_SIZE-1:0] ready_table;

View File

@@ -68,7 +68,7 @@ module VX_snp_forwarder #(
wire sfq_acquire = snp_req_valid && snp_req_ready; wire sfq_acquire = snp_req_valid && snp_req_ready;
wire sfq_release = snp_rsp_valid_unqual && snp_rsp_ready_unqual; wire sfq_release = snp_rsp_valid_unqual && snp_rsp_ready_unqual;
VX_cam_buffer #( VX_index_buffer #(
.DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH), .DATAW (SRC_ADDR_WIDTH + 1 + TAG_IN_WIDTH),
.SIZE (SREQ_SIZE), .SIZE (SREQ_SIZE),
.FASTRAM (1) .FASTRAM (1)

View File

@@ -5,15 +5,15 @@ Generation context:
HardFP is enabled enabling set to true HardFP is enabled enabling set to true
Faithful rounding constraint detected Faithful rounding constraint detected
Will not generate valid and channel signals Will not generate valid and channel signals
The new component name is acl_fdiv The new component name is acl_s10_fdiv
Frequency 250MHz Frequency 250MHz
Deployment FPGA Arria10 Deployment FPGA Stratix10
Estimated resources LUTs 539, DSPs 5, RAMBits 32768, RAMBlocks 3 Estimated resources LUTs 681, DSPs 5, RAMBits 32768, RAMBlocks 3
The pipeline depth of the block is 15 cycle(s) The pipeline depth of the block is 25 cycle(s)
@@start @@start
@name FPDiv@ @name FPDiv@
@latency 15@ @latency 25@
@LUT 539@ @LUT 681@
@DSP 5@ @DSP 5@
@RAMBits 32768@ @RAMBits 32768@
@RAMBlockUsage 3@ @RAMBlockUsage 3@
@@ -34,15 +34,15 @@ Generation context:
HardFP is enabled enabling set to true HardFP is enabled enabling set to true
Faithful rounding constraint detected Faithful rounding constraint detected
Will not generate valid and channel signals Will not generate valid and channel signals
The new component name is acl_fsqrt The new component name is acl_s10_fsqrt
Frequency 250MHz Frequency 250MHz
Deployment FPGA Arria10 Deployment FPGA Stratix10
Estimated resources LUTs 271, DSPs 3, RAMBits 15872, RAMBlocks 3 Estimated resources LUTs 349, DSPs 3, RAMBits 15872, RAMBlocks 3
The pipeline depth of the block is 10 cycle(s) The pipeline depth of the block is 17 cycle(s)
@@start @@start
@name FPSqrt@ @name FPSqrt@
@latency 10@ @latency 17@
@LUT 271@ @LUT 349@
@DSP 3@ @DSP 3@
@RAMBits 15872@ @RAMBits 15872@
@RAMBlockUsage 3@ @RAMBlockUsage 3@
@@ -62,15 +62,15 @@ Generation context:
HardFP is enabled enabling set to true HardFP is enabled enabling set to true
Faithful rounding constraint detected Faithful rounding constraint detected
Will not generate valid and channel signals Will not generate valid and channel signals
The new component name is acl_ftoi The new component name is acl_s10_ftoi
Frequency 250MHz Frequency 250MHz
Deployment FPGA Arria10 Deployment FPGA Stratix10
Estimated resources LUTs 327, DSPs 0, RAMBits 0, RAMBlocks 0 Estimated resources LUTs 344, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s) The pipeline depth of the block is 3 cycle(s)
@@start @@start
@name FPToFXP@ @name FPToFXP@
@latency 3@ @latency 3@
@LUT 327@ @LUT 344@
@DSP 0@ @DSP 0@
@RAMBits 0@ @RAMBits 0@
@RAMBlockUsage 0@ @RAMBlockUsage 0@
@@ -90,15 +90,15 @@ Generation context:
HardFP is enabled enabling set to true HardFP is enabled enabling set to true
Faithful rounding constraint detected Faithful rounding constraint detected
Will not generate valid and channel signals Will not generate valid and channel signals
The new component name is acl_ftou The new component name is acl_s10_ftou
Frequency 250MHz Frequency 250MHz
Deployment FPGA Arria10 Deployment FPGA Stratix10
Estimated resources LUTs 287, DSPs 0, RAMBits 0, RAMBlocks 0 Estimated resources LUTs 272, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 3 cycle(s) The pipeline depth of the block is 3 cycle(s)
@@start @@start
@name FPToFXP@ @name FPToFXP@
@latency 3@ @latency 3@
@LUT 287@ @LUT 272@
@DSP 0@ @DSP 0@
@RAMBits 0@ @RAMBits 0@
@RAMBlockUsage 0@ @RAMBlockUsage 0@
@@ -118,15 +118,15 @@ Generation context:
HardFP is enabled enabling set to true HardFP is enabled enabling set to true
Faithful rounding constraint detected Faithful rounding constraint detected
Will not generate valid and channel signals Will not generate valid and channel signals
The new component name is acl_itof The new component name is acl_s10_itof
Frequency 250MHz Frequency 250MHz
Deployment FPGA Arria10 Deployment FPGA Stratix10
Estimated resources LUTs 397, DSPs 0, RAMBits 0, RAMBlocks 0 Estimated resources LUTs 362, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s) The pipeline depth of the block is 7 cycle(s)
@@start @@start
@name FXPToFP@ @name FXPToFP@
@latency 7@ @latency 7@
@LUT 397@ @LUT 362@
@DSP 0@ @DSP 0@
@RAMBits 0@ @RAMBits 0@
@RAMBlockUsage 0@ @RAMBlockUsage 0@
@@ -146,15 +146,15 @@ Generation context:
HardFP is enabled enabling set to true HardFP is enabled enabling set to true
Faithful rounding constraint detected Faithful rounding constraint detected
Will not generate valid and channel signals Will not generate valid and channel signals
The new component name is acl_utof The new component name is acl_s10_utof
Frequency 300MHz Frequency 300MHz
Deployment FPGA Arria10 Deployment FPGA Stratix10
Estimated resources LUTs 363, DSPs 0, RAMBits 0, RAMBlocks 0 Estimated resources LUTs 310, DSPs 0, RAMBits 0, RAMBlocks 0
The pipeline depth of the block is 7 cycle(s) The pipeline depth of the block is 7 cycle(s)
@@start @@start
@name FXPToFP@ @name FXPToFP@
@latency 7@ @latency 7@
@LUT 363@ @LUT 310@
@DSP 0@ @DSP 0@
@RAMBits 0@ @RAMBits 0@
@RAMBlockUsage 0@ @RAMBlockUsage 0@

View File

@@ -2,7 +2,7 @@
CMD_POLY_EVAL_PATH=$QUARTUS_HOME/dspba/backend/linux64 CMD_POLY_EVAL_PATH=$QUARTUS_HOME/dspba/backend/linux64
OPTIONS="-target Arria10 -lang verilog -enableHardFP 1 -printMachineReadable -faithfulRounding -noChanValid -enable -speedgrade 2" OPTIONS="-target Stratix10 -lang verilog -enableHardFP 1 -printMachineReadable -faithfulRounding -noChanValid -enable -speedgrade 2"
export LD_LIBRARY_PATH=$CMD_POLY_EVAL_PATH:$LD_LIBRARY_PATH export LD_LIBRARY_PATH=$CMD_POLY_EVAL_PATH:$LD_LIBRARY_PATH
@@ -14,12 +14,12 @@ FBITS="f$(($EXP_BITS + $MAN_BITS + 1))"
echo Generating IP cores for $FBITS echo Generating IP cores for $FBITS
{ {
$CMD -name acl_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0 $CMD -name acl_s10_fdiv -frequency 250 FPDiv $EXP_BITS $MAN_BITS 0
$CMD -name acl_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS $CMD -name acl_s10_fsqrt -frequency 250 FPSqrt $EXP_BITS $MAN_BITS
$CMD -name acl_ftoi -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 1 $CMD -name acl_s10_ftoi -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 1
$CMD -name acl_ftou -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 0 $CMD -name acl_s10_ftou -frequency 250 FPToFXP $EXP_BITS $MAN_BITS 32 0 0
$CMD -name acl_itof -frequency 250 FXPToFP 32 0 1 $EXP_BITS $MAN_BITS $CMD -name acl_s10_itof -frequency 250 FXPToFP 32 0 1 $EXP_BITS $MAN_BITS
$CMD -name acl_utof -frequency 300 FXPToFP 32 0 0 $EXP_BITS $MAN_BITS $CMD -name acl_s10_utof -frequency 300 FXPToFP 32 0 0 $EXP_BITS $MAN_BITS
} > acl_gen.log 2>&1 } > acl_gen.log 2>&1
#cp $QUARTUS_HOME/dspba/backend/Libraries/sv/base/dspba_library_ver.sv . #cp $QUARTUS_HOME/dspba/backend/Libraries/sv/base/dspba_library_ver.sv .

View File

@@ -10,13 +10,13 @@ interface VX_cache_core_req_if #(
parameter CORE_TAG_ID_BITS = 0 parameter CORE_TAG_ID_BITS = 0
) (); ) ();
wire [NUM_REQS-1:0] valid; wire [NUM_REQS-1:0] valid;
wire [`CORE_REQ_TAG_COUNT-1:0] rw; wire [NUM_REQS-1:0] rw;
wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen; wire [NUM_REQS-1:0][WORD_SIZE-1:0] byteen;
wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr; wire [NUM_REQS-1:0][`WORD_ADDR_WIDTH-1:0] addr;
wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data; wire [NUM_REQS-1:0][`WORD_WIDTH-1:0] data;
wire [`CORE_REQ_TAG_COUNT-1:0][CORE_TAG_WIDTH-1:0] tag; wire [NUM_REQS-1:0][CORE_TAG_WIDTH-1:0] tag;
wire ready; wire [NUM_REQS-1:0] ready;
endinterface endinterface

View File

@@ -1,6 +1,6 @@
`include "VX_platform.vh" `include "VX_platform.vh"
module VX_cam_buffer #( module VX_index_buffer #(
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter FASTRAM = 0, parameter FASTRAM = 0,
@@ -48,16 +48,18 @@ module VX_cam_buffer #(
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
write_addr_r <= ADDRW'(1'b0);
free_slots <= {SIZE{1'b1}}; free_slots <= {SIZE{1'b1}};
full_r <= 1'b0; full_r <= 1'b0;
write_addr_r <= ADDRW'(1'b0);
end else begin end else begin
if (release_slot) begin if (release_slot) begin
assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr); assert(0 == free_slots[release_addr]) else $error("%t: releasing invalid slot at port %d", $time, release_addr);
end end
free_slots <= free_slots_n; if (acquire_slot || full_r) begin
write_addr_r <= free_index; write_addr_r <= free_index;
full_r <= ~free_valid; end
free_slots <= free_slots_n;
full_r <= ~free_valid;
end end
end end

View File

@@ -6,10 +6,12 @@ RTL_INCLUDE = $(FPU_INCLUDE);../../../rtl;../../../rtl/libs;../../../rtl/interfa
PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf PROJECT_FILES = $(PROJECT).qpf $(PROJECT).qsf
# Part, Family # Part, Family
FAMILY = "Arria 10"
#DEVICE = 1SX280HN2F43E2VG FAMILY = "Stratix 10"
DEVICE = 10AX115N3F40E2SG DEVICE = 1SX280HN2F43E2VG
#FAMILY = "Arria 10"
#DEVICE = 10AX115N3F40E2SG
# Executable Configuration # Executable Configuration
SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1 SYN_ARGS = --parallel --read_settings_files=on --set=VERILOG_MACRO=NOPAE=1