per_bank_miss added to VX_cache.v

This commit is contained in:
trmontgomery
2020-11-02 12:07:10 -05:00
68 changed files with 2410 additions and 1634 deletions

View File

@@ -3,11 +3,7 @@
module VX_cluster #(
parameter CLUSTER_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_cluster
// Clock
input wire clk,
@@ -138,11 +134,7 @@ module VX_cluster #(
VX_core #(
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
) core (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_cluster_core(i)
.clk (clk),
.reset (reset),
@@ -380,7 +372,7 @@ module VX_cluster #(
.SNP_REQ_TAG_WIDTH (`L2SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`DSNP_TAG_WIDTH)
) l2cache (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_VX_cluster_l2cache
.clk (clk),
.reset (reset),

View File

@@ -59,8 +59,6 @@
`define EXT_F_ENABLE
`endif
//`define FPU_FAST
// Device identification
`define VENDOR_ID 0
`define ARCHITECTURE_ID 0

View File

@@ -3,11 +3,7 @@
module VX_core #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_core
// Clock
input wire clk,
@@ -179,10 +175,7 @@ module VX_core #(
VX_pipeline #(
.CORE_ID(CORE_ID)
) pipeline (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_core_pipeline
.clk(clk),
.reset(reset),
@@ -258,7 +251,7 @@ module VX_core #(
VX_mem_unit #(
.CORE_ID(CORE_ID)
) mem_unit (
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_BIND_VX_core_mem_unit
.clk (clk),
.reset (reset),

View File

@@ -7,7 +7,7 @@ module VX_csr_unit #(
input wire reset,
VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_issue_if csr_to_issue_if,
VX_csr_to_issue_if csr_to_issue_if,
VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if,
@@ -15,8 +15,8 @@ module VX_csr_unit #(
VX_csr_req_if csr_req_if,
VX_exu_to_cmt_if csr_commit_if
);
VX_csr_req_if csr_pipe_req_if();
VX_exu_to_cmt_if csr_pipe_rsp_if();
VX_csr_req_if csr_pipe_req_if();
VX_exu_to_cmt_if csr_pipe_rsp_if();
wire select_io_req = csr_io_req_if.valid;
wire select_io_rsp;

View File

@@ -347,7 +347,7 @@ module VX_decode #(
assign decode_if.rd = rd;
assign decode_if.rs1 = rs1_qual;
assign decode_if.rs2 = rs2;
assign decode_if.rs3 = rs3;
assign decode_if.rs3 = 0;
`endif
assign decode_if.use_rs3 = use_rs3;

View File

@@ -6,11 +6,6 @@
///////////////////////////////////////////////////////////////////////////////
// `define SYNTHESIS 1
// `define ASIC 1
///////////////////////////////////////////////////////////////////////////////
`define NW_BITS `LOG2UP(`NUM_WARPS)
`define NT_BITS `LOG2UP(`NUM_THREADS)
@@ -248,7 +243,7 @@
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Cache ID
`define DCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 0)
`define DCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 0
// TAG sharing enable
`define DCORE_TAG_ID_BITS `LOG2UP(`LSUQ_SIZE)
@@ -277,7 +272,7 @@
////////////////////////// Icache Configurable Knobs //////////////////////////
// Cache ID
`define ICACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 1)
`define ICACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 1
// Core request address bits
`define ICORE_ADDR_WIDTH (32-`CLOG2(`IWORD_SIZE))
@@ -309,7 +304,7 @@
////////////////////////// SM Configurable Knobs //////////////////////////////
// Cache ID
`define SCACHE_ID (((`L3_ENABLE && `L2_ENABLE) ? 2 : `L2_ENABLE ? 1 : 0) + (CORE_ID * 3) + 2)
`define SCACHE_ID 32'(`L3_ENABLE) + 32'(`L2_ENABLE) * `NUM_CLUSTERS + CORE_ID * 3 + 2
// Number of Word requests per cycle {1, 2, 4, 8, ...}
`define SNUM_REQUESTS `NUM_THREADS
@@ -326,7 +321,7 @@
////////////////////////// L2cache Configurable Knobs /////////////////////////
// Cache ID
`define L2CACHE_ID (`L3_ENABLE ? 1 : 0)
`define L2CACHE_ID 32'(`L3_ENABLE) + CLUSTER_ID
// Core request tag bits
`define L2CORE_TAG_WIDTH (`DCORE_TAG_WIDTH + `CLOG2(`NUM_CORES))

View File

@@ -3,8 +3,7 @@
module VX_execute #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_execute
input wire clk,
input wire reset,
@@ -55,7 +54,7 @@ module VX_execute #(
VX_lsu_unit #(
.CORE_ID(CORE_ID)
) lsu_unit (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_BIND_VX_execute_lsu_unit
.clk (clk),
.reset (reset),
.dcache_req_if (dcache_req_if),
@@ -122,6 +121,7 @@ module VX_execute #(
VX_gpu_unit #(
.CORE_ID(CORE_ID)
) gpu_unit (
`SCOPE_BIND_VX_execute_gpu_unit
.clk (clk),
.reset (reset),
.gpu_req_if (gpu_req_if),

View File

@@ -3,7 +3,7 @@
module VX_fetch #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_IO_VX_fetch
input wire clk,
input wire reset,
@@ -29,6 +29,8 @@ module VX_fetch #(
VX_warp_sched #(
.CORE_ID(CORE_ID)
) warp_sched (
`SCOPE_BIND_VX_fetch_warp_sched
.clk (clk),
.reset (reset),
.warp_ctl_if (warp_ctl_if),
@@ -43,7 +45,7 @@ module VX_fetch #(
VX_icache_stage #(
.CORE_ID(CORE_ID)
) icache_stage (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_BIND_VX_fetch_icache_stage
.clk (clk),
.reset (reset),

View File

@@ -1,74 +0,0 @@
`include "VX_define.vh"
// Control module to support multi-cycle read for fp register.
//
// Read port 1 of the register file is time-shared between rs1 and rs3:
//   * While read_rs1 is set, raddr1 addresses rs1 and rs1_data is captured
//     into rsp_rs1_data.
//   * A valid request with use_rs3 seen while read_rs1 is set (rs3_delay)
//     deasserts gpr_req_if.ready for that cycle and clears read_rs1, so on
//     the following cycle raddr1 addresses rs3 instead.
//   * rsp_rs3_data samples rs1_data every cycle; it is only meaningful once
//     raddr1 has been switched to rs3.
//   * read_rs1 is set again when a valid request coincides with
//     gpr_rsp_if.ready (read_fire).
//
// NOTE(review): rs1_data is presumably the register-file read data for
// raddr1 and rs2_data the read data for the request's rs2 -- confirm against
// the instantiating stage.
module VX_gpr_fp_ctrl (
    input wire clk,
    input wire reset,

    // register-file read data
    input wire [`NUM_THREADS-1:0][31:0] rs1_data,
    input wire [`NUM_THREADS-1:0][31:0] rs2_data,

    // incoming read request
    VX_gpr_req_if gpr_req_if,

    // outputs
    output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
    VX_gpr_rsp_if gpr_rsp_if
);
    // Registered response fields.
    reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data, rsp_rs3_data;
    reg rsp_valid;
    reg [31:0] rsp_pc;
    reg [`NW_BITS-1:0] rsp_wid;

    // 1: port 1 addresses rs1; 0: port 1 addresses rs3.
    reg read_rs1;

    // An rs3 read is still outstanding for the current request.
    wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && read_rs1;

    // A valid request is present and the response side is ready.
    wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;

    always @(posedge clk) begin
        if (reset) begin
            rsp_valid    <= 0;
            rsp_pc       <= 0;
            rsp_rs1_data <= 0;
            rsp_rs2_data <= 0;
            rsp_rs3_data <= 0;
            rsp_wid      <= 0;
            read_rs1     <= 1;
        end else begin
            // Port-1 phase tracking: switch to rs3 when the extra read is
            // required, return to rs1 once the request fires.
            if (rs3_delay) begin
                read_rs1 <= 0;
            end else if (read_fire) begin
                read_rs1 <= 1;
            end

            // Request metadata is re-registered every cycle.
            rsp_valid <= gpr_req_if.valid;
            rsp_wid   <= gpr_req_if.wid;
            rsp_pc    <= gpr_req_if.PC;

            // rs1 is captured only while port 1 is actually addressing rs1,
            // so the value survives the following rs3 read cycle.
            if (read_rs1) begin
                rsp_rs1_data <= rs1_data;
            end
            rsp_rs2_data <= rs2_data;
            rsp_rs3_data <= rs1_data;

            // While the rs3 read is in progress the request must still
            // belong to the warp captured on the previous cycle.
            assert(read_rs1 || rsp_wid == gpr_req_if.wid);
        end
    end

    // outputs
    wire [`NR_BITS-1:0] rs1 = read_rs1 ? gpr_req_if.rs1 : gpr_req_if.rs3;
    assign raddr1 = {gpr_req_if.wid, rs1};

    // Hold the request for one cycle while the rs3 read is pending.
    assign gpr_req_if.ready = ~rs3_delay;

    assign gpr_rsp_if.valid    = rsp_valid;
    assign gpr_rsp_if.wid      = rsp_wid;
    assign gpr_rsp_if.PC       = rsp_pc;
    assign gpr_rsp_if.rs1_data = rsp_rs1_data;
    assign gpr_rsp_if.rs2_data = rsp_rs2_data;
    assign gpr_rsp_if.rs3_data = rsp_rs3_data;
endmodule

View File

@@ -10,136 +10,24 @@ module VX_gpr_ram (
output wire [`NUM_THREADS-1:0][31:0] rs1_data,
output wire [`NUM_THREADS-1:0][31:0] rs2_data
);
`ifndef ASIC
reg [`NUM_THREADS-1:0][3:0][7:0] ram [(`NUM_WARPS * `NUM_REGS)-1:0];
initial begin // initialize ram: set r0 = 0
for (integer j = 0; j < `NUM_WARPS; j++) begin
for (integer i = 0; i < `NUM_REGS; i++) begin
ram[j * `NUM_REGS + i] = (i == 0) ? {`NUM_THREADS{32'h0}} : {`NUM_THREADS{32'hx}};
end
end
end
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (we[i]) begin
ram[waddr][i][0] <= wdata[i][07:00];
ram[waddr][i][1] <= wdata[i][15:08];
ram[waddr][i][2] <= wdata[i][23:16];
ram[waddr][i][3] <= wdata[i][31:24];
end
end
end
assign rs1_data = ram[rs1];
assign rs2_data = ram[rs2];
`else
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
reg [`NUM_THREADS-1:0][3:0][7:0] mem [(`NUM_WARPS * `NUM_REGS)-1:0];
reg [`NUM_THREADS-1:0][31:0] q1, q2;
always @(posedge clk) begin
for (integer i = 0; i < `NUM_THREADS; i++) begin
assign write_bit_mask[i] = {32{~we[i]}};
end
wire cenb = 0;
wire cena_1 = 0;
wire cena_2 = 0;
wire [`NUM_THREADS-1:0][31:0] tmp_a;
wire [`NUM_THREADS-1:0][31:0] tmp_b;
`ifndef SYNTHESIS
for (integer i = 0; i < `NUM_THREADS; i++) begin
for (integer j = 0; j < 32; j++) begin
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
if (we[i]) begin
mem[waddr][i][0] <= wdata[i][07:00];
mem[waddr][i][1] <= wdata[i][15:08];
mem[waddr][i][2] <= wdata[i][23:16];
mem[waddr][i][3] <= wdata[i][31:24];
end
end
`else
assign rs1_data = tmp_a;
assign rs2_data = tmp_b;
`endif
for (integer i = 0; i < 'NT; i=i+4) begin
`IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_a[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(rs1[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
q1 <= mem[rs1];
q2 <= mem[rs2];
end
rf2_`NUM_GPRSx128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(tmp_b[(i+3):(i)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(rs2[(i+3):(i)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(i+3):(i)]),
.AB(waddr[(i+3):(i)]),
.DB(wdata[(i+3):(i)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
`IGNORE_WARNINGS_END
end
assign rs1_data = q1;
assign rs2_data = q2;
`endif
endmodule
endmodule

View File

@@ -15,9 +15,15 @@ module VX_gpr_stage #(
);
`UNUSED_VAR (reset)
wire [`NUM_THREADS-1:0][31:0] rs1_data;
wire [`NUM_THREADS-1:0][31:0] rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1;
reg rsp_valid;
reg [`NW_BITS-1:0] rsp_wid;
reg [31:0] rsp_pc;
reg rs1_is_zero, rs2_is_zero;
wire [`NUM_THREADS-1:0][31:0] rs1_data, rs2_data;
wire [`NW_BITS+`NR_BITS-1:0] raddr1, raddr2;
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
VX_gpr_ram gpr_ram (
.clk (clk),
@@ -25,60 +31,77 @@ module VX_gpr_stage #(
.waddr ({writeback_if.wid, writeback_if.rd}),
.wdata (writeback_if.data),
.rs1 (raddr1),
.rs2 ({gpr_req_if.wid, gpr_req_if.rs2}),
.rs2 (raddr2),
.rs1_data (rs1_data),
.rs2_data (rs2_data)
);
);
`ifdef EXT_F_ENABLE
VX_gpr_fp_ctrl VX_gpr_fp_ctrl (
.clk (clk),
.reset (reset),
.rs1_data (rs1_data),
.rs2_data (rs2_data),
.raddr1 (raddr1),
.gpr_req_if (gpr_req_if),
.gpr_rsp_if (gpr_rsp_if)
);
`else
reg [`NUM_THREADS-1:0][31:0] rsp_rs1_data, rsp_rs2_data;
reg rsp_valid;
reg [`NW_BITS-1:0] rsp_wid;
reg [31:0] rsp_pc;
always @(posedge clk) begin
if (reset) begin
rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rsp_rs1_data <= 0;
rsp_rs2_data <= 0;
rsp_valid <= 0;
rsp_wid <= 0;
rsp_pc <= 0;
rs1_is_zero <= 0;
rs2_is_zero <= 0;
end else begin
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rsp_rs1_data <= rs1_data;
rsp_rs2_data <= rs2_data;
rsp_valid <= gpr_req_if.valid;
rsp_wid <= gpr_req_if.wid;
rsp_pc <= gpr_req_if.PC;
rs1_is_zero <= (0 == gpr_req_if.rs1);
rs2_is_zero <= (0 == gpr_req_if.rs2);
end
end
`ifdef EXT_F_ENABLE
reg [`NUM_THREADS-1:0][31:0] rs3_data;
reg read_rs3, save_rs3;
wire rs3_delay = gpr_req_if.valid && gpr_req_if.use_rs3 && !read_rs3;
wire read_fire = gpr_req_if.valid && gpr_rsp_if.ready;
always @(posedge clk) begin
if (reset) begin
rs3_data <= 0;
read_rs3 <= 0;
end else begin
if (rs3_delay) begin
read_rs3 <= 1;
save_rs3 <= 1;
end else if (read_fire) begin
read_rs3 <= 0;
end
if (save_rs3) begin
rs3_data <= rs1_data;
save_rs3 <= 0;
end
assert(!read_rs3 || rsp_wid == gpr_req_if.wid);
end
end
assign raddr1 = {gpr_req_if.wid, (rs3_delay ? gpr_req_if.rs3 : gpr_req_if.rs1)};
assign gpr_req_if.ready = ~rs3_delay;
assign gpr_rsp_if.rs3_data = rs3_data;
`else
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
assign gpr_req_if.ready = 1;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign gpr_rsp_if.rs1_data = rsp_rs1_data;
assign gpr_rsp_if.rs2_data = rsp_rs2_data;
assign gpr_rsp_if.rs3_data = 0;
`UNUSED_VAR (gpr_req_if.valid);
`UNUSED_VAR (gpr_req_if.rs3);
`UNUSED_VAR (gpr_req_if.use_rs3);
`UNUSED_VAR (gpr_rsp_if.ready);
`endif
assign gpr_rsp_if.rs1_data = rs1_is_zero ? (`NUM_THREADS*32)'(0) : rs1_data;
assign gpr_rsp_if.rs2_data = rs2_is_zero ? (`NUM_THREADS*32)'(0) : rs2_data;
assign gpr_rsp_if.valid = rsp_valid;
assign gpr_rsp_if.wid = rsp_wid;
assign gpr_rsp_if.PC = rsp_pc;
assign writeback_if.ready = 1'b1;
endmodule
endmodule

View File

@@ -3,6 +3,8 @@
module VX_gpu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_gpu_unit
input wire clk,
input wire reset,
@@ -88,4 +90,18 @@ module VX_gpu_unit #(
// can accept new request?
assign gpu_req_if.ready = gpu_commit_if.ready;
`SCOPE_ASSIGN (scope_gpu_req_valid, gpu_req_if.valid);
`SCOPE_ASSIGN (scope_gpu_req_wid, gpu_req_if.wid);
`SCOPE_ASSIGN (scope_gpu_req_tmask, gpu_req_if.tmask);
`SCOPE_ASSIGN (scope_gpu_req_op_type, gpu_req_if.op_type);
`SCOPE_ASSIGN (scope_gpu_req_rs1, gpu_req_if.rs1_data[0]);
`SCOPE_ASSIGN (scope_gpu_req_rs2, gpu_req_if.rs2_data);
`SCOPE_ASSIGN (scope_gpu_req_ready, gpu_req_if.ready);
`SCOPE_ASSIGN (scope_gpu_rsp_valid, warp_ctl_if.valid);
`SCOPE_ASSIGN (scope_gpu_rsp_wid, warp_ctl_if.wid);
`SCOPE_ASSIGN (scope_gpu_rsp_tmc, warp_ctl_if.tmc);
`SCOPE_ASSIGN (scope_gpu_rsp_wspawn, warp_ctl_if.wspawn);
`SCOPE_ASSIGN (scope_gpu_rsp_split, warp_ctl_if.split);
`SCOPE_ASSIGN (scope_gpu_rsp_barrier, warp_ctl_if.barrier);
endmodule

View File

@@ -20,15 +20,12 @@ module VX_ibuffer #(
localparam ADDRW = $clog2(SIZE);
localparam NWARPSW = $clog2(`NUM_WARPS+1);
`USE_FAST_BRAM reg [DATAW-1:0] entries [`NUM_WARPS-1:0][SIZE-1:0];
reg [SIZEW-1:0] size_r [`NUM_WARPS-1:0];
reg [ADDRW:0] rd_ptr_r [`NUM_WARPS-1:0];
reg [ADDRW:0] wr_ptr_r [`NUM_WARPS-1:0];
wire [`NUM_WARPS-1:0] q_full;
wire [`NUM_WARPS-1:0][SIZEW-1:0] q_size;
wire [DATAW-1:0] q_data_in;
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
wire [`NUM_WARPS-1:0][DATAW-1:0] q_data_prev;
reg [`NUM_WARPS-1:0][DATAW-1:0] q_data_out;
wire enq_fire = ibuf_enq_if.valid && ibuf_enq_if.ready;
@@ -39,21 +36,33 @@ module VX_ibuffer #(
wire writing = enq_fire && (i == ibuf_enq_if.wid);
wire reading = deq_fire && (i == ibuf_deq_if.wid);
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[i][ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[i][ADDRW-1:0];
wire is_slot0 = (0 == size_r[i]) || ((1 == size_r[i]) && reading);
wire push = writing && !is_slot0;
wire pop = reading && (size_r[i] != 1);
VX_generic_queue #(
.DATAW(DATAW),
.SIZE(SIZE)
) queue (
.clk (clk),
.reset (reset),
.push (push),
.pop (pop),
.data_in (q_data_in),
.data_out (q_data_prev[i]),
`UNUSED_PIN (empty),
`UNUSED_PIN (full),
`UNUSED_PIN (size)
);
always @(posedge clk) begin
if (reset) begin
rd_ptr_r[i] <= 0;
wr_ptr_r[i] <= 0;
size_r[i] <= 0;
end else begin
if (writing) begin
if ((0 == size_r[i]) || ((1 == size_r[i]) && reading)) begin
size_r[i] <= 0;
end else begin
if (writing) begin
if (is_slot0) begin
q_data_out[i] <= q_data_in;
end else begin
entries[i][wr_ptr_a] <= q_data_in;
wr_ptr_r[i] <= wr_ptr_r[i] + ADDRW'(1);
end
if (!reading) begin
size_r[i] <= size_r[i] + SIZEW'(1);
@@ -62,18 +71,16 @@ module VX_ibuffer #(
if (reading) begin
if (size_r[i] != 1) begin
q_data_out[i] <= q_data_prev[i];
rd_ptr_r[i] <= rd_ptr_r[i] + ADDRW'(1);
end
if (!writing) begin
size_r[i] <= size_r[i] - SIZEW'(1);
end
end
end
end
assign q_data_prev[i] = entries[i][rd_ptr_a];
assign q_full[i] = (size_r[i] == SIZE);
assign q_size[i] = size_r[i];
end
assign q_full[i] = (size_r[i] == SIZE);
assign q_size[i] = size_r[i];
end
///////////////////////////////////////////////////////////////////////////
@@ -144,9 +151,9 @@ module VX_ibuffer #(
schedule_table[deq_wid_n] <= 0;
end
deq_valid <= deq_valid_n;
deq_wid <= deq_wid_n;
deq_instr <= deq_instr_n;
deq_valid <= deq_valid_n;
deq_wid <= deq_wid_n;
deq_instr <= deq_instr_n;
if (warp_added && !warp_removed) begin
num_warps <= num_warps + NWARPSW'(1);

View File

@@ -3,7 +3,7 @@
module VX_icache_stage #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_IO_VX_icache_stage
input wire clk,
input wire reset,
@@ -20,8 +20,8 @@ module VX_icache_stage #(
);
`UNUSED_VAR (reset)
reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
@@ -29,8 +29,8 @@ module VX_icache_stage #(
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
always @(posedge clk) begin
if (icache_req_fire) begin
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
if (icache_req_fire) begin
rsp_PC_buf[req_tag] <= ifetch_req_if.PC;
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask;
end
end

View File

@@ -1,4 +1,3 @@
`include "VX_platform.vh"
module VX_ipdom_stack #(
@@ -17,33 +16,55 @@ module VX_ipdom_stack #(
);
localparam STACK_SIZE = 2 ** DEPTH;
`USE_FAST_BRAM reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`USE_FAST_BRAM reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
`USE_FAST_BRAM reg is_part [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
reg is_part [0:STACK_SIZE-1];
reg [DEPTH-1:0] rd_ptr, wr_ptr;
reg [WIDTH - 1:0] d1, d2;
reg p;
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;
wr_ptr <= 0;
end else begin
if (push) begin
stack_1[wr_ptr] <= q1;
stack_2[wr_ptr] <= q2;
is_part[wr_ptr] <= 0;
rd_ptr <= wr_ptr;
wr_ptr <= wr_ptr + DEPTH'(1);
end else if (pop) begin
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
is_part[rd_ptr] <= 1;
end
end
end
assign d = is_part[rd_ptr] ? stack_1[rd_ptr] : stack_2[rd_ptr];
always @(posedge clk) begin
if (push) begin
stack_1[wr_ptr] <= q1;
end
end
assign d1 = stack_1[rd_ptr];
assign empty = (0 == wr_ptr);
always @(posedge clk) begin
if (push) begin
stack_2[wr_ptr] <= q2;
end
end
assign d2 = stack_2[rd_ptr];
always @(posedge clk) begin
if (push) begin
is_part[wr_ptr] <= 0;
end else if (pop) begin
is_part[rd_ptr] <= 1;
end
end
assign p = is_part[rd_ptr];
assign d = p ? d1 : d2;
assign empty = ~(| wr_ptr);
assign full = ((STACK_SIZE-1) == wr_ptr);
endmodule

View File

@@ -3,7 +3,7 @@
module VX_issue #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_IO_VX_issue
input wire clk,
input wire reset,

View File

@@ -3,7 +3,7 @@
module VX_lsu_unit #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_LSU_IO
`SCOPE_IO_VX_lsu_unit
input wire clk,
input wire reset,

View File

@@ -3,7 +3,7 @@
module VX_mem_unit # (
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_IO_VX_mem_unit
input wire clk,
input wire reset,
@@ -77,7 +77,7 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`DCORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`SDRAM_TAG_WIDTH)
) smem (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_VX_mem_unit_smem
.clk (clk),
.reset (reset),
@@ -104,7 +104,7 @@ module VX_mem_unit # (
`UNUSED_PIN (dram_req_addr),
`UNUSED_PIN (dram_req_data),
`UNUSED_PIN (dram_req_tag),
.dram_req_ready (0),
.dram_req_ready (1'b0),
// DRAM response
.dram_rsp_valid (0),
@@ -113,7 +113,7 @@ module VX_mem_unit # (
`UNUSED_PIN (dram_rsp_ready),
// Snoop request
.snp_req_valid (0),
.snp_req_valid (1'b0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_tag (0),
@@ -122,17 +122,17 @@ module VX_mem_unit # (
// Snoop response
`UNUSED_PIN (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (0),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);
@@ -159,7 +159,7 @@ module VX_mem_unit # (
.DRAM_TAG_WIDTH (`DDRAM_TAG_WIDTH),
.SNP_REQ_TAG_WIDTH (`DSNP_TAG_WIDTH)
) dcache (
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_BIND_VX_mem_unit_dcache
.clk (clk),
.reset (reset),
@@ -211,10 +211,10 @@ module VX_mem_unit # (
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);
@@ -240,7 +240,7 @@ module VX_mem_unit # (
.CORE_TAG_ID_BITS (`ICORE_TAG_ID_BITS),
.DRAM_TAG_WIDTH (`IDRAM_TAG_WIDTH)
) icache (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_VX_mem_unit_icache
.clk (clk),
.reset (reset),
@@ -276,26 +276,26 @@ module VX_mem_unit # (
.dram_rsp_ready (icache_dram_rsp_if.ready),
// Snoop request
.snp_req_valid (0),
.snp_req_valid (1'b0),
.snp_req_addr (0),
.snp_req_invalidate (0),
.snp_req_invalidate (1'b0),
.snp_req_tag (0),
`UNUSED_PIN (snp_req_ready),
// Snoop response
`UNUSED_PIN (snp_rsp_valid),
`UNUSED_PIN (snp_rsp_tag),
.snp_rsp_ready (0),
.snp_rsp_ready (1'b0),
// Snoop forward out
`UNUSED_PIN (snp_fwdout_valid),
`UNUSED_PIN (snp_fwdout_addr),
`UNUSED_PIN (snp_fwdout_invalidate),
`UNUSED_PIN (snp_fwdout_tag),
.snp_fwdout_ready (0),
.snp_fwdout_ready (1'b0),
// Snoop forward in
.snp_fwdin_valid (0),
.snp_fwdin_valid (1'b0),
.snp_fwdin_tag (0),
`UNUSED_PIN (snp_fwdin_ready)
);

View File

@@ -3,10 +3,7 @@
module VX_pipeline #(
parameter CORE_ID = 0
) (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_VX_pipeline
// Clock
input wire clk,
@@ -126,7 +123,7 @@ module VX_pipeline #(
VX_fetch #(
.CORE_ID(CORE_ID)
) fetch (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_BIND_VX_pipeline_fetch
.clk (clk),
.reset (reset),
.icache_req_if (core_icache_req_if),
@@ -153,7 +150,7 @@ module VX_pipeline #(
VX_issue #(
.CORE_ID(CORE_ID)
) issue (
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_BIND_VX_pipeline_issue
.clk (clk),
.reset (reset),
@@ -173,8 +170,8 @@ module VX_pipeline #(
VX_execute #(
.CORE_ID(CORE_ID)
) execute (
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_VX_pipeline_execute
.clk (clk),
.reset (reset),

View File

@@ -22,14 +22,16 @@
/* verilator lint_off WIDTH */ \
/* verilator lint_off UNOPTFLAT */ \
/* verilator lint_off UNDRIVEN */ \
/* verilator lint_off DECLFILENAME */
/* verilator lint_off DECLFILENAME */ \
/* verilator lint_off IMPLICIT */
`define IGNORE_WARNINGS_END /* verilator lint_on UNUSED */ \
/* verilator lint_on PINCONNECTEMPTY */ \
/* verilator lint_on WIDTH */ \
/* verilator lint_on UNOPTFLAT */ \
/* verilator lint_on UNDRIVEN */ \
/* verilator lint_on DECLFILENAME */
/* verilator lint_on DECLFILENAME */ \
/* verilator lint_on IMPLICIT */
`define UNUSED_VAR(x) always @(x) begin end
@@ -39,9 +41,9 @@
`define STRINGIFY(x) `"x`"
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error(msg); \
`define STATIC_ASSERT(cond, msg) \
generate \
if (!(cond)) $error msg; \
endgenerate
`define ENABLE_TRACING /* verilator tracing_on */
@@ -49,8 +51,8 @@
///////////////////////////////////////////////////////////////////////////////
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *)
`define USE_FAST_BRAM (* ramstyle="mlab" *)
`define NO_RW_RAM_CHECK (* ramstyle="mlab, no_rw_check" *)
///////////////////////////////////////////////////////////////////////////////

View File

@@ -3,398 +3,85 @@
`ifdef SCOPE
`define SCOPE_SIGNALS_DATA_LIST \
scope_dram_req_addr, \
scope_dram_req_rw, \
scope_dram_req_byteen, \
scope_dram_req_data, \
scope_dram_req_tag, \
scope_dram_rsp_data, \
scope_dram_rsp_tag, \
scope_snp_req_addr, \
scope_snp_req_invalidate, \
scope_snp_req_tag, \
scope_snp_rsp_tag, \
scope_icache_req_wid, \
scope_icache_req_addr, \
scope_icache_req_tag, \
scope_icache_rsp_data, \
scope_icache_rsp_tag, \
scope_dcache_req_wid, \
scope_dcache_req_pc, \
scope_dcache_req_addr, \
scope_dcache_req_rw, \
scope_dcache_req_byteen, \
scope_dcache_req_data, \
scope_dcache_req_tag, \
scope_dcache_rsp_data, \
scope_dcache_rsp_tag, \
scope_issue_wid, \
scope_issue_tmask, \
scope_issue_pc, \
scope_issue_ex_type, \
scope_issue_op_type, \
scope_issue_op_mod, \
scope_issue_wb, \
scope_issue_rd, \
scope_issue_rs1, \
scope_issue_rs2, \
scope_issue_rs3, \
scope_issue_imm, \
scope_issue_rs1_is_pc, \
scope_issue_rs2_is_imm, \
scope_gpr_rsp_wid, \
scope_gpr_rsp_pc, \
scope_gpr_rsp_a, \
scope_gpr_rsp_b, \
scope_gpr_rsp_c, \
scope_writeback_wid, \
scope_writeback_pc, \
scope_writeback_rd, \
scope_writeback_data, \
scope_bank_addr_st0, \
scope_bank_addr_st1, \
scope_bank_addr_st2, \
scope_bank_is_mrvq_st1, \
scope_bank_miss_st1, \
scope_bank_dirty_st1, \
scope_bank_force_miss_st1,
`define SCOPE_SIGNALS_UPD_LIST \
scope_dram_req_valid, \
scope_dram_req_ready, \
scope_dram_rsp_valid, \
scope_dram_rsp_ready, \
scope_snp_req_valid, \
scope_snp_req_ready, \
scope_snp_rsp_valid, \
scope_snp_rsp_ready, \
scope_icache_req_valid, \
scope_icache_req_ready, \
scope_icache_rsp_valid, \
scope_icache_rsp_ready, \
scope_dcache_req_valid, \
scope_dcache_req_ready, \
scope_dcache_rsp_valid, \
scope_dcache_rsp_ready, \
scope_bank_valid_st0, \
scope_bank_valid_st1, \
scope_bank_valid_st2, \
scope_bank_stall_pipe, \
scope_issue_valid, \
scope_issue_ready, \
scope_gpr_rsp_valid, \
scope_writeback_valid, \
scope_scoreboard_delay, \
scope_gpr_delay, \
scope_execute_delay, \
scope_busy
`include "scope-defs.vh"
`define SCOPE_SIGNALS_DECL \
wire scope_dram_req_valid; \
wire [31:0] scope_dram_req_addr; \
wire scope_dram_req_rw; \
wire [15:0] scope_dram_req_byteen; \
wire [127:0] scope_dram_req_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_req_tag; \
wire scope_dram_req_ready; \
wire scope_dram_rsp_valid; \
wire [127:0] scope_dram_rsp_data; \
wire [`VX_DRAM_TAG_WIDTH-1:0] scope_dram_rsp_tag; \
wire scope_dram_rsp_ready; \
wire scope_snp_req_valid; \
wire [31:0] scope_snp_req_addr; \
wire scope_snp_req_invalidate; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_req_tag; \
wire scope_snp_req_ready; \
wire scope_snp_rsp_valid; \
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
wire scope_icache_req_valid; \
wire [`NW_BITS-1:0] scope_icache_req_wid; \
wire [31:0] scope_icache_req_addr; \
wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag; \
wire scope_icache_req_ready; \
wire scope_icache_rsp_valid; \
wire [31:0] scope_icache_rsp_data; \
wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag; \
wire scope_icache_rsp_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
wire [`NW_BITS-1:0] scope_dcache_req_wid; \
wire [31:0] scope_dcache_req_pc; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr; \
wire scope_dcache_req_rw; \
wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data; \
wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag; \
wire scope_dcache_req_ready; \
wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid; \
wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data; \
wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag; \
wire scope_dcache_rsp_ready; \
wire scope_snp_rsp_ready; \
wire [`NW_BITS-1:0] scope_issue_wid; \
wire [`NUM_THREADS-1:0] scope_issue_tmask; \
wire [31:0] scope_issue_pc; \
wire [`EX_BITS-1:0] scope_issue_ex_type; \
wire [`OP_BITS-1:0] scope_issue_op_type; \
wire [`MOD_BITS-1:0] scope_issue_op_mod; \
wire scope_issue_wb; \
wire [`NR_BITS-1:0] scope_issue_rd; \
wire [`NR_BITS-1:0] scope_issue_rs1; \
wire [`NR_BITS-1:0] scope_issue_rs2; \
wire [`NR_BITS-1:0] scope_issue_rs3; \
wire [31:0] scope_issue_imm; \
wire scope_issue_rs1_is_pc; \
wire scope_issue_rs2_is_imm; \
wire scope_gpr_rsp_valid; \
wire [`NW_BITS-1:0] scope_gpr_rsp_wid; \
wire [31:0] scope_gpr_rsp_pc; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b; \
wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c; \
wire scope_writeback_valid; \
wire [`NW_BITS-1:0] scope_writeback_wid; \
wire [31:0] scope_writeback_pc; \
wire [`NR_BITS-1:0] scope_writeback_rd; \
wire [`NUM_THREADS-1:0][31:0] scope_writeback_data; \
wire scope_bank_valid_st0; \
wire scope_bank_valid_st1; \
wire scope_bank_valid_st2; \
wire [31:0] scope_bank_addr_st0; \
wire [31:0] scope_bank_addr_st1; \
wire [31:0] scope_bank_addr_st2; \
wire scope_bank_is_mrvq_st1; \
wire scope_bank_miss_st1; \
wire scope_bank_dirty_st1; \
wire scope_bank_force_miss_st1; \
wire scope_bank_stall_pipe; \
wire scope_issue_valid; \
wire scope_issue_ready; \
wire scope_scoreboard_delay; \
wire scope_gpr_delay; \
wire scope_execute_delay; \
wire scope_busy;
// SCOPE_ASSIGN(d, s): expands to a continuous assignment driving d from s.
// The expansion carries no trailing semicolon; the use site supplies it.
`define SCOPE_ASSIGN(d,s) assign d = s
`define SCOPE_SIGNALS_ISTAGE_IO \
output wire scope_icache_req_valid, \
output wire [`NW_BITS-1:0] scope_icache_req_wid, \
output wire [31:0] scope_icache_req_addr, \
output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_req_tag, \
output wire scope_icache_req_ready, \
output wire scope_icache_rsp_valid, \
output wire [31:0] scope_icache_rsp_data, \
output wire [`ICORE_TAG_ID_BITS-1:0] scope_icache_rsp_tag, \
output wire scope_icache_rsp_ready,
`define SCOPE_SIGNALS_LSU_IO \
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
output wire [`NW_BITS-1:0] scope_dcache_req_wid, \
output wire [31:0] scope_dcache_req_pc, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_addr, \
output wire scope_dcache_req_rw, \
output wire [`NUM_THREADS-1:0][3:0] scope_dcache_req_byteen, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_req_data, \
output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_req_tag, \
output wire scope_dcache_req_ready, \
output wire [`NUM_THREADS-1:0] scope_dcache_rsp_valid, \
output wire [`NUM_THREADS-1:0][31:0] scope_dcache_rsp_data, \
output wire [`DCORE_TAG_ID_BITS-1:0] scope_dcache_rsp_tag, \
output wire scope_dcache_rsp_ready,
`define SCOPE_SIGNALS_CACHE_IO \
output wire scope_bank_valid_st0, \
output wire scope_bank_valid_st1, \
output wire scope_bank_valid_st2, \
output wire [31:0] scope_bank_addr_st0, \
output wire [31:0] scope_bank_addr_st1, \
output wire [31:0] scope_bank_addr_st2, \
output wire scope_bank_is_mrvq_st1, \
output wire scope_bank_miss_st1, \
output wire scope_bank_dirty_st1, \
output wire scope_bank_force_miss_st1, \
output wire scope_bank_stall_pipe,
`define SCOPE_SIGNALS_ISSUE_IO \
output wire scope_issue_valid, \
output wire [`NW_BITS-1:0] scope_issue_wid, \
output wire [`NUM_THREADS-1:0] scope_issue_tmask, \
output wire [31:0] scope_issue_pc, \
output wire [`EX_BITS-1:0] scope_issue_ex_type, \
output wire [`OP_BITS-1:0] scope_issue_op_type, \
output wire [`MOD_BITS-1:0] scope_issue_op_mod, \
output wire scope_issue_wb, \
output wire [`NR_BITS-1:0] scope_issue_rd, \
output wire [`NR_BITS-1:0] scope_issue_rs1, \
output wire [`NR_BITS-1:0] scope_issue_rs2, \
output wire [`NR_BITS-1:0] scope_issue_rs3, \
output wire [31:0] scope_issue_imm, \
output wire scope_issue_rs1_is_pc, \
output wire scope_issue_rs2_is_imm, \
output wire scope_writeback_valid, \
output wire scope_gpr_rsp_valid, \
output wire [`NW_BITS-1:0] scope_gpr_rsp_wid, \
output wire [31:0] scope_gpr_rsp_pc, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_a, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_b, \
output wire [`NUM_THREADS-1:0][31:0] scope_gpr_rsp_c, \
output wire [`NW_BITS-1:0] scope_writeback_wid, \
output wire [31:0] scope_writeback_pc, \
output wire [`NR_BITS-1:0] scope_writeback_rd, \
output wire [`NUM_THREADS-1:0][31:0] scope_writeback_data, \
output wire scope_issue_ready, \
output wire scope_scoreboard_delay, \
output wire scope_gpr_delay, \
output wire scope_execute_delay,
`define SCOPE_SIGNALS_EXECUTE_IO
// Named port connections for the scope_icache_req_* / scope_icache_rsp_*
// signals. Each port is wired to a signal of the same name in the
// instantiating scope. The expansion ends with a comma, so more connections
// must follow it in the instance port list.
`define SCOPE_SIGNALS_ISTAGE_BIND \
.scope_icache_req_valid (scope_icache_req_valid), \
.scope_icache_req_wid (scope_icache_req_wid), \
.scope_icache_req_addr (scope_icache_req_addr), \
.scope_icache_req_tag (scope_icache_req_tag), \
.scope_icache_req_ready (scope_icache_req_ready), \
.scope_icache_rsp_valid (scope_icache_rsp_valid), \
.scope_icache_rsp_data (scope_icache_rsp_data), \
.scope_icache_rsp_tag (scope_icache_rsp_tag), \
.scope_icache_rsp_ready (scope_icache_rsp_ready),
// Named port connections for the scope_dcache_req_* / scope_dcache_rsp_*
// signals. Each port is wired to a signal of the same name in the
// instantiating scope. The expansion ends with a comma, so more connections
// must follow it in the instance port list.
`define SCOPE_SIGNALS_LSU_BIND \
.scope_dcache_req_valid (scope_dcache_req_valid), \
.scope_dcache_req_wid (scope_dcache_req_wid), \
.scope_dcache_req_pc (scope_dcache_req_pc), \
.scope_dcache_req_addr (scope_dcache_req_addr), \
.scope_dcache_req_rw (scope_dcache_req_rw), \
.scope_dcache_req_byteen(scope_dcache_req_byteen), \
.scope_dcache_req_data (scope_dcache_req_data), \
.scope_dcache_req_tag (scope_dcache_req_tag), \
.scope_dcache_req_ready (scope_dcache_req_ready), \
.scope_dcache_rsp_valid (scope_dcache_rsp_valid), \
.scope_dcache_rsp_data (scope_dcache_rsp_data), \
.scope_dcache_rsp_tag (scope_dcache_rsp_tag), \
.scope_dcache_rsp_ready (scope_dcache_rsp_ready),
// Named port connections for the scope_bank_* signals (valid/addr per pipeline
// stage st0..st2 plus the st1 status flags and the stall flag). Each port is
// wired to a signal of the same name in the instantiating scope. The expansion
// ends with a comma, so more connections must follow it.
`define SCOPE_SIGNALS_CACHE_BIND \
.scope_bank_valid_st0 (scope_bank_valid_st0), \
.scope_bank_valid_st1 (scope_bank_valid_st1), \
.scope_bank_valid_st2 (scope_bank_valid_st2), \
.scope_bank_addr_st0 (scope_bank_addr_st0), \
.scope_bank_addr_st1 (scope_bank_addr_st1), \
.scope_bank_addr_st2 (scope_bank_addr_st2), \
.scope_bank_is_mrvq_st1 (scope_bank_is_mrvq_st1), \
.scope_bank_miss_st1 (scope_bank_miss_st1), \
.scope_bank_dirty_st1 (scope_bank_dirty_st1), \
.scope_bank_force_miss_st1(scope_bank_force_miss_st1), \
.scope_bank_stall_pipe (scope_bank_stall_pipe),
// Same port names as SCOPE_SIGNALS_CACHE_BIND, but every scope_bank_* port is
// left unconnected. The Verilator PINCONNECTEMPTY lint warning is switched off
// around the empty connections and back on afterwards. The last connection
// ends with a comma, so more connections must follow the expansion.
`define SCOPE_SIGNALS_CACHE_UNBIND \
/* verilator lint_off PINCONNECTEMPTY */ \
.scope_bank_valid_st0 (), \
.scope_bank_valid_st1 (), \
.scope_bank_valid_st2 (), \
.scope_bank_addr_st0 (), \
.scope_bank_addr_st1 (), \
.scope_bank_addr_st2 (), \
.scope_bank_is_mrvq_st1 (), \
.scope_bank_miss_st1 (), \
.scope_bank_dirty_st1 (), \
.scope_bank_force_miss_st1 (), \
.scope_bank_stall_pipe (), \
/* verilator lint_on PINCONNECTEMPTY */
// Declares one scope_per_bank_* vector per bank scope signal (NUM_BANKS entries
// each; NUM_BANKS must be visible where the macro is expanded) and drives the
// scalar scope_bank_* signals from entry 0 only. Entries of the other banks are
// never read here, which is why the declarations are wrapped in a Verilator
// UNUSED lint guard.
`define SCOPE_SIGNALS_CACHE_BANK_SELECT \
/* verilator lint_off UNUSED */ \
wire [NUM_BANKS-1:0] scope_per_bank_valid_st0; \
wire [NUM_BANKS-1:0] scope_per_bank_valid_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_valid_st2; \
wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st0; \
wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st1; \
wire [NUM_BANKS-1:0][31:0] scope_per_bank_addr_st2; \
wire [NUM_BANKS-1:0] scope_per_bank_is_mrvq_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_miss_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_dirty_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_force_miss_st1; \
wire [NUM_BANKS-1:0] scope_per_bank_stall_pipe; \
/* verilator lint_on UNUSED */ \
assign scope_bank_valid_st0 = scope_per_bank_valid_st0[0]; \
assign scope_bank_valid_st1 = scope_per_bank_valid_st1[0]; \
assign scope_bank_valid_st2 = scope_per_bank_valid_st2[0]; \
assign scope_bank_addr_st0 = scope_per_bank_addr_st0[0]; \
assign scope_bank_addr_st1 = scope_per_bank_addr_st1[0]; \
assign scope_bank_addr_st2 = scope_per_bank_addr_st2[0]; \
assign scope_bank_is_mrvq_st1 = scope_per_bank_is_mrvq_st1[0]; \
assign scope_bank_miss_st1 = scope_per_bank_miss_st1[0]; \
assign scope_bank_dirty_st1 = scope_per_bank_dirty_st1[0]; \
assign scope_bank_force_miss_st1 = scope_per_bank_force_miss_st1[0]; \
assign scope_bank_stall_pipe = scope_per_bank_stall_pipe[0];
// Named port connections that wire a bank's scope_bank_* ports to entry [i] of
// the scope_per_bank_* vectors. The index i is not a macro argument: an
// identifier named i must be in scope where the macro is expanded. The
// expansion ends with a comma, so more connections must follow it.
`define SCOPE_SIGNALS_CACHE_BANK_BIND \
.scope_bank_valid_st0 (scope_per_bank_valid_st0[i]), \
.scope_bank_valid_st1 (scope_per_bank_valid_st1[i]), \
.scope_bank_valid_st2 (scope_per_bank_valid_st2[i]), \
.scope_bank_addr_st0 (scope_per_bank_addr_st0[i]), \
.scope_bank_addr_st1 (scope_per_bank_addr_st1[i]), \
.scope_bank_addr_st2 (scope_per_bank_addr_st2[i]), \
.scope_bank_is_mrvq_st1 (scope_per_bank_is_mrvq_st1[i]), \
.scope_bank_miss_st1 (scope_per_bank_miss_st1[i]), \
.scope_bank_dirty_st1 (scope_per_bank_dirty_st1[i]), \
.scope_bank_force_miss_st1 (scope_per_bank_force_miss_st1[i]), \
.scope_bank_stall_pipe (scope_per_bank_stall_pipe[i]),
// Named port connections for the issue-stage scope signals (scope_issue_*,
// scope_writeback_*, scope_gpr_rsp_* and the *_delay flags). Each port is wired
// to a signal of the same name in the instantiating scope. The expansion ends
// with a comma, so more connections must follow it in the instance port list.
// The final entry deliberately carries no line-continuation backslash: a
// trailing backslash there would pull the following define directive into this
// macro's body.
`define SCOPE_SIGNALS_ISSUE_BIND \
.scope_issue_valid (scope_issue_valid), \
.scope_issue_wid (scope_issue_wid), \
.scope_issue_tmask (scope_issue_tmask), \
.scope_issue_pc (scope_issue_pc), \
.scope_issue_ex_type (scope_issue_ex_type), \
.scope_issue_op_type (scope_issue_op_type), \
.scope_issue_op_mod (scope_issue_op_mod), \
.scope_issue_wb (scope_issue_wb), \
.scope_issue_rd (scope_issue_rd), \
.scope_issue_rs1 (scope_issue_rs1), \
.scope_issue_rs2 (scope_issue_rs2), \
.scope_issue_rs3 (scope_issue_rs3), \
.scope_issue_imm (scope_issue_imm), \
.scope_issue_rs1_is_pc (scope_issue_rs1_is_pc), \
.scope_issue_rs2_is_imm (scope_issue_rs2_is_imm), \
.scope_writeback_valid (scope_writeback_valid), \
.scope_writeback_wid (scope_writeback_wid), \
.scope_writeback_pc (scope_writeback_pc), \
.scope_writeback_rd (scope_writeback_rd), \
.scope_writeback_data (scope_writeback_data), \
.scope_issue_ready (scope_issue_ready), \
.scope_gpr_rsp_valid (scope_gpr_rsp_valid), \
.scope_gpr_rsp_wid (scope_gpr_rsp_wid), \
.scope_gpr_rsp_pc (scope_gpr_rsp_pc), \
.scope_gpr_rsp_a (scope_gpr_rsp_a), \
.scope_gpr_rsp_b (scope_gpr_rsp_b), \
.scope_gpr_rsp_c (scope_gpr_rsp_c), \
.scope_scoreboard_delay (scope_scoreboard_delay), \
.scope_gpr_delay (scope_gpr_delay), \
.scope_execute_delay (scope_execute_delay),
// The execute stage has no scope ports to bind: this macro expands to nothing.
`define SCOPE_SIGNALS_EXECUTE_BIND
// Continuous assignment of source s to scope signal d. The expansion carries no
// terminating semicolon; the call site supplies it.
`define SCOPE_ASSIGN(d,s) assign d = s
`else
// Fallback branch of the surrounding conditional: every scope macro below is
// defined with an empty body, so port lists, instance bindings, declarations
// and SCOPE_ASSIGN calls that use them expand to nothing.
`define SCOPE_SIGNALS_ISTAGE_IO
`define SCOPE_SIGNALS_LSU_IO
`define SCOPE_SIGNALS_CACHE_IO
`define SCOPE_SIGNALS_ISSUE_IO
`define SCOPE_SIGNALS_EXECUTE_IO
`define SCOPE_SIGNALS_ISTAGE_BIND
`define SCOPE_SIGNALS_LSU_BIND
`define SCOPE_SIGNALS_CACHE_BIND
`define SCOPE_SIGNALS_ISSUE_BIND
`define SCOPE_SIGNALS_EXECUTE_BIND
`define SCOPE_SIGNALS_CACHE_UNBIND
`define SCOPE_SIGNALS_CACHE_BANK_SELECT
`define SCOPE_SIGNALS_CACHE_BANK_BIND
`define SCOPE_ASSIGN(d,s)
`define SCOPE_IO_VX_icache_stage
`define SCOPE_IO_VX_fetch
`define SCOPE_BIND_VX_fetch_icache_stage
`define SCOPE_BIND_VX_fetch_warp_sched
`define SCOPE_IO_VX_warp_sched
`define SCOPE_IO_VX_pipeline
`define SCOPE_BIND_VX_pipeline_fetch
`define SCOPE_IO_VX_core
`define SCOPE_BIND_VX_core_pipeline
`define SCOPE_IO_VX_cluster
`define SCOPE_BIND_VX_cluster_core(__i__)
`define SCOPE_IO_Vortex
`define SCOPE_BIND_Vortex_cluster(__i__)
`define SCOPE_BIND_top_vortex
`define SCOPE_IO_VX_lsu_unit
`define SCOPE_IO_VX_gpu_unit
`define SCOPE_IO_VX_execute
`define SCOPE_BIND_VX_execute_lsu_unit
`define SCOPE_BIND_VX_execute_gpu_unit
`define SCOPE_BIND_VX_pipeline_execute
`define SCOPE_IO_VX_issue
`define SCOPE_BIND_VX_pipeline_issue
`define SCOPE_IO_VX_bank
`define SCOPE_IO_VX_cache
`define SCOPE_BIND_VX_cache_bank(__i__)
`define SCOPE_BIND_Vortex_l3cache
`define SCOPE_BIND_VX_cluster_l2cache
`define SCOPE_IO_VX_mem_unit
`define SCOPE_BIND_VX_mem_unit_dcache
`define SCOPE_BIND_VX_core_mem_unit
`define SCOPE_BIND_VX_mem_unit_icache
`define SCOPE_BIND_VX_mem_unit_smem
`define SCOPE_DECL_SIGNALS
`define SCOPE_DATA_LIST
`define SCOPE_UPDATE_LIST
`define SCOPE_TRIGGER
`define SCOPE_ASSIGN(d,s)
`endif
// VX_SCOPE
`endif

View File

@@ -28,12 +28,16 @@ typedef struct packed {
logic [`NUM_THREADS-1:0] tmask;
} gpu_tmc_t;
`define GPU_TMC_SIZE (1+`NUM_THREADS)
// Packed record made of a valid bit, a NUM_WARPS-wide warp mask and a 32-bit
// pc. NOTE(review): presumably the payload of a warp-spawn request; confirm
// against the modules that produce and consume it.
typedef struct packed {
logic valid;
logic [`NUM_WARPS-1:0] wmask;
logic [31:0] pc;
} gpu_wspawn_t;
// Total bit width of gpu_wspawn_t: valid (1) + wmask (NUM_WARPS) + pc (32).
`define GPU_WSPAWN_SIZE (1+`NUM_WARPS+32)
typedef struct packed {
logic valid;
logic diverged;
@@ -42,10 +46,14 @@ typedef struct packed {
logic [31:0] pc;
} gpu_split_t;
`define GPU_SPLIT_SIZE (1+1+`NUM_THREADS+`NUM_THREADS+32)
// Packed record made of a valid bit, an NB_BITS-wide id and an NW_BITS-wide
// size_m1 field. NOTE(review): presumably a barrier request carrying the
// barrier id and the participant count minus one; confirm against its users.
typedef struct packed {
logic valid;
logic [`NB_BITS-1:0] id;
logic [`NW_BITS-1:0] size_m1;
} gpu_barrier_t;
// Total bit width of gpu_barrier_t: valid (1) + id (NB_BITS) + size_m1
// (NW_BITS). The last term has to be NW_BITS to match the size_m1 field;
// counting NB_BITS twice gives the wrong width whenever NB_BITS != NW_BITS.
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS)
`endif

View File

@@ -3,6 +3,8 @@
module VX_warp_sched #(
parameter CORE_ID = 0
) (
`SCOPE_IO_VX_warp_sched
input wire clk,
input wire reset,
@@ -248,4 +250,11 @@ module VX_warp_sched #(
assign busy = (active_warps != 0);
`SCOPE_ASSIGN (scope_wsched_scheduled_warp, scheduled_warp);
`SCOPE_ASSIGN (scope_wsched_active_warps, active_warps);
`SCOPE_ASSIGN (scope_wsched_schedule_table, schedule_table);
`SCOPE_ASSIGN (scope_wsched_schedule_ready, schedule_ready);
`SCOPE_ASSIGN (scope_wsched_warp_to_schedule, warp_to_schedule);
`SCOPE_ASSIGN (scope_wsched_warp_pc, warp_pc);
endmodule

View File

@@ -25,6 +25,7 @@ module VX_writeback #(
wire wb_valid;
wire [`NW_BITS-1:0] wb_wid;
wire [31:0] wb_PC;
wire [`NUM_THREADS-1:0] wb_tmask;
wire [`NR_BITS-1:0] wb_rd;
wire [`NUM_THREADS-1:0][31:0] wb_data;
@@ -42,6 +43,13 @@ module VX_writeback #(
mul_valid ? mul_commit_if.wid :
fpu_valid ? fpu_commit_if.wid :
0;
assign wb_PC = alu_valid ? alu_commit_if.PC :
lsu_valid ? lsu_commit_if.PC :
csr_valid ? csr_commit_if.PC :
mul_valid ? mul_commit_if.PC :
fpu_valid ? fpu_commit_if.PC :
0;
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
lsu_valid ? lsu_commit_if.tmask :
@@ -68,16 +76,16 @@ module VX_writeback #(
wire stall = 0/*~writeback_if.ready && writeback_if.valid*/;
VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
) wb_reg (
.clk (clk),
.reset (reset),
.stall (stall),
.flush (1'b0),
.in ({wb_valid, wb_wid, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.tmask, writeback_if.rd, writeback_if.data})
.in ({wb_valid, wb_wid, wb_PC, wb_tmask, wb_rd, wb_data}),
.out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data})
);
assign alu_commit_if.ready = !stall;
assign lsu_commit_if.ready = !stall && !alu_valid;
assign csr_commit_if.ready = !stall && !alu_valid && !lsu_valid;

View File

@@ -1,11 +1,7 @@
`include "VX_define.vh"
module Vortex (
`SCOPE_SIGNALS_ISTAGE_IO
`SCOPE_SIGNALS_LSU_IO
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_SIGNALS_ISSUE_IO
`SCOPE_SIGNALS_EXECUTE_IO
`SCOPE_IO_Vortex
// Clock
input wire clk,
@@ -75,11 +71,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID(0)
) cluster (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_Vortex_cluster(0)
.clk (clk),
.reset (reset),
@@ -193,11 +185,7 @@ module Vortex (
VX_cluster #(
.CLUSTER_ID(i)
) cluster (
`SCOPE_SIGNALS_ISTAGE_BIND
`SCOPE_SIGNALS_LSU_BIND
`SCOPE_SIGNALS_CACHE_BIND
`SCOPE_SIGNALS_ISSUE_BIND
`SCOPE_SIGNALS_EXECUTE_BIND
`SCOPE_BIND_Vortex_cluster(i)
.clk (clk),
.reset (reset),
@@ -384,7 +372,7 @@ module Vortex (
assign l3_core_rsp_ready = (& per_cluster_dram_rsp_ready);
VX_cache #(
.CACHE_ID (0),
.CACHE_ID (`L3CACHE_ID),
.CACHE_SIZE (`L3CACHE_SIZE),
.BANK_LINE_SIZE (`L3BANK_LINE_SIZE),
.NUM_BANKS (`L3NUM_BANKS),
@@ -407,7 +395,7 @@ module Vortex (
.SNP_REQ_TAG_WIDTH (`L3SNP_TAG_WIDTH),
.SNP_FWD_TAG_WIDTH (`L2SNP_TAG_WIDTH)
) l3cache (
`SCOPE_SIGNALS_CACHE_UNBIND
`SCOPE_BIND_Vortex_l3cache
.clk (clk),
.reset (reset),

View File

@@ -50,7 +50,7 @@ module VX_bank #(
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 0
) (
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_IO_VX_bank
input wire clk,
input wire reset,
@@ -146,7 +146,7 @@ module VX_bank #(
) snp_req_queue (
.clk (clk),
.reset (reset),
.push (snp_req_valid),
.push (snp_req_valid && snp_req_ready),
.data_in ({snp_req_addr, snp_req_invalidate, snp_req_tag}),
.pop (snrq_pop),
.data_out({snrq_addr_st0, snrq_invalidate_st0, snrq_tag_st0}),
@@ -169,7 +169,7 @@ module VX_bank #(
) dfp_queue (
.clk (clk),
.reset (reset),
.push (dram_fill_rsp_valid),
.push (dram_fill_rsp_valid && dram_fill_rsp_ready),
.data_in ({dram_fill_rsp_addr, dram_fill_rsp_data}),
.pop (dfpq_pop),
.data_out({dfpq_addr_st0, dfpq_filldata_st0}),
@@ -266,7 +266,9 @@ module VX_bank #(
`DEBUG_BEGIN
wire going_to_write_st1;
`DEBUG_END
//determines whether it is time to pop a req from each of the queues
//unqual - the pop decision before it is qualified by the pipeline stall (e.g. dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe)
wire mrvq_pop_unqual = mrvq_valid_st0;
wire dfpq_pop_unqual = !mrvq_pop_unqual && !dfpq_empty;
wire reqq_pop_unqual = !mrvq_stop && !mrvq_pop_unqual && !dfpq_pop_unqual && !reqq_empty && reqq_req_st0 && !is_fill_st1 && !is_fill_st1;
@@ -276,7 +278,8 @@ module VX_bank #(
assign dfpq_pop = dfpq_pop_unqual && !stall_bank_pipe;
assign reqq_pop = reqq_pop_unqual && !stall_bank_pipe;
assign snrq_pop = snrq_pop_unqual && !stall_bank_pipe;
//signals to progress to the next stage
wire qual_is_fill_st0;
wire qual_valid_st0;
wire [`LINE_ADDR_WIDTH-1:0] qual_addr_st0;
@@ -289,7 +292,8 @@ module VX_bank #(
wire qual_going_to_write_st0;
wire qual_is_snp_st0;
wire qual_snp_invalidate_st0;
//signals to be *used* in the next stage
wire valid_st1;
wire [`LINE_ADDR_WIDTH-1:0] addr_st1;
wire [`UP(`WORD_SELECT_WIDTH)-1:0] wsel_st1;
@@ -300,15 +304,19 @@ module VX_bank #(
wire snp_invalidate_st1;
wire is_mrvq_st1;
assign qual_is_fill_st0 = dfpq_pop_unqual;
//Determine which req will progress to the next stage
assign qual_is_fill_st0 = dfpq_pop_unqual; //dram is filling a request
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop;
assign qual_valid_st0 = dfpq_pop || mrvq_pop || reqq_pop || snrq_pop; //valid if something is being popped
assign qual_addr_st0 = dfpq_pop_unqual ? dfpq_addr_st0 :
mrvq_pop_unqual ? mrvq_addr_st0 :
//Decides which request to deal with. Priority: 1) Miss reserve 2) DRAM fill 3) Core req 4) Snp req
assign qual_addr_st0 = mrvq_pop_unqual ? mrvq_addr_st0 :
dfpq_pop_unqual ? dfpq_addr_st0 :
reqq_pop_unqual ? reqq_req_addr_st0[`LINE_SELECT_ADDR_RNG] :
snrq_pop_unqual ? snrq_addr_st0 :
0;
//Word select does ? Does this just pick a specific word from the line instead of the whole line?
if (`WORD_SELECT_WIDTH != 0) begin
assign qual_wsel_st0 = reqq_pop_unqual ? reqq_req_addr_st0[`WORD_SELECT_WIDTH-1:0] :
mrvq_pop_unqual ? mrvq_wsel_st0 :
@@ -318,30 +326,35 @@ module VX_bank #(
assign qual_wsel_st0 = 0;
end
//if you are filling from dram then that is the write data? What about core? What is 57?
assign qual_writedata_st0 = dfpq_pop_unqual ? dfpq_filldata_st0 : 57;
//note that this is stored even if a DRAM fill is processed
assign qual_inst_meta_st0 = mrvq_pop_unqual ? {`REQ_TAG_WIDTH'(mrvq_tag_st0) , mrvq_rw_st0, mrvq_byteen_st0, mrvq_tid_st0} :
reqq_pop_unqual ? {`REQ_TAG_WIDTH'(reqq_req_tag_st0), reqq_req_rw_st0, reqq_req_byteen_st0, reqq_req_tid_st0} :
snrq_pop_unqual ? {`REQ_TAG_WIDTH'(snrq_tag_st0), 1'b0, WORD_SIZE'(0), `REQS_BITS'(0)} :
0;
assign qual_going_to_write_st0 = dfpq_pop_unqual ? 1 :
(mrvq_pop_unqual && mrvq_rw_st0) ? 1 :
(reqq_pop_unqual && reqq_req_rw_st0) ? 1 :
0;
//snp signals: check first whether the entry popped from the miss reserve is a snp, then fall back to the snoop queue.
assign qual_is_snp_st0 = mrvq_pop_unqual ? mrvq_is_snp_st0 :
snrq_pop_unqual ? 1 :
0;
//if we are popping from the miss reserve then assign to the mrvq invalidate. If not and popping from the snoop queue use the snoop invalidate. Else this is 0
assign qual_snp_invalidate_st0 = mrvq_pop_unqual ? mrvq_snp_invalidate_st0 :
snrq_pop_unqual ? snrq_invalidate_st0 :
0;
//choose which word of the line is being written to
assign qual_writeword_st0 = mrvq_pop_unqual ? mrvq_writeword_st0 :
reqq_pop_unqual ? reqq_req_writeword_st0 :
0;
assign qual_is_mrvq_st0 = mrvq_pop_unqual;
`ifdef DBG_CORE_REQ_INFO
@@ -356,7 +369,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.flush (1'b0),
.in ({qual_is_mrvq_st0, qual_is_snp_st0, qual_snp_invalidate_st0, qual_going_to_write_st0, qual_valid_st0, qual_addr_st0, qual_wsel_st0, qual_writeword_st0, qual_inst_meta_st0, qual_is_fill_st0, qual_writedata_st0}),
.out ({is_mrvq_st1 , is_snp_st1, snp_invalidate_st1, going_to_write_st1, valid_st1, addr_st1, wsel_st1, writeword_st1, inst_meta_st1, is_fill_st1, writedata_st1})
);
@@ -453,6 +466,8 @@ module VX_bank #(
`ifdef DBG_CORE_REQ_INFO
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = inst_meta_st1;
end else begin
assign {debug_pc_st1, debug_rd_st1, debug_wid_st1, debug_tagid_st1, debug_rw_st1, debug_byteen_st1, debug_tid_st1} = 0;
end
`endif
@@ -486,7 +501,7 @@ module VX_bank #(
.clk (clk),
.reset (reset),
.stall (stall_bank_pipe),
.flush (0),
.flush (1'b0),
.in ({mrvq_recover_ready_state_st1, is_mrvq_st1_st2, mrvq_init_ready_state_st1, snp_to_mrvq_st1, is_snp_st1, snp_invalidate_st1, fill_saw_dirty_st1, is_fill_st1, qual_valid_st1_2, addr_st1, wsel_st1, writeword_st1, readword_st1, readdata_st1, readtag_st1, miss_st1, dirty_st1, dirtyb_st1, inst_meta_st1}),
.out ({mrvq_recover_ready_state_st2 , is_mrvq_st2 , mrvq_init_ready_state_unqual_st2, snp_to_mrvq_st2 , is_snp_st2 , snp_invalidate_st2, fill_saw_dirty_st2 , is_fill_st2 , valid_st2 , addr_st2, wsel_st2, writeword_st2, readword_st2, readdata_st2, readtag_st2, miss_st2, dirty_st2, dirtyb_st2, inst_meta_st2})
);
@@ -728,18 +743,18 @@ module VX_bank #(
end
`endif
`SCOPE_ASSIGN (scope_bank_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_bank_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_bank_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_valid_st0, qual_valid_st0);
`SCOPE_ASSIGN (scope_valid_st1, valid_st1);
`SCOPE_ASSIGN (scope_valid_st2, valid_st2);
`SCOPE_ASSIGN (scope_bank_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_bank_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_bank_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_bank_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_bank_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_is_mrvq_st1, is_mrvq_st1);
`SCOPE_ASSIGN (scope_miss_st1, miss_st1);
`SCOPE_ASSIGN (scope_dirty_st1, dirty_st1);
`SCOPE_ASSIGN (scope_force_miss_st1, force_request_miss_st1);
`SCOPE_ASSIGN (scope_stall_pipe, stall_bank_pipe);
`SCOPE_ASSIGN (scope_bank_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_bank_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st0, `LINE_TO_BYTE_ADDR(qual_addr_st0, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st1, `LINE_TO_BYTE_ADDR(addr_st1, BANK_ID));
`SCOPE_ASSIGN (scope_addr_st2, `LINE_TO_BYTE_ADDR(addr_st2, BANK_ID));
endmodule

View File

@@ -51,15 +51,15 @@ module VX_cache #(
parameter DRAM_TAG_WIDTH = 28,
// Number of snoop forwarding requests
parameter NUM_SNP_REQUESTS = 2,
parameter NUM_SNP_REQUESTS = 1,
// Snooping request tag width
parameter SNP_REQ_TAG_WIDTH = 28,
parameter SNP_REQ_TAG_WIDTH = 1,
// Snooping forward tag width
parameter SNP_FWD_TAG_WIDTH = 1
) (
`SCOPE_SIGNALS_CACHE_IO
`SCOPE_IO_VX_cache
input wire clk,
input wire reset,
@@ -167,7 +167,7 @@ module VX_cache #(
wire [NUM_BANKS-1:0] per_bank_miss;
assign miss_vec = per_bank_miss;
`SCOPE_SIGNALS_CACHE_BANK_SELECT
wire snp_req_valid_qual;
wire [`DRAM_ADDR_WIDTH-1:0] snp_req_addr_qual;
@@ -376,7 +376,7 @@ module VX_cache #(
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS),
.SNP_REQ_TAG_WIDTH (SNP_REQ_TAG_WIDTH)
) bank (
`SCOPE_SIGNALS_CACHE_BANK_BIND
`SCOPE_BIND_VX_cache_bank(i)
.clk (clk),
.reset (reset),

View File

@@ -2,7 +2,6 @@
`define VX_CACHE_CONFIG
`include "VX_platform.vh"
`include "VX_scope.vh"
`ifdef DBG_CORE_REQ_INFO
`include "VX_define.vh"

View File

@@ -91,7 +91,7 @@ module VX_cache_core_rsp_merge #(
.clk (clk),
.reset (reset),
.stall (stall),
.flush (0),
.flush (1'b0),
.in ({core_rsp_valid_unqual, core_rsp_data_unqual, core_rsp_tag_unqual}),
.out ({core_rsp_valid, core_rsp_data, core_rsp_tag})
);

View File

@@ -56,8 +56,9 @@ module VX_cache_miss_resrv #(
output wire miss_resrv_is_snp_st0,
output wire miss_resrv_snp_invalidate_st0
);
reg [`MRVQ_METADATA_WIDTH-1:0] metadata_table[MRVQ_SIZE-1:0];
wire [`MRVQ_METADATA_WIDTH-1:0] metadata_table;
reg [MRVQ_SIZE-1:0][`LINE_ADDR_WIDTH-1:0] addr_table;
reg [MRVQ_SIZE-1:0] valid_table;
reg [MRVQ_SIZE-1:0] ready_table;
reg [`LOG2UP(MRVQ_SIZE)-1:0] schedule_ptr;
@@ -66,13 +67,13 @@ module VX_cache_miss_resrv #(
reg [`LOG2UP(MRVQ_SIZE+1)-1:0] size;
`STATIC_ASSERT(MRVQ_SIZE > 5, "invalid size")
`STATIC_ASSERT(MRVQ_SIZE > 5, ("invalid size"))
assign miss_resrv_full = (size == $bits(size)'(MRVQ_SIZE));
assign miss_resrv_stop = (size > $bits(size)'(MRVQ_SIZE-5)); // need to add 5 cycles to prevent pipeline lock
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
reg [MRVQ_SIZE-1:0] make_ready;
reg [MRVQ_SIZE-1:0] make_ready_push;
@@ -85,11 +86,11 @@ module VX_cache_miss_resrv #(
assign pending_hazard_st1 = |(valid_address_match);
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire dequeue_possible = valid_table[schedule_ptr] && ready_table[schedule_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = schedule_ptr;
assign miss_resrv_valid_st0 = dequeue_possible;
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0,
miss_resrv_tid_st0,
miss_resrv_tag_st0,
@@ -97,7 +98,7 @@ module VX_cache_miss_resrv #(
miss_resrv_byteen_st0,
miss_resrv_wsel_st0,
miss_resrv_is_snp_st0,
miss_resrv_snp_invalidate_st0} = metadata_table[dequeue_index];
miss_resrv_snp_invalidate_st0} = metadata_table;
wire mrvq_push = miss_add && enqueue_possible && !is_mrvq;
wire mrvq_pop = miss_resrv_pop && dequeue_possible;
@@ -124,13 +125,12 @@ module VX_cache_miss_resrv #(
valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= mrvq_init_ready_state;
addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate};
tail_ptr <= tail_ptr + 1;
tail_ptr <= tail_ptr + $bits(tail_ptr)'(1);
end else if (increment_head) begin
valid_table[head_ptr] <= 0;
head_ptr <= head_ptr + 1;
head_ptr <= head_ptr + $bits(head_ptr)'(1);
end else if (recover_state) begin
schedule_ptr <= schedule_ptr - 1;
schedule_ptr <= schedule_ptr - $bits(schedule_ptr)'(1);
end
// update entry as 'ready' during DRAM fill response
@@ -140,20 +140,36 @@ module VX_cache_miss_resrv #(
if (mrvq_pop) begin
ready_table[dequeue_index] <= 0;
schedule_ptr <= schedule_ptr + 1;
schedule_ptr <= schedule_ptr + $bits(schedule_ptr)'(1);
end
if (!(mrvq_push && increment_head)) begin
if (mrvq_push) begin
size <= size + 1;
size <= size + $bits(size)'(1);
end
if (increment_head) begin
size <= size - 1;
size <= size - $bits(size)'(1);
end
end
end
end
VX_dp_ram #(
.DATAW(`MRVQ_METADATA_WIDTH),
.SIZE(MRVQ_SIZE),
.BYTEENW(1),
.BUFFERED(0),
.RWCHECK(1)
) metadata_ram (
.clk(clk),
.waddr(enqueue_index),
.raddr(dequeue_index),
.wren(mrvq_push),
.rden(1'b1),
.din({miss_add_data, miss_add_tid, miss_add_tag, miss_add_rw, miss_add_byteen, miss_add_wsel, miss_add_is_snp, miss_add_snp_invalidate}),
.dout(metadata_table)
);
`ifdef DBG_PRINT_CACHE_MSRQ
always @(posedge clk) begin
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin

View File

@@ -37,7 +37,7 @@ module VX_snp_forwarder #(
input wire [NUM_REQUESTS-1:0][`LOG2UP(SNRQ_SIZE)-1:0] snp_fwdin_tag,
output wire [NUM_REQUESTS-1:0] snp_fwdin_ready
);
`STATIC_ASSERT(NUM_REQUESTS > 1, "invalid value")
`STATIC_ASSERT(NUM_REQUESTS > 1, ("invalid value"))
reg [`REQS_BITS:0] pending_cntrs [SNRQ_SIZE-1:0];

View File

@@ -183,15 +183,15 @@ module VX_tag_data_access #(
if (valid_req_st1) begin
if ((| use_write_enable)) begin
if (writefill_st1) begin
$display("%t: cache%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
$display("%t: cache%0d:%0d data-fill: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, use_write_data);
end else begin
$display("%t: cache%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
$display("%t: cache%0d:%0d data-write: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, writeladdr_st1, writetag_st1, wordsel_st1, writeword_st1);
end
end else
if (miss_st1) begin
$display("%t: cache%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
$display("%t: cache%0d:%0d data-miss: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1);
end else begin
$display("%t: cache%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
$display("%t: cache%0d:%0d data-read: wid=%0d, PC=%0h, tag=%0h, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1, debug_pc_st1, debug_tagid_st1, debug_rd_st1, dirty_st1, readaddr_st1, qual_read_tag_st1, wordsel_st1, qual_read_data_st1);
end
end
end

View File

@@ -6,7 +6,7 @@ module VX_tag_data_store #(
// Size of line inside a bank in bytes
parameter BANK_LINE_SIZE = 0,
// Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 0,
parameter NUM_BANKS = 0, //unused parameter?
// Size of a word in bytes
parameter WORD_SIZE = 0
) (
@@ -30,7 +30,6 @@ module VX_tag_data_store #(
input wire fill_sent
);
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0][7:0] data [`BANK_LINE_COUNT-1:0];
reg [`TAG_SELECT_BITS-1:0] tag [`BANK_LINE_COUNT-1:0];
reg [`BANK_LINE_WORDS-1:0][WORD_SIZE-1:0] dirtyb[`BANK_LINE_COUNT-1:0];
reg [`BANK_LINE_COUNT-1:0] dirty;
@@ -40,8 +39,7 @@ module VX_tag_data_store #(
assign read_dirty = dirty [read_addr];
assign read_dirtyb = dirtyb [read_addr];
assign read_tag = tag [read_addr];
assign read_data = data [read_addr];
wire do_write = (| write_enable);
always @(posedge clk) begin
@@ -69,15 +67,26 @@ module VX_tag_data_store #(
if (invalidate) begin
valid[write_addr] <= 0;
end
for (integer j = 0; j < `BANK_LINE_WORDS; j++) begin
for (integer i = 0; i < WORD_SIZE; i++) begin
if (write_enable[j][i]) begin
data[write_addr][j][i] <= write_data[j * `WORD_WIDTH + i * 8 +: 8];
end
end
end
end
end
endmodule
wire [(`BANK_LINE_WORDS * WORD_SIZE)-1:0] ram_wren;
assign ram_wren = write_enable & {(`BANK_LINE_WORDS * WORD_SIZE){!stall_bank_pipe}};
VX_dp_ram #(
.DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8),
.SIZE(`BANK_LINE_COUNT),
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE),
.BUFFERED(0),
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(write_addr),
.raddr(read_addr),
.wren(ram_wren),
.rden(1'b1),
.din(write_data),
.dout(read_data)
);
endmodule

145
hw/rtl/libs/VX_dp_ram.v Normal file
View File

@@ -0,0 +1,145 @@
`include "VX_platform.vh"
// Dual-port RAM with one write port and one read port on a shared clock.
//
// Parameters
//   DATAW    : width of one memory word in bits.
//   SIZE     : number of words.
//   BYTEENW  : number of write-enable lanes in wren. With BYTEENW > 1 every
//              lane i gates its own DATAW/BYTEENW-bit slice of the word, so
//              DATAW should be a multiple of BYTEENW (bits above
//              BYTEENW * (DATAW/BYTEENW) would never be written). With
//              BYTEENW == 1 the whole word is written at once.
//   BUFFERED : non-zero selects a registered read (dout changes on the clock
//              edge, and only when rden is high); zero selects an
//              asynchronous read (dout follows mem[raddr], rden is ignored).
//   RWCHECK  : only consulted when BUFFERED == 0. Zero prefixes the memory
//              array with the NO_RW_RAM_CHECK macro from VX_platform.vh
//              (presumably a synthesis attribute - confirm in that header).
//   RWBYPASS : only effective when BUFFERED != 0: a write to the address that
//              is being read is forwarded to dout on the following cycle.
//              Note that the bypass registers update every cycle and are not
//              gated by rden.
//   ADDRW    : address width, derived from SIZE.
//   SIZEW    : not referenced inside this module.
//
// Ports
//   waddr/din/wren : write address, write data and per-lane write enables.
//   raddr/rden/dout: read address, read enable and read data.
module VX_dp_ram #(
    parameter DATAW    = 1,
    parameter SIZE     = 1,
    parameter BYTEENW  = 1,
    parameter BUFFERED = 1,
    parameter RWCHECK  = 1,
    parameter RWBYPASS = 0,
    parameter ADDRW    = $clog2(SIZE),
    parameter SIZEW    = $clog2(SIZE+1)
) (
    input wire clk,
    input wire [ADDRW-1:0] waddr,
    input wire [ADDRW-1:0] raddr,
    input wire [BYTEENW-1:0] wren,
    input wire rden,
    input wire [DATAW-1:0] din,
    output wire [DATAW-1:0] dout
);
    // Width of the slice controlled by one write-enable lane. Equals 8 for the
    // byte-enable configuration (DATAW == 8 * BYTEENW).
    localparam LANEW = DATAW / BYTEENW;

    if (BUFFERED) begin

        reg [DATAW-1:0] mem [SIZE-1:0];
        reg [DATAW-1:0] dout_r;

        // write port
        if (BYTEENW > 1) begin
            always @(posedge clk) begin
                for (integer i = 0; i < BYTEENW; i++) begin
                    if (wren[i])
                        mem[waddr][i * LANEW +: LANEW] <= din[i * LANEW +: LANEW];
                end
            end
        end else begin
            always @(posedge clk) begin
                if (wren)
                    mem[waddr] <= din;
            end
        end

        // registered read port
        always @(posedge clk) begin
            if (rden)
                dout_r <= mem[raddr];
        end

        if (RWBYPASS) begin
            // dout_r captured the pre-write word on a same-address access, so
            // keep a copy of the word as it looks after the write and select it
            // on the next cycle.
            reg [DATAW-1:0] din_r;
            wire writing;

            if (BYTEENW > 1) begin
                assign writing = (| wren);
                always @(posedge clk) begin
                    // lanes that are not written keep the stored contents
                    for (integer i = 0; i < BYTEENW; i++) begin
                        din_r[i * LANEW +: LANEW] <= wren[i] ? din[i * LANEW +: LANEW]
                                                             : mem[waddr][i * LANEW +: LANEW];
                    end
                end
            end else begin
                assign writing = wren;
                always @(posedge clk) begin
                    din_r <= din;
                end
            end

            reg bypass_r;
            always @(posedge clk) begin
                bypass_r <= writing && (raddr == waddr);
            end

            assign dout = bypass_r ? din_r : dout_r;
        end else begin
            assign dout = dout_r;
        end

    end else begin

        `UNUSED_VAR(rden)

        if (RWCHECK) begin

            reg [DATAW-1:0] mem [SIZE-1:0];

            if (BYTEENW > 1) begin
                always @(posedge clk) begin
                    for (integer i = 0; i < BYTEENW; i++) begin
                        if (wren[i])
                            mem[waddr][i * LANEW +: LANEW] <= din[i * LANEW +: LANEW];
                    end
                end
            end else begin
                always @(posedge clk) begin
                    if (wren)
                        mem[waddr] <= din;
                end
            end

            // Asynchronous read: on the cycle after a write the array already
            // holds the new word, so no bypass is needed. A bypass flag
            // registered from the previous cycle must not be muxed in here:
            // raddr may have moved on since then, and the flag would return the
            // word written to the old address instead of mem[raddr]. RWBYPASS
            // therefore has no effect in this mode.
            assign dout = mem[raddr];

        end else begin

            `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];

            if (BYTEENW > 1) begin
                always @(posedge clk) begin
                    for (integer i = 0; i < BYTEENW; i++) begin
                        if (wren[i])
                            mem[waddr][i * LANEW +: LANEW] <= din[i * LANEW +: LANEW];
                    end
                end
            end else begin
                always @(posedge clk) begin
                    if (wren)
                        mem[waddr] <= din;
                end
            end

            assign dout = mem[raddr];

        end
    end

endmodule

View File

@@ -3,7 +3,7 @@
module VX_generic_queue #(
parameter DATAW = 1,
parameter SIZE = 2,
parameter BUFFERED = 0,
parameter BUFFERED = 1,
parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1)
) (
@@ -17,30 +17,26 @@ module VX_generic_queue #(
output wire full,
output wire [SIZEW-1:0] size
);
`STATIC_ASSERT(`ISPOW2(SIZE), "must be 0 or power of 2!")
reg [SIZEW-1:0] size_r;
wire reading;
wire writing;
assign reading = pop && !empty;
assign writing = push && !full;
if (SIZE == 1) begin // (SIZE == 1)
`STATIC_ASSERT(`ISPOW2(SIZE), ("must be 0 or power of 2!"))
if (SIZE == 1) begin
reg [DATAW-1:0] head_r;
reg size_r;
always @(posedge clk) begin
if (reset) begin
head_r <= 0;
size_r <= 0;
end else begin
if (writing && !reading) begin
if (push && !pop) begin
assert(!full);
size_r <= 1;
end else if (reading && !writing) begin
end else if (pop && !push) begin
assert(!empty);
size_r <= 0;
end
if (writing) begin
if (push) begin
head_r <= data_in;
end
end
@@ -51,15 +47,14 @@ module VX_generic_queue #(
assign full = (size_r != 0);
assign size = size_r;
end else begin // (SIZE > 1)
end else begin
`USE_FAST_BRAM reg [DATAW-1:0] data [SIZE-1:0];
if (0 == BUFFERED) begin
if (0 == BUFFERED) begin
reg [ADDRW:0] rd_ptr_r;
reg [ADDRW:0] wr_ptr_r;
reg [ADDRW-1:0] used_r;
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
@@ -67,96 +62,126 @@ module VX_generic_queue #(
if (reset) begin
rd_ptr_r <= 0;
wr_ptr_r <= 0;
size_r <= 0;
used_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_a] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (!reading) begin
size_r <= size_r + 1;
if (push) begin
assert(!full);
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(1);
if (!pop) begin
used_r <= used_r + ADDRW'(1);
end
end
if (reading) begin
rd_ptr_r <= rd_ptr_r + 1;
if (!writing) begin
size_r <= size_r - 1;
if (pop) begin
assert(!empty);
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(1);
if (!push) begin
used_r <= used_r - ADDRW'(1);
end
end
end
end
end
assign data_out = data[rd_ptr_a];
assign empty = (wr_ptr_r == rd_ptr_r);
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
assign size = size_r;
// Storage for the unbuffered queue path: a VX_dp_ram instance configured with
// BUFFERED(0) and RWCHECK(1). It is written at wr_ptr_a when push is asserted
// and read at rd_ptr_a, and its output drives data_out directly.
// NOTE(review): wren is tied to raw push (not push && !full); the update logic
// above asserts !full on push, so callers are presumably required never to
// push into a full queue — confirm.
VX_dp_ram #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(0),
.RWCHECK(1)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_a),
.raddr(rd_ptr_a),
.wren(push),
.rden(pop),
.din(data_in),
.dout(data_out)
);
// rd_ptr_r / wr_ptr_r are ADDRW+1 bits wide. Identical pointers mean empty;
// identical low ADDRW bits with differing top bits mean full.
assign empty = (wr_ptr_r == rd_ptr_r);
assign full = (wr_ptr_a == rd_ptr_a) && (wr_ptr_r[ADDRW] != rd_ptr_r[ADDRW]);
// used_r is only ADDRW bits wide, so the reported size is formed by placing
// the full flag above it as the most significant bit.
assign size = {full, used_r};
end else begin
reg [DATAW-1:0] head_r;
reg [DATAW-1:0] curr_r;
wire [DATAW-1:0] dout;
reg [DATAW-1:0] din_r;
reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_next_r;
reg [ADDRW-1:0] rd_ptr_n_r;
reg [ADDRW-1:0] used_r;
reg empty_r;
reg full_r;
reg bypass_r;
always @(posedge clk) begin
if (reset) begin
size_r <= 0;
head_r <= 0;
curr_r <= 0;
wr_ptr_r <= 0;
rd_ptr_r <= 0;
rd_ptr_next_r <= 1;
empty_r <= 1;
full_r <= 0;
if (reset) begin
wr_ptr_r <= 0;
rd_ptr_r <= 0;
rd_ptr_n_r <= 1;
empty_r <= 1;
full_r <= 0;
used_r <= 0;
end else begin
if (writing) begin
data[wr_ptr_r] <= data_in;
wr_ptr_r <= wr_ptr_r + 1;
if (push) begin
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
if (!reading) begin
if (!pop) begin
empty_r <= 0;
if (size_r == ($bits(size_r)'(SIZE-1))) begin
if (used_r == ADDRW'(SIZE-1)) begin
full_r <= 1;
end
size_r <= size_r + 1;
used_r <= used_r + ADDRW'(1);
end
end
if (reading) begin
rd_ptr_r <= rd_ptr_next_r;
if (pop) begin
rd_ptr_r <= rd_ptr_n_r;
if (SIZE > 2) begin
rd_ptr_next_r <= rd_ptr_r + $bits(rd_ptr_r)'(2);
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
end else begin // (SIZE == 2);
rd_ptr_next_r <= ~rd_ptr_next_r;
rd_ptr_n_r <= ~rd_ptr_n_r;
end
if (!writing) begin
if (size_r == 1) begin
assert(rd_ptr_next_r == wr_ptr_r);
if (!push) begin
full_r <= 0;
if (used_r == ADDRW'(1)) begin
assert(rd_ptr_n_r == wr_ptr_r);
empty_r <= 1;
end;
full_r <= 0;
size_r <= size_r - 1;
end;
used_r <= used_r - ADDRW'(1);
end
end
bypass_r <= writing
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
curr_r <= data_in;
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
end
end
end
assign data_out = bypass_r ? curr_r : head_r;
// Bypass control for the buffered queue path.
// On a push that arrives while the queue is empty, or while it holds exactly
// one entry (used_r == 1) that is popped in the same cycle, capture data_in
// into din_r and set bypass_r. Otherwise, a pop clears bypass_r.
// When neither condition holds, bypass_r and din_r keep their values.
// NOTE(review): this block has no reset branch, so bypass_r and din_r are not
// initialised here — confirm that consumers ignore data_out while empty.
always @(posedge clk) begin
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
bypass_r <= 1;
din_r <= data_in;
end else if (pop)
bypass_r <= 0;
end
// Storage for the buffered queue path: a VX_dp_ram instance configured with
// BUFFERED(1) and RWCHECK(0). It is written at wr_ptr_r when push is asserted.
// The read address is rd_ptr_n_r (not rd_ptr_r) and rden is tied to pop.
// NOTE(review): presumably the RAM output is registered and only advances on
// rden, which would explain presenting the next read pointer — confirm
// against the VX_dp_ram implementation.
VX_dp_ram #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(1),
.RWCHECK(0)
) dp_ram (
.clk(clk),
.waddr(wr_ptr_r),
.raddr(rd_ptr_n_r),
.wren(push),
.rden(pop),
.din(data_in),
.dout(dout)
);
// Head-of-queue output: the captured input word while bypass_r is set,
// otherwise the RAM read data.
assign data_out = bypass_r ? din_r : dout;
assign empty = empty_r;
assign full = full_r;
assign size = size_r;
assign size = {full_r, used_r};
end
end

View File

@@ -28,9 +28,13 @@ module VX_index_queue #(
assign empty = (wr_ptr == rd_ptr);
assign full = (wr_a == rd_a) && (wr_ptr[`LOG2UP(SIZE)] != rd_ptr[`LOG2UP(SIZE)]);
assign enqueue = push && !full;
assign enqueue = push;
assign dequeue = !empty && !valid[rd_a]; // auto-remove when head is invalid
// Immediate assertion evaluated whenever its inputs change: push must never
// be asserted while full is asserted.
always @(*) begin
assert(!push || !full);
end
always @(posedge clk) begin
if (reset) begin
rd_ptr <= 0;

View File

@@ -18,7 +18,7 @@ module VX_scope #(
input wire bus_write,
input wire bus_read
);
localparam DELTA_ENABLE = (UPDW != 0);
localparam UPDW_ENABLE = (UPDW != 0);
localparam MAX_DELTA = (2 ** DELTAW) - 1;
localparam CMD_GET_VALID = 3'd0;
@@ -27,19 +27,22 @@ module VX_scope #(
localparam CMD_GET_COUNT = 3'd3;
localparam CMD_SET_DELAY = 3'd4;
localparam CMD_SET_STOP = 3'd5;
localparam CMD_RESERVED1 = 3'd6;
localparam CMD_GET_OFFSET= 3'd6;
localparam CMD_RESERVED2 = 3'd7;
localparam GET_VALID = 2'd0;
localparam GET_DATA = 2'd1;
localparam GET_WIDTH = 2'd2;
localparam GET_COUNT = 2'd3;
localparam GET_VALID = 3'd0;
localparam GET_DATA = 3'd1;
localparam GET_WIDTH = 3'd2;
localparam GET_COUNT = 3'd3;
localparam GET_OFFSET = 3'd6;
`NO_RW_RAM_CHECK reg [DATAW-1:0] data_store [SIZE-1:0];
`NO_RW_RAM_CHECK reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [DATAW-1:0] data_store [SIZE-1:0];
reg [DELTAW-1:0] delta_store [SIZE-1:0];
reg [UPDW-1:0] prev_trigger_id;
reg [DELTAW-1:0] delta;
reg [BUSW-1:0] bus_out_r;
reg [63:0] timestamp, start_time;
reg [`CLOG2(SIZE)-1:0] raddr, waddr, waddr_end;
@@ -49,8 +52,7 @@ module VX_scope #(
reg [BUSW-3:0] delay_val, delay_cntr;
reg [1:0] out_cmd;
reg [2:0] get_cmd;
wire [2:0] cmd_type;
wire [BUSW-4:0] cmd_data;
assign {cmd_data, cmd_type} = bus_in;
@@ -59,7 +61,7 @@ module VX_scope #(
always @(posedge clk) begin
if (reset) begin
out_cmd <= $bits(out_cmd)'(CMD_GET_VALID);
get_cmd <= $bits(get_cmd)'(CMD_GET_VALID);
raddr <= 0;
waddr <= 0;
waddr_end <= $bits(waddr)'(SIZE-1);
@@ -74,13 +76,18 @@ module VX_scope #(
read_offset <= 0;
read_delta <= 0;
data_valid <= 0;
timestamp <= 0;
end else begin
timestamp <= timestamp + 1;
if (bus_write) begin
case (cmd_type)
CMD_GET_VALID,
CMD_GET_DATA,
CMD_GET_WIDTH,
CMD_GET_COUNT: out_cmd <= $bits(out_cmd)'(cmd_type);
CMD_GET_OFFSET,
CMD_GET_COUNT: get_cmd <= $bits(get_cmd)'(cmd_type);
CMD_SET_DELAY: delay_val <= $bits(delay_val)'(cmd_data);
CMD_SET_STOP: waddr_end <= $bits(waddr)'(cmd_data);
default:;
@@ -92,8 +99,10 @@ module VX_scope #(
delta_flush <= 1;
if (0 == delay_val) begin
start_wait <= 0;
recording <= 1;
delay_cntr <= 0;
recording <= 1;
delta <= 0;
delay_cntr <= 0;
start_time <= timestamp;
end else begin
start_wait <= 1;
recording <= 0;
@@ -106,26 +115,29 @@ module VX_scope #(
if (1 == delay_cntr) begin
start_wait <= 0;
recording <= 1;
delta <= 0;
start_time <= timestamp;
end
end
if (recording) begin
if (DELTA_ENABLE) begin
if (UPDW_ENABLE) begin
if (delta_flush
|| changed
|| (trigger_id != prev_trigger_id)) begin
data_store[waddr] <= data_in;
delta_store[waddr] <= delta;
waddr <= waddr + 1;
data_store[waddr] <= data_in;
waddr <= waddr + $bits(waddr)'(1);
delta <= 0;
delta_flush <= 0;
end else begin
delta <= delta + 1;
delta <= delta + DELTAW'(1);
delta_flush <= (delta == (MAX_DELTA-1));
end
prev_trigger_id <= trigger_id;
end else begin
data_store[waddr] <= data_in;
delta_store[waddr] <= 0;
data_store[waddr] <= data_in;
waddr <= waddr + 1;
end
@@ -134,12 +146,12 @@ module VX_scope #(
waddr <= waddr; // keep last address
recording <= 0;
data_valid <= 1;
read_delta <= DELTA_ENABLE;
read_delta <= 1;
end
end
if (bus_read
&& (out_cmd == GET_DATA)
&& (get_cmd == GET_DATA)
&& data_valid) begin
if (read_delta) begin
read_delta <= 0;
@@ -148,16 +160,16 @@ module VX_scope #(
if (read_offset < $bits(read_offset)'(DATAW-BUSW)) begin
read_offset <= read_offset + $bits(read_offset)'(BUSW);
end else begin
raddr <= raddr + 1;
raddr <= raddr + $bits(raddr)'(1);
read_offset <= 0;
read_delta <= DELTA_ENABLE;
read_delta <= 1;
if (raddr == waddr) begin
data_valid <= 0;
end
end
end else begin
raddr <= raddr + 1;
read_delta <= DELTA_ENABLE;
read_delta <= 1;
if (raddr == waddr) begin
data_valid <= 0;
end
@@ -168,11 +180,14 @@ module VX_scope #(
end
// Combinational read-response mux: chooses the value driven onto bus_out_r
// according to the latched query command.
//   GET_VALID  -> data_valid, widened to BUSW
//   GET_WIDTH  -> the DATAW parameter
//   GET_COUNT  -> waddr + 1
//   GET_OFFSET -> start_time cast to BUSW (start_time is declared 64 bits wide)
//   GET_DATA   -> delta_store[raddr] while read_delta is set, otherwise
//                 data_store[raddr] shifted right by read_offset and cast to BUSW
//   any other  -> 0
// The Verilator WIDTH lint is disabled only around the GET_DATA expression.
always @(*) begin
case (out_cmd)
case (get_cmd)
GET_VALID : bus_out_r = BUSW'(data_valid);
GET_WIDTH : bus_out_r = BUSW'(DATAW);
GET_COUNT : bus_out_r = BUSW'(waddr) + BUSW'(1);
GET_OFFSET: bus_out_r = BUSW'(start_time);
/* verilator lint_off WIDTH */
GET_DATA : bus_out_r = read_delta ? BUSW'(delta_store[raddr]) : BUSW'(data_store[raddr] >> read_offset);
/* verilator lint_on WIDTH */
default : bus_out_r = 0;
endcase
end
@@ -182,7 +197,7 @@ module VX_scope #(
`ifdef DBG_PRINT_SCOPE
always @(posedge clk) begin
if (bus_read) begin
$display("%t: scope-read: cmd=%0d, out=%0h, addr=%0d", $time, out_cmd, bus_out, raddr);
$display("%t: scope-read: cmd=%0d, addr=%0d, value=%0h", $time, get_cmd, raddr, bus_out);
end
if (bus_write) begin
$display("%t: scope-write: cmd=%0d, value=%0d", $time, cmd_type, cmd_data);