RAM blocks inference fixes

This commit is contained in:
Blaise Tine
2020-11-30 14:02:47 -08:00
parent 5758ef9ebf
commit 97739e9dcf
27 changed files with 218 additions and 189 deletions

View File

@@ -42,7 +42,7 @@ module VX_fpu_unit #(
VX_cam_buffer #( VX_cam_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`FPUQ_SIZE) .SIZE (`FPUQ_SIZE)
) fpu_cam ( ) req_metadata_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.acquire_slot (fpuq_push), .acquire_slot (fpuq_push),

View File

@@ -1,6 +1,7 @@
`include "VX_define.vh" `include "VX_define.vh"
`TRACING_OFF `TRACING_OFF
module VX_gpr_ram ( module VX_gpr_ram (
input wire clk, input wire clk,
input wire [`NUM_THREADS-1:0] we, input wire [`NUM_THREADS-1:0] we,
@@ -32,4 +33,5 @@ module VX_gpr_ram (
assign rs2_data = q2; assign rs2_data = q2;
endmodule endmodule
`TRACING_ON `TRACING_ON

View File

@@ -43,7 +43,8 @@ module VX_ibuffer #(
VX_generic_queue #( VX_generic_queue #(
.DATAW(DATAW), .DATAW(DATAW),
.SIZE(SIZE) .SIZE(SIZE),
.BUFFERED(1)
) queue ( ) queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -20,20 +20,26 @@ module VX_icache_stage #(
); );
`UNUSED_VAR (reset) `UNUSED_VAR (reset)
`NO_RW_RAM_CHECK reg [31:0] rsp_PC_buf [`NUM_WARPS-1:0];
`NO_RW_RAM_CHECK reg [`NUM_THREADS-1:0] rsp_tmask_buf [`NUM_WARPS-1:0];
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready; wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid; wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid;
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0]; wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
always @(posedge clk) begin VX_dp_ram #(
if (icache_req_fire) begin .DATAW(32 + `NUM_THREADS),
rsp_PC_buf[req_tag] <= ifetch_req_if.PC; .SIZE(`NUM_WARPS),
rsp_tmask_buf[req_tag] <= ifetch_req_if.tmask; .BUFFERED(0),
end .RWCHECK(0)
end ) req_metadata (
.clk(clk),
.waddr(req_tag),
.raddr(rsp_tag),
.wren(icache_req_fire),
.byteen(1'b1),
.rden(1'b1),
.din({ifetch_req_if.PC, ifetch_req_if.tmask}),
.dout({ifetch_rsp_if.PC, ifetch_rsp_if.tmask})
);
// Icache Request // Icache Request
assign icache_req_if.valid = ifetch_req_if.valid; assign icache_req_if.valid = ifetch_req_if.valid;
@@ -53,8 +59,6 @@ module VX_icache_stage #(
assign ifetch_rsp_if.valid = icache_rsp_if.valid; assign ifetch_rsp_if.valid = icache_rsp_if.valid;
assign ifetch_rsp_if.wid = rsp_tag; assign ifetch_rsp_if.wid = rsp_tag;
assign ifetch_rsp_if.tmask = rsp_tmask_buf[rsp_tag];
assign ifetch_rsp_if.PC = rsp_PC_buf[rsp_tag];
assign ifetch_rsp_if.instr = icache_rsp_if.data[0]; assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
// Can accept new response? // Can accept new response?
@@ -66,7 +70,7 @@ module VX_icache_stage #(
`SCOPE_ASSIGN (icache_req_tag, req_tag); `SCOPE_ASSIGN (icache_req_tag, req_tag);
`SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready); `SCOPE_ASSIGN (icache_rsp_fire, icache_rsp_if.valid && icache_rsp_if.ready);
`SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data); `SCOPE_ASSIGN (icache_rsp_data, icache_rsp_if.data[0]);
`SCOPE_ASSIGN (icache_rsp_tag, rsp_tag); `SCOPE_ASSIGN (icache_rsp_tag, rsp_tag);
`ifdef DBG_PRINT_CORE_ICACHE `ifdef DBG_PRINT_CORE_ICACHE

View File

@@ -16,14 +16,11 @@ module VX_ipdom_stack #(
); );
localparam STACK_SIZE = 2 ** DEPTH; localparam STACK_SIZE = 2 ** DEPTH;
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_1 [0:STACK_SIZE-1]; reg is_part [STACK_SIZE-1:0];
`NO_RW_RAM_CHECK reg [WIDTH-1:0] stack_2 [0:STACK_SIZE-1];
reg is_part [0:STACK_SIZE-1];
reg [DEPTH-1:0] rd_ptr, wr_ptr; reg [DEPTH-1:0] rd_ptr, wr_ptr;
reg [WIDTH - 1:0] d1, d2; wire [WIDTH - 1:0] d1, d2;
reg p;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
@@ -38,22 +35,24 @@ module VX_ipdom_stack #(
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]); rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
end end
end end
end end
always @(posedge clk) begin
if (push) begin
stack_1[wr_ptr] <= q1;
end
end
assign d1 = stack_1[rd_ptr];
always @(posedge clk) begin
if (push) begin
stack_2[wr_ptr] <= q2;
end
end
assign d2 = stack_2[rd_ptr];
VX_dp_ram #(
.DATAW(WIDTH * 2),
.SIZE(STACK_SIZE),
.BUFFERED(0),
.RWCHECK(0)
) store (
.clk(clk),
.waddr(wr_ptr),
.raddr(rd_ptr),
.wren(push),
.byteen(1'b1),
.rden(1'b1),
.din({q2, q1}),
.dout({d2, d1})
);
always @(posedge clk) begin always @(posedge clk) begin
if (push) begin if (push) begin
is_part[wr_ptr] <= 0; is_part[wr_ptr] <= 0;
@@ -61,7 +60,7 @@ module VX_ipdom_stack #(
is_part[rd_ptr] <= 1; is_part[rd_ptr] <= 1;
end end
end end
assign p = is_part[rd_ptr]; wire p = is_part[rd_ptr];
assign d = p ? d1 : d2; assign d = p ? d1 : d2;
assign empty = ~(| wr_ptr); assign empty = ~(| wr_ptr);

View File

@@ -112,7 +112,7 @@ module VX_lsu_unit #(
VX_cam_buffer #( VX_cam_buffer #(
.DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2), .DATAW (`NW_BITS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 2) + 2),
.SIZE (`LSUQ_SIZE) .SIZE (`LSUQ_SIZE)
) cam_buffer ( ) req_metadata_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.write_addr (req_tag), .write_addr (req_tag),

View File

@@ -34,7 +34,7 @@ module VX_mul_unit #(
VX_cam_buffer #( VX_cam_buffer #(
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1), .DATAW (`NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1),
.SIZE (`MULQ_SIZE) .SIZE (`MULQ_SIZE)
) mul_cam ( ) req_metadata_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.acquire_slot (mulq_push), .acquire_slot (mulq_push),

View File

@@ -498,6 +498,7 @@ if (DRAM_ENABLE) begin
end else begin end else begin
`UNUSED_VAR (mshr_pending_hazard_unqual_st0) `UNUSED_VAR (mshr_pending_hazard_unqual_st0)
`UNUSED_VAR (addr_st0)
assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1; assign {tag_st1, mem_rw_st1, mem_byteen_st1, tid_st1} = inst_meta_st1;
@@ -510,7 +511,7 @@ end else begin
assign writedata_st1= writedata_st0; assign writedata_st1= writedata_st0;
assign inst_meta_st1= inst_meta_st0; assign inst_meta_st1= inst_meta_st0;
assign snp_inv_st1 = snp_inv_st0; assign snp_inv_st1 = snp_inv_st0;
assign addr_st1 = addr_st0; assign addr_st1 = reqq_addr_st0[`LINE_SELECT_ADDR_RNG];
assign dirty_st1 = 0; assign dirty_st1 = 0;
assign readtag_st1 = 0; assign readtag_st1 = 0;
assign miss_st1 = 0; assign miss_st1 = 0;
@@ -782,7 +783,8 @@ end
VX_generic_queue #( VX_generic_queue #(
.DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH), .DATAW(`REQS_BITS + CORE_TAG_WIDTH + `WORD_WIDTH),
.SIZE(CWBQ_SIZE) .SIZE(CWBQ_SIZE),
.BUFFERED(1)
) cwb_queue ( ) cwb_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),

View File

@@ -29,18 +29,6 @@ module VX_cache_core_rsp_merge #(
input wire core_rsp_ready input wire core_rsp_ready
); );
if (NUM_REQUESTS > 1) begin if (NUM_REQUESTS > 1) begin
wire [`BANK_BITS-1:0] sel_idx;
VX_rr_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_core_rsp_valid),
`UNUSED_PIN (grant_valid),
.grant_index (sel_idx),
`UNUSED_PIN (grant_onehot)
);
reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual; reg [NUM_REQUESTS-1:0] core_rsp_valid_unqual;
reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual; reg [NUM_REQUESTS-1:0][`WORD_WIDTH-1:0] core_rsp_data_unqual;
@@ -48,6 +36,19 @@ module VX_cache_core_rsp_merge #(
reg [NUM_BANKS-1:0] core_rsp_bank_select; reg [NUM_BANKS-1:0] core_rsp_bank_select;
if (CORE_TAG_ID_BITS != 0) begin if (CORE_TAG_ID_BITS != 0) begin
wire [`BANK_BITS-1:0] sel_idx;
VX_rr_arbiter #(
.N(NUM_BANKS)
) sel_arb (
.clk (clk),
.reset (reset),
.requests (per_bank_core_rsp_valid),
`UNUSED_PIN (grant_valid),
.grant_index (sel_idx),
`UNUSED_PIN (grant_onehot)
);
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx]; core_rsp_tag_unqual = per_bank_core_rsp_tag[sel_idx];
@@ -65,17 +66,10 @@ module VX_cache_core_rsp_merge #(
end end
end else begin end else begin
always @(*) begin always @(*) begin
core_rsp_valid_unqual = 0; core_rsp_valid_unqual = 0;
core_rsp_valid_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_valid[sel_idx]; core_rsp_tag_unqual = 'x;
core_rsp_data_unqual = 'x;
core_rsp_tag_unqual = 'x; core_rsp_bank_select = 0;
core_rsp_tag_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_tag[sel_idx];
core_rsp_data_unqual = 'x;
core_rsp_data_unqual[per_bank_core_rsp_tid[sel_idx]] = per_bank_core_rsp_data[sel_idx];
core_rsp_bank_select = 0;
core_rsp_bank_select[sel_idx] = 1;
for (integer i = 0; i < NUM_BANKS; i++) begin for (integer i = 0; i < NUM_BANKS; i++) begin
if (per_bank_core_rsp_valid[i] if (per_bank_core_rsp_valid[i]

View File

@@ -43,9 +43,9 @@ module VX_data_store #(
end end
VX_dp_ram #( VX_dp_ram #(
.DATAW(`BANK_LINE_WORDS * WORD_SIZE * 8), .DATAW(BANK_LINE_SIZE * 8),
.SIZE(`BANK_LINE_COUNT), .SIZE(`BANK_LINE_COUNT),
.BYTEENW(`BANK_LINE_WORDS * WORD_SIZE), .BYTEENW(BANK_LINE_SIZE),
.BUFFERED(0), .BUFFERED(0),
.RWCHECK(1) .RWCHECK(1)
) data ( ) data (

View File

@@ -64,7 +64,7 @@ module VX_snp_forwarder #(
VX_cam_buffer #( VX_cam_buffer #(
.DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH), .DATAW (SRC_ADDR_WIDTH + 1 + SNP_TAG_WIDTH),
.SIZE (SNRQ_SIZE) .SIZE (SNRQ_SIZE)
) snp_fwd_cam ( ) req_metadata_buf (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.write_addr (sfq_write_addr), .write_addr (sfq_write_addr),

View File

@@ -178,14 +178,14 @@ module VX_fp_addmul #(
end end
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1 + 1 + 1), .DATAW(1 + TAGW + 1 + 1),
.DEPTH(`LATENCY_FADDMUL) .DEPTH(`LATENCY_FADDMUL)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
.enable(enable), .enable(enable),
.in({tag_in, valid_in, do_sub, do_mul}), .in({valid_in, tag_in, do_sub, do_mul}),
.out({tag_out, valid_out, do_sub_r, do_mul_r}) .out({valid_out, tag_out, do_sub_r, do_mul_r})
); );
assign ready_in = enable; assign ready_in = enable;

View File

@@ -50,14 +50,14 @@ module VX_fp_div #(
end end
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1), .DATAW(1 + TAGW),
.DEPTH(`LATENCY_FDIV) .DEPTH(`LATENCY_FDIV)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
.enable(enable), .enable(enable),
.in ({tag_in, valid_in}), .in ({valid_in, tag_in}),
.out({tag_out, valid_out}) .out({valid_out, tag_out})
); );
assign ready_in = enable; assign ready_in = enable;

View File

@@ -68,14 +68,14 @@ module VX_fp_ftoi #(
end end
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1 + 1), .DATAW(1 + TAGW + 1),
.DEPTH(`LATENCY_FTOI) .DEPTH(`LATENCY_FTOI)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
.enable(enable), .enable(enable),
.in ({tag_in, valid_in, is_signed}), .in ({valid_in, tag_in, is_signed}),
.out({tag_out, valid_out, is_signed_r}) .out({valid_out, tag_out, is_signed_r})
); );
assign ready_in = enable; assign ready_in = enable;

View File

@@ -68,14 +68,14 @@ module VX_fp_itof #(
end end
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1 + 1), .DATAW(1 + TAGW + 1),
.DEPTH(`LATENCY_ITOF) .DEPTH(`LATENCY_ITOF)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
.enable(enable), .enable(enable),
.in ({tag_in, valid_in, is_signed}), .in ({valid_in, tag_in, is_signed}),
.out({tag_out, valid_out, is_signed_r}) .out({valid_out, tag_out, is_signed_r})
); );
assign ready_in = enable; assign ready_in = enable;

View File

@@ -138,14 +138,14 @@ module VX_fp_madd #(
end end
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1 + 1 + 1), .DATAW(1 + TAGW + 1 + 1),
.DEPTH(`LATENCY_FMADD) .DEPTH(`LATENCY_FMADD)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
.enable(enable), .enable(enable),
.in({tag_in, valid_in, do_sub, do_neg}), .in({valid_in, tag_in, do_sub, do_neg}),
.out({tag_out, valid_out, do_sub_r, do_neg_r}) .out({valid_out, tag_out, do_sub_r, do_neg_r})
); );
assign ready_in = enable; assign ready_in = enable;

View File

@@ -48,14 +48,14 @@ module VX_fp_sqrt #(
end end
VX_shift_register #( VX_shift_register #(
.DATAW(TAGW + 1), .DATAW(1 + TAGW),
.DEPTH(`LATENCY_FSQRT) .DEPTH(`LATENCY_FSQRT)
) shift_reg ( ) shift_reg (
.clk(clk), .clk(clk),
.reset(reset), .reset(reset),
.enable(enable), .enable(enable),
.in ({tag_in, valid_in}), .in ({valid_in, tag_in}),
.out({tag_out, valid_out}) .out({valid_out, tag_out})
); );
assign ready_in = enable; assign ready_in = enable;

View File

@@ -78,7 +78,7 @@ module VX_fpnew
wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32; wire [FMTI_BITS-1:0] fpu_int_fmt = fpnew_pkg::INT32;
wire [`NUM_THREADS-1:0][31:0] fpu_result; wire [`NUM_THREADS-1:0][31:0] fpu_result;
fpnew_pkg::status_t [0:`NUM_THREADS-1] fpu_status; fpnew_pkg::status_t [`NUM_THREADS-1:0] fpu_status;
reg [FOP_BITS-1:0] fpu_op; reg [FOP_BITS-1:0] fpu_op;
reg [`FRM_BITS-1:0] fpu_rnd; reg [`FRM_BITS-1:0] fpu_rnd;

View File

@@ -3,8 +3,6 @@
module VX_cam_buffer #( module VX_cam_buffer #(
parameter DATAW = 1, parameter DATAW = 1,
parameter SIZE = 1, parameter SIZE = 1,
parameter RPORTS = 1,
parameter CPORTS = 1,
parameter ADDRW = `LOG2UP(SIZE) parameter ADDRW = `LOG2UP(SIZE)
) ( ) (
input wire clk, input wire clk,
@@ -12,13 +10,12 @@ module VX_cam_buffer #(
output wire [ADDRW-1:0] write_addr, output wire [ADDRW-1:0] write_addr,
input wire [DATAW-1:0] write_data, input wire [DATAW-1:0] write_data,
input wire acquire_slot, input wire acquire_slot,
input wire [RPORTS-1:0][ADDRW-1:0] read_addr, input wire [ADDRW-1:0] read_addr,
output wire [RPORTS-1:0][DATAW-1:0] read_data, output wire [DATAW-1:0] read_data,
input wire [CPORTS-1:0][ADDRW-1:0] release_addr, input wire [ADDRW-1:0] release_addr,
input wire [CPORTS-1:0] release_slot, input wire release_slot,
output wire full output wire full
); );
reg [DATAW-1:0] entries [SIZE-1:0];
reg [SIZE-1:0] free_slots, free_slots_n; reg [SIZE-1:0] free_slots, free_slots_n;
reg [ADDRW-1:0] write_addr_r; reg [ADDRW-1:0] write_addr_r;
reg full_r; reg full_r;
@@ -36,13 +33,12 @@ module VX_cam_buffer #(
always @(*) begin always @(*) begin
free_slots_n = free_slots; free_slots_n = free_slots;
for (integer i = 0; i < CPORTS; i++) begin if (release_slot) begin
if (release_slot[i]) begin free_slots_n[release_addr] = 1;
free_slots_n[release_addr[i]] = 1;
end
end end
if (acquire_slot) begin if (acquire_slot) begin
free_slots_n[write_addr_r] = 0; assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
free_slots_n[write_addr_r] = 0;
end end
end end
@@ -52,28 +48,33 @@ module VX_cam_buffer #(
full_r <= 1'b0; full_r <= 1'b0;
write_addr_r <= ADDRW'(1'b0); write_addr_r <= ADDRW'(1'b0);
end else begin end else begin
for (integer i = 0; i < CPORTS; i++) begin if (release_slot) begin
if (release_slot[i]) begin assert(0 == free_slots[release_addr]) else begin
assert(0 == free_slots[release_addr[i]]) else begin $display("%t: releasing invalid slot at port %d", $time, release_addr);
$display("%t: releasing invalid slot at port %d", $time, release_addr[i]);
end
end end
end end
free_slots <= free_slots_n; free_slots <= free_slots_n;
write_addr_r <= free_index; write_addr_r <= free_index;
full_r <= ~free_valid; full_r <= ~free_valid;
end end
if (acquire_slot) begin
assert(1 == free_slots[write_addr]) else $error("%t: acquiring used slot at port %d", $time, write_addr);
entries[write_addr] <= write_data;
end
end
for (genvar i = 0; i < RPORTS; i++) begin
assign read_data[i] = entries[read_addr[i]];
end end
VX_dp_ram #(
.DATAW(DATAW),
.SIZE(SIZE),
.BUFFERED(0),
.RWCHECK(0)
) req_metadata (
.clk(clk),
.waddr(write_addr),
.raddr(read_addr),
.wren(acquire_slot),
.byteen(1'b1),
.rden(1'b1),
.din(write_data),
.dout(read_data)
);
assign write_addr = write_addr_r; assign write_addr = write_addr_r;
assign full = full_r; assign full = full_r;

View File

@@ -73,8 +73,8 @@ module VX_divide #(
assign quotient = quotient_unqual [WIDTHQ-1:0]; assign quotient = quotient_unqual [WIDTHQ-1:0];
assign remainder = remainder_unqual [WIDTHR-1:0]; assign remainder = remainder_unqual [WIDTHR-1:0];
end else begin end else begin
reg [WIDTHN-1:0] quotient_pipe [0:LATENCY-1]; reg [WIDTHN-1:0] quotient_pipe [LATENCY-1:0];
reg [WIDTHD-1:0] remainder_pipe [0:LATENCY-1]; reg [WIDTHD-1:0] remainder_pipe [LATENCY-1:0];
for (genvar i = 0; i < LATENCY; i++) begin for (genvar i = 0; i < LATENCY; i++) begin
always @(posedge clk) begin always @(posedge clk) begin

View File

@@ -21,33 +21,40 @@ module VX_dp_ram #(
output wire [DATAW-1:0] dout output wire [DATAW-1:0] dout
); );
`STATIC_ASSERT((1 == BYTEENW) || ((BYTEENW > 1) && 0 == (BYTEENW % 4)), ("invalid parameter"))
localparam DATA32W = DATAW / 32;
localparam BYTEEN32W = BYTEENW / 4;
if (FASTRAM) begin if (FASTRAM) begin
if (BUFFERED) begin
if (BUFFERED) begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] dout_r; reg [DATAW-1:0] dout_r;
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren) begin if (wren) begin
for (integer i = 0; i < BYTEENW; i++) begin for (integer j = 0; j < BYTEEN32W; j++) begin
if (byteen[i]) for (integer i = 0; i < 4; i++) begin
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; if (byteen[j * 4 + i])
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
end
end end
end end
if (rden)
dout_r <= mem[raddr];
end end
end else begin end else begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren && byteen) if (wren && byteen)
mem[waddr] <= din; mem[waddr] <= din;
if (rden)
dout_r <= mem[raddr];
end end
end end
always @(posedge clk) begin
if (rden)
dout_r <= mem[raddr];
end
assign dout = dout_r; assign dout = dout_r;
@@ -55,48 +62,58 @@ module VX_dp_ram #(
`UNUSED_VAR (rden) `UNUSED_VAR (rden)
if (RWCHECK) begin if (RWCHECK) begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
`USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren) begin if (wren) begin
for (integer i = 0; i < BYTEENW; i++) begin for (integer j = 0; j < BYTEEN32W; j++) begin
if (byteen[i]) for (integer i = 0; i < 4; i++) begin
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; if (byteen[j * 4 + i])
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
end
end end
end end
end end
assign dout = mem[raddr];
end else begin end else begin
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren && byteen) if (wren && byteen)
mem[waddr] <= din; mem[waddr] <= din;
end end
assign dout = mem[raddr];
end end
assign dout = mem[raddr];
end else begin end else begin
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren) begin if (wren) begin
for (integer i = 0; i < BYTEENW; i++) begin for (integer j = 0; j < BYTEEN32W; j++) begin
if (byteen[i]) for (integer i = 0; i < 4; i++) begin
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; if (byteen[j * 4 + i])
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
end
end end
end end
end end
assign dout = mem[raddr];
end else begin end else begin
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren && byteen) if (wren && byteen)
mem[waddr] <= din; mem[waddr] <= din;
end end
end assign dout = mem[raddr];
assign dout = mem[raddr]; end
end end
end end
@@ -104,79 +121,88 @@ module VX_dp_ram #(
if (BUFFERED) begin if (BUFFERED) begin
reg [DATAW-1:0] mem [SIZE-1:0];
reg [DATAW-1:0] dout_r; reg [DATAW-1:0] dout_r;
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren) begin if (wren) begin
for (integer i = 0; i < BYTEENW; i++) begin for (integer j = 0; j < BYTEEN32W; j++) begin
if (byteen[i]) for (integer i = 0; i < 4; i++) begin
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; if (byteen[j * 4 + i])
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
end
end end
end end
if (rden)
dout_r <= mem[raddr];
end end
end else begin end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren && byteen) if (wren && byteen)
mem[waddr] <= din; mem[waddr] <= din;
if (rden)
dout_r <= mem[raddr];
end end
end end
always @(posedge clk) begin
if (rden)
dout_r <= mem[raddr];
end
assign dout = dout_r; assign dout = dout_r;
end else begin end else begin
`UNUSED_VAR (rden) `UNUSED_VAR (rden)
if (RWCHECK) begin if (RWCHECK) begin
reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren) begin if (wren) begin
for (integer i = 0; i < BYTEENW; i++) begin for (integer j = 0; j < BYTEEN32W; j++) begin
if (byteen[i]) for (integer i = 0; i < 4; i++) begin
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; if (byteen[j * 4 + i])
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
end
end end
end end
end end
assign dout = mem[raddr];
end else begin end else begin
reg [DATAW-1:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren && byteen) if (wren && byteen)
mem[waddr] <= din; mem[waddr] <= din;
end end
assign dout = mem[raddr];
end end
assign dout = mem[raddr];
end else begin end else begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
if (BYTEENW > 1) begin if (BYTEENW > 1) begin
`NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren) begin if (wren) begin
for (integer i = 0; i < BYTEENW; i++) begin for (integer j = 0; j < BYTEEN32W; j++) begin
if (byteen[i]) for (integer i = 0; i < 4; i++) begin
mem[waddr][i * 8 +: 8] <= din[i * 8 +: 8]; if (byteen[j * 4 + i])
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
end
end end
end end
end end
assign dout = mem[raddr];
end else begin end else begin
`NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
always @(posedge clk) begin always @(posedge clk) begin
if (wren && byteen) if (wren && byteen)
mem[waddr] <= din; mem[waddr] <= din;
end end
end assign dout = mem[raddr];
end
assign dout = mem[raddr];
end end
end end
end end

View File

@@ -6,7 +6,7 @@ module VX_generic_queue #(
parameter BUFFERED = 0, parameter BUFFERED = 0,
parameter ADDRW = $clog2(SIZE), parameter ADDRW = $clog2(SIZE),
parameter SIZEW = $clog2(SIZE+1), parameter SIZEW = $clog2(SIZE+1),
parameter FASTRAM = 1 parameter FASTRAM = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
@@ -117,7 +117,7 @@ module VX_generic_queue #(
.raddr(rd_ptr_a), .raddr(rd_ptr_a),
.wren(push), .wren(push),
.byteen(1'b1), .byteen(1'b1),
.rden(pop), .rden(1'b1),
.din(data_in), .din(data_in),
.dout(data_out) .dout(data_out)
); );
@@ -125,11 +125,10 @@ module VX_generic_queue #(
end else begin end else begin
wire [DATAW-1:0] dout; wire [DATAW-1:0] dout;
reg [DATAW-1:0] din_r; reg [DATAW-1:0] dout_r;
reg [ADDRW-1:0] wr_ptr_r; reg [ADDRW-1:0] wr_ptr_r;
reg [ADDRW-1:0] rd_ptr_r; reg [ADDRW-1:0] rd_ptr_r;
reg [ADDRW-1:0] rd_ptr_n_r; reg [ADDRW-1:0] rd_ptr_n_r;
reg bypass_r;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
@@ -151,19 +150,11 @@ module VX_generic_queue #(
end end
end end
always @(posedge clk) begin
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
bypass_r <= 1;
din_r <= data_in;
end else if (pop)
bypass_r <= 0;
end
VX_dp_ram #( VX_dp_ram #(
.DATAW(DATAW), .DATAW(DATAW),
.SIZE(SIZE), .SIZE(SIZE),
.BUFFERED(1), .BUFFERED(0),
.RWCHECK(0), .RWCHECK(1),
.FASTRAM(FASTRAM) .FASTRAM(FASTRAM)
) dp_ram ( ) dp_ram (
.clk(clk), .clk(clk),
@@ -171,12 +162,20 @@ module VX_generic_queue #(
.raddr(rd_ptr_n_r), .raddr(rd_ptr_n_r),
.wren(push), .wren(push),
.byteen(1'b1), .byteen(1'b1),
.rden(pop), .rden(1'b1),
.din(data_in), .din(data_in),
.dout(dout) .dout(dout)
); );
assign data_out = bypass_r ? din_r : dout; always @(posedge clk) begin
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
dout_r <= data_in;
end else if (pop) begin
dout_r <= dout;
end
end
assign data_out = dout_r;
end end
assign empty = empty_r; assign empty = empty_r;

View File

@@ -22,8 +22,8 @@ module VX_matrix_arbiter #(
end else begin end else begin
reg [N-1:1] state [0:N-1]; reg [N-1:1] state [N-1:0];
wire [N-1:0] pri [0:N-1]; wire [N-1:0] pri [N-1:0];
for (genvar i = 0; i < N; i++) begin for (genvar i = 0; i < N; i++) begin
for (genvar j = 0; j < N; j++) begin for (genvar j = 0; j < N; j++) begin

View File

@@ -47,7 +47,7 @@ module VX_multiplier #(
if (LATENCY == 0) begin if (LATENCY == 0) begin
assign result = result_unqual; assign result = result_unqual;
end else begin end else begin
reg [WIDTHP-1:0] result_pipe [0:LATENCY-1]; reg [WIDTHP-1:0] result_pipe [LATENCY-1:0];
for (genvar i = 0; i < LATENCY; i++) begin for (genvar i = 0; i < LATENCY; i++) begin
always @(posedge clk) begin always @(posedge clk) begin

View File

@@ -22,7 +22,7 @@ module VX_rr_arbiter #(
end else begin end else begin
reg [`CLOG2(N)-1:0] grant_table [0:N-1]; reg [`CLOG2(N)-1:0] grant_table [N-1:0];
reg [`CLOG2(N)-1:0] state; reg [`CLOG2(N)-1:0] state;
reg [N-1:0] grant_onehot_r; reg [N-1:0] grant_onehot_r;

View File

@@ -3,7 +3,7 @@
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#define ALL_TESTS //#define ALL_TESTS
int main(int argc, char **argv) { int main(int argc, char **argv) {
bool passed = true; bool passed = true;

View File

@@ -40,6 +40,7 @@ set_global_assignment -name VERILOG_MACRO NDEBUG
set_global_assignment -name MESSAGE_DISABLE 16818 set_global_assignment -name MESSAGE_DISABLE 16818
set_global_assignment -name VERILOG_MACRO FPU_FAST set_global_assignment -name VERILOG_MACRO FPU_FAST
set_global_assignment -name AUTO_SHIFT_REGISTER_RECOGNITION AUTO
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0 set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100 set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)" set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"