Performance refactoring: rebalanced stream buffers across the device to enforce the output-buffering rule at component boundaries; finally resolved block RAM R/W collision discrepancies.
This commit is contained in:
@@ -1,9 +1,10 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_cam_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 1,
|
||||
parameter FASTRAM = 0,
|
||||
parameter ADDRW = `LOG2UP(SIZE)
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -63,8 +64,8 @@ module VX_cam_buffer #(
|
||||
VX_dp_ram #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0)
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(FASTRAM)
|
||||
) data_table (
|
||||
.clk(clk),
|
||||
.waddr(write_addr),
|
||||
|
||||
@@ -26,9 +26,7 @@ module VX_dp_ram #(
|
||||
localparam DATA32W = DATAW / 32;
|
||||
localparam BYTEEN32W = BYTEENW / 4;
|
||||
|
||||
//`ifndef QUARTUS
|
||||
|
||||
if (FASTRAM) begin
|
||||
if (FASTRAM) begin
|
||||
if (BUFFERED) begin
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
@@ -57,72 +55,36 @@ module VX_dp_ram #(
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
assign dout = dout_r;
|
||||
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (rden)
|
||||
|
||||
if (RWCHECK) begin
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
for (integer j = 0; j < BYTEEN32W; j++) begin
|
||||
for (integer i = 0; i < 4; i++) begin
|
||||
if (byteen[j * 4 + i])
|
||||
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
for (integer j = 0; j < BYTEEN32W; j++) begin
|
||||
for (integer i = 0; i < 4; i++) begin
|
||||
if (byteen[j * 4 + i])
|
||||
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && byteen)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
|
||||
assign dout = mem[raddr];
|
||||
end else begin
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren) begin
|
||||
for (integer j = 0; j < BYTEEN32W; j++) begin
|
||||
for (integer i = 0; i < 4; i++) begin
|
||||
if (byteen[j * 4 + i])
|
||||
mem[waddr][j][i] <= din[j * 32 + i * 8 +: 8];
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end else begin
|
||||
`USE_FAST_BRAM `NO_RW_RAM_CHECK reg [DATAW-1:0] mem [SIZE-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (wren && byteen)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
end
|
||||
always @(posedge clk) begin
|
||||
if (wren && byteen)
|
||||
mem[waddr] <= din;
|
||||
end
|
||||
assign dout = mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
if (BUFFERED) begin
|
||||
|
||||
reg [DATAW-1:0] dout_r;
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
@@ -150,14 +112,11 @@ module VX_dp_ram #(
|
||||
dout_r <= mem[raddr];
|
||||
end
|
||||
end
|
||||
|
||||
assign dout = dout_r;
|
||||
end else begin
|
||||
|
||||
`UNUSED_VAR (rden)
|
||||
|
||||
if (RWCHECK) begin
|
||||
|
||||
if (BYTEENW > 1) begin
|
||||
reg [DATA32W-1:0][3:0][7:0] mem [SIZE-1:0];
|
||||
|
||||
@@ -208,96 +167,6 @@ module VX_dp_ram #(
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
/*`else
|
||||
|
||||
localparam OUTDATA_REG_B = BUFFERED ? "CLOCK0" : "UNREGISTERED";
|
||||
localparam RAM_BLOCK_TYPE = FASTRAM ? "MLAB" : "AUTO";
|
||||
|
||||
if (RWCHECK) begin
|
||||
|
||||
altsyncram #(
|
||||
.init_file (),
|
||||
.operation_mode ("DUAL_PORT"),
|
||||
.numwords_a (SIZE),
|
||||
.numwords_b (SIZE),
|
||||
.widthad_a (ADDRW),
|
||||
.widthad_b (ADDRW),
|
||||
.width_a (DATAW),
|
||||
.width_b (DATAW),
|
||||
.width_byteena_a(BYTEENW),
|
||||
.address_reg_b ("CLOCK0"),
|
||||
.outdata_reg_b (OUTDATA_REG_B),
|
||||
.ram_block_type (RAM_BLOCK_TYPE)
|
||||
) mem (
|
||||
.clocken0 (1'b1),
|
||||
.clocken1 (),
|
||||
.clocken2 (),
|
||||
.clocken3 (),
|
||||
.clock0 (clk),
|
||||
.clock1 (),
|
||||
.address_a (waddr),
|
||||
.address_b (raddr),
|
||||
.byteena_a (byteen),
|
||||
.byteena_b (1'b1),
|
||||
.wren_a (wren),
|
||||
.wren_b (1'b0),
|
||||
.data_a (din),
|
||||
.data_b (),
|
||||
.rden_a (),
|
||||
.rden_b (1'b1),
|
||||
.q_a (),
|
||||
.q_b (dout),
|
||||
.addressstall_a (1'b0),
|
||||
.addressstall_b (1'b0),
|
||||
.aclr0 (1'b0),
|
||||
.aclr1 (1'b0),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
`NO_RW_RAM_CHECK altsyncram #(
|
||||
.init_file (),
|
||||
.operation_mode ("DUAL_PORT"),
|
||||
.numwords_a (SIZE),
|
||||
.numwords_b (SIZE),
|
||||
.widthad_a (ADDRW),
|
||||
.widthad_b (ADDRW),
|
||||
.width_a (DATAW),
|
||||
.width_b (DATAW),
|
||||
.width_byteena_a(BYTEENW),
|
||||
.outdata_reg_b (OUTDATA_REG_B),
|
||||
.ram_block_type (RAM_BLOCK_TYPE)
|
||||
) mem (
|
||||
.clocken0 (1'b1),
|
||||
.clocken1 (1'b1),
|
||||
.clocken2 (1'b1),
|
||||
.clocken3 (1'b1),
|
||||
.clock0 (clk),
|
||||
.clock1 (clk),
|
||||
.address_a (waddr),
|
||||
.address_b (raddr),
|
||||
.byteena_a (byteen),
|
||||
.byteena_b (1'b1),
|
||||
.wren_a (wren),
|
||||
.wren_b (1'b0),
|
||||
.data_a (din),
|
||||
.data_b (),
|
||||
.rden_a (),
|
||||
.rden_b (1'b1),
|
||||
.q_a (),
|
||||
.q_b (dout),
|
||||
.addressstall_a (1'b0),
|
||||
.addressstall_b (1'b0),
|
||||
.aclr0 (1'b0),
|
||||
.aclr1 (1'b0),
|
||||
.eccstatus ()
|
||||
);
|
||||
|
||||
end
|
||||
|
||||
`endif*/
|
||||
|
||||
|
||||
endmodule
|
||||
`TRACING_ON
|
||||
@@ -3,10 +3,10 @@
|
||||
module VX_generic_queue #(
|
||||
parameter DATAW = 1,
|
||||
parameter SIZE = 2,
|
||||
parameter BUFFERED = 0,
|
||||
parameter ADDRW = $clog2(SIZE),
|
||||
parameter SIZEW = $clog2(SIZE+1),
|
||||
parameter FASTRAM = 0
|
||||
parameter BUFFERED = 0,
|
||||
parameter FASTRAM = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -78,25 +78,22 @@ module VX_generic_queue #(
|
||||
end;
|
||||
end
|
||||
end
|
||||
used_r <= used_r + ADDRW'(push) - ADDRW'(pop);
|
||||
used_r <= used_r + (ADDRW'(push) - ADDRW'(pop));
|
||||
end
|
||||
end
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
reg [ADDRW:0] rd_ptr_r;
|
||||
reg [ADDRW:0] wr_ptr_r;
|
||||
|
||||
wire [ADDRW-1:0] rd_ptr_a = rd_ptr_r[ADDRW-1:0];
|
||||
wire [ADDRW-1:0] wr_ptr_a = wr_ptr_r[ADDRW-1:0];
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + (ADDRW+1)'(push);
|
||||
rd_ptr_r <= rd_ptr_r + (ADDRW+1)'(pop);
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
end
|
||||
|
||||
@@ -108,8 +105,8 @@ module VX_generic_queue #(
|
||||
.FASTRAM(FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_a),
|
||||
.raddr(rd_ptr_a),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_r),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
@@ -149,7 +146,7 @@ module VX_generic_queue #(
|
||||
.DATAW(DATAW),
|
||||
.SIZE(SIZE),
|
||||
.BUFFERED(0),
|
||||
.RWCHECK(0),
|
||||
.RWCHECK(1),
|
||||
.FASTRAM(FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
@@ -166,7 +163,7 @@ module VX_generic_queue #(
|
||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
dout_r <= data_in;
|
||||
end else if (pop) begin
|
||||
dout_r <= dout;
|
||||
dout_r <= dout; // BRAM R/W collision
|
||||
end
|
||||
end
|
||||
|
||||
@@ -178,4 +175,4 @@ module VX_generic_queue #(
|
||||
assign size = {full_r, used_r};
|
||||
end
|
||||
|
||||
endmodule
|
||||
endmodule
|
||||
@@ -143,7 +143,7 @@ module VX_scope #(
|
||||
end
|
||||
|
||||
if (stop
|
||||
|| (waddr >= waddr_end)) begin
|
||||
|| (waddr == waddr_end)) begin
|
||||
waddr <= waddr; // keep last address
|
||||
recording <= 0;
|
||||
data_valid <= 1;
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_stream_arbiter #(
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter TYPE = "R",
|
||||
parameter IN_BUFFER = 0,
|
||||
parameter OUT_BUFFER = 0
|
||||
parameter NUM_REQS = 1,
|
||||
parameter DATAW = 1,
|
||||
parameter TYPE = "R",
|
||||
parameter BUFFERED = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -22,27 +21,6 @@ module VX_stream_arbiter #(
|
||||
localparam LOG_NUM_REQS = $clog2(NUM_REQS);
|
||||
|
||||
if (NUM_REQS > 1) begin
|
||||
|
||||
wire [NUM_REQS-1:0] valid_in_qual;
|
||||
wire [NUM_REQS-1:0][DATAW-1:0] data_in_qual;
|
||||
wire [NUM_REQS-1:0] ready_in_qual;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; ++i) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!IN_BUFFER)
|
||||
) req_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (valid_in[i]),
|
||||
.data_in (data_in[i]),
|
||||
.ready_in (ready_in[i]),
|
||||
.valid_out (valid_in_qual[i]),
|
||||
.data_out (data_in_qual[i]),
|
||||
.ready_out (ready_in_qual[i])
|
||||
);
|
||||
end
|
||||
|
||||
wire sel_enable;
|
||||
wire sel_valid;
|
||||
wire [LOG_NUM_REQS-1:0] sel_idx;
|
||||
@@ -56,7 +34,7 @@ module VX_stream_arbiter #(
|
||||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
@@ -71,7 +49,7 @@ module VX_stream_arbiter #(
|
||||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
@@ -86,7 +64,7 @@ module VX_stream_arbiter #(
|
||||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
@@ -101,47 +79,36 @@ module VX_stream_arbiter #(
|
||||
) sel_arb (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.requests (valid_in_qual),
|
||||
.requests (valid_in),
|
||||
.enable (sel_enable),
|
||||
.grant_valid (sel_valid),
|
||||
.grant_index (sel_idx),
|
||||
.grant_onehot (sel_1hot)
|
||||
);
|
||||
|
||||
end
|
||||
end
|
||||
|
||||
if (OUT_BUFFER) begin
|
||||
wire ready_out_unqual;
|
||||
|
||||
wire stall = ~ready_out && valid_out;
|
||||
assign sel_enable = ~stall;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.valid_in (sel_valid),
|
||||
.data_in (data_in[sel_idx]),
|
||||
.ready_in (ready_out_unqual),
|
||||
.valid_out (valid_out),
|
||||
.data_out (data_out),
|
||||
.ready_out (ready_out)
|
||||
);
|
||||
|
||||
VX_generic_register #(
|
||||
.N(1 + DATAW),
|
||||
.R(1)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.stall (stall),
|
||||
.flush (1'b0),
|
||||
.data_in ({sel_valid, data_in_qual[sel_idx]}),
|
||||
.data_out ({valid_out, data_out})
|
||||
);
|
||||
assign sel_enable = ready_out_unqual;
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in_qual[i] = sel_1hot[i] && ~stall;
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
assign sel_enable = ready_out;
|
||||
assign valid_out = sel_valid;
|
||||
assign data_out = data_in_qual[sel_idx];
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in_qual[i] = sel_1hot[i] && ready_out;
|
||||
end
|
||||
|
||||
end
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
assign ready_in[i] = sel_1hot[i] && ready_out_unqual;
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
|
||||
68
hw/rtl/libs/VX_stream_demux.v
Normal file
68
hw/rtl/libs/VX_stream_demux.v
Normal file
@@ -0,0 +1,68 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// VX_stream_demux
//
// One-to-many valid/ready stream demultiplexer.
//
// The single input stream (valid_in / data_in / ready_in) is steered to
// the output lane indexed by `sel`. `data_in` is presented to every lane;
// only the selected lane sees `valid_in`, and `ready_in` reflects the
// ready signal coming back from the selected lane's buffer.
//
// Parameters:
//   NUM_REQS  - number of output lanes.
//   DATAW     - payload width in bits.
//   BUFFERED  - forwarded to each per-lane VX_skid_buffer as
//               PASSTHRU = !BUFFERED. It has no effect when NUM_REQS <= 1,
//               where the stream is wired straight through.
//               NOTE(review): the buffering behaviour itself lives in
//               VX_skid_buffer, which is not visible here - confirm there.
module VX_stream_demux #(
    parameter NUM_REQS = 1,
    parameter DATAW = 1,
    parameter BUFFERED = 0,
    localparam LOG_NUM_REQS = `LOG2UP(NUM_REQS)
) (
    input wire clk,
    input wire reset,

    // index of the output lane that receives the input stream
    input wire [LOG_NUM_REQS-1:0] sel,

    // input stream
    input wire valid_in,
    input wire [DATAW-1:0] data_in,
    output wire ready_in,

    // output streams, one per lane
    output wire [NUM_REQS-1:0] valid_out,
    output wire [NUM_REQS-1:0][DATAW-1:0] data_out,
    input wire [NUM_REQS-1:0] ready_out
);

    if (NUM_REQS <= 1) begin

        // Single lane: nothing to select and no buffer is instantiated,
        // so the clock, reset and selector inputs are unused.
        `UNUSED_VAR (clk)
        `UNUSED_VAR (reset)
        `UNUSED_VAR (sel)

        assign ready_in  = ready_out;
        assign data_out  = data_in;
        assign valid_out = valid_in;

    end else begin

        // Signals on the input side of the per-lane buffers.
        reg  [NUM_REQS-1:0]            lane_valid;
        wire [NUM_REQS-1:0][DATAW-1:0] lane_data;
        wire [NUM_REQS-1:0]            lane_ready;

        // Back-pressure comes from whichever lane is currently selected.
        assign ready_in = lane_ready[sel];

        // Decode: clear every lane, then raise only the selected one.
        always @(*) begin
            lane_valid      = {NUM_REQS{1'b0}};
            lane_valid[sel] = valid_in;
        end

        // The payload is broadcast unchanged to all lanes.
        for (genvar lane = 0; lane < NUM_REQS; lane++) begin
            assign lane_data[lane] = data_in;
        end

        // One skid buffer per output lane.
        for (genvar lane = 0; lane < NUM_REQS; lane++) begin
            VX_skid_buffer #(
                .DATAW    (DATAW),
                .PASSTHRU (!BUFFERED)
            ) out_buffer (
                .clk       (clk),
                .reset     (reset),
                .valid_in  (lane_valid[lane]),
                .data_in   (lane_data[lane]),
                .ready_in  (lane_ready[lane]),
                .valid_out (valid_out[lane]),
                .data_out  (data_out[lane]),
                .ready_out (ready_out[lane])
            );
        end

    end

endmodule
|
||||
Reference in New Issue
Block a user