lkg build rollout with 16cores optimization on arria10
This commit is contained in:
@@ -91,96 +91,157 @@ module VX_fifo_queue #(
|
||||
if (used_r == ADDRW'(ALM_EMPTY+1))
|
||||
alm_empty_r <= 1;
|
||||
end
|
||||
used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop)));
|
||||
if (SIZE > 2) begin
|
||||
used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop)));
|
||||
end else begin // (SIZE == 2);
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
used_r <= used_r ^ (push ^ pop);
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
if (SIZE == 2) begin
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
if (FASTRAM) begin
|
||||
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] shift_reg [SIZE];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = shift_reg[~used_r[0]];
|
||||
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
|
||||
reg [DATAW-1:0] shift_reg [SIZE];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = shift_reg[~used_r[0]];
|
||||
|
||||
end
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [DATAW-1:0] buffer;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
data_out_r <= data_in;
|
||||
end else if (pop) begin
|
||||
data_out_r <= buffer;
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = data_out_r;
|
||||
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (0),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_r),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din(data_in),
|
||||
.dout(data_out)
|
||||
);
|
||||
|
||||
|
||||
end else begin
|
||||
|
||||
wire [DATAW-1:0] dout;
|
||||
reg [DATAW-1:0] dout_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ptr_r <= 0;
|
||||
rd_ptr_r <= 0;
|
||||
rd_ptr_n_r <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
|
||||
end
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (SIZE > 2) begin
|
||||
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (SIZE == 2);
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
rd_ptr_r <= 0;
|
||||
wr_ptr_r <= 0;
|
||||
end else begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(push);
|
||||
rd_ptr_r <= rd_ptr_r + ADDRW'(pop);
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (0),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_r),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din(data_in),
|
||||
.dout(data_out)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
wire [DATAW-1:0] dout;
|
||||
reg [DATAW-1:0] dout_r;
|
||||
reg [ADDRW-1:0] wr_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_r;
|
||||
reg [ADDRW-1:0] rd_ptr_n_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
wr_ptr_r <= 0;
|
||||
rd_ptr_r <= 0;
|
||||
rd_ptr_n_r <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
wr_ptr_r <= wr_ptr_r + ADDRW'(1);
|
||||
end
|
||||
if (pop) begin
|
||||
rd_ptr_r <= rd_ptr_n_r;
|
||||
if (SIZE > 2) begin
|
||||
rd_ptr_n_r <= rd_ptr_r + ADDRW'(2);
|
||||
end else begin // (SIZE == 2);
|
||||
rd_ptr_n_r <= ~rd_ptr_n_r;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (0),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_n_r),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din(data_in),
|
||||
.dout(dout)
|
||||
);
|
||||
VX_dp_ram #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (0),
|
||||
.RWCHECK (1),
|
||||
.FASTRAM (FASTRAM)
|
||||
) dp_ram (
|
||||
.clk(clk),
|
||||
.waddr(wr_ptr_r),
|
||||
.raddr(rd_ptr_n_r),
|
||||
.wren(push),
|
||||
.byteen(1'b1),
|
||||
.rden(1'b1),
|
||||
.din(data_in),
|
||||
.dout(dout)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
dout_r <= data_in;
|
||||
end else if (pop) begin
|
||||
dout_r <= dout;
|
||||
always @(posedge clk) begin
|
||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
||||
dout_r <= data_in;
|
||||
end else if (pop) begin
|
||||
dout_r <= dout;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = dout_r;
|
||||
assign data_out = dout_r;
|
||||
end
|
||||
end
|
||||
|
||||
assign empty = empty_r;
|
||||
|
||||
@@ -25,26 +25,16 @@ module VX_fixed_arbiter #(
|
||||
assign grant_valid = requests[0];
|
||||
|
||||
end else begin
|
||||
|
||||
reg [LOG_NUM_REQS-1:0] grant_index_r;
|
||||
reg [NUM_REQS-1:0] grant_onehot_r;
|
||||
|
||||
always @(*) begin
|
||||
grant_index_r = 'x;
|
||||
grant_onehot_r = 'x;
|
||||
for (integer i = 0; i < NUM_REQS; ++i) begin
|
||||
if (requests[i]) begin
|
||||
grant_index_r = LOG_NUM_REQS'(i);
|
||||
grant_onehot_r = NUM_REQS'(0);
|
||||
grant_onehot_r[i] = 1;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
VX_priority_encoder #(
|
||||
.N (NUM_REQS)
|
||||
) tid_select (
|
||||
.data_in (requests),
|
||||
.index (grant_index),
|
||||
.onehot (grant_onehot),
|
||||
.valid_out (grant_valid)
|
||||
);
|
||||
|
||||
assign grant_index = grant_index_r;
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -28,11 +28,12 @@ module VX_index_buffer #(
|
||||
wire [ADDRW-1:0] free_index;
|
||||
|
||||
VX_priority_encoder #(
|
||||
.DATAW (SIZE)
|
||||
.N (SIZE)
|
||||
) free_slots_encoder (
|
||||
.data_in (free_slots_n),
|
||||
.data_out (free_index),
|
||||
.valid_out (free_valid)
|
||||
.data_in (free_slots_n),
|
||||
.index (free_index),
|
||||
`UNUSED_PIN (onehot),
|
||||
.valid_out (free_valid)
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
|
||||
@@ -72,11 +72,11 @@ module VX_matrix_arbiter #(
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.NUM_REQS(NUM_REQS)
|
||||
.N (NUM_REQS)
|
||||
) encoder (
|
||||
.onehot (grant_unqual),
|
||||
`UNUSED_PIN (valid),
|
||||
.binary (grant_index)
|
||||
.data_in (grant_unqual),
|
||||
.data_out (grant_index),
|
||||
`UNUSED_PIN (valid)
|
||||
);
|
||||
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
73
hw/rtl/libs/VX_onehot_encoder.v
Normal file
73
hw/rtl/libs/VX_onehot_encoder.v
Normal file
@@ -0,0 +1,73 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Fast encoder using parallel prefix computation
|
||||
// Adapted from BaseJump STL: http://bjump.org/index.html
|
||||
|
||||
// One-hot to binary encoder.
//
// Parameters:
//   N       - width of the one-hot input vector.
//   REVERSE - 0: data_out is the position of the set bit.
//             1: data_out is (N-1 - position) of the set bit.
//   FAST    - 1: log-depth tree built from generate loops.
//             0: behavioural loop over the input bits.
//
// Ports:
//   data_in  - input vector, expected to have at most one bit set.
//   data_out - encoded index; 'x when no bit is set (N > 1).
//   valid    - high when data_in contains a set bit.
module VX_onehot_encoder #(
    parameter N       = 1,
    parameter REVERSE = 0,
    parameter FAST    = 1
) (
    input wire [N-1:0]           data_in,
    output wire [`LOG2UP(N)-1:0] data_out,
    output wire                  valid
);
    if (N == 1) begin

        // Single input: the only possible index is 0. Without this case the
        // tree below has zero levels and would leave data_out at 'x.
        assign data_out = 0;
        assign valid    = data_in;

    end else if (FAST) begin
    `IGNORE_WARNINGS_BEGIN
        localparam levels_lp        = $clog2(N);
        localparam aligned_width_lp = 1 << $clog2(N);

        // addr[l] holds the partial index of each segment at level l,
        // v[l] holds the "segment contains a set bit" flag at the segment base.
        wire [levels_lp:0][aligned_width_lp-1:0] addr;
        wire [levels_lp:0][aligned_width_lp-1:0] v;

        // base case, also handle padding for non-power of two inputs
        assign v[0]    = REVERSE ? (data_in << (aligned_width_lp - N)) : ((aligned_width_lp)'(data_in));
        assign addr[0] = 'x;

        for (genvar level = 1; level < levels_lp+1; level=level+1) begin
            localparam segments_lp      = 2**(levels_lp-level);
            localparam segment_slot_lp  = aligned_width_lp/segments_lp;
            localparam segment_width_lp = level; // how many bits are needed at each level

            for (genvar segment = 0; segment < segments_lp; segment=segment+1) begin
                // vs[1]: flag of the upper half-segment, vs[0]: flag of the lower half-segment
                wire [1:0] vs = {
                    v[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)],
                    v[level-1][segment*segment_slot_lp]
                };

                assign v[level][segment*segment_slot_lp] = (| vs);

                if (level == 1) begin
                    assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = vs[!REVERSE];
                end else begin
                    // New MSB selects the half; the lower bits are the OR of
                    // the two half-segment indices from the previous level.
                    assign addr[level][(segment*segment_slot_lp)+:segment_width_lp] = {
                        vs[!REVERSE],
                        addr[level-1][segment*segment_slot_lp+:segment_width_lp-1] | addr[level-1][segment*segment_slot_lp+(segment_slot_lp >> 1)+:segment_width_lp-1]
                    };
                end
            end
        end

        assign data_out = addr[levels_lp][`LOG2UP(N)-1:0];
        assign valid    = v[levels_lp][0];
    `IGNORE_WARNINGS_END
    end else begin

        reg [`LOG2UP(N)-1:0] data_out_r;

        // Behavioural model: the last set bit visited wins. REVERSE mirrors
        // the index so this branch agrees with the FAST tree above.
        always @(*) begin
            data_out_r = 'x;
            for (integer i = 0; i < N; i++) begin
                if (data_in[i]) begin
                    data_out_r = REVERSE ? `LOG2UP(N)'(N-1-i) : `LOG2UP(N)'(i);
                end
            end
        end

        assign data_out = data_out_r;
        assign valid    = (| data_in);

    end

endmodule
|
||||
@@ -1,28 +0,0 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// One-hot to binary encoder (behavioural form).
//
//   onehot - input vector of N bits.
//   binary - index of the highest set bit of `onehot`; 'x when none is set.
//   valid  - high when at least one bit of `onehot` is set.
module VX_onehot_encoder #(
    parameter N = 6
) (
    input wire [N-1:0]           onehot,
    output wire [`LOG2UP(N)-1:0] binary,
    output wire                  valid
);
    reg [`LOG2UP(N)-1:0] encoded;
    reg                  found;

    // Walk from the top bit down and latch the first set bit encountered;
    // this yields the same result as letting the last hit of an upward scan win.
    always @(*) begin
        found   = 1'b0;
        encoded = 'x;
        for (integer pos = N-1; pos >= 0; pos = pos - 1) begin
            if (!found && onehot[pos]) begin
                found   = 1'b1;
                encoded = `LOG2UP(N)'(pos);
            end
        end
    end

    assign valid  = found;
    assign binary = encoded;

endmodule
|
||||
|
||||
@@ -1,26 +1,73 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
module VX_priority_encoder #(
|
||||
parameter DATAW = 1,
|
||||
parameter LDATAW = `LOG2UP(DATAW)
|
||||
parameter N = 1,
|
||||
parameter REVERSE = 0,
|
||||
parameter FAST = 1,
|
||||
parameter LN = `LOG2UP(N)
|
||||
) (
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [LDATAW-1:0] data_out,
|
||||
output wire valid_out
|
||||
);
|
||||
reg [LDATAW-1:0] data_out_r;
|
||||
input wire [N-1:0] data_in,
|
||||
output wire [N-1:0] onehot,
|
||||
output wire [LN-1:0] index,
|
||||
output wire valid_out
|
||||
);
|
||||
|
||||
always @(*) begin
|
||||
data_out_r = 'x;
|
||||
for (integer i = 0; i < DATAW; i++) begin
|
||||
if (data_in[i]) begin
|
||||
data_out_r = LDATAW'(i);
|
||||
break;
|
||||
if (N == 1) begin
|
||||
|
||||
assign onehot = data_in;
|
||||
assign index = 0;
|
||||
assign valid_out = data_in;
|
||||
|
||||
end else if (FAST) begin
|
||||
|
||||
wire [N-1:0] scan_lo;
|
||||
|
||||
VX_scan #(
|
||||
.N (N),
|
||||
.OP (2),
|
||||
.REVERSE (REVERSE)
|
||||
) scan (
|
||||
.data_in (data_in),
|
||||
.data_out (scan_lo)
|
||||
);
|
||||
|
||||
if (REVERSE) begin
|
||||
assign onehot = scan_lo & {1'b1, (~scan_lo[N-1:1])};
|
||||
assign valid_out = scan_lo[0];
|
||||
end else begin
|
||||
assign onehot = scan_lo & {(~scan_lo[N-2:0]), 1'b1};
|
||||
assign valid_out = scan_lo[N-1];
|
||||
end
|
||||
|
||||
VX_onehot_encoder #(
|
||||
.N (N)
|
||||
) b (
|
||||
.data_in (onehot),
|
||||
.data_out (index),
|
||||
`UNUSED_PIN (valid)
|
||||
);
|
||||
|
||||
end else begin
|
||||
|
||||
reg [N-1:0] onehot_r;
|
||||
reg [LN-1:0] index_r;
|
||||
|
||||
always @(*) begin
|
||||
index_r = 'x;
|
||||
onehot_r = 0;
|
||||
for (integer i = 0; i < N; i++) begin
|
||||
if (data_in[i]) begin
|
||||
index_r = LN'(i);
|
||||
onehot_r[i] = 1'b1;
|
||||
break;
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
assign data_out = data_out_r;
|
||||
assign valid_out = (| data_in);
|
||||
assign index = index_r;
|
||||
assign onehot = onehot_r;
|
||||
assign valid_out = (| data_in);
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -2,26 +2,45 @@
|
||||
|
||||
module VX_reset_relay #(
|
||||
parameter NUM_NODES = 1,
|
||||
parameter PASSTHRU = 0
|
||||
parameter DEPTH = 1,
|
||||
parameter ASYNC = 0
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
output wire [NUM_NODES-1:0] reset_out
|
||||
);
|
||||
|
||||
if (PASSTHRU == 0) begin
|
||||
reg [NUM_NODES-1:0] reset_r;
|
||||
always @(posedge clk) begin
|
||||
for (integer i = 0; i < NUM_NODES; ++i) begin
|
||||
reset_r[i] <= reset;
|
||||
if (DEPTH > 1) begin
|
||||
`DISABLE_BRAM reg [NUM_NODES-1:0] reset_r [DEPTH-1:0];
|
||||
if (ASYNC) begin
|
||||
always @(posedge clk or posedge reset) begin
|
||||
for (integer i = DEPTH-1; i > 0; --i)
|
||||
reset_r[i] <= reset_r[i-1];
|
||||
reset_r[0] <= {NUM_NODES{reset}};
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
for (integer i = DEPTH-1; i > 0; --i)
|
||||
reset_r[i] <= reset_r[i-1];
|
||||
reset_r[0] <= {NUM_NODES{reset}};
|
||||
end
|
||||
end
|
||||
assign reset_out = reset_r[DEPTH-1];
|
||||
end else if (DEPTH == 1) begin
|
||||
reg [NUM_NODES-1:0] reset_r;
|
||||
if (ASYNC) begin
|
||||
always @(posedge clk or posedge reset) begin
|
||||
reset_r <= {NUM_NODES{reset}};
|
||||
end
|
||||
end else begin
|
||||
always @(posedge clk) begin
|
||||
reset_r <= {NUM_NODES{reset}};
|
||||
end
|
||||
end
|
||||
assign reset_out = reset_r;
|
||||
end else begin
|
||||
`UNUSED_VAR (clk)
|
||||
for (genvar i = 0; i < NUM_NODES; ++i) begin
|
||||
assign reset_out[i] = reset;
|
||||
end
|
||||
assign reset_out = {NUM_NODES{reset}};
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -55,7 +55,8 @@ module VX_rr_arbiter #(
|
||||
|
||||
assign grant_index = grant_table[state];
|
||||
assign grant_onehot = grant_onehot_r;
|
||||
assign grant_valid = (| requests);
|
||||
assign grant_valid = (| requests);
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
60
hw/rtl/libs/VX_scan.v
Normal file
60
hw/rtl/libs/VX_scan.v
Normal file
@@ -0,0 +1,60 @@
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Fast parallel scan using a Kogge-Stone style prefix tree with a configurable operator
|
||||
// Adapted from BaseJump STL: http://bjump.org/index.html
|
||||
|
||||
// Parallel prefix scan over a bit vector.
//
// Parameters:
//   N       - vector width.
//   OP      - combining operator: 0 = XOR, 1 = AND, 2 = OR.
//   REVERSE - 0: LO->HI, data_out[i] combines data_in[i:0].
//             1: HI->LO, data_out[i] combines data_in[N-1:i].
//
// Internally the scan always runs from the top bit downwards; for the
// LO->HI direction the input is bit-reversed first and the result is
// bit-reversed back.
module VX_scan #(
    parameter N       = 1,
    parameter OP      = 0,  // 0: XOR, 1: AND, 2: OR
    parameter REVERSE = 0   // 0: LO->HI, 1: HI->LO
) (
    input wire [N-1:0]  data_in,
    output wire [N-1:0] data_out
);
`IGNORE_WARNINGS_BEGIN

    // t[0] is the (optionally reversed) input, t[$clog2(N)] the finished scan
    wire [$clog2(N):0][N-1:0] t;

    // reverses bits
    if (REVERSE) begin
        assign t[0] = data_in;
    end else begin
        assign t[0] = {<<{data_in}};
    end

    // optimize for the common case of small and-scans
    if ((N == 2) && (OP == 1)) begin
        assign t[$clog2(N)] = {t[0][1], &t[0][1:0]};
    end else if ((N == 3) && (OP == 1)) begin
        assign t[$clog2(N)] = {t[0][2], &t[0][2:1], &t[0][2:0]};
    end else if ((N == 4) && (OP == 1)) begin
        assign t[$clog2(N)] = {t[0][3], &t[0][3:2], &t[0][3:1], &t[0][3:0]};
    end else begin
        // general case
        // Bits shifted in at the top must be the identity of the operator:
        // all ones for AND, all zeros for XOR/OR. Driven exactly once here
        // instead of once per generate iteration, so the net has one driver.
        wire [N-1:0] fill = (OP == 1) ? {N{1'b1}} : {N{1'b0}};

        for (genvar i = 0; i < $clog2(N); i++) begin
            // stage i combines every bit with the bit 2**i positions above it
            wire [N-1:0] shifted = N'({fill, t[i]} >> (1<<i));
            if (OP == 0) begin
                assign t[i+1] = t[i] ^ shifted;
            end else if (OP == 1) begin
                assign t[i+1] = t[i] & shifted;
            end else if (OP == 2) begin
                assign t[i+1] = t[i] | shifted;
            end
        end
    end

    // reverse bits
    if (REVERSE) begin
        assign data_out = t[$clog2(N)];
    end else begin
        for (genvar i = 0; i < N; i++) begin
            assign data_out[i] = t[$clog2(N)][N-1-i];
        end
    end

`IGNORE_WARNINGS_END
endmodule
|
||||
@@ -12,7 +12,7 @@ module VX_shift_register_nr #(
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [(NTAPS*DATAW)-1:0] data_out
|
||||
);
|
||||
reg [DATAW-1:0] entries [DEPTH-1:0];
|
||||
`USE_FAST_BRAM reg [DATAW-1:0] entries [DEPTH-1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (enable) begin
|
||||
@@ -23,7 +23,7 @@ module VX_shift_register_nr #(
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NTAPS; ++i) begin
|
||||
assign data_out [i*DATAW+:DATAW] = entries [ TAPS[i*DEPTHW+:DEPTHW] ];
|
||||
assign data_out [i*DATAW+:DATAW] = entries [TAPS[i*DEPTHW+:DEPTHW]];
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -41,7 +41,7 @@ module VX_shift_register_wr #(
|
||||
input wire [DATAW-1:0] data_in,
|
||||
output wire [(NTAPS*DATAW)-1:0] data_out
|
||||
);
|
||||
reg [DEPTH-1:0][DATAW-1:0] entries;
|
||||
`USE_FAST_BRAM reg [DEPTH-1:0][DATAW-1:0] entries;
|
||||
|
||||
if (1 == DEPTH) begin
|
||||
|
||||
@@ -69,7 +69,7 @@ module VX_shift_register_wr #(
|
||||
end
|
||||
|
||||
for (genvar i = 0; i < NTAPS; ++i) begin
|
||||
assign data_out [i*DATAW+:DATAW] = entries [ TAPS[i*DEPTHW+:DEPTHW] ];
|
||||
assign data_out [i*DATAW+:DATAW] = entries [TAPS[i*DEPTHW+:DEPTHW]];
|
||||
end
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -3,7 +3,9 @@
|
||||
module VX_skid_buffer #(
|
||||
parameter DATAW = 1,
|
||||
parameter PASSTHRU = 0,
|
||||
parameter NOBACKPRESSURE = 0
|
||||
parameter NOBACKPRESSURE = 0,
|
||||
parameter BUFFERED = 0,
|
||||
parameter FASTRAM = 1
|
||||
) (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
@@ -49,44 +51,76 @@ module VX_skid_buffer #(
|
||||
|
||||
end else begin
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [DATAW-1:0] buffer;
|
||||
reg valid_out_r;
|
||||
reg use_buffer;
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
use_buffer <= 0;
|
||||
end else begin
|
||||
if (ready_out) begin
|
||||
use_buffer <= 0;
|
||||
if (BUFFERED) begin
|
||||
|
||||
reg [DATAW-1:0] data_out_r;
|
||||
reg [DATAW-1:0] buffer;
|
||||
reg valid_out_r;
|
||||
reg use_buffer;
|
||||
|
||||
wire push = valid_in && ready_in;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
use_buffer <= 0;
|
||||
end else begin
|
||||
if (ready_out) begin
|
||||
use_buffer <= 0;
|
||||
end
|
||||
if (push && valid_out_r && !ready_out) begin
|
||||
assert(!use_buffer);
|
||||
use_buffer <= 1;
|
||||
end
|
||||
if (!valid_out_r || ready_out) begin
|
||||
valid_out_r <= valid_in || use_buffer;
|
||||
end
|
||||
end
|
||||
if (push && valid_out_r && !ready_out) begin
|
||||
assert(!use_buffer);
|
||||
use_buffer <= 1;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (!valid_out_r || ready_out) begin
|
||||
valid_out_r <= valid_in || use_buffer;
|
||||
data_out_r <= use_buffer ? buffer : data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = !use_buffer;
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = data_out_r;
|
||||
|
||||
end else begin
|
||||
|
||||
wire q_push = valid_in && ready_in;
|
||||
wire q_pop = valid_out && ready_out;
|
||||
|
||||
wire q_empty, q_full;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.BUFFERED (BUFFERED),
|
||||
.FASTRAM (FASTRAM)
|
||||
) fifo (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (q_push),
|
||||
.pop (q_pop),
|
||||
.data_in (data_in),
|
||||
.data_out (data_out),
|
||||
.empty (q_empty),
|
||||
.alm_full (q_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
|
||||
assign ready_in = !q_full;
|
||||
assign valid_out = !q_empty;
|
||||
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
buffer <= data_in;
|
||||
end
|
||||
if (!valid_out_r || ready_out) begin
|
||||
data_out_r <= use_buffer ? buffer : data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = !use_buffer;
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = data_out_r;
|
||||
|
||||
end
|
||||
|
||||
endmodule
|
||||
@@ -92,7 +92,8 @@ module VX_stream_arbiter #(
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED)
|
||||
.PASSTHRU (!BUFFERED),
|
||||
.BUFFERED (1)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -40,7 +40,8 @@ module VX_stream_demux #(
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED)
|
||||
.PASSTHRU (!BUFFERED),
|
||||
.BUFFERED (1)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
Reference in New Issue
Block a user