performance refactoring - rebalanced stream buffers across the device to enforce the output buffering rule at component boundaries, finally resolved block RAM R/W collision discrepancies

This commit is contained in:
Blaise Tine
2020-12-19 02:45:06 -08:00
parent 29cd2f5dff
commit 4bbd7bf408
76 changed files with 1313 additions and 1098 deletions

View File

@@ -29,8 +29,8 @@ module VX_warp_sched #(
// Lock warp until instruction decode to resolve branches
reg [`NUM_WARPS-1:0] fetch_lock;
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
reg [`NUM_THREADS-1:0] thread_masks [`NUM_WARPS-1:0];
reg [31:0] warp_pcs [`NUM_WARPS-1:0];
// barriers
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0]; // warps waiting on barrier
@@ -180,11 +180,11 @@ module VX_warp_sched #(
// split/join stack management
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
wire [(1+32+`NUM_THREADS-1):0] ipdom [`NUM_WARPS-1:0];
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]};
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split.pc, warp_ctl_if.split.else_mask};
assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid];
assign {join_fall, join_pc, join_tm} = ipdom [join_if.wid];
for (genvar i = 0; i < `NUM_WARPS; i++) begin
wire push = warp_ctl_if.valid
@@ -196,7 +196,7 @@ module VX_warp_sched #(
VX_ipdom_stack #(
.WIDTH(1+32+`NUM_THREADS),
.DEPTH(`NT_BITS+1)
.DEPTH(2 ** (`NT_BITS+1))
) ipdom_stack (
.clk (clk),
.reset(reset),