diff --git a/hw/rtl/core/VX_schedule.sv b/hw/rtl/core/VX_schedule.sv index 36bdf9e0..9a074d31 100644 --- a/hw/rtl/core/VX_schedule.sv +++ b/hw/rtl/core/VX_schedule.sv @@ -231,7 +231,13 @@ module VX_schedule import VX_gpu_pkg::*; #( `ifdef GBAR_ENABLE if (warp_ctl_if.valid && warp_ctl_if.barrier.valid && warp_ctl_if.barrier.is_global + `ifdef GBAR_CLUSTER_ENABLE + // cluster mode: raise the global barrier request as soon as active_barrier_count matches barrier.size_m1 (low NW_WIDTH bits), + // instead of waiting for curr_barrier_mask_n == active_warps. NOTE(review): size_m1 is also forwarded below truncated to NC_WIDTH bits; confirm one field is meant to serve both widths + && (active_barrier_count[`NW_WIDTH-1:0] == warp_ctl_if.barrier.size_m1[`NW_WIDTH-1:0])) begin + `else && (curr_barrier_mask_n == active_warps)) begin + `endif gbar_req_valid <= 1; gbar_req_id <= warp_ctl_if.barrier.id; gbar_req_size_m1 <= warp_ctl_if.barrier.size_m1[`NC_WIDTH-1:0]; diff --git a/hw/rtl/core/VX_wctl_unit.sv b/hw/rtl/core/VX_wctl_unit.sv index 88b2f71e..5b1ad834 100644 --- a/hw/rtl/core/VX_wctl_unit.sv +++ b/hw/rtl/core/VX_wctl_unit.sv @@ -109,7 +109,13 @@ module VX_wctl_unit import VX_gpu_pkg::*; #( assign barrier.valid = is_bar; assign barrier.id = rs1_data[`NB_WIDTH-1:0]; `ifdef GBAR_ENABLE +`ifdef GBAR_CLUSTER_ENABLE + // with GBAR_CLUSTER_ENABLE every barrier is flagged is_global and rs1_data[31] is ignored; + // cluster-wide barriers are implemented by modifying the global barrier logic + assign barrier.is_global = 1'b1; +`else assign barrier.is_global = rs1_data[31]; +`endif `else assign barrier.is_global = 1'b0; `endif