RTL code refactoring

This commit is contained in:
Blaise Tine
2020-04-23 12:38:44 -04:00
parent f0e257bc8e
commit 28d054e295
39 changed files with 179 additions and 225 deletions

View File

@@ -7,8 +7,8 @@ module VX_back_end #(
input wire reset,
input wire schedule_delay,
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
output wire mem_delay,
output wire exec_delay,
@@ -44,7 +44,7 @@ VX_exec_unit_req_if exec_unit_req_if();
VX_inst_exec_wb_if inst_exec_wb_if();
// GPU unit input
VX_gpu_inst_req_if gpu_inst_req_if();
VX_gpgpu_inst_req_if gpgpu_inst_req_if();
// CSR unit inputs
VX_csr_req_if csr_req_if();
@@ -61,7 +61,7 @@ VX_gpr_stage gpr_stage (
// New
.exec_unit_req_if (exec_unit_req_if),
.lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if),
.gpgpu_inst_req_if (gpgpu_inst_req_if),
.csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr),
// End new
@@ -93,7 +93,7 @@ VX_exec_unit exec_unit (
);
VX_gpgpu_inst gpgpu_inst (
.gpu_inst_req_if(gpu_inst_req_if),
.gpgpu_inst_req_if(gpgpu_inst_req_if),
.warp_ctl_if (warp_ctl_if)
);

View File

@@ -27,6 +27,18 @@
`define GLOBAL_BLOCK_SIZE_BYTES 16
`endif
// Overridable defaults: each macro below is defined only if it has not
// already been defined earlier in the compilation, so an earlier definition
// takes precedence over the value given here.
// NUM_CSRS default (presumably the number of CSR entries -- confirm at use sites).
`ifndef NUM_CSRS
`define NUM_CSRS 1024
`endif
// IO bus address default, expressed as a 32-bit hex constant.
`ifndef IO_BUS_ADDR
`define IO_BUS_ADDR 32'h00010000
`endif
// Program startup address default, expressed as a 32-bit hex constant.
`ifndef STARTUP_ADDR
`define STARTUP_ADDR 32'h80000000
`endif
// ========================= Dcache Configurable Knobs ========================
// Size of cache in bytes

View File

@@ -48,8 +48,6 @@
`define CSR_ADDR_SIZE 12
`define NUM_CSRS 1024
`define CSR_WIDTH 12
`define CSR_CYCL_L 12'hC00;
@@ -142,12 +140,6 @@
`define ZERO_REG 5'h0
// IO BUS
`define IO_BUS_ADDR 32'h00010000
// Program startup address
`define STARTUP_ADDR 32'h80000000
////////////////////////// Dcache Configurable Knobs //////////////////////////
// Function ID

View File

@@ -5,29 +5,29 @@ module VX_dmem_ctrl (
input wire reset,
// Dram <-> Dcache
VX_gpu_dcache_dram_req_if gpu_dcache_dram_req_if,
VX_gpu_dcache_dram_rsp_if gpu_dcache_dram_rsp_if,
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if,
VX_cache_dram_req_if cache_dram_req_if,
VX_cache_dram_rsp_if cache_dram_rsp_if,
VX_cache_snp_req_rsp_if gpu_dcache_snp_req_if,
// Dram <-> Icache
VX_gpu_dcache_dram_req_if gpu_icache_dram_req_if,
VX_gpu_dcache_dram_rsp_if gpu_icache_dram_rsp_if,
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if,
VX_cache_dram_req_if gpu_icache_dram_req_if,
VX_cache_dram_rsp_if gpu_icache_dram_rsp_if,
VX_cache_snp_req_rsp_if gpu_icache_snp_req_if,
// Core <-> Dcache
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
// Core <-> Icache
VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if
VX_cache_core_rsp_if icache_rsp_if,
VX_cache_core_req_if icache_req_if
);
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
VX_cache_core_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if();
VX_cache_core_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
VX_cache_core_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if();
VX_cache_core_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF;
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid);
@@ -68,8 +68,8 @@ module VX_dmem_ctrl (
assign dcache_req_if.core_req_ready = to_shm ? dcache_req_smem_if.core_req_ready : dcache_req_dcache_if.core_req_ready;
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_rsp_if();
VX_cache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if();
VX_cache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_rsp_if();
VX_cache #(
.CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES),
@@ -208,19 +208,19 @@ module VX_dmem_ctrl (
`IGNORE_WARNINGS_END
// DRAM response
.dram_rsp_valid (gpu_dcache_dram_rsp_if.dram_rsp_valid),
.dram_rsp_addr (gpu_dcache_dram_rsp_if.dram_rsp_addr),
.dram_rsp_data (gpu_dcache_dram_rsp_if.dram_rsp_data),
.dram_rsp_valid (cache_dram_rsp_if.dram_rsp_valid),
.dram_rsp_addr (cache_dram_rsp_if.dram_rsp_addr),
.dram_rsp_data (cache_dram_rsp_if.dram_rsp_data),
// DRAM accept response
.dram_rsp_ready (gpu_dcache_dram_req_if.dram_rsp_ready),
.dram_rsp_ready (cache_dram_req_if.dram_rsp_ready),
// DRAM Req
.dram_req_read (gpu_dcache_dram_req_if.dram_req_read),
.dram_req_write (gpu_dcache_dram_req_if.dram_req_write),
.dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr),
.dram_req_data (gpu_dcache_dram_req_if.dram_req_data),
.dram_req_ready (gpu_dcache_dram_req_if.dram_req_ready),
.dram_req_read (cache_dram_req_if.dram_req_read),
.dram_req_write (cache_dram_req_if.dram_req_write),
.dram_req_addr (cache_dram_req_if.dram_req_addr),
.dram_req_data (cache_dram_req_if.dram_req_data),
.dram_req_ready (cache_dram_req_if.dram_req_ready),
// Snoop Request
.snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid),

View File

@@ -44,13 +44,16 @@ module VX_fetch (
.wspawn (warp_ctl_if.wspawn),
.wsapwn_pc (warp_ctl_if.wspawn_pc),
.wspawn_new_active(warp_ctl_if.wspawn_new_active),
// CTM
.ctm (warp_ctl_if.change_mask),
.ctm_mask (warp_ctl_if.thread_mask),
.ctm_warp_num (warp_ctl_if.warp_num),
// WHALT
.whalt (warp_ctl_if.ebreak),
.whalt_warp_num (warp_ctl_if.warp_num),
// Wstall
.wstall (wstall_if.wstall),
.wstall_warp_num (wstall_if.warp_num),

View File

@@ -8,8 +8,8 @@ module VX_front_end (
VX_warp_ctl_if warp_ctl_if,
VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if,
VX_cache_core_rsp_if icache_rsp_if,
VX_cache_core_req_if icache_req_if,
VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if,
@@ -58,7 +58,7 @@ module VX_front_end (
wire freeze_fi_reg = total_freeze || icache_stage_delay;
VX_f_d_reg f_i_reg(
VX_f_d_reg f_i_reg (
.clk (clk),
.reset (reset),
.freeze (freeze_fi_reg),
@@ -66,7 +66,7 @@ module VX_front_end (
.fd_inst_meta_de (fe_inst_meta_fi2)
);
VX_icache_stage icache_stage(
VX_icache_stage icache_stage (
.clk (clk),
.reset (reset),
.total_freeze (total_freeze),
@@ -79,7 +79,7 @@ module VX_front_end (
.icache_req_if (icache_req_if)
);
VX_i_d_reg i_d_reg(
VX_i_d_reg i_d_reg (
.clk (clk),
.reset (reset),
.freeze (total_freeze),
@@ -87,7 +87,7 @@ module VX_front_end (
.fd_inst_meta_de (fd_inst_meta_de)
);
VX_decode decode(
VX_decode decode (
.fd_inst_meta_de (fd_inst_meta_de),
.frE_to_bckE_req_if (frE_to_bckE_req_if),
.wstall_if (wstall_if),
@@ -97,7 +97,7 @@ module VX_front_end (
wire no_br_stall = 0;
VX_d_e_reg d_e_reg(
VX_d_e_reg d_e_reg (
.clk (clk),
.reset (reset),
.branch_stall (no_br_stall),

View File

@@ -2,50 +2,50 @@
module VX_gpgpu_inst (
// Input
VX_gpu_inst_req_if gpu_inst_req_if,
VX_gpgpu_inst_req_if gpgpu_inst_req_if,
// Output
VX_warp_ctl_if warp_ctl_if
);
wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
wire is_split = (gpu_inst_req_if.is_split);
wire[`NUM_THREADS-1:0] curr_valids = gpgpu_inst_req_if.valid;
wire is_split = (gpgpu_inst_req_if.is_split);
wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
wire all_threads = `NUM_THREADS < gpgpu_inst_req_if.a_reg_data[0];
genvar curr_t;
generate
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpgpu_inst_req_if.a_reg_data[0];
end
endgenerate
wire valid_inst = (|curr_valids);
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
assign warp_ctl_if.warp_num = gpgpu_inst_req_if.warp_num;
assign warp_ctl_if.change_mask = (gpgpu_inst_req_if.is_tmc) && valid_inst;
assign warp_ctl_if.thread_mask = gpgpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
// assign warp_ctl_if.ebreak = (gpgpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
wire wspawn = gpu_inst_req_if.is_wspawn;
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
wire wspawn = gpgpu_inst_req_if.is_wspawn;
wire[31:0] wspawn_pc = gpgpu_inst_req_if.rd2;
wire all_active = `NUM_WARPS < gpgpu_inst_req_if.a_reg_data[0];
wire[`NUM_WARPS-1:0] wspawn_new_active;
genvar curr_w;
generate
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpgpu_inst_req_if.a_reg_data[0];
end
endgenerate
assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst;
assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0];
assign warp_ctl_if.is_barrier = gpgpu_inst_req_if.is_barrier && valid_inst;
assign warp_ctl_if.barrier_id = gpgpu_inst_req_if.a_reg_data[0];
`DEBUG_BEGIN
wire[31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1;
wire[31:0] num_warps_m1 = gpgpu_inst_req_if.rd2 - 1;
`DEBUG_END
assign warp_ctl_if.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
@@ -61,7 +61,7 @@ module VX_gpgpu_inst (
genvar curr_s_t;
generate
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
wire curr_bool = (gpgpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
@@ -83,11 +83,11 @@ module VX_gpgpu_inst (
assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign warp_ctl_if.split_new_mask = split_new_use_mask;
assign warp_ctl_if.split_later_mask = split_new_later_mask;
assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next;
assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num;
assign warp_ctl_if.split_save_pc = gpgpu_inst_req_if.pc_next;
assign warp_ctl_if.split_warp_num = gpgpu_inst_req_if.warp_num;
// gpu_inst_req_if.is_wspawn
// gpu_inst_req_if.is_split
// gpu_inst_req_if.is_barrier
// gpgpu_inst_req_if.is_wspawn
// gpgpu_inst_req_if.is_split
// gpgpu_inst_req_if.is_barrier
endmodule

View File

@@ -20,7 +20,7 @@ module VX_gpr_stage (
// Outputs
VX_exec_unit_req_if exec_unit_req_if,
VX_lsu_req_if lsu_req_if,
VX_gpu_inst_req_if gpu_inst_req_if,
VX_gpgpu_inst_req_if gpgpu_inst_req_if,
VX_csr_req_if csr_req_if
);
`DEBUG_BEGIN
@@ -65,7 +65,7 @@ module VX_gpr_stage (
// Outputs
VX_exec_unit_req_if exec_unit_req_temp_if();
VX_lsu_req_if lsu_req_temp_if();
VX_gpu_inst_req_if gpu_inst_req_temp_if();
VX_gpgpu_inst_req_if gpu_inst_req_temp_if();
VX_csr_req_if csr_req_temp_if();
VX_inst_multiplex inst_mult(
@@ -73,7 +73,7 @@ module VX_gpr_stage (
.gpr_data_if (gpr_datf_if),
.exec_unit_req_if(exec_unit_req_temp_if),
.lsu_req_if (lsu_req_temp_if),
.gpu_inst_req_if (gpu_inst_req_temp_if),
.gpgpu_inst_req_if (gpu_inst_req_temp_if),
.csr_req_if (csr_req_temp_if)
);
`DEBUG_BEGIN
@@ -164,11 +164,11 @@ module VX_gpr_stage (
.stall(stall_rest),
.flush(flush_rest),
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next })
.out ({gpgpu_inst_req_if.valid , gpgpu_inst_req_if.warp_num , gpgpu_inst_req_if.is_wspawn , gpgpu_inst_req_if.is_tmc , gpgpu_inst_req_if.is_split , gpgpu_inst_req_if.is_barrier , gpgpu_inst_req_if.pc_next })
);
assign gpu_inst_req_if.a_reg_data = real_base_address;
assign gpu_inst_req_if.rd2 = real_store_data;
assign gpgpu_inst_req_if.a_reg_data = real_base_address;
assign gpgpu_inst_req_if.rd2 = real_store_data;
VX_generic_register #(
.N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
@@ -214,7 +214,7 @@ module VX_gpr_stage (
.stall(stall_rest),
.flush(flush_rest),
.in ({gpu_inst_req_temp_if.valid, gpu_inst_req_temp_if.warp_num, gpu_inst_req_temp_if.is_wspawn, gpu_inst_req_temp_if.is_tmc, gpu_inst_req_temp_if.is_split, gpu_inst_req_temp_if.is_barrier, gpu_inst_req_temp_if.pc_next, gpu_inst_req_temp_if.a_reg_data, gpu_inst_req_temp_if.rd2}),
.out ({gpu_inst_req_if.valid , gpu_inst_req_if.warp_num , gpu_inst_req_if.is_wspawn , gpu_inst_req_if.is_tmc , gpu_inst_req_if.is_split , gpu_inst_req_if.is_barrier , gpu_inst_req_if.pc_next , gpu_inst_req_if.a_reg_data , gpu_inst_req_if.rd2 })
.out ({gpgpu_inst_req_if.valid , gpgpu_inst_req_if.warp_num , gpgpu_inst_req_if.is_wspawn , gpgpu_inst_req_if.is_tmc , gpgpu_inst_req_if.is_split , gpgpu_inst_req_if.is_barrier , gpgpu_inst_req_if.pc_next , gpgpu_inst_req_if.a_reg_data , gpgpu_inst_req_if.rd2 })
);
VX_generic_register #(

View File

@@ -10,8 +10,8 @@ module VX_icache_stage (
VX_inst_meta_if fe_inst_meta_fi,
VX_inst_meta_if fe_inst_meta_id,
VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if
VX_cache_core_rsp_if icache_rsp_if,
VX_cache_core_req_if icache_req_if
);
reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];

View File

@@ -8,7 +8,7 @@ module VX_inst_multiplex (
// Outputs
VX_exec_unit_req_if exec_unit_req_if,
VX_lsu_req_if lsu_req_if,
VX_gpu_inst_req_if gpu_inst_req_if,
VX_gpgpu_inst_req_if gpgpu_inst_req_if,
VX_csr_req_if csr_req_if
);
@@ -68,15 +68,15 @@ module VX_inst_multiplex (
// GPR Req
assign gpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask;
assign gpu_inst_req_if.warp_num = bckE_req_if.warp_num;
assign gpu_inst_req_if.is_wspawn = bckE_req_if.is_wspawn;
assign gpu_inst_req_if.is_tmc = bckE_req_if.is_tmc;
assign gpu_inst_req_if.is_split = bckE_req_if.is_split;
assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data;
assign gpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0];
assign gpu_inst_req_if.pc_next = bckE_req_if.PC_next;
assign gpgpu_inst_req_if.valid = bckE_req_if.valid & is_gpu_mask;
assign gpgpu_inst_req_if.warp_num = bckE_req_if.warp_num;
assign gpgpu_inst_req_if.is_wspawn = bckE_req_if.is_wspawn;
assign gpgpu_inst_req_if.is_tmc = bckE_req_if.is_tmc;
assign gpgpu_inst_req_if.is_split = bckE_req_if.is_split;
assign gpgpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
assign gpgpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data;
assign gpgpu_inst_req_if.rd2 = gpr_data_if.b_reg_data[0];
assign gpgpu_inst_req_if.pc_next = bckE_req_if.PC_next;
// CSR Req

View File

@@ -9,8 +9,8 @@ module VX_lsu_unit (
// Write back to GPR
VX_inst_mem_wb_if mem_wb_if,
VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if,
VX_cache_core_rsp_if dcache_rsp_if,
VX_cache_core_req_if dcache_req_if,
output wire delay
);
// Generate Addresses

View File

@@ -4,6 +4,7 @@ module VX_warp_sched (
input wire clk, // Clock
input wire reset,
input wire stall,
// Wspawn
input wire wspawn,
input wire[31:0] wsapwn_pc,
@@ -72,13 +73,13 @@ module VX_warp_sched (
`DEBUG_BEGIN
wire in_wspawn = wspawn;
wire in_ctm = ctm;
wire in_whalt = whalt;
wire in_ctm = ctm;
wire in_whalt = whalt;
wire in_wstall = wstall;
`DEBUG_END
reg[`NUM_WARPS-1:0] warp_active;
reg[`NUM_WARPS-1:0] warp_stalled;
reg[`NUM_WARPS-1:0] warp_active;
reg[`NUM_WARPS-1:0] warp_stalled;
reg [`NUM_WARPS-1:0] visible_active;
wire[`NUM_WARPS-1:0] use_active;
@@ -97,11 +98,11 @@ module VX_warp_sched (
wire [$clog2(`NUM_WARPS):0] b_count;
// wspawn state (the signals below keep the legacy "wsapwn" spelling)
reg [31:0] use_wsapwn_pc;
reg [31:0] use_wsapwn_pc;
reg [`NUM_WARPS-1:0] use_wsapwn;
wire [`NW_BITS-1:0] warp_to_schedule;
wire schedule;
wire schedule;
wire hazard;
wire global_stall;
@@ -249,6 +250,7 @@ module VX_warp_sched (
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
// integer curr_b;
// always @(*) begin
// total_barrier_stall = 0;
@@ -260,8 +262,8 @@ module VX_warp_sched (
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask};
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc, split_later_mask};
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
@@ -328,14 +330,6 @@ module VX_warp_sched (
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
// end
// Valid counter
// assign num_active = $countones(visible_active);
// VX_one_counter valid_counter(
// .valids(visible_active),
// .ones_found()
// );
assign ebreak = (warp_active == 0);
endmodule

View File

@@ -64,28 +64,28 @@ module Vortex #(
wire schedule_delay;
// Dcache Interface
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if();
VX_cache_core_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if();
VX_cache_core_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if();
VX_cache_core_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_rsp_if();
VX_cache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) cache_dram_req_if();
VX_cache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) cache_dram_rsp_if();
assign gpu_dcache_dram_rsp_if.dram_rsp_valid = dram_rsp_valid;
assign gpu_dcache_dram_rsp_if.dram_rsp_addr = dram_rsp_addr;
assign cache_dram_rsp_if.dram_rsp_valid = dram_rsp_valid;
assign cache_dram_rsp_if.dram_rsp_addr = dram_rsp_addr;
assign dram_req_write = gpu_dcache_dram_req_if.dram_req_write;
assign dram_req_read = gpu_dcache_dram_req_if.dram_req_read;
assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr;
assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready;
assign dram_req_write = cache_dram_req_if.dram_req_write;
assign dram_req_read = cache_dram_req_if.dram_req_read;
assign dram_req_addr = cache_dram_req_if.dram_req_addr;
assign dram_rsp_ready = cache_dram_req_if.dram_rsp_ready;
assign gpu_dcache_dram_req_if.dram_req_ready = dram_req_ready;
assign cache_dram_req_if.dram_req_ready = dram_req_ready;
genvar i;
generate
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
assign gpu_dcache_dram_rsp_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i];
assign cache_dram_rsp_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
assign dram_req_data[i * 32 +: 32] = cache_dram_req_if.dram_req_data[i];
end
endgenerate
@@ -111,11 +111,11 @@ module Vortex #(
assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc;
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if();
VX_cache_core_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if();
VX_cache_core_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_rsp_if();
VX_cache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if();
VX_cache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_rsp_if();
assign gpu_icache_dram_rsp_if.dram_rsp_valid = I_dram_rsp_valid;
assign gpu_icache_dram_rsp_if.dram_rsp_addr = I_dram_rsp_addr;
@@ -149,8 +149,8 @@ VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
VX_warp_ctl_if warp_ctl_if();
// Cache snooping
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if();
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if();
VX_cache_snp_req_rsp_if gpu_icache_snp_req_if();
VX_cache_snp_req_rsp_if gpu_dcache_snp_req_if();
assign gpu_dcache_snp_req_if.snp_req_valid = llc_snp_req_valid;
assign gpu_dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
assign llc_snp_req_ready = gpu_dcache_snp_req_if.snp_req_ready;
@@ -203,8 +203,8 @@ VX_dmem_ctrl dmem_ctrl (
.reset (reset),
// Dram <-> Dcache
.gpu_dcache_dram_req_if (gpu_dcache_dram_req_if),
.gpu_dcache_dram_rsp_if (gpu_dcache_dram_rsp_if),
.cache_dram_req_if (cache_dram_req_if),
.cache_dram_rsp_if (cache_dram_rsp_if),
.gpu_dcache_snp_req_if (gpu_dcache_snp_req_if),
// Dram <-> Icache

View File

@@ -1,5 +1,5 @@
`ifndef VX_BRANCH_RSP
`define VX_BRANCH_RSP
`ifndef VX_BRANCH_RSP_IF
`define VX_BRANCH_RSP_IF
`include "VX_define.vh"

View File

@@ -1,9 +1,9 @@
`ifndef VX_GPU_DCACHE_REQ
`define VX_GPU_DCACHE_REQ
`ifndef VX_CACHE_CORE_REQ_IF
`define VX_CACHE_CORE_REQ_IF
`include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_req_if #(
interface VX_cache_core_req_if #(
parameter NUM_REQUESTS = 32
) ();

View File

@@ -1,9 +1,9 @@
`ifndef VX_GPU_DCACHE_RSP
`define VX_GPU_DCACHE_RSP
`ifndef VX_CACHE_CORE_RSP_IF
`define VX_CACHE_CORE_RSP_IF
`include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_rsp_if #(
interface VX_cache_core_rsp_if #(
parameter NUM_REQUESTS = 32
) ();

View File

@@ -1,9 +1,9 @@
`ifndef VX_GPU_DRAM_DCACHE_REQ
`define VX_GPU_DRAM_DCACHE_REQ
`ifndef VX_CACHE_DRAM_REQ_IF
`define VX_CACHE_DRAM_REQ_IF
`include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_dram_req_if #(
interface VX_cache_dram_req_if #(
parameter BANK_LINE_WORDS = 2
) ();

View File

@@ -1,9 +1,9 @@
`ifndef VX_GPU_DRAM_DCACHE_RSP
`define VX_GPU_DRAM_DCACHE_RSP
`ifndef VX_CACHE_DRAM_RSP_IF
`define VX_CACHE_DRAM_RSP_IF
`include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_dram_rsp_if #(
interface VX_cache_dram_rsp_if #(
parameter BANK_LINE_WORDS = 2
) ();
// DRAM Response

View File

@@ -1,9 +1,9 @@
`ifndef VX_GPU_SNP_REQ_RSP
`define VX_GPU_SNP_REQ_RSP
`ifndef VX_CACHE_SNP_REQ_RSP_IF
`define VX_CACHE_SNP_REQ_RSP_IF
`include "../cache/VX_cache_config.vh"
interface VX_gpu_snp_req_rsp_if ();
interface VX_cache_snp_req_rsp_if ();
// Snoop request
wire snp_req_valid;

View File

@@ -1,5 +1,5 @@
`ifndef VX_CSR_REQ
`define VX_CSR_REQ
`ifndef VX_CSR_REQ_IF
`define VX_CSR_REQ_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_CSR_WB_REQ
`define VX_CSR_WB_REQ
`ifndef VX_CSR_WB_IF
`define VX_CSR_WB_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_EXE_UNIT_REQ_INTER
`define VX_EXE_UNIT_REQ_INTER
`ifndef VX_EXE_UNIT_REQ_IF
`define VX_EXE_UNIT_REQ_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_FrE_to_BE_INTER
`define VX_FrE_to_BE_INTER
`ifndef VX_FrE_to_BCKBE_REQ_IF
`define VX_FrE_to_BCKBE_REQ_IF
`include "VX_define.vh"

View File

@@ -1,9 +1,9 @@
`ifndef VX_GPU_INST_REQ_IN
`define VX_GPU_INST_REQ_IN
`ifndef VX_GPGPU_INST_REQ_IF
`define VX_GPGPU_INST_REQ_IF
`include "VX_define.vh"
interface VX_gpu_inst_req_if();
interface VX_gpgpu_inst_req_if();
wire [`NUM_THREADS-1:0] valid;
wire [`NW_BITS-1:0] warp_num;

View File

@@ -1,6 +1,6 @@
`ifndef VX_gpr_data_INTER
`define VX_gpr_data_INTER
`ifndef VX_GPR_DATA_IF
`define VX_GPR_DATA_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_GPR_JAL_INTER
`define VX_GPR_JAL_INTER
`ifndef VX_GPR_JAL_IF
`define VX_GPR_JAL_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_GPR_READ
`define VX_GPR_READ
`ifndef VX_GPR_READ_IF
`define VX_GPR_READ_IF
`include "VX_define.vh"

View File

@@ -1,13 +0,0 @@
// Snoop-request signal bundle: a 1-bit valid flag and a 32-bit address.
// The interface declares no ready/response signal and no modports, so signal
// direction is not constrained here.
// Include guard: the body is compiled only once per compilation unit.
`ifndef VX_GPU_SNP_REQ
`define VX_GPU_SNP_REQ
`include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_snp_req_if ();
// Snoop Req
// Request-valid flag (1 bit).
wire snp_req_valid;
// Request address (32 bits).
wire [31:0] snp_req_addr;
endinterface
`endif

View File

@@ -1,15 +0,0 @@
// Instruction-cache response signal bundle: a 32-bit instruction word and a
// 1-bit delay signal. No modports are declared, so direction is not
// constrained here.
// Include guard: the body is compiled only once per compilation unit.
`ifndef VX_ICACHE_RSP
`define VX_ICACHE_RSP
`include "VX_define.vh"
interface VX_icache_rsp_if ();
// The ready/stall signals below are commented out and are not part of the interface.
// wire ready;
// wire stall;
// Instruction word (32 bits).
wire [31:0] instruction;
// NOTE(review): presumably asserted while the response is not yet available -- confirm against the driver.
wire delay;
endinterface
`endif

View File

@@ -1,6 +1,6 @@
`ifndef VX_EXEC_UNIT_WB_INST_INTER
`define VX_EXEC_UNIT_WB_INST_INTER
`ifndef VX_INST_EXEC_WB_IF
`define VX_INST_EXEC_WB_IF
`include "VX_define.vh"

View File

@@ -1,6 +1,6 @@
`ifndef VX_MEM_WB_INST_INTER
`define VX_MEM_WB_INST_INTER
`ifndef VX_INST_MEM_WB_IF
`define VX_INST_MEM_WB_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_F_D_INTER
`define VX_F_D_INTER
`ifndef VX_INST_META_IF
`define VX_INST_META_IF
`include "VX_define.vh"

View File

@@ -1,6 +1,6 @@
`ifndef VX_JAL_RSP
`define VX_JAL_RSP
`ifndef VX_JAL_RSP_IF
`define VX_JAL_RSP_IF
`include "VX_define.vh"

View File

@@ -1,6 +1,6 @@
`ifndef VX_JOIN_INTER
`define VX_JOIN_INTER
`ifndef VX_JOIN_IF
`define VX_JOIN_IF
`include "VX_define.vh"

View File

@@ -1,6 +1,6 @@
`ifndef VX_LSU_REQ_INTER
`define VX_LSU_REQ_INTER
`ifndef VX_LSU_REQ_IF
`define VX_LSU_REQ_IF
`include "VX_define.vh"

View File

@@ -1,19 +0,0 @@
// Write-back signal bundle carrying per-thread ALU and memory results plus
// the register/warp bookkeeping declared below. No modports are declared, so
// direction is not constrained here.
// Include guard: the body is compiled only once per compilation unit.
`ifndef VX_MW_WB_INTER
`define VX_MW_WB_INTER
`include "VX_define.vh"
interface VX_mw_wb_if ();
// One 32-bit ALU result per thread (`NUM_THREADS entries).
wire [`NUM_THREADS-1:0][31:0] alu_result;
// One 32-bit memory result per thread (`NUM_THREADS entries).
wire [`NUM_THREADS-1:0][31:0] mem_result;
// 5-bit field; presumably the destination register index -- confirm.
wire [4:0] rd;
// 2-bit field; presumably the write-back select/enable code -- confirm encoding.
wire [1:0] wb;
// 32-bit next-PC value.
wire [31:0] PC_next;
// One valid bit per thread.
wire [`NUM_THREADS-1:0] valid;
// Warp identifier, `NW_BITS wide.
wire [`NW_BITS-1:0] warp_num;
endinterface
`endif

View File

@@ -1,6 +1,6 @@
`ifndef VX_WARP_CTL_INTER
`define VX_WARP_CTL_INTER
`ifndef VX_WARP_CTL_IF
`define VX_WARP_CTL_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_WB_INTER
`define VX_WB_INTER
`ifndef VX_WB_IF
`define VX_WB_IF
`include "VX_define.vh"

View File

@@ -1,5 +1,5 @@
`ifndef VX_WSTALL_INTER
`define VX_WSTALL_INTER
`ifndef VX_WSTALL_IF
`define VX_WSTALL_IF
`include "VX_define.vh"