From 509850192cfbe6d63eed32ccd5961b3a5d9f5683 Mon Sep 17 00:00:00 2001
From: "Lyons, Ethan Tyler"
Date: Thu, 21 Nov 2019 01:14:50 -0500
Subject: [PATCH 1/3] Warps/Threads Parameterization

Replace the hard-coded VX_generic_register widths in the execute, GPR,
LSU and writeback stages with expressions in `NW_M1 and `NT, so the
register sizes follow the warp/thread configuration in VX_define.v.

`NW_M1 is now derived from `NW through the `CLOG2 macro instead of being
a literal. With `NW = 8 this evaluates to 3 where the old literal was 2
(assuming `CLOG2 is a ceiling-log2), so every register that carries a
warp number grows by one bit relative to the old constants (e.g. the
non-ASIC lsu_reg goes from 340 to 341).

The registers in the first branch of VX_gpr_stage.v (before `else) do
not carry the per-thread operand data, so their widths scale with `NT
only through the valid mask. Each expression reproduces the literal it
replaces when evaluated at NT = 4, NW_M1 = 2.

The `ASIC define is commented out.

---
 rtl/VX_define.v       |  8 +++++---
 rtl/VX_execute_unit.v |  4 ++--
 rtl/VX_gpr_stage.v    | 17 +++++++++--------
 rtl/VX_lsu.v          |  2 +-
 rtl/VX_writeback.v    |  2 +-
 5 files changed, 18 insertions(+), 15 deletions(-)

diff --git a/rtl/VX_define.v b/rtl/VX_define.v
index d9e15aa5..8f77fdb3 100644
--- a/rtl/VX_define.v
+++ b/rtl/VX_define.v
@@ -1,16 +1,18 @@
-
 `define NT 4
 `define NT_M1 (`NT-1)
 
 // NW_M1 is actually log2(NW)
-`define NW_M1 (3-1)
+//`define NW_M1 (4-1)
+
 `define NW 8
+`define NW_M1 (`CLOG2(`NW))
+
 
 // Uncomment the below line if NW=1
 // `define ONLY
 
 // `define SYN 1
-`define ASIC 1
+//`define ASIC 1
 
 `define NUM_BARRIERS 4
 
diff --git a/rtl/VX_execute_unit.v b/rtl/VX_execute_unit.v
index 8d06ce00..c64c1181 100644
--- a/rtl/VX_execute_unit.v
+++ b/rtl/VX_execute_unit.v
@@ -133,7 +133,7 @@ module VX_execute_unit (
 	// 	.out ({VX_inst_exec_wb.rd , VX_inst_exec_wb.wb , VX_inst_exec_wb.wb_valid , VX_inst_exec_wb.wb_warp_num , VX_inst_exec_wb.alu_result , VX_inst_exec_wb.exec_wb_pc })
 	// );
 
-	VX_generic_register #(.N(36)) jal_reg(
+	VX_generic_register #(.N(33 + `NW_M1 + 1)) jal_reg(
 		.clk (clk),
 		.reset(reset),
 		.stall(zero),
@@ -142,7 +142,7 @@ module VX_execute_unit (
 		.out ({VX_jal_rsp.jal , VX_jal_rsp.jal_dest , VX_jal_rsp.jal_warp_num})
 	);
 
-	VX_generic_register #(.N(37)) branch_reg(
+	VX_generic_register #(.N(34 + `NW_M1 + 1)) branch_reg(
 		.clk (clk),
 		.reset(reset),
 		.stall(zero),
diff --git a/rtl/VX_gpr_stage.v b/rtl/VX_gpr_stage.v
index 257c7169..89316cbb 100644
--- a/rtl/VX_gpr_stage.v
+++ b/rtl/VX_gpr_stage.v
@@ -133,7 +133,7 @@ module VX_gpr_stage (
 
 		assign VX_lsu_req.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;
 
-		VX_generic_register #(.N(84)) lsu_reg(
+		VX_generic_register #(.N(77 + `NW_M1 + 1 + `NT)) lsu_reg( // no per-thread data in this reg; 84 at NT=4, NW_M1=2
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_lsu),
@@ -142,7 +142,7 @@ module VX_gpr_stage (
 			.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc ,VX_lsu_req.warp_num , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
 		);
 
-		VX_generic_register #(.N(231)) exec_unit_reg(
+		VX_generic_register #(.N(224 + `NW_M1 + 1 + `NT)) exec_unit_reg( // a/b_reg_data are assigned outside this reg; 231 at NT=4, NW_M1=2
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_rest),
@@ -154,7 +154,7 @@ module VX_gpr_stage (
 		assign VX_exec_unit_req.a_reg_data = real_base_address;
 		assign VX_exec_unit_req.b_reg_data = real_store_data;
 
-		VX_generic_register #(.N(43)) gpu_inst_reg(
+		VX_generic_register #(.N(36 + `NW_M1 + 1 + `NT)) gpu_inst_reg( // a_reg_data/rd2 are assigned outside this reg; 43 at NT=4, NW_M1=2
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_rest),
@@ -166,7 +166,7 @@ module VX_gpr_stage (
 		assign VX_gpu_inst_req.a_reg_data = real_base_address;
 		assign VX_gpu_inst_req.rd2 = real_store_data;
 
-		VX_generic_register #(.N(60)) csr_reg(
+		VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_rest),
@@ -180,7 +180,8 @@ module VX_gpr_stage (
 
 	`else
 
-		VX_generic_register #(.N(340)) lsu_reg(
+		// 341
+		VX_generic_register #(.N(77 + `NW_M1 + 1 + 65*(`NT))) lsu_reg(
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_lsu),
@@ -189,7 +190,7 @@ module VX_gpr_stage (
 			.out ({VX_lsu_req.valid , VX_lsu_req.lsu_pc , VX_lsu_req.warp_num , VX_lsu_req.store_data , VX_lsu_req.base_address , VX_lsu_req.offset , VX_lsu_req.mem_read , VX_lsu_req.mem_write , VX_lsu_req.rd , VX_lsu_req.wb })
 		);
 
-		VX_generic_register #(.N(487)) exec_unit_reg(
+		VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_rest),
@@ -198,7 +199,7 @@ module VX_gpr_stage (
 			.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
 		);
 
-		VX_generic_register #(.N(203)) gpu_inst_reg(
+		VX_generic_register #(.N(68 + `NW_M1 + 1 + 33*(`NT))) gpu_inst_reg(
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_rest),
@@ -207,7 +208,7 @@ module VX_gpr_stage (
 			.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
 		);
 
-		VX_generic_register #(.N(60)) csr_reg(
+		VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
 			.clk (clk),
 			.reset(reset),
 			.stall(stall_rest),
diff --git a/rtl/VX_lsu.v b/rtl/VX_lsu.v
index 5f278b71..05def072 100644
--- a/rtl/VX_lsu.v
+++ b/rtl/VX_lsu.v
@@ -45,7 +45,7 @@ module VX_lsu (
 
 	wire zero = 0;
 
-	VX_generic_register #(.N(308)) lsu_buffer(
+	VX_generic_register #(.N(45 + `NW_M1 + 1 + `NT*65)) lsu_buffer(
 		.clk (clk),
 		.reset(reset),
 		.stall(out_delay),
diff --git a/rtl/VX_writeback.v b/rtl/VX_writeback.v
index f67f5be9..037f7d2e 100644
--- a/rtl/VX_writeback.v
+++ b/rtl/VX_writeback.v
@@ -63,7 +63,7 @@ module VX_writeback (
 
 	wire zero = 0;
 
-	VX_generic_register #(.N(174)) wb_register(
+	VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
 		.clk (clk),
 		.reset(reset),
 		.stall(zero),
From 9f58584207f00ec0169e782b2e6d16a8bc5f7096 Mon Sep 17 00:00:00 2001
From: "Lyons, Ethan Tyler"
Date: Thu, 21 Nov 2019 01:15:21 -0500
Subject: [PATCH 2/3] Warps/Threads Parameterization

Size the fetch/decode and decode/execute pipeline registers from `NW_M1
and `NT instead of fixed literals. Each expression reproduces the
literal it replaces when evaluated at NT = 4, NW_M1 = 2.

---
 rtl/pipe_regs/VX_d_e_reg.v | 2 +-
 rtl/pipe_regs/VX_f_d_reg.v | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/rtl/pipe_regs/VX_d_e_reg.v b/rtl/pipe_regs/VX_d_e_reg.v
index c3e8f8ee..e25a0d88 100644
--- a/rtl/pipe_regs/VX_d_e_reg.v
+++ b/rtl/pipe_regs/VX_d_e_reg.v
@@ -18,7 +18,7 @@ module VX_d_e_reg (
 
 	wire flush = (in_branch_stall == `STALL);
 
-	VX_generic_register #(.N(240)) d_e_reg
+	VX_generic_register #(.N(233 + `NW_M1 + 1 + `NT)) d_e_reg
 	(
 		.clk (clk),
 		.reset(reset),
diff --git a/rtl/pipe_regs/VX_f_d_reg.v b/rtl/pipe_regs/VX_f_d_reg.v
index dd69e196..0d5d99a8 100644
--- a/rtl/pipe_regs/VX_f_d_reg.v
+++ b/rtl/pipe_regs/VX_f_d_reg.v
@@ -14,8 +14,7 @@ module VX_f_d_reg (
 
 	wire stall = in_freeze == 1'b1;
 
-
-	VX_generic_register #(.N(71)) f_d_reg
+	VX_generic_register #(.N(64 + `NW_M1 + 1 + `NT)) f_d_reg
 	(
 		.clk (clk),
 		.reset(reset),
From 52e881243e89d53913d3b2cde61bf480737d127e Mon Sep 17 00:00:00 2001
From: "Lyons, Ethan Tyler"
Date: Thu, 21 Nov 2019 01:15:54 -0500
Subject: [PATCH 3/3] Warps/Threads Parameterization

Add a NUM_REQ parameter to VX_priority_encoder_sm and use it for the
request-index width and for the per-bank VX_generic_priority_encoder,
which was previously fixed at N = 4. VX_shared_memory passes its own
NUM_REQ through. The default is 4 so that an instantiation which does
not override NUM_REQ keeps the previous encoder size.

Replace $clog2 with the `CLOG2 macro in both files.

---
 rtl/shared_memory/VX_priority_encoder_sm.v | 11 ++++++-----
 rtl/shared_memory/VX_shared_memory.v       |  4 ++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/rtl/shared_memory/VX_priority_encoder_sm.v b/rtl/shared_memory/VX_priority_encoder_sm.v
index 6fa47ece..ba571fd3 100644
--- a/rtl/shared_memory/VX_priority_encoder_sm.v
+++ b/rtl/shared_memory/VX_priority_encoder_sm.v
@@ -3,7 +3,8 @@
 module VX_priority_encoder_sm
 #(
 	parameter NB = 4,
-	parameter BITS_PER_BANK = 3
+	parameter BITS_PER_BANK = 3,
+	parameter NUM_REQ = 4 // matches the encoder size that was hard-coded as .N(4)
 )
 (
 	//INPUTS
@@ -19,7 +20,7 @@ module VX_priority_encoder_sm
 	output reg[NB:0][31:0] out_data,
 
 	// To Processor
-	output wire[NB:0][1:0] req_num,
+	output wire[NB:0][`CLOG2(NUM_REQ) - 1:0] req_num,
 	output reg stall,
 	output wire send_data // Finished all of the requests
 );
@@ -49,7 +50,7 @@ module VX_priority_encoder_sm
 	generate
 	for (curr_bank = 0; curr_bank <= NB; curr_bank = curr_bank + 1)
 	begin
-		wire[$clog2(`NT):0] num_valids;
+		wire[`CLOG2(`NT):0] num_valids;
 
 		VX_countones #(.N(`NT)) valids_counter (
 			.valids(bank_valids[curr_bank]),
@@ -64,7 +65,7 @@ module VX_priority_encoder_sm
 	assign stall = (|more_than_one_valid);
 	assign send_data = (!stall) && (|in_valid); // change
 
-	wire[NB:0][1:0] internal_req_num;
+	wire[NB:0][(`CLOG2(NUM_REQ)) - 1:0] internal_req_num;
 	wire[NB:0] internal_out_valid;
 
 
@@ -73,7 +74,7 @@ module VX_priority_encoder_sm
 	for (curr_bank_o = 0; curr_bank_o <= NB; curr_bank_o = curr_bank_o + 1)
 	begin
 
-		VX_generic_priority_encoder #(.N(4)) vx_priority_encoder(
+		VX_generic_priority_encoder #(.N(NUM_REQ)) vx_priority_encoder(
 			.valids(bank_valids[curr_bank_o]),
 			.index(internal_req_num[curr_bank_o]),
 			.found(internal_out_valid[curr_bank_o])
diff --git a/rtl/shared_memory/VX_shared_memory.v b/rtl/shared_memory/VX_shared_memory.v
index 71149d70..bd9cce36 100644
--- a/rtl/shared_memory/VX_shared_memory.v
+++ b/rtl/shared_memory/VX_shared_memory.v
@@ -54,7 +54,7 @@ reg [SM_BANKS - 1:0][SM_LOG_WORDS_PER_READ-1:0] block_we;
 
 wire send_data;
 //reg[NB:0][1:0] req_num;
-reg[SM_BANKS - 1:0][$clog2(NUM_REQ) - 1:0] req_num; // not positive about this
+reg[SM_BANKS - 1:0][`CLOG2(NUM_REQ) - 1:0] req_num; // not positive about this
 
 wire [`NT_M1:0] orig_in_valid;
 
@@ -71,7 +71,7 @@ genvar f;
 
 
 //VX_priority_encoder_sm #(.NB(NB), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm(
-VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK)) vx_priority_encoder_sm(
+VX_priority_encoder_sm #(.NB(SM_BANKS - 1), .BITS_PER_BANK(BITS_PER_BANK), .NUM_REQ(NUM_REQ)) vx_priority_encoder_sm(
 	.clk(clk),
 	.reset(reset),
 	.in_valid(orig_in_valid),