Added CSR TID/WID reads

This commit is contained in:
felsabbagh3
2019-10-21 02:10:05 -04:00
parent 405926f66f
commit 84f5ccb484
25 changed files with 2339 additions and 2241 deletions

View File

@@ -8,7 +8,6 @@ module VX_alu(
input wire[31:0] in_itype_immed,
input wire[19:0] in_upper_immed,
input wire[4:0] in_alu_op,
input wire[31:0] in_csr_data, // done
input wire[31:0] in_curr_PC,
output reg[31:0] out_alu_result
);
@@ -60,9 +59,6 @@ module VX_alu(
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`LUI_ALU: out_alu_result = upper_immed;
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
`CSR_ALU_RW: out_alu_result = in_csr_data;
`CSR_ALU_RS: out_alu_result = in_csr_data;
`CSR_ALU_RC: out_alu_result = in_csr_data;
`MUL: begin out_alu_result = mult_signed_result[31:0]; end
`MULH: out_alu_result = mult_signed_result[63:32];
`MULHSU: out_alu_result = mult_signed_un_result[63:32];

View File

@@ -3,7 +3,6 @@ module VX_back_end (
input wire reset,
input wire schedule_delay,
input wire[31:0] csr_decode_csr_data,
output wire out_mem_delay,
VX_jal_response_inter VX_jal_rsp,
@@ -16,20 +15,11 @@ module VX_back_end (
VX_warp_ctl_inter VX_warp_ctl,
VX_dcache_response_inter VX_dcache_rsp,
VX_dcache_request_inter VX_dcache_req,
VX_dcache_request_inter VX_dcache_req
VX_csr_write_request_inter VX_csr_w_req
);
wire[11:0] execute_csr_address;
wire execute_is_csr;
reg[31:0] execute_csr_result;
wire execute_jal;
wire[31:0] execute_jal_dest;
VX_wb_inter VX_writeback_temp();
assign VX_writeback_inter.wb = VX_writeback_temp.wb;
assign VX_writeback_inter.rd = VX_writeback_temp.rd;
@@ -63,6 +53,10 @@ VX_inst_exec_wb_inter VX_inst_exec_wb();
// GPU unit input
VX_gpu_inst_req_inter VX_gpu_inst_req();
// CSR unit inputs
VX_csr_req_inter VX_csr_req();
VX_csr_wb_inter VX_csr_wb();
VX_gpr_stage VX_gpr_stage(
.clk (clk),
.schedule_delay (schedule_delay),
@@ -78,7 +72,8 @@ VX_inst_multiplex VX_inst_mult(
.VX_gpr_data (VX_gpr_data),
.VX_exec_unit_req(VX_exec_unit_req),
.VX_lsu_req (VX_lsu_req),
.VX_gpu_inst_req (VX_gpu_inst_req)
.VX_gpu_inst_req (VX_gpu_inst_req),
.VX_csr_req (VX_csr_req)
);
@@ -97,12 +92,7 @@ VX_execute_unit VX_execUnit(
.VX_exec_unit_req(VX_exec_unit_req),
.VX_inst_exec_wb (VX_inst_exec_wb),
.VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp),
.in_csr_data (csr_decode_csr_data),
.out_csr_address (VX_csr_w_req.csr_address),
.out_is_csr (VX_csr_w_req.is_csr),
.out_csr_result (VX_csr_w_req.csr_result)
.VX_branch_rsp (VX_branch_rsp)
);
@@ -111,9 +101,15 @@ VX_gpgpu_inst VX_gpgpu_inst(
.VX_warp_ctl (VX_warp_ctl)
);
VX_csr_wrapper VX_csr_wrapper(
.VX_csr_req(VX_csr_req),
.VX_csr_wb (VX_csr_wb)
);
VX_writeback VX_wb(
.VX_mem_wb (VX_mem_wb),
.VX_inst_exec_wb (VX_inst_exec_wb),
.VX_csr_wb (VX_csr_wb),
.VX_writeback_inter(VX_writeback_temp)
);

35
rtl/VX_csr_wrapper.v Normal file
View File

@@ -0,0 +1,35 @@
module VX_csr_wrapper (
VX_csr_req_inter VX_csr_req,
VX_csr_wb_inter VX_csr_wb
);
wire[`NT_M1:0][31:0] thread_ids;
wire[`NT_M1:0][31:0] warp_ids;
genvar cur_t;
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
assign thread_ids[cur_t] = cur_t;
end
genvar cur_tw;
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num};
end
assign VX_csr_wb.valid = VX_csr_req.valid;
assign VX_csr_wb.warp_num = VX_csr_req.warp_num;
assign VX_csr_wb.rd = VX_csr_req.rd;
assign VX_csr_wb.wb = VX_csr_req.wb;
wire thread_select = VX_csr_req.csr_address == 12'h20;
wire warp_select = VX_csr_req.csr_address == 12'h21;
assign VX_csr_wb.csr_result = thread_select ? thread_ids :
warp_select ? warp_ids :
0;
endmodule

View File

@@ -126,8 +126,7 @@ module VX_decode(
assign VX_frE_to_bckE_req.csr_immed = is_csr_immed;
assign VX_frE_to_bckE_req.is_csr = is_csr;
assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :

View File

@@ -10,12 +10,7 @@ module VX_execute_unit (
// JAL Response
VX_jal_response_inter VX_jal_rsp,
// Branch Response
VX_branch_response_inter VX_branch_rsp,
input wire[31:0] in_csr_data,
output wire[11:0] out_csr_address,
output wire out_is_csr,
output reg[31:0] out_csr_result
VX_branch_response_inter VX_branch_rsp
);
@@ -27,7 +22,6 @@ module VX_execute_unit (
wire[31:0] in_itype_immed;
wire[2:0] in_branch_type;
wire[19:0] in_upper_immed;
wire[31:0] in_csr_mask;
wire in_jal;
wire[31:0] in_jal_offset;
wire[31:0] in_curr_PC;
@@ -39,7 +33,6 @@ module VX_execute_unit (
assign in_itype_immed = VX_exec_unit_req.itype_immed;
assign in_branch_type = VX_exec_unit_req.branch_type;
assign in_upper_immed = VX_exec_unit_req.upper_immed;
assign in_csr_mask = VX_exec_unit_req.csr_mask;
assign in_jal = VX_exec_unit_req.jal;
assign in_jal_offset = VX_exec_unit_req.jal_offset;
assign in_curr_PC = VX_exec_unit_req.curr_PC;
@@ -58,7 +51,6 @@ module VX_execute_unit (
.in_itype_immed(in_itype_immed),
.in_upper_immed(in_upper_immed),
.in_alu_op (in_alu_op),
.in_csr_data (in_csr_data),
.in_curr_PC (in_curr_PC),
.out_alu_result(alu_result[index_out_reg])
);
@@ -110,18 +102,18 @@ module VX_execute_unit (
assign VX_branch_rsp.branch_dest = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
always @(*) begin
case(in_alu_op)
`CSR_ALU_RW: out_csr_result = in_csr_mask;
`CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
`CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
default: out_csr_result = 32'hdeadbeef;
endcase
// always @(*) begin
// case(in_alu_op)
// `CSR_ALU_RW: out_csr_result = in_csr_mask;
// `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
// `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
// default: out_csr_result = 32'hdeadbeef;
// endcase
end
// end
assign out_is_csr = VX_exec_unit_req.is_csr;
assign out_csr_address = VX_exec_unit_req.csr_address;
// assign out_is_csr = VX_exec_unit_req.is_csr;
// assign out_csr_address = VX_exec_unit_req.csr_address;
endmodule

View File

@@ -42,6 +42,12 @@ module VX_fetch (
.wstall (VX_wstall.wstall),
.wstall_warp_num(VX_wstall.warp_num),
// Split
.is_split (VX_warp_ctl.is_split),
.split_new_mask (VX_warp_ctl.split_new_mask),
.split_later_mask(VX_warp_ctl.split_later_mask),
.split_save_pc (VX_warp_ctl.split_save_pc),
// JAL
.jal (VX_jal_rsp.jal),
.jal_dest (VX_jal_rsp.jal_dest),

View File

@@ -16,8 +16,6 @@ module VX_front_end (
VX_frE_to_bckE_req_inter VX_bckE_req,
output wire[11:0] decode_csr_address,
output wire fetch_ebreak
);
@@ -81,10 +79,6 @@ VX_d_e_reg vx_d_e_reg(
.VX_bckE_req (VX_bckE_req)
);
assign decode_csr_address = VX_frE_to_bckE_req.csr_address;
endmodule

40
rtl/VX_generic_stack.v Normal file
View File

@@ -0,0 +1,40 @@
module VX_generic_stack
#(
parameter WIDTH = 40,
parameter DEPTH = 2
)
(
input wire clk,
input wire reset,
input wire push,
input wire pop,
input wire[WIDTH - 1:0] d,
output reg [WIDTH - 1:0] q,
);
reg [DEPTH - 1:0] ptr;
reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1];
always @(posedge clk) begin
if (reset)
ptr <= 0;
else if (push)
ptr <= ptr + 1;
else if (pop)
ptr <= ptr - 1;
end
always @(posedge clk) begin
if (push) begin
if(push)
stack[ptr] <= q;
end
end
always @(*) begin
if (pop)
q <= stack[ptr - 1];
end
endmodule

View File

@@ -26,6 +26,32 @@ module VX_gpgpu_inst (
assign VX_warp_ctl.wspawn_pc = 0;
wire[`NT_M1:0] split_new_use_mask;
wire[`NT_M1:0] split_new_later_mask;
// VX_gpu_inst_req.pc
genvar curr_s_t;
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin
wire curr_bool = (VX_gpu_inst_req.a_reg_data == 32'b1);
assign split_new_use_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (!curr_bool);
end
reg[$clog2(`NT)-1:0] num_valids;
integer z;
always @(*) begin
num_valids = 0;
for (z = 0; z < `NT; z=z+1) begin
if (VX_gpu_inst_req.valid) num_valids = num_valids + 1
end
end
assign VX_warp_ctl.is_split = (VX_gpu_inst_req.is_split) && (num_valids > 1);
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
// VX_gpu_inst_req.is_wspawn
// VX_gpu_inst_req.is_split
// VX_gpu_inst_req.is_barrier

View File

@@ -15,112 +15,112 @@ module VX_gpr (
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs1),
// .raddr2(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data),
// .q1 (out_a_reg_data),
// .q2 (out_b_reg_data)
// );
byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable),
.clk (clk),
.waddr (VX_writeback_inter.rd),
.raddr1(VX_gpr_read.rs1),
.raddr2(VX_gpr_read.rs2),
.be (VX_writeback_inter.wb_valid),
.wdata (VX_writeback_inter.write_data),
.q1 (out_a_reg_data),
.q2 (out_b_reg_data)
);
wire[`NT_M1:0][31:0] write_bit_mask;
// wire[`NT_M1:0][31:0] write_bit_mask;
genvar curr_t;
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {32{~local_write}};
end
// genvar curr_t;
// for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
// wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
// assign write_bit_mask[curr_t] = {32{~local_write}};
// end
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
// wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
wire cenb = !going_to_write;
// wire cenb = !going_to_write;
wire cena_1 = (VX_gpr_read.rs1 == 0);
wire cena_2 = (VX_gpr_read.rs2 == 0);
// wire cena_1 = (VX_gpr_read.rs1 == 0);
// wire cena_2 = (VX_gpr_read.rs2 == 0);
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_a_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(VX_gpr_read.rs1),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 first_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_a_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena_1),
// .AA(VX_gpr_read.rs1),
// .CLKB(clk),
// .CENB(cenb),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_b_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(VX_gpr_read.rs2),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 second_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_b_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(cena_2),
// .AA(VX_gpr_read.rs2),
// .CLKB(clk),
// .CENB(cenb),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
// /* verilator lint_on PINCONNECTEMPTY */
endmodule

View File

@@ -6,21 +6,24 @@ module VX_inst_multiplex (
// Outputs
VX_exec_unit_req_inter VX_exec_unit_req,
VX_lsu_req_inter VX_lsu_req,
VX_gpu_inst_req_inter VX_gpu_inst_req
VX_gpu_inst_req_inter VX_gpu_inst_req,
VX_csr_req_inter VX_csr_req
);
wire[`NT_M1:0] is_mem_mask;
wire[`NT_M1:0] is_gpu_mask;
wire[`NT_M1:0] is_csr_mask;
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_csr = VX_bckE_req.is_csr;
// wire is_gpu = 0;
genvar currT;
for (currT = 0; currT < `NT; currT = currT + 1) begin
assign is_mem_mask[currT] = is_mem;
assign is_gpu_mask[currT] = is_gpu;
assign is_csr_mask[currT] = is_csr;
end
// LSU Unit
@@ -38,7 +41,7 @@ module VX_inst_multiplex (
// Execute Unit
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask);
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
assign VX_exec_unit_req.warp_num = VX_bckE_req.warp_num;
assign VX_exec_unit_req.curr_PC = VX_bckE_req.curr_PC;
assign VX_exec_unit_req.PC_next = VX_bckE_req.PC_next;
@@ -57,10 +60,6 @@ module VX_inst_multiplex (
assign VX_exec_unit_req.jal = VX_bckE_req.jal;
assign VX_exec_unit_req.jal_offset = VX_bckE_req.jal_offset;
assign VX_exec_unit_req.ebreak = VX_bckE_req.ebreak;
assign VX_exec_unit_req.is_csr = VX_bckE_req.is_csr;
assign VX_exec_unit_req.csr_address = VX_bckE_req.csr_address;
assign VX_exec_unit_req.csr_immed = VX_bckE_req.csr_immed;
assign VX_exec_unit_req.csr_mask = VX_bckE_req.csr_mask;
// GPR Req
@@ -72,6 +71,18 @@ module VX_inst_multiplex (
assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier;
assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data;
assign VX_gpu_inst_req.rd2 = VX_gpr_data.b_reg_data[0];
assign VX_gpu_inst_req.pc_next = VX_bckE_req.PC_next;
// CSR Req
assign VX_csr_req.valid = VX_bckE_req.valid & is_csr_mask;
assign VX_csr_req.warp_num = VX_bckE_req.warp_num;
assign VX_csr_req.rd = VX_bckE_req.rd;
assign VX_csr_req.wb = VX_bckE_req.wb;
assign VX_csr_req.is_csr = VX_bckE_req.is_csr;
assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed;
assign VX_csr_req.csr_mask = VX_bckE_req.csr_mask;
endmodule

View File

@@ -20,6 +20,12 @@ module VX_warp_scheduler (
input wire wstall,
input wire[`NW_M1:0] wstall_warp_num,
// Split
input wire is_split,
input wire[`NT_M1:0] split_new_mask,
input wire[`NT_M1:0] split_later_mask,
input wire[31:0] split_save_pc,
// JAL
input wire jal,
input wire[31:0] jal_dest,

View File

@@ -7,6 +7,8 @@ module VX_writeback (
VX_inst_mem_wb_inter VX_mem_wb,
// EXEC Unit WB info
VX_inst_exec_wb_inter VX_inst_exec_wb,
// CSR Unit WB info
VX_csr_wb_inter VX_csr_wb,
// Actual WB to GPR
VX_wb_inter VX_writeback_inter
@@ -16,66 +18,34 @@ module VX_writeback (
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
wire mem_wb = (VX_mem_wb.wb != 0) && (|VX_mem_wb.wb_valid);
wire csr_wb = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid);
assign VX_writeback_inter.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
mem_wb ? VX_mem_wb.loaded_data :
csr_wb ? VX_csr_wb.csr_result :
0;
assign VX_writeback_inter.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid :
mem_wb ? VX_mem_wb.wb_valid :
csr_wb ? VX_csr_wb.valid :
0;
assign VX_writeback_inter.rd = exec_wb ? VX_inst_exec_wb.rd :
mem_wb ? VX_mem_wb.rd :
csr_wb ? VX_csr_wb.rd :
0;
assign VX_writeback_inter.wb = exec_wb ? VX_inst_exec_wb.wb :
mem_wb ? VX_mem_wb.wb :
csr_wb ? VX_csr_wb.wb :
0;
assign VX_writeback_inter.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num :
mem_wb ? VX_mem_wb.wb_warp_num :
csr_wb ? VX_csr_wb.warp_num :
0;
// wire[`NT_M1:0][31:0] in_alu_result = VX_mw_wb.alu_result;
// wire[`NT_M1:0][31:0] in_mem_result = VX_mw_wb.mem_result;
// wire[4:0] in_rd = VX_mw_wb.rd;
// wire[1:0] in_wb = VX_mw_wb.wb;
// wire[31:0] in_PC_next = VX_mw_wb.PC_next;
// wire[`NT_M1:0] in_valid = VX_mw_wb.valid;
// wire [`NW_M1:0] in_warp_num = VX_mw_wb.warp_num;
// wire is_jal;
// wire uses_alu;
// wire[`NT_M1:0][31:0] out_pc_data;
// genvar i;
// generate
// for (i = 0; i < `NT; i=i+1)
// begin
// assign out_pc_data[i] = in_PC_next;
// end
// endgenerate
// // assign out_pc_data[0] = in_PC_next;
// // assign out_pc_data[1] = in_PC_next;
// assign is_jal = in_wb == `WB_JAL;
// assign uses_alu = in_wb == `WB_ALU;
// assign VX_writeback_inter.write_data = is_jal ? out_pc_data :
// uses_alu ? in_alu_result :
// in_mem_result;
// assign VX_writeback_inter.wb_valid = in_valid;
// assign VX_writeback_inter.rd = in_rd;
// assign VX_writeback_inter.wb = in_wb;
// assign VX_writeback_inter.wb_warp_num = in_warp_num;
endmodule // VX_writeback

View File

@@ -51,9 +51,7 @@ VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetc
VX_jal_response_inter VX_jal_rsp(); // Jump resolution to Fetch
// CSR Buses
VX_csr_write_request_inter VX_csr_w_req();
wire[31:0] csr_decode_csr_data;
wire[11:0] decode_csr_address;
// VX_csr_write_request_inter VX_csr_w_req();
VX_warp_ctl_inter VX_warp_ctl();
@@ -68,7 +66,6 @@ VX_front_end vx_front_end(
.reset (reset),
.VX_warp_ctl (VX_warp_ctl),
.VX_bckE_req (VX_bckE_req),
.decode_csr_address (decode_csr_address),
.schedule_delay (schedule_delay),
.icache_response_fe (icache_response_fe),
.icache_request_fe (icache_request_fe),
@@ -91,24 +88,22 @@ VX_back_end vx_back_end(
.schedule_delay (schedule_delay),
.VX_warp_ctl (VX_warp_ctl),
.VX_bckE_req (VX_bckE_req),
.csr_decode_csr_data (csr_decode_csr_data),
.VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp),
.VX_dcache_rsp (VX_dcache_rsp),
.VX_dcache_req (VX_dcache_req),
.VX_csr_w_req (VX_csr_w_req),
.VX_writeback_inter (VX_writeback_inter),
.out_mem_delay (memory_delay)
);
VX_csr_handler vx_csr_handler(
.clk (clk),
.in_decode_csr_address(decode_csr_address),
.VX_csr_w_req (VX_csr_w_req),
.in_wb_valid (VX_writeback_inter.wb_valid[0]),
// VX_csr_handler vx_csr_handler(
// .clk (clk),
// .in_decode_csr_address(decode_csr_address),
// .VX_csr_w_req (VX_csr_w_req),
// .in_wb_valid (VX_writeback_inter.wb_valid[0]),
.out_decode_csr_data (csr_decode_csr_data)
);
// .out_decode_csr_data (csr_decode_csr_data)
// );

View File

@@ -0,0 +1,24 @@
`include "../VX_define.v"
`ifndef VX_CSR_REQ
`define VX_CSR_REQ
interface VX_csr_req_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire is_csr;
wire[11:0] csr_address;
wire csr_immed;
wire[31:0] csr_mask;
endinterface
`endif

View File

@@ -0,0 +1,21 @@
`include "../VX_define.v"
`ifndef VX_CSR_WB_REQ
`define VX_CSR_WB_REQ
interface VX_csr_wb_inter ();
wire[`NT_M1:0] valid;
wire[`NW_M1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[`NT_M1:0][31:0] csr_result;
endinterface
`endif

View File

@@ -1,18 +0,0 @@
`include "../VX_define.v"
`ifndef VX_CSR_W_REQ
`define VX_CSR_W_REQ
interface VX_csr_write_request_inter ();
wire is_csr;
wire[11:0] csr_address;
wire[31:0] csr_result;
endinterface
`endif

View File

@@ -13,6 +13,8 @@ interface VX_gpu_inst_req_inter();
wire is_split;
wire is_barrier;
wire pc_next;
wire[`NT_M1:0][31:0] a_reg_data;
wire[31:0] rd2;

View File

@@ -16,6 +16,13 @@ interface VX_warp_ctl_inter ();
wire ebreak;
wire is_split;
wire[`NT_M1:0] split_new_mask;
wire[`NT_M1:0] split_later_mask;
wire[31:0] split_save_pc;
endinterface

View File

@@ -1,7 +1,7 @@
# Dynamic Instructions: 15
# of total cycles: 28
# Dynamic Instructions: 12
# of total cycles: 25
# of forwarding stalls: 0
# of branch stalls: 0
# CPI: 1.86667
# time to simulate: 6.95313e-310 milliseconds
# CPI: 2.08333
# time to simulate: 6.95312e-310 milliseconds
# GRADE: Failed on test: 4294967295