Added CSR TID/WID reads
This commit is contained in:
@@ -8,7 +8,6 @@ module VX_alu(
|
||||
input wire[31:0] in_itype_immed,
|
||||
input wire[19:0] in_upper_immed,
|
||||
input wire[4:0] in_alu_op,
|
||||
input wire[31:0] in_csr_data, // done
|
||||
input wire[31:0] in_curr_PC,
|
||||
output reg[31:0] out_alu_result
|
||||
);
|
||||
@@ -60,9 +59,6 @@ module VX_alu(
|
||||
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
|
||||
`LUI_ALU: out_alu_result = upper_immed;
|
||||
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
|
||||
`CSR_ALU_RW: out_alu_result = in_csr_data;
|
||||
`CSR_ALU_RS: out_alu_result = in_csr_data;
|
||||
`CSR_ALU_RC: out_alu_result = in_csr_data;
|
||||
`MUL: begin out_alu_result = mult_signed_result[31:0]; end
|
||||
`MULH: out_alu_result = mult_signed_result[63:32];
|
||||
`MULHSU: out_alu_result = mult_signed_un_result[63:32];
|
||||
|
||||
@@ -3,7 +3,6 @@ module VX_back_end (
|
||||
input wire reset,
|
||||
input wire schedule_delay,
|
||||
|
||||
input wire[31:0] csr_decode_csr_data,
|
||||
output wire out_mem_delay,
|
||||
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
@@ -16,20 +15,11 @@ module VX_back_end (
|
||||
VX_warp_ctl_inter VX_warp_ctl,
|
||||
|
||||
VX_dcache_response_inter VX_dcache_rsp,
|
||||
VX_dcache_request_inter VX_dcache_req,
|
||||
VX_dcache_request_inter VX_dcache_req
|
||||
|
||||
|
||||
VX_csr_write_request_inter VX_csr_w_req
|
||||
);
|
||||
|
||||
|
||||
wire[11:0] execute_csr_address;
|
||||
wire execute_is_csr;
|
||||
reg[31:0] execute_csr_result;
|
||||
wire execute_jal;
|
||||
wire[31:0] execute_jal_dest;
|
||||
|
||||
|
||||
VX_wb_inter VX_writeback_temp();
|
||||
assign VX_writeback_inter.wb = VX_writeback_temp.wb;
|
||||
assign VX_writeback_inter.rd = VX_writeback_temp.rd;
|
||||
@@ -63,6 +53,10 @@ VX_inst_exec_wb_inter VX_inst_exec_wb();
|
||||
// GPU unit input
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req();
|
||||
|
||||
// CSR unit inputs
|
||||
VX_csr_req_inter VX_csr_req();
|
||||
VX_csr_wb_inter VX_csr_wb();
|
||||
|
||||
VX_gpr_stage VX_gpr_stage(
|
||||
.clk (clk),
|
||||
.schedule_delay (schedule_delay),
|
||||
@@ -78,7 +72,8 @@ VX_inst_multiplex VX_inst_mult(
|
||||
.VX_gpr_data (VX_gpr_data),
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_gpu_inst_req (VX_gpu_inst_req)
|
||||
.VX_gpu_inst_req (VX_gpu_inst_req),
|
||||
.VX_csr_req (VX_csr_req)
|
||||
);
|
||||
|
||||
|
||||
@@ -97,12 +92,7 @@ VX_execute_unit VX_execUnit(
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
|
||||
.in_csr_data (csr_decode_csr_data),
|
||||
.out_csr_address (VX_csr_w_req.csr_address),
|
||||
.out_is_csr (VX_csr_w_req.is_csr),
|
||||
.out_csr_result (VX_csr_w_req.csr_result)
|
||||
.VX_branch_rsp (VX_branch_rsp)
|
||||
);
|
||||
|
||||
|
||||
@@ -111,9 +101,15 @@ VX_gpgpu_inst VX_gpgpu_inst(
|
||||
.VX_warp_ctl (VX_warp_ctl)
|
||||
);
|
||||
|
||||
VX_csr_wrapper VX_csr_wrapper(
|
||||
.VX_csr_req(VX_csr_req),
|
||||
.VX_csr_wb (VX_csr_wb)
|
||||
);
|
||||
|
||||
VX_writeback VX_wb(
|
||||
.VX_mem_wb (VX_mem_wb),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
|
||||
.VX_writeback_inter(VX_writeback_temp)
|
||||
);
|
||||
|
||||
35
rtl/VX_csr_wrapper.v
Normal file
35
rtl/VX_csr_wrapper.v
Normal file
@@ -0,0 +1,35 @@
|
||||
// VX_csr_wrapper
//
// Read-only CSR unit. It answers a CSR request with either the per-thread
// id (CSR address 12'h20) or the id of the requesting warp (CSR address
// 12'h21); every other address reads back as zero. The module is purely
// combinational and holds no state of its own.
//
// Ports:
//   VX_csr_req - incoming CSR request (valid mask, warp, rd, wb, address)
//   VX_csr_wb  - write-back record produced for the same request
module VX_csr_wrapper (
	VX_csr_req_inter VX_csr_req,
	VX_csr_wb_inter  VX_csr_wb
);

	// CSR addresses decoded by this unit.
	localparam [11:0] CSR_ADDR_TID = 12'h20;
	localparam [11:0] CSR_ADDR_WID = 12'h21;

	// Request bookkeeping is forwarded unchanged to the write-back side.
	assign VX_csr_wb.valid    = VX_csr_req.valid;
	assign VX_csr_wb.warp_num = VX_csr_req.warp_num;
	assign VX_csr_wb.rd       = VX_csr_req.rd;
	assign VX_csr_wb.wb       = VX_csr_req.wb;

	// One 32-bit word per thread lane for each readable CSR.
	wire[`NT_M1:0][31:0] tid_words;
	wire[`NT_M1:0][31:0] wid_words;

	genvar lane;
	generate
		for (lane = 0; lane < `NT; lane = lane + 1) begin : csr_lane
			// Thread id is simply the lane index.
			assign tid_words[lane] = lane;
			// Warp id is the request's warp number, zero-extended to 32 bits
			// and replicated into every lane.
			assign wid_words[lane] = {{(31-`NW_M1){1'b0}}, VX_csr_req.warp_num};
		end
	endgenerate

	wire read_tid = (VX_csr_req.csr_address == CSR_ADDR_TID);
	wire read_wid = (VX_csr_req.csr_address == CSR_ADDR_WID);

	// Thread-id read takes priority; unknown addresses return 0.
	assign VX_csr_wb.csr_result = read_tid ? tid_words :
	                              read_wid ? wid_words :
	                                         0;

endmodule
|
||||
@@ -126,8 +126,7 @@ module VX_decode(
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.csr_immed = is_csr_immed;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.is_csr = is_csr;
|
||||
|
||||
|
||||
assign VX_frE_to_bckE_req.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
||||
|
||||
@@ -10,12 +10,7 @@ module VX_execute_unit (
|
||||
// JAL Response
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
// Branch Response
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
|
||||
input wire[31:0] in_csr_data,
|
||||
output wire[11:0] out_csr_address,
|
||||
output wire out_is_csr,
|
||||
output reg[31:0] out_csr_result
|
||||
VX_branch_response_inter VX_branch_rsp
|
||||
);
|
||||
|
||||
|
||||
@@ -27,7 +22,6 @@ module VX_execute_unit (
|
||||
wire[31:0] in_itype_immed;
|
||||
wire[2:0] in_branch_type;
|
||||
wire[19:0] in_upper_immed;
|
||||
wire[31:0] in_csr_mask;
|
||||
wire in_jal;
|
||||
wire[31:0] in_jal_offset;
|
||||
wire[31:0] in_curr_PC;
|
||||
@@ -39,7 +33,6 @@ module VX_execute_unit (
|
||||
assign in_itype_immed = VX_exec_unit_req.itype_immed;
|
||||
assign in_branch_type = VX_exec_unit_req.branch_type;
|
||||
assign in_upper_immed = VX_exec_unit_req.upper_immed;
|
||||
assign in_csr_mask = VX_exec_unit_req.csr_mask;
|
||||
assign in_jal = VX_exec_unit_req.jal;
|
||||
assign in_jal_offset = VX_exec_unit_req.jal_offset;
|
||||
assign in_curr_PC = VX_exec_unit_req.curr_PC;
|
||||
@@ -58,7 +51,6 @@ module VX_execute_unit (
|
||||
.in_itype_immed(in_itype_immed),
|
||||
.in_upper_immed(in_upper_immed),
|
||||
.in_alu_op (in_alu_op),
|
||||
.in_csr_data (in_csr_data),
|
||||
.in_curr_PC (in_curr_PC),
|
||||
.out_alu_result(alu_result[index_out_reg])
|
||||
);
|
||||
@@ -110,18 +102,18 @@ module VX_execute_unit (
|
||||
assign VX_branch_rsp.branch_dest = $signed(VX_exec_unit_req.curr_PC) + ($signed(VX_exec_unit_req.itype_immed) << 1); // itype_immed = branch_offset
|
||||
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`CSR_ALU_RW: out_csr_result = in_csr_mask;
|
||||
`CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
|
||||
`CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
|
||||
default: out_csr_result = 32'hdeadbeef;
|
||||
endcase
|
||||
// always @(*) begin
|
||||
// case(in_alu_op)
|
||||
// `CSR_ALU_RW: out_csr_result = in_csr_mask;
|
||||
// `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
|
||||
// `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
|
||||
// default: out_csr_result = 32'hdeadbeef;
|
||||
// endcase
|
||||
|
||||
end
|
||||
// end
|
||||
|
||||
|
||||
assign out_is_csr = VX_exec_unit_req.is_csr;
|
||||
assign out_csr_address = VX_exec_unit_req.csr_address;
|
||||
// assign out_is_csr = VX_exec_unit_req.is_csr;
|
||||
// assign out_csr_address = VX_exec_unit_req.csr_address;
|
||||
|
||||
endmodule
|
||||
@@ -42,6 +42,12 @@ module VX_fetch (
|
||||
.wstall (VX_wstall.wstall),
|
||||
.wstall_warp_num(VX_wstall.warp_num),
|
||||
|
||||
// Split
|
||||
.is_split (VX_warp_ctl.is_split),
|
||||
.split_new_mask (VX_warp_ctl.split_new_mask),
|
||||
.split_later_mask(VX_warp_ctl.split_later_mask),
|
||||
.split_save_pc (VX_warp_ctl.split_save_pc),
|
||||
|
||||
// JAL
|
||||
.jal (VX_jal_rsp.jal),
|
||||
.jal_dest (VX_jal_rsp.jal_dest),
|
||||
|
||||
@@ -16,8 +16,6 @@ module VX_front_end (
|
||||
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
|
||||
|
||||
output wire[11:0] decode_csr_address,
|
||||
output wire fetch_ebreak
|
||||
);
|
||||
|
||||
@@ -81,10 +79,6 @@ VX_d_e_reg vx_d_e_reg(
|
||||
.VX_bckE_req (VX_bckE_req)
|
||||
);
|
||||
|
||||
|
||||
assign decode_csr_address = VX_frE_to_bckE_req.csr_address;
|
||||
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
|
||||
40
rtl/VX_generic_stack.v
Normal file
40
rtl/VX_generic_stack.v
Normal file
@@ -0,0 +1,40 @@
|
||||
// VX_generic_stack
//
// Small synchronous LIFO.
//
// Parameters:
//   WIDTH - width in bits of one stack entry
//   DEPTH - log2 of the number of entries (storage holds 1 << DEPTH words)
//
// Ports:
//   clk   - clock; pointer and storage update on the rising edge
//   reset - synchronous reset of the stack pointer (storage is not cleared)
//   push  - store `d` at the current pointer and advance the pointer
//   pop   - retreat the pointer by one; ignored while `push` is asserted
//   d     - data to push
//   q     - current top-of-stack entry (the word a pop would remove)
//
// There is no full/empty tracking: pushing onto a full stack or popping an
// empty one wraps the pointer, so the user must not over/underflow it.
module VX_generic_stack
	#(
		parameter WIDTH = 40,
		parameter DEPTH = 2
	)
	(
	input  wire              clk,
	input  wire              reset,
	input  wire              push,
	input  wire              pop,
	input  wire[WIDTH - 1:0] d,
	output reg [WIDTH - 1:0] q
	);

	reg [DEPTH - 1:0] ptr;                          // next free slot
	reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1];

	// Index of the most recently pushed entry. Computed at DEPTH bits so that
	// ptr == 0 wraps to the last slot instead of producing an out-of-range
	// (32-bit) index.
	wire [DEPTH - 1:0] top = ptr - 1'b1;

	// Stack pointer: push has priority over pop.
	always @(posedge clk) begin
		if (reset)
			ptr <= 0;
		else if (push)
			ptr <= ptr + 1;
		else if (pop)
			ptr <= ptr - 1;
	end

	// Storage write: capture the incoming data word (not the output) on push.
	always @(posedge clk) begin
		if (push)
			stack[ptr] <= d;
	end

	// Top-of-stack read. Assigned unconditionally with a blocking assignment
	// so the block is pure combinational logic and no latch is inferred.
	always @(*) begin
		q = stack[top];
	end

endmodule
|
||||
@@ -26,6 +26,32 @@ module VX_gpgpu_inst (
|
||||
assign VX_warp_ctl.wspawn_pc = 0;
|
||||
|
||||
|
||||
// Per-thread masks for a split: threads whose predicate register equals 1
// run now (use mask); the remaining active threads run later (later mask).
wire[`NT_M1:0] split_new_use_mask;
wire[`NT_M1:0] split_new_later_mask;

// VX_gpu_inst_req.pc
genvar curr_s_t;
for (curr_s_t = 0; curr_s_t < `NT; curr_s_t=curr_s_t+1) begin
	// Test this thread's own 32-bit word. Comparing the whole packed
	// a_reg_data array against 32'b1 would give every thread the same result.
	wire curr_bool = (VX_gpu_inst_req.a_reg_data[curr_s_t] == 32'b1);

	assign split_new_use_mask[curr_s_t]   = VX_gpu_inst_req.valid[curr_s_t] & (curr_bool);
	assign split_new_later_mask[curr_s_t] = VX_gpu_inst_req.valid[curr_s_t] & (!curr_bool);
end
|
||||
|
||||
// Number of active threads in the request (population count of the valid
// mask). The counter is $clog2(`NT)+1 bits wide so it can hold the value
// `NT itself; a $clog2(`NT)-bit counter wraps to 0 when every thread is
// valid, which would defeat the `num_valids > 1` test below.
reg[$clog2(`NT):0] num_valids;
integer z;
always @(*) begin
	num_valids = 0;
	for (z = 0; z < `NT; z=z+1) begin
		// Count each thread's own valid bit.
		if (VX_gpu_inst_req.valid[z]) num_valids = num_valids + 1;
	end
end
|
||||
|
||||
assign VX_warp_ctl.is_split = (VX_gpu_inst_req.is_split) && (num_valids > 1);
|
||||
assign VX_warp_ctl.split_new_mask = split_new_use_mask;
|
||||
assign VX_warp_ctl.split_later_mask = split_new_later_mask;
|
||||
assign VX_warp_ctl.split_save_pc = VX_gpu_inst_req.pc_next;
|
||||
|
||||
// VX_gpu_inst_req.is_wspawn
|
||||
// VX_gpu_inst_req.is_split
|
||||
// VX_gpu_inst_req.is_barrier
|
||||
|
||||
196
rtl/VX_gpr.v
196
rtl/VX_gpr.v
@@ -15,112 +15,112 @@ module VX_gpr (
|
||||
|
||||
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
|
||||
|
||||
// byte_enabled_simple_dual_port_ram first_ram(
|
||||
// .we (write_enable),
|
||||
// .clk (clk),
|
||||
// .waddr (VX_writeback_inter.rd),
|
||||
// .raddr1(VX_gpr_read.rs1),
|
||||
// .raddr2(VX_gpr_read.rs2),
|
||||
// .be (VX_writeback_inter.wb_valid),
|
||||
// .wdata (VX_writeback_inter.write_data),
|
||||
// .q1 (out_a_reg_data),
|
||||
// .q2 (out_b_reg_data)
|
||||
// );
|
||||
byte_enabled_simple_dual_port_ram first_ram(
|
||||
.we (write_enable),
|
||||
.clk (clk),
|
||||
.waddr (VX_writeback_inter.rd),
|
||||
.raddr1(VX_gpr_read.rs1),
|
||||
.raddr2(VX_gpr_read.rs2),
|
||||
.be (VX_writeback_inter.wb_valid),
|
||||
.wdata (VX_writeback_inter.write_data),
|
||||
.q1 (out_a_reg_data),
|
||||
.q2 (out_b_reg_data)
|
||||
);
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] write_bit_mask;
|
||||
// wire[`NT_M1:0][31:0] write_bit_mask;
|
||||
|
||||
genvar curr_t;
|
||||
for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
|
||||
wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
|
||||
assign write_bit_mask[curr_t] = {32{~local_write}};
|
||||
end
|
||||
// genvar curr_t;
|
||||
// for (curr_t = 0; curr_t < `NT; curr_t=curr_t+1) begin
|
||||
// wire local_write = write_enable & VX_writeback_inter.wb_valid[curr_t];
|
||||
// assign write_bit_mask[curr_t] = {32{~local_write}};
|
||||
// end
|
||||
|
||||
wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
|
||||
// wire going_to_write = write_enable & (|VX_writeback_inter.wb_valid);
|
||||
|
||||
|
||||
wire cenb = !going_to_write;
|
||||
// wire cenb = !going_to_write;
|
||||
|
||||
wire cena_1 = (VX_gpr_read.rs1 == 0);
|
||||
wire cena_2 = (VX_gpr_read.rs2 == 0);
|
||||
// wire cena_1 = (VX_gpr_read.rs1 == 0);
|
||||
// wire cena_2 = (VX_gpr_read.rs2 == 0);
|
||||
|
||||
// wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 first_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(out_a_reg_data),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_1),
|
||||
.AA(VX_gpr_read.rs1),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(VX_writeback_inter.write_data),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
// // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
|
||||
// /* verilator lint_off PINCONNECTEMPTY */
|
||||
// rf2_32x128_wm1 first_ram (
|
||||
// .CENYA(),
|
||||
// .AYA(),
|
||||
// .CENYB(),
|
||||
// .WENYB(),
|
||||
// .AYB(),
|
||||
// .QA(out_a_reg_data),
|
||||
// .SOA(),
|
||||
// .SOB(),
|
||||
// .CLKA(clk),
|
||||
// .CENA(cena_1),
|
||||
// .AA(VX_gpr_read.rs1),
|
||||
// .CLKB(clk),
|
||||
// .CENB(cenb),
|
||||
// .WENB(write_bit_mask),
|
||||
// .AB(VX_writeback_inter.rd),
|
||||
// .DB(VX_writeback_inter.write_data),
|
||||
// .EMAA(3'b011),
|
||||
// .EMASA(1'b0),
|
||||
// .EMAB(3'b011),
|
||||
// .TENA(1'b1),
|
||||
// .TCENA(1'b0),
|
||||
// .TAA(5'b0),
|
||||
// .TENB(1'b1),
|
||||
// .TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
// .TAB(5'b0),
|
||||
// .TDB(128'b0),
|
||||
// .RET1N(1'b1),
|
||||
// .SIA(2'b0),
|
||||
// .SEA(1'b0),
|
||||
// .DFTRAMBYP(1'b0),
|
||||
// .SIB(2'b0),
|
||||
// .SEB(1'b0),
|
||||
// .COLLDISN(1'b1)
|
||||
// );
|
||||
// /* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
/* verilator lint_off PINCONNECTEMPTY */
|
||||
rf2_32x128_wm1 second_ram (
|
||||
.CENYA(),
|
||||
.AYA(),
|
||||
.CENYB(),
|
||||
.WENYB(),
|
||||
.AYB(),
|
||||
.QA(out_b_reg_data),
|
||||
.SOA(),
|
||||
.SOB(),
|
||||
.CLKA(clk),
|
||||
.CENA(cena_2),
|
||||
.AA(VX_gpr_read.rs2),
|
||||
.CLKB(clk),
|
||||
.CENB(cenb),
|
||||
.WENB(write_bit_mask),
|
||||
.AB(VX_writeback_inter.rd),
|
||||
.DB(VX_writeback_inter.write_data),
|
||||
.EMAA(3'b011),
|
||||
.EMASA(1'b0),
|
||||
.EMAB(3'b011),
|
||||
.TENA(1'b1),
|
||||
.TCENA(1'b0),
|
||||
.TAA(5'b0),
|
||||
.TENB(1'b1),
|
||||
.TCENB(1'b0),
|
||||
.TWENB(128'b0),
|
||||
.TAB(5'b0),
|
||||
.TDB(128'b0),
|
||||
.RET1N(1'b1),
|
||||
.SIA(2'b0),
|
||||
.SEA(1'b0),
|
||||
.DFTRAMBYP(1'b0),
|
||||
.SIB(2'b0),
|
||||
.SEB(1'b0),
|
||||
.COLLDISN(1'b1)
|
||||
);
|
||||
/* verilator lint_on PINCONNECTEMPTY */
|
||||
// /* verilator lint_off PINCONNECTEMPTY */
|
||||
// rf2_32x128_wm1 second_ram (
|
||||
// .CENYA(),
|
||||
// .AYA(),
|
||||
// .CENYB(),
|
||||
// .WENYB(),
|
||||
// .AYB(),
|
||||
// .QA(out_b_reg_data),
|
||||
// .SOA(),
|
||||
// .SOB(),
|
||||
// .CLKA(clk),
|
||||
// .CENA(cena_2),
|
||||
// .AA(VX_gpr_read.rs2),
|
||||
// .CLKB(clk),
|
||||
// .CENB(cenb),
|
||||
// .WENB(write_bit_mask),
|
||||
// .AB(VX_writeback_inter.rd),
|
||||
// .DB(VX_writeback_inter.write_data),
|
||||
// .EMAA(3'b011),
|
||||
// .EMASA(1'b0),
|
||||
// .EMAB(3'b011),
|
||||
// .TENA(1'b1),
|
||||
// .TCENA(1'b0),
|
||||
// .TAA(5'b0),
|
||||
// .TENB(1'b1),
|
||||
// .TCENB(1'b0),
|
||||
// .TWENB(128'b0),
|
||||
// .TAB(5'b0),
|
||||
// .TDB(128'b0),
|
||||
// .RET1N(1'b1),
|
||||
// .SIA(2'b0),
|
||||
// .SEA(1'b0),
|
||||
// .DFTRAMBYP(1'b0),
|
||||
// .SIB(2'b0),
|
||||
// .SEB(1'b0),
|
||||
// .COLLDISN(1'b1)
|
||||
// );
|
||||
// /* verilator lint_on PINCONNECTEMPTY */
|
||||
|
||||
endmodule
|
||||
|
||||
@@ -6,21 +6,24 @@ module VX_inst_multiplex (
|
||||
// Outputs
|
||||
VX_exec_unit_req_inter VX_exec_unit_req,
|
||||
VX_lsu_req_inter VX_lsu_req,
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req
|
||||
|
||||
VX_gpu_inst_req_inter VX_gpu_inst_req,
|
||||
VX_csr_req_inter VX_csr_req
|
||||
);
|
||||
|
||||
wire[`NT_M1:0] is_mem_mask;
|
||||
wire[`NT_M1:0] is_gpu_mask;
|
||||
wire[`NT_M1:0] is_csr_mask;
|
||||
|
||||
wire is_mem = (VX_bckE_req.mem_write != `NO_MEM_WRITE) || (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
wire is_csr = VX_bckE_req.is_csr;
|
||||
// wire is_gpu = 0;
|
||||
|
||||
genvar currT;
|
||||
for (currT = 0; currT < `NT; currT = currT + 1) begin
|
||||
assign is_mem_mask[currT] = is_mem;
|
||||
assign is_gpu_mask[currT] = is_gpu;
|
||||
assign is_csr_mask[currT] = is_csr;
|
||||
end
|
||||
|
||||
// LSU Unit
|
||||
@@ -38,7 +41,7 @@ module VX_inst_multiplex (
|
||||
|
||||
|
||||
// Execute Unit
|
||||
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask);
|
||||
assign VX_exec_unit_req.valid = VX_bckE_req.valid & (~is_mem_mask & ~is_gpu_mask & ~is_csr_mask);
|
||||
assign VX_exec_unit_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_exec_unit_req.curr_PC = VX_bckE_req.curr_PC;
|
||||
assign VX_exec_unit_req.PC_next = VX_bckE_req.PC_next;
|
||||
@@ -57,10 +60,6 @@ module VX_inst_multiplex (
|
||||
assign VX_exec_unit_req.jal = VX_bckE_req.jal;
|
||||
assign VX_exec_unit_req.jal_offset = VX_bckE_req.jal_offset;
|
||||
assign VX_exec_unit_req.ebreak = VX_bckE_req.ebreak;
|
||||
assign VX_exec_unit_req.is_csr = VX_bckE_req.is_csr;
|
||||
assign VX_exec_unit_req.csr_address = VX_bckE_req.csr_address;
|
||||
assign VX_exec_unit_req.csr_immed = VX_bckE_req.csr_immed;
|
||||
assign VX_exec_unit_req.csr_mask = VX_bckE_req.csr_mask;
|
||||
|
||||
|
||||
// GPR Req
|
||||
@@ -72,6 +71,18 @@ module VX_inst_multiplex (
|
||||
assign VX_gpu_inst_req.is_barrier = VX_bckE_req.is_barrier;
|
||||
assign VX_gpu_inst_req.a_reg_data = VX_gpr_data.a_reg_data;
|
||||
assign VX_gpu_inst_req.rd2 = VX_gpr_data.b_reg_data[0];
|
||||
assign VX_gpu_inst_req.pc_next = VX_bckE_req.PC_next;
|
||||
|
||||
|
||||
// CSR Req
|
||||
assign VX_csr_req.valid = VX_bckE_req.valid & is_csr_mask;
|
||||
assign VX_csr_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_csr_req.rd = VX_bckE_req.rd;
|
||||
assign VX_csr_req.wb = VX_bckE_req.wb;
|
||||
assign VX_csr_req.is_csr = VX_bckE_req.is_csr;
|
||||
assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
|
||||
assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed;
|
||||
assign VX_csr_req.csr_mask = VX_bckE_req.csr_mask;
|
||||
|
||||
endmodule
|
||||
|
||||
|
||||
@@ -20,6 +20,12 @@ module VX_warp_scheduler (
|
||||
input wire wstall,
|
||||
input wire[`NW_M1:0] wstall_warp_num,
|
||||
|
||||
// Split
|
||||
input wire is_split,
|
||||
input wire[`NT_M1:0] split_new_mask,
|
||||
input wire[`NT_M1:0] split_later_mask,
|
||||
input wire[31:0] split_save_pc,
|
||||
|
||||
// JAL
|
||||
input wire jal,
|
||||
input wire[31:0] jal_dest,
|
||||
|
||||
@@ -7,6 +7,8 @@ module VX_writeback (
|
||||
VX_inst_mem_wb_inter VX_mem_wb,
|
||||
// EXEC Unit WB info
|
||||
VX_inst_exec_wb_inter VX_inst_exec_wb,
|
||||
// CSR Unit WB info
|
||||
VX_csr_wb_inter VX_csr_wb,
|
||||
|
||||
// Actual WB to GPR
|
||||
VX_wb_inter VX_writeback_inter
|
||||
@@ -16,66 +18,34 @@ module VX_writeback (
|
||||
|
||||
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
|
||||
wire mem_wb = (VX_mem_wb.wb != 0) && (|VX_mem_wb.wb_valid);
|
||||
wire csr_wb = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid);
|
||||
|
||||
|
||||
assign VX_writeback_inter.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
|
||||
mem_wb ? VX_mem_wb.loaded_data :
|
||||
csr_wb ? VX_csr_wb.csr_result :
|
||||
0;
|
||||
|
||||
|
||||
assign VX_writeback_inter.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid :
|
||||
mem_wb ? VX_mem_wb.wb_valid :
|
||||
csr_wb ? VX_csr_wb.valid :
|
||||
0;
|
||||
|
||||
assign VX_writeback_inter.rd = exec_wb ? VX_inst_exec_wb.rd :
|
||||
mem_wb ? VX_mem_wb.rd :
|
||||
csr_wb ? VX_csr_wb.rd :
|
||||
0;
|
||||
|
||||
assign VX_writeback_inter.wb = exec_wb ? VX_inst_exec_wb.wb :
|
||||
mem_wb ? VX_mem_wb.wb :
|
||||
csr_wb ? VX_csr_wb.wb :
|
||||
0;
|
||||
|
||||
assign VX_writeback_inter.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num :
|
||||
mem_wb ? VX_mem_wb.wb_warp_num :
|
||||
csr_wb ? VX_csr_wb.warp_num :
|
||||
0;
|
||||
|
||||
// wire[`NT_M1:0][31:0] in_alu_result = VX_mw_wb.alu_result;
|
||||
// wire[`NT_M1:0][31:0] in_mem_result = VX_mw_wb.mem_result;
|
||||
// wire[4:0] in_rd = VX_mw_wb.rd;
|
||||
// wire[1:0] in_wb = VX_mw_wb.wb;
|
||||
// wire[31:0] in_PC_next = VX_mw_wb.PC_next;
|
||||
// wire[`NT_M1:0] in_valid = VX_mw_wb.valid;
|
||||
// wire [`NW_M1:0] in_warp_num = VX_mw_wb.warp_num;
|
||||
|
||||
// wire is_jal;
|
||||
// wire uses_alu;
|
||||
|
||||
// wire[`NT_M1:0][31:0] out_pc_data;
|
||||
|
||||
|
||||
// genvar i;
|
||||
// generate
|
||||
// for (i = 0; i < `NT; i=i+1)
|
||||
// begin
|
||||
// assign out_pc_data[i] = in_PC_next;
|
||||
// end
|
||||
// endgenerate
|
||||
|
||||
// // assign out_pc_data[0] = in_PC_next;
|
||||
|
||||
// // assign out_pc_data[1] = in_PC_next;
|
||||
|
||||
// assign is_jal = in_wb == `WB_JAL;
|
||||
// assign uses_alu = in_wb == `WB_ALU;
|
||||
|
||||
// assign VX_writeback_inter.write_data = is_jal ? out_pc_data :
|
||||
// uses_alu ? in_alu_result :
|
||||
// in_mem_result;
|
||||
|
||||
// assign VX_writeback_inter.wb_valid = in_valid;
|
||||
// assign VX_writeback_inter.rd = in_rd;
|
||||
// assign VX_writeback_inter.wb = in_wb;
|
||||
// assign VX_writeback_inter.wb_warp_num = in_warp_num;
|
||||
|
||||
|
||||
endmodule // VX_writeback
|
||||
21
rtl/Vortex.v
21
rtl/Vortex.v
@@ -51,9 +51,7 @@ VX_branch_response_inter VX_branch_rsp(); // Branch Resolution to Fetc
|
||||
VX_jal_response_inter VX_jal_rsp(); // Jump resolution to Fetch
|
||||
|
||||
// CSR Buses
|
||||
VX_csr_write_request_inter VX_csr_w_req();
|
||||
wire[31:0] csr_decode_csr_data;
|
||||
wire[11:0] decode_csr_address;
|
||||
// VX_csr_write_request_inter VX_csr_w_req();
|
||||
|
||||
|
||||
VX_warp_ctl_inter VX_warp_ctl();
|
||||
@@ -68,7 +66,6 @@ VX_front_end vx_front_end(
|
||||
.reset (reset),
|
||||
.VX_warp_ctl (VX_warp_ctl),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.decode_csr_address (decode_csr_address),
|
||||
.schedule_delay (schedule_delay),
|
||||
.icache_response_fe (icache_response_fe),
|
||||
.icache_request_fe (icache_request_fe),
|
||||
@@ -91,24 +88,22 @@ VX_back_end vx_back_end(
|
||||
.schedule_delay (schedule_delay),
|
||||
.VX_warp_ctl (VX_warp_ctl),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.csr_decode_csr_data (csr_decode_csr_data),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.VX_dcache_rsp (VX_dcache_rsp),
|
||||
.VX_dcache_req (VX_dcache_req),
|
||||
.VX_csr_w_req (VX_csr_w_req),
|
||||
.VX_writeback_inter (VX_writeback_inter),
|
||||
.out_mem_delay (memory_delay)
|
||||
);
|
||||
|
||||
VX_csr_handler vx_csr_handler(
|
||||
.clk (clk),
|
||||
.in_decode_csr_address(decode_csr_address),
|
||||
.VX_csr_w_req (VX_csr_w_req),
|
||||
.in_wb_valid (VX_writeback_inter.wb_valid[0]),
|
||||
// VX_csr_handler vx_csr_handler(
|
||||
// .clk (clk),
|
||||
// .in_decode_csr_address(decode_csr_address),
|
||||
// .VX_csr_w_req (VX_csr_w_req),
|
||||
// .in_wb_valid (VX_writeback_inter.wb_valid[0]),
|
||||
|
||||
.out_decode_csr_data (csr_decode_csr_data)
|
||||
);
|
||||
// .out_decode_csr_data (csr_decode_csr_data)
|
||||
// );
|
||||
|
||||
|
||||
|
||||
|
||||
24
rtl/interfaces/VX_csr_req_inter.v
Normal file
24
rtl/interfaces/VX_csr_req_inter.v
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_CSR_REQ
|
||||
|
||||
`define VX_CSR_REQ
|
||||
|
||||
// VX_csr_req_inter
//
// Signal bundle describing one CSR instruction request. It is driven by
// VX_inst_multiplex from the back-end request and consumed by VX_csr_wrapper.
interface VX_csr_req_inter ();

	// --- Request identity / write-back destination -----------------------
	wire[`NT_M1:0] valid;        // one valid bit per thread
	wire[`NW_M1:0] warp_num;     // warp that issued the request
	wire[4:0]      rd;           // destination register index
	wire[1:0]      wb;           // write-back selector

	// --- CSR operation ---------------------------------------------------
	wire           is_csr;       // request is a CSR instruction
	wire[11:0]     csr_address;  // CSR address being accessed
	wire           csr_immed;    // forwarded from the decoded request
	wire[31:0]     csr_mask;     // forwarded from the decoded request

endinterface
|
||||
|
||||
|
||||
`endif
|
||||
21
rtl/interfaces/VX_csr_wb_inter.v
Normal file
21
rtl/interfaces/VX_csr_wb_inter.v
Normal file
@@ -0,0 +1,21 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_CSR_WB_REQ
|
||||
|
||||
`define VX_CSR_WB_REQ
|
||||
|
||||
// VX_csr_wb_inter
//
// Signal bundle carrying the result of a CSR request from VX_csr_wrapper
// to VX_writeback.
interface VX_csr_wb_inter ();

	// --- Write-back destination ------------------------------------------
	wire[`NT_M1:0] valid;               // one valid bit per thread
	wire[`NW_M1:0] warp_num;            // warp the result belongs to
	wire[4:0]      rd;                  // destination register index
	wire[1:0]      wb;                  // write-back selector

	// --- Result ----------------------------------------------------------
	wire[`NT_M1:0][31:0] csr_result;    // one 32-bit CSR read value per thread

endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -1,18 +0,0 @@
|
||||
|
||||
`include "../VX_define.v"
|
||||
|
||||
`ifndef VX_CSR_W_REQ
|
||||
|
||||
`define VX_CSR_W_REQ
|
||||
|
||||
interface VX_csr_write_request_inter ();
|
||||
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address;
|
||||
wire[31:0] csr_result;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
`endif
|
||||
@@ -13,6 +13,8 @@ interface VX_gpu_inst_req_inter();
|
||||
wire is_split;
|
||||
wire is_barrier;
|
||||
|
||||
// Next-PC of the instruction; forwarded to the 32-bit VX_warp_ctl.split_save_pc.
// Must be a full 32-bit bus: a scalar wire would keep only bit 0 of the address.
wire[31:0] pc_next;
|
||||
|
||||
wire[`NT_M1:0][31:0] a_reg_data;
|
||||
wire[31:0] rd2;
|
||||
|
||||
|
||||
@@ -16,6 +16,13 @@ interface VX_warp_ctl_inter ();
|
||||
|
||||
wire ebreak;
|
||||
|
||||
|
||||
wire is_split;
|
||||
wire[`NT_M1:0] split_new_mask;
|
||||
wire[`NT_M1:0] split_later_mask;
|
||||
wire[31:0] split_save_pc;
|
||||
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
# Dynamic Instructions: 15
|
||||
# of total cycles: 28
|
||||
# Dynamic Instructions: 12
|
||||
# of total cycles: 25
|
||||
# of forwarding stalls: 0
|
||||
# of branch stalls: 0
|
||||
# CPI: 1.86667
|
||||
# time to simulate: 6.95313e-310 milliseconds
|
||||
# CPI: 2.08333
|
||||
# time to simulate: 6.95312e-310 milliseconds
|
||||
# GRADE: Failed on test: 4294967295
|
||||
|
||||
Reference in New Issue
Block a user