Support exec multi-cycle for div/mul

This commit is contained in:
wgulian3
2020-02-13 13:17:46 -05:00
parent c1bd731d7f
commit 8318aff69f
8 changed files with 100 additions and 58 deletions

View File

@@ -3,7 +3,7 @@ all: RUNFILE
# /rf2_256x128_wm1/ # /rf2_256x128_wm1/
BaseMEM=../models/memory/cln28hpm BaseMEM=../models/memory/cln28hpm
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Icompat/ -Isimulate
FILE=Vortex.v FILE=Vortex.v

View File

@@ -1,6 +1,8 @@
`include "VX_define.v" `include "VX_define.v"
module VX_alu( module VX_alu(
input wire clk,
input wire reset,
input wire[31:0] in_1, input wire[31:0] in_1,
input wire[31:0] in_2, input wire[31:0] in_2,
input wire in_rs2_src, input wire in_rs2_src,
@@ -8,9 +10,11 @@ module VX_alu(
input wire[19:0] in_upper_immed, input wire[19:0] in_upper_immed,
input wire[4:0] in_alu_op, input wire[4:0] in_alu_op,
input wire[31:0] in_curr_PC, input wire[31:0] in_curr_PC,
output reg[31:0] out_alu_result output reg[31:0] out_alu_result,
output reg out_alu_stall
); );
localparam div_pipeline_len = 3;
`ifdef SYN_FUNC `ifdef SYN_FUNC
wire which_in2; wire which_in2;
@@ -25,23 +29,25 @@ module VX_alu(
wire[31:0] signed_div_result; wire[31:0] signed_div_result;
wire[31:0] signed_rem_result; wire[31:0] signed_rem_result;
reg [15:0] inst_delay;
reg [15:0] inst_delay_count;
assign out_alu_stall = inst_delay != 0 || inst_delay_count != 0;
assign which_in2 = in_rs2_src == `RS2_IMMED; assign which_in2 = in_rs2_src == `RS2_IMMED;
assign ALU_in1 = in_1; assign ALU_in1 = in_1;
assign ALU_in2 = which_in2 ? in_itype_immed : in_2; assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
assign upper_immed = {in_upper_immed, {12{1'b0}}}; assign upper_immed = {in_upper_immed, {12{1'b0}}};
VX_divide #( VX_divide #(
.WIDTHN(32), .WIDTHN(32),
.WIDTHD(32), .WIDTHD(32),
.SPEED("HIGHEST"), .SPEED("HIGHEST"),
.PIPELINE(0) .PIPELINE(div_pipeline_len)
) unsigned_div ( ) unsigned_div (
.clk(0), .clock(clk),
.aclr(0), .aclr(0),
.clken(1), // TODO this could be disabled on inactive instructions .clken(1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1), .numer(ALU_in1),
@@ -56,9 +62,9 @@ module VX_alu(
.NREP("SIGNED"), .NREP("SIGNED"),
.DREP("SIGNED"), .DREP("SIGNED"),
.SPEED("HIGHEST"), .SPEED("HIGHEST"),
.PIPELINE(0) .PIPELINE(div_pipeline_len)
) signed_div ( ) signed_div (
.clk(0), .clock(clk),
.aclr(0), .aclr(0),
.clken(1), // TODO this could be disabled on inactive instructions .clken(1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1), .numer(ALU_in1),
@@ -101,6 +107,7 @@ module VX_alu(
`MULH: out_alu_result = mult_result[63:32]; `MULH: out_alu_result = mult_result[63:32];
`MULHSU: out_alu_result = mult_result[63:32]; `MULHSU: out_alu_result = mult_result[63:32];
`MULHU: out_alu_result = mult_result[63:32]; `MULHU: out_alu_result = mult_result[63:32];
// TODO: would it be profitable to fold these exceptional (divisor == 0) cases into inst_delay, so the divider pipeline latency is skipped when the result does not depend on the divider?
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result; `DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result; `DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result; `REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
@@ -109,6 +116,25 @@ module VX_alu(
endcase // in_alu_op endcase // in_alu_op
end end
// Combinational decode of the extra latency for the current ALU operation.
// DIV, DIVU, REM and REMU are assigned div_pipeline_len (the same localparam
// that is passed as PIPELINE to the unsigned_div / signed_div instances);
// every other opcode is assigned 0.
always @(*) begin
case(in_alu_op)
`DIV,
`DIVU,
`REM,
`REMU: inst_delay = div_pipeline_len;
// All remaining operations request no extra delay.
default: inst_delay = 0;
endcase // in_alu_op
end
// Down-counter for multi-cycle operations.
//   - reset (asynchronous, it is in the sensitivity list) clears the counter.
//   - While the counter is non-zero it decrements by one per clock.
//   - When the counter is zero and inst_delay is non-zero, it is loaded with
//     inst_delay - 1.
//   - Otherwise it holds its value (zero).
// NOTE(review): out_alu_stall is driven as
//   inst_delay != 0 || inst_delay_count != 0
// and inst_delay depends only on in_alu_op. If in_alu_op is held while the
// stall is asserted, the stall would appear never to release, and this
// counter would reload each time it reaches zero. Confirm against the
// upstream stall/hold behaviour of the execute request register.
always @(posedge clk or posedge reset) begin
if (reset)
inst_delay_count <= 0;
else if (inst_delay_count > 0)
inst_delay_count <= inst_delay_count - 1;
else if (inst_delay != 0)
inst_delay_count <= inst_delay - 1;
end
`else `else
wire which_in2; wire which_in2;
@@ -169,4 +195,4 @@ module VX_alu(
end end
`endif `endif
endmodule endmodule : VX_alu

View File

@@ -6,6 +6,7 @@ module VX_back_end (
input wire schedule_delay, input wire schedule_delay,
output wire out_mem_delay, output wire out_mem_delay,
output wire out_exec_delay,
output wire gpr_stage_delay, output wire gpr_stage_delay,
VX_jal_response_inter VX_jal_rsp, VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp, VX_branch_response_inter VX_branch_rsp,
@@ -32,7 +33,7 @@ assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
VX_mw_wb_inter VX_mw_wb(); VX_mw_wb_inter VX_mw_wb();
wire no_slot_mem; wire no_slot_mem, no_slot_exec;
VX_mem_req_inter VX_exe_mem_req(); VX_mem_req_inter VX_exe_mem_req();
@@ -69,6 +70,7 @@ VX_gpr_stage VX_gpr_stage(
.VX_csr_req (VX_csr_req), .VX_csr_req (VX_csr_req),
// End new // End new
.memory_delay (out_mem_delay), .memory_delay (out_mem_delay),
.exec_delay (out_exec_delay),
.gpr_stage_delay (gpr_stage_delay) .gpr_stage_delay (gpr_stage_delay)
); );
@@ -91,7 +93,9 @@ VX_execute_unit VX_execUnit(
.VX_exec_unit_req(VX_exec_unit_req), .VX_exec_unit_req(VX_exec_unit_req),
.VX_inst_exec_wb (VX_inst_exec_wb), .VX_inst_exec_wb (VX_inst_exec_wb),
.VX_jal_rsp (VX_jal_rsp), .VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp) .VX_branch_rsp (VX_branch_rsp),
.out_delay (out_exec_delay),
.no_slot_exec (no_slot_exec)
); );
@@ -113,7 +117,8 @@ VX_writeback VX_wb(
.VX_csr_wb (VX_csr_wb), .VX_csr_wb (VX_csr_wb),
.VX_writeback_inter(VX_writeback_temp), .VX_writeback_inter(VX_writeback_temp),
.no_slot_mem (no_slot_mem) .no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec)
); );
endmodule endmodule

View File

@@ -12,7 +12,10 @@ module VX_execute_unit (
// JAL Response // JAL Response
VX_jal_response_inter VX_jal_rsp, VX_jal_response_inter VX_jal_rsp,
// Branch Response // Branch Response
VX_branch_response_inter VX_branch_rsp VX_branch_response_inter VX_branch_rsp,
input wire no_slot_exec,
output wire out_delay
); );
@@ -41,10 +44,13 @@ module VX_execute_unit (
wire[`NT_M1:0][31:0] alu_result; wire[`NT_M1:0][31:0] alu_result;
wire[`NT_M1:0] alu_stall;
genvar index_out_reg; genvar index_out_reg;
generate generate
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs
VX_alu vx_alu( VX_alu vx_alu(
.clk(clk),
.reset(reset),
// .in_reg_data (in_reg_data[1:0]), // .in_reg_data (in_reg_data[1:0]),
.in_1 (in_a_reg_data[index_out_reg]), .in_1 (in_a_reg_data[index_out_reg]),
.in_2 (in_b_reg_data[index_out_reg]), .in_2 (in_b_reg_data[index_out_reg]),
@@ -53,11 +59,17 @@ module VX_execute_unit (
.in_upper_immed(in_upper_immed), .in_upper_immed(in_upper_immed),
.in_alu_op (in_alu_op), .in_alu_op (in_alu_op),
.in_curr_PC (in_curr_PC), .in_curr_PC (in_curr_PC),
.out_alu_result(alu_result[index_out_reg]) .out_alu_result(alu_result[index_out_reg]),
.out_alu_stall(alu_stall[index_out_reg])
); );
end end
endgenerate endgenerate
// internal_stall is the OR-reduction of alu_stall, whose bits are driven by
// the out_alu_stall output of each generated VX_alu instance; it is high
// when at least one of those bits is high.
wire internal_stall;
assign internal_stall = |alu_stall;
// out_delay is asserted when the no_slot_exec input is high or when any ALU
// is stalling.
assign out_delay = no_slot_exec || internal_stall;
wire [$clog2(`NT)-1:0] jal_branch_use_index; wire [$clog2(`NT)-1:0] jal_branch_use_index;
wire jal_branch_found_valid; wire jal_branch_found_valid;
@@ -103,7 +115,7 @@ module VX_execute_unit (
// Actual Writeback // Actual Writeback
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd; assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb; assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid; assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid && !internal_stall;
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num; assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result; assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
@@ -163,4 +175,4 @@ module VX_execute_unit (
// assign out_is_csr = VX_exec_unit_req.is_csr; // assign out_is_csr = VX_exec_unit_req.is_csr;
// assign out_csr_address = VX_exec_unit_req.csr_address; // assign out_csr_address = VX_exec_unit_req.csr_address;
endmodule endmodule : VX_execute_unit

View File

@@ -7,6 +7,7 @@ module VX_gpr_stage (
input wire schedule_delay, input wire schedule_delay,
input wire memory_delay, input wire memory_delay,
input wire exec_delay,
output wire gpr_stage_delay, output wire gpr_stage_delay,
// inputs // inputs
@@ -93,7 +94,10 @@ module VX_gpr_stage (
wire stall_lsu = memory_delay; wire stall_lsu = memory_delay;
wire flush_lsu = schedule_delay && !stall_lsu; wire flush_lsu = schedule_delay && !stall_lsu;
assign gpr_stage_delay = stall_lsu; wire stall_exec = exec_delay;
wire flush_exec = schedule_delay && !stall_exec;
assign gpr_stage_delay = stall_lsu || stall_exec;
`ifdef ASIC `ifdef ASIC
wire delayed_lsu_last_cycle; wire delayed_lsu_last_cycle;
@@ -145,8 +149,8 @@ module VX_gpr_stage (
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg( VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_rest), .stall(stall_exec),
.flush(flush_rest), .flush(flush_exec),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}), .in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
); );
@@ -193,8 +197,8 @@ module VX_gpr_stage (
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg( VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall_rest), .stall(stall_exec),
.flush(flush_rest), .flush(flush_exec),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}), .in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask }) .out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
); );
@@ -219,4 +223,4 @@ module VX_gpr_stage (
`endif `endif
endmodule endmodule : VX_gpr_stage

View File

@@ -6,6 +6,7 @@ module VX_scheduler (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire memory_delay, input wire memory_delay,
input wire exec_delay,
input wire gpr_stage_delay, input wire gpr_stage_delay,
VX_frE_to_bckE_req_inter VX_bckE_req, VX_frE_to_bckE_req_inter VX_bckE_req,
VX_wb_inter VX_writeback_inter, VX_wb_inter VX_writeback_inter,
@@ -27,7 +28,11 @@ module VX_scheduler (
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE); wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ); wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
// classify our next instruction.
wire is_mem = is_store || is_load; wire is_mem = is_store || is_load;
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_csr = VX_bckE_req.is_csr;
wire is_exec = !is_mem && !is_gpu && !is_csr;
wire rs1_pass = ((valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs1))); wire rs1_pass = ((valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs1)));
@@ -44,8 +49,10 @@ module VX_scheduler (
wire rename_valid = rs1_rename_qual || rs2_rename_qual ; wire rename_valid = rs1_rename_qual || rs2_rename_qual ;
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid)) || (memory_delay && (is_mem)) || (gpr_stage_delay && is_mem); || (memory_delay && is_mem)
|| (gpr_stage_delay && (is_mem || is_exec))
|| (exec_delay && is_exec);
integer i; integer i;
integer w; integer w;

View File

@@ -14,10 +14,10 @@ module VX_writeback (
// Actual WB to GPR // Actual WB to GPR
VX_wb_inter VX_writeback_inter, VX_wb_inter VX_writeback_inter,
output wire no_slot_mem output wire no_slot_mem,
output wire no_slot_exec
); );
VX_wb_inter VX_writeback_tempp(); VX_wb_inter VX_writeback_tempp();
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid); wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
@@ -25,38 +25,39 @@ module VX_writeback (
wire csr_wb = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid); wire csr_wb = (VX_csr_wb.wb != 0) && (|VX_csr_wb.valid);
assign no_slot_mem = mem_wb && (exec_wb || csr_wb); assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_exec = exec_wb && (csr_wb);
assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result : assign VX_writeback_tempp.write_data = csr_wb ? VX_csr_wb.csr_result :
csr_wb ? VX_csr_wb.csr_result : exec_wb ? VX_inst_exec_wb.alu_result :
mem_wb ? VX_mem_wb.loaded_data : mem_wb ? VX_mem_wb.loaded_data :
0; 0;
assign VX_writeback_tempp.wb_valid = exec_wb ? VX_inst_exec_wb.wb_valid : assign VX_writeback_tempp.wb_valid = csr_wb ? VX_csr_wb.valid :
csr_wb ? VX_csr_wb.valid : exec_wb ? VX_inst_exec_wb.wb_valid :
mem_wb ? VX_mem_wb.wb_valid : mem_wb ? VX_mem_wb.wb_valid :
0; 0;
assign VX_writeback_tempp.rd = exec_wb ? VX_inst_exec_wb.rd : assign VX_writeback_tempp.rd = csr_wb ? VX_csr_wb.rd :
csr_wb ? VX_csr_wb.rd : exec_wb ? VX_inst_exec_wb.rd :
mem_wb ? VX_mem_wb.rd : mem_wb ? VX_mem_wb.rd :
0; 0;
assign VX_writeback_tempp.wb = exec_wb ? VX_inst_exec_wb.wb : assign VX_writeback_tempp.wb = csr_wb ? VX_csr_wb.wb :
csr_wb ? VX_csr_wb.wb : exec_wb ? VX_inst_exec_wb.wb :
mem_wb ? VX_mem_wb.wb : mem_wb ? VX_mem_wb.wb :
0; 0;
assign VX_writeback_tempp.wb_warp_num = exec_wb ? VX_inst_exec_wb.wb_warp_num : assign VX_writeback_tempp.wb_warp_num = csr_wb ? VX_csr_wb.warp_num :
csr_wb ? VX_csr_wb.warp_num : exec_wb ? VX_inst_exec_wb.wb_warp_num :
mem_wb ? VX_mem_wb.wb_warp_num : mem_wb ? VX_mem_wb.wb_warp_num :
0; 0;
assign VX_writeback_tempp.wb_pc = exec_wb ? VX_inst_exec_wb.exec_wb_pc : assign VX_writeback_tempp.wb_pc = csr_wb ? 32'hdeadbeef :
csr_wb ? 32'hdeadbeef : exec_wb ? VX_inst_exec_wb.exec_wb_pc :
mem_wb ? VX_mem_wb.mem_wb_pc : mem_wb ? VX_mem_wb.mem_wb_pc :
32'hdeadbeef; 32'hdeadbeef;
@@ -65,17 +66,6 @@ module VX_writeback (
wire[`NT-1:0][31:0] use_wb_data; wire[`NT-1:0][31:0] use_wb_data;
reg prev_is_mem;
always @(posedge clk, posedge reset) begin
if (reset)
begin
prev_is_mem = 0;
end begin
prev_is_mem = mem_wb && !no_slot_mem;
end
end
VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register( VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
@@ -85,14 +75,9 @@ module VX_writeback (
.out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc}) .out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
); );
`ifdef SYN assign VX_writeback_inter.write_data = use_wb_data;
assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data;
`else
assign VX_writeback_inter.write_data = use_wb_data;
`endif
endmodule : VX_writeback // VX_writeback
endmodule // VX_writeback

View File

@@ -46,6 +46,7 @@ module Vortex
wire memory_delay; wire memory_delay;
wire exec_delay;
wire gpr_stage_delay; wire gpr_stage_delay;
wire schedule_delay; wire schedule_delay;
@@ -179,6 +180,7 @@ VX_scheduler schedule(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.memory_delay (memory_delay), .memory_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay), .gpr_stage_delay (gpr_stage_delay),
.VX_bckE_req (VX_bckE_req), .VX_bckE_req (VX_bckE_req),
.VX_writeback_inter(VX_writeback_inter), .VX_writeback_inter(VX_writeback_inter),
@@ -197,6 +199,7 @@ VX_back_end vx_back_end(
.VX_dcache_req (VX_dcache_req), .VX_dcache_req (VX_dcache_req),
.VX_writeback_inter (VX_writeback_inter), .VX_writeback_inter (VX_writeback_inter),
.out_mem_delay (memory_delay), .out_mem_delay (memory_delay),
.out_exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay) .gpr_stage_delay (gpr_stage_delay)
); );