rtl refactoring
This commit is contained in:
@@ -20,7 +20,9 @@ module VX_back_end #(
|
|||||||
VX_frE_to_bckE_req_if bckE_req_if,
|
VX_frE_to_bckE_req_if bckE_req_if,
|
||||||
VX_wb_if writeback_if,
|
VX_wb_if writeback_if,
|
||||||
|
|
||||||
VX_warp_ctl_if warp_ctl_if
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
|
|
||||||
|
output wire ebreak
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_wb_if wb_temp_if();
|
VX_wb_if wb_temp_if();
|
||||||
@@ -69,6 +71,8 @@ module VX_back_end #(
|
|||||||
.gpr_stage_delay (gpr_stage_delay)
|
.gpr_stage_delay (gpr_stage_delay)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assign ebreak = exec_unit_req_if.is_etype && (| exec_unit_req_if.valid);
|
||||||
|
|
||||||
VX_lsu_unit lsu_unit (
|
VX_lsu_unit lsu_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|||||||
@@ -9,14 +9,14 @@ module VX_csr_wrapper (
|
|||||||
wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
wire[`NUM_THREADS-1:0][31:0] thread_ids;
|
||||||
wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
wire[`NUM_THREADS-1:0][31:0] warp_ids;
|
||||||
|
|
||||||
genvar cur_t, cur_tw;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : thread_ids_init
|
||||||
assign thread_ids[cur_t] = cur_t;
|
assign thread_ids[i] = i;
|
||||||
end
|
end
|
||||||
|
|
||||||
for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : warp_ids_init
|
||||||
assign warp_ids[cur_tw] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
|
assign warp_ids[i] = {{(31-`NW_BITS-1){1'b0}}, csr_req_if.warp_num};
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|||||||
@@ -8,11 +8,8 @@ module VX_decode(
|
|||||||
// Outputs
|
// Outputs
|
||||||
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
|
VX_frE_to_bckE_req_if frE_to_bckE_req_if,
|
||||||
VX_wstall_if wstall_if,
|
VX_wstall_if wstall_if,
|
||||||
VX_join_if join_if,
|
VX_join_if join_if
|
||||||
|
|
||||||
output wire terminate_sim
|
|
||||||
);
|
);
|
||||||
|
|
||||||
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
wire[31:0] in_instruction = fd_inst_meta_de.instruction;
|
||||||
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
|
||||||
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
|
||||||
@@ -34,7 +31,7 @@ module VX_decode(
|
|||||||
wire is_auipc;
|
wire is_auipc;
|
||||||
wire is_csr;
|
wire is_csr;
|
||||||
wire is_csr_immed;
|
wire is_csr_immed;
|
||||||
wire is_e_inst;
|
wire is_etype;
|
||||||
|
|
||||||
wire is_gpgpu;
|
wire is_gpgpu;
|
||||||
wire is_wspawn;
|
wire is_wspawn;
|
||||||
@@ -47,7 +44,6 @@ module VX_decode(
|
|||||||
wire[6:0] func7;
|
wire[6:0] func7;
|
||||||
wire[11:0] u_12;
|
wire[11:0] u_12;
|
||||||
|
|
||||||
|
|
||||||
wire[7:0] jal_b_19_to_12;
|
wire[7:0] jal_b_19_to_12;
|
||||||
wire jal_b_11;
|
wire jal_b_11;
|
||||||
wire[9:0] jal_b_10_to_1;
|
wire[9:0] jal_b_10_to_1;
|
||||||
@@ -110,8 +106,6 @@ module VX_decode(
|
|||||||
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
||||||
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
||||||
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
||||||
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
|
||||||
assign is_e_inst = in_instruction == 32'h00000073;
|
|
||||||
|
|
||||||
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
||||||
|
|
||||||
@@ -132,7 +126,7 @@ module VX_decode(
|
|||||||
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
|
assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
|
||||||
assign frE_to_bckE_req_if.is_csr = is_csr;
|
assign frE_to_bckE_req_if.is_csr = is_csr;
|
||||||
|
|
||||||
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
|
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_etype) ? `WB_JAL :
|
||||||
is_linst ? `WB_MEM :
|
is_linst ? `WB_MEM :
|
||||||
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
|
||||||
`NO_WB;
|
`NO_WB;
|
||||||
@@ -165,8 +159,8 @@ module VX_decode(
|
|||||||
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
|
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
|
||||||
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
|
||||||
|
|
||||||
assign jal_sys_cond1 = func3 == 3'h0;
|
assign jal_sys_cond1 = (func3 == 3'h0);
|
||||||
assign jal_sys_cond2 = u_12 < 12'h2;
|
assign jal_sys_cond2 = (u_12 < 12'h2);
|
||||||
|
|
||||||
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
|
||||||
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
|
||||||
@@ -202,12 +196,9 @@ module VX_decode(
|
|||||||
assign frE_to_bckE_req_if.jal = temp_jal;
|
assign frE_to_bckE_req_if.jal = temp_jal;
|
||||||
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
|
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
|
||||||
|
|
||||||
// wire is_ebreak;
|
// ecall/ebreak
|
||||||
|
assign is_etype = (curr_opcode == `SYS_INST) && jal_sys_jal;
|
||||||
// assign is_ebreak = is_e_inst;
|
assign frE_to_bckE_req_if.is_etype = is_etype;
|
||||||
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
|
|
||||||
assign frE_to_bckE_req_if.ebreak = ebreak;
|
|
||||||
assign terminate_sim = is_e_inst;
|
|
||||||
|
|
||||||
// CSR
|
// CSR
|
||||||
|
|
||||||
@@ -235,8 +226,7 @@ module VX_decode(
|
|||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (curr_opcode)
|
case (curr_opcode)
|
||||||
`B_INST:
|
`B_INST: begin
|
||||||
begin
|
|
||||||
// $display("BRANCH IN DECODE");
|
// $display("BRANCH IN DECODE");
|
||||||
temp_branch_stall = 1'b1 && (| in_valid);
|
temp_branch_stall = 1'b1 && (| in_valid);
|
||||||
case (func3)
|
case (func3)
|
||||||
@@ -249,19 +239,15 @@ module VX_decode(
|
|||||||
default: temp_branch_type = `NO_BRANCH;
|
default: temp_branch_type = `NO_BRANCH;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
`JAL_INST: begin
|
||||||
`JAL_INST:
|
|
||||||
begin
|
|
||||||
temp_branch_type = `NO_BRANCH;
|
temp_branch_type = `NO_BRANCH;
|
||||||
temp_branch_stall = 1'b1 && (| in_valid);
|
temp_branch_stall = 1'b1 && (| in_valid);
|
||||||
end
|
end
|
||||||
`JALR_INST:
|
`JALR_INST: begin
|
||||||
begin
|
|
||||||
temp_branch_type = `NO_BRANCH;
|
temp_branch_type = `NO_BRANCH;
|
||||||
temp_branch_stall = 1'b1 && (| in_valid);
|
temp_branch_stall = 1'b1 && (| in_valid);
|
||||||
end
|
end
|
||||||
default:
|
default: begin
|
||||||
begin
|
|
||||||
temp_branch_type = `NO_BRANCH;
|
temp_branch_type = `NO_BRANCH;
|
||||||
temp_branch_stall = 1'b0 && (| in_valid);
|
temp_branch_stall = 1'b0 && (| in_valid);
|
||||||
end
|
end
|
||||||
@@ -326,7 +312,7 @@ module VX_decode(
|
|||||||
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
|
||||||
|
|
||||||
/*always_comb begin
|
/*always_comb begin
|
||||||
if (1'($time & 1) && |fd_inst_meta_de.valid) begin
|
if (1'($time & 1) && (| fd_inst_meta_de.valid)) begin
|
||||||
$display("*** %t: decode: opcode=%h", $time, curr_opcode);
|
$display("*** %t: decode: opcode=%h", $time, curr_opcode);
|
||||||
end
|
end
|
||||||
end*/
|
end*/
|
||||||
|
|||||||
@@ -64,7 +64,7 @@ module VX_dmem_ctrl (
|
|||||||
.PRFQ_SIZE (`SPRFQ_SIZE),
|
.PRFQ_SIZE (`SPRFQ_SIZE),
|
||||||
.PRFQ_STRIDE (`SPRFQ_STRIDE),
|
.PRFQ_STRIDE (`SPRFQ_STRIDE),
|
||||||
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
|
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
|
||||||
.SNOOP_FORWARDING_ENABLE(0),
|
.SNOOP_FORWARDING (0),
|
||||||
.DRAM_ENABLE (0),
|
.DRAM_ENABLE (0),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
||||||
@@ -135,7 +135,7 @@ module VX_dmem_ctrl (
|
|||||||
.PRFQ_SIZE (`DPRFQ_SIZE),
|
.PRFQ_SIZE (`DPRFQ_SIZE),
|
||||||
.PRFQ_STRIDE (`DPRFQ_STRIDE),
|
.PRFQ_STRIDE (`DPRFQ_STRIDE),
|
||||||
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
|
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
|
||||||
.SNOOP_FORWARDING_ENABLE(0),
|
.SNOOP_FORWARDING (0),
|
||||||
.DRAM_ENABLE (1),
|
.DRAM_ENABLE (1),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
||||||
@@ -206,7 +206,7 @@ module VX_dmem_ctrl (
|
|||||||
.PRFQ_SIZE (`IPRFQ_SIZE),
|
.PRFQ_SIZE (`IPRFQ_SIZE),
|
||||||
.PRFQ_STRIDE (`IPRFQ_STRIDE),
|
.PRFQ_STRIDE (`IPRFQ_STRIDE),
|
||||||
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
|
||||||
.SNOOP_FORWARDING_ENABLE(0),
|
.SNOOP_FORWARDING (0),
|
||||||
.DRAM_ENABLE (1),
|
.DRAM_ENABLE (1),
|
||||||
.WRITE_ENABLE (0),
|
.WRITE_ENABLE (0),
|
||||||
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
.CORE_TAG_WIDTH (`CORE_REQ_TAG_WIDTH),
|
||||||
|
|||||||
@@ -44,27 +44,28 @@ module VX_exec_unit (
|
|||||||
|
|
||||||
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
wire[`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
wire[`NUM_THREADS-1:0] alu_stall;
|
wire[`NUM_THREADS-1:0] alu_stall;
|
||||||
genvar index_out_reg;
|
|
||||||
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : alu_defs
|
||||||
VX_alu_unit alu_unit (
|
VX_alu_unit alu_unit (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.src_a (in_a_reg_data[index_out_reg]),
|
.src_a (in_a_reg_data[i]),
|
||||||
.src_b (in_b_reg_data[index_out_reg]),
|
.src_b (in_b_reg_data[i]),
|
||||||
.src_rs2 (in_rs2_src),
|
.src_rs2 (in_rs2_src),
|
||||||
.itype_immed (in_itype_immed),
|
.itype_immed (in_itype_immed),
|
||||||
.upper_immed (in_upper_immed),
|
.upper_immed (in_upper_immed),
|
||||||
.alu_op (in_alu_op),
|
.alu_op (in_alu_op),
|
||||||
.curr_PC (in_curr_PC),
|
.curr_PC (in_curr_PC),
|
||||||
.alu_result (alu_result[index_out_reg]),
|
.alu_result (alu_result[i]),
|
||||||
.alu_stall (alu_stall[index_out_reg])
|
.alu_stall (alu_stall[i])
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
wire internal_stall;
|
wire internal_stall;
|
||||||
assign internal_stall = |alu_stall;
|
assign internal_stall = (| alu_stall);
|
||||||
|
|
||||||
assign delay = no_slot_exec || internal_stall;
|
assign delay = no_slot_exec || internal_stall;
|
||||||
|
|
||||||
@@ -98,11 +99,10 @@ module VX_exec_unit (
|
|||||||
endcase // in_branch_type
|
endcase // in_branch_type
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
|
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
|
||||||
genvar i;
|
|
||||||
generate
|
generate
|
||||||
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
|
for (i = 0; i < `NUM_THREADS; i=i+1) begin
|
||||||
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
|
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|||||||
@@ -9,8 +9,7 @@ module VX_fetch (
|
|||||||
input wire icache_stage_delay,
|
input wire icache_stage_delay,
|
||||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||||
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
input wire[`NUM_THREADS-1:0] icache_stage_valids,
|
||||||
|
output wire busy,
|
||||||
output wire ebreak,
|
|
||||||
VX_jal_rsp_if jal_rsp_if,
|
VX_jal_rsp_if jal_rsp_if,
|
||||||
VX_branch_rsp_if branch_rsp_if,
|
VX_branch_rsp_if branch_rsp_if,
|
||||||
VX_inst_meta_if fe_inst_meta_fi,
|
VX_inst_meta_if fe_inst_meta_fi,
|
||||||
@@ -45,7 +44,7 @@ module VX_fetch (
|
|||||||
.ctm_warp_num (warp_ctl_if.warp_num),
|
.ctm_warp_num (warp_ctl_if.warp_num),
|
||||||
|
|
||||||
// WHALT
|
// WHALT
|
||||||
.whalt (warp_ctl_if.ebreak),
|
.whalt (warp_ctl_if.whalt),
|
||||||
.whalt_warp_num (warp_ctl_if.warp_num),
|
.whalt_warp_num (warp_ctl_if.warp_num),
|
||||||
|
|
||||||
// Wstall
|
// Wstall
|
||||||
@@ -83,7 +82,7 @@ module VX_fetch (
|
|||||||
.thread_mask (thread_mask),
|
.thread_mask (thread_mask),
|
||||||
.warp_num (warp_num),
|
.warp_num (warp_num),
|
||||||
.warp_pc (warp_pc),
|
.warp_pc (warp_pc),
|
||||||
.ebreak (ebreak),
|
.busy (busy),
|
||||||
.scheduled_warp (scheduled_warp)
|
.scheduled_warp (scheduled_warp)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -15,8 +15,7 @@ module VX_front_end (
|
|||||||
VX_branch_rsp_if branch_rsp_if,
|
VX_branch_rsp_if branch_rsp_if,
|
||||||
|
|
||||||
VX_frE_to_bckE_req_if bckE_req_if,
|
VX_frE_to_bckE_req_if bckE_req_if,
|
||||||
|
output wire busy
|
||||||
output wire fetch_ebreak
|
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_inst_meta_if fe_inst_meta_fi();
|
VX_inst_meta_if fe_inst_meta_fi();
|
||||||
@@ -29,14 +28,9 @@ module VX_front_end (
|
|||||||
wire total_freeze = schedule_delay;
|
wire total_freeze = schedule_delay;
|
||||||
wire icache_stage_delay;
|
wire icache_stage_delay;
|
||||||
|
|
||||||
wire vortex_ebreak;
|
|
||||||
wire terminate_sim;
|
|
||||||
|
|
||||||
wire[`NW_BITS-1:0] icache_stage_wid;
|
wire[`NW_BITS-1:0] icache_stage_wid;
|
||||||
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
wire[`NUM_THREADS-1:0] icache_stage_valids;
|
||||||
|
|
||||||
assign fetch_ebreak = vortex_ebreak || terminate_sim;
|
|
||||||
|
|
||||||
VX_wstall_if wstall_if();
|
VX_wstall_if wstall_if();
|
||||||
VX_join_if join_if();
|
VX_join_if join_if();
|
||||||
|
|
||||||
@@ -52,7 +46,7 @@ module VX_front_end (
|
|||||||
.warp_ctl_if (warp_ctl_if),
|
.warp_ctl_if (warp_ctl_if),
|
||||||
.icache_stage_delay (icache_stage_delay),
|
.icache_stage_delay (icache_stage_delay),
|
||||||
.branch_rsp_if (branch_rsp_if),
|
.branch_rsp_if (branch_rsp_if),
|
||||||
.ebreak (vortex_ebreak), // fetch_ebreak
|
.busy (busy),
|
||||||
.fe_inst_meta_fi (fe_inst_meta_fi)
|
.fe_inst_meta_fi (fe_inst_meta_fi)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -91,8 +85,7 @@ module VX_front_end (
|
|||||||
.fd_inst_meta_de (fd_inst_meta_de),
|
.fd_inst_meta_de (fd_inst_meta_de),
|
||||||
.frE_to_bckE_req_if (frE_to_bckE_req_if),
|
.frE_to_bckE_req_if (frE_to_bckE_req_if),
|
||||||
.wstall_if (wstall_if),
|
.wstall_if (wstall_if),
|
||||||
.join_if (join_if),
|
.join_if (join_if)
|
||||||
.terminate_sim (terminate_sim)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
wire no_br_stall = 0;
|
wire no_br_stall = 0;
|
||||||
|
|||||||
@@ -32,10 +32,10 @@ module VX_gpr (
|
|||||||
wire going_to_write = write_enable & (| writeback_if.wb_valid);
|
wire going_to_write = write_enable & (| writeback_if.wb_valid);
|
||||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
|
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
|
||||||
|
|
||||||
genvar curr_t;
|
genvar i;
|
||||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
|
for (i = 0; i < `NUM_THREADS; i=i+1) begin
|
||||||
wire local_write = write_enable & writeback_if.wb_valid[curr_t];
|
wire local_write = write_enable & writeback_if.wb_valid[i];
|
||||||
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
|
assign write_bit_mask[i] = {`NUM_GPRS{~local_write}};
|
||||||
end
|
end
|
||||||
|
|
||||||
// wire cenb = !going_to_write;
|
// wire cenb = !going_to_write;
|
||||||
@@ -50,14 +50,11 @@ module VX_gpr (
|
|||||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b;
|
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b;
|
||||||
|
|
||||||
`ifndef SYN
|
`ifndef SYN
|
||||||
genvar thread;
|
genvar j;
|
||||||
genvar curr_bit;
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
|
||||||
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
|
for (j = 0; j < `NUM_GPRS; j=j+1) begin
|
||||||
begin
|
assign a_reg_data[i][j] = ((temp_a[i][j] === 1'dx) || cena_1 )? 1'b0 : temp_a[i][j];
|
||||||
for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1)
|
assign b_reg_data[i][j] = ((temp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : temp_b[i][j];
|
||||||
begin
|
|
||||||
assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
|
|
||||||
assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`else
|
`else
|
||||||
@@ -67,8 +64,7 @@ module VX_gpr (
|
|||||||
|
|
||||||
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
|
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
|
||||||
|
|
||||||
genvar curr_base_thread;
|
for (i = 0; i < 'NT; i=i+4)
|
||||||
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
|
|
||||||
begin
|
begin
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
rf2_32x128_wm1 first_ram (
|
rf2_32x128_wm1 first_ram (
|
||||||
@@ -77,17 +73,17 @@ module VX_gpr (
|
|||||||
.CENYB(),
|
.CENYB(),
|
||||||
.WENYB(),
|
.WENYB(),
|
||||||
.AYB(),
|
.AYB(),
|
||||||
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
|
.QA(temp_a[(i+3):(i)]),
|
||||||
.SOA(),
|
.SOA(),
|
||||||
.SOB(),
|
.SOB(),
|
||||||
.CLKA(clk),
|
.CLKA(clk),
|
||||||
.CENA(cena_1),
|
.CENA(cena_1),
|
||||||
.AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]),
|
.AA(gpr_read_if.rs1[(i+3):(i)]),
|
||||||
.CLKB(clk),
|
.CLKB(clk),
|
||||||
.CENB(cenb),
|
.CENB(cenb),
|
||||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
.WENB(write_bit_mask[(i+3):(i)]),
|
||||||
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
.AB(writeback_if.rd[(i+3):(i)]),
|
||||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
.DB(to_write[(i+3):(i)]),
|
||||||
.EMAA(3'b011),
|
.EMAA(3'b011),
|
||||||
.EMASA(1'b0),
|
.EMASA(1'b0),
|
||||||
.EMAB(3'b011),
|
.EMAB(3'b011),
|
||||||
@@ -116,17 +112,17 @@ module VX_gpr (
|
|||||||
.CENYB(),
|
.CENYB(),
|
||||||
.WENYB(),
|
.WENYB(),
|
||||||
.AYB(),
|
.AYB(),
|
||||||
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
|
.QA(temp_b[(i+3):(i)]),
|
||||||
.SOA(),
|
.SOA(),
|
||||||
.SOB(),
|
.SOB(),
|
||||||
.CLKA(clk),
|
.CLKA(clk),
|
||||||
.CENA(cena_2),
|
.CENA(cena_2),
|
||||||
.AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]),
|
.AA(gpr_read_if.rs2[(i+3):(i)]),
|
||||||
.CLKB(clk),
|
.CLKB(clk),
|
||||||
.CENB(cenb),
|
.CENB(cenb),
|
||||||
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
|
.WENB(write_bit_mask[(i+3):(i)]),
|
||||||
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
|
.AB(writeback_if.rd[(i+3):(i)]),
|
||||||
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
|
.DB(to_write[(i+3):(i)]),
|
||||||
.EMAA(3'b011),
|
.EMAA(3'b011),
|
||||||
.EMASA(1'b0),
|
.EMASA(1'b0),
|
||||||
.EMAB(3'b011),
|
.EMAB(3'b011),
|
||||||
|
|||||||
@@ -20,13 +20,13 @@ module VX_gpr_ram (
|
|||||||
//--
|
//--
|
||||||
end else begin
|
end else begin
|
||||||
if (we) begin
|
if (we) begin
|
||||||
integer t;
|
integer i;
|
||||||
for (t = 0; t < `NUM_THREADS; t = t + 1) begin
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin
|
||||||
if (be[t]) begin
|
if (be[i]) begin
|
||||||
ram[waddr][t][0] <= wdata[t][7:0];
|
ram[waddr][i][0] <= wdata[i][7:0];
|
||||||
ram[waddr][t][1] <= wdata[t][15:8];
|
ram[waddr][i][1] <= wdata[i][15:8];
|
||||||
ram[waddr][t][2] <= wdata[t][23:16];
|
ram[waddr][i][2] <= wdata[i][23:16];
|
||||||
ram[waddr][t][3] <= wdata[t][31:24];
|
ram[waddr][i][3] <= wdata[i][31:24];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -59,9 +59,6 @@ module VX_gpr_stage (
|
|||||||
.b_reg_data (gpr_datf_if.b_reg_data)
|
.b_reg_data (gpr_datf_if.b_reg_data)
|
||||||
);
|
);
|
||||||
|
|
||||||
// assign bckE_req_if.is_csr = is_csr;
|
|
||||||
// assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0];
|
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_exec_unit_req_if exec_unit_req_temp_if();
|
VX_exec_unit_req_if exec_unit_req_temp_if();
|
||||||
VX_lsu_req_if lsu_req_temp_if();
|
VX_lsu_req_if lsu_req_temp_if();
|
||||||
@@ -149,8 +146,8 @@ module VX_gpr_stage (
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_exec),
|
.stall (stall_exec),
|
||||||
.flush (flush_exec),
|
.flush (flush_exec),
|
||||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||||
);
|
);
|
||||||
|
|
||||||
assign exec_unit_req_if.a_reg_data = real_base_address;
|
assign exec_unit_req_if.a_reg_data = real_base_address;
|
||||||
@@ -202,8 +199,8 @@ module VX_gpr_stage (
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_exec),
|
.stall (stall_exec),
|
||||||
.flush (flush_exec),
|
.flush (flush_exec),
|
||||||
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.ebreak, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
.in ({exec_unit_req_temp_if.valid, exec_unit_req_temp_if.warp_num, exec_unit_req_temp_if.curr_PC, exec_unit_req_temp_if.PC_next, exec_unit_req_temp_if.rd, exec_unit_req_temp_if.wb, exec_unit_req_temp_if.a_reg_data, exec_unit_req_temp_if.b_reg_data, exec_unit_req_temp_if.alu_op, exec_unit_req_temp_if.rs1, exec_unit_req_temp_if.rs2, exec_unit_req_temp_if.rs2_src, exec_unit_req_temp_if.itype_immed, exec_unit_req_temp_if.upper_immed, exec_unit_req_temp_if.branch_type, exec_unit_req_temp_if.jalQual, exec_unit_req_temp_if.jal, exec_unit_req_temp_if.jal_offset, exec_unit_req_temp_if.is_etype, exec_unit_req_temp_if.wspawn, exec_unit_req_temp_if.is_csr, exec_unit_req_temp_if.csr_address, exec_unit_req_temp_if.csr_immed, exec_unit_req_temp_if.csr_mask}),
|
||||||
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.ebreak , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
.out ({exec_unit_req_if.valid , exec_unit_req_if.warp_num , exec_unit_req_if.curr_PC , exec_unit_req_if.PC_next , exec_unit_req_if.rd , exec_unit_req_if.wb , exec_unit_req_if.a_reg_data , exec_unit_req_if.b_reg_data , exec_unit_req_if.alu_op , exec_unit_req_if.rs1 , exec_unit_req_if.rs2 , exec_unit_req_if.rs2_src , exec_unit_req_if.itype_immed , exec_unit_req_if.upper_immed , exec_unit_req_if.branch_type , exec_unit_req_if.jalQual , exec_unit_req_if.jal , exec_unit_req_if.jal_offset , exec_unit_req_if.is_etype , exec_unit_req_if.wspawn , exec_unit_req_if.is_csr , exec_unit_req_if.csr_address , exec_unit_req_if.csr_immed , exec_unit_req_if.csr_mask })
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
|
|||||||
@@ -15,10 +15,10 @@ module VX_gpr_wrapper (
|
|||||||
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
|
wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;
|
||||||
|
|
||||||
wire[`NUM_THREADS-1:0][31:0] jal_data;
|
wire[`NUM_THREADS-1:0][31:0] jal_data;
|
||||||
genvar index;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : jal_data_assign
|
||||||
assign jal_data[index] = gpr_jal_if.curr_PC;
|
assign jal_data[i] = gpr_jal_if.curr_PC;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
@@ -46,22 +46,19 @@ module VX_gpr_wrapper (
|
|||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
genvar warp_index;
|
|
||||||
generate
|
generate
|
||||||
|
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : warp_gprs
|
||||||
for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
|
wire valid_write_request = i == writeback_if.warp_num;
|
||||||
wire valid_write_request = warp_index == writeback_if.warp_num;
|
|
||||||
VX_gpr gpr(
|
VX_gpr gpr(
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_write_request (valid_write_request),
|
.valid_write_request (valid_write_request),
|
||||||
.gpr_read_if (gpr_read_if),
|
.gpr_read_if (gpr_read_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.a_reg_data (temp_a_reg_data[warp_index]),
|
.a_reg_data (temp_a_reg_data[i]),
|
||||||
.b_reg_data (temp_b_reg_data[warp_index])
|
.b_reg_data (temp_b_reg_data[i])
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -13,10 +13,10 @@ module VX_gpu_inst (
|
|||||||
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
wire[`NUM_THREADS-1:0] tmc_new_mask;
|
||||||
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
|
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
|
||||||
|
|
||||||
genvar curr_t;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
|
for (i = 0; i < `NUM_THREADS; i=i+1) begin : tmc_new_mask_init
|
||||||
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
|
assign tmc_new_mask[i] = all_threads ? 1 : i < gpu_inst_req_if.a_reg_data[0];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
@@ -26,18 +26,16 @@ module VX_gpu_inst (
|
|||||||
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
|
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
|
||||||
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
|
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
|
||||||
|
|
||||||
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
|
assign warp_ctl_if.whalt = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
|
||||||
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
|
|
||||||
|
|
||||||
wire wspawn = gpu_inst_req_if.is_wspawn;
|
wire wspawn = gpu_inst_req_if.is_wspawn;
|
||||||
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
|
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
|
||||||
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
|
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
|
||||||
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
wire[`NUM_WARPS-1:0] wspawn_new_active;
|
||||||
|
|
||||||
genvar curr_w;
|
|
||||||
generate
|
generate
|
||||||
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
|
for (i = 0; i < `NUM_WARPS; i=i+1) begin : wspawn_new_active_init
|
||||||
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
|
assign wspawn_new_active[i] = all_active ? 1 : i < gpu_inst_req_if.a_reg_data[0];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
@@ -57,14 +55,11 @@ module VX_gpu_inst (
|
|||||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
||||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
||||||
|
|
||||||
// VX_gpu_inst_req.pc
|
|
||||||
genvar curr_s_t;
|
|
||||||
generate
|
generate
|
||||||
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
|
for (i = 0; i < `NUM_THREADS; i=i+1) begin : masks_init
|
||||||
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
|
wire curr_bool = (gpu_inst_req_if.a_reg_data[i] == 32'b1);
|
||||||
|
assign split_new_use_mask[i] = curr_valids[i] & (curr_bool);
|
||||||
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
|
assign split_new_later_mask[i] = curr_valids[i] & (!curr_bool);
|
||||||
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
|
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|||||||
@@ -45,11 +45,12 @@ module VX_icache_stage (
|
|||||||
// Core can't accept response
|
// Core can't accept response
|
||||||
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
|
assign icache_rsp_if.core_rsp_ready = ~total_freeze;
|
||||||
|
|
||||||
integer w;
|
integer i;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
for (w = 0; w < `NUM_WARPS; w = w + 1) begin
|
for (i = 0; i < `NUM_WARPS; i = i + 1) begin
|
||||||
threads_active[w] <= 0;
|
threads_active[i] <= 0;
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
if (valid_inst && !icache_stage_delay) begin
|
if (valid_inst && !icache_stage_delay) begin
|
||||||
|
|||||||
@@ -21,12 +21,12 @@ module VX_inst_multiplex (
|
|||||||
wire is_csr = bckE_req_if.is_csr;
|
wire is_csr = bckE_req_if.is_csr;
|
||||||
// wire is_gpu = 0;
|
// wire is_gpu = 0;
|
||||||
|
|
||||||
genvar currT;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (currT = 0; currT < `NUM_THREADS; currT = currT + 1) begin : mask_init
|
for (i = 0; i < `NUM_THREADS; i = i + 1) begin : mask_init
|
||||||
assign is_mem_mask[currT] = is_mem;
|
assign is_mem_mask[i] = is_mem;
|
||||||
assign is_gpu_mask[currT] = is_gpu;
|
assign is_gpu_mask[i] = is_gpu;
|
||||||
assign is_csr_mask[currT] = is_csr;
|
assign is_csr_mask[i] = is_csr;
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
@@ -64,7 +64,7 @@ module VX_inst_multiplex (
|
|||||||
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
|
assign exec_unit_req_if.jalQual = bckE_req_if.jalQual;
|
||||||
assign exec_unit_req_if.jal = bckE_req_if.jal;
|
assign exec_unit_req_if.jal = bckE_req_if.jal;
|
||||||
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
|
assign exec_unit_req_if.jal_offset = bckE_req_if.jal_offset;
|
||||||
assign exec_unit_req_if.ebreak = bckE_req_if.ebreak;
|
assign exec_unit_req_if.is_etype = bckE_req_if.is_etype;
|
||||||
|
|
||||||
|
|
||||||
// GPR Req
|
// GPR Req
|
||||||
|
|||||||
@@ -61,10 +61,10 @@ module VX_lsu_unit (
|
|||||||
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
|
assign {mem_wb_if.pc, mem_wb_if.wb, mem_wb_if.rd, mem_wb_if.warp_num} = dcache_rsp_if.core_rsp_tag;
|
||||||
|
|
||||||
/*always_comb begin
|
/*always_comb begin
|
||||||
if (1'($time & 1) && dcache_req_if.core_req_ready && |dcache_req_if.core_req_valid) begin
|
if (1'($time & 1) && dcache_req_if.core_req_ready && (| dcache_req_if.core_req_valid)) begin
|
||||||
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
$display("*** %t: D$ req: valid=%b, addr=%0h, r=%d, w=%d, pc=%0h, rd=%d, warp=%d, data=%0h", $time, use_valid, use_address, use_mem_read, use_mem_write, use_pc, use_rd, use_warp_num, use_store_data);
|
||||||
end
|
end
|
||||||
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && |dcache_rsp_if.core_rsp_valid) begin
|
if (1'($time & 1) && dcache_rsp_if.core_rsp_ready && (| dcache_rsp_if.core_rsp_valid)) begin
|
||||||
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
$display("*** %t: D$ rsp: valid=%b, pc=%0h, rd=%d, warp=%d, data=%0h", $time, mem_wb_if.valid, mem_wb_if.pc, mem_wb_if.rd, mem_wb_if.warp_num, mem_wb_if.data);
|
||||||
end
|
end
|
||||||
end*/
|
end*/
|
||||||
|
|||||||
@@ -25,12 +25,12 @@ module VX_warp (
|
|||||||
reg [`NUM_THREADS-1:0] valid_t;
|
reg [`NUM_THREADS-1:0] valid_t;
|
||||||
reg [`NUM_THREADS-1:0] valid_zero;
|
reg [`NUM_THREADS-1:0] valid_zero;
|
||||||
|
|
||||||
integer ti;
|
integer i;
|
||||||
initial begin
|
initial begin
|
||||||
real_PC = 0;
|
real_PC = 0;
|
||||||
for (ti = 1; ti < `NUM_THREADS; ti=ti+1) begin
|
for (i = 1; i < `NUM_THREADS; i=i+1) begin
|
||||||
valid_t[ti] = 0; // Thread 1 active
|
valid_t[i] = 0; // Thread 1 active
|
||||||
valid_zero[ti] = 0;
|
valid_zero[i] = 0;
|
||||||
end
|
end
|
||||||
valid_t = 1;
|
valid_t = 1;
|
||||||
valid_zero[0] = 0;
|
valid_zero[0] = 0;
|
||||||
@@ -44,10 +44,10 @@ module VX_warp (
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
genvar tv;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (tv = 0; tv < `NUM_THREADS; tv = tv+1) begin : valid_assign
|
for (i = 0; i < `NUM_THREADS; i = i+1) begin : valid_assign
|
||||||
assign valid[tv] = change_mask ? thread_mask[tv] : stall ? 1'b0 : valid_t[tv];
|
assign valid[i] = change_mask ? thread_mask[i] : stall ? 1'b0 : valid_t[i];
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|||||||
@@ -56,7 +56,7 @@ module VX_warp_sched (
|
|||||||
output wire[`NUM_THREADS-1:0] thread_mask,
|
output wire[`NUM_THREADS-1:0] thread_mask,
|
||||||
output wire[`NW_BITS-1:0] warp_num,
|
output wire[`NW_BITS-1:0] warp_num,
|
||||||
output wire[31:0] warp_pc,
|
output wire[31:0] warp_pc,
|
||||||
output wire ebreak,
|
output wire busy,
|
||||||
output wire scheduled_warp,
|
output wire scheduled_warp,
|
||||||
|
|
||||||
input wire[`NW_BITS-1:0] icache_stage_wid,
|
input wire[`NW_BITS-1:0] icache_stage_wid,
|
||||||
@@ -218,7 +218,7 @@ module VX_warp_sched (
|
|||||||
warp_lock[warp_num] <= 1'b1;
|
warp_lock[warp_num] <= 1'b1;
|
||||||
// warp_lock <= {`NUM_WARPS{1'b1}};
|
// warp_lock <= {`NUM_WARPS{1'b1}};
|
||||||
end
|
end
|
||||||
if (|icache_stage_valids && !stall) begin
|
if ((| icache_stage_valids) && !stall) begin
|
||||||
warp_lock[icache_stage_wid] <= 1'b0;
|
warp_lock[icache_stage_wid] <= 1'b0;
|
||||||
// warp_lock <= {`NUM_WARPS{1'b0}};
|
// warp_lock <= {`NUM_WARPS{1'b0}};
|
||||||
end
|
end
|
||||||
@@ -251,15 +251,6 @@ module VX_warp_sched (
|
|||||||
|
|
||||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||||
|
|
||||||
// integer curr_b;
|
|
||||||
// always @(*) begin
|
|
||||||
// total_barrier_stall = 0;
|
|
||||||
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
|
|
||||||
// begin
|
|
||||||
// total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
|
|
||||||
// end
|
|
||||||
// end
|
|
||||||
|
|
||||||
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
|
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
|
||||||
|
|
||||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
|
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[split_warp_num]};
|
||||||
@@ -267,11 +258,11 @@ module VX_warp_sched (
|
|||||||
|
|
||||||
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
assign {join_fall, join_pc, join_tm} = d[join_warp_num];
|
||||||
|
|
||||||
genvar curr_warp;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
|
for (i = 0; i < `NUM_WARPS; i = i + 1) begin : stacks
|
||||||
wire correct_warp_s = (curr_warp == split_warp_num);
|
wire correct_warp_s = (i == split_warp_num);
|
||||||
wire correct_warp_j = (curr_warp == join_warp_num);
|
wire correct_warp_j = (i == join_warp_num);
|
||||||
|
|
||||||
wire push = (is_split && !dont_split) && correct_warp_s;
|
wire push = (is_split && !dont_split) && correct_warp_s;
|
||||||
wire pop = is_join && correct_warp_j;
|
wire pop = is_join && correct_warp_j;
|
||||||
@@ -284,7 +275,7 @@ module VX_warp_sched (
|
|||||||
.reset(reset),
|
.reset(reset),
|
||||||
.push (push),
|
.push (push),
|
||||||
.pop (pop),
|
.pop (pop),
|
||||||
.d (d[curr_warp]),
|
.d (d[i]),
|
||||||
.q1 (q1),
|
.q1 (q1),
|
||||||
.q2 (q2)
|
.q2 (q2)
|
||||||
);
|
);
|
||||||
@@ -330,6 +321,6 @@ module VX_warp_sched (
|
|||||||
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
|
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
|
||||||
// end
|
// end
|
||||||
|
|
||||||
assign ebreak = (warp_active == 0);
|
assign busy = (warp_active != 0);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -56,7 +56,8 @@ module Vortex #(
|
|||||||
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
||||||
output wire io_rsp_ready,
|
output wire io_rsp_ready,
|
||||||
|
|
||||||
// Debug
|
// Status
|
||||||
|
output wire busy,
|
||||||
output wire ebreak
|
output wire ebreak
|
||||||
);
|
);
|
||||||
`DEBUG_BEGIN
|
`DEBUG_BEGIN
|
||||||
@@ -187,7 +188,7 @@ VX_front_end front_end (
|
|||||||
.icache_req_if (icache_core_req_if),
|
.icache_req_if (icache_core_req_if),
|
||||||
.jal_rsp_if (jal_rsp_if),
|
.jal_rsp_if (jal_rsp_if),
|
||||||
.branch_rsp_if (branch_rsp_if),
|
.branch_rsp_if (branch_rsp_if),
|
||||||
.fetch_ebreak (ebreak)
|
.busy (busy)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_scheduler scheduler (
|
VX_scheduler scheduler (
|
||||||
@@ -217,7 +218,8 @@ VX_back_end #(
|
|||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.mem_delay (memory_delay),
|
.mem_delay (memory_delay),
|
||||||
.exec_delay (exec_delay),
|
.exec_delay (exec_delay),
|
||||||
.gpr_stage_delay (gpr_stage_delay)
|
.gpr_stage_delay (gpr_stage_delay),
|
||||||
|
.ebreak (ebreak)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_dmem_ctrl dmem_ctrl (
|
VX_dmem_ctrl dmem_ctrl (
|
||||||
|
|||||||
@@ -42,7 +42,8 @@ module Vortex_Cluster #(
|
|||||||
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
||||||
output wire io_rsp_ready,
|
output wire io_rsp_ready,
|
||||||
|
|
||||||
// Debug
|
// Status
|
||||||
|
output wire busy,
|
||||||
output wire ebreak
|
output wire ebreak
|
||||||
);
|
);
|
||||||
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
|
wire[`NUM_CORES-1:0] per_core_D_dram_req_read;
|
||||||
@@ -83,6 +84,7 @@ module Vortex_Cluster #(
|
|||||||
wire[`NUM_CORES-1:0] per_core_io_rsp_ready;
|
wire[`NUM_CORES-1:0] per_core_io_rsp_ready;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
|
|
||||||
|
wire[`NUM_CORES-1:0] per_core_busy;
|
||||||
wire[`NUM_CORES-1:0] per_core_ebreak;
|
wire[`NUM_CORES-1:0] per_core_ebreak;
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
@@ -133,6 +135,7 @@ module Vortex_Cluster #(
|
|||||||
.io_rsp_tag (io_rsp_tag),
|
.io_rsp_tag (io_rsp_tag),
|
||||||
.io_rsp_ready (per_core_io_rsp_ready [i]),
|
.io_rsp_ready (per_core_io_rsp_ready [i]),
|
||||||
|
|
||||||
|
.busy (per_core_busy [i]),
|
||||||
.ebreak (per_core_ebreak [i])
|
.ebreak (per_core_ebreak [i])
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
@@ -146,6 +149,7 @@ module Vortex_Cluster #(
|
|||||||
|
|
||||||
assign io_rsp_ready = per_core_io_rsp_ready[0];
|
assign io_rsp_ready = per_core_io_rsp_ready[0];
|
||||||
|
|
||||||
|
assign busy = (| per_core_busy);
|
||||||
assign ebreak = (& per_core_ebreak);
|
assign ebreak = (& per_core_ebreak);
|
||||||
|
|
||||||
if (`L2_ENABLE) begin
|
if (`L2_ENABLE) begin
|
||||||
@@ -221,7 +225,7 @@ module Vortex_Cluster #(
|
|||||||
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
|
.FILL_INVALIDAOR_SIZE (`L2FILL_INVALIDAOR_SIZE),
|
||||||
.DRAM_ENABLE (1),
|
.DRAM_ENABLE (1),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.SNOOP_FORWARDING_ENABLE(1),
|
.SNOOP_FORWARDING (1),
|
||||||
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
.CORE_TAG_WIDTH (`DDRAM_TAG_WIDTH),
|
||||||
.CORE_TAG_ID_BITS (0),
|
.CORE_TAG_ID_BITS (0),
|
||||||
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
|
.DRAM_TAG_WIDTH (`L2DRAM_TAG_WIDTH)
|
||||||
|
|||||||
@@ -40,7 +40,8 @@ module Vortex_Socket (
|
|||||||
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
input wire[`CORE_REQ_TAG_WIDTH-1:0] io_rsp_tag,
|
||||||
output wire io_rsp_ready,
|
output wire io_rsp_ready,
|
||||||
|
|
||||||
// Debug
|
// Status
|
||||||
|
output wire busy,
|
||||||
output wire ebreak
|
output wire ebreak
|
||||||
);
|
);
|
||||||
if (`NUM_CLUSTERS == 1) begin
|
if (`NUM_CLUSTERS == 1) begin
|
||||||
@@ -80,6 +81,7 @@ module Vortex_Socket (
|
|||||||
.io_rsp_tag (io_rsp_tag),
|
.io_rsp_tag (io_rsp_tag),
|
||||||
.io_rsp_ready (io_rsp_ready),
|
.io_rsp_ready (io_rsp_ready),
|
||||||
|
|
||||||
|
.busy (busy),
|
||||||
.ebreak (ebreak)
|
.ebreak (ebreak)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -112,6 +114,7 @@ module Vortex_Socket (
|
|||||||
wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
|
wire[`NUM_CLUSTERS-1:0] per_cluster_io_rsp_ready;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
|
|
||||||
|
wire[`NUM_CLUSTERS-1:0] per_cluster_busy;
|
||||||
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
wire[`NUM_CLUSTERS-1:0] per_cluster_ebreak;
|
||||||
|
|
||||||
genvar i;
|
genvar i;
|
||||||
@@ -151,6 +154,7 @@ module Vortex_Socket (
|
|||||||
.io_rsp_tag (io_rsp_tag),
|
.io_rsp_tag (io_rsp_tag),
|
||||||
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
|
.io_rsp_ready (per_cluster_io_rsp_ready [i]),
|
||||||
|
|
||||||
|
.busy (per_cluster_busy [i]),
|
||||||
.ebreak (per_cluster_ebreak [i])
|
.ebreak (per_cluster_ebreak [i])
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
@@ -164,6 +168,7 @@ module Vortex_Socket (
|
|||||||
|
|
||||||
assign io_rsp_ready = per_cluster_io_rsp_ready[0];
|
assign io_rsp_ready = per_cluster_io_rsp_ready[0];
|
||||||
|
|
||||||
|
assign busy = (| per_cluster_busy);
|
||||||
assign ebreak = (& per_cluster_ebreak);
|
assign ebreak = (& per_cluster_ebreak);
|
||||||
|
|
||||||
// L3 Cache ///////////////////////////////////////////////////////////
|
// L3 Cache ///////////////////////////////////////////////////////////
|
||||||
@@ -219,7 +224,7 @@ module Vortex_Socket (
|
|||||||
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
.FILL_INVALIDAOR_SIZE (`L3FILL_INVALIDAOR_SIZE),
|
||||||
.DRAM_ENABLE (1),
|
.DRAM_ENABLE (1),
|
||||||
.WRITE_ENABLE (1),
|
.WRITE_ENABLE (1),
|
||||||
.SNOOP_FORWARDING_ENABLE(1),
|
.SNOOP_FORWARDING (1),
|
||||||
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
.CORE_TAG_WIDTH (`L2DRAM_TAG_WIDTH),
|
||||||
.CORE_TAG_ID_BITS (0),
|
.CORE_TAG_ID_BITS (0),
|
||||||
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
|
.DRAM_TAG_WIDTH (`L3DRAM_TAG_WIDTH)
|
||||||
|
|||||||
25
hw/rtl/cache/VX_bank.v
vendored
25
hw/rtl/cache/VX_bank.v
vendored
@@ -11,7 +11,7 @@ module VX_bank #(
|
|||||||
parameter WORD_SIZE = 4,
|
parameter WORD_SIZE = 4,
|
||||||
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
// Number of Word requests per cycle {1, 2, 4, 8, ...}
|
||||||
parameter NUM_REQUESTS = 2,
|
parameter NUM_REQUESTS = 2,
|
||||||
// Number of cycles to complete stage 1 (read from memory)
|
// Number of cycles to complete stage 1 (read from memory)
|
||||||
parameter STAGE_1_CYCLES = 2,
|
parameter STAGE_1_CYCLES = 2,
|
||||||
|
|
||||||
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
// Queues feeding into banks Knobs {1, 2, 4, 8, ...}
|
||||||
@@ -46,7 +46,7 @@ module VX_bank #(
|
|||||||
parameter DRAM_ENABLE = 1,
|
parameter DRAM_ENABLE = 1,
|
||||||
|
|
||||||
// Enable snoop forwarding
|
// Enable snoop forwarding
|
||||||
parameter SNOOP_FORWARDING_ENABLE = 0,
|
parameter SNOOP_FORWARDING = 0,
|
||||||
|
|
||||||
// core request tag size
|
// core request tag size
|
||||||
parameter CORE_TAG_WIDTH = 1,
|
parameter CORE_TAG_WIDTH = 1,
|
||||||
@@ -108,7 +108,7 @@ module VX_bank #(
|
|||||||
if (reset) begin
|
if (reset) begin
|
||||||
snoop_state <= 0;
|
snoop_state <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING_ENABLE;
|
snoop_state <= (snoop_state | snp_req_valid) && SNOOP_FORWARDING;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -246,11 +246,11 @@ module VX_bank #(
|
|||||||
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
|
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
|
||||||
`DEBUG_END
|
`DEBUG_END
|
||||||
|
|
||||||
integer i;
|
integer j;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
is_fill_in_pipe = 0;
|
is_fill_in_pipe = 0;
|
||||||
for (i = 0; i < STAGE_1_CYCLES; i=i+1) begin
|
for (j = 0; j < STAGE_1_CYCLES; j=j+1) begin
|
||||||
if (is_fill_st1[i]) begin
|
if (is_fill_st1[j]) begin
|
||||||
is_fill_in_pipe = 1;
|
is_fill_in_pipe = 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -327,8 +327,8 @@ module VX_bank #(
|
|||||||
.out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
.out ({is_snp_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar stage;
|
genvar i;
|
||||||
for (stage = 1; stage < STAGE_1_CYCLES; stage = stage + 1) begin
|
for (i = 1; i < STAGE_1_CYCLES; i = i + 1) begin
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + (`BANK_LINE_WORDS*`WORD_WIDTH))
|
.N(1 + 1 + 1 + `LINE_ADDR_WIDTH + `BASE_ADDR_BITS + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + (`BANK_LINE_WORDS*`WORD_WIDTH))
|
||||||
) s0_1_cc (
|
) s0_1_cc (
|
||||||
@@ -336,8 +336,8 @@ module VX_bank #(
|
|||||||
.reset(reset),
|
.reset(reset),
|
||||||
.stall(stall_bank_pipe),
|
.stall(stall_bank_pipe),
|
||||||
.flush(0),
|
.flush(0),
|
||||||
.in ({is_snp_st1[stage-1], going_to_write_st1[stage-1], valid_st1[stage-1], addr_st1[stage-1], wsel_st1[stage-1], writeword_st1[stage-1], inst_meta_st1[stage-1], is_fill_st1[stage-1], writedata_st1[stage-1]}),
|
.in ({is_snp_st1[i-1], going_to_write_st1[i-1], valid_st1[i-1], addr_st1[i-1], wsel_st1[i-1], writeword_st1[i-1], inst_meta_st1[i-1], is_fill_st1[i-1], writedata_st1[i-1]}),
|
||||||
.out ({is_snp_st1[stage], going_to_write_st1[stage], valid_st1[stage], addr_st1[stage], wsel_st1[stage], writeword_st1[stage], inst_meta_st1[stage], is_fill_st1[stage], writedata_st1[stage]})
|
.out ({is_snp_st1[i], going_to_write_st1[i], valid_st1[i], addr_st1[i], wsel_st1[i], writeword_st1[i], inst_meta_st1[i], is_fill_st1[i], writedata_st1[i]})
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -506,9 +506,10 @@ module VX_bank #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
// Enqueue to CWB Queue
|
// Enqueue to CWB Queue
|
||||||
|
// TODO: should investigate the need for "SNOOP_FORWARDING" here
|
||||||
wire cwbq_push = (valid_st2 && !miss_st2)
|
wire cwbq_push = (valid_st2 && !miss_st2)
|
||||||
&& !cwbq_full
|
&& !cwbq_full
|
||||||
&& !(SNOOP_FORWARDING_ENABLE && (miss_add_mem_write == `BYTE_EN_NO))
|
&& !(SNOOP_FORWARDING && (miss_add_mem_write == `BYTE_EN_NO))
|
||||||
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|
&& !((is_snp_st2 && valid_st2 && ffsq_full)
|
||||||
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|
|| (((valid_st2 && miss_st2 && dirty_st2) || fill_saw_dirty_st2) && dwbq_full)
|
||||||
|| (valid_st2 && miss_st2 && mrvq_full)
|
|| (valid_st2 && miss_st2 && mrvq_full)
|
||||||
@@ -554,7 +555,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data;
|
wire[`BANK_LINE_WORDS-1:0][`WORD_WIDTH-1:0] dwbq_req_data;
|
||||||
|
|
||||||
if (SNOOP_FORWARDING_ENABLE) begin
|
if (SNOOP_FORWARDING) begin
|
||||||
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
|
assign dwbq_req_data = (should_flush && dwbq_push) ? writeword_st2 : readdata_st2;
|
||||||
assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
|
assign dwbq_req_addr = (should_flush && dwbq_push) ? addr_st2 : {readtag_st2, addr_st2[`LINE_SELECT_BITS-1:0]};
|
||||||
end else begin
|
end else begin
|
||||||
|
|||||||
4
hw/rtl/cache/VX_cache.v
vendored
4
hw/rtl/cache/VX_cache.v
vendored
@@ -47,7 +47,7 @@ module VX_cache #(
|
|||||||
parameter DRAM_ENABLE = 1,
|
parameter DRAM_ENABLE = 1,
|
||||||
|
|
||||||
// Enable snoop forwarding
|
// Enable snoop forwarding
|
||||||
parameter SNOOP_FORWARDING_ENABLE = 0,
|
parameter SNOOP_FORWARDING = 0,
|
||||||
|
|
||||||
// Prefetcher
|
// Prefetcher
|
||||||
parameter PRFQ_SIZE = 64,
|
parameter PRFQ_SIZE = 64,
|
||||||
@@ -265,7 +265,7 @@ module VX_cache #(
|
|||||||
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
|
||||||
.DRAM_ENABLE (DRAM_ENABLE),
|
.DRAM_ENABLE (DRAM_ENABLE),
|
||||||
.WRITE_ENABLE (WRITE_ENABLE),
|
.WRITE_ENABLE (WRITE_ENABLE),
|
||||||
.SNOOP_FORWARDING_ENABLE(SNOOP_FORWARDING_ENABLE),
|
.SNOOP_FORWARDING (SNOOP_FORWARDING),
|
||||||
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
.CORE_TAG_WIDTH (CORE_TAG_WIDTH),
|
||||||
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
.CORE_TAG_ID_BITS (CORE_TAG_ID_BITS)
|
||||||
) bank (
|
) bank (
|
||||||
|
|||||||
6
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
6
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -90,10 +90,10 @@ module VX_cache_miss_resrv #(
|
|||||||
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
|
||||||
|
|
||||||
reg [MRVQ_SIZE-1:0] make_ready;
|
reg [MRVQ_SIZE-1:0] make_ready;
|
||||||
genvar curr_e;
|
genvar i;
|
||||||
generate
|
generate
|
||||||
for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin
|
for (i = 0; i < MRVQ_SIZE; i=i+1) begin
|
||||||
assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e] && (addr_table[curr_e] == fill_addr_st1);
|
assign make_ready[i] = is_fill_st1 && valid_table[i] && (addr_table[i] == fill_addr_st1);
|
||||||
end
|
end
|
||||||
endgenerate
|
endgenerate
|
||||||
|
|
||||||
|
|||||||
6
hw/rtl/cache/VX_fill_invalidator.v
vendored
6
hw/rtl/cache/VX_fill_invalidator.v
vendored
@@ -60,10 +60,10 @@ module VX_fill_invalidator #(
|
|||||||
reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill;
|
reg [FILL_INVALIDAOR_SIZE-1:0] matched_fill;
|
||||||
wire matched;
|
wire matched;
|
||||||
|
|
||||||
integer fi;
|
integer i;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
for (fi = 0; fi < FILL_INVALIDAOR_SIZE; fi+=1) begin
|
for (i = 0; i < FILL_INVALIDAOR_SIZE; i+=1) begin
|
||||||
matched_fill[fi] = fills_active[fi] && (fills_address[fi] == fill_addr);
|
matched_fill[i] = fills_active[i] && (fills_address[i] == fill_addr);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -33,10 +33,8 @@ interface VX_exec_unit_req_if ();
|
|||||||
wire jal;
|
wire jal;
|
||||||
wire [31:0] jal_offset;
|
wire [31:0] jal_offset;
|
||||||
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
wire is_etype;
|
||||||
wire ebreak;
|
|
||||||
wire wspawn;
|
wire wspawn;
|
||||||
`IGNORE_WARNINGS_END
|
|
||||||
|
|
||||||
// CSR info
|
// CSR info
|
||||||
wire is_csr;
|
wire is_csr;
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ interface VX_frE_to_bckE_req_if ();
|
|||||||
wire [2:0] branch_type;
|
wire [2:0] branch_type;
|
||||||
wire [19:0] upper_immed;
|
wire [19:0] upper_immed;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
`IGNORE_WARNINGS_BEGIN
|
wire is_etype;
|
||||||
wire ebreak;
|
|
||||||
`IGNORE_WARNINGS_END
|
|
||||||
wire jalQual;
|
wire jalQual;
|
||||||
wire jal;
|
wire jal;
|
||||||
wire [31:0] jal_offset;
|
wire [31:0] jal_offset;
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ interface VX_warp_ctl_if ();
|
|||||||
wire [31:0] wspawn_pc;
|
wire [31:0] wspawn_pc;
|
||||||
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
wire [`NUM_WARPS-1:0] wspawn_new_active;
|
||||||
|
|
||||||
wire ebreak;
|
wire whalt;
|
||||||
|
|
||||||
// barrier
|
// barrier
|
||||||
wire is_barrier;
|
wire is_barrier;
|
||||||
|
|||||||
@@ -65,16 +65,16 @@ module VX_divide #(
|
|||||||
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
||||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||||
|
|
||||||
genvar pipe_stage;
|
genvar i;
|
||||||
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
|
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
||||||
always @(posedge clock or posedge aclr) begin
|
always @(posedge clock or posedge aclr) begin
|
||||||
if (aclr) begin
|
if (aclr) begin
|
||||||
numer_pipe[pipe_stage+1] <= 0;
|
numer_pipe[i+1] <= 0;
|
||||||
denom_pipe[pipe_stage+1] <= 0;
|
denom_pipe[i+1] <= 0;
|
||||||
end
|
end
|
||||||
else if (clken) begin
|
else if (clken) begin
|
||||||
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
|
numer_pipe[i+1] <= numer_pipe[i];
|
||||||
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
|
denom_pipe[i+1] <= denom_pipe[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -83,16 +83,16 @@ module VX_mult #(
|
|||||||
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
|
reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
|
||||||
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
|
||||||
|
|
||||||
genvar pipe_stage;
|
genvar i;
|
||||||
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
|
for (i = 0; i < PIPELINE-1; i = i+1) begin : pipe_stages
|
||||||
always @(posedge clock or posedge aclr) begin
|
always @(posedge clock or posedge aclr) begin
|
||||||
if (aclr) begin
|
if (aclr) begin
|
||||||
dataa_pipe[pipe_stage+1] <= 0;
|
dataa_pipe[i+1] <= 0;
|
||||||
datab_pipe[pipe_stage+1] <= 0;
|
datab_pipe[i+1] <= 0;
|
||||||
end
|
end
|
||||||
else if (clken) begin
|
else if (clken) begin
|
||||||
dataa_pipe[pipe_stage+1] <= dataa_pipe[pipe_stage];
|
dataa_pipe[i+1] <= dataa_pipe[i];
|
||||||
datab_pipe[pipe_stage+1] <= datab_pipe[pipe_stage];
|
datab_pipe[i+1] <= datab_pipe[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -19,8 +19,8 @@ module VX_d_e_reg (
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (flush),
|
.flush (flush),
|
||||||
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.ebreak, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
|
.in ({frE_to_bckE_req_if.csr_address, frE_to_bckE_req_if.jalQual, frE_to_bckE_req_if.is_etype, frE_to_bckE_req_if.is_csr, frE_to_bckE_req_if.csr_immed, frE_to_bckE_req_if.csr_mask, frE_to_bckE_req_if.rd, frE_to_bckE_req_if.rs1, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.alu_op, frE_to_bckE_req_if.wb, frE_to_bckE_req_if.rs2_src, frE_to_bckE_req_if.itype_immed, frE_to_bckE_req_if.mem_read, frE_to_bckE_req_if.mem_write, frE_to_bckE_req_if.branch_type, frE_to_bckE_req_if.upper_immed, frE_to_bckE_req_if.curr_PC, frE_to_bckE_req_if.jal, frE_to_bckE_req_if.jal_offset, frE_to_bckE_req_if.PC_next, frE_to_bckE_req_if.valid, frE_to_bckE_req_if.warp_num, frE_to_bckE_req_if.is_wspawn, frE_to_bckE_req_if.is_tmc, frE_to_bckE_req_if.is_split, frE_to_bckE_req_if.is_barrier}),
|
||||||
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.ebreak ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
|
.out ({bckE_req_if.csr_address , bckE_req_if.jalQual , bckE_req_if.is_etype ,bckE_req_if.is_csr , bckE_req_if.csr_immed , bckE_req_if.csr_mask , bckE_req_if.rd , bckE_req_if.rs1 , bckE_req_if.rs2 , bckE_req_if.alu_op , bckE_req_if.wb , bckE_req_if.rs2_src , bckE_req_if.itype_immed , bckE_req_if.mem_read , bckE_req_if.mem_write , bckE_req_if.branch_type , bckE_req_if.upper_immed , bckE_req_if.curr_PC , bckE_req_if.jal , bckE_req_if.jal_offset , bckE_req_if.PC_next , bckE_req_if.valid , bckE_req_if.warp_num , bckE_req_if.is_wspawn , bckE_req_if.is_tmc , bckE_req_if.is_split , bckE_req_if.is_barrier })
|
||||||
);
|
);
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -141,7 +141,7 @@ void Simulator::wait(uint32_t cycles) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool Simulator::is_busy() {
|
bool Simulator::is_busy() {
|
||||||
return (0 == vortex_->ebreak);
|
return vortex_->busy;
|
||||||
}
|
}
|
||||||
|
|
||||||
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
void Simulator::flush_caches(uint32_t mem_addr, uint32_t size) {
|
||||||
@@ -174,7 +174,8 @@ bool Simulator::run() {
|
|||||||
this->reset();
|
this->reset();
|
||||||
|
|
||||||
// execute program
|
// execute program
|
||||||
while (!vortex_->ebreak) {
|
while (vortex_->busy
|
||||||
|
&& !vortex_->ebreak) {
|
||||||
this->step();
|
this->step();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user