Merge branch 'fpga_synthesis' into fix_cache_m10k
This commit is contained in:
@@ -3,7 +3,7 @@ all: RUNFILE
|
||||
# /rf2_256x128_wm1/
|
||||
BaseMEM=../models/memory/cln28hpm
|
||||
|
||||
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
|
||||
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Icompat/ -Isimulate
|
||||
|
||||
FILE=Vortex.v
|
||||
|
||||
@@ -33,7 +33,7 @@ VERILATOR:
|
||||
|
||||
VERILATORnoWarnings:
|
||||
echo "#define VCD_OFF" > simulate/tb_debug.h
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO)
|
||||
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO) $(DEB)
|
||||
|
||||
compdebug:
|
||||
echo "#define VCD_OUTPUT" > simulate/tb_debug.h
|
||||
@@ -49,4 +49,4 @@ w: VERILATORnoWarnings
|
||||
$(MAKECPP)
|
||||
|
||||
clean:
|
||||
rm obj_dir/*
|
||||
rm obj_dir/*
|
||||
|
||||
132
rtl/VX_alu.v
132
rtl/VX_alu.v
@@ -1,6 +1,8 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
module VX_alu(
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire[31:0] in_1,
|
||||
input wire[31:0] in_2,
|
||||
input wire in_rs2_src,
|
||||
@@ -8,9 +10,85 @@ module VX_alu(
|
||||
input wire[19:0] in_upper_immed,
|
||||
input wire[4:0] in_alu_op,
|
||||
input wire[31:0] in_curr_PC,
|
||||
output reg[31:0] out_alu_result
|
||||
output reg[31:0] out_alu_result,
|
||||
output reg out_alu_stall
|
||||
);
|
||||
|
||||
localparam div_pipeline_len = 10;
|
||||
|
||||
wire[31:0] unsigned_div_result;
|
||||
wire[31:0] unsigned_rem_result;
|
||||
wire[31:0] signed_div_result;
|
||||
wire[31:0] signed_rem_result;
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(div_pipeline_len)
|
||||
) unsigned_div (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(unsigned_div_result),
|
||||
.remainder(unsigned_rem_result)
|
||||
);
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.NREP("SIGNED"),
|
||||
.DREP("SIGNED"),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(div_pipeline_len)
|
||||
) signed_div (
|
||||
.clock(clk),
|
||||
.aclr(1'b0),
|
||||
.clken(1'b1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(signed_div_result),
|
||||
.remainder(signed_rem_result)
|
||||
);
|
||||
|
||||
reg [15:0] curr_inst_delay;
|
||||
reg [15:0] inst_delay;
|
||||
reg inst_was_stalling;
|
||||
|
||||
wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
|
||||
assign out_alu_stall = inst_delay_stall;
|
||||
|
||||
always @(*) begin
|
||||
case(in_alu_op)
|
||||
`DIV,
|
||||
`DIVU,
|
||||
`REM,
|
||||
`REMU: curr_inst_delay = div_pipeline_len;
|
||||
default: curr_inst_delay = 0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
|
||||
always @(posedge clk or posedge reset) begin
|
||||
if (reset) begin
|
||||
inst_delay <= 0;
|
||||
inst_was_stalling <= 0;
|
||||
end
|
||||
else if (inst_delay_stall) begin
|
||||
if (inst_was_stalling) begin
|
||||
if (inst_delay > 0)
|
||||
inst_delay <= inst_delay - 1;
|
||||
end
|
||||
else begin
|
||||
inst_was_stalling <= 1;
|
||||
inst_delay <= curr_inst_delay - 1;
|
||||
end
|
||||
end
|
||||
else begin
|
||||
inst_was_stalling <= 0;
|
||||
end
|
||||
end
|
||||
|
||||
`ifdef SYN_FUNC
|
||||
wire which_in2;
|
||||
@@ -20,54 +98,14 @@ module VX_alu(
|
||||
wire[63:0] ALU_in1_mult;
|
||||
wire[63:0] ALU_in2_mult;
|
||||
wire[31:0] upper_immed;
|
||||
wire[31:0] unsigned_div_result;
|
||||
wire[31:0] unsigned_rem_result;
|
||||
wire[31:0] signed_div_result;
|
||||
wire[31:0] signed_rem_result;
|
||||
|
||||
|
||||
assign which_in2 = in_rs2_src == `RS2_IMMED;
|
||||
|
||||
assign ALU_in1 = in_1;
|
||||
|
||||
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
|
||||
|
||||
|
||||
assign upper_immed = {in_upper_immed, {12{1'b0}}};
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(0)
|
||||
) unsigned_div (
|
||||
.clk(0),
|
||||
.aclr(0),
|
||||
.clken(1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(unsigned_div_result),
|
||||
.remainder(unsigned_rem_result)
|
||||
);
|
||||
|
||||
VX_divide #(
|
||||
.WIDTHN(32),
|
||||
.WIDTHD(32),
|
||||
.NREP("SIGNED"),
|
||||
.DREP("SIGNED"),
|
||||
.SPEED("HIGHEST"),
|
||||
.PIPELINE(0)
|
||||
) signed_div (
|
||||
.clk(0),
|
||||
.aclr(0),
|
||||
.clken(1), // TODO this could be disabled on inactive instructions
|
||||
.numer(ALU_in1),
|
||||
.denom(ALU_in2),
|
||||
.quotient(signed_div_result),
|
||||
.remainder(signed_rem_result)
|
||||
);
|
||||
|
||||
|
||||
//always @(posedge `MUL) begin
|
||||
|
||||
|
||||
@@ -101,6 +139,7 @@ module VX_alu(
|
||||
`MULH: out_alu_result = mult_result[63:32];
|
||||
`MULHSU: out_alu_result = mult_result[63:32];
|
||||
`MULHU: out_alu_result = mult_result[63:32];
|
||||
// TODO: would it be profitable to fold these divide-by-zero special cases into inst_delay so they skip the divider pipeline latency when possible?
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
|
||||
@@ -160,13 +199,14 @@ module VX_alu(
|
||||
`MULH: out_alu_result = mult_signed_result[63:32];
|
||||
`MULHSU: out_alu_result = mult_signed_un_result[63:32];
|
||||
`MULHU: out_alu_result = mult_unsigned_result[63:32];
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
|
||||
// TODO: would it be profitable to fold these divide-by-zero special cases into inst_delay so they skip the divider pipeline latency when possible?
|
||||
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
|
||||
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
|
||||
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
|
||||
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
|
||||
default: out_alu_result = 32'h0;
|
||||
endcase // in_alu_op
|
||||
end
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule : VX_alu
|
||||
|
||||
@@ -6,6 +6,7 @@ module VX_back_end (
|
||||
input wire schedule_delay,
|
||||
|
||||
output wire out_mem_delay,
|
||||
output wire out_exec_delay,
|
||||
output wire gpr_stage_delay,
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
@@ -32,7 +33,8 @@ assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
|
||||
|
||||
|
||||
VX_mw_wb_inter VX_mw_wb();
|
||||
wire no_slot_mem;
|
||||
wire no_slot_mem;
|
||||
wire no_slot_exec;
|
||||
|
||||
|
||||
VX_mem_req_inter VX_exe_mem_req();
|
||||
@@ -55,6 +57,8 @@ VX_gpu_inst_req_inter VX_gpu_inst_req();
|
||||
// CSR unit inputs
|
||||
VX_csr_req_inter VX_csr_req();
|
||||
VX_csr_wb_inter VX_csr_wb();
|
||||
wire no_slot_csr;
|
||||
wire stall_gpr_csr;
|
||||
|
||||
VX_gpr_stage VX_gpr_stage(
|
||||
.clk (clk),
|
||||
@@ -67,8 +71,10 @@ VX_gpr_stage VX_gpr_stage(
|
||||
.VX_lsu_req (VX_lsu_req),
|
||||
.VX_gpu_inst_req (VX_gpu_inst_req),
|
||||
.VX_csr_req (VX_csr_req),
|
||||
.stall_gpr_csr (stall_gpr_csr),
|
||||
// End new
|
||||
.memory_delay (out_mem_delay),
|
||||
.exec_delay (out_exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
@@ -91,7 +97,9 @@ VX_execute_unit VX_execUnit(
|
||||
.VX_exec_unit_req(VX_exec_unit_req),
|
||||
.VX_inst_exec_wb (VX_inst_exec_wb),
|
||||
.VX_jal_rsp (VX_jal_rsp),
|
||||
.VX_branch_rsp (VX_branch_rsp)
|
||||
.VX_branch_rsp (VX_branch_rsp),
|
||||
.out_delay (out_exec_delay),
|
||||
.no_slot_exec (no_slot_exec)
|
||||
);
|
||||
|
||||
|
||||
@@ -100,9 +108,19 @@ VX_gpgpu_inst VX_gpgpu_inst(
|
||||
.VX_warp_ctl (VX_warp_ctl)
|
||||
);
|
||||
|
||||
VX_csr_wrapper VX_csr_wrapper(
|
||||
.VX_csr_req(VX_csr_req),
|
||||
.VX_csr_wb (VX_csr_wb)
|
||||
// VX_csr_wrapper VX_csr_wrapper(
|
||||
// .VX_csr_req(VX_csr_req),
|
||||
// .VX_csr_wb (VX_csr_wb)
|
||||
// );
|
||||
|
||||
VX_csr_pipe VX_csr_pipe(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.no_slot_csr (no_slot_csr),
|
||||
.VX_csr_req (VX_csr_req),
|
||||
.VX_writeback(VX_writeback_temp),
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
.stall_gpr_csr(stall_gpr_csr)
|
||||
);
|
||||
|
||||
VX_writeback VX_wb(
|
||||
@@ -113,7 +131,9 @@ VX_writeback VX_wb(
|
||||
.VX_csr_wb (VX_csr_wb),
|
||||
|
||||
.VX_writeback_inter(VX_writeback_temp),
|
||||
.no_slot_mem (no_slot_mem)
|
||||
.no_slot_mem (no_slot_mem),
|
||||
.no_slot_exec (no_slot_exec),
|
||||
.no_slot_csr (no_slot_csr)
|
||||
);
|
||||
|
||||
endmodule
|
||||
87
rtl/VX_csr_data.v
Normal file
87
rtl/VX_csr_data.v
Normal file
@@ -0,0 +1,87 @@
|
||||
`include "../VX_define.v"
|
||||
|
||||
// VX_csr_data: CSR storage plus two 64-bit counters.
//
// - `cycle` increments on every clock edge while not in reset.
// - `instret` increments on every clock edge where in_writeback_valid is high.
// - Reads are combinational: addresses 0xC00/0xC80 return the low/high word of
//   `cycle`, 0xC02/0xC82 return the low/high word of `instret`, and every other
//   address returns the zero-extended 12-bit entry of the `csr` array.
// - Writes are registered: when in_write_valid is high, the low 12 bits of
//   in_write_csr_data are stored at in_write_csr_address.
//
// NOTE(review): the address ports are 12 bits wide but `csr` has only 1024
// entries, so addresses >= 0x400 that are not one of the four counter
// addresses index outside the array - confirm this is intended.
// NOTE(review): only 12 of the 32 written data bits are kept - confirm.
module VX_csr_data (
|
||||
input wire clk, // Clock
|
||||
// Active-high reset; it appears in the sensitivity list below, so it acts asynchronously.
input wire reset,
|
||||
|
||||
// Address of the CSR to read (combinational read port).
input wire[11:0] in_read_csr_address,
|
||||
|
||||
// Write port: data is stored at the address on the clock edge when valid is high.
input wire in_write_valid,
|
||||
input wire[31:0] in_write_csr_data,
|
||||
input wire[11:0] in_write_csr_address,
|
||||
|
||||
// Value selected by in_read_csr_address (counter word or zero-extended csr entry).
output wire[31:0] out_read_csr_data,
|
||||
|
||||
// For instruction retire counting
|
||||
input wire in_writeback_valid
|
||||
|
||||
);
|
||||
|
||||
|
||||
// wire[`NT_M1:0][31:0] thread_ids;
|
||||
// wire[`NT_M1:0][31:0] warp_ids;
|
||||
|
||||
// genvar cur_t;
|
||||
// for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
|
||||
// assign thread_ids[cur_t] = cur_t;
|
||||
// end
|
||||
|
||||
// genvar cur_tw;
|
||||
// for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
|
||||
// assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num};
|
||||
// end
|
||||
|
||||
// General CSR storage: 1024 entries of 12 bits each.
reg[11:0] csr[1023:0];
|
||||
// 64-bit counter incremented every clock cycle outside reset.
reg[63:0] cycle;
|
||||
// 64-bit counter incremented whenever in_writeback_valid is high.
reg[63:0] instret;
|
||||
|
||||
|
||||
// Read-address decodes for the four counter words.
wire read_cycle;
|
||||
wire read_cycleh;
|
||||
wire read_instret;
|
||||
wire read_instreth;
|
||||
|
||||
// 0xC00 / 0xC80 select the low / high half of `cycle`.
assign read_cycle = in_read_csr_address == 12'hC00;
|
||||
assign read_cycleh = in_read_csr_address == 12'hC80;
|
||||
// 0xC02 / 0xC82 select the low / high half of `instret`.
assign read_instret = in_read_csr_address == 12'hC02;
|
||||
assign read_instreth = in_read_csr_address == 12'hC82;
|
||||
|
||||
// wire thread_select = in_read_csr_address == 12'h20;
|
||||
// wire warp_select = in_read_csr_address == 12'h21;
|
||||
|
||||
// assign out_read_csr_data = thread_select ? thread_ids :
|
||||
// warp_select ? warp_ids :
|
||||
// 0;
|
||||
|
||||
// Loop index used only to clear the csr array on reset.
integer curr_e;
|
||||
// State update: clear everything on reset, otherwise advance the counters
// and perform the (optional) CSR write.
always @(posedge clk or posedge reset) begin
|
||||
if (reset) begin
|
||||
for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin
|
||||
`ifdef VERILATOR
|
||||
// Verilator does not support delayed (non-blocking) assignments inside loops,
// so a blocking assignment is used when building with it.
|
||||
csr[curr_e] = 0;
|
||||
`else
|
||||
csr[curr_e] <= 0;
|
||||
`endif
|
||||
end
|
||||
cycle <= 0;
|
||||
instret <= 0;
|
||||
end else begin
|
||||
cycle <= cycle + 1;
|
||||
if (in_write_valid) begin
|
||||
// Only the low 12 bits of the write data are stored.
csr[in_write_csr_address] <= in_write_csr_data[11:0];
|
||||
end
|
||||
if (in_writeback_valid) begin
|
||||
instret <= instret + 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
// Read mux: counter words take priority; otherwise return the stored entry
// zero-extended from 12 to 32 bits.
assign out_read_csr_data = read_cycle ? cycle[31:0] :
|
||||
read_cycleh ? cycle[63:32] :
|
||||
read_instret ? instret[31:0] :
|
||||
read_instreth ? instret[63:32] :
|
||||
{{20{1'b0}}, csr[in_read_csr_address]};
|
||||
|
||||
endmodule : VX_csr_data
|
||||
106
rtl/VX_csr_pipe.v
Normal file
106
rtl/VX_csr_pipe.v
Normal file
@@ -0,0 +1,106 @@
|
||||
`include "VX_define.v"
|
||||
|
||||
// VX_csr_pipe: two-stage CSR pipeline.
//
// Stage 1 (combinational, driven by VX_csr_req): reads the addressed CSR from
// VX_csr_data, applies the requested operation (`CSR_ALU_RW / RS / RC) and
// latches both the value read and the updated value into csr_reg_s2.
// Stage 2 (the *_s2 signals): writes the updated value back into VX_csr_data
// and drives the per-thread result onto VX_csr_wb.
//
// Ports:
//   clk, reset     - forwarded to VX_csr_data and csr_reg_s2.
//   no_slot_csr    - connected to the stall input of csr_reg_s2.
//   VX_csr_req     - incoming request; valid, warp_num, rd, wb, alu_op,
//                    is_csr, csr_address and csr_mask are read here.
//   VX_writeback   - only wb_valid is read (OR-reduced) and passed to
//                    VX_csr_data.in_writeback_valid.
//   VX_csr_wb      - result of stage 2 (valid, warp_num, rd, wb, csr_result).
//   stall_gpr_csr  - high when a valid CSR request is present while
//                    no_slot_csr is asserted.
module VX_csr_pipe (
	input wire clk, // Clock
	input wire reset,
	input wire no_slot_csr,
	VX_csr_req_inter VX_csr_req,
	VX_wb_inter      VX_writeback,
	VX_csr_wb_inter  VX_csr_wb,
	output wire stall_gpr_csr
);

	// Stage-2 copies of the request fields (outputs of csr_reg_s2).
	wire[`NT_M1:0] valid_s2;
	wire[`NW_M1:0] warp_num_s2;
	wire[4:0]      rd_s2;
	wire[1:0]      wb_s2;
	wire           is_csr_s2;
	wire[11:0]     csr_address_s2;
	wire[31:0]     csr_read_data_s2;
	wire[31:0]     csr_updated_data_s2;

	// Raw value from the CSR storage, and the value after stage-2 forwarding.
	wire[31:0] csr_read_data_unqual;
	wire[31:0] csr_read_data;

	// Back-pressure towards the GPR stage: a valid CSR request cannot advance
	// while no_slot_csr is asserted.
	assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid);

	// Forward the value that stage 2 is about to write when stage 1 reads the
	// same address. The forward is qualified with is_csr_s2 - the same signal
	// that enables the write into VX_csr_data - so that a non-CSR instruction
	// sitting in stage 2 (whose csr_updated_data is the 32'hdeadbeef default)
	// can never be forwarded just because its address bits happen to match.
	// NOTE(review): the forwarded value is the full 32 bits, whereas
	// VX_csr_data stores only the low 12 bits of a write - confirm intended.
	wire forward_s2 = is_csr_s2 && (csr_address_s2 == VX_csr_req.csr_address);
	assign csr_read_data = forward_s2 ? csr_updated_data_s2 : csr_read_data_unqual;

	// Any thread writing back this cycle counts as one writeback event.
	wire writeback = |VX_writeback.wb_valid;

	VX_csr_data VX_csr_data(
		.clk                 (clk),
		.reset               (reset),
		.in_read_csr_address (VX_csr_req.csr_address),

		.in_write_valid      (is_csr_s2),
		.in_write_csr_data   (csr_updated_data_s2),
		.in_write_csr_address(csr_address_s2),

		.out_read_csr_data   (csr_read_data_unqual),

		.in_writeback_valid  (writeback)
	);

	// New CSR value computed from the value read and the request mask.
	reg[31:0] csr_updated_data;
	always @(*) begin
		case(VX_csr_req.alu_op)
			`CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask;
			`CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask;
			// 32'hFFFFFFFF - mask is the bitwise complement of mask.
			`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask);
			// Recognisable filler for non-CSR operations; never written back
			// because the write is gated by is_csr_s2.
			default:     csr_updated_data = 32'hdeadbeef;
		endcase
	end

	wire zero = 0;

	// Stage-1 -> stage-2 pipeline register.
	// Width = valid (`NT) + warp_num (`NW_M1 + 1) + rd (5) + wb (2)
	//       + is_csr (1) + csr_address (12) + read data (32) + updated data (32).
	// The width now matches the concatenations below exactly; previously five
	// bits were budgeted where the single is_csr bit sits.
	VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 1 + 12 + 64)) csr_reg_s2 (
		.clk  (clk),
		.reset(reset),
		.stall(no_slot_csr),
		.flush(zero),
		.in   ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data   , csr_updated_data   }),
		.out  ({valid_s2        , warp_num_s2        , rd_s2        , wb_s2        , is_csr_s2        , csr_address_s2        , csr_read_data_s2, csr_updated_data_s2})
	);

	// Per-thread result vectors (one 32-bit word per thread).
	wire[`NT_M1:0][31:0] final_csr_data;

	wire[`NT_M1:0][31:0] thread_ids;
	wire[`NT_M1:0][31:0] warp_ids;
	wire[`NT_M1:0][31:0] csr_vec_read_data_s2;

	// Each thread lane sees its own lane index.
	genvar cur_t;
	for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
		assign thread_ids[cur_t] = cur_t;
	end

	// Every lane sees the zero-extended stage-2 warp number.
	genvar cur_tw;
	for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
		assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
	end

	// Every lane sees the same value read from the CSR storage.
	genvar cur_v;
	for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
		assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
	end

	// Addresses 0x20 and 0x21 return the thread id and warp id respectively
	// instead of stored CSR contents.
	wire thread_select = csr_address_s2 == 12'h20;
	wire warp_select   = csr_address_s2 == 12'h21;

	assign final_csr_data = thread_select ? thread_ids :
	                        warp_select   ? warp_ids   :
	                                        csr_vec_read_data_s2;

	// Stage-2 result towards writeback.
	assign VX_csr_wb.valid      = valid_s2;
	assign VX_csr_wb.warp_num   = warp_num_s2;
	assign VX_csr_wb.rd         = rd_s2;
	assign VX_csr_wb.wb         = wb_s2;
	assign VX_csr_wb.csr_result = final_csr_data;

endmodule
|
||||
@@ -119,7 +119,8 @@ module VX_decode(
|
||||
assign is_auipc = (curr_opcode == `AUIPC_INST);
|
||||
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
|
||||
assign is_csr_immed = (is_csr) && (func3[2] == 1);
|
||||
assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
||||
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
|
||||
assign is_e_inst = in_instruction == 32'h00000073;
|
||||
|
||||
assign is_gpgpu = (curr_opcode == `GPGPU_INST);
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@
|
||||
|
||||
// `define SYN 1
|
||||
// `define ASIC 1
|
||||
`define SYN_FUNC 1
|
||||
// `define SYN_FUNC 1
|
||||
|
||||
`define NUM_BARRIERS 4
|
||||
|
||||
@@ -128,14 +128,16 @@
|
||||
|
||||
// `define PARAM
|
||||
|
||||
// oooooo
|
||||
|
||||
//Cache configurations
|
||||
//Cache configurations
|
||||
//Bytes
|
||||
`define ICACHE_SIZE 1024
|
||||
`define ICACHE_SIZE 4096
|
||||
`define ICACHE_WAYS 2
|
||||
//Bytes
|
||||
`define ICACHE_BLOCK 16
|
||||
`define ICACHE_BANKS 1
|
||||
`define ICACHE_BLOCK 64
|
||||
`define ICACHE_BANKS 4
|
||||
`define ICACHE_LOG_NUM_BANKS `CLOG2(`ICACHE_BANKS)
|
||||
|
||||
`define ICACHE_NUM_WORDS_PER_BLOCK (`ICACHE_BLOCK / (`ICACHE_BANKS * 4))
|
||||
|
||||
@@ -12,7 +12,10 @@ module VX_execute_unit (
|
||||
// JAL Response
|
||||
VX_jal_response_inter VX_jal_rsp,
|
||||
// Branch Response
|
||||
VX_branch_response_inter VX_branch_rsp
|
||||
VX_branch_response_inter VX_branch_rsp,
|
||||
|
||||
input wire no_slot_exec,
|
||||
output wire out_delay
|
||||
);
|
||||
|
||||
|
||||
@@ -41,10 +44,13 @@ module VX_execute_unit (
|
||||
|
||||
|
||||
wire[`NT_M1:0][31:0] alu_result;
|
||||
wire[`NT_M1:0] alu_stall;
|
||||
genvar index_out_reg;
|
||||
generate
|
||||
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs
|
||||
VX_alu vx_alu(
|
||||
.clk(clk),
|
||||
.reset(reset),
|
||||
// .in_reg_data (in_reg_data[1:0]),
|
||||
.in_1 (in_a_reg_data[index_out_reg]),
|
||||
.in_2 (in_b_reg_data[index_out_reg]),
|
||||
@@ -53,11 +59,17 @@ module VX_execute_unit (
|
||||
.in_upper_immed(in_upper_immed),
|
||||
.in_alu_op (in_alu_op),
|
||||
.in_curr_PC (in_curr_PC),
|
||||
.out_alu_result(alu_result[index_out_reg])
|
||||
.out_alu_result(alu_result[index_out_reg]),
|
||||
.out_alu_stall(alu_stall[index_out_reg])
|
||||
);
|
||||
end
|
||||
endgenerate
|
||||
|
||||
wire internal_stall;
|
||||
assign internal_stall = |alu_stall;
|
||||
|
||||
assign out_delay = no_slot_exec || internal_stall;
|
||||
|
||||
|
||||
wire [$clog2(`NT)-1:0] jal_branch_use_index;
|
||||
wire jal_branch_found_valid;
|
||||
@@ -103,7 +115,7 @@ module VX_execute_unit (
|
||||
// Actual Writeback
|
||||
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
|
||||
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
|
||||
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid;
|
||||
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid && !internal_stall;
|
||||
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
|
||||
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
|
||||
|
||||
@@ -163,4 +175,4 @@ module VX_execute_unit (
|
||||
// assign out_is_csr = VX_exec_unit_req.is_csr;
|
||||
// assign out_csr_address = VX_exec_unit_req.csr_address;
|
||||
|
||||
endmodule
|
||||
endmodule : VX_execute_unit
|
||||
@@ -7,6 +7,8 @@ module VX_gpr_stage (
|
||||
input wire schedule_delay,
|
||||
|
||||
input wire memory_delay,
|
||||
input wire exec_delay,
|
||||
input wire stall_gpr_csr,
|
||||
output wire gpr_stage_delay,
|
||||
|
||||
// inputs
|
||||
@@ -93,7 +95,12 @@ module VX_gpr_stage (
|
||||
wire stall_lsu = memory_delay;
|
||||
wire flush_lsu = schedule_delay && !stall_lsu;
|
||||
|
||||
assign gpr_stage_delay = stall_lsu;
|
||||
wire stall_exec = exec_delay;
|
||||
wire flush_exec = schedule_delay && !stall_exec;
|
||||
|
||||
wire stall_csr = stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid);
|
||||
|
||||
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
|
||||
|
||||
`ifdef ASIC
|
||||
wire delayed_lsu_last_cycle;
|
||||
@@ -145,8 +152,8 @@ module VX_gpr_stage (
|
||||
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(flush_rest),
|
||||
.stall(stall_exec),
|
||||
.flush(flush_exec),
|
||||
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
|
||||
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
|
||||
);
|
||||
@@ -166,13 +173,13 @@ module VX_gpr_stage (
|
||||
assign VX_gpu_inst_req.a_reg_data = real_base_address;
|
||||
assign VX_gpu_inst_req.rd2 = real_store_data;
|
||||
|
||||
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
|
||||
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.stall(stall_gpr_csr),
|
||||
.flush(flush_rest),
|
||||
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
|
||||
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
|
||||
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
|
||||
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
|
||||
);
|
||||
|
||||
|
||||
@@ -193,8 +200,8 @@ module VX_gpr_stage (
|
||||
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.flush(flush_rest),
|
||||
.stall(stall_exec),
|
||||
.flush(flush_exec),
|
||||
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
|
||||
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
|
||||
);
|
||||
@@ -208,15 +215,15 @@ module VX_gpr_stage (
|
||||
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
|
||||
);
|
||||
|
||||
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
|
||||
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
.stall(stall_rest),
|
||||
.stall(stall_gpr_csr),
|
||||
.flush(flush_rest),
|
||||
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
|
||||
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
|
||||
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
|
||||
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
|
||||
);
|
||||
|
||||
`endif
|
||||
|
||||
endmodule
|
||||
endmodule : VX_gpr_stage
|
||||
@@ -84,6 +84,7 @@ module VX_inst_multiplex (
|
||||
assign VX_csr_req.warp_num = VX_bckE_req.warp_num;
|
||||
assign VX_csr_req.rd = VX_bckE_req.rd;
|
||||
assign VX_csr_req.wb = VX_bckE_req.wb;
|
||||
assign VX_csr_req.alu_op = VX_bckE_req.alu_op;
|
||||
assign VX_csr_req.is_csr = VX_bckE_req.is_csr;
|
||||
assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
|
||||
assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed;
|
||||
|
||||
@@ -6,6 +6,7 @@ module VX_scheduler (
|
||||
input wire clk,
|
||||
input wire reset,
|
||||
input wire memory_delay,
|
||||
input wire exec_delay,
|
||||
input wire gpr_stage_delay,
|
||||
VX_frE_to_bckE_req_inter VX_bckE_req,
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
@@ -27,7 +28,11 @@ module VX_scheduler (
|
||||
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
|
||||
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
|
||||
|
||||
// classify our next instruction.
|
||||
wire is_mem = is_store || is_load;
|
||||
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
|
||||
wire is_csr = VX_bckE_req.is_csr;
|
||||
wire is_exec = !is_mem && !is_gpu && !is_csr;
|
||||
|
||||
|
||||
wire rs1_pass = ((valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs1)));
|
||||
@@ -44,8 +49,10 @@ module VX_scheduler (
|
||||
|
||||
wire rename_valid = rs1_rename_qual || rs2_rename_qual ;
|
||||
|
||||
|
||||
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid)) || (memory_delay && (is_mem)) || (gpr_stage_delay && is_mem);
|
||||
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
|
||||
|| (memory_delay && is_mem)
|
||||
|| (gpr_stage_delay && (is_mem || is_exec))
|
||||
|| (exec_delay && is_exec);
|
||||
|
||||
integer i;
|
||||
integer w;
|
||||
|
||||
@@ -14,10 +14,11 @@ module VX_writeback (
|
||||
|
||||
// Actual WB to GPR
|
||||
VX_wb_inter VX_writeback_inter,
|
||||
output wire no_slot_mem
|
||||
output wire no_slot_mem,
|
||||
output wire no_slot_exec,
|
||||
output wire no_slot_csr
|
||||
);
|
||||
|
||||
|
||||
VX_wb_inter VX_writeback_tempp();
|
||||
|
||||
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
|
||||
@@ -26,6 +27,8 @@ module VX_writeback (
|
||||
|
||||
|
||||
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
|
||||
assign no_slot_csr = csr_wb && (exec_wb);
|
||||
assign no_slot_exec = 0;
|
||||
|
||||
assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
|
||||
csr_wb ? VX_csr_wb.csr_result :
|
||||
@@ -65,17 +68,6 @@ module VX_writeback (
|
||||
|
||||
wire[`NT-1:0][31:0] use_wb_data;
|
||||
|
||||
reg prev_is_mem;
|
||||
|
||||
always @(posedge clk, posedge reset) begin
|
||||
if (reset)
|
||||
begin
|
||||
prev_is_mem = 0;
|
||||
end begin
|
||||
prev_is_mem = mem_wb && !no_slot_mem;
|
||||
end
|
||||
end
|
||||
|
||||
VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
|
||||
.clk (clk),
|
||||
.reset(reset),
|
||||
@@ -85,14 +77,16 @@ module VX_writeback (
|
||||
.out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
|
||||
);
|
||||
|
||||
`ifdef SYN
|
||||
assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data;
|
||||
`else
|
||||
assign VX_writeback_inter.write_data = use_wb_data;
|
||||
`endif
|
||||
reg[31:0] last_data_wb;
|
||||
always @(posedge clk) begin
|
||||
if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin
|
||||
last_data_wb <= use_wb_data[0];
|
||||
end
|
||||
end
|
||||
|
||||
assign VX_writeback_inter.write_data = use_wb_data;
|
||||
|
||||
endmodule // VX_writeback
|
||||
endmodule : VX_writeback
|
||||
|
||||
|
||||
|
||||
|
||||
23
rtl/Vortex.v
23
rtl/Vortex.v
@@ -44,8 +44,29 @@ module Vortex
|
||||
);
|
||||
|
||||
|
||||
reg[31:0] icache_banks = `ICACHE_BANKS;
|
||||
reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK;
|
||||
|
||||
|
||||
reg[31:0] dcache_banks = `DCACHE_BANKS;
|
||||
reg[31:0] dcache_num_words_per_block = `DCACHE_NUM_WORDS_PER_BLOCK;
|
||||
|
||||
reg[31:0] number_threads = `NT;
|
||||
reg[31:0] number_warps = `NW;
|
||||
|
||||
always @(posedge clk) begin
|
||||
icache_banks <= icache_banks;
|
||||
icache_num_words_per_block <= icache_num_words_per_block;
|
||||
|
||||
dcache_banks <= dcache_banks;
|
||||
dcache_num_words_per_block <= dcache_num_words_per_block;
|
||||
|
||||
number_threads <= number_threads;
|
||||
number_warps <= number_warps;
|
||||
end
|
||||
|
||||
wire memory_delay;
|
||||
wire exec_delay;
|
||||
wire gpr_stage_delay;
|
||||
wire schedule_delay;
|
||||
|
||||
@@ -179,6 +200,7 @@ VX_scheduler schedule(
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.memory_delay (memory_delay),
|
||||
.exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay),
|
||||
.VX_bckE_req (VX_bckE_req),
|
||||
.VX_writeback_inter(VX_writeback_inter),
|
||||
@@ -197,6 +219,7 @@ VX_back_end vx_back_end(
|
||||
.VX_dcache_req (VX_dcache_req),
|
||||
.VX_writeback_inter (VX_writeback_inter),
|
||||
.out_mem_delay (memory_delay),
|
||||
.out_exec_delay (exec_delay),
|
||||
.gpr_stage_delay (gpr_stage_delay)
|
||||
);
|
||||
|
||||
|
||||
@@ -13,8 +13,8 @@ module VX_divide
|
||||
input [WIDTHN-1:0] numer,
|
||||
input [WIDTHD-1:0] denom,
|
||||
|
||||
output [WIDTHN-1:0] quotient,
|
||||
output [WIDTHD-1:0] remainder
|
||||
output reg [WIDTHN-1:0] quotient,
|
||||
output reg [WIDTHD-1:0] remainder
|
||||
);
|
||||
|
||||
// synthesis read_comments_as_HDL on
|
||||
@@ -49,65 +49,90 @@ module VX_divide
|
||||
.numer(numer),
|
||||
.denom(denom),
|
||||
.quotient(quotient),
|
||||
.remainder(remainder)
|
||||
.remain(remainder)
|
||||
);
|
||||
|
||||
end
|
||||
else if (PIPELINE == 0) begin
|
||||
if (NREP == "SIGNED") begin
|
||||
assign quotient = $signed($signed(numer)/$signed(denom));
|
||||
assign remainder = $signed($signed(numer)%$signed(denom));
|
||||
end
|
||||
else begin
|
||||
assign quotient = numer/denom;
|
||||
assign remainder = numer%denom;
|
||||
end
|
||||
end
|
||||
else begin
|
||||
|
||||
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||
wire [WIDTHN-1:0] numer_pipe_end;
|
||||
wire [WIDTHD-1:0] denom_pipe_end;
|
||||
if (PIPELINE == 0) begin
|
||||
assign numer_pipe_end = numer;
|
||||
assign denom_pipe_end = denom;
|
||||
end else begin
|
||||
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
|
||||
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
|
||||
|
||||
genvar pipe_stage;
|
||||
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[pipe_stage+1] <= 0;
|
||||
denom_pipe[pipe_stage+1] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
|
||||
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
genvar pipe_stage;
|
||||
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[pipe_stage+1] <= 0;
|
||||
denom_pipe[pipe_stage+1] <= 0;
|
||||
numer_pipe[0] <= 0;
|
||||
denom_pipe[0] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
|
||||
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
|
||||
numer_pipe[0] <= numer;
|
||||
denom_pipe[0] <= denom;
|
||||
end
|
||||
end
|
||||
|
||||
assign numer_pipe_end = numer_pipe[PIPELINE-1];
|
||||
assign denom_pipe_end = denom_pipe[PIPELINE-1];
|
||||
end
|
||||
|
||||
always @(posedge clock or posedge aclr) begin
|
||||
if (aclr) begin
|
||||
numer_pipe[0] <= 0;
|
||||
denom_pipe[0] <= 0;
|
||||
end
|
||||
else if (clken) begin
|
||||
numer_pipe[0] <= numer;
|
||||
denom_pipe[0] <= denom;
|
||||
end
|
||||
end
|
||||
|
||||
wire [WIDTHN-1:0] numer_pipe_end;
|
||||
assign numer_pipe_end = numer_pipe[PIPELINE-1];
|
||||
wire [WIDTHD-1:0] denom_pipe_end;
|
||||
assign denom_pipe_end = denom_pipe[PIPELINE-1];
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
/* Do the actual fallback computation here */
|
||||
/* * * * * * * * * * * * * * * * * * * * * * */
|
||||
|
||||
if (NREP == "SIGNED") begin
|
||||
assign quotient = $signed($signed(numer_pipe_end)/$signed(denom_pipe_end));
|
||||
assign remainder = $signed($signed(numer_pipe_end)%$signed(denom_pipe_end));
|
||||
|
||||
/*VX_divide_internal_signed #(
|
||||
.WIDTHN,
|
||||
.WIDTHD
|
||||
)div(
|
||||
.numer(numer_pipe_end),
|
||||
.denom(denom_pipe_end),
|
||||
.quotient,
|
||||
.remainder
|
||||
);*/
|
||||
|
||||
always @(*) begin
|
||||
if (denom_pipe_end == 0) begin
|
||||
quotient = 32'hffffffff;
|
||||
remainder = numer_pipe_end;
|
||||
end
|
||||
else if (denom_pipe_end == 32'hffffffff && numer_pipe_end == 32'h80000000) begin
|
||||
// this edge case kills verilator in some cases by causing a division
|
||||
// overflow exception. INT_MIN / -1 (on x86)
|
||||
quotient = 0;
|
||||
remainder = 0;
|
||||
end
|
||||
else begin
|
||||
quotient = $signed($signed(numer_pipe_end)/$signed(denom_pipe_end));
|
||||
remainder = $signed($signed(numer_pipe_end)%$signed(denom_pipe_end));
|
||||
end
|
||||
end
|
||||
|
||||
end
|
||||
else begin
|
||||
assign quotient = numer_pipe_end/denom_pipe_end;
|
||||
assign remainder = numer_pipe_end%denom_pipe_end;
|
||||
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
|
||||
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
|
||||
end
|
||||
|
||||
end
|
||||
endgenerate
|
||||
|
||||
endmodule: VX_divide
|
||||
endmodule : VX_divide
|
||||
|
||||
@@ -11,7 +11,7 @@ interface VX_csr_req_inter ();
|
||||
wire[`NW_M1:0] warp_num;
|
||||
wire[4:0] rd;
|
||||
wire[1:0] wb;
|
||||
|
||||
wire[4:0] alu_op;
|
||||
wire is_csr;
|
||||
wire[11:0] csr_address;
|
||||
wire csr_immed;
|
||||
|
||||
@@ -33,6 +33,7 @@ STAMP = echo done >
|
||||
|
||||
$(PROJECT).syn.rpt: syn.chg $(SOURCE_FILES)
|
||||
$(QUARTUS_ROOT)/quartus/bin/quartus_syn $(PROJECT) $(SYN_ARGS)
|
||||
$(QUARTUS_ROOT)/quartus/bin/quartus_sh -t make_pins_virtual.tcl
|
||||
$(STAMP) fit.chg
|
||||
|
||||
$(PROJECT).fit.rpt: fit.chg $(PROJECT).syn.rpt
|
||||
@@ -45,6 +46,7 @@ $(PROJECT).asm.rpt: asm.chg $(PROJECT).fit.rpt
|
||||
|
||||
$(PROJECT).sta.rpt: sta.chg $(PROJECT).fit.rpt
|
||||
$(QUARTUS_ROOT)/quartus/bin/quartus_sta $(PROJECT) $(STA_ARGS)
|
||||
$(QUARTUS_ROOT)/quartus/bin/quartus_sta -t VX_timing.tcl
|
||||
|
||||
smart.log: $(PROJECT_FILES)
|
||||
$(QUARTUS_ROOT)/quartus/bin/quartus_sh --determine_smart_action $(PROJECT) > smart.log
|
||||
@@ -69,4 +71,4 @@ program: $(PROJECT).sof
|
||||
quartus_pgm --no_banner --mode=jtag -o "P;$(PROJECT).sof"
|
||||
|
||||
clean:
|
||||
rm -rf *.rpt *.chg *.qsf *.qpf smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db output_files tmp-clearbox
|
||||
rm -rf *.rpt *.chg *.qsf *.qpf smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db output_files tmp-clearbox bin/
|
||||
|
||||
@@ -2,10 +2,12 @@ load_package flow
|
||||
|
||||
package require cmdline
|
||||
|
||||
project_open Vortex
|
||||
|
||||
proc make_all_pins_virtual { args } {
|
||||
|
||||
remove_all_instance_assignments -name VIRTUAL_PIN
|
||||
execute_module -tool map
|
||||
# execute_module -tool map
|
||||
set name_ids [get_names -filter * -node_type pin]
|
||||
|
||||
foreach_in_collection name_id $name_ids {
|
||||
|
||||
@@ -70,6 +70,7 @@ set_global_assignment -name VERILOG_FILE ../shared_memory/VX_shared_memory_block
|
||||
set_global_assignment -name VERILOG_FILE ../shared_memory/VX_shared_memory.v
|
||||
set_global_assignment -name VERILOG_FILE ../shared_memory/VX_priority_encoder_sm.v
|
||||
set_global_assignment -name VERILOG_FILE ../shared_memory/VX_bank_valids.v
|
||||
set_global_assignment -name VERILOG_FILE ../compat/VX_divide.v
|
||||
set_global_assignment -name VERILOG_FILE ../VX_alu.v
|
||||
set_global_assignment -name VERILOG_FILE ../VX_back_end.v
|
||||
set_global_assignment -name VERILOG_FILE ../VX_context.v
|
||||
@@ -106,6 +107,10 @@ set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
|
||||
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
|
||||
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
|
||||
|
||||
|
||||
# set where [file dirname [info script]]
|
||||
# source [file join $where make_pins_virtual.tcl]
|
||||
|
||||
project_close
|
||||
|
||||
# set_global_assignment -name VERILOG_FILE $opts(src)
|
||||
|
||||
@@ -4,3 +4,6 @@ create_clock -name {clk} -period "400 MHz" -waveform { 0.0 1.0 } [get_ports {clk
|
||||
|
||||
derive_pll_clocks -create_base_clocks
|
||||
derive_clock_uncertainty
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -3,8 +3,8 @@
|
||||
|
||||
#define NW 8
|
||||
|
||||
// #define CACHE_NUM_BANKS 8
|
||||
// #define CACHE_WORDS_PER_BLOCK 4
|
||||
#define CACHE_NUM_BANKS 8
|
||||
#define CACHE_WORDS_PER_BLOCK 4
|
||||
|
||||
#define R_INST 51
|
||||
#define L_INST 3
|
||||
|
||||
@@ -167,11 +167,12 @@ uint32_t hToI(char *c, uint32_t size) {
|
||||
|
||||
|
||||
|
||||
void loadHexImpl(char * path,RAM* mem) {
|
||||
void loadHexImpl(const char *path, RAM* mem) {
|
||||
mem->clear();
|
||||
FILE *fp = fopen(&path[0], "r");
|
||||
FILE *fp = fopen(path, "r");
|
||||
if(fp == 0){
|
||||
printf("Path not found %s\n", path);
|
||||
return;
|
||||
// std::cout << path << " not found" << std::endl;
|
||||
}
|
||||
//Preload 0x0 <-> 0x80000000 jumps
|
||||
|
||||
@@ -12,83 +12,94 @@ int main(int argc, char **argv)
|
||||
Verilated::traceEverOn(true);
|
||||
|
||||
|
||||
#define ALL_TESTS
|
||||
#ifdef ALL_TESTS
|
||||
bool passed = true;
|
||||
std::string tests[NUM_TESTS] = {
|
||||
"../../emulator/riscv_tests/rv32ui-p-add.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-addi.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-and.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-andi.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-auipc.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-beq.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-bge.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-bgeu.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-blt.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-bltu.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-bne.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-jal.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-jalr.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-lb.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-lbu.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-lh.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-lhu.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-lui.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-lw.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-or.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-ori.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sb.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sh.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-simple.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sll.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-slli.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-slt.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-slti.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sltiu.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sltu.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sra.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-srai.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-srl.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-srli.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sub.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-sw.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-xor.hex",
|
||||
"../../emulator/riscv_tests/rv32ui-p-xori.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-div.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-divu.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-mul.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-mulh.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-mulhsu.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-mulhu.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-rem.hex",
|
||||
"../../emulator/riscv_tests/rv32um-p-remu.hex"
|
||||
};
|
||||
|
||||
// bool passed = true;
|
||||
// std::string tests[NUM_TESTS] = {
|
||||
// "../../emulator/riscv_tests/rv32ui-p-add.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-addi.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-and.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-andi.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-auipc.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-beq.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-bge.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-bgeu.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-blt.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-bltu.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-bne.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-jal.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-jalr.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-lb.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-lbu.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-lh.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-lhu.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-lui.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-lw.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-or.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-ori.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sb.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sh.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-simple.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sll.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-slli.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-slt.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-slti.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sltiu.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sltu.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sra.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-srai.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-srl.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-srli.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sub.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-sw.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-xor.hex",
|
||||
// "../../emulator/riscv_tests/rv32ui-p-xori.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-div.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-divu.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-mul.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-mulh.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-mulhsu.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-mulhu.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-rem.hex",
|
||||
// "../../emulator/riscv_tests/rv32um-p-remu.hex"
|
||||
// };
|
||||
for (std::string s : tests) {
|
||||
Vortex v;
|
||||
|
||||
// for (int ii = 0; ii < NUM_TESTS; ii++)
|
||||
// // for (int ii = 5; ii < 6; ii++)
|
||||
// {
|
||||
// std::cout << "TESTING: " << tests[ii] << '\n';
|
||||
// Vortex v;
|
||||
// bool curr = v.simulate(tests[ii]);
|
||||
std::cerr << s << std::endl;
|
||||
|
||||
// if ( curr) std::cerr << GREEN << "Test Passed: " << tests[ii] << std::endl;
|
||||
// if (!curr) std::cerr << RED << "Test Failed: " << tests[ii] << std::endl;
|
||||
// passed = passed && curr;
|
||||
bool curr = v.simulate(s);
|
||||
if ( curr) std::cerr << GREEN << "Test Passed: " << s << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << s << std::endl;
|
||||
passed = passed && curr;
|
||||
}
|
||||
|
||||
// std::cerr << DEFAULT;
|
||||
// }
|
||||
if( passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n";
|
||||
if(!passed) std::cerr << DEFAULT << "Failed one or more tests\n";
|
||||
|
||||
// if( passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n";
|
||||
// if(!passed) std::cerr << DEFAULT << "Failed one or more tests\n";
|
||||
return !passed;
|
||||
|
||||
#else
|
||||
|
||||
// char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex";
|
||||
char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex";
|
||||
Vortex v;
|
||||
char testing[] = "../../kernel/vortex_test.hex";
|
||||
const char *testing;
|
||||
|
||||
if (argc >= 2) {
|
||||
testing = argv[1];
|
||||
} else {
|
||||
testing = "../../kernel/vortex_test.hex";
|
||||
}
|
||||
|
||||
std::cerr << testing << std::endl;
|
||||
|
||||
|
||||
bool curr = v.simulate(testing);
|
||||
if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl;
|
||||
if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl;
|
||||
|
||||
return 0;
|
||||
return !curr;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -46,8 +46,10 @@ class Vortex
|
||||
VVortex * vortex;
|
||||
|
||||
unsigned start_pc;
|
||||
bool refill;
|
||||
unsigned refill_addr;
|
||||
bool refill_d;
|
||||
unsigned refill_addr_d;
|
||||
bool refill_i;
|
||||
unsigned refill_addr_i;
|
||||
long int curr_cycle;
|
||||
bool stop;
|
||||
bool unit_test;
|
||||
@@ -100,7 +102,7 @@ Vortex::~Vortex()
|
||||
|
||||
void Vortex::ProcessFile(void)
|
||||
{
|
||||
loadHexImpl("../../kernel/vortex_test.hex", &this->ram);
|
||||
loadHexImpl(this->instruction_file_name.c_str(), &this->ram);
|
||||
}
|
||||
|
||||
void Vortex::print_stats(bool cycle_test)
|
||||
@@ -154,38 +156,66 @@ void Vortex::print_stats(bool cycle_test)
|
||||
bool Vortex::ibus_driver()
|
||||
{
|
||||
|
||||
////////////////////// IBUS //////////////////////
|
||||
unsigned new_PC;
|
||||
bool stop = false;
|
||||
uint32_t curr_inst = 0;
|
||||
vortex->i_m_ready_i = false;
|
||||
|
||||
curr_inst = 0xdeadbeef;
|
||||
|
||||
new_PC = vortex->icache_request_pc_address;
|
||||
ram.getWord(new_PC, &curr_inst);
|
||||
vortex->icache_response_instruction = curr_inst;
|
||||
|
||||
// std::cout << std::hex << "IReq: " << vortex->icache_request_pc_address << "\tResp: " << curr_inst << "\n";
|
||||
|
||||
// printf("\n\n---------------------------------------------\n(%x) Inst: %x\n", new_PC, curr_inst);
|
||||
// printf("\n");
|
||||
////////////////////// IBUS //////////////////////
|
||||
|
||||
|
||||
////////////////////// STATS //////////////////////
|
||||
|
||||
|
||||
if (((((unsigned int)curr_inst) != 0) && (((unsigned int)curr_inst) != 0xffffffff)))
|
||||
{
|
||||
++stats_dynamic_inst;
|
||||
stop = false;
|
||||
} else
|
||||
{
|
||||
// printf("Ibus requesting stop: %x\n", curr_inst);
|
||||
stop = true;
|
||||
|
||||
// int dcache_num_words_per_block
|
||||
|
||||
if (refill_i)
|
||||
{
|
||||
refill_i = false;
|
||||
vortex->i_m_ready_i = true;
|
||||
|
||||
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++)
|
||||
{
|
||||
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++)
|
||||
{
|
||||
unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank;
|
||||
unsigned curr_addr = refill_addr_i + (4*curr_index);
|
||||
|
||||
unsigned curr_value;
|
||||
ram.getWord(curr_addr, &curr_value);
|
||||
|
||||
vortex->i_m_readdata_i[curr_bank][curr_word] = curr_value;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (vortex->o_m_valid_i)
|
||||
{
|
||||
|
||||
if (vortex->o_m_read_or_write_i)
|
||||
{
|
||||
// fprintf(stderr, "++++++++++++++++++++++++++++++++\n");
|
||||
unsigned base_addr = vortex->o_m_evict_addr_i;
|
||||
|
||||
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++)
|
||||
{
|
||||
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++)
|
||||
{
|
||||
unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank;
|
||||
unsigned curr_addr = base_addr + (4*curr_index);
|
||||
|
||||
unsigned curr_value = vortex->o_m_writedata_i[curr_bank][curr_word];
|
||||
|
||||
ram.writeWord( curr_addr, &curr_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Respond next cycle
|
||||
refill_i = true;
|
||||
refill_addr_i = vortex->o_m_read_addr_i;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
return stop;
|
||||
|
||||
return false;
|
||||
|
||||
}
|
||||
|
||||
@@ -197,6 +227,7 @@ void Vortex::io_handler()
|
||||
|
||||
char c = (char) data_write;
|
||||
std::cerr << c;
|
||||
// std::cout << c;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -204,75 +235,62 @@ void Vortex::io_handler()
|
||||
bool Vortex::dbus_driver()
|
||||
{
|
||||
|
||||
// printf("****************************\n");
|
||||
vortex->i_m_ready_d = false;
|
||||
|
||||
vortex->i_m_ready = 0;
|
||||
for (int i = 0; i < CACHE_NUM_BANKS; i++)
|
||||
{
|
||||
for (int j = 0; j < CACHE_WORDS_PER_BLOCK; j++)
|
||||
|
||||
// int dcache_num_words_per_block
|
||||
|
||||
if (refill_d)
|
||||
{
|
||||
vortex->i_m_readdata[i][j] = 0;
|
||||
}
|
||||
}
|
||||
refill_d = false;
|
||||
vortex->i_m_ready_d = true;
|
||||
|
||||
|
||||
if (this->refill)
|
||||
{
|
||||
this->refill = false;
|
||||
|
||||
vortex->i_m_ready = 1;
|
||||
for (int curr_e = 0; curr_e < (CACHE_NUM_BANKS*CACHE_WORDS_PER_BLOCK); curr_e++)
|
||||
{
|
||||
unsigned new_addr = this->refill_addr + (4*curr_e);
|
||||
|
||||
|
||||
unsigned addr_without_byte = new_addr >> 2;
|
||||
unsigned bank_num = addr_without_byte & 0x7;
|
||||
unsigned addr_wihtout_bank = addr_without_byte >> 3;
|
||||
unsigned offset_num = addr_wihtout_bank & 0x3;
|
||||
|
||||
unsigned value;
|
||||
ram.getWord(new_addr, &value);
|
||||
|
||||
// printf("-------- (%x) i_m_readdata[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, value);
|
||||
vortex->i_m_readdata[bank_num][offset_num] = value;
|
||||
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (vortex->o_m_valid)
|
||||
{
|
||||
// printf("Valid o_m_valid\n");
|
||||
if (vortex->o_m_read_or_write)
|
||||
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__dcache_banks; curr_bank++)
|
||||
{
|
||||
// printf("Valid write\n");
|
||||
|
||||
for (int curr_e = 0; curr_e < (CACHE_NUM_BANKS*CACHE_WORDS_PER_BLOCK); curr_e++)
|
||||
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__dcache_num_words_per_block; curr_word++)
|
||||
{
|
||||
unsigned new_addr = vortex->o_m_evict_addr + (4*curr_e);
|
||||
unsigned curr_index = (curr_word * vortex->Vortex__DOT__dcache_banks) + curr_bank;
|
||||
unsigned curr_addr = refill_addr_d + (4*curr_index);
|
||||
|
||||
unsigned curr_value;
|
||||
ram.getWord(curr_addr, &curr_value);
|
||||
|
||||
unsigned addr_without_byte = new_addr >> 2;
|
||||
unsigned bank_num = addr_without_byte & 0x7;
|
||||
unsigned addr_wihtout_bank = addr_without_byte >> 3;
|
||||
unsigned offset_num = addr_wihtout_bank & 0x3;
|
||||
vortex->i_m_readdata_d[curr_bank][curr_word] = curr_value;
|
||||
|
||||
|
||||
unsigned new_value = vortex->o_m_writedata[bank_num][offset_num];
|
||||
|
||||
ram.writeWord( new_addr, &new_value);
|
||||
|
||||
// printf("+++++++ (%x) writeback[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, new_value);
|
||||
// printf("+++++++ (%x) i_m_readdata[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, value);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// Respond next cycle
|
||||
this->refill = true;
|
||||
this->refill_addr = vortex->o_m_read_addr;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (vortex->o_m_valid_d)
|
||||
{
|
||||
|
||||
if (vortex->o_m_read_or_write_d)
|
||||
{
|
||||
// fprintf(stderr, "++++++++++++++++++++++++++++++++\n");
|
||||
unsigned base_addr = vortex->o_m_evict_addr_d;
|
||||
|
||||
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__dcache_banks; curr_bank++)
|
||||
{
|
||||
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__dcache_num_words_per_block; curr_word++)
|
||||
{
|
||||
unsigned curr_index = (curr_word * vortex->Vortex__DOT__dcache_banks) + curr_bank;
|
||||
unsigned curr_addr = base_addr + (4*curr_index);
|
||||
|
||||
unsigned curr_value = vortex->o_m_writedata_d[curr_bank][curr_word];
|
||||
|
||||
ram.writeWord( curr_addr, &curr_value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Respond next cycle
|
||||
refill_d = true;
|
||||
refill_addr_d = vortex->o_m_read_addr_d;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
@@ -397,7 +415,9 @@ bool Vortex::simulate(std::string file_to_simulate)
|
||||
|
||||
std::cerr << "New Total Cycles: " << (this->stats_total_cycles) << "\n";
|
||||
|
||||
// int status = (unsigned int) vortex->Vortex__DOT__vx_front_end__DOT__vx_decode__DOT__vx_grp_wrapper__DOT__genblk2__BRA__0__KET____DOT__vx_gpr__DOT__first_ram__DOT__GPR[28][0] & 0xf;
|
||||
int status = (unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb & 0xf;
|
||||
|
||||
// std::cout << "Last wb: " << std::hex << ((unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb) << "\n";
|
||||
|
||||
// std::cout << "Something: " << result << '\n';
|
||||
|
||||
@@ -408,6 +428,6 @@ bool Vortex::simulate(std::string file_to_simulate)
|
||||
|
||||
|
||||
|
||||
// return (status == 1);
|
||||
return (1 == 1);
|
||||
return (status == 1);
|
||||
// return (1 == 1);
|
||||
}
|
||||
Reference in New Issue
Block a user