Merge branch 'fpga_synthesis' into fix_cache_m10k

This commit is contained in:
Codetector
2020-02-19 16:03:23 -05:00
78 changed files with 93790 additions and 2224 deletions

View File

@@ -3,7 +3,7 @@ all: RUNFILE
# /rf2_256x128_wm1/
BaseMEM=../models/memory/cln28hpm
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Isimulate
INCLUDE=-I. -Ishared_memory -Icache -I$(BaseMEM)/rf2_128x128_wm1/ -I$(BaseMEM)/rf2_256x128_wm1/ -I$(BaseMEM)/rf2_256x19_wm0/ -I$(BaseMEM)/rf2_32x128_wm1/ -Iinterfaces/ -Ipipe_regs/ -Icompat/ -Isimulate
FILE=Vortex.v
@@ -33,7 +33,7 @@ VERILATOR:
VERILATORnoWarnings:
echo "#define VCD_OFF" > simulate/tb_debug.h
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO)
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(WNO) $(DEB)
compdebug:
echo "#define VCD_OUTPUT" > simulate/tb_debug.h
@@ -49,4 +49,4 @@ w: VERILATORnoWarnings
$(MAKECPP)
clean:
rm obj_dir/*
rm obj_dir/*

View File

@@ -1,6 +1,8 @@
`include "VX_define.v"
module VX_alu(
input wire clk,
input wire reset,
input wire[31:0] in_1,
input wire[31:0] in_2,
input wire in_rs2_src,
@@ -8,9 +10,85 @@ module VX_alu(
input wire[19:0] in_upper_immed,
input wire[4:0] in_alu_op,
input wire[31:0] in_curr_PC,
output reg[31:0] out_alu_result
output reg[31:0] out_alu_result,
output reg out_alu_stall
);
localparam div_pipeline_len = 10;
wire[31:0] unsigned_div_result;
wire[31:0] unsigned_rem_result;
wire[31:0] signed_div_result;
wire[31:0] signed_rem_result;
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.SPEED("HIGHEST"),
.PIPELINE(div_pipeline_len)
) unsigned_div (
.clock(clk),
.aclr(1'b0),
.clken(1'b1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1),
.denom(ALU_in2),
.quotient(unsigned_div_result),
.remainder(unsigned_rem_result)
);
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.NREP("SIGNED"),
.DREP("SIGNED"),
.SPEED("HIGHEST"),
.PIPELINE(div_pipeline_len)
) signed_div (
.clock(clk),
.aclr(1'b0),
.clken(1'b1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1),
.denom(ALU_in2),
.quotient(signed_div_result),
.remainder(signed_rem_result)
);
reg [15:0] curr_inst_delay;
reg [15:0] inst_delay;
reg inst_was_stalling;
wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
assign out_alu_stall = inst_delay_stall;
always @(*) begin
case(in_alu_op)
`DIV,
`DIVU,
`REM,
`REMU: curr_inst_delay = div_pipeline_len;
default: curr_inst_delay = 0;
endcase // in_alu_op
end
always @(posedge clk or posedge reset) begin
if (reset) begin
inst_delay <= 0;
inst_was_stalling <= 0;
end
else if (inst_delay_stall) begin
if (inst_was_stalling) begin
if (inst_delay > 0)
inst_delay <= inst_delay - 1;
end
else begin
inst_was_stalling <= 1;
inst_delay <= curr_inst_delay - 1;
end
end
else begin
inst_was_stalling <= 0;
end
end
`ifdef SYN_FUNC
wire which_in2;
@@ -20,54 +98,14 @@ module VX_alu(
wire[63:0] ALU_in1_mult;
wire[63:0] ALU_in2_mult;
wire[31:0] upper_immed;
wire[31:0] unsigned_div_result;
wire[31:0] unsigned_rem_result;
wire[31:0] signed_div_result;
wire[31:0] signed_rem_result;
assign which_in2 = in_rs2_src == `RS2_IMMED;
assign ALU_in1 = in_1;
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
assign upper_immed = {in_upper_immed, {12{1'b0}}};
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.SPEED("HIGHEST"),
.PIPELINE(0)
) unsigned_div (
.clk(0),
.aclr(0),
.clken(1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1),
.denom(ALU_in2),
.quotient(unsigned_div_result),
.remainder(unsigned_rem_result)
);
VX_divide #(
.WIDTHN(32),
.WIDTHD(32),
.NREP("SIGNED"),
.DREP("SIGNED"),
.SPEED("HIGHEST"),
.PIPELINE(0)
) signed_div (
.clk(0),
.aclr(0),
.clken(1), // TODO this could be disabled on inactive instructions
.numer(ALU_in1),
.denom(ALU_in2),
.quotient(signed_div_result),
.remainder(signed_rem_result)
);
//always @(posedge `MUL) begin
@@ -101,6 +139,7 @@ module VX_alu(
`MULH: out_alu_result = mult_result[63:32];
`MULHSU: out_alu_result = mult_result[63:32];
`MULHU: out_alu_result = mult_result[63:32];
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
@@ -160,13 +199,14 @@ module VX_alu(
`MULH: out_alu_result = mult_signed_result[63:32];
`MULHSU: out_alu_result = mult_signed_un_result[63:32];
`MULHU: out_alu_result = mult_unsigned_result[63:32];
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
// TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
default: out_alu_result = 32'h0;
endcase // in_alu_op
end
`endif
endmodule
endmodule : VX_alu

View File

@@ -6,6 +6,7 @@ module VX_back_end (
input wire schedule_delay,
output wire out_mem_delay,
output wire out_exec_delay,
output wire gpr_stage_delay,
VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp,
@@ -32,7 +33,8 @@ assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
VX_mw_wb_inter VX_mw_wb();
wire no_slot_mem;
wire no_slot_mem;
wire no_slot_exec;
VX_mem_req_inter VX_exe_mem_req();
@@ -55,6 +57,8 @@ VX_gpu_inst_req_inter VX_gpu_inst_req();
// CSR unit inputs
VX_csr_req_inter VX_csr_req();
VX_csr_wb_inter VX_csr_wb();
wire no_slot_csr;
wire stall_gpr_csr;
VX_gpr_stage VX_gpr_stage(
.clk (clk),
@@ -67,8 +71,10 @@ VX_gpr_stage VX_gpr_stage(
.VX_lsu_req (VX_lsu_req),
.VX_gpu_inst_req (VX_gpu_inst_req),
.VX_csr_req (VX_csr_req),
.stall_gpr_csr (stall_gpr_csr),
// End new
.memory_delay (out_mem_delay),
.exec_delay (out_exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);
@@ -91,7 +97,9 @@ VX_execute_unit VX_execUnit(
.VX_exec_unit_req(VX_exec_unit_req),
.VX_inst_exec_wb (VX_inst_exec_wb),
.VX_jal_rsp (VX_jal_rsp),
.VX_branch_rsp (VX_branch_rsp)
.VX_branch_rsp (VX_branch_rsp),
.out_delay (out_exec_delay),
.no_slot_exec (no_slot_exec)
);
@@ -100,9 +108,19 @@ VX_gpgpu_inst VX_gpgpu_inst(
.VX_warp_ctl (VX_warp_ctl)
);
VX_csr_wrapper VX_csr_wrapper(
.VX_csr_req(VX_csr_req),
.VX_csr_wb (VX_csr_wb)
// VX_csr_wrapper VX_csr_wrapper(
// .VX_csr_req(VX_csr_req),
// .VX_csr_wb (VX_csr_wb)
// );
VX_csr_pipe VX_csr_pipe(
.clk (clk),
.reset (reset),
.no_slot_csr (no_slot_csr),
.VX_csr_req (VX_csr_req),
.VX_writeback(VX_writeback_temp),
.VX_csr_wb (VX_csr_wb),
.stall_gpr_csr(stall_gpr_csr)
);
VX_writeback VX_wb(
@@ -113,7 +131,9 @@ VX_writeback VX_wb(
.VX_csr_wb (VX_csr_wb),
.VX_writeback_inter(VX_writeback_temp),
.no_slot_mem (no_slot_mem)
.no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr)
);
endmodule

87
rtl/VX_csr_data.v Normal file
View File

@@ -0,0 +1,87 @@
`include "../VX_define.v"
module VX_csr_data (
input wire clk, // Clock
input wire reset,
input wire[11:0] in_read_csr_address,
input wire in_write_valid,
input wire[31:0] in_write_csr_data,
input wire[11:0] in_write_csr_address,
output wire[31:0] out_read_csr_data,
// For instruction retire counting
input wire in_writeback_valid
);
// wire[`NT_M1:0][31:0] thread_ids;
// wire[`NT_M1:0][31:0] warp_ids;
// genvar cur_t;
// for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
// assign thread_ids[cur_t] = cur_t;
// end
// genvar cur_tw;
// for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
// assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num};
// end
reg[11:0] csr[1023:0];
reg[63:0] cycle;
reg[63:0] instret;
wire read_cycle;
wire read_cycleh;
wire read_instret;
wire read_instreth;
assign read_cycle = in_read_csr_address == 12'hC00;
assign read_cycleh = in_read_csr_address == 12'hC80;
assign read_instret = in_read_csr_address == 12'hC02;
assign read_instreth = in_read_csr_address == 12'hC82;
// wire thread_select = in_read_csr_address == 12'h20;
// wire warp_select = in_read_csr_address == 12'h21;
// assign out_read_csr_data = thread_select ? thread_ids :
// warp_select ? warp_ids :
// 0;
integer curr_e;
always @(posedge clk or posedge reset) begin
if (reset) begin
for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin
`ifdef VERILATOR
// - Verilator does not support delayed assignment in loops.
csr[curr_e] = 0;
`else
csr[curr_e] <= 0;
`endif
end
cycle <= 0;
instret <= 0;
end else begin
cycle <= cycle + 1;
if (in_write_valid) begin
csr[in_write_csr_address] <= in_write_csr_data[11:0];
end
if (in_writeback_valid) begin
instret <= instret + 1;
end
end
end
assign out_read_csr_data = read_cycle ? cycle[31:0] :
read_cycleh ? cycle[63:32] :
read_instret ? instret[31:0] :
read_instreth ? instret[63:32] :
{{20{1'b0}}, csr[in_read_csr_address]};
endmodule : VX_csr_data

106
rtl/VX_csr_pipe.v Normal file
View File

@@ -0,0 +1,106 @@
`include "VX_define.v"
module VX_csr_pipe (
input wire clk, // Clock
input wire reset,
input wire no_slot_csr,
VX_csr_req_inter VX_csr_req,
VX_wb_inter VX_writeback,
VX_csr_wb_inter VX_csr_wb,
output wire stall_gpr_csr
);
wire[`NT_M1:0] valid_s2;
wire[`NW_M1:0] warp_num_s2;
wire[4:0] rd_s2;
wire[1:0] wb_s2;
wire[4:0] alu_op_s2;
wire is_csr_s2;
wire[11:0] csr_address_s2;
wire[31:0] csr_read_data_s2;
wire[31:0] csr_updated_data_s2;
wire[31:0] csr_read_data_unqual;
wire[31:0] csr_read_data;
assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid);
assign csr_read_data = (csr_address_s2 == VX_csr_req.csr_address) ? csr_updated_data_s2 : csr_read_data_unqual;
wire writeback = |VX_writeback.wb_valid;
VX_csr_data VX_csr_data(
.clk (clk),
.reset (reset),
.in_read_csr_address (VX_csr_req.csr_address),
.in_write_valid (is_csr_s2),
.in_write_csr_data (csr_updated_data_s2),
.in_write_csr_address(csr_address_s2),
.out_read_csr_data (csr_read_data_unqual),
.in_writeback_valid (writeback)
);
reg[31:0] csr_updated_data;
always @(*) begin
case(VX_csr_req.alu_op)
`CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask;
`CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask;
`CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask);
default: csr_updated_data = 32'hdeadbeef;
endcase
end
wire zero = 0;
VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 5 + 12 + 64)) csr_reg_s2 (
.clk (clk),
.reset(reset),
.stall(no_slot_csr),
.flush(zero),
.in ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data , csr_updated_data }),
.out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
);
wire[`NT_M1:0][31:0] final_csr_data;
wire[`NT_M1:0][31:0] thread_ids;
wire[`NT_M1:0][31:0] warp_ids;
wire[`NT_M1:0][31:0] csr_vec_read_data_s2;
genvar cur_t;
for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
assign thread_ids[cur_t] = cur_t;
end
genvar cur_tw;
for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
end
genvar cur_v;
for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
end
wire thread_select = csr_address_s2 == 12'h20;
wire warp_select = csr_address_s2 == 12'h21;
assign final_csr_data = thread_select ? thread_ids :
warp_select ? warp_ids :
csr_vec_read_data_s2;
assign VX_csr_wb.valid = valid_s2;
assign VX_csr_wb.warp_num = warp_num_s2;
assign VX_csr_wb.rd = rd_s2;
assign VX_csr_wb.wb = wb_s2;
assign VX_csr_wb.csr_result = final_csr_data;
endmodule

View File

@@ -119,7 +119,8 @@ module VX_decode(
assign is_auipc = (curr_opcode == `AUIPC_INST);
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
assign is_csr_immed = (is_csr) && (func3[2] == 1);
assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
assign is_e_inst = in_instruction == 32'h00000073;
assign is_gpgpu = (curr_opcode == `GPGPU_INST);

View File

@@ -12,7 +12,7 @@
// `define SYN 1
// `define ASIC 1
`define SYN_FUNC 1
// `define SYN_FUNC 1
`define NUM_BARRIERS 4
@@ -128,14 +128,16 @@
// `define PARAM
// oooooo
//Cache configurations
//Cache configurations
//Bytes
`define ICACHE_SIZE 1024
`define ICACHE_SIZE 4096
`define ICACHE_WAYS 2
//Bytes
`define ICACHE_BLOCK 16
`define ICACHE_BANKS 1
`define ICACHE_BLOCK 64
`define ICACHE_BANKS 4
`define ICACHE_LOG_NUM_BANKS `CLOG2(`ICACHE_BANKS)
`define ICACHE_NUM_WORDS_PER_BLOCK (`ICACHE_BLOCK / (`ICACHE_BANKS * 4))

View File

@@ -12,7 +12,10 @@ module VX_execute_unit (
// JAL Response
VX_jal_response_inter VX_jal_rsp,
// Branch Response
VX_branch_response_inter VX_branch_rsp
VX_branch_response_inter VX_branch_rsp,
input wire no_slot_exec,
output wire out_delay
);
@@ -41,10 +44,13 @@ module VX_execute_unit (
wire[`NT_M1:0][31:0] alu_result;
wire[`NT_M1:0] alu_stall;
genvar index_out_reg;
generate
for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin : alu_defs
VX_alu vx_alu(
.clk(clk),
.reset(reset),
// .in_reg_data (in_reg_data[1:0]),
.in_1 (in_a_reg_data[index_out_reg]),
.in_2 (in_b_reg_data[index_out_reg]),
@@ -53,11 +59,17 @@ module VX_execute_unit (
.in_upper_immed(in_upper_immed),
.in_alu_op (in_alu_op),
.in_curr_PC (in_curr_PC),
.out_alu_result(alu_result[index_out_reg])
.out_alu_result(alu_result[index_out_reg]),
.out_alu_stall(alu_stall[index_out_reg])
);
end
endgenerate
wire internal_stall;
assign internal_stall = |alu_stall;
assign out_delay = no_slot_exec || internal_stall;
wire [$clog2(`NT)-1:0] jal_branch_use_index;
wire jal_branch_found_valid;
@@ -103,7 +115,7 @@ module VX_execute_unit (
// Actual Writeback
assign VX_inst_exec_wb.rd = VX_exec_unit_req.rd;
assign VX_inst_exec_wb.wb = VX_exec_unit_req.wb;
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid;
assign VX_inst_exec_wb.wb_valid = VX_exec_unit_req.valid && !internal_stall;
assign VX_inst_exec_wb.wb_warp_num = VX_exec_unit_req.warp_num;
assign VX_inst_exec_wb.alu_result = VX_exec_unit_req.jal ? duplicate_PC_data : alu_result;
@@ -163,4 +175,4 @@ module VX_execute_unit (
// assign out_is_csr = VX_exec_unit_req.is_csr;
// assign out_csr_address = VX_exec_unit_req.csr_address;
endmodule
endmodule : VX_execute_unit

View File

@@ -7,6 +7,8 @@ module VX_gpr_stage (
input wire schedule_delay,
input wire memory_delay,
input wire exec_delay,
input wire stall_gpr_csr,
output wire gpr_stage_delay,
// inputs
@@ -93,7 +95,12 @@ module VX_gpr_stage (
wire stall_lsu = memory_delay;
wire flush_lsu = schedule_delay && !stall_lsu;
assign gpr_stage_delay = stall_lsu;
wire stall_exec = exec_delay;
wire flush_exec = schedule_delay && !stall_exec;
wire stall_csr = stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid);
assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;
`ifdef ASIC
wire delayed_lsu_last_cycle;
@@ -145,8 +152,8 @@ module VX_gpr_stage (
VX_generic_register #(.N(224 + `NW_M1 + 1 + (`NT))) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.stall(stall_exec),
.flush(flush_exec),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
);
@@ -166,13 +173,13 @@ module VX_gpr_stage (
assign VX_gpu_inst_req.a_reg_data = real_base_address;
assign VX_gpu_inst_req.rd2 = real_store_data;
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.stall(stall_gpr_csr),
.flush(flush_rest),
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
);
@@ -193,8 +200,8 @@ module VX_gpr_stage (
VX_generic_register #(.N(224 + `NW_M1 + 1 + 65*(`NT))) exec_unit_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.flush(flush_rest),
.stall(stall_exec),
.flush(flush_exec),
.in ({VX_exec_unit_req_temp.valid, VX_exec_unit_req_temp.warp_num, VX_exec_unit_req_temp.curr_PC, VX_exec_unit_req_temp.PC_next, VX_exec_unit_req_temp.rd, VX_exec_unit_req_temp.wb, VX_exec_unit_req_temp.a_reg_data, VX_exec_unit_req_temp.b_reg_data, VX_exec_unit_req_temp.alu_op, VX_exec_unit_req_temp.rs1, VX_exec_unit_req_temp.rs2, VX_exec_unit_req_temp.rs2_src, VX_exec_unit_req_temp.itype_immed, VX_exec_unit_req_temp.upper_immed, VX_exec_unit_req_temp.branch_type, VX_exec_unit_req_temp.jalQual, VX_exec_unit_req_temp.jal, VX_exec_unit_req_temp.jal_offset, VX_exec_unit_req_temp.ebreak, VX_exec_unit_req_temp.wspawn, VX_exec_unit_req_temp.is_csr, VX_exec_unit_req_temp.csr_address, VX_exec_unit_req_temp.csr_immed, VX_exec_unit_req_temp.csr_mask}),
.out ({VX_exec_unit_req.valid , VX_exec_unit_req.warp_num , VX_exec_unit_req.curr_PC , VX_exec_unit_req.PC_next , VX_exec_unit_req.rd , VX_exec_unit_req.wb , VX_exec_unit_req.a_reg_data , VX_exec_unit_req.b_reg_data , VX_exec_unit_req.alu_op , VX_exec_unit_req.rs1 , VX_exec_unit_req.rs2 , VX_exec_unit_req.rs2_src , VX_exec_unit_req.itype_immed , VX_exec_unit_req.upper_immed , VX_exec_unit_req.branch_type , VX_exec_unit_req.jalQual , VX_exec_unit_req.jal , VX_exec_unit_req.jal_offset , VX_exec_unit_req.ebreak , VX_exec_unit_req.wspawn , VX_exec_unit_req.is_csr , VX_exec_unit_req.csr_address , VX_exec_unit_req.csr_immed , VX_exec_unit_req.csr_mask })
);
@@ -208,15 +215,15 @@ module VX_gpr_stage (
.out ({VX_gpu_inst_req.valid , VX_gpu_inst_req.warp_num , VX_gpu_inst_req.is_wspawn , VX_gpu_inst_req.is_tmc , VX_gpu_inst_req.is_split , VX_gpu_inst_req.is_barrier , VX_gpu_inst_req.pc_next , VX_gpu_inst_req.a_reg_data , VX_gpu_inst_req.rd2 })
);
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
.clk (clk),
.reset(reset),
.stall(stall_rest),
.stall(stall_gpr_csr),
.flush(flush_rest),
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
.in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
.out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
);
`endif
endmodule
endmodule : VX_gpr_stage

View File

@@ -84,6 +84,7 @@ module VX_inst_multiplex (
assign VX_csr_req.warp_num = VX_bckE_req.warp_num;
assign VX_csr_req.rd = VX_bckE_req.rd;
assign VX_csr_req.wb = VX_bckE_req.wb;
assign VX_csr_req.alu_op = VX_bckE_req.alu_op;
assign VX_csr_req.is_csr = VX_bckE_req.is_csr;
assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed;

View File

@@ -6,6 +6,7 @@ module VX_scheduler (
input wire clk,
input wire reset,
input wire memory_delay,
input wire exec_delay,
input wire gpr_stage_delay,
VX_frE_to_bckE_req_inter VX_bckE_req,
VX_wb_inter VX_writeback_inter,
@@ -27,7 +28,11 @@ module VX_scheduler (
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
wire is_load = (VX_bckE_req.mem_read != `NO_MEM_READ);
// classify our next instruction.
wire is_mem = is_store || is_load;
wire is_gpu = (VX_bckE_req.is_wspawn || VX_bckE_req.is_tmc || VX_bckE_req.is_barrier || VX_bckE_req.is_split);
wire is_csr = VX_bckE_req.is_csr;
wire is_exec = !is_mem && !is_gpu && !is_csr;
wire rs1_pass = ((valid_wb && (VX_writeback_inter.rd == VX_bckE_req.rs1)));
@@ -44,8 +49,10 @@ module VX_scheduler (
wire rename_valid = rs1_rename_qual || rs2_rename_qual ;
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid)) || (memory_delay && (is_mem)) || (gpr_stage_delay && is_mem);
assign schedule_delay = ((rename_valid) && (|VX_bckE_req.valid))
|| (memory_delay && is_mem)
|| (gpr_stage_delay && (is_mem || is_exec))
|| (exec_delay && is_exec);
integer i;
integer w;

View File

@@ -14,10 +14,11 @@ module VX_writeback (
// Actual WB to GPR
VX_wb_inter VX_writeback_inter,
output wire no_slot_mem
output wire no_slot_mem,
output wire no_slot_exec,
output wire no_slot_csr
);
VX_wb_inter VX_writeback_tempp();
wire exec_wb = (VX_inst_exec_wb.wb != 0) && (|VX_inst_exec_wb.wb_valid);
@@ -26,6 +27,8 @@ module VX_writeback (
assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr = csr_wb && (exec_wb);
assign no_slot_exec = 0;
assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
csr_wb ? VX_csr_wb.csr_result :
@@ -65,17 +68,6 @@ module VX_writeback (
wire[`NT-1:0][31:0] use_wb_data;
reg prev_is_mem;
always @(posedge clk, posedge reset) begin
if (reset)
begin
prev_is_mem = 0;
end begin
prev_is_mem = mem_wb && !no_slot_mem;
end
end
VX_generic_register #(.N(39 + `NW_M1 + 1 + `NT*33)) wb_register(
.clk (clk),
.reset(reset),
@@ -85,14 +77,16 @@ module VX_writeback (
.out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
);
`ifdef SYN
assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data;
`else
assign VX_writeback_inter.write_data = use_wb_data;
`endif
reg[31:0] last_data_wb;
always @(posedge clk) begin
if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin
last_data_wb <= use_wb_data[0];
end
end
assign VX_writeback_inter.write_data = use_wb_data;
endmodule // VX_writeback
endmodule : VX_writeback

View File

@@ -44,8 +44,29 @@ module Vortex
);
reg[31:0] icache_banks = `ICACHE_BANKS;
reg[31:0] icache_num_words_per_block = `ICACHE_NUM_WORDS_PER_BLOCK;
reg[31:0] dcache_banks = `DCACHE_BANKS;
reg[31:0] dcache_num_words_per_block = `DCACHE_NUM_WORDS_PER_BLOCK;
reg[31:0] number_threads = `NT;
reg[31:0] number_warps = `NW;
always @(posedge clk) begin
icache_banks <= icache_banks;
icache_num_words_per_block <= icache_num_words_per_block;
dcache_banks <= dcache_banks;
dcache_num_words_per_block <= dcache_num_words_per_block;
number_threads <= number_threads;
number_warps <= number_warps;
end
wire memory_delay;
wire exec_delay;
wire gpr_stage_delay;
wire schedule_delay;
@@ -179,6 +200,7 @@ VX_scheduler schedule(
.clk (clk),
.reset (reset),
.memory_delay (memory_delay),
.exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay),
.VX_bckE_req (VX_bckE_req),
.VX_writeback_inter(VX_writeback_inter),
@@ -197,6 +219,7 @@ VX_back_end vx_back_end(
.VX_dcache_req (VX_dcache_req),
.VX_writeback_inter (VX_writeback_inter),
.out_mem_delay (memory_delay),
.out_exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay)
);

View File

@@ -13,8 +13,8 @@ module VX_divide
input [WIDTHN-1:0] numer,
input [WIDTHD-1:0] denom,
output [WIDTHN-1:0] quotient,
output [WIDTHD-1:0] remainder
output reg [WIDTHN-1:0] quotient,
output reg [WIDTHD-1:0] remainder
);
// synthesis read_comments_as_HDL on
@@ -49,65 +49,90 @@ module VX_divide
.numer(numer),
.denom(denom),
.quotient(quotient),
.remainder(remainder)
.remain(remainder)
);
end
else if (PIPELINE == 0) begin
if (NREP == "SIGNED") begin
assign quotient = $signed($signed(numer)/$signed(denom));
assign remainder = $signed($signed(numer)%$signed(denom));
end
else begin
assign quotient = numer/denom;
assign remainder = numer%denom;
end
end
else begin
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
wire [WIDTHN-1:0] numer_pipe_end;
wire [WIDTHD-1:0] denom_pipe_end;
if (PIPELINE == 0) begin
assign numer_pipe_end = numer;
assign denom_pipe_end = denom;
end else begin
reg [WIDTHN-1:0] numer_pipe [0:PIPELINE-1];
reg [WIDTHD-1:0] denom_pipe [0:PIPELINE-1];
genvar pipe_stage;
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[pipe_stage+1] <= 0;
denom_pipe[pipe_stage+1] <= 0;
end
else if (clken) begin
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
end
end
end
genvar pipe_stage;
for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[pipe_stage+1] <= 0;
denom_pipe[pipe_stage+1] <= 0;
numer_pipe[0] <= 0;
denom_pipe[0] <= 0;
end
else if (clken) begin
numer_pipe[pipe_stage+1] <= numer_pipe[pipe_stage];
denom_pipe[pipe_stage+1] <= denom_pipe[pipe_stage];
numer_pipe[0] <= numer;
denom_pipe[0] <= denom;
end
end
assign numer_pipe_end = numer_pipe[PIPELINE-1];
assign denom_pipe_end = denom_pipe[PIPELINE-1];
end
always @(posedge clock or posedge aclr) begin
if (aclr) begin
numer_pipe[0] <= 0;
denom_pipe[0] <= 0;
end
else if (clken) begin
numer_pipe[0] <= numer;
denom_pipe[0] <= denom;
end
end
wire [WIDTHN-1:0] numer_pipe_end;
assign numer_pipe_end = numer_pipe[PIPELINE-1];
wire [WIDTHD-1:0] denom_pipe_end;
assign denom_pipe_end = denom_pipe[PIPELINE-1];
/* * * * * * * * * * * * * * * * * * * * * * */
/* Do the actual fallback computation here */
/* * * * * * * * * * * * * * * * * * * * * * */
if (NREP == "SIGNED") begin
assign quotient = $signed($signed(numer_pipe_end)/$signed(denom_pipe_end));
assign remainder = $signed($signed(numer_pipe_end)%$signed(denom_pipe_end));
/*VX_divide_internal_signed #(
.WIDTHN,
.WIDTHD
)div(
.numer(numer_pipe_end),
.denom(denom_pipe_end),
.quotient,
.remainder
);*/
always @(*) begin
if (denom_pipe_end == 0) begin
quotient = 32'hffffffff;
remainder = numer_pipe_end;
end
else if (denom_pipe_end == 32'hffffffff && numer_pipe_end == 32'h80000000) begin
// this edge case kills verilator in some cases by causing a division
// overflow exception. INT_MIN / -1 (on x86)
quotient = 0;
remainder = 0;
end
else begin
quotient = $signed($signed(numer_pipe_end)/$signed(denom_pipe_end));
remainder = $signed($signed(numer_pipe_end)%$signed(denom_pipe_end));
end
end
end
else begin
assign quotient = numer_pipe_end/denom_pipe_end;
assign remainder = numer_pipe_end%denom_pipe_end;
assign quotient = (denom_pipe_end == 0) ? 32'hffffffff : numer_pipe_end/denom_pipe_end;
assign remainder = (denom_pipe_end == 0) ? numer_pipe_end : numer_pipe_end%denom_pipe_end;
end
end
endgenerate
endmodule: VX_divide
endmodule : VX_divide

View File

@@ -11,7 +11,7 @@ interface VX_csr_req_inter ();
wire[`NW_M1:0] warp_num;
wire[4:0] rd;
wire[1:0] wb;
wire[4:0] alu_op;
wire is_csr;
wire[11:0] csr_address;
wire csr_immed;

View File

@@ -33,6 +33,7 @@ STAMP = echo done >
$(PROJECT).syn.rpt: syn.chg $(SOURCE_FILES)
$(QUARTUS_ROOT)/quartus/bin/quartus_syn $(PROJECT) $(SYN_ARGS)
$(QUARTUS_ROOT)/quartus/bin/quartus_sh -t make_pins_virtual.tcl
$(STAMP) fit.chg
$(PROJECT).fit.rpt: fit.chg $(PROJECT).syn.rpt
@@ -45,6 +46,7 @@ $(PROJECT).asm.rpt: asm.chg $(PROJECT).fit.rpt
$(PROJECT).sta.rpt: sta.chg $(PROJECT).fit.rpt
$(QUARTUS_ROOT)/quartus/bin/quartus_sta $(PROJECT) $(STA_ARGS)
$(QUARTUS_ROOT)/quartus/bin/quartus_sta -t VX_timing.tcl
smart.log: $(PROJECT_FILES)
$(QUARTUS_ROOT)/quartus/bin/quartus_sh --determine_smart_action $(PROJECT) > smart.log
@@ -69,4 +71,4 @@ program: $(PROJECT).sof
quartus_pgm --no_banner --mode=jtag -o "P;$(PROJECT).sof"
clean:
rm -rf *.rpt *.chg *.qsf *.qpf smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db output_files tmp-clearbox
rm -rf *.rpt *.chg *.qsf *.qpf smart.log *.htm *.eqn *.pin *.sof *.pof qdb incremental_db output_files tmp-clearbox bin/

View File

@@ -2,10 +2,12 @@ load_package flow
package require cmdline
project_open Vortex
proc make_all_pins_virtual { args } {
remove_all_instance_assignments -name VIRTUAL_PIN
execute_module -tool map
# execute_module -tool map
set name_ids [get_names -filter * -node_type pin]
foreach_in_collection name_id $name_ids {

View File

@@ -70,6 +70,7 @@ set_global_assignment -name VERILOG_FILE ../shared_memory/VX_shared_memory_block
set_global_assignment -name VERILOG_FILE ../shared_memory/VX_shared_memory.v
set_global_assignment -name VERILOG_FILE ../shared_memory/VX_priority_encoder_sm.v
set_global_assignment -name VERILOG_FILE ../shared_memory/VX_bank_valids.v
set_global_assignment -name VERILOG_FILE ../compat/VX_divide.v
set_global_assignment -name VERILOG_FILE ../VX_alu.v
set_global_assignment -name VERILOG_FILE ../VX_back_end.v
set_global_assignment -name VERILOG_FILE ../VX_context.v
@@ -106,6 +107,10 @@ set_global_assignment -name MAX_CORE_JUNCTION_TEMP 100
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY bin
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
# set where [file dirname [info script]]
# source [file join $where make_pins_virtual.tcl]
project_close
# set_global_assignment -name VERILOG_FILE $opts(src)

View File

@@ -4,3 +4,6 @@ create_clock -name {clk} -period "400 MHz" -waveform { 0.0 1.0 } [get_ports {clk
derive_pll_clocks -create_base_clocks
derive_clock_uncertainty

View File

@@ -3,8 +3,8 @@
#define NW 8
// #define CACHE_NUM_BANKS 8
// #define CACHE_WORDS_PER_BLOCK 4
#define CACHE_NUM_BANKS 8
#define CACHE_WORDS_PER_BLOCK 4
#define R_INST 51
#define L_INST 3

View File

@@ -167,11 +167,12 @@ uint32_t hToI(char *c, uint32_t size) {
void loadHexImpl(char * path,RAM* mem) {
void loadHexImpl(const char *path, RAM* mem) {
mem->clear();
FILE *fp = fopen(&path[0], "r");
FILE *fp = fopen(path, "r");
if(fp == 0){
printf("Path not found %s\n", path);
return;
// std::cout << path << " not found" << std::endl;
}
//Preload 0x0 <-> 0x80000000 jumps

View File

@@ -12,83 +12,94 @@ int main(int argc, char **argv)
Verilated::traceEverOn(true);
#define ALL_TESTS
#ifdef ALL_TESTS
bool passed = true;
std::string tests[NUM_TESTS] = {
"../../emulator/riscv_tests/rv32ui-p-add.hex",
"../../emulator/riscv_tests/rv32ui-p-addi.hex",
"../../emulator/riscv_tests/rv32ui-p-and.hex",
"../../emulator/riscv_tests/rv32ui-p-andi.hex",
"../../emulator/riscv_tests/rv32ui-p-auipc.hex",
"../../emulator/riscv_tests/rv32ui-p-beq.hex",
"../../emulator/riscv_tests/rv32ui-p-bge.hex",
"../../emulator/riscv_tests/rv32ui-p-bgeu.hex",
"../../emulator/riscv_tests/rv32ui-p-blt.hex",
"../../emulator/riscv_tests/rv32ui-p-bltu.hex",
"../../emulator/riscv_tests/rv32ui-p-bne.hex",
"../../emulator/riscv_tests/rv32ui-p-jal.hex",
"../../emulator/riscv_tests/rv32ui-p-jalr.hex",
"../../emulator/riscv_tests/rv32ui-p-lb.hex",
"../../emulator/riscv_tests/rv32ui-p-lbu.hex",
"../../emulator/riscv_tests/rv32ui-p-lh.hex",
"../../emulator/riscv_tests/rv32ui-p-lhu.hex",
"../../emulator/riscv_tests/rv32ui-p-lui.hex",
"../../emulator/riscv_tests/rv32ui-p-lw.hex",
"../../emulator/riscv_tests/rv32ui-p-or.hex",
"../../emulator/riscv_tests/rv32ui-p-ori.hex",
"../../emulator/riscv_tests/rv32ui-p-sb.hex",
"../../emulator/riscv_tests/rv32ui-p-sh.hex",
"../../emulator/riscv_tests/rv32ui-p-simple.hex",
"../../emulator/riscv_tests/rv32ui-p-sll.hex",
"../../emulator/riscv_tests/rv32ui-p-slli.hex",
"../../emulator/riscv_tests/rv32ui-p-slt.hex",
"../../emulator/riscv_tests/rv32ui-p-slti.hex",
"../../emulator/riscv_tests/rv32ui-p-sltiu.hex",
"../../emulator/riscv_tests/rv32ui-p-sltu.hex",
"../../emulator/riscv_tests/rv32ui-p-sra.hex",
"../../emulator/riscv_tests/rv32ui-p-srai.hex",
"../../emulator/riscv_tests/rv32ui-p-srl.hex",
"../../emulator/riscv_tests/rv32ui-p-srli.hex",
"../../emulator/riscv_tests/rv32ui-p-sub.hex",
"../../emulator/riscv_tests/rv32ui-p-sw.hex",
"../../emulator/riscv_tests/rv32ui-p-xor.hex",
"../../emulator/riscv_tests/rv32ui-p-xori.hex",
"../../emulator/riscv_tests/rv32um-p-div.hex",
"../../emulator/riscv_tests/rv32um-p-divu.hex",
"../../emulator/riscv_tests/rv32um-p-mul.hex",
"../../emulator/riscv_tests/rv32um-p-mulh.hex",
"../../emulator/riscv_tests/rv32um-p-mulhsu.hex",
"../../emulator/riscv_tests/rv32um-p-mulhu.hex",
"../../emulator/riscv_tests/rv32um-p-rem.hex",
"../../emulator/riscv_tests/rv32um-p-remu.hex"
};
// bool passed = true;
// std::string tests[NUM_TESTS] = {
// "../../emulator/riscv_tests/rv32ui-p-add.hex",
// "../../emulator/riscv_tests/rv32ui-p-addi.hex",
// "../../emulator/riscv_tests/rv32ui-p-and.hex",
// "../../emulator/riscv_tests/rv32ui-p-andi.hex",
// "../../emulator/riscv_tests/rv32ui-p-auipc.hex",
// "../../emulator/riscv_tests/rv32ui-p-beq.hex",
// "../../emulator/riscv_tests/rv32ui-p-bge.hex",
// "../../emulator/riscv_tests/rv32ui-p-bgeu.hex",
// "../../emulator/riscv_tests/rv32ui-p-blt.hex",
// "../../emulator/riscv_tests/rv32ui-p-bltu.hex",
// "../../emulator/riscv_tests/rv32ui-p-bne.hex",
// "../../emulator/riscv_tests/rv32ui-p-jal.hex",
// "../../emulator/riscv_tests/rv32ui-p-jalr.hex",
// "../../emulator/riscv_tests/rv32ui-p-lb.hex",
// "../../emulator/riscv_tests/rv32ui-p-lbu.hex",
// "../../emulator/riscv_tests/rv32ui-p-lh.hex",
// "../../emulator/riscv_tests/rv32ui-p-lhu.hex",
// "../../emulator/riscv_tests/rv32ui-p-lui.hex",
// "../../emulator/riscv_tests/rv32ui-p-lw.hex",
// "../../emulator/riscv_tests/rv32ui-p-or.hex",
// "../../emulator/riscv_tests/rv32ui-p-ori.hex",
// "../../emulator/riscv_tests/rv32ui-p-sb.hex",
// "../../emulator/riscv_tests/rv32ui-p-sh.hex",
// "../../emulator/riscv_tests/rv32ui-p-simple.hex",
// "../../emulator/riscv_tests/rv32ui-p-sll.hex",
// "../../emulator/riscv_tests/rv32ui-p-slli.hex",
// "../../emulator/riscv_tests/rv32ui-p-slt.hex",
// "../../emulator/riscv_tests/rv32ui-p-slti.hex",
// "../../emulator/riscv_tests/rv32ui-p-sltiu.hex",
// "../../emulator/riscv_tests/rv32ui-p-sltu.hex",
// "../../emulator/riscv_tests/rv32ui-p-sra.hex",
// "../../emulator/riscv_tests/rv32ui-p-srai.hex",
// "../../emulator/riscv_tests/rv32ui-p-srl.hex",
// "../../emulator/riscv_tests/rv32ui-p-srli.hex",
// "../../emulator/riscv_tests/rv32ui-p-sub.hex",
// "../../emulator/riscv_tests/rv32ui-p-sw.hex",
// "../../emulator/riscv_tests/rv32ui-p-xor.hex",
// "../../emulator/riscv_tests/rv32ui-p-xori.hex",
// "../../emulator/riscv_tests/rv32um-p-div.hex",
// "../../emulator/riscv_tests/rv32um-p-divu.hex",
// "../../emulator/riscv_tests/rv32um-p-mul.hex",
// "../../emulator/riscv_tests/rv32um-p-mulh.hex",
// "../../emulator/riscv_tests/rv32um-p-mulhsu.hex",
// "../../emulator/riscv_tests/rv32um-p-mulhu.hex",
// "../../emulator/riscv_tests/rv32um-p-rem.hex",
// "../../emulator/riscv_tests/rv32um-p-remu.hex"
// };
for (std::string s : tests) {
Vortex v;
// for (int ii = 0; ii < NUM_TESTS; ii++)
// // for (int ii = 5; ii < 6; ii++)
// {
// std::cout << "TESTING: " << tests[ii] << '\n';
// Vortex v;
// bool curr = v.simulate(tests[ii]);
std::cerr << s << std::endl;
// if ( curr) std::cerr << GREEN << "Test Passed: " << tests[ii] << std::endl;
// if (!curr) std::cerr << RED << "Test Failed: " << tests[ii] << std::endl;
// passed = passed && curr;
bool curr = v.simulate(s);
if ( curr) std::cerr << GREEN << "Test Passed: " << s << std::endl;
if (!curr) std::cerr << RED << "Test Failed: " << s << std::endl;
passed = passed && curr;
}
// std::cerr << DEFAULT;
// }
if( passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n";
if(!passed) std::cerr << DEFAULT << "Failed one or more tests\n";
// if( passed) std::cerr << DEFAULT << "PASSED ALL TESTS\n";
// if(!passed) std::cerr << DEFAULT << "Failed one or more tests\n";
return !passed;
#else
// char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex";
char testing[] = "../../emulator/riscv_tests/rv32ui-p-sw.hex";
Vortex v;
char testing[] = "../../kernel/vortex_test.hex";
const char *testing;
if (argc >= 2) {
testing = argv[1];
} else {
testing = "../../kernel/vortex_test.hex";
}
std::cerr << testing << std::endl;
bool curr = v.simulate(testing);
if ( curr) std::cerr << GREEN << "Test Passed: " << testing << std::endl;
if (!curr) std::cerr << RED << "Test Failed: " << testing << std::endl;
return 0;
return !curr;
#endif
}

View File

@@ -46,8 +46,10 @@ class Vortex
VVortex * vortex;
unsigned start_pc;
bool refill;
unsigned refill_addr;
bool refill_d;
unsigned refill_addr_d;
bool refill_i;
unsigned refill_addr_i;
long int curr_cycle;
bool stop;
bool unit_test;
@@ -100,7 +102,7 @@ Vortex::~Vortex()
void Vortex::ProcessFile(void)
{
loadHexImpl("../../kernel/vortex_test.hex", &this->ram);
loadHexImpl(this->instruction_file_name.c_str(), &this->ram);
}
void Vortex::print_stats(bool cycle_test)
@@ -154,38 +156,66 @@ void Vortex::print_stats(bool cycle_test)
bool Vortex::ibus_driver()
{
////////////////////// IBUS //////////////////////
unsigned new_PC;
bool stop = false;
uint32_t curr_inst = 0;
vortex->i_m_ready_i = false;
curr_inst = 0xdeadbeef;
new_PC = vortex->icache_request_pc_address;
ram.getWord(new_PC, &curr_inst);
vortex->icache_response_instruction = curr_inst;
// std::cout << std::hex << "IReq: " << vortex->icache_request_pc_address << "\tResp: " << curr_inst << "\n";
// printf("\n\n---------------------------------------------\n(%x) Inst: %x\n", new_PC, curr_inst);
// printf("\n");
////////////////////// IBUS //////////////////////
////////////////////// STATS //////////////////////
if (((((unsigned int)curr_inst) != 0) && (((unsigned int)curr_inst) != 0xffffffff)))
{
++stats_dynamic_inst;
stop = false;
} else
{
// printf("Ibus requesting stop: %x\n", curr_inst);
stop = true;
// int dcache_num_words_per_block
if (refill_i)
{
refill_i = false;
vortex->i_m_ready_i = true;
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++)
{
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++)
{
unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank;
unsigned curr_addr = refill_addr_i + (4*curr_index);
unsigned curr_value;
ram.getWord(curr_addr, &curr_value);
vortex->i_m_readdata_i[curr_bank][curr_word] = curr_value;
}
}
}
else
{
if (vortex->o_m_valid_i)
{
if (vortex->o_m_read_or_write_i)
{
// fprintf(stderr, "++++++++++++++++++++++++++++++++\n");
unsigned base_addr = vortex->o_m_evict_addr_i;
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__icache_banks; curr_bank++)
{
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__icache_num_words_per_block; curr_word++)
{
unsigned curr_index = (curr_word * vortex->Vortex__DOT__icache_banks) + curr_bank;
unsigned curr_addr = base_addr + (4*curr_index);
unsigned curr_value = vortex->o_m_writedata_i[curr_bank][curr_word];
ram.writeWord( curr_addr, &curr_value);
}
}
}
// Respond next cycle
refill_i = true;
refill_addr_i = vortex->o_m_read_addr_i;
}
}
}
return stop;
return false;
}
@@ -197,6 +227,7 @@ void Vortex::io_handler()
char c = (char) data_write;
std::cerr << c;
// std::cout << c;
}
}
@@ -204,75 +235,62 @@ void Vortex::io_handler()
bool Vortex::dbus_driver()
{
// printf("****************************\n");
vortex->i_m_ready_d = false;
vortex->i_m_ready = 0;
for (int i = 0; i < CACHE_NUM_BANKS; i++)
{
for (int j = 0; j < CACHE_WORDS_PER_BLOCK; j++)
// int dcache_num_words_per_block
if (refill_d)
{
vortex->i_m_readdata[i][j] = 0;
}
}
refill_d = false;
vortex->i_m_ready_d = true;
if (this->refill)
{
this->refill = false;
vortex->i_m_ready = 1;
for (int curr_e = 0; curr_e < (CACHE_NUM_BANKS*CACHE_WORDS_PER_BLOCK); curr_e++)
{
unsigned new_addr = this->refill_addr + (4*curr_e);
unsigned addr_without_byte = new_addr >> 2;
unsigned bank_num = addr_without_byte & 0x7;
unsigned addr_wihtout_bank = addr_without_byte >> 3;
unsigned offset_num = addr_wihtout_bank & 0x3;
unsigned value;
ram.getWord(new_addr, &value);
// printf("-------- (%x) i_m_readdata[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, value);
vortex->i_m_readdata[bank_num][offset_num] = value;
}
}
else
{
if (vortex->o_m_valid)
{
// printf("Valid o_m_valid\n");
if (vortex->o_m_read_or_write)
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__dcache_banks; curr_bank++)
{
// printf("Valid write\n");
for (int curr_e = 0; curr_e < (CACHE_NUM_BANKS*CACHE_WORDS_PER_BLOCK); curr_e++)
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__dcache_num_words_per_block; curr_word++)
{
unsigned new_addr = vortex->o_m_evict_addr + (4*curr_e);
unsigned curr_index = (curr_word * vortex->Vortex__DOT__dcache_banks) + curr_bank;
unsigned curr_addr = refill_addr_d + (4*curr_index);
unsigned curr_value;
ram.getWord(curr_addr, &curr_value);
unsigned addr_without_byte = new_addr >> 2;
unsigned bank_num = addr_without_byte & 0x7;
unsigned addr_wihtout_bank = addr_without_byte >> 3;
unsigned offset_num = addr_wihtout_bank & 0x3;
vortex->i_m_readdata_d[curr_bank][curr_word] = curr_value;
unsigned new_value = vortex->o_m_writedata[bank_num][offset_num];
ram.writeWord( new_addr, &new_value);
// printf("+++++++ (%x) writeback[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, new_value);
// printf("+++++++ (%x) i_m_readdata[%d][%d] (%d) = %d\n", new_addr, bank_num, offset_num, curr_e, value);
}
}
// Respond next cycle
this->refill = true;
this->refill_addr = vortex->o_m_read_addr;
}
else
{
if (vortex->o_m_valid_d)
{
if (vortex->o_m_read_or_write_d)
{
// fprintf(stderr, "++++++++++++++++++++++++++++++++\n");
unsigned base_addr = vortex->o_m_evict_addr_d;
for (int curr_bank = 0; curr_bank < vortex->Vortex__DOT__dcache_banks; curr_bank++)
{
for (int curr_word = 0; curr_word < vortex->Vortex__DOT__dcache_num_words_per_block; curr_word++)
{
unsigned curr_index = (curr_word * vortex->Vortex__DOT__dcache_banks) + curr_bank;
unsigned curr_addr = base_addr + (4*curr_index);
unsigned curr_value = vortex->o_m_writedata_d[curr_bank][curr_word];
ram.writeWord( curr_addr, &curr_value);
}
}
}
// Respond next cycle
refill_d = true;
refill_addr_d = vortex->o_m_read_addr_d;
}
}
}
@@ -397,7 +415,9 @@ bool Vortex::simulate(std::string file_to_simulate)
std::cerr << "New Total Cycles: " << (this->stats_total_cycles) << "\n";
// int status = (unsigned int) vortex->Vortex__DOT__vx_front_end__DOT__vx_decode__DOT__vx_grp_wrapper__DOT__genblk2__BRA__0__KET____DOT__vx_gpr__DOT__first_ram__DOT__GPR[28][0] & 0xf;
int status = (unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb & 0xf;
// std::cout << "Last wb: " << std::hex << ((unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb) << "\n";
// std::cout << "Something: " << result << '\n';
@@ -408,6 +428,6 @@ bool Vortex::simulate(std::string file_to_simulate)
// return (status == 1);
return (1 == 1);
return (status == 1);
// return (1 == 1);
}