fixed FPU-CSR data dependence

This commit is contained in:
Blaise Tine
2020-11-25 09:05:38 -08:00
parent 71b98b166c
commit 461be0880d
24 changed files with 191 additions and 187 deletions

View File

@@ -22,12 +22,12 @@ install:
script: script:
- make -s - make -s
- ./ci/test_runtime.sh - ./ci/test_runtime.sh
- ./ci/test_driver.sh
- ./ci/test_riscv_isa.sh - ./ci/test_riscv_isa.sh
- ./ci/test_opencl.sh - ./ci/test_opencl.sh
- ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=4 --l2cache - ./ci/test_driver.sh
- ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --debug - ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --debug
- ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1" - ./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1"
- ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=4 --l2cache
- ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2 - ./ci/travis_run.py ./ci/blackbox.sh --driver=rtlsim --cores=2 --l2cache --clusters=2
after_success: after_success:

View File

@@ -83,6 +83,9 @@ VL_FLAGS += -DFPU_FAST
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized"
OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized"
PROJECT = libopae-c-vlsim.so PROJECT = libopae-c-vlsim.so
all: $(PROJECT) all: $(PROJECT)
@@ -95,7 +98,7 @@ $(RTL_DIR)/scope-defs.vh: $(SCRIPT_DIR)/scope.json
$(PROJECT): $(SRCS) $(SCOPE_VH) $(PROJECT): $(SRCS) $(SCOPE_VH)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make OPT_FAST="-Wno-aligned-new -Wmaybe-uninitialized" OPT_SLOW="-Wno-aligned-new -Wmaybe-uninitialized" -j -C obj_dir -f V$(TOP).mk make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f V$(TOP).mk
clean: clean:
rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh rm -rf $(PROJECT) obj_dir ../scope-defs.h $(RTL_DIR)/scope-defs.vh

View File

@@ -67,6 +67,9 @@ endif
# use DPI FPU # use DPI FPU
VL_FLAGS += -DFPU_FAST VL_FLAGS += -DFPU_FAST
OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized"
OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized"
PROJECT = libvortex.so PROJECT = libvortex.so
# PROJECT = libvortex.dylib # PROJECT = libvortex.dylib
@@ -74,7 +77,7 @@ all: $(PROJECT)
$(PROJECT): $(SRCS) $(PROJECT): $(SRCS)
verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT) verilator --exe --cc $(TOP) --top-module $(TOP) $(RTL_INCLUDE) $(VL_FLAGS) $(SRCS) -CFLAGS '$(CFLAGS)' -LDFLAGS '$(LDFLAGS)' -o ../$(PROJECT)
make OPT_FAST="-Wno-aligned-new -Wmaybe-uninitialized" OPT_SLOW="-Wno-aligned-new -Wmaybe-uninitialized" -j -C obj_dir -f V$(TOP).mk make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f V$(TOP).mk
clean: clean:
rm -rf $(PROJECT) obj_dir rm -rf $(PROJECT) obj_dir

View File

@@ -11,7 +11,7 @@ module VX_alu_unit #(
// Outputs // Outputs
VX_branch_ctl_if branch_ctl_if, VX_branch_ctl_if branch_ctl_if,
VX_exu_to_cmt_if alu_commit_if VX_commit_if alu_commit_if
); );
reg [`NUM_THREADS-1:0][31:0] alu_result; reg [`NUM_THREADS-1:0][31:0] alu_result;
reg [`NUM_THREADS-1:0][31:0] add_result; reg [`NUM_THREADS-1:0][31:0] add_result;

View File

@@ -3,16 +3,16 @@
module VX_commit #( module VX_commit #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// inputs // inputs
VX_exu_to_cmt_if alu_commit_if, VX_commit_if alu_commit_if,
VX_exu_to_cmt_if lsu_commit_if, VX_commit_if lsu_commit_if,
VX_exu_to_cmt_if mul_commit_if, VX_commit_if mul_commit_if,
VX_exu_to_cmt_if csr_commit_if, VX_commit_if csr_commit_if,
VX_fpu_to_cmt_if fpu_commit_if, VX_commit_if fpu_commit_if,
VX_exu_to_cmt_if gpu_commit_if, VX_commit_if gpu_commit_if,
// outputs // outputs
VX_writeback_if writeback_if, VX_writeback_if writeback_if,
@@ -52,39 +52,8 @@ module VX_commit #(
.count (commit_size) .count (commit_size)
); );
fflags_t fflags; assign cmt_to_csr_if.valid = commit_fire;
always @(*) begin assign cmt_to_csr_if.commit_size = commit_size;
fflags = 0;
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (fpu_commit_if.tmask[i]) begin
fflags.NX |= fpu_commit_if.fflags[i].NX;
fflags.UF |= fpu_commit_if.fflags[i].UF;
fflags.OF |= fpu_commit_if.fflags[i].OF;
fflags.DZ |= fpu_commit_if.fflags[i].DZ;
fflags.NV |= fpu_commit_if.fflags[i].NV;
end
end
end
reg csr_update_r;
reg [`NW_BITS-1:0] wid_r;
reg [CMTW-1:0] commit_size_r;
reg has_fflags_r;
fflags_t fflags_r;
always @(posedge clk) begin
csr_update_r <= commit_fire;
wid_r <= fpu_commit_if.wid;
commit_size_r <= commit_size;
has_fflags_r <= fpu_commit_if.has_fflags;
fflags_r <= fflags;
end
assign cmt_to_csr_if.valid = csr_update_r;
assign cmt_to_csr_if.wid = wid_r;
assign cmt_to_csr_if.commit_size = commit_size_r;
assign cmt_to_csr_if.has_fflags = has_fflags_r;
assign cmt_to_csr_if.fflags = fflags_r;
// Writeback // Writeback

View File

@@ -9,10 +9,10 @@ module VX_csr_arb (
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
// input // input
VX_exu_to_cmt_if csr_rsp_if, VX_commit_if csr_rsp_if,
// outputs // outputs
VX_exu_to_cmt_if csr_commit_if, VX_commit_if csr_commit_if,
VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_io_rsp_if csr_io_rsp_if,
input wire select_io_req, input wire select_io_req,

View File

@@ -7,7 +7,7 @@ module VX_csr_data #(
input wire reset, input wire reset,
VX_cmt_to_csr_if cmt_to_csr_if, VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_fpu_if csr_to_fpu_if, VX_fpu_to_csr_if fpu_to_csr_if,
input wire read_enable, input wire read_enable,
input wire[`CSR_ADDR_BITS-1:0] read_addr, input wire[`CSR_ADDR_BITS-1:0] read_addr,
@@ -40,9 +40,9 @@ module VX_csr_data #(
reg [31:0] read_data_r; reg [31:0] read_data_r;
always @(posedge clk) begin always @(posedge clk) begin
if (cmt_to_csr_if.valid && cmt_to_csr_if.has_fflags) begin if (fpu_to_csr_if.write_enable) begin
csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags; csr_fflags[fpu_to_csr_if.write_wid] <= fpu_to_csr_if.write_fflags;
csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags; csr_fcsr[fpu_to_csr_if.write_wid][`FFG_BITS-1:0] <= fpu_to_csr_if.write_fflags;
end end
if (write_enable) begin if (write_enable) begin
@@ -144,6 +144,6 @@ module VX_csr_data #(
end end
assign read_data = read_data_r; assign read_data = read_data_r;
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid]; assign fpu_to_csr_if.read_frm = csr_frm[fpu_to_csr_if.read_wid];
endmodule endmodule

View File

@@ -7,18 +7,20 @@ module VX_csr_unit #(
input wire reset, input wire reset,
VX_cmt_to_csr_if cmt_to_csr_if, VX_cmt_to_csr_if cmt_to_csr_if,
VX_csr_to_fpu_if csr_to_fpu_if, VX_fpu_to_csr_if fpu_to_csr_if,
VX_csr_io_req_if csr_io_req_if, VX_csr_io_req_if csr_io_req_if,
VX_csr_io_rsp_if csr_io_rsp_if, VX_csr_io_rsp_if csr_io_rsp_if,
VX_csr_req_if csr_req_if, VX_csr_req_if csr_req_if,
VX_exu_to_cmt_if csr_commit_if, VX_commit_if csr_commit_if,
input wire busy input wire busy,
input wire[`NUM_WARPS-1:0] fpu_pending,
output wire[`NUM_WARPS-1:0] pending
); );
VX_csr_req_if csr_pipe_req_if(); VX_csr_req_if csr_pipe_req_if();
VX_exu_to_cmt_if csr_pipe_rsp_if(); VX_commit_if csr_pipe_rsp_if();
wire select_io_req = csr_io_req_if.valid; wire select_io_req = csr_io_req_if.valid;
wire select_io_rsp; wire select_io_rsp;
@@ -47,7 +49,7 @@ module VX_csr_unit #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.cmt_to_csr_if (cmt_to_csr_if), .cmt_to_csr_if (cmt_to_csr_if),
.csr_to_fpu_if (csr_to_fpu_if), .fpu_to_csr_if (fpu_to_csr_if),
.read_enable (csr_pipe_req_if.valid), .read_enable (csr_pipe_req_if.valid),
.read_addr (csr_pipe_req_if.csr_addr), .read_addr (csr_pipe_req_if.csr_addr),
.read_wid (csr_pipe_req_if.wid), .read_wid (csr_pipe_req_if.wid),
@@ -90,7 +92,8 @@ module VX_csr_unit #(
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid; wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
wire stall = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid; wire stall = (~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid)
|| fpu_pending[csr_pipe_req_if.wid];
VX_generic_register #( VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32) .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
@@ -112,4 +115,20 @@ module VX_csr_unit #(
// can accept new request? // can accept new request?
assign csr_pipe_req_if.ready = ~stall; assign csr_pipe_req_if.ready = ~stall;
// pending request
reg [`NUM_WARPS-1:0] pending_r;
always @(posedge clk) begin
if (reset) begin
pending_r <= 0;
end else begin
if (csr_pipe_rsp_if.valid && csr_pipe_rsp_if.ready) begin
pending_r[csr_pipe_rsp_if.wid] <= 0;
end
if (csr_pipe_req_if.valid && csr_pipe_req_if.ready) begin
pending_r[csr_pipe_req_if.wid] <= 1;
end
end
end
assign pending = pending_r;
endmodule endmodule

View File

@@ -30,17 +30,19 @@ module VX_execute #(
// outputs // outputs
VX_branch_ctl_if branch_ctl_if, VX_branch_ctl_if branch_ctl_if,
VX_warp_ctl_if warp_ctl_if, VX_warp_ctl_if warp_ctl_if,
VX_exu_to_cmt_if alu_commit_if, VX_commit_if alu_commit_if,
VX_exu_to_cmt_if lsu_commit_if, VX_commit_if lsu_commit_if,
VX_exu_to_cmt_if csr_commit_if, VX_commit_if csr_commit_if,
VX_exu_to_cmt_if mul_commit_if, VX_commit_if mul_commit_if,
VX_fpu_to_cmt_if fpu_commit_if, VX_commit_if fpu_commit_if,
VX_exu_to_cmt_if gpu_commit_if, VX_commit_if gpu_commit_if,
input wire busy, input wire busy,
output wire ebreak output wire ebreak
); );
VX_csr_to_fpu_if csr_to_fpu_if(); VX_fpu_to_csr_if fpu_to_csr_if();
wire[`NUM_WARPS-1:0] csr_pending;
wire[`NUM_WARPS-1:0] fpu_pending;
VX_alu_unit #( VX_alu_unit #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
@@ -70,11 +72,13 @@ module VX_execute #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.cmt_to_csr_if (cmt_to_csr_if), .cmt_to_csr_if (cmt_to_csr_if),
.csr_to_fpu_if (csr_to_fpu_if), .fpu_to_csr_if (fpu_to_csr_if),
.csr_io_req_if (csr_io_req_if), .csr_io_req_if (csr_io_req_if),
.csr_io_rsp_if (csr_io_rsp_if), .csr_io_rsp_if (csr_io_rsp_if),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
.csr_commit_if (csr_commit_if), .csr_commit_if (csr_commit_if),
.fpu_pending (fpu_pending),
.pending (csr_pending),
.busy (busy) .busy (busy)
); );
@@ -105,8 +109,10 @@ module VX_execute #(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.fpu_req_if (fpu_req_if), .fpu_req_if (fpu_req_if),
.csr_to_fpu_if (csr_to_fpu_if), .fpu_to_csr_if (fpu_to_csr_if),
.fpu_commit_if (fpu_commit_if) .fpu_commit_if (fpu_commit_if),
.csr_pending (csr_pending),
.pending (fpu_pending)
); );
`else `else
assign fpu_req_if.ready = 0; assign fpu_req_if.ready = 0;

View File

@@ -9,10 +9,13 @@ module VX_fpu_unit #(
// inputs // inputs
VX_fpu_req_if fpu_req_if, VX_fpu_req_if fpu_req_if,
VX_csr_to_fpu_if csr_to_fpu_if,
// outputs // outputs
VX_fpu_to_cmt_if fpu_commit_if VX_fpu_to_csr_if fpu_to_csr_if,
VX_commit_if fpu_commit_if,
input wire[`NUM_WARPS-1:0] csr_pending,
output wire[`NUM_WARPS-1:0] pending
); );
localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE); localparam FPUQ_BITS = `LOG2UP(`FPUQ_SIZE);
@@ -53,13 +56,13 @@ module VX_fpu_unit #(
); );
// can accept new request? // can accept new request?
assign fpu_req_if.ready = ready_in && ~fpuq_full; assign fpu_req_if.ready = ready_in && ~fpuq_full && !csr_pending[fpu_req_if.wid];
wire valid_in = fpu_req_if.valid && ~fpuq_full; wire valid_in = fpu_req_if.valid && ~fpuq_full && !csr_pending[fpu_req_if.wid];
// resolve dynamic FRM // resolve dynamic FRM from CSR
assign csr_to_fpu_if.wid = fpu_req_if.wid; assign fpu_to_csr_if.read_wid = fpu_req_if.wid;
wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.op_mod; wire [`FRM_BITS-1:0] fpu_frm = (fpu_req_if.op_mod == `FRM_DYN) ? fpu_to_csr_if.read_frm : fpu_req_if.op_mod;
`ifdef FPU_FAST `ifdef FPU_FAST
@@ -127,19 +130,57 @@ module VX_fpu_unit #(
`endif `endif
reg has_fflags_r;
fflags_t fflags_r;
fflags_t rsp_fflags;
always @(*) begin
rsp_fflags = 0;
for (integer i = 0; i < `NUM_THREADS; i++) begin
if (rsp_tmask[i]) begin
rsp_fflags.NX |= fflags[i].NX;
rsp_fflags.UF |= fflags[i].UF;
rsp_fflags.OF |= fflags[i].OF;
rsp_fflags.DZ |= fflags[i].DZ;
rsp_fflags.NV |= fflags[i].NV;
end
end
end
wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid; wire stall_out = ~fpu_commit_if.ready && fpu_commit_if.valid;
VX_generic_register #( VX_generic_register #(
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS)) .N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `FFG_BITS)
) pipe_reg ( ) pipe_reg (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (stall_out), .stall (stall_out),
.flush (1'b0), .flush (1'b0),
.in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, fflags}), .in ({valid_out, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, result, has_fflags, rsp_fflags}),
.out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, fpu_commit_if.has_fflags, fpu_commit_if.fflags}) .out ({fpu_commit_if.valid, fpu_commit_if.wid, fpu_commit_if.tmask, fpu_commit_if.PC, fpu_commit_if.rd, fpu_commit_if.wb, fpu_commit_if.data, has_fflags_r, fflags_r})
); );
assign ready_out = ~stall_out; assign ready_out = ~stall_out;
// CSR fflags Update
assign fpu_to_csr_if.write_enable = fpu_commit_if.valid && fpu_commit_if.ready && has_fflags_r;
assign fpu_to_csr_if.write_wid = fpu_commit_if.wid;
assign fpu_to_csr_if.write_fflags = fflags_r;
// pending request
reg [`NUM_WARPS-1:0] pending_r;
always @(posedge clk) begin
if (reset) begin
pending_r <= 0;
end else begin
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
pending_r[fpu_commit_if.wid] <= 0;
end
if (fpu_req_if.valid && fpu_req_if.ready) begin
pending_r[fpu_req_if.wid] <= 1;
end
end
end
assign pending = pending_r;
endmodule endmodule

View File

@@ -5,15 +5,15 @@ module VX_gpu_unit #(
) ( ) (
`SCOPE_IO_VX_gpu_unit `SCOPE_IO_VX_gpu_unit
input wire clk, input wire clk,
input wire reset, input wire reset,
// Inputs // Inputs
VX_gpu_req_if gpu_req_if, VX_gpu_req_if gpu_req_if,
// Outputs // Outputs
VX_warp_ctl_if warp_ctl_if, VX_warp_ctl_if warp_ctl_if,
VX_exu_to_cmt_if gpu_commit_if VX_commit_if gpu_commit_if
); );
`UNUSED_VAR (clk) `UNUSED_VAR (clk)
`UNUSED_VAR (reset) `UNUSED_VAR (reset)

View File

@@ -13,10 +13,10 @@ module VX_lsu_unit #(
VX_cache_core_rsp_if dcache_rsp_if, VX_cache_core_rsp_if dcache_rsp_if,
// inputs // inputs
VX_lsu_req_if lsu_req_if, VX_lsu_req_if lsu_req_if,
// outputs // outputs
VX_exu_to_cmt_if lsu_commit_if VX_commit_if lsu_commit_if
); );
wire [`NUM_THREADS-1:0] req_tmask; wire [`NUM_THREADS-1:0] req_tmask;
wire req_rw; wire req_rw;

View File

@@ -7,10 +7,10 @@ module VX_mul_unit #(
input wire reset, input wire reset,
// Inputs // Inputs
VX_mul_req_if mul_req_if, VX_mul_req_if mul_req_if,
// Outputs // Outputs
VX_exu_to_cmt_if mul_commit_if VX_commit_if mul_commit_if
); );
localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE); localparam MULQ_BITS = `LOG2UP(`MULQ_SIZE);

View File

@@ -112,12 +112,12 @@ module VX_pipeline #(
VX_writeback_if writeback_if(); VX_writeback_if writeback_if();
VX_wstall_if wstall_if(); VX_wstall_if wstall_if();
VX_join_if join_if(); VX_join_if join_if();
VX_exu_to_cmt_if alu_commit_if(); VX_commit_if alu_commit_if();
VX_exu_to_cmt_if lsu_commit_if(); VX_commit_if lsu_commit_if();
VX_exu_to_cmt_if csr_commit_if(); VX_commit_if csr_commit_if();
VX_exu_to_cmt_if mul_commit_if(); VX_commit_if mul_commit_if();
VX_fpu_to_cmt_if fpu_commit_if(); VX_commit_if fpu_commit_if();
VX_exu_to_cmt_if gpu_commit_if(); VX_commit_if gpu_commit_if();
VX_fetch #( VX_fetch #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)

View File

@@ -3,19 +3,19 @@
module VX_writeback #( module VX_writeback #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// inputs // inputs
VX_exu_to_cmt_if alu_commit_if, VX_commit_if alu_commit_if,
VX_exu_to_cmt_if lsu_commit_if, VX_commit_if lsu_commit_if,
VX_exu_to_cmt_if csr_commit_if, VX_commit_if csr_commit_if,
VX_exu_to_cmt_if mul_commit_if, VX_commit_if mul_commit_if,
VX_fpu_to_cmt_if fpu_commit_if, VX_commit_if fpu_commit_if,
VX_exu_to_cmt_if gpu_commit_if, VX_commit_if gpu_commit_if,
// outputs // outputs
VX_writeback_if writeback_if VX_writeback_if writeback_if
); );
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb; wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb; wire lsu_valid = lsu_commit_if.valid && lsu_commit_if.wb;

View File

@@ -5,12 +5,8 @@
interface VX_cmt_to_csr_if (); interface VX_cmt_to_csr_if ();
wire valid; wire valid;
wire [`NW_BITS-1:0] wid;
wire [$clog2(`NUM_THREADS+1)-1:0] commit_size; wire [$clog2(`NUM_THREADS+1)-1:0] commit_size;
wire has_fflags;
fflags_t fflags;
endinterface endinterface

View File

@@ -1,9 +1,9 @@
`ifndef VX_EXU_TO_CMT_IF `ifndef VX_COMMIT_IF
`define VX_EXU_TO_CMT_IF `define VX_COMMIT_IF
`include "VX_define.vh" `include "VX_define.vh"
interface VX_exu_to_cmt_if (); interface VX_commit_if ();
wire valid; wire valid;

View File

@@ -1,17 +0,0 @@
`ifndef VX_CSR_TO_FPU_IF
`define VX_CSR_TO_FPU_IF
`include "VX_define.vh"
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_BEGIN
`endif
interface VX_csr_to_fpu_if ();
wire [`NW_BITS-1:0] wid;
wire [`FRM_BITS-1:0] frm;
endinterface
`endif

View File

@@ -3,19 +3,14 @@
`include "VX_define.vh" `include "VX_define.vh"
`ifndef EXTF_F_ENABLE
`IGNORE_WARNINGS_BEGIN
`endif
interface VX_fpu_to_csr_if (); interface VX_fpu_to_csr_if ();
wire valid; wire write_enable;
wire [`NW_BITS-1:0] wid; wire [`NW_BITS-1:0] write_wid;
wire fflags_NV; fflags_t write_fflags;
wire fflags_DZ;
wire fflags_OF; wire [`NW_BITS-1:0] read_wid;
wire fflags_UF; wire [`FRM_BITS-1:0] read_frm;
wire fflags_NX;
endinterface endinterface

View File

@@ -29,9 +29,10 @@ SRCS += ../rtl/fp_cores/svdpi/float_dpi.cpp
all: build-s all: build-s
CF += -std=c++11 -fms-extensions -I../.. CF += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors -I../..
#CF += -std=c++11 -g -O0 -Wall -Wextra -Wfatal-errors -I../..
VF += --language 1800-2009 --assert -Wall -Wpedantic VF += -O2 --language 1800-2009 --assert -Wall -Wpedantic
VF += -Wno-DECLFILENAME VF += -Wno-DECLFILENAME
VF += --x-initial unique --x-assign unique VF += --x-initial unique --x-assign unique
VF += --exe $(SRCS) $(INCLUDE) VF += --exe $(SRCS) $(INCLUDE)
@@ -42,41 +43,44 @@ DBG += -DVCD_OUTPUT $(DBG_FLAGS)
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))') THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized"
OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized"
gen-s: gen-s:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)'
gen-sd: gen-sd:
verilator $(VF) -O0 $(SINGLECORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG) verilator $(VF) $(SINGLECORE) -CFLAGS '$(CF) $(DBG) $(SINGLECORE)' --trace --trace-structs $(DBG)
gen-st: gen-st:
verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(SINGLECORE)' --threads $(THREADS) verilator $(VF) -DNDEBUG $(SINGLECORE) -CFLAGS '$(CF) -DNDEBUG $(SINGLECORE)' --threads $(THREADS)
gen-m: gen-m:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)'
gen-md: gen-md:
verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) -O0 -g $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG) verilator $(VF) $(MULTICORE) -CFLAGS '$(CF) $(DBG) $(MULTICORE)' --trace --trace-structs $(DBG)
gen-mt: gen-mt:
verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG -O2 $(MULTICORE)' --threads $(THREADS) verilator $(VF) -DNDEBUG $(MULTICORE) -CFLAGS '$(CF) -DNDEBUG $(MULTICORE)' --threads $(THREADS)
build-s: gen-s build-s: gen-s
(cd obj_dir && make -j -f VVortex.mk) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk
build-sd: gen-sd build-sd: gen-sd
(cd obj_dir && make -j -f VVortex.mk) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk
build-st: gen-st build-st: gen-st
(cd obj_dir && make -j -f VVortex.mk) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk
build-m: gen-m build-m: gen-m
(cd obj_dir && make -j -f VVortex.mk) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk
build-md: gen-md build-md: gen-md
(cd obj_dir && make -j -f VVortex.mk) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk
build-mt: gen-mt build-mt: gen-mt
(cd obj_dir && make -j -f VVortex.mk) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f VVortex.mk
run: run-s run: run-s

View File

@@ -61,10 +61,6 @@ void Simulator::reset() {
print_bufs_.clear(); print_bufs_.clear();
dram_rsp_vec_.clear(); dram_rsp_vec_.clear();
dram_rsp_active_ = false;
snp_req_active_ = false;
csr_req_active_ = false;
snp_req_size_ = 0; snp_req_size_ = 0;
pending_snp_reqs_ = 0; pending_snp_reqs_ = 0;
csr_rsp_value_ = nullptr; csr_rsp_value_ = nullptr;
@@ -96,10 +92,6 @@ void Simulator::step() {
vortex_->clk = 0; vortex_->clk = 0;
this->eval(); this->eval();
dram_rsp_ready_ = vortex_->dram_rsp_ready;
snp_req_ready_ = vortex_->snp_req_ready;
csr_io_req_ready_ = vortex_->csr_io_req_ready;
vortex_->clk = 1; vortex_->clk = 1;
this->eval(); this->eval();
@@ -140,7 +132,7 @@ void Simulator::eval_dram_bus() {
// send DRAM response // send DRAM response
if (dram_rsp_active_ if (dram_rsp_active_
&& vortex_->dram_rsp_valid && dram_rsp_ready_) { && vortex_->dram_rsp_valid && vortex_->dram_rsp_ready) {
dram_rsp_active_ = false; dram_rsp_active_ = false;
} }
if (!dram_rsp_active_) { if (!dram_rsp_active_) {
@@ -213,7 +205,7 @@ void Simulator::eval_io_bus() {
void Simulator::eval_snp_bus() { void Simulator::eval_snp_bus() {
if (snp_req_active_) { if (snp_req_active_) {
if (vortex_->snp_req_valid && snp_req_ready_) { if (vortex_->snp_req_valid && vortex_->snp_req_ready) {
assert(snp_req_size_); assert(snp_req_size_);
#ifdef DBG_PRINT_CACHE_SNP #ifdef DBG_PRINT_CACHE_SNP
std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl; std::cout << std::dec << timestamp << ": [sim] SNP Req: addr=" << std::hex << vortex_->snp_req_addr << " tag=" << vortex_->snp_req_tag << " remain=" << (snp_req_size_-1) << std::endl;
@@ -246,7 +238,7 @@ void Simulator::eval_snp_bus() {
void Simulator::eval_csr_bus() { void Simulator::eval_csr_bus() {
if (csr_req_active_) { if (csr_req_active_) {
if (vortex_->csr_io_req_valid && csr_io_req_ready_) { if (vortex_->csr_io_req_valid && vortex_->csr_io_req_ready) {
#ifndef NDEBUG #ifndef NDEBUG
if (vortex_->csr_io_req_rw) if (vortex_->csr_io_req_rw)
std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl; std::cout << std::dec << timestamp << ": [sim] CSR Wr Req: core=" << (int)vortex_->csr_io_req_coreid << ", addr=" << std::hex << vortex_->csr_io_req_addr << ", value=" << vortex_->csr_io_req_data << std::endl;

View File

@@ -66,10 +66,6 @@ private:
std::list<dram_req_t> dram_rsp_vec_; std::list<dram_req_t> dram_rsp_vec_;
bool dram_rsp_active_; bool dram_rsp_active_;
bool dram_rsp_ready_;
bool snp_req_ready_;
bool csr_io_req_ready_;
bool snp_req_active_; bool snp_req_active_;
bool csr_req_active_; bool csr_req_active_;

View File

@@ -3,10 +3,6 @@
#include <fstream> #include <fstream>
#include <iomanip> #include <iomanip>
#define GREEN "\\033[32m"
#define RED "\\033[31m"
#define DEFAULT "\\033[39m"
#define ALL_TESTS #define ALL_TESTS
int main(int argc, char **argv) { int main(int argc, char **argv) {
@@ -14,7 +10,7 @@ int main(int argc, char **argv) {
if (argc == 1) { if (argc == 1) {
#ifdef ALL_TESTS #ifdef ALL_TESTS
std::string tests[] = { std::string tests[] = {
"../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-add.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-addi.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-and.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-and.hex",
"../../../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex", "../../../benchmarks/riscv_tests/isa/rv32ui-p-andi.hex",
@@ -81,7 +77,7 @@ int main(int argc, char **argv) {
}; };
for (std::string test : tests) { for (std::string test : tests) {
std::cout << DEFAULT << "\n---------------------------------------\n"; std::cout << "\n---------------------------------------\n";
std::cout << test << std::endl; std::cout << test << std::endl;
@@ -93,16 +89,15 @@ int main(int argc, char **argv) {
bool status = (1 == simulator.get_last_wb_value(3)); bool status = (1 == simulator.get_last_wb_value(3));
if (status) std::cout << GREEN << "Test Passed: " << test << std::endl; if (status) std::cout << "Passed: " << test << std::endl;
if (!status) std::cout << RED << "Test Failed: " << test << std::endl; if (!status) std::cout << "Failed: " << test << std::endl;
std::cout << DEFAULT;
passed = passed && status; passed = passed && status;
if (!passed) if (!passed)
break; break;
} }
for (std::string test : tests_fp) { for (std::string test : tests_fp) {
std::cout << DEFAULT << "\n---------------------------------------\n"; std::cout << "\n---------------------------------------\n";
std::cout << test << std::endl; std::cout << test << std::endl;
@@ -114,18 +109,17 @@ int main(int argc, char **argv) {
bool status = (1 == simulator.get_last_wb_value(3)); bool status = (1 == simulator.get_last_wb_value(3));
if (status) std::cout << GREEN << "Test Passed: " << test << std::endl; if (status) std::cout << "Passed: " << test << std::endl;
if (!status) std::cout << RED << "Test Failed: " << test << std::endl; if (!status) std::cout << "Failed: " << test << std::endl;
std::cout << DEFAULT;
passed = passed && status; passed = passed && status;
if (!passed) if (!passed)
break; break;
} }
std::cout << DEFAULT << "\n***************************************\n"; std::cout << "\n***************************************\n";
if (passed) std::cout << DEFAULT << "PASSED ALL TESTS\n"; if (passed) std::cout << "PASSED ALL TESTS\n";
if (!passed) std::cout << DEFAULT << "Failed one or more tests\n"; if (!passed) std::cout << "Failed one or more tests\n";
return !passed; return !passed;

View File

@@ -20,6 +20,9 @@ LIGHTW=-Wno-UNOPTFLAT -Wno-WIDTH
DEB=--trace -DVL_DEBUG=1 DEB=--trace -DVL_DEBUG=1
EXE=--exe $(LIB_OBJS) EXE=--exe $(LIB_OBJS)
OPT_FAST = "-Wno-aligned-new -Wmaybe-uninitialized"
OPT_SLOW = "-Wno-aligned-new -Wmaybe-uninitialized"
all: simX all: simX
# simX: # simX:
@@ -28,7 +31,7 @@ all: simX
simX: simX:
verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW) $(DEB) verilator $(COMP) -cc $(FILE) $(INCLUDE) $(EXE) $(LIB) $(CF) $(LIGHTW) $(DEB)
(cd obj_dir && make -j`nproc` -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1) make OPT_FAST=$(OPT_FAST) OPT_SLOW=$(OPT_SLOW) -j -C obj_dir -f Vcache_simX.mk OPT='-DVL_DEBUG' VL_DEBUG=1 DVL_DEBUG=1
clean: clean:
rm -rf *~ \#* *.o *.a *.so include/*~ include/\#* simX.run obj_dir rm -rf *~ \#* *.o *.a *.so include/*~ include/\#* simX.run obj_dir