fixed register file initialization to zero synthesis inference
This commit is contained in:
@@ -85,7 +85,7 @@ vlsim-hw: $(SCOPE_H)
|
|||||||
fpga: $(SRCS) $(SCOPE_H)
|
fpga: $(SRCS) $(SCOPE_H)
|
||||||
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
$(CXX) $(CXXFLAGS) -DUSE_FPGA $^ $(LDFLAGS) $(FPGA_LIBS) -o $(PROJECT)
|
||||||
|
|
||||||
asesim: $(SRCS) $(ASE_DIR)
|
asesim: $(SRCS) $(ASE_DIR) $(SCOPE_H)
|
||||||
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
$(CXX) $(CXXFLAGS) -DUSE_ASE $(SRCS) $(LDFLAGS) $(ASE_LIBS) -o $(PROJECT_ASE)
|
||||||
|
|
||||||
vlsim: $(SRCS) vlsim-hw
|
vlsim: $(SRCS) vlsim-hw
|
||||||
|
|||||||
@@ -1,6 +1,10 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#if defined(USE_FPGA)
|
||||||
#define HANG_TIMEOUT 60
|
#define HANG_TIMEOUT 60
|
||||||
|
#else
|
||||||
|
#define HANG_TIMEOUT (30*60)
|
||||||
|
#endif
|
||||||
|
|
||||||
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
|
int vx_scope_start(fpga_handle hfpga, uint64_t delay = -1);
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ make ase
|
|||||||
|
|
||||||
# tests
|
# tests
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
|
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n16
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n16
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
|
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n16
|
||||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||||
|
|
||||||
|
|||||||
@@ -4,21 +4,21 @@
|
|||||||
+define+QUARTUS
|
+define+QUARTUS
|
||||||
+define+FPU_FAST
|
+define+FPU_FAST
|
||||||
#+define+SCOPE
|
#+define+SCOPE
|
||||||
#+define+PERF_ENABLE
|
+define+PERF_ENABLE
|
||||||
|
|
||||||
#+define+DBG_PRINT_CORE_ICACHE
|
+define+DBG_PRINT_CORE_ICACHE
|
||||||
#+define+DBG_PRINT_CORE_DCACHE
|
+define+DBG_PRINT_CORE_DCACHE
|
||||||
#+define+DBG_PRINT_CACHE_BANK
|
+define+DBG_PRINT_CACHE_BANK
|
||||||
#+define+DBG_PRINT_CACHE_SNP
|
+define+DBG_PRINT_CACHE_SNP
|
||||||
#+define+DBG_PRINT_CACHE_MSRQ
|
+define+DBG_PRINT_CACHE_MSRQ
|
||||||
#+define+DBG_PRINT_CACHE_TAG
|
+define+DBG_PRINT_CACHE_TAG
|
||||||
#+define+DBG_PRINT_CACHE_DATA
|
+define+DBG_PRINT_CACHE_DATA
|
||||||
#+define+DBG_PRINT_DRAM
|
+define+DBG_PRINT_DRAM
|
||||||
#+define+DBG_PRINT_PIPELINE
|
+define+DBG_PRINT_PIPELINE
|
||||||
#+define+DBG_PRINT_OPAE
|
+define+DBG_PRINT_OPAE
|
||||||
#+define+DBG_PRINT_AVS
|
+define+DBG_PRINT_AVS
|
||||||
#+define+DBG_PRINT_SCOPE
|
+define+DBG_PRINT_SCOPE
|
||||||
#+define+DBG_CACHE_REQ_INFO
|
+define+DBG_CACHE_REQ_INFO
|
||||||
|
|
||||||
vortex_afu.json
|
vortex_afu.json
|
||||||
QI:vortex_afu.qsf
|
QI:vortex_afu.qsf
|
||||||
|
|||||||
@@ -102,7 +102,7 @@ module VX_csr_unit #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall_in = fpu_pending[csr_pipe_req_if.wid];
|
wire stall_in = !csr_pipe_req_if.is_io && fpu_pending[csr_pipe_req_if.wid];
|
||||||
|
|
||||||
wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in;
|
wire pipe_req_valid_qual = csr_pipe_req_if.valid && !stall_in;
|
||||||
|
|
||||||
|
|||||||
@@ -1,83 +0,0 @@
|
|||||||
`include "VX_define.vh"
|
|
||||||
|
|
||||||
`TRACING_OFF
|
|
||||||
|
|
||||||
module VX_gpr_ram (
|
|
||||||
input wire clk,
|
|
||||||
input wire wren,
|
|
||||||
input wire [`NUM_THREADS-1:0] tmask,
|
|
||||||
input wire [`NW_BITS+`NR_BITS-1:0] waddr,
|
|
||||||
input wire [`NUM_THREADS-1:0][31:0] wdata,
|
|
||||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
|
||||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr2,
|
|
||||||
input wire [`NW_BITS+`NR_BITS-1:0] raddr3,
|
|
||||||
output wire [`NUM_THREADS-1:0][31:0] rdata1,
|
|
||||||
output wire [`NUM_THREADS-1:0][31:0] rdata2,
|
|
||||||
output wire [`NUM_THREADS-1:0][31:0] rdata3
|
|
||||||
);
|
|
||||||
localparam RAM_DATAW = `NUM_THREADS * 32;
|
|
||||||
localparam RAM_ADDRW = `NW_BITS + `NR_BITS;
|
|
||||||
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
|
||||||
localparam RAM_BYTEEN = `NUM_THREADS * 4;
|
|
||||||
|
|
||||||
`UNUSED_VAR (raddr3)
|
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
|
||||||
|
|
||||||
reg [31:0] mem_i [(RAM_DEPTH/2)-1:0];
|
|
||||||
reg [31:0] mem_f [(RAM_DEPTH/2)-1:0];
|
|
||||||
|
|
||||||
initial mem_i = '{default: 0};
|
|
||||||
|
|
||||||
wire waddr_is_fp = waddr[RAM_ADDRW-1];
|
|
||||||
wire raddr1_is_fp = raddr1[RAM_ADDRW-1];
|
|
||||||
wire raddr2_is_fp = raddr2[RAM_ADDRW-1];
|
|
||||||
|
|
||||||
wire [RAM_ADDRW-2:0] waddr_qual = waddr[RAM_ADDRW-2:0];
|
|
||||||
wire [RAM_ADDRW-2:0] raddr1_qual = raddr1[RAM_ADDRW-2:0];
|
|
||||||
wire [RAM_ADDRW-2:0] raddr2_qual = raddr2[RAM_ADDRW-2:0];
|
|
||||||
wire [RAM_ADDRW-2:0] raddr3_qual = raddr3[RAM_ADDRW-2:0];
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (wren && tmask[i] && !waddr_is_fp) begin
|
|
||||||
mem_i[waddr_qual] <= wdata[i];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (wren && tmask[i] && waddr_is_fp) begin
|
|
||||||
mem_f[waddr_qual] <= wdata[i];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign rdata1[i] = raddr1_is_fp ? mem_f[raddr1_qual] : mem_i[raddr1_qual];
|
|
||||||
assign rdata2[i] = raddr2_is_fp ? mem_f[raddr2_qual] : mem_i[raddr2_qual];
|
|
||||||
assign rdata3[i] = mem_f[raddr3_qual];
|
|
||||||
end
|
|
||||||
|
|
||||||
`else
|
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; ++i) begin
|
|
||||||
|
|
||||||
reg [31:0] mem [RAM_DEPTH-1:0];
|
|
||||||
|
|
||||||
initial mem = '{default: 0};
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (wren && tmask[i]) begin
|
|
||||||
mem[waddr] <= wdata[i];
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign rdata1[i] = mem[raddr1];
|
|
||||||
assign rdata2[i] = mem[raddr2];
|
|
||||||
assign rdata3[i] = 0;
|
|
||||||
end
|
|
||||||
|
|
||||||
`endif
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
|
|
||||||
`TRACING_ON
|
|
||||||
35
hw/rtl/VX_gpr_ram_f.v
Normal file
35
hw/rtl/VX_gpr_ram_f.v
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`TRACING_OFF
|
||||||
|
|
||||||
|
module VX_gpr_ram_f #(
|
||||||
|
parameter DATAW = 1,
|
||||||
|
parameter DEPTH = 1,
|
||||||
|
parameter ADDRW = $clog2(DEPTH)
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire wren,
|
||||||
|
input wire [ADDRW-1:0] waddr,
|
||||||
|
input wire [DATAW-1:0] wdata,
|
||||||
|
input wire [ADDRW-1:0] raddr1,
|
||||||
|
input wire [ADDRW-1:0] raddr2,
|
||||||
|
input wire [ADDRW-1:0] raddr3,
|
||||||
|
output wire [DATAW-1:0] rdata1,
|
||||||
|
output wire [DATAW-1:0] rdata2,
|
||||||
|
output wire [DATAW-1:0] rdata3
|
||||||
|
);
|
||||||
|
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (wren) begin
|
||||||
|
mem [waddr] <= wdata;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
assign rdata1 = mem [raddr1];
|
||||||
|
assign rdata2 = mem [raddr2];
|
||||||
|
assign rdata3 = mem [raddr3];
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
`TRACING_ON
|
||||||
34
hw/rtl/VX_gpr_ram_i.v
Normal file
34
hw/rtl/VX_gpr_ram_i.v
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
`TRACING_OFF
|
||||||
|
|
||||||
|
module VX_gpr_ram_i #(
|
||||||
|
parameter DATAW = 1,
|
||||||
|
parameter DEPTH = 1,
|
||||||
|
parameter ADDRW = $clog2(DEPTH)
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire wren,
|
||||||
|
input wire [ADDRW-1:0] waddr,
|
||||||
|
input wire [DATAW-1:0] wdata,
|
||||||
|
input wire [ADDRW-1:0] raddr1,
|
||||||
|
input wire [ADDRW-1:0] raddr2,
|
||||||
|
output wire [DATAW-1:0] rdata1,
|
||||||
|
output wire [DATAW-1:0] rdata2
|
||||||
|
);
|
||||||
|
reg [DATAW-1:0] mem [DEPTH-1:0];
|
||||||
|
|
||||||
|
initial mem = '{default: 0};
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (wren) begin
|
||||||
|
mem [waddr] <= wdata;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
assign rdata1 = mem [raddr1];
|
||||||
|
assign rdata2 = mem [raddr2];
|
||||||
|
|
||||||
|
endmodule
|
||||||
|
|
||||||
|
`TRACING_ON
|
||||||
@@ -15,39 +15,90 @@ module VX_gpr_stage #(
|
|||||||
);
|
);
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rdata1, rdata2, rdata3;
|
|
||||||
wire [`NW_BITS+`NR_BITS-1:0] waddr, raddr1, raddr2, raddr3;
|
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
assign waddr = {writeback_if.rd[`NR_BITS-1], writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
|
localparam RAM_DEPTH = `NUM_WARPS * (`NUM_REGS / 2);
|
||||||
assign raddr1 = {gpr_req_if.rs1[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
|
wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i, rdata1_f, rdata2_f, rdata3_f;
|
||||||
assign raddr2 = {gpr_req_if.rs2[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
|
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2, raddr3;
|
||||||
assign raddr3 = {gpr_req_if.rs3[`NR_BITS-1], gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
|
|
||||||
|
wire waddr_is_fp = writeback_if.rd[`NR_BITS-1];
|
||||||
|
wire raddr1_is_fp = gpr_req_if.rs1[`NR_BITS-1];
|
||||||
|
wire raddr2_is_fp = gpr_req_if.rs2[`NR_BITS-1];
|
||||||
|
wire raddr3_is_fp = gpr_req_if.rs3[`NR_BITS-1];
|
||||||
|
`UNUSED_VAR (raddr3_is_fp)
|
||||||
|
|
||||||
|
assign waddr = {writeback_if.wid, writeback_if.rd[`NR_BITS-2:0]};
|
||||||
|
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1[`NR_BITS-2:0]};
|
||||||
|
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2[`NR_BITS-2:0]};
|
||||||
|
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3[`NR_BITS-2:0]};
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
VX_gpr_ram_i #(
|
||||||
|
.DATAW (32),
|
||||||
|
.DEPTH (RAM_DEPTH)
|
||||||
|
) gpr_ram_i (
|
||||||
|
.clk (clk),
|
||||||
|
.wren (writeback_if.valid && writeback_if.tmask[i] && !waddr_is_fp),
|
||||||
|
.waddr (waddr),
|
||||||
|
.wdata (writeback_if.data[i]),
|
||||||
|
.raddr1 (raddr1),
|
||||||
|
.raddr2 (raddr2),
|
||||||
|
.rdata1 (rdata1_i[i]),
|
||||||
|
.rdata2 (rdata2_i[i])
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
VX_gpr_ram_f #(
|
||||||
|
.DATAW (32),
|
||||||
|
.DEPTH (RAM_DEPTH)
|
||||||
|
) gpr_ram_f (
|
||||||
|
.clk (clk),
|
||||||
|
.wren (writeback_if.valid && writeback_if.tmask[i] && waddr_is_fp),
|
||||||
|
.waddr (waddr),
|
||||||
|
.wdata (writeback_if.data[i]),
|
||||||
|
.raddr1 (raddr1),
|
||||||
|
.raddr2 (raddr2),
|
||||||
|
.raddr3 (raddr3),
|
||||||
|
.rdata1 (rdata1_f[i]),
|
||||||
|
.rdata2 (rdata2_f[i]),
|
||||||
|
.rdata3 (rdata3_f[i])
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
assign gpr_rsp_if.rs1_data = raddr1_is_fp ? rdata1_f : rdata1_i;
|
||||||
|
assign gpr_rsp_if.rs2_data = raddr2_is_fp ? rdata2_f : rdata2_i;
|
||||||
|
assign gpr_rsp_if.rs3_data = rdata3_f;
|
||||||
`else
|
`else
|
||||||
|
localparam RAM_DEPTH = `NUM_WARPS * `NUM_REGS;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] rdata1_i, rdata2_i;
|
||||||
|
wire [$clog2(RAM_DEPTH)-1:0] waddr, raddr1, raddr2;
|
||||||
|
|
||||||
assign waddr = {writeback_if.wid, writeback_if.rd};
|
assign waddr = {writeback_if.wid, writeback_if.rd};
|
||||||
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
assign raddr1 = {gpr_req_if.wid, gpr_req_if.rs1};
|
||||||
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
assign raddr2 = {gpr_req_if.wid, gpr_req_if.rs2};
|
||||||
assign raddr3 = {gpr_req_if.wid, gpr_req_if.rs3};
|
`UNUSED_VAR (gpr_req_if.rs3)
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
VX_gpr_ram_i #(
|
||||||
|
.DATAW (32),
|
||||||
|
.DEPTH (RAM_DEPTH)
|
||||||
|
) gpr_ram_i (
|
||||||
|
.clk (clk),
|
||||||
|
.wren (writeback_if.valid && writeback_if.tmask[i]),
|
||||||
|
.waddr (waddr),
|
||||||
|
.wdata (writeback_if.data[i]),
|
||||||
|
.raddr1 (raddr1),
|
||||||
|
.raddr2 (raddr2),
|
||||||
|
.rdata1 (rdata1_i[i]),
|
||||||
|
.rdata2 (rdata2_i[i])
|
||||||
|
);
|
||||||
|
end
|
||||||
|
|
||||||
|
assign gpr_rsp_if.rs1_data = rdata1_i;
|
||||||
|
assign gpr_rsp_if.rs2_data = rdata2_i;
|
||||||
|
assign gpr_rsp_if.rs3_data = 0;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
VX_gpr_ram gpr_ram (
|
|
||||||
.clk (clk),
|
|
||||||
.wren (writeback_if.valid),
|
|
||||||
.tmask (writeback_if.tmask),
|
|
||||||
.waddr (waddr),
|
|
||||||
.wdata (writeback_if.data),
|
|
||||||
.raddr1 (raddr1),
|
|
||||||
.raddr2 (raddr2),
|
|
||||||
.raddr3 (raddr3),
|
|
||||||
.rdata1 (rdata1),
|
|
||||||
.rdata2 (rdata2),
|
|
||||||
.rdata3 (rdata3)
|
|
||||||
);
|
|
||||||
|
|
||||||
assign gpr_rsp_if.rs1_data = rdata1;
|
|
||||||
assign gpr_rsp_if.rs2_data = rdata2;
|
|
||||||
assign gpr_rsp_if.rs3_data = rdata3;
|
|
||||||
|
|
||||||
assign writeback_if.ready = 1'b1;
|
assign writeback_if.ready = 1'b1;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -54,6 +54,6 @@ typedef struct packed {
|
|||||||
logic [`NW_BITS-1:0] size_m1;
|
logic [`NW_BITS-1:0] size_m1;
|
||||||
} gpu_barrier_t;
|
} gpu_barrier_t;
|
||||||
|
|
||||||
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NB_BITS)
|
`define GPU_BARRIER_SIZE (1+`NB_BITS+`NW_BITS)
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
Reference in New Issue
Block a user