fixed shared memory addressing critical path, fixed VX_fp_noncomp output bug
This commit is contained in:
@@ -79,7 +79,7 @@ VL_FLAGS += -DNOPAE
|
|||||||
CFLAGS += -DNOPAE
|
CFLAGS += -DNOPAE
|
||||||
|
|
||||||
# use DPI FPU
|
# use DPI FPU
|
||||||
VL_FLAGS += -DFPU_FAST
|
#VL_FLAGS += -DFPU_FAST
|
||||||
|
|
||||||
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
RTL_INCLUDE += -I../../../hw/opae -I../../../hw/opae/ccip
|
||||||
|
|
||||||
|
|||||||
@@ -4,15 +4,15 @@ CFLAGS += -std=c++11 -O2 -DNDEBUG -Wall -Wextra -Wfatal-errors
|
|||||||
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
CFLAGS += -I../../include -I../../../hw/simulate -I../../../hw
|
||||||
|
|
||||||
# control RTL debug print states
|
# control RTL debug print states
|
||||||
|
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_ICACHE
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CORE_DCACHE
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_BANK
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_MSRQ
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_TAG
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_DATA
|
||||||
|
DBG_PRINT_FLAGS += -DDBG_PRINT_CACHE_SNP
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
DBG_PRINT_FLAGS += -DDBG_PRINT_DRAM
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_PIPELINE
|
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_OPAE
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
DBG_PRINT_FLAGS += -DDBG_PRINT_AVS
|
||||||
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
DBG_PRINT_FLAGS += -DDBG_PRINT_SCOPE
|
||||||
@@ -65,7 +65,7 @@ else
|
|||||||
endif
|
endif
|
||||||
|
|
||||||
# use DPI FPU
|
# use DPI FPU
|
||||||
VL_FLAGS += -DFPU_FAST
|
#VL_FLAGS += -DFPU_FAST
|
||||||
|
|
||||||
PROJECT = libvortex.so
|
PROJECT = libvortex.so
|
||||||
# PROJECT = libvortex.dylib
|
# PROJECT = libvortex.dylib
|
||||||
|
|||||||
@@ -98,7 +98,7 @@ module VX_alu_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33)
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + `BR_BITS + 32 + 33)
|
||||||
) alu_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_out),
|
.stall (stall_out),
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ module VX_csr_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||||
) csr_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
|
|||||||
@@ -131,7 +131,7 @@ module VX_fpu_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + (`NUM_THREADS * `FFG_BITS))
|
||||||
) fpu_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_out),
|
.stall (stall_out),
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ module VX_gpu_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE)
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + `GPU_TMC_SIZE + `GPU_WSPAWN_SIZE + `GPU_SPLIT_SIZE + `GPU_BARRIER_SIZE)
|
||||||
) csr_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ module VX_lsu_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32)))
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 1 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 2 + (`NUM_THREADS * (30 + 2 + 4 + 32)))
|
||||||
) req_reg (
|
) pipe_reg0 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_in),
|
.stall (stall_in),
|
||||||
@@ -181,7 +181,7 @@ module VX_lsu_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||||
) rsp_reg (
|
) pipe_reg1 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_out),
|
.stall (stall_out),
|
||||||
|
|||||||
@@ -41,7 +41,9 @@ module VX_mem_unit # (
|
|||||||
) core_dcache_rsp_qual_if(), core_smem_rsp_if();
|
) core_dcache_rsp_qual_if(), core_smem_rsp_if();
|
||||||
|
|
||||||
// select shared memory bus
|
// select shared memory bus
|
||||||
wire is_smem_addr = (({core_dcache_req_if.addr[0], 2'b0} - `SHARED_MEM_BASE_ADDR) <= `SCACHE_SIZE);
|
wire is_smem_addr = ({core_dcache_req_if.addr[0], 2'b0} >= `SHARED_MEM_BASE_ADDR)
|
||||||
|
&& ({core_dcache_req_if.addr[0], 2'b0} < (`SHARED_MEM_BASE_ADDR + `SCACHE_SIZE));
|
||||||
|
|
||||||
wire smem_req_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
|
wire smem_req_select = (| core_dcache_req_if.valid) ? is_smem_addr : 0;
|
||||||
wire smem_rsp_select = (| core_smem_rsp_if.valid);
|
wire smem_rsp_select = (| core_smem_rsp_if.valid);
|
||||||
|
|
||||||
|
|||||||
@@ -145,7 +145,7 @@ module VX_mul_unit #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32))
|
||||||
) mul_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_out),
|
.stall (stall_out),
|
||||||
|
|||||||
@@ -239,7 +239,7 @@ module VX_warp_sched #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||||
) fetch_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall_out),
|
.stall (stall_out),
|
||||||
|
|||||||
@@ -77,7 +77,7 @@ module VX_writeback #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
|
.N(1 + `NW_BITS + 32 + `NUM_THREADS + `NR_BITS + (`NUM_THREADS * 32))
|
||||||
) wb_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
|
|||||||
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
@@ -869,7 +869,7 @@ module VX_bank #(
|
|||||||
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0);
|
$display("%t: cache%0d:%0d fill-rsp: addr=%0h, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), dfpq_filldata_st0);
|
||||||
end
|
end
|
||||||
if (reqq_pop) begin
|
if (reqq_pop) begin
|
||||||
$display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, debug_wid_st0, debug_pc_st0);
|
$display("%t: cache%0d:%0d core-req: addr=%0h, tag=%0h, tid=%0d, rw=%b, byteen=%b, data=%0h, wid=%0d, PC=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), reqq_tag_st0, reqq_tid_st0, reqq_rw_st0, reqq_byteen_st0, reqq_writeword_st0, debug_wid_st0, debug_pc_st0);
|
||||||
end
|
end
|
||||||
if (snrq_pop) begin
|
if (snrq_pop) begin
|
||||||
$display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0);
|
$display("%t: cache%0d:%0d snp-req: addr=%0h, tag=%0h, invalidate=%0d", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_st0, BANK_ID), snrq_tag_st0, snrq_invalidate_st0);
|
||||||
|
|||||||
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
2
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -94,7 +94,7 @@ module VX_cache_core_rsp_merge #(
|
|||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
|
.N(NUM_REQUESTS + (NUM_REQUESTS *`WORD_WIDTH) + (`CORE_REQ_TAG_COUNT * CORE_TAG_WIDTH)),
|
||||||
.PASSTHRU(NUM_BANKS <= 2)
|
.PASSTHRU(NUM_BANKS <= 2)
|
||||||
) core_wb_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
|
|||||||
2
hw/rtl/cache/VX_data_access.v
vendored
2
hw/rtl/cache/VX_data_access.v
vendored
@@ -127,7 +127,7 @@ module VX_data_access #(
|
|||||||
if (is_fill_in) begin
|
if (is_fill_in) begin
|
||||||
$display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data);
|
$display("%t: cache%0d:%0d data-fill: addr=%0h, dirty=%b, blk_addr=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), dirtyb_out, addrline, use_write_data);
|
||||||
end else begin
|
end else begin
|
||||||
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, writeword_in);
|
$display("%t: cache%0d:%0d data-write: addr=%0h, wid=%0d, PC=%0h, byteen=%b, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, byte_enable, dirtyb_out, addrline, wordsel_in, writeword_in);
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data);
|
$display("%t: cache%0d:%0d data-read: addr=%0h, wid=%0d, PC=%0h, dirty=%b, blk_addr=%0d, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, `LINE_TO_BYTE_ADDR(addr_in, BANK_ID), debug_wid, debug_pc, dirtyb_out, addrline, wordsel_in, qual_read_data);
|
||||||
|
|||||||
@@ -38,6 +38,8 @@ module VX_fp_noncomp #(
|
|||||||
SIG_NAN = 32'h00000100,
|
SIG_NAN = 32'h00000100,
|
||||||
QUT_NAN = 32'h00000200;
|
QUT_NAN = 32'h00000200;
|
||||||
|
|
||||||
|
reg valid_in_r;
|
||||||
|
reg [TAGW-1:0] tag_in_r;
|
||||||
reg [`FPU_BITS-1:0] op_type_r;
|
reg [`FPU_BITS-1:0] op_type_r;
|
||||||
reg [`FRM_BITS-1:0] frm_r;
|
reg [`FRM_BITS-1:0] frm_r;
|
||||||
|
|
||||||
@@ -87,7 +89,7 @@ module VX_fp_noncomp #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1)
|
.N(1 + 1 + 8 + 23 + $bits(fp_type_t) + $bits(fp_type_t) + 1 + 1)
|
||||||
) fnc1_reg (
|
) pipe_reg0 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
@@ -98,14 +100,14 @@ module VX_fp_noncomp #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(`FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32))
|
.N(1 + TAGW + `FPU_BITS + `FRM_BITS + (2 * `NUM_THREADS * 32))
|
||||||
) fnc2_reg (
|
) pipe_reg1 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (1'b0),
|
.flush (1'b0),
|
||||||
.in ({op_type, frm, dataa, datab}),
|
.in ({valid_in, tag_in, op_type, frm, dataa, datab}),
|
||||||
.out ({op_type_r, frm_r, dataa_r, datab_r})
|
.out ({valid_in_r, tag_in_r, op_type_r, frm_r, dataa_r, datab_r})
|
||||||
);
|
);
|
||||||
|
|
||||||
// FCLASS
|
// FCLASS
|
||||||
@@ -155,7 +157,7 @@ module VX_fp_noncomp #(
|
|||||||
for (genvar i = 0; i < LANES; i++) begin
|
for (genvar i = 0; i < LANES; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (frm_r)
|
case (frm_r)
|
||||||
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
0: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
1: fsgnj_res[i] = {~b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
2: fsgnj_res[i] = { a_sign[i] ^ b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
default: fsgnj_res[i] = 32'hdeadbeaf; // don't care value
|
||||||
@@ -249,13 +251,13 @@ module VX_fp_noncomp #(
|
|||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
.N(1 + TAGW + (LANES * 32) + 1 + (LANES * `FFG_BITS))
|
||||||
) nc_reg (
|
) pipe_reg2 (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (1'b0),
|
.flush (1'b0),
|
||||||
.in ({valid_in, tag_in, tmp_result, tmp_has_fflags, tmp_fflags}),
|
.in ({valid_in_r, tag_in_r, tmp_result, tmp_has_fflags, tmp_fflags}),
|
||||||
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
.out ({valid_out, tag_out, result, has_fflags, fflags})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ready_in = ~stall;
|
assign ready_in = ~stall;
|
||||||
|
|||||||
Reference in New Issue
Block a user