moving MUL unit into ALU unit
This commit is contained in:
@@ -1,9 +1,19 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
# exit when any command fails
|
||||||
|
set -e
|
||||||
|
|
||||||
|
# Dogfood tests
|
||||||
./ci/test_runtime.sh
|
./ci/test_runtime.sh
|
||||||
./ci/test_riscv_isa.sh
|
./ci/test_riscv_isa.sh
|
||||||
./ci/test_opencl.sh
|
./ci/test_opencl.sh
|
||||||
./ci/test_driver.sh
|
./ci/test_driver.sh
|
||||||
|
|
||||||
|
# Build tests disabling extensions
|
||||||
|
CONFIGS=-DEXT_M_DISABLE make -C hw/simulate
|
||||||
|
CONFIGS=-DEXT_F_DISABLE make -C hw/simulate
|
||||||
|
|
||||||
|
# Blackbox tests
|
||||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --perf --app=demo --args="-n1"
|
||||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --debug --app=demo --args="-n1"
|
||||||
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1"
|
./ci/travis_run.py ./ci/blackbox.sh --driver=vlsim --cores=1 --scope --app=demo --args="-n1"
|
||||||
|
|||||||
@@ -93,7 +93,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||||||
uint64_t scoreboard_stalls = 0;
|
uint64_t scoreboard_stalls = 0;
|
||||||
uint64_t lsu_stalls = 0;
|
uint64_t lsu_stalls = 0;
|
||||||
uint64_t fpu_stalls = 0;
|
uint64_t fpu_stalls = 0;
|
||||||
uint64_t mul_stalls = 0;
|
|
||||||
uint64_t csr_stalls = 0;
|
uint64_t csr_stalls = 0;
|
||||||
uint64_t alu_stalls = 0;
|
uint64_t alu_stalls = 0;
|
||||||
uint64_t gpu_stalls = 0;
|
uint64_t gpu_stalls = 0;
|
||||||
@@ -158,12 +157,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||||||
uint64_t csr_stalls_per_core;
|
uint64_t csr_stalls_per_core;
|
||||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
ret |= vx_csr_get_l(device, core_id, CSR_MPM_CSR_ST, CSR_MPM_CSR_ST_H, &csr_stalls_per_core);
|
||||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
|
if (num_cores > 1) fprintf(stream, "PERF: core%d: csr unit stalls=%ld\n", core_id, csr_stalls_per_core);
|
||||||
csr_stalls += csr_stalls_per_core;
|
csr_stalls += csr_stalls_per_core;
|
||||||
// mul_stall
|
|
||||||
uint64_t mul_stalls_per_core;
|
|
||||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_MUL_ST, CSR_MPM_MUL_ST_H, &mul_stalls_per_core);
|
|
||||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: mul unit stalls=%ld\n", core_id, mul_stalls_per_core);
|
|
||||||
mul_stalls += mul_stalls_per_core;
|
|
||||||
// fpu_stall
|
// fpu_stall
|
||||||
uint64_t fpu_stalls_per_core;
|
uint64_t fpu_stalls_per_core;
|
||||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
ret |= vx_csr_get_l(device, core_id, CSR_MPM_FPU_ST, CSR_MPM_FPU_ST_H, &fpu_stalls_per_core);
|
||||||
@@ -295,7 +289,6 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
|||||||
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
|
fprintf(stream, "PERF: alu unit stalls=%ld\n", alu_stalls);
|
||||||
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
|
fprintf(stream, "PERF: lsu unit stalls=%ld\n", lsu_stalls);
|
||||||
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
|
fprintf(stream, "PERF: csr unit stalls=%ld\n", csr_stalls);
|
||||||
fprintf(stream, "PERF: mul unit stalls=%ld\n", mul_stalls);
|
|
||||||
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
|
fprintf(stream, "PERF: fpu unit stalls=%ld\n", fpu_stalls);
|
||||||
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
|
fprintf(stream, "PERF: gpu unit stalls=%ld\n", gpu_stalls);
|
||||||
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
|
fprintf(stream, "PERF: icache reads=%ld\n", icache_reads);
|
||||||
|
|||||||
@@ -13,13 +13,16 @@ module VX_alu_unit #(
|
|||||||
VX_branch_ctl_if branch_ctl_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_commit_if alu_commit_if
|
VX_commit_if alu_commit_if
|
||||||
);
|
);
|
||||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
reg [`NUM_THREADS-1:0][31:0] add_result;
|
wire [`NUM_THREADS-1:0][31:0] add_result;
|
||||||
reg [`NUM_THREADS-1:0][32:0] sub_result;
|
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
||||||
reg [`NUM_THREADS-1:0][31:0] shr_result;
|
wire [`NUM_THREADS-1:0][31:0] shr_result;
|
||||||
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
reg [`NUM_THREADS-1:0][31:0] msc_result;
|
||||||
|
|
||||||
wire is_br_op = alu_req_if.is_br_op;
|
wire stall_in, stall_out;
|
||||||
|
|
||||||
|
`UNUSED_VAR (alu_req_if.op_mod)
|
||||||
|
wire is_br_op = `IS_BR_MOD(alu_req_if.op_mod);
|
||||||
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
|
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op_type);
|
||||||
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
|
wire [`BR_BITS-1:0] br_op = `BR_OP(alu_req_if.op_type);
|
||||||
wire alu_signed = `ALU_SIGNED(alu_op);
|
wire alu_signed = `ALU_SIGNED(alu_op);
|
||||||
@@ -34,17 +37,13 @@ module VX_alu_unit #(
|
|||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
wire [`NUM_THREADS-1:0][31:0] alu_in2_less = (alu_req_if.rs2_is_imm && !is_br_op) ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
always @(*) begin
|
assign add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
||||||
add_result[i] = alu_in1_PC[i] + alu_in2_imm[i];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
wire [32:0] sub_in1 = {alu_signed & alu_in1[i][31], alu_in1[i]};
|
||||||
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
wire [32:0] sub_in2 = {alu_signed & alu_in2_less[i][31], alu_in2_less[i]};
|
||||||
always @(*) begin
|
assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
||||||
sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
@@ -52,9 +51,7 @@ module VX_alu_unit #(
|
|||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
|
wire [32:0] shr_value = $signed(shr_in1) >>> alu_in2_imm[i][4:0];
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
always @(*) begin
|
assign shr_result[i] = shr_value[31:0];
|
||||||
shr_result[i] = shr_value[31:0];
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
@@ -92,13 +89,94 @@ module VX_alu_unit #(
|
|||||||
wire br_neg = `BR_NEG(br_op);
|
wire br_neg = `BR_NEG(br_op);
|
||||||
wire br_less = `BR_LESS(br_op);
|
wire br_less = `BR_LESS(br_op);
|
||||||
wire br_static = `BR_STATIC(br_op);
|
wire br_static = `BR_STATIC(br_op);
|
||||||
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
wire br_taken = ((br_less ? is_less : is_equal) ^ br_neg) | br_static;
|
||||||
|
|
||||||
// output
|
// output
|
||||||
|
|
||||||
|
wire result_valid;
|
||||||
|
wire [`NW_BITS-1:0] result_wid;
|
||||||
|
wire [`NUM_THREADS-1:0] result_tmask;
|
||||||
|
wire [31:0] result_PC;
|
||||||
|
wire [`NR_BITS-1:0] result_rd;
|
||||||
|
wire result_wb;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] result_data;
|
||||||
|
wire result_is_br;
|
||||||
|
|
||||||
|
`ifdef EXT_M_ENABLE
|
||||||
|
|
||||||
|
wire mul_ready_in;
|
||||||
|
wire mul_valid_out;
|
||||||
|
wire mul_ready_out;
|
||||||
|
wire [`NW_BITS-1:0] mul_wid;
|
||||||
|
wire [`NUM_THREADS-1:0] mul_tmask;
|
||||||
|
wire [31:0] mul_PC;
|
||||||
|
wire [`NR_BITS-1:0] mul_rd;
|
||||||
|
wire mul_wb;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] mul_data;
|
||||||
|
|
||||||
|
wire is_mul_op = `IS_MUL_MOD(alu_req_if.op_mod);
|
||||||
|
|
||||||
|
VX_muldiv muldiv (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
|
||||||
|
// Inputs
|
||||||
|
.alu_op (`MUL_OP(alu_req_if.op_type)),
|
||||||
|
.wid_in (alu_req_if.wid),
|
||||||
|
.tmask_in (alu_req_if.tmask),
|
||||||
|
.PC_in (alu_req_if.PC),
|
||||||
|
.rd_in (alu_req_if.rd),
|
||||||
|
.wb_in (alu_req_if.wb),
|
||||||
|
.alu_in1 (alu_req_if.rs1_data),
|
||||||
|
.alu_in2 (alu_req_if.rs2_data),
|
||||||
|
|
||||||
|
// Outputs
|
||||||
|
.wid_out (mul_wid),
|
||||||
|
.tmask_out (mul_tmask),
|
||||||
|
.PC_out (mul_PC),
|
||||||
|
.rd_out (mul_rd),
|
||||||
|
.wb_out (mul_wb),
|
||||||
|
.data_out (mul_data),
|
||||||
|
|
||||||
|
// handshake
|
||||||
|
.valid_in (alu_req_if.valid && is_mul_op),
|
||||||
|
.ready_in (mul_ready_in),
|
||||||
|
.valid_out (mul_valid_out),
|
||||||
|
.ready_out (mul_ready_out)
|
||||||
|
);
|
||||||
|
|
||||||
|
assign stall_in = (is_mul_op && ~mul_ready_in)
|
||||||
|
|| (~is_mul_op && (mul_valid_out || stall_out));
|
||||||
|
|
||||||
|
assign mul_ready_out = !stall_out;
|
||||||
|
|
||||||
|
assign result_valid = mul_valid_out | (alu_req_if.valid && ~is_mul_op);
|
||||||
|
assign result_wid = mul_valid_out ? mul_wid : alu_req_if.wid;
|
||||||
|
assign result_tmask = mul_valid_out ? mul_tmask : alu_req_if.tmask;
|
||||||
|
assign result_PC = mul_valid_out ? mul_PC : alu_req_if.PC;
|
||||||
|
assign result_rd = mul_valid_out ? mul_rd : alu_req_if.rd;
|
||||||
|
assign result_wb = mul_valid_out ? mul_wb : alu_req_if.wb;
|
||||||
|
assign result_data = mul_valid_out ? mul_data : alu_jal_result;
|
||||||
|
assign result_is_br = !mul_valid_out && is_br_op;
|
||||||
|
|
||||||
|
`else
|
||||||
|
|
||||||
|
assign stall_in = 0;
|
||||||
|
|
||||||
|
assign result_valid = alu_req_if.valid;
|
||||||
|
assign result_wid = alu_req_if.wid;
|
||||||
|
assign result_tmask = alu_req_if.tmask;
|
||||||
|
assign result_PC = alu_req_if.PC;
|
||||||
|
assign result_rd = alu_req_if.rd;
|
||||||
|
assign result_wb = alu_req_if.wb;
|
||||||
|
assign result_data = alu_jal_result;
|
||||||
|
assign result_is_br = is_br_op;
|
||||||
|
|
||||||
|
`endif
|
||||||
|
|
||||||
wire is_br_op_r;
|
wire is_br_op_r;
|
||||||
|
|
||||||
wire stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
assign stall_out = ~alu_commit_if.ready && alu_commit_if.valid;
|
||||||
|
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
|
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32) + 1 + 1 + 32),
|
||||||
@@ -107,8 +185,8 @@ module VX_alu_unit #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (!stall_out),
|
.enable (!stall_out),
|
||||||
.data_in ({alu_req_if.valid, alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.rd, alu_req_if.wb, alu_jal_result, is_br_op, br_taken, br_dest}),
|
.data_in ({result_valid, result_wid, result_tmask, result_PC, result_rd, result_wb, result_data, result_is_br, br_taken, br_dest}),
|
||||||
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
|
.data_out ({alu_commit_if.valid, alu_commit_if.wid, alu_commit_if.tmask, alu_commit_if.PC, alu_commit_if.rd, alu_commit_if.wb, alu_commit_if.data, is_br_op_r, branch_ctl_if.taken, branch_ctl_if.dest})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign alu_commit_if.eop = 1'b1;
|
assign alu_commit_if.eop = 1'b1;
|
||||||
@@ -117,6 +195,6 @@ module VX_alu_unit #(
|
|||||||
assign branch_ctl_if.wid = alu_commit_if.wid;
|
assign branch_ctl_if.wid = alu_commit_if.wid;
|
||||||
|
|
||||||
// can accept new request?
|
// can accept new request?
|
||||||
assign alu_req_if.ready = ~stall_out;
|
assign alu_req_if.ready = ~stall_in;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -9,8 +9,7 @@ module VX_commit #(
|
|||||||
// inputs
|
// inputs
|
||||||
VX_commit_if alu_commit_if,
|
VX_commit_if alu_commit_if,
|
||||||
VX_commit_if ld_commit_if,
|
VX_commit_if ld_commit_if,
|
||||||
VX_commit_if st_commit_if,
|
VX_commit_if st_commit_if,
|
||||||
VX_commit_if mul_commit_if,
|
|
||||||
VX_commit_if csr_commit_if,
|
VX_commit_if csr_commit_if,
|
||||||
VX_commit_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
VX_commit_if gpu_commit_if,
|
VX_commit_if gpu_commit_if,
|
||||||
@@ -27,7 +26,6 @@ module VX_commit #(
|
|||||||
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
|
wire ld_commit_fire = ld_commit_if.valid && ld_commit_if.ready;
|
||||||
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
|
wire st_commit_fire = st_commit_if.valid && st_commit_if.ready;
|
||||||
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
|
wire csr_commit_fire = csr_commit_if.valid && csr_commit_if.ready;
|
||||||
wire mul_commit_fire = mul_commit_if.valid && mul_commit_if.ready;
|
|
||||||
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
|
wire fpu_commit_fire = fpu_commit_if.valid && fpu_commit_if.ready;
|
||||||
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
|
wire gpu_commit_fire = gpu_commit_if.valid && gpu_commit_if.ready;
|
||||||
|
|
||||||
@@ -35,7 +33,6 @@ module VX_commit #(
|
|||||||
|| ld_commit_fire
|
|| ld_commit_fire
|
||||||
|| st_commit_fire
|
|| st_commit_fire
|
||||||
|| csr_commit_fire
|
|| csr_commit_fire
|
||||||
|| mul_commit_fire
|
|
||||||
|| fpu_commit_fire
|
|| fpu_commit_fire
|
||||||
|| gpu_commit_fire;
|
|| gpu_commit_fire;
|
||||||
|
|
||||||
@@ -44,7 +41,6 @@ module VX_commit #(
|
|||||||
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
|
assign commit_tmask1 = alu_commit_fire ? alu_commit_if.tmask:
|
||||||
ld_commit_fire ? ld_commit_if.tmask:
|
ld_commit_fire ? ld_commit_if.tmask:
|
||||||
csr_commit_fire ? csr_commit_if.tmask:
|
csr_commit_fire ? csr_commit_if.tmask:
|
||||||
mul_commit_fire ? mul_commit_if.tmask:
|
|
||||||
fpu_commit_fire ? fpu_commit_if.tmask:
|
fpu_commit_fire ? fpu_commit_if.tmask:
|
||||||
0;
|
0;
|
||||||
|
|
||||||
@@ -76,7 +72,6 @@ module VX_commit #(
|
|||||||
.alu_commit_if (alu_commit_if),
|
.alu_commit_if (alu_commit_if),
|
||||||
.ld_commit_if (ld_commit_if),
|
.ld_commit_if (ld_commit_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
|
||||||
.fpu_commit_if (fpu_commit_if),
|
.fpu_commit_if (fpu_commit_if),
|
||||||
|
|
||||||
.writeback_if (writeback_if)
|
.writeback_if (writeback_if)
|
||||||
@@ -99,10 +94,7 @@ module VX_commit #(
|
|||||||
end
|
end
|
||||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
||||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=CSR, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, csr_commit_if.wid, csr_commit_if.PC, csr_commit_if.tmask, csr_commit_if.wb, csr_commit_if.rd, csr_commit_if.data);
|
||||||
end
|
end
|
||||||
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
|
||||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=MUL, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, mul_commit_if.wid, mul_commit_if.PC, mul_commit_if.tmask, mul_commit_if.wb, mul_commit_if.rd, mul_commit_if.data);
|
|
||||||
end
|
|
||||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
||||||
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
|
$display("%t: core%0d-commit: wid=%0d, PC=%0h, ex=FPU, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, fpu_commit_if.wid, fpu_commit_if.PC, fpu_commit_if.tmask, fpu_commit_if.wb, fpu_commit_if.rd, fpu_commit_if.data);
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -167,54 +167,52 @@
|
|||||||
`define CSR_MPM_LSU_ST_H 12'hB86
|
`define CSR_MPM_LSU_ST_H 12'hB86
|
||||||
`define CSR_MPM_CSR_ST 12'hB07
|
`define CSR_MPM_CSR_ST 12'hB07
|
||||||
`define CSR_MPM_CSR_ST_H 12'hB87
|
`define CSR_MPM_CSR_ST_H 12'hB87
|
||||||
`define CSR_MPM_MUL_ST 12'hB08
|
`define CSR_MPM_FPU_ST 12'hB08
|
||||||
`define CSR_MPM_MUL_ST_H 12'hB88
|
`define CSR_MPM_FPU_ST_H 12'hB88
|
||||||
`define CSR_MPM_FPU_ST 12'hB09
|
`define CSR_MPM_GPU_ST 12'hB09
|
||||||
`define CSR_MPM_FPU_ST_H 12'hB89
|
`define CSR_MPM_GPU_ST_H 12'hB89
|
||||||
`define CSR_MPM_GPU_ST 12'hB0A
|
|
||||||
`define CSR_MPM_GPU_ST_H 12'hB8A
|
|
||||||
// PERF: icache
|
// PERF: icache
|
||||||
`define CSR_MPM_ICACHE_READS 12'hB0B // total reads
|
`define CSR_MPM_ICACHE_READS 12'hB0A // total reads
|
||||||
`define CSR_MPM_ICACHE_READS_H 12'hB8B
|
`define CSR_MPM_ICACHE_READS_H 12'hB8A
|
||||||
`define CSR_MPM_ICACHE_MISS_R 12'hB0C // total misses
|
`define CSR_MPM_ICACHE_MISS_R 12'hB0B // total misses
|
||||||
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8C
|
`define CSR_MPM_ICACHE_MISS_R_H 12'hB8B
|
||||||
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0D // pipeline stalls
|
`define CSR_MPM_ICACHE_PIPE_ST 12'hB0C // pipeline stalls
|
||||||
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8D
|
`define CSR_MPM_ICACHE_PIPE_ST_H 12'hB8C
|
||||||
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0E // core response stalls
|
`define CSR_MPM_ICACHE_CRSP_ST 12'hB0D // core response stalls
|
||||||
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8E
|
`define CSR_MPM_ICACHE_CRSP_ST_H 12'hB8D
|
||||||
// PERF: dcache
|
// PERF: dcache
|
||||||
`define CSR_MPM_DCACHE_READS 12'hB0F // total reads
|
`define CSR_MPM_DCACHE_READS 12'hB0E // total reads
|
||||||
`define CSR_MPM_DCACHE_READS_H 12'hB8F
|
`define CSR_MPM_DCACHE_READS_H 12'hB8E
|
||||||
`define CSR_MPM_DCACHE_WRITES 12'hB10 // total writes
|
`define CSR_MPM_DCACHE_WRITES 12'hB0F // total writes
|
||||||
`define CSR_MPM_DCACHE_WRITES_H 12'hB90
|
`define CSR_MPM_DCACHE_WRITES_H 12'hB8F
|
||||||
`define CSR_MPM_DCACHE_MISS_R 12'hB11 // read misses
|
`define CSR_MPM_DCACHE_MISS_R 12'hB10 // read misses
|
||||||
`define CSR_MPM_DCACHE_MISS_R_H 12'hB91
|
`define CSR_MPM_DCACHE_MISS_R_H 12'hB90
|
||||||
`define CSR_MPM_DCACHE_MISS_W 12'hB12 // write misses
|
`define CSR_MPM_DCACHE_MISS_W 12'hB11 // write misses
|
||||||
`define CSR_MPM_DCACHE_MISS_W_H 12'hB92
|
`define CSR_MPM_DCACHE_MISS_W_H 12'hB91
|
||||||
`define CSR_MPM_DCACHE_BANK_ST 12'hB13 // bank conflicts stalls
|
`define CSR_MPM_DCACHE_BANK_ST 12'hB12 // bank conflicts stalls
|
||||||
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB93
|
`define CSR_MPM_DCACHE_BANK_ST_H 12'hB92
|
||||||
`define CSR_MPM_DCACHE_MSHR_ST 12'hB14 // MSHR stalls
|
`define CSR_MPM_DCACHE_MSHR_ST 12'hB13 // MSHR stalls
|
||||||
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB94
|
`define CSR_MPM_DCACHE_MSHR_ST_H 12'hB93
|
||||||
`define CSR_MPM_DCACHE_PIPE_ST 12'hB15 // pipeline stalls
|
`define CSR_MPM_DCACHE_PIPE_ST 12'hB14 // pipeline stalls
|
||||||
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB95
|
`define CSR_MPM_DCACHE_PIPE_ST_H 12'hB94
|
||||||
`define CSR_MPM_DCACHE_CRSP_ST 12'hB16 // core response stalls
|
`define CSR_MPM_DCACHE_CRSP_ST 12'hB15 // core response stalls
|
||||||
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB96
|
`define CSR_MPM_DCACHE_CRSP_ST_H 12'hB95
|
||||||
// PERF: smem
|
// PERF: smem
|
||||||
`define CSR_MPM_SMEM_READS 12'hB17 // total reads
|
`define CSR_MPM_SMEM_READS 12'hB16 // total reads
|
||||||
`define CSR_MPM_SMEM_READS_H 12'hB97
|
`define CSR_MPM_SMEM_READS_H 12'hB96
|
||||||
`define CSR_MPM_SMEM_WRITES 12'hB18 // total writes
|
`define CSR_MPM_SMEM_WRITES 12'hB17 // total writes
|
||||||
`define CSR_MPM_SMEM_WRITES_H 12'hB98
|
`define CSR_MPM_SMEM_WRITES_H 12'hB97
|
||||||
`define CSR_MPM_SMEM_BANK_ST 12'hB19 // bank conflicts stalls
|
`define CSR_MPM_SMEM_BANK_ST 12'hB18 // bank conflicts stalls
|
||||||
`define CSR_MPM_SMEM_BANK_ST_H 12'hB99
|
`define CSR_MPM_SMEM_BANK_ST_H 12'hB98
|
||||||
// PERF: memory
|
// PERF: memory
|
||||||
`define CSR_MPM_DRAM_READS 12'hB1A // dram reads
|
`define CSR_MPM_DRAM_READS 12'hB19 // dram reads
|
||||||
`define CSR_MPM_DRAM_READS_H 12'hB9A
|
`define CSR_MPM_DRAM_READS_H 12'hB99
|
||||||
`define CSR_MPM_DRAM_WRITES 12'hB1B // dram writes
|
`define CSR_MPM_DRAM_WRITES 12'hB1A // dram writes
|
||||||
`define CSR_MPM_DRAM_WRITES_H 12'hB9B
|
`define CSR_MPM_DRAM_WRITES_H 12'hB9A
|
||||||
`define CSR_MPM_DRAM_ST 12'hB1C // dram request stalls
|
`define CSR_MPM_DRAM_ST 12'hB1B // dram request stalls
|
||||||
`define CSR_MPM_DRAM_ST_H 12'hB9C
|
`define CSR_MPM_DRAM_ST_H 12'hB9B
|
||||||
`define CSR_MPM_DRAM_LAT 12'hB1D // dram latency (total)
|
`define CSR_MPM_DRAM_LAT 12'hB1C // dram latency (total)
|
||||||
`define CSR_MPM_DRAM_LAT_H 12'hB9D
|
`define CSR_MPM_DRAM_LAT_H 12'hB9C
|
||||||
|
|
||||||
// Machine Information Registers
|
// Machine Information Registers
|
||||||
`define CSR_MVENDORID 12'hF11
|
`define CSR_MVENDORID 12'hF11
|
||||||
|
|||||||
@@ -132,8 +132,6 @@ module VX_csr_data #(
|
|||||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||||
`CSR_MPM_MUL_ST : read_data_r = perf_pipeline_if.mul_stalls[31:0];
|
|
||||||
`CSR_MPM_MUL_ST_H : read_data_r = perf_pipeline_if.mul_stalls[63:32];
|
|
||||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||||
|
|||||||
@@ -21,10 +21,10 @@ module VX_decode #(
|
|||||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||||
|
|
||||||
reg [`ALU_BITS-1:0] alu_op;
|
reg [`ALU_BITS-1:0] alu_op;
|
||||||
reg [`BR_BITS-1:0] br_op;
|
reg [`BR_BITS-1:0] br_op;
|
||||||
|
reg [`MUL_BITS-1:0] mul_op;
|
||||||
reg [`LSU_BITS-1:0] lsu_op;
|
reg [`LSU_BITS-1:0] lsu_op;
|
||||||
reg [`CSR_BITS-1:0] csr_op;
|
reg [`CSR_BITS-1:0] csr_op;
|
||||||
reg [`MUL_BITS-1:0] mul_op;
|
|
||||||
reg [`FPU_BITS-1:0] fpu_op;
|
reg [`FPU_BITS-1:0] fpu_op;
|
||||||
reg [`GPU_BITS-1:0] gpu_op;
|
reg [`GPU_BITS-1:0] gpu_op;
|
||||||
|
|
||||||
@@ -120,16 +120,11 @@ module VX_decode #(
|
|||||||
`INST_JAL: br_op = `BR_JAL;
|
`INST_JAL: br_op = `BR_JAL;
|
||||||
`INST_JALR: br_op = `BR_JALR;
|
`INST_JALR: br_op = `BR_JALR;
|
||||||
`INST_SYS: begin
|
`INST_SYS: begin
|
||||||
if (is_jals) begin
|
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
||||||
case (u_12)
|
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
||||||
12'h000: br_op = `BR_ECALL;
|
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
||||||
12'h001: br_op = `BR_EBREAK;
|
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
||||||
12'h302: br_op = `BR_MRET;
|
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
||||||
12'h102: br_op = `BR_SRET;
|
|
||||||
12'h7B2: br_op = `BR_DRET;
|
|
||||||
default:;
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
@@ -174,7 +169,7 @@ module VX_decode #(
|
|||||||
|
|
||||||
// MUL
|
// MUL
|
||||||
`ifdef EXT_M_ENABLE
|
`ifdef EXT_M_ENABLE
|
||||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
mul_op = `MUL_MUL;
|
mul_op = `MUL_MUL;
|
||||||
case (func3)
|
case (func3)
|
||||||
@@ -238,11 +233,11 @@ module VX_decode #(
|
|||||||
7'h0C: fpu_op = `FPU_DIV;
|
7'h0C: fpu_op = `FPU_DIV;
|
||||||
7'h10: begin
|
7'h10: begin
|
||||||
fpu_op = `FPU_MISC;
|
fpu_op = `FPU_MISC;
|
||||||
frm = func3[1] ? 2 : (func3[0] ? 1 : 0);
|
frm = func3[1] ? 3'b010 : {2'b0, func3[0]};
|
||||||
end
|
end
|
||||||
7'h14: begin
|
7'h14: begin
|
||||||
fpu_op = `FPU_MISC;
|
fpu_op = `FPU_MISC;
|
||||||
frm = (func3 == 3'h0) ? 3 : 4;
|
frm = (func3 == 3'h0) ? 3'b011 : 3'b100;
|
||||||
end
|
end
|
||||||
7'h2C: begin
|
7'h2C: begin
|
||||||
fpu_op = `FPU_SQRT;
|
fpu_op = `FPU_SQRT;
|
||||||
@@ -272,6 +267,7 @@ module VX_decode #(
|
|||||||
wire is_fpu = 0;
|
wire is_fpu = 0;
|
||||||
wire is_fpu_no_mem= 0;
|
wire is_fpu_no_mem= 0;
|
||||||
wire [2:0] frm = 0;
|
wire [2:0] frm = 0;
|
||||||
|
wire is_fsqrt = 0;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
fpu_op = `FPU_MISC;
|
fpu_op = `FPU_MISC;
|
||||||
@@ -334,25 +330,23 @@ module VX_decode #(
|
|||||||
assign decode_if.tmask = ifetch_rsp_if.tmask;
|
assign decode_if.tmask = ifetch_rsp_if.tmask;
|
||||||
assign decode_if.PC = ifetch_rsp_if.PC;
|
assign decode_if.PC = ifetch_rsp_if.PC;
|
||||||
|
|
||||||
assign decode_if.ex_type = is_lsu ? `EX_LSU :
|
assign decode_if.ex_type = is_gpu ? `EX_GPU :
|
||||||
is_csr ? `EX_CSR :
|
is_csr ? `EX_CSR :
|
||||||
is_mul ? `EX_MUL :
|
is_fpu_no_mem ? `EX_FPU :
|
||||||
is_fpu_no_mem ? `EX_FPU :
|
is_lsu ? `EX_LSU :
|
||||||
is_gpu ? `EX_GPU :
|
(is_br || is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||||
is_br ? `EX_ALU :
|
`EX_NOP;
|
||||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
|
||||||
`EX_NOP;
|
|
||||||
|
|
||||||
assign decode_if.op_type = is_lsu ? `OP_BITS'(lsu_op) :
|
assign decode_if.op_type = is_gpu ? `OP_BITS'(gpu_op) :
|
||||||
is_csr ? `OP_BITS'(csr_op) :
|
is_csr ? `OP_BITS'(csr_op) :
|
||||||
is_mul ? `OP_BITS'(mul_op) :
|
is_mul ? `OP_BITS'(mul_op) :
|
||||||
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
|
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
|
||||||
is_gpu ? `OP_BITS'(gpu_op) :
|
is_lsu ? `OP_BITS'(lsu_op) :
|
||||||
is_br ? `OP_BITS'(br_op) :
|
is_br ? `OP_BITS'(br_op) :
|
||||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
assign decode_if.wb = use_rd;
|
assign decode_if.wb = use_rd && (decode_if.ex_type != `EX_NOP);
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
|
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
|
||||||
@@ -370,13 +364,10 @@ module VX_decode #(
|
|||||||
assign decode_if.rs3 = rs3;
|
assign decode_if.rs3 = rs3;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
wire is_nop = (decode_if.ex_type == `EX_NOP);
|
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
|
||||||
|
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
||||||
assign decode_if.used_regs = is_nop ? `NUM_REGS'(0) :
|
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
||||||
((`NUM_REGS'(use_rd) << decode_if.rd)
|
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
|
||||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
|
||||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
|
||||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3));
|
|
||||||
|
|
||||||
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||||
@@ -386,7 +377,7 @@ module VX_decode #(
|
|||||||
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||||
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
|
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
|
||||||
|
|
||||||
wire [`MOD_BITS-1:0] alu_mod = is_br ? 1 : 0;
|
wire [`MOD_BITS-1:0] alu_mod = {1'b0, is_mul, is_br};
|
||||||
assign decode_if.op_mod = is_fpu_no_mem ? frm : alu_mod;
|
assign decode_if.op_mod = is_fpu_no_mem ? frm : alu_mod;
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|||||||
@@ -68,9 +68,8 @@
|
|||||||
`define EX_ALU 3'h1
|
`define EX_ALU 3'h1
|
||||||
`define EX_LSU 3'h2
|
`define EX_LSU 3'h2
|
||||||
`define EX_CSR 3'h3
|
`define EX_CSR 3'h3
|
||||||
`define EX_MUL 3'h4
|
`define EX_FPU 3'h4
|
||||||
`define EX_FPU 3'h5
|
`define EX_GPU 3'h5
|
||||||
`define EX_GPU 3'h6
|
|
||||||
`define EX_BITS 3
|
`define EX_BITS 3
|
||||||
|
|
||||||
`define NUM_EXS 6
|
`define NUM_EXS 6
|
||||||
@@ -118,10 +117,21 @@
|
|||||||
`define BR_NEG(x) x[1]
|
`define BR_NEG(x) x[1]
|
||||||
`define BR_LESS(x) x[2]
|
`define BR_LESS(x) x[2]
|
||||||
`define BR_STATIC(x) x[3]
|
`define BR_STATIC(x) x[3]
|
||||||
`define ALU_BR_BITS 4
|
|
||||||
`define ALU_BR_OP(x) x[`ALU_BR_BITS-1:0]
|
|
||||||
`define IS_BR_MOD(x) x[0]
|
`define IS_BR_MOD(x) x[0]
|
||||||
|
|
||||||
|
`define MUL_MUL 3'h0
|
||||||
|
`define MUL_MULH 3'h1
|
||||||
|
`define MUL_MULHSU 3'h2
|
||||||
|
`define MUL_MULHU 3'h3
|
||||||
|
`define MUL_DIV 3'h4
|
||||||
|
`define MUL_DIVU 3'h5
|
||||||
|
`define MUL_REM 3'h6
|
||||||
|
`define MUL_REMU 3'h7
|
||||||
|
`define MUL_BITS 3
|
||||||
|
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
||||||
|
`define IS_DIV_OP(x) x[2]
|
||||||
|
`define IS_MUL_MOD(x) x[1]
|
||||||
|
|
||||||
`define LSU_SB 3'h0
|
`define LSU_SB 3'h0
|
||||||
`define LSU_SH 3'h1
|
`define LSU_SH 3'h1
|
||||||
`define LSU_SW 3'h2
|
`define LSU_SW 3'h2
|
||||||
@@ -138,18 +148,6 @@
|
|||||||
`define CSR_BITS 2
|
`define CSR_BITS 2
|
||||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||||
|
|
||||||
`define MUL_MUL 3'h0
|
|
||||||
`define MUL_MULH 3'h1
|
|
||||||
`define MUL_MULHSU 3'h2
|
|
||||||
`define MUL_MULHU 3'h3
|
|
||||||
`define MUL_DIV 3'h4
|
|
||||||
`define MUL_DIVU 3'h5
|
|
||||||
`define MUL_REM 3'h6
|
|
||||||
`define MUL_REMU 3'h7
|
|
||||||
`define MUL_BITS 3
|
|
||||||
`define MUL_OP(x) x[`MUL_BITS-1:0]
|
|
||||||
`define IS_DIV_OP(x) x[2]
|
|
||||||
|
|
||||||
`define FPU_ADD 4'h0
|
`define FPU_ADD 4'h0
|
||||||
`define FPU_SUB 4'h1
|
`define FPU_SUB 4'h1
|
||||||
`define FPU_MUL 4'h2
|
`define FPU_MUL 4'h2
|
||||||
|
|||||||
@@ -27,8 +27,7 @@ module VX_execute #(
|
|||||||
// inputs
|
// inputs
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_mul_req_if mul_req_if,
|
|
||||||
VX_fpu_req_if fpu_req_if,
|
VX_fpu_req_if fpu_req_if,
|
||||||
VX_gpu_req_if gpu_req_if,
|
VX_gpu_req_if gpu_req_if,
|
||||||
|
|
||||||
@@ -39,7 +38,6 @@ module VX_execute #(
|
|||||||
VX_commit_if ld_commit_if,
|
VX_commit_if ld_commit_if,
|
||||||
VX_commit_if st_commit_if,
|
VX_commit_if st_commit_if,
|
||||||
VX_commit_if csr_commit_if,
|
VX_commit_if csr_commit_if,
|
||||||
VX_commit_if mul_commit_if,
|
|
||||||
VX_commit_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
VX_commit_if gpu_commit_if,
|
VX_commit_if gpu_commit_if,
|
||||||
|
|
||||||
@@ -93,26 +91,6 @@ module VX_execute #(
|
|||||||
.busy (busy)
|
.busy (busy)
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
VX_mul_unit #(
|
|
||||||
.CORE_ID(CORE_ID)
|
|
||||||
) mul_unit (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.mul_req_if (mul_req_if),
|
|
||||||
.mul_commit_if (mul_commit_if)
|
|
||||||
);
|
|
||||||
`else
|
|
||||||
assign mul_req_if.ready = 0;
|
|
||||||
assign mul_commit_if.valid = 0;
|
|
||||||
assign mul_commit_if.wid = 0;
|
|
||||||
assign mul_commit_if.PC = 0;
|
|
||||||
assign mul_commit_if.tmask = 0;
|
|
||||||
assign mul_commit_if.wb = 0;
|
|
||||||
assign mul_commit_if.rd = 0;
|
|
||||||
assign mul_commit_if.data = 0;
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
VX_fpu_unit #(
|
VX_fpu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -155,7 +133,7 @@ module VX_execute #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
assign ebreak = alu_req_if.valid
|
assign ebreak = alu_req_if.valid
|
||||||
&& alu_req_if.is_br_op
|
&& `IS_BR_MOD(alu_req_if.op_mod)
|
||||||
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|
&& (`BR_OP(alu_req_if.op_type) == `BR_EBREAK
|
||||||
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
|
|| `BR_OP(alu_req_if.op_type) == `BR_ECALL);
|
||||||
|
|
||||||
|
|||||||
@@ -12,11 +12,15 @@ module VX_instr_demux (
|
|||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_mul_req_if mul_req_if,
|
|
||||||
VX_fpu_req_if fpu_req_if,
|
VX_fpu_req_if fpu_req_if,
|
||||||
VX_gpu_req_if gpu_req_if
|
VX_gpu_req_if gpu_req_if
|
||||||
);
|
);
|
||||||
wire [`NT_BITS-1:0] tid;
|
wire [`NT_BITS-1:0] tid;
|
||||||
|
wire alu_req_ready;
|
||||||
|
wire lsu_req_ready;
|
||||||
|
wire csr_req_ready;
|
||||||
|
wire fpu_req_ready;
|
||||||
|
wire gpu_req_ready;
|
||||||
|
|
||||||
VX_priority_encoder #(
|
VX_priority_encoder #(
|
||||||
.N (`NUM_THREADS)
|
.N (`NUM_THREADS)
|
||||||
@@ -32,20 +36,17 @@ module VX_instr_demux (
|
|||||||
// ALU unit
|
// ALU unit
|
||||||
|
|
||||||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||||
wire alu_req_ready;
|
|
||||||
|
|
||||||
wire is_br_op = `IS_BR_MOD(execute_if.op_mod);
|
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BR_BITS + 1 + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||||
.NOBACKPRESSURE (1) // ALU has no back pressure
|
.BUFFERED (1)
|
||||||
) alu_buffer (
|
) alu_buffer (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.valid_in (alu_req_valid),
|
.valid_in (alu_req_valid),
|
||||||
.ready_in (alu_req_ready),
|
.ready_in (alu_req_ready),
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_BR_OP(execute_if.op_type), is_br_op, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, next_PC, `ALU_OP(execute_if.op_type), execute_if.op_mod, execute_if.imm, execute_if.rs1_is_PC, execute_if.rs2_is_imm, execute_if.rd, execute_if.wb, tid, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
||||||
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.is_br_op, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
.data_out ({alu_req_if.wid, alu_req_if.tmask, alu_req_if.PC, alu_req_if.next_PC, alu_req_if.op_type, alu_req_if.op_mod, alu_req_if.imm, alu_req_if.rs1_is_PC, alu_req_if.rs2_is_imm, alu_req_if.rd, alu_req_if.wb, alu_req_if.tid, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||||
.valid_out (alu_req_if.valid),
|
.valid_out (alu_req_if.valid),
|
||||||
.ready_out (alu_req_if.ready)
|
.ready_out (alu_req_if.ready)
|
||||||
);
|
);
|
||||||
@@ -53,7 +54,6 @@ module VX_instr_demux (
|
|||||||
// lsu unit
|
// lsu unit
|
||||||
|
|
||||||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||||
wire lsu_req_ready;
|
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||||
@@ -72,7 +72,6 @@ module VX_instr_demux (
|
|||||||
// csr unit
|
// csr unit
|
||||||
|
|
||||||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||||
wire csr_req_ready;
|
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||||
@@ -88,33 +87,11 @@ module VX_instr_demux (
|
|||||||
.ready_out (csr_req_if.ready)
|
.ready_out (csr_req_if.ready)
|
||||||
);
|
);
|
||||||
|
|
||||||
// mul unit
|
|
||||||
|
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
wire mul_req_valid = execute_if.valid && (execute_if.ex_type == `EX_MUL);
|
|
||||||
wire mul_req_ready;
|
|
||||||
|
|
||||||
VX_skid_buffer #(
|
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `MUL_BITS + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
|
||||||
.BUFFERED (1)
|
|
||||||
) mul_buffer (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.valid_in (mul_req_valid),
|
|
||||||
.ready_in (mul_req_ready),
|
|
||||||
.data_in ({execute_if.wid, execute_if.tmask, execute_if.PC, `MUL_OP(execute_if.op_type), execute_if.rd, execute_if.wb, gpr_rsp_if.rs1_data, gpr_rsp_if.rs2_data}),
|
|
||||||
.data_out ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.op_type, mul_req_if.rd, mul_req_if.wb, mul_req_if.rs1_data, mul_req_if.rs2_data}),
|
|
||||||
.valid_out (mul_req_if.valid),
|
|
||||||
.ready_out (mul_req_if.ready)
|
|
||||||
);
|
|
||||||
`endif
|
|
||||||
|
|
||||||
// fpu unit
|
// fpu unit
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||||
wire fpu_req_ready;
|
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||||
.BUFFERED (1)
|
.BUFFERED (1)
|
||||||
@@ -130,12 +107,12 @@ module VX_instr_demux (
|
|||||||
);
|
);
|
||||||
`else
|
`else
|
||||||
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
`UNUSED_VAR (gpr_rsp_if.rs3_data)
|
||||||
|
assign fpu_req_ready = 0;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// gpu unit
|
// gpu unit
|
||||||
|
|
||||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||||
wire gpu_req_ready;
|
|
||||||
|
|
||||||
VX_skid_buffer #(
|
VX_skid_buffer #(
|
||||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||||
@@ -158,7 +135,6 @@ module VX_instr_demux (
|
|||||||
`EX_ALU: ready_r = alu_req_ready;
|
`EX_ALU: ready_r = alu_req_ready;
|
||||||
`EX_LSU: ready_r = lsu_req_ready;
|
`EX_LSU: ready_r = lsu_req_ready;
|
||||||
`EX_CSR: ready_r = csr_req_ready;
|
`EX_CSR: ready_r = csr_req_ready;
|
||||||
`EX_MUL: ready_r = mul_req_ready;
|
|
||||||
`EX_FPU: ready_r = fpu_req_ready;
|
`EX_FPU: ready_r = fpu_req_ready;
|
||||||
`EX_GPU: ready_r = gpu_req_ready;
|
`EX_GPU: ready_r = gpu_req_ready;
|
||||||
default: ready_r = 1'b1; // ignore NOPs
|
default: ready_r = 1'b1; // ignore NOPs
|
||||||
|
|||||||
@@ -18,7 +18,6 @@ module VX_issue #(
|
|||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_mul_req_if mul_req_if,
|
|
||||||
VX_fpu_req_if fpu_req_if,
|
VX_fpu_req_if fpu_req_if,
|
||||||
VX_gpu_req_if gpu_req_if
|
VX_gpu_req_if gpu_req_if
|
||||||
);
|
);
|
||||||
@@ -86,7 +85,6 @@ module VX_issue #(
|
|||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
.mul_req_if (mul_req_if),
|
|
||||||
.fpu_req_if (fpu_req_if),
|
.fpu_req_if (fpu_req_if),
|
||||||
.gpu_req_if (gpu_req_if)
|
.gpu_req_if (gpu_req_if)
|
||||||
);
|
);
|
||||||
@@ -129,9 +127,6 @@ module VX_issue #(
|
|||||||
reg [63:0] perf_lsu_stalls;
|
reg [63:0] perf_lsu_stalls;
|
||||||
reg [63:0] perf_csr_stalls;
|
reg [63:0] perf_csr_stalls;
|
||||||
reg [63:0] perf_gpu_stalls;
|
reg [63:0] perf_gpu_stalls;
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
reg [63:0] perf_mul_stalls;
|
|
||||||
`endif
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
reg [63:0] perf_fpu_stalls;
|
reg [63:0] perf_fpu_stalls;
|
||||||
`endif
|
`endif
|
||||||
@@ -144,9 +139,6 @@ module VX_issue #(
|
|||||||
perf_lsu_stalls <= 0;
|
perf_lsu_stalls <= 0;
|
||||||
perf_csr_stalls <= 0;
|
perf_csr_stalls <= 0;
|
||||||
perf_gpu_stalls <= 0;
|
perf_gpu_stalls <= 0;
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
perf_mul_stalls <= 0;
|
|
||||||
`endif
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
perf_fpu_stalls <= 0;
|
perf_fpu_stalls <= 0;
|
||||||
`endif
|
`endif
|
||||||
@@ -169,11 +161,6 @@ module VX_issue #(
|
|||||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||||
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
||||||
end
|
end
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
if (mul_req_if.valid & !mul_req_if.ready) begin
|
|
||||||
perf_mul_stalls <= perf_mul_stalls + 64'd1;
|
|
||||||
end
|
|
||||||
`endif
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||||
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
||||||
@@ -188,9 +175,6 @@ module VX_issue #(
|
|||||||
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
assign perf_pipeline_if.lsu_stalls = perf_lsu_stalls;
|
||||||
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
assign perf_pipeline_if.csr_stalls = perf_csr_stalls;
|
||||||
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
assign perf_pipeline_if.gpu_stalls = perf_gpu_stalls;
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
assign perf_pipeline_if.mul_stalls = perf_mul_stalls;
|
|
||||||
`endif
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
assign perf_pipeline_if.fpu_stalls = perf_fpu_stalls;
|
||||||
`endif
|
`endif
|
||||||
@@ -207,9 +191,6 @@ module VX_issue #(
|
|||||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=CSR, tmask=%b, rd=%0d, addr=%0h, rs1_data=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.PC, csr_req_if.tmask, csr_req_if.rd, csr_req_if.csr_addr, csr_req_if.rs1_data);
|
||||||
end
|
end
|
||||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=MUL, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.PC, mul_req_if.tmask, mul_req_if.rd, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
|
||||||
end
|
|
||||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||||
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
$display("%t: core%0d-issue: wid=%0d, PC=%0h, ex=FPU, tmask=%b, rd=%0d, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.PC, fpu_req_if.tmask, fpu_req_if.rd, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -1,26 +1,35 @@
|
|||||||
`include "VX_define.vh"
|
`include "VX_define.vh"
|
||||||
|
|
||||||
module VX_mul_unit #(
|
module VX_muldiv (
|
||||||
parameter CORE_ID = 0
|
|
||||||
) (
|
|
||||||
input wire clk,
|
input wire clk,
|
||||||
input wire reset,
|
input wire reset,
|
||||||
|
|
||||||
// Inputs
|
// Inputs
|
||||||
VX_mul_req_if mul_req_if,
|
input wire [`MUL_BITS-1:0] alu_op,
|
||||||
|
input wire [`NW_BITS-1:0] wid_in,
|
||||||
|
input wire [`NUM_THREADS-1:0] tmask_in,
|
||||||
|
input wire [31:0] PC_in,
|
||||||
|
input wire [`NR_BITS-1:0] rd_in,
|
||||||
|
input wire wb_in,
|
||||||
|
input wire [`NUM_THREADS-1:0][31:0] alu_in1,
|
||||||
|
input wire [`NUM_THREADS-1:0][31:0] alu_in2,
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_commit_if mul_commit_if
|
output wire [`NW_BITS-1:0] wid_out,
|
||||||
|
output wire [`NUM_THREADS-1:0] tmask_out,
|
||||||
|
output wire [31:0] PC_out,
|
||||||
|
output wire [`NR_BITS-1:0] rd_out,
|
||||||
|
output wire wb_out,
|
||||||
|
output wire [`NUM_THREADS-1:0][31:0] data_out,
|
||||||
|
|
||||||
|
// handshake
|
||||||
|
input wire valid_in,
|
||||||
|
output wire ready_in,
|
||||||
|
output wire valid_out,
|
||||||
|
input wire ready_out
|
||||||
);
|
);
|
||||||
|
|
||||||
wire [`MUL_BITS-1:0] alu_op = mul_req_if.op_type;
|
wire is_div_op = `IS_DIV_OP(alu_op);
|
||||||
wire is_div_op = `IS_DIV_OP(alu_op);
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = mul_req_if.rs1_data;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = mul_req_if.rs2_data;
|
|
||||||
|
|
||||||
wire ready_out;
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||||
wire [`NW_BITS-1:0] mul_wid_out;
|
wire [`NW_BITS-1:0] mul_wid_out;
|
||||||
@@ -29,9 +38,11 @@ module VX_mul_unit #(
|
|||||||
wire [`NR_BITS-1:0] mul_rd_out;
|
wire [`NR_BITS-1:0] mul_rd_out;
|
||||||
wire mul_wb_out;
|
wire mul_wb_out;
|
||||||
|
|
||||||
|
wire stall_out;
|
||||||
|
|
||||||
wire mul_valid_out;
|
wire mul_valid_out;
|
||||||
wire mul_valid_in = mul_req_if.valid && !is_div_op;
|
wire mul_valid_in = valid_in && !is_div_op;
|
||||||
wire mul_ready_in = ready_out || ~mul_valid_out;
|
wire mul_ready_in = ~stall_out || ~mul_valid_out;
|
||||||
|
|
||||||
wire is_mulh_in = (alu_op != `MUL_MUL);
|
wire is_mulh_in = (alu_op != `MUL_MUL);
|
||||||
wire is_mulh_out;
|
wire is_mulh_out;
|
||||||
@@ -68,8 +79,8 @@ module VX_mul_unit #(
|
|||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (mul_ready_in),
|
.enable (mul_ready_in),
|
||||||
.data_in ({mul_valid_in, mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_mulh_in}),
|
.data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, is_mulh_in}),
|
||||||
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
.data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out})
|
||||||
);
|
);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -83,8 +94,8 @@ module VX_mul_unit #(
|
|||||||
|
|
||||||
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
|
wire is_rem_op_in = (alu_op == `MUL_REM) || (alu_op == `MUL_REMU);
|
||||||
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
|
wire is_signed_div = (alu_op == `MUL_DIV) || (alu_op == `MUL_REM);
|
||||||
wire div_valid_in = mul_req_if.valid && is_div_op;
|
wire div_valid_in = valid_in && is_div_op;
|
||||||
wire div_ready_out = ready_out && ~mul_valid_out; // arbitration prioritizes MUL
|
wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL
|
||||||
wire div_ready_in;
|
wire div_ready_in;
|
||||||
wire div_valid_out;
|
wire div_valid_out;
|
||||||
wire is_rem_op_out;
|
wire is_rem_op_out;
|
||||||
@@ -102,7 +113,7 @@ module VX_mul_unit #(
|
|||||||
.valid_in (div_valid_in),
|
.valid_in (div_valid_in),
|
||||||
.ready_in (div_ready_in),
|
.ready_in (div_ready_in),
|
||||||
.signed_mode(is_signed_div),
|
.signed_mode(is_signed_div),
|
||||||
.tag_in ({mul_req_if.wid, mul_req_if.tmask, mul_req_if.PC, mul_req_if.rd, mul_req_if.wb, is_rem_op_in}),
|
.tag_in ({wid_in, tmask_in, PC_in, rd_in, wb_in, is_rem_op_in}),
|
||||||
.numer (alu_in1),
|
.numer (alu_in1),
|
||||||
.denom (alu_in2),
|
.denom (alu_in2),
|
||||||
.quotient (div_result_tmp),
|
.quotient (div_result_tmp),
|
||||||
@@ -116,9 +127,6 @@ module VX_mul_unit #(
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
wire stall_out = ~mul_commit_if.ready && mul_commit_if.valid;
|
|
||||||
assign ready_out = ~stall_out;
|
|
||||||
|
|
||||||
wire rsp_valid = mul_valid_out || div_valid_out;
|
wire rsp_valid = mul_valid_out || div_valid_out;
|
||||||
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
|
wire [`NW_BITS-1:0] rsp_wid = mul_valid_out ? mul_wid_out : div_wid_out;
|
||||||
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
|
wire [`NUM_THREADS-1:0] rsp_tmask = mul_valid_out ? mul_tmask_out : div_tmask_out;
|
||||||
@@ -127,20 +135,20 @@ module VX_mul_unit #(
|
|||||||
wire rsp_wb = mul_valid_out ? mul_wb_out : div_wb_out;
|
wire rsp_wb = mul_valid_out ? mul_wb_out : div_wb_out;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result;
|
wire [`NUM_THREADS-1:0][31:0] rsp_data = mul_valid_out ? mul_result : div_result;
|
||||||
|
|
||||||
|
assign stall_out = ~ready_out && valid_out;
|
||||||
|
|
||||||
VX_pipe_register #(
|
VX_pipe_register #(
|
||||||
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
.DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)),
|
||||||
.RESETW (1)
|
.RESETW (1)
|
||||||
) pipe_reg (
|
) pipe_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.enable (!stall_out),
|
.enable (~stall_out),
|
||||||
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
.data_in ({rsp_valid, rsp_wid, rsp_tmask, rsp_PC, rsp_rd, rsp_wb, rsp_data}),
|
||||||
.data_out ({mul_commit_if.valid, mul_commit_if.wid, mul_commit_if.tmask, mul_commit_if.PC, mul_commit_if.rd, mul_commit_if.wb, mul_commit_if.data})
|
.data_out ({valid_out, wid_out, tmask_out, PC_out, rd_out, wb_out, data_out})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign mul_commit_if.eop = 1'b1;
|
|
||||||
|
|
||||||
// can accept new request?
|
// can accept new request?
|
||||||
assign mul_req_if.ready = is_div_op ? div_ready_in : mul_ready_in;
|
assign ready_in = is_div_op ? div_ready_in : mul_ready_in;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -150,8 +150,7 @@ module VX_pipeline #(
|
|||||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||||
VX_alu_req_if alu_req_if();
|
VX_alu_req_if alu_req_if();
|
||||||
VX_lsu_req_if lsu_req_if();
|
VX_lsu_req_if lsu_req_if();
|
||||||
VX_csr_req_if csr_req_if();
|
VX_csr_req_if csr_req_if();
|
||||||
VX_mul_req_if mul_req_if();
|
|
||||||
VX_fpu_req_if fpu_req_if();
|
VX_fpu_req_if fpu_req_if();
|
||||||
VX_gpu_req_if gpu_req_if();
|
VX_gpu_req_if gpu_req_if();
|
||||||
VX_writeback_if writeback_if();
|
VX_writeback_if writeback_if();
|
||||||
@@ -160,8 +159,7 @@ module VX_pipeline #(
|
|||||||
VX_commit_if alu_commit_if();
|
VX_commit_if alu_commit_if();
|
||||||
VX_commit_if ld_commit_if();
|
VX_commit_if ld_commit_if();
|
||||||
VX_commit_if st_commit_if();
|
VX_commit_if st_commit_if();
|
||||||
VX_commit_if csr_commit_if();
|
VX_commit_if csr_commit_if();
|
||||||
VX_commit_if mul_commit_if();
|
|
||||||
VX_commit_if fpu_commit_if();
|
VX_commit_if fpu_commit_if();
|
||||||
VX_commit_if gpu_commit_if();
|
VX_commit_if gpu_commit_if();
|
||||||
|
|
||||||
@@ -214,7 +212,6 @@ module VX_pipeline #(
|
|||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
.mul_req_if (mul_req_if),
|
|
||||||
.fpu_req_if (fpu_req_if),
|
.fpu_req_if (fpu_req_if),
|
||||||
.gpu_req_if (gpu_req_if)
|
.gpu_req_if (gpu_req_if)
|
||||||
);
|
);
|
||||||
@@ -243,7 +240,6 @@ module VX_pipeline #(
|
|||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
.mul_req_if (mul_req_if),
|
|
||||||
.fpu_req_if (fpu_req_if),
|
.fpu_req_if (fpu_req_if),
|
||||||
.gpu_req_if (gpu_req_if),
|
.gpu_req_if (gpu_req_if),
|
||||||
|
|
||||||
@@ -253,7 +249,6 @@ module VX_pipeline #(
|
|||||||
.ld_commit_if (ld_commit_if),
|
.ld_commit_if (ld_commit_if),
|
||||||
.st_commit_if (st_commit_if),
|
.st_commit_if (st_commit_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
|
||||||
.fpu_commit_if (fpu_commit_if),
|
.fpu_commit_if (fpu_commit_if),
|
||||||
.gpu_commit_if (gpu_commit_if),
|
.gpu_commit_if (gpu_commit_if),
|
||||||
|
|
||||||
@@ -271,7 +266,6 @@ module VX_pipeline #(
|
|||||||
.ld_commit_if (ld_commit_if),
|
.ld_commit_if (ld_commit_if),
|
||||||
.st_commit_if (st_commit_if),
|
.st_commit_if (st_commit_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
|
||||||
.fpu_commit_if (fpu_commit_if),
|
.fpu_commit_if (fpu_commit_if),
|
||||||
.gpu_commit_if (gpu_commit_if),
|
.gpu_commit_if (gpu_commit_if),
|
||||||
|
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ task print_ex_type (
|
|||||||
`EX_ALU: $write("ALU");
|
`EX_ALU: $write("ALU");
|
||||||
`EX_LSU: $write("LSU");
|
`EX_LSU: $write("LSU");
|
||||||
`EX_CSR: $write("CSR");
|
`EX_CSR: $write("CSR");
|
||||||
`EX_MUL: $write("MUL");
|
|
||||||
`EX_FPU: $write("FPU");
|
`EX_FPU: $write("FPU");
|
||||||
`EX_GPU: $write("GPU");
|
`EX_GPU: $write("GPU");
|
||||||
default: $write("NOP");
|
default: $write("NOP");
|
||||||
@@ -41,6 +40,18 @@ task print_ex_op (
|
|||||||
`BR_DRET: $write("DRET");
|
`BR_DRET: $write("DRET");
|
||||||
default: $write("?");
|
default: $write("?");
|
||||||
endcase
|
endcase
|
||||||
|
end else if (`IS_MUL_MOD(op_mod)) begin
|
||||||
|
case (`MUL_BITS'(op_type))
|
||||||
|
`MUL_MUL: $write("MUL");
|
||||||
|
`MUL_MULH: $write("MULH");
|
||||||
|
`MUL_MULHSU:$write("MULHSU");
|
||||||
|
`MUL_MULHU: $write("MULHU");
|
||||||
|
`MUL_DIV: $write("DIV");
|
||||||
|
`MUL_DIVU: $write("DIVU");
|
||||||
|
`MUL_REM: $write("REM");
|
||||||
|
`MUL_REMU: $write("REMU");
|
||||||
|
default: $write("?");
|
||||||
|
endcase
|
||||||
end else begin
|
end else begin
|
||||||
case (`ALU_BITS'(op_type))
|
case (`ALU_BITS'(op_type))
|
||||||
`ALU_ADD: $write("ADD");
|
`ALU_ADD: $write("ADD");
|
||||||
@@ -77,19 +88,6 @@ task print_ex_op (
|
|||||||
default: $write("?");
|
default: $write("?");
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
`EX_MUL: begin
|
|
||||||
case (`MUL_BITS'(op_type))
|
|
||||||
`MUL_MUL: $write("MUL");
|
|
||||||
`MUL_MULH: $write("MULH");
|
|
||||||
`MUL_MULHSU:$write("MULHSU");
|
|
||||||
`MUL_MULHU: $write("MULHU");
|
|
||||||
`MUL_DIV: $write("DIV");
|
|
||||||
`MUL_DIVU: $write("DIVU");
|
|
||||||
`MUL_REM: $write("REM");
|
|
||||||
`MUL_REMU: $write("REMU");
|
|
||||||
default: $write("?");
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
`EX_FPU: begin
|
`EX_FPU: begin
|
||||||
case (`FPU_BITS'(op_type))
|
case (`FPU_BITS'(op_type))
|
||||||
`FPU_ADD: $write("ADD");
|
`FPU_ADD: $write("ADD");
|
||||||
|
|||||||
@@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
`define SCOPE_ASSIGN(d,s) assign scope_``d = s
|
||||||
|
|
||||||
`define SCOPE_SIZE 4096
|
`define SCOPE_SIZE 1024
|
||||||
|
|
||||||
`else
|
`else
|
||||||
|
|
||||||
|
|||||||
@@ -10,17 +10,15 @@ module VX_writeback #(
|
|||||||
VX_commit_if alu_commit_if,
|
VX_commit_if alu_commit_if,
|
||||||
VX_commit_if ld_commit_if,
|
VX_commit_if ld_commit_if,
|
||||||
VX_commit_if csr_commit_if,
|
VX_commit_if csr_commit_if,
|
||||||
VX_commit_if mul_commit_if,
|
|
||||||
VX_commit_if fpu_commit_if,
|
VX_commit_if fpu_commit_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_writeback_if writeback_if
|
VX_writeback_if writeback_if
|
||||||
);
|
);
|
||||||
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
|
||||||
wire ld_valid = ld_commit_if.valid && ld_commit_if.wb;
|
wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;
|
||||||
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
wire csr_valid = csr_commit_if.valid && csr_commit_if.wb;
|
||||||
wire mul_valid = mul_commit_if.valid && mul_commit_if.wb;
|
wire alu_valid = alu_commit_if.valid && alu_commit_if.wb;
|
||||||
/*wire fpu_valid = fpu_commit_if.valid && fpu_commit_if.wb;*/
|
|
||||||
|
|
||||||
wire wb_valid;
|
wire wb_valid;
|
||||||
wire [`NW_BITS-1:0] wb_wid;
|
wire [`NW_BITS-1:0] wb_wid;
|
||||||
@@ -30,47 +28,40 @@ module VX_writeback #(
|
|||||||
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
wire [`NUM_THREADS-1:0][31:0] wb_data;
|
||||||
wire wb_eop;
|
wire wb_eop;
|
||||||
|
|
||||||
assign wb_valid = alu_valid ? alu_commit_if.valid :
|
assign wb_valid = ld_valid |
|
||||||
ld_valid ? ld_commit_if.valid :
|
fpu_valid |
|
||||||
csr_valid ? csr_commit_if.valid :
|
csr_valid |
|
||||||
mul_valid ? mul_commit_if.valid :
|
alu_valid;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.valid;
|
|
||||||
|
|
||||||
assign wb_wid = alu_valid ? alu_commit_if.wid :
|
assign wb_wid = ld_valid ? ld_commit_if.wid :
|
||||||
ld_valid ? ld_commit_if.wid :
|
fpu_valid ? fpu_commit_if.wid :
|
||||||
csr_valid ? csr_commit_if.wid :
|
csr_valid ? csr_commit_if.wid :
|
||||||
mul_valid ? mul_commit_if.wid :
|
/*alu_valid ?*/ alu_commit_if.wid;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.wid;
|
|
||||||
|
|
||||||
assign wb_PC = alu_valid ? alu_commit_if.PC :
|
assign wb_PC = ld_valid ? ld_commit_if.PC :
|
||||||
ld_valid ? ld_commit_if.PC :
|
fpu_valid ? fpu_commit_if.PC :
|
||||||
csr_valid ? csr_commit_if.PC :
|
csr_valid ? csr_commit_if.PC :
|
||||||
mul_valid ? mul_commit_if.PC :
|
/*alu_valid ?*/ alu_commit_if.PC;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.PC;
|
|
||||||
|
|
||||||
assign wb_tmask = alu_valid ? alu_commit_if.tmask :
|
assign wb_tmask = ld_valid ? ld_commit_if.tmask :
|
||||||
ld_valid ? ld_commit_if.tmask :
|
fpu_valid ? fpu_commit_if.tmask :
|
||||||
csr_valid ? csr_commit_if.tmask :
|
csr_valid ? csr_commit_if.tmask :
|
||||||
mul_valid ? mul_commit_if.tmask :
|
/*alu_valid ?*/ alu_commit_if.tmask;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.tmask;
|
|
||||||
|
|
||||||
assign wb_rd = alu_valid ? alu_commit_if.rd :
|
assign wb_rd = ld_valid ? ld_commit_if.rd :
|
||||||
ld_valid ? ld_commit_if.rd :
|
fpu_valid ? fpu_commit_if.rd :
|
||||||
csr_valid ? csr_commit_if.rd :
|
csr_valid ? csr_commit_if.rd :
|
||||||
mul_valid ? mul_commit_if.rd :
|
/*alu_valid ?*/ alu_commit_if.rd;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.rd;
|
|
||||||
|
|
||||||
assign wb_data = alu_valid ? alu_commit_if.data :
|
assign wb_data = ld_valid ? ld_commit_if.data :
|
||||||
ld_valid ? ld_commit_if.data :
|
fpu_valid ? fpu_commit_if.data :
|
||||||
csr_valid ? csr_commit_if.data :
|
csr_valid ? csr_commit_if.data :
|
||||||
mul_valid ? mul_commit_if.data :
|
/*alu_valid ?*/ alu_commit_if.data;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.data;
|
|
||||||
|
|
||||||
assign wb_eop = alu_valid ? alu_commit_if.eop :
|
assign wb_eop = ld_valid ? ld_commit_if.eop :
|
||||||
ld_valid ? ld_commit_if.eop :
|
fpu_valid ? fpu_commit_if.eop :
|
||||||
csr_valid ? csr_commit_if.eop :
|
csr_valid ? csr_commit_if.eop :
|
||||||
mul_valid ? mul_commit_if.eop :
|
/*alu_valid ?*/ alu_commit_if.eop;
|
||||||
/*fpu_valid ?*/ fpu_commit_if.eop;
|
|
||||||
|
|
||||||
wire stall = ~writeback_if.ready && writeback_if.valid;
|
wire stall = ~writeback_if.ready && writeback_if.valid;
|
||||||
|
|
||||||
@@ -85,11 +76,10 @@ module VX_writeback #(
|
|||||||
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
.data_out ({writeback_if.valid, writeback_if.wid, writeback_if.PC, writeback_if.tmask, writeback_if.rd, writeback_if.data, writeback_if.eop})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign alu_commit_if.ready = !stall;
|
assign ld_commit_if.ready = !stall;
|
||||||
assign ld_commit_if.ready = !stall && !alu_valid;
|
assign fpu_commit_if.ready = !stall && !ld_valid;
|
||||||
assign csr_commit_if.ready = !stall && !alu_valid && !ld_valid;
|
assign csr_commit_if.ready = !stall && !ld_valid && !fpu_valid;
|
||||||
assign mul_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid;
|
assign alu_commit_if.ready = !stall && !ld_valid && !fpu_valid && !csr_valid;
|
||||||
assign fpu_commit_if.ready = !stall && !alu_valid && !ld_valid && !csr_valid && !mul_valid;
|
|
||||||
|
|
||||||
// special workaround to get RISC-V tests Pass/Fail status
|
// special workaround to get RISC-V tests Pass/Fail status
|
||||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||||
|
|||||||
2
hw/rtl/cache/VX_bank.v
vendored
2
hw/rtl/cache/VX_bank.v
vendored
@@ -301,7 +301,7 @@ module VX_bank #(
|
|||||||
// read/Fill
|
// read/Fill
|
||||||
.lookup (valid_st0 && !is_fill_st0),
|
.lookup (valid_st0 && !is_fill_st0),
|
||||||
.addr (addr_st0),
|
.addr (addr_st0),
|
||||||
.fill (valid_st0 && is_fill_st0),
|
.fill (valid_st0 && is_fill_st0 && !crsq_in_stall),
|
||||||
.is_flush (is_flush_st0),
|
.is_flush (is_flush_st0),
|
||||||
.tag_match (tag_match_st0)
|
.tag_match (tag_match_st0)
|
||||||
);
|
);
|
||||||
|
|||||||
2
hw/rtl/cache/VX_cache.v
vendored
2
hw/rtl/cache/VX_cache.v
vendored
@@ -20,7 +20,7 @@ module VX_cache #(
|
|||||||
// Core Request Queue Size
|
// Core Request Queue Size
|
||||||
parameter CREQ_SIZE = 4,
|
parameter CREQ_SIZE = 4,
|
||||||
// Miss Reserv Queue Knob
|
// Miss Reserv Queue Knob
|
||||||
parameter MSHR_SIZE = 16,
|
parameter MSHR_SIZE = 8,
|
||||||
// DRAM Response Queue Size
|
// DRAM Response Queue Size
|
||||||
parameter DRSQ_SIZE = 4,
|
parameter DRSQ_SIZE = 4,
|
||||||
// DRAM Request Queue Size
|
// DRAM Request Queue Size
|
||||||
|
|||||||
2
hw/rtl/cache/VX_shared_mem.v
vendored
2
hw/rtl/cache/VX_shared_mem.v
vendored
@@ -164,7 +164,7 @@ module VX_shared_mem #(
|
|||||||
) data (
|
) data (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.addr (per_bank_core_req_addr[i]),
|
.addr (per_bank_core_req_addr[i]),
|
||||||
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i] && crsq_in_ready),
|
.wren (per_bank_core_req_valid[i] && per_bank_core_req_rw[i]),
|
||||||
.byteen (per_bank_core_req_byteen[i]),
|
.byteen (per_bank_core_req_byteen[i]),
|
||||||
.rden (1'b1),
|
.rden (1'b1),
|
||||||
.din (per_bank_core_req_data[i]),
|
.din (per_bank_core_req_data[i]),
|
||||||
|
|||||||
@@ -10,8 +10,8 @@ interface VX_alu_req_if ();
|
|||||||
wire [`NUM_THREADS-1:0] tmask;
|
wire [`NUM_THREADS-1:0] tmask;
|
||||||
wire [31:0] PC;
|
wire [31:0] PC;
|
||||||
wire [31:0] next_PC;
|
wire [31:0] next_PC;
|
||||||
wire [`ALU_BR_BITS-1:0] op_type;
|
wire [`ALU_BITS-1:0] op_type;
|
||||||
wire is_br_op;
|
wire [`MOD_BITS-1:0] op_mod;
|
||||||
wire rs1_is_PC;
|
wire rs1_is_PC;
|
||||||
wire rs2_is_imm;
|
wire rs2_is_imm;
|
||||||
wire [31:0] imm;
|
wire [31:0] imm;
|
||||||
|
|||||||
@@ -1,25 +0,0 @@
|
|||||||
`ifndef VX_MUL_REQ_IF
|
|
||||||
`define VX_MUL_REQ_IF
|
|
||||||
|
|
||||||
`include "VX_define.vh"
|
|
||||||
|
|
||||||
`ifndef EXT_M_ENABLE
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
|
||||||
`endif
|
|
||||||
|
|
||||||
interface VX_mul_req_if ();
|
|
||||||
|
|
||||||
wire valid;
|
|
||||||
wire [`NW_BITS-1:0] wid;
|
|
||||||
wire [`NUM_THREADS-1:0] tmask;
|
|
||||||
wire [31:0] PC;
|
|
||||||
wire [`MUL_BITS-1:0] op_type;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
|
||||||
wire wb;
|
|
||||||
wire ready;
|
|
||||||
|
|
||||||
endinterface
|
|
||||||
|
|
||||||
`endif
|
|
||||||
@@ -10,9 +10,6 @@ interface VX_perf_pipeline_if ();
|
|||||||
wire [63:0] csr_stalls;
|
wire [63:0] csr_stalls;
|
||||||
wire [63:0] alu_stalls;
|
wire [63:0] alu_stalls;
|
||||||
wire [63:0] gpu_stalls;
|
wire [63:0] gpu_stalls;
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
wire [63:0] mul_stalls;
|
|
||||||
`endif
|
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
wire [63:0] fpu_stalls;
|
wire [63:0] fpu_stalls;
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -105,33 +105,16 @@ module VX_fifo_queue #(
|
|||||||
|
|
||||||
if (0 == BUFFERED) begin
|
if (0 == BUFFERED) begin
|
||||||
|
|
||||||
if (FASTRAM) begin
|
reg [1:0][DATAW-1:0] shift_reg;
|
||||||
|
|
||||||
`USE_FAST_BRAM reg [DATAW-1:0] shift_reg [SIZE];
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (push) begin
|
if (push) begin
|
||||||
shift_reg[1] <= shift_reg[0];
|
shift_reg[1] <= shift_reg[0];
|
||||||
shift_reg[0] <= data_in;
|
shift_reg[0] <= data_in;
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
assign data_out = shift_reg[~used_r[0]];
|
|
||||||
|
|
||||||
end else begin
|
|
||||||
|
|
||||||
reg [DATAW-1:0] shift_reg [SIZE];
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
|
||||||
if (push) begin
|
|
||||||
shift_reg[1] <= shift_reg[0];
|
|
||||||
shift_reg[0] <= data_in;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
assign data_out = shift_reg[~used_r[0]];
|
|
||||||
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
assign data_out = shift_reg[!used_r[0]];
|
||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
@@ -142,7 +125,7 @@ module VX_fifo_queue #(
|
|||||||
if (push) begin
|
if (push) begin
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
end
|
end
|
||||||
if (push && (empty_r || ((used_r == ADDRW'(1)) && pop))) begin
|
if (push && (empty_r || (used_r && pop))) begin
|
||||||
data_out_r <= data_in;
|
data_out_r <= data_in;
|
||||||
end else if (pop) begin
|
end else if (pop) begin
|
||||||
data_out_r <= buffer;
|
data_out_r <= buffer;
|
||||||
|
|||||||
@@ -59,6 +59,7 @@ module VX_skid_buffer #(
|
|||||||
reg use_buffer;
|
reg use_buffer;
|
||||||
|
|
||||||
wire push = valid_in && ready_in;
|
wire push = valid_in && ready_in;
|
||||||
|
wire pop = !valid_out_r || ready_out;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
@@ -68,11 +69,11 @@ module VX_skid_buffer #(
|
|||||||
if (ready_out) begin
|
if (ready_out) begin
|
||||||
use_buffer <= 0;
|
use_buffer <= 0;
|
||||||
end
|
end
|
||||||
if (push && valid_out_r && !ready_out) begin
|
if (push && !pop) begin
|
||||||
assert(!use_buffer);
|
assert(!use_buffer);
|
||||||
use_buffer <= 1;
|
use_buffer <= 1;
|
||||||
end
|
end
|
||||||
if (!valid_out_r || ready_out) begin
|
if (pop) begin
|
||||||
valid_out_r <= valid_in || use_buffer;
|
valid_out_r <= valid_in || use_buffer;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -82,7 +83,7 @@ module VX_skid_buffer #(
|
|||||||
if (push) begin
|
if (push) begin
|
||||||
buffer <= data_in;
|
buffer <= data_in;
|
||||||
end
|
end
|
||||||
if (!valid_out_r || ready_out) begin
|
if (pop) begin
|
||||||
data_out_r <= use_buffer ? buffer : data_in;
|
data_out_r <= use_buffer ? buffer : data_in;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -118,8 +119,8 @@ module VX_skid_buffer #(
|
|||||||
);
|
);
|
||||||
|
|
||||||
assign ready_in = !q_full;
|
assign ready_in = !q_full;
|
||||||
assign valid_out = !q_empty;
|
assign valid_out = !q_empty;
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -27,6 +27,9 @@ SINGLECORE += -DNUM_CLUSTERS=1 -DNUM_CORES=1 -DL2_ENABLE=0
|
|||||||
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
#MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||||
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
MULTICORE ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||||
|
|
||||||
|
SINGLECORE += $(CONFIGS)
|
||||||
|
MULTICORE += $(CONFIGS)
|
||||||
|
|
||||||
TOP = Vortex
|
TOP = Vortex
|
||||||
|
|
||||||
RTL_DIR=../rtl
|
RTL_DIR=../rtl
|
||||||
@@ -49,7 +52,7 @@ VL_FLAGS += --cc Vortex.v --top-module $(TOP)
|
|||||||
# Use FPNEW PFU core
|
# Use FPNEW PFU core
|
||||||
VL_FLAGS += -DFPU_FPNEW
|
VL_FLAGS += -DFPU_FPNEW
|
||||||
|
|
||||||
DBG_FLAGS += -DVCD_OUTPUT $(DBG_FLAGS)
|
DBG_FLAGS += -DVCD_OUTPUT
|
||||||
|
|
||||||
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
THREADS ?= $(shell python3 -c 'import multiprocessing as mp; print(max(1, mp.cpu_count() // 2))')
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ CP = $(RISCV_TOOLCHAIN_PATH)/bin/riscv32-unknown-elf-objcopy
|
|||||||
CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
CFLAGS += -march=rv32imf -mabi=ilp32f -O3 -Wstack-usage=1024 -ffreestanding -nostartfiles -fdata-sections -ffunction-sections
|
||||||
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
CFLAGS += -I$(VORTEX_RT_PATH)/include -I$(VORTEX_RT_PATH)/../hw
|
||||||
|
|
||||||
LDFLAGS += -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
LDFLAGS += -lm -Wl,-Bstatic,-T,$(VORTEX_RT_PATH)/linker/vx_link.ld -Wl,--gc-sections $(VORTEX_RT_PATH)/libvortexrt.a
|
||||||
|
|
||||||
PROJECT = vx_nl_main
|
PROJECT = vx_nl_main
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,22 @@
|
|||||||
#include <vx_intrinsics.h>
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
#include <vx_print.h>
|
#include <vx_print.h>
|
||||||
|
|
||||||
int main()
|
const int Num = 9;
|
||||||
{
|
const float fNum = 9.0f;
|
||||||
// Main is called with all threads active of warp 0
|
|
||||||
vx_tmc(1);
|
|
||||||
|
|
||||||
vx_prints("Newlib Main ");
|
int fibonacci(int n) {
|
||||||
vx_printx(456);
|
if (n <= 1)
|
||||||
vx_prints(" \n");
|
return n;
|
||||||
|
return fibonacci(n-1) + fibonacci(n-2);
|
||||||
|
}
|
||||||
|
|
||||||
|
int main() {
|
||||||
|
int fib = fibonacci(Num);
|
||||||
|
float isq = 1.0f / sqrt(fNum);
|
||||||
|
vx_printf("fibonacci(%d) = %d\n", Num, fib);
|
||||||
|
vx_printf("invAqrt(%f) = %f\n", fNum, isq);
|
||||||
vx_prints("Passed!\n");
|
vx_prints("Passed!\n");
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user