diff --git a/driver/opae/vlsim/Makefile b/driver/opae/vlsim/Makefile index 58d2fcd4..fe3ca8bb 100644 --- a/driver/opae/vlsim/Makefile +++ b/driver/opae/vlsim/Makefile @@ -75,6 +75,10 @@ endif VL_FLAGS += -DNOPAE CFLAGS += -DNOPAE +# ALU backend +VL_FLAGS += -DIMUL_DPI +VL_FLAGS += -DIDIV_DPI + # FPU backend FPU_CORE ?= FPU_DPI VL_FLAGS += -D$(FPU_CORE) diff --git a/driver/rtlsim/Makefile b/driver/rtlsim/Makefile index 76e7185d..65eb1ac0 100644 --- a/driver/rtlsim/Makefile +++ b/driver/rtlsim/Makefile @@ -64,6 +64,10 @@ ifdef PERF CFLAGS += -DPERF_ENABLE endif +# ALU backend +VL_FLAGS += -DIMUL_DPI +VL_FLAGS += -DIDIV_DPI + # FPU backend FPU_CORE ?= FPU_DPI VL_FLAGS += -D$(FPU_CORE) diff --git a/hw/dpi/util_dpi.cpp b/hw/dpi/util_dpi.cpp index e9baa90c..9469bc6e 100644 --- a/hw/dpi/util_dpi.cpp +++ b/hw/dpi/util_dpi.cpp @@ -9,6 +9,9 @@ #include "VX_config.h" extern "C" { + void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth); + void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder); + int dpi_register(); void dpi_assert(int inst, bool cond, int delay); } @@ -81,4 +84,53 @@ void dpi_assert(int inst, bool cond, int delay) { printf("delayed assertion at %s!\n", svGetNameFromScope(svGetScope())); std::abort(); } +} + +void dpi_imul(int a, int b, bool is_signed_a, bool is_signed_b, int* resultl, int* resulth) { /* 32x32 -> 64-bit multiply: is_signed_a / is_signed_b choose sign- or zero-extension per operand; the low and high 32 bits of the product are stored to *resultl and *resulth */ + uint64_t first = static_cast<uint32_t>(a); /* zero-extend via uint32_t: converting a negative int straight to uint64_t sign-extends, which corrupted the high word for unsigned operands */ + uint64_t second = static_cast<uint32_t>(b); + + if (is_signed_a && (a & 0x80000000)) { /* signed operand with bit 31 set: fill the upper 32 bits */ + first |= 0xFFFFFFFF00000000; + } + + if (is_signed_b && (b & 0x80000000)) { + second |= 0xFFFFFFFF00000000; + } + + uint64_t result; + if (is_signed_a || is_signed_b) { + result = (int64_t)first * (int64_t)second; + } else { + result = first * second; + } + + *resultl = result & 0xFFFFFFFF; /* low 32 bits of the product */ + *resulth = (result >> 32) & 0xFFFFFFFF; /* high 32 bits of the product */ +} + +void dpi_idiv(int a, int b, bool is_signed, int* quotient, int* remainder) { /* 32-bit divide: stores the quotient and remainder through the out-pointers; is_signed selects the signed or unsigned path */ + uint32_t dividen = a; + uint32_t divisor = b; + + if (is_signed) { + if (b == 0) { /* divide by zero: quotient is set to -1 (all ones) */ + *quotient = -1; +
*remainder = dividen; + } else if (dividen == 0x80000000 && divisor == 0xffffffff) { /* signed overflow case (0x80000000 divided by -1): remainder is 0 and the quotient is the dividend itself */ + *remainder = 0; + *quotient = dividen; + } else { + *quotient = (int32_t)dividen / (int32_t)divisor; + *remainder = (int32_t)dividen % (int32_t)divisor; + } + } else { + if (b == 0) { /* unsigned divide by zero: quotient all ones, remainder is the dividend */ + *quotient = -1; + *remainder = dividen; + } else { + *quotient = dividen / divisor; + *remainder = dividen % divisor; + } + } } \ No newline at end of file diff --git a/hw/dpi/util_dpi.vh b/hw/dpi/util_dpi.vh index 77294974..553641a7 100644 --- a/hw/dpi/util_dpi.vh +++ b/hw/dpi/util_dpi.vh @@ -1,6 +1,9 @@ `ifndef UTIL_DPI `define UTIL_DPI +import "DPI-C" context function void dpi_imul(input int a, input int b, input logic is_signed_a, input logic is_signed_b, output int resultl, output int resulth); /* C definition is in hw/dpi/util_dpi.cpp */ +import "DPI-C" context function void dpi_idiv(input int a, input int b, input logic is_signed, output int quotient, output int remainder); /* C definition is in hw/dpi/util_dpi.cpp */ + import "DPI-C" context function int dpi_register(); import "DPI-C" context function void dpi_assert(int inst, input logic cond, input int delay); diff --git a/hw/rtl/VX_muldiv.v b/hw/rtl/VX_muldiv.v index 48792ff8..36ba9d28 100644 --- a/hw/rtl/VX_muldiv.v +++ b/hw/rtl/VX_muldiv.v @@ -1,5 +1,9 @@ `include "VX_define.vh" +`ifndef SYNTHESIS +`include "util_dpi.vh" +`endif + module VX_muldiv ( input wire clk, input wire reset, @@ -43,13 +47,42 @@ module VX_muldiv ( wire mul_valid_out; wire mul_valid_in = valid_in && !is_div_op; wire mul_ready_in = ~stall_out || ~mul_valid_out; - - wire is_mulh_in = (alu_op != `MUL_MUL); + + wire is_mulh_in = (alu_op != `MUL_MUL); + wire is_signed_mul_a = (alu_op != `MUL_MULHU); /* first operand is treated as signed for every op except MULHU */ + wire is_signed_mul_b = (alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU); /* second operand is treated as unsigned for MULHU and MULHSU */ + +`ifdef IMUL_DPI + + wire [`NUM_THREADS-1:0][31:0] mul_result_tmp; + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + wire [31:0] mul_resultl, mul_resulth; + always @(*) begin /* the DPI call writes mul_resultl and mul_resulth for thread i from the current operands */ + dpi_imul (alu_in1[i], alu_in2[i], is_signed_mul_a, is_signed_mul_b, mul_resultl, mul_resulth); + end +
assign mul_result_tmp[i] = is_mulh_in ? mul_resulth : mul_resultl; /* high word for every op other than MUL, low word for MUL */ + end + + VX_shift_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), /* widths of: valid, wid, tmask, PC, rd, wb, per-thread 32-bit results */ + .DEPTH (`LATENCY_IMUL), + .RESETW (1) + ) mul_shift_reg ( + .clk(clk), + .reset (reset), + .enable (mul_ready_in), + .data_in ({mul_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, mul_result_tmp}), + .data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, mul_result}) + ); + +`else + wire is_mulh_out; for (genvar i = 0; i < `NUM_THREADS; i++) begin - wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]}; - wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]}; + wire [32:0] mul_in1 = {is_signed_mul_a & alu_in1[i][31], alu_in1[i]}; /* 33-bit operand: bit 32 copies bit 31 only when the operand is treated as signed */ + wire [32:0] mul_in2 = {is_signed_mul_b & alu_in2[i][31], alu_in2[i]}; `IGNORE_WARNINGS_BEGIN wire [65:0] mul_result_tmp; `IGNORE_WARNINGS_END @@ -83,9 +116,11 @@ module VX_muldiv ( .data_out ({mul_valid_out, mul_wid_out, mul_tmask_out, mul_PC_out, mul_rd_out, mul_wb_out, is_mulh_out}) ); +`endif + /////////////////////////////////////////////////////////////////////////// - wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp; + wire [`NUM_THREADS-1:0][31:0] div_result; wire [`NW_BITS-1:0] div_wid_out; wire [`NUM_THREADS-1:0] div_tmask_out; wire [31:0] div_PC_out; @@ -98,6 +133,36 @@ module VX_muldiv ( wire div_ready_out = ~stall_out && ~mul_valid_out; // arbitration prioritizes MUL wire div_ready_in; wire div_valid_out; + +`ifdef IDIV_DPI + + wire [`NUM_THREADS-1:0][31:0] div_result_tmp; + + for (genvar i = 0; i < `NUM_THREADS; i++) begin + wire [31:0] div_quotient, div_remainder; + always @(*) begin /* the DPI call writes div_quotient and div_remainder for thread i from the current operands */ + dpi_idiv (alu_in1[i], alu_in2[i], is_signed_div, div_quotient, div_remainder); + end + assign div_result_tmp[i] = is_rem_op_in ?
div_remainder : div_quotient; /* remainder when is_rem_op_in is set, otherwise the quotient */ + end + + VX_shift_register #( + .DATAW (1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + 1 + (`NUM_THREADS * 32)), /* widths of: valid, wid, tmask, PC, rd, wb, per-thread 32-bit results */ + .DEPTH (`LATENCY_IMUL), /* NOTE(review): the divider DPI path reuses the multiplier latency macro - confirm this is intended */ + .RESETW (1) + ) div_shift_reg ( + .clk(clk), + .reset (reset), + .enable (div_ready_in), + .data_in ({div_valid_in, wid_in, tmask_in, PC_in, rd_in, wb_in, div_result_tmp}), + .data_out ({div_valid_out, div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, div_result}) + ); + + assign div_ready_in = div_ready_out || ~div_valid_out; /* the register advances when downstream is ready or its output is not valid */ + +`else + + wire [`NUM_THREADS-1:0][31:0] div_result_tmp, rem_result_tmp; wire is_rem_op_out; VX_serial_div #( @@ -123,7 +188,9 @@ module VX_muldiv ( .tag_out ({div_wid_out, div_tmask_out, div_PC_out, div_rd_out, div_wb_out, is_rem_op_out}) ); - wire [`NUM_THREADS-1:0][31:0] div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp; + assign div_result = is_rem_op_out ? rem_result_tmp : div_result_tmp; /* continuous assign: div_result is declared ahead of the IDIV_DPI conditional so either branch can drive it */ + +`endif ///////////////////////////////////////////////////////////////////////////