From 2c40874cc59258ebed28a58dec02956e0f7bcbcb Mon Sep 17 00:00:00 2001
From: wgulian3
Date: Fri, 21 Feb 2020 20:50:14 -0500
Subject: [PATCH] Add multi-cycle compat module and use it in ALU

---
Review notes (text in this section is ignored by git-am):

 * Line structure and indentation were rebuilt from a whitespace-flattened
   copy of this patch. Apply with `git apply --ignore-whitespace` and check
   the blank context lines of the VX_alu.v hunks against the pre-image blob.
   The two -/+ pairs at the top of the first VX_alu.v hunk differ only in
   whitespace; they are rendered here as tab -> spaces.
 * VX_mult: `result` is a net rather than `output reg`, because it is driven
   by a continuous assignment (fallback) or by the lpm_mult output port.
 * VX_mult: dropped the SystemVerilog `endmodule : VX_mult` end label; the
   file is added to the project as a VERILOG_FILE.
 * VX_alu: the multiplier operand wiring stays below the ALU_in1/ALU_in2
   declarations instead of moving above them (use before declaration).
 * VX_alu: the result mux uses always @(*) so the registered mul_result
   re-triggers it.
 * The post-image blob ids on the index lines predate these changes.

 rtl/VX_alu.v            |  54 +++++++++++-------
 rtl/compat/VX_mult.v    | 121 ++++++++++++++++++++++++++++++++++++++++
 rtl/quartus/project.tcl |   1 +
 3 files changed, 156 insertions(+), 20 deletions(-)
 create mode 100644 rtl/compat/VX_mult.v

diff --git a/rtl/VX_alu.v b/rtl/VX_alu.v
index bdafab4f..384da8cd 100644
--- a/rtl/VX_alu.v
+++ b/rtl/VX_alu.v
@@ -14,13 +14,17 @@ module VX_alu(
     output reg out_alu_stall
     );
 
-	localparam div_pipeline_len = 10;
+    localparam div_pipeline_len = 10;
+    localparam mul_pipeline_len = 3;
 
-	wire[31:0] unsigned_div_result;
+    wire[31:0] unsigned_div_result;
     wire[31:0] unsigned_rem_result;
     wire[31:0] signed_div_result;
     wire[31:0] signed_rem_result;
 
+    wire[63:0] mul_data_a, mul_data_b;
+    wire[63:0] mul_result;
+
     VX_divide #(
         .WIDTHN(32),
         .WIDTHD(32),
@@ -53,6 +57,21 @@ module VX_alu(
         .remainder(signed_rem_result)
     );
 
+    VX_mult #(
+        .WIDTHA(64),
+        .WIDTHB(64),
+        .WIDTHP(64),
+        .SPEED("HIGHEST"),
+        .PIPELINE(mul_pipeline_len)
+    ) multiplier (
+        .clock(clk),
+        .aclr(1'b0),
+        .clken(1'b1), // TODO this could be disabled on inactive instructions
+        .dataa(mul_data_a),
+        .datab(mul_data_b),
+        .result(mul_result)
+    );
+
     reg [15:0] curr_inst_delay;
     reg [15:0] inst_delay;
     reg inst_was_stalling;
@@ -66,6 +85,10 @@ module VX_alu(
             `DIVU,
             `REM,
             `REMU: curr_inst_delay = div_pipeline_len;
+            `MUL,
+            `MULH,
+            `MULHSU,
+            `MULHU: curr_inst_delay = mul_pipeline_len;
             default: curr_inst_delay = 0;
         endcase // in_alu_op
     end
@@ -95,8 +118,6 @@ module VX_alu(
 
     wire[31:0] ALU_in1;
     wire[31:0] ALU_in2;
-    wire[63:0] ALU_in1_mult;
-    wire[63:0] ALU_in2_mult;
     wire[31:0] upper_immed;
 
     assign which_in2 = in_rs2_src == `RS2_IMMED;
@@ -106,20 +127,13 @@ module VX_alu(
 
     assign upper_immed = {in_upper_immed, {12{1'b0}}};
 
-    //always @(posedge `MUL) begin
-
-
-    /* verilator lint_off UNUSED */
-
-
+    // MUL, MULH (signed*signed), MULHSU (signed*unsigned), MULHU (unsigned*unsigned)
     wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
     wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
-    assign ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed;
-    assign ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in2} : alu_in2_signed;
-    wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult;
-
-    /* verilator lint_on UNUSED */
+    assign mul_data_a = (in_alu_op == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
+    assign mul_data_b = (in_alu_op == `MULHU || in_alu_op == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;
 
-    always @(in_alu_op or ALU_in1 or ALU_in2) begin
+    // @(*): mul_result changes mul_pipeline_len cycles after the operands, so it must re-trigger this block
+    always @(*) begin
         case(in_alu_op)
             `ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
@@ -135,11 +149,11 @@ module VX_alu(
             `SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
             `LUI_ALU: out_alu_result = upper_immed;
             `AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
-            `MUL: out_alu_result = mult_result[31:0];
-            `MULH: out_alu_result = mult_result[63:32];
-            `MULHSU: out_alu_result = mult_result[63:32];
-            `MULHU: out_alu_result = mult_result[63:32];
             // TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
+            `MUL: out_alu_result = mul_result[31:0];
+            `MULH: out_alu_result = mul_result[63:32];
+            `MULHSU: out_alu_result = mul_result[63:32];
+            `MULHU: out_alu_result = mul_result[63:32];
             `DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
             `DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
             `REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
diff --git a/rtl/compat/VX_mult.v b/rtl/compat/VX_mult.v
new file mode 100644
index 00000000..c1f14c0e
--- /dev/null
+++ b/rtl/compat/VX_mult.v
@@ -0,0 +1,121 @@
+// Multiplier wrapper: lpm_mult under Quartus synthesis, behavioral model elsewhere.
+//
+// Quartus honours the read_comments_as_HDL and translate_off pragmas below, so
+// it sees IMPL = "quartus"; other tools treat them as plain comments and see
+// IMPL = "fallback".
+//
+// Parameters:
+//   WIDTHA, WIDTHB  operand widths in bits
+//   WIDTHP          result width in bits
+//   REP             passed to lpm_mult as LPM_REPRESENTATION; in the fallback,
+//                   "SIGNED" selects a signed multiply, anything else unsigned
+//   SPEED           "HIGHEST" maps to MAXIMIZE_SPEED 10, anything else to 5
+//   PIPELINE        register stages; the fallback delays the operands by
+//                   PIPELINE clocks and multiplies combinationally after them
+//
+// result is declared as a net: it is driven by a continuous assignment in the
+// fallback and by the lpm_mult output port in the Quartus build.
+module VX_mult
+    #(
+    parameter WIDTHA=1,
+    parameter WIDTHB=1,
+    parameter WIDTHP=1,
+    parameter REP="UNSIGNED",
+    parameter SPEED="MIXED", // "MIXED" or "HIGHEST"
+    parameter PIPELINE=0
+    )
+    (
+    input clock, aclr, clken,
+
+    input [WIDTHA-1:0] dataa,
+    input [WIDTHB-1:0] datab,
+
+    output [WIDTHP-1:0] result
+    );
+
+// synthesis read_comments_as_HDL on
+// localparam IMPL = "quartus";
+// synthesis read_comments_as_HDL off
+
+// altera translate_off
+    localparam IMPL="fallback";
+// altera translate_on
+
+    generate
+
+    if (IMPL == "quartus") begin
+
+        localparam lpm_speed=SPEED == "HIGHEST" ? 10:5;
+
+        lpm_mult#(
+            .LPM_WIDTHA(WIDTHA),
+            .LPM_WIDTHB(WIDTHB),
+            .LPM_WIDTHP(WIDTHP),
+            .LPM_REPRESENTATION(REP),
+            .LPM_PIPELINE(PIPELINE),
+            .MAXIMIZE_SPEED(lpm_speed)
+        ) quartus_mult(
+            .clock(clock),
+            .aclr(aclr),
+            .clken(clken),
+            .dataa(dataa),
+            .datab(datab),
+            .result(result)
+        );
+
+    end
+    else begin
+
+        wire [WIDTHA-1:0] dataa_pipe_end;
+        wire [WIDTHB-1:0] datab_pipe_end;
+        if (PIPELINE == 0) begin
+            assign dataa_pipe_end = dataa;
+            assign datab_pipe_end = datab;
+        end else begin
+            reg [WIDTHA-1:0] dataa_pipe [0:PIPELINE-1];
+            reg [WIDTHB-1:0] datab_pipe [0:PIPELINE-1];
+
+            genvar pipe_stage;
+            for (pipe_stage = 0; pipe_stage < PIPELINE-1; pipe_stage = pipe_stage+1) begin : pipe_stages
+                always @(posedge clock or posedge aclr) begin
+                    if (aclr) begin
+                        dataa_pipe[pipe_stage+1] <= 0;
+                        datab_pipe[pipe_stage+1] <= 0;
+                    end
+                    else if (clken) begin
+                        dataa_pipe[pipe_stage+1] <= dataa_pipe[pipe_stage];
+                        datab_pipe[pipe_stage+1] <= datab_pipe[pipe_stage];
+                    end
+                end
+            end
+
+            always @(posedge clock or posedge aclr) begin
+                if (aclr) begin
+                    dataa_pipe[0] <= 0;
+                    datab_pipe[0] <= 0;
+                end
+                else if (clken) begin
+                    dataa_pipe[0] <= dataa;
+                    datab_pipe[0] <= datab;
+                end
+            end
+
+            assign dataa_pipe_end = dataa_pipe[PIPELINE-1];
+            assign datab_pipe_end = datab_pipe[PIPELINE-1];
+        end
+
+        /* * * * * * * * * * * * * * * * * * * * * * */
+        /* Do the actual fallback computation here */
+        /* * * * * * * * * * * * * * * * * * * * * * */
+
+        if (REP == "SIGNED") begin
+            assign result = $signed($signed(dataa_pipe_end) * $signed(datab_pipe_end));
+        end
+        else begin
+            assign result = dataa_pipe_end * datab_pipe_end;
+        end
+
+    end
+    endgenerate
+
+endmodule
diff --git a/rtl/quartus/project.tcl b/rtl/quartus/project.tcl
index c8447092..3a19b62e 100644
--- a/rtl/quartus/project.tcl
+++ b/rtl/quartus/project.tcl
@@ -71,6 +71,7 @@ set_global_assignment -name VERILOG_FILE ../shared_memory/VX_shared_memory.v
 set_global_assignment -name VERILOG_FILE ../shared_memory/VX_priority_encoder_sm.v
 set_global_assignment -name VERILOG_FILE ../shared_memory/VX_bank_valids.v
 set_global_assignment -name VERILOG_FILE ../compat/VX_divide.v
+set_global_assignment -name VERILOG_FILE ../compat/VX_mult.v
 set_global_assignment -name VERILOG_FILE ../VX_alu.v
 set_global_assignment -name VERILOG_FILE ../VX_back_end.v
 set_global_assignment -name VERILOG_FILE ../VX_context.v