`include "VX_define.v"

// -----------------------------------------------------------------------------
// VX_decode
//
// Decode stage. Splits the fetched 32-bit instruction into its fields, reads
// the per-thread register files (one master for thread 0, one slave for every
// other thread), applies operand forwarding, and derives the control signals
// consumed by later stages: write-back select, ALU op, memory read/write type,
// branch/jump information, CSR address/mask, and the GPGPU thread-mask /
// clone-stall signals.
//
// All `XXX names (`NT, `NT_M1, opcode, ALU, WB and branch encodings) come from
// VX_define.v.
//
// Review notes:
//   * The JAL and branch combinational blocks assign defaults up front so
//     that a `GPGPU_INST which is neither jalrs nor jmprt no longer leaves
//     out_jal / out_jal_offset / out_branch_type / out_branch_stall holding
//     their previous values (inferred latches).
//   * out_clone_stall is driven by a continuous assignment and is therefore
//     declared as a wire; name, width, direction and position are unchanged.
// -----------------------------------------------------------------------------
module VX_decode(
    // Fetch Inputs
    input wire        clk,
    input wire[31:0]  in_instruction,
    input wire[31:0]  in_curr_PC,
    input wire        in_valid[`NT_M1:0],

    // WriteBack inputs
    input wire[31:0]  in_write_data[`NT_M1:0],
    input wire[4:0]   in_rd,
    input wire[1:0]   in_wb,
    input wire        in_wb_valid[`NT_M1:0],

    // FORWARDING INPUTS
    input wire        in_src1_fwd,
    input wire[31:0]  in_src1_fwd_data[`NT_M1:0],
    input wire        in_src2_fwd,
    input wire[31:0]  in_src2_fwd_data[`NT_M1:0],

    // CSR outputs
    output wire[11:0] out_csr_address,
    output wire       out_is_csr,
    output wire[31:0] out_csr_mask,

    // Outputs
    output wire[4:0]  out_rd,
    output wire[4:0]  out_rs1,
    output wire[4:0]  out_rs2,
    output wire[31:0] out_a_reg_data[`NT_M1:0],
    output wire[31:0] out_b_reg_data[`NT_M1:0],
    output wire[1:0]  out_wb,
    output wire[4:0]  out_alu_op,
    output wire       out_rs2_src,
    output reg[31:0]  out_itype_immed,
    output wire[2:0]  out_mem_read,
    output wire[2:0]  out_mem_write,
    output reg[2:0]   out_branch_type,
    output reg        out_branch_stall,
    output reg        out_jal,
    output reg[31:0]  out_jal_offset,
    output reg[19:0]  out_upper_immed,
    output wire[31:0] out_PC_next,
    output wire       out_clone_stall,
    output wire       out_change_mask,
    output wire       out_thread_mask[`NT_M1:0],
    output wire       out_valid[`NT_M1:0]
);

    // Value loaded into the clone counter when a clone instruction is first seen.
    localparam [5:0] CLONE_STALL_CYCLES = 6'd10;

    // ------------------------------------------------------------------
    // Internal signals
    // ------------------------------------------------------------------
    wire[6:0]  curr_opcode;

    wire[31:0] rd1_register[`NT_M1:0];
    wire[31:0] rd2_register[`NT_M1:0];

    // Instruction class flags
    wire       is_itype;
    wire       is_rtype;
    wire       is_stype;
    wire       is_btype;
    wire       is_linst;
    wire       is_jal;
    wire       is_jalr;
    wire       is_lui;
    wire       is_auipc;
    wire       is_csr;
    wire       is_csr_immed;
    wire       is_e_inst;

    wire       is_gpgpu;
    wire       is_clone;
    wire       is_jalrs;
    wire       is_jmprt;

    wire       write_register;

    // Raw instruction fields
    wire[2:0]  func3;
    wire[6:0]  func7;
    wire[11:0] u_12;

    // JAL / JALR offset pieces
    wire[7:0]  jal_b_19_to_12;
    wire       jal_b_11;
    wire[9:0]  jal_b_10_to_1;
    wire       jal_b_20;
    wire       jal_b_0;
    wire[20:0] jal_unsigned_offset;
    wire[31:0] jal_1_offset;

    wire[11:0] jalr_immed;
    wire[31:0] jal_2_offset;

    wire       jal_sys_cond1;
    wire       jal_sys_cond2;
    wire       jal_sys_jal;
    wire[31:0] jal_sys_off;

    wire       csr_cond1;
    wire       csr_cond2;

    wire[11:0] alu_tempp;
    wire       alu_shift_i;
    wire[11:0] alu_shift_i_immed;

    wire[1:0]  csr_type;

    reg[4:0]   csr_alu;
    reg[4:0]   alu_op;
    reg[4:0]   mul_alu;

    // Clone countdown. Declared ahead of the register-file generate loop,
    // which reads it to time the register copy.
    reg[5:0]   state_stall = 0;

    // ------------------------------------------------------------------
    // Valid pass-through and write-back enable
    // ------------------------------------------------------------------
    assign out_valid      = in_valid;
    assign write_register = (in_wb != 2'h0);

    // ------------------------------------------------------------------
    // Register files
    // ------------------------------------------------------------------
    /* verilator lint_off UNUSED */
    wire[31:0] clone_regsiters[31:0];
    /* verilator lint_on UNUSED */

    // Thread 0: master register file. It also exposes its registers on
    // out_regs, which feed the in_regs port of every slave.
    VX_register_file vx_register_file_master(
        .clk               (clk),
        .in_valid          (in_wb_valid[0]),
        .in_write_register (write_register),
        .in_rd             (in_rd),
        .in_data           (in_write_data[0]),
        .in_src1           (out_rs1),
        .in_src2           (out_rs2),
        .out_regs          (clone_regsiters),
        .out_src1_data     (rd1_register[0]),
        .out_src2_data     (rd2_register[0])
    );

    // Threads 1..NT-1: slave register files.
    genvar index;
    generate
        for (index = 1; index < `NT; index = index + 1) begin : gen_code_label
            // Asserted for the slave whose index equals thread 0's rs1
            // value, on the cycle where the clone counter reads 1.
            wire to_clone;
            assign to_clone = (index == rd1_register[0]) && (state_stall == 1);

            VX_register_file_slave vx_register_file_slave(
                .clk               (clk),
                .in_valid          (in_wb_valid[index]),
                .in_write_register (write_register),
                .in_rd             (in_rd),
                .in_data           (in_write_data[index]),
                .in_src1           (out_rs1),
                .in_src2           (out_rs2),
                .in_clone          (is_clone),
                .in_to_clone       (to_clone),
                .in_regs           (clone_regsiters),
                .out_src1_data     (rd1_register[index]),
                .out_src2_data     (rd2_register[index])
            );
        end
    endgenerate

    // ------------------------------------------------------------------
    // Instruction field extraction
    // ------------------------------------------------------------------
    assign curr_opcode = in_instruction[6:0];

    assign out_rd      = in_instruction[11:7];
    assign out_rs1     = in_instruction[19:15];
    assign out_rs2     = in_instruction[24:20];
    assign func3       = in_instruction[14:12];
    assign func7       = in_instruction[31:25];
    assign u_12        = in_instruction[31:20];

    assign out_PC_next = in_curr_PC + 32'h4;

    // ------------------------------------------------------------------
    // Instruction classification
    // ------------------------------------------------------------------
    assign is_rtype     = (curr_opcode == `R_INST);
    assign is_linst     = (curr_opcode == `L_INST);
    assign is_itype     = (curr_opcode == `ALU_INST) || is_linst;
    assign is_stype     = (curr_opcode == `S_INST);
    assign is_btype     = (curr_opcode == `B_INST);
    assign is_jal       = (curr_opcode == `JAL_INST);
    assign is_jalr      = (curr_opcode == `JALR_INST);
    assign is_lui       = (curr_opcode == `LUI_INST);
    assign is_auipc     = (curr_opcode == `AUIPC_INST);
    // `SYS_INST is split on func3: non-zero is a CSR access, zero is an
    // "e" instruction.
    assign is_csr       = (curr_opcode == `SYS_INST) && (func3 != 0);
    assign is_csr_immed = (is_csr) && (func3[2] == 1);
    assign is_e_inst    = (curr_opcode == `SYS_INST) && (func3 == 0);

    // GPGPU extension instructions, selected by func3.
    assign is_gpgpu     = (curr_opcode == `GPGPU_INST);
    assign is_clone     = is_gpgpu && (func3 == 5);
    assign is_jalrs     = is_gpgpu && (func3 == 6);
    assign is_jmprt     = is_gpgpu && (func3 == 4);

    // ------------------------------------------------------------------
    // Thread mask generation
    // ------------------------------------------------------------------
    wire jalrs_thread_mask[`NT_M1:0];
    wire jmprt_thread_mask[`NT_M1:0];

    // jalrs: thread tm_i is enabled when tm_i <= thread 0's rs2 operand
    // (interpreted as signed).
    genvar tm_i;
    generate
        for (tm_i = 0; tm_i < `NT; tm_i = tm_i + 1) begin
            assign jalrs_thread_mask[tm_i] = tm_i <= $signed(out_b_reg_data[0]);
        end
    endgenerate

    // jmprt: only thread 0 stays enabled.
    genvar tm_ji;
    generate
        assign jmprt_thread_mask[0] = 1;
        for (tm_ji = 1; tm_ji < `NT; tm_ji = tm_ji + 1) begin
            assign jmprt_thread_mask[tm_ji] = 0;
        end
    endgenerate

    assign out_thread_mask = is_jalrs ? jalrs_thread_mask : jmprt_thread_mask;
    assign out_change_mask = is_jalrs || is_jmprt;

    // ------------------------------------------------------------------
    // Clone stall counter
    //
    // A clone seen while idle loads CLONE_STALL_CYCLES; afterwards the
    // counter decrements by one each clock until it reaches 0.
    // ------------------------------------------------------------------
    always @(posedge clk) begin
        if (is_clone && (state_stall == 0)) begin
            state_stall <= CLONE_STALL_CYCLES;
        end else if (state_stall != 0) begin
            state_stall <= state_stall - 1;
        end
    end

    // Stall while a clone is being decoded, except on the cycle where the
    // counter reads 1.
    assign out_clone_stall = is_clone && (state_stall != 1);

    // ------------------------------------------------------------------
    // Operand selection (per thread)
    //   A: current PC for JAL, else forwarded src1, else register file
    //   B: forwarded src2, else register file
    // ------------------------------------------------------------------
    genvar index_out_reg;
    generate
        for (index_out_reg = 0; index_out_reg < `NT; index_out_reg = index_out_reg + 1) begin
            assign out_a_reg_data[index_out_reg] =
                (is_jal      == 1'b1) ? in_curr_PC :
                (in_src1_fwd == 1'b1) ? in_src1_fwd_data[index_out_reg] :
                                        rd1_register[index_out_reg];

            assign out_b_reg_data[index_out_reg] =
                (in_src2_fwd == 1'b1) ? in_src2_fwd_data[index_out_reg] :
                                        rd2_register[index_out_reg];
        end
    endgenerate

    // ------------------------------------------------------------------
    // CSR mask, write-back select, rs2 source
    // ------------------------------------------------------------------
    assign out_is_csr   = is_csr;
    // Immediate CSR forms use the zero-extended rs1 field; register forms
    // use thread 0's A operand.
    assign out_csr_mask = (is_csr_immed == 1'b1) ? {27'h0, out_rs1} : out_a_reg_data[0];

    assign out_wb = (is_jal || is_jalr || is_jalrs || is_e_inst)           ? `WB_JAL :
                    is_linst                                               ? `WB_MEM :
                    (is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
                                                                             `NO_WB;

    assign out_rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;

    // MEM signals: func3 is passed through as the access type.
    assign out_mem_read  = (is_linst) ? func3 : `NO_MEM_READ;
    assign out_mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;

    // ------------------------------------------------------------------
    // UPPER IMMEDIATE: instruction[31:12] for LUI / AUIPC, zero otherwise
    // ------------------------------------------------------------------
    always @(*) begin
        case (curr_opcode)
            `LUI_INST:   out_upper_immed = {func7, out_rs2, out_rs1, func3};
            `AUIPC_INST: out_upper_immed = {func7, out_rs2, out_rs1, func3};
            default:     out_upper_immed = 20'h0;
        endcase
    end

    // ------------------------------------------------------------------
    // Jump offsets
    // ------------------------------------------------------------------
    assign jal_b_19_to_12      = in_instruction[19:12];
    assign jal_b_11            = in_instruction[20];
    assign jal_b_10_to_1       = in_instruction[30:21];
    assign jal_b_20            = in_instruction[31];
    assign jal_b_0             = 1'b0;
    assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
    assign jal_1_offset        = {{11{jal_b_20}}, jal_unsigned_offset};

    assign jalr_immed          = {func7, out_rs2};
    assign jal_2_offset        = {{20{jalr_immed[11]}}, jalr_immed};

    // `SYS_INST with func3 == 0 and a 12-bit immediate below 2 is treated
    // as a jump with a fixed offset value.
    assign jal_sys_cond1 = func3 == 3'h0;
    assign jal_sys_cond2 = u_12 < 12'h2;
    assign jal_sys_jal   = jal_sys_cond1 && jal_sys_cond2;
    assign jal_sys_off   = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;

    // JAL
    always @(*) begin
        // Defaults first: every path assigns both outputs, so no latch is
        // inferred for GPGPU instructions other than jalrs / jmprt.
        out_jal        = 1'b0;
        out_jal_offset = 32'hdeadbeef;

        case (curr_opcode)
            `JAL_INST: begin
                out_jal        = in_valid[0];
                out_jal_offset = jal_1_offset;
            end
            `JALR_INST: begin
                out_jal        = in_valid[0];
                out_jal_offset = jal_2_offset;
            end
            `GPGPU_INST: begin
                if (is_jalrs || is_jmprt) begin
                    out_jal        = in_valid[0];
                    out_jal_offset = 32'h0;
                end
            end
            `SYS_INST: begin
                out_jal        = jal_sys_jal && in_valid[0];
                out_jal_offset = jal_sys_off;
            end
            default: begin
                // keep the defaults assigned above
            end
        endcase
    end

    // ------------------------------------------------------------------
    // CSR
    // ------------------------------------------------------------------
    assign csr_cond1       = func3 != 3'h0;
    assign csr_cond2       = u_12 >= 12'h2;
    assign out_csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;

    // ------------------------------------------------------------------
    // ITYPE IMMED
    // ------------------------------------------------------------------
    // func3 1 and 5 take only the 5-bit rs2 field, zero-extended, as the
    // immediate.
    assign alu_shift_i       = (func3 == 3'h1) || (func3 == 3'h5);
    assign alu_shift_i_immed = {{7{1'b0}}, out_rs2};
    assign alu_tempp         = alu_shift_i ? alu_shift_i_immed : u_12;

    always @(*) begin
        case (curr_opcode)
            `ALU_INST: out_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
            `S_INST:   out_itype_immed = {{20{func7[6]}}, func7, out_rd};
            `L_INST:   out_itype_immed = {{20{u_12[11]}}, u_12};
            // Branch immediate bits {31, 7, 30:25, 11:8}, sign-extended;
            // no trailing zero bit is appended here.
            `B_INST:   out_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
            default:   out_itype_immed = 32'hdeadbeef;
        endcase
    end

    // ------------------------------------------------------------------
    // Branch type / branch stall
    // ------------------------------------------------------------------
    always @(*) begin
        // Defaults first: avoids inferred latches on the GPGPU path.
        out_branch_type  = `NO_BRANCH;
        out_branch_stall = 1'b0;

        case (curr_opcode)
            `B_INST: begin
                out_branch_stall = in_valid[0];
                case (func3)
                    3'h0:    out_branch_type = `BEQ;
                    3'h1:    out_branch_type = `BNE;
                    3'h4:    out_branch_type = `BLT;
                    3'h5:    out_branch_type = `BGT;
                    3'h6:    out_branch_type = `BLTU;
                    3'h7:    out_branch_type = `BGTU;
                    default: out_branch_type = `NO_BRANCH;
                endcase
            end
            `JAL_INST: begin
                out_branch_stall = in_valid[0];
            end
            `JALR_INST: begin
                out_branch_stall = in_valid[0];
            end
            `GPGPU_INST: begin
                if (is_jalrs || is_jmprt) begin
                    out_branch_stall = in_valid[0];
                end
            end
            default: begin
                // keep the defaults assigned above
            end
        endcase
    end

    // ------------------------------------------------------------------
    // ALU operation decode
    // ------------------------------------------------------------------
    // Base integer ALU op, keyed on func3.
    always @(*) begin
        case (func3)
            3'h0:    alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
            3'h1:    alu_op = `SLLA;
            3'h2:    alu_op = `SLT;
            3'h3:    alu_op = `SLTU;
            3'h4:    alu_op = `XOR;
            3'h5:    alu_op = (func7 == 7'h0) ? `SRL : `SRA;
            3'h6:    alu_op = `OR;
            3'h7:    alu_op = `AND;
            default: alu_op = `NO_ALU;
        endcase
    end

    // Multiply / divide op, keyed on func3. Selected below when func7[0]
    // is set on an R-type instruction.
    always @(*) begin
        case (func3)
            3'h0:    mul_alu = `MUL;
            3'h1:    mul_alu = `MULH;
            3'h2:    mul_alu = `MULHSU;
            3'h3:    mul_alu = `MULHU;
            3'h4:    mul_alu = `DIV;
            3'h5:    mul_alu = `DIVU;
            3'h6:    mul_alu = `REM;
            3'h7:    mul_alu = `REMU;
            default: mul_alu = `NO_ALU;
        endcase
    end

    // CSR op, keyed on func3[1:0].
    assign csr_type = func3[1:0];

    always @(*) begin
        case (csr_type)
            2'h1:    csr_alu = `CSR_ALU_RW;
            2'h2:    csr_alu = `CSR_ALU_RS;
            2'h3:    csr_alu = `CSR_ALU_RC;
            default: csr_alu = `NO_ALU;
        endcase
    end

    // Final ALU op selection, highest priority first.
    wire[4:0] temp_final_alu;

    assign temp_final_alu = is_btype               ? ((out_branch_type < `BLTU) ? `SUB : `SUBU) :
                            is_lui                 ? `LUI_ALU :
                            is_auipc               ? `AUIPC_ALU :
                            is_csr                 ? csr_alu :
                            (is_stype || is_linst) ? `ADD :
                                                     alu_op;

    assign out_alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;

endmodule