From be66e51613147ab38f95dbdb6073f2e27f66109a Mon Sep 17 00:00:00 2001
From: felsabbagh3
Date: Mon, 17 Feb 2020 22:22:27 -0800
Subject: [PATCH] Added CSRs, some Load unit tests are failing

---
Review notes (text between "---" and the diffstat is ignored by git am).

The copy of this patch that was reviewed had its line structure collapsed,
so line breaks, blank context lines and indentation below are reconstructed
from the hunk headers; whitespace-only differences could not be recovered.
The "index" hashes are the ones recorded in the original patch and do not
reflect the fixes listed here.

Fixes folded into this revision:

 * rtl/VX_csr_data.v: the reset loop used "assign csr[curr_e] = 0;" inside
   the clocked always block.  That is a procedural continuous assignment to
   a memory word, which is not a legal target; it is now a plain
   non-blocking assignment like the other registers in the block.
 * rtl/VX_csr_pipe.v: csr_reg_s2 was declared 4 bits wider than the fields
   packed into it (5 bits were counted for the 1-bit is_csr field).
 * rtl/VX_csr_pipe.v: the read-after-write bypass is now qualified with
   is_csr_s2, the same condition that enables the CSR file write, so data
   left in stage 2 by a non-CSR instruction is never forwarded.
 * rtl/VX_gpr_stage.v: the first csr_reg gained the 5-bit alu_op field but
   kept N = ... + 53, truncating the top bits (the valid field); N is now
   ... + 58.  The second csr_reg did not forward alu_op at all, leaving
   VX_csr_req.alu_op undriven in that branch; it now matches the first.

Open item (not changed here): csr[] has 1024 entries but is indexed with
the full 12-bit CSR address, so addresses >= 0x400 other than the
cycle/instret counters fall outside the array.

 rtl/VX_back_end.v                 |  24 +++++--
 rtl/VX_csr_data.v                 |  82 +++++++++++++++++++++++
 rtl/VX_csr_pipe.v                 | 105 ++++++++++++++++++++++++++++++
 rtl/VX_decode.v                   |   3 +-
 rtl/VX_gpr_stage.v                |  19 +++++-----
 rtl/VX_inst_multiplex.v           |   1 +
 rtl/VX_writeback.v                |  11 +++-
 rtl/interfaces/VX_csr_req_inter.v |   2 +-
 rtl/simulate/test_bench.h         |   8 ++-
 9 files changed, 235 insertions(+), 20 deletions(-)
 create mode 100644 rtl/VX_csr_data.v
 create mode 100644 rtl/VX_csr_pipe.v

diff --git a/rtl/VX_back_end.v b/rtl/VX_back_end.v
index a58847f3..640def5f 100644
--- a/rtl/VX_back_end.v
+++ b/rtl/VX_back_end.v
@@ -32,7 +32,7 @@ assign VX_writeback_inter.wb_warp_num = VX_writeback_temp.wb_warp_num;
 VX_mw_wb_inter VX_mw_wb();
 
 
-wire no_slot_mem;
+wire          no_slot_mem;
 
 
 VX_mem_req_inter VX_exe_mem_req();
@@ -55,6 +55,8 @@ VX_gpu_inst_req_inter VX_gpu_inst_req();
 // CSR unit inputs
 VX_csr_req_inter VX_csr_req();
 VX_csr_wb_inter VX_csr_wb();
+wire          no_slot_csr;
+wire          stall_gpr_csr;
 
 VX_gpr_stage VX_gpr_stage(
     .clk (clk),
@@ -67,6 +69,7 @@ VX_gpr_stage VX_gpr_stage(
     .VX_lsu_req (VX_lsu_req),
     .VX_gpu_inst_req (VX_gpu_inst_req),
     .VX_csr_req (VX_csr_req),
+    .stall_gpr_csr (stall_gpr_csr),
     // End new
     .memory_delay (out_mem_delay),
     .gpr_stage_delay (gpr_stage_delay)
@@ -100,9 +103,19 @@ VX_gpgpu_inst VX_gpgpu_inst(
     .VX_warp_ctl (VX_warp_ctl)
 );
 
-VX_csr_wrapper VX_csr_wrapper(
-    .VX_csr_req(VX_csr_req),
-    .VX_csr_wb (VX_csr_wb)
+// VX_csr_wrapper VX_csr_wrapper(
+//     .VX_csr_req(VX_csr_req),
+//     .VX_csr_wb (VX_csr_wb)
+// );
+
+VX_csr_pipe VX_csr_pipe(
+    .clk (clk),
+    .reset (reset),
+    .no_slot_csr (no_slot_csr),
+    .VX_csr_req (VX_csr_req),
+    .VX_writeback(VX_writeback_temp),
+    .VX_csr_wb (VX_csr_wb),
+    .stall_gpr_csr(stall_gpr_csr)
 );
 
 VX_writeback VX_wb(
@@ -113,7 +126,8 @@ VX_writeback VX_wb(
     .VX_csr_wb (VX_csr_wb),
 
     .VX_writeback_inter(VX_writeback_temp),
-    .no_slot_mem (no_slot_mem)
+    .no_slot_mem (no_slot_mem),
+    .no_slot_csr (no_slot_csr)
 );
 
 endmodule
\ No newline at end of file
diff --git a/rtl/VX_csr_data.v b/rtl/VX_csr_data.v
new file mode 100644
index 00000000..ab62aa23
--- /dev/null
+++ b/rtl/VX_csr_data.v
@@ -0,0 +1,82 @@
+`include "../VX_define.v"
+
+module VX_csr_data (
+    input wire clk, // Clock
+    input wire reset,
+
+    input wire[11:0] in_read_csr_address,
+
+    input wire in_write_valid,
+    input wire[31:0] in_write_csr_data,
+    input wire[11:0] in_write_csr_address,
+
+    output wire[31:0] out_read_csr_data,
+
+    // For instruction retire counting
+    input wire in_writeback_valid
+
+);
+
+
+    // wire[`NT_M1:0][31:0] thread_ids;
+    // wire[`NT_M1:0][31:0] warp_ids;
+
+    // genvar cur_t;
+    // for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
+    //     assign thread_ids[cur_t] = cur_t;
+    // end
+
+    // genvar cur_tw;
+    // for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
+    //     assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, in_read_warp_num};
+    // end
+
+    reg[11:0] csr[1023:0];
+    reg[63:0] cycle;
+    reg[63:0] instret;
+
+
+    wire read_cycle;
+    wire read_cycleh;
+    wire read_instret;
+    wire read_instreth;
+
+    assign read_cycle = in_read_csr_address == 12'hC00;
+    assign read_cycleh = in_read_csr_address == 12'hC80;
+    assign read_instret = in_read_csr_address == 12'hC02;
+    assign read_instreth = in_read_csr_address == 12'hC82;
+
+    // wire thread_select = in_read_csr_address == 12'h20;
+    // wire warp_select = in_read_csr_address == 12'h21;
+
+    // assign out_read_csr_data = thread_select ? thread_ids :
+    //                            warp_select ? warp_ids :
+    //                            0;
+
+    integer curr_e;
+    always @(posedge clk or posedge reset) begin
+        if (reset) begin
+            for (curr_e = 0; curr_e < 1024; curr_e=curr_e+1) begin
+                csr[curr_e] <= 0;
+            end
+            cycle <= 0;
+            instret <= 0;
+        end else begin
+            cycle <= cycle + 1;
+            if (in_write_valid) begin
+                csr[in_write_csr_address] <= in_write_csr_data[11:0];
+            end
+            if (in_writeback_valid) begin
+                instret <= instret + 1;
+            end
+        end
+    end
+
+
+    assign out_read_csr_data = read_cycle ? cycle[31:0] :
+                               read_cycleh ? cycle[63:32] :
+                               read_instret ? instret[31:0] :
+                               read_instreth ? instret[63:32] :
+                               {{20{1'b0}}, csr[in_read_csr_address]};
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_csr_pipe.v b/rtl/VX_csr_pipe.v
new file mode 100644
index 00000000..a5727c60
--- /dev/null
+++ b/rtl/VX_csr_pipe.v
@@ -0,0 +1,105 @@
+
+module VX_csr_pipe (
+    input wire clk, // Clock
+    input wire reset,
+    input wire no_slot_csr,
+    VX_csr_req_inter VX_csr_req,
+    VX_wb_inter VX_writeback,
+    VX_csr_wb_inter VX_csr_wb,
+    output wire stall_gpr_csr
+
+);
+
+    wire[`NT_M1:0] valid_s2;
+    wire[`NW_M1:0] warp_num_s2;
+    wire[4:0] rd_s2;
+    wire[1:0] wb_s2;
+    wire[4:0] alu_op_s2;
+    wire is_csr_s2;
+    wire[11:0] csr_address_s2;
+    wire[31:0] csr_read_data_s2;
+    wire[31:0] csr_updated_data_s2;
+
+    wire[31:0] csr_read_data_unqual;
+    wire[31:0] csr_read_data;
+
+    assign stall_gpr_csr = no_slot_csr && VX_csr_req.is_csr && |(VX_csr_req.valid);
+
+    assign csr_read_data = (is_csr_s2 && (csr_address_s2 == VX_csr_req.csr_address)) ? csr_updated_data_s2 : csr_read_data_unqual;
+
+    wire writeback = |VX_writeback.wb_valid;
+    VX_csr_data VX_csr_data(
+        .clk (clk),
+        .reset (reset),
+        .in_read_csr_address (VX_csr_req.csr_address),
+
+        .in_write_valid (is_csr_s2),
+        .in_write_csr_data (csr_updated_data_s2),
+        .in_write_csr_address(csr_address_s2),
+
+        .out_read_csr_data (csr_read_data_unqual),
+
+        .in_writeback_valid (writeback)
+    );
+
+
+
+    reg[31:0] csr_updated_data;
+    always @(*) begin
+        case(VX_csr_req.alu_op)
+            `CSR_ALU_RW: csr_updated_data = VX_csr_req.csr_mask;
+            `CSR_ALU_RS: csr_updated_data = csr_read_data | VX_csr_req.csr_mask;
+            `CSR_ALU_RC: csr_updated_data = csr_read_data & (32'hFFFFFFFF - VX_csr_req.csr_mask);
+            default: csr_updated_data = 32'hdeadbeef;
+        endcase
+    end
+
+    wire zero = 0;
+
+    VX_generic_register #(.N(`NT + `NW_M1 + 1 + 5 + 2 + 1 + 12 + 64)) csr_reg_s2 (
+        .clk (clk),
+        .reset(reset),
+        .stall(no_slot_csr),
+        .flush(zero),
+        .in ({VX_csr_req.valid, VX_csr_req.warp_num, VX_csr_req.rd, VX_csr_req.wb, VX_csr_req.is_csr, VX_csr_req.csr_address, csr_read_data , csr_updated_data }),
+        .out ({valid_s2 , warp_num_s2 , rd_s2 , wb_s2 , is_csr_s2 , csr_address_s2 , csr_read_data_s2, csr_updated_data_s2})
+    );
+
+
+    wire[`NT_M1:0][31:0] final_csr_data;
+
+    wire[`NT_M1:0][31:0] thread_ids;
+    wire[`NT_M1:0][31:0] warp_ids;
+    wire[`NT_M1:0][31:0] csr_vec_read_data_s2;
+
+    genvar cur_t;
+    for (cur_t = 0; cur_t < `NT; cur_t = cur_t + 1) begin
+        assign thread_ids[cur_t] = cur_t;
+    end
+
+    genvar cur_tw;
+    for (cur_tw = 0; cur_tw < `NT; cur_tw = cur_tw + 1) begin
+        assign warp_ids[cur_tw] = {{(31-`NW_M1){1'b0}}, warp_num_s2};
+    end
+
+    genvar cur_v;
+    for (cur_v = 0; cur_v < `NT; cur_v = cur_v + 1) begin
+        assign csr_vec_read_data_s2[cur_v] = csr_read_data_s2;
+    end
+
+    wire thread_select = csr_address_s2 == 12'h20;
+    wire warp_select = csr_address_s2 == 12'h21;
+
+    assign final_csr_data = thread_select ? thread_ids :
+                            warp_select ? warp_ids :
+                            csr_vec_read_data_s2;
+
+
+
+    assign VX_csr_wb.valid = valid_s2;
+    assign VX_csr_wb.warp_num = warp_num_s2;
+    assign VX_csr_wb.rd = rd_s2;
+    assign VX_csr_wb.wb = wb_s2;
+    assign VX_csr_wb.csr_result = final_csr_data;
+
+endmodule
\ No newline at end of file
diff --git a/rtl/VX_decode.v b/rtl/VX_decode.v
index 7fb2f90a..4f33bbd1 100644
--- a/rtl/VX_decode.v
+++ b/rtl/VX_decode.v
@@ -119,7 +119,8 @@ module VX_decode(
     assign is_auipc = (curr_opcode == `AUIPC_INST);
     assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
     assign is_csr_immed = (is_csr) && (func3[2] == 1);
-    assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
+    // assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
+    assign is_e_inst = in_instruction == 32'h00000073;
 
     assign is_gpgpu = (curr_opcode == `GPGPU_INST);
 
diff --git a/rtl/VX_gpr_stage.v b/rtl/VX_gpr_stage.v
index 3d556a83..5101dc85 100644
--- a/rtl/VX_gpr_stage.v
+++ b/rtl/VX_gpr_stage.v
@@ -7,6 +7,7 @@ module VX_gpr_stage (
     input wire schedule_delay,
 
     input wire memory_delay,
+    input wire stall_gpr_csr,
     output wire gpr_stage_delay,
 
     // inputs
@@ -93,7 +94,7 @@ module VX_gpr_stage (
     wire stall_lsu = memory_delay;
     wire flush_lsu = schedule_delay && !stall_lsu;
 
-    assign gpr_stage_delay = stall_lsu;
+    assign gpr_stage_delay = stall_lsu || (stall_gpr_csr && VX_bckE_req.is_csr && (|VX_bckE_req.valid));
 
 `ifdef ASIC
     wire delayed_lsu_last_cycle;
@@ -169,10 +170,10 @@ module VX_gpr_stage (
-    VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
+    VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
         .clk (clk),
         .reset(reset),
-        .stall(stall_rest),
+        .stall(stall_gpr_csr),
         .flush(flush_rest),
-        .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
-        .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
+        .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
+        .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
     );
 
 
@@ -211,7 +212,7 @@ module VX_gpr_stage (
-    VX_generic_register #(.N(`NW_M1 + 1 + `NT + 53)) csr_reg(
+    VX_generic_register #(.N(`NW_M1 + 1 + `NT + 58)) csr_reg(
         .clk (clk),
         .reset(reset),
-        .stall(stall_rest),
+        .stall(stall_gpr_csr),
         .flush(flush_rest),
-        .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
-        .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
+        .in ({VX_csr_req_temp.valid, VX_csr_req_temp.warp_num, VX_csr_req_temp.rd, VX_csr_req_temp.wb, VX_csr_req_temp.alu_op, VX_csr_req_temp.is_csr, VX_csr_req_temp.csr_address, VX_csr_req_temp.csr_immed, VX_csr_req_temp.csr_mask}),
+        .out ({VX_csr_req.valid , VX_csr_req.warp_num , VX_csr_req.rd , VX_csr_req.wb , VX_csr_req.alu_op , VX_csr_req.is_csr , VX_csr_req.csr_address , VX_csr_req.csr_immed , VX_csr_req.csr_mask })
diff --git a/rtl/VX_inst_multiplex.v b/rtl/VX_inst_multiplex.v
index 3aa17510..86da67de 100644
--- a/rtl/VX_inst_multiplex.v
+++ b/rtl/VX_inst_multiplex.v
@@ -82,6 +82,7 @@ module VX_inst_multiplex (
     assign VX_csr_req.warp_num = VX_bckE_req.warp_num;
     assign VX_csr_req.rd = VX_bckE_req.rd;
     assign VX_csr_req.wb = VX_bckE_req.wb;
+    assign VX_csr_req.alu_op = VX_bckE_req.alu_op;
     assign VX_csr_req.is_csr = VX_bckE_req.is_csr;
     assign VX_csr_req.csr_address = VX_bckE_req.csr_address;
     assign VX_csr_req.csr_immed = VX_bckE_req.csr_immed;
diff --git a/rtl/VX_writeback.v b/rtl/VX_writeback.v
index c9616d43..2f684bae 100644
--- a/rtl/VX_writeback.v
+++ b/rtl/VX_writeback.v
@@ -14,7 +14,8 @@ module VX_writeback (
 
     // Actual WB to GPR
     VX_wb_inter VX_writeback_inter,
-    output wire no_slot_mem
+    output wire no_slot_mem,
+    output wire no_slot_csr
 );
 
 
@@ -26,6 +27,7 @@ module VX_writeback (
 
 
     assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
+    assign no_slot_csr = csr_wb && (exec_wb);
 
     assign VX_writeback_tempp.write_data = exec_wb ? VX_inst_exec_wb.alu_result :
                                            csr_wb ? VX_csr_wb.csr_result :
@@ -85,6 +87,13 @@ module VX_writeback (
         .out ({use_wb_data , VX_writeback_inter.wb_valid, VX_writeback_inter.rd, VX_writeback_inter.wb, VX_writeback_inter.wb_warp_num, VX_writeback_inter.wb_pc})
     );
 
+    reg[31:0] last_data_wb;
+    always @(posedge clk) begin
+        if ((|VX_writeback_inter.wb_valid) && (VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd == 28)) begin
+            last_data_wb <= use_wb_data[0];
+        end
+    end
+
 `ifdef SYN
     assign VX_writeback_inter.write_data = prev_is_mem ? VX_writeback_tempp.write_data : use_wb_data;
 `else
diff --git a/rtl/interfaces/VX_csr_req_inter.v b/rtl/interfaces/VX_csr_req_inter.v
index c8daf257..9080f0e1 100644
--- a/rtl/interfaces/VX_csr_req_inter.v
+++ b/rtl/interfaces/VX_csr_req_inter.v
@@ -11,7 +11,7 @@ interface VX_csr_req_inter ();
     wire[`NW_M1:0] warp_num;
     wire[4:0] rd;
     wire[1:0] wb;
-
+    wire[4:0] alu_op;
     wire is_csr;
     wire[11:0] csr_address;
     wire csr_immed;
diff --git a/rtl/simulate/test_bench.h b/rtl/simulate/test_bench.h
index 5bd7c900..3a001377 100644
--- a/rtl/simulate/test_bench.h
+++ b/rtl/simulate/test_bench.h
@@ -415,12 +415,14 @@ bool Vortex::simulate(std::string file_to_simulate)
 
     std::cerr << "New Total Cycles: " << (this->stats_total_cycles) << "\n";
 
-    // int status = (unsigned int) vortex->Vortex__DOT__vx_front_end__DOT__vx_decode__DOT__vx_grp_wrapper__DOT__genblk2__BRA__0__KET____DOT__vx_gpr__DOT__first_ram__DOT__GPR[28][0] & 0xf;
+    int status = (unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb & 0xf;
+
+    // std::cout << "Last wb: " << std::hex << ((unsigned int) vortex->Vortex__DOT__vx_back_end__DOT__VX_wb__DOT__last_data_wb) << "\n";
 
     // std::cout << "Something: " << result << '\n';
 
-    uint32_t status;
-    ram.getWord(0, &status);
+    // uint32_t status;
+    // ram.getWord(0, &status);
 
     this->print_stats();
 