Rename Stage that removes the need for forwarding

This commit is contained in:
felsabbagh3
2019-10-17 00:48:54 -04:00
parent 9a9afbbb6b
commit 95047fcadc
12 changed files with 177 additions and 183 deletions

View File

@@ -3,7 +3,7 @@ all: RUNFILE
# -LDFLAGS '-lsystemc' # -LDFLAGS '-lsystemc'
VERILATOR: VERILATOR:
echo "#define VCD_OFF" > tb_debug.h echo "#define VCD_OFF" > tb_debug.h
verilator --compiler gcc -Wno-fatal -Wno-UNOPTFLAT -Wno-UNDRIVEN -Wno-UNSIGNED -Wno-lint -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/' verilator --compiler gcc --Wno-PINMISSING -cc Vortex.v -I. -I../models/memory/cln28hpc/rf2_32x128_wm1/ -I/usr/local/systemc/ -Iinterfaces/ -Ipipe_regs/ --exe test_bench.cpp -CFLAGS '-std=c++11 -O3' -LDFLAGS '-L/usr/local/systemc/'
compdebug: compdebug:
echo "#define VCD_OUTPUT" > tb_debug.h echo "#define VCD_OUTPUT" > tb_debug.h

View File

@@ -2,8 +2,9 @@ module VX_back_end (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire fetch_delay, input wire fetch_delay,
input wire schedule_delay,
input wire[31:0] csr_decode_csr_data, input wire[31:0] csr_decode_csr_data,
output wire execute_branch_stall, output wire execute_branch_stall,
input wire in_fwd_stall, input wire in_fwd_stall,
@@ -64,6 +65,7 @@ VX_frE_to_bckE_req_inter VX_bckE_req_out();
VX_gpr_stage VX_gpr_stage( VX_gpr_stage VX_gpr_stage(
.clk (clk), .clk (clk),
.schedule_delay (schedule_delay),
.VX_writeback_inter(VX_writeback_inter), .VX_writeback_inter(VX_writeback_inter),
.VX_fwd_rsp (VX_fwd_rsp), .VX_fwd_rsp (VX_fwd_rsp),
.in_fwd_stall (in_fwd_stall), .in_fwd_stall (in_fwd_stall),

View File

@@ -8,6 +8,7 @@ module VX_fetch (
input wire in_fwd_stall, input wire in_fwd_stall,
input wire in_branch_stall_exe, input wire in_branch_stall_exe,
input wire in_gpr_stall, input wire in_gpr_stall,
input wire schedule_delay,
VX_icache_response_inter icache_response, VX_icache_response_inter icache_response,
VX_icache_request_inter icache_request, VX_icache_request_inter icache_request,
@@ -28,7 +29,7 @@ module VX_fetch (
wire warp_stall; wire warp_stall;
assign pipe_stall = in_gpr_stall || in_fwd_stall || in_freeze; assign pipe_stall = in_gpr_stall || in_fwd_stall || in_freeze || schedule_delay;
assign warp_stall = in_branch_stall || (in_branch_stall_exe && 0); assign warp_stall = in_branch_stall || (in_branch_stall_exe && 0);

View File

@@ -111,7 +111,8 @@ module VX_forwarding (
(!src1_mem_fwd)); (!src1_mem_fwd));
assign out_src1_fwd = src1_exe_fwd || src1_mem_fwd || (src1_wb_fwd && 0); // assign out_src1_fwd = src1_exe_fwd || src1_mem_fwd || (src1_wb_fwd && 0);
assign out_src1_fwd = 0;
@@ -137,15 +138,19 @@ module VX_forwarding (
(in_writeback_warp_num == in_decode_warp_num); (in_writeback_warp_num == in_decode_warp_num);
assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || (src2_wb_fwd && 0); // assign out_src2_fwd = src2_exe_fwd || src2_mem_fwd || (src2_wb_fwd && 0);
assign out_src2_fwd = 0;
wire exe_mem_read_stall = ((src1_exe_fwd || src2_exe_fwd) && exe_mem_read) ? `STALL : `NO_STALL; // wire exe_mem_read_stall = ((src1_exe_fwd || src2_exe_fwd) && exe_mem_read) ? `STALL : `NO_STALL;
wire mem_mem_read_stall = ((src1_mem_fwd || src2_mem_fwd) && mem_mem_read) ? `STALL : `NO_STALL; // wire mem_mem_read_stall = ((src1_mem_fwd || src2_mem_fwd) && mem_mem_read) ? `STALL : `NO_STALL;
wire exe_mem_read_stall = `NO_STALL;
wire mem_mem_read_stall = `NO_STALL;
assign out_fwd_stall = exe_mem_read_stall || mem_mem_read_stall; // assign out_fwd_stall = exe_mem_read_stall || mem_mem_read_stall;
assign out_fwd_stall = 0;
// always @(*) begin // always @(*) begin
// if (out_fwd_stall) $display("FWD STALL"); // if (out_fwd_stall) $display("FWD STALL");

View File

@@ -9,6 +9,7 @@ module VX_front_end (
input wire execute_branch_stall, input wire execute_branch_stall,
input wire in_gpr_stall, input wire in_gpr_stall,
input wire schedule_delay,
VX_warp_ctl_inter VX_warp_ctl, VX_warp_ctl_inter VX_warp_ctl,
@@ -18,7 +19,6 @@ module VX_front_end (
VX_jal_response_inter VX_jal_rsp, VX_jal_response_inter VX_jal_rsp,
VX_branch_response_inter VX_branch_rsp, VX_branch_response_inter VX_branch_rsp,
VX_wb_inter VX_writeback_inter,
VX_frE_to_bckE_req_inter VX_bckE_req, VX_frE_to_bckE_req_inter VX_bckE_req,
@@ -38,7 +38,7 @@ wire decode_branch_stall;
wire decode_gpr_stall; wire decode_gpr_stall;
wire total_freeze = memory_delay || fetch_delay || in_gpr_stall; wire total_freeze = memory_delay || fetch_delay || in_gpr_stall || schedule_delay;
/* verilator lint_off UNUSED */ /* verilator lint_off UNUSED */
wire real_fetch_ebreak; wire real_fetch_ebreak;
@@ -49,6 +49,7 @@ VX_fetch vx_fetch(
.in_memory_delay (memory_delay), .in_memory_delay (memory_delay),
.in_branch_stall (decode_branch_stall), .in_branch_stall (decode_branch_stall),
.in_fwd_stall (forwarding_fwd_stall), .in_fwd_stall (forwarding_fwd_stall),
.schedule_delay (schedule_delay),
.in_branch_stall_exe(execute_branch_stall), .in_branch_stall_exe(execute_branch_stall),
.in_gpr_stall (decode_gpr_stall), .in_gpr_stall (decode_gpr_stall),
.VX_jal_rsp (VX_jal_rsp), .VX_jal_rsp (VX_jal_rsp),

View File

@@ -15,13 +15,6 @@ module VX_gpr (
assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0)); assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
// <<<<<<< HEAD
// always @(*) begin
// if(write_enable) $display("Writing to %d: %d = %h",VX_writeback_inter.wb_warp_num, VX_writeback_inter.rd, VX_writeback_inter.write_data[0][31:0]);
// end
// byte_enabled_simple_dual_port_ram first_ram( // byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable), // .we (write_enable),
// .clk (clk), // .clk (clk),
@@ -35,160 +28,102 @@ module VX_gpr (
// ); // );
// ======= // =======
// byte_enabled_simple_dual_port_ram first_ram( byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable), .we (write_enable),
// .clk (clk), .clk (clk),
// .waddr (VX_writeback_inter.rd), .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs1), .raddr1(VX_gpr_read.rs1),
// .be (VX_writeback_inter.wb_valid), .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data), .wdata (VX_writeback_inter.write_data),
// .q1 (out_a_reg_data) .q1 (out_a_reg_data)
// ); );
// byte_enabled_simple_dual_port_ram first_ram( byte_enabled_simple_dual_port_ram second_ram(
// .we (write_enable), .we (write_enable),
// .clk (clk), .clk (clk),
// .waddr (VX_writeback_inter.rd), .waddr (VX_writeback_inter.rd),
// .raddr1(VX_gpr_read.rs2), .raddr1(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid), .be (VX_writeback_inter.wb_valid),
// .wdata (VX_writeback_inter.write_data), .wdata (VX_writeback_inter.write_data),
// .q1 (out_b_reg_data) .q1 (out_b_reg_data)
// ); );
wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}}; // wire[127:0] write_bit_mask = {{32{~(VX_writeback_inter.wb_valid[3])}}, {32{~(VX_writeback_inter.wb_valid[2])}}, {32{~(VX_writeback_inter.wb_valid[1])}}, {32{~(VX_writeback_inter.wb_valid[0])}}};
// /* verilator lint_off PINCONNECTEMPTY */
// Port A is a read port, Port B is a write port // rf2_32x128_wm1 first_ram (
// .CENYA(),
/* verilator lint_off PINCONNECTEMPTY */ // .AYA(),
rf2_32x128_wm1 first_ram ( // .CENYB(),
.CENYA(), // .WENYB(),
.AYA(), // .AYB(),
.CENYB(), // .QA(out_a_reg_data),
.WENYB(), // .SOA(),
.AYB(), // .SOB(),
.QA(out_a_reg_data), // .CLKA(clk),
.SOA(), // .CENA(1'b0),
.SOB(), // .AA(VX_gpr_read.rs1),
.CLKA(clk), // .CLKB(clk),
.CENA(1'b0), // .CENB(1'b0),
.AA(VX_gpr_read.rs1), // .WENB(write_bit_mask),
.CLKB(clk), // .AB(VX_writeback_inter.rd),
.CENB(1'b0), // .DB(VX_writeback_inter.write_data),
.WENB(write_bit_mask), // .EMAA(3'b011),
.AB(VX_writeback_inter.rd), // .EMASA(1'b0),
.DB(VX_writeback_inter.write_data), // .EMAB(3'b011),
.EMAA(3'b011), // .TENA(1'b1),
.EMASA(1'b0), // .TCENA(1'b0),
.EMAB(3'b011), // .TAA(5'b0),
.TENA(1'b1), // .TENB(1'b1),
.TCENA(1'b0), // .TCENB(1'b0),
.TAA(5'b0), // .TWENB(128'b0),
.TENB(1'b1), // .TAB(5'b0),
.TCENB(1'b0), // .TDB(128'b0),
.TWENB(128'b0), // .RET1N(1'b1),
.TAB(5'b0), // .SIA(2'b0),
.TDB(128'b0), // .SEA(1'b0),
.RET1N(1'b1), // .DFTRAMBYP(1'b0),
.SIA(2'b0), // .SIB(2'b0),
.SEA(1'b0), // .SEB(1'b0),
.DFTRAMBYP(1'b0), // .COLLDISN(1'b1)
.SIB(2'b0), // );
.SEB(1'b0), // /* verilator lint_on PINCONNECTEMPTY */
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(out_b_reg_data),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(1'b0),
.AA(VX_gpr_read.rs2),
.CLKB(clk),
.CENB(1'b0),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(VX_writeback_inter.write_data),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
// >>>>>>> 5680b997b599ce2900997cab976681fe3881e880
// // USING RAM blocks
// // First RAM
// byte_enabled_simple_dual_port_ram first_ram(
// .we (write_enable),
// .clk (clk),
// .waddr(VX_writeback_inter.rd),
// .raddr(VX_gpr_read.rs1),
// .be (VX_writeback_inter.wb_valid),
// .wdata(VX_writeback_inter.write_data),
// .q (out_a_reg_data)
// );
// // Second RAM block
// byte_enabled_simple_dual_port_ram second_ram(
// .we (write_enable),
// .clk (clk),
// .waddr(VX_writeback_inter.rd),
// .raddr(VX_gpr_read.rs2),
// .be (VX_writeback_inter.wb_valid),
// .wdata(VX_writeback_inter.write_data),
// .q (out_b_reg_data)
// );
// logic[`NT_M1:0][31:0] gpr[31:0]; // gpr[register_number][thread_number][data_bits]
// wire write_enable;
// assign write_enable = valid_write_request && ((VX_writeback_inter.wb != 0) && (VX_writeback_inter.rd != 5'h0));
// assign read_enable = valid_request;
// // Using Registers
// integer thread_index;
// always_ff@(posedge clk)
// begin
// if (write_enable) begin
// for (thread_index = 0; thread_index <= `NT_M1; thread_index = thread_index + 1) begin
// if (VX_writeback_inter.wb_valid[thread_index]) begin
// gpr[VX_writeback_inter.rd][thread_index] <= VX_writeback_inter.write_data[thread_index];
// end
// end
// end
// out_a_reg_data <= gpr[VX_gpr_read.rs1];
// out_b_reg_data <= gpr[VX_gpr_read.rs2];
// end
// /* verilator lint_off PINCONNECTEMPTY */
// rf2_32x128_wm1 second_ram (
// .CENYA(),
// .AYA(),
// .CENYB(),
// .WENYB(),
// .AYB(),
// .QA(out_b_reg_data),
// .SOA(),
// .SOB(),
// .CLKA(clk),
// .CENA(1'b0),
// .AA(VX_gpr_read.rs2),
// .CLKB(clk),
// .CENB(1'b0),
// .WENB(write_bit_mask),
// .AB(VX_writeback_inter.rd),
// .DB(VX_writeback_inter.write_data),
// .EMAA(3'b011),
// .EMASA(1'b0),
// .EMAB(3'b011),
// .TENA(1'b1),
// .TCENA(1'b0),
// .TAA(5'b0),
// .TENB(1'b1),
// .TCENB(1'b0),
// .TWENB(128'b0),
// .TAB(5'b0),
// .TDB(128'b0),
// .RET1N(1'b1),
// .SIA(2'b0),
// .SEA(1'b0),
// .DFTRAMBYP(1'b0),
// .SIB(2'b0),
// .SEB(1'b0),
// .COLLDISN(1'b1)
// );
endmodule endmodule

View File

@@ -1,6 +1,7 @@
module VX_gpr_stage ( module VX_gpr_stage (
input wire clk, input wire clk,
input wire in_fwd_stall, input wire in_fwd_stall,
input wire schedule_delay,
// inputs // inputs
// Instruction Information // Instruction Information
VX_frE_to_bckE_req_inter VX_bckE_req, VX_frE_to_bckE_req_inter VX_bckE_req,
@@ -62,7 +63,7 @@ module VX_gpr_stage (
// assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0]; // assign VX_bckE_req_out.csr_mask = (VX_bckE_req.sr_immed == 1'b1) ? {27'h0, VX_bckE_req.rs1} : VX_gpr_data.a_reg_data[0];
VX_gpr_data_inter VX_gpr_datf; VX_gpr_data_inter VX_gpr_datf;
VX_generic_register #(.N(256)) d_e_reg VX_generic_register #(.N(256)) reg_data
( (
.clk (clk), .clk (clk),
.reset(0), .reset(0),
@@ -72,10 +73,12 @@ module VX_gpr_stage (
.out ({VX_gpr_data.a_reg_data, VX_gpr_data.b_reg_data}) .out ({VX_gpr_data.a_reg_data, VX_gpr_data.b_reg_data})
); );
VX_d_e_reg vx_d_e_reg( wire stall = in_fwd_stall || schedule_delay;
VX_d_e_reg gpr_stage_reg(
.clk (clk), .clk (clk),
.reset (0), .reset (0),
.in_fwd_stall (in_fwd_stall), .in_fwd_stall (stall),
.in_branch_stall (0), .in_branch_stall (0),
.in_freeze (0), .in_freeze (0),
.in_gpr_stall (out_gpr_stall), .in_gpr_stall (out_gpr_stall),

View File

@@ -1,11 +1,50 @@
`include "VX_define.v"
// VX_scheduler: raises schedule_delay while the incoming request (VX_bckE_req)
// reads a register whose write-back is still outstanding.
//
// Ports (as visible below):
//   clk                - clock for the pending-write table updates
//   VX_bckE_req        - incoming request; rs1, rs2, rd, wb, rs2_src, mem_write and valid are read
//   VX_writeback_inter - write-back interface; wb, wb_valid and rd are read
//   schedule_delay     - output, high when the incoming request must wait
//
// NOTE(review): these lines look like a side-by-side diff rendering (old text
// followed by new text on the same row, e.g. the module/clk/endmodule rows) --
// confirm against the real VX_scheduler.v before relying on exact syntax.
module VX_scheduler ( module VX_scheduler (
input clk, input wire clk,
input VX_frE_to_bckE_req_inter VX_bckE_req,
VX_wb_inter VX_writeback_inter,
output wire schedule_delay
); );
// One 1-bit flag per register index 0..31. Despite the name, it is used below
// as a "write pending" marker: set when a writing request is accepted, cleared
// when the matching write-back arrives.
// NOTE(review): the table is indexed by register number only; no warp number is
// used here -- confirm whether registers of different warps are meant to share a flag.
reg rename_table[31:0];
// The module has no reset input; the table is cleared only through this initial block.
initial begin
integer i;
for (i = 0; i < 32; i = i + 1) rename_table[i] = 0;
end
// A write-back counts only if wb is non-zero, at least one wb_valid bit is set,
// and the destination is not register 0.
wire valid_wb = (VX_writeback_inter.wb != 0) && (|VX_writeback_inter.wb_valid) && (VX_writeback_inter.rd != 0);
// The incoming request will write a register (wb non-zero, rd not register 0).
wire wb_inc = (VX_bckE_req.wb != 0) && (VX_bckE_req.rd != 0);
// wire pass_through = ((VX_bckE_req.rs1 == VX_writeback_inter.rd) || (VX_bckE_req.rs2 == VX_writeback_inter.rd)) && valid_wb;
// wire pass_through = 0;
// Pending flags of the two source registers of the incoming request.
wire rs1_rename = rename_table[VX_bckE_req.rs1];
wire rs2_rename = rename_table[VX_bckE_req.rs2];
wire is_store = (VX_bckE_req.mem_write != `NO_MEM_WRITE);
// Register 0 never causes a wait. rs2 is considered only when the request takes
// its second operand from a register (`RS2_REG) or is a store.
wire rs1_rename_qual = (rs1_rename && (VX_bckE_req.rs1 != 0));
wire rs2_rename_qual = (rs2_rename && (VX_bckE_req.rs2 != 0) && ((VX_bckE_req.rs2_src == `RS2_REG) || is_store));
wire rename_valid = rs1_rename_qual || rs2_rename_qual ;
// Delay only when at least one bit of VX_bckE_req.valid is set.
assign schedule_delay = (rename_valid) && (|VX_bckE_req.valid);
// Table update: clear on write-back, set when a writing request is not delayed.
// If both statements address the same entry in one cycle, the later non-blocking
// assignment (the set) is the one that takes effect.
// NOTE(review): the set is not qualified by VX_bckE_req.valid, and rd itself is
// never checked against the table, so a single bit cannot distinguish two
// outstanding writes to the same rd -- verify this cannot occur in the pipeline.
always @(posedge clk) begin
if (valid_wb ) rename_table[VX_writeback_inter.rd] <= 0;
if (!schedule_delay && wb_inc) rename_table[VX_bckE_req.rd] <= 1;
end
endmodule endmodule

View File

@@ -73,6 +73,7 @@ VX_warp_ctl_inter VX_warp_ctl();
wire out_gpr_stall; wire out_gpr_stall;
wire schedule_delay;
VX_front_end vx_front_end( VX_front_end vx_front_end(
@@ -81,11 +82,11 @@ VX_front_end vx_front_end(
.VX_warp_ctl (VX_warp_ctl), .VX_warp_ctl (VX_warp_ctl),
.forwarding_fwd_stall(forwarding_fwd_stall), .forwarding_fwd_stall(forwarding_fwd_stall),
.execute_branch_stall(execute_branch_stall), .execute_branch_stall(execute_branch_stall),
.VX_writeback_inter (VX_writeback_inter),
.VX_bckE_req (VX_bckE_req), .VX_bckE_req (VX_bckE_req),
.decode_csr_address (decode_csr_address), .decode_csr_address (decode_csr_address),
.memory_delay (memory_delay), .memory_delay (memory_delay),
.fetch_delay (fetch_delay), .fetch_delay (fetch_delay),
.schedule_delay (schedule_delay),
.icache_response_fe (icache_response_fe), .icache_response_fe (icache_response_fe),
.icache_request_fe (icache_request_fe), .icache_request_fe (icache_request_fe),
.VX_jal_rsp (VX_jal_rsp), .VX_jal_rsp (VX_jal_rsp),
@@ -94,10 +95,17 @@ VX_front_end vx_front_end(
.in_gpr_stall (out_gpr_stall) .in_gpr_stall (out_gpr_stall)
); );
VX_scheduler schedule(
.clk (clk),
.VX_bckE_req (VX_bckE_req),
.VX_writeback_inter(VX_writeback_inter),
.schedule_delay (schedule_delay)
);
VX_back_end vx_back_end( VX_back_end vx_back_end(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.schedule_delay (schedule_delay),
.fetch_delay (fetch_delay), .fetch_delay (fetch_delay),
.in_fwd_stall (forwarding_fwd_stall), .in_fwd_stall (forwarding_fwd_stall),
.VX_fwd_req_de (VX_fwd_req_de), .VX_fwd_req_de (VX_fwd_req_de),

View File

@@ -1,7 +1,7 @@
# Dynamic Instructions: 13 # Dynamic Instructions: 67875
# of total cycles: 24 # of total cycles: 67891
# of forwarding stalls: 0 # of forwarding stalls: 0
# of branch stalls: 0 # of branch stalls: 0
# CPI: 1.84615 # CPI: 1.00024
# time to simulate: 6.95312e-310 milliseconds # time to simulate: 0 milliseconds
# GRADE: Failed on test: 4294967295 # GRADE: Failed on test: 4294967295

View File

@@ -1 +1 @@
#define VCD_OUTPUT #define VCD_OFF

View File

@@ -372,11 +372,11 @@ bool Vortex::simulate(std::string file_to_simulate)
// unsigned cycles; // unsigned cycles;
counter = 0; counter = 0;
this->stats_total_cycles = 10; this->stats_total_cycles = 10;
while (this->stop && ((counter < 5))) while (this->stop && ((counter < 6)))
// while (this->stats_total_cycles < 10) // while (this->stats_total_cycles < 10)
{ {
// std::cout << "Counter: " << counter << "\n"; // std::cout << "Counter: " << counter << "\n";
if ((this->stats_total_cycles) % 5000 == 0) std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n"; // if ((this->stats_total_cycles) % 5000 == 0) std::cout << "************* Cycle: " << (this->stats_total_cycles) << "\n";
// dstop = !dbus_driver(); // dstop = !dbus_driver();
vortex->clk = 1; vortex->clk = 1;