remove tab spaces

This commit is contained in:
Blaise Tine
2020-04-21 03:19:47 -04:00
parent 43a8bf4326
commit d85c0af5d6
75 changed files with 4388 additions and 4382 deletions

View File

@@ -1,207 +1,207 @@
`include "VX_define.vh"

// VX_alu: combinational integer ALU with pipelined multiply and divide units.
//
// Operand selection:
//   - ALU_in1 is always src_a.
//   - ALU_in2 is itype_immed when src_rs2 == `RS2_IMMED, otherwise src_b.
//
// Multi-cycle operations:
//   DIV/DIVU/REM/REMU use two VX_divide instances (signed and unsigned)
//   configured with div_pipeline_len stages; MUL/MULH/MULHSU/MULHU use one
//   64x64 VX_mult configured with mul_pipeline_len stages. alu_stall is held
//   high while the per-instruction countdown is non-zero.
//
// Ports:
//   clk, reset   - clock and synchronous reset of the stall countdown
//   src_a, src_b - register operands
//   src_rs2      - second-operand select (compared against `RS2_IMMED)
//   itype_immed  - immediate used as ALU_in2 when selected by src_rs2
//   upper_immed  - 20-bit upper immediate, placed in result bits [31:12]
//   alu_op       - operation selector (`ADD, `SUB, ... from VX_define.vh)
//   curr_PC      - program counter, used by `AUIPC_ALU
//   alu_result   - combinational result of the selected operation
//   alu_stall    - high while a multiply/divide countdown is running
module VX_alu (
    input wire        clk,
    input wire        reset,
    input wire[31:0]  src_a,
    input wire[31:0]  src_b,
    input wire        src_rs2,
    input wire[31:0]  itype_immed,
    input wire[19:0]  upper_immed,
    input wire[4:0]   alu_op,
    input wire[31:0]  curr_PC,
    output reg[31:0]  alu_result,
    // Driven by a continuous assignment below, so it is a net rather than a reg.
    output wire       alu_stall
);

    // Number of pipeline stages requested from the divider / multiplier.
    localparam div_pipeline_len = 20;
    localparam mul_pipeline_len = 8;

    wire[31:0] unsigned_div_result;
    wire[31:0] unsigned_rem_result;
    wire[31:0] signed_div_result;
    wire[31:0] signed_rem_result;

    wire[63:0] mul_data_a, mul_data_b;
    wire[63:0] mul_result;

    wire[31:0] ALU_in1;
    wire[31:0] ALU_in2;

    // ------------------------------------------------------------------
    // Operand selection
    // ------------------------------------------------------------------
    wire       which_in2;
    wire[31:0] upper_immed_s;

    assign which_in2     = src_rs2 == `RS2_IMMED;
    assign ALU_in1       = src_a;
    assign ALU_in2       = which_in2 ? itype_immed : src_b;
    // U-type immediate: 20 payload bits followed by 12 zero bits.
    assign upper_immed_s = {upper_immed, {12{1'b0}}};

    // ------------------------------------------------------------------
    // Pipelined divide / multiply units
    // ------------------------------------------------------------------
    VX_divide #(
        .WIDTHN(32),
        .WIDTHD(32),
        .SPEED("HIGHEST"),
        .PIPELINE(div_pipeline_len)
    ) unsigned_div (
        .clock(clk),
        .aclr(1'b0),
        .clken(1'b1), // TODO this could be disabled on inactive instructions
        .numer(ALU_in1),
        .denom(ALU_in2),
        .quotient(unsigned_div_result),
        .remainder(unsigned_rem_result)
    );

    VX_divide #(
        .WIDTHN(32),
        .WIDTHD(32),
        .NREP("SIGNED"),
        .DREP("SIGNED"),
        .SPEED("HIGHEST"),
        .PIPELINE(div_pipeline_len)
    ) signed_div (
        .clock(clk),
        .aclr(1'b0),
        .clken(1'b1), // TODO this could be disabled on inactive instructions
        .numer(ALU_in1),
        .denom(ALU_in2),
        .quotient(signed_div_result),
        .remainder(signed_rem_result)
    );

    VX_mult #(
        .WIDTHA(64),
        .WIDTHB(64),
        .WIDTHP(64),
        .SPEED("HIGHEST"),
        .FORCE_LE("YES"),
        .PIPELINE(mul_pipeline_len)
    ) multiplier (
        .clock(clk),
        .aclr(1'b0),
        .clken(1'b1), // TODO this could be disabled on inactive instructions
        .dataa(mul_data_a),
        .datab(mul_data_b),
        .result(mul_result)
    );

    // MUL, MULH (signed*signed), MULHSU (signed*unsigned), MULHU (unsigned*unsigned)
    // Operands are widened to 64 bits: sign-extended by default, zero-extended
    // for the operand(s) the selected opcode treats as unsigned.
    wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
    wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
    assign mul_data_a = (alu_op == `MULHU) ? {32'b0, ALU_in1} : alu_in1_signed;
    assign mul_data_b = (alu_op == `MULHU || alu_op == `MULHSU) ? {32'b0, ALU_in2} : alu_in2_signed;

    // ------------------------------------------------------------------
    // Stall countdown for multi-cycle operations
    // ------------------------------------------------------------------
    reg [15:0] curr_inst_delay;   // latency of the opcode currently presented
    reg [15:0] inst_delay;        // remaining cycles for the operation in flight
    reg        inst_was_stalling; // set once a countdown has been started

    // Before a countdown has started the stall is decided by the opcode's
    // latency; afterwards by the remaining count.
    wire inst_delay_stall = inst_was_stalling ? inst_delay != 0 : curr_inst_delay != 0;
    assign alu_stall = inst_delay_stall;

    always @(*) begin
        case (alu_op)
            `DIV,
            `DIVU,
            `REM,
            `REMU:   curr_inst_delay = div_pipeline_len;
            `MUL,
            `MULH,
            `MULHSU,
            `MULHU:  curr_inst_delay = mul_pipeline_len;
            default: curr_inst_delay = 0;
        endcase // alu_op
    end

    always @(posedge clk) begin
        if (reset) begin
            inst_delay        <= 0;
            inst_was_stalling <= 0;
        end
        else if (inst_delay_stall) begin
            if (inst_was_stalling) begin
                // Countdown in progress.
                if (inst_delay > 0)
                    inst_delay <= inst_delay - 1;
            end
            else begin
                // First stalled cycle: this cycle already counts as one.
                inst_was_stalling <= 1;
                inst_delay        <= curr_inst_delay - 1;
            end
        end
        else begin
            inst_was_stalling <= 0;
        end
    end

    // ------------------------------------------------------------------
    // Result selection
    // ------------------------------------------------------------------
    // A single implementation is used regardless of SYN_FUNC: a variant that
    // re-declares the `upper_immed` port as a local wire and assigns it from
    // itself cannot elaborate, so the shifted immediate lives in upper_immed_s.
    always @(*) begin
        case (alu_op)
            `ADD:       alu_result = $signed(ALU_in1) + $signed(ALU_in2);
            `SUB:       alu_result = $signed(ALU_in1) - $signed(ALU_in2);
            `SLLA:      alu_result = ALU_in1 << ALU_in2[4:0];
            `SLT:       alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
            `SLTU:      alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
            `XOR:       alu_result = ALU_in1 ^ ALU_in2;
            `SRL:       alu_result = ALU_in1 >> ALU_in2[4:0];
            `SRA:       alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
            `OR:        alu_result = ALU_in1 | ALU_in2;
            `AND:       alu_result = ALU_in2 & ALU_in1;
            `SUBU:      alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
            `LUI_ALU:   alu_result = upper_immed_s;
            `AUIPC_ALU: alu_result = $signed(curr_PC) + $signed(upper_immed_s);
            // TODO profitable to roll these exceptional cases into inst_delay to avoid pipeline when possible?
            `MUL:       alu_result = mul_result[31:0];
            `MULH:      alu_result = mul_result[63:32];
            `MULHSU:    alu_result = mul_result[63:32];
            `MULHU:     alu_result = mul_result[63:32];
            // Division by zero: quotient is all ones, remainder is the dividend.
            `DIV:       alu_result = (ALU_in2 == 0) ? 32'hffffffff : signed_div_result;
            `DIVU:      alu_result = (ALU_in2 == 0) ? 32'hffffffff : unsigned_div_result;
            `REM:       alu_result = (ALU_in2 == 0) ? ALU_in1 : signed_rem_result;
            `REMU:      alu_result = (ALU_in2 == 0) ? ALU_in1 : unsigned_rem_result;
            default:    alu_result = 32'h0;
        endcase // alu_op
    end

endmodule

View File

@@ -1,25 +1,25 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_back_end #( module VX_back_end #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire schedule_delay, input wire schedule_delay,
VX_gpu_dcache_rsp_if dcache_rsp_if, VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if, VX_gpu_dcache_req_if dcache_req_if,
output wire mem_delay, output wire mem_delay,
output wire exec_delay, output wire exec_delay,
output wire gpr_stage_delay, output wire gpr_stage_delay,
VX_jal_rsp_if jal_rsp_if, VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if, VX_branch_rsp_if branch_rsp_if,
VX_frE_to_bckE_req_if bckE_req_if, VX_frE_to_bckE_req_if bckE_req_if,
VX_wb_if writeback_if, VX_wb_if writeback_if,
VX_warp_ctl_if warp_ctl_if VX_warp_ctl_if warp_ctl_if
); );
VX_wb_if writeback_temp_if(); VX_wb_if writeback_temp_if();
@@ -33,7 +33,7 @@ assign writeback_if.wb_pc = writeback_temp_if.wb_pc;
// assign VX_writeback_if(writeback_temp_if); // assign VX_writeback_if(writeback_temp_if);
wire no_slot_mem; wire no_slot_mem;
wire no_slot_exec; wire no_slot_exec;
// LSU input + output // LSU input + output
VX_lsu_req_if lsu_req_if(); VX_lsu_req_if lsu_req_if();
@@ -47,79 +47,79 @@ VX_inst_exec_wb_if inst_exec_wb_if();
VX_gpu_inst_req_if gpu_inst_req_if(); VX_gpu_inst_req_if gpu_inst_req_if();
// CSR unit inputs // CSR unit inputs
VX_csr_req_if csr_req_if(); VX_csr_req_if csr_req_if();
VX_csr_wb_if csr_wb_if(); VX_csr_wb_if csr_wb_if();
wire no_slot_csr; wire no_slot_csr;
wire stall_gpr_csr; wire stall_gpr_csr;
VX_gpr_stage gpr_stage ( VX_gpr_stage gpr_stage (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
.writeback_if (writeback_temp_if), .writeback_if (writeback_temp_if),
.bckE_req_if (bckE_req_if), .bckE_req_if (bckE_req_if),
// New // New
.exec_unit_req_if (exec_unit_req_if), .exec_unit_req_if (exec_unit_req_if),
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),
.gpu_inst_req_if (gpu_inst_req_if), .gpu_inst_req_if (gpu_inst_req_if),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
.stall_gpr_csr (stall_gpr_csr), .stall_gpr_csr (stall_gpr_csr),
// End new // End new
.memory_delay (mem_delay), .memory_delay (mem_delay),
.exec_delay (exec_delay), .exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay) .gpr_stage_delay (gpr_stage_delay)
); );
VX_lsu load_store_unit ( VX_lsu load_store_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.lsu_req_if (lsu_req_if), .lsu_req_if (lsu_req_if),
.mem_wb_if (mem_wb_if), .mem_wb_if (mem_wb_if),
.dcache_rsp_if (dcache_rsp_if), .dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if), .dcache_req_if (dcache_req_if),
.delay (mem_delay), .delay (mem_delay),
.no_slot_mem (no_slot_mem) .no_slot_mem (no_slot_mem)
); );
VX_exec_unit exec_unit ( VX_exec_unit exec_unit (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.exec_unit_req_if(exec_unit_req_if), .exec_unit_req_if(exec_unit_req_if),
.inst_exec_wb_if (inst_exec_wb_if), .inst_exec_wb_if (inst_exec_wb_if),
.jal_rsp_if (jal_rsp_if), .jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if), .branch_rsp_if (branch_rsp_if),
.delay (exec_delay), .delay (exec_delay),
.no_slot_exec (no_slot_exec) .no_slot_exec (no_slot_exec)
); );
VX_gpgpu_inst gpgpu_inst ( VX_gpgpu_inst gpgpu_inst (
.gpu_inst_req_if(gpu_inst_req_if), .gpu_inst_req_if(gpu_inst_req_if),
.warp_ctl_if (warp_ctl_if) .warp_ctl_if (warp_ctl_if)
); );
VX_csr_pipe #( VX_csr_pipe #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) csr_pipe ( ) csr_pipe (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.no_slot_csr (no_slot_csr), .no_slot_csr (no_slot_csr),
.csr_req_if (csr_req_if), .csr_req_if (csr_req_if),
.writeback_if(writeback_temp_if), .writeback_if(writeback_temp_if),
.csr_wb_if (csr_wb_if), .csr_wb_if (csr_wb_if),
.stall_gpr_csr(stall_gpr_csr) .stall_gpr_csr(stall_gpr_csr)
); );
VX_writeback wb ( VX_writeback wb (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.mem_wb_if (mem_wb_if), .mem_wb_if (mem_wb_if),
.inst_exec_wb_if (inst_exec_wb_if), .inst_exec_wb_if (inst_exec_wb_if),
.csr_wb_if (csr_wb_if), .csr_wb_if (csr_wb_if),
.writeback_if (writeback_temp_if), .writeback_if (writeback_temp_if),
.no_slot_mem (no_slot_mem), .no_slot_mem (no_slot_mem),
.no_slot_exec (no_slot_exec), .no_slot_exec (no_slot_exec),
.no_slot_csr (no_slot_csr) .no_slot_csr (no_slot_csr)
); );
endmodule endmodule

View File

@@ -1,84 +1,84 @@
`include "VX_define.vh"

// VX_csr_data: CSR storage array plus free-running cycle and
// retired-instruction counters.
//
// Reads are combinational. The four counter addresses (`CSR_CYCL_L/H,
// `CSR_INST_L/H) return halves of the 64-bit counters; every other address
// returns the stored CSR value zero-extended to 32 bits. Writes are
// synchronous and always target the storage array.
//
// Ports:
//   clk, reset         - clock and synchronous reset (clears the counters
//                        only; the csr[] array is not reset)
//   read_csr_address   - address being read
//   write_valid        - when high, write_csr_data is stored on the clock edge
//   write_csr_data     - value to store
//   write_csr_address  - address being written
//   read_csr_data      - 32-bit read result
//   writeback_valid    - increments the retired-instruction counter when high
module VX_csr_data (
    input wire                      clk,    // Clock
    input wire                      reset,

    input wire[`CSR_ADDR_SIZE-1:0]  read_csr_address,

    input wire                      write_valid,
    input wire[`CSR_WIDTH-1:0]      write_csr_data,

`IGNORE_WARNINGS_BEGIN
    // We use a smaller storage for CSRs than the standard 4KB in RISC-V
    input wire[`CSR_ADDR_SIZE-1:0]  write_csr_address,
`IGNORE_WARNINGS_END

    output wire[31:0]               read_csr_data,

    // For instruction retire counting
    input wire                      writeback_valid
);

    reg [`CSR_WIDTH-1:0] csr[`NUM_CSRS-1:0];
    reg [63:0]           cycle;
    reg [63:0]           instret;

    // Counter address decode.
    wire read_cycle    = (read_csr_address == `CSR_CYCL_L);
    wire read_cycleh   = (read_csr_address == `CSR_CYCL_H);
    wire read_instret  = (read_csr_address == `CSR_INST_L);
    wire read_instreth = (read_csr_address == `CSR_INST_H);

    // cast address to physical CSR range (upper address bits are dropped)
    wire [$clog2(`NUM_CSRS)-1:0] read_addr, write_addr;
    assign read_addr  = $size(read_addr)'(read_csr_address);
    assign write_addr = $size(write_addr)'(write_csr_address);

    always @(posedge clk) begin
        if (reset) begin
            cycle   <= 0;
            instret <= 0;
        end else begin
            cycle <= cycle + 1;
            if (write_valid) begin
                csr[write_addr] <= write_csr_data;
            end
            if (writeback_valid) begin
                instret <= instret + 1;
            end
        end
    end

    // The size cast zero-extends the stored value to 32 bits for any
    // `CSR_WIDTH, instead of assuming the stored width is exactly 12 bits.
    assign read_csr_data = read_cycle    ? cycle[31:0]    :
                           read_cycleh   ? cycle[63:32]   :
                           read_instret  ? instret[31:0]  :
                           read_instreth ? instret[63:32] :
                                           32'(csr[read_addr]);

endmodule : VX_csr_data

View File

@@ -1,106 +1,106 @@
`include "VX_define.vh"

// VX_csr_pipe: two-stage CSR pipeline.
//
// Stage 1 reads the addressed CSR (with a bypass from stage 2) and computes
// the updated value for the requested read-write / set / clear operation.
// Stage 2 (behind csr_reg_s2) writes the updated value into VX_csr_data and
// returns the pre-update read value to every thread through csr_wb_if.
// Addresses 12'h20, 12'h21 and 12'h22 are answered with per-thread ids
// instead of stored data.
//
// Parameters:
//   CORE_ID       - core index, used to offset the warp id returned at 12'h22
// Ports:
//   clk, reset    - clock and reset for the stage register and CSR storage
//   no_slot_csr   - stalls the stage register when high
//   csr_req_if    - incoming CSR request
//   writeback_if  - observed to count retired instructions (any wb_valid bit)
//   csr_wb_if     - CSR result going to writeback
//   stall_gpr_csr - high when a valid CSR request cannot advance
module VX_csr_pipe #(
    parameter CORE_ID = 0
) (
    input wire      clk,
    input wire      reset,
    input wire      no_slot_csr,
    VX_csr_req_if   csr_req_if,
    VX_wb_if        writeback_if,
    VX_csr_wb_if    csr_wb_if,
    output wire     stall_gpr_csr
);

    // Stage-2 copies of the request fields.
    wire[`NUM_THREADS-1:0]   valid_s2;
    wire[`NW_BITS-1:0]       warp_num_s2;
    wire[4:0]                rd_s2;
    wire[1:0]                wb_s2;
    wire                     is_csr_s2;
    wire[`CSR_ADDR_SIZE-1:0] csr_address_s2;
    wire[31:0]               csr_read_data_s2;
    wire[31:0]               csr_updated_data_s2;

    wire[31:0] csr_read_data_unqual;
    wire[31:0] csr_read_data;

    assign stall_gpr_csr = no_slot_csr && csr_req_if.is_csr && |(csr_req_if.valid);

    // Bypass: the CSR array is written synchronously from stage 2, so a read of
    // the address that stage 2 is about to write must take the pending value.
    // The bypass is taken only when stage 2 really holds a CSR write
    // (is_csr_s2, the same signal that drives write_valid); otherwise a
    // non-CSR instruction whose address field happens to match would forward
    // a value that is never written.
    wire bypass_s2 = is_csr_s2 && (csr_address_s2 == csr_req_if.csr_address);
    assign csr_read_data = bypass_s2 ? csr_updated_data_s2 : csr_read_data_unqual;

    wire writeback = |writeback_if.wb_valid;

    VX_csr_data csr_data (
        .clk               (clk),
        .reset             (reset),
        .read_csr_address  (csr_req_if.csr_address),
        .write_valid       (is_csr_s2),
        .write_csr_data    (csr_updated_data_s2[`CSR_WIDTH-1:0]),
        .write_csr_address (csr_address_s2),
        .read_csr_data     (csr_read_data_unqual),
        .writeback_valid   (writeback)
    );

    // New CSR value for the requested operation.
    reg [31:0] csr_updated_data;

    always @(*) begin
        case (csr_req_if.alu_op)
            `CSR_ALU_RW: csr_updated_data = csr_req_if.csr_mask;
            `CSR_ALU_RS: csr_updated_data = csr_read_data | csr_req_if.csr_mask;
            // Clear the bits selected by the mask.
            `CSR_ALU_RC: csr_updated_data = csr_read_data & ~csr_req_if.csr_mask;
            default:     csr_updated_data = 32'hdeadbeef;
        endcase
    end

    // Stage register. Width is the sum of the field widths below; the address
    // field uses `CSR_ADDR_SIZE to match csr_address_s2.
    // NOTE(review): assumes csr_req_if.csr_address is `CSR_ADDR_SIZE bits and
    // csr_req_if.warp_num is `NW_BITS bits wide - confirm against the interface.
    VX_generic_register #(
        .N(32 + 32 + `CSR_ADDR_SIZE + 1 + 2 + 5 + `NW_BITS + `NUM_THREADS)
    ) csr_reg_s2 (
        .clk   (clk),
        .reset (reset),
        .stall (no_slot_csr),
        .flush (1'b0),
        .in    ({csr_req_if.valid, csr_req_if.warp_num, csr_req_if.rd, csr_req_if.wb, csr_req_if.is_csr, csr_req_if.csr_address, csr_read_data   , csr_updated_data   }),
        .out   ({valid_s2        , warp_num_s2        , rd_s2        , wb_s2        , is_csr_s2        , csr_address_s2        , csr_read_data_s2, csr_updated_data_s2})
    );

    // Per-thread result candidates.
    wire [`NUM_THREADS-1:0][31:0] final_csr_data;
    wire [`NUM_THREADS-1:0][31:0] thread_ids;            // lane index
    wire [`NUM_THREADS-1:0][31:0] warp_ids;              // warp number within the core
    wire [`NUM_THREADS-1:0][31:0] warp_idz;              // warp number offset by CORE_ID * `NUM_WARPS
    wire [`NUM_THREADS-1:0][31:0] csr_vec_read_data_s2;  // stored CSR value, same for every lane

    genvar t;
    generate
        for (t = 0; t < `NUM_THREADS; t = t + 1) begin : per_thread_data
            assign thread_ids[t]           = t;
            assign warp_ids[t]             = 32'(warp_num_s2);
            assign warp_idz[t]             = 32'(warp_num_s2) + (CORE_ID * `NUM_WARPS);
            assign csr_vec_read_data_s2[t] = csr_read_data_s2;
        end
    endgenerate

    wire thread_select  = csr_address_s2 == 12'h20;
    wire warp_select    = csr_address_s2 == 12'h21;
    wire warp_id_select = csr_address_s2 == 12'h22;

    assign final_csr_data = thread_select  ? thread_ids :
                            warp_select    ? warp_ids   :
                            warp_id_select ? warp_idz   :
                                             csr_vec_read_data_s2;

    assign csr_wb_if.valid      = valid_s2;
    assign csr_wb_if.warp_num   = warp_num_s2;
    assign csr_wb_if.rd         = rd_s2;
    assign csr_wb_if.wb         = wb_s2;
    assign csr_wb_if.csr_result = final_csr_data;

endmodule

View File

@@ -2,37 +2,36 @@
`include "VX_define.vh"

// VX_csr_wrapper: purely combinational CSR responder.
//
// Passes the request's valid / warp_num / rd / wb fields straight through to
// the writeback interface and answers two addresses:
//   12'h20 - per-thread lane index
//   12'h21 - warp number of the request (same value for every lane)
// Any other address returns 0.
module VX_csr_wrapper (
    VX_csr_req_if csr_req_if,
    VX_csr_wb_if  csr_wb_if
);

    wire[`NUM_THREADS-1:0][31:0] thread_ids;
    wire[`NUM_THREADS-1:0][31:0] warp_ids;

    genvar cur_t, cur_tw;
    generate
        for (cur_t = 0; cur_t < `NUM_THREADS; cur_t = cur_t + 1) begin : thread_ids_init
            assign thread_ids[cur_t] = cur_t;
        end

        for (cur_tw = 0; cur_tw < `NUM_THREADS; cur_tw = cur_tw + 1) begin : warp_ids_init
            // Zero-extend with a size cast; a hand-computed replication count
            // of (31-`NW_BITS-1) produces a 30-bit value and becomes illegal
            // once the count is no longer positive.
            assign warp_ids[cur_tw] = 32'(csr_req_if.warp_num);
        end
    endgenerate

    assign csr_wb_if.valid    = csr_req_if.valid;
    assign csr_wb_if.warp_num = csr_req_if.warp_num;
    assign csr_wb_if.rd       = csr_req_if.rd;
    assign csr_wb_if.wb       = csr_req_if.wb;

    wire thread_select = csr_req_if.csr_address == 12'h20;
    wire warp_select   = csr_req_if.csr_address == 12'h21;

    assign csr_wb_if.csr_result = thread_select ? thread_ids :
                                  warp_select   ? warp_ids   :
                                                  0;

endmodule

View File

@@ -2,328 +2,328 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_decode( module VX_decode(
// Fetch Inputs // Fetch Inputs
VX_inst_meta_if fd_inst_meta_de, VX_inst_meta_if fd_inst_meta_de,
// Outputs // Outputs
VX_frE_to_bckE_req_if frE_to_bckE_req_if, VX_frE_to_bckE_req_if frE_to_bckE_req_if,
VX_wstall_if wstall_if, VX_wstall_if wstall_if,
VX_join_if join_if, VX_join_if join_if,
output wire terminate_sim output wire terminate_sim
); );
wire[31:0] in_instruction = fd_inst_meta_de.instruction; wire[31:0] in_instruction = fd_inst_meta_de.instruction;
wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc; wire[31:0] in_curr_PC = fd_inst_meta_de.inst_pc;
wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num; wire[`NW_BITS-1:0] in_warp_num = fd_inst_meta_de.warp_num;
assign frE_to_bckE_req_if.curr_PC = in_curr_PC; assign frE_to_bckE_req_if.curr_PC = in_curr_PC;
wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid; wire[`NUM_THREADS-1:0] in_valid = fd_inst_meta_de.valid;
wire[6:0] curr_opcode; wire[6:0] curr_opcode;
wire is_itype; wire is_itype;
wire is_rtype; wire is_rtype;
wire is_stype; wire is_stype;
wire is_btype; wire is_btype;
wire is_linst; wire is_linst;
wire is_jal; wire is_jal;
wire is_jalr; wire is_jalr;
wire is_lui; wire is_lui;
wire is_auipc; wire is_auipc;
wire is_csr; wire is_csr;
wire is_csr_immed; wire is_csr_immed;
wire is_e_inst; wire is_e_inst;
wire is_gpgpu; wire is_gpgpu;
wire is_wspawn; wire is_wspawn;
wire is_tmc; wire is_tmc;
wire is_split; wire is_split;
wire is_join; wire is_join;
wire is_barrier; wire is_barrier;
wire[2:0] func3; wire[2:0] func3;
wire[6:0] func7; wire[6:0] func7;
wire[11:0] u_12; wire[11:0] u_12;
wire[7:0] jal_b_19_to_12; wire[7:0] jal_b_19_to_12;
wire jal_b_11; wire jal_b_11;
wire[9:0] jal_b_10_to_1; wire[9:0] jal_b_10_to_1;
wire jal_b_20; wire jal_b_20;
wire jal_b_0; wire jal_b_0;
wire[20:0] jal_unsigned_offset; wire[20:0] jal_unsigned_offset;
wire[31:0] jal_1_offset; wire[31:0] jal_1_offset;
wire[11:0] jalr_immed; wire[11:0] jalr_immed;
wire[31:0] jal_2_offset; wire[31:0] jal_2_offset;
wire jal_sys_cond1; wire jal_sys_cond1;
wire jal_sys_cond2; wire jal_sys_cond2;
wire jal_sys_jal; wire jal_sys_jal;
wire[31:0] jal_sys_off; wire[31:0] jal_sys_off;
wire csr_cond1; wire csr_cond1;
wire csr_cond2; wire csr_cond2;
wire[11:0] alu_tempp; wire[11:0] alu_tempp;
wire alu_shift_i; wire alu_shift_i;
wire[11:0] alu_shift_i_immed; wire[11:0] alu_shift_i_immed;
wire[1:0] csr_type; wire[1:0] csr_type;
reg[4:0] csr_alu; reg[4:0] csr_alu;
reg[4:0] alu_op; reg[4:0] alu_op;
reg[4:0] mul_alu; reg[4:0] mul_alu;
reg[19:0] temp_upper_immed; reg[19:0] temp_upper_immed;
reg temp_jal; reg temp_jal;
reg[31:0] temp_jal_offset; reg[31:0] temp_jal_offset;
reg[31:0] temp_itype_immed; reg[31:0] temp_itype_immed;
reg[2:0] temp_branch_type; reg[2:0] temp_branch_type;
reg temp_branch_stall; reg temp_branch_stall;
assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid; assign frE_to_bckE_req_if.valid = fd_inst_meta_de.valid;
assign frE_to_bckE_req_if.warp_num = in_warp_num; assign frE_to_bckE_req_if.warp_num = in_warp_num;
assign curr_opcode = in_instruction[6:0]; assign curr_opcode = in_instruction[6:0];
assign frE_to_bckE_req_if.rd = in_instruction[11:7]; assign frE_to_bckE_req_if.rd = in_instruction[11:7];
assign frE_to_bckE_req_if.rs1 = in_instruction[19:15]; assign frE_to_bckE_req_if.rs1 = in_instruction[19:15];
assign frE_to_bckE_req_if.rs2 = in_instruction[24:20]; assign frE_to_bckE_req_if.rs2 = in_instruction[24:20];
assign func3 = in_instruction[14:12]; assign func3 = in_instruction[14:12];
assign func7 = in_instruction[31:25]; assign func7 = in_instruction[31:25];
assign u_12 = in_instruction[31:20]; assign u_12 = in_instruction[31:20];
assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4; assign frE_to_bckE_req_if.PC_next = in_curr_PC + 32'h4;
// Write Back signal // Write Back signal
assign is_rtype = (curr_opcode == `R_INST); assign is_rtype = (curr_opcode == `R_INST);
assign is_linst = (curr_opcode == `L_INST); assign is_linst = (curr_opcode == `L_INST);
assign is_itype = (curr_opcode == `ALU_INST) || is_linst; assign is_itype = (curr_opcode == `ALU_INST) || is_linst;
assign is_stype = (curr_opcode == `S_INST); assign is_stype = (curr_opcode == `S_INST);
assign is_btype = (curr_opcode == `B_INST); assign is_btype = (curr_opcode == `B_INST);
assign is_jal = (curr_opcode == `JAL_INST); assign is_jal = (curr_opcode == `JAL_INST);
assign is_jalr = (curr_opcode == `JALR_INST); assign is_jalr = (curr_opcode == `JALR_INST);
assign is_lui = (curr_opcode == `LUI_INST); assign is_lui = (curr_opcode == `LUI_INST);
assign is_auipc = (curr_opcode == `AUIPC_INST); assign is_auipc = (curr_opcode == `AUIPC_INST);
assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0); assign is_csr = (curr_opcode == `SYS_INST) && (func3 != 0);
assign is_csr_immed = (is_csr) && (func3[2] == 1); assign is_csr_immed = (is_csr) && (func3[2] == 1);
// assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0); // assign is_e_inst = (curr_opcode == `SYS_INST) && (func3 == 0);
assign is_e_inst = in_instruction == 32'h00000073; assign is_e_inst = in_instruction == 32'h00000073;
assign is_gpgpu = (curr_opcode == `GPGPU_INST); assign is_gpgpu = (curr_opcode == `GPGPU_INST);
assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE assign is_tmc = is_gpgpu && (func3 == 0); // Goes to BE
assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE assign is_wspawn = is_gpgpu && (func3 == 1); // Goes to BE
assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE assign is_barrier = is_gpgpu && (func3 == 4); // Goes to BE
assign is_split = is_gpgpu && (func3 == 2); // Goes to BE assign is_split = is_gpgpu && (func3 == 2); // Goes to BE
assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE assign is_join = is_gpgpu && (func3 == 3); // Doesn't go to BE
assign join_if.is_join = is_join; assign join_if.is_join = is_join;
assign join_if.join_warp_num = in_warp_num; assign join_if.join_warp_num = in_warp_num;
assign frE_to_bckE_req_if.is_wspawn = is_wspawn; assign frE_to_bckE_req_if.is_wspawn = is_wspawn;
assign frE_to_bckE_req_if.is_tmc = is_tmc; assign frE_to_bckE_req_if.is_tmc = is_tmc;
assign frE_to_bckE_req_if.is_split = is_split; assign frE_to_bckE_req_if.is_split = is_split;
assign frE_to_bckE_req_if.is_barrier = is_barrier; assign frE_to_bckE_req_if.is_barrier = is_barrier;
assign frE_to_bckE_req_if.csr_immed = is_csr_immed; assign frE_to_bckE_req_if.csr_immed = is_csr_immed;
assign frE_to_bckE_req_if.is_csr = is_csr; assign frE_to_bckE_req_if.is_csr = is_csr;
assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL : assign frE_to_bckE_req_if.wb = (is_jal || is_jalr || is_e_inst) ? `WB_JAL :
is_linst ? `WB_MEM : is_linst ? `WB_MEM :
(is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU : (is_itype || is_rtype || is_lui || is_auipc || is_csr) ? `WB_ALU :
`NO_WB; `NO_WB;
assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG; assign frE_to_bckE_req_if.rs2_src = (is_itype || is_stype) ? `RS2_IMMED : `RS2_REG;
// MEM signals // MEM signals
assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `NO_MEM_READ; assign frE_to_bckE_req_if.mem_read = (is_linst) ? func3 : `NO_MEM_READ;
assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE; assign frE_to_bckE_req_if.mem_write = (is_stype) ? func3 : `NO_MEM_WRITE;
// UPPER IMMEDIATE // UPPER IMMEDIATE
// Upper-immediate select (combinational).
// For the LUI and AUIPC opcodes the 20-bit value is the concatenation
// {func7, rs2, rs1, func3}; with the field slices assigned earlier in this
// module (func7 = [31:25], rs2 = [24:20], rs1 = [19:15], func3 = [14:12])
// that is in_instruction[31:12]. Every other opcode yields 20'h0.
always @(*) begin always @(*) begin
case(curr_opcode) case(curr_opcode)
`LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3}; `LUI_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
`AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3}; `AUIPC_INST: temp_upper_immed = {func7, frE_to_bckE_req_if.rs2, frE_to_bckE_req_if.rs1, func3};
default: temp_upper_immed = 20'h0; default: temp_upper_immed = 20'h0;
endcase // curr_opcode endcase // curr_opcode
end end
assign frE_to_bckE_req_if.upper_immed = temp_upper_immed; assign frE_to_bckE_req_if.upper_immed = temp_upper_immed;
assign jal_b_19_to_12 = in_instruction[19:12]; assign jal_b_19_to_12 = in_instruction[19:12];
assign jal_b_11 = in_instruction[20]; assign jal_b_11 = in_instruction[20];
assign jal_b_10_to_1 = in_instruction[30:21]; assign jal_b_10_to_1 = in_instruction[30:21];
assign jal_b_20 = in_instruction[31]; assign jal_b_20 = in_instruction[31];
assign jal_b_0 = 1'b0; assign jal_b_0 = 1'b0;
assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0}; assign jal_unsigned_offset = {jal_b_20, jal_b_19_to_12, jal_b_11, jal_b_10_to_1, jal_b_0};
assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset}; assign jal_1_offset = {{11{jal_b_20}}, jal_unsigned_offset};
assign jalr_immed = {func7, frE_to_bckE_req_if.rs2}; assign jalr_immed = {func7, frE_to_bckE_req_if.rs2};
assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed}; assign jal_2_offset = {{20{jalr_immed[11]}}, jalr_immed};
assign jal_sys_cond1 = func3 == 3'h0; assign jal_sys_cond1 = func3 == 3'h0;
assign jal_sys_cond2 = u_12 < 12'h2; assign jal_sys_cond2 = u_12 < 12'h2;
assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0; assign jal_sys_jal = (jal_sys_cond1 && jal_sys_cond2) ? 1'b1 : 1'b0;
assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef; assign jal_sys_off = (jal_sys_cond1 && jal_sys_cond2) ? 32'hb0000000 : 32'hdeadbeef;
// JAL // JAL
// Jump decode (combinational): drives temp_jal (jump request) and
// temp_jal_offset (the offset that accompanies it), selected by opcode.
//   JAL_INST  : jump, offset = jal_1_offset (sign-extended 21-bit J offset)
//   JALR_INST : jump, offset = jal_2_offset (sign-extended 12-bit immediate)
//   SYS_INST  : jump only when jal_sys_jal is set (func3 == 0 and u_12 < 2);
//               offset = jal_sys_off
//   default   : no jump; offset is the 32'hdeadbeef filler value
// Every temp_jal value is ANDed with (|in_valid), so a jump is only
// requested when at least one bit of in_valid is set.
always @(*) begin always @(*) begin
case(curr_opcode) case(curr_opcode)
`JAL_INST: `JAL_INST:
begin begin
temp_jal = 1'b1 && (|in_valid); temp_jal = 1'b1 && (|in_valid);
temp_jal_offset = jal_1_offset; temp_jal_offset = jal_1_offset;
end end
`JALR_INST: `JALR_INST:
begin begin
temp_jal = 1'b1 && (|in_valid); temp_jal = 1'b1 && (|in_valid);
temp_jal_offset = jal_2_offset; temp_jal_offset = jal_2_offset;
end end
`SYS_INST: `SYS_INST:
begin begin
// $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) ); // $display("SYS EBREAK %h", (jal_sys_jal && (|in_valid)) );
temp_jal = jal_sys_jal && (|in_valid); temp_jal = jal_sys_jal && (|in_valid);
temp_jal_offset = jal_sys_off; temp_jal_offset = jal_sys_off;
end end
default: default:
begin begin
// 1'b0 && ... is constant 0; the gating term is kept only for symmetry
// with the other arms.
temp_jal = 1'b0 && (|in_valid); temp_jal = 1'b0 && (|in_valid);
temp_jal_offset = 32'hdeadbeef; temp_jal_offset = 32'hdeadbeef;
end end
endcase endcase
end end
assign frE_to_bckE_req_if.jalQual = is_jal; assign frE_to_bckE_req_if.jalQual = is_jal;
assign frE_to_bckE_req_if.jal = temp_jal; assign frE_to_bckE_req_if.jal = temp_jal;
assign frE_to_bckE_req_if.jal_offset = temp_jal_offset; assign frE_to_bckE_req_if.jal_offset = temp_jal_offset;
// wire is_ebreak; // wire is_ebreak;
// assign is_ebreak = is_e_inst; // assign is_ebreak = is_e_inst;
wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid)); wire ebreak = (curr_opcode == `SYS_INST) && (jal_sys_jal && (|in_valid));
assign frE_to_bckE_req_if.ebreak = ebreak; assign frE_to_bckE_req_if.ebreak = ebreak;
assign terminate_sim = is_e_inst; assign terminate_sim = is_e_inst;
// CSR // CSR
assign csr_cond1 = func3 != 3'h0; assign csr_cond1 = func3 != 3'h0;
assign csr_cond2 = u_12 >= 12'h2; assign csr_cond2 = u_12 >= 12'h2;
assign frE_to_bckE_req_if.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55; assign frE_to_bckE_req_if.csr_address = (csr_cond1 && csr_cond2) ? u_12 : 12'h55;
// ITYPE IMMED // ITYPE IMMED
assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5); assign alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2}; assign alu_shift_i_immed = {{7{1'b0}}, frE_to_bckE_req_if.rs2};
assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12; assign alu_tempp = alu_shift_i ? alu_shift_i_immed : u_12;
// 32-bit immediate select (combinational), chosen by opcode. Each arm
// sign-extends a 12-bit field by replicating its top bit 20 times.
//   ALU_INST : alu_tempp (the zero-padded 5-bit shift amount for func3 1/5,
//              otherwise u_12)
//   S_INST   : {func7, rd}, i.e. in_instruction[31:25] and [11:7]
//   L_INST   : u_12, i.e. in_instruction[31:20]
//   B_INST   : {inst[31], inst[7], inst[30:25], inst[11:8]}
//   default  : 32'hdeadbeef filler value
// NOTE(review): the B_INST arm does not append a low zero bit, so the value
// is the branch offset in 2-byte units - presumably the consumer scales it;
// confirm against the branch-target computation.
always @(*) begin always @(*) begin
case(curr_opcode) case(curr_opcode)
`ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp}; `ALU_INST: temp_itype_immed = {{20{alu_tempp[11]}}, alu_tempp};
`S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd}; `S_INST: temp_itype_immed = {{20{func7[6]}}, func7, frE_to_bckE_req_if.rd};
`L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12}; `L_INST: temp_itype_immed = {{20{u_12[11]}}, u_12};
`B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]}; `B_INST: temp_itype_immed = {{20{in_instruction[31]}}, in_instruction[31], in_instruction[7], in_instruction[30:25], in_instruction[11:8]};
default: temp_itype_immed = 32'hdeadbeef; default: temp_itype_immed = 32'hdeadbeef;
endcase endcase
end end
assign frE_to_bckE_req_if.itype_immed = temp_itype_immed; assign frE_to_bckE_req_if.itype_immed = temp_itype_immed;
// Branch decode (combinational): drives temp_branch_type and
// temp_branch_stall, selected by opcode.
//   B_INST            : branch type taken from func3 (table below); func3
//                       values 2 and 3 are not listed and fall through to
//                       NO_BRANCH. Stall is requested.
//   JAL_INST/JALR_INST: NO_BRANCH, but a stall is still requested.
//   default           : NO_BRANCH and no stall.
// The stall is ANDed with (|in_valid), so it is only raised when at least
// one bit of in_valid is set.
always @(*) begin always @(*) begin
case(curr_opcode) case(curr_opcode)
`B_INST: `B_INST:
begin begin
// $display("BRANCH IN DECODE"); // $display("BRANCH IN DECODE");
temp_branch_stall = 1'b1 && (|in_valid); temp_branch_stall = 1'b1 && (|in_valid);
case(func3) case(func3)
3'h0: temp_branch_type = `BEQ; 3'h0: temp_branch_type = `BEQ;
3'h1: temp_branch_type = `BNE; 3'h1: temp_branch_type = `BNE;
3'h4: temp_branch_type = `BLT; 3'h4: temp_branch_type = `BLT;
3'h5: temp_branch_type = `BGT; 3'h5: temp_branch_type = `BGT;
3'h6: temp_branch_type = `BLTU; 3'h6: temp_branch_type = `BLTU;
3'h7: temp_branch_type = `BGTU; 3'h7: temp_branch_type = `BGTU;
default: temp_branch_type = `NO_BRANCH; default: temp_branch_type = `NO_BRANCH;
endcase endcase
end end
`JAL_INST: `JAL_INST:
begin begin
temp_branch_type = `NO_BRANCH; temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && (|in_valid); temp_branch_stall = 1'b1 && (|in_valid);
end end
`JALR_INST: `JALR_INST:
begin begin
temp_branch_type = `NO_BRANCH; temp_branch_type = `NO_BRANCH;
temp_branch_stall = 1'b1 && (|in_valid); temp_branch_stall = 1'b1 && (|in_valid);
end end
default: default:
begin begin
temp_branch_type = `NO_BRANCH; temp_branch_type = `NO_BRANCH;
// 1'b0 && ... is constant 0: no stall for non-branch/non-jump opcodes.
temp_branch_stall = 1'b0 && (|in_valid); temp_branch_stall = 1'b0 && (|in_valid);
end end
endcase endcase
end end
assign frE_to_bckE_req_if.branch_type = temp_branch_type; assign frE_to_bckE_req_if.branch_type = temp_branch_type;
assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid); assign wstall_if.wstall = (temp_branch_stall || is_tmc || is_split || is_barrier) && (|in_valid);
assign wstall_if.warp_num = in_warp_num; assign wstall_if.warp_num = in_warp_num;
// Base integer ALU operation, decoded from func3 (combinational).
// This value is computed for every instruction regardless of opcode; it is
// only used as the final fallback when temp_final_alu is selected further
// down in this module.
//   func3 == 0 : ADD for ALU_INST (immediate form); otherwise ADD when
//                func7 == 0, else SUB
//   func3 == 5 : SRL when func7 == 0, else SRA
// All eight 2-state values of func3 are listed, so the default arm is only
// reached when func3 contains X/Z bits in simulation.
always @(*) begin always @(*) begin
// ALU OP // ALU OP
case(func3) case(func3)
3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB); 3'h0: alu_op = (curr_opcode == `ALU_INST) ? `ADD : (func7 == 7'h0 ? `ADD : `SUB);
3'h1: alu_op = `SLLA; 3'h1: alu_op = `SLLA;
3'h2: alu_op = `SLT; 3'h2: alu_op = `SLT;
3'h3: alu_op = `SLTU; 3'h3: alu_op = `SLTU;
3'h4: alu_op = `XOR; 3'h4: alu_op = `XOR;
3'h5: alu_op = (func7 == 7'h0) ? `SRL : `SRA; 3'h5: alu_op = (func7 == 7'h0) ? `SRL : `SRA;
3'h6: alu_op = `OR; 3'h6: alu_op = `OR;
3'h7: alu_op = `AND; 3'h7: alu_op = `AND;
default: alu_op = `NO_ALU; default: alu_op = `NO_ALU;
endcase endcase
end end
// Multiply/divide operation, decoded from func3 (combinational).
// The result is only used when the final alu_op assignment in this module
// selects it, i.e. for an R-type instruction with func7[0] == 1.
// All eight 2-state values of func3 are listed, so the default arm is only
// reached when func3 contains X/Z bits in simulation.
always @(*) begin always @(*) begin
// MUL/DIV OP // MUL/DIV OP
case(func3) case(func3)
3'h0: mul_alu = `MUL; 3'h0: mul_alu = `MUL;
3'h1: mul_alu = `MULH; 3'h1: mul_alu = `MULH;
3'h2: mul_alu = `MULHSU; 3'h2: mul_alu = `MULHSU;
3'h3: mul_alu = `MULHU; 3'h3: mul_alu = `MULHU;
3'h4: mul_alu = `DIV; 3'h4: mul_alu = `DIV;
3'h5: mul_alu = `DIVU; 3'h5: mul_alu = `DIVU;
3'h6: mul_alu = `REM; 3'h6: mul_alu = `REM;
3'h7: mul_alu = `REMU; 3'h7: mul_alu = `REMU;
default: mul_alu = `NO_ALU; default: mul_alu = `NO_ALU;
endcase endcase
end end
assign csr_type = func3[1:0]; assign csr_type = func3[1:0];
// CSR operation, decoded from csr_type (= func3[1:0]) (combinational).
//   1 -> CSR_ALU_RW, 2 -> CSR_ALU_RS, 3 -> CSR_ALU_RC, 0 -> NO_ALU (default)
// func3[2] is not examined here; it is used separately in this module to
// derive is_csr_immed.
always @(*) begin always @(*) begin
case(csr_type) case(csr_type)
2'h1: csr_alu = `CSR_ALU_RW; 2'h1: csr_alu = `CSR_ALU_RW;
2'h2: csr_alu = `CSR_ALU_RS; 2'h2: csr_alu = `CSR_ALU_RS;
2'h3: csr_alu = `CSR_ALU_RC; 2'h3: csr_alu = `CSR_ALU_RC;
default: csr_alu = `NO_ALU; default: csr_alu = `NO_ALU;
endcase endcase
end end
wire[4:0] temp_final_alu; wire[4:0] temp_final_alu;
assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BLTU) ? `SUB : `SUBU) : assign temp_final_alu = is_btype ? ((frE_to_bckE_req_if.branch_type < `BLTU) ? `SUB : `SUBU) :
is_lui ? `LUI_ALU : is_lui ? `LUI_ALU :
is_auipc ? `AUIPC_ALU : is_auipc ? `AUIPC_ALU :
is_csr ? csr_alu : is_csr ? csr_alu :
(is_stype || is_linst) ? `ADD : (is_stype || is_linst) ? `ADD :
alu_op; alu_op;
assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu; assign frE_to_bckE_req_if.alu_op = ((func7[0] == 1'b1) && is_rtype) ? mul_alu : temp_final_alu;
endmodule endmodule

View File

@@ -1,56 +1,56 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_dmem_ctrl ( module VX_dmem_ctrl (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Dram <-> Dcache // Dram <-> Dcache
VX_gpu_dcache_dram_req_if gpu_dcache_dram_req_if, VX_gpu_dcache_dram_req_if gpu_dcache_dram_req_if,
VX_gpu_dcache_dram_rsp_if gpu_dcache_dram_res_if, VX_gpu_dcache_dram_rsp_if gpu_dcache_dram_res_if,
VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if, VX_gpu_snp_req_rsp_if gpu_dcache_snp_req_if,
// Dram <-> Icache // Dram <-> Icache
VX_gpu_dcache_dram_req_if gpu_icache_dram_req_if, VX_gpu_dcache_dram_req_if gpu_icache_dram_req_if,
VX_gpu_dcache_dram_rsp_if gpu_icache_dram_res_if, VX_gpu_dcache_dram_rsp_if gpu_icache_dram_res_if,
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if, VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if,
// Core <-> Dcache // Core <-> Dcache
VX_gpu_dcache_rsp_if dcache_rsp_if, VX_gpu_dcache_rsp_if dcache_rsp_if,
VX_gpu_dcache_req_if dcache_req_if, VX_gpu_dcache_req_if dcache_req_if,
// Core <-> Icache // Core <-> Icache
VX_gpu_dcache_rsp_if icache_rsp_if, VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if VX_gpu_dcache_req_if icache_req_if
); );
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if(); VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_smem_if();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if(); VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_smem_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if(); VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_dcache_if();
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if(); VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_dcache_if();
wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF; wire to_shm = dcache_req_if.core_req_addr[0][31:24] == 8'hFF;
wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid); wire dcache_wants_wb = (|dcache_rsp_dcache_if.core_rsp_valid);
// Dcache Request // Dcache Request
assign dcache_req_dcache_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~to_shm}}; assign dcache_req_dcache_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~to_shm}};
assign dcache_req_dcache_if.core_req_read = dcache_req_if.core_req_read; assign dcache_req_dcache_if.core_req_read = dcache_req_if.core_req_read;
assign dcache_req_dcache_if.core_req_write = dcache_req_if.core_req_write; assign dcache_req_dcache_if.core_req_write = dcache_req_if.core_req_write;
assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr; assign dcache_req_dcache_if.core_req_addr = dcache_req_if.core_req_addr;
assign dcache_req_dcache_if.core_req_data = dcache_req_if.core_req_data; assign dcache_req_dcache_if.core_req_data = dcache_req_if.core_req_data;
assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd; assign dcache_req_dcache_if.core_req_rd = dcache_req_if.core_req_rd;
assign dcache_req_dcache_if.core_req_wb = dcache_req_if.core_req_wb; assign dcache_req_dcache_if.core_req_wb = dcache_req_if.core_req_wb;
assign dcache_req_dcache_if.core_req_warp_num = dcache_req_if.core_req_warp_num; assign dcache_req_dcache_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
assign dcache_req_dcache_if.core_req_pc = dcache_req_if.core_req_pc; assign dcache_req_dcache_if.core_req_pc = dcache_req_if.core_req_pc;
assign dcache_rsp_dcache_if.core_rsp_ready = dcache_rsp_if.core_rsp_ready; assign dcache_rsp_dcache_if.core_rsp_ready = dcache_rsp_if.core_rsp_ready;
// Shared Memory Request // Shared Memory Request
assign dcache_req_smem_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{to_shm}}; assign dcache_req_smem_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{to_shm}};
assign dcache_req_smem_if.core_req_addr = dcache_req_if.core_req_addr; assign dcache_req_smem_if.core_req_addr = dcache_req_if.core_req_addr;
assign dcache_req_smem_if.core_req_data = dcache_req_if.core_req_data; assign dcache_req_smem_if.core_req_data = dcache_req_if.core_req_data;
assign dcache_req_smem_if.core_req_read = dcache_req_if.core_req_read; assign dcache_req_smem_if.core_req_read = dcache_req_if.core_req_read;
assign dcache_req_smem_if.core_req_write = dcache_req_if.core_req_write; assign dcache_req_smem_if.core_req_write = dcache_req_if.core_req_write;
assign dcache_req_smem_if.core_req_rd = dcache_req_if.core_req_rd; assign dcache_req_smem_if.core_req_rd = dcache_req_if.core_req_rd;
assign dcache_req_smem_if.core_req_wb = dcache_req_if.core_req_wb; assign dcache_req_smem_if.core_req_wb = dcache_req_if.core_req_wb;
assign dcache_req_smem_if.core_req_warp_num = dcache_req_if.core_req_warp_num; assign dcache_req_smem_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
@@ -58,262 +58,262 @@ module VX_dmem_ctrl (
assign dcache_rsp_smem_if.core_rsp_ready = dcache_rsp_if.core_rsp_ready && ~dcache_wants_wb; assign dcache_rsp_smem_if.core_rsp_ready = dcache_rsp_if.core_rsp_ready && ~dcache_wants_wb;
// Dcache Response // Dcache Response
assign dcache_rsp_if.core_rsp_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_valid : dcache_rsp_smem_if.core_rsp_valid; assign dcache_rsp_if.core_rsp_valid = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_valid : dcache_rsp_smem_if.core_rsp_valid;
assign dcache_rsp_if.core_rsp_read = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_read : dcache_rsp_smem_if.core_rsp_read; assign dcache_rsp_if.core_rsp_read = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_read : dcache_rsp_smem_if.core_rsp_read;
assign dcache_rsp_if.core_rsp_write = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_write : dcache_rsp_smem_if.core_rsp_write; assign dcache_rsp_if.core_rsp_write = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_write : dcache_rsp_smem_if.core_rsp_write;
assign dcache_rsp_if.core_rsp_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_pc : dcache_rsp_smem_if.core_rsp_pc; assign dcache_rsp_if.core_rsp_pc = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_pc : dcache_rsp_smem_if.core_rsp_pc;
assign dcache_rsp_if.core_rsp_data = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_data : dcache_rsp_smem_if.core_rsp_data; assign dcache_rsp_if.core_rsp_data = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_data : dcache_rsp_smem_if.core_rsp_data;
assign dcache_rsp_if.core_rsp_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_warp_num : dcache_rsp_smem_if.core_rsp_warp_num; assign dcache_rsp_if.core_rsp_warp_num = dcache_wants_wb ? dcache_rsp_dcache_if.core_rsp_warp_num : dcache_rsp_smem_if.core_rsp_warp_num;
assign dcache_req_if.core_req_ready = to_shm ? dcache_req_smem_if.core_req_ready : dcache_req_dcache_if.core_req_ready; assign dcache_req_if.core_req_ready = to_shm ? dcache_req_smem_if.core_req_ready : dcache_req_dcache_if.core_req_ready;
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if(); VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_res_if(); VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_smem_dram_res_if();
// gpu_smem: VX_cache instance used for shared-memory requests, configured
// entirely from the `S*-prefixed macros.
// Core side: driven through the locally declared dcache_req_smem_if /
// dcache_rsp_smem_if interfaces (core_req_valid is masked with to_shm by the
// assigns earlier in this module).
// DRAM side: wired to the locally declared gpu_smem_dram_req_if /
// gpu_smem_dram_res_if interfaces; dram_req_ready is tied to constant 0.
// Snoop: request inputs and snp_fwd_ready are tied to 0, and the snoop
// outputs are left unconnected.
// NOTE(review): with dram_req_ready tied low this instance presumably never
// completes a DRAM request - confirm that is the intended behaviour.
VX_cache #( VX_cache #(
.CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES), .CACHE_SIZE_BYTES (`SCACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (`SBANK_LINE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (`SBANK_LINE_SIZE_BYTES),
.NUM_BANKS (`SNUM_BANKS), .NUM_BANKS (`SNUM_BANKS),
.WORD_SIZE_BYTES (`SWORD_SIZE_BYTES), .WORD_SIZE_BYTES (`SWORD_SIZE_BYTES),
.NUM_REQUESTS (`SNUM_REQUESTS), .NUM_REQUESTS (`SNUM_REQUESTS),
.STAGE_1_CYCLES (`SSTAGE_1_CYCLES), .STAGE_1_CYCLES (`SSTAGE_1_CYCLES),
.FUNC_ID (`SFUNC_ID), .FUNC_ID (`SFUNC_ID),
.REQQ_SIZE (`SREQQ_SIZE), .REQQ_SIZE (`SREQQ_SIZE),
.MRVQ_SIZE (`SMRVQ_SIZE), .MRVQ_SIZE (`SMRVQ_SIZE),
.DFPQ_SIZE (`SDFPQ_SIZE), .DFPQ_SIZE (`SDFPQ_SIZE),
.SNRQ_SIZE (`SSNRQ_SIZE), .SNRQ_SIZE (`SSNRQ_SIZE),
.CWBQ_SIZE (`SCWBQ_SIZE), .CWBQ_SIZE (`SCWBQ_SIZE),
.DWBQ_SIZE (`SDWBQ_SIZE), .DWBQ_SIZE (`SDWBQ_SIZE),
.DFQQ_SIZE (`SDFQQ_SIZE), .DFQQ_SIZE (`SDFQQ_SIZE),
.LLVQ_SIZE (`SLLVQ_SIZE), .LLVQ_SIZE (`SLLVQ_SIZE),
.FFSQ_SIZE (`SFFSQ_SIZE), .FFSQ_SIZE (`SFFSQ_SIZE),
.PRFQ_SIZE (`SPRFQ_SIZE), .PRFQ_SIZE (`SPRFQ_SIZE),
.PRFQ_STRIDE (`SPRFQ_STRIDE), .PRFQ_STRIDE (`SPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`SFILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES) .SIMULATED_DRAM_LATENCY_CYCLES(`SSIMULATED_DRAM_LATENCY_CYCLES)
) gpu_smem ( ) gpu_smem (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core req // Core req
.core_req_valid (dcache_req_smem_if.core_req_valid), .core_req_valid (dcache_req_smem_if.core_req_valid),
.core_req_read (dcache_req_smem_if.core_req_read), .core_req_read (dcache_req_smem_if.core_req_read),
.core_req_write (dcache_req_smem_if.core_req_write), .core_req_write (dcache_req_smem_if.core_req_write),
.core_req_addr (dcache_req_smem_if.core_req_addr), .core_req_addr (dcache_req_smem_if.core_req_addr),
.core_req_data (dcache_req_smem_if.core_req_data), .core_req_data (dcache_req_smem_if.core_req_data),
.core_req_rd (dcache_req_smem_if.core_req_rd), .core_req_rd (dcache_req_smem_if.core_req_rd),
.core_req_wb (dcache_req_smem_if.core_req_wb), .core_req_wb (dcache_req_smem_if.core_req_wb),
.core_req_warp_num (dcache_req_smem_if.core_req_warp_num), .core_req_warp_num (dcache_req_smem_if.core_req_warp_num),
.core_req_pc (dcache_req_smem_if.core_req_pc), .core_req_pc (dcache_req_smem_if.core_req_pc),
// Can submit core Req // Can submit core Req
.core_req_ready (dcache_req_smem_if.core_req_ready), .core_req_ready (dcache_req_smem_if.core_req_ready),
// Core Cache Can't WB // Core Cache Can't WB
.core_rsp_ready (dcache_rsp_smem_if.core_rsp_ready), .core_rsp_ready (dcache_rsp_smem_if.core_rsp_ready),
// Cache CWB // Cache CWB
.core_rsp_valid (dcache_rsp_smem_if.core_rsp_valid), .core_rsp_valid (dcache_rsp_smem_if.core_rsp_valid),
.core_rsp_read (dcache_rsp_smem_if.core_rsp_read), .core_rsp_read (dcache_rsp_smem_if.core_rsp_read),
.core_rsp_write (dcache_rsp_smem_if.core_rsp_write), .core_rsp_write (dcache_rsp_smem_if.core_rsp_write),
.core_rsp_warp_num (dcache_rsp_smem_if.core_rsp_warp_num), .core_rsp_warp_num (dcache_rsp_smem_if.core_rsp_warp_num),
.core_rsp_data (dcache_rsp_smem_if.core_rsp_data), .core_rsp_data (dcache_rsp_smem_if.core_rsp_data),
.core_rsp_pc (dcache_rsp_smem_if.core_rsp_pc), .core_rsp_pc (dcache_rsp_smem_if.core_rsp_pc),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.core_rsp_addr (), .core_rsp_addr (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
// DRAM response // DRAM response
.dram_rsp_valid (gpu_smem_dram_res_if.dram_rsp_valid), .dram_rsp_valid (gpu_smem_dram_res_if.dram_rsp_valid),
.dram_rsp_addr (gpu_smem_dram_res_if.dram_rsp_addr), .dram_rsp_addr (gpu_smem_dram_res_if.dram_rsp_addr),
.dram_rsp_data (gpu_smem_dram_res_if.dram_rsp_data), .dram_rsp_data (gpu_smem_dram_res_if.dram_rsp_data),
// DRAM accept response // DRAM accept response
.dram_rsp_ready (gpu_smem_dram_req_if.dram_rsp_ready), .dram_rsp_ready (gpu_smem_dram_req_if.dram_rsp_ready),
// DRAM Req // DRAM Req
.dram_req_read (gpu_smem_dram_req_if.dram_req_read), .dram_req_read (gpu_smem_dram_req_if.dram_req_read),
.dram_req_write (gpu_smem_dram_req_if.dram_req_write), .dram_req_write (gpu_smem_dram_req_if.dram_req_write),
.dram_req_addr (gpu_smem_dram_req_if.dram_req_addr), .dram_req_addr (gpu_smem_dram_req_if.dram_req_addr),
.dram_req_data (gpu_smem_dram_req_if.dram_req_data), .dram_req_data (gpu_smem_dram_req_if.dram_req_data),
.dram_req_ready (0), .dram_req_ready (0),
// Snoop Request // Snoop Request
.snp_req_valid (0), .snp_req_valid (0),
.snp_req_addr (0), .snp_req_addr (0),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.snp_req_ready (), .snp_req_ready (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
// Snoop Forward // Snoop Forward
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.snp_fwd_valid (), .snp_fwd_valid (),
.snp_fwd_addr (), .snp_fwd_addr (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
.snp_fwd_ready (0) .snp_fwd_ready (0)
); );
// gpu_dcache: VX_cache instance used as the data cache, configured entirely
// from the `D*-prefixed macros.
// Core side: driven through the locally declared dcache_req_dcache_if /
// dcache_rsp_dcache_if interfaces (core_req_valid is masked with ~to_shm by
// the assigns earlier in this module).
// DRAM side: wired to the module ports gpu_dcache_dram_req_if /
// gpu_dcache_dram_res_if.
// Snoop: the request channel is wired to the module port
// gpu_dcache_snp_req_if; the snoop-forward outputs are left unconnected and
// snp_fwd_ready is tied to 0.
VX_cache #( VX_cache #(
.CACHE_SIZE_BYTES (`DCACHE_SIZE_BYTES), .CACHE_SIZE_BYTES (`DCACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (`DBANK_LINE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (`DBANK_LINE_SIZE_BYTES),
.NUM_BANKS (`DNUM_BANKS), .NUM_BANKS (`DNUM_BANKS),
.WORD_SIZE_BYTES (`DWORD_SIZE_BYTES), .WORD_SIZE_BYTES (`DWORD_SIZE_BYTES),
.NUM_REQUESTS (`DNUM_REQUESTS), .NUM_REQUESTS (`DNUM_REQUESTS),
.STAGE_1_CYCLES (`DSTAGE_1_CYCLES), .STAGE_1_CYCLES (`DSTAGE_1_CYCLES),
.FUNC_ID (`DFUNC_ID), .FUNC_ID (`DFUNC_ID),
.REQQ_SIZE (`DREQQ_SIZE), .REQQ_SIZE (`DREQQ_SIZE),
.MRVQ_SIZE (`DMRVQ_SIZE), .MRVQ_SIZE (`DMRVQ_SIZE),
.DFPQ_SIZE (`DDFPQ_SIZE), .DFPQ_SIZE (`DDFPQ_SIZE),
.SNRQ_SIZE (`DSNRQ_SIZE), .SNRQ_SIZE (`DSNRQ_SIZE),
.CWBQ_SIZE (`DCWBQ_SIZE), .CWBQ_SIZE (`DCWBQ_SIZE),
.DWBQ_SIZE (`DDWBQ_SIZE), .DWBQ_SIZE (`DDWBQ_SIZE),
.DFQQ_SIZE (`DDFQQ_SIZE), .DFQQ_SIZE (`DDFQQ_SIZE),
.LLVQ_SIZE (`DLLVQ_SIZE), .LLVQ_SIZE (`DLLVQ_SIZE),
.FFSQ_SIZE (`DFFSQ_SIZE), .FFSQ_SIZE (`DFFSQ_SIZE),
.PRFQ_SIZE (`DPRFQ_SIZE), .PRFQ_SIZE (`DPRFQ_SIZE),
.PRFQ_STRIDE (`DPRFQ_STRIDE), .PRFQ_STRIDE (`DPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`DFILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES) .SIMULATED_DRAM_LATENCY_CYCLES(`DSIMULATED_DRAM_LATENCY_CYCLES)
) gpu_dcache ( ) gpu_dcache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core req // Core req
.core_req_valid (dcache_req_dcache_if.core_req_valid), .core_req_valid (dcache_req_dcache_if.core_req_valid),
.core_req_read (dcache_req_dcache_if.core_req_read), .core_req_read (dcache_req_dcache_if.core_req_read),
.core_req_write (dcache_req_dcache_if.core_req_write), .core_req_write (dcache_req_dcache_if.core_req_write),
.core_req_addr (dcache_req_dcache_if.core_req_addr), .core_req_addr (dcache_req_dcache_if.core_req_addr),
.core_req_data (dcache_req_dcache_if.core_req_data), .core_req_data (dcache_req_dcache_if.core_req_data),
.core_req_rd (dcache_req_dcache_if.core_req_rd), .core_req_rd (dcache_req_dcache_if.core_req_rd),
.core_req_wb (dcache_req_dcache_if.core_req_wb), .core_req_wb (dcache_req_dcache_if.core_req_wb),
.core_req_warp_num (dcache_req_dcache_if.core_req_warp_num), .core_req_warp_num (dcache_req_dcache_if.core_req_warp_num),
.core_req_pc (dcache_req_dcache_if.core_req_pc), .core_req_pc (dcache_req_dcache_if.core_req_pc),
// Can submit core Req // Can submit core Req
.core_req_ready (dcache_req_dcache_if.core_req_ready), .core_req_ready (dcache_req_dcache_if.core_req_ready),
// Core Cache Can't WB // Core Cache Can't WB
.core_rsp_ready (dcache_rsp_dcache_if.core_rsp_ready), .core_rsp_ready (dcache_rsp_dcache_if.core_rsp_ready),
// Cache CWB // Cache CWB
.core_rsp_valid (dcache_rsp_dcache_if.core_rsp_valid), .core_rsp_valid (dcache_rsp_dcache_if.core_rsp_valid),
.core_rsp_read (dcache_rsp_dcache_if.core_rsp_read), .core_rsp_read (dcache_rsp_dcache_if.core_rsp_read),
.core_rsp_write (dcache_rsp_dcache_if.core_rsp_write), .core_rsp_write (dcache_rsp_dcache_if.core_rsp_write),
.core_rsp_warp_num (dcache_rsp_dcache_if.core_rsp_warp_num), .core_rsp_warp_num (dcache_rsp_dcache_if.core_rsp_warp_num),
.core_rsp_data (dcache_rsp_dcache_if.core_rsp_data), .core_rsp_data (dcache_rsp_dcache_if.core_rsp_data),
.core_rsp_pc (dcache_rsp_dcache_if.core_rsp_pc), .core_rsp_pc (dcache_rsp_dcache_if.core_rsp_pc),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.core_rsp_addr (), .core_rsp_addr (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
// DRAM response // DRAM response
.dram_rsp_valid (gpu_dcache_dram_res_if.dram_rsp_valid), .dram_rsp_valid (gpu_dcache_dram_res_if.dram_rsp_valid),
.dram_rsp_addr (gpu_dcache_dram_res_if.dram_rsp_addr), .dram_rsp_addr (gpu_dcache_dram_res_if.dram_rsp_addr),
.dram_rsp_data (gpu_dcache_dram_res_if.dram_rsp_data), .dram_rsp_data (gpu_dcache_dram_res_if.dram_rsp_data),
// DRAM accept response // DRAM accept response
.dram_rsp_ready (gpu_dcache_dram_req_if.dram_rsp_ready), .dram_rsp_ready (gpu_dcache_dram_req_if.dram_rsp_ready),
// DRAM Req // DRAM Req
.dram_req_read (gpu_dcache_dram_req_if.dram_req_read), .dram_req_read (gpu_dcache_dram_req_if.dram_req_read),
.dram_req_write (gpu_dcache_dram_req_if.dram_req_write), .dram_req_write (gpu_dcache_dram_req_if.dram_req_write),
.dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr), .dram_req_addr (gpu_dcache_dram_req_if.dram_req_addr),
.dram_req_data (gpu_dcache_dram_req_if.dram_req_data), .dram_req_data (gpu_dcache_dram_req_if.dram_req_data),
.dram_req_ready (gpu_dcache_dram_req_if.dram_req_ready), .dram_req_ready (gpu_dcache_dram_req_if.dram_req_ready),
// Snoop Request // Snoop Request
.snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid), .snp_req_valid (gpu_dcache_snp_req_if.snp_req_valid),
.snp_req_addr (gpu_dcache_snp_req_if.snp_req_addr), .snp_req_addr (gpu_dcache_snp_req_if.snp_req_addr),
.snp_req_ready (gpu_dcache_snp_req_if.snp_req_ready), .snp_req_ready (gpu_dcache_snp_req_if.snp_req_ready),
// Snoop Forward // Snoop Forward
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.snp_fwd_valid (), .snp_fwd_valid (),
.snp_fwd_addr (), .snp_fwd_addr (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
.snp_fwd_ready (0) .snp_fwd_ready (0)
); );
VX_cache #( VX_cache #(
.CACHE_SIZE_BYTES (`ICACHE_SIZE_BYTES), .CACHE_SIZE_BYTES (`ICACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (`IBANK_LINE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (`IBANK_LINE_SIZE_BYTES),
.NUM_BANKS (`INUM_BANKS), .NUM_BANKS (`INUM_BANKS),
.WORD_SIZE_BYTES (`IWORD_SIZE_BYTES), .WORD_SIZE_BYTES (`IWORD_SIZE_BYTES),
.NUM_REQUESTS (`INUM_REQUESTS), .NUM_REQUESTS (`INUM_REQUESTS),
.STAGE_1_CYCLES (`ISTAGE_1_CYCLES), .STAGE_1_CYCLES (`ISTAGE_1_CYCLES),
.FUNC_ID (`IFUNC_ID), .FUNC_ID (`IFUNC_ID),
.REQQ_SIZE (`IREQQ_SIZE), .REQQ_SIZE (`IREQQ_SIZE),
.MRVQ_SIZE (`IMRVQ_SIZE), .MRVQ_SIZE (`IMRVQ_SIZE),
.DFPQ_SIZE (`IDFPQ_SIZE), .DFPQ_SIZE (`IDFPQ_SIZE),
.SNRQ_SIZE (`ISNRQ_SIZE), .SNRQ_SIZE (`ISNRQ_SIZE),
.CWBQ_SIZE (`ICWBQ_SIZE), .CWBQ_SIZE (`ICWBQ_SIZE),
.DWBQ_SIZE (`IDWBQ_SIZE), .DWBQ_SIZE (`IDWBQ_SIZE),
.DFQQ_SIZE (`IDFQQ_SIZE), .DFQQ_SIZE (`IDFQQ_SIZE),
.LLVQ_SIZE (`ILLVQ_SIZE), .LLVQ_SIZE (`ILLVQ_SIZE),
.FFSQ_SIZE (`IFFSQ_SIZE), .FFSQ_SIZE (`IFFSQ_SIZE),
.PRFQ_SIZE (`IPRFQ_SIZE), .PRFQ_SIZE (`IPRFQ_SIZE),
.PRFQ_STRIDE (`IPRFQ_STRIDE), .PRFQ_STRIDE (`IPRFQ_STRIDE),
.FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (`IFILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES) .SIMULATED_DRAM_LATENCY_CYCLES(`ISIMULATED_DRAM_LATENCY_CYCLES)
) gpu_icache ( ) gpu_icache (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Core req // Core req
.core_req_valid (icache_req_if.core_req_valid), .core_req_valid (icache_req_if.core_req_valid),
.core_req_read (icache_req_if.core_req_read), .core_req_read (icache_req_if.core_req_read),
.core_req_write (icache_req_if.core_req_write), .core_req_write (icache_req_if.core_req_write),
.core_req_addr (icache_req_if.core_req_addr), .core_req_addr (icache_req_if.core_req_addr),
.core_req_data (icache_req_if.core_req_data), .core_req_data (icache_req_if.core_req_data),
.core_req_rd (icache_req_if.core_req_rd), .core_req_rd (icache_req_if.core_req_rd),
.core_req_wb (icache_req_if.core_req_wb), .core_req_wb (icache_req_if.core_req_wb),
.core_req_warp_num (icache_req_if.core_req_warp_num), .core_req_warp_num (icache_req_if.core_req_warp_num),
.core_req_pc (icache_req_if.core_req_pc), .core_req_pc (icache_req_if.core_req_pc),
// Can submit core Req // Can submit core Req
.core_req_ready (icache_req_if.core_req_ready), .core_req_ready (icache_req_if.core_req_ready),
// Core Cache Can't WB // Core Cache Can't WB
.core_rsp_ready (icache_rsp_if.core_rsp_ready), .core_rsp_ready (icache_rsp_if.core_rsp_ready),
// Cache CWB // Cache CWB
.core_rsp_valid (icache_rsp_if.core_rsp_valid), .core_rsp_valid (icache_rsp_if.core_rsp_valid),
.core_rsp_read (icache_rsp_if.core_rsp_read), .core_rsp_read (icache_rsp_if.core_rsp_read),
.core_rsp_write (icache_rsp_if.core_rsp_write), .core_rsp_write (icache_rsp_if.core_rsp_write),
.core_rsp_warp_num (icache_rsp_if.core_rsp_warp_num), .core_rsp_warp_num (icache_rsp_if.core_rsp_warp_num),
.core_rsp_data (icache_rsp_if.core_rsp_data), .core_rsp_data (icache_rsp_if.core_rsp_data),
.core_rsp_pc (icache_rsp_if.core_rsp_pc), .core_rsp_pc (icache_rsp_if.core_rsp_pc),
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.core_rsp_addr (), .core_rsp_addr (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
// DRAM response // DRAM response
.dram_rsp_valid (gpu_icache_dram_res_if.dram_rsp_valid), .dram_rsp_valid (gpu_icache_dram_res_if.dram_rsp_valid),
.dram_rsp_addr (gpu_icache_dram_res_if.dram_rsp_addr), .dram_rsp_addr (gpu_icache_dram_res_if.dram_rsp_addr),
.dram_rsp_data (gpu_icache_dram_res_if.dram_rsp_data), .dram_rsp_data (gpu_icache_dram_res_if.dram_rsp_data),
// DRAM accept response // DRAM accept response
.dram_rsp_ready (gpu_icache_dram_req_if.dram_rsp_ready), .dram_rsp_ready (gpu_icache_dram_req_if.dram_rsp_ready),
// DRAM Req // DRAM Req
.dram_req_read (gpu_icache_dram_req_if.dram_req_read), .dram_req_read (gpu_icache_dram_req_if.dram_req_read),
.dram_req_write (gpu_icache_dram_req_if.dram_req_write), .dram_req_write (gpu_icache_dram_req_if.dram_req_write),
.dram_req_addr (gpu_icache_dram_req_if.dram_req_addr), .dram_req_addr (gpu_icache_dram_req_if.dram_req_addr),
.dram_req_data (gpu_icache_dram_req_if.dram_req_data), .dram_req_data (gpu_icache_dram_req_if.dram_req_data),
.dram_req_ready (gpu_icache_dram_req_if.dram_req_ready), .dram_req_ready (gpu_icache_dram_req_if.dram_req_ready),
// Snoop Request // Snoop Request
.snp_req_valid (gpu_icache_snp_req_if.snp_req_valid), .snp_req_valid (gpu_icache_snp_req_if.snp_req_valid),
.snp_req_addr (gpu_icache_snp_req_if.snp_req_addr), .snp_req_addr (gpu_icache_snp_req_if.snp_req_addr),
.snp_req_ready (gpu_icache_snp_req_if.snp_req_ready), .snp_req_ready (gpu_icache_snp_req_if.snp_req_ready),
// Snoop Forward // Snoop Forward
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
.snp_fwd_valid (), .snp_fwd_valid (),
.snp_fwd_addr (), .snp_fwd_addr (),
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
.snp_fwd_ready (0) .snp_fwd_ready (0)
); );
endmodule endmodule

View File

@@ -1,183 +1,183 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_exec_unit
// Instantiates one VX_alu per thread, drives the writeback interface directly
// from the ALU outputs, and passes the JAL and branch responses through
// VX_generic_register instances before they reach jal_rsp_if / branch_rsp_if.
// `delay` is asserted when no_slot_exec is set or when any ALU reports a stall.
module VX_exec_unit ( module VX_exec_unit (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Request // Request
VX_exec_unit_req_if exec_unit_req_if, VX_exec_unit_req_if exec_unit_req_if,
// Output // Output
// Writeback // Writeback
VX_inst_exec_wb_if inst_exec_wb_if, VX_inst_exec_wb_if inst_exec_wb_if,
// JAL Response // JAL Response
VX_jal_rsp_if jal_rsp_if, VX_jal_rsp_if jal_rsp_if,
// Branch Response // Branch Response
VX_branch_rsp_if branch_rsp_if, VX_branch_rsp_if branch_rsp_if,
input wire no_slot_exec, input wire no_slot_exec,
output wire delay output wire delay
); );
// Local copies of the request-interface fields used below.
wire[`NUM_THREADS-1:0][31:0] in_a_reg_data; wire[`NUM_THREADS-1:0][31:0] in_a_reg_data;
wire[`NUM_THREADS-1:0][31:0] in_b_reg_data; wire[`NUM_THREADS-1:0][31:0] in_b_reg_data;
wire[4:0] in_alu_op; wire[4:0] in_alu_op;
wire in_rs2_src; wire in_rs2_src;
wire[31:0] in_itype_immed; wire[31:0] in_itype_immed;
// in_branch_type is assigned but not read in this module (the case statement
// below reads exec_unit_req_if.branch_type directly).
`DEBUG_BEGIN `DEBUG_BEGIN
wire[2:0] in_branch_type; wire[2:0] in_branch_type;
`DEBUG_END `DEBUG_END
wire[19:0] in_upper_immed; wire[19:0] in_upper_immed;
wire in_jal; wire in_jal;
wire[31:0] in_jal_offset; wire[31:0] in_jal_offset;
wire[31:0] in_curr_PC; wire[31:0] in_curr_PC;
assign in_a_reg_data = exec_unit_req_if.a_reg_data; assign in_a_reg_data = exec_unit_req_if.a_reg_data;
assign in_b_reg_data = exec_unit_req_if.b_reg_data; assign in_b_reg_data = exec_unit_req_if.b_reg_data;
assign in_alu_op = exec_unit_req_if.alu_op; assign in_alu_op = exec_unit_req_if.alu_op;
assign in_rs2_src = exec_unit_req_if.rs2_src; assign in_rs2_src = exec_unit_req_if.rs2_src;
assign in_itype_immed = exec_unit_req_if.itype_immed; assign in_itype_immed = exec_unit_req_if.itype_immed;
assign in_branch_type = exec_unit_req_if.branch_type; assign in_branch_type = exec_unit_req_if.branch_type;
assign in_upper_immed = exec_unit_req_if.upper_immed; assign in_upper_immed = exec_unit_req_if.upper_immed;
assign in_jal = exec_unit_req_if.jal; assign in_jal = exec_unit_req_if.jal;
assign in_jal_offset = exec_unit_req_if.jal_offset; assign in_jal_offset = exec_unit_req_if.jal_offset;
assign in_curr_PC = exec_unit_req_if.curr_PC; assign in_curr_PC = exec_unit_req_if.curr_PC;
wire[`NUM_THREADS-1:0][31:0] alu_result; wire[`NUM_THREADS-1:0][31:0] alu_result;
wire[`NUM_THREADS-1:0] alu_stall; wire[`NUM_THREADS-1:0] alu_stall;
// One ALU per thread. All threads share the opcode, immediates and PC; only
// the two register operands are per-thread.
genvar index_out_reg; genvar index_out_reg;
generate generate
for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs for (index_out_reg = 0; index_out_reg < `NUM_THREADS; index_out_reg = index_out_reg + 1) begin : alu_defs
VX_alu alu( VX_alu alu(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.src_a (in_a_reg_data[index_out_reg]), .src_a (in_a_reg_data[index_out_reg]),
.src_b (in_b_reg_data[index_out_reg]), .src_b (in_b_reg_data[index_out_reg]),
.src_rs2 (in_rs2_src), .src_rs2 (in_rs2_src),
.itype_immed (in_itype_immed), .itype_immed (in_itype_immed),
.upper_immed (in_upper_immed), .upper_immed (in_upper_immed),
.alu_op (in_alu_op), .alu_op (in_alu_op),
.curr_PC (in_curr_PC), .curr_PC (in_curr_PC),
.alu_result (alu_result[index_out_reg]), .alu_result (alu_result[index_out_reg]),
.alu_stall (alu_stall[index_out_reg]) .alu_stall (alu_stall[index_out_reg])
); );
end end
endgenerate endgenerate
// A stall reported by any single thread's ALU stalls the whole unit.
wire internal_stall; wire internal_stall;
assign internal_stall = |alu_stall; assign internal_stall = |alu_stall;
assign delay = no_slot_exec || internal_stall; assign delay = no_slot_exec || internal_stall;
// Select one thread index from the valid mask. That thread's ALU result is
// used for the branch decision and its rs1 value for the JAL target.
// The selection policy is defined by VX_generic_priority_encoder (not visible here).
`DEBUG_BEGIN `DEBUG_BEGIN
wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index; wire [$clog2(`NUM_THREADS)-1:0] jal_branch_use_index;
wire jal_branch_found_valid; wire jal_branch_found_valid;
`DEBUG_END `DEBUG_END
VX_generic_priority_encoder #( VX_generic_priority_encoder #(
.N(`NUM_THREADS) .N(`NUM_THREADS)
) choose_alu_result ( ) choose_alu_result (
.valids(exec_unit_req_if.valid), .valids(exec_unit_req_if.valid),
.index (jal_branch_use_index), .index (jal_branch_use_index),
.found (jal_branch_found_valid) .found (jal_branch_found_valid)
); );
wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index]; wire[31:0] branch_use_alu_result = alu_result[jal_branch_use_index];
// Branch direction from the selected thread's ALU result: BEQ/BNE test the
// whole result against zero, the less-than / greater-than variants test bit 31.
// NOTE(review): presumably VX_alu produces a comparison value suited to each
// branch op (including the unsigned BLTU/BGTU cases) -- confirm against VX_alu.
reg temp_branch_dir; reg temp_branch_dir;
always @(*) always @(*)
begin begin
case (exec_unit_req_if.branch_type) case (exec_unit_req_if.branch_type)
`BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN; `BEQ: temp_branch_dir = (branch_use_alu_result == 0) ? `TAKEN : `NOT_TAKEN;
`BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN; `BNE: temp_branch_dir = (branch_use_alu_result == 0) ? `NOT_TAKEN : `TAKEN;
`BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN; `BLT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN; `BGT: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
`BLTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN; `BLTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `NOT_TAKEN : `TAKEN;
`BGTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN; `BGTU: temp_branch_dir = (branch_use_alu_result[31] == 0) ? `TAKEN : `NOT_TAKEN;
`NO_BRANCH: temp_branch_dir = `NOT_TAKEN; `NO_BRANCH: temp_branch_dir = `NOT_TAKEN;
default: temp_branch_dir = `NOT_TAKEN; default: temp_branch_dir = `NOT_TAKEN;
endcase // in_branch_type endcase // in_branch_type
end end
// PC_next replicated for every thread; written back instead of the ALU result
// when the request is a JAL (see inst_exec_wb_if.alu_result below).
wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data; wire[`NUM_THREADS-1:0][31:0] duplicate_PC_data;
genvar i; genvar i;
generate generate
for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup for (i = 0; i < `NUM_THREADS; i=i+1) begin : pc_data_setup
assign duplicate_PC_data[i] = exec_unit_req_if.PC_next; assign duplicate_PC_data[i] = exec_unit_req_if.PC_next;
end end
endgenerate endgenerate
// VX_inst_exec_wb_if inst_exec_wb_temp_if(); // VX_inst_exec_wb_if inst_exec_wb_temp_if();
// JAL Response // JAL Response
VX_jal_rsp_if jal_rsp_temp_if(); VX_jal_rsp_if jal_rsp_temp_if();
// Branch Response // Branch Response
VX_branch_rsp_if branch_rsp_temp_if(); VX_branch_rsp_if branch_rsp_temp_if();
// Actual Writeback // Actual Writeback
assign inst_exec_wb_if.rd = exec_unit_req_if.rd; assign inst_exec_wb_if.rd = exec_unit_req_if.rd;
assign inst_exec_wb_if.wb = exec_unit_req_if.wb; assign inst_exec_wb_if.wb = exec_unit_req_if.wb;
// Writeback valid bits are cleared for all threads while any ALU is stalled.
assign inst_exec_wb_if.wb_valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}}; assign inst_exec_wb_if.wb_valid = exec_unit_req_if.valid & {`NUM_THREADS{!internal_stall}};
assign inst_exec_wb_if.wb_warp_num = exec_unit_req_if.warp_num; assign inst_exec_wb_if.wb_warp_num = exec_unit_req_if.warp_num;
assign inst_exec_wb_if.alu_result = exec_unit_req_if.jal ? duplicate_PC_data : alu_result; assign inst_exec_wb_if.alu_result = exec_unit_req_if.jal ? duplicate_PC_data : alu_result;
assign inst_exec_wb_if.exec_wb_pc = in_curr_PC; assign inst_exec_wb_if.exec_wb_pc = in_curr_PC;
// Jal rsp // Jal rsp
assign jal_rsp_temp_if.jal = in_jal; assign jal_rsp_temp_if.jal = in_jal;
// JAL target = selected thread's rs1 value + jal_offset (signed add).
assign jal_rsp_temp_if.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset); assign jal_rsp_temp_if.jal_dest = $signed(in_a_reg_data[jal_branch_use_index]) + $signed(in_jal_offset);
assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num; assign jal_rsp_temp_if.jal_warp_num = exec_unit_req_if.warp_num;
// Branch rsp // Branch rsp
// A branch response is only valid when the request is a branch and at least
// one thread is valid.
assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid); assign branch_rsp_temp_if.valid_branch = (exec_unit_req_if.branch_type != `NO_BRANCH) && (|exec_unit_req_if.valid);
assign branch_rsp_temp_if.branch_dir = temp_branch_dir; assign branch_rsp_temp_if.branch_dir = temp_branch_dir;
assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num; assign branch_rsp_temp_if.branch_warp_num = exec_unit_req_if.warp_num;
assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset assign branch_rsp_temp_if.branch_dest = $signed(exec_unit_req_if.curr_PC) + ($signed(exec_unit_req_if.itype_immed) << 1); // itype_immed = branch_offset
// Constant 0 tied to the stall and flush inputs of the response registers below.
wire zero = 0; wire zero = 0;
// VX_generic_register #(.N(174)) exec_reg( // VX_generic_register #(.N(174)) exec_reg(
// .clk (clk), // .clk (clk),
// .reset(reset), // .reset(reset),
// .stall(zero), // .stall(zero),
// .flush(zero), // .flush(zero),
// .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}), // .in ({inst_exec_wb_temp_if.rd, inst_exec_wb_temp_if.wb, inst_exec_wb_temp_if.wb_valid, inst_exec_wb_temp_if.wb_warp_num, inst_exec_wb_temp_if.alu_result, inst_exec_wb_temp_if.exec_wb_pc}),
// .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc }) // .out ({inst_exec_wb_if.rd , inst_exec_wb_if.wb , inst_exec_wb_if.wb_valid , inst_exec_wb_if.wb_warp_num , inst_exec_wb_if.alu_result , inst_exec_wb_if.exec_wb_pc })
// ); // );
// JAL response register. N evaluates to 33 + NW_BITS; presumably
// 1 (jal) + 32 (jal_dest) + NW_BITS (jal_warp_num) -- field widths are
// declared in VX_jal_rsp_if, confirm there.
VX_generic_register #( VX_generic_register #(
.N(33 + `NW_BITS-1 + 1) .N(33 + `NW_BITS-1 + 1)
) jal_reg ( ) jal_reg (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zero), .stall(zero),
.flush(zero), .flush(zero),
.in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}), .in ({jal_rsp_temp_if.jal, jal_rsp_temp_if.jal_dest, jal_rsp_temp_if.jal_warp_num}),
.out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num}) .out ({jal_rsp_if.jal , jal_rsp_if.jal_dest , jal_rsp_if.jal_warp_num})
); );
// Branch response register. N evaluates to 34 + NW_BITS; presumably
// 1 (valid_branch) + 1 (branch_dir) + NW_BITS (warp num) + 32 (branch_dest)
// -- field widths are declared in VX_branch_rsp_if, confirm there.
VX_generic_register #( VX_generic_register #(
.N(34 + `NW_BITS-1 + 1) .N(34 + `NW_BITS-1 + 1)
) branch_reg ( ) branch_reg (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zero), .stall(zero),
.flush(zero), .flush(zero),
.in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}), .in ({branch_rsp_temp_if.valid_branch, branch_rsp_temp_if.branch_dir, branch_rsp_temp_if.branch_warp_num, branch_rsp_temp_if.branch_dest}),
.out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest }) .out ({branch_rsp_if.valid_branch , branch_rsp_if.branch_dir , branch_rsp_if.branch_warp_num , branch_rsp_if.branch_dest })
); );
// always @(*) begin // always @(*) begin
// case(in_alu_op) // case(in_alu_op)
// `CSR_ALU_RW: out_csr_result = in_csr_mask; // `CSR_ALU_RW: out_csr_result = in_csr_mask;
// `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask; // `CSR_ALU_RS: out_csr_result = in_csr_data | in_csr_mask;
// `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask); // `CSR_ALU_RC: out_csr_result = in_csr_data & (32'hFFFFFFFF - in_csr_mask);
// default: out_csr_result = 32'hdeadbeef; // default: out_csr_result = 32'hdeadbeef;
// endcase // endcase
// end // end
// assign out_is_csr = exec_unit_req_if.is_csr; // assign out_is_csr = exec_unit_req_if.is_csr;
// assign out_csr_address = exec_unit_req_if.csr_address; // assign out_csr_address = exec_unit_req_if.csr_address;
endmodule : VX_exec_unit endmodule : VX_exec_unit

View File

@@ -1,102 +1,102 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_fetch
// Wraps VX_warp_sched and drives the fetch-stage instruction metadata
// (warp number, thread mask, PC) on fe_inst_meta_fi from the scheduler outputs.
// The instruction field is driven to constant zero in this module.
module VX_fetch ( module VX_fetch (
input wire clk, input wire clk,
input wire reset, input wire reset,
VX_wstall_if wstall_if, VX_wstall_if wstall_if,
VX_join_if join_if, VX_join_if join_if,
input wire schedule_delay, input wire schedule_delay,
input wire icache_stage_delay, input wire icache_stage_delay,
input wire[`NW_BITS-1:0] icache_stage_wid, input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids, input wire[`NUM_THREADS-1:0] icache_stage_valids,
output wire ebreak, output wire ebreak,
VX_jal_rsp_if jal_rsp_if, VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if, VX_branch_rsp_if branch_rsp_if,
VX_inst_meta_if fe_inst_meta_fi, VX_inst_meta_if fe_inst_meta_fi,
VX_warp_ctl_if warp_ctl_if VX_warp_ctl_if warp_ctl_if
); );
wire[`NUM_THREADS-1:0] thread_mask; wire[`NUM_THREADS-1:0] thread_mask;
wire[`NW_BITS-1:0] warp_num; wire[`NW_BITS-1:0] warp_num;
wire[31:0] warp_pc; wire[31:0] warp_pc;
wire scheduled_warp; wire scheduled_warp;
wire pipe_stall; wire pipe_stall;
// Only reason this is there is because there is a hidden assumption that decode is exactly after fetch // Only reason this is there is because there is a hidden assumption that decode is exactly after fetch
// Locals // Locals
// The scheduler is stalled when either schedule_delay or icache_stage_delay is set.
assign pipe_stall = schedule_delay || icache_stage_delay; assign pipe_stall = schedule_delay || icache_stage_delay;
// Several scheduler inputs (barrier, ctm, whalt, split) are fed the same
// warp_ctl_if.warp_num.
VX_warp_sched warp_sched ( VX_warp_sched warp_sched (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall (pipe_stall), .stall (pipe_stall),
.is_barrier (warp_ctl_if.is_barrier), .is_barrier (warp_ctl_if.is_barrier),
.barrier_id (warp_ctl_if.barrier_id), .barrier_id (warp_ctl_if.barrier_id),
.num_warps (warp_ctl_if.num_warps), .num_warps (warp_ctl_if.num_warps),
.barrier_warp_num (warp_ctl_if.warp_num), .barrier_warp_num (warp_ctl_if.warp_num),
// Wspawn // Wspawn
.wspawn (warp_ctl_if.wspawn), .wspawn (warp_ctl_if.wspawn),
// NOTE(review): port name is spelled "wsapwn_pc"; presumably this matches the
// declaration in VX_warp_sched -- confirm before renaming.
.wsapwn_pc (warp_ctl_if.wspawn_pc), .wsapwn_pc (warp_ctl_if.wspawn_pc),
.wspawn_new_active(warp_ctl_if.wspawn_new_active), .wspawn_new_active(warp_ctl_if.wspawn_new_active),
// CTM // CTM
.ctm (warp_ctl_if.change_mask), .ctm (warp_ctl_if.change_mask),
.ctm_mask (warp_ctl_if.thread_mask), .ctm_mask (warp_ctl_if.thread_mask),
.ctm_warp_num (warp_ctl_if.warp_num), .ctm_warp_num (warp_ctl_if.warp_num),
// WHALT // WHALT
.whalt (warp_ctl_if.ebreak), .whalt (warp_ctl_if.ebreak),
.whalt_warp_num (warp_ctl_if.warp_num), .whalt_warp_num (warp_ctl_if.warp_num),
// Wstall // Wstall
.wstall (wstall_if.wstall), .wstall (wstall_if.wstall),
.wstall_warp_num (wstall_if.warp_num), .wstall_warp_num (wstall_if.warp_num),
// Lock/release Stuff // Lock/release Stuff
.icache_stage_valids(icache_stage_valids), .icache_stage_valids(icache_stage_valids),
.icache_stage_wid (icache_stage_wid), .icache_stage_wid (icache_stage_wid),
// Join // Join
.is_join (join_if.is_join), .is_join (join_if.is_join),
.join_warp_num (join_if.join_warp_num), .join_warp_num (join_if.join_warp_num),
// Split // Split
.is_split (warp_ctl_if.is_split), .is_split (warp_ctl_if.is_split),
.dont_split (warp_ctl_if.dont_split), .dont_split (warp_ctl_if.dont_split),
.split_new_mask (warp_ctl_if.split_new_mask), .split_new_mask (warp_ctl_if.split_new_mask),
.split_later_mask (warp_ctl_if.split_later_mask), .split_later_mask (warp_ctl_if.split_later_mask),
.split_save_pc (warp_ctl_if.split_save_pc), .split_save_pc (warp_ctl_if.split_save_pc),
.split_warp_num (warp_ctl_if.warp_num), .split_warp_num (warp_ctl_if.warp_num),
// JAL // JAL
.jal (jal_rsp_if.jal), .jal (jal_rsp_if.jal),
.jal_dest (jal_rsp_if.jal_dest), .jal_dest (jal_rsp_if.jal_dest),
.jal_warp_num (jal_rsp_if.jal_warp_num), .jal_warp_num (jal_rsp_if.jal_warp_num),
// Branch // Branch
.branch_valid (branch_rsp_if.valid_branch), .branch_valid (branch_rsp_if.valid_branch),
.branch_dir (branch_rsp_if.branch_dir), .branch_dir (branch_rsp_if.branch_dir),
.branch_dest (branch_rsp_if.branch_dest), .branch_dest (branch_rsp_if.branch_dest),
.branch_warp_num (branch_rsp_if.branch_warp_num), .branch_warp_num (branch_rsp_if.branch_warp_num),
// Outputs // Outputs
.thread_mask (thread_mask), .thread_mask (thread_mask),
.warp_num (warp_num), .warp_num (warp_num),
.warp_pc (warp_pc), .warp_pc (warp_pc),
.ebreak (ebreak), .ebreak (ebreak),
.scheduled_warp (scheduled_warp) .scheduled_warp (scheduled_warp)
); );
// Fetch metadata: the thread mask doubles as the per-thread valid vector.
assign fe_inst_meta_fi.warp_num = warp_num; assign fe_inst_meta_fi.warp_num = warp_num;
assign fe_inst_meta_fi.valid = thread_mask; assign fe_inst_meta_fi.valid = thread_mask;
// The instruction word is driven to constant zero here.
assign fe_inst_meta_fi.instruction = 32'h0; assign fe_inst_meta_fi.instruction = 32'h0;
assign fe_inst_meta_fi.inst_pc = warp_pc; assign fe_inst_meta_fi.inst_pc = warp_pc;
// Debug-only markers: asserted when warp 0 is scheduled at one of two
// hard-coded PCs. They drive nothing else in this module.
`DEBUG_BEGIN `DEBUG_BEGIN
wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0); wire start_mat_add = scheduled_warp && (warp_pc == 32'h80000ed8) && (warp_num == 0);
wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0); wire end_mat_add = scheduled_warp && (warp_pc == 32'h80000fbc) && (warp_num == 0);
`DEBUG_END `DEBUG_END
endmodule endmodule

View File

@@ -1,110 +1,110 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_front_end
// Structural wiring of the front-end stages in this order:
//   fetch -> f_i_reg -> icache_stage -> i_d_reg -> decode -> d_e_reg
// The output of d_e_reg is exposed on bckE_req_if.
module VX_front_end ( module VX_front_end (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire schedule_delay, input wire schedule_delay,
VX_warp_ctl_if warp_ctl_if, VX_warp_ctl_if warp_ctl_if,
VX_gpu_dcache_rsp_if icache_rsp_if, VX_gpu_dcache_rsp_if icache_rsp_if,
VX_gpu_dcache_req_if icache_req_if, VX_gpu_dcache_req_if icache_req_if,
VX_jal_rsp_if jal_rsp_if, VX_jal_rsp_if jal_rsp_if,
VX_branch_rsp_if branch_rsp_if, VX_branch_rsp_if branch_rsp_if,
VX_frE_to_bckE_req_if bckE_req_if, VX_frE_to_bckE_req_if bckE_req_if,
output wire fetch_ebreak output wire fetch_ebreak
); );
// Interface instances carrying instruction metadata between the stages.
VX_inst_meta_if fe_inst_meta_fi(); VX_inst_meta_if fe_inst_meta_fi();
VX_inst_meta_if fe_inst_meta_fi2(); VX_inst_meta_if fe_inst_meta_fi2();
VX_inst_meta_if fe_inst_meta_id(); VX_inst_meta_if fe_inst_meta_id();
VX_frE_to_bckE_req_if frE_to_bckE_req_if(); VX_frE_to_bckE_req_if frE_to_bckE_req_if();
VX_inst_meta_if fd_inst_meta_de(); VX_inst_meta_if fd_inst_meta_de();
// schedule_delay is used as the freeze input of every pipeline register below.
wire total_freeze = schedule_delay; wire total_freeze = schedule_delay;
wire icache_stage_delay; wire icache_stage_delay;
wire vortex_ebreak; wire vortex_ebreak;
wire terminate_sim; wire terminate_sim;
wire[`NW_BITS-1:0] icache_stage_wid; wire[`NW_BITS-1:0] icache_stage_wid;
wire[`NUM_THREADS-1:0] icache_stage_valids; wire[`NUM_THREADS-1:0] icache_stage_valids;
// fetch_ebreak is raised by either the fetch stage's ebreak or decode's terminate_sim.
assign fetch_ebreak = vortex_ebreak || terminate_sim; assign fetch_ebreak = vortex_ebreak || terminate_sim;
// wstall_if and join_if connect decode back to fetch.
VX_wstall_if wstall_if(); VX_wstall_if wstall_if();
VX_join_if join_if(); VX_join_if join_if();
VX_fetch fetch( VX_fetch fetch(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.icache_stage_wid (icache_stage_wid), .icache_stage_wid (icache_stage_wid),
.icache_stage_valids(icache_stage_valids), .icache_stage_valids(icache_stage_valids),
.wstall_if (wstall_if), .wstall_if (wstall_if),
.join_if (join_if), .join_if (join_if),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
.jal_rsp_if (jal_rsp_if), .jal_rsp_if (jal_rsp_if),
.warp_ctl_if (warp_ctl_if), .warp_ctl_if (warp_ctl_if),
.icache_stage_delay (icache_stage_delay), .icache_stage_delay (icache_stage_delay),
.branch_rsp_if (branch_rsp_if), .branch_rsp_if (branch_rsp_if),
.ebreak (vortex_ebreak), // fetch_ebreak .ebreak (vortex_ebreak), // fetch_ebreak
.fe_inst_meta_fi (fe_inst_meta_fi) .fe_inst_meta_fi (fe_inst_meta_fi)
); );
// The fetch -> icache register additionally holds while the icache stage is delaying.
wire freeze_fi_reg = total_freeze || icache_stage_delay; wire freeze_fi_reg = total_freeze || icache_stage_delay;
VX_f_d_reg f_i_reg( VX_f_d_reg f_i_reg(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.freeze (freeze_fi_reg), .freeze (freeze_fi_reg),
.fe_inst_meta_fd(fe_inst_meta_fi), .fe_inst_meta_fd(fe_inst_meta_fi),
.fd_inst_meta_de(fe_inst_meta_fi2) .fd_inst_meta_de(fe_inst_meta_fi2)
); );
VX_icache_stage icache_stage( VX_icache_stage icache_stage(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.total_freeze (total_freeze), .total_freeze (total_freeze),
.icache_stage_delay (icache_stage_delay), .icache_stage_delay (icache_stage_delay),
.icache_stage_valids(icache_stage_valids), .icache_stage_valids(icache_stage_valids),
.icache_stage_wid (icache_stage_wid), .icache_stage_wid (icache_stage_wid),
.fe_inst_meta_fi (fe_inst_meta_fi2), .fe_inst_meta_fi (fe_inst_meta_fi2),
.fe_inst_meta_id (fe_inst_meta_id), .fe_inst_meta_id (fe_inst_meta_id),
.icache_rsp_if (icache_rsp_if), .icache_rsp_if (icache_rsp_if),
.icache_req_if (icache_req_if) .icache_req_if (icache_req_if)
); );
VX_i_d_reg i_d_reg( VX_i_d_reg i_d_reg(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.freeze (total_freeze), .freeze (total_freeze),
.fe_inst_meta_fd (fe_inst_meta_id), .fe_inst_meta_fd (fe_inst_meta_id),
.fd_inst_meta_de (fd_inst_meta_de) .fd_inst_meta_de (fd_inst_meta_de)
); );
// Decode has no clk/reset connections here.
VX_decode decode( VX_decode decode(
.fd_inst_meta_de (fd_inst_meta_de), .fd_inst_meta_de (fd_inst_meta_de),
.frE_to_bckE_req_if (frE_to_bckE_req_if), .frE_to_bckE_req_if (frE_to_bckE_req_if),
.wstall_if (wstall_if), .wstall_if (wstall_if),
.join_if (join_if), .join_if (join_if),
.terminate_sim (terminate_sim) .terminate_sim (terminate_sim)
); );
// The branch_stall input of d_e_reg is tied off to 0.
wire no_br_stall = 0; wire no_br_stall = 0;
VX_d_e_reg d_e_reg( VX_d_e_reg d_e_reg(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.branch_stall (no_br_stall), .branch_stall (no_br_stall),
.freeze (total_freeze), .freeze (total_freeze),
.frE_to_bckE_req_if (frE_to_bckE_req_if), .frE_to_bckE_req_if (frE_to_bckE_req_if),
.bckE_req_if (bckE_req_if) .bckE_req_if (bckE_req_if)
); );
endmodule endmodule

View File

@@ -1,93 +1,93 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_gpgpu_inst
// Purely combinational translation of a GPU instruction request
// (tmc, wspawn, barrier, split) into warp-control signals on warp_ctl_if.
module VX_gpgpu_inst ( module VX_gpgpu_inst (
// Input // Input
VX_gpu_inst_req_if gpu_inst_req_if, VX_gpu_inst_req_if gpu_inst_req_if,
// Output // Output
VX_warp_ctl_if warp_ctl_if VX_warp_ctl_if warp_ctl_if
); );
wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid; wire[`NUM_THREADS-1:0] curr_valids = gpu_inst_req_if.valid;
wire is_split = (gpu_inst_req_if.is_split); wire is_split = (gpu_inst_req_if.is_split);
// tmc: build the new thread mask from thread 0's a_reg_data value. Bit t is set
// when t < value, and every bit is set when the value exceeds NUM_THREADS.
wire[`NUM_THREADS-1:0] tmc_new_mask; wire[`NUM_THREADS-1:0] tmc_new_mask;
wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0]; wire all_threads = `NUM_THREADS < gpu_inst_req_if.a_reg_data[0];
genvar curr_t; genvar curr_t;
generate generate
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin : tmc_new_mask_init
assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0]; assign tmc_new_mask[curr_t] = all_threads ? 1 : curr_t < gpu_inst_req_if.a_reg_data[0];
end end
endgenerate endgenerate
// The request is treated as a valid instruction when at least one thread is valid.
wire valid_inst = (|curr_valids); wire valid_inst = (|curr_valids);
assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num; assign warp_ctl_if.warp_num = gpu_inst_req_if.warp_num;
assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst; assign warp_ctl_if.change_mask = (gpu_inst_req_if.is_tmc) && valid_inst;
assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0; assign warp_ctl_if.thread_mask = gpu_inst_req_if.is_tmc ? tmc_new_mask : 0;
// assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst; // assign warp_ctl_if.ebreak = (gpu_inst_req_if.a_reg_data[0] == 0) && valid_inst;
// A mask change that results in an all-zero thread mask is reported as ebreak.
assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0); assign warp_ctl_if.ebreak = warp_ctl_if.change_mask && (warp_ctl_if.thread_mask == 0);
// wspawn: the new-active warp mask is built the same way as the tmc mask, using
// thread 0's a_reg_data value against NUM_WARPS; the spawn PC comes from rd2.
wire wspawn = gpu_inst_req_if.is_wspawn; wire wspawn = gpu_inst_req_if.is_wspawn;
wire[31:0] wspawn_pc = gpu_inst_req_if.rd2; wire[31:0] wspawn_pc = gpu_inst_req_if.rd2;
wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0]; wire all_active = `NUM_WARPS < gpu_inst_req_if.a_reg_data[0];
wire[`NUM_WARPS-1:0] wspawn_new_active; wire[`NUM_WARPS-1:0] wspawn_new_active;
genvar curr_w; genvar curr_w;
generate generate
for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin : wspawn_new_active_init
assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0]; assign wspawn_new_active[curr_w] = all_active ? 1 : curr_w < gpu_inst_req_if.a_reg_data[0];
end end
endgenerate endgenerate
// barrier: the id comes from thread 0's a_reg_data value, and num_warps from
// the low bits of (rd2 - 1).
assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst; assign warp_ctl_if.is_barrier = gpu_inst_req_if.is_barrier && valid_inst;
assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0]; assign warp_ctl_if.barrier_id = gpu_inst_req_if.a_reg_data[0];
`DEBUG_BEGIN `DEBUG_BEGIN
wire[31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1; wire[31:0] num_warps_m1 = gpu_inst_req_if.rd2 - 1;
`DEBUG_END `DEBUG_END
assign warp_ctl_if.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0]; assign warp_ctl_if.num_warps = num_warps_m1[$clog2(`NUM_WARPS):0];
assign warp_ctl_if.wspawn = wspawn; assign warp_ctl_if.wspawn = wspawn;
assign warp_ctl_if.wspawn_pc = wspawn_pc; assign warp_ctl_if.wspawn_pc = wspawn_pc;
assign warp_ctl_if.wspawn_new_active = wspawn_new_active; assign warp_ctl_if.wspawn_new_active = wspawn_new_active;
// split: a valid thread whose own a_reg_data value equals 1 goes into the "new"
// mask; a valid thread with any other value goes into the "later" mask.
wire[`NUM_THREADS-1:0] split_new_use_mask; wire[`NUM_THREADS-1:0] split_new_use_mask;
wire[`NUM_THREADS-1:0] split_new_later_mask; wire[`NUM_THREADS-1:0] split_new_later_mask;
// VX_gpu_inst_req.pc // VX_gpu_inst_req.pc
genvar curr_s_t; genvar curr_s_t;
generate generate
for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init for (curr_s_t = 0; curr_s_t < `NUM_THREADS; curr_s_t=curr_s_t+1) begin : masks_init
wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1); wire curr_bool = (gpu_inst_req_if.a_reg_data[curr_s_t] == 32'b1);
assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool); assign split_new_use_mask[curr_s_t] = curr_valids[curr_s_t] & (curr_bool);
assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool); assign split_new_later_mask[curr_s_t] = curr_valids[curr_s_t] & (!curr_bool);
end end
endgenerate endgenerate
// Number of valid threads in the request.
wire[$clog2(`NUM_THREADS):0] num_valids; wire[$clog2(`NUM_THREADS):0] num_valids;
VX_countones #( VX_countones #(
.N(`NUM_THREADS) .N(`NUM_THREADS)
) valids_counter ( ) valids_counter (
.valids(curr_valids), .valids(curr_valids),
.count (num_valids) .count (num_valids)
); );
// wire[`NW_BITS-1:0] num_valids = $countones(curr_valids); // wire[`NW_BITS-1:0] num_valids = $countones(curr_valids);
// A split is only signalled when more than one thread is valid.
assign warp_ctl_if.is_split = is_split && (num_valids > 1); assign warp_ctl_if.is_split = is_split && (num_valids > 1);
// dont_split: the new mask is empty, or all NUM_THREADS bits of it are set.
// NOTE(review): the comparison is against all-ones rather than curr_valids, so a
// warp with some inactive threads whose valid threads all agree does not set
// dont_split via the second term -- confirm this is intended.
assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}})); assign warp_ctl_if.dont_split = warp_ctl_if.is_split && ((split_new_use_mask == 0) || (split_new_use_mask == {`NUM_THREADS{1'b1}}));
assign warp_ctl_if.split_new_mask = split_new_use_mask; assign warp_ctl_if.split_new_mask = split_new_use_mask;
assign warp_ctl_if.split_later_mask = split_new_later_mask; assign warp_ctl_if.split_later_mask = split_new_later_mask;
assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next; assign warp_ctl_if.split_save_pc = gpu_inst_req_if.pc_next;
assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num; assign warp_ctl_if.split_warp_num = gpu_inst_req_if.warp_num;
// gpu_inst_req_if.is_wspawn // gpu_inst_req_if.is_wspawn
// gpu_inst_req_if.is_split // gpu_inst_req_if.is_split
// gpu_inst_req_if.is_barrier // gpu_inst_req_if.is_barrier
endmodule endmodule

View File

@@ -1,154 +1,154 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_gpr ( module VX_gpr (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire valid_write_request, input wire valid_write_request,
VX_gpr_read_if gpr_read_if, VX_gpr_read_if gpr_read_if,
VX_wb_if writeback_if, VX_wb_if writeback_if,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data, output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] a_reg_data,
output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data output reg[`NUM_THREADS-1:0][`NUM_GPRS-1:0] b_reg_data
); );
wire write_enable; wire write_enable;
`ifndef ASIC `ifndef ASIC
assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0); assign write_enable = valid_write_request && ((writeback_if.wb != 0)) && (writeback_if.rd != 0);
byte_enabled_simple_dual_port_ram first_ram( byte_enabled_simple_dual_port_ram first_ram(
.we (write_enable), .we (write_enable),
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.waddr (writeback_if.rd), .waddr (writeback_if.rd),
.raddr1(gpr_read_if.rs1), .raddr1(gpr_read_if.rs1),
.raddr2(gpr_read_if.rs2), .raddr2(gpr_read_if.rs2),
.be (writeback_if.wb_valid), .be (writeback_if.wb_valid),
.wdata (writeback_if.write_data), .wdata (writeback_if.write_data),
.q1 (a_reg_data), .q1 (a_reg_data),
.q2 (b_reg_data) .q2 (b_reg_data)
); );
`else `else
assign write_enable = valid_write_request && ((writeback_if.wb != 0)); assign write_enable = valid_write_request && ((writeback_if.wb != 0));
wire going_to_write = write_enable & (|writeback_if.wb_valid); wire going_to_write = write_enable & (|writeback_if.wb_valid);
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask; wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] write_bit_mask;
genvar curr_t; genvar curr_t;
for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin for (curr_t = 0; curr_t < `NUM_THREADS; curr_t=curr_t+1) begin
wire local_write = write_enable & writeback_if.wb_valid[curr_t]; wire local_write = write_enable & writeback_if.wb_valid[curr_t];
assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}}; assign write_bit_mask[curr_t] = {`NUM_GPRS{~local_write}};
end end
// wire cenb = !going_to_write; // wire cenb = !going_to_write;
wire cenb = 0; wire cenb = 0;
// wire cena_1 = (gpr_read_if.rs1 == 0); // wire cena_1 = (gpr_read_if.rs1 == 0);
// wire cena_2 = (gpr_read_if.rs2 == 0); // wire cena_2 = (gpr_read_if.rs2 == 0);
wire cena_1 = 0; wire cena_1 = 0;
wire cena_2 = 0; wire cena_2 = 0;
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_a; wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_a;
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b; wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] temp_b;
`ifndef SYN `ifndef SYN
genvar thread; genvar thread;
genvar curr_bit; genvar curr_bit;
for (thread = 0; thread < `NUM_THREADS; thread = thread + 1) for (thread = 0; thread < `NUM_THREADS; thread = thread + 1)
begin begin
for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1) for (curr_bit = 0; curr_bit < `NUM_GPRS; curr_bit=curr_bit+1)
begin begin
assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit]; assign a_reg_data[thread][curr_bit] = ((temp_a[thread][curr_bit] === 1'dx) || cena_1 )? 1'b0 : temp_a[thread][curr_bit];
assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit]; assign b_reg_data[thread][curr_bit] = ((temp_b[thread][curr_bit] === 1'dx) || cena_2) ? 1'b0 : temp_b[thread][curr_bit];
end end
end end
`else `else
assign a_reg_data = temp_a; assign a_reg_data = temp_a;
assign b_reg_data = temp_b; assign b_reg_data = temp_b;
`endif `endif
wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0; wire[`NUM_THREADS-1:0][`NUM_GPRS-1:0] to_write = (writeback_if.rd != 0) ? writeback_if.write_data : 0;
genvar curr_base_thread; genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4) for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
begin begin
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
rf2_32x128_wm1 first_ram ( rf2_32x128_wm1 first_ram (
.CENYA(), .CENYA(),
.AYA(), .AYA(),
.CENYB(), .CENYB(),
.WENYB(), .WENYB(),
.AYB(), .AYB(),
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]), .QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
.SOA(), .SOA(),
.SOB(), .SOB(),
.CLKA(clk), .CLKA(clk),
.CENA(cena_1), .CENA(cena_1),
.AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]), .AA(gpr_read_if.rs1[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk), .CLKB(clk),
.CENB(cenb), .CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), .AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]), .DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011), .EMAA(3'b011),
.EMASA(1'b0), .EMASA(1'b0),
.EMAB(3'b011), .EMAB(3'b011),
.TENA(1'b1), .TENA(1'b1),
.TCENA(1'b0), .TCENA(1'b0),
.TAA(5'b0), .TAA(5'b0),
.TENB(1'b1), .TENB(1'b1),
.TCENB(1'b0), .TCENB(1'b0),
.TWENB(128'b0), .TWENB(128'b0),
.TAB(5'b0), .TAB(5'b0),
.TDB(128'b0), .TDB(128'b0),
.RET1N(1'b1), .RET1N(1'b1),
.SIA(2'b0), .SIA(2'b0),
.SEA(1'b0), .SEA(1'b0),
.DFTRAMBYP(1'b0), .DFTRAMBYP(1'b0),
.SIB(2'b0), .SIB(2'b0),
.SEB(1'b0), .SEB(1'b0),
.COLLDISN(1'b1) .COLLDISN(1'b1)
); );
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
rf2_`NUM_GPRSx128_wm1 second_ram ( rf2_`NUM_GPRSx128_wm1 second_ram (
.CENYA(), .CENYA(),
.AYA(), .AYA(),
.CENYB(), .CENYB(),
.WENYB(), .WENYB(),
.AYB(), .AYB(),
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]), .QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
.SOA(), .SOA(),
.SOB(), .SOB(),
.CLKA(clk), .CLKA(clk),
.CENA(cena_2), .CENA(cena_2),
.AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]), .AA(gpr_read_if.rs2[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk), .CLKB(clk),
.CENB(cenb), .CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]), .WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]), .AB(writeback_if.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]), .DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011), .EMAA(3'b011),
.EMASA(1'b0), .EMASA(1'b0),
.EMAB(3'b011), .EMAB(3'b011),
.TENA(1'b1), .TENA(1'b1),
.TCENA(1'b0), .TCENA(1'b0),
.TAA(5'b0), .TAA(5'b0),
.TENB(1'b1), .TENB(1'b1),
.TCENB(1'b0), .TCENB(1'b0),
.TWENB(128'b0), .TWENB(128'b0),
.TAB(5'b0), .TAB(5'b0),
.TDB(128'b0), .TDB(128'b0),
.RET1N(1'b1), .RET1N(1'b1),
.SIA(2'b0), .SIA(2'b0),
.SEA(1'b0), .SEA(1'b0),
.DFTRAMBYP(1'b0), .DFTRAMBYP(1'b0),
.SIB(2'b0), .SIB(2'b0),
.SEB(1'b0), .SEB(1'b0),
.COLLDISN(1'b1) .COLLDISN(1'b1)
); );
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
end end
`endif `endif
endmodule endmodule

View File

@@ -1,232 +1,232 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_gpr_stage
//
// Register-read pipeline stage. It forwards the rs1/rs2/warp_num fields of the
// incoming back-end request to the register-file wrapper, hands the operands
// to VX_inst_multiplex, and registers the four resulting requests (exec unit,
// LSU, GPU instruction, CSR) toward their consumers.
//
// Ports:
//   clk, reset       - clock and reset passed to every register instance.
//   schedule_delay   - drives the flush inputs (see flush_* wires below).
//   memory_delay     - stalls the LSU request register.
//   exec_delay       - stalls the exec-unit request register.
//   stall_gpr_csr    - stalls the CSR request register.
//   gpr_stage_delay  - asserted when the LSU, exec or CSR path is stalled.
//   bckE_req_if      - incoming instruction information.
//   writeback_if     - write-back port forwarded to the register file.
//   exec_unit_req_if, lsu_req_if, gpu_inst_req_if, csr_req_if - outputs.
//
// NOTE(review): the stall/flush behaviour of VX_generic_register is defined
// elsewhere; the comments below assume "stall" holds the stored value and
// "flush" clears it - confirm against that module.
module VX_gpr_stage (
    input wire                  clk,
    input wire                  reset,
    input wire                  schedule_delay,
    input wire                  memory_delay,
    input wire                  exec_delay,
    input wire                  stall_gpr_csr,
    output wire                 gpr_stage_delay,

    // inputs
    // Instruction Information
    VX_frE_to_bckE_req_if       bckE_req_if,

    // WriteBack inputs
    VX_wb_if                    writeback_if,

    // Outputs
    VX_exec_unit_req_if         exec_unit_req_if,
    VX_lsu_req_if               lsu_req_if,
    VX_gpu_inst_req_if          gpu_inst_req_if,
    VX_csr_req_if               csr_req_if
);

    // Debug-only taps on the incoming request; not used by the logic below.
`DEBUG_BEGIN
    wire[31:0] curr_PC    = bckE_req_if.curr_PC;
    wire[2:0]  branchType = bckE_req_if.branch_type;
    wire       is_store   = (bckE_req_if.mem_write != `NO_MEM_WRITE);
    wire       is_load    = (bckE_req_if.mem_read != `NO_MEM_READ);
    wire       jalQual    = bckE_req_if.jalQual;
`DEBUG_END

    // Read-address bundle handed to the register-file wrapper.
    VX_gpr_read_if gpr_read_if();
    assign gpr_read_if.rs1      = bckE_req_if.rs1;
    assign gpr_read_if.rs2      = bckE_req_if.rs2;
    assign gpr_read_if.warp_num = bckE_req_if.warp_num;

    // JAL override inputs for the wrapper. The non-ASIC build takes them from
    // the incoming request; the ASIC build takes them from the registered
    // exec-unit request instead.
`ifndef ASIC
    VX_gpr_jal_if gpr_jal_if();
    assign gpr_jal_if.is_jal  = bckE_req_if.jalQual;
    assign gpr_jal_if.curr_PC = bckE_req_if.curr_PC;
`else
    VX_gpr_jal_if gpr_jal_if();
    assign gpr_jal_if.is_jal  = exec_unit_req_if.jalQual;
    assign gpr_jal_if.curr_PC = exec_unit_req_if.curr_PC;
`endif

    // Operand data returned by the register file.
    VX_gpr_data_if gpr_datf_if();

    VX_gpr_wrapper grp_wrapper (
        .clk          (clk),
        .reset        (reset),
        .writeback_if (writeback_if),
        .gpr_read_if  (gpr_read_if),
        .gpr_jal_if   (gpr_jal_if),
        .a_reg_data   (gpr_datf_if.a_reg_data),
        .b_reg_data   (gpr_datf_if.b_reg_data)
    );

    // assign bckE_req_if.is_csr = is_csr;
    // assign bckE_req_out_if.csr_mask = (bckE_req_if.sr_immed == 1'b1) ? {27'h0, bckE_req_if.rs1} : gpr_data_if.a_reg_data[0];

    // Outputs
    // Unregistered per-unit requests produced by the multiplexer.
    VX_exec_unit_req_if exec_unit_req_temp_if();
    VX_lsu_req_if       lsu_req_temp_if();
    VX_gpu_inst_req_if  gpu_inst_req_temp_if();
    VX_csr_req_if       csr_req_temp_if();

    VX_inst_multiplex inst_mult(
        .bckE_req_if     (bckE_req_if),
        .gpr_data_if     (gpr_datf_if),
        .exec_unit_req_if(exec_unit_req_temp_if),
        .lsu_req_if      (lsu_req_temp_if),
        .gpu_inst_req_if (gpu_inst_req_temp_if),
        .csr_req_if      (csr_req_temp_if)
    );

`DEBUG_BEGIN
    wire is_lsu = (|lsu_req_temp_if.valid);
`DEBUG_END

    // Stall / flush controls for the output registers.
    // stall_rest is tied low, so registers using it are never stalled.
    wire stall_rest = 0;
    wire flush_rest = schedule_delay;

    wire stall_lsu  = memory_delay;
    wire flush_lsu  = schedule_delay && !stall_lsu;   // no flush while stalled

    wire stall_exec = exec_delay;
    wire flush_exec = schedule_delay && !stall_exec;  // no flush while stalled

    // A CSR stall is only reported for a valid CSR instruction.
    wire stall_csr  = stall_gpr_csr && bckE_req_if.is_csr && (|bckE_req_if.valid);

    assign gpr_stage_delay = stall_lsu || stall_exec || stall_csr;

`ifdef ASIC

    // ASIC build: operand data is not carried through the request registers.
    // Exec-unit and GPU operands are driven straight from the multiplexer's
    // LSU request; LSU operands are additionally captured on the first cycle
    // of an LSU stall and replayed from that capture afterwards.

    // stall_lsu delayed by one cycle.
    wire delayed_lsu_last_cycle;

    VX_generic_register #(
        .N(1)
    ) delayed_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(stall_rest),
        .in   (stall_lsu),
        .out  (delayed_lsu_last_cycle)
    );

    wire[`NUM_THREADS-1:0][31:0] temp_store_data;
    wire[`NUM_THREADS-1:0][31:0] temp_base_address; // A reg data

    wire[`NUM_THREADS-1:0][31:0] real_store_data;
    wire[`NUM_THREADS-1:0][31:0] real_base_address; // A reg data

    // High on the first cycle of an LSU stall only.
    wire store_curr_real = !delayed_lsu_last_cycle && stall_lsu;

    // Captures the live operands; un-stalled only while store_curr_real is high.
    VX_generic_register #(
        .N(`NUM_THREADS*32*2)
    ) lsu_data (
        .clk  (clk),
        .reset(reset),
        .stall(!store_curr_real),
        .flush(stall_rest),
        .in   ({real_store_data, real_base_address}),
        .out  ({temp_store_data, temp_base_address})
    );

    assign real_store_data   = lsu_req_temp_if.store_data;
    assign real_base_address = lsu_req_temp_if.base_address;

    // Use the captured operands when the previous cycle was an LSU stall.
    assign lsu_req_if.store_data   = (delayed_lsu_last_cycle) ? temp_store_data   : real_store_data;
    assign lsu_req_if.base_address = (delayed_lsu_last_cycle) ? temp_base_address : real_base_address;

    VX_generic_register #(
        .N(77 + `NW_BITS-1 + 1 + (`NUM_THREADS))
    ) lsu_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_lsu),
        .flush(flush_lsu),
        .in   ({lsu_req_temp_if.valid,
                lsu_req_temp_if.lsu_pc,
                lsu_req_temp_if.warp_num,
                lsu_req_temp_if.offset,
                lsu_req_temp_if.mem_read,
                lsu_req_temp_if.mem_write,
                lsu_req_temp_if.rd,
                lsu_req_temp_if.wb}),
        .out  ({lsu_req_if.valid,
                lsu_req_if.lsu_pc,
                lsu_req_if.warp_num,
                lsu_req_if.offset,
                lsu_req_if.mem_read,
                lsu_req_if.mem_write,
                lsu_req_if.rd,
                lsu_req_if.wb})
    );

    VX_generic_register #(
        .N(224 + `NW_BITS-1 + 1 + (`NUM_THREADS))
    ) exec_unit_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_exec),
        .flush(flush_exec),
        .in   ({exec_unit_req_temp_if.valid,
                exec_unit_req_temp_if.warp_num,
                exec_unit_req_temp_if.curr_PC,
                exec_unit_req_temp_if.PC_next,
                exec_unit_req_temp_if.rd,
                exec_unit_req_temp_if.wb,
                exec_unit_req_temp_if.alu_op,
                exec_unit_req_temp_if.rs1,
                exec_unit_req_temp_if.rs2,
                exec_unit_req_temp_if.rs2_src,
                exec_unit_req_temp_if.itype_immed,
                exec_unit_req_temp_if.upper_immed,
                exec_unit_req_temp_if.branch_type,
                exec_unit_req_temp_if.jalQual,
                exec_unit_req_temp_if.jal,
                exec_unit_req_temp_if.jal_offset,
                exec_unit_req_temp_if.ebreak,
                exec_unit_req_temp_if.wspawn,
                exec_unit_req_temp_if.is_csr,
                exec_unit_req_temp_if.csr_address,
                exec_unit_req_temp_if.csr_immed,
                exec_unit_req_temp_if.csr_mask}),
        .out  ({exec_unit_req_if.valid,
                exec_unit_req_if.warp_num,
                exec_unit_req_if.curr_PC,
                exec_unit_req_if.PC_next,
                exec_unit_req_if.rd,
                exec_unit_req_if.wb,
                exec_unit_req_if.alu_op,
                exec_unit_req_if.rs1,
                exec_unit_req_if.rs2,
                exec_unit_req_if.rs2_src,
                exec_unit_req_if.itype_immed,
                exec_unit_req_if.upper_immed,
                exec_unit_req_if.branch_type,
                exec_unit_req_if.jalQual,
                exec_unit_req_if.jal,
                exec_unit_req_if.jal_offset,
                exec_unit_req_if.ebreak,
                exec_unit_req_if.wspawn,
                exec_unit_req_if.is_csr,
                exec_unit_req_if.csr_address,
                exec_unit_req_if.csr_immed,
                exec_unit_req_if.csr_mask})
    );

    // Exec-unit operands bypass exec_unit_reg.
    assign exec_unit_req_if.a_reg_data = real_base_address;
    assign exec_unit_req_if.b_reg_data = real_store_data;

    VX_generic_register #(
        .N(36 + `NW_BITS-1 + 1 + (`NUM_THREADS))
    ) gpu_inst_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(flush_rest),
        .in   ({gpu_inst_req_temp_if.valid,
                gpu_inst_req_temp_if.warp_num,
                gpu_inst_req_temp_if.is_wspawn,
                gpu_inst_req_temp_if.is_tmc,
                gpu_inst_req_temp_if.is_split,
                gpu_inst_req_temp_if.is_barrier,
                gpu_inst_req_temp_if.pc_next}),
        .out  ({gpu_inst_req_if.valid,
                gpu_inst_req_if.warp_num,
                gpu_inst_req_if.is_wspawn,
                gpu_inst_req_if.is_tmc,
                gpu_inst_req_if.is_split,
                gpu_inst_req_if.is_barrier,
                gpu_inst_req_if.pc_next})
    );

    // GPU-instruction operands bypass gpu_inst_reg.
    assign gpu_inst_req_if.a_reg_data = real_base_address;
    assign gpu_inst_req_if.rd2        = real_store_data;

    VX_generic_register #(
        .N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
    ) csr_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_gpr_csr),
        .flush(flush_rest),
        .in   ({csr_req_temp_if.valid,
                csr_req_temp_if.warp_num,
                csr_req_temp_if.rd,
                csr_req_temp_if.wb,
                csr_req_temp_if.alu_op,
                csr_req_temp_if.is_csr,
                csr_req_temp_if.csr_address,
                csr_req_temp_if.csr_immed,
                csr_req_temp_if.csr_mask}),
        .out  ({csr_req_if.valid,
                csr_req_if.warp_num,
                csr_req_if.rd,
                csr_req_if.wb,
                csr_req_if.alu_op,
                csr_req_if.is_csr,
                csr_req_if.csr_address,
                csr_req_if.csr_immed,
                csr_req_if.csr_mask})
    );

`else

    // Non-ASIC build: operand data travels through the request registers
    // together with the control fields.

    // 341
    VX_generic_register #(
        .N(77 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
    ) lsu_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_lsu),
        .flush(flush_lsu),
        .in   ({lsu_req_temp_if.valid,
                lsu_req_temp_if.lsu_pc,
                lsu_req_temp_if.warp_num,
                lsu_req_temp_if.store_data,
                lsu_req_temp_if.base_address,
                lsu_req_temp_if.offset,
                lsu_req_temp_if.mem_read,
                lsu_req_temp_if.mem_write,
                lsu_req_temp_if.rd,
                lsu_req_temp_if.wb}),
        .out  ({lsu_req_if.valid,
                lsu_req_if.lsu_pc,
                lsu_req_if.warp_num,
                lsu_req_if.store_data,
                lsu_req_if.base_address,
                lsu_req_if.offset,
                lsu_req_if.mem_read,
                lsu_req_if.mem_write,
                lsu_req_if.rd,
                lsu_req_if.wb})
    );

    VX_generic_register #(
        .N(224 + `NW_BITS-1 + 1 + 65*(`NUM_THREADS))
    ) exec_unit_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_exec),
        .flush(flush_exec),
        .in   ({exec_unit_req_temp_if.valid,
                exec_unit_req_temp_if.warp_num,
                exec_unit_req_temp_if.curr_PC,
                exec_unit_req_temp_if.PC_next,
                exec_unit_req_temp_if.rd,
                exec_unit_req_temp_if.wb,
                exec_unit_req_temp_if.a_reg_data,
                exec_unit_req_temp_if.b_reg_data,
                exec_unit_req_temp_if.alu_op,
                exec_unit_req_temp_if.rs1,
                exec_unit_req_temp_if.rs2,
                exec_unit_req_temp_if.rs2_src,
                exec_unit_req_temp_if.itype_immed,
                exec_unit_req_temp_if.upper_immed,
                exec_unit_req_temp_if.branch_type,
                exec_unit_req_temp_if.jalQual,
                exec_unit_req_temp_if.jal,
                exec_unit_req_temp_if.jal_offset,
                exec_unit_req_temp_if.ebreak,
                exec_unit_req_temp_if.wspawn,
                exec_unit_req_temp_if.is_csr,
                exec_unit_req_temp_if.csr_address,
                exec_unit_req_temp_if.csr_immed,
                exec_unit_req_temp_if.csr_mask}),
        .out  ({exec_unit_req_if.valid,
                exec_unit_req_if.warp_num,
                exec_unit_req_if.curr_PC,
                exec_unit_req_if.PC_next,
                exec_unit_req_if.rd,
                exec_unit_req_if.wb,
                exec_unit_req_if.a_reg_data,
                exec_unit_req_if.b_reg_data,
                exec_unit_req_if.alu_op,
                exec_unit_req_if.rs1,
                exec_unit_req_if.rs2,
                exec_unit_req_if.rs2_src,
                exec_unit_req_if.itype_immed,
                exec_unit_req_if.upper_immed,
                exec_unit_req_if.branch_type,
                exec_unit_req_if.jalQual,
                exec_unit_req_if.jal,
                exec_unit_req_if.jal_offset,
                exec_unit_req_if.ebreak,
                exec_unit_req_if.wspawn,
                exec_unit_req_if.is_csr,
                exec_unit_req_if.csr_address,
                exec_unit_req_if.csr_immed,
                exec_unit_req_if.csr_mask})
    );

    VX_generic_register #(
        .N(68 + `NW_BITS-1 + 1 + 33*(`NUM_THREADS))
    ) gpu_inst_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_rest),
        .flush(flush_rest),
        .in   ({gpu_inst_req_temp_if.valid,
                gpu_inst_req_temp_if.warp_num,
                gpu_inst_req_temp_if.is_wspawn,
                gpu_inst_req_temp_if.is_tmc,
                gpu_inst_req_temp_if.is_split,
                gpu_inst_req_temp_if.is_barrier,
                gpu_inst_req_temp_if.pc_next,
                gpu_inst_req_temp_if.a_reg_data,
                gpu_inst_req_temp_if.rd2}),
        .out  ({gpu_inst_req_if.valid,
                gpu_inst_req_if.warp_num,
                gpu_inst_req_if.is_wspawn,
                gpu_inst_req_if.is_tmc,
                gpu_inst_req_if.is_split,
                gpu_inst_req_if.is_barrier,
                gpu_inst_req_if.pc_next,
                gpu_inst_req_if.a_reg_data,
                gpu_inst_req_if.rd2})
    );

    VX_generic_register #(
        .N(`NW_BITS-1 + 1 + `NUM_THREADS + 58)
    ) csr_reg (
        .clk  (clk),
        .reset(reset),
        .stall(stall_gpr_csr),
        .flush(flush_rest),
        .in   ({csr_req_temp_if.valid,
                csr_req_temp_if.warp_num,
                csr_req_temp_if.rd,
                csr_req_temp_if.wb,
                csr_req_temp_if.alu_op,
                csr_req_temp_if.is_csr,
                csr_req_temp_if.csr_address,
                csr_req_temp_if.csr_immed,
                csr_req_temp_if.csr_mask}),
        .out  ({csr_req_if.valid,
                csr_req_if.warp_num,
                csr_req_if.rd,
                csr_req_if.wb,
                csr_req_if.alu_op,
                csr_req_if.is_csr,
                csr_req_if.csr_address,
                csr_req_if.csr_immed,
                csr_req_if.csr_mask})
    );

`endif

endmodule

View File

@@ -1,68 +1,68 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_gpr_wrapper
//
// Instantiates one VX_gpr register file per warp and selects which warp's
// read data is presented on a_reg_data / b_reg_data.
//
// Ports:
//   clk, reset    - clock and reset forwarded to every VX_gpr instance.
//   gpr_read_if   - read request (rs1, rs2, warp_num), shared by all warps.
//   writeback_if  - write-back port; only the warp whose index equals
//                   wb_warp_num sees valid_write_request asserted.
//   gpr_jal_if    - when is_jal is set, a_reg_data is replaced by curr_PC
//                   replicated across all threads.
//   a_reg_data    - per-thread first operand (or the JAL PC).
//   b_reg_data    - per-thread second operand.
module VX_gpr_wrapper (
    input wire      clk,
    input wire      reset,
    VX_gpr_read_if  gpr_read_if,
    VX_wb_if        writeback_if,
    VX_gpr_jal_if   gpr_jal_if,

    output wire[`NUM_THREADS-1:0][31:0] a_reg_data,
    output wire[`NUM_THREADS-1:0][31:0] b_reg_data
);

    // Read data from every warp's register file, indexed by warp.
    wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_a_reg_data;
    wire[`NUM_WARPS-1:0][`NUM_THREADS-1:0][31:0] temp_b_reg_data;

    // curr_PC broadcast to every thread lane.
    wire[`NUM_THREADS-1:0][31:0] jal_data;

    genvar index;
    generate
    for (index = 0; index < `NUM_THREADS; index = index + 1) begin : jal_data_assign
        assign jal_data[index] = gpr_jal_if.curr_PC;
    end
    endgenerate

`ifndef ASIC
    // Select the read data using the current request's warp number.
    assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[gpr_read_if.warp_num]));
    assign b_reg_data = (temp_b_reg_data[gpr_read_if.warp_num]);
`else
    // ASIC build: the warp number is passed through a register (store_wn)
    // and the registered copy is used to select the read data.
    wire zer = 0;   // store_wn is never stalled or flushed

    wire[`NW_BITS-1:0] old_warp_num;

    VX_generic_register #(
        .N(`NW_BITS-1+1)
    ) store_wn (
        .clk  (clk),
        .reset(reset),
        .stall(zer),
        .flush(zer),
        .in   (gpr_read_if.warp_num),
        .out  (old_warp_num)
    );

    assign a_reg_data = (gpr_jal_if.is_jal ? jal_data : (temp_a_reg_data[old_warp_num]));
    assign b_reg_data = (temp_b_reg_data[old_warp_num]);
`endif

    genvar warp_index;
    generate
    for (warp_index = 0; warp_index < `NUM_WARPS; warp_index = warp_index + 1) begin : warp_gprs
        // Only the warp addressed by the write-back may accept the write.
        wire valid_write_request = warp_index == writeback_if.wb_warp_num;

        VX_gpr gpr(
            .clk                 (clk),
            .reset               (reset),
            .valid_write_request (valid_write_request),
            .gpr_read_if         (gpr_read_if),
            .writeback_if        (writeback_if),
            .a_reg_data          (temp_a_reg_data[warp_index]),
            .b_reg_data          (temp_b_reg_data[warp_index])
        );
    end
    endgenerate

endmodule

View File

@@ -1,60 +1,60 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_icache_stage
//
// Instruction-fetch stage. It turns the fetch metadata on fe_inst_meta_fi into
// an instruction-cache request and turns the cache response into decode-stage
// metadata on fe_inst_meta_id.
//
// Ports:
//   clk, reset           - clock and synchronous reset for threads_active.
//   total_freeze         - blocks new requests and de-asserts response ready.
//   icache_stage_delay   - asserted while the cache cannot accept a request.
//   icache_stage_wid     - warp number of the current response.
//   icache_stage_valids  - response thread mask, zeroed while
//                          icache_stage_delay is asserted.
//   fe_inst_meta_fi      - fetch metadata in (valid mask, inst_pc, warp_num).
//   fe_inst_meta_id      - decode metadata out.
//   icache_rsp_if        - cache response channel.
//   icache_req_if        - cache request channel.
module VX_icache_stage (
    input  wire             clk,
    input  wire             reset,
    input  wire             total_freeze,
    output wire             icache_stage_delay,
    output wire[`NW_BITS-1:0] icache_stage_wid,
    output wire[`NUM_THREADS-1:0] icache_stage_valids,
    VX_inst_meta_if         fe_inst_meta_fi,
    VX_inst_meta_if         fe_inst_meta_id,

    VX_gpu_dcache_rsp_if    icache_rsp_if,
    VX_gpu_dcache_req_if    icache_req_if
);

    // Thread mask recorded per warp when a request is accepted; it is looked
    // up again by warp number when the response comes back.
    reg[`NUM_THREADS-1:0] threads_active[`NUM_WARPS-1:0];

    wire valid_inst = (|fe_inst_meta_fi.valid);

    // Icache Request
    assign icache_req_if.core_req_valid    = valid_inst && !total_freeze;
    assign icache_req_if.core_req_addr     = fe_inst_meta_fi.inst_pc;
    assign icache_req_if.core_req_data     = 32'b0;
    assign icache_req_if.core_req_read     = `LW_MEM_READ;
    assign icache_req_if.core_req_write    = `NO_MEM_WRITE;
    assign icache_req_if.core_req_rd       = 5'b0;
    assign icache_req_if.core_req_wb       = {1{2'b1}};
    assign icache_req_if.core_req_warp_num = fe_inst_meta_fi.warp_num;
    assign icache_req_if.core_req_pc       = fe_inst_meta_fi.inst_pc;

    // Icache Response: only entry 0 of the response data/pc is used.
    assign fe_inst_meta_id.instruction = icache_rsp_if.core_rsp_data[0][31:0];
    assign fe_inst_meta_id.inst_pc     = icache_rsp_if.core_rsp_pc[0];
    assign fe_inst_meta_id.warp_num    = icache_rsp_if.core_rsp_warp_num;

    // The decode valid mask is the mask saved for the responding warp.
    assign fe_inst_meta_id.valid       = icache_rsp_if.core_rsp_valid ? threads_active[icache_rsp_if.core_rsp_warp_num] : 0;

    assign icache_stage_wid            = fe_inst_meta_id.warp_num;
    assign icache_stage_valids         = fe_inst_meta_id.valid & {`NUM_THREADS{!icache_stage_delay}};

    // Cache can't accept request
    assign icache_stage_delay = ~icache_req_if.core_req_ready;

    // Core can't accept response
    assign icache_rsp_if.core_rsp_ready = ~total_freeze;

    integer curr_w;
    always @(posedge clk) begin
        if (reset) begin
            for (curr_w = 0; curr_w < `NUM_WARPS; curr_w=curr_w+1) begin
                threads_active[curr_w] <= 0;
            end
        end else begin
            // Record the requesting warp's thread mask when the cache is
            // ready to take the request.
            if (valid_inst && !icache_stage_delay) begin
                threads_active[fe_inst_meta_fi.warp_num] <= fe_inst_meta_fi.valid;
            end
        end
    end

endmodule

View File

@@ -1,94 +1,94 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_inst_multiplex
//
// Purely combinational fan-out of one decoded back-end request to the four
// unit request ports (execute, LSU, GPU-control, CSR). The payload fields are
// copied to every port that carries them; only the per-thread `valid` mask is
// gated, so that exactly one class of unit sees a non-zero mask:
//   - memory  : mem_read or mem_write is not the "no access" encoding
//   - gpu     : wspawn / tmc / barrier / split
//   - csr     : is_csr
//   - execute : none of the above
module VX_inst_multiplex (
    // Inputs
    VX_frE_to_bckE_req_if   bckE_req_if,
    VX_gpr_data_if          gpr_data_if,

    // Outputs
    VX_exec_unit_req_if     exec_unit_req_if,
    VX_lsu_req_if           lsu_req_if,
    VX_gpu_inst_req_if      gpu_inst_req_if,
    VX_csr_req_if           csr_req_if
);

    // Instruction class of the incoming request.
    wire is_mem = (bckE_req_if.mem_write != `NO_MEM_WRITE) || (bckE_req_if.mem_read != `NO_MEM_READ);
    wire is_gpu = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
    wire is_csr = bckE_req_if.is_csr;

    // Each class bit replicated across all thread lanes so it can gate the
    // per-thread valid mask with a plain bitwise AND.
    wire[`NUM_THREADS-1:0] is_mem_mask = {`NUM_THREADS{is_mem}};
    wire[`NUM_THREADS-1:0] is_gpu_mask = {`NUM_THREADS{is_gpu}};
    wire[`NUM_THREADS-1:0] is_csr_mask = {`NUM_THREADS{is_csr}};

    // Execute Unit: receives whatever is not claimed by LSU / GPU / CSR.
    assign exec_unit_req_if.valid       = bckE_req_if.valid & ~(is_mem_mask | is_gpu_mask | is_csr_mask);
    assign exec_unit_req_if.warp_num    = bckE_req_if.warp_num;
    assign exec_unit_req_if.curr_PC     = bckE_req_if.curr_PC;
    assign exec_unit_req_if.PC_next     = bckE_req_if.PC_next;
    assign exec_unit_req_if.rd          = bckE_req_if.rd;
    assign exec_unit_req_if.wb          = bckE_req_if.wb;
    assign exec_unit_req_if.alu_op      = bckE_req_if.alu_op;
    assign exec_unit_req_if.rs1         = bckE_req_if.rs1;
    assign exec_unit_req_if.rs2         = bckE_req_if.rs2;
    assign exec_unit_req_if.rs2_src     = bckE_req_if.rs2_src;
    assign exec_unit_req_if.itype_immed = bckE_req_if.itype_immed;
    assign exec_unit_req_if.upper_immed = bckE_req_if.upper_immed;
    assign exec_unit_req_if.branch_type = bckE_req_if.branch_type;
    assign exec_unit_req_if.jalQual     = bckE_req_if.jalQual;
    assign exec_unit_req_if.jal         = bckE_req_if.jal;
    assign exec_unit_req_if.jal_offset  = bckE_req_if.jal_offset;
    assign exec_unit_req_if.ebreak      = bckE_req_if.ebreak;
    assign exec_unit_req_if.a_reg_data  = gpr_data_if.a_reg_data;
    assign exec_unit_req_if.b_reg_data  = gpr_data_if.b_reg_data;

    // LSU Unit: register A supplies the base address, register B the store data.
    assign lsu_req_if.valid        = bckE_req_if.valid & is_mem_mask;
    assign lsu_req_if.warp_num     = bckE_req_if.warp_num;
    assign lsu_req_if.lsu_pc       = bckE_req_if.curr_PC;
    assign lsu_req_if.rd           = bckE_req_if.rd;
    assign lsu_req_if.wb           = bckE_req_if.wb;
    assign lsu_req_if.mem_read     = bckE_req_if.mem_read;
    assign lsu_req_if.mem_write    = bckE_req_if.mem_write;
    assign lsu_req_if.offset       = bckE_req_if.itype_immed;
    assign lsu_req_if.base_address = gpr_data_if.a_reg_data;
    assign lsu_req_if.store_data   = gpr_data_if.b_reg_data;

    // GPU control request (wspawn / tmc / split / barrier).
    assign gpu_inst_req_if.valid      = bckE_req_if.valid & is_gpu_mask;
    assign gpu_inst_req_if.warp_num   = bckE_req_if.warp_num;
    assign gpu_inst_req_if.pc_next    = bckE_req_if.PC_next;
    assign gpu_inst_req_if.is_wspawn  = bckE_req_if.is_wspawn;
    assign gpu_inst_req_if.is_tmc     = bckE_req_if.is_tmc;
    assign gpu_inst_req_if.is_split   = bckE_req_if.is_split;
    assign gpu_inst_req_if.is_barrier = bckE_req_if.is_barrier;
    assign gpu_inst_req_if.a_reg_data = gpr_data_if.a_reg_data;
    // Only element 0 of the B operand is forwarded.
    assign gpu_inst_req_if.rd2        = gpr_data_if.b_reg_data[0];

    // CSR request.
    assign csr_req_if.valid       = bckE_req_if.valid & is_csr_mask;
    assign csr_req_if.warp_num    = bckE_req_if.warp_num;
    assign csr_req_if.rd          = bckE_req_if.rd;
    assign csr_req_if.wb          = bckE_req_if.wb;
    assign csr_req_if.alu_op      = bckE_req_if.alu_op;
    assign csr_req_if.is_csr      = bckE_req_if.is_csr;
    assign csr_req_if.csr_address = bckE_req_if.csr_address;
    assign csr_req_if.csr_immed   = bckE_req_if.csr_immed;
    assign csr_req_if.csr_mask    = bckE_req_if.csr_mask;

endmodule

View File

@@ -1,87 +1,89 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_lsu
//
// Load/store unit front: computes one address per thread, passes the request
// through a single VX_generic_register stage, and presents it to the data
// cache. Cache responses are forwarded to the memory write-back interface.
//
//   delay        - asserted while the cache is not ready for a request; it
//                  also drives the `stall` input of the request register.
//   no_slot_mem  - when asserted, the core refuses cache responses.
module VX_lsu (
    input wire              clk,
    input wire              reset,
    input wire              no_slot_mem,
    VX_lsu_req_if           lsu_req_if,

    // Write back to GPR
    VX_inst_mem_wb_if       mem_wb_if,

    VX_gpu_dcache_rsp_if    dcache_rsp_if,
    VX_gpu_dcache_req_if    dcache_req_if,
    output wire             delay
);

    // ------------------------------------------------------------------
    // Handshake
    // ------------------------------------------------------------------

    // Cache can't accept request
    assign delay = ~dcache_req_if.core_req_ready;

    // Core can't accept response
    assign dcache_rsp_if.core_rsp_ready = ~no_slot_mem;

    // ------------------------------------------------------------------
    // Address generation (base + offset, per thread)
    // ------------------------------------------------------------------
    wire[`NUM_THREADS-1:0][31:0] address;

    VX_lsu_addr_gen VX_lsu_addr_gen (
        .base_address (lsu_req_if.base_address),
        .offset       (lsu_req_if.offset),
        .address      (address)
    );

    // ------------------------------------------------------------------
    // Request register
    // ------------------------------------------------------------------
    // Registered copies of the request fields; these drive the cache port.
    wire[`NUM_THREADS-1:0][31:0] use_address;
    wire[`NUM_THREADS-1:0][31:0] use_store_data;
    wire[`NUM_THREADS-1:0]       use_valid;
    wire[2:0]                    use_mem_read;
    wire[2:0]                    use_mem_write;
    wire[4:0]                    use_rd;
    wire[`NW_BITS-1:0]           use_warp_num;
    wire[1:0]                    use_wb;
    wire[31:0]                   use_pc;

    // Payload width: per thread 32 (address) + 32 (store data) + 1 (valid),
    // plus 3 + 3 + 5 + NW_BITS + 2 + 32 bits of scalar fields.
    // The `in` and `out` concatenations must list the fields in the same order.
    VX_generic_register #(
        .N(45 + `NW_BITS-1 + 1 + `NUM_THREADS*65)
    ) lsu_buffer(
        .clk  (clk),
        .reset(reset),
        .stall(delay),
        .flush(1'b0),   // this stage is never flushed
        .in   ({
            address,
            lsu_req_if.store_data,
            lsu_req_if.valid,
            lsu_req_if.mem_read,
            lsu_req_if.mem_write,
            lsu_req_if.rd,
            lsu_req_if.warp_num,
            lsu_req_if.wb,
            lsu_req_if.lsu_pc
        }),
        .out  ({
            use_address,
            use_store_data,
            use_valid,
            use_mem_read,
            use_mem_write,
            use_rd,
            use_warp_num,
            use_wb,
            use_pc
        })
    );

    // ------------------------------------------------------------------
    // Core Request: scalar read/write/wb codes are replicated per thread
    // ------------------------------------------------------------------
    assign dcache_req_if.core_req_valid    = use_valid;
    assign dcache_req_if.core_req_warp_num = use_warp_num;
    assign dcache_req_if.core_req_pc       = use_pc;
    assign dcache_req_if.core_req_rd       = use_rd;
    assign dcache_req_if.core_req_addr     = use_address;
    assign dcache_req_if.core_req_data     = use_store_data;
    assign dcache_req_if.core_req_read     = {`NUM_THREADS{use_mem_read}};
    assign dcache_req_if.core_req_write    = {`NUM_THREADS{use_mem_write}};
    assign dcache_req_if.core_req_wb       = {`NUM_THREADS{use_wb}};

    // ------------------------------------------------------------------
    // Core Response
    // ------------------------------------------------------------------
    // NOTE(review): `rd` and `wb` are taken from the response fields named
    // core_rsp_read / core_rsp_write - confirm against the interface definition.
    assign mem_wb_if.rd          = dcache_rsp_if.core_rsp_read;
    assign mem_wb_if.wb          = dcache_rsp_if.core_rsp_write;
    assign mem_wb_if.wb_valid    = dcache_rsp_if.core_rsp_valid;
    assign mem_wb_if.wb_warp_num = dcache_rsp_if.core_rsp_warp_num;
    assign mem_wb_if.loaded_data = dcache_rsp_if.core_rsp_data;

    // The write-back PC is the response PC of the thread selected by the
    // priority encoder over the response valid mask.
    wire[(`LOG2UP(`NUM_THREADS))-1:0] use_pc_index;

`DEBUG_BEGIN
    wire found;
`DEBUG_END

    VX_generic_priority_encoder #(
        .N(`NUM_THREADS)
    ) pick_first_pc (
        .valids(dcache_rsp_if.core_rsp_valid),
        .index (use_pc_index),
        .found (found)
    );

    assign mem_wb_if.mem_wb_pc = dcache_rsp_if.core_rsp_pc[use_pc_index];

endmodule // Memory

View File

@@ -1,16 +1,16 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_lsu_addr_gen
//
// Combinational per-thread address generator: every thread lane adds the
// same 32-bit offset to its own 32-bit base address (result wraps at 32 bits).
module VX_lsu_addr_gen (
    input  wire[`NUM_THREADS-1:0][31:0] base_address,
    input  wire[31:0]                   offset,
    output wire[`NUM_THREADS-1:0][31:0] address
);

    genvar lane;
    generate
        for (lane = 0; lane < `NUM_THREADS; lane = lane + 1) begin : addresses
            // One 32-bit adder per thread lane.
            assign address[lane] = offset + base_address[lane];
        end
    endgenerate

endmodule

View File

@@ -1,80 +1,80 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_scheduler
//
// Issue-side hazard tracker. For every (warp, register) pair `rename_table`
// holds a per-thread mask of writes that have been issued but not yet written
// back. An incoming request is held (`schedule_delay`) while any register it
// reads or writes still has a pending bit, or while the unit it targets
// reports back-pressure.
//
// `count_valid` counts issued register-writing instructions whose writeback
// has not fully completed; `is_empty` is high when that count is zero.
//
// Fixes relative to the previous revision:
//   * `count_valid` is now cleared on reset (it was never initialised, so
//     `is_empty` was X in simulation until forced).
//   * An issue and a completing writeback in the same cycle now leave the
//     counter unchanged. Previously both non-blocking assignments fired and
//     the later one (+1) silently discarded the -1.
module VX_scheduler (
    input wire              clk,
    input wire              reset,
    input wire              memory_delay,
    input wire              exec_delay,
    input wire              gpr_stage_delay,
    VX_frE_to_bckE_req_if   bckE_req_if,
    VX_wb_if                writeback_if,

    output wire             schedule_delay,
    output wire             is_empty
);

    reg[31:0] count_valid;

    assign is_empty = count_valid == 0;

    // Pending-write mask, indexed [warp][register] -> one bit per thread.
    reg[31:0][`NUM_THREADS-1:0] rename_table[`NUM_WARPS-1:0];

    // A writeback that actually updates a register (x0 is never tracked).
    wire valid_wb = (writeback_if.wb != 0) && (|writeback_if.wb_valid) && (writeback_if.rd != 0);
    // The incoming request will eventually write a register other than x0.
    wire wb_inc   = (bckE_req_if.wb != 0) && (bckE_req_if.rd != 0);

    // Does the request's source / destination still have pending writes?
    wire rs1_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs1] != 0;
    wire rs2_rename = rename_table[bckE_req_if.warp_num][bckE_req_if.rs2] != 0;
    wire rd_rename  = rename_table[bckE_req_if.warp_num][bckE_req_if.rd ] != 0;

    wire is_store = (bckE_req_if.mem_write != `NO_MEM_WRITE);
    wire is_load  = (bckE_req_if.mem_read  != `NO_MEM_READ);

    // classify our next instruction.
    wire is_mem  = is_store || is_load;
    wire is_gpu  = (bckE_req_if.is_wspawn || bckE_req_if.is_tmc || bckE_req_if.is_barrier || bckE_req_if.is_split);
    wire is_csr  = bckE_req_if.is_csr;
    wire is_exec = !is_mem && !is_gpu && !is_csr;

    // rs2 only creates a dependency when the instruction really reads it.
    wire using_rs2 = (bckE_req_if.rs2_src == `RS2_REG) || is_store || bckE_req_if.is_barrier || bckE_req_if.is_wspawn;

    wire rs1_rename_qual = ((rs1_rename) && (bckE_req_if.rs1 != 0));
    wire rs2_rename_qual = ((rs2_rename) && (bckE_req_if.rs2 != 0 && using_rs2));
    wire rd_rename_qual  = ((rd_rename ) && (bckE_req_if.rd  != 0));

    wire rename_valid = rs1_rename_qual || rs2_rename_qual || rd_rename_qual;

    assign schedule_delay = ((rename_valid) && (|bckE_req_if.valid))
                         || (memory_delay && is_mem)
                         || (gpr_stage_delay && (is_mem || is_exec))
                         || (exec_delay && is_exec);

    // Counter events. Both are evaluated on the pre-clock-edge table contents.
    wire issue_inc  = !schedule_delay && wb_inc;
    wire retire_dec = valid_wb
                   && (0 == (rename_table[writeback_if.wb_warp_num][writeback_if.rd] & ~writeback_if.wb_valid));

    integer i;
    integer w;

    always @(posedge clk) begin
        if (reset) begin
            count_valid <= 0;
            for (w = 0; w < `NUM_WARPS; w=w+1) begin
                for (i = 0; i < 32; i = i + 1) begin
                    rename_table[w][i] <= 0;
                end
            end
        end else begin
            // Writeback clears the pending bits of the threads it covers.
            if (valid_wb) begin
                rename_table[writeback_if.wb_warp_num][writeback_if.rd] <= rename_table[writeback_if.wb_warp_num][writeback_if.rd] & (~writeback_if.wb_valid);
            end

            // Issue marks the destination as pending for every active thread.
            // Kept after the writeback update so that, as before, the issue
            // wins if both target the same entry.
            if (issue_inc) begin
                rename_table[bckE_req_if.warp_num][bckE_req_if.rd] <= bckE_req_if.valid;
            end

            // Outstanding-instruction counter: simultaneous issue and
            // completion cancel out.
            if (issue_inc && !retire_dec) begin
                count_valid <= count_valid + 1;
            end else if (retire_dec && !issue_inc) begin
                count_valid <= count_valid - 1;
            end
        end
    end

endmodule

View File

@@ -2,86 +2,86 @@
// VX_warp
//
// Per-warp state: the stored thread mask and the program counter.
//
//   valid - thread mask presented this cycle: `thread_mask` when
//           `change_mask` is asserted, all zeros while stalled, otherwise
//           the stored mask.
//   PC    - PC presented this cycle: `jal_dest` on a jump, else
//           `branch_dest` on a taken branch, else the stored PC.
//
// Fix relative to the previous revision: the stored thread mask used the
// same identifier as the `valid` output port. That redeclared the port as a
// `reg`, and drove it from both a clocked `always` block and a continuous
// `assign` that read its own target (multiple drivers and a combinational
// loop). The stored mask is now a separate register, `valid_r`, and `valid`
// is driven only by the continuous assignment.
module VX_warp (
    input wire                      clk,
    input wire                      reset,
    input wire                      stall,
    input wire                      remove,
    input wire[`NUM_THREADS-1:0]    thread_mask,
    input wire                      change_mask,
    input wire                      jal,
    input wire[31:0]                jal_dest,
    input wire                      branch_dir,
    input wire[31:0]                branch_dest,
    input wire                      wspawn,
    input wire[31:0]                wspawn_pc,

    output wire[31:0]               PC,
    output wire[`NUM_THREADS-1:0]   valid
);

    reg[31:0]    real_PC;   // stored PC
    logic [31:0] temp_PC;   // PC selected for this cycle
    logic [31:0] use_PC;

    reg[`NUM_THREADS-1:0] valid_r;  // stored thread mask

    // Power-up state: PC = 0 and only thread 0 active.
    integer ini_cur_th = 0;
    initial begin
        real_PC = 0;
        for (ini_cur_th = 1; ini_cur_th < `NUM_THREADS; ini_cur_th=ini_cur_th+1) begin
            valid_r[ini_cur_th] = 0;
        end
        valid_r[0] = 1;
    end

    // Stored mask: `remove` deactivates every thread and takes priority over
    // `change_mask`, which loads a new mask. The mask is not affected by reset.
    always @(posedge clk) begin
        if (remove) begin
            valid_r <= {`NUM_THREADS{1'b0}};
        end else if (change_mask) begin
            valid_r <= thread_mask;
        end
    end

    // Output mask: a new mask is visible in the same cycle it is applied;
    // a stalled warp presents no active threads.
    assign valid = change_mask ? thread_mask
                 : stall       ? {`NUM_THREADS{1'b0}}
                 :               valid_r;

    // PC selection: jump has priority over a taken branch.
    always @(*) begin
        if (jal == 1'b1) begin
            temp_PC = jal_dest;
            // $display("LINKING TO %h", temp_PC);
        end else if (branch_dir == 1'b1) begin
            temp_PC = branch_dest;
        end else begin
            temp_PC = real_PC;
        end
    end

    assign use_PC = temp_PC;
    assign PC     = temp_PC;

    // Stored PC: reset > wspawn > advance by one instruction (4 bytes) when
    // not stalled > hold the selected PC while stalled.
    always @(posedge clk) begin
        if (reset) begin
            real_PC <= 0;
        end else if (wspawn == 1'b1) begin
            // $display("Inside warp ***** Spawn @ %H",wspawn_pc);
            real_PC <= wspawn_pc;
        end else if (!stall) begin
            real_PC <= use_PC + 32'h4;
        end else begin
            real_PC <= use_PC;
        end
    end

endmodule

View File

@@ -1,341 +1,341 @@
`include "VX_define.vh" `include "VX_define.vh"
module VX_warp_sched ( module VX_warp_sched (
input wire clk, // Clock input wire clk, // Clock
input wire reset, input wire reset,
input wire stall, input wire stall,
// Wspawn // Wspawn
input wire wspawn, input wire wspawn,
input wire[31:0] wsapwn_pc, input wire[31:0] wsapwn_pc,
input wire[`NUM_WARPS-1:0] wspawn_new_active, input wire[`NUM_WARPS-1:0] wspawn_new_active,
// CTM // CTM
input wire ctm, input wire ctm,
input wire[`NUM_THREADS-1:0] ctm_mask, input wire[`NUM_THREADS-1:0] ctm_mask,
input wire[`NW_BITS-1:0] ctm_warp_num, input wire[`NW_BITS-1:0] ctm_warp_num,
// WHALT // WHALT
input wire whalt, input wire whalt,
input wire[`NW_BITS-1:0] whalt_warp_num, input wire[`NW_BITS-1:0] whalt_warp_num,
input wire is_barrier, input wire is_barrier,
`DEBUG_BEGIN `DEBUG_BEGIN
input wire[31:0] barrier_id, input wire[31:0] barrier_id,
`DEBUG_END `DEBUG_END
input wire[$clog2(`NUM_WARPS):0] num_warps, input wire[$clog2(`NUM_WARPS):0] num_warps,
input wire[`NW_BITS-1:0] barrier_warp_num, input wire[`NW_BITS-1:0] barrier_warp_num,
// WSTALL // WSTALL
input wire wstall, input wire wstall,
input wire[`NW_BITS-1:0] wstall_warp_num, input wire[`NW_BITS-1:0] wstall_warp_num,
// Split // Split
input wire is_split, input wire is_split,
input wire dont_split, input wire dont_split,
input wire[`NUM_THREADS-1:0] split_new_mask, input wire[`NUM_THREADS-1:0] split_new_mask,
input wire[`NUM_THREADS-1:0] split_later_mask, input wire[`NUM_THREADS-1:0] split_later_mask,
input wire[31:0] split_save_pc, input wire[31:0] split_save_pc,
input wire[`NW_BITS-1:0] split_warp_num, input wire[`NW_BITS-1:0] split_warp_num,
// Join // Join
input wire is_join, input wire is_join,
input wire[`NW_BITS-1:0] join_warp_num, input wire[`NW_BITS-1:0] join_warp_num,
// JAL // JAL
input wire jal, input wire jal,
input wire[31:0] jal_dest, input wire[31:0] jal_dest,
input wire[`NW_BITS-1:0] jal_warp_num, input wire[`NW_BITS-1:0] jal_warp_num,
// Branch // Branch
input wire branch_valid, input wire branch_valid,
input wire branch_dir, input wire branch_dir,
input wire[31:0] branch_dest, input wire[31:0] branch_dest,
input wire[`NW_BITS-1:0] branch_warp_num, input wire[`NW_BITS-1:0] branch_warp_num,
output wire[`NUM_THREADS-1:0] thread_mask, output wire[`NUM_THREADS-1:0] thread_mask,
output wire[`NW_BITS-1:0] warp_num, output wire[`NW_BITS-1:0] warp_num,
output wire[31:0] warp_pc, output wire[31:0] warp_pc,
output wire ebreak, output wire ebreak,
output wire scheduled_warp, output wire scheduled_warp,
input wire[`NW_BITS-1:0] icache_stage_wid, input wire[`NW_BITS-1:0] icache_stage_wid,
input wire[`NUM_THREADS-1:0] icache_stage_valids input wire[`NUM_THREADS-1:0] icache_stage_valids
); );
wire update_use_wspawn; wire update_use_wspawn;
wire update_visible_active; wire update_visible_active;
wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0]; wire[(1+32+`NUM_THREADS-1):0] d[`NUM_WARPS-1:0];
wire join_fall; wire join_fall;
wire[31:0] join_pc; wire[31:0] join_pc;
wire[`NUM_THREADS-1:0] join_tm; wire[`NUM_THREADS-1:0] join_tm;
`DEBUG_BEGIN `DEBUG_BEGIN
wire in_wspawn = wspawn; wire in_wspawn = wspawn;
wire in_ctm = ctm; wire in_ctm = ctm;
wire in_whalt = whalt; wire in_whalt = whalt;
wire in_wstall = wstall; wire in_wstall = wstall;
`DEBUG_END `DEBUG_END
reg[`NUM_WARPS-1:0] warp_active; reg[`NUM_WARPS-1:0] warp_active;
reg[`NUM_WARPS-1:0] warp_stalled; reg[`NUM_WARPS-1:0] warp_stalled;
reg [`NUM_WARPS-1:0] visible_active; reg [`NUM_WARPS-1:0] visible_active;
wire[`NUM_WARPS-1:0] use_active; wire[`NUM_WARPS-1:0] use_active;
reg [`NUM_WARPS-1:0] warp_lock; reg [`NUM_WARPS-1:0] warp_lock;
wire wstall_this_cycle; wire wstall_this_cycle;
reg[`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0]; reg[`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
reg[31:0] warp_pcs[`NUM_WARPS-1:0]; reg[31:0] warp_pcs[`NUM_WARPS-1:0];
// barriers // barriers
reg[`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0]; reg[`NUM_WARPS-1:0] barrier_stall_mask[(`NUM_BARRIERS-1):0];
wire reached_barrier_limit; wire reached_barrier_limit;
wire[`NUM_WARPS-1:0] curr_barrier_mask; wire[`NUM_WARPS-1:0] curr_barrier_mask;
wire[$clog2(`NUM_WARPS):0] curr_barrier_count; wire[$clog2(`NUM_WARPS):0] curr_barrier_count;
// wsapwn // wsapwn
reg[31:0] use_wsapwn_pc; reg[31:0] use_wsapwn_pc;
reg[`NUM_WARPS-1:0] use_wsapwn; reg[`NUM_WARPS-1:0] use_wsapwn;
wire[`NW_BITS-1:0] warp_to_schedule; wire[`NW_BITS-1:0] warp_to_schedule;
wire schedule; wire schedule;
wire hazard; wire hazard;
wire global_stall; wire global_stall;
wire real_schedule; wire real_schedule;
wire[31:0] new_pc; wire[31:0] new_pc;
reg[`NUM_WARPS-1:0] total_barrier_stall; reg[`NUM_WARPS-1:0] total_barrier_stall;
reg didnt_split; reg didnt_split;
integer curr_w_help; integer curr_w_help;
integer curr_barrier; integer curr_barrier;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (curr_barrier = 0; curr_barrier < `NUM_BARRIERS; curr_barrier=curr_barrier+1) begin for (curr_barrier = 0; curr_barrier < `NUM_BARRIERS; curr_barrier=curr_barrier+1) begin
barrier_stall_mask[curr_barrier] <= 0; barrier_stall_mask[curr_barrier] <= 0;
end end
use_wsapwn_pc <= 0; use_wsapwn_pc <= 0;
use_wsapwn <= 0; use_wsapwn <= 0;
warp_pcs[0] <= (32'h80000000 - 4); warp_pcs[0] <= (32'h80000000 - 4);
warp_active[0] <= 1; // Activating first warp warp_active[0] <= 1; // Activating first warp
visible_active[0] <= 1; // Activating first warp visible_active[0] <= 1; // Activating first warp
thread_masks[0] <= 1; // Activating first thread in first warp thread_masks[0] <= 1; // Activating first thread in first warp
warp_stalled <= 0; warp_stalled <= 0;
didnt_split <= 0; didnt_split <= 0;
warp_lock <= 0; warp_lock <= 0;
// total_barrier_stall = 0; // total_barrier_stall = 0;
for (curr_w_help = 1; curr_w_help < `NUM_WARPS; curr_w_help=curr_w_help+1) begin for (curr_w_help = 1; curr_w_help < `NUM_WARPS; curr_w_help=curr_w_help+1) begin
warp_pcs[curr_w_help] <= 0; warp_pcs[curr_w_help] <= 0;
warp_active[curr_w_help] <= 0; // Activating first warp warp_active[curr_w_help] <= 0; // Activating first warp
visible_active[curr_w_help] <= 0; // Activating first warp visible_active[curr_w_help] <= 0; // Activating first warp
thread_masks[curr_w_help] <= 1; // Activating first thread in first warp thread_masks[curr_w_help] <= 1; // Activating first thread in first warp
end end
end else begin end else begin
// Wsapwning warps // Wsapwning warps
if (wspawn) begin if (wspawn) begin
warp_active <= wspawn_new_active; warp_active <= wspawn_new_active;
use_wsapwn_pc <= wsapwn_pc; use_wsapwn_pc <= wsapwn_pc;
use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1); use_wsapwn <= wspawn_new_active & (~`NUM_WARPS'b1);
end end
if (is_barrier) begin if (is_barrier) begin
warp_stalled[barrier_warp_num] <= 0; warp_stalled[barrier_warp_num] <= 0;
if (reached_barrier_limit) begin if (reached_barrier_limit) begin
barrier_stall_mask[barrier_id] <= 0; barrier_stall_mask[barrier_id] <= 0;
end else begin end else begin
barrier_stall_mask[barrier_id][barrier_warp_num] <= 1; barrier_stall_mask[barrier_id][barrier_warp_num] <= 1;
end end
end else if (ctm) begin end else if (ctm) begin
thread_masks[ctm_warp_num] <= ctm_mask; thread_masks[ctm_warp_num] <= ctm_mask;
warp_stalled[ctm_warp_num] <= 0; warp_stalled[ctm_warp_num] <= 0;
end else if (is_join && !didnt_split) begin end else if (is_join && !didnt_split) begin
if (!join_fall) begin if (!join_fall) begin
warp_pcs[join_warp_num] <= join_pc; warp_pcs[join_warp_num] <= join_pc;
end end
thread_masks[join_warp_num] <= join_tm; thread_masks[join_warp_num] <= join_tm;
didnt_split <= 0; didnt_split <= 0;
end else if (is_split) begin end else if (is_split) begin
warp_stalled[split_warp_num] <= 0; warp_stalled[split_warp_num] <= 0;
if (!dont_split) begin if (!dont_split) begin
thread_masks[split_warp_num] <= split_new_mask; thread_masks[split_warp_num] <= split_new_mask;
didnt_split <= 0; didnt_split <= 0;
end else begin end else begin
didnt_split <= 1; didnt_split <= 1;
end end
end end
if (whalt) begin if (whalt) begin
warp_active[whalt_warp_num] <= 0; warp_active[whalt_warp_num] <= 0;
visible_active[whalt_warp_num] <= 0; visible_active[whalt_warp_num] <= 0;
end end
if (update_use_wspawn) begin if (update_use_wspawn) begin
use_wsapwn[warp_to_schedule] <= 0; use_wsapwn[warp_to_schedule] <= 0;
thread_masks[warp_to_schedule] <= 1; thread_masks[warp_to_schedule] <= 1;
end end
// Stalling the scheduling of warps // Stalling the scheduling of warps
if (wstall) begin if (wstall) begin
warp_stalled[wstall_warp_num] <= 1; warp_stalled[wstall_warp_num] <= 1;
visible_active[wstall_warp_num] <= 0; visible_active[wstall_warp_num] <= 0;
end end
// Refilling active warps // Refilling active warps
if (update_visible_active) begin if (update_visible_active) begin
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall) & ~warp_lock; visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall) & ~warp_lock;
end end
// Don't change state if stall // Don't change state if stall
if (!global_stall && real_schedule && (thread_mask != 0)) begin if (!global_stall && real_schedule && (thread_mask != 0)) begin
visible_active[warp_to_schedule] <= 0; visible_active[warp_to_schedule] <= 0;
warp_pcs[warp_to_schedule] <= new_pc; warp_pcs[warp_to_schedule] <= new_pc;
end end
// Jal // Jal
if (jal) begin if (jal) begin
warp_pcs[jal_warp_num] <= jal_dest; warp_pcs[jal_warp_num] <= jal_dest;
warp_stalled[jal_warp_num] <= 0; warp_stalled[jal_warp_num] <= 0;
end end
// Branch // Branch
if (branch_valid) begin if (branch_valid) begin
if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest; if (branch_dir) warp_pcs[branch_warp_num] <= branch_dest;
warp_stalled[branch_warp_num] <= 0; warp_stalled[branch_warp_num] <= 0;
end end
// Lock/Release // Lock/Release
if (scheduled_warp && !stall) begin if (scheduled_warp && !stall) begin
warp_lock[warp_num] <= 1'b1; warp_lock[warp_num] <= 1'b1;
// warp_lock <= {`NUM_WARPS{1'b1}}; // warp_lock <= {`NUM_WARPS{1'b1}};
end end
if (|icache_stage_valids && !stall) begin if (|icache_stage_valids && !stall) begin
warp_lock[icache_stage_wid] <= 1'b0; warp_lock[icache_stage_wid] <= 1'b0;
// warp_lock <= {`NUM_WARPS{1'b0}}; // warp_lock <= {`NUM_WARPS{1'b0}};
end end
end end
end end
VX_countones #( VX_countones #(
.N(`NUM_WARPS) .N(`NUM_WARPS)
) barrier_count ( ) barrier_count (
.valids(curr_barrier_mask), .valids(curr_barrier_mask),
.count (curr_barrier_count) .count (curr_barrier_count)
); );
wire [$clog2(`NUM_WARPS):0] count_visible_active; wire [$clog2(`NUM_WARPS):0] count_visible_active;
VX_countones #( VX_countones #(
.N(`NUM_WARPS) .N(`NUM_WARPS)
) num_visible ( ) num_visible (
.valids(visible_active), .valids(visible_active),
.count (count_visible_active) .count (count_visible_active)
); );
// assign curr_barrier_count = $countones(curr_barrier_mask); // assign curr_barrier_count = $countones(curr_barrier_mask);
assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0]; assign curr_barrier_mask = barrier_stall_mask[barrier_id][`NUM_WARPS-1:0];
assign reached_barrier_limit = curr_barrier_count == (num_warps); assign reached_barrier_limit = curr_barrier_count == (num_warps);
assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug assign wstall_this_cycle = wstall && (wstall_warp_num == warp_to_schedule); // Maybe bug
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3]; assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
// integer curr_b; // integer curr_b;
// always @(*) begin // always @(*) begin
// total_barrier_stall = 0; // total_barrier_stall = 0;
// for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1) // for (curr_b = 0; curr_b < `NUM_BARRIERS; curr_b=curr_b+1)
// begin // begin
// total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b]; // total_barrier_stall[`NUM_WARPS-1:0] = total_barrier_stall[`NUM_WARPS-1:0] | barrier_stall_mask[curr_b];
// end // end
// end // end
assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join); assign update_visible_active = (count_visible_active < 1) && !(stall || wstall_this_cycle || hazard || is_join);
wire[(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]}; wire[(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0 , thread_masks[split_warp_num]};
wire[(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask}; wire[(1+32+`NUM_THREADS-1):0] q2 = {1'b0, split_save_pc , split_later_mask};
assign {join_fall, join_pc, join_tm} = d[join_warp_num]; assign {join_fall, join_pc, join_tm} = d[join_warp_num];
genvar curr_warp; genvar curr_warp;
generate generate
for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks for (curr_warp = 0; curr_warp < `NUM_WARPS; curr_warp = curr_warp + 1) begin : stacks
wire correct_warp_s = (curr_warp == split_warp_num); wire correct_warp_s = (curr_warp == split_warp_num);
wire correct_warp_j = (curr_warp == join_warp_num); wire correct_warp_j = (curr_warp == join_warp_num);
wire push = (is_split && !dont_split) && correct_warp_s; wire push = (is_split && !dont_split) && correct_warp_s;
wire pop = is_join && correct_warp_j; wire pop = is_join && correct_warp_j;
VX_generic_stack #( VX_generic_stack #(
.WIDTH(1+32+`NUM_THREADS), .WIDTH(1+32+`NUM_THREADS),
.DEPTH($clog2(`NUM_THREADS)+1) .DEPTH($clog2(`NUM_THREADS)+1)
) ipdom_stack( ) ipdom_stack(
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.push (push), .push (push),
.pop (pop), .pop (pop),
.d (d[curr_warp]), .d (d[curr_warp]),
.q1 (q1), .q1 (q1),
.q2 (q2) .q2 (q2)
); );
end end
endgenerate endgenerate
// wire should_stall = stall || (jal && (warp_to_schedule == jal_warp_num)) || (branch_dir && (warp_to_schedule == branch_warp_num)); // wire should_stall = stall || (jal && (warp_to_schedule == jal_warp_num)) || (branch_dir && (warp_to_schedule == branch_warp_num));
wire should_jal = (jal && (warp_to_schedule == jal_warp_num)); wire should_jal = (jal && (warp_to_schedule == jal_warp_num));
wire should_bra = (branch_dir && (warp_to_schedule == branch_warp_num)); wire should_bra = (branch_dir && (warp_to_schedule == branch_warp_num));
assign hazard = (should_jal || should_bra) && schedule; assign hazard = (should_jal || should_bra) && schedule;
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0]; assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join); assign global_stall = (stall || wstall_this_cycle || hazard || !real_schedule || is_join);
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join) && !reset; assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || is_join) && !reset;
wire real_use_wspawn = use_wsapwn[warp_to_schedule]; wire real_use_wspawn = use_wsapwn[warp_to_schedule];
assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule]; assign warp_pc = real_use_wspawn ? use_wsapwn_pc : warp_pcs[warp_to_schedule];
assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]); assign thread_mask = (global_stall) ? 0 : (real_use_wspawn ? `NUM_THREADS'b1 : thread_masks[warp_to_schedule]);
assign warp_num = warp_to_schedule; assign warp_num = warp_to_schedule;
assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall; assign update_use_wspawn = use_wsapwn[warp_to_schedule] && !global_stall;
assign new_pc = warp_pc + 4; assign new_pc = warp_pc + 4;
assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock)) : visible_active; assign use_active = (count_visible_active < 1) ? (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock)) : visible_active;
// Choosing a warp to schedule // Choosing a warp to schedule
VX_priority_encoder #( VX_priority_encoder #(
.N(`NUM_WARPS) .N(`NUM_WARPS)
) choose_schedule ( ) choose_schedule (
.valids(use_active), .valids(use_active),
.index (warp_to_schedule), .index (warp_to_schedule),
.found (schedule) .found (schedule)
); );
// always @(*) begin // always @(*) begin
// $display("WarpPC: %h",warp_pc); // $display("WarpPC: %h",warp_pc);
// $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]); // $display("real_schedule: %d, schedule: %d, warp_stalled: %d, warp_to_schedule: %d, total_barrier_stall: %d",real_schedule, schedule, warp_stalled[warp_to_schedule], warp_to_schedule, total_barrier_stall[warp_to_schedule]);
// end // end
// Valid counter // Valid counter
// assign num_active = $countones(visible_active); // assign num_active = $countones(visible_active);
// VX_one_counter valid_counter( // VX_one_counter valid_counter(
// .valids(visible_active), // .valids(visible_active),
// .ones_found() // .ones_found()
// ); // );
assign ebreak = (warp_active == 0); assign ebreak = (warp_active == 0);
endmodule endmodule

View File

@@ -1,86 +1,86 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_writeback: picks at most one writeback producer per cycle (EXEC, CSR or MEM),
// registers the selected writeback for one cycle, and drives it onto writeback_if.
// Selection is a fixed priority: exec first, then csr, then mem. The no_slot_*
// outputs report which requesting producer was NOT selected this cycle.
module VX_writeback ( module VX_writeback (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Mem WB info // Mem WB info
VX_inst_mem_wb_if mem_wb_if, VX_inst_mem_wb_if mem_wb_if,
// EXEC Unit WB info // EXEC Unit WB info
VX_inst_exec_wb_if inst_exec_wb_if, VX_inst_exec_wb_if inst_exec_wb_if,
// CSR Unit WB info // CSR Unit WB info
VX_csr_wb_if csr_wb_if, VX_csr_wb_if csr_wb_if,
// Actual WB to GPR // Actual WB to GPR
VX_wb_if writeback_if, VX_wb_if writeback_if,
output wire no_slot_mem, output wire no_slot_mem,
output wire no_slot_exec, output wire no_slot_exec,
output wire no_slot_csr output wire no_slot_csr
); );
// Pre-register (combinational) view of the selected writeback.
VX_wb_if writeback_tempp_if(); VX_wb_if writeback_tempp_if();
// A producer requests a slot when its wb field is non-zero and at least one valid bit is set.
wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.wb_valid); wire exec_wb = (inst_exec_wb_if.wb != 0) && (|inst_exec_wb_if.wb_valid);
wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.wb_valid); wire mem_wb = (mem_wb_if.wb != 0) && (|mem_wb_if.wb_valid);
wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid); wire csr_wb = (csr_wb_if.wb != 0) && (|csr_wb_if.valid);
// mem loses to exec or csr; csr loses to exec; exec never loses.
assign no_slot_mem = mem_wb && (exec_wb || csr_wb); assign no_slot_mem = mem_wb && (exec_wb || csr_wb);
assign no_slot_csr = csr_wb && (exec_wb); assign no_slot_csr = csr_wb && (exec_wb);
assign no_slot_exec = 0; assign no_slot_exec = 0;
// Each field below uses the same exec > csr > mem priority chain and falls back to 0
// (or the 32'hdeadbeef marker for the PC) when nobody requests a slot.
assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result : assign writeback_tempp_if.write_data = exec_wb ? inst_exec_wb_if.alu_result :
csr_wb ? csr_wb_if.csr_result : csr_wb ? csr_wb_if.csr_result :
mem_wb ? mem_wb_if.loaded_data : mem_wb ? mem_wb_if.loaded_data :
0; 0;
assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid : assign writeback_tempp_if.wb_valid = exec_wb ? inst_exec_wb_if.wb_valid :
csr_wb ? csr_wb_if.valid : csr_wb ? csr_wb_if.valid :
mem_wb ? mem_wb_if.wb_valid : mem_wb ? mem_wb_if.wb_valid :
0; 0;
assign writeback_tempp_if.rd = exec_wb ? inst_exec_wb_if.rd : assign writeback_tempp_if.rd = exec_wb ? inst_exec_wb_if.rd :
csr_wb ? csr_wb_if.rd : csr_wb ? csr_wb_if.rd :
mem_wb ? mem_wb_if.rd : mem_wb ? mem_wb_if.rd :
0; 0;
assign writeback_tempp_if.wb = exec_wb ? inst_exec_wb_if.wb : assign writeback_tempp_if.wb = exec_wb ? inst_exec_wb_if.wb :
csr_wb ? csr_wb_if.wb : csr_wb ? csr_wb_if.wb :
mem_wb ? mem_wb_if.wb : mem_wb ? mem_wb_if.wb :
0; 0;
assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num : assign writeback_tempp_if.wb_warp_num = exec_wb ? inst_exec_wb_if.wb_warp_num :
csr_wb ? csr_wb_if.warp_num : csr_wb ? csr_wb_if.warp_num :
mem_wb ? mem_wb_if.wb_warp_num : mem_wb ? mem_wb_if.wb_warp_num :
0; 0;
// The CSR path supplies no PC here, so a csr writeback carries the 32'hdeadbeef marker.
assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc : assign writeback_tempp_if.wb_pc = exec_wb ? inst_exec_wb_if.exec_wb_pc :
csr_wb ? 32'hdeadbeef : csr_wb ? 32'hdeadbeef :
mem_wb ? mem_wb_if.mem_wb_pc : mem_wb ? mem_wb_if.mem_wb_pc :
32'hdeadbeef; 32'hdeadbeef;
// Constant 0 used to tie off the register's stall and flush inputs.
wire zero = 0; wire zero = 0;
wire [`NUM_THREADS-1:0][31:0] use_wb_data; wire [`NUM_THREADS-1:0][31:0] use_wb_data;
// Output register for the selected writeback; it never stalls or flushes.
// NOTE(review): N presumably equals the packed width of the concatenation below
// (per-thread data + valid bits, rd, wb, warp number, 32-bit pc) - confirm against VX_wb_if.
VX_generic_register #( VX_generic_register #(
.N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33) .N(39 + `NW_BITS-1 + 1 + `NUM_THREADS*33)
) wb_register ( ) wb_register (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(zero), .stall(zero),
.flush(zero), .flush(zero),
.in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}), .in ({writeback_tempp_if.write_data, writeback_tempp_if.wb_valid, writeback_tempp_if.rd, writeback_tempp_if.wb, writeback_tempp_if.wb_warp_num, writeback_tempp_if.wb_pc}),
.out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc}) .out ({use_wb_data , writeback_if.wb_valid, writeback_if.rd, writeback_if.wb, writeback_if.wb_warp_num, writeback_if.wb_pc})
); );
// Latches lane 0's data whenever a valid writeback targets register 28; exposed to
// Verilator via the public pragma. It has no reset value.
// NOTE(review): presumably a test/debug observation hook - confirm with the testbench.
reg [31:0] last_data_wb /* verilator public */; reg [31:0] last_data_wb /* verilator public */;
always @(posedge clk) begin always @(posedge clk) begin
if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin if ((|writeback_if.wb_valid) && (writeback_if.wb != 0) && (writeback_if.rd == 28)) begin
last_data_wb <= use_wb_data[0]; last_data_wb <= use_wb_data[0];
end end
end end
assign writeback_if.write_data = use_wb_data; assign writeback_if.write_data = use_wb_data;
endmodule : VX_writeback endmodule : VX_writeback

View File

@@ -2,142 +2,142 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
// Vortex: top level of one core. It instantiates the front end, the scheduler, the
// back end and the memory controller (VX_dmem_ctrl), and exposes flat DRAM
// request/response buses for the D-cache and I-cache, an LLC snoop port, an IO
// output port and the ebreak flag.
module Vortex #( module Vortex #(
parameter CORE_ID = 0 parameter CORE_ID = 0
) ( ) (
// Clock // Clock
input wire clk, input wire clk,
input wire reset, input wire reset,
// IO // IO
output wire io_valid, output wire io_valid,
output wire [31:0] io_data, output wire [31:0] io_data,
// DRAM Dcache Req // DRAM Dcache Req
output wire dram_req_read, output wire dram_req_read,
output wire dram_req_write, output wire dram_req_write,
output wire [31:0] dram_req_addr, output wire [31:0] dram_req_addr,
output wire [`DBANK_LINE_SIZE-1:0] dram_req_data, output wire [`DBANK_LINE_SIZE-1:0] dram_req_data,
input wire dram_req_ready, input wire dram_req_ready,
// DRAM Dcache Rsp // DRAM Dcache Rsp
input wire dram_rsp_valid, input wire dram_rsp_valid,
input wire [31:0] dram_rsp_addr, input wire [31:0] dram_rsp_addr,
input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data, input wire [`DBANK_LINE_SIZE-1:0] dram_rsp_data,
output wire dram_rsp_ready, output wire dram_rsp_ready,
// DRAM Icache Req // DRAM Icache Req
output wire I_dram_req_read, output wire I_dram_req_read,
output wire I_dram_req_write, output wire I_dram_req_write,
output wire [31:0] I_dram_req_addr, output wire [31:0] I_dram_req_addr,
output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data, output wire [`IBANK_LINE_SIZE-1:0] I_dram_req_data,
input wire I_dram_req_ready, input wire I_dram_req_ready,
// DRAM Icache Rsp // DRAM Icache Rsp
input wire I_dram_rsp_valid, input wire I_dram_rsp_valid,
input wire [31:0] I_dram_rsp_addr, input wire [31:0] I_dram_rsp_addr,
input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data, input wire [`IBANK_LINE_SIZE-1:0] I_dram_rsp_data,
output wire I_dram_rsp_ready, output wire I_dram_rsp_ready,
// LLC Snooping // LLC Snooping
input wire llc_snp_req_valid, input wire llc_snp_req_valid,
input wire [31:0] llc_snp_req_addr, input wire [31:0] llc_snp_req_addr,
output wire llc_snp_req_ready, output wire llc_snp_req_ready,
output wire ebreak output wire ebreak
); );
`DEBUG_BEGIN `DEBUG_BEGIN
wire scheduler_empty; wire scheduler_empty;
`DEBUG_END `DEBUG_END
// Stall signals exchanged between the scheduler and the back end.
wire memory_delay; wire memory_delay;
wire exec_delay; wire exec_delay;
wire gpr_stage_delay; wire gpr_stage_delay;
wire schedule_delay; wire schedule_delay;
// Dcache Interface // Dcache Interface
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if(); VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_rsp_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if(); VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_if();
// Copy of dcache_req_if with the valid bits masked for IO writes (see below);
// this is the interface actually handed to the memory controller.
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if(); VX_gpu_dcache_req_if #(.NUM_REQUESTS(`DNUM_REQUESTS)) dcache_req_qual_if();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_req_if(); VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_res_if(); VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`DBANK_LINE_WORDS)) gpu_dcache_dram_res_if();
// Bridge the flat D-cache DRAM ports to the interface instances.
assign gpu_dcache_dram_res_if.dram_rsp_valid = dram_rsp_valid; assign gpu_dcache_dram_res_if.dram_rsp_valid = dram_rsp_valid;
assign gpu_dcache_dram_res_if.dram_rsp_addr = dram_rsp_addr; assign gpu_dcache_dram_res_if.dram_rsp_addr = dram_rsp_addr;
assign dram_req_write = gpu_dcache_dram_req_if.dram_req_write; assign dram_req_write = gpu_dcache_dram_req_if.dram_req_write;
assign dram_req_read = gpu_dcache_dram_req_if.dram_req_read; assign dram_req_read = gpu_dcache_dram_req_if.dram_req_read;
assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr; assign dram_req_addr = gpu_dcache_dram_req_if.dram_req_addr;
// The response-ready handshake is carried on the request interface.
assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready; assign dram_rsp_ready = gpu_dcache_dram_req_if.dram_rsp_ready;
assign gpu_dcache_dram_req_if.dram_req_ready = dram_req_ready; assign gpu_dcache_dram_req_if.dram_req_ready = dram_req_ready;
// Repack the flat line-wide data buses to/from the interface's array of 32-bit words.
genvar i; genvar i;
generate generate
for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin for (i = 0; i < `DBANK_LINE_WORDS; i=i+1) begin
assign gpu_dcache_dram_res_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32]; assign gpu_dcache_dram_res_if.dram_rsp_data[i] = dram_rsp_data[i * 32 +: 32];
assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i]; assign dram_req_data[i * 32 +: 32] = gpu_dcache_dram_req_if.dram_req_data[i];
end end
endgenerate endgenerate
// IO output: asserted when memory is not delayed, some request lane is valid, and
// lane 0 is a write to `IO_BUS_ADDR. Only lane 0's write type, address and data are inspected.
wire temp_io_valid = (!memory_delay) wire temp_io_valid = (!memory_delay)
&& (|dcache_req_if.core_req_valid) && (|dcache_req_if.core_req_valid)
&& (dcache_req_if.core_req_write[0] != `NO_MEM_WRITE) && (dcache_req_if.core_req_write[0] != `NO_MEM_WRITE)
&& (dcache_req_if.core_req_addr[0] == `IO_BUS_ADDR); && (dcache_req_if.core_req_addr[0] == `IO_BUS_ADDR);
wire [31:0] temp_io_data = dcache_req_if.core_req_data[0]; wire [31:0] temp_io_data = dcache_req_if.core_req_data[0];
assign io_valid = temp_io_valid; assign io_valid = temp_io_valid;
assign io_data = temp_io_data; assign io_data = temp_io_data;
// When an IO write is detected, all request valid bits are cleared so the request
// is not forwarded to the D-cache; every other field passes through unchanged.
assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}}; assign dcache_req_qual_if.core_req_valid = dcache_req_if.core_req_valid & {`NUM_THREADS{~io_valid}};
assign dcache_req_qual_if.core_req_read = dcache_req_if.core_req_read; assign dcache_req_qual_if.core_req_read = dcache_req_if.core_req_read;
assign dcache_req_qual_if.core_req_write = dcache_req_if.core_req_write; assign dcache_req_qual_if.core_req_write = dcache_req_if.core_req_write;
assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr; assign dcache_req_qual_if.core_req_addr = dcache_req_if.core_req_addr;
assign dcache_req_qual_if.core_req_data = dcache_req_if.core_req_data; assign dcache_req_qual_if.core_req_data = dcache_req_if.core_req_data;
assign dcache_req_if.core_req_ready = dcache_req_qual_if.core_req_ready; assign dcache_req_if.core_req_ready = dcache_req_qual_if.core_req_ready;
assign dcache_req_qual_if.core_req_rd = dcache_req_if.core_req_rd; assign dcache_req_qual_if.core_req_rd = dcache_req_if.core_req_rd;
assign dcache_req_qual_if.core_req_wb = dcache_req_if.core_req_wb; assign dcache_req_qual_if.core_req_wb = dcache_req_if.core_req_wb;
assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num; assign dcache_req_qual_if.core_req_warp_num = dcache_req_if.core_req_warp_num;
assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc; assign dcache_req_qual_if.core_req_pc = dcache_req_if.core_req_pc;
// The I-cache reuses the D-cache interface types, parameterized with the I-cache macros.
VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if(); VX_gpu_dcache_rsp_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_rsp_if();
VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if(); VX_gpu_dcache_req_if #(.NUM_REQUESTS(`INUM_REQUESTS)) icache_req_if();
VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if(); VX_gpu_dcache_dram_req_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_req_if();
VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_res_if(); VX_gpu_dcache_dram_rsp_if #(.BANK_LINE_WORDS(`IBANK_LINE_WORDS)) gpu_icache_dram_res_if();
// Bridge the flat I-cache DRAM ports to the interface instances (mirrors the D-cache wiring).
assign gpu_icache_dram_res_if.dram_rsp_valid = I_dram_rsp_valid; assign gpu_icache_dram_res_if.dram_rsp_valid = I_dram_rsp_valid;
assign gpu_icache_dram_res_if.dram_rsp_addr = I_dram_rsp_addr; assign gpu_icache_dram_res_if.dram_rsp_addr = I_dram_rsp_addr;
assign I_dram_req_write = gpu_icache_dram_req_if.dram_req_write; assign I_dram_req_write = gpu_icache_dram_req_if.dram_req_write;
assign I_dram_req_read = gpu_icache_dram_req_if.dram_req_read; assign I_dram_req_read = gpu_icache_dram_req_if.dram_req_read;
assign I_dram_req_addr = gpu_icache_dram_req_if.dram_req_addr; assign I_dram_req_addr = gpu_icache_dram_req_if.dram_req_addr;
assign I_dram_rsp_ready = gpu_icache_dram_req_if.dram_rsp_ready; assign I_dram_rsp_ready = gpu_icache_dram_req_if.dram_rsp_ready;
assign gpu_icache_dram_req_if.dram_req_ready = I_dram_req_ready; assign gpu_icache_dram_req_if.dram_req_ready = I_dram_req_ready;
genvar j; genvar j;
generate generate
for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin for (j = 0; j < `IBANK_LINE_WORDS; j = j + 1) begin
assign gpu_icache_dram_res_if.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32]; assign gpu_icache_dram_res_if.dram_rsp_data[j] = I_dram_rsp_data[j * 32 +: 32];
assign I_dram_req_data[j * 32 +: 32] = gpu_icache_dram_req_if.dram_req_data[j]; assign I_dram_req_data[j * 32 +: 32] = gpu_icache_dram_req_if.dram_req_data[j];
end end
endgenerate endgenerate
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Front-end to Back-end // Front-end to Back-end
VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM VX_frE_to_bckE_req_if bckE_req_if(); // New instruction request to EXE/MEM
// Back-end to Front-end // Back-end to Front-end
VX_wb_if writeback_if(); // Writeback to GPRs VX_wb_if writeback_if(); // Writeback to GPRs
VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch VX_branch_rsp_if branch_rsp_if(); // Branch Resolution to Fetch
VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch VX_jal_rsp_if jal_rsp_if(); // Jump resolution to Fetch
// Warp controls // Warp controls
VX_warp_ctl_if warp_ctl_if(); VX_warp_ctl_if warp_ctl_if();
// Cache snooping // Cache snooping
VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if(); VX_gpu_snp_req_rsp_if gpu_icache_snp_req_if();
@@ -147,69 +147,69 @@ assign gpu_dcache_snp_req_if.snp_req_addr = llc_snp_req_addr;
assign llc_snp_req_ready = gpu_dcache_snp_req_if.snp_req_ready; assign llc_snp_req_ready = gpu_dcache_snp_req_if.snp_req_ready;
// Front end: connected to the I-cache request/response interfaces, receives jal/branch
// responses and warp control, issues bckE_req_if, and drives the top-level ebreak output.
VX_front_end front_end ( VX_front_end front_end (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.warp_ctl_if (warp_ctl_if), .warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if), .bckE_req_if (bckE_req_if),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
.icache_rsp_if (icache_rsp_if), .icache_rsp_if (icache_rsp_if),
.icache_req_if (icache_req_if), .icache_req_if (icache_req_if),
.jal_rsp_if (jal_rsp_if), .jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if), .branch_rsp_if (branch_rsp_if),
.fetch_ebreak (ebreak) .fetch_ebreak (ebreak)
); );
// Scheduler: takes the three back-end delay signals plus the request and writeback
// interfaces, and produces schedule_delay for the front end and back end.
VX_scheduler schedule ( VX_scheduler schedule (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.memory_delay (memory_delay), .memory_delay (memory_delay),
.exec_delay (exec_delay), .exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay), .gpr_stage_delay (gpr_stage_delay),
.bckE_req_if (bckE_req_if), .bckE_req_if (bckE_req_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
.is_empty (scheduler_empty) .is_empty (scheduler_empty)
); );
// Back end: connected to the unqualified dcache_req_if; the IO-masked copy
// (dcache_req_qual_if) is what reaches the memory controller below.
VX_back_end #( VX_back_end #(
.CORE_ID(CORE_ID) .CORE_ID(CORE_ID)
) back_end ( ) back_end (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.schedule_delay (schedule_delay), .schedule_delay (schedule_delay),
.warp_ctl_if (warp_ctl_if), .warp_ctl_if (warp_ctl_if),
.bckE_req_if (bckE_req_if), .bckE_req_if (bckE_req_if),
.jal_rsp_if (jal_rsp_if), .jal_rsp_if (jal_rsp_if),
.branch_rsp_if (branch_rsp_if), .branch_rsp_if (branch_rsp_if),
.dcache_rsp_if (dcache_rsp_if), .dcache_rsp_if (dcache_rsp_if),
.dcache_req_if (dcache_req_if), .dcache_req_if (dcache_req_if),
.writeback_if (writeback_if), .writeback_if (writeback_if),
.mem_delay (memory_delay), .mem_delay (memory_delay),
.exec_delay (exec_delay), .exec_delay (exec_delay),
.gpr_stage_delay (gpr_stage_delay) .gpr_stage_delay (gpr_stage_delay)
); );
VX_dmem_ctrl dmem_controller ( VX_dmem_ctrl dmem_controller (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
// Dram <-> Dcache // Dram <-> Dcache
.gpu_dcache_dram_req_if (gpu_dcache_dram_req_if), .gpu_dcache_dram_req_if (gpu_dcache_dram_req_if),
.gpu_dcache_dram_res_if (gpu_dcache_dram_res_if), .gpu_dcache_dram_res_if (gpu_dcache_dram_res_if),
.gpu_dcache_snp_req_if (gpu_dcache_snp_req_if), .gpu_dcache_snp_req_if (gpu_dcache_snp_req_if),
// Dram <-> Icache // Dram <-> Icache
.gpu_icache_dram_req_if (gpu_icache_dram_req_if), .gpu_icache_dram_req_if (gpu_icache_dram_req_if),
.gpu_icache_dram_res_if (gpu_icache_dram_res_if), .gpu_icache_dram_res_if (gpu_icache_dram_res_if),
.gpu_icache_snp_req_if (gpu_icache_snp_req_if), .gpu_icache_snp_req_if (gpu_icache_snp_req_if),
// Core <-> Icache // Core <-> Icache
.icache_req_if (icache_req_if), .icache_req_if (icache_req_if),
.icache_rsp_if (icache_rsp_if), .icache_rsp_if (icache_rsp_if),
// Core <-> Dcache // Core <-> Dcache
.dcache_req_if (dcache_req_qual_if), .dcache_req_if (dcache_req_qual_if),
.dcache_rsp_if (dcache_rsp_if) .dcache_rsp_if (dcache_rsp_if)
); );
endmodule // Vortex endmodule // Vortex

View File

@@ -3,48 +3,51 @@
// byte_enabled_simple_dual_port_ram: 32-entry storage array with one synchronous
// write port and two combinational read ports. Every entry holds one 32-bit word
// per thread (`NUM_THREADS words in total).
//
// Ports:
//   clk, reset  - clock and synchronous reset (reset only suppresses writes; the
//                 array contents are not cleared).
//   we          - global write enable.
//   waddr       - entry written when `we` is set.
//   raddr1/2    - entries presented on q1 / q2.
//   be          - per-thread write enable: bit t gates all four bytes of thread t's word.
//   wdata       - per-thread write data.
//   q1, q2      - read data; driven directly from the array, with no
//                 read-during-write bypass (see the commented-out alternative below).
//
// The port list is comma-separated as ANSI-style headers require: a ';' after a
// port or a missing ',' between q1 and q2 is a syntax error.
module byte_enabled_simple_dual_port_ram
(
    input wire clk,
    input wire reset,
    input wire we,
    input wire[4:0] waddr,
    input wire[4:0] raddr1,
    input wire[4:0] raddr2,
    input wire[`NUM_THREADS-1:0] be,
    input wire[`NUM_THREADS-1:0][31:0] wdata,
    output reg[`NUM_THREADS-1:0][31:0] q1,
    output reg[`NUM_THREADS-1:0][31:0] q2
);

    // integer regi;
    // integer threadi;

    // Thread Byte Bit
    logic [`NUM_THREADS-1:0][3:0][7:0] GPR[31:0];

    always @(posedge clk) begin
        if (reset) begin
            //-- intentionally empty: storage is not initialised on reset
        end else begin
            if (we) begin
                integer thread_ind;
                for (thread_ind = 0; thread_ind < `NUM_THREADS; thread_ind = thread_ind + 1) begin
                    // Each enabled thread lane writes its whole 32-bit word, byte by byte.
                    if (be[thread_ind]) begin
                        GPR[waddr][thread_ind][0] <= wdata[thread_ind][7:0];
                        GPR[waddr][thread_ind][1] <= wdata[thread_ind][15:8];
                        GPR[waddr][thread_ind][2] <= wdata[thread_ind][23:16];
                        GPR[waddr][thread_ind][3] <= wdata[thread_ind][31:24];
                    end
                end
            end
            // $display("^^^^^^^^^^^^^^^^^^^^^^^");
            // for (regi = 0; regi <= 31; regi = regi + 1) begin
            //     for (threadi = 0; threadi < `NUM_THREADS; threadi = threadi + 1) begin
            //         if (GPR[regi][threadi] != 0) $display("$%d: %h",regi, GPR[regi][threadi]);
            //     end
            // end
        end
    end

    // Asynchronous reads straight from the array.
    assign q1 = GPR[raddr1];
    assign q2 = GPR[raddr2];

    // Alternative with write-to-read forwarding (disabled):
    // assign q1 = (raddr1 == waddr && (we)) ? wdata : GPR[raddr1];
    // assign q2 = (raddr2 == waddr && (we)) ? wdata : GPR[raddr2];

endmodule

948
hw/rtl/cache/VX_bank.v vendored

File diff suppressed because it is too large Load Diff

View File

@@ -49,8 +49,8 @@ module VX_cache #(
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Core request // Core request
input wire [NUM_REQUESTS-1:0] core_req_valid, input wire [NUM_REQUESTS-1:0] core_req_valid,

View File

@@ -2,66 +2,66 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
// VX_cache_core_req_bank_sel: purely combinational request-to-bank steering.
// For every core request lane it copies the lane's valid bit into the row of
// per_bank_valids that belongs to the bank selected by the lane's address, so
// per_bank_valids[b][r] is set only when request r is valid and targets bank b.
// Only NUM_BANKS and NUM_REQUESTS are referenced directly in the body.
// NOTE(review): the remaining parameters are presumably consumed by the
// `BANK_SELECT_ADDR_RNG macro from VX_cache_config.vh - confirm there.
module VX_cache_core_req_bank_sel #( module VX_cache_core_req_bank_sel #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2} // Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0, parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire [NUM_REQUESTS-1:0] core_req_valid, input wire [NUM_REQUESTS-1:0] core_req_valid,
input wire [NUM_REQUESTS-1:0][31:0] core_req_addr, input wire [NUM_REQUESTS-1:0][31:0] core_req_addr,
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valids
); );
generate generate
integer curr_req; integer curr_req;
always @(*) begin always @(*) begin
// Default every (bank, request) bit to 0 so that only the selected bank sees a
// request and the combinational block assigns the output on every path.
per_bank_valids = 0; per_bank_valids = 0;
for (curr_req = 0; curr_req < NUM_REQUESTS; curr_req = curr_req + 1) begin for (curr_req = 0; curr_req < NUM_REQUESTS; curr_req = curr_req + 1) begin
if (NUM_BANKS == 1) begin if (NUM_BANKS == 1) begin
// If there is only one bank, then only map requests to that bank // If there is only one bank, then only map requests to that bank
per_bank_valids[0][curr_req] = core_req_valid[curr_req]; per_bank_valids[0][curr_req] = core_req_valid[curr_req];
end else begin end else begin
// The bank index is the `BANK_SELECT_ADDR_RNG slice of this request's address.
per_bank_valids[core_req_addr[curr_req][`BANK_SELECT_ADDR_RNG]][curr_req] = core_req_valid[curr_req]; per_bank_valids[core_req_addr[curr_req][`BANK_SELECT_ADDR_RNG]][curr_req] = core_req_valid[curr_req];
end end
end end
end end
endgenerate endgenerate
endmodule endmodule

View File

@@ -1,56 +1,56 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_cache_dfq_queue #( module VX_cache_dfq_queue #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire dfqq_push, input wire dfqq_push,
input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid, input wire[NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
input wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr, input wire[NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
input wire dfqq_pop, input wire dfqq_pop,
output wire dfqq_req, output wire dfqq_req,
output wire[31:0] dfqq_req_addr, output wire[31:0] dfqq_req_addr,
output wire dfqq_empty, output wire dfqq_empty,
output wire dfqq_full output wire dfqq_full
); );
wire[NUM_BANKS-1:0] out_per_bank_dram_fill_req; wire[NUM_BANKS-1:0] out_per_bank_dram_fill_req;
@@ -66,56 +66,56 @@ module VX_cache_dfq_queue #(
wire o_empty; wire o_empty;
wire use_empty = !(|use_per_bank_dram_fill_req); wire use_empty = !(|use_per_bank_dram_fill_req);
wire out_empty = !(|out_per_bank_dram_fill_req) || o_empty; wire out_empty = !(|out_per_bank_dram_fill_req) || o_empty;
wire push_qual = dfqq_push && !dfqq_full; wire push_qual = dfqq_push && !dfqq_full;
wire pop_qual = dfqq_pop && use_empty && !out_empty; wire pop_qual = dfqq_pop && use_empty && !out_empty;
VX_generic_queue #( VX_generic_queue #(
.DATAW(NUM_BANKS * (1+32)), .DATAW(NUM_BANKS * (1+32)),
.SIZE(DFQQ_SIZE) .SIZE(DFQQ_SIZE)
) dfqq_queue ( ) dfqq_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (push_qual), .push (push_qual),
.data_in ({per_bank_dram_fill_req_valid, per_bank_dram_fill_req_addr}), .data_in ({per_bank_dram_fill_req_valid, per_bank_dram_fill_req_addr}),
.pop (pop_qual), .pop (pop_qual),
.data_out({out_per_bank_dram_fill_req, out_per_bank_dram_fill_req_addr}), .data_out({out_per_bank_dram_fill_req, out_per_bank_dram_fill_req_addr}),
.empty (o_empty), .empty (o_empty),
.full (dfqq_full) .full (dfqq_full)
); );
assign qual_bank_dram_fill_req = use_empty ? (out_per_bank_dram_fill_req & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req & {NUM_BANKS{!use_empty}}); assign qual_bank_dram_fill_req = use_empty ? (out_per_bank_dram_fill_req & {NUM_BANKS{!o_empty}}) : (use_per_bank_dram_fill_req & {NUM_BANKS{!use_empty}});
assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr; assign qual_bank_dram_fill_req_addr = use_empty ? out_per_bank_dram_fill_req_addr : use_per_bank_dram_fill_req_addr;
wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index; wire[`LOG2UP(NUM_BANKS)-1:0] qual_request_index;
wire qual_has_request; wire qual_has_request;
VX_generic_priority_encoder #( VX_generic_priority_encoder #(
.N(NUM_BANKS) .N(NUM_BANKS)
) sel_bank ( ) sel_bank (
.valids(qual_bank_dram_fill_req), .valids(qual_bank_dram_fill_req),
.index (qual_request_index), .index (qual_request_index),
.found (qual_has_request) .found (qual_has_request)
); );
assign dfqq_empty = !qual_has_request; assign dfqq_empty = !qual_has_request;
assign dfqq_req = qual_bank_dram_fill_req [qual_request_index]; assign dfqq_req = qual_bank_dram_fill_req [qual_request_index];
assign dfqq_req_addr = qual_bank_dram_fill_req_addr[qual_request_index]; assign dfqq_req_addr = qual_bank_dram_fill_req_addr[qual_request_index];
assign updated_bank_dram_fill_req = qual_bank_dram_fill_req & (~(1 << qual_request_index)); assign updated_bank_dram_fill_req = qual_bank_dram_fill_req & (~(1 << qual_request_index));
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
use_per_bank_dram_fill_req <= 0; use_per_bank_dram_fill_req <= 0;
use_per_bank_dram_fill_req_addr <= 0; use_per_bank_dram_fill_req_addr <= 0;
end else begin end else begin
if (dfqq_pop && qual_has_request) begin if (dfqq_pop && qual_has_request) begin
use_per_bank_dram_fill_req <= updated_bank_dram_fill_req; use_per_bank_dram_fill_req <= updated_bank_dram_fill_req;
use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr; use_per_bank_dram_fill_req_addr <= qual_bank_dram_fill_req_addr;
end end
end end
end end
endmodule endmodule

View File

@@ -1,58 +1,58 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_cache_dram_req_arb #( module VX_cache_dram_req_arb #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Prefetcher // Prefetcher
parameter PRFQ_SIZE = 64, parameter PRFQ_SIZE = 64,
parameter PRFQ_STRIDE = 2, parameter PRFQ_STRIDE = 2,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Fill Request // Fill Request
output wire dfqq_full, output wire dfqq_full,
input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid, input wire [NUM_BANKS-1:0] per_bank_dram_fill_req_valid,
input wire [NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr, input wire [NUM_BANKS-1:0][31:0] per_bank_dram_fill_req_addr,
// DFQ Request // DFQ Request
output wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop, output wire [NUM_BANKS-1:0] per_bank_dram_wb_queue_pop,
input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid, input wire [NUM_BANKS-1:0] per_bank_dram_wb_req_valid,
@@ -60,80 +60,80 @@ module VX_cache_dram_req_arb #(
input wire [NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data, input wire [NUM_BANKS-1:0][`BANK_LINE_WORDS-1:0][`WORD_SIZE-1:0] per_bank_dram_wb_req_data,
// real Dram request // real Dram request
output wire dram_req_read, output wire dram_req_read,
output wire dram_req_write, output wire dram_req_write,
output wire [31:0] dram_req_addr, output wire [31:0] dram_req_addr,
output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data, output wire [`IBANK_LINE_WORDS-1:0][31:0] dram_req_data,
input wire dram_req_ready input wire dram_req_ready
); );
wire pref_pop; wire pref_pop;
wire pref_valid; wire pref_valid;
wire[31:0] pref_addr; wire[31:0] pref_addr;
wire dwb_valid; wire dwb_valid;
wire dfqq_req; wire dfqq_req;
assign pref_pop = !dwb_valid && !dfqq_req && dram_req_ready && pref_valid; assign pref_pop = !dwb_valid && !dfqq_req && dram_req_ready && pref_valid;
VX_prefetcher #( VX_prefetcher #(
.PRFQ_SIZE (PRFQ_SIZE), .PRFQ_SIZE (PRFQ_SIZE),
.PRFQ_STRIDE (PRFQ_STRIDE), .PRFQ_STRIDE (PRFQ_STRIDE),
.BANK_LINE_SIZE_BYTES(BANK_LINE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES(BANK_LINE_SIZE_BYTES),
.WORD_SIZE_BYTES (WORD_SIZE_BYTES) .WORD_SIZE_BYTES (WORD_SIZE_BYTES)
) prfqq ( ) prfqq (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.dram_req (dram_req_read), .dram_req (dram_req_read),
.dram_req_addr(dram_req_addr), .dram_req_addr(dram_req_addr),
.pref_pop (pref_pop), .pref_pop (pref_pop),
.pref_valid (pref_valid), .pref_valid (pref_valid),
.pref_addr (pref_addr) .pref_addr (pref_addr)
); );
wire[31:0] dfqq_req_addr; wire[31:0] dfqq_req_addr;
`DEBUG_BEGIN `DEBUG_BEGIN
wire dfqq_empty; wire dfqq_empty;
`DEBUG_END `DEBUG_END
wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop wire dfqq_pop = !dwb_valid && dfqq_req && dram_req_ready; // If no dwb, and dfqq has valids, then pop
wire dfqq_push = (|per_bank_dram_fill_req_valid); wire dfqq_push = (|per_bank_dram_fill_req_valid);
VX_cache_dfq_queue cache_dfq_queue( VX_cache_dfq_queue cache_dfq_queue(
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.dfqq_push (dfqq_push), .dfqq_push (dfqq_push),
.per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid), .per_bank_dram_fill_req_valid (per_bank_dram_fill_req_valid),
.per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr), .per_bank_dram_fill_req_addr (per_bank_dram_fill_req_addr),
.dfqq_pop (dfqq_pop), .dfqq_pop (dfqq_pop),
.dfqq_req (dfqq_req), .dfqq_req (dfqq_req),
.dfqq_req_addr (dfqq_req_addr), .dfqq_req_addr (dfqq_req_addr),
.dfqq_empty (dfqq_empty), .dfqq_empty (dfqq_empty),
.dfqq_full (dfqq_full) .dfqq_full (dfqq_full)
); );
wire [`LOG2UP(NUM_BANKS)-1:0] dwb_bank; wire [`LOG2UP(NUM_BANKS)-1:0] dwb_bank;
wire [NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid; wire [NUM_BANKS-1:0] use_wb_valid = per_bank_dram_wb_req_valid;
VX_generic_priority_encoder #( VX_generic_priority_encoder #(
.N(NUM_BANKS) .N(NUM_BANKS)
) sel_dwb ( ) sel_dwb (
.valids(use_wb_valid), .valids(use_wb_valid),
.index (dwb_bank), .index (dwb_bank),
.found (dwb_valid) .found (dwb_valid)
); );
assign per_bank_dram_wb_queue_pop = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0; assign per_bank_dram_wb_queue_pop = dram_req_ready ? (use_wb_valid & ((1 << dwb_bank))) : 0;
wire dram_req = dwb_valid || dfqq_req || pref_pop; wire dram_req = dwb_valid || dfqq_req || pref_pop;
assign dram_req_read = ((dfqq_req && !dwb_valid) || pref_pop) && dram_req; assign dram_req_read = ((dfqq_req && !dwb_valid) || pref_pop) && dram_req;
assign dram_req_write = dwb_valid && dram_req; assign dram_req_write = dwb_valid && dram_req;
assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK; assign dram_req_addr = (dwb_valid ? per_bank_dram_wb_req_addr[dwb_bank] : (dfqq_req ? dfqq_req_addr : pref_addr)) & `BASE_ADDR_MASK;
assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0; assign {dram_req_data} = dwb_valid ? {per_bank_dram_wb_req_data[dwb_bank] }: 0;
endmodule endmodule

View File

@@ -2,169 +2,169 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_cache_miss_resrv #( module VX_cache_miss_resrv #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Miss enqueue // Miss enqueue
input wire miss_add, input wire miss_add,
input wire[31:0] miss_add_addr, input wire[31:0] miss_add_addr,
input wire[`WORD_SIZE_RNG] miss_add_data, input wire[`WORD_SIZE_RNG] miss_add_data,
input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid, input wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_add_tid,
input wire[4:0] miss_add_rd, input wire[4:0] miss_add_rd,
input wire[1:0] miss_add_wb, input wire[1:0] miss_add_wb,
input wire[`NW_BITS-1:0] miss_add_warp_num, input wire[`NW_BITS-1:0] miss_add_warp_num,
input wire[2:0] miss_add_mem_read, input wire[2:0] miss_add_mem_read,
input wire[2:0] miss_add_mem_write, input wire[2:0] miss_add_mem_write,
input wire[31:0] miss_add_pc, input wire[31:0] miss_add_pc,
output wire miss_resrv_full, output wire miss_resrv_full,
output wire miss_resrv_stop, output wire miss_resrv_stop,
// Broadcast Fill // Broadcast Fill
input wire is_fill_st1, input wire is_fill_st1,
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
// TODO: should fix this // TODO: should fix this
input wire[31:0] fill_addr_st1, input wire[31:0] fill_addr_st1,
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
// Miss dequeue // Miss dequeue
input wire miss_resrv_pop, input wire miss_resrv_pop,
output wire miss_resrv_valid_st0, output wire miss_resrv_valid_st0,
output wire[31:0] miss_resrv_addr_st0, output wire[31:0] miss_resrv_addr_st0,
output wire[`WORD_SIZE_RNG] miss_resrv_data_st0, output wire[`WORD_SIZE_RNG] miss_resrv_data_st0,
output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0, output wire[`LOG2UP(NUM_REQUESTS)-1:0] miss_resrv_tid_st0,
output wire[4:0] miss_resrv_rd_st0, output wire[4:0] miss_resrv_rd_st0,
output wire[1:0] miss_resrv_wb_st0, output wire[1:0] miss_resrv_wb_st0,
output wire[`NW_BITS-1:0] miss_resrv_warp_num_st0, output wire[`NW_BITS-1:0] miss_resrv_warp_num_st0,
output wire[2:0] miss_resrv_mem_read_st0, output wire[2:0] miss_resrv_mem_read_st0,
output wire[31:0] miss_resrv_pc_st0, output wire[31:0] miss_resrv_pc_st0,
output wire[2:0] miss_resrv_mem_write_st0 output wire[2:0] miss_resrv_mem_write_st0
); );
// Size of metadata = 32 + `LOG2UP(NUM_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1) // Size of metadata = 32 + `LOG2UP(NUM_REQUESTS) + 5 + 2 + (`NW_BITS-1 + 1)
reg [`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0]; reg [`MRVQ_METADATA_SIZE-1:0] metadata_table[MRVQ_SIZE-1:0];
reg [MRVQ_SIZE-1:0][31:0] addr_table; reg [MRVQ_SIZE-1:0][31:0] addr_table;
reg [MRVQ_SIZE-1:0][31:0] pc_table; reg [MRVQ_SIZE-1:0][31:0] pc_table;
reg [MRVQ_SIZE-1:0] valid_table; reg [MRVQ_SIZE-1:0] valid_table;
reg [MRVQ_SIZE-1:0] ready_table; reg [MRVQ_SIZE-1:0] ready_table;
reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr; reg [`LOG2UP(MRVQ_SIZE)-1:0] head_ptr;
reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr; reg [`LOG2UP(MRVQ_SIZE)-1:0] tail_ptr;
reg [31:0] size; reg [31:0] size;
// assign miss_resrv_full = (MRVQ_SIZE != 2) && (tail_ptr+1) == head_ptr; // assign miss_resrv_full = (MRVQ_SIZE != 2) && (tail_ptr+1) == head_ptr;
assign miss_resrv_full = (MRVQ_SIZE != 2) && (size == MRVQ_SIZE ); assign miss_resrv_full = (MRVQ_SIZE != 2) && (size == MRVQ_SIZE );
assign miss_resrv_stop = (MRVQ_SIZE != 2) && (size > (MRVQ_SIZE-5)); assign miss_resrv_stop = (MRVQ_SIZE != 2) && (size > (MRVQ_SIZE-5));
wire enqueue_possible = !miss_resrv_full; wire enqueue_possible = !miss_resrv_full;
wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr; wire [`LOG2UP(MRVQ_SIZE)-1:0] enqueue_index = tail_ptr;
reg [MRVQ_SIZE-1:0] make_ready; reg [MRVQ_SIZE-1:0] make_ready;
genvar curr_e; genvar curr_e;
generate generate
for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin for (curr_e = 0; curr_e < MRVQ_SIZE; curr_e=curr_e+1) begin
assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e] assign make_ready[curr_e] = is_fill_st1 && valid_table[curr_e]
&& addr_table[curr_e][31:`LINE_SELECT_ADDR_START] == fill_addr_st1[31:`LINE_SELECT_ADDR_START]; && addr_table[curr_e][31:`LINE_SELECT_ADDR_START] == fill_addr_st1[31:`LINE_SELECT_ADDR_START];
end end
endgenerate endgenerate
wire dequeue_possible = valid_table[head_ptr] && ready_table[head_ptr]; wire dequeue_possible = valid_table[head_ptr] && ready_table[head_ptr];
wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = head_ptr; wire [`LOG2UP(MRVQ_SIZE)-1:0] dequeue_index = head_ptr;
assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible; assign miss_resrv_valid_st0 = (MRVQ_SIZE != 2) && dequeue_possible;
assign miss_resrv_pc_st0 = pc_table[dequeue_index]; assign miss_resrv_pc_st0 = pc_table[dequeue_index];
assign miss_resrv_addr_st0 = addr_table[dequeue_index]; assign miss_resrv_addr_st0 = addr_table[dequeue_index];
assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_rd_st0, miss_resrv_wb_st0, miss_resrv_warp_num_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0} = metadata_table[dequeue_index]; assign {miss_resrv_data_st0, miss_resrv_tid_st0, miss_resrv_rd_st0, miss_resrv_wb_st0, miss_resrv_warp_num_st0, miss_resrv_mem_read_st0, miss_resrv_mem_write_st0} = metadata_table[dequeue_index];
wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2); wire mrvq_push = miss_add && enqueue_possible && (MRVQ_SIZE != 2);
wire mrvq_pop = miss_resrv_pop && dequeue_possible; wire mrvq_pop = miss_resrv_pop && dequeue_possible;
wire update_ready = (|make_ready); wire update_ready = (|make_ready);
integer i; integer i;
always @(posedge clk) begin always @(posedge clk) begin
if (reset) begin if (reset) begin
for (i = 0; i < MRVQ_SIZE; i=i+1) begin for (i = 0; i < MRVQ_SIZE; i=i+1) begin
metadata_table[i] <= 0; metadata_table[i] <= 0;
end end
valid_table <= 0; valid_table <= 0;
ready_table <= 0; ready_table <= 0;
addr_table <= 0; addr_table <= 0;
pc_table <= 0; pc_table <= 0;
size <= 0; size <= 0;
head_ptr <= 0; head_ptr <= 0;
tail_ptr <= 0; tail_ptr <= 0;
end else begin end else begin
if (mrvq_push) begin if (mrvq_push) begin
valid_table[enqueue_index] <= 1; valid_table[enqueue_index] <= 1;
ready_table[enqueue_index] <= 0; ready_table[enqueue_index] <= 0;
pc_table[enqueue_index] <= miss_add_pc; pc_table[enqueue_index] <= miss_add_pc;
addr_table[enqueue_index] <= miss_add_addr; addr_table[enqueue_index] <= miss_add_addr;
metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write}; metadata_table[enqueue_index] <= {miss_add_data, miss_add_tid, miss_add_rd, miss_add_wb, miss_add_warp_num, miss_add_mem_read, miss_add_mem_write};
tail_ptr <= tail_ptr + 1; tail_ptr <= tail_ptr + 1;
end end
if (update_ready) begin if (update_ready) begin
ready_table <= ready_table | make_ready; ready_table <= ready_table | make_ready;
end end
if (mrvq_pop) begin if (mrvq_pop) begin
valid_table[dequeue_index] <= 0; valid_table[dequeue_index] <= 0;
ready_table[dequeue_index] <= 0; ready_table[dequeue_index] <= 0;
addr_table[dequeue_index] <= 0; addr_table[dequeue_index] <= 0;
metadata_table[dequeue_index] <= 0; metadata_table[dequeue_index] <= 0;
pc_table[dequeue_index] <= 0; pc_table[dequeue_index] <= 0;
head_ptr <= head_ptr + 1; head_ptr <= head_ptr + 1;
end end
if (!(mrvq_push && mrvq_pop)) begin if (!(mrvq_push && mrvq_pop)) begin
if (mrvq_push) begin if (mrvq_push) begin
size <= size + 1; size <= size + 1;
end end
if (mrvq_pop) begin if (mrvq_pop) begin
size <= size - 1; size <= size - 1;
end end
end end
end end
end end
endmodule endmodule

View File

@@ -1,107 +1,107 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_cache_req_queue #( module VX_cache_req_queue #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
// Enqueue Data // Enqueue Data
input wire reqq_push, input wire reqq_push,
input wire [NUM_REQUESTS-1:0] bank_valids, input wire [NUM_REQUESTS-1:0] bank_valids,
input wire [NUM_REQUESTS-1:0][31:0] bank_addr, input wire [NUM_REQUESTS-1:0][31:0] bank_addr,
input wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata, input wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] bank_writedata,
input wire [4:0] bank_rd, input wire [4:0] bank_rd,
input wire [NUM_REQUESTS-1:0][1:0] bank_wb, input wire [NUM_REQUESTS-1:0][1:0] bank_wb,
input wire [`NW_BITS-1:0] bank_warp_num, input wire [`NW_BITS-1:0] bank_warp_num,
input wire [NUM_REQUESTS-1:0][2:0] bank_mem_read, input wire [NUM_REQUESTS-1:0][2:0] bank_mem_read,
input wire [NUM_REQUESTS-1:0][2:0] bank_mem_write, input wire [NUM_REQUESTS-1:0][2:0] bank_mem_write,
input wire [31:0] bank_pc, input wire [31:0] bank_pc,
// Dequeue Data // Dequeue Data
input wire reqq_pop, input wire reqq_pop,
output wire reqq_req_st0, output wire reqq_req_st0,
output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0, output wire [`LOG2UP(NUM_REQUESTS)-1:0] reqq_req_tid_st0,
output wire [31:0] reqq_req_addr_st0, output wire [31:0] reqq_req_addr_st0,
output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0, output wire [`WORD_SIZE_RNG] reqq_req_writedata_st0,
output wire [4:0] reqq_req_rd_st0, output wire [4:0] reqq_req_rd_st0,
output wire [1:0] reqq_req_wb_st0, output wire [1:0] reqq_req_wb_st0,
output wire [`NW_BITS-1:0] reqq_req_warp_num_st0, output wire [`NW_BITS-1:0] reqq_req_warp_num_st0,
output wire [2:0] reqq_req_mem_read_st0, output wire [2:0] reqq_req_mem_read_st0,
output wire [2:0] reqq_req_mem_write_st0, output wire [2:0] reqq_req_mem_write_st0,
output wire [31:0] reqq_req_pc_st0, output wire [31:0] reqq_req_pc_st0,
// State Data // State Data
output wire reqq_empty, output wire reqq_empty,
output wire reqq_full output wire reqq_full
); );
wire [NUM_REQUESTS-1:0] out_per_valids; wire [NUM_REQUESTS-1:0] out_per_valids;
wire [NUM_REQUESTS-1:0][31:0] out_per_addr; wire [NUM_REQUESTS-1:0][31:0] out_per_addr;
wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata; wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] out_per_writedata;
wire [4:0] out_per_rd; wire [4:0] out_per_rd;
wire [NUM_REQUESTS-1:0][1:0] out_per_wb; wire [NUM_REQUESTS-1:0][1:0] out_per_wb;
wire [`NW_BITS-1:0] out_per_warp_num; wire [`NW_BITS-1:0] out_per_warp_num;
wire [NUM_REQUESTS-1:0][2:0] out_per_mem_read; wire [NUM_REQUESTS-1:0][2:0] out_per_mem_read;
wire [NUM_REQUESTS-1:0][2:0] out_per_mem_write; wire [NUM_REQUESTS-1:0][2:0] out_per_mem_write;
wire [31:0] out_per_pc; wire [31:0] out_per_pc;
reg [NUM_REQUESTS-1:0] use_per_valids; reg [NUM_REQUESTS-1:0] use_per_valids;
reg [NUM_REQUESTS-1:0][31:0] use_per_addr; reg [NUM_REQUESTS-1:0][31:0] use_per_addr;
reg [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] use_per_writedata; reg [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] use_per_writedata;
reg [4:0] use_per_rd; reg [4:0] use_per_rd;
reg [NUM_REQUESTS-1:0][1:0] use_per_wb; reg [NUM_REQUESTS-1:0][1:0] use_per_wb;
reg [31:0] use_per_pc; reg [31:0] use_per_pc;
reg [`NW_BITS-1:0] use_per_warp_num; reg [`NW_BITS-1:0] use_per_warp_num;
reg [NUM_REQUESTS-1:0][2:0] use_per_mem_read; reg [NUM_REQUESTS-1:0][2:0] use_per_mem_read;
reg [NUM_REQUESTS-1:0][2:0] use_per_mem_write; reg [NUM_REQUESTS-1:0][2:0] use_per_mem_write;
wire [NUM_REQUESTS-1:0] qual_valids; wire [NUM_REQUESTS-1:0] qual_valids;
wire [NUM_REQUESTS-1:0][31:0] qual_addr; wire [NUM_REQUESTS-1:0][31:0] qual_addr;
wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata; wire [NUM_REQUESTS-1:0][`WORD_SIZE_RNG] qual_writedata;
wire [4:0] qual_rd; wire [4:0] qual_rd;
wire [NUM_REQUESTS-1:0][1:0] qual_wb; wire [NUM_REQUESTS-1:0][1:0] qual_wb;
wire [`NW_BITS-1:0] qual_warp_num; wire [`NW_BITS-1:0] qual_warp_num;
wire [NUM_REQUESTS-1:0][2:0] qual_mem_read; wire [NUM_REQUESTS-1:0][2:0] qual_mem_read;
wire [NUM_REQUESTS-1:0][2:0] qual_mem_write; wire [NUM_REQUESTS-1:0][2:0] qual_mem_write;
wire [31:0] qual_pc; wire [31:0] qual_pc;
`DEBUG_BEGIN `DEBUG_BEGIN
reg [NUM_REQUESTS-1:0] updated_valids; reg [NUM_REQUESTS-1:0] updated_valids;
@@ -109,97 +109,97 @@ module VX_cache_req_queue #(
wire o_empty; wire o_empty;
wire use_empty = !(|use_per_valids); wire use_empty = !(|use_per_valids);
wire out_empty = !(|out_per_valids) || o_empty; wire out_empty = !(|out_per_valids) || o_empty;
wire push_qual = reqq_push && !reqq_full; wire push_qual = reqq_push && !reqq_full;
wire pop_qual = !out_empty && use_empty; wire pop_qual = !out_empty && use_empty;
VX_generic_queue #( VX_generic_queue #(
.DATAW( (NUM_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUM_REQUESTS*2) + (`NW_BITS-1+1) + (NUM_REQUESTS * (3 + 3)) + 32 ), .DATAW( (NUM_REQUESTS * (1+32+`WORD_SIZE)) + 5 + (NUM_REQUESTS*2) + (`NW_BITS-1+1) + (NUM_REQUESTS * (3 + 3)) + 32 ),
.SIZE(REQQ_SIZE) .SIZE(REQQ_SIZE)
) reqq_queue ( ) reqq_queue (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.push (push_qual), .push (push_qual),
.data_in ({bank_valids , bank_addr , bank_writedata , bank_rd , bank_wb , bank_warp_num , bank_mem_read , bank_mem_write , bank_pc}), .data_in ({bank_valids , bank_addr , bank_writedata , bank_rd , bank_wb , bank_warp_num , bank_mem_read , bank_mem_write , bank_pc}),
.pop (pop_qual), .pop (pop_qual),
.data_out ({out_per_valids, out_per_addr, out_per_writedata, out_per_rd, out_per_wb, out_per_warp_num, out_per_mem_read, out_per_mem_write, out_per_pc}), .data_out ({out_per_valids, out_per_addr, out_per_writedata, out_per_rd, out_per_wb, out_per_warp_num, out_per_mem_read, out_per_mem_write, out_per_pc}),
.empty (o_empty), .empty (o_empty),
.full (reqq_full) .full (reqq_full)
); );
wire[NUM_REQUESTS-1:0] real_out_per_valids = out_per_valids & {NUM_REQUESTS{~out_empty}}; wire[NUM_REQUESTS-1:0] real_out_per_valids = out_per_valids & {NUM_REQUESTS{~out_empty}};
assign qual_valids = use_per_valids; assign qual_valids = use_per_valids;
assign qual_addr = use_per_addr; assign qual_addr = use_per_addr;
assign qual_writedata = use_per_writedata; assign qual_writedata = use_per_writedata;
assign qual_rd = use_per_rd; assign qual_rd = use_per_rd;
assign qual_wb = use_per_wb; assign qual_wb = use_per_wb;
assign qual_warp_num = use_per_warp_num; assign qual_warp_num = use_per_warp_num;
assign qual_mem_read = use_per_mem_read; assign qual_mem_read = use_per_mem_read;
assign qual_mem_write = use_per_mem_write; assign qual_mem_write = use_per_mem_write;
assign qual_pc = use_per_pc; assign qual_pc = use_per_pc;
wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index; wire[`LOG2UP(NUM_REQUESTS)-1:0] qual_request_index;
wire qual_has_request; wire qual_has_request;
VX_generic_priority_encoder #( VX_generic_priority_encoder #(
.N(NUM_REQUESTS) .N(NUM_REQUESTS)
) sel_bank ( ) sel_bank (
.valids(qual_valids), .valids(qual_valids),
.index (qual_request_index), .index (qual_request_index),
.found (qual_has_request) .found (qual_has_request)
); );
assign reqq_empty = !qual_has_request; assign reqq_empty = !qual_has_request;
assign reqq_req_st0 = qual_has_request; assign reqq_req_st0 = qual_has_request;
assign reqq_req_tid_st0 = qual_request_index; assign reqq_req_tid_st0 = qual_request_index;
assign reqq_req_addr_st0 = qual_addr[qual_request_index]; assign reqq_req_addr_st0 = qual_addr[qual_request_index];
assign reqq_req_writedata_st0 = qual_writedata[qual_request_index]; assign reqq_req_writedata_st0 = qual_writedata[qual_request_index];
assign reqq_req_rd_st0 = qual_rd; assign reqq_req_rd_st0 = qual_rd;
assign reqq_req_wb_st0 = qual_wb[qual_request_index]; assign reqq_req_wb_st0 = qual_wb[qual_request_index];
assign reqq_req_warp_num_st0 = qual_warp_num; assign reqq_req_warp_num_st0 = qual_warp_num;
assign reqq_req_mem_read_st0 = qual_mem_read [qual_request_index]; assign reqq_req_mem_read_st0 = qual_mem_read [qual_request_index];
assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index]; assign reqq_req_mem_write_st0 = qual_mem_write[qual_request_index];
assign reqq_req_pc_st0 = qual_pc; assign reqq_req_pc_st0 = qual_pc;
always @(*) begin always @(*) begin
updated_valids = qual_valids; updated_valids = qual_valids;
if (qual_has_request) begin if (qual_has_request) begin
updated_valids[qual_request_index] = 0; updated_valids[qual_request_index] = 0;
end end
end end
// Holding registers (use_per_*), updated on the rising clock edge.
//   reset    : every register is cleared.
//   pop_qual : the whole set is reloaded from out_per_*; the valid mask
//              comes from real_out_per_valids.
//   reqq_pop : only the valid entry at qual_request_index is cleared.
// pop_qual has priority over reqq_pop.
always @(posedge clk) begin
    if (reset) begin
        use_per_valids    <= 0;
        use_per_addr      <= 0;
        use_per_writedata <= 0;
        use_per_rd        <= 0;
        use_per_wb        <= 0;
        use_per_warp_num  <= 0;
        use_per_mem_read  <= 0;
        use_per_mem_write <= 0;
        use_per_pc        <= 0;
    end else if (pop_qual) begin
        use_per_valids    <= real_out_per_valids;
        use_per_addr      <= out_per_addr;
        use_per_writedata <= out_per_writedata;
        use_per_rd        <= out_per_rd;
        use_per_wb        <= out_per_wb;
        use_per_warp_num  <= out_per_warp_num;
        use_per_mem_read  <= out_per_mem_read;
        use_per_mem_write <= out_per_mem_write;
        use_per_pc        <= out_per_pc;
    end else if (reqq_pop) begin
        use_per_valids[qual_request_index] <= 0;
    end
end
endmodule endmodule

View File

@@ -1,49 +1,49 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_cache_wb_sel_merge #( module VX_cache_wb_sel_merge #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2} // Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0, parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
// Per Bank WB // Per Bank WB
input wire [NUM_BANKS-1:0] per_bank_wb_valid, input wire [NUM_BANKS-1:0] per_bank_wb_valid,
input wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_wb_tid, input wire [NUM_BANKS-1:0][`LOG2UP(NUM_REQUESTS)-1:0] per_bank_wb_tid,
input wire [NUM_BANKS-1:0][4:0] per_bank_wb_rd, input wire [NUM_BANKS-1:0][4:0] per_bank_wb_rd,
input wire [NUM_BANKS-1:0][1:0] per_bank_wb_wb, input wire [NUM_BANKS-1:0][1:0] per_bank_wb_wb,
@@ -61,77 +61,77 @@ module VX_cache_wb_sel_merge #(
output wire [4:0] core_rsp_read, output wire [4:0] core_rsp_read,
output wire [1:0] core_rsp_write, output wire [1:0] core_rsp_write,
output wire [`NW_BITS-1:0] core_rsp_warp_num, output wire [`NW_BITS-1:0] core_rsp_warp_num,
output reg [NUM_REQUESTS-1:0][31:0] core_rsp_addr output reg [NUM_REQUESTS-1:0][31:0] core_rsp_addr
); );
// Per-bank pop decision before back-pressure is applied; a bank is only
// popped when the core side is ready to accept the response.
reg [NUM_BANKS-1:0] per_bank_wb_pop_unqual;

assign per_bank_wb_pop = {NUM_BANKS{core_rsp_ready}} & per_bank_wb_pop_unqual;

// "Main" bank: the valid bank chosen by the priority encoder. Its rd / wb /
// warp_num fields are forwarded as the response metadata.
wire [`LOG2UP(NUM_BANKS)-1:0] main_bank_index;
wire                          found_bank;

VX_generic_priority_encoder #(
    .N (NUM_BANKS)
) sel_bank (
    .valids (per_bank_wb_valid),
    .index  (main_bank_index),
    .found  (found_bank)
);

assign core_rsp_read     = per_bank_wb_rd[main_bank_index];
assign core_rsp_write    = per_bank_wb_wb[main_bank_index];
assign core_rsp_warp_num = per_bank_wb_warp_num[main_bank_index];

// For the L2 and L3 function IDs, banks are merged without comparing
// rd / warp_num against the main bank; every other function ID requires
// both fields to match.
localparam MERGE_IGNORES_DEST = (FUNC_ID == `L2FUNC_ID) || (FUNC_ID == `L3FUNC_ID);

integer this_bank;

// Merge loop. Banks are visited in ascending index order; a bank is accepted
// (and marked for pop) when all of the following hold:
//   - the encoder found a main bank and this bank has a valid writeback,
//   - the response slot of this bank's tid has not been filled yet,
//   - this bank is the main bank, or its tid differs from the main bank's,
//   - the destination check described above passes.
always @(*) begin
    core_rsp_valid         = 0;
    core_rsp_data          = 0;
    core_rsp_pc            = 0;
    core_rsp_addr          = 0;
    per_bank_wb_pop_unqual = 0;

    for (this_bank = 0; this_bank < NUM_BANKS; this_bank = this_bank + 1) begin
        if (found_bank
         && per_bank_wb_valid[this_bank]
         && !core_rsp_valid[per_bank_wb_tid[this_bank]]
         && ((main_bank_index == `LOG2UP(NUM_BANKS)'(this_bank))
          || (per_bank_wb_tid[this_bank] != per_bank_wb_tid[main_bank_index]))
         && (MERGE_IGNORES_DEST
          || ((per_bank_wb_rd[this_bank] == per_bank_wb_rd[main_bank_index])
           && (per_bank_wb_warp_num[this_bank] == per_bank_wb_warp_num[main_bank_index])))) begin
            core_rsp_valid[per_bank_wb_tid[this_bank]] = 1;
            core_rsp_data[per_bank_wb_tid[this_bank]]  = per_bank_wb_data[this_bank];
            core_rsp_pc[per_bank_wb_tid[this_bank]]    = per_bank_wb_pc[this_bank];
            core_rsp_addr[per_bank_wb_tid[this_bank]]  = per_bank_wb_addr[this_bank];
            per_bank_wb_pop_unqual[this_bank]          = 1;
        end
    end
end
endmodule endmodule

View File

@@ -1,154 +1,154 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
// Fill invalidator.
//
// Keeps a small table of line addresses. A `possible_fill` whose line address
// already sits in an active entry is flagged through `invalidate_fill`;
// otherwise the address is recorded in a free entry (when one exists). A
// `success_fill` that matches releases the matching entries.
// Only address bits [31:`LINE_SELECT_ADDR_START] take part in the comparison.
// With FILL_INVALIDAOR_SIZE == 0 the table is omitted and `invalidate_fill`
// is tied low.
module VX_fill_invalidator #(
    // Size of cache in bytes
    parameter CACHE_SIZE_BYTES              = 1024,
    // Size of line inside a bank in bytes
    parameter BANK_LINE_SIZE_BYTES          = 16,
    // Number of banks {1, 2, 4, 8,...}
    parameter NUM_BANKS                     = 8,
    // Size of a word in bytes
    parameter WORD_SIZE_BYTES               = 4,
    // Number of Word requests per cycle {1, 2, 4, 8, ...}
    parameter NUM_REQUESTS                  = 2,
    // Number of cycles to complete stage 1 (read from memory)
    parameter STAGE_1_CYCLES                = 2,

    // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
    // Core Request Queue Size
    parameter REQQ_SIZE                     = 8,
    // Miss Reserv Queue Knob
    parameter MRVQ_SIZE                     = 8,
    // Dram Fill Rsp Queue Size
    parameter DFPQ_SIZE                     = 2,
    // Snoop Req Queue
    parameter SNRQ_SIZE                     = 8,

    // Queues for writebacks Knobs {1, 2, 4, 8, ...}
    // Core Writeback Queue Size
    parameter CWBQ_SIZE                     = 8,
    // Dram Writeback Queue Size
    parameter DWBQ_SIZE                     = 4,
    // Dram Fill Req Queue Size
    parameter DFQQ_SIZE                     = 8,
    // Lower Level Cache Hit Queue Size
    parameter LLVQ_SIZE                     = 16,

    // Fill Invalidator Size {Fill invalidator must be active}
    parameter FILL_INVALIDAOR_SIZE          = 16,

    // Dram knobs
    parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) (
    input  wire       clk,
    input  wire       reset,

    input  wire       possible_fill,
    input  wire       success_fill,
    input  wire[31:0] fill_addr,

    output reg        invalidate_fill
);

    if (FILL_INVALIDAOR_SIZE == 0) begin

        // No tracking table: nothing is ever invalidated.
        assign invalidate_fill = 0;

    end else begin

        // Tracking table: one active flag and one recorded address per entry.
        reg [FILL_INVALIDAOR_SIZE-1:0]       fills_active;
        reg [FILL_INVALIDAOR_SIZE-1:0][31:0] fills_address;

        // Per-entry hit flags for the incoming address, and their OR.
        reg [FILL_INVALIDAOR_SIZE-1:0]       matched_fill;
        wire                                 matched;

        // Free entry selected for the next allocation.
        wire [(`LOG2UP(FILL_INVALIDAOR_SIZE))-1:0] enqueue_index;
        wire                                       enqueue_found;

        integer fi;

        // An entry hits when it is active and holds the same line address.
        always @(*) begin
            for (fi = 0; fi < FILL_INVALIDAOR_SIZE; fi = fi + 1) begin
                matched_fill[fi] = fills_active[fi]
                                && (fills_address[fi][31:`LINE_SELECT_ADDR_START] == fill_addr[31:`LINE_SELECT_ADDR_START]);
            end
        end

        assign matched         = |matched_fill;
        assign invalidate_fill = possible_fill && matched;

        // Picks an inactive entry (valids = ~fills_active).
        VX_generic_priority_encoder #(
            .N (FILL_INVALIDAOR_SIZE)
        ) sel_bank (
            .valids (~fills_active),
            .index  (enqueue_index),
            .found  (enqueue_found)
        );

        // Table update. Allocation takes priority over release; the two
        // conditions are mutually exclusive anyway (!matched vs. matched).
        always @(posedge clk) begin
            if (reset) begin
                fills_active  <= 0;
                fills_address <= 0;
            end else if (possible_fill && !matched && enqueue_found) begin
                fills_active [enqueue_index] <= 1;
                fills_address[enqueue_index] <= fill_addr;
            end else if (success_fill && matched) begin
                fills_active <= fills_active & (~matched_fill);
            end
        end

    end

endmodule

View File

@@ -1,70 +1,70 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
// Prefetch address generator.
//
// Addresses seen on `dram_req` are masked with `BASE_ADDR_MASK and queued.
// When an entry is taken from the queue, PRFQ_STRIDE prefetch addresses are
// offered on pref_addr, starting one BANK_LINE_SIZE_BYTES past the queued
// address and advancing by BANK_LINE_SIZE_BYTES on every `pref_pop`.
module VX_prefetcher #(
    parameter PRFQ_SIZE            = 64,
    parameter PRFQ_STRIDE          = 2,
    // Size of line inside a bank in bytes
    parameter BANK_LINE_SIZE_BYTES = 16,
    // Size of a word in bytes
    parameter WORD_SIZE_BYTES      = 4
) (
    input  wire       clk,
    input  wire       reset,

    input  wire       dram_req,
    input  wire[31:0] dram_req_addr,

    input  wire       pref_pop,
    output wire       pref_valid,
    output wire[31:0] pref_addr
);

    // Number of prefetches still to hand out for the current base address,
    // and the address of the next one.
    reg [`LOG2UP(PRFQ_STRIDE):0] use_valid;
    reg [31:0]                   use_addr;

    // Queue status and head entry.
    wire        current_empty;
    wire        current_full;
    wire        current_valid;
    wire [31:0] current_addr;

    // Take the next queued address when at most one prefetch remains and the
    // queue is not empty.
    wire        update_use;

    assign current_valid = ~current_empty;
    assign update_use    = current_valid && ((use_valid == 0) || (use_valid == 1));

    VX_generic_queue #(
        .DATAW (32),
        .SIZE  (PRFQ_SIZE)
    ) pfq_queue (
        .clk      (clk),
        .reset    (reset),

        // Requests are not recorded in a cycle where pref_pop is asserted.
        .push     (dram_req && !current_full && !pref_pop),
        .data_in  (dram_req_addr & `BASE_ADDR_MASK),

        .pop      (update_use),
        .data_out (current_addr),

        .empty    (current_empty),
        .full     (current_full)
    );

    assign pref_addr  = use_addr;
    assign pref_valid = use_valid != 0;

    // Reloading from the queue takes priority over consuming a prefetch.
    always @(posedge clk) begin
        if (reset) begin
            use_valid <= 0;
            use_addr  <= 0;
        end else if (update_use) begin
            use_valid <= PRFQ_STRIDE;
            use_addr  <= current_addr + BANK_LINE_SIZE_BYTES;
        end else if (pref_valid && pref_pop) begin
            use_valid <= use_valid - 1;
            use_addr  <= use_addr + BANK_LINE_SIZE_BYTES;
        end
    end

endmodule

View File

@@ -1,38 +1,38 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
// Snoop-forward arbiter.
//
// Selects one bank among those with a pending snoop forward, presents its
// address on snp_fwd_addr and pops that bank. Bank valids are gated with
// snp_fwd_ready, so nothing is selected or popped while snp_fwd_ready is low.
module VX_snp_fwd_arb #(
    parameter NUM_BANKS = 8
) (
    input  wire [NUM_BANKS-1:0]       per_bank_snp_fwd_valid,
    input  wire [NUM_BANKS-1:0][31:0] per_bank_snp_fwd_addr,
    output reg  [NUM_BANKS-1:0]       per_bank_snp_fwd_pop,

    output wire                       snp_fwd_valid,
    output wire [31:0]                snp_fwd_addr,
    input  wire                       snp_fwd_ready
);

    // Candidate banks: valid and downstream ready.
    wire [NUM_BANKS-1:0]          qual_per_bank_snp_fwd;

    // Bank picked by the priority encoder and whether any bank was found.
    wire [`LOG2UP(NUM_BANKS)-1:0] fsq_bank;
    wire                          fsq_valid;

    assign qual_per_bank_snp_fwd = {NUM_BANKS{snp_fwd_ready}} & per_bank_snp_fwd_valid;

    VX_generic_priority_encoder #(
        .N (NUM_BANKS)
    ) sel_ffsq (
        .valids (qual_per_bank_snp_fwd),
        .index  (fsq_bank),
        .found  (fsq_valid)
    );

    assign snp_fwd_addr  = per_bank_snp_fwd_addr[fsq_bank];
    assign snp_fwd_valid = fsq_valid;

    // Pop mask: only the selected bank's bit is set, and only when a bank
    // was found.
    always @(*) begin
        per_bank_snp_fwd_pop = 0;
        if (fsq_valid)
            per_bank_snp_fwd_pop[fsq_bank] = 1;
    end

endmodule

View File

@@ -1,98 +1,98 @@
`include "VX_cache_config.vh" `include "VX_cache_config.vh"
module VX_tag_data_access #( module VX_tag_data_access #(
// Size of cache in bytes // Size of cache in bytes
parameter CACHE_SIZE_BYTES = 1024, parameter CACHE_SIZE_BYTES = 1024,
// Size of line inside a bank in bytes // Size of line inside a bank in bytes
parameter BANK_LINE_SIZE_BYTES = 16, parameter BANK_LINE_SIZE_BYTES = 16,
// Number of banks {1, 2, 4, 8,...} // Number of banks {1, 2, 4, 8,...}
parameter NUM_BANKS = 8, parameter NUM_BANKS = 8,
// Size of a word in bytes // Size of a word in bytes
parameter WORD_SIZE_BYTES = 4, parameter WORD_SIZE_BYTES = 4,
// Number of Word requests per cycle {1, 2, 4, 8, ...} // Number of Word requests per cycle {1, 2, 4, 8, ...}
parameter NUM_REQUESTS = 2, parameter NUM_REQUESTS = 2,
// Number of cycles to complete stage 1 (read from memory) // Number of cycles to complete stage 1 (read from memory)
parameter STAGE_1_CYCLES = 2, parameter STAGE_1_CYCLES = 2,
// Function ID, {Dcache=0, Icache=1, Sharedmemory=2} // Function ID, {Dcache=0, Icache=1, Sharedmemory=2}
parameter FUNC_ID = 0, parameter FUNC_ID = 0,
// Queues feeding into banks Knobs {1, 2, 4, 8, ...} // Queues feeding into banks Knobs {1, 2, 4, 8, ...}
// Core Request Queue Size // Core Request Queue Size
parameter REQQ_SIZE = 8, parameter REQQ_SIZE = 8,
// Miss Reserv Queue Knob // Miss Reserv Queue Knob
parameter MRVQ_SIZE = 8, parameter MRVQ_SIZE = 8,
// Dram Fill Rsp Queue Size // Dram Fill Rsp Queue Size
parameter DFPQ_SIZE = 2, parameter DFPQ_SIZE = 2,
// Snoop Req Queue // Snoop Req Queue
parameter SNRQ_SIZE = 8, parameter SNRQ_SIZE = 8,
// Queues for writebacks Knobs {1, 2, 4, 8, ...} // Queues for writebacks Knobs {1, 2, 4, 8, ...}
// Core Writeback Queue Size // Core Writeback Queue Size
parameter CWBQ_SIZE = 8, parameter CWBQ_SIZE = 8,
// Dram Writeback Queue Size // Dram Writeback Queue Size
parameter DWBQ_SIZE = 4, parameter DWBQ_SIZE = 4,
// Dram Fill Req Queue Size // Dram Fill Req Queue Size
parameter DFQQ_SIZE = 8, parameter DFQQ_SIZE = 8,
// Lower Level Cache Hit Queue Size // Lower Level Cache Hit Queue Size
parameter LLVQ_SIZE = 16, parameter LLVQ_SIZE = 16,
// Fill Invalidator Size {Fill invalidator must be active} // Fill Invalidator Size {Fill invalidator must be active}
parameter FILL_INVALIDAOR_SIZE = 16, parameter FILL_INVALIDAOR_SIZE = 16,
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire stall, input wire stall,
input wire is_snp_st1e, input wire is_snp_st1e,
input wire stall_bank_pipe, input wire stall_bank_pipe,
// Initial Reading // Initial Reading
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
// TODO: should fix this // TODO: should fix this
input wire[31:0] readaddr_st10, input wire[31:0] readaddr_st10,
input wire[31:0] writeaddr_st1e, input wire[31:0] writeaddr_st1e,
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
input wire valid_req_st1e, input wire valid_req_st1e,
input wire writefill_st1e, input wire writefill_st1e,
input wire[`WORD_SIZE_RNG] writeword_st1e, input wire[`WORD_SIZE_RNG] writeword_st1e,
input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e, input wire[`DBANK_LINE_WORDS-1:0][31:0] writedata_st1e,
input wire[2:0] mem_write_st1e, input wire[2:0] mem_write_st1e,
input wire[2:0] mem_read_st1e, input wire[2:0] mem_read_st1e,
output wire[`WORD_SIZE_RNG] readword_st1e, output wire[`WORD_SIZE_RNG] readword_st1e,
output wire[`DBANK_LINE_WORDS-1:0][31:0] readdata_st1e, output wire[`DBANK_LINE_WORDS-1:0][31:0] readdata_st1e,
output wire[`TAG_SELECT_BITS-1:0] readtag_st1e, output wire[`TAG_SELECT_BITS-1:0] readtag_st1e,
output wire miss_st1e, output wire miss_st1e,
output wire dirty_st1e, output wire dirty_st1e,
output wire fill_saw_dirty_st1e output wire fill_saw_dirty_st1e
); );
reg read_valid_st1c[STAGE_1_CYCLES-1:0]; reg read_valid_st1c[STAGE_1_CYCLES-1:0];
reg read_dirty_st1c[STAGE_1_CYCLES-1:0]; reg read_dirty_st1c[STAGE_1_CYCLES-1:0];
reg[`TAG_SELECT_BITS-1:0] read_tag_st1c [STAGE_1_CYCLES-1:0]; reg[`TAG_SELECT_BITS-1:0] read_tag_st1c [STAGE_1_CYCLES-1:0];
reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0]; reg[`DBANK_LINE_WORDS-1:0][31:0] read_data_st1c [STAGE_1_CYCLES-1:0];
wire qual_read_valid_st1; wire qual_read_valid_st1;
wire qual_read_dirty_st1; wire qual_read_dirty_st1;
wire[`TAG_SELECT_BITS-1:0] qual_read_tag_st1; wire[`TAG_SELECT_BITS-1:0] qual_read_tag_st1;
wire[`DBANK_LINE_WORDS-1:0][31:0] qual_read_data_st1; wire[`DBANK_LINE_WORDS-1:0][31:0] qual_read_data_st1;
wire use_read_valid_st1e; wire use_read_valid_st1e;
wire use_read_dirty_st1e; wire use_read_dirty_st1e;
wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1e; wire[`TAG_SELECT_BITS-1:0] use_read_tag_st1e;
wire[`DBANK_LINE_WORDS-1:0][31:0] use_read_data_st1e; wire[`DBANK_LINE_WORDS-1:0][31:0] use_read_data_st1e;
wire[`DBANK_LINE_WORDS-1:0][3:0] use_write_enable; wire[`DBANK_LINE_WORDS-1:0][3:0] use_write_enable;
wire[`DBANK_LINE_WORDS-1:0][31:0] use_write_data; wire[`DBANK_LINE_WORDS-1:0][31:0] use_write_data;
wire sw, sb, sh; wire sw, sb, sh;
wire real_writefill = writefill_st1e && ((valid_req_st1e && !use_read_valid_st1e) || (valid_req_st1e && use_read_valid_st1e && (writeaddr_st1e[`TAG_SELECT_ADDR_RNG] != use_read_tag_st1e))); wire real_writefill = writefill_st1e && ((valid_req_st1e && !use_read_valid_st1e) || (valid_req_st1e && use_read_valid_st1e && (writeaddr_st1e[`TAG_SELECT_ADDR_RNG] != use_read_tag_st1e)));
wire fill_sent; wire fill_sent;
wire invalidate_line; wire invalidate_line;
VX_tag_data_structure #( VX_tag_data_structure #(
.CACHE_SIZE_BYTES (CACHE_SIZE_BYTES), .CACHE_SIZE_BYTES (CACHE_SIZE_BYTES),
.BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES), .BANK_LINE_SIZE_BYTES (BANK_LINE_SIZE_BYTES),
.NUM_BANKS (NUM_BANKS), .NUM_BANKS (NUM_BANKS),
@@ -110,67 +110,67 @@ module VX_tag_data_access #(
.LLVQ_SIZE (LLVQ_SIZE), .LLVQ_SIZE (LLVQ_SIZE),
.FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE), .FILL_INVALIDAOR_SIZE (FILL_INVALIDAOR_SIZE),
.SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES) .SIMULATED_DRAM_LATENCY_CYCLES(SIMULATED_DRAM_LATENCY_CYCLES)
) tag_data_structure ( ) tag_data_structure (
.clk (clk), .clk (clk),
.reset (reset), .reset (reset),
.stall_bank_pipe(stall_bank_pipe), .stall_bank_pipe(stall_bank_pipe),
.read_addr (readaddr_st10[`LINE_SELECT_ADDR_RNG]), .read_addr (readaddr_st10[`LINE_SELECT_ADDR_RNG]),
.read_valid (qual_read_valid_st1), .read_valid (qual_read_valid_st1),
.read_dirty (qual_read_dirty_st1), .read_dirty (qual_read_dirty_st1),
.read_tag (qual_read_tag_st1), .read_tag (qual_read_tag_st1),
.read_data (qual_read_data_st1), .read_data (qual_read_data_st1),
.invalidate (invalidate_line), .invalidate (invalidate_line),
.write_enable(use_write_enable), .write_enable(use_write_enable),
.write_fill (real_writefill), .write_fill (real_writefill),
.write_addr (writeaddr_st1e[`LINE_SELECT_ADDR_RNG]), .write_addr (writeaddr_st1e[`LINE_SELECT_ADDR_RNG]),
.tag_index (writeaddr_st1e[`TAG_SELECT_ADDR_RNG]), .tag_index (writeaddr_st1e[`TAG_SELECT_ADDR_RNG]),
.write_data (use_write_data), .write_data (use_write_data),
.fill_sent (fill_sent) .fill_sent (fill_sent)
); );
// VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 ( // VX_generic_register #(.N( 1 + 1 + `TAG_SELECT_BITS + (`DBANK_LINE_WORDS*32) )) s0_1_c0 (
VX_generic_register #( VX_generic_register #(
.N( 1 + 1 + `TAG_SELECT_BITS + (`DBANK_LINE_WORDS*32) ), .N( 1 + 1 + `TAG_SELECT_BITS + (`DBANK_LINE_WORDS*32) ),
.PassThru(1) .PassThru(1)
) s0_1_c0 ( ) s0_1_c0 (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
.flush(0), .flush(0),
.in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_tag_st1, qual_read_data_st1}), .in ({qual_read_valid_st1, qual_read_dirty_st1, qual_read_tag_st1, qual_read_data_st1}),
.out ({read_valid_st1c[0] , read_dirty_st1c[0] , read_tag_st1c[0] , read_data_st1c[0]}) .out ({read_valid_st1c[0] , read_dirty_st1c[0] , read_tag_st1c[0] , read_data_st1c[0]})
); );
// Stage-1 delay line for the values read from the tag/data store.
//
// Entry 0 of read_*_st1c is driven by s0_1_c0; each iteration below registers
// entry curr_stage-1 into entry curr_stage, and the stage-1 outputs are taken
// from entry STAGE_1_CYCLES-1. The loop therefore has to run up to and
// including curr_stage == STAGE_1_CYCLES-1. The previous bound
// (curr_stage < STAGE_1_CYCLES-1) stopped one entry short, leaving the last
// entry undriven for any STAGE_1_CYCLES > 1. With STAGE_1_CYCLES == 1 the
// loop is empty, exactly as before.
genvar curr_stage;
generate
    for (curr_stage = 1; curr_stage < STAGE_1_CYCLES; curr_stage = curr_stage + 1) begin
        VX_generic_register #(
            .N (1 + 1 + `TAG_SELECT_BITS + (`DBANK_LINE_WORDS*32))
        ) s0_1_cc (
            .clk   (clk),
            .reset (reset),
            .stall (stall),
            .flush (1'b0),
            .in    ({read_valid_st1c[curr_stage-1], read_dirty_st1c[curr_stage-1], read_tag_st1c[curr_stage-1], read_data_st1c[curr_stage-1]}),
            .out   ({read_valid_st1c[curr_stage],   read_dirty_st1c[curr_stage],   read_tag_st1c[curr_stage],   read_data_st1c[curr_stage]})
        );
    end
endgenerate
assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid assign use_read_valid_st1e = read_valid_st1c[STAGE_1_CYCLES-1] || (FUNC_ID == `SFUNC_ID); // If shared memory, always valid
assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty only applies in Dcache assign use_read_dirty_st1e = read_dirty_st1c[STAGE_1_CYCLES-1] && (FUNC_ID != `SFUNC_ID); // Dirty only applies in Dcache
assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM assign use_read_tag_st1e = (FUNC_ID == `SFUNC_ID) ? writeaddr_st1e[`TAG_SELECT_ADDR_RNG] : read_tag_st1c [STAGE_1_CYCLES-1]; // Tag is always the same in SM
genvar curr_w; genvar curr_w;
for (curr_w = 0; curr_w < `DBANK_LINE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0]; for (curr_w = 0; curr_w < `DBANK_LINE_WORDS; curr_w = curr_w+1) assign use_read_data_st1e[curr_w][31:0] = read_data_st1c[STAGE_1_CYCLES-1][curr_w][31:0];
// assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1]; // assign use_read_data_st1e = read_data_st1c [STAGE_1_CYCLES-1];
/////////////////////// LOAD LOGIC /////////////////// /////////////////////// LOAD LOGIC ///////////////////
wire[`OFFSET_SIZE_RNG] byte_select = writeaddr_st1e[`OFFSET_ADDR_RNG]; wire[`OFFSET_SIZE_RNG] byte_select = writeaddr_st1e[`OFFSET_ADDR_RNG];
wire[`WORD_SELECT_BITS-1:0] block_offset = writeaddr_st1e[`WORD_SELECT_ADDR_RNG]; wire[`WORD_SELECT_BITS-1:0] block_offset = writeaddr_st1e[`WORD_SELECT_ADDR_RNG];
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire lw = valid_req_st1e && (mem_read_st1e == `LW_MEM_READ); wire lw = valid_req_st1e && (mem_read_st1e == `LW_MEM_READ);
@@ -182,7 +182,7 @@ module VX_tag_data_access #(
wire b0 = (byte_select == 0); wire b0 = (byte_select == 0);
wire b1 = (byte_select == 1); wire b1 = (byte_select == 1);
wire b2 = (byte_select == 2); wire b2 = (byte_select == 2);
wire b3 = (byte_select == 3); wire b3 = (byte_select == 3);
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
`DEBUG_BEGIN `DEBUG_BEGIN
@@ -207,74 +207,74 @@ module VX_tag_data_access #(
wire[`DBANK_LINE_WORDS-1:0][3:0] we; wire[`DBANK_LINE_WORDS-1:0][3:0] we;
wire[`DBANK_LINE_WORDS-1:0][31:0] data_write; wire[`DBANK_LINE_WORDS-1:0][31:0] data_write;
genvar g; genvar g;
generate generate
for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables for (g = 0; g < `DBANK_LINE_WORDS; g = g + 1) begin : write_enables
wire normal_write = (block_offset == g[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill; wire normal_write = (block_offset == g[`WORD_SELECT_BITS-1:0]) && should_write && !real_writefill;
assign we[g] = (force_write) ? 4'b1111 : assign we[g] = (force_write) ? 4'b1111 :
(should_write && !real_writefill && (FUNC_ID == `L2FUNC_ID)) ? 4'b1111 : (should_write && !real_writefill && (FUNC_ID == `L2FUNC_ID)) ? 4'b1111 :
(normal_write && sw) ? 4'b1111 : (normal_write && sw) ? 4'b1111 :
(normal_write && sb) ? sb_mask : (normal_write && sb) ? sb_mask :
(normal_write && sh) ? sh_mask : (normal_write && sh) ? sh_mask :
4'b0000; 4'b0000;
if (FUNC_ID != `L2FUNC_ID) begin if (FUNC_ID != `L2FUNC_ID) begin
wire[31:0] sb_data = b1 ? {{16{1'b0}}, writeword_st1e[7:0], { 8{1'b0}}} : wire[31:0] sb_data = b1 ? {{16{1'b0}}, writeword_st1e[7:0], { 8{1'b0}}} :
b2 ? {{ 8{1'b0}}, writeword_st1e[7:0], {16{1'b0}}} : b2 ? {{ 8{1'b0}}, writeword_st1e[7:0], {16{1'b0}}} :
b3 ? {{ 0{1'b0}}, writeword_st1e[7:0], {24{1'b0}}} : b3 ? {{ 0{1'b0}}, writeword_st1e[7:0], {24{1'b0}}} :
writeword_st1e[31:0]; writeword_st1e[31:0];
wire[31:0] sw_data = writeword_st1e[31:0]; wire[31:0] sw_data = writeword_st1e[31:0];
wire[31:0] sh_data = b2 ? {writeword_st1e[15:0], {16{1'b0}}} : writeword_st1e[31:0]; wire[31:0] sh_data = b2 ? {writeword_st1e[15:0], {16{1'b0}}} : writeword_st1e[31:0];
wire[31:0] use_write_dat = sb ? sb_data : sh ? sh_data : sw_data; wire[31:0] use_write_dat = sb ? sb_data : sh ? sh_data : sw_data;
assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat; assign data_write[g] = force_write ? writedata_st1e[g] : use_write_dat;
end end
end end
if (FUNC_ID == `L2FUNC_ID) begin if (FUNC_ID == `L2FUNC_ID) begin
assign data_write = force_write ? writedata_st1e : writeword_st1e; assign data_write = force_write ? writedata_st1e : writeword_st1e;
end end
endgenerate endgenerate
assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we; assign use_write_enable = (writefill_st1e && !real_writefill) ? 0 : we;
assign use_write_data = data_write; assign use_write_data = data_write;
if (FUNC_ID == `L2FUNC_ID) begin if (FUNC_ID == `L2FUNC_ID) begin
assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1]; assign readword_st1e = read_data_st1c[STAGE_1_CYCLES-1];
end else begin end else begin
wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0]; wire[31:0] data_unmod = read_data_st1c[STAGE_1_CYCLES-1][block_offset][31:0];
wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) : wire[31:0] data_unQual = (b0 || lw) ? (data_unmod) :
b1 ? (data_unmod >> 8) : b1 ? (data_unmod >> 8) :
b2 ? (data_unmod >> 16) : b2 ? (data_unmod >> 16) :
(data_unmod >> 24); (data_unmod >> 24);
wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF); wire[31:0] lb_data = (data_unQual[7] ) ? (data_unQual | 32'hFFFFFF00) : (data_unQual & 32'hFF);
wire[31:0] lh_data = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF); wire[31:0] lh_data = (data_unQual[15]) ? (data_unQual | 32'hFFFF0000) : (data_unQual & 32'hFFFF);
wire[31:0] lbu_data = (data_unQual & 32'hFF); wire[31:0] lbu_data = (data_unQual & 32'hFF);
wire[31:0] lhu_data = (data_unQual & 32'hFFFF); wire[31:0] lhu_data = (data_unQual & 32'hFFFF);
wire[31:0] lw_data = (data_unQual); wire[31:0] lw_data = (data_unQual);
wire[31:0] data_Qual = lb ? lb_data : wire[31:0] data_Qual = lb ? lb_data :
lh ? lh_data : lh ? lh_data :
lhu ? lhu_data : lhu ? lhu_data :
lbu ? lbu_data : lbu ? lbu_data :
lw_data; lw_data;
assign readword_st1e = data_Qual; assign readword_st1e = data_Qual;
end end
wire[`TAG_SELECT_ADDR_RNG] writeaddr_tag = writeaddr_st1e[`TAG_SELECT_ADDR_RNG]; wire[`TAG_SELECT_ADDR_RNG] writeaddr_tag = writeaddr_st1e[`TAG_SELECT_ADDR_RNG];
wire tags_mismatch = writeaddr_tag != use_read_tag_st1e; wire tags_mismatch = writeaddr_tag != use_read_tag_st1e;
wire tags_match = writeaddr_tag == use_read_tag_st1e; wire tags_match = writeaddr_tag == use_read_tag_st1e;
wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e; wire snoop_hit = valid_req_st1e && is_snp_st1e && use_read_valid_st1e && tags_match && use_read_dirty_st1e;
wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e; wire req_invalid = valid_req_st1e && !is_snp_st1e && !use_read_valid_st1e && !writefill_st1e;
wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && tags_mismatch; wire req_miss = valid_req_st1e && !is_snp_st1e && use_read_valid_st1e && !writefill_st1e && tags_mismatch;
assign miss_st1e = snoop_hit || req_invalid || req_miss; assign miss_st1e = snoop_hit || req_invalid || req_miss;
assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e; assign dirty_st1e = valid_req_st1e && use_read_valid_st1e && use_read_dirty_st1e;
assign readdata_st1e = use_read_data_st1e; assign readdata_st1e = use_read_data_st1e;
assign readtag_st1e = use_read_tag_st1e; assign readtag_st1e = use_read_tag_st1e;
assign fill_sent = miss_st1e; assign fill_sent = miss_st1e;
assign fill_saw_dirty_st1e = real_writefill && dirty_st1e; assign fill_saw_dirty_st1e = real_writefill && dirty_st1e;
assign invalidate_line = snoop_hit; assign invalidate_line = snoop_hit;
endmodule endmodule

View File

@@ -42,23 +42,23 @@ module VX_tag_data_structure #(
// Dram knobs // Dram knobs
parameter SIMULATED_DRAM_LATENCY_CYCLES = 10 parameter SIMULATED_DRAM_LATENCY_CYCLES = 10
) ( ) (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire stall_bank_pipe, input wire stall_bank_pipe,
input wire[`LINE_SELECT_BITS-1:0] read_addr, input wire[`LINE_SELECT_BITS-1:0] read_addr,
output wire read_valid, output wire read_valid,
output wire read_dirty, output wire read_dirty,
output wire[`TAG_SELECT_BITS-1:0] read_tag, output wire[`TAG_SELECT_BITS-1:0] read_tag,
output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data, output wire[`DBANK_LINE_WORDS-1:0][31:0] read_data,
input wire invalidate, input wire invalidate,
input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable, input wire[`DBANK_LINE_WORDS-1:0][3:0] write_enable,
input wire write_fill, input wire write_fill,
input wire[`LINE_SELECT_BITS-1:0] write_addr, input wire[`LINE_SELECT_BITS-1:0] write_addr,
input wire[`TAG_SELECT_BITS-1:0] tag_index, input wire[`TAG_SELECT_BITS-1:0] tag_index,
input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data, input wire[`DBANK_LINE_WORDS-1:0][31:0] write_data,
input wire fill_sent input wire fill_sent
); );
reg [`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0]; reg [`DBANK_LINE_WORDS-1:0][3:0][7:0] data [`BANK_LINE_COUNT-1:0];
@@ -84,15 +84,15 @@ module VX_tag_data_structure #(
// data [l] <= 0; // data [l] <= 0;
end end
end else if (!stall_bank_pipe) begin end else if (!stall_bank_pipe) begin
if (going_to_write) begin if (going_to_write) begin
valid[write_addr] <= 1; valid[write_addr] <= 1;
tag [write_addr] <= tag_index; tag [write_addr] <= tag_index;
if (write_fill) begin if (write_fill) begin
dirty[write_addr] <= 0; dirty[write_addr] <= 0;
end else begin end else begin
dirty[write_addr] <= 1; dirty[write_addr] <= 1;
end end
end else if (fill_sent) begin end else if (fill_sent) begin
dirty[write_addr] <= 0; dirty[write_addr] <= 0;
// valid[write_addr] <= 0; // valid[write_addr] <= 0;
end end
@@ -101,12 +101,12 @@ module VX_tag_data_structure #(
valid[write_addr] <= 0; valid[write_addr] <= 0;
end end
for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin for (f = 0; f < `DBANK_LINE_WORDS; f = f + 1) begin
if (write_enable[f][0]) data[write_addr][f][0] <= write_data[f][7 :0 ]; if (write_enable[f][0]) data[write_addr][f][0] <= write_data[f][7 :0 ];
if (write_enable[f][1]) data[write_addr][f][1] <= write_data[f][15:8 ]; if (write_enable[f][1]) data[write_addr][f][1] <= write_data[f][15:8 ];
if (write_enable[f][2]) data[write_addr][f][2] <= write_data[f][23:16]; if (write_enable[f][2]) data[write_addr][f][2] <= write_data[f][23:16];
if (write_enable[f][3]) data[write_addr][f][3] <= write_data[f][31:24]; if (write_enable[f][3]) data[write_addr][f][3] <= write_data[f][31:24];
end end
end end
end end

View File

@@ -5,10 +5,10 @@
// Wire bundle for a branch response: two single-bit flags, a 32-bit
// destination and a warp identifier that is `NW_BITS wide.
interface VX_branch_rsp_if ();

    wire                valid_branch;
    wire                branch_dir;
    wire [31:0]         branch_dest;
    wire [`NW_BITS-1:0] branch_warp_num;

endinterface

View File

@@ -5,15 +5,15 @@
// Wire bundle for a CSR request.
// `valid` carries one bit per thread (`NUM_THREADS); the remaining
// fields are shared by the whole request.
interface VX_csr_req_if ();

    wire [`NUM_THREADS-1:0] valid;
    wire [`NW_BITS-1:0]     warp_num;
    wire [4:0]              rd;
    wire [1:0]              wb;
    wire [4:0]              alu_op;
    wire                    is_csr;
    wire [11:0]             csr_address;
    wire                    csr_immed;
    wire [31:0]             csr_mask;

endinterface

View File

@@ -5,12 +5,12 @@
// Wire bundle for a CSR write-back: per-thread valid bits and one
// 32-bit result word per thread, plus warp id, rd and wb fields.
interface VX_csr_wb_if ();

    wire [`NUM_THREADS-1:0]       valid;
    wire [`NW_BITS-1:0]           warp_num;
    wire [4:0]                    rd;
    wire [1:0]                    wb;
    wire [`NUM_THREADS-1:0][31:0] csr_result;   // one word per thread

endinterface

View File

@@ -5,44 +5,44 @@
// Wire bundle for a request to the execution unit.
// Register operands are carried as one 32-bit word per thread.
interface VX_exec_unit_req_if ();

    // Meta
    wire [`NUM_THREADS-1:0]       valid;
    wire [`NW_BITS-1:0]           warp_num;
    wire [31:0]                   curr_PC;
    wire [31:0]                   PC_next;

    // Write Back Info
    wire [4:0]                    rd;
    wire [1:0]                    wb;

    // Data and alu op
    wire [`NUM_THREADS-1:0][31:0] a_reg_data;
    wire [`NUM_THREADS-1:0][31:0] b_reg_data;
    wire [4:0]                    alu_op;
    wire [4:0]                    rs1;
    wire [4:0]                    rs2;
    wire                          rs2_src;
    wire [31:0]                   itype_immed;
    wire [19:0]                   upper_immed;

    // Branch type
    wire [2:0]                    branch_type;

    // Jal info
    wire                          jalQual;
    wire                          jal;
    wire [31:0]                   jal_offset;

    // These two wires are wrapped in the project's warning-suppression macros.
`IGNORE_WARNINGS_BEGIN
    wire                          ebreak;
    wire                          wspawn;
`IGNORE_WARNINGS_END

    // CSR info
    wire                          is_csr;
    wire [11:0]                   csr_address;
    wire                          csr_immed;
    wire [31:0]                   csr_mask;

endinterface

View File

@@ -5,37 +5,37 @@
// Wire bundle carried from the front end to the back end.
// VX_d_e_reg registers every field of this interface by name, so field
// names and widths must stay in step with the register width used there.
interface VX_frE_to_bckE_req_if ();

    // CSR fields
    wire [11:0]             csr_address;
    wire                    is_csr;
    wire                    csr_immed;
    wire [31:0]             csr_mask;

    // Register indices, ALU operation and write-back selector
    wire [4:0]              rd;
    wire [4:0]              rs1;
    wire [4:0]              rs2;
    wire [4:0]              alu_op;
    wire [1:0]              wb;
    wire                    rs2_src;
    wire [31:0]             itype_immed;

    // Memory / branch selectors
    wire [2:0]              mem_read;
    wire [2:0]              mem_write;
    wire [2:0]              branch_type;
    wire [19:0]             upper_immed;
    wire [31:0]             curr_PC;

`IGNORE_WARNINGS_BEGIN
    wire                    ebreak;
`IGNORE_WARNINGS_END

    // Jump fields
    wire                    jalQual;
    wire                    jal;
    wire [31:0]             jal_offset;
    wire [31:0]             PC_next;

    // Per-thread valid bits and warp id
    wire [`NUM_THREADS-1:0] valid;
    wire [`NW_BITS-1:0]     warp_num;

    // GPGPU stuff
    wire                    is_wspawn;
    wire                    is_tmc;
    wire                    is_split;
    wire                    is_barrier;

endinterface

View File

@@ -6,8 +6,8 @@
// Wire bundle holding two register operands, each as one 32-bit word
// per thread.
interface VX_gpr_data_if ();

    wire [`NUM_THREADS-1:0][31:0] a_reg_data;
    wire [`NUM_THREADS-1:0][31:0] b_reg_data;

endinterface

View File

@@ -5,9 +5,9 @@
// Wire bundle pairing a single-bit `is_jal` flag with a 32-bit PC value.
interface VX_gpr_jal_if ();

    wire        is_jal;
    wire [31:0] curr_PC;

endinterface
`endif `endif

View File

@@ -5,9 +5,9 @@
// Wire bundle for a register-file read: two 5-bit register indices and
// a warp identifier.
interface VX_gpr_read_if ();

    wire [4:0]          rs1;
    wire [4:0]          rs2;
    wire [`NW_BITS-1:0] warp_num;

endinterface

View File

@@ -7,7 +7,7 @@ interface VX_gpu_dcache_dram_req_if #(
parameter BANK_LINE_WORDS = 2 parameter BANK_LINE_WORDS = 2
) (); ) ();
// DRAM Request // DRAM Request
wire dram_req_write; wire dram_req_write;
wire dram_req_read; wire dram_req_read;
wire [31:0] dram_req_addr; wire [31:0] dram_req_addr;

View File

@@ -4,9 +4,9 @@
`include "../cache/VX_cache_config.vh" `include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_dram_rsp_if #( interface VX_gpu_dcache_dram_rsp_if #(
parameter BANK_LINE_WORDS = 2 parameter BANK_LINE_WORDS = 2
) (); ) ();
// DRAM Response // DRAM Response
wire dram_rsp_valid; wire dram_rsp_valid;
wire [31:0] dram_rsp_addr; wire [31:0] dram_rsp_addr;
wire [BANK_LINE_WORDS-1:0][31:0] dram_rsp_data; wire [BANK_LINE_WORDS-1:0][31:0] dram_rsp_data;

View File

@@ -4,22 +4,22 @@
`include "../cache/VX_cache_config.vh" `include "../cache/VX_cache_config.vh"
// Wire bundle for core requests into the data cache.
// NUM_REQUESTS sets how many request slots each per-request field has.
interface VX_gpu_dcache_req_if #(
    parameter NUM_REQUESTS = 32
) ();

    // Core request (one entry per request slot, plus a shared ready)
    wire [NUM_REQUESTS-1:0]       core_req_valid;
    wire [NUM_REQUESTS-1:0][2:0]  core_req_read;
    wire [NUM_REQUESTS-1:0][2:0]  core_req_write;
    wire [NUM_REQUESTS-1:0][31:0] core_req_addr;
    wire [NUM_REQUESTS-1:0][31:0] core_req_data;
    wire                          core_req_ready;

    // Core request Meta data
    wire [4:0]                    core_req_rd;
    wire [NUM_REQUESTS-1:0][1:0]  core_req_wb;
    wire [`NW_BITS-1:0]           core_req_warp_num;
    wire [31:0]                   core_req_pc;

endinterface

View File

@@ -7,7 +7,7 @@ interface VX_gpu_dcache_rsp_if #(
parameter NUM_REQUESTS = 32 parameter NUM_REQUESTS = 32
) (); ) ();
// Core response // Core response
wire [NUM_REQUESTS-1:0] core_rsp_valid; wire [NUM_REQUESTS-1:0] core_rsp_valid;
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
wire [4:0] core_rsp_read; wire [4:0] core_rsp_read;

View File

@@ -4,7 +4,7 @@
`include "../cache/VX_cache_config.vh" `include "../cache/VX_cache_config.vh"
interface VX_gpu_dcache_snp_req_if (); interface VX_gpu_dcache_snp_req_if ();
// Snoop Req // Snoop Req
wire snp_req_valid; wire snp_req_valid;
wire [31:0] snp_req_addr; wire [31:0] snp_req_addr;

View File

@@ -5,18 +5,18 @@
// Wire bundle for a GPU-specific instruction request: per-thread valid
// bits, warp id, four single-bit instruction-kind flags and operand data.
interface VX_gpu_inst_req_if();

    wire [`NUM_THREADS-1:0]       valid;
    wire [`NW_BITS-1:0]           warp_num;

    // Instruction-kind flags
    wire                          is_wspawn;
    wire                          is_tmc;
    wire                          is_split;
    wire                          is_barrier;

    wire [31:0]                   pc_next;
    wire [`NUM_THREADS-1:0][31:0] a_reg_data;   // one word per thread
    wire [31:0]                   rd2;

endinterface

View File

@@ -5,13 +5,13 @@
// Wire bundle for snoop traffic. Only the request side is declared;
// the response side is still an open TODO in the source.
interface VX_gpu_snp_req_rsp_if ();

    // Snoop request
    wire        snp_req_valid;
    wire [31:0] snp_req_addr;
    wire        snp_req_ready;

    // Snoop Response
    // TODO:

endinterface

View File

@@ -5,10 +5,10 @@
// Wire bundle for an instruction-cache response: a 32-bit instruction
// word and a single-bit `delay` flag.
interface VX_icache_rsp_if ();

    // Disabled in the source; kept for reference.
    // wire ready;
    // wire stall;

    wire [31:0] instruction;
    wire        delay;

endinterface

View File

@@ -6,12 +6,12 @@
// Wire bundle for write-back of execution results: one 32-bit result
// per thread together with PC, rd, wb, per-thread valid bits and warp id.
interface VX_inst_exec_wb_if ();

    wire [`NUM_THREADS-1:0][31:0] alu_result;   // one word per thread
    wire [31:0]                   exec_wb_pc;
    wire [4:0]                    rd;
    wire [1:0]                    wb;
    wire [`NUM_THREADS-1:0]       wb_valid;
    wire [`NW_BITS-1:0]           wb_warp_num;

endinterface

View File

@@ -6,12 +6,12 @@
// Wire bundle for write-back of loaded data: one 32-bit word per
// thread together with PC, rd, wb, per-thread valid bits and warp id.
interface VX_inst_mem_wb_if ();

    wire [`NUM_THREADS-1:0][31:0] loaded_data;  // one word per thread
    wire [31:0]                   mem_wb_pc;
    wire [4:0]                    rd;
    wire [1:0]                    wb;
    wire [`NUM_THREADS-1:0]       wb_valid;
    wire [`NW_BITS-1:0]           wb_warp_num;

endinterface

View File

@@ -5,10 +5,10 @@
// Wire bundle describing one fetched instruction: the 32-bit
// instruction word, its PC, the warp id and per-thread valid bits.
// VX_f_d_reg registers exactly these four fields.
interface VX_inst_meta_if ();

    wire [31:0]             instruction;
    wire [31:0]             inst_pc;
    wire [`NW_BITS-1:0]     warp_num;
    wire [`NUM_THREADS-1:0] valid;

endinterface

View File

@@ -6,10 +6,10 @@
// Wire bundle for a jump response: a single-bit flag, a 32-bit
// destination and a warp identifier.
interface VX_jal_rsp_if ();

    wire                jal;
    wire [31:0]         jal_dest;
    wire [`NW_BITS-1:0] jal_warp_num;

endinterface
`endif `endif

View File

@@ -6,8 +6,8 @@
// Wire bundle pairing a single-bit `is_join` flag with a warp identifier.
interface VX_join_if ();

    wire                is_join;
    wire [`NW_BITS-1:0] join_warp_num;

endinterface

View File

@@ -6,16 +6,16 @@
// Wire bundle for a load/store unit request. Store data and base
// addresses are carried as one 32-bit word per thread; the offset is
// shared by all threads.
interface VX_lsu_req_if ();

    wire [`NUM_THREADS-1:0]       valid;
    wire [31:0]                   lsu_pc;
    wire [`NW_BITS-1:0]           warp_num;

    wire [`NUM_THREADS-1:0][31:0] store_data;
    wire [`NUM_THREADS-1:0][31:0] base_address; // A reg data
    wire [31:0]                   offset;       // itype_immed

    wire [2:0]                    mem_read;
    wire [2:0]                    mem_write;
    wire [4:0]                    rd;
    wire [1:0]                    wb;

endinterface

View File

@@ -6,13 +6,13 @@
// Wire bundle carrying both an ALU result and a memory result (one
// 32-bit word per thread each) along with rd, wb, next PC, per-thread
// valid bits and warp id.
interface VX_mw_wb_if ();

    wire [`NUM_THREADS-1:0][31:0] alu_result;
    wire [`NUM_THREADS-1:0][31:0] mem_result;
    wire [4:0]                    rd;
    wire [1:0]                    wb;
    wire [31:0]                   PC_next;
    wire [`NUM_THREADS-1:0]       valid;
    wire [`NW_BITS-1:0]           warp_num;

endinterface

View File

@@ -6,29 +6,29 @@
// Wire bundle for warp-control operations: thread-mask change, warp
// spawn, barrier and split fields for one warp.
interface VX_warp_ctl_if ();

    wire [`NW_BITS-1:0]           warp_num;

    // Thread-mask update
    wire                          change_mask;
    wire [`NUM_THREADS-1:0]       thread_mask;

    // Warp spawn
    wire                          wspawn;
    wire [31:0]                   wspawn_pc;
    wire [`NUM_WARPS-1:0]         wspawn_new_active;

    wire                          ebreak;

    // barrier
    wire                          is_barrier;
    wire [31:0]                   barrier_id;
    wire [$clog2(`NUM_WARPS):0]   num_warps;    // one bit wider than a warp index

    // Split
    wire                          is_split;
    wire                          dont_split;
`IGNORE_WARNINGS_BEGIN
    wire [`NW_BITS-1:0]           split_warp_num;
`IGNORE_WARNINGS_END
    wire [`NUM_THREADS-1:0]       split_new_mask;
    wire [`NUM_THREADS-1:0]       split_later_mask;
    wire [31:0]                   split_save_pc;

endinterface

View File

@@ -5,12 +5,12 @@
// Wire bundle for a register write-back: one 32-bit data word per
// thread together with PC, rd, wb, per-thread valid bits and warp id.
interface VX_wb_if ();

    wire [`NUM_THREADS-1:0][31:0] write_data;   // one word per thread
    wire [31:0]                   wb_pc;
    wire [4:0]                    rd;
    wire [1:0]                    wb;
    wire [`NUM_THREADS-1:0]       wb_valid;
    wire [`NW_BITS-1:0]           wb_warp_num;

endinterface

View File

@@ -5,8 +5,8 @@
// Wire bundle pairing a single-bit `wstall` flag with a warp identifier.
interface VX_wstall_if();

    wire                wstall;
    wire [`NW_BITS-1:0] warp_num;

endinterface

View File

@@ -1,18 +1,18 @@
// Combinational population count.
//   N      : width of the input vector
//   valids : bits to count
//   count  : number of bits of `valids` that evaluate true; it is
//            $clog2(N)+1 bits wide
module VX_countones #(
    parameter N = 10
) (
    input  wire [N-1:0]       valids,
    output reg  [$clog2(N):0] count
);

    integer i;

    always @(*) begin
        count = 0;
        // Visit order does not matter for a tally, so walk upwards.
        for (i = 0; i < N; i = i + 1) begin
            if (valids[i])
                count = count + 1;
        end
    end

endmodule

View File

@@ -1,26 +1,26 @@
`include "VX_define.vh" `include "VX_define.vh"
// Combinational priority encoder over an N-bit vector.
//   valids : candidate bits
//   index  : position of the LOWEST set bit (0 when nothing is set)
//   found  : 1 when at least one bit of `valids` is set
module VX_generic_priority_encoder #(
    parameter N = 1
) (
    input  wire [N-1:0]            valids,
    output reg  [(`LOG2UP(N))-1:0] index,
    output reg                     found
);

    integer i;

    always @(*) begin
        index = 0;
        found = 0;
        // Scan from the top bit down. Later iterations overwrite earlier
        // ones, so the lowest set bit is the one that survives.
        for (i = N-1; i >= 0; i = i - 1) begin
            if (valids[i]) begin
                index = i[(`LOG2UP(N))-1:0];
                found = 1;
            end
        end
    end

endmodule

View File

@@ -5,15 +5,15 @@ module VX_generic_queue #(
parameter SIZE = 16 parameter SIZE = 16
) ( ) (
`IGNORE_WARNINGS_BEGIN `IGNORE_WARNINGS_BEGIN
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire push, input wire push,
input wire pop, input wire pop,
output wire empty, output wire empty,
output wire full, output wire full,
`IGNORE_WARNINGS_END `IGNORE_WARNINGS_END
input wire [DATAW-1:0] data_in, input wire [DATAW-1:0] data_in,
output wire [DATAW-1:0] data_out output wire [DATAW-1:0] data_out
); );
if (SIZE == 0) begin if (SIZE == 0) begin

View File

@@ -1,36 +1,36 @@
`include "VX_define.vh" `include "VX_define.vh"
// Generic N-bit pipeline register with synchronous clear and stall.
//   N        : data width (no default; must be set by the instantiator)
//   PassThru : when non-zero the register is bypassed and `out` is wired
//              straight to `in`
//   reset / flush : either one clears the stored value to 0 on the clock edge
//   stall    : when high (and not clearing) the stored value is held
module VX_generic_register #(
    parameter N,
    parameter PassThru = 0
) (
`IGNORE_WARNINGS_BEGIN
    input  wire         clk,
    input  wire         reset,
    input  wire         stall,
    input  wire         flush,
`IGNORE_WARNINGS_END
    input  wire [N-1:0] in,
    output wire [N-1:0] out
);

    if (PassThru) begin
        // Bypass: purely combinational.
        assign out = in;
    end else begin
        reg [(N-1):0] value;

        always @(posedge clk) begin
            // reset and flush both clear and both outrank stall.
            if (reset || flush) begin
                value <= 0;
            end else if (!stall) begin
                value <= in;
            end
        end

        assign out = value;
    end

endmodule

View File

@@ -1,34 +1,34 @@
// Small synchronous stack with a two-entry push.
//   WIDTH : width of one entry
//   DEPTH : pointer width; the storage holds (1 << DEPTH) entries
//   push  : stores q1 at `ptr` and q2 at `ptr+1`, then advances ptr by 2
//   pop   : moves ptr back by 1 (only evaluated when push is low)
//   d     : entry just below the pointer, i.e. stack[ptr - 1]
// NOTE(review): there is no full/empty guard visible here, so overflow
// and underflow handling is left to the instantiator. TODO confirm.
module VX_generic_stack #(
    parameter WIDTH = 40,
    parameter DEPTH = 2
) (
    input  wire               clk,
    input  wire               reset,
    input  wire               push,
    input  wire               pop,
    input  reg  [WIDTH - 1:0] q1,
    input  reg  [WIDTH - 1:0] q2,
    output wire [WIDTH - 1:0] d
);

    reg [DEPTH - 1:0] ptr;
    reg [WIDTH - 1:0] stack [0:(1 << DEPTH) - 1];

    integer i;

    always @(posedge clk) begin
        if (reset) begin
            // Clear every entry and rewind the pointer.
            for (i = 0; i < (1 << DEPTH); i = i + 1)
                stack[i] <= 0;
            ptr <= 0;
        end else if (push) begin
            // Non-blocking assignments: all three use the pre-edge ptr.
            ptr          <= ptr + 2;
            stack[ptr]   <= q1;
            stack[ptr+1] <= q2;
        end else if (pop) begin
            ptr <= ptr - 1;
        end
    end

    assign d = stack[ptr - 1];

endmodule

View File

@@ -9,10 +9,12 @@ module VX_mult #(
parameter PIPELINE=0, parameter PIPELINE=0,
parameter FORCE_LE="NO" parameter FORCE_LE="NO"
) ( ) (
input clock, aclr, clken, input clock;
input aclr;
input clken;
input [WIDTHA-1:0] dataa, input [WIDTHA-1:0] dataa,
input [WIDTHB-1:0] datab, input [WIDTHB-1:0] datab,
output reg [WIDTHP-1:0] result output reg [WIDTHP-1:0] result
); );

View File

@@ -1,28 +1,28 @@
`include "VX_define.vh" `include "VX_define.vh"
// Combinational priority encoder over an N-bit vector.
//   N      : width of `valids` (no default; must be set by the instantiator)
//   valids : candidate bits
//   index  : position of the LOWEST set bit (0 when nothing is set)
//   found  : 1 when at least one bit of `valids` is set
//
// The scan is bounded by the module's own parameter N and the index is
// sliced with `LOG2UP(N), matching the port widths. The previous code used
// `NUM_WARPS / `NW_BITS here, which only lines up with the ports when
// N == `NUM_WARPS; any other N either indexed `valids` out of range or
// ignored its upper bits.
// NOTE(review): assumes `NW_BITS equals `LOG2UP(`NUM_WARPS), so existing
// instances with N == `NUM_WARPS behave exactly as before. Confirm in
// VX_define.vh.
module VX_priority_encoder #(
    parameter N
) (
    input  wire [N-1:0]          valids,
    output wire [`LOG2UP(N)-1:0] index,
    output wire                  found
);

    reg [`LOG2UP(N)-1:0] index_r;
    reg                  found_r;

    integer i;

    always @(*) begin
        index_r = 0;
        found_r = 0;
        // Scan from the top bit down. Later iterations overwrite earlier
        // ones, so the lowest set bit is the one that survives.
        for (i = N-1; i >= 0; i = i - 1) begin
            if (valids[i]) begin
                index_r = i[`LOG2UP(N)-1:0];
                found_r = 1;
            end
        end
    end

    assign index = index_r;
    assign found = found_r;

endmodule

View File

@@ -1,32 +1,32 @@
`include "VX_define.vh" `include "VX_define.vh"
// Priority encoder that also produces a one-hot mask of the chosen bit.
//   valids : candidate bits
//   index  : position of the HIGHEST set bit (0 when nothing is set)
//   found  : 1 when at least one bit of `valids` is set
//   mask   : one-hot at `index` when found, otherwise all zeros
module VX_priority_encoder_w_mask #(
    parameter N = 10
) (
    input  wire [N-1:0]            valids,
    output reg  [N-1:0]            mask,
    output reg  [(`LOG2UP(N))-1:0] index,
    output reg                     found
);

    integer i;

    always @(valids) begin
        index = 0;
        found = 0;
        // Scan upwards. Later iterations overwrite earlier ones, so the
        // highest set bit is the one that survives.
        for (i = 0; i < N; i = i + 1) begin
            if (valids[i]) begin
                index = i[(`LOG2UP(N))-1:0];
                found = 1;
            end
        end
    end

    // The mask is derived from the encoder result, not built in the loop.
    assign mask = found ? (1 << index) : 0;

endmodule

View File

@@ -1,27 +1,27 @@
`include "VX_define.vh" `include "VX_define.vh"
// Pipeline register that copies every field of one VX_frE_to_bckE_req_if
// instance (frE_to_bckE_req_if) into another (bckE_req_if).
//   freeze       : holds the register contents (mapped to `stall`)
//   branch_stall : clears the register when equal to `STALL (mapped to `flush`)
// The register width is the sum of the fixed-width fields (233 bits)
// plus `NW_BITS for warp_num and `NUM_THREADS for valid.
// The .in and .out concatenations must list the fields in the same order.
module VX_d_e_reg (
    input wire            clk,
    input wire            reset,
    input wire            branch_stall,
    input wire            freeze,
    VX_frE_to_bckE_req_if frE_to_bckE_req_if,
    VX_frE_to_bckE_req_if bckE_req_if
);

    wire stall = freeze;
    wire flush = (branch_stall == `STALL);

    VX_generic_register #(
        .N(233 + `NW_BITS-1 + 1 + `NUM_THREADS)
    ) d_e_reg (
        .clk   (clk),
        .reset (reset),
        .stall (stall),
        .flush (flush),
        .in    ({
            frE_to_bckE_req_if.csr_address,
            frE_to_bckE_req_if.jalQual,
            frE_to_bckE_req_if.ebreak,
            frE_to_bckE_req_if.is_csr,
            frE_to_bckE_req_if.csr_immed,
            frE_to_bckE_req_if.csr_mask,
            frE_to_bckE_req_if.rd,
            frE_to_bckE_req_if.rs1,
            frE_to_bckE_req_if.rs2,
            frE_to_bckE_req_if.alu_op,
            frE_to_bckE_req_if.wb,
            frE_to_bckE_req_if.rs2_src,
            frE_to_bckE_req_if.itype_immed,
            frE_to_bckE_req_if.mem_read,
            frE_to_bckE_req_if.mem_write,
            frE_to_bckE_req_if.branch_type,
            frE_to_bckE_req_if.upper_immed,
            frE_to_bckE_req_if.curr_PC,
            frE_to_bckE_req_if.jal,
            frE_to_bckE_req_if.jal_offset,
            frE_to_bckE_req_if.PC_next,
            frE_to_bckE_req_if.valid,
            frE_to_bckE_req_if.warp_num,
            frE_to_bckE_req_if.is_wspawn,
            frE_to_bckE_req_if.is_tmc,
            frE_to_bckE_req_if.is_split,
            frE_to_bckE_req_if.is_barrier
        }),
        .out   ({
            bckE_req_if.csr_address,
            bckE_req_if.jalQual,
            bckE_req_if.ebreak,
            bckE_req_if.is_csr,
            bckE_req_if.csr_immed,
            bckE_req_if.csr_mask,
            bckE_req_if.rd,
            bckE_req_if.rs1,
            bckE_req_if.rs2,
            bckE_req_if.alu_op,
            bckE_req_if.wb,
            bckE_req_if.rs2_src,
            bckE_req_if.itype_immed,
            bckE_req_if.mem_read,
            bckE_req_if.mem_write,
            bckE_req_if.branch_type,
            bckE_req_if.upper_immed,
            bckE_req_if.curr_PC,
            bckE_req_if.jal,
            bckE_req_if.jal_offset,
            bckE_req_if.PC_next,
            bckE_req_if.valid,
            bckE_req_if.warp_num,
            bckE_req_if.is_wspawn,
            bckE_req_if.is_tmc,
            bckE_req_if.is_split,
            bckE_req_if.is_barrier
        })
    );

endmodule

View File

@@ -1,27 +1,27 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_f_d_reg: registers the instruction-metadata bundle arriving on
// fe_inst_meta_fd and presents it on fd_inst_meta_de through a single
// VX_generic_register instance.
//   clk, reset : forwarded unchanged to the register instance.
//   freeze     : when high, asserts the register's stall input.
// NOTE(review): judging by the port names this sits between the fetch and
// decode stages; the exact stall/flush semantics live in VX_generic_register,
// which is not visible here - confirm there.
module VX_f_d_reg ( module VX_f_d_reg (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire freeze, input wire freeze,
VX_inst_meta_if fe_inst_meta_fd, VX_inst_meta_if fe_inst_meta_fd,
VX_inst_meta_if fd_inst_meta_de VX_inst_meta_if fd_inst_meta_de
); );
// flush is tied to constant 0, so this module never requests a flush.
wire flush = 1'b0; wire flush = 1'b0;
// stall simply mirrors freeze.
wire stall = freeze == 1'b1; wire stall = freeze == 1'b1;
VX_generic_register #( VX_generic_register #(
// N must equal the total width of the concatenation wired to .in/.out below.
// Presumably 32 (instruction) + 32 (inst_pc) + `NW_BITS (warp_num)
// + `NUM_THREADS (valid) - TODO confirm against VX_inst_meta_if.
.N(64+`NW_BITS-1+1+`NUM_THREADS) .N(64+`NW_BITS-1+1+`NUM_THREADS)
) f_d_reg ( ) f_d_reg (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
.flush(flush), .flush(flush),
// Field order must be identical in .in and .out so each field maps back to itself.
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
); );
endmodule endmodule

View File

@@ -1,28 +1,28 @@
`include "VX_define.vh" `include "VX_define.vh"
// VX_i_d_reg: registers the instruction-metadata bundle arriving on
// fe_inst_meta_fd and presents it on fd_inst_meta_de through a single
// VX_generic_register instance.
//   clk, reset : forwarded unchanged to the register instance.
//   freeze     : when high, asserts the register's stall input.
// NOTE(review): the ports, width and wiring match VX_f_d_reg; only the module
// and instance names differ. Which pipeline boundary this one serves cannot
// be determined from this module alone - confirm at the instantiation site.
module VX_i_d_reg ( module VX_i_d_reg (
input wire clk, input wire clk,
input wire reset, input wire reset,
input wire freeze, input wire freeze,
VX_inst_meta_if fe_inst_meta_fd, VX_inst_meta_if fe_inst_meta_fd,
VX_inst_meta_if fd_inst_meta_de VX_inst_meta_if fd_inst_meta_de
); );
// flush is tied to constant 0, so this module never requests a flush.
wire flush = 1'b0; wire flush = 1'b0;
// stall simply mirrors freeze.
wire stall = freeze == 1'b1; wire stall = freeze == 1'b1;
VX_generic_register #( VX_generic_register #(
// N must equal the total width of the concatenation wired to .in/.out below.
// Presumably 32 (instruction) + 32 (inst_pc) + `NW_BITS (warp_num)
// + `NUM_THREADS (valid) - TODO confirm against VX_inst_meta_if.
.N(64 + `NW_BITS-1 + 1 + `NUM_THREADS) .N(64 + `NW_BITS-1 + 1 + `NUM_THREADS)
) i_d_reg ( ) i_d_reg (
.clk (clk), .clk (clk),
.reset(reset), .reset(reset),
.stall(stall), .stall(stall),
.flush(flush), .flush(flush),
// Field order must be identical in .in and .out so each field maps back to itself.
.in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}), .in ({fe_inst_meta_fd.instruction, fe_inst_meta_fd.inst_pc, fe_inst_meta_fd.warp_num, fe_inst_meta_fd.valid}),
.out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid}) .out ({fd_inst_meta_de.instruction, fd_inst_meta_de.inst_pc, fd_inst_meta_de.warp_num, fd_inst_meta_de.valid})
); );
endmodule endmodule

View File

@@ -1,5 +1,5 @@
# Script summary: parse ../../rtl/Vortex.v with the -sv flag, resolving
# `include files through the listed -I directories, then render the design.
# NOTE(review): read_verilog/show look like Yosys commands - confirm the tool.
# load design # load design
read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/cache -I../../rtl/shared_memory -I../../rtl/pipe_regs ../../rtl/Vortex.v read_verilog -sv -I../../rtl -I../../rtl/libs -I../../rtl/interfaces -I../../rtl/pipe_regs -I../../rtl/cache ../../rtl/Vortex.v
# dump diagram # dump diagram
show show