Pipeline refactoring - Fmax >= 222 MHz
This commit is contained in:
@@ -18,7 +18,7 @@ VX_SRCS = kernel.c
|
|||||||
|
|
||||||
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
CXXFLAGS += -std=c++11 -O0 -g -Wall -Wextra -pedantic -Wfatal-errors
|
||||||
|
|
||||||
CXXFLAGS += -I../../include
|
CXXFLAGS += -I../../include -I../../../hw
|
||||||
|
|
||||||
PROJECT = dogfood
|
PROJECT = dogfood
|
||||||
|
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <vortex.h>
|
#include <vortex.h>
|
||||||
|
#include <VX_config.h>
|
||||||
#include "testcases.h"
|
#include "testcases.h"
|
||||||
#include "common.h"
|
#include "common.h"
|
||||||
|
|
||||||
@@ -25,6 +26,7 @@ public:
|
|||||||
this->add_test("imul", new Test_IMUL());
|
this->add_test("imul", new Test_IMUL());
|
||||||
this->add_test("idiv", new Test_IDIV());
|
this->add_test("idiv", new Test_IDIV());
|
||||||
this->add_test("idiv-mul", new Test_IDIV_MUL());
|
this->add_test("idiv-mul", new Test_IDIV_MUL());
|
||||||
|
#ifdef EXT_F_ENABLE
|
||||||
this->add_test("fadd", new Test_FADD());
|
this->add_test("fadd", new Test_FADD());
|
||||||
this->add_test("fsub", new Test_FSUB());
|
this->add_test("fsub", new Test_FSUB());
|
||||||
this->add_test("fmul", new Test_FMUL());
|
this->add_test("fmul", new Test_FMUL());
|
||||||
@@ -40,6 +42,7 @@ public:
|
|||||||
this->add_test("ftou", new Test_FTOU());
|
this->add_test("ftou", new Test_FTOU());
|
||||||
this->add_test("tof", new Test_ITOF());
|
this->add_test("tof", new Test_ITOF());
|
||||||
this->add_test("utof", new Test_UTOF());
|
this->add_test("utof", new Test_UTOF());
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
~TestMngr() {
|
~TestMngr() {
|
||||||
|
|||||||
@@ -15,8 +15,8 @@ union Float_t {
|
|||||||
};
|
};
|
||||||
|
|
||||||
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
|
inline bool almost_equal_eps(float a, float b, float eps = std::numeric_limits<float>::epsilon()) {
|
||||||
auto tolerance = std::max(std::fabs(a), std::fabs(b)) * eps;
|
auto tolerance = std::max(fabs(a), fabs(b)) * eps;
|
||||||
return std::fabs(a - b) <= tolerance;
|
return fabs(a - b) <= tolerance;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
inline bool almost_equal_ulp(float a, float b, int32_t ulp = 4) {
|
||||||
|
|||||||
@@ -62,6 +62,7 @@ make ase
|
|||||||
# tests
|
# tests
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
|
./run_ase.sh build_ase_1c ../../driver/tests/basic/basic -n 256
|
||||||
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
./run_ase.sh build_ase_1c ../../driver/tests/demo/demo -n 16
|
||||||
|
./run_ase.sh build_ase_1c ../../driver/tests/dogfood/dogfood -n 16
|
||||||
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
./run_ase.sh build_ase_1c ../../benchmarks/opencl/vecadd/vecadd
|
||||||
|
|
||||||
# modify "vsim_run.tcl" to dump VCD trace
|
# modify "vsim_run.tcl" to dump VCD trace
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
# Analysis & Synthesis Assignments
|
# Analysis & Synthesis Assignments
|
||||||
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
set_global_assignment -name VERILOG_INPUT_VERSION SYSTEMVERILOG_2009
|
||||||
set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
# set_global_assignment -name ADD_PASS_THROUGH_LOGIC_TO_INFERRED_RAMS ON
|
||||||
set_global_assignment -name VERILOG_MACRO QUARTUS
|
set_global_assignment -name VERILOG_MACRO QUARTUS
|
||||||
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
set_global_assignment -name VERILOG_MACRO SYNTHESIS
|
||||||
set_global_assignment -name VERILOG_MACRO NDEBUG
|
set_global_assignment -name VERILOG_MACRO NDEBUG
|
||||||
@@ -1035,8 +1035,7 @@ wire [SCOPE_DATAW+1:0] scope_data_in_ste;
|
|||||||
assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start};
|
assign scope_data_in_st[0] = {`SCOPE_SIGNALS_DATA_LIST `SCOPE_SIGNALS_UPD_LIST, scope_changed, scope_start};
|
||||||
assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1];
|
assign scope_data_in_ste = scope_data_in_st[SCOPE_SR_DEPTH-1];
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 1; i < SCOPE_SR_DEPTH; i++) begin
|
||||||
for (i = 1; i < SCOPE_SR_DEPTH; i++) begin
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N (SCOPE_DATAW+2)
|
.N (SCOPE_DATAW+2)
|
||||||
) scope_sr (
|
) scope_sr (
|
||||||
|
|||||||
@@ -10,98 +10,83 @@ module VX_alu_unit #(
|
|||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_branch_ctl_if branch_ctl_if,
|
VX_exu_to_cmt_if alu_commit_if
|
||||||
VX_exu_to_cmt_if alu_commit_if
|
|
||||||
);
|
);
|
||||||
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
reg [`NUM_THREADS-1:0][31:0] alu_result;
|
||||||
wire [`NUM_THREADS-1:0][32:0] sub_result;
|
|
||||||
wire [`NUM_THREADS-1:0][32:0] shift_result;
|
wire [`NUM_THREADS-1:0][31:0] addsub_result;
|
||||||
|
wire [`NUM_THREADS-1:0] less_result;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] shift_result;
|
||||||
|
reg [`NUM_THREADS-1:0][31:0] misc_result;
|
||||||
|
|
||||||
wire [`ALU_BITS-1:0] alu_op = alu_req_if.alu_op;
|
wire [`ALU_BITS-1:0] alu_op = `ALU_OP(alu_req_if.op);
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
||||||
|
|
||||||
genvar i;
|
wire [`NUM_THREADS-1:0][31:0] alu_in1_PC = alu_req_if.rs1_is_PC ? {`NUM_THREADS{alu_req_if.curr_PC}} : alu_in1;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] alu_in2_imm = alu_req_if.rs2_is_imm ? {`NUM_THREADS{alu_req_if.imm}} : alu_in2;
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
wire negate_add = (alu_op == `ALU_SUB);
|
||||||
|
wire signed_less = (alu_op == `ALU_SLT);
|
||||||
|
wire signed_shift = (alu_op == `ALU_SRA);
|
||||||
|
|
||||||
wire [32:0] sub_in1 = {(alu_op != `ALU_SLTU) & (alu_op != `ALU_BLTU) & (alu_op != `ALU_BGEU) & alu_in1[i][31], alu_in1[i]};
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [32:0] sub_in2 = {(alu_op != `ALU_SLTU) & (alu_op != `ALU_BLTU) & (alu_op != `ALU_BGEU) & alu_in2[i][31], alu_in2[i]};
|
wire [32:0] addsub_in1 = {alu_in1_PC[i], 1'b1};
|
||||||
assign sub_result[i] = $signed(sub_in1) - $signed(sub_in2);
|
wire [32:0] addsub_in2 = {alu_in2_imm[i], 1'b0} ^ {33{negate_add}};
|
||||||
|
`IGNORE_WARNINGS_BEGIN
|
||||||
|
wire [32:0] addsub_addd = addsub_in1 + addsub_in2;
|
||||||
|
`IGNORE_WARNINGS_END
|
||||||
|
assign addsub_result[i] = addsub_addd[32:1];
|
||||||
|
end
|
||||||
|
|
||||||
wire [32:0] shift_in1 = {(alu_op == `ALU_SRA) & alu_in1[i][31], alu_in1[i]};
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign shift_result[i] = $signed(shift_in1) >>> alu_in2[i][4:0];
|
wire [32:0] less_in1 = {signed_less & alu_in1[i][31], alu_in1[i]};
|
||||||
|
wire [32:0] less_in2 = {signed_less & alu_in2_imm[i][31], alu_in2_imm[i]};
|
||||||
always @(*) begin
|
assign less_result[i] = $signed(less_in1) < $signed(less_in2);
|
||||||
case (alu_op)
|
end
|
||||||
`ALU_SUB: alu_result[i] = sub_result[i][31:0];
|
|
||||||
`ALU_SLL: alu_result[i] = alu_in1[i] << alu_in2[i][4:0];
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
`ALU_SLT,
|
wire [32:0] shift_in1 = {signed_shift & alu_in1[i][31], alu_in1[i]};
|
||||||
`ALU_SLTU: alu_result[i] = 32'(sub_result[i][32]);
|
`IGNORE_WARNINGS_BEGIN
|
||||||
`ALU_XOR: alu_result[i] = alu_in1[i] ^ alu_in2[i];
|
wire [32:0] shift_value = $signed(shift_in1) >>> alu_in2_imm[i][4:0];
|
||||||
`ALU_SRL,
|
`IGNORE_WARNINGS_END
|
||||||
`ALU_SRA: alu_result[i] = shift_result[i][31:0];
|
assign shift_result[i] = shift_value[31:0];
|
||||||
`ALU_OR: alu_result[i] = alu_in1[i] | alu_in2[i];
|
|
||||||
`ALU_AND: alu_result[i] = alu_in1[i] & alu_in2[i];
|
|
||||||
default: alu_result[i] = alu_in1[i] + alu_in2[i]; // ADD, LUI, AUIPC
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [`NT_BITS-1:0] br_result_index;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
always @(*) begin
|
||||||
VX_priority_encoder #(
|
case (alu_op)
|
||||||
.N(`NUM_THREADS)
|
`ALU_AND: misc_result[i] = alu_in1[i] & alu_in2_imm[i];
|
||||||
) choose_alu_result (
|
`ALU_OR: misc_result[i] = alu_in1[i] | alu_in2_imm[i];
|
||||||
.data_in (alu_req_if.thread_mask),
|
`ALU_XOR: misc_result[i] = alu_in1[i] ^ alu_in2_imm[i];
|
||||||
.data_out (br_result_index),
|
//`ALU_SLL,
|
||||||
`UNUSED_PIN (valid_out)
|
default: misc_result[i] = alu_in1[i] << alu_in2_imm[i][4:0];
|
||||||
);
|
endcase
|
||||||
|
end
|
||||||
wire [32:0] br_result = sub_result[br_result_index];
|
end
|
||||||
wire br_sign = br_result[32];
|
|
||||||
wire br_nzero = (| br_result[31:0]);
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire br_sign_s1;
|
always @(*) begin
|
||||||
wire br_nzero_s1;
|
case (`ALU_OP_CLASS(alu_op))
|
||||||
|
0: alu_result[i] = addsub_result[i];
|
||||||
wire [`BR_BITS-1:0] br_op = `IS_BR_OP(alu_req_if.alu_op) ? `BR_OP(alu_req_if.alu_op) : `BR_NO;
|
1: alu_result[i] = {31'b0, less_result[i]};
|
||||||
wire [`BR_BITS-1:0] br_op_s1;
|
2: alu_result[i] = shift_result[i];
|
||||||
|
default: alu_result[i] = misc_result[i];
|
||||||
wire [31:0] br_addr = (br_op == `BR_JALR) ? alu_req_if.rs1_data[br_result_index] : alu_req_if.curr_PC;
|
endcase
|
||||||
wire [31:0] br_dest = $signed(br_addr) + $signed(alu_req_if.offset);
|
end
|
||||||
|
end
|
||||||
wire is_jal = (alu_op == `ALU_JAL || alu_op == `ALU_JALR);
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_jal_result = is_jal ? {`NUM_THREADS{alu_req_if.next_PC}} : alu_result;
|
|
||||||
|
|
||||||
wire stall = ~alu_commit_if.ready && alu_commit_if.valid;
|
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `ISTAG_BITS + (`NUM_THREADS * 32) + `BR_BITS + 32 + 1 + 1)
|
.N(1 + `ISTAG_BITS + (`NUM_THREADS * 32))
|
||||||
) alu_reg (
|
) alu_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (0),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({alu_req_if.valid, alu_req_if.warp_num, alu_req_if.issue_tag, alu_jal_result, br_op, br_dest, br_sign, br_nzero}),
|
.in ({alu_req_if.valid, alu_req_if.issue_tag, alu_result}),
|
||||||
.out ({alu_commit_if.valid, branch_ctl_if.warp_num, alu_commit_if.issue_tag, alu_commit_if.data, br_op_s1, branch_ctl_if.dest, br_sign_s1, br_nzero_s1})
|
.out ({alu_commit_if.valid, alu_commit_if.issue_tag, alu_commit_if.data})
|
||||||
);
|
);
|
||||||
|
|
||||||
reg br_taken;
|
|
||||||
always @(*) begin
|
|
||||||
case (br_op_s1)
|
|
||||||
`BR_NE: br_taken = br_nzero_s1;
|
|
||||||
`BR_EQ: br_taken = ~br_nzero_s1;
|
|
||||||
`BR_LT,
|
|
||||||
`BR_LTU: br_taken = br_sign_s1;
|
|
||||||
`BR_GE,
|
|
||||||
`BR_GEU: br_taken = ~br_sign_s1;
|
|
||||||
default: br_taken = 1'b1;
|
|
||||||
endcase
|
|
||||||
end
|
|
||||||
|
|
||||||
assign branch_ctl_if.valid = alu_commit_if.valid && (br_op_s1 != `BR_NO);
|
assign alu_req_if.ready = 1'b1;
|
||||||
assign branch_ctl_if.taken = br_taken;
|
|
||||||
|
|
||||||
assign alu_req_if.ready = ~stall;
|
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
56
hw/rtl/VX_bru_unit.v
Normal file
56
hw/rtl/VX_bru_unit.v
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
// Branch unit: evaluates the branch condition from rs1/rs2, computes the
// branch/jump destination, and registers the outcome onto branch_ctl_if.
// The value committed through bru_commit_if is curr_PC + 4 replicated for
// every thread.
// NOTE(review): the BRU_* decode macros and the VX_* interfaces are defined
// outside this file; their exact encodings/fields should be confirmed there.
module VX_bru_unit #(
|
||||||
|
parameter CORE_ID = 0
|
||||||
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
|
|
||||||
|
// Inputs
|
||||||
|
VX_bru_req_if bru_req_if,
|
||||||
|
|
||||||
|
// Outputs
|
||||||
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
|
VX_exu_to_cmt_if bru_commit_if
|
||||||
|
);
|
||||||
|
wire [`BRU_BITS-1:0] bru_op = bru_req_if.op;
|
||||||
|
wire bru_neg = `BRU_NEG(bru_op); // inverts the comparison result (see 'taken')
|
||||||
|
wire bru_less = `BRU_LESS(bru_op); // selects less-than instead of equality
|
||||||
|
wire bru_signed = `BRU_SIGNED(bru_op); // enables sign extension of the compare operands
|
||||||
|
wire bru_static = `BRU_STATIC(bru_op); // forces 'taken' high regardless of the comparison
|
||||||
|
|
||||||
|
wire [31:0] rs1_data = bru_req_if.rs1_data;
|
||||||
|
wire [31:0] rs2_data = bru_req_if.rs2_data;
|
||||||
|
|
||||||
|
wire [32:0] signed_in1 = {bru_signed & rs1_data[31], rs1_data}; // 33-bit operand: MSB is the sign bit only when bru_signed, else 0
|
||||||
|
wire [32:0] signed_in2 = {bru_signed & rs2_data[31], rs2_data}; // same extension for rs2
|
||||||
|
wire is_less = $signed(signed_in1) < $signed(signed_in2); // one signed 33-bit compare covers both the signed and unsigned cases
|
||||||
|
|
||||||
|
wire is_equal = (rs1_data == rs2_data);
|
||||||
|
|
||||||
|
wire taken = ((bru_less ? is_less : is_equal) ^ bru_neg) | bru_static; // pick compare, optionally negate, or force taken
|
||||||
|
|
||||||
|
wire [31:0] base_addr = bru_req_if.rs1_is_PC ? bru_req_if.curr_PC : rs1_data; // destination base: current PC or rs1
|
||||||
|
wire [31:0] dest = base_addr + bru_req_if.offset;
|
||||||
|
|
||||||
|
wire [31:0] jal_result = bru_req_if.curr_PC + 4; // address of the instruction after this one
|
||||||
|
wire [31:0] jal_result_r; // jal_result after bru_reg
|
||||||
|
|
||||||
|
VX_generic_register #(
|
||||||
|
.N(1 + `NW_BITS + `ISTAG_BITS + 1 + 32 + 32) // valid + wid + issue_tag + taken + dest + jal_result
|
||||||
|
) bru_reg (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.stall (0), // stall input tied off
|
||||||
|
.flush (0), // flush input tied off
|
||||||
|
.in ({bru_req_if.valid, bru_req_if.wid, bru_req_if.issue_tag, taken, dest, jal_result}),
|
||||||
|
.out ({bru_commit_if.valid, branch_ctl_if.wid, bru_commit_if.issue_tag, branch_ctl_if.taken, branch_ctl_if.dest, jal_result_r})
|
||||||
|
);
|
||||||
|
|
||||||
|
assign branch_ctl_if.valid = bru_commit_if.valid; // branch control is signaled whenever a BRU result is valid
|
||||||
|
|
||||||
|
assign bru_commit_if.data = {`NUM_THREADS{jal_result_r}}; // same value replicated for every thread
|
||||||
|
|
||||||
|
assign bru_req_if.ready = 1'b1; // requests are always accepted
|
||||||
|
|
||||||
|
endmodule
|
||||||
@@ -135,9 +135,7 @@ module VX_cluster #(
|
|||||||
wire [`NUM_CORES-1:0] per_core_busy;
|
wire [`NUM_CORES-1:0] per_core_busy;
|
||||||
wire [`NUM_CORES-1:0] per_core_ebreak;
|
wire [`NUM_CORES-1:0] per_core_ebreak;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_CORES; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_CORES; i++) begin
|
|
||||||
VX_core #(
|
VX_core #(
|
||||||
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
.CORE_ID(i + (CLUSTER_ID * `NUM_CORES))
|
||||||
) core (
|
) core (
|
||||||
@@ -316,7 +314,7 @@ module VX_cluster #(
|
|||||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
|
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] l2_snp_fwdin_tag;
|
||||||
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
|
wire[`NUM_CORES-1:0] l2_snp_fwdin_ready;
|
||||||
|
|
||||||
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||||
assign l2_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
assign l2_core_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||||
assign l2_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
assign l2_core_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||||
|
|
||||||
@@ -472,7 +470,7 @@ module VX_cluster #(
|
|||||||
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
|
wire[`NUM_CORES-1:0][`DSNP_TAG_WIDTH-1:0] arb_snp_fwdin_tag;
|
||||||
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
|
wire[`NUM_CORES-1:0] arb_snp_fwdin_ready;
|
||||||
|
|
||||||
for (i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
for (genvar i = 0; i < `L2NUM_REQUESTS; i = i + 2) begin
|
||||||
assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
assign arb_dram_req_valid [i] = per_core_D_dram_req_valid[(i/2)];
|
||||||
assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
assign arb_dram_req_valid [i+1] = per_core_I_dram_req_valid[(i/2)];
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ module VX_commit #(
|
|||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_exu_to_cmt_if alu_commit_if,
|
VX_exu_to_cmt_if alu_commit_if,
|
||||||
|
VX_exu_to_cmt_if bru_commit_if,
|
||||||
VX_exu_to_cmt_if lsu_commit_if,
|
VX_exu_to_cmt_if lsu_commit_if,
|
||||||
VX_exu_to_cmt_if mul_commit_if,
|
VX_exu_to_cmt_if mul_commit_if,
|
||||||
VX_exu_to_cmt_if csr_commit_if,
|
VX_exu_to_cmt_if csr_commit_if,
|
||||||
@@ -22,12 +23,13 @@ module VX_commit #(
|
|||||||
// update CSRs
|
// update CSRs
|
||||||
|
|
||||||
wire [`NUM_EXS-1:0] commited_mask;
|
wire [`NUM_EXS-1:0] commited_mask;
|
||||||
assign commited_mask = {(alu_commit_if.valid && alu_commit_if.ready),
|
assign commited_mask = {alu_commit_if.valid,
|
||||||
(lsu_commit_if.valid && lsu_commit_if.ready),
|
bru_commit_if.valid,
|
||||||
(csr_commit_if.valid && csr_commit_if.ready),
|
lsu_commit_if.valid,
|
||||||
(mul_commit_if.valid && mul_commit_if.ready),
|
csr_commit_if.valid,
|
||||||
(fpu_commit_if.valid && fpu_commit_if.ready),
|
mul_commit_if.valid,
|
||||||
(gpu_commit_if.valid && gpu_commit_if.ready)};
|
fpu_commit_if.valid,
|
||||||
|
gpu_commit_if.valid};
|
||||||
|
|
||||||
wire [`NE_BITS:0] num_commits;
|
wire [`NE_BITS:0] num_commits;
|
||||||
|
|
||||||
@@ -38,18 +40,10 @@ module VX_commit #(
|
|||||||
.count (num_commits)
|
.count (num_commits)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign cmt_to_csr_if.valid = (| commited_mask);
|
|
||||||
assign cmt_to_csr_if.warp_num = cmt_to_issue_if.fpu_data.warp_num;
|
|
||||||
assign cmt_to_csr_if.num_commits = num_commits;
|
|
||||||
|
|
||||||
assign cmt_to_csr_if.has_fflags = (fpu_commit_if.valid && fpu_commit_if.ready) && fpu_commit_if.has_fflags;
|
|
||||||
|
|
||||||
integer i;
|
|
||||||
|
|
||||||
fflags_t fflags;
|
fflags_t fflags;
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
fflags = 0;
|
fflags = 0;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||||
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
if (cmt_to_issue_if.fpu_data.thread_mask[i]) begin
|
||||||
fflags.NX |= fpu_commit_if.fflags[i].NX;
|
fflags.NX |= fpu_commit_if.fflags[i].NX;
|
||||||
fflags.UF |= fpu_commit_if.fflags[i].UF;
|
fflags.UF |= fpu_commit_if.fflags[i].UF;
|
||||||
@@ -59,18 +53,39 @@ module VX_commit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
assign cmt_to_csr_if.fflags = fflags;
|
|
||||||
|
fflags_t fflags_r;
|
||||||
|
reg has_fflags_r;
|
||||||
|
reg [`NW_BITS-1:0] wid_r;
|
||||||
|
reg [`NE_BITS:0] num_commits_r;
|
||||||
|
reg csr_update_r;
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
csr_update_r <= (| commited_mask);
|
||||||
|
fflags_r <= fflags;
|
||||||
|
has_fflags_r <= fpu_commit_if.valid && fpu_commit_if.has_fflags;
|
||||||
|
wid_r <= cmt_to_issue_if.fpu_data.wid;
|
||||||
|
num_commits_r <= num_commits;
|
||||||
|
end
|
||||||
|
|
||||||
|
assign cmt_to_csr_if.valid = csr_update_r;
|
||||||
|
assign cmt_to_csr_if.wid = wid_r;
|
||||||
|
assign cmt_to_csr_if.num_commits = num_commits_r;
|
||||||
|
assign cmt_to_csr_if.has_fflags = has_fflags_r;
|
||||||
|
assign cmt_to_csr_if.fflags = fflags_r;
|
||||||
|
|
||||||
// Notify issue stage
|
// Notify issue stage
|
||||||
|
|
||||||
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid && alu_commit_if.ready;
|
assign cmt_to_issue_if.alu_valid = alu_commit_if.valid;
|
||||||
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid && lsu_commit_if.ready;
|
assign cmt_to_issue_if.bru_valid = bru_commit_if.valid;
|
||||||
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid && csr_commit_if.ready;
|
assign cmt_to_issue_if.lsu_valid = lsu_commit_if.valid;
|
||||||
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid && mul_commit_if.ready;
|
assign cmt_to_issue_if.csr_valid = csr_commit_if.valid;
|
||||||
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid && fpu_commit_if.ready;
|
assign cmt_to_issue_if.mul_valid = mul_commit_if.valid;
|
||||||
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid && gpu_commit_if.ready;
|
assign cmt_to_issue_if.fpu_valid = fpu_commit_if.valid;
|
||||||
|
assign cmt_to_issue_if.gpu_valid = gpu_commit_if.valid;
|
||||||
|
|
||||||
assign cmt_to_issue_if.alu_tag = alu_commit_if.issue_tag;
|
assign cmt_to_issue_if.alu_tag = alu_commit_if.issue_tag;
|
||||||
|
assign cmt_to_issue_if.bru_tag = bru_commit_if.issue_tag;
|
||||||
assign cmt_to_issue_if.lsu_tag = lsu_commit_if.issue_tag;
|
assign cmt_to_issue_if.lsu_tag = lsu_commit_if.issue_tag;
|
||||||
assign cmt_to_issue_if.csr_tag = csr_commit_if.issue_tag;
|
assign cmt_to_issue_if.csr_tag = csr_commit_if.issue_tag;
|
||||||
assign cmt_to_issue_if.mul_tag = mul_commit_if.issue_tag;
|
assign cmt_to_issue_if.mul_tag = mul_commit_if.issue_tag;
|
||||||
@@ -84,6 +99,7 @@ module VX_commit #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
.alu_commit_if (alu_commit_if),
|
.alu_commit_if (alu_commit_if),
|
||||||
|
.bru_commit_if (bru_commit_if),
|
||||||
.lsu_commit_if (lsu_commit_if),
|
.lsu_commit_if (lsu_commit_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
.mul_commit_if (mul_commit_if),
|
||||||
@@ -96,23 +112,26 @@ module VX_commit #(
|
|||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (alu_commit_if.valid && alu_commit_if.ready) begin
|
if (alu_commit_if.valid) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.warp_num, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.alu_data.wid, cmt_to_issue_if.alu_data.curr_PC, alu_commit_if.issue_tag, cmt_to_issue_if.alu_data.thread_mask, cmt_to_issue_if.alu_data.wb, cmt_to_issue_if.alu_data.rd, alu_commit_if.data);
|
||||||
end
|
end
|
||||||
if (lsu_commit_if.valid && lsu_commit_if.ready) begin
|
if (bru_commit_if.valid) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.warp_num, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=BRU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.bru_data.wid, cmt_to_issue_if.bru_data.curr_PC, bru_commit_if.issue_tag, cmt_to_issue_if.bru_data.thread_mask, cmt_to_issue_if.bru_data.wb, cmt_to_issue_if.bru_data.rd, bru_commit_if.data);
|
||||||
end
|
end
|
||||||
if (csr_commit_if.valid && csr_commit_if.ready) begin
|
if (lsu_commit_if.valid) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.warp_num, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.lsu_data.wid, cmt_to_issue_if.lsu_data.curr_PC, lsu_commit_if.issue_tag, cmt_to_issue_if.lsu_data.thread_mask, cmt_to_issue_if.lsu_data.wb, cmt_to_issue_if.lsu_data.rd, lsu_commit_if.data);
|
||||||
|
end
|
||||||
|
if (csr_commit_if.valid) begin
|
||||||
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.csr_data.wid, cmt_to_issue_if.csr_data.curr_PC, csr_commit_if.issue_tag, cmt_to_issue_if.csr_data.thread_mask, cmt_to_issue_if.csr_data.wb, cmt_to_issue_if.csr_data.rd, csr_commit_if.data);
|
||||||
end
|
end
|
||||||
if (mul_commit_if.valid && mul_commit_if.ready) begin
|
if (mul_commit_if.valid) begin // trace each MUL commit; gated on valid alone, like the other units' traces
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.warp_num, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.mul_data.wid, cmt_to_issue_if.mul_data.curr_PC, mul_commit_if.issue_tag, cmt_to_issue_if.mul_data.thread_mask, cmt_to_issue_if.mul_data.wb, cmt_to_issue_if.mul_data.rd, mul_commit_if.data);
|
||||||
end
|
end
|
||||||
if (fpu_commit_if.valid && fpu_commit_if.ready) begin
|
if (fpu_commit_if.valid) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.warp_num, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.fpu_data.wid, cmt_to_issue_if.fpu_data.curr_PC, fpu_commit_if.issue_tag, cmt_to_issue_if.fpu_data.thread_mask, cmt_to_issue_if.fpu_data.wb, cmt_to_issue_if.fpu_data.rd, fpu_commit_if.data);
|
||||||
end
|
end
|
||||||
if (gpu_commit_if.valid && gpu_commit_if.ready) begin
|
if (gpu_commit_if.valid) begin
|
||||||
$display("%t: Core%0d-commit: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.warp_num, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
|
$display("%t: Core%0d-commit: wid=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, wb=%0d, rd=%0d, data=%0h", $time, CORE_ID, cmt_to_issue_if.gpu_data.wid, cmt_to_issue_if.gpu_data.curr_PC, gpu_commit_if.issue_tag, cmt_to_issue_if.gpu_data.thread_mask, cmt_to_issue_if.gpu_data.wb, cmt_to_issue_if.gpu_data.rd, gpu_commit_if.data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ module VX_csr_arb (
|
|||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
|
|
||||||
// input
|
// input
|
||||||
VX_exu_to_cmt_if csr_rsp_if,
|
VX_csr_rsp_if csr_rsp_if,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_exu_to_cmt_if csr_commit_if,
|
VX_exu_to_cmt_if csr_commit_if,
|
||||||
@@ -28,9 +28,9 @@ module VX_csr_arb (
|
|||||||
// requests
|
// requests
|
||||||
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
assign csr_req_if.valid = (~select_io_req) ? csr_core_req_if.valid : csr_io_req_if.valid;
|
||||||
assign csr_req_if.issue_tag = (~select_io_req) ? csr_core_req_if.issue_tag : 0;
|
assign csr_req_if.issue_tag = (~select_io_req) ? csr_core_req_if.issue_tag : 0;
|
||||||
assign csr_req_if.warp_num = (~select_io_req) ? csr_core_req_if.warp_num : 0;
|
assign csr_req_if.wid = (~select_io_req) ? csr_core_req_if.wid : 0;
|
||||||
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
assign csr_req_if.curr_PC = (~select_io_req) ? csr_core_req_if.curr_PC : 0;
|
||||||
assign csr_req_if.csr_op = (~select_io_req) ? csr_core_req_if.csr_op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
assign csr_req_if.op = (~select_io_req) ? csr_core_req_if.op : (csr_io_req_if.rw ? `CSR_RW : `CSR_RS);
|
||||||
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
assign csr_req_if.csr_addr = (~select_io_req) ? csr_core_req_if.csr_addr : csr_io_req_if.addr;
|
||||||
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
assign csr_req_if.csr_mask = (~select_io_req) ? csr_core_req_if.csr_mask : (csr_io_req_if.rw ? csr_io_req_if.data : 32'b0);
|
||||||
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
assign csr_req_if.rd = (~select_io_req) ? csr_core_req_if.rd : 0;
|
||||||
@@ -48,6 +48,6 @@ module VX_csr_arb (
|
|||||||
assign csr_commit_if.issue_tag= csr_rsp_if.issue_tag;
|
assign csr_commit_if.issue_tag= csr_rsp_if.issue_tag;
|
||||||
assign csr_commit_if.data = csr_rsp_if.data;
|
assign csr_commit_if.data = csr_rsp_if.data;
|
||||||
|
|
||||||
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : csr_commit_if.ready;
|
assign csr_rsp_if.ready = select_io_rsp ? csr_io_rsp_if.ready : 1'b1;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ module VX_csr_data #(
|
|||||||
VX_cmt_to_csr_if cmt_to_csr_if,
|
VX_cmt_to_csr_if cmt_to_csr_if,
|
||||||
VX_csr_to_fpu_if csr_to_fpu_if,
|
VX_csr_to_fpu_if csr_to_fpu_if,
|
||||||
|
|
||||||
input wire[`NW_BITS-1:0] warp_num,
|
input wire[`NW_BITS-1:0] wid,
|
||||||
|
|
||||||
input wire read_enable,
|
input wire read_enable,
|
||||||
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
input wire[`CSR_ADDR_BITS-1:0] read_addr,
|
||||||
@@ -38,24 +38,24 @@ module VX_csr_data #(
|
|||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (cmt_to_csr_if.has_fflags) begin
|
if (cmt_to_csr_if.has_fflags) begin
|
||||||
csr_fflags[cmt_to_csr_if.warp_num] <= cmt_to_csr_if.fflags;
|
csr_fflags[cmt_to_csr_if.wid] <= cmt_to_csr_if.fflags;
|
||||||
csr_fcsr[cmt_to_csr_if.warp_num][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
|
csr_fcsr[cmt_to_csr_if.wid][`FFG_BITS-1:0] <= cmt_to_csr_if.fflags;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (write_enable) begin
|
if (write_enable) begin
|
||||||
case (write_addr)
|
case (write_addr)
|
||||||
`CSR_FFLAGS: begin
|
`CSR_FFLAGS: begin
|
||||||
csr_fcsr[warp_num][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
csr_fcsr[wid][`FFG_BITS-1:0] <= write_data[`FFG_BITS-1:0];
|
||||||
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
csr_fflags[wid] <= write_data[`FFG_BITS-1:0];
|
||||||
end
|
end
|
||||||
`CSR_FRM: begin
|
`CSR_FRM: begin
|
||||||
csr_fcsr[warp_num][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
csr_fcsr[wid][`FFG_BITS+`FRM_BITS-1:`FFG_BITS] <= write_data[`FRM_BITS-1:0];
|
||||||
csr_frm[warp_num] <= write_data[`FRM_BITS-1:0];
|
csr_frm[wid] <= write_data[`FRM_BITS-1:0];
|
||||||
end
|
end
|
||||||
`CSR_FCSR: begin
|
`CSR_FCSR: begin
|
||||||
csr_fcsr[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
csr_fcsr[wid] <= write_data[`FFG_BITS+`FRM_BITS-1:0];
|
||||||
csr_frm[warp_num] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
csr_frm[wid] <= write_data[`FFG_BITS+`FRM_BITS-1:`FFG_BITS];
|
||||||
csr_fflags[warp_num] <= write_data[`FFG_BITS-1:0];
|
csr_fflags[wid] <= write_data[`FFG_BITS-1:0];
|
||||||
end
|
end
|
||||||
`CSR_SATP: csr_satp <= write_data;
|
`CSR_SATP: csr_satp <= write_data;
|
||||||
|
|
||||||
@@ -79,7 +79,7 @@ module VX_csr_data #(
|
|||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
csr_cycle <= 0;
|
csr_cycle <= 0;
|
||||||
csr_instret <= 0;
|
csr_instret <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
csr_cycle <= csr_cycle + 1;
|
csr_cycle <= csr_cycle + 1;
|
||||||
@@ -91,15 +91,15 @@ module VX_csr_data #(
|
|||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (read_addr)
|
case (read_addr)
|
||||||
`CSR_FFLAGS : read_data = 32'(csr_fflags[warp_num]);
|
`CSR_FFLAGS : read_data = 32'(csr_fflags[wid]);
|
||||||
`CSR_FRM : read_data = 32'(csr_frm[warp_num]);
|
`CSR_FRM : read_data = 32'(csr_frm[wid]);
|
||||||
`CSR_FCSR : read_data = 32'(csr_fcsr[warp_num]);
|
`CSR_FCSR : read_data = 32'(csr_fcsr[wid]);
|
||||||
|
|
||||||
`CSR_LWID : read_data = 32'(warp_num);
|
`CSR_LWID : read_data = 32'(wid);
|
||||||
`CSR_LTID ,
|
`CSR_LTID ,
|
||||||
`CSR_GTID ,
|
`CSR_GTID ,
|
||||||
`CSR_MHARTID ,
|
`CSR_MHARTID ,
|
||||||
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(warp_num);
|
`CSR_GWID : read_data = CORE_ID * `NUM_WARPS + 32'(wid);
|
||||||
`CSR_GCID : read_data = CORE_ID;
|
`CSR_GCID : read_data = CORE_ID;
|
||||||
`CSR_NT : read_data = `NUM_THREADS;
|
`CSR_NT : read_data = `NUM_THREADS;
|
||||||
`CSR_NW : read_data = `NUM_WARPS;
|
`CSR_NW : read_data = `NUM_WARPS;
|
||||||
@@ -134,6 +134,6 @@ module VX_csr_data #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.warp_num];
|
assign csr_to_fpu_if.frm = csr_frm[csr_to_fpu_if.wid];
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -51,9 +51,7 @@ module VX_csr_io_arb #(
|
|||||||
|
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
|
||||||
assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i));
|
assign out_csr_io_req_valid[i] = in_csr_io_req_valid && (request_id == `REQS_BITS'(i));
|
||||||
assign out_csr_io_req_rw[i] = in_csr_io_req_rw;
|
assign out_csr_io_req_rw[i] = in_csr_io_req_rw;
|
||||||
assign out_csr_io_req_addr[i] = in_csr_io_req_addr;
|
assign out_csr_io_req_addr[i] = in_csr_io_req_addr;
|
||||||
@@ -78,7 +76,7 @@ module VX_csr_io_arb #(
|
|||||||
assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel];
|
assign out_csr_io_rsp_valid = in_csr_io_rsp_valid [bus_rsp_sel];
|
||||||
assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel];
|
assign out_csr_io_rsp_data = in_csr_io_rsp_data [bus_rsp_sel];
|
||||||
|
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i));
|
assign in_csr_io_rsp_ready[i] = out_csr_io_rsp_ready && (bus_rsp_sel == `REQS_BITS'(i));
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -15,11 +15,11 @@ module VX_csr_unit #(
|
|||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_exu_to_cmt_if csr_commit_if
|
VX_exu_to_cmt_if csr_commit_if
|
||||||
);
|
);
|
||||||
VX_csr_req_if csr_pipe_req_if();
|
VX_csr_req_if csr_pipe_req_if();
|
||||||
VX_exu_to_cmt_if csr_pipe_commit_if();
|
VX_csr_rsp_if csr_pipe_rsp_if();
|
||||||
|
|
||||||
wire select_io_req = csr_io_req_if.valid;
|
wire select_io_req = csr_io_req_if.valid;
|
||||||
wire select_io_rsp;
|
wire select_io_rsp;
|
||||||
|
|
||||||
VX_csr_arb csr_arb (
|
VX_csr_arb csr_arb (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
@@ -29,7 +29,7 @@ module VX_csr_unit #(
|
|||||||
.csr_io_req_if (csr_io_req_if),
|
.csr_io_req_if (csr_io_req_if),
|
||||||
.csr_req_if (csr_pipe_req_if),
|
.csr_req_if (csr_pipe_req_if),
|
||||||
|
|
||||||
.csr_rsp_if (csr_pipe_commit_if),
|
.csr_rsp_if (csr_pipe_rsp_if),
|
||||||
.csr_io_rsp_if (csr_io_rsp_if),
|
.csr_io_rsp_if (csr_io_rsp_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
|
|
||||||
@@ -41,7 +41,7 @@ module VX_csr_unit #(
|
|||||||
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
wire [`CSR_ADDR_BITS-1:0] csr_addr_s1;
|
||||||
wire [31:0] csr_read_data, csr_read_data_s1;
|
wire [31:0] csr_read_data, csr_read_data_s1;
|
||||||
wire [31:0] csr_updated_data_s1;
|
wire [31:0] csr_updated_data_s1;
|
||||||
wire [`NW_BITS-1:0] warp_num_s1;
|
wire [`NW_BITS-1:0] wid_s1;
|
||||||
|
|
||||||
VX_csr_data #(
|
VX_csr_data #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
@@ -56,12 +56,12 @@ module VX_csr_unit #(
|
|||||||
.write_enable (csr_we_s1),
|
.write_enable (csr_we_s1),
|
||||||
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
.write_data (csr_updated_data_s1[`CSR_WIDTH-1:0]),
|
||||||
.write_addr (csr_addr_s1),
|
.write_addr (csr_addr_s1),
|
||||||
.warp_num (csr_pipe_req_if.warp_num)
|
.wid (csr_pipe_req_if.wid)
|
||||||
);
|
);
|
||||||
|
|
||||||
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
wire csr_hazard = (csr_addr_s1 == csr_pipe_req_if.csr_addr)
|
||||||
&& (warp_num_s1 == csr_pipe_req_if.warp_num)
|
&& (wid_s1 == csr_pipe_req_if.wid)
|
||||||
&& csr_pipe_commit_if.valid;
|
&& csr_pipe_rsp_if.valid;
|
||||||
|
|
||||||
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
|
wire [31:0] csr_read_data_qual = csr_hazard ? csr_updated_data_s1 : csr_read_data;
|
||||||
|
|
||||||
@@ -71,7 +71,7 @@ module VX_csr_unit #(
|
|||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
csr_we_s0_unqual = 0;
|
csr_we_s0_unqual = 0;
|
||||||
case (csr_pipe_req_if.csr_op)
|
case (csr_pipe_req_if.op)
|
||||||
`CSR_RW: begin
|
`CSR_RW: begin
|
||||||
csr_updated_data = csr_pipe_req_if.csr_mask;
|
csr_updated_data = csr_pipe_req_if.csr_mask;
|
||||||
csr_we_s0_unqual = 1;
|
csr_we_s0_unqual = 1;
|
||||||
@@ -90,7 +90,7 @@ module VX_csr_unit #(
|
|||||||
|
|
||||||
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
|
wire csr_we_s0 = csr_we_s0_unqual && csr_pipe_req_if.valid;
|
||||||
|
|
||||||
wire stall = ~csr_pipe_commit_if.ready && csr_pipe_commit_if.valid;
|
wire stall = ~csr_pipe_rsp_if.ready && csr_pipe_rsp_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
.N(1 + `ISTAG_BITS + `NW_BITS + 1 + `CSR_ADDR_BITS + 1 + 32 + 32)
|
||||||
@@ -99,13 +99,12 @@ module VX_csr_unit #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.warp_num, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
.in ({csr_pipe_req_if.valid, csr_pipe_req_if.issue_tag, csr_pipe_req_if.wid, csr_we_s0, csr_pipe_req_if.csr_addr, csr_pipe_req_if.is_io, csr_read_data_qual, csr_updated_data}),
|
||||||
.out ({csr_pipe_commit_if.valid, csr_pipe_commit_if.issue_tag, warp_num_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
.out ({csr_pipe_rsp_if.valid, csr_pipe_rsp_if.issue_tag, wid_s1, csr_we_s1, csr_addr_s1, select_io_rsp, csr_read_data_s1, csr_updated_data_s1})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
assign csr_pipe_rsp_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
|
||||||
assign csr_pipe_commit_if.data[i] = (csr_addr_s1 == `CSR_LTID) ? i :
|
|
||||||
(csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
|
(csr_addr_s1 == `CSR_GTID) ? (csr_read_data_s1 * `NUM_THREADS + i) :
|
||||||
csr_read_data_s1;
|
csr_read_data_s1;
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ module VX_decode #(
|
|||||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||||
|
|
||||||
reg [`ALU_BITS-1:0] alu_op;
|
reg [`ALU_BITS-1:0] alu_op;
|
||||||
reg [`BR_BITS-1:0] br_op;
|
reg [`BRU_BITS-1:0] br_op;
|
||||||
reg [`LSU_BITS-1:0] lsu_op;
|
reg [`LSU_BITS-1:0] lsu_op;
|
||||||
reg [`CSR_BITS-1:0] csr_op;
|
reg [`CSR_BITS-1:0] csr_op;
|
||||||
reg [`MUL_BITS-1:0] mul_op;
|
reg [`MUL_BITS-1:0] mul_op;
|
||||||
@@ -100,27 +100,27 @@ module VX_decode #(
|
|||||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
br_op = `BR_EQ;
|
br_op = `BRU_OTHER;
|
||||||
case (opcode)
|
case (opcode)
|
||||||
`INST_B: begin
|
`INST_B: begin
|
||||||
case (func3)
|
case (func3)
|
||||||
3'h0: br_op = `BR_EQ;
|
3'h0: br_op = `BRU_EQ;
|
||||||
3'h1: br_op = `BR_NE;
|
3'h1: br_op = `BRU_NE;
|
||||||
3'h4: br_op = `BR_LT;
|
3'h4: br_op = `BRU_LT;
|
||||||
3'h5: br_op = `BR_GE;
|
3'h5: br_op = `BRU_GE;
|
||||||
3'h6: br_op = `BR_LTU;
|
3'h6: br_op = `BRU_LTU;
|
||||||
3'h7: br_op = `BR_GEU;
|
3'h7: br_op = `BRU_GEU;
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
`INST_JAL: br_op = `BR_JAL;
|
`INST_JAL: br_op = `BRU_JAL;
|
||||||
`INST_JALR: br_op = `BR_JALR;
|
`INST_JALR: br_op = `BRU_JALR;
|
||||||
`INST_SYS: begin
|
`INST_SYS: begin
|
||||||
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
if (is_jals && u_12 == 12'h000) br_op = `BRU_ECALL;
|
||||||
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
if (is_jals && u_12 == 12'h001) br_op = `BRU_EBREAK;
|
||||||
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
if (is_jals && u_12 == 12'h302) br_op = `BRU_MRET;
|
||||||
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
if (is_jals && u_12 == 12'h102) br_op = `BRU_SRET;
|
||||||
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
if (is_jals && u_12 == 12'h7B2) br_op = `BRU_DRET;
|
||||||
end
|
end
|
||||||
default:;
|
default:;
|
||||||
endcase
|
endcase
|
||||||
@@ -292,18 +292,17 @@ module VX_decode #(
|
|||||||
|
|
||||||
VX_decode_if decode_tmp_if();
|
VX_decode_if decode_tmp_if();
|
||||||
|
|
||||||
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
assign decode_tmp_if.valid = ifetch_rsp_if.valid;
|
||||||
assign decode_tmp_if.warp_num = ifetch_rsp_if.warp_num;
|
assign decode_tmp_if.wid = ifetch_rsp_if.wid;
|
||||||
assign decode_tmp_if.thread_mask= ifetch_rsp_if.thread_mask;
|
assign decode_tmp_if.thread_mask = ifetch_rsp_if.thread_mask;
|
||||||
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
assign decode_tmp_if.curr_PC = ifetch_rsp_if.curr_PC;
|
||||||
assign decode_tmp_if.next_PC = ifetch_rsp_if.curr_PC + 32'h4;
|
|
||||||
|
|
||||||
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
|
assign decode_tmp_if.ex_type = is_lsu ? `EX_LSU :
|
||||||
is_csr ? `EX_CSR :
|
is_csr ? `EX_CSR :
|
||||||
is_mul ? `EX_MUL :
|
is_mul ? `EX_MUL :
|
||||||
is_fpu ? `EX_FPU :
|
is_fpu ? `EX_FPU :
|
||||||
is_gpu ? `EX_GPU :
|
is_gpu ? `EX_GPU :
|
||||||
is_br ? `EX_ALU :
|
is_br ? `EX_BRU :
|
||||||
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
(is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||||
`EX_NOP;
|
`EX_NOP;
|
||||||
|
|
||||||
@@ -312,7 +311,7 @@ module VX_decode #(
|
|||||||
is_mul ? `OP_BITS'(mul_op) :
|
is_mul ? `OP_BITS'(mul_op) :
|
||||||
is_fpu ? `OP_BITS'(fpu_op) :
|
is_fpu ? `OP_BITS'(fpu_op) :
|
||||||
is_gpu ? `OP_BITS'(gpu_op) :
|
is_gpu ? `OP_BITS'(gpu_op) :
|
||||||
is_br ? `OP_BITS'({1'b1, br_op}) :
|
is_br ? `OP_BITS'(br_op) :
|
||||||
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
(is_rtype || is_itype || is_lui || is_auipc) ? `OP_BITS'(alu_op) :
|
||||||
0;
|
0;
|
||||||
|
|
||||||
@@ -347,28 +346,28 @@ module VX_decode #(
|
|||||||
is_csr ? 32'(u_12) :
|
is_csr ? 32'(u_12) :
|
||||||
src2_imm;
|
src2_imm;
|
||||||
|
|
||||||
assign decode_tmp_if.rs1_is_PC = is_auipc;
|
assign decode_tmp_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||||
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
assign decode_tmp_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm;
|
||||||
|
|
||||||
assign decode_tmp_if.frm = func3;
|
assign decode_tmp_if.frm = func3;
|
||||||
|
|
||||||
assign join_if.is_join = valid_in && is_gpu && (gpu_op == `GPU_JOIN);
|
assign join_if.is_join = valid_in && is_gpu && (gpu_op == `GPU_JOIN);
|
||||||
assign join_if.warp_num = ifetch_rsp_if.warp_num;
|
assign join_if.wid = ifetch_rsp_if.wid;
|
||||||
|
|
||||||
assign wstall_if.wstall = valid_in && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
assign wstall_if.wstall = valid_in && (is_btype || is_jal || is_jalr || (is_gpu && (gpu_op == `GPU_TMC || gpu_op == `GPU_SPLIT || gpu_op == `GPU_BAR)));
|
||||||
assign wstall_if.warp_num = ifetch_rsp_if.warp_num;
|
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||||
|
|
||||||
wire stall = ~decode_if.ready && decode_if.valid;
|
wire stall = ~decode_if.ready && decode_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + 32 + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + `FRM_BITS + `NUM_REGS)
|
.N(1 + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + `NR_BITS + `NR_BITS + 32 + 1 + `EX_BITS + `OP_BITS + 1 + `NR_BITS + 1 + 1 + `FRM_BITS + `NUM_REGS)
|
||||||
) decode_reg (
|
) decode_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({decode_tmp_if.valid, decode_tmp_if.warp_num, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.next_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, decode_tmp_if.reg_use_mask}),
|
.in ({decode_tmp_if.valid, decode_tmp_if.wid, decode_tmp_if.thread_mask, decode_tmp_if.curr_PC, decode_tmp_if.rd, decode_tmp_if.rs1, decode_tmp_if.rs2, decode_tmp_if.imm, decode_tmp_if.rs1_is_PC, decode_tmp_if.rs2_is_imm, decode_tmp_if.ex_type, decode_tmp_if.ex_op, decode_tmp_if.wb, decode_tmp_if.rs3, decode_tmp_if.use_rs3, decode_tmp_if.frm, decode_tmp_if.reg_use_mask}),
|
||||||
.out ({decode_if.valid, decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.next_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, decode_if.reg_use_mask})
|
.out ({decode_if.valid, decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.rs3, decode_if.use_rs3, decode_if.frm, decode_if.reg_use_mask})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ifetch_rsp_if.ready = ~stall;
|
assign ifetch_rsp_if.ready = ~stall;
|
||||||
@@ -376,7 +375,7 @@ module VX_decode #(
|
|||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (decode_tmp_if.valid && ~stall) begin
|
if (decode_tmp_if.valid && ~stall) begin
|
||||||
$write("%t: Core%0d-Decode: warp=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.warp_num, decode_tmp_if.curr_PC);
|
$write("%t: Core%0d-Decode: wid=%0d, PC=%0h, ex=", $time, CORE_ID, decode_tmp_if.wid, decode_tmp_if.curr_PC);
|
||||||
print_ex_type(decode_tmp_if.ex_type);
|
print_ex_type(decode_tmp_if.ex_type);
|
||||||
$write(", op=");
|
$write(", op=");
|
||||||
print_ex_op(decode_tmp_if.ex_type, decode_tmp_if.ex_op);
|
print_ex_op(decode_tmp_if.ex_type, decode_tmp_if.ex_op);
|
||||||
@@ -386,6 +385,7 @@ module VX_decode #(
|
|||||||
|
|
||||||
// trap unsupported instructions
|
// trap unsupported instructions
|
||||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.ex_op) == `ALU_OTHER));
|
assert(~(~stall && (decode_tmp_if.ex_type == `EX_ALU) && `ALU_OP(decode_tmp_if.ex_op) == `ALU_OTHER));
|
||||||
|
assert(~(~stall && (decode_tmp_if.ex_type == `EX_BRU) && `BRU_OP(decode_tmp_if.ex_op) == `BRU_OTHER));
|
||||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.ex_op) == `CSR_OTHER));
|
assert(~(~stall && (decode_tmp_if.ex_type == `EX_CSR) && `CSR_OP(decode_tmp_if.ex_op) == `CSR_OTHER));
|
||||||
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.ex_op) == `GPU_OTHER));
|
assert(~(~stall && (decode_tmp_if.ex_type == `EX_GPU) && `GPU_OP(decode_tmp_if.ex_op) == `GPU_OTHER));
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -3,7 +3,6 @@
|
|||||||
|
|
||||||
`include "VX_platform.vh"
|
`include "VX_platform.vh"
|
||||||
`include "VX_config.vh"
|
`include "VX_config.vh"
|
||||||
`include "VX_scope.vh"
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
@@ -38,8 +37,8 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define LATENCY_IDIV 23
|
`define LATENCY_IDIV 33
|
||||||
`define LATENCY_IMUL 2
|
`define LATENCY_IMUL 3
|
||||||
|
|
||||||
`define LATENCY_FDIV 16
|
`define LATENCY_FDIV 16
|
||||||
`define LATENCY_FSQRT 10
|
`define LATENCY_FSQRT 10
|
||||||
@@ -87,72 +86,70 @@
|
|||||||
`define BYTEEN_BITS 3
|
`define BYTEEN_BITS 3
|
||||||
`define BYTEEN_TYPE(x) x[1:0]
|
`define BYTEEN_TYPE(x) x[1:0]
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
`define FRM_RNE 3'b000 // round to nearest even
|
||||||
|
`define FRM_RTZ 3'b001 // round to zero
|
||||||
`define BR_EQ 4'h0
|
`define FRM_RDN 3'b010 // round to -inf
|
||||||
`define BR_NE 4'h1
|
`define FRM_RUP 3'b011 // round to +inf
|
||||||
`define BR_LT 4'h2
|
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
||||||
`define BR_GE 4'h3
|
`define FRM_DYN 3'b111 // dynamic mode
|
||||||
`define BR_LTU 4'h4
|
`define FRM_BITS 3
|
||||||
`define BR_GEU 4'h5
|
|
||||||
`define BR_JAL 4'h6
|
|
||||||
`define BR_JALR 4'h7
|
|
||||||
`define BR_ECALL 4'h8
|
|
||||||
`define BR_EBREAK 4'h9
|
|
||||||
`define BR_MRET 4'hA
|
|
||||||
`define BR_SRET 4'hB
|
|
||||||
`define BR_DRET 4'hC
|
|
||||||
`define BR_NO 4'hF
|
|
||||||
`define BR_BITS 4
|
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define EX_NOP 3'h0
|
`define EX_NOP 3'h0
|
||||||
`define EX_ALU 3'h1
|
`define EX_ALU 3'h1
|
||||||
`define EX_LSU 3'h2
|
`define EX_BRU 3'h2
|
||||||
`define EX_CSR 3'h3
|
`define EX_LSU 3'h3
|
||||||
`define EX_MUL 3'h4
|
`define EX_CSR 3'h4
|
||||||
`define EX_FPU 3'h5
|
`define EX_MUL 3'h5
|
||||||
`define EX_GPU 3'h6
|
`define EX_FPU 3'h6
|
||||||
|
`define EX_GPU 3'h7
|
||||||
`define EX_BITS 3
|
`define EX_BITS 3
|
||||||
|
|
||||||
`define NUM_EXS 6
|
`define NUM_EXS 7
|
||||||
`define NE_BITS `LOG2UP(`NUM_EXS)
|
`define NE_BITS `LOG2UP(`NUM_EXS)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define OP_BITS 5
|
`define OP_BITS 5
|
||||||
|
|
||||||
`define ALU_ADD 5'h00
|
`define ALU_ADD 4'b0000
|
||||||
`define ALU_SUB 5'h01
|
`define ALU_SUB 4'b0001
|
||||||
`define ALU_SLL 5'h02
|
`define ALU_LUI 4'b0010
|
||||||
`define ALU_SRL 5'h03
|
`define ALU_AUIPC 4'b0011
|
||||||
`define ALU_SRA 5'h04
|
`define ALU_SLT 4'b0100
|
||||||
`define ALU_SLT 5'h05
|
`define ALU_SLTU 4'b0101
|
||||||
`define ALU_SLTU 5'h06
|
`define ALU_SRL 4'b1000
|
||||||
`define ALU_XOR 5'h07
|
`define ALU_SRA 4'b1001
|
||||||
`define ALU_OR 5'h08
|
`define ALU_AND 4'b1100
|
||||||
`define ALU_AND 5'h09
|
`define ALU_OR 4'b1101
|
||||||
`define ALU_LUI 5'h0A
|
`define ALU_XOR 4'b1110
|
||||||
`define ALU_AUIPC 5'h0B
|
`define ALU_SLL 4'b1111
|
||||||
`define ALU_BEQ {1'b1, `BR_EQ}
|
`define ALU_OTHER 4'b0111
|
||||||
`define ALU_BNE {1'b1, `BR_NE}
|
`define ALU_BITS 4
|
||||||
`define ALU_BLT {1'b1, `BR_LT}
|
|
||||||
`define ALU_BGE {1'b1, `BR_GE}
|
|
||||||
`define ALU_BLTU {1'b1, `BR_LTU}
|
|
||||||
`define ALU_BGEU {1'b1, `BR_GEU}
|
|
||||||
`define ALU_JAL {1'b1, `BR_JAL}
|
|
||||||
`define ALU_JALR {1'b1, `BR_JALR}
|
|
||||||
`define ALU_ECALL {1'b1, `BR_ECALL}
|
|
||||||
`define ALU_EBREAK {1'b1, `BR_EBREAK}
|
|
||||||
`define ALU_MRET {1'b1, `BR_MRET}
|
|
||||||
`define ALU_SRET {1'b1, `BR_SRET}
|
|
||||||
`define ALU_DRET {1'b1, `BR_DRET}
|
|
||||||
`define ALU_OTHER 5'h1F
|
|
||||||
`define ALU_BITS 5
|
|
||||||
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
`define ALU_OP(x) x[`ALU_BITS-1:0]
|
||||||
`define BR_OP(x) x[`BR_BITS-1:0]
|
`define ALU_OP_CLASS(x) x[3:2]
|
||||||
`define IS_BR_OP(x) x[4]
|
|
||||||
|
`define BRU_EQ 4'b0000
|
||||||
|
`define BRU_NE 4'b0001
|
||||||
|
`define BRU_LTU 4'b0010
|
||||||
|
`define BRU_GEU 4'b0011
|
||||||
|
`define BRU_LT 4'b0110
|
||||||
|
`define BRU_GE 4'b0111
|
||||||
|
`define BRU_JAL 4'b1000
|
||||||
|
`define BRU_JALR 4'b1001
|
||||||
|
`define BRU_ECALL 4'b1010
|
||||||
|
`define BRU_EBREAK 4'b1011
|
||||||
|
`define BRU_MRET 4'b1100
|
||||||
|
`define BRU_SRET 4'b1101
|
||||||
|
`define BRU_DRET 4'b1110
|
||||||
|
`define BRU_OTHER 4'b1111
|
||||||
|
`define BRU_BITS 4
|
||||||
|
`define BRU_OP(x) x[`BRU_BITS-1:0]
|
||||||
|
`define BRU_NEG(x) x[0]
|
||||||
|
`define BRU_LESS(x) x[1]
|
||||||
|
`define BRU_SIGNED(x) x[2]
|
||||||
|
`define BRU_STATIC(x) x[3]
|
||||||
|
|
||||||
`define LSU_LB {1'b0, `BYTEEN_SB}
|
`define LSU_LB {1'b0, `BYTEEN_SB}
|
||||||
`define LSU_LH {1'b0, `BYTEEN_SH}
|
`define LSU_LH {1'b0, `BYTEEN_SH}
|
||||||
@@ -213,14 +210,6 @@
|
|||||||
`define FPU_BITS 5
|
`define FPU_BITS 5
|
||||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||||
|
|
||||||
`define FRM_RNE 3'b000 // round to nearest even
|
|
||||||
`define FRM_RTZ 3'b001 // round to zero
|
|
||||||
`define FRM_RDN 3'b010 // round to -inf
|
|
||||||
`define FRM_RUP 3'b011 // round to +inf
|
|
||||||
`define FRM_RMM 3'b100 // round to nearest max magnitude
|
|
||||||
`define FRM_DYN 3'b111 // dynamic mode
|
|
||||||
`define FRM_BITS 3
|
|
||||||
|
|
||||||
`define GPU_TMC 3'h0
|
`define GPU_TMC 3'h0
|
||||||
`define GPU_WSPAWN 3'h1
|
`define GPU_WSPAWN 3'h1
|
||||||
`define GPU_SPLIT 3'h2
|
`define GPU_SPLIT 3'h2
|
||||||
@@ -273,7 +262,7 @@
|
|||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, warp_num
|
`ifdef DBG_CORE_REQ_INFO // pc, wb, rd, wid
|
||||||
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
|
`define DEBUG_CORE_REQ_MDATA_WIDTH (32 + 1 + `NR_BITS + `NW_BITS)
|
||||||
`else
|
`else
|
||||||
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
`define DEBUG_CORE_REQ_MDATA_WIDTH 0
|
||||||
@@ -421,34 +410,6 @@
|
|||||||
|
|
||||||
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
|
`define DRAM_TO_BYTE_ADDR(x) {x, (32-$bits(x))'(0)}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
`include "VX_types.vh"
|
||||||
|
|
||||||
typedef struct packed {
|
|
||||||
logic [`NW_BITS-1:0] warp_num;
|
|
||||||
logic [`NUM_THREADS-1:0] thread_mask;
|
|
||||||
logic [31:0] curr_PC;
|
|
||||||
logic [`NR_BITS-1:0] rd;
|
|
||||||
logic wb;
|
|
||||||
} issue_data_t;
|
|
||||||
|
|
||||||
typedef struct packed {
|
|
||||||
logic is_normal;
|
|
||||||
logic is_zero;
|
|
||||||
logic is_subnormal;
|
|
||||||
logic is_inf;
|
|
||||||
logic is_nan;
|
|
||||||
logic is_signaling;
|
|
||||||
logic is_quiet;
|
|
||||||
} fp_type_t;
|
|
||||||
|
|
||||||
typedef struct packed {
|
|
||||||
logic NV; // Invalid
|
|
||||||
logic DZ; // Divide by zero
|
|
||||||
logic OF; // Overflow
|
|
||||||
logic UF; // Underflow
|
|
||||||
logic NX; // Inexact
|
|
||||||
} fflags_t;
|
|
||||||
|
|
||||||
`define FFG_BITS $bits(fflags_t)
|
|
||||||
|
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ module VX_execute #(
|
|||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
|
VX_bru_req_if bru_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_mul_req_if mul_req_if,
|
VX_mul_req_if mul_req_if,
|
||||||
@@ -32,6 +33,7 @@ module VX_execute #(
|
|||||||
VX_branch_ctl_if branch_ctl_if,
|
VX_branch_ctl_if branch_ctl_if,
|
||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
VX_exu_to_cmt_if alu_commit_if,
|
VX_exu_to_cmt_if alu_commit_if,
|
||||||
|
VX_exu_to_cmt_if bru_commit_if,
|
||||||
VX_exu_to_cmt_if lsu_commit_if,
|
VX_exu_to_cmt_if lsu_commit_if,
|
||||||
VX_exu_to_cmt_if csr_commit_if,
|
VX_exu_to_cmt_if csr_commit_if,
|
||||||
VX_exu_to_cmt_if mul_commit_if,
|
VX_exu_to_cmt_if mul_commit_if,
|
||||||
@@ -49,10 +51,19 @@ module VX_execute #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
.branch_ctl_if (branch_ctl_if),
|
|
||||||
.alu_commit_if (alu_commit_if)
|
.alu_commit_if (alu_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
VX_bru_unit #(
|
||||||
|
.CORE_ID(CORE_ID)
|
||||||
|
) bru_unit (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.bru_req_if (bru_req_if),
|
||||||
|
.branch_ctl_if (branch_ctl_if),
|
||||||
|
.bru_commit_if (bru_commit_if)
|
||||||
|
);
|
||||||
|
|
||||||
VX_lsu_unit #(
|
VX_lsu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) lsu_unit (
|
) lsu_unit (
|
||||||
@@ -116,29 +127,33 @@ module VX_execute #(
|
|||||||
VX_gpu_unit #(
|
VX_gpu_unit #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) gpu_unit (
|
) gpu_unit (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
.gpu_req_if (gpu_req_if),
|
.gpu_req_if (gpu_req_if),
|
||||||
.warp_ctl_if (warp_ctl_if),
|
.warp_ctl_if (warp_ctl_if),
|
||||||
.gpu_commit_if (gpu_commit_if)
|
.gpu_commit_if (gpu_commit_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign ebreak = alu_req_if.valid && (alu_req_if.alu_op == `ALU_EBREAK || alu_req_if.alu_op == `ALU_ECALL);
|
assign ebreak = bru_req_if.valid
|
||||||
|
&& (bru_req_if.op == `BRU_EBREAK
|
||||||
|
|| bru_req_if.op == `BRU_ECALL);
|
||||||
|
|
||||||
`SCOPE_ASSIGN (scope_decode_valid, decode_if.valid);
|
`SCOPE_ASSIGN (scope_decode_valid, decode_if.valid);
|
||||||
`SCOPE_ASSIGN (scope_decode_warp_num, decode_if.warp_num);
|
`SCOPE_ASSIGN (scope_decode_wid, decode_if.wid);
|
||||||
`SCOPE_ASSIGN (scope_decode_curr_PC, decode_if.curr_PC);
|
`SCOPE_ASSIGN (scope_decode_curr_PC, decode_if.curr_PC);
|
||||||
`SCOPE_ASSIGN (scope_decode_is_jal, decode_if.is_jal);
|
`SCOPE_ASSIGN (scope_decode_is_jal, decode_if.is_jal);
|
||||||
`SCOPE_ASSIGN (scope_decode_rs1, decode_if.rs1);
|
`SCOPE_ASSIGN (scope_decode_rs1, decode_if.rs1);
|
||||||
`SCOPE_ASSIGN (scope_decode_rs2, decode_if.rs2);
|
`SCOPE_ASSIGN (scope_decode_rs2, decode_if.rs2);
|
||||||
|
|
||||||
`SCOPE_ASSIGN (scope_execute_valid, alu_req_if.valid);
|
`SCOPE_ASSIGN (scope_execute_valid, alu_req_if.valid);
|
||||||
`SCOPE_ASSIGN (scope_execute_warp_num, alu_req_if.warp_num);
|
`SCOPE_ASSIGN (scope_execute_wid, alu_req_if.wid);
|
||||||
`SCOPE_ASSIGN (scope_execute_curr_PC, alu_req_if.curr_PC);
|
`SCOPE_ASSIGN (scope_execute_curr_PC, alu_req_if.curr_PC);
|
||||||
`SCOPE_ASSIGN (scope_execute_rd, alu_req_if.rd);
|
`SCOPE_ASSIGN (scope_execute_rd, alu_req_if.rd);
|
||||||
`SCOPE_ASSIGN (scope_execute_a, alu_req_if.rs1_data);
|
`SCOPE_ASSIGN (scope_execute_a, alu_req_if.rs1_data);
|
||||||
`SCOPE_ASSIGN (scope_execute_b, alu_req_if.rs2_data);
|
`SCOPE_ASSIGN (scope_execute_b, alu_req_if.rs2_data);
|
||||||
|
|
||||||
`SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid);
|
`SCOPE_ASSIGN (scope_writeback_valid, writeback_if.valid);
|
||||||
`SCOPE_ASSIGN (scope_writeback_warp_num, writeback_if.warp_num);
|
`SCOPE_ASSIGN (scope_writeback_wid, writeback_if.wid);
|
||||||
`SCOPE_ASSIGN (scope_writeback_curr_PC, writeback_if.curr_PC);
|
`SCOPE_ASSIGN (scope_writeback_curr_PC, writeback_if.curr_PC);
|
||||||
`SCOPE_ASSIGN (scope_writeback_wb, writeback_if.wb);
|
`SCOPE_ASSIGN (scope_writeback_wb, writeback_if.wb);
|
||||||
`SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd);
|
`SCOPE_ASSIGN (scope_writeback_rd, writeback_if.rd);
|
||||||
|
|||||||
@@ -14,9 +14,27 @@ module VX_fpu_unit #(
|
|||||||
// outputs
|
// outputs
|
||||||
VX_fpu_to_cmt_if fpu_commit_if
|
VX_fpu_to_cmt_if fpu_commit_if
|
||||||
);
|
);
|
||||||
|
VX_fpu_req_if fpu_req_tmp_if();
|
||||||
assign csr_to_fpu_if.warp_num = fpu_req_if.warp_num;
|
|
||||||
wire [`FRM_BITS-1:0] frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
// resolve dynamic FRM
|
||||||
|
wire [`FRM_BITS-1:0] frm, frm_tmp;
|
||||||
|
assign csr_to_fpu_if.wid = fpu_req_if.wid;
|
||||||
|
assign frm = (fpu_req_if.frm == `FRM_DYN) ? csr_to_fpu_if.frm : fpu_req_if.frm;
|
||||||
|
|
||||||
|
// use a skid buffer since fpcore has realtime backpressure
|
||||||
|
VX_elastic_buffer #(
|
||||||
|
.DATAW (`ISTAG_BITS + `NW_BITS + 32 + `FPU_BITS + `FRM_BITS + (3 * `NUM_THREADS * 32)),
|
||||||
|
.SIZE (0)
|
||||||
|
) input_buffer (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.valid_in (fpu_req_if.valid),
|
||||||
|
.ready_in (fpu_req_if.ready),
|
||||||
|
.data_in ({fpu_req_if.issue_tag, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.op, frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data}),
|
||||||
|
.data_out ({fpu_req_tmp_if.issue_tag, fpu_req_tmp_if.wid, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.op, frm_tmp, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data}),
|
||||||
|
.ready_out (fpu_req_tmp_if.ready),
|
||||||
|
.valid_out (fpu_req_tmp_if.valid)
|
||||||
|
);
|
||||||
|
|
||||||
`ifdef SYNTHESIS
|
`ifdef SYNTHESIS
|
||||||
|
|
||||||
@@ -24,17 +42,17 @@ module VX_fpu_unit #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
.valid_in (fpu_req_if.valid),
|
.valid_in (fpu_req_tmp_if.valid),
|
||||||
.ready_in (fpu_req_if.ready),
|
.ready_in (fpu_req_tmp_if.ready),
|
||||||
|
|
||||||
.tag_in (fpu_req_if.issue_tag),
|
.tag_in (fpu_req_tmp_if.issue_tag),
|
||||||
|
|
||||||
.op (fpu_req_if.fpu_op),
|
.op (fpu_req_tmp_if.op),
|
||||||
.frm (frm),
|
.frm (frm_tmp),
|
||||||
|
|
||||||
.dataa (fpu_req_if.rs1_data),
|
.dataa (fpu_req_tmp_if.rs1_data),
|
||||||
.datab (fpu_req_if.rs2_data),
|
.datab (fpu_req_tmp_if.rs2_data),
|
||||||
.datac (fpu_req_if.rs3_data),
|
.datac (fpu_req_tmp_if.rs3_data),
|
||||||
.result (fpu_commit_if.data),
|
.result (fpu_commit_if.data),
|
||||||
|
|
||||||
.has_fflags (fpu_commit_if.has_fflags),
|
.has_fflags (fpu_commit_if.has_fflags),
|
||||||
@@ -42,7 +60,7 @@ module VX_fpu_unit #(
|
|||||||
|
|
||||||
.tag_out (fpu_commit_if.issue_tag),
|
.tag_out (fpu_commit_if.issue_tag),
|
||||||
|
|
||||||
.ready_out (fpu_commit_if.ready),
|
.ready_out (1'b1),
|
||||||
.valid_out (fpu_commit_if.valid)
|
.valid_out (fpu_commit_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
@@ -57,17 +75,17 @@ module VX_fpu_unit #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
.valid_in (fpu_req_if.valid),
|
.valid_in (fpu_req_tmp_if.valid),
|
||||||
.ready_in (fpu_req_if.ready),
|
.ready_in (fpu_req_tmp_if.ready),
|
||||||
|
|
||||||
.tag_in (fpu_req_if.issue_tag),
|
.tag_in (fpu_req_tmp_if.issue_tag),
|
||||||
|
|
||||||
.op (fpu_req_if.fpu_op),
|
.op (fpu_req_tmp_if.op),
|
||||||
.frm (frm),
|
.frm (frm_tmp),
|
||||||
|
|
||||||
.dataa (fpu_req_if.rs1_data),
|
.dataa (fpu_req_tmp_if.rs1_data),
|
||||||
.datab (fpu_req_if.rs2_data),
|
.datab (fpu_req_tmp_if.rs2_data),
|
||||||
.datac (fpu_req_if.rs3_data),
|
.datac (fpu_req_tmp_if.rs3_data),
|
||||||
.result (fpu_commit_if.data),
|
.result (fpu_commit_if.data),
|
||||||
|
|
||||||
.has_fflags (fpu_commit_if.has_fflags),
|
.has_fflags (fpu_commit_if.has_fflags),
|
||||||
@@ -75,7 +93,7 @@ module VX_fpu_unit #(
|
|||||||
|
|
||||||
.tag_out (fpu_commit_if.issue_tag),
|
.tag_out (fpu_commit_if.issue_tag),
|
||||||
|
|
||||||
.ready_out (fpu_commit_if.ready),
|
.ready_out (1'b1),
|
||||||
.valid_out (fpu_commit_if.valid)
|
.valid_out (fpu_commit_if.valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|||||||
@@ -10,40 +10,43 @@ module VX_gpr_fp_ctrl (
|
|||||||
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
input wire [`NUM_THREADS-1:0][31:0] rs2_data,
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
output wire [`NW_BITS+`NR_BITS-1:0] raddr1,
|
||||||
|
|
||||||
VX_gpr_read_if gpr_read_if
|
VX_gpr_read_if gpr_read_if
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][31:0] tmp_rs1_data;
|
reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data, rs3_tmp_data;
|
||||||
reg read_rs3;
|
reg read_rs3;
|
||||||
|
|
||||||
wire rs3_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
|
wire rs3_delay = gpr_read_if.valid && gpr_read_if.use_rs3 && ~read_rs3;
|
||||||
|
|
||||||
wire read_fire = gpr_read_if.valid && read_rs3;
|
wire read_fire = gpr_read_if.valid && read_rs3;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
read_rs3 <= 0;
|
read_rs3 <= 0;
|
||||||
end else if (rs3_delay) begin
|
end else begin
|
||||||
read_rs3 <= 1;
|
if (rs3_delay) begin
|
||||||
end else if (read_fire) begin
|
read_rs3 <= 1;
|
||||||
read_rs3 <= 0;
|
end else if (read_fire) begin
|
||||||
end
|
read_rs3 <= 0;
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
// backup original rs1 data
|
// backup original rs1 data
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (rs3_delay) begin
|
if (~gpr_read_if.use_rs3 || rs3_delay) begin
|
||||||
tmp_rs1_data <= rs1_data;
|
rs1_tmp_data <= rs1_data;
|
||||||
end
|
end
|
||||||
|
rs2_tmp_data <= rs2_data;
|
||||||
|
rs3_tmp_data <= rs1_data;
|
||||||
end
|
end
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
assign raddr1 = {gpr_read_if.warp_num, (read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1)};
|
wire [`NR_BITS-1:0] rs1 = read_rs3 ? gpr_read_if.rs3 : gpr_read_if.rs1;
|
||||||
|
assign raddr1 = {gpr_read_if.wid, rs1};
|
||||||
assign gpr_read_if.ready = ~rs3_delay;
|
assign gpr_read_if.ready = ~rs3_delay;
|
||||||
assign gpr_read_if.rs1_data = gpr_read_if.use_rs3 ? tmp_rs1_data : rs1_data;
|
assign gpr_read_if.rs1_data = rs1_tmp_data;
|
||||||
assign gpr_read_if.rs2_data = rs2_data;
|
assign gpr_read_if.rs2_data = rs2_tmp_data;
|
||||||
assign gpr_read_if.rs3_data = rs1_data;
|
assign gpr_read_if.rs3_data = rs3_tmp_data;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -20,12 +20,7 @@ module VX_gpr_ram (
|
|||||||
for (integer i = 0; i < `NUM_REGS; i++) begin
|
for (integer i = 0; i < `NUM_REGS; i++) begin
|
||||||
if (i == 0) begin
|
if (i == 0) begin
|
||||||
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'h00000000}}; // set r0 = 0
|
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'h00000000}}; // set r0 = 0
|
||||||
end
|
|
||||||
`ifndef SYNTHESIS
|
|
||||||
else begin
|
|
||||||
ram[j * `NUM_REGS + i] = {`NUM_THREADS{32'hdeadbeef}};
|
|
||||||
end
|
end
|
||||||
`endif
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -48,8 +43,7 @@ module VX_gpr_ram (
|
|||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
|
wire [`NUM_THREADS-1:0][31:0] write_bit_mask;
|
||||||
|
|
||||||
integer i;
|
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
assign write_bit_mask[i] = {32{~we[i]}};
|
assign write_bit_mask[i] = {32{~we[i]}};
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -61,9 +55,8 @@ module VX_gpr_ram (
|
|||||||
wire [`NUM_THREADS-1:0][31:0] tmp_b;
|
wire [`NUM_THREADS-1:0][31:0] tmp_b;
|
||||||
|
|
||||||
`ifndef SYNTHESIS
|
`ifndef SYNTHESIS
|
||||||
integer j;
|
for (integer i = 0; i < `NUM_THREADS; i++) begin
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (integer j = 0; j < 32; j++) begin
|
||||||
for (j = 0; j < 32; j++) begin
|
|
||||||
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
assign rs1_data[i][j] = ((tmp_a[i][j] === 1'dx) || cena_1) ? 1'b0 : tmp_a[i][j];
|
||||||
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
assign rs2_data[i][j] = ((tmp_b[i][j] === 1'dx) || cena_2) ? 1'b0 : tmp_b[i][j];
|
||||||
end
|
end
|
||||||
@@ -72,7 +65,7 @@ module VX_gpr_ram (
|
|||||||
assign rs1_data = tmp_a;
|
assign rs1_data = tmp_a;
|
||||||
assign rs2_data = tmp_b;
|
assign rs2_data = tmp_b;
|
||||||
`endif
|
`endif
|
||||||
for (i = 0; i < 'NT; i=i+4) begin
|
for (integer i = 0; i < 'NT; i=i+4) begin
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
rf2_32x128_wm1 first_ram (
|
rf2_32x128_wm1 first_ram (
|
||||||
.CENYA(),
|
.CENYA(),
|
||||||
|
|||||||
@@ -21,10 +21,10 @@ module VX_gpr_stage #(
|
|||||||
VX_gpr_ram gpr_ram (
|
VX_gpr_ram gpr_ram (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.thread_mask),
|
.we ({`NUM_THREADS{writeback_if.valid}} & writeback_if.thread_mask),
|
||||||
.waddr ({writeback_if.warp_num, writeback_if.rd}),
|
.waddr ({writeback_if.wid, writeback_if.rd}),
|
||||||
.wdata (writeback_if.data),
|
.wdata (writeback_if.data),
|
||||||
.rs1 (raddr1),
|
.rs1 (raddr1),
|
||||||
.rs2 ({gpr_read_if.warp_num, gpr_read_if.rs2}),
|
.rs2 ({gpr_read_if.wid, gpr_read_if.rs2}),
|
||||||
.rs1_data (rs1_data),
|
.rs1_data (rs1_data),
|
||||||
.rs2_data (rs2_data)
|
.rs2_data (rs2_data)
|
||||||
);
|
);
|
||||||
@@ -39,9 +39,16 @@ module VX_gpr_stage #(
|
|||||||
.gpr_read_if(gpr_read_if)
|
.gpr_read_if(gpr_read_if)
|
||||||
);
|
);
|
||||||
`else
|
`else
|
||||||
assign raddr1 = {gpr_read_if.warp_num, gpr_read_if.rs1};
|
reg [`NUM_THREADS-1:0][31:0] rs1_tmp_data, rs2_tmp_data;
|
||||||
assign gpr_read_if.rs1_data = rs1_data;
|
|
||||||
assign gpr_read_if.rs2_data = rs2_data;
|
always @(posedge clk) begin
|
||||||
|
rs1_tmp_data <= rs1_data;
|
||||||
|
rs2_tmp_data <= rs2_data;
|
||||||
|
end
|
||||||
|
|
||||||
|
assign raddr1 = {gpr_read_if.wid, gpr_read_if.rs1};
|
||||||
|
assign gpr_read_if.rs1_data = rs1_tmp_data;
|
||||||
|
assign gpr_read_if.rs2_data = rs2_tmp_data;
|
||||||
assign gpr_read_if.rs3_data = 0;
|
assign gpr_read_if.rs3_data = 0;
|
||||||
assign gpr_read_if.ready = 1;
|
assign gpr_read_if.ready = 1;
|
||||||
|
|
||||||
@@ -53,6 +60,4 @@ module VX_gpr_stage #(
|
|||||||
`UNUSED_VAR (rs3);
|
`UNUSED_VAR (rs3);
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign writeback_if.ready = 1'b1; // writes are stall-free
|
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
|
|||||||
@@ -3,6 +3,9 @@
|
|||||||
module VX_gpu_unit #(
|
module VX_gpu_unit #(
|
||||||
parameter CORE_ID = 0
|
parameter CORE_ID = 0
|
||||||
) (
|
) (
|
||||||
|
input wire clk,
|
||||||
|
input wire reset,
|
||||||
|
|
||||||
// Input
|
// Input
|
||||||
VX_gpu_req_if gpu_req_if,
|
VX_gpu_req_if gpu_req_if,
|
||||||
|
|
||||||
@@ -10,74 +13,74 @@ module VX_gpu_unit #(
|
|||||||
VX_warp_ctl_if warp_ctl_if,
|
VX_warp_ctl_if warp_ctl_if,
|
||||||
VX_exu_to_cmt_if gpu_commit_if
|
VX_exu_to_cmt_if gpu_commit_if
|
||||||
);
|
);
|
||||||
|
gpu_tmc_t tmc;
|
||||||
|
gpu_wspawn_t wspawn;
|
||||||
|
gpu_barrier_t barrier;
|
||||||
|
gpu_split_t split;
|
||||||
|
|
||||||
wire is_wspawn = (gpu_req_if.gpu_op == `GPU_WSPAWN);
|
wire is_wspawn = (gpu_req_if.op == `GPU_WSPAWN);
|
||||||
wire is_tmc = (gpu_req_if.gpu_op == `GPU_TMC);
|
wire is_tmc = (gpu_req_if.op == `GPU_TMC);
|
||||||
wire is_split = (gpu_req_if.gpu_op == `GPU_SPLIT);
|
wire is_split = (gpu_req_if.op == `GPU_SPLIT);
|
||||||
wire is_bar = (gpu_req_if.gpu_op == `GPU_BAR);
|
wire is_bar = (gpu_req_if.op == `GPU_BAR);
|
||||||
|
|
||||||
wire gpu_req_fire = gpu_req_if.valid && gpu_commit_if.ready;
|
wire gpu_req_fire = gpu_req_if.valid;
|
||||||
|
|
||||||
assign warp_ctl_if.warp_num = gpu_req_if.warp_num;
|
|
||||||
|
|
||||||
// tmc
|
// tmc
|
||||||
|
|
||||||
genvar i;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
wire [`NUM_THREADS-1:0] tmc_new_mask;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
assign tmc_new_mask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||||
end
|
end
|
||||||
assign warp_ctl_if.change_mask = is_tmc && gpu_req_fire;
|
assign tmc.valid = gpu_req_fire && is_tmc;
|
||||||
assign warp_ctl_if.thread_mask = tmc_new_mask;
|
assign tmc.thread_mask = tmc_new_mask;
|
||||||
|
|
||||||
// barrier
|
|
||||||
|
|
||||||
assign warp_ctl_if.is_barrier = is_bar && gpu_req_fire;
|
|
||||||
assign warp_ctl_if.barrier_id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
|
||||||
assign warp_ctl_if.barrier_num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
|
||||||
|
|
||||||
// wspawn
|
// wspawn
|
||||||
|
|
||||||
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
wire [31:0] wspawn_pc = gpu_req_if.rs2_data;
|
||||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
||||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||||
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
assign wspawn_wmask[i] = (i < gpu_req_if.rs1_data[0]);
|
||||||
end
|
end
|
||||||
assign warp_ctl_if.wspawn = is_wspawn && gpu_req_fire;
|
assign wspawn.valid = gpu_req_fire && is_wspawn;
|
||||||
assign warp_ctl_if.wspawn_pc = wspawn_pc;
|
assign wspawn.wmask = wspawn_wmask;
|
||||||
assign warp_ctl_if.wspawn_wmask = wspawn_wmask;
|
assign wspawn.pc = wspawn_pc;
|
||||||
|
|
||||||
// split
|
// split
|
||||||
|
|
||||||
wire[`NUM_THREADS-1:0] split_new_use_mask;
|
wire [`NUM_THREADS-1:0] split_then_mask;
|
||||||
wire[`NUM_THREADS-1:0] split_new_later_mask;
|
wire [`NUM_THREADS-1:0] split_else_mask;
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire curr_bool = (gpu_req_if.rs1_data[i] == 32'b1);
|
wire taken = gpu_req_if.rs1_data[i][0];
|
||||||
assign split_new_use_mask[i] = gpu_req_if.thread_mask[i] & (curr_bool);
|
assign split_then_mask[i] = gpu_req_if.thread_mask[i] & taken;
|
||||||
assign split_new_later_mask[i] = gpu_req_if.thread_mask[i] & (!curr_bool);
|
assign split_else_mask[i] = gpu_req_if.thread_mask[i] & ~taken;
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [`NT_BITS:0] num_valids;
|
assign split.valid = gpu_req_fire && is_split;
|
||||||
|
assign split.diverged = (| split_then_mask) && (| split_else_mask);
|
||||||
|
assign split.then_mask = split_then_mask;
|
||||||
|
assign split.else_mask = split_else_mask;
|
||||||
|
assign split.pc = gpu_req_if.curr_PC + 4;
|
||||||
|
|
||||||
VX_countones #(
|
// barrier
|
||||||
.N(`NUM_THREADS)
|
|
||||||
) valids_counter (
|
|
||||||
.valids(gpu_req_if.thread_mask),
|
|
||||||
.count (num_valids)
|
|
||||||
);
|
|
||||||
|
|
||||||
assign warp_ctl_if.is_split = is_split && (num_valids > 1) && gpu_req_fire;
|
assign barrier.valid = is_bar && gpu_req_fire;
|
||||||
assign warp_ctl_if.do_split = (split_new_use_mask != 0) && (split_new_use_mask != {`NUM_THREADS{1'b1}});
|
assign barrier.id = gpu_req_if.rs1_data[0][`NB_BITS-1:0];
|
||||||
assign warp_ctl_if.split_new_mask = split_new_use_mask;
|
assign barrier.num_warps = (`NW_BITS+1)'(gpu_req_if.rs2_data - 1);
|
||||||
assign warp_ctl_if.split_later_mask = split_new_later_mask;
|
|
||||||
assign warp_ctl_if.split_save_pc = gpu_req_if.next_PC;
|
|
||||||
|
|
||||||
// commit
|
// output
|
||||||
assign gpu_commit_if.valid = gpu_req_if.valid;
|
|
||||||
assign gpu_commit_if.issue_tag = gpu_req_if.issue_tag;
|
VX_generic_register #(
|
||||||
assign gpu_commit_if.data = 0;
|
.N(1 + `ISTAG_BITS + `NW_BITS + $bits(gpu_tmc_t) + $bits(gpu_wspawn_t) + $bits(gpu_split_t) + $bits(gpu_barrier_t))
|
||||||
assign gpu_req_if.ready = gpu_commit_if.ready;
|
) gpu_reg (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.stall (0),
|
||||||
|
.flush (0),
|
||||||
|
.in ({gpu_req_if.valid, gpu_req_if.issue_tag, gpu_req_if.wid, tmc, wspawn, split, barrier}),
|
||||||
|
.out ({gpu_commit_if.valid, gpu_commit_if.issue_tag, warp_ctl_if.wid, warp_ctl_if.tmc, warp_ctl_if.wspawn, warp_ctl_if.split, warp_ctl_if.barrier})
|
||||||
|
);
|
||||||
|
|
||||||
|
assign gpu_req_if.ready = 1'b1;
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -25,7 +25,7 @@ module VX_icache_stage #(
|
|||||||
|
|
||||||
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
wire icache_req_fire = icache_req_if.valid && icache_req_if.ready;
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.warp_num;
|
wire [`NW_BITS-1:0] req_tag = ifetch_req_if.wid;
|
||||||
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
wire [`NW_BITS-1:0] rsp_tag = icache_rsp_if.tag[0][`NW_BITS-1:0];
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
@@ -46,13 +46,13 @@ module VX_icache_stage #(
|
|||||||
assign ifetch_req_if.ready = icache_req_if.ready;
|
assign ifetch_req_if.ready = icache_req_if.ready;
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.warp_num, req_tag};
|
assign icache_req_if.tag = {ifetch_req_if.curr_PC, 1'b0, `NR_BITS'(0), ifetch_req_if.wid, req_tag};
|
||||||
`else
|
`else
|
||||||
assign icache_req_if.tag = req_tag;
|
assign icache_req_if.tag = req_tag;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
|
assign ifetch_rsp_if.valid = icache_rsp_if.valid;
|
||||||
assign ifetch_rsp_if.warp_num = rsp_tag;
|
assign ifetch_rsp_if.wid = rsp_tag;
|
||||||
assign ifetch_rsp_if.thread_mask = rsp_thread_mask_buf[rsp_tag];
|
assign ifetch_rsp_if.thread_mask = rsp_thread_mask_buf[rsp_tag];
|
||||||
assign ifetch_rsp_if.curr_PC = rsp_curr_PC_buf[rsp_tag];
|
assign ifetch_rsp_if.curr_PC = rsp_curr_PC_buf[rsp_tag];
|
||||||
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
assign ifetch_rsp_if.instr = icache_rsp_if.data[0];
|
||||||
@@ -61,7 +61,7 @@ module VX_icache_stage #(
|
|||||||
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
assign icache_rsp_if.ready = ifetch_rsp_if.ready;
|
||||||
|
|
||||||
`SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid);
|
`SCOPE_ASSIGN (scope_icache_req_valid, icache_req_if.valid);
|
||||||
`SCOPE_ASSIGN (scope_icache_req_warp_num, ifetch_req_if.warp_num);
|
`SCOPE_ASSIGN (scope_icache_req_wid, ifetch_req_if.wid);
|
||||||
`SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
`SCOPE_ASSIGN (scope_icache_req_addr, {icache_req_if.addr, 2'b0});
|
||||||
`SCOPE_ASSIGN (scope_icache_req_tag, icache_req_if.tag);
|
`SCOPE_ASSIGN (scope_icache_req_tag, icache_req_if.tag);
|
||||||
`SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready);
|
`SCOPE_ASSIGN (scope_icache_req_ready, icache_req_if.ready);
|
||||||
@@ -74,10 +74,10 @@ module VX_icache_stage #(
|
|||||||
`ifdef DBG_PRINT_CORE_ICACHE
|
`ifdef DBG_PRINT_CORE_ICACHE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (icache_req_if.valid && icache_req_if.ready) begin
|
if (icache_req_if.valid && icache_req_if.ready) begin
|
||||||
$display("%t: I$%0d req: warp=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.warp_num, ifetch_req_if.curr_PC);
|
$display("%t: I$%0d req: wid=%0d, PC=%0h", $time, CORE_ID, ifetch_req_if.wid, ifetch_req_if.curr_PC);
|
||||||
end
|
end
|
||||||
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
if (icache_rsp_if.valid && icache_rsp_if.ready) begin
|
||||||
$display("%t: I$%0d rsp: warp=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.warp_num, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr);
|
$display("%t: I$%0d rsp: wid=%0d, PC=%0h, instr=%0h", $time, CORE_ID, ifetch_rsp_if.wid, ifetch_rsp_if.curr_PC, ifetch_rsp_if.instr);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -26,16 +26,18 @@ module VX_ipdom_stack #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
wr_ptr <= 0;
|
wr_ptr <= 0;
|
||||||
end else if (push) begin
|
end else begin
|
||||||
stack_1[wr_ptr] <= q1;
|
if (push) begin
|
||||||
stack_2[wr_ptr] <= q2;
|
stack_1[wr_ptr] <= q1;
|
||||||
is_part[wr_ptr] <= 0;
|
stack_2[wr_ptr] <= q2;
|
||||||
rd_ptr <= wr_ptr;
|
is_part[wr_ptr] <= 0;
|
||||||
wr_ptr <= wr_ptr + 1;
|
rd_ptr <= wr_ptr;
|
||||||
end else if (pop) begin
|
wr_ptr <= wr_ptr + 1;
|
||||||
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
|
end else if (pop) begin
|
||||||
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
|
wr_ptr <= wr_ptr - DEPTH'(is_part[rd_ptr]);
|
||||||
is_part[rd_ptr] <= 1;
|
rd_ptr <= rd_ptr - DEPTH'(is_part[rd_ptr]);
|
||||||
|
is_part[rd_ptr] <= 1;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ module VX_issue #(
|
|||||||
VX_cmt_to_issue_if cmt_to_issue_if,
|
VX_cmt_to_issue_if cmt_to_issue_if,
|
||||||
|
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
|
VX_bru_req_if bru_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_mul_req_if mul_req_if,
|
VX_mul_req_if mul_req_if,
|
||||||
@@ -23,13 +24,14 @@ module VX_issue #(
|
|||||||
|
|
||||||
VX_gpr_read_if gpr_read_if();
|
VX_gpr_read_if gpr_read_if();
|
||||||
assign gpr_read_if.valid = decode_if.valid && ~schedule_delay;
|
assign gpr_read_if.valid = decode_if.valid && ~schedule_delay;
|
||||||
assign gpr_read_if.warp_num = decode_if.warp_num;
|
assign gpr_read_if.wid = decode_if.wid;
|
||||||
assign gpr_read_if.rs1 = decode_if.rs1;
|
assign gpr_read_if.rs1 = decode_if.rs1;
|
||||||
assign gpr_read_if.rs2 = decode_if.rs2;
|
assign gpr_read_if.rs2 = decode_if.rs2;
|
||||||
assign gpr_read_if.rs3 = decode_if.rs3;
|
assign gpr_read_if.rs3 = decode_if.rs3;
|
||||||
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
|
assign gpr_read_if.use_rs3 = decode_if.use_rs3;
|
||||||
|
|
||||||
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
wire ex_busy = (~alu_req_if.ready && (decode_if.ex_type == `EX_ALU))
|
||||||
|
|| (~bru_req_if.ready && (decode_if.ex_type == `EX_BRU))
|
||||||
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
|| (~lsu_req_if.ready && (decode_if.ex_type == `EX_LSU))
|
||||||
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
|| (~csr_req_if.ready && (decode_if.ex_type == `EX_CSR))
|
||||||
`ifdef EXT_M_ENABLE
|
`ifdef EXT_M_ENABLE
|
||||||
@@ -40,9 +42,9 @@ module VX_issue #(
|
|||||||
`endif
|
`endif
|
||||||
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
|| (~gpu_req_if.ready && (decode_if.ex_type == `EX_GPU));
|
||||||
|
|
||||||
VX_scheduler #(
|
VX_scoreboard #(
|
||||||
.CORE_ID(CORE_ID)
|
.CORE_ID(CORE_ID)
|
||||||
) scheduler (
|
) scoreboard (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
@@ -62,117 +64,71 @@ module VX_issue #(
|
|||||||
.gpr_read_if (gpr_read_if)
|
.gpr_read_if (gpr_read_if)
|
||||||
);
|
);
|
||||||
|
|
||||||
VX_alu_req_if alu_req_tmp_if();
|
VX_issue_if issue_if();
|
||||||
VX_lsu_req_if lsu_req_tmp_if();
|
|
||||||
VX_csr_req_if csr_req_tmp_if();
|
|
||||||
VX_mul_req_if mul_req_tmp_if();
|
|
||||||
VX_fpu_req_if fpu_req_tmp_if();
|
|
||||||
VX_gpu_req_if gpu_req_tmp_if();
|
|
||||||
|
|
||||||
VX_issue_demux issue_demux (
|
assign issue_if.rs1_data = gpr_read_if.rs1_data;
|
||||||
.decode_if (decode_if),
|
assign issue_if.rs2_data = gpr_read_if.rs2_data;
|
||||||
.gpr_read_if(gpr_read_if),
|
assign issue_if.rs3_data = gpr_read_if.rs3_data;
|
||||||
.issue_tag (issue_tag),
|
|
||||||
.alu_req_if (alu_req_tmp_if),
|
|
||||||
.lsu_req_if (lsu_req_tmp_if),
|
|
||||||
.csr_req_if (csr_req_tmp_if),
|
|
||||||
.mul_req_if (mul_req_tmp_if),
|
|
||||||
.fpu_req_if (fpu_req_tmp_if),
|
|
||||||
.gpu_req_if (gpu_req_tmp_if)
|
|
||||||
);
|
|
||||||
|
|
||||||
wire stall = schedule_delay || ~gpr_read_if.ready;
|
wire [`NT_BITS-1:0] tid;
|
||||||
assign decode_if.ready = ~stall;
|
VX_priority_encoder #(
|
||||||
|
.N(`NUM_THREADS)
|
||||||
|
) sel_src (
|
||||||
|
.data_in (decode_if.thread_mask),
|
||||||
|
.data_out (tid),
|
||||||
|
`UNUSED_PIN (valid_out)
|
||||||
|
);
|
||||||
|
|
||||||
|
wire stall = schedule_delay || ~gpr_read_if.ready;
|
||||||
|
wire flush = stall; // clear output on stall
|
||||||
|
|
||||||
|
VX_generic_register #(
|
||||||
|
.N(1 + `ISTAG_BITS + `NW_BITS + `NUM_THREADS + 32 + `NR_BITS + `NR_BITS + 32 + 1 + 1 + `EX_BITS + `OP_BITS + 1 + `FRM_BITS + `NT_BITS)
|
||||||
|
) issue_reg (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.stall (stall),
|
||||||
|
.flush (flush),
|
||||||
|
.in ({decode_if.valid, issue_tag, decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.rs1, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.ex_type, decode_if.ex_op, decode_if.wb, decode_if.frm, tid}),
|
||||||
|
.out ({issue_if.valid, issue_if.issue_tag, issue_if.wid, issue_if.thread_mask, issue_if.curr_PC, issue_if.rd, issue_if.rs1, issue_if.imm, issue_if.rs1_is_PC, issue_if.rs2_is_imm, issue_if.ex_type, issue_if.ex_op, issue_if.wb, issue_if.frm, issue_if.tid})
|
||||||
|
);
|
||||||
|
|
||||||
|
assign decode_if.ready = issue_if.ready;
|
||||||
|
assign issue_if.ready = ~stall;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_issue_demux issue_demux (
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `ALU_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + 32 + 32)
|
.issue_if (issue_if),
|
||||||
) alu_reg (
|
.alu_req_if (alu_req_if),
|
||||||
.clk (clk),
|
.bru_req_if (bru_req_if),
|
||||||
.reset (reset),
|
.lsu_req_if (lsu_req_if),
|
||||||
.stall (~alu_req_if.ready),
|
.csr_req_if (csr_req_if),
|
||||||
.flush (stall && alu_req_if.ready),
|
.mul_req_if (mul_req_if),
|
||||||
.in ({alu_req_tmp_if.valid, alu_req_tmp_if.issue_tag, alu_req_tmp_if.warp_num, alu_req_tmp_if.curr_PC, alu_req_tmp_if.thread_mask, alu_req_tmp_if.alu_op, alu_req_tmp_if.rs1_data, alu_req_tmp_if.rs2_data, alu_req_tmp_if.offset, alu_req_tmp_if.next_PC}),
|
.fpu_req_if (fpu_req_if),
|
||||||
.out ({alu_req_if.valid, alu_req_if.issue_tag, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.thread_mask, alu_req_if.alu_op, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC})
|
.gpu_req_if (gpu_req_if)
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + 1 + `BYTEEN_BITS + (`NUM_THREADS * 32) + 32 + (`NUM_THREADS * 32) + `NR_BITS + 1)
|
|
||||||
) lsu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (~lsu_req_if.ready),
|
|
||||||
.flush (stall && lsu_req_if.ready),
|
|
||||||
.in ({lsu_req_tmp_if.valid, lsu_req_tmp_if.issue_tag, lsu_req_tmp_if.warp_num, lsu_req_tmp_if.curr_PC, lsu_req_tmp_if.thread_mask, lsu_req_tmp_if.rw, lsu_req_tmp_if.byteen, lsu_req_tmp_if.base_addr, lsu_req_tmp_if.offset, lsu_req_tmp_if.store_data, lsu_req_tmp_if.rd, lsu_req_tmp_if.wb}),
|
|
||||||
.out ({lsu_req_if.valid, lsu_req_if.issue_tag, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data, lsu_req_if.rd, lsu_req_if.wb})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `CSR_BITS + `CSR_ADDR_BITS + 32 + 1)
|
|
||||||
) csr_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (~csr_req_if.ready),
|
|
||||||
.flush (stall && csr_req_if.ready),
|
|
||||||
.in ({csr_req_tmp_if.valid, csr_req_tmp_if.issue_tag, csr_req_tmp_if.warp_num, csr_req_tmp_if.curr_PC, csr_req_tmp_if.thread_mask, csr_req_tmp_if.csr_op, csr_req_tmp_if.csr_addr, csr_req_tmp_if.csr_mask, csr_req_tmp_if.is_io}),
|
|
||||||
.out ({csr_req_if.valid, csr_req_if.issue_tag, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.thread_mask, csr_req_if.csr_op, csr_req_if.csr_addr, csr_req_if.csr_mask, csr_req_if.is_io})
|
|
||||||
);
|
|
||||||
|
|
||||||
`ifdef EXT_M_ENABLE
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `MUL_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
|
||||||
) mul_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (~mul_req_if.ready),
|
|
||||||
.flush (stall && mul_req_if.ready),
|
|
||||||
.in ({mul_req_tmp_if.valid, mul_req_tmp_if.issue_tag, mul_req_tmp_if.warp_num, mul_req_tmp_if.curr_PC, mul_req_tmp_if.thread_mask, mul_req_tmp_if.mul_op, mul_req_tmp_if.rs1_data, mul_req_tmp_if.rs2_data}),
|
|
||||||
.out ({mul_req_if.valid, mul_req_if.issue_tag, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.thread_mask, mul_req_if.mul_op, mul_req_if.rs1_data, mul_req_if.rs2_data})
|
|
||||||
);
|
|
||||||
`endif
|
|
||||||
|
|
||||||
`ifdef EXT_F_ENABLE
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `FPU_BITS + `FRM_BITS + (`NUM_THREADS * 32) + (`NUM_THREADS * 32) + (`NUM_THREADS * 32))
|
|
||||||
) fpu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (~fpu_req_if.ready),
|
|
||||||
.flush (stall && fpu_req_if.ready),
|
|
||||||
.in ({fpu_req_tmp_if.valid, fpu_req_tmp_if.issue_tag, fpu_req_tmp_if.warp_num, fpu_req_tmp_if.curr_PC, fpu_req_tmp_if.thread_mask, fpu_req_tmp_if.fpu_op, fpu_req_tmp_if.frm, fpu_req_tmp_if.rs1_data, fpu_req_tmp_if.rs2_data, fpu_req_tmp_if.rs3_data}),
|
|
||||||
.out ({fpu_req_if.valid, fpu_req_if.issue_tag, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.thread_mask, fpu_req_if.fpu_op, fpu_req_if.frm, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data})
|
|
||||||
);
|
|
||||||
`endif
|
|
||||||
|
|
||||||
VX_generic_register #(
|
|
||||||
.N(1 + `ISTAG_BITS + `NW_BITS + 32 + `NUM_THREADS + `GPU_BITS + (`NUM_THREADS * 32) + 32 + 32)
|
|
||||||
) gpu_reg (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.stall (~gpu_req_if.ready),
|
|
||||||
.flush (stall && gpu_req_if.ready),
|
|
||||||
.in ({gpu_req_tmp_if.valid, gpu_req_tmp_if.issue_tag, gpu_req_tmp_if.warp_num, gpu_req_tmp_if.curr_PC, gpu_req_tmp_if.thread_mask, gpu_req_tmp_if.gpu_op, gpu_req_tmp_if.rs1_data, gpu_req_tmp_if.rs2_data, gpu_req_tmp_if.next_PC}),
|
|
||||||
.out ({gpu_req_if.valid, gpu_req_if.issue_tag, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.thread_mask, gpu_req_if.gpu_op, gpu_req_if.rs1_data, gpu_req_if.rs2_data, gpu_req_if.next_PC})
|
|
||||||
);
|
);
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (alu_req_if.valid && alu_req_if.ready) begin
|
if (alu_req_if.valid && alu_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, offset=%0h, next_PC=%0h", $time, CORE_ID, alu_req_if.warp_num, alu_req_if.curr_PC, alu_req_if.issue_tag, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data, alu_req_if.offset, alu_req_if.next_PC);
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=ALU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, alu_req_if.wid, alu_req_if.curr_PC, alu_req_if.issue_tag, alu_req_if.thread_mask, alu_req_if.rs1_data, alu_req_if.rs2_data);
|
||||||
|
end
|
||||||
|
if (bru_req_if.valid && bru_req_if.ready) begin
|
||||||
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=BRU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, offset=%0h", $time, CORE_ID, bru_req_if.wid, bru_req_if.curr_PC, bru_req_if.issue_tag, bru_req_if.thread_mask, bru_req_if.rs1_data, bru_req_if.rs2_data, bru_req_if.offset);
|
||||||
end
|
end
|
||||||
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
if (lsu_req_if.valid && lsu_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.warp_num, lsu_req_if.curr_PC, lsu_req_if.issue_tag, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=LSU, istag=%0d, tmask=%b, rw=%b, byteen=%b, baddr=%0h, offset=%0h, data=%0h", $time, CORE_ID, lsu_req_if.wid, lsu_req_if.curr_PC, lsu_req_if.issue_tag, lsu_req_if.thread_mask, lsu_req_if.rw, lsu_req_if.byteen, lsu_req_if.base_addr, lsu_req_if.offset, lsu_req_if.store_data);
|
||||||
end
|
end
|
||||||
if (csr_req_if.valid && csr_req_if.ready) begin
|
if (csr_req_if.valid && csr_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.warp_num, csr_req_if.curr_PC, csr_req_if.issue_tag, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=CSR, istag=%0d, tmask=%b, addr=%0h, mask=%0h", $time, CORE_ID, csr_req_if.wid, csr_req_if.curr_PC, csr_req_if.issue_tag, csr_req_if.thread_mask, csr_req_if.csr_addr, csr_req_if.csr_mask);
|
||||||
end
|
end
|
||||||
if (mul_req_if.valid && mul_req_if.ready) begin
|
if (mul_req_if.valid && mul_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.warp_num, mul_req_if.curr_PC, mul_req_if.issue_tag, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=MUL, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, mul_req_if.wid, mul_req_if.curr_PC, mul_req_if.issue_tag, mul_req_if.thread_mask, mul_req_if.rs1_data, mul_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
if (fpu_req_if.valid && fpu_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.warp_num, fpu_req_if.curr_PC, fpu_req_if.issue_tag, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=FPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h, rs3_data=%0h", $time, CORE_ID, fpu_req_if.wid, fpu_req_if.curr_PC, fpu_req_if.issue_tag, fpu_req_if.thread_mask, fpu_req_if.rs1_data, fpu_req_if.rs2_data, fpu_req_if.rs3_data);
|
||||||
end
|
end
|
||||||
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
if (gpu_req_if.valid && gpu_req_if.ready) begin
|
||||||
$display("%t: Core%0d-issue: warp=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.warp_num, gpu_req_if.curr_PC, gpu_req_if.issue_tag, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
$display("%t: Core%0d-issue: wid=%0d, PC=%0h, ex=GPU, istag=%0d, tmask=%b, rs1_data=%0h, rs2_data=%0h", $time, CORE_ID, gpu_req_if.wid, gpu_req_if.curr_PC, gpu_req_if.issue_tag, gpu_req_if.thread_mask, gpu_req_if.rs1_data, gpu_req_if.rs2_data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -2,12 +2,11 @@
|
|||||||
|
|
||||||
module VX_issue_demux (
|
module VX_issue_demux (
|
||||||
// inputs
|
// inputs
|
||||||
VX_decode_if decode_if,
|
VX_issue_if issue_if,
|
||||||
VX_gpr_read_if gpr_read_if,
|
|
||||||
input wire [`ISTAG_BITS-1:0] issue_tag,
|
|
||||||
|
|
||||||
// outputs
|
// outputs
|
||||||
VX_alu_req_if alu_req_if,
|
VX_alu_req_if alu_req_if,
|
||||||
|
VX_bru_req_if bru_req_if,
|
||||||
VX_lsu_req_if lsu_req_if,
|
VX_lsu_req_if lsu_req_if,
|
||||||
VX_csr_req_if csr_req_if,
|
VX_csr_req_if csr_req_if,
|
||||||
VX_mul_req_if mul_req_if,
|
VX_mul_req_if mul_req_if,
|
||||||
@@ -15,74 +14,89 @@ module VX_issue_demux (
|
|||||||
VX_gpu_req_if gpu_req_if
|
VX_gpu_req_if gpu_req_if
|
||||||
);
|
);
|
||||||
// ALU unit
|
// ALU unit
|
||||||
assign alu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_ALU);
|
assign alu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_ALU);
|
||||||
assign alu_req_if.thread_mask = decode_if.thread_mask;
|
assign alu_req_if.issue_tag = issue_if.issue_tag;
|
||||||
assign alu_req_if.issue_tag = issue_tag;
|
assign alu_req_if.wid = issue_if.wid;
|
||||||
assign alu_req_if.warp_num = decode_if.warp_num;
|
assign alu_req_if.thread_mask = issue_if.thread_mask;
|
||||||
assign alu_req_if.curr_PC = decode_if.curr_PC;
|
assign alu_req_if.curr_PC = issue_if.curr_PC;
|
||||||
assign alu_req_if.alu_op = `ALU_OP(decode_if.ex_op);
|
assign alu_req_if.op = `ALU_OP(issue_if.ex_op);
|
||||||
assign alu_req_if.rs1_data = decode_if.rs1_is_PC ? {`NUM_THREADS{decode_if.curr_PC}} : gpr_read_if.rs1_data;
|
assign alu_req_if.rs1_is_PC = issue_if.rs1_is_PC;
|
||||||
assign alu_req_if.rs2_data = decode_if.rs2_is_imm ? {`NUM_THREADS{decode_if.imm}} : gpr_read_if.rs2_data;
|
assign alu_req_if.rs2_is_imm = issue_if.rs2_is_imm;
|
||||||
assign alu_req_if.offset = decode_if.imm;
|
assign alu_req_if.imm = issue_if.imm;
|
||||||
assign alu_req_if.next_PC = decode_if.next_PC;
|
assign alu_req_if.rs1_data = issue_if.rs1_data;
|
||||||
|
assign alu_req_if.rs2_data = issue_if.rs2_data;
|
||||||
|
|
||||||
|
// BRU unit
|
||||||
|
assign bru_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_BRU);
|
||||||
|
assign bru_req_if.issue_tag = issue_if.issue_tag;
|
||||||
|
assign bru_req_if.wid = issue_if.wid;
|
||||||
|
assign bru_req_if.thread_mask = issue_if.thread_mask;
|
||||||
|
assign bru_req_if.curr_PC = issue_if.curr_PC;
|
||||||
|
assign bru_req_if.op = `BRU_OP(issue_if.ex_op);
|
||||||
|
assign bru_req_if.rs1_is_PC = issue_if.rs1_is_PC;
|
||||||
|
assign bru_req_if.rs1_data = issue_if.rs1_data[issue_if.tid];
|
||||||
|
assign bru_req_if.rs2_data = issue_if.rs2_data[issue_if.tid];
|
||||||
|
assign bru_req_if.offset = issue_if.imm;
|
||||||
|
|
||||||
// LSU unit
|
// LSU unit
|
||||||
assign lsu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_LSU);
|
assign lsu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_LSU);
|
||||||
assign lsu_req_if.thread_mask = decode_if.thread_mask;
|
assign lsu_req_if.issue_tag = issue_if.issue_tag;
|
||||||
assign lsu_req_if.issue_tag = issue_tag;
|
assign lsu_req_if.wid = issue_if.wid;
|
||||||
assign lsu_req_if.warp_num = decode_if.warp_num;
|
assign lsu_req_if.thread_mask = issue_if.thread_mask;
|
||||||
assign lsu_req_if.curr_PC = decode_if.curr_PC;
|
assign lsu_req_if.curr_PC = issue_if.curr_PC;
|
||||||
assign lsu_req_if.rw = `LSU_RW(decode_if.ex_op);
|
assign lsu_req_if.rw = `LSU_RW(issue_if.ex_op);
|
||||||
assign lsu_req_if.byteen = `LSU_BE(decode_if.ex_op);
|
assign lsu_req_if.byteen = `LSU_BE(issue_if.ex_op);
|
||||||
assign lsu_req_if.base_addr = gpr_read_if.rs1_data;
|
assign lsu_req_if.base_addr = issue_if.rs1_data;
|
||||||
assign lsu_req_if.store_data = gpr_read_if.rs2_data;
|
assign lsu_req_if.store_data = issue_if.rs2_data;
|
||||||
assign lsu_req_if.offset = decode_if.imm;
|
assign lsu_req_if.offset = issue_if.imm;
|
||||||
assign lsu_req_if.rd = decode_if.rd;
|
assign lsu_req_if.rd = issue_if.rd;
|
||||||
assign lsu_req_if.wb = decode_if.wb;
|
assign lsu_req_if.wb = issue_if.wb;
|
||||||
|
|
||||||
// CSR unit
|
// CSR unit
|
||||||
assign csr_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_CSR);
|
assign csr_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_CSR);
|
||||||
assign csr_req_if.issue_tag = issue_tag;
|
assign csr_req_if.issue_tag = issue_if.issue_tag;
|
||||||
assign csr_req_if.warp_num = decode_if.warp_num;
|
assign csr_req_if.wid = issue_if.wid;
|
||||||
assign csr_req_if.curr_PC = decode_if.curr_PC;
|
assign csr_req_if.thread_mask = issue_if.thread_mask;
|
||||||
assign csr_req_if.csr_op = `CSR_OP(decode_if.ex_op);
|
assign csr_req_if.curr_PC = issue_if.curr_PC;
|
||||||
assign csr_req_if.csr_addr = decode_if.imm[`CSR_ADDR_BITS-1:0];
|
assign csr_req_if.op = `CSR_OP(issue_if.ex_op);
|
||||||
assign csr_req_if.csr_mask = decode_if.rs2_is_imm ? 32'(decode_if.rs1) : gpr_read_if.rs1_data[0];
|
assign csr_req_if.csr_addr = issue_if.imm[`CSR_ADDR_BITS-1:0];
|
||||||
|
assign csr_req_if.csr_mask = issue_if.rs2_is_imm ? 32'(issue_if.rs1) : issue_if.rs1_data[0];
|
||||||
assign csr_req_if.is_io = 1'b0;
|
assign csr_req_if.is_io = 1'b0;
|
||||||
|
|
||||||
// MUL unit
|
// MUL unit
|
||||||
`ifdef EXT_M_ENABLE
|
`ifdef EXT_M_ENABLE
|
||||||
assign mul_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_MUL);
|
assign mul_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_MUL);
|
||||||
assign mul_req_if.issue_tag = issue_tag;
|
assign mul_req_if.issue_tag = issue_if.issue_tag;
|
||||||
assign mul_req_if.warp_num = decode_if.warp_num;
|
assign mul_req_if.wid = issue_if.wid;
|
||||||
assign mul_req_if.curr_PC = decode_if.curr_PC;
|
assign mul_req_if.thread_mask = issue_if.thread_mask;
|
||||||
assign mul_req_if.mul_op = `MUL_OP(decode_if.ex_op);
|
assign mul_req_if.curr_PC = issue_if.curr_PC;
|
||||||
assign mul_req_if.rs1_data = gpr_read_if.rs1_data;
|
assign mul_req_if.op = `MUL_OP(issue_if.ex_op);
|
||||||
assign mul_req_if.rs2_data = gpr_read_if.rs2_data;
|
assign mul_req_if.rs1_data = issue_if.rs1_data;
|
||||||
|
assign mul_req_if.rs2_data = issue_if.rs2_data;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// FPU unit
|
// FPU unit
|
||||||
`ifdef EXT_F_ENABLE
|
`ifdef EXT_F_ENABLE
|
||||||
assign fpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_FPU);
|
assign fpu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_FPU);
|
||||||
assign fpu_req_if.issue_tag = issue_tag;
|
assign fpu_req_if.issue_tag = issue_if.issue_tag;
|
||||||
assign fpu_req_if.warp_num = decode_if.warp_num;
|
assign fpu_req_if.wid = issue_if.wid;
|
||||||
assign fpu_req_if.curr_PC = decode_if.curr_PC;
|
assign fpu_req_if.thread_mask = issue_if.thread_mask;
|
||||||
assign fpu_req_if.fpu_op = `FPU_OP(decode_if.ex_op);
|
assign fpu_req_if.curr_PC = issue_if.curr_PC;
|
||||||
assign fpu_req_if.frm = decode_if.frm;
|
assign fpu_req_if.op = `FPU_OP(issue_if.ex_op);
|
||||||
assign fpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
assign fpu_req_if.frm = issue_if.frm;
|
||||||
assign fpu_req_if.rs2_data = gpr_read_if.rs2_data;
|
assign fpu_req_if.rs1_data = issue_if.rs1_data;
|
||||||
assign fpu_req_if.rs3_data = gpr_read_if.rs3_data;
|
assign fpu_req_if.rs2_data = issue_if.rs2_data;
|
||||||
|
assign fpu_req_if.rs3_data = issue_if.rs3_data;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// GPU unit
|
// GPU unit
|
||||||
assign gpu_req_if.valid = decode_if.valid && (decode_if.ex_type == `EX_GPU);
|
assign gpu_req_if.valid = issue_if.valid && (issue_if.ex_type == `EX_GPU);
|
||||||
assign gpu_req_if.thread_mask = decode_if.thread_mask;
|
assign gpu_req_if.issue_tag = issue_if.issue_tag;
|
||||||
assign gpu_req_if.issue_tag = issue_tag;
|
assign gpu_req_if.wid = issue_if.wid;
|
||||||
assign gpu_req_if.warp_num = decode_if.warp_num;
|
assign gpu_req_if.thread_mask = issue_if.thread_mask;
|
||||||
assign gpu_req_if.curr_PC = decode_if.curr_PC;
|
assign gpu_req_if.curr_PC = issue_if.curr_PC;
|
||||||
assign gpu_req_if.gpu_op = `GPU_OP(decode_if.ex_op);
|
assign gpu_req_if.op = `GPU_OP(issue_if.ex_op);
|
||||||
assign gpu_req_if.rs1_data = gpr_read_if.rs1_data;
|
assign gpu_req_if.rs1_data = issue_if.rs1_data;
|
||||||
assign gpu_req_if.rs2_data = gpr_read_if.rs2_data[0];
|
assign gpu_req_if.rs2_data = issue_if.rs2_data[0];
|
||||||
assign gpu_req_if.next_PC = decode_if.next_PC;
|
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -19,24 +19,24 @@ module VX_lsu_unit #(
|
|||||||
VX_exu_to_cmt_if lsu_commit_if
|
VX_exu_to_cmt_if lsu_commit_if
|
||||||
);
|
);
|
||||||
|
|
||||||
wire use_valid;
|
wire valid_in;
|
||||||
wire [`NUM_THREADS-1:0] use_thread_mask;
|
wire ready_in;
|
||||||
wire use_req_rw;
|
|
||||||
wire [`NUM_THREADS-1:0][29:0] use_req_addr;
|
|
||||||
wire [`NUM_THREADS-1:0][1:0] use_req_offset;
|
|
||||||
wire [`NUM_THREADS-1:0][3:0] use_req_byteen;
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] use_req_data;
|
|
||||||
wire [1:0] use_req_sext;
|
|
||||||
wire [`NR_BITS-1:0] use_rd;
|
|
||||||
wire [`NW_BITS-1:0] use_warp_num;
|
|
||||||
wire [`ISTAG_BITS-1:0] use_issue_tag;
|
|
||||||
wire use_wb;
|
|
||||||
wire [31:0] use_pc;
|
|
||||||
|
|
||||||
genvar i;
|
wire [`NUM_THREADS-1:0] req_thread_mask;
|
||||||
|
wire req_rw;
|
||||||
|
wire [`NUM_THREADS-1:0][29:0] req_addr;
|
||||||
|
wire [`NUM_THREADS-1:0][1:0] req_offset;
|
||||||
|
wire [`NUM_THREADS-1:0][3:0] req_byteen;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] req_data;
|
||||||
|
wire [1:0] req_sext;
|
||||||
|
wire [`NR_BITS-1:0] req_rd;
|
||||||
|
wire [`NW_BITS-1:0] req_wid;
|
||||||
|
wire [`ISTAG_BITS-1:0] req_issue_tag;
|
||||||
|
wire req_wb;
|
||||||
|
wire [31:0] req_pc;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] full_address;
|
wire [`NUM_THREADS-1:0][31:0] full_address;
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
assign full_address[i] = lsu_req_if.base_addr[i] + lsu_req_if.offset;
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -63,38 +63,39 @@ module VX_lsu_unit #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign mem_req_addr[i] = full_address[i][31:2];
|
assign mem_req_addr[i] = full_address[i][31:2];
|
||||||
assign mem_req_offset[i] = full_address[i][1:0];
|
assign mem_req_offset[i] = full_address[i][1:0];
|
||||||
assign mem_req_byteen[i] = wmask << full_address[i][1:0];
|
assign mem_req_byteen[i] = wmask << full_address[i][1:0];
|
||||||
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
assign mem_req_data[i] = lsu_req_if.store_data[i] << {mem_req_offset[i], 3'b0};
|
||||||
end
|
end
|
||||||
|
|
||||||
wire stall_in = ~dcache_req_if.ready && use_valid;
|
|
||||||
|
|
||||||
// Can accept new request?
|
|
||||||
assign lsu_req_if.ready = ~stall_in;
|
|
||||||
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
wire [`NUM_THREADS-1:0][31:0] use_address;
|
wire [`NUM_THREADS-1:0][31:0] req_address;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
|
|
||||||
VX_generic_register #(
|
// use a skid buffer because the dcache's ready signal is combinational
|
||||||
.N(1 + `NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + 2 + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32)
|
// use buffer size of two for stall-free execution
|
||||||
) lsu_req_reg (
|
VX_elastic_buffer #(
|
||||||
.clk (clk),
|
.DATAW (`NW_BITS + `NUM_THREADS + `ISTAG_BITS + (`NUM_THREADS * 32) + 2 + 1 + (`NUM_THREADS * (30 + 2 + 4 + 32)) + `NR_BITS + 1 + 32),
|
||||||
.reset (reset),
|
.SIZE (2)
|
||||||
.stall (stall_in),
|
) input_buffer (
|
||||||
.flush (0),
|
.clk (clk),
|
||||||
.in ({lsu_req_if.valid, lsu_req_if.warp_num, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, mem_req_sext, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
.reset (reset),
|
||||||
.out ({use_valid, use_warp_num, use_thread_mask, use_issue_tag, use_address, use_req_sext, use_req_rw, use_req_addr, use_req_offset, use_req_byteen, use_req_data, use_rd, use_wb, use_pc})
|
.valid_in (lsu_req_if.valid),
|
||||||
|
.ready_in (lsu_req_if.ready),
|
||||||
|
.data_in ({lsu_req_if.wid, lsu_req_if.thread_mask, lsu_req_if.issue_tag, full_address, mem_req_sext, lsu_req_if.rw, mem_req_addr, mem_req_offset, mem_req_byteen, mem_req_data, lsu_req_if.rd, lsu_req_if.wb, lsu_req_if.curr_PC}),
|
||||||
|
.data_out ({req_wid, req_thread_mask, req_issue_tag, req_address, req_sext, req_rw, req_addr, req_offset, req_byteen, req_data, req_rd, req_wb, req_pc}),
|
||||||
|
.ready_out (ready_in),
|
||||||
|
.valid_out (valid_in)
|
||||||
);
|
);
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0] mem_rsp_mask_buf [`ISSUEQ_SIZE-1:0];
|
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] mem_rsp_mask_buf;
|
||||||
|
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] mem_rsp_data_prev_buf;
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0][1:0] mem_rsp_offset_buf [`ISSUEQ_SIZE-1:0];
|
reg [`NUM_THREADS-1:0][1:0] mem_rsp_offset_buf [`ISSUEQ_SIZE-1:0];
|
||||||
reg [1:0] mem_rsp_sext_buf [`ISSUEQ_SIZE-1:0];
|
reg [1:0] mem_rsp_sext_buf [`ISSUEQ_SIZE-1:0];
|
||||||
reg [`NUM_THREADS-1:0][31:0] mem_rsp_data_all_buf [`ISSUEQ_SIZE-1:0];
|
reg [`NW_BITS-1:0] mem_rsp_wid_buf [`ISSUEQ_SIZE-1:0];
|
||||||
reg [`NW_BITS-1:0] mem_rsp_warp_num_buf [`ISSUEQ_SIZE-1:0];
|
|
||||||
reg [31:0] mem_rsp_curr_PC_buf [`ISSUEQ_SIZE-1:0];
|
reg [31:0] mem_rsp_curr_PC_buf [`ISSUEQ_SIZE-1:0];
|
||||||
reg [`NR_BITS-1:0] mem_rsp_rd_buf [`ISSUEQ_SIZE-1:0];
|
reg [`NR_BITS-1:0] mem_rsp_rd_buf [`ISSUEQ_SIZE-1:0];
|
||||||
|
|
||||||
@@ -105,47 +106,56 @@ module VX_lsu_unit #(
|
|||||||
wire [`NUM_THREADS-1:0] mem_rsp_mask = mem_rsp_mask_buf [rsp_issue_tag];
|
wire [`NUM_THREADS-1:0] mem_rsp_mask = mem_rsp_mask_buf [rsp_issue_tag];
|
||||||
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset = mem_rsp_offset_buf [rsp_issue_tag];
|
wire [`NUM_THREADS-1:0][1:0] mem_rsp_offset = mem_rsp_offset_buf [rsp_issue_tag];
|
||||||
wire [1:0] mem_rsp_sext = mem_rsp_sext_buf [rsp_issue_tag];
|
wire [1:0] mem_rsp_sext = mem_rsp_sext_buf [rsp_issue_tag];
|
||||||
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_all = mem_rsp_data_all_buf [rsp_issue_tag];
|
wire [`NUM_THREADS-1:0][31:0] mem_rsp_data_prev= mem_rsp_data_prev_buf [rsp_issue_tag];
|
||||||
wire [`NW_BITS-1:0] mem_rsp_warp_num = mem_rsp_warp_num_buf [rsp_issue_tag];
|
wire [`NW_BITS-1:0] mem_rsp_wid = mem_rsp_wid_buf [rsp_issue_tag];
|
||||||
wire [31:0] mem_rsp_curr_PC = mem_rsp_curr_PC_buf [rsp_issue_tag];
|
wire [31:0] mem_rsp_curr_PC = mem_rsp_curr_PC_buf [rsp_issue_tag];
|
||||||
wire [`NR_BITS-1:0] mem_rsp_rd = mem_rsp_rd_buf [rsp_issue_tag];
|
wire [`NR_BITS-1:0] mem_rsp_rd = mem_rsp_rd_buf [rsp_issue_tag];
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
|
||||||
|
|
||||||
wire dcache_req_fire = (| dcache_req_if.valid) && dcache_req_if.ready;
|
wire dcache_req_fire = (| dcache_req_if.valid) && dcache_req_if.ready;
|
||||||
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
wire dcache_rsp_fire = (| dcache_rsp_if.valid) && dcache_rsp_if.ready;
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0] mem_rsp_mask_n = mem_rsp_mask & ~dcache_rsp_if.valid;
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (dcache_req_fire && (0 == use_req_rw)) begin
|
if (dcache_req_fire && (0 == req_rw)) begin
|
||||||
mem_rsp_mask_buf [use_issue_tag] <= use_thread_mask;
|
mem_rsp_mask_buf [req_issue_tag] <= req_thread_mask;
|
||||||
mem_rsp_offset_buf [use_issue_tag] <= use_req_offset;
|
mem_rsp_data_prev_buf [req_issue_tag] <= 0;
|
||||||
mem_rsp_sext_buf [use_issue_tag] <= use_req_sext;
|
|
||||||
mem_rsp_data_all_buf [use_issue_tag] <= 0;
|
|
||||||
mem_rsp_warp_num_buf [use_issue_tag] <= use_warp_num;
|
|
||||||
mem_rsp_curr_PC_buf [use_issue_tag] <= use_pc;
|
|
||||||
mem_rsp_rd_buf [use_issue_tag] <= use_rd;
|
|
||||||
end
|
end
|
||||||
if (dcache_rsp_fire) begin
|
if (dcache_rsp_fire) begin
|
||||||
mem_rsp_mask_buf [rsp_issue_tag] <= mem_rsp_mask_n;
|
mem_rsp_mask_buf [rsp_issue_tag] <= mem_rsp_mask_n;
|
||||||
mem_rsp_data_all_buf [rsp_issue_tag] <= mem_rsp_data_all | mem_rsp_data_curr;
|
mem_rsp_data_prev_buf [rsp_issue_tag] <= mem_rsp_data_curr | mem_rsp_data_prev;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (dcache_req_fire && (0 == req_rw)) begin
|
||||||
|
mem_rsp_offset_buf [req_issue_tag] <= req_offset;
|
||||||
|
mem_rsp_sext_buf [req_issue_tag] <= req_sext;
|
||||||
|
mem_rsp_wid_buf [req_issue_tag] <= req_wid;
|
||||||
|
mem_rsp_curr_PC_buf [req_issue_tag] <= req_pc;
|
||||||
|
mem_rsp_rd_buf [req_issue_tag] <= req_rd;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
wire stall_in;
|
||||||
|
|
||||||
// Core Request
|
// Core Request
|
||||||
assign dcache_req_if.valid = {`NUM_THREADS{use_valid}} & use_thread_mask;
|
assign dcache_req_if.valid = {`NUM_THREADS{valid_in && ~stall_in}} & req_thread_mask;
|
||||||
assign dcache_req_if.rw = {`NUM_THREADS{use_req_rw}};
|
assign dcache_req_if.rw = {`NUM_THREADS{req_rw}};
|
||||||
assign dcache_req_if.byteen = use_req_byteen;
|
assign dcache_req_if.byteen = req_byteen;
|
||||||
assign dcache_req_if.addr = use_req_addr;
|
assign dcache_req_if.addr = req_addr;
|
||||||
assign dcache_req_if.data = use_req_data;
|
assign dcache_req_if.data = req_data;
|
||||||
|
|
||||||
|
assign ready_in = dcache_req_if.ready && ~stall_in;
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
assign dcache_req_if.tag = {use_pc, use_wb, use_rd, use_warp_num, use_issue_tag};
|
assign dcache_req_if.tag = {req_pc, req_wb, req_rd, req_wid, req_issue_tag};
|
||||||
`else
|
`else
|
||||||
assign dcache_req_if.tag = use_issue_tag;
|
assign dcache_req_if.tag = req_issue_tag;
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
// Core Response
|
// Core Response
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
|
wire [31:0] rsp_data_shifted = dcache_rsp_if.data[i] >> {mem_rsp_offset[i], 3'b0};
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (mem_rsp_sext)
|
case (mem_rsp_sext)
|
||||||
@@ -156,46 +166,60 @@ module VX_lsu_unit #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
wire is_store_rsp = dcache_req_fire && use_req_rw;
|
reg is_load_rsp;
|
||||||
wire is_load_rsp = (| dcache_rsp_if.valid) && (0 == mem_rsp_mask_n);
|
reg [`NUM_THREADS-1:0][31:0] load_data;
|
||||||
|
reg [`ISTAG_BITS-1:0] rsp_issue_tag_r;
|
||||||
|
|
||||||
assign lsu_commit_if.valid = is_load_rsp || is_store_rsp;
|
always @(posedge clk) begin
|
||||||
assign lsu_commit_if.issue_tag = is_store_rsp ? use_issue_tag : rsp_issue_tag;
|
if (reset) begin
|
||||||
assign lsu_commit_if.data = mem_rsp_data_curr | mem_rsp_data_all;
|
is_load_rsp <= 0;
|
||||||
|
end else begin
|
||||||
|
is_load_rsp <= dcache_rsp_fire && (0 == mem_rsp_mask_n);
|
||||||
|
load_data <= mem_rsp_data_curr | mem_rsp_data_prev;
|
||||||
|
rsp_issue_tag_r <= rsp_issue_tag;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
wire is_store_req = dcache_req_fire && req_rw;
|
||||||
|
assign stall_in = is_load_rsp && valid_in && req_rw; // LOAD has priority
|
||||||
|
|
||||||
|
assign lsu_commit_if.valid = is_load_rsp || is_store_req;
|
||||||
|
assign lsu_commit_if.issue_tag = is_load_rsp ? rsp_issue_tag_r : req_issue_tag;
|
||||||
|
assign lsu_commit_if.data = load_data;
|
||||||
|
|
||||||
// Can accept new cache response?
|
// Can accept new cache response?
|
||||||
assign dcache_rsp_if.ready = lsu_commit_if.ready && ~is_store_rsp; // STORE has priority
|
assign dcache_rsp_if.ready = 1'b1;
|
||||||
|
|
||||||
// scope registration
|
// scope registration
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
|
`SCOPE_ASSIGN (scope_dcache_req_valid, dcache_req_if.valid);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_addr, use_address);
|
`SCOPE_ASSIGN (scope_dcache_req_addr, req_address);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_rw, dcache_req_if.rw );
|
`SCOPE_ASSIGN (scope_dcache_req_rw, dcache_req_if.rw );
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen);
|
`SCOPE_ASSIGN (scope_dcache_req_byteen,dcache_req_if.byteen);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
|
`SCOPE_ASSIGN (scope_dcache_req_data, dcache_req_if.data);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_tag, dcache_req_if.tag);
|
`SCOPE_ASSIGN (scope_dcache_req_tag, dcache_req_if.tag);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
|
`SCOPE_ASSIGN (scope_dcache_req_ready, dcache_req_if.ready);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_warp_num, use_warp_num);
|
`SCOPE_ASSIGN (scope_dcache_req_wid, req_wid);
|
||||||
`SCOPE_ASSIGN (scope_dcache_req_curr_PC, use_pc);
|
`SCOPE_ASSIGN (scope_dcache_req_curr_PC, req_pc);
|
||||||
|
|
||||||
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
`SCOPE_ASSIGN (scope_dcache_rsp_valid, dcache_rsp_if.valid);
|
||||||
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
|
`SCOPE_ASSIGN (scope_dcache_rsp_data, dcache_rsp_if.data);
|
||||||
`SCOPE_ASSIGN (scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
`SCOPE_ASSIGN (scope_dcache_rsp_tag, dcache_rsp_if.tag);
|
||||||
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
`SCOPE_ASSIGN (scope_dcache_rsp_ready, dcache_rsp_if.ready);
|
||||||
|
|
||||||
`UNUSED_VAR (mem_rsp_warp_num)
|
`UNUSED_VAR (mem_rsp_wid)
|
||||||
`UNUSED_VAR (mem_rsp_curr_PC)
|
`UNUSED_VAR (mem_rsp_curr_PC)
|
||||||
`UNUSED_VAR (mem_rsp_rd)
|
`UNUSED_VAR (mem_rsp_rd)
|
||||||
`UNUSED_VAR (use_wb)
|
`UNUSED_VAR (req_wb)
|
||||||
|
|
||||||
`ifdef DBG_PRINT_CORE_DCACHE
|
`ifdef DBG_PRINT_CORE_DCACHE
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
if ((| dcache_req_if.valid) && dcache_req_if.ready) begin
|
||||||
$display("%t: D$%0d req: warp=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
$display("%t: D$%0d req: wid=%0d, PC=%0h, tmask=%b, addr=%0h, tag=%0h, rd=%0d, rw=%0b, byteen=%0h, data=%0h",
|
||||||
$time, CORE_ID, use_warp_num, use_pc, dcache_req_if.valid, use_address, dcache_req_if.tag, use_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
$time, CORE_ID, req_wid, req_pc, dcache_req_if.valid, req_address, dcache_req_if.tag, req_rd, dcache_req_if.rw, dcache_req_if.byteen, dcache_req_if.data);
|
||||||
end
|
end
|
||||||
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
if ((| dcache_rsp_if.valid) && dcache_rsp_if.ready) begin
|
||||||
$display("%t: D$%0d rsp: valid=%b, warp=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
$display("%t: D$%0d rsp: valid=%b, wid=%0d, PC=%0h, tag=%0h, rd=%0d, data=%0h",
|
||||||
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_warp_num, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
$time, CORE_ID, dcache_rsp_if.valid, mem_rsp_wid, mem_rsp_curr_PC, dcache_rsp_if.tag, mem_rsp_rd, dcache_rsp_if.data);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|||||||
@@ -83,15 +83,13 @@ module VX_mem_arb #(
|
|||||||
assign out_mem_req_data = in_mem_req_data [bus_req_sel];
|
assign out_mem_req_data = in_mem_req_data [bus_req_sel];
|
||||||
assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
assign out_mem_req_tag = {in_mem_req_tag [bus_req_sel], REQS_BITS'(bus_req_sel)};
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
|
||||||
assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i));
|
assign in_mem_req_ready[i] = out_mem_req_ready && (bus_req_sel == REQS_BITS'(i));
|
||||||
end
|
end
|
||||||
|
|
||||||
wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0];
|
wire [REQS_BITS-1:0] bus_rsp_sel = out_mem_rsp_tag[REQS_BITS-1:0];
|
||||||
|
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i));
|
assign in_mem_rsp_valid[i] = out_mem_rsp_valid && (bus_rsp_sel == REQS_BITS'(i));
|
||||||
assign in_mem_rsp_data[i] = out_mem_rsp_data;
|
assign in_mem_rsp_data[i] = out_mem_rsp_data;
|
||||||
assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];
|
assign in_mem_rsp_tag[i] = out_mem_rsp_tag[REQS_BITS +: TAG_IN_WIDTH];
|
||||||
|
|||||||
@@ -11,52 +11,36 @@ module VX_mul_unit #(
|
|||||||
|
|
||||||
// Outputs
|
// Outputs
|
||||||
VX_exu_to_cmt_if alu_commit_if
|
VX_exu_to_cmt_if alu_commit_if
|
||||||
);
|
);
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`MUL_BITS-1:0] alu_op = alu_req_if.mul_op;
|
wire [`MUL_BITS-1:0] alu_op;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in1 = alu_req_if.rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] alu_in1, alu_in2;
|
||||||
wire [`NUM_THREADS-1:0][31:0] alu_in2 = alu_req_if.rs2_data;
|
wire valid_in, ready_in;
|
||||||
|
|
||||||
|
// use a skid buffer due to MUL/DIV output arbitration adding realtime backpressure
|
||||||
|
VX_elastic_buffer #(
|
||||||
|
.DATAW (`ISTAG_BITS + `MUL_BITS + (2 * `NUM_THREADS * 32)),
|
||||||
|
.SIZE (0)
|
||||||
|
) input_buffer (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.valid_in (alu_req_if.valid),
|
||||||
|
.ready_in (alu_req_if.ready),
|
||||||
|
.data_in ({alu_req_if.issue_tag, alu_req_if.op, alu_req_if.rs1_data, alu_req_if.rs2_data}),
|
||||||
|
.data_out ({issue_tag, alu_op, alu_in1, alu_in2}),
|
||||||
|
.ready_out (ready_in),
|
||||||
|
.valid_out (valid_in)
|
||||||
|
);
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] mul_result, div_result;
|
wire [`NUM_THREADS-1:0][31:0] mul_result;
|
||||||
|
wire is_mulw = (alu_op == `MUL_MUL);
|
||||||
|
wire is_mulw_out;
|
||||||
|
|
||||||
wire stall_mul, stall_div;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
wire is_mul_mul = (alu_op == `MUL_MUL);
|
|
||||||
wire is_mul_mul_out;
|
|
||||||
|
|
||||||
wire is_div_divu = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
|
||||||
reg [`NUM_THREADS-1:0] is_div_divu_qual;
|
|
||||||
wire [`NUM_THREADS-1:0] is_div_divu_out;
|
|
||||||
|
|
||||||
genvar i;
|
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
|
|
||||||
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
|
wire [32:0] mul_in1 = {(alu_op != `MUL_MULHU) & alu_in1[i][31], alu_in1[i]};
|
||||||
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
|
wire [32:0] mul_in2 = {(alu_op != `MUL_MULHU && alu_op != `MUL_MULHSU) & alu_in2[i][31], alu_in2[i]};
|
||||||
|
|
||||||
reg [32:0] div_in1, div_in2;
|
|
||||||
|
|
||||||
// handle divide by zero
|
|
||||||
always @(*) begin
|
|
||||||
is_div_divu_qual[i] = is_div_divu;
|
|
||||||
div_in1 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in1[i][31], alu_in1[i]};
|
|
||||||
div_in2 = {(alu_op == `MUL_DIV || alu_op == `MUL_REM) & alu_in2[i][31], alu_in2[i]};
|
|
||||||
|
|
||||||
if (0 == alu_in2[i]) begin
|
|
||||||
if (is_div_divu) begin
|
|
||||||
div_in1 = {1'b0, 32'hFFFFFFFF}; // quotient = (0xFFFFFFFF / 1)
|
|
||||||
div_in2 = 1;
|
|
||||||
end else begin
|
|
||||||
is_div_divu_qual[i] = 1; // remainder = (in1 / 1)
|
|
||||||
div_in2 = 1;
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
wire [63:0] mul_result_tmp;
|
wire [63:0] mul_result_tmp;
|
||||||
wire [31:0] div_result_tmp;
|
|
||||||
wire [31:0] rem_result_tmp;
|
|
||||||
|
|
||||||
VX_multiplier #(
|
VX_multiplier #(
|
||||||
.WIDTHA(33),
|
.WIDTHA(33),
|
||||||
@@ -67,12 +51,71 @@ module VX_mul_unit #(
|
|||||||
) multiplier (
|
) multiplier (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.clk_en(~stall_mul),
|
.clk_en(1'b1),
|
||||||
.dataa(mul_in1),
|
.dataa(mul_in1),
|
||||||
.datab(mul_in2),
|
.datab(mul_in2),
|
||||||
.result(mul_result_tmp)
|
.result(mul_result_tmp)
|
||||||
);
|
);
|
||||||
|
|
||||||
|
assign mul_result[i] = is_mulw_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
||||||
|
end
|
||||||
|
|
||||||
|
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
||||||
|
wire mul_valid_out;
|
||||||
|
|
||||||
|
wire mul_fire = valid_in && ready_in && ~`IS_DIV_OP(alu_op);
|
||||||
|
|
||||||
|
VX_shift_register #(
|
||||||
|
.DATAW(1 + `ISTAG_BITS + 1),
|
||||||
|
.DEPTH(`LATENCY_IMUL)
|
||||||
|
) mul_shift_reg (
|
||||||
|
.clk(clk),
|
||||||
|
.reset(reset),
|
||||||
|
.enable(1'b1),
|
||||||
|
.in({mul_fire, issue_tag, is_mulw}),
|
||||||
|
.out({mul_valid_out, mul_issue_tag, is_mulw_out})
|
||||||
|
);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] div_result;
|
||||||
|
wire is_div = (alu_op == `MUL_DIV || alu_op == `MUL_DIVU);
|
||||||
|
wire is_signed_div = (alu_op == `MUL_DIV || alu_op == `MUL_REM);
|
||||||
|
reg [`NUM_THREADS-1:0] is_div_qual;
|
||||||
|
wire [`NUM_THREADS-1:0] is_div_out;
|
||||||
|
wire stall_div;
|
||||||
|
|
||||||
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
|
reg [31:0] div_in1_qual, div_in2_qual;
|
||||||
|
reg [32:0] div_in1, div_in2;
|
||||||
|
wire [31:0] div_result_tmp, rem_result_tmp;
|
||||||
|
|
||||||
|
// handle divide by zero
|
||||||
|
always @(*) begin
|
||||||
|
if (~stall_div) begin
|
||||||
|
is_div_qual[i] = is_div;
|
||||||
|
div_in1_qual = alu_in1[i];
|
||||||
|
div_in2_qual = alu_in2[i];
|
||||||
|
if (0 == alu_in2[i]) begin
|
||||||
|
div_in2_qual = 1;
|
||||||
|
if (is_div) begin
|
||||||
|
div_in1_qual = 32'hFFFFFFFF; // quotient = (0xFFFFFFFF / 1)
|
||||||
|
end else begin
|
||||||
|
is_div_qual[i] = 1; // remainder = (in1 / 1)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
// latch divider inputs
|
||||||
|
always @(posedge clk) begin
|
||||||
|
if (~stall_div) begin
|
||||||
|
div_in1 <= {is_signed_div & alu_in1[i][31], div_in1_qual};
|
||||||
|
div_in2 <= {is_signed_div & alu_in2[i][31], div_in2_qual};
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
VX_divide #(
|
VX_divide #(
|
||||||
.WIDTHN(33),
|
.WIDTHN(33),
|
||||||
.WIDTHD(33),
|
.WIDTHD(33),
|
||||||
@@ -90,49 +133,32 @@ module VX_mul_unit #(
|
|||||||
.quotient(div_result_tmp),
|
.quotient(div_result_tmp),
|
||||||
.remainder(rem_result_tmp)
|
.remainder(rem_result_tmp)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign mul_result[i] = is_mul_mul_out ? mul_result_tmp[31:0] : mul_result_tmp[63:32];
|
assign div_result[i] = is_div_out[i] ? div_result_tmp : rem_result_tmp;
|
||||||
assign div_result[i] = is_div_divu_out[i] ? div_result_tmp : rem_result_tmp;
|
|
||||||
end
|
end
|
||||||
|
|
||||||
wire is_mul_fire = alu_req_if.valid && alu_req_if.ready && ~`IS_DIV_OP(alu_op);
|
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
||||||
wire is_div_fire = alu_req_if.valid && alu_req_if.ready && `IS_DIV_OP(alu_op);
|
|
||||||
|
|
||||||
wire mul_valid_out;
|
|
||||||
wire div_valid_out;
|
wire div_valid_out;
|
||||||
|
|
||||||
wire [`ISTAG_BITS-1:0] mul_issue_tag;
|
wire div_fire = valid_in && ready_in && `IS_DIV_OP(alu_op);
|
||||||
wire [`ISTAG_BITS-1:0] div_issue_tag;
|
|
||||||
|
|
||||||
VX_shift_register #(
|
|
||||||
.DATAW(1 + `ISTAG_BITS + 1),
|
|
||||||
.DEPTH(`LATENCY_IMUL)
|
|
||||||
) mul_shift_reg (
|
|
||||||
.clk(clk),
|
|
||||||
.reset(reset),
|
|
||||||
.enable(~stall_mul),
|
|
||||||
.in({is_mul_fire, alu_req_if.issue_tag, is_mul_mul}),
|
|
||||||
.out({mul_valid_out, mul_issue_tag, is_mul_mul_out})
|
|
||||||
);
|
|
||||||
|
|
||||||
VX_shift_register #(
|
VX_shift_register #(
|
||||||
.DATAW(1 + `ISTAG_BITS + `NUM_THREADS),
|
.DATAW(1 + `ISTAG_BITS + `NUM_THREADS),
|
||||||
.DEPTH(`LATENCY_IDIV)
|
.DEPTH(`LATENCY_IDIV + 1)
|
||||||
) div_shift_reg (
|
) div_shift_reg (
|
||||||
.clk(clk),
|
.clk(clk),
|
||||||
.reset(reset),
|
.reset(reset),
|
||||||
.enable(~stall_div),
|
.enable(~stall_div),
|
||||||
.in({is_div_fire, alu_req_if.issue_tag, is_div_divu_qual}),
|
.in({div_fire, issue_tag, is_div_qual}),
|
||||||
.out({div_valid_out, div_issue_tag, is_div_divu_out})
|
.out({div_valid_out, div_issue_tag, is_div_out})
|
||||||
);
|
);
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
wire stall_out = (~alu_commit_if.ready && alu_commit_if.valid);
|
assign stall_div = mul_valid_out && div_valid_out; // arbitration prioritizes MUL
|
||||||
assign stall_mul = stall_out;
|
|
||||||
assign stall_div = stall_out
|
|
||||||
|| (mul_valid_out && div_valid_out); // arbitration prioritizes MUL
|
|
||||||
|
|
||||||
// can accept new request?
|
// can accept new request?
|
||||||
assign alu_req_if.ready = ~(stall_mul || stall_div);
|
assign ready_in = ~stall_div;
|
||||||
|
|
||||||
assign alu_commit_if.valid = mul_valid_out || div_valid_out;
|
assign alu_commit_if.valid = mul_valid_out || div_valid_out;
|
||||||
assign alu_commit_if.issue_tag = mul_valid_out ? mul_issue_tag : div_issue_tag;
|
assign alu_commit_if.issue_tag = mul_valid_out ? mul_issue_tag : div_issue_tag;
|
||||||
|
|||||||
@@ -107,6 +107,7 @@ module VX_pipeline #(
|
|||||||
VX_warp_ctl_if warp_ctl_if();
|
VX_warp_ctl_if warp_ctl_if();
|
||||||
VX_ifetch_rsp_if ifetch_rsp_if();
|
VX_ifetch_rsp_if ifetch_rsp_if();
|
||||||
VX_alu_req_if alu_req_if();
|
VX_alu_req_if alu_req_if();
|
||||||
|
VX_bru_req_if bru_req_if();
|
||||||
VX_lsu_req_if lsu_req_if();
|
VX_lsu_req_if lsu_req_if();
|
||||||
VX_csr_req_if csr_req_if();
|
VX_csr_req_if csr_req_if();
|
||||||
VX_mul_req_if mul_req_if();
|
VX_mul_req_if mul_req_if();
|
||||||
@@ -117,6 +118,7 @@ module VX_pipeline #(
|
|||||||
VX_wstall_if wstall_if();
|
VX_wstall_if wstall_if();
|
||||||
VX_join_if join_if();
|
VX_join_if join_if();
|
||||||
VX_exu_to_cmt_if alu_commit_if();
|
VX_exu_to_cmt_if alu_commit_if();
|
||||||
|
VX_exu_to_cmt_if bru_commit_if();
|
||||||
VX_exu_to_cmt_if lsu_commit_if();
|
VX_exu_to_cmt_if lsu_commit_if();
|
||||||
VX_exu_to_cmt_if csr_commit_if();
|
VX_exu_to_cmt_if csr_commit_if();
|
||||||
VX_exu_to_cmt_if mul_commit_if();
|
VX_exu_to_cmt_if mul_commit_if();
|
||||||
@@ -157,9 +159,10 @@ module VX_pipeline #(
|
|||||||
|
|
||||||
.decode_if (decode_if),
|
.decode_if (decode_if),
|
||||||
.writeback_if (writeback_if),
|
.writeback_if (writeback_if),
|
||||||
.cmt_to_issue_if (cmt_to_issue_if),
|
.cmt_to_issue_if(cmt_to_issue_if),
|
||||||
|
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
|
.bru_req_if (bru_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
.mul_req_if (mul_req_if),
|
.mul_req_if (mul_req_if),
|
||||||
@@ -183,6 +186,7 @@ module VX_pipeline #(
|
|||||||
.cmt_to_csr_if (cmt_to_csr_if),
|
.cmt_to_csr_if (cmt_to_csr_if),
|
||||||
|
|
||||||
.alu_req_if (alu_req_if),
|
.alu_req_if (alu_req_if),
|
||||||
|
.bru_req_if (bru_req_if),
|
||||||
.lsu_req_if (lsu_req_if),
|
.lsu_req_if (lsu_req_if),
|
||||||
.csr_req_if (csr_req_if),
|
.csr_req_if (csr_req_if),
|
||||||
.mul_req_if (mul_req_if),
|
.mul_req_if (mul_req_if),
|
||||||
@@ -192,6 +196,7 @@ module VX_pipeline #(
|
|||||||
.warp_ctl_if (warp_ctl_if),
|
.warp_ctl_if (warp_ctl_if),
|
||||||
.branch_ctl_if (branch_ctl_if),
|
.branch_ctl_if (branch_ctl_if),
|
||||||
.alu_commit_if (alu_commit_if),
|
.alu_commit_if (alu_commit_if),
|
||||||
|
.bru_commit_if (bru_commit_if),
|
||||||
.lsu_commit_if (lsu_commit_if),
|
.lsu_commit_if (lsu_commit_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
.mul_commit_if (mul_commit_if),
|
||||||
@@ -208,6 +213,7 @@ module VX_pipeline #(
|
|||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
.alu_commit_if (alu_commit_if),
|
.alu_commit_if (alu_commit_if),
|
||||||
|
.bru_commit_if (bru_commit_if),
|
||||||
.lsu_commit_if (lsu_commit_if),
|
.lsu_commit_if (lsu_commit_if),
|
||||||
.csr_commit_if (csr_commit_if),
|
.csr_commit_if (csr_commit_if),
|
||||||
.mul_commit_if (mul_commit_if),
|
.mul_commit_if (mul_commit_if),
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
`ifndef VX_PLATFORM
|
`ifndef VX_PLATFORM
|
||||||
`define VX_PLATFORM
|
`define VX_PLATFORM
|
||||||
|
|
||||||
|
`include "VX_scope.vh"
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`ifndef NDEBUG
|
`ifndef NDEBUG
|
||||||
@@ -50,6 +52,7 @@
|
|||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
|
`define USE_FAST_BRAM (* syn_ramstyle = "mlab" *)
|
||||||
|
`define RELAX_BRAM_RW (* syn_ramstyle = "no_rw_check" *)
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ task print_ex_op;
|
|||||||
input [`EX_BITS-1:0] ex;
|
input [`EX_BITS-1:0] ex;
|
||||||
input [`OP_BITS-1:0] op;
|
input [`OP_BITS-1:0] op;
|
||||||
begin
|
begin
|
||||||
case (ex)
|
case (ex)
|
||||||
`EX_ALU: begin
|
`EX_ALU: begin
|
||||||
case (`ALU_BITS'(op))
|
case (`ALU_BITS'(op))
|
||||||
`ALU_ADD: $write("ADD");
|
`ALU_ADD: $write("ADD");
|
||||||
@@ -37,22 +37,27 @@ task print_ex_op;
|
|||||||
`ALU_AND: $write("AND");
|
`ALU_AND: $write("AND");
|
||||||
`ALU_LUI: $write("LUI");
|
`ALU_LUI: $write("LUI");
|
||||||
`ALU_AUIPC: $write("AUIPC");
|
`ALU_AUIPC: $write("AUIPC");
|
||||||
`ALU_BEQ: $write("BEQ");
|
|
||||||
`ALU_BNE: $write("BNE");
|
|
||||||
`ALU_BLT: $write("BLT");
|
|
||||||
`ALU_BGE: $write("BGE");
|
|
||||||
`ALU_BLTU: $write("BLTU");
|
|
||||||
`ALU_BGEU: $write("BGEU");
|
|
||||||
`ALU_JAL: $write("JAL");
|
|
||||||
`ALU_JALR: $write("JALR");
|
|
||||||
`ALU_ECALL: $write("ECALL");
|
|
||||||
`ALU_EBREAK:$write("EBREAK");
|
|
||||||
`ALU_MRET: $write("MRET");
|
|
||||||
`ALU_SRET: $write("SRET");
|
|
||||||
`ALU_DRET: $write("DRET");
|
|
||||||
default: $write("?");
|
default: $write("?");
|
||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
`EX_BRU: begin
|
||||||
|
case (`BRU_BITS'(op))
|
||||||
|
`BRU_EQ: $write("BEQ");
|
||||||
|
`BRU_NE: $write("BNE");
|
||||||
|
`BRU_LT: $write("BLT");
|
||||||
|
`BRU_GE: $write("BGE");
|
||||||
|
`BRU_LTU: $write("BLTU");
|
||||||
|
`BRU_GEU: $write("BGEU");
|
||||||
|
`BRU_JAL: $write("JAL");
|
||||||
|
`BRU_JALR: $write("JALR");
|
||||||
|
`BRU_ECALL: $write("ECALL");
|
||||||
|
`BRU_EBREAK:$write("EBREAK");
|
||||||
|
`BRU_MRET: $write("MRET");
|
||||||
|
`BRU_SRET: $write("SRET");
|
||||||
|
`BRU_DRET: $write("DRET");
|
||||||
|
default: $write("?");
|
||||||
|
endcase
|
||||||
|
end
|
||||||
`EX_LSU: begin
|
`EX_LSU: begin
|
||||||
case (`LSU_BITS'(op))
|
case (`LSU_BITS'(op))
|
||||||
`LSU_LB: $write("LB");
|
`LSU_LB: $write("LB");
|
||||||
|
|||||||
@@ -1,82 +0,0 @@
|
|||||||
`include "VX_define.vh"
|
|
||||||
|
|
||||||
module VX_scheduler  #( // removed by this commit: per-thread in-use register tracking plus the issue buffer
|
|
||||||
    parameter CORE_ID = 0 // only used to tag the debug trace at the bottom
|
|
||||||
) (
|
|
||||||
    input wire clk,
|
|
||||||
    input wire reset,
|
|
||||||
|
|
||||||
    VX_decode_if    decode_if,
|
|
||||||
    VX_wb_if        writeback_if,  
|
|
||||||
    VX_cmt_to_issue_if cmt_to_issue_if,
|
|
||||||
    input wire      ex_busy, // contributes directly to schedule_delay
|
|
||||||
    output wire [`ISTAG_BITS-1:0] issue_tag, // wired to the issue buffer's write_addr port
|
|
||||||
    output wire     schedule_delay // asserted on ex_busy, a register hazard, or a full issue buffer
|
|
||||||
);
|
|
||||||
    localparam CTVW = `CLOG2(`NUM_WARPS * `NUM_REGS + 1); // not referenced anywhere else in this module
|
|
||||||
    reg [`NUM_THREADS-1:0] inuse_registers [(`NUM_WARPS * `NUM_REGS)-1:0]; // per (warp, register): threads with a write still pending
|
|
||||||
    reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0]; // per warp: bit r is set while any thread of register r is pending
|
|
||||||
|
|
||||||
    wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.warp_num] & decode_if.reg_use_mask; // pending registers that this instruction also uses
|
|
||||||
    wire inuse_hazard = (inuse_mask != 0);
|
|
||||||
|
|
||||||
    wire issue_buf_full;
|
|
||||||
|
|
||||||
    assign schedule_delay = ex_busy || inuse_hazard || issue_buf_full;
|
|
||||||
|
|
||||||
    wire issue_fire = decode_if.valid && decode_if.ready; // decode handshake completes
|
|
||||||
|
|
||||||
    wire writeback_fire = writeback_if.valid && writeback_if.ready; // writeback handshake completes
|
|
||||||
|
|
||||||
    wire acquire_rd = issue_fire && (decode_if.wb != 0); // only instructions that write back reserve rd
|
|
||||||
|
|
||||||
    wire [`NUM_THREADS-1:0] inuse_registers_n = inuse_registers[{writeback_if.warp_num, writeback_if.rd}] & ~writeback_if.thread_mask; // pending threads left after this writeback
|
|
||||||
|
|
||||||
    always @(posedge clk) begin
|
|
||||||
        if (reset) begin
|
|
||||||
            for (integer w = 0; w < `NUM_WARPS; w++) begin
|
|
||||||
                for (integer i = 0; i < `NUM_REGS; i++) begin
|
|
||||||
                    inuse_registers[w * `NUM_REGS + i] <= 0;                    
|
|
||||||
                end
|
|
||||||
                inuse_reg_mask[w] <= `NUM_REGS'(0);
|
|
||||||
            end            
|
|
||||||
        end else begin
|
|
||||||
            if (acquire_rd) begin
|
|
||||||
                inuse_registers[{decode_if.warp_num, decode_if.rd}] <= decode_if.thread_mask; // every active thread of the issued instruction is pending
|
|
||||||
                inuse_reg_mask[decode_if.warp_num][decode_if.rd] <= 1;                
|
|
||||||
            end       
|
|
||||||
            if (writeback_fire) begin
|
|
||||||
                assert(inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] != 0); // a writeback should only hit a reserved register
|
|
||||||
                inuse_registers[{writeback_if.warp_num, writeback_if.rd}] <= inuse_registers_n;
|
|
||||||
                inuse_reg_mask[writeback_if.warp_num][writeback_if.rd] <= (| inuse_registers_n); // stays set until the last pending thread is written
|
|
||||||
            end            
|
|
||||||
        end
|
|
||||||
    end
|
|
||||||
|
|
||||||
    VX_cam_buffer #(
|
|
||||||
        .DATAW  ($bits(issue_data_t)),
|
|
||||||
        .SIZE   (`ISSUEQ_SIZE),
|
|
||||||
        .RPORTS (`NUM_EXS) // six unit ports are wired below; presumably `NUM_EXS == 6 here - confirm
|
|
||||||
    ) issue_buffer (
|
|
||||||
        .clk          (clk),
|
|
||||||
        .reset        (reset),
|
|
||||||
        .write_data   ({decode_if.warp_num, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}),    
|
|
||||||
        .write_addr   (issue_tag),        
|
|
||||||
        .acquire_slot (issue_fire), 
|
|
||||||
        .release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}), // unit order: alu, lsu, csr, mul, fpu, gpu
|
|
||||||
        .read_addr    ({cmt_to_issue_if.alu_tag,   cmt_to_issue_if.lsu_tag,   cmt_to_issue_if.csr_tag,   cmt_to_issue_if.mul_tag,   cmt_to_issue_if.fpu_tag,   cmt_to_issue_if.gpu_tag}),
|
|
||||||
        .read_data    ({cmt_to_issue_if.alu_data,  cmt_to_issue_if.lsu_data,  cmt_to_issue_if.csr_data,  cmt_to_issue_if.mul_data,  cmt_to_issue_if.fpu_data,  cmt_to_issue_if.gpu_data}),
|
|
||||||
        .full         (issue_buf_full)
|
|
||||||
    );
|
|
||||||
|
|
||||||
`ifdef DBG_PRINT_PIPELINE
|
|
||||||
    always @(posedge clk) begin // trace each cycle in which decode is valid but not accepted
|
|
||||||
        if (decode_if.valid && ~decode_if.ready) begin
|
|
||||||
            $display("%t: Core%0d-stall: warp=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b",
|
|
||||||
                    $time, CORE_ID, decode_if.warp_num, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
|
||||||
                    inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy);
|
|
||||||
        end
|
|
||||||
    end
|
|
||||||
`endif
|
|
||||||
|
|
||||||
endmodule
|
|
||||||
@@ -15,12 +15,12 @@
|
|||||||
scope_snp_req_invalidate, \
|
scope_snp_req_invalidate, \
|
||||||
scope_snp_req_tag, \
|
scope_snp_req_tag, \
|
||||||
scope_snp_rsp_tag, \
|
scope_snp_rsp_tag, \
|
||||||
scope_icache_req_warp_num, \
|
scope_icache_req_wid, \
|
||||||
scope_icache_req_addr, \
|
scope_icache_req_addr, \
|
||||||
scope_icache_req_tag, \
|
scope_icache_req_tag, \
|
||||||
scope_icache_rsp_data, \
|
scope_icache_rsp_data, \
|
||||||
scope_icache_rsp_tag, \
|
scope_icache_rsp_tag, \
|
||||||
scope_dcache_req_warp_num, \
|
scope_dcache_req_wid, \
|
||||||
scope_dcache_req_curr_PC, \
|
scope_dcache_req_curr_PC, \
|
||||||
scope_dcache_req_addr, \
|
scope_dcache_req_addr, \
|
||||||
scope_dcache_req_rw, \
|
scope_dcache_req_rw, \
|
||||||
@@ -29,17 +29,17 @@
|
|||||||
scope_dcache_req_tag, \
|
scope_dcache_req_tag, \
|
||||||
scope_dcache_rsp_data, \
|
scope_dcache_rsp_data, \
|
||||||
scope_dcache_rsp_tag, \
|
scope_dcache_rsp_tag, \
|
||||||
scope_decode_warp_num, \
|
scope_decode_wid, \
|
||||||
scope_decode_curr_PC, \
|
scope_decode_curr_PC, \
|
||||||
scope_decode_is_jal, \
|
scope_decode_is_jal, \
|
||||||
scope_decode_rs1, \
|
scope_decode_rs1, \
|
||||||
scope_decode_rs2, \
|
scope_decode_rs2, \
|
||||||
scope_execute_warp_num, \
|
scope_execute_wid, \
|
||||||
scope_execute_curr_PC, \
|
scope_execute_curr_PC, \
|
||||||
scope_execute_rd, \
|
scope_execute_rd, \
|
||||||
scope_execute_a, \
|
scope_execute_a, \
|
||||||
scope_execute_b, \
|
scope_execute_b, \
|
||||||
scope_writeback_warp_num, \
|
scope_writeback_wid, \
|
||||||
scope_writeback_curr_PC, \
|
scope_writeback_curr_PC, \
|
||||||
scope_writeback_wb, \
|
scope_writeback_wb, \
|
||||||
scope_writeback_rd, \
|
scope_writeback_rd, \
|
||||||
@@ -103,7 +103,7 @@
|
|||||||
wire scope_snp_rsp_valid; \
|
wire scope_snp_rsp_valid; \
|
||||||
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
|
wire [`VX_SNP_TAG_WIDTH-1:0] scope_snp_rsp_tag; \
|
||||||
wire scope_icache_req_valid; \
|
wire scope_icache_req_valid; \
|
||||||
wire [`NW_BITS-1:0] scope_icache_req_warp_num; \
|
wire [`NW_BITS-1:0] scope_icache_req_wid; \
|
||||||
wire [31:0] scope_icache_req_addr; \
|
wire [31:0] scope_icache_req_addr; \
|
||||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
|
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag; \
|
||||||
wire scope_icache_req_ready; \
|
wire scope_icache_req_ready; \
|
||||||
@@ -112,7 +112,7 @@
|
|||||||
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
|
wire [`ICORE_TAG_WIDTH-1:0] scope_icache_rsp_tag; \
|
||||||
wire scope_icache_rsp_ready; \
|
wire scope_icache_rsp_ready; \
|
||||||
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
|
wire [`NUM_THREADS-1:0] scope_dcache_req_valid; \
|
||||||
wire [`NW_BITS-1:0] scope_dcache_req_warp_num; \
|
wire [`NW_BITS-1:0] scope_dcache_req_wid; \
|
||||||
wire [31:0] scope_dcache_req_curr_PC; \
|
wire [31:0] scope_dcache_req_curr_PC; \
|
||||||
wire [63:0] scope_dcache_req_addr; \
|
wire [63:0] scope_dcache_req_addr; \
|
||||||
wire scope_dcache_req_rw; \
|
wire scope_dcache_req_rw; \
|
||||||
@@ -131,19 +131,19 @@
|
|||||||
wire scope_exec_delay; \
|
wire scope_exec_delay; \
|
||||||
wire scope_gpr_stage_delay; \
|
wire scope_gpr_stage_delay; \
|
||||||
wire [`NUM_THREADS-1:0] scope_decode_valid; \
|
wire [`NUM_THREADS-1:0] scope_decode_valid; \
|
||||||
wire [`NW_BITS-1:0] scope_decode_warp_num; \
|
wire [`NW_BITS-1:0] scope_decode_wid; \
|
||||||
wire [31:0] scope_decode_curr_PC; \
|
wire [31:0] scope_decode_curr_PC; \
|
||||||
wire scope_decode_is_jal; \
|
wire scope_decode_is_jal; \
|
||||||
wire [`NR_BITS-1:0] scope_decode_rs1; \
|
wire [`NR_BITS-1:0] scope_decode_rs1; \
|
||||||
wire [`NR_BITS-1:0] scope_decode_rs2; \
|
wire [`NR_BITS-1:0] scope_decode_rs2; \
|
||||||
wire [`NUM_THREADS-1:0] scope_execute_valid; \
|
wire [`NUM_THREADS-1:0] scope_execute_valid; \
|
||||||
wire [`NW_BITS-1:0] scope_execute_warp_num; \
|
wire [`NW_BITS-1:0] scope_execute_wid; \
|
||||||
wire [31:0] scope_execute_curr_PC; \
|
wire [31:0] scope_execute_curr_PC; \
|
||||||
wire [`NR_BITS-1:0] scope_execute_rd; \
|
wire [`NR_BITS-1:0] scope_execute_rd; \
|
||||||
wire [63:0] scope_execute_a; \
|
wire [63:0] scope_execute_a; \
|
||||||
wire [63:0] scope_execute_b; \
|
wire [63:0] scope_execute_b; \
|
||||||
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
wire [`NUM_THREADS-1:0] scope_writeback_valid; \
|
||||||
wire [`NW_BITS-1:0] scope_writeback_warp_num; \
|
wire [`NW_BITS-1:0] scope_writeback_wid; \
|
||||||
wire [31:0] scope_writeback_curr_PC; \
|
wire [31:0] scope_writeback_curr_PC; \
|
||||||
wire scope_writeback_wb; \
|
wire scope_writeback_wb; \
|
||||||
wire [`NR_BITS-1:0] scope_writeback_rd; \
|
wire [`NR_BITS-1:0] scope_writeback_rd; \
|
||||||
@@ -162,7 +162,7 @@
|
|||||||
|
|
||||||
`define SCOPE_SIGNALS_ISTAGE_IO \
|
`define SCOPE_SIGNALS_ISTAGE_IO \
|
||||||
output wire scope_icache_req_valid, \
|
output wire scope_icache_req_valid, \
|
||||||
output wire [`NW_BITS-1:0] scope_icache_req_warp_num, \
|
output wire [`NW_BITS-1:0] scope_icache_req_wid, \
|
||||||
output wire [31:0] scope_icache_req_addr, \
|
output wire [31:0] scope_icache_req_addr, \
|
||||||
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
|
output wire [`ICORE_TAG_WIDTH-1:0] scope_icache_req_tag, \
|
||||||
output wire scope_icache_req_ready, \
|
output wire scope_icache_req_ready, \
|
||||||
@@ -173,7 +173,7 @@
|
|||||||
|
|
||||||
`define SCOPE_SIGNALS_LSU_IO \
|
`define SCOPE_SIGNALS_LSU_IO \
|
||||||
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
|
output wire [`NUM_THREADS-1:0] scope_dcache_req_valid, \
|
||||||
output wire [`NW_BITS-1:0] scope_dcache_req_warp_num, \
|
output wire [`NW_BITS-1:0] scope_dcache_req_wid, \
|
||||||
output wire [31:0] scope_dcache_req_curr_PC, \
|
output wire [31:0] scope_dcache_req_curr_PC, \
|
||||||
output wire [63:0] scope_dcache_req_addr, \
|
output wire [63:0] scope_dcache_req_addr, \
|
||||||
output wire scope_dcache_req_rw, \
|
output wire scope_dcache_req_rw, \
|
||||||
@@ -210,19 +210,19 @@
|
|||||||
|
|
||||||
`define SCOPE_SIGNALS_BE_IO \
|
`define SCOPE_SIGNALS_BE_IO \
|
||||||
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
|
output wire [`NUM_THREADS-1:0] scope_decode_valid, \
|
||||||
output wire [`NW_BITS-1:0] scope_decode_warp_num, \
|
output wire [`NW_BITS-1:0] scope_decode_wid, \
|
||||||
output wire [31:0] scope_decode_curr_PC, \
|
output wire [31:0] scope_decode_curr_PC, \
|
||||||
output wire scope_decode_is_jal, \
|
output wire scope_decode_is_jal, \
|
||||||
output wire [`NR_BITS-1:0] scope_decode_rs1, \
|
output wire [`NR_BITS-1:0] scope_decode_rs1, \
|
||||||
output wire [`NR_BITS-1:0] scope_decode_rs2, \
|
output wire [`NR_BITS-1:0] scope_decode_rs2, \
|
||||||
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
|
output wire [`NUM_THREADS-1:0] scope_execute_valid, \
|
||||||
output wire [`NW_BITS-1:0] scope_execute_warp_num, \
|
output wire [`NW_BITS-1:0] scope_execute_wid, \
|
||||||
output wire [31:0] scope_execute_curr_PC, \
|
output wire [31:0] scope_execute_curr_PC, \
|
||||||
output wire [`NR_BITS-1:0] scope_execute_rd, \
|
output wire [`NR_BITS-1:0] scope_execute_rd, \
|
||||||
output wire [63:0] scope_execute_a, \
|
output wire [63:0] scope_execute_a, \
|
||||||
output wire [63:0] scope_execute_b, \
|
output wire [63:0] scope_execute_b, \
|
||||||
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
output wire [`NUM_THREADS-1:0] scope_writeback_valid, \
|
||||||
output wire [`NW_BITS-1:0] scope_writeback_warp_num, \
|
output wire [`NW_BITS-1:0] scope_writeback_wid, \
|
||||||
output wire [31:0] scope_writeback_curr_PC, \
|
output wire [31:0] scope_writeback_curr_PC, \
|
||||||
output wire scope_writeback_wb, \
|
output wire scope_writeback_wb, \
|
||||||
output wire [`NR_BITS-1:0] scope_writeback_rd, \
|
output wire [`NR_BITS-1:0] scope_writeback_rd, \
|
||||||
@@ -230,7 +230,7 @@
|
|||||||
|
|
||||||
`define SCOPE_SIGNALS_ISTAGE_BIND \
|
`define SCOPE_SIGNALS_ISTAGE_BIND \
|
||||||
.scope_icache_req_valid (scope_icache_req_valid), \
|
.scope_icache_req_valid (scope_icache_req_valid), \
|
||||||
.scope_icache_req_warp_num (scope_icache_req_warp_num), \
|
.scope_icache_req_wid (scope_icache_req_wid), \
|
||||||
.scope_icache_req_addr (scope_icache_req_addr), \
|
.scope_icache_req_addr (scope_icache_req_addr), \
|
||||||
.scope_icache_req_tag (scope_icache_req_tag), \
|
.scope_icache_req_tag (scope_icache_req_tag), \
|
||||||
.scope_icache_req_ready (scope_icache_req_ready), \
|
.scope_icache_req_ready (scope_icache_req_ready), \
|
||||||
@@ -241,7 +241,7 @@
|
|||||||
|
|
||||||
`define SCOPE_SIGNALS_LSU_BIND \
|
`define SCOPE_SIGNALS_LSU_BIND \
|
||||||
.scope_dcache_req_valid (scope_dcache_req_valid), \
|
.scope_dcache_req_valid (scope_dcache_req_valid), \
|
||||||
.scope_dcache_req_warp_num (scope_dcache_req_warp_num), \
|
.scope_dcache_req_wid (scope_dcache_req_wid), \
|
||||||
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
|
.scope_dcache_req_curr_PC (scope_dcache_req_curr_PC), \
|
||||||
.scope_dcache_req_addr (scope_dcache_req_addr), \
|
.scope_dcache_req_addr (scope_dcache_req_addr), \
|
||||||
.scope_dcache_req_rw (scope_dcache_req_rw), \
|
.scope_dcache_req_rw (scope_dcache_req_rw), \
|
||||||
@@ -332,19 +332,19 @@
|
|||||||
|
|
||||||
`define SCOPE_SIGNALS_BE_BIND \
|
`define SCOPE_SIGNALS_BE_BIND \
|
||||||
.scope_decode_valid (scope_decode_valid), \
|
.scope_decode_valid (scope_decode_valid), \
|
||||||
.scope_decode_warp_num (scope_decode_warp_num), \
|
.scope_decode_wid (scope_decode_wid), \
|
||||||
.scope_decode_curr_PC (scope_decode_curr_PC), \
|
.scope_decode_curr_PC (scope_decode_curr_PC), \
|
||||||
.scope_decode_is_jal (scope_decode_is_jal), \
|
.scope_decode_is_jal (scope_decode_is_jal), \
|
||||||
.scope_decode_rs1 (scope_decode_rs1), \
|
.scope_decode_rs1 (scope_decode_rs1), \
|
||||||
.scope_decode_rs2 (scope_decode_rs2), \
|
.scope_decode_rs2 (scope_decode_rs2), \
|
||||||
.scope_execute_valid (scope_execute_valid), \
|
.scope_execute_valid (scope_execute_valid), \
|
||||||
.scope_execute_warp_num (scope_execute_warp_num), \
|
.scope_execute_wid (scope_execute_wid), \
|
||||||
.scope_execute_curr_PC (scope_execute_curr_PC), \
|
.scope_execute_curr_PC (scope_execute_curr_PC), \
|
||||||
.scope_execute_rd (scope_execute_rd), \
|
.scope_execute_rd (scope_execute_rd), \
|
||||||
.scope_execute_a (scope_execute_a), \
|
.scope_execute_a (scope_execute_a), \
|
||||||
.scope_execute_b (scope_execute_b), \
|
.scope_execute_b (scope_execute_b), \
|
||||||
.scope_writeback_valid (scope_writeback_valid), \
|
.scope_writeback_valid (scope_writeback_valid), \
|
||||||
.scope_writeback_warp_num (scope_writeback_warp_num), \
|
.scope_writeback_wid (scope_writeback_wid), \
|
||||||
.scope_writeback_curr_PC(scope_writeback_curr_PC), \
|
.scope_writeback_curr_PC(scope_writeback_curr_PC), \
|
||||||
.scope_writeback_wb (scope_writeback_wb), \
|
.scope_writeback_wb (scope_writeback_wb), \
|
||||||
.scope_writeback_rd (scope_writeback_rd), \
|
.scope_writeback_rd (scope_writeback_rd), \
|
||||||
|
|||||||
73
hw/rtl/VX_scoreboard.v
Normal file
73
hw/rtl/VX_scoreboard.v
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
module VX_scoreboard  #( // issue-stage scoreboard: one in-use bit per (warp, register) plus the issue table of in-flight instruction metadata
|
||||||
|
    parameter CORE_ID = 0 // only used to tag the debug trace at the bottom
|
||||||
|
) (
|
||||||
|
    input wire clk,
|
||||||
|
    input wire reset,
|
||||||
|
|
||||||
|
    VX_decode_if    decode_if,
|
||||||
|
    VX_wb_if        writeback_if,  
|
||||||
|
    VX_cmt_to_issue_if cmt_to_issue_if,
|
||||||
|
    input wire      ex_busy, // contributes directly to schedule_delay
|
||||||
|
    output wire [`ISTAG_BITS-1:0] issue_tag, // wired to the issue table's write_addr port; presumably the slot allocated for the issuing instruction - confirm in VX_cam_buffer
|
||||||
|
    output wire     schedule_delay // asserted on ex_busy, a register hazard, or a full issue table
|
||||||
|
);
|
||||||
|
    reg [`NUM_REGS-1:0] inuse_reg_mask [`NUM_WARPS-1:0]; // per warp: bit r is set while register r has a pending write
|
||||||
|
|
||||||
|
    wire [`NUM_REGS-1:0] inuse_mask = inuse_reg_mask[decode_if.wid] & decode_if.reg_use_mask; // pending registers that this instruction also uses
|
||||||
|
    wire inuse_hazard = (inuse_mask != 0);
|
||||||
|
|
||||||
|
    wire issue_buf_full;
|
||||||
|
|
||||||
|
    assign schedule_delay = ex_busy || inuse_hazard || issue_buf_full;
|
||||||
|
|
||||||
|
    wire issue_fire = decode_if.valid && decode_if.ready; // decode handshake completes
|
||||||
|
|
||||||
|
    wire reserve_rd = issue_fire && (decode_if.wb != 0); // only instructions that write back reserve rd
|
||||||
|
|
||||||
|
    wire release_rd = writeback_if.valid; // NOTE(review): gated on valid only, not valid && ready - confirm writeback cannot be back-pressured
|
||||||
|
|
||||||
|
    always @(posedge clk) begin
|
||||||
|
        if (reset) begin
|
||||||
|
            for (integer i = 0; i < `NUM_WARPS; i++) begin
|
||||||
|
                inuse_reg_mask[i] <= `NUM_REGS'(0);
|
||||||
|
            end            
|
||||||
|
        end else begin
|
||||||
|
            if (reserve_rd) begin
|
||||||
|
                inuse_reg_mask[decode_if.wid][decode_if.rd] <= 1;                
|
||||||
|
            end       
|
||||||
|
            if (release_rd) begin // placed after reserve_rd: if both hit the same bit in one cycle, this later nonblocking assignment wins
|
||||||
|
                assert(inuse_reg_mask[writeback_if.wid][writeback_if.rd] != 0); // a writeback should only hit a reserved register
|
||||||
|
                inuse_reg_mask[writeback_if.wid][writeback_if.rd] <= 0; // the whole register is released at once (no per-thread tracking)
|
||||||
|
            end            
|
||||||
|
        end
|
||||||
|
    end
|
||||||
|
|
||||||
|
    VX_cam_buffer #(
|
||||||
|
        .DATAW  ($bits(issue_data_t)), // one entry holds one issue_data_t
|
||||||
|
        .SIZE   (`ISSUEQ_SIZE),
|
||||||
|
        .RPORTS (`NUM_EXS) // seven unit ports are wired below; presumably `NUM_EXS == 7 - confirm
|
||||||
|
    ) issue_table  (
|
||||||
|
        .clk          (clk),
|
||||||
|
        .reset        (reset),
|
||||||
|
        .write_data   ({decode_if.wid, decode_if.thread_mask, decode_if.curr_PC, decode_if.rd, decode_if.wb}), // packed as wid, thread_mask, curr_PC, rd, wb
|
||||||
|
        .write_addr   (issue_tag),        
|
||||||
|
        .acquire_slot (issue_fire), 
|
||||||
|
        .release_slot ({cmt_to_issue_if.alu_valid, cmt_to_issue_if.bru_valid, cmt_to_issue_if.lsu_valid, cmt_to_issue_if.csr_valid, cmt_to_issue_if.mul_valid, cmt_to_issue_if.fpu_valid, cmt_to_issue_if.gpu_valid}), // unit order: alu, bru, lsu, csr, mul, fpu, gpu
|
||||||
|
        .read_addr    ({cmt_to_issue_if.alu_tag,   cmt_to_issue_if.bru_tag,   cmt_to_issue_if.lsu_tag,   cmt_to_issue_if.csr_tag,   cmt_to_issue_if.mul_tag,   cmt_to_issue_if.fpu_tag,   cmt_to_issue_if.gpu_tag}),
|
||||||
|
        .read_data    ({cmt_to_issue_if.alu_data,  cmt_to_issue_if.bru_data,  cmt_to_issue_if.lsu_data,  cmt_to_issue_if.csr_data,  cmt_to_issue_if.mul_data,  cmt_to_issue_if.fpu_data,  cmt_to_issue_if.gpu_data}),
|
||||||
|
        .full         (issue_buf_full) // feeds schedule_delay above
|
||||||
|
    );
|
||||||
|
|
||||||
|
`ifdef DBG_PRINT_PIPELINE
|
||||||
|
    always @(posedge clk) begin // trace each cycle in which decode is valid but not accepted
|
||||||
|
        if (decode_if.valid && ~decode_if.ready) begin
|
||||||
|
            $display("%t: Core%0d-stall: wid=%0d, PC=%0h, rd=%0d, wb=%0d, ib_full=%b, inuse=%b%b%b%b, ex_busy=%b",
|
||||||
|
                    $time, CORE_ID, decode_if.wid, decode_if.curr_PC, decode_if.rd, decode_if.wb, issue_buf_full,
|
||||||
|
                    inuse_mask[decode_if.rd], inuse_mask[decode_if.rs1], inuse_mask[decode_if.rs2], inuse_mask[decode_if.rs3], ex_busy);
|
||||||
|
        end
|
||||||
|
    end
|
||||||
|
`endif
|
||||||
|
|
||||||
|
endmodule
|
||||||
59
hw/rtl/VX_types.vh
Normal file
59
hw/rtl/VX_types.vh
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
`ifndef VX_TYPES
|
||||||
|
`define VX_TYPES
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
typedef struct packed { // metadata stored per issued instruction; field order matches the write_data concatenation in VX_scoreboard
|
||||||
|
    logic [`NW_BITS-1:0]     wid; // warp id
|
||||||
|
    logic [`NUM_THREADS-1:0] thread_mask; // one bit per thread
|
||||||
|
    logic [31:0]             curr_PC;
|
||||||
|
    logic [`NR_BITS-1:0]     rd; // destination register index
|
||||||
|
    logic                    wb; // non-zero when the instruction writes rd (VX_scoreboard reserves rd only if wb != 0)
|
||||||
|
} issue_data_t;
|
||||||
|
|
||||||
|
typedef struct packed { // presumably classification flags for a floating-point operand; no use is visible here - confirm against the FPU code
|
||||||
|
    logic is_normal;
|
||||||
|
    logic is_zero;
|
||||||
|
    logic is_subnormal;
|
||||||
|
    logic is_inf;
|
||||||
|
    logic is_nan;
|
||||||
|
    logic is_signaling;
|
||||||
|
    logic is_quiet;
|
||||||
|
} fp_type_t;
|
||||||
|
|
||||||
|
typedef struct packed { // FP exception flags; packed, so NV is the MSB and NX the LSB (same order as RISC-V fcsr.fflags)
|
||||||
|
    logic NV; // Invalid
|
||||||
|
    logic DZ; // Divide by zero
|
||||||
|
    logic OF; // Overflow
|
||||||
|
    logic UF; // Underflow
|
||||||
|
    logic NX; // Inexact
|
||||||
|
} fflags_t;
|
||||||
|
|
||||||
|
`define FFG_BITS $bits(fflags_t)
|
||||||
|
|
||||||
|
typedef struct packed { // thread-mask change request; VX_warp_sched applies it when valid is set
|
||||||
|
    logic                    valid;
|
||||||
|
    logic [`NUM_THREADS-1:0] thread_mask; // new mask for the warp; VX_warp_sched deactivates the warp when this is 0
|
||||||
|
} gpu_tmc_t;
|
||||||
|
|
||||||
|
typedef struct packed { // warp-spawn request; VX_warp_sched applies it when valid is set
|
||||||
|
    logic                   valid;
|
||||||
|
    logic [`NUM_WARPS-1:0]  wmask; // one bit per warp; VX_warp_sched copies it into warp_active
|
||||||
|
    logic [31:0]            pc; // latched by VX_warp_sched as use_wspawn_pc
|
||||||
|
} gpu_wspawn_t;
|
||||||
|
|
||||||
|
typedef struct packed { // split request; VX_warp_sched applies it when valid is set
|
||||||
|
    logic                    valid;
|
||||||
|
    logic                    diverged; // when set, VX_warp_sched loads then_mask into the warp's thread mask
|
||||||
|
    logic [`NUM_THREADS-1:0] then_mask;
|
||||||
|
    logic [`NUM_THREADS-1:0] else_mask; // consumer not visible here; presumably saved for the matching join - confirm
|
||||||
|
    logic [31:0]             pc; // consumer not visible here - confirm
|
||||||
|
} gpu_split_t;
|
||||||
|
|
||||||
|
typedef struct packed { // barrier request; VX_warp_sched applies it when valid is set
|
||||||
|
    logic                   valid;
|
||||||
|
    logic [`NB_BITS-1:0]    id; // indexes barrier_stall_mask in VX_warp_sched
|
||||||
|
    logic [`NW_BITS:0]      num_warps; // one bit wider than a warp id; presumably a count that can reach `NUM_WARPS - confirm
|
||||||
|
} gpu_barrier_t;
|
||||||
|
|
||||||
|
`endif
|
||||||
@@ -16,64 +16,44 @@ module VX_warp_sched #(
|
|||||||
|
|
||||||
output wire busy
|
output wire busy
|
||||||
);
|
);
|
||||||
wire update_use_wspawn;
|
|
||||||
wire update_visible_active;
|
|
||||||
|
|
||||||
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
|
||||||
|
|
||||||
wire join_fall;
|
wire join_fall;
|
||||||
wire [31:0] join_pc;
|
wire [31:0] join_pc;
|
||||||
wire [`NUM_THREADS-1:0] join_tm;
|
wire [`NUM_THREADS-1:0] join_tm;
|
||||||
|
|
||||||
reg [`NUM_WARPS-1:0] warp_active;
|
reg [`NUM_WARPS-1:0] warp_active;
|
||||||
reg [`NUM_WARPS-1:0] warp_stalled;
|
reg [`NUM_WARPS-1:0] warp_stalled;
|
||||||
|
reg [`NUM_WARPS-1:0] visible_active;
|
||||||
reg [`NUM_WARPS-1:0] visible_active;
|
wire update_visible_active;
|
||||||
wire [`NUM_WARPS-1:0] use_active;
|
|
||||||
|
|
||||||
reg [`NUM_WARPS-1:0] warp_lock;
|
reg [`NUM_WARPS-1:0] warp_lock;
|
||||||
|
|
||||||
wire wstall_this_cycle;
|
|
||||||
|
|
||||||
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
|
reg [`NUM_THREADS-1:0] thread_masks[`NUM_WARPS-1:0];
|
||||||
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
reg [31:0] warp_pcs[`NUM_WARPS-1:0];
|
||||||
|
|
||||||
// barriers
|
// barriers
|
||||||
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
reg [`NUM_WARPS-1:0] barrier_stall_mask[`NUM_BARRIERS-1:0];
|
||||||
wire [`NUM_WARPS-1:0] b_mask;
|
|
||||||
wire [`NW_BITS:0] b_count;
|
|
||||||
|
|
||||||
wire reached_barrier_limit;
|
wire reached_barrier_limit;
|
||||||
|
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
||||||
|
|
||||||
// wspawn
|
// wspawn
|
||||||
reg [31:0] use_wspawn_pc;
|
reg [31:0] use_wspawn_pc;
|
||||||
reg [`NUM_WARPS-1:0] use_wspawn;
|
reg [`NUM_WARPS-1:0] use_wspawn;
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_to_schedule;
|
|
||||||
wire schedule;
|
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
|
||||||
wire [31:0] warp_pc;
|
wire [31:0] warp_pc;
|
||||||
|
wire [`NW_BITS-1:0] warp_to_schedule;
|
||||||
wire scheduled_warp;
|
wire scheduled_warp;
|
||||||
|
|
||||||
wire hazard;
|
wire stall_out;
|
||||||
wire global_stall;
|
wire global_stall;
|
||||||
|
wire real_schedule;
|
||||||
|
|
||||||
wire real_schedule;
|
reg didnt_split;
|
||||||
|
|
||||||
wire [31:0] new_pc;
|
|
||||||
|
|
||||||
reg [`NUM_WARPS-1:0] total_barrier_stall;
|
|
||||||
|
|
||||||
reg didnt_split;
|
|
||||||
|
|
||||||
wire stall;
|
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
integer i;
|
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
for (i = 0; i < `NUM_BARRIERS; i++) begin
|
for (integer i = 0; i < `NUM_BARRIERS; i++) begin
|
||||||
barrier_stall_mask[i] <= 0;
|
barrier_stall_mask[i] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
@@ -87,92 +67,92 @@ module VX_warp_sched #(
|
|||||||
didnt_split <= 0;
|
didnt_split <= 0;
|
||||||
warp_lock <= 0;
|
warp_lock <= 0;
|
||||||
|
|
||||||
for (i = 1; i < `NUM_WARPS; i++) begin
|
for (integer i = 1; i < `NUM_WARPS; i++) begin
|
||||||
warp_pcs[i] <= 0;
|
warp_pcs[i] <= 0;
|
||||||
warp_active[i] <= 0; // Activating first warp
|
warp_active[i] <= 0; // Activating first warp
|
||||||
visible_active[i] <= 0; // Activating first warp
|
visible_active[i] <= 0; // Activating first warp
|
||||||
thread_masks[i] <= 1; // Activating first thread in first warp
|
thread_masks[i] <= 1; // Activating first thread in first warp
|
||||||
end
|
end
|
||||||
|
end else begin
|
||||||
end else begin
|
if (warp_ctl_if.wspawn.valid) begin
|
||||||
|
warp_active <= warp_ctl_if.wspawn.wmask;
|
||||||
if (warp_ctl_if.wspawn) begin
|
use_wspawn <= warp_ctl_if.wspawn.wmask & (~`NUM_WARPS'(1));
|
||||||
warp_active <= warp_ctl_if.wspawn_wmask;
|
use_wspawn_pc <= warp_ctl_if.wspawn.pc;
|
||||||
use_wspawn <= warp_ctl_if.wspawn_wmask & (~`NUM_WARPS'(1));
|
|
||||||
use_wspawn_pc <= warp_ctl_if.wspawn_pc;
|
|
||||||
end
|
end
|
||||||
|
|
||||||
if (warp_ctl_if.is_barrier) begin
|
if (warp_ctl_if.barrier.valid) begin
|
||||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||||
if (reached_barrier_limit) begin
|
if (reached_barrier_limit) begin
|
||||||
barrier_stall_mask[warp_ctl_if.barrier_id] <= 0;
|
barrier_stall_mask[warp_ctl_if.barrier.id] <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
barrier_stall_mask[warp_ctl_if.barrier_id][warp_ctl_if.warp_num] <= 1;
|
barrier_stall_mask[warp_ctl_if.barrier.id][warp_ctl_if.wid] <= 1;
|
||||||
end
|
end
|
||||||
end else if (warp_ctl_if.change_mask) begin
|
end else if (warp_ctl_if.tmc.valid) begin
|
||||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.thread_mask;
|
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.tmc.thread_mask;
|
||||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||||
if (0 == warp_ctl_if.thread_mask) begin
|
if (0 == warp_ctl_if.tmc.thread_mask) begin
|
||||||
warp_active[warp_ctl_if.warp_num] <= 0;
|
warp_active[warp_ctl_if.wid] <= 0;
|
||||||
visible_active[warp_ctl_if.warp_num] <= 0;
|
visible_active[warp_ctl_if.wid] <= 0;
|
||||||
end
|
end
|
||||||
end else if (join_if.is_join && !didnt_split) begin
|
end else if (join_if.is_join && !didnt_split) begin
|
||||||
if (!join_fall) begin
|
if (!join_fall) begin
|
||||||
warp_pcs[join_if.warp_num] <= join_pc;
|
warp_pcs[join_if.wid] <= join_pc;
|
||||||
end
|
end
|
||||||
thread_masks[join_if.warp_num] <= join_tm;
|
thread_masks[join_if.wid] <= join_tm;
|
||||||
didnt_split <= 0;
|
didnt_split <= 0;
|
||||||
end else if (warp_ctl_if.is_split) begin
|
end else if (warp_ctl_if.split.valid) begin
|
||||||
warp_stalled[warp_ctl_if.warp_num] <= 0;
|
warp_stalled[warp_ctl_if.wid] <= 0;
|
||||||
if (warp_ctl_if.do_split) begin
|
if (warp_ctl_if.split.diverged) begin
|
||||||
thread_masks[warp_ctl_if.warp_num] <= warp_ctl_if.split_new_mask;
|
thread_masks[warp_ctl_if.wid] <= warp_ctl_if.split.then_mask;
|
||||||
didnt_split <= 0;
|
didnt_split <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
didnt_split <= 1;
|
didnt_split <= 1;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
if (update_use_wspawn) begin
|
if (use_wspawn[warp_to_schedule] && !global_stall) begin
|
||||||
use_wspawn[warp_to_schedule] <= 0;
|
use_wspawn[warp_to_schedule] <= 0;
|
||||||
thread_masks[warp_to_schedule] <= 1;
|
thread_masks[warp_to_schedule] <= 1;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Stalling the scheduling of warps
|
// Stalling the scheduling of warps
|
||||||
if (wstall_if.wstall) begin
|
if (wstall_if.wstall) begin
|
||||||
warp_stalled[wstall_if.warp_num] <= 1;
|
warp_stalled[wstall_if.wid] <= 1;
|
||||||
visible_active[wstall_if.warp_num] <= 0;
|
visible_active[wstall_if.wid] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Refilling active warps
|
// Refilling active warps
|
||||||
if (update_visible_active) begin
|
if (update_visible_active) begin
|
||||||
visible_active <= warp_active & (~warp_stalled) & (~total_barrier_stall) & ~warp_lock;
|
visible_active <= warp_active & ~warp_stalled & ~total_barrier_stall & ~warp_lock;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Don't change state if stall
|
// Don't change state if stall
|
||||||
if (!global_stall && real_schedule && (thread_mask != 0)) begin
|
if (!global_stall && real_schedule && (thread_mask != 0)) begin
|
||||||
visible_active[warp_to_schedule] <= 0;
|
visible_active[warp_to_schedule] <= 0;
|
||||||
warp_pcs[warp_to_schedule] <= new_pc;
|
warp_pcs[warp_to_schedule] <= warp_pc + 4;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Branch
|
// Branch
|
||||||
if (branch_ctl_if.valid) begin
|
if (branch_ctl_if.valid) begin
|
||||||
if (branch_ctl_if.taken) begin
|
if (branch_ctl_if.taken) begin
|
||||||
warp_pcs[branch_ctl_if.warp_num] <= branch_ctl_if.dest;
|
warp_pcs[branch_ctl_if.wid] <= branch_ctl_if.dest;
|
||||||
end
|
end
|
||||||
warp_stalled[branch_ctl_if.warp_num] <= 0;
|
warp_stalled[branch_ctl_if.wid] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
// Lock/Release
|
// Lock/Release
|
||||||
if (scheduled_warp && !stall) begin
|
if (scheduled_warp && !stall_out) begin
|
||||||
warp_lock[warp_num] <= 1;
|
warp_lock[warp_to_schedule] <= 1;
|
||||||
end
|
end
|
||||||
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
if (ifetch_rsp_if.valid && ifetch_rsp_if.ready) begin
|
||||||
warp_lock[ifetch_rsp_if.warp_num] <= 0;
|
warp_lock[ifetch_rsp_if.wid] <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
wire [`NUM_WARPS-1:0] b_mask = barrier_stall_mask[warp_ctl_if.barrier.id][`NUM_WARPS-1:0];
|
||||||
|
wire [`NW_BITS:0] b_count;
|
||||||
|
|
||||||
VX_countones #(
|
VX_countones #(
|
||||||
.N(`NUM_WARPS)
|
.N(`NUM_WARPS)
|
||||||
) barrier_count (
|
) barrier_count (
|
||||||
@@ -188,26 +168,24 @@ module VX_warp_sched #(
|
|||||||
.valids(visible_active),
|
.valids(visible_active),
|
||||||
.count (count_visible_active)
|
.count (count_visible_active)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign b_mask = barrier_stall_mask[warp_ctl_if.barrier_id][`NUM_WARPS-1:0];
|
|
||||||
|
|
||||||
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier_num_warps);
|
assign reached_barrier_limit = (b_count == warp_ctl_if.barrier.num_warps);
|
||||||
|
|
||||||
assign wstall_this_cycle = wstall_if.wstall && (wstall_if.warp_num == warp_to_schedule); // Maybe bug
|
|
||||||
|
|
||||||
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
assign total_barrier_stall = barrier_stall_mask[0] | barrier_stall_mask[1] | barrier_stall_mask[2] | barrier_stall_mask[3];
|
||||||
|
|
||||||
assign update_visible_active = (0 == count_visible_active) && !(stall || wstall_this_cycle || hazard || join_if.is_join);
|
wire [(1+32+`NUM_THREADS-1):0] ipdom[`NUM_WARPS-1:0];
|
||||||
|
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.wid]};
|
||||||
|
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split.pc, warp_ctl_if.split.else_mask};
|
||||||
|
|
||||||
wire [(1+32+`NUM_THREADS-1):0] q1 = {1'b1, 32'b0, thread_masks[warp_ctl_if.warp_num]};
|
assign {join_fall, join_pc, join_tm} = ipdom[join_if.wid];
|
||||||
wire [(1+32+`NUM_THREADS-1):0] q2 = {1'b0, warp_ctl_if.split_save_pc, warp_ctl_if.split_later_mask};
|
|
||||||
|
|
||||||
assign {join_fall, join_pc, join_tm} = ipdom[join_if.warp_num];
|
for (genvar i = 0; i < `NUM_WARPS; i++) begin
|
||||||
|
wire push = warp_ctl_if.split.valid
|
||||||
|
&& warp_ctl_if.split.diverged
|
||||||
|
&& (i == warp_ctl_if.wid);
|
||||||
|
|
||||||
genvar i;
|
wire pop = join_if.is_join
|
||||||
for (i = 0; i < `NUM_WARPS; i++) begin
|
&& (i == join_if.wid);
|
||||||
wire push = warp_ctl_if.is_split && warp_ctl_if.do_split && (i == warp_ctl_if.warp_num);
|
|
||||||
wire pop = join_if.is_join && (i == join_if.warp_num);
|
|
||||||
|
|
||||||
VX_ipdom_stack #(
|
VX_ipdom_stack #(
|
||||||
.WIDTH(1+32+`NUM_THREADS),
|
.WIDTH(1+32+`NUM_THREADS),
|
||||||
@@ -217,37 +195,40 @@ module VX_warp_sched #(
|
|||||||
.reset(reset),
|
.reset(reset),
|
||||||
.push (push),
|
.push (push),
|
||||||
.pop (pop),
|
.pop (pop),
|
||||||
.d (ipdom[i]),
|
|
||||||
.q1 (q1),
|
.q1 (q1),
|
||||||
.q2 (q2),
|
.q2 (q2),
|
||||||
|
.d (ipdom[i]),
|
||||||
`UNUSED_PIN (empty),
|
`UNUSED_PIN (empty),
|
||||||
`UNUSED_PIN (full)
|
`UNUSED_PIN (full)
|
||||||
);
|
);
|
||||||
end
|
end
|
||||||
|
|
||||||
wire should_bra = (branch_ctl_if.valid && branch_ctl_if.taken && (warp_to_schedule == branch_ctl_if.warp_num));
|
wire schedule;
|
||||||
|
|
||||||
assign hazard = should_bra && schedule;
|
wire branch_hazard = schedule
|
||||||
|
&& branch_ctl_if.valid
|
||||||
|
&& branch_ctl_if.taken
|
||||||
|
&& (branch_ctl_if.wid == warp_to_schedule);
|
||||||
|
|
||||||
assign real_schedule = schedule && !warp_stalled[warp_to_schedule] && !total_barrier_stall[warp_to_schedule] && !warp_lock[0];
|
assign real_schedule = schedule
|
||||||
|
&& !warp_stalled[warp_to_schedule]
|
||||||
|
&& !total_barrier_stall[warp_to_schedule]
|
||||||
|
&& !warp_lock[0];
|
||||||
|
|
||||||
assign global_stall = stall || wstall_this_cycle || hazard || !real_schedule || join_if.is_join;
|
wire wstall_this_cycle = wstall_if.wstall && (wstall_if.wid == warp_to_schedule); // Maybe bug
|
||||||
|
|
||||||
assign scheduled_warp = !(wstall_this_cycle || hazard || !real_schedule || join_if.is_join) && !reset;
|
assign update_visible_active = (0 == count_visible_active) && !(stall_out || wstall_this_cycle || branch_hazard || join_if.is_join);
|
||||||
|
|
||||||
wire real_use_wspawn = use_wspawn[warp_to_schedule];
|
assign global_stall = stall_out || wstall_this_cycle || branch_hazard || !real_schedule || join_if.is_join;
|
||||||
|
|
||||||
assign warp_pc = real_use_wspawn ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
assign scheduled_warp = !(wstall_this_cycle || branch_hazard || !real_schedule || join_if.is_join) && !reset;
|
||||||
|
|
||||||
|
assign warp_pc = use_wspawn[warp_to_schedule] ? use_wspawn_pc : warp_pcs[warp_to_schedule];
|
||||||
|
|
||||||
assign thread_mask = global_stall ? 0 : (real_use_wspawn ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
|
assign thread_mask = global_stall ? 0 : (use_wspawn[warp_to_schedule] ? `NUM_THREADS'(1) : thread_masks[warp_to_schedule]);
|
||||||
|
|
||||||
assign warp_num = warp_to_schedule;
|
wire [`NUM_WARPS-1:0] use_active = (count_visible_active != 0) ? visible_active :
|
||||||
|
(warp_active & ~warp_stalled & ~total_barrier_stall & ~warp_lock);
|
||||||
assign update_use_wspawn = use_wspawn[warp_to_schedule] && !global_stall;
|
|
||||||
|
|
||||||
assign new_pc = warp_pc + 4;
|
|
||||||
|
|
||||||
assign use_active = (count_visible_active != 0) ? visible_active : (warp_active & (~warp_stalled) & (~total_barrier_stall) & (~warp_lock));
|
|
||||||
|
|
||||||
// Choosing a warp to schedule
|
// Choosing a warp to schedule
|
||||||
VX_fixed_arbiter #(
|
VX_fixed_arbiter #(
|
||||||
@@ -261,17 +242,17 @@ module VX_warp_sched #(
|
|||||||
`UNUSED_PIN (grant_onehot)
|
`UNUSED_PIN (grant_onehot)
|
||||||
);
|
);
|
||||||
|
|
||||||
assign stall = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
assign stall_out = ~ifetch_req_if.ready && ifetch_req_if.valid;
|
||||||
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
.N(1 + `NUM_THREADS + 32 + `NW_BITS)
|
||||||
) fetch_reg (
|
) fetch_reg (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
.stall (stall),
|
.stall (stall_out),
|
||||||
.flush (0),
|
.flush (0),
|
||||||
.in ({(| thread_mask), thread_mask, warp_pc, warp_num}),
|
.in ({(| thread_mask), thread_mask, warp_pc, warp_to_schedule}),
|
||||||
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.warp_num})
|
.out ({ifetch_req_if.valid, ifetch_req_if.thread_mask, ifetch_req_if.curr_PC, ifetch_req_if.wid})
|
||||||
);
|
);
|
||||||
|
|
||||||
assign busy = (warp_active != 0);
|
assign busy = (warp_active != 0);
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ module VX_writeback #(
|
|||||||
|
|
||||||
// inputs
|
// inputs
|
||||||
VX_exu_to_cmt_if alu_commit_if,
|
VX_exu_to_cmt_if alu_commit_if,
|
||||||
|
VX_exu_to_cmt_if bru_commit_if,
|
||||||
VX_exu_to_cmt_if lsu_commit_if,
|
VX_exu_to_cmt_if lsu_commit_if,
|
||||||
VX_exu_to_cmt_if csr_commit_if,
|
VX_exu_to_cmt_if csr_commit_if,
|
||||||
VX_exu_to_cmt_if mul_commit_if,
|
VX_exu_to_cmt_if mul_commit_if,
|
||||||
@@ -20,26 +21,24 @@ module VX_writeback #(
|
|||||||
);
|
);
|
||||||
reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n;
|
reg [`ISSUEQ_SIZE-1:0] wb_valid_table, wb_valid_table_n;
|
||||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n;
|
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0][31:0] wb_data_table, wb_data_table_n;
|
||||||
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_warp_num_table, wb_warp_num_table_n;
|
reg [`ISSUEQ_SIZE-1:0][`NW_BITS-1:0] wb_wid_table, wb_wid_table_n;
|
||||||
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n;
|
reg [`ISSUEQ_SIZE-1:0][`NUM_THREADS-1:0] wb_thread_mask_table, wb_thread_mask_table_n;
|
||||||
reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n;
|
reg [`ISSUEQ_SIZE-1:0][31:0] wb_curr_PC_table, wb_curr_PC_table_n;
|
||||||
reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n;
|
reg [`ISSUEQ_SIZE-1:0][`NR_BITS-1:0] wb_rd_table, wb_rd_table_n;
|
||||||
|
|
||||||
|
reg wb_valid, wb_valid_n;
|
||||||
reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n;
|
reg [`NUM_THREADS-1:0][31:0] wb_data, wb_data_n;
|
||||||
reg [`NW_BITS-1:0] wb_warp_num, wb_warp_num_n;
|
reg [`NW_BITS-1:0] wb_wid, wb_wid_n;
|
||||||
reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n;
|
reg [`NUM_THREADS-1:0] wb_thread_mask, wb_thread_mask_n;
|
||||||
reg [31:0] wb_curr_PC, wb_curr_PC_n;
|
reg [31:0] wb_curr_PC, wb_curr_PC_n;
|
||||||
reg [`NR_BITS-1:0] wb_rd, wb_rd_n;
|
reg [`NR_BITS-1:0] wb_rd, wb_rd_n;
|
||||||
|
|
||||||
reg [`ISTAG_BITS-1:0] wb_index;
|
reg [`ISTAG_BITS-1:0] wb_index;
|
||||||
reg [`ISTAG_BITS-1:0] wb_index_n;
|
reg [`ISTAG_BITS-1:0] wb_index_n;
|
||||||
|
|
||||||
reg wb_valid;
|
|
||||||
reg wb_valid_n;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
wb_valid_table_n = wb_valid_table;
|
wb_valid_table_n = wb_valid_table;
|
||||||
wb_warp_num_table_n = wb_warp_num_table;
|
wb_wid_table_n = wb_wid_table;
|
||||||
wb_thread_mask_table_n = wb_thread_mask_table;
|
wb_thread_mask_table_n = wb_thread_mask_table;
|
||||||
wb_curr_PC_table_n = wb_curr_PC_table;
|
wb_curr_PC_table_n = wb_curr_PC_table;
|
||||||
wb_rd_table_n = wb_rd_table;
|
wb_rd_table_n = wb_rd_table;
|
||||||
@@ -53,16 +52,25 @@ module VX_writeback #(
|
|||||||
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
wb_valid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wb;
|
||||||
wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask;
|
wb_thread_mask_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.thread_mask;
|
||||||
wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data;
|
wb_data_table_n [alu_commit_if.issue_tag] = alu_commit_if.data;
|
||||||
wb_warp_num_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.warp_num;
|
wb_wid_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.wid;
|
||||||
wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC;
|
wb_curr_PC_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.curr_PC;
|
||||||
wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd;
|
wb_rd_table_n [alu_commit_if.issue_tag] = cmt_to_issue_if.alu_data.rd;
|
||||||
end
|
end
|
||||||
|
|
||||||
|
if (bru_commit_if.valid) begin
|
||||||
|
wb_valid_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.wb;
|
||||||
|
wb_thread_mask_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.thread_mask;
|
||||||
|
wb_data_table_n [bru_commit_if.issue_tag] = bru_commit_if.data;
|
||||||
|
wb_wid_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.wid;
|
||||||
|
wb_curr_PC_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.curr_PC;
|
||||||
|
wb_rd_table_n [bru_commit_if.issue_tag] = cmt_to_issue_if.bru_data.rd;
|
||||||
|
end
|
||||||
|
|
||||||
if (lsu_commit_if.valid) begin
|
if (lsu_commit_if.valid) begin
|
||||||
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
wb_valid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wb;
|
||||||
wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask;
|
wb_thread_mask_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.thread_mask;
|
||||||
wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data;
|
wb_data_table_n [lsu_commit_if.issue_tag] = lsu_commit_if.data;
|
||||||
wb_warp_num_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.warp_num;
|
wb_wid_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.wid;
|
||||||
wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC;
|
wb_curr_PC_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.curr_PC;
|
||||||
wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd;
|
wb_rd_table_n [lsu_commit_if.issue_tag] = cmt_to_issue_if.lsu_data.rd;
|
||||||
end
|
end
|
||||||
@@ -71,7 +79,7 @@ module VX_writeback #(
|
|||||||
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
wb_valid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wb;
|
||||||
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask;
|
wb_thread_mask_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.thread_mask;
|
||||||
wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data;
|
wb_data_table_n [csr_commit_if.issue_tag] = csr_commit_if.data;
|
||||||
wb_warp_num_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.warp_num;
|
wb_wid_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.wid;
|
||||||
wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC;
|
wb_curr_PC_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.curr_PC;
|
||||||
wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd;
|
wb_rd_table_n [csr_commit_if.issue_tag] = cmt_to_issue_if.csr_data.rd;
|
||||||
end
|
end
|
||||||
@@ -80,7 +88,7 @@ module VX_writeback #(
|
|||||||
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
wb_valid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wb;
|
||||||
wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask;
|
wb_thread_mask_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.thread_mask;
|
||||||
wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data;
|
wb_data_table_n [mul_commit_if.issue_tag] = mul_commit_if.data;
|
||||||
wb_warp_num_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.warp_num;
|
wb_wid_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.wid;
|
||||||
wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC;
|
wb_curr_PC_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.curr_PC;
|
||||||
wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd;
|
wb_rd_table_n [mul_commit_if.issue_tag] = cmt_to_issue_if.mul_data.rd;
|
||||||
end
|
end
|
||||||
@@ -89,7 +97,7 @@ module VX_writeback #(
|
|||||||
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
wb_valid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wb;
|
||||||
wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask;
|
wb_thread_mask_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.thread_mask;
|
||||||
wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data;
|
wb_data_table_n [fpu_commit_if.issue_tag] = fpu_commit_if.data;
|
||||||
wb_warp_num_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.warp_num;
|
wb_wid_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.wid;
|
||||||
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
wb_curr_PC_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.curr_PC;
|
||||||
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
wb_rd_table_n [fpu_commit_if.issue_tag] = cmt_to_issue_if.fpu_data.rd;
|
||||||
end
|
end
|
||||||
@@ -98,23 +106,25 @@ module VX_writeback #(
|
|||||||
wb_valid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wb;
|
wb_valid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wb;
|
||||||
wb_thread_mask_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.thread_mask;
|
wb_thread_mask_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.thread_mask;
|
||||||
wb_data_table_n [gpu_commit_if.issue_tag] = gpu_commit_if.data;
|
wb_data_table_n [gpu_commit_if.issue_tag] = gpu_commit_if.data;
|
||||||
wb_warp_num_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.warp_num;
|
wb_wid_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.wid;
|
||||||
wb_curr_PC_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.curr_PC;
|
wb_curr_PC_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.curr_PC;
|
||||||
wb_rd_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.rd;
|
wb_rd_table_n [gpu_commit_if.issue_tag] = cmt_to_issue_if.gpu_data.rd;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
integer i;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
wb_index_n = 0;
|
wb_index_n = 0;
|
||||||
wb_valid_n = 0;
|
wb_valid_n = 0;
|
||||||
for (i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
|
wb_thread_mask_n = {`NUM_THREADS{1'bx}};
|
||||||
|
wb_wid_n = {`NW_BITS{1'bx}};
|
||||||
|
wb_curr_PC_n = {32{1'bx}};
|
||||||
|
wb_data_n = {(`NUM_THREADS * 32){1'bx}};
|
||||||
|
for (integer i = `ISSUEQ_SIZE-1; i >= 0; i--) begin
|
||||||
if (wb_valid_table_n[i]) begin
|
if (wb_valid_table_n[i]) begin
|
||||||
wb_index_n = `ISTAG_BITS'(i);
|
wb_index_n = `ISTAG_BITS'(i);
|
||||||
wb_valid_n = 1;
|
wb_valid_n = 1;
|
||||||
wb_thread_mask_n= wb_thread_mask_table_n[i];
|
wb_thread_mask_n= wb_thread_mask_table_n[i];
|
||||||
wb_warp_num_n = wb_warp_num_table_n[i];
|
wb_wid_n = wb_wid_table_n[i];
|
||||||
wb_curr_PC_n = wb_curr_PC_table_n[i];
|
wb_curr_PC_n = wb_curr_PC_table_n[i];
|
||||||
wb_rd_n = wb_rd_table_n[i];
|
wb_rd_n = wb_rd_table_n[i];
|
||||||
wb_data_n = wb_data_table_n[i];
|
wb_data_n = wb_data_table_n[i];
|
||||||
@@ -130,15 +140,15 @@ module VX_writeback #(
|
|||||||
end else begin
|
end else begin
|
||||||
wb_valid_table <= wb_valid_table_n;
|
wb_valid_table <= wb_valid_table_n;
|
||||||
wb_thread_mask_table <= wb_thread_mask_table_n;
|
wb_thread_mask_table <= wb_thread_mask_table_n;
|
||||||
wb_warp_num_table <= wb_warp_num_table_n;
|
wb_wid_table <= wb_wid_table_n;
|
||||||
wb_curr_PC_table <= wb_curr_PC_table_n;
|
wb_curr_PC_table <= wb_curr_PC_table_n;
|
||||||
wb_rd_table <= wb_rd_table_n;
|
wb_rd_table <= wb_rd_table_n;
|
||||||
wb_data_table <= wb_data_table_n;
|
wb_data_table <= wb_data_table_n;
|
||||||
|
|
||||||
wb_index <= wb_index_n;
|
wb_index <= wb_index_n;
|
||||||
wb_valid <= wb_valid_n && writeback_if.ready;
|
wb_valid <= wb_valid_n;
|
||||||
wb_thread_mask <= wb_thread_mask_n;
|
wb_thread_mask <= wb_thread_mask_n;
|
||||||
wb_warp_num <= wb_warp_num_n;
|
wb_wid <= wb_wid_n;
|
||||||
wb_curr_PC <= wb_curr_PC_n;
|
wb_curr_PC <= wb_curr_PC_n;
|
||||||
wb_rd <= wb_rd_n;
|
wb_rd <= wb_rd_n;
|
||||||
wb_data <= wb_data_n;
|
wb_data <= wb_data_n;
|
||||||
@@ -148,18 +158,10 @@ module VX_writeback #(
|
|||||||
// writeback request
|
// writeback request
|
||||||
assign writeback_if.valid = wb_valid;
|
assign writeback_if.valid = wb_valid;
|
||||||
assign writeback_if.thread_mask = wb_thread_mask;
|
assign writeback_if.thread_mask = wb_thread_mask;
|
||||||
assign writeback_if.warp_num = wb_warp_num;
|
assign writeback_if.wid = wb_wid;
|
||||||
assign writeback_if.curr_PC = wb_curr_PC;
|
assign writeback_if.curr_PC = wb_curr_PC;
|
||||||
assign writeback_if.rd = wb_rd;
|
assign writeback_if.rd = wb_rd;
|
||||||
assign writeback_if.data = wb_data;
|
assign writeback_if.data = wb_data;
|
||||||
|
|
||||||
// commit back-pressure
|
|
||||||
assign alu_commit_if.ready = 1'b1;
|
|
||||||
assign lsu_commit_if.ready = 1'b1;
|
|
||||||
assign csr_commit_if.ready = 1'b1;
|
|
||||||
assign mul_commit_if.ready = 1'b1;
|
|
||||||
assign fpu_commit_if.ready = 1'b1;
|
|
||||||
assign gpu_commit_if.ready = 1'b1;
|
|
||||||
|
|
||||||
// special workaround to get RISC-V tests Pass/Fail status
|
// special workaround to get RISC-V tests Pass/Fail status
|
||||||
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
reg [31:0] last_wb_value [`NUM_REGS-1:0] /* verilator public */;
|
||||||
|
|||||||
@@ -191,8 +191,7 @@ module Vortex (
|
|||||||
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
|
wire [`CLOG2(`NUM_CLUSTERS)-1:0] csr_io_request_id = `CLOG2(`NUM_CLUSTERS)'(csr_io_req_coreid >> `CLOG2(`NUM_CLUSTERS));
|
||||||
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
|
wire [`NC_BITS-1:0] per_cluster_csr_io_req_coreid = `NC_BITS'(csr_io_req_coreid);
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_CLUSTERS; i++) begin
|
||||||
for (i = 0; i < `NUM_CLUSTERS; i++) begin
|
|
||||||
VX_cluster #(
|
VX_cluster #(
|
||||||
.CLUSTER_ID(i)
|
.CLUSTER_ID(i)
|
||||||
) cluster (
|
) cluster (
|
||||||
@@ -358,7 +357,7 @@ module Vortex (
|
|||||||
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
|
wire [`NUM_CLUSTERS-1:0][`L2SNP_TAG_WIDTH-1:0] l3_snp_fwdin_tag;
|
||||||
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
|
wire [`NUM_CLUSTERS-1:0] l3_snp_fwdin_ready;
|
||||||
|
|
||||||
for (i = 0; i < `L3NUM_REQUESTS; i++) begin
|
for (genvar i = 0; i < `L3NUM_REQUESTS; i++) begin
|
||||||
// Core Request
|
// Core Request
|
||||||
assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i];
|
assign l3_core_req_valid [i] = per_cluster_dram_req_valid [i];
|
||||||
assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i];
|
assign l3_core_req_rw [i] = per_cluster_dram_req_rw [i];
|
||||||
|
|||||||
36
hw/rtl/cache/VX_bank.v
vendored
36
hw/rtl/cache/VX_bank.v
vendored
@@ -108,7 +108,7 @@ module VX_bank #(
|
|||||||
wire[31:0] debug_pc_st0;
|
wire[31:0] debug_pc_st0;
|
||||||
wire debug_wb_st0;
|
wire debug_wb_st0;
|
||||||
wire[`NR_BITS-1:0] debug_rd_st0;
|
wire[`NR_BITS-1:0] debug_rd_st0;
|
||||||
wire[`NW_BITS-1:0] debug_warp_num_st0;
|
wire[`NW_BITS-1:0] debug_wid_st0;
|
||||||
wire debug_rw_st0;
|
wire debug_rw_st0;
|
||||||
wire[WORD_SIZE-1:0] debug_byteen_st0;
|
wire[WORD_SIZE-1:0] debug_byteen_st0;
|
||||||
wire[`REQS_BITS-1:0] debug_tid_st0;
|
wire[`REQS_BITS-1:0] debug_tid_st0;
|
||||||
@@ -117,7 +117,7 @@ module VX_bank #(
|
|||||||
wire[31:0] debug_pc_st1e;
|
wire[31:0] debug_pc_st1e;
|
||||||
wire debug_wb_st1e;
|
wire debug_wb_st1e;
|
||||||
wire[`NR_BITS-1:0] debug_rd_st1e;
|
wire[`NR_BITS-1:0] debug_rd_st1e;
|
||||||
wire[`NW_BITS-1:0] debug_warp_num_st1e;
|
wire[`NW_BITS-1:0] debug_wid_st1e;
|
||||||
wire debug_rw_st1e;
|
wire debug_rw_st1e;
|
||||||
wire[WORD_SIZE-1:0] debug_byteen_st1e;
|
wire[WORD_SIZE-1:0] debug_byteen_st1e;
|
||||||
wire[`REQS_BITS-1:0] debug_tid_st1e;
|
wire[`REQS_BITS-1:0] debug_tid_st1e;
|
||||||
@@ -126,7 +126,7 @@ module VX_bank #(
|
|||||||
wire[31:0] debug_pc_st2;
|
wire[31:0] debug_pc_st2;
|
||||||
wire debug_wb_st2;
|
wire debug_wb_st2;
|
||||||
wire[`NR_BITS-1:0] debug_rd_st2;
|
wire[`NR_BITS-1:0] debug_rd_st2;
|
||||||
wire[`NW_BITS-1:0] debug_warp_num_st2;
|
wire[`NW_BITS-1:0] debug_wid_st2;
|
||||||
wire debug_rw_st2;
|
wire debug_rw_st2;
|
||||||
wire[WORD_SIZE-1:0] debug_byteen_st2;
|
wire[WORD_SIZE-1:0] debug_byteen_st2;
|
||||||
wire[`REQS_BITS-1:0] debug_tid_st2;
|
wire[`REQS_BITS-1:0] debug_tid_st2;
|
||||||
@@ -271,10 +271,9 @@ module VX_bank #(
|
|||||||
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
|
wire going_to_write_st1 [STAGE_1_CYCLES-1:0];
|
||||||
`DEBUG_END
|
`DEBUG_END
|
||||||
|
|
||||||
integer j;
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
is_fill_in_pipe = 0;
|
is_fill_in_pipe = 0;
|
||||||
for (j = 0; j < STAGE_1_CYCLES; j++) begin
|
for (integer j = 0; j < STAGE_1_CYCLES; j++) begin
|
||||||
if (is_fill_st1[j]) begin
|
if (is_fill_st1[j]) begin
|
||||||
is_fill_in_pipe = 1;
|
is_fill_in_pipe = 1;
|
||||||
end
|
end
|
||||||
@@ -360,7 +359,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_warp_num_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
assign {debug_pc_st0, debug_wb_st0, debug_rd_st0, debug_wid_st0, debug_tagid_st0, debug_rw_st0, debug_byteen_st0, debug_tid_st0} = qual_inst_meta_st0;
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
@@ -375,8 +374,7 @@ module VX_bank #(
|
|||||||
.out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
.out ({is_mrvq_st1[0] , is_snp_st1[0], snp_invalidate_st1[0], going_to_write_st1[0], valid_st1[0], addr_st1[0], wsel_st1[0], writeword_st1[0], inst_meta_st1[0], is_fill_st1[0], writedata_st1[0]})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 1; i < STAGE_1_CYCLES; i++) begin
|
||||||
for (i = 1; i < STAGE_1_CYCLES; i++) begin
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
|
.N(1 + 1 + 1 + 1 + 1 + `LINE_ADDR_WIDTH + `UP(`WORD_SELECT_WIDTH) + `WORD_WIDTH + `REQ_INST_META_WIDTH + 1 + `BANK_LINE_WIDTH)
|
||||||
) s0_1_cc (
|
) s0_1_cc (
|
||||||
@@ -446,13 +444,13 @@ module VX_bank #(
|
|||||||
.clk (clk),
|
.clk (clk),
|
||||||
.reset (reset),
|
.reset (reset),
|
||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
.debug_pc_st1e(debug_pc_st1e),
|
.debug_pc_st1e(debug_pc_st1e),
|
||||||
.debug_wb_st1e(debug_wb_st1e),
|
.debug_wb_st1e(debug_wb_st1e),
|
||||||
.debug_rd_st1e(debug_rd_st1e),
|
.debug_rd_st1e(debug_rd_st1e),
|
||||||
.debug_warp_num_st1e(debug_warp_num_st1e),
|
.debug_wid_st1e(debug_wid_st1e),
|
||||||
.debug_tagid_st1e(debug_tagid_st1e),
|
.debug_tagid_st1e(debug_tagid_st1e),
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
.stall (stall_bank_pipe),
|
.stall (stall_bank_pipe),
|
||||||
.stall_bank_pipe(stall_bank_pipe),
|
.stall_bank_pipe(stall_bank_pipe),
|
||||||
@@ -490,7 +488,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_warp_num_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
assign {debug_pc_st1e, debug_wb_st1e, debug_rd_st1e, debug_wid_st1e, debug_tagid_st1e, debug_rw_st1e, debug_byteen_st1e, debug_tid_st1e} = inst_meta_st1[STAGE_1_CYCLES-1];
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
@@ -531,7 +529,7 @@ module VX_bank #(
|
|||||||
|
|
||||||
`ifdef DBG_CORE_REQ_INFO
|
`ifdef DBG_CORE_REQ_INFO
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_warp_num_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
assign {debug_pc_st2, debug_wb_st2, debug_rd_st2, debug_wid_st2, debug_tagid_st2, debug_rw_st2, debug_byteen_st2, debug_tid_st2} = inst_meta_st2;
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
@@ -543,10 +541,10 @@ module VX_bank #(
|
|||||||
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
|
assign mrvq_push_stall = miss_add_unqual && mrvq_full;
|
||||||
|
|
||||||
wire miss_add = miss_add_unqual
|
wire miss_add = miss_add_unqual
|
||||||
&& !mrvq_full
|
&& !mrvq_full
|
||||||
&& !(cwbq_push_stall
|
&& !(cwbq_push_stall
|
||||||
|| dwbq_push_stall
|
|| dwbq_push_stall
|
||||||
|| dram_fill_req_stall);
|
|| dram_fill_req_stall);
|
||||||
|
|
||||||
assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls
|
assign recover_mrvq_state_st2 = miss_add_unqual && is_mrvq_st2; // Doesn't need to include the stalls
|
||||||
|
|
||||||
@@ -718,7 +716,9 @@ module VX_bank #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
dwbq_dual_valid_sel <= 0;
|
dwbq_dual_valid_sel <= 0;
|
||||||
end else if (dwbq_is_dwb_out && dwbq_is_snp_out && (dram_wb_req_fire || snp_rsp_fire)) begin
|
end else if (dwbq_is_dwb_out
|
||||||
|
&& dwbq_is_snp_out
|
||||||
|
&& (dram_wb_req_fire || snp_rsp_fire)) begin
|
||||||
dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel;
|
dwbq_dual_valid_sel <= ~dwbq_dual_valid_sel;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
8
hw/rtl/cache/VX_cache.v
vendored
8
hw/rtl/cache/VX_cache.v
vendored
@@ -132,12 +132,12 @@ module VX_cache #(
|
|||||||
wire[31:0] debug_core_req_use_pc;
|
wire[31:0] debug_core_req_use_pc;
|
||||||
wire debug_core_req_wb;
|
wire debug_core_req_wb;
|
||||||
wire[`NR_BITS-1:0] debug_core_req_rd;
|
wire[`NR_BITS-1:0] debug_core_req_rd;
|
||||||
wire[`NW_BITS-1:0] debug_core_req_warp_num;
|
wire[`NW_BITS-1:0] debug_core_req_wid;
|
||||||
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
|
wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_core_req_idx;
|
||||||
/* verilator lint_on UNUSED */
|
/* verilator lint_on UNUSED */
|
||||||
|
|
||||||
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
if (WORD_SIZE != `GLOBAL_BLOCK_SIZE) begin
|
||||||
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_warp_num, debug_core_req_idx} = core_req_tag[0];
|
assign {debug_core_req_use_pc, debug_core_req_wb, debug_core_req_rd, debug_core_req_wid, debug_core_req_idx} = core_req_tag[0];
|
||||||
end
|
end
|
||||||
`endif
|
`endif
|
||||||
|
|
||||||
@@ -246,10 +246,8 @@ module VX_cache #(
|
|||||||
|
|
||||||
assign dram_req_tag = dram_req_addr;
|
assign dram_req_tag = dram_req_addr;
|
||||||
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
|
assign dram_rsp_ready = (| per_bank_dram_fill_rsp_ready);
|
||||||
|
|
||||||
genvar i;
|
|
||||||
|
|
||||||
for (i = 0; i < NUM_BANKS; i++) begin
|
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
wire [NUM_REQUESTS-1:0] curr_bank_core_req_valid;
|
||||||
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
|
wire [NUM_REQUESTS-1:0] curr_bank_core_req_rw;
|
||||||
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
wire [NUM_REQUESTS-1:0][WORD_SIZE-1:0] curr_bank_core_req_byteen;
|
||||||
|
|||||||
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
@@ -18,12 +18,10 @@ module VX_cache_core_req_bank_sel #(
|
|||||||
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
|
output reg [NUM_BANKS-1:0][NUM_REQUESTS-1:0] per_bank_valid,
|
||||||
output wire core_req_ready
|
output wire core_req_ready
|
||||||
);
|
);
|
||||||
integer i;
|
|
||||||
|
|
||||||
if (NUM_BANKS == 1) begin
|
if (NUM_BANKS == 1) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
per_bank_valid = 0;
|
per_bank_valid = 0;
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
for (integer i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
per_bank_valid[0][i] = core_req_valid[i];
|
per_bank_valid[0][i] = core_req_valid[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -33,7 +31,7 @@ module VX_cache_core_req_bank_sel #(
|
|||||||
always @(*) begin
|
always @(*) begin
|
||||||
per_bank_valid = 0;
|
per_bank_valid = 0;
|
||||||
per_bank_ready_sel = {NUM_BANKS{1'b1}};
|
per_bank_ready_sel = {NUM_BANKS{1'b1}};
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
for (integer i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
|
per_bank_valid[core_req_addr[i][`BANK_SELECT_ADDR_RNG]][i] = core_req_valid[i];
|
||||||
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
|
per_bank_ready_sel[core_req_addr[i][`BANK_SELECT_ADDR_RNG]] = 0;
|
||||||
end
|
end
|
||||||
|
|||||||
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -48,14 +48,12 @@ module VX_cache_core_rsp_merge #(
|
|||||||
|
|
||||||
wire stall = ~core_rsp_ready && (| core_rsp_valid);
|
wire stall = ~core_rsp_ready && (| core_rsp_valid);
|
||||||
|
|
||||||
integer i;
|
|
||||||
|
|
||||||
if (CORE_TAG_ID_BITS != 0) begin
|
if (CORE_TAG_ID_BITS != 0) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
core_rsp_valid_unqual = 0;
|
core_rsp_valid_unqual = 0;
|
||||||
core_rsp_data_unqual = 0;
|
core_rsp_data_unqual = 0;
|
||||||
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
|
core_rsp_tag_unqual = per_bank_core_rsp_tag[main_bank_index];
|
||||||
for (i = 0; i < NUM_BANKS; i++) begin
|
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||||
if (per_bank_core_rsp_valid[i]
|
if (per_bank_core_rsp_valid[i]
|
||||||
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
|
&& (per_bank_core_rsp_tag[i][CORE_TAG_ID_BITS-1:0] == per_bank_core_rsp_tag[main_bank_index][CORE_TAG_ID_BITS-1:0])) begin
|
||||||
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
core_rsp_valid_unqual[per_bank_core_rsp_tid[i]] = 1;
|
||||||
@@ -71,7 +69,7 @@ module VX_cache_core_rsp_merge #(
|
|||||||
core_rsp_valid_unqual = 0;
|
core_rsp_valid_unqual = 0;
|
||||||
core_rsp_data_unqual = 0;
|
core_rsp_data_unqual = 0;
|
||||||
core_rsp_tag_unqual = 0;
|
core_rsp_tag_unqual = 0;
|
||||||
for (i = 0; i < NUM_BANKS; i++) begin
|
for (integer i = 0; i < NUM_BANKS; i++) begin
|
||||||
if (per_bank_core_rsp_valid[i]
|
if (per_bank_core_rsp_valid[i]
|
||||||
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
|
&& !core_rsp_valid_unqual[per_bank_core_rsp_tid[i]]
|
||||||
&& ((main_bank_index == `BANK_BITS'(i))
|
&& ((main_bank_index == `BANK_BITS'(i))
|
||||||
|
|||||||
3
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
3
hw/rtl/cache/VX_cache_dram_req_arb.v
vendored
@@ -106,8 +106,7 @@ module VX_cache_dram_req_arb #(
|
|||||||
`UNUSED_PIN (grant_onehot)
|
`UNUSED_PIN (grant_onehot)
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||||
for (i = 0; i < NUM_BANKS; i++) begin
|
|
||||||
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
|
assign per_bank_dram_wb_req_ready[i] = dram_req_ready && (dwb_bank == `BANK_BITS'(i));
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
9
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
9
hw/rtl/cache/VX_cache_miss_resrv.v
vendored
@@ -77,9 +77,8 @@ module VX_cache_miss_resrv #(
|
|||||||
reg [MRVQ_SIZE-1:0] make_ready;
|
reg [MRVQ_SIZE-1:0] make_ready;
|
||||||
reg [MRVQ_SIZE-1:0] make_ready_push;
|
reg [MRVQ_SIZE-1:0] make_ready_push;
|
||||||
reg [MRVQ_SIZE-1:0] valid_address_match;
|
reg [MRVQ_SIZE-1:0] valid_address_match;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < MRVQ_SIZE; i++) begin
|
||||||
for (i = 0; i < MRVQ_SIZE; i++) begin
|
|
||||||
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
|
assign valid_address_match[i] = valid_table[i] ? (addr_table[i] == fill_addr_st1) : 0;
|
||||||
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
assign make_ready[i] = is_fill_st1 && valid_address_match[i];
|
||||||
end
|
end
|
||||||
@@ -121,7 +120,6 @@ module VX_cache_miss_resrv #(
|
|||||||
head_ptr <= 0;
|
head_ptr <= 0;
|
||||||
tail_ptr <= 0;
|
tail_ptr <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
if (mrvq_push) begin
|
if (mrvq_push) begin
|
||||||
valid_table[enqueue_index] <= 1;
|
valid_table[enqueue_index] <= 1;
|
||||||
ready_table[enqueue_index] <= mrvq_init_ready_state;
|
ready_table[enqueue_index] <= mrvq_init_ready_state;
|
||||||
@@ -157,11 +155,10 @@ module VX_cache_miss_resrv #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
`ifdef DBG_PRINT_CACHE_MSRQ
|
`ifdef DBG_PRINT_CACHE_MSRQ
|
||||||
integer j;
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
if (mrvq_push || mrvq_pop || increment_head || recover_state) begin
|
||||||
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
$write("%t: bank%0d:%0d msrq: push=%b pop=%b incr=%d recv=%d", $time, CACHE_ID, BANK_ID, mrvq_push, mrvq_pop, increment_head, recover_state);
|
||||||
for (j = 0; j < MRVQ_SIZE; j++) begin
|
for (integer j = 0; j < MRVQ_SIZE; j++) begin
|
||||||
if (valid_table[j]) begin
|
if (valid_table[j]) begin
|
||||||
$write(" ");
|
$write(" ");
|
||||||
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
if (schedule_ptr == $bits(schedule_ptr)'(j)) $write("*");
|
||||||
|
|||||||
6
hw/rtl/cache/VX_snp_forwarder.v
vendored
6
hw/rtl/cache/VX_snp_forwarder.v
vendored
@@ -83,9 +83,7 @@ module VX_snp_forwarder #(
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
|
||||||
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
|
assign snp_fwdout_valid[i] = snp_req_valid && snp_req_ready;
|
||||||
assign snp_fwdout_addr[i] = snp_req_addr;
|
assign snp_fwdout_addr[i] = snp_req_addr;
|
||||||
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
|
assign snp_fwdout_invalidate[i] = snp_req_invalidate;
|
||||||
@@ -110,7 +108,7 @@ module VX_snp_forwarder #(
|
|||||||
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
|
assign fwdin_valid = snp_fwdin_valid[fwdin_sel];
|
||||||
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
|
assign fwdin_tag = snp_fwdin_tag[fwdin_sel];
|
||||||
|
|
||||||
for (i = 0; i < NUM_REQUESTS; i++) begin
|
for (genvar i = 0; i < NUM_REQUESTS; i++) begin
|
||||||
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
|
assign snp_fwdin_ready[i] = fwdin_ready && (fwdin_sel == `REQS_BITS'(i));
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
3
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
3
hw/rtl/cache/VX_snp_rsp_arb.v
vendored
@@ -34,8 +34,7 @@ module VX_snp_rsp_arb #(
|
|||||||
assign snp_rsp_valid = fsq_valid;
|
assign snp_rsp_valid = fsq_valid;
|
||||||
assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank];
|
assign snp_rsp_tag = per_bank_snp_rsp_tag[fsq_bank];
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < NUM_BANKS; i++) begin
|
||||||
for (i = 0; i < NUM_BANKS; i++) begin
|
|
||||||
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i));
|
assign per_bank_snp_rsp_ready[i] = snp_rsp_ready && (fsq_bank == `BANK_BITS'(i));
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
19
hw/rtl/cache/VX_tag_data_access.v
vendored
19
hw/rtl/cache/VX_tag_data_access.v
vendored
@@ -30,7 +30,7 @@ module VX_tag_data_access #(
|
|||||||
input wire[31:0] debug_pc_st1e,
|
input wire[31:0] debug_pc_st1e,
|
||||||
input wire debug_wb_st1e,
|
input wire debug_wb_st1e,
|
||||||
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
input wire[`NR_BITS-1:0] debug_rd_st1e,
|
||||||
input wire[`NW_BITS-1:0] debug_warp_num_st1e,
|
input wire[`NW_BITS-1:0] debug_wid_st1e,
|
||||||
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
input wire[`UP(CORE_TAG_ID_BITS)-1:0] debug_tagid_st1e,
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
`endif
|
`endif
|
||||||
@@ -135,8 +135,7 @@ module VX_tag_data_access #(
|
|||||||
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
|
.out ({read_valid_st1c[0], read_dirty_st1c[0], read_dirtyb_st1c[0], read_tag_st1c[0], read_data_st1c[0]})
|
||||||
);
|
);
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
||||||
for (i = 1; i < STAGE_1_CYCLES-1; i++) begin
|
|
||||||
VX_generic_register #(
|
VX_generic_register #(
|
||||||
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
.N(1 + 1 + BANK_LINE_SIZE + `TAG_SELECT_BITS + `BANK_LINE_WIDTH)
|
||||||
) s0_1_cc (
|
) s0_1_cc (
|
||||||
@@ -157,11 +156,11 @@ module VX_tag_data_access #(
|
|||||||
|
|
||||||
if (`WORD_SELECT_WIDTH != 0) begin
|
if (`WORD_SELECT_WIDTH != 0) begin
|
||||||
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
|
wire [`WORD_WIDTH-1:0] readword = use_read_data_st1e[wordsel_st1e * `WORD_WIDTH +: `WORD_WIDTH];
|
||||||
for (i = 0; i < WORD_SIZE; i++) begin
|
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||||
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
assign readword_st1e[i * 8 +: 8] = readword[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||||
end
|
end
|
||||||
end else begin
|
end else begin
|
||||||
for (i = 0; i < WORD_SIZE; i++) begin
|
for (genvar i = 0; i < WORD_SIZE; i++) begin
|
||||||
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
assign readword_st1e[i * 8 +: 8] = use_read_data_st1e[i * 8 +: 8] & {8{mem_byteen_st1e[i]}};
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -176,7 +175,7 @@ module VX_tag_data_access #(
|
|||||||
&& ~is_snp_st1e
|
&& ~is_snp_st1e
|
||||||
&& ~real_writefill;
|
&& ~real_writefill;
|
||||||
|
|
||||||
for (i = 0; i < `BANK_LINE_WORDS; i++) begin
|
for (genvar i = 0; i < `BANK_LINE_WORDS; i++) begin
|
||||||
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
|
wire normal_write = ((`WORD_SELECT_WIDTH == 0) || (wordsel_st1e == `UP(`WORD_SELECT_WIDTH)'(i)))
|
||||||
&& should_write;
|
&& should_write;
|
||||||
|
|
||||||
@@ -218,15 +217,15 @@ module VX_tag_data_access #(
|
|||||||
if (valid_req_st1e) begin
|
if (valid_req_st1e) begin
|
||||||
if ((| use_write_enable)) begin
|
if ((| use_write_enable)) begin
|
||||||
if (writefill_st1e) begin
|
if (writefill_st1e) begin
|
||||||
$display("%t: bank%0d:%0d store-fill: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
$display("%t: bank%0d:%0d store-fill: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, use_write_data);
|
||||||
end else begin
|
end else begin
|
||||||
$display("%t: bank%0d:%0d store-write: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
$display("%t: bank%0d:%0d store-write: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, writeladdr_st1e, writetag_st1e, wordsel_st1e, writeword_st1e);
|
||||||
end
|
end
|
||||||
end else
|
end else
|
||||||
if (miss_st1e) begin
|
if (miss_st1e) begin
|
||||||
$display("%t: bank%0d:%0d store-miss: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
$display("%t: bank%0d:%0d store-miss: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e);
|
||||||
end else begin
|
end else begin
|
||||||
$display("%t: bank%0d:%0d store-read: warp=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_warp_num_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
$display("%t: bank%0d:%0d store-read: wid=%0d, PC=%0h, tag=%0h, wb=%b, rd=%0d, dirty=%b, blk_addr=%0d, tag_id=%0h, wsel=%0d, data=%0h", $time, CACHE_ID, BANK_ID, debug_wid_st1e, debug_pc_st1e, debug_tagid_st1e, debug_wb_st1e, debug_rd_st1e, dirty_st1e, readaddr_st10, qual_read_tag_st1, wordsel_st1e, qual_read_data_st1);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -41,8 +41,6 @@ module VX_fp_fpga (
|
|||||||
reg [FPC_BITS-1:0] core_select;
|
reg [FPC_BITS-1:0] core_select;
|
||||||
reg fmadd_negate;
|
reg fmadd_negate;
|
||||||
|
|
||||||
genvar i;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
core_select = 0;
|
core_select = 0;
|
||||||
fmadd_negate = 0;
|
fmadd_negate = 0;
|
||||||
@@ -246,7 +244,7 @@ module VX_fp_fpga (
|
|||||||
.valid_out (fp_valid)
|
.valid_out (fp_valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
for (i = 0; i < NUM_FPC; i++) begin
|
for (genvar i = 0; i < NUM_FPC; i++) begin
|
||||||
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
assign per_core_ready_out[i] = ready_out && (i == fp_index);
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -48,10 +48,8 @@ module VX_fp_noncomp (
|
|||||||
reg [`NUM_THREADS-1:0][31:0] fcmp_res; // result of comparison
|
reg [`NUM_THREADS-1:0][31:0] fcmp_res; // result of comparison
|
||||||
reg [`NUM_THREADS-1:0][ 4:0] fcmp_excp; // exception of comparison
|
reg [`NUM_THREADS-1:0][ 4:0] fcmp_excp; // exception of comparison
|
||||||
|
|
||||||
genvar i;
|
|
||||||
|
|
||||||
// Setup
|
// Setup
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
assign a_sign[i] = dataa[i][31];
|
assign a_sign[i] = dataa[i][31];
|
||||||
assign a_exponent[i] = dataa[i][30:23];
|
assign a_exponent[i] = dataa[i][30:23];
|
||||||
assign a_mantissa[i] = dataa[i][22:0];
|
assign a_mantissa[i] = dataa[i][22:0];
|
||||||
@@ -77,7 +75,7 @@ module VX_fp_noncomp (
|
|||||||
end
|
end
|
||||||
|
|
||||||
// FCLASS
|
// FCLASS
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
if (a_type[i].is_normal) begin
|
if (a_type[i].is_normal) begin
|
||||||
fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
|
fclass_mask[i] = a_sign[i] ? NEG_NORM : POS_NORM;
|
||||||
@@ -101,7 +99,7 @@ module VX_fp_noncomp (
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Min/Max
|
// Min/Max
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
if (a_type[i].is_nan && b_type[i].is_nan)
|
if (a_type[i].is_nan && b_type[i].is_nan)
|
||||||
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
fminmax_res[i] = {1'b0, 8'hff, 1'b1, 22'd0}; // canonical qNaN
|
||||||
@@ -120,7 +118,7 @@ module VX_fp_noncomp (
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Sign Injection
|
// Sign Injection
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (op)
|
case (op)
|
||||||
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
`FPU_SGNJ: fsgnj_res[i] = { b_sign[i], a_exponent[i], a_mantissa[i]};
|
||||||
@@ -132,7 +130,7 @@ module VX_fp_noncomp (
|
|||||||
end
|
end
|
||||||
|
|
||||||
// Comparison
|
// Comparison
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
case (frm)
|
case (frm)
|
||||||
`FRM_RNE: begin
|
`FRM_RNE: begin
|
||||||
@@ -193,7 +191,7 @@ module VX_fp_noncomp (
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
tmp_valid = 1'b1;
|
tmp_valid = 1'b1;
|
||||||
case (op)
|
case (op)
|
||||||
|
|||||||
@@ -129,11 +129,9 @@ module VX_fpnew #(
|
|||||||
endcase
|
endcase
|
||||||
end
|
end
|
||||||
|
|
||||||
genvar i;
|
|
||||||
|
|
||||||
`DISABLE_TRACING
|
`DISABLE_TRACING
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
if (0 == i) begin
|
if (0 == i) begin
|
||||||
fpnew_top #(
|
fpnew_top #(
|
||||||
.Features (FPU_FEATURES),
|
.Features (FPU_FEATURES),
|
||||||
@@ -194,8 +192,7 @@ module VX_fpnew #(
|
|||||||
`ENABLE_TRACING
|
`ENABLE_TRACING
|
||||||
|
|
||||||
assign fpu_valid_in = valid_in;
|
assign fpu_valid_in = valid_in;
|
||||||
assign ready_in = fpu_ready_in
|
assign ready_in = fpu_ready_in;
|
||||||
|| ~valid_in; // fix
|
|
||||||
|
|
||||||
assign fpu_tag_in = tag_in;
|
assign fpu_tag_in = tag_in;
|
||||||
assign tag_out = fpu_tag_out;
|
assign tag_out = fpu_tag_out;
|
||||||
|
|||||||
@@ -22,9 +22,7 @@ module VX_fp_add (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
twentynm_fp_mac mac_fp_wys (
|
twentynm_fp_mac mac_fp_wys (
|
||||||
// inputs
|
// inputs
|
||||||
.accumulate(),
|
.accumulate(),
|
||||||
|
|||||||
@@ -22,9 +22,7 @@ module VX_fp_div (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
acl_fp_div fdiv (
|
acl_fp_div fdiv (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (1'b0),
|
.areset (1'b0),
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ module VX_fp_ftoi (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
acl_fp_ftoi ftoi (
|
acl_fp_ftoi ftoi (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (1'b0),
|
.areset (1'b0),
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ module VX_fp_ftou (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
acl_fp_ftou ftou (
|
acl_fp_ftou ftou (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (1'b0),
|
.areset (1'b0),
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ module VX_fp_itof (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
acl_fp_itof itof (
|
acl_fp_itof itof (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (1'b0),
|
.areset (1'b0),
|
||||||
|
|||||||
@@ -28,9 +28,7 @@ module VX_fp_madd (
|
|||||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||||
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
twentynm_fp_mac mac_fp_wys0 (
|
twentynm_fp_mac mac_fp_wys0 (
|
||||||
// inputs
|
// inputs
|
||||||
.accumulate(),
|
.accumulate(),
|
||||||
|
|||||||
@@ -28,9 +28,7 @@ module VX_fp_msub (
|
|||||||
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
wire [`ISTAG_BITS-1:0] out_tag_st0, out_tag_st1;
|
||||||
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
wire in_valid_st0, out_valid_st0, out_valid_st1;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
twentynm_fp_mac mac_fp_wys0 (
|
twentynm_fp_mac mac_fp_wys0 (
|
||||||
// inputs
|
// inputs
|
||||||
.accumulate(),
|
.accumulate(),
|
||||||
|
|||||||
@@ -22,9 +22,7 @@ module VX_fp_mul (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
twentynm_fp_mac mac_fp_wys (
|
twentynm_fp_mac mac_fp_wys (
|
||||||
// inputs
|
// inputs
|
||||||
.accumulate(),
|
.accumulate(),
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ module VX_fp_sqrt (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
acl_fp_sqrt fsqrt (
|
acl_fp_sqrt fsqrt (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (1'b0),
|
.areset (1'b0),
|
||||||
|
|||||||
@@ -22,9 +22,7 @@ module VX_fp_sub (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
twentynm_fp_mac mac_fp_wys (
|
twentynm_fp_mac mac_fp_wys (
|
||||||
// inputs
|
// inputs
|
||||||
.accumulate(),
|
.accumulate(),
|
||||||
|
|||||||
@@ -21,9 +21,7 @@ module VX_fp_utof (
|
|||||||
wire enable = ~stall;
|
wire enable = ~stall;
|
||||||
assign ready_in = enable;
|
assign ready_in = enable;
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < `NUM_THREADS; i++) begin
|
||||||
|
|
||||||
for (i = 0; i < `NUM_THREADS; i++) begin
|
|
||||||
acl_fp_utof utof (
|
acl_fp_utof utof (
|
||||||
.clk (clk),
|
.clk (clk),
|
||||||
.areset (1'b0),
|
.areset (1'b0),
|
||||||
|
|||||||
@@ -7,18 +7,22 @@ interface VX_alu_req_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
`DEBUG_BEGIN
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
`DEBUG_END
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
wire [`ALU_BITS-1:0] alu_op;
|
wire [`ALU_BITS-1:0] op;
|
||||||
|
|
||||||
|
wire rs1_is_PC;
|
||||||
|
wire rs2_is_imm;
|
||||||
|
|
||||||
|
wire [31:0] imm;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
|
|
||||||
wire [31:0] offset;
|
|
||||||
wire [31:0] next_PC;
|
|
||||||
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
interface VX_branch_ctl_if ();
|
interface VX_branch_ctl_if ();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire taken;
|
wire taken;
|
||||||
wire [31:0] dest;
|
wire [31:0] dest;
|
||||||
|
|
||||||
|
|||||||
29
hw/rtl/interfaces/VX_bru_req_if.v
Normal file
29
hw/rtl/interfaces/VX_bru_req_if.v
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
`ifndef VX_BRANCH_REQ_IF
|
||||||
|
`define VX_BRANCH_REQ_IF
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
interface VX_bru_req_if ();
|
||||||
|
|
||||||
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
`DEBUG_BEGIN
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
|
`DEBUG_END
|
||||||
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
|
wire [`BRU_BITS-1:0] op;
|
||||||
|
|
||||||
|
wire rs1_is_PC;
|
||||||
|
|
||||||
|
wire [31:0] rs1_data;
|
||||||
|
wire [31:0] rs2_data;
|
||||||
|
|
||||||
|
wire [31:0] offset;
|
||||||
|
|
||||||
|
wire ready;
|
||||||
|
|
||||||
|
endinterface
|
||||||
|
|
||||||
|
`endif
|
||||||
@@ -7,7 +7,7 @@ interface VX_cmt_to_csr_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
wire [`NE_BITS:0] num_commits;
|
wire [`NE_BITS:0] num_commits;
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@
|
|||||||
interface VX_cmt_to_issue_if ();
|
interface VX_cmt_to_issue_if ();
|
||||||
|
|
||||||
wire alu_valid;
|
wire alu_valid;
|
||||||
|
wire bru_valid;
|
||||||
wire lsu_valid;
|
wire lsu_valid;
|
||||||
wire csr_valid;
|
wire csr_valid;
|
||||||
wire mul_valid;
|
wire mul_valid;
|
||||||
@@ -13,6 +14,7 @@ interface VX_cmt_to_issue_if ();
|
|||||||
wire gpu_valid;
|
wire gpu_valid;
|
||||||
|
|
||||||
wire [`ISTAG_BITS-1:0] alu_tag;
|
wire [`ISTAG_BITS-1:0] alu_tag;
|
||||||
|
wire [`ISTAG_BITS-1:0] bru_tag;
|
||||||
wire [`ISTAG_BITS-1:0] lsu_tag;
|
wire [`ISTAG_BITS-1:0] lsu_tag;
|
||||||
wire [`ISTAG_BITS-1:0] csr_tag;
|
wire [`ISTAG_BITS-1:0] csr_tag;
|
||||||
wire [`ISTAG_BITS-1:0] mul_tag;
|
wire [`ISTAG_BITS-1:0] mul_tag;
|
||||||
@@ -21,6 +23,7 @@ interface VX_cmt_to_issue_if ();
|
|||||||
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
issue_data_t alu_data;
|
issue_data_t alu_data;
|
||||||
|
issue_data_t bru_data;
|
||||||
issue_data_t lsu_data;
|
issue_data_t lsu_data;
|
||||||
issue_data_t csr_data;
|
issue_data_t csr_data;
|
||||||
issue_data_t mul_data;
|
issue_data_t mul_data;
|
||||||
|
|||||||
@@ -7,13 +7,13 @@ interface VX_csr_req_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
`DEBUG_BEGIN
|
`DEBUG_BEGIN
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
`DEBUG_END
|
`DEBUG_END
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
wire [`CSR_BITS-1:0] csr_op;
|
wire [`CSR_BITS-1:0] op;
|
||||||
|
|
||||||
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
wire [`CSR_ADDR_BITS-1:0] csr_addr;
|
||||||
wire [31:0] csr_mask;
|
wire [31:0] csr_mask;
|
||||||
|
|||||||
15
hw/rtl/interfaces/VX_csr_rsp_if.v
Normal file
15
hw/rtl/interfaces/VX_csr_rsp_if.v
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
`ifndef VX_CSR_RSP_IF
|
||||||
|
`define VX_CSR_RSP_IF
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
interface VX_csr_rsp_if ();
|
||||||
|
|
||||||
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
|
wire ready;
|
||||||
|
|
||||||
|
endinterface
|
||||||
|
|
||||||
|
`endif
|
||||||
@@ -9,7 +9,7 @@
|
|||||||
|
|
||||||
interface VX_csr_to_fpu_if ();
|
interface VX_csr_to_fpu_if ();
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [`FRM_BITS-1:0] frm;
|
wire [`FRM_BITS-1:0] frm;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|||||||
@@ -6,10 +6,9 @@
|
|||||||
interface VX_decode_if ();
|
interface VX_decode_if ();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
wire [31:0] next_PC;
|
|
||||||
|
|
||||||
wire [`EX_BITS-1:0] ex_type;
|
wire [`EX_BITS-1:0] ex_type;
|
||||||
wire [`OP_BITS-1:0] ex_op;
|
wire [`OP_BITS-1:0] ex_op;
|
||||||
|
|||||||
@@ -5,10 +5,9 @@
|
|||||||
|
|
||||||
interface VX_exu_to_cmt_if ();
|
interface VX_exu_to_cmt_if ();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
wire ready;
|
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -11,13 +11,13 @@ interface VX_fpu_req_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
`DEBUG_BEGIN
|
`DEBUG_BEGIN
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
`DEBUG_END
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
`DEBUG_END
|
||||||
|
|
||||||
wire [`FPU_BITS-1:0] fpu_op;
|
wire [`FPU_BITS-1:0] op;
|
||||||
wire [`FRM_BITS-1:0] frm;
|
wire [`FRM_BITS-1:0] frm;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
|
|||||||
@@ -10,7 +10,6 @@ interface VX_fpu_to_cmt_if ();
|
|||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
wire has_fflags;
|
wire has_fflags;
|
||||||
fflags_t [`NUM_THREADS-1:0] fflags;
|
fflags_t [`NUM_THREADS-1:0] fflags;
|
||||||
wire ready;
|
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ interface VX_fpu_to_csr_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
wire fflags_NV;
|
wire fflags_NV;
|
||||||
wire fflags_DZ;
|
wire fflags_DZ;
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ interface VX_gpr_read_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
wire [`NR_BITS-1:0] rs1;
|
wire [`NR_BITS-1:0] rs1;
|
||||||
wire [`NR_BITS-1:0] rs2;
|
wire [`NR_BITS-1:0] rs2;
|
||||||
|
|||||||
@@ -6,17 +6,15 @@
|
|||||||
interface VX_gpu_req_if();
|
interface VX_gpu_req_if();
|
||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
|
||||||
`DEBUG_BEGIN
|
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
`DEBUG_END
|
|
||||||
wire [`GPU_BITS-1:0] gpu_op;
|
wire [`GPU_BITS-1:0] op;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [31:0] rs2_data;
|
wire [31:0] rs2_data;
|
||||||
wire [31:0] next_PC;
|
|
||||||
|
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ interface VX_ifetch_req_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ interface VX_ifetch_rsp_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
wire [31:0] instr;
|
wire [31:0] instr;
|
||||||
wire ready;
|
wire ready;
|
||||||
|
|||||||
39
hw/rtl/interfaces/VX_issue_if.v
Normal file
39
hw/rtl/interfaces/VX_issue_if.v
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
`ifndef VX_ISSUE_IF
|
||||||
|
`define VX_ISSUE_IF
|
||||||
|
|
||||||
|
`include "VX_define.vh"
|
||||||
|
|
||||||
|
interface VX_issue_if ();
|
||||||
|
|
||||||
|
wire valid;
|
||||||
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
|
wire [`EX_BITS-1:0] ex_type;
|
||||||
|
wire [`OP_BITS-1:0] ex_op;
|
||||||
|
|
||||||
|
wire [`FRM_BITS-1:0] frm;
|
||||||
|
|
||||||
|
wire wb;
|
||||||
|
|
||||||
|
wire [`NR_BITS-1:0] rd;
|
||||||
|
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
|
wire [`NUM_THREADS-1:0][31:0] rs3_data;
|
||||||
|
|
||||||
|
wire [`NR_BITS-1:0] rs1;
|
||||||
|
wire [31:0] imm;
|
||||||
|
|
||||||
|
wire rs1_is_PC;
|
||||||
|
wire rs2_is_imm;
|
||||||
|
|
||||||
|
wire [`NT_BITS-1:0] tid;
|
||||||
|
|
||||||
|
wire ready;
|
||||||
|
|
||||||
|
endinterface
|
||||||
|
|
||||||
|
`endif
|
||||||
@@ -6,7 +6,7 @@
|
|||||||
interface VX_join_if ();
|
interface VX_join_if ();
|
||||||
|
|
||||||
wire is_join;
|
wire is_join;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ interface VX_lsu_req_if ();
|
|||||||
wire valid;
|
wire valid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
|
|
||||||
wire rw;
|
wire rw;
|
||||||
|
|||||||
@@ -12,11 +12,11 @@ interface VX_mul_req_if ();
|
|||||||
wire valid;
|
wire valid;
|
||||||
wire [`ISTAG_BITS-1:0] issue_tag;
|
wire [`ISTAG_BITS-1:0] issue_tag;
|
||||||
`DEBUG_BEGIN
|
`DEBUG_BEGIN
|
||||||
|
wire [`NW_BITS-1:0] wid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
`DEBUG_END
|
`DEBUG_END
|
||||||
wire [`MUL_BITS-1:0] mul_op;
|
wire [`MUL_BITS-1:0] op;
|
||||||
|
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
wire [`NUM_THREADS-1:0][31:0] rs1_data;
|
||||||
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
wire [`NUM_THREADS-1:0][31:0] rs2_data;
|
||||||
|
|||||||
@@ -5,24 +5,12 @@
|
|||||||
|
|
||||||
interface VX_warp_ctl_if ();
|
interface VX_warp_ctl_if ();
|
||||||
|
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
wire change_mask;
|
gpu_tmc_t tmc;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
gpu_wspawn_t wspawn;
|
||||||
|
gpu_barrier_t barrier;
|
||||||
wire wspawn;
|
gpu_split_t split;
|
||||||
wire [31:0] wspawn_pc;
|
|
||||||
wire [`NUM_WARPS-1:0] wspawn_wmask;
|
|
||||||
|
|
||||||
wire is_barrier;
|
|
||||||
wire [`NB_BITS-1:0] barrier_id;
|
|
||||||
wire [`NW_BITS:0] barrier_num_warps;
|
|
||||||
|
|
||||||
wire is_split;
|
|
||||||
wire do_split;
|
|
||||||
wire [`NUM_THREADS-1:0] split_new_mask;
|
|
||||||
wire [`NUM_THREADS-1:0] split_later_mask;
|
|
||||||
wire [31:0] split_save_pc;
|
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -7,16 +7,14 @@ interface VX_wb_if ();
|
|||||||
|
|
||||||
wire valid;
|
wire valid;
|
||||||
wire [`NUM_THREADS-1:0] thread_mask;
|
wire [`NUM_THREADS-1:0] thread_mask;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
`IGNORE_WARNINGS_BEGIN
|
`IGNORE_WARNINGS_BEGIN
|
||||||
wire [31:0] curr_PC;
|
wire [31:0] curr_PC;
|
||||||
`IGNORE_WARNINGS_END
|
`IGNORE_WARNINGS_END
|
||||||
|
|
||||||
wire [`NR_BITS-1:0] rd;
|
wire [`NR_BITS-1:0] rd;
|
||||||
wire [`NUM_THREADS-1:0][31:0] data;
|
wire [`NUM_THREADS-1:0][31:0] data;
|
||||||
|
|
||||||
wire ready;
|
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@
|
|||||||
interface VX_wstall_if();
|
interface VX_wstall_if();
|
||||||
|
|
||||||
wire wstall;
|
wire wstall;
|
||||||
wire [`NW_BITS-1:0] warp_num;
|
wire [`NW_BITS-1:0] wid;
|
||||||
|
|
||||||
endinterface
|
endinterface
|
||||||
|
|
||||||
|
|||||||
@@ -32,19 +32,17 @@ module VX_cam_buffer #(
|
|||||||
.valid_out (free_valid)
|
.valid_out (free_valid)
|
||||||
);
|
);
|
||||||
|
|
||||||
integer i;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
free_slots_n = free_slots;
|
free_slots_n = free_slots;
|
||||||
if (acquire_slot) begin
|
for (integer i = 0; i < RPORTS; i++) begin
|
||||||
free_slots_n[write_addr_r] = 0;
|
|
||||||
end
|
|
||||||
for (i = 0; i < RPORTS; i++) begin
|
|
||||||
if (release_slot[i]) begin
|
if (release_slot[i]) begin
|
||||||
free_slots_n[read_addr[i]] = 1;
|
free_slots_n[read_addr[i]] = 1;
|
||||||
end
|
end
|
||||||
read_data[i] = entries[read_addr[i]];
|
read_data[i] = entries[read_addr[i]];
|
||||||
end
|
end
|
||||||
|
if (acquire_slot) begin
|
||||||
|
free_slots_n[write_addr_r] = 0;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
@@ -54,12 +52,12 @@ module VX_cam_buffer #(
|
|||||||
write_addr_r <= ADDRW'(1'b0);
|
write_addr_r <= ADDRW'(1'b0);
|
||||||
end else begin
|
end else begin
|
||||||
if (acquire_slot) begin
|
if (acquire_slot) begin
|
||||||
assert(1 == free_slots[write_addr]);
|
assert(1 == free_slots[write_addr]) else $display("%t: inused slot at port %d", $time, write_addr);
|
||||||
entries[write_addr] <= write_data;
|
entries[write_addr] <= write_data;
|
||||||
end
|
end
|
||||||
for (i = 0; i < RPORTS; i++) begin
|
for (integer i = 0; i < RPORTS; i++) begin
|
||||||
if (release_slot[i]) begin
|
if (release_slot[i]) begin
|
||||||
assert(0 == free_slots[read_addr[i]]);
|
assert(0 == free_slots[read_addr[i]]) else $display("%t: freed slot at port %d", $time, read_addr[i]);
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
free_slots <= free_slots_n;
|
free_slots <= free_slots_n;
|
||||||
|
|||||||
@@ -7,11 +7,9 @@ module VX_countones #(
|
|||||||
input wire [N-1:0] valids,
|
input wire [N-1:0] valids,
|
||||||
output reg [$clog2(N):0] count
|
output reg [$clog2(N):0] count
|
||||||
);
|
);
|
||||||
|
|
||||||
integer i;
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
count = 0;
|
count = 0;
|
||||||
for (i = N-1; i >= 0; i = i - 1) begin
|
for (integer i = N-1; i >= 0; i = i - 1) begin
|
||||||
if (valids[i]) begin
|
if (valids[i]) begin
|
||||||
count = count + 1;
|
count = count + 1;
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -52,15 +52,6 @@ module VX_divide #(
|
|||||||
reg [WIDTHD-1:0] remainder_unqual;
|
reg [WIDTHD-1:0] remainder_unqual;
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
`ifndef SYNTHESIS
|
|
||||||
// this edge case kills verilator in some cases by causing a division
|
|
||||||
// overflow exception. INT_MIN / -1 (on x86)
|
|
||||||
if (numer == {1'b1, (WIDTHN-1)'(1'b0)}
|
|
||||||
&& denom == {WIDTHD{1'b1}}) begin
|
|
||||||
quotient_unqual = 0;
|
|
||||||
remainder_unqual = 0;
|
|
||||||
end else
|
|
||||||
`endif
|
|
||||||
begin
|
begin
|
||||||
if (NSIGNED && DSIGNED) begin
|
if (NSIGNED && DSIGNED) begin
|
||||||
quotient_unqual = $signed(numer) / $signed(denom);
|
quotient_unqual = $signed(numer) / $signed(denom);
|
||||||
@@ -88,21 +79,21 @@ module VX_divide #(
|
|||||||
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
reg [WIDTHN-1:0] quotient_pipe [0:PIPELINE-1];
|
||||||
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
reg [WIDTHD-1:0] remainder_pipe [0:PIPELINE-1];
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < PIPELINE; i++) begin
|
||||||
for (i = 0; i < PIPELINE; i++) begin
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
quotient_pipe[i] <= 0;
|
quotient_pipe[i] <= 0;
|
||||||
remainder_pipe[i] <= 0;
|
remainder_pipe[i] <= 0;
|
||||||
end
|
end else begin
|
||||||
else if (clk_en) begin
|
if (clk_en) begin
|
||||||
if (i == 0) begin
|
if (i == 0) begin
|
||||||
quotient_pipe[i] <= quotient_unqual;
|
quotient_pipe[i] <= quotient_unqual;
|
||||||
remainder_pipe[i] <= remainder_unqual;
|
remainder_pipe[i] <= remainder_unqual;
|
||||||
end else begin
|
end else begin
|
||||||
quotient_pipe[i] <= quotient_pipe[i-1];
|
quotient_pipe[i] <= quotient_pipe[i-1];
|
||||||
remainder_pipe[i] <= remainder_pipe[i-1];
|
remainder_pipe[i] <= remainder_pipe[i-1];
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -14,25 +14,53 @@ module VX_elastic_buffer #(
|
|||||||
input wire ready_out,
|
input wire ready_out,
|
||||||
output wire valid_out
|
output wire valid_out
|
||||||
);
|
);
|
||||||
wire empty, full;
|
if (0 == SIZE) begin
|
||||||
|
|
||||||
VX_generic_queue #(
|
reg [DATAW-1:0] skid_buffer;
|
||||||
.DATAW (DATAW),
|
reg skid_valid;
|
||||||
.SIZE (SIZE),
|
|
||||||
.BUFFERED (BUFFERED)
|
|
||||||
) queue (
|
|
||||||
.clk (clk),
|
|
||||||
.reset (reset),
|
|
||||||
.push (valid_in),
|
|
||||||
.pop (ready_out),
|
|
||||||
.data_in(data_in),
|
|
||||||
.data_out(data_out),
|
|
||||||
.empty (empty),
|
|
||||||
.full (full),
|
|
||||||
`UNUSED_PIN (size)
|
|
||||||
);
|
|
||||||
|
|
||||||
assign ready_in = ~full;
|
always @(posedge clk) begin
|
||||||
assign valid_out = ~empty;
|
if (reset) begin
|
||||||
|
skid_valid <= 0;
|
||||||
|
end else begin
|
||||||
|
if (valid_in && ~ready_out) begin
|
||||||
|
assert(~skid_valid);
|
||||||
|
skid_buffer <= data_in;
|
||||||
|
skid_valid <= 1;
|
||||||
|
end
|
||||||
|
if (ready_out) begin
|
||||||
|
skid_valid <= 0;
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
assign ready_in = ready_out || ~skid_valid;
|
||||||
|
assign data_out = skid_valid ? skid_buffer : data_in;
|
||||||
|
assign valid_out = valid_in || skid_valid;
|
||||||
|
|
||||||
|
end else begin
|
||||||
|
|
||||||
|
wire empty, full;
|
||||||
|
|
||||||
|
VX_generic_queue #(
|
||||||
|
.DATAW (DATAW),
|
||||||
|
.SIZE (SIZE),
|
||||||
|
.BUFFERED (BUFFERED)
|
||||||
|
) queue (
|
||||||
|
.clk (clk),
|
||||||
|
.reset (reset),
|
||||||
|
.push (valid_in),
|
||||||
|
.pop (ready_out),
|
||||||
|
.data_in(data_in),
|
||||||
|
.data_out(data_out),
|
||||||
|
.empty (empty),
|
||||||
|
.full (full),
|
||||||
|
`UNUSED_PIN (size)
|
||||||
|
);
|
||||||
|
|
||||||
|
assign ready_in = ~full;
|
||||||
|
assign valid_out = ~empty;
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
endmodule
|
endmodule
|
||||||
@@ -11,7 +11,7 @@ module VX_fair_arbiter #(
|
|||||||
output wire grant_valid
|
output wire grant_valid
|
||||||
);
|
);
|
||||||
|
|
||||||
if (N == 1) begin
|
if (N == 1) begin
|
||||||
|
|
||||||
`UNUSED_VAR (clk)
|
`UNUSED_VAR (clk)
|
||||||
`UNUSED_VAR (reset)
|
`UNUSED_VAR (reset)
|
||||||
@@ -33,11 +33,13 @@ module VX_fair_arbiter #(
|
|||||||
if (reset) begin
|
if (reset) begin
|
||||||
requests_use <= 0;
|
requests_use <= 0;
|
||||||
refill_original <= 0;
|
refill_original <= 0;
|
||||||
end else if (refill) begin
|
|
||||||
requests_use <= refill_value;
|
|
||||||
refill_original <= refill_value;
|
|
||||||
end else begin
|
end else begin
|
||||||
requests_use <= update_value;
|
if (refill) begin
|
||||||
|
requests_use <= refill_value;
|
||||||
|
refill_original <= refill_value;
|
||||||
|
end else begin
|
||||||
|
requests_use <= update_value;
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -38,7 +38,6 @@ module VX_generic_queue #(
|
|||||||
end else if (reading && !writing) begin
|
end else if (reading && !writing) begin
|
||||||
size_r <= 0;
|
size_r <= 0;
|
||||||
end
|
end
|
||||||
|
|
||||||
if (writing) begin
|
if (writing) begin
|
||||||
head_r <= data_in;
|
head_r <= data_in;
|
||||||
end
|
end
|
||||||
@@ -146,7 +145,7 @@ module VX_generic_queue #(
|
|||||||
end
|
end
|
||||||
|
|
||||||
bypass_r <= writing
|
bypass_r <= writing
|
||||||
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
|
&& (empty_r || ((1 == size_r) && reading)); // empty or about to go empty
|
||||||
|
|
||||||
curr_r <= data_in;
|
curr_r <= data_in;
|
||||||
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
|
head_r <= data[reading ? rd_ptr_next_r : rd_ptr_r];
|
||||||
|
|||||||
@@ -24,11 +24,9 @@ module VX_matrix_arbiter #(
|
|||||||
|
|
||||||
reg [N-1:1] state [0:N-1];
|
reg [N-1:1] state [0:N-1];
|
||||||
wire [N-1:0] pri [0:N-1];
|
wire [N-1:0] pri [0:N-1];
|
||||||
|
|
||||||
genvar i, j;
|
for (genvar i = 0; i < N; i++) begin
|
||||||
|
for (genvar j = 0; j < N; j++) begin
|
||||||
for (i = 0; i < N; i++) begin
|
|
||||||
for (j = 0; j < N; j++) begin
|
|
||||||
if (j > i) begin
|
if (j > i) begin
|
||||||
assign pri[j][i] = requests[i] && state[i][j];
|
assign pri[j][i] = requests[i] && state[i][j];
|
||||||
end
|
end
|
||||||
@@ -43,13 +41,12 @@ module VX_matrix_arbiter #(
|
|||||||
assign grant_onehot[i] = requests[i] && !(| pri[i]);
|
assign grant_onehot[i] = requests[i] && !(| pri[i]);
|
||||||
end
|
end
|
||||||
|
|
||||||
for (i = 0; i < N; i++) begin
|
for (genvar i = 0; i < N; i++) begin
|
||||||
for (j = i + 1; j < N; j++) begin
|
for (genvar j = i + 1; j < N; j++) begin
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state[i][j] <= 0;
|
state[i][j] <= 0;
|
||||||
end
|
end else begin
|
||||||
else begin
|
|
||||||
state[i][j] <= (state[i][j] || grant_onehot[j]) && !grant_onehot[i];
|
state[i][j] <= (state[i][j] || grant_onehot[j]) && !grant_onehot[i];
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -50,18 +50,18 @@ module VX_multiplier #(
|
|||||||
|
|
||||||
reg [WIDTHP-1:0] result_pipe [0:PIPELINE-1];
|
reg [WIDTHP-1:0] result_pipe [0:PIPELINE-1];
|
||||||
|
|
||||||
genvar i;
|
for (genvar i = 0; i < PIPELINE; i++) begin
|
||||||
for (i = 0; i < PIPELINE; i++) begin
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
result_pipe[i] <= 0;
|
result_pipe[i] <= 0;
|
||||||
end
|
end else begin
|
||||||
else if (clk_en) begin
|
if (clk_en) begin
|
||||||
if (i == 0) begin
|
if (i == 0) begin
|
||||||
result_pipe[i] <= result_unqual;
|
result_pipe[i] <= result_unqual;
|
||||||
end else begin
|
end else begin
|
||||||
result_pipe[i] <= result_pipe[i-1];
|
result_pipe[i] <= result_pipe[i-1];
|
||||||
end
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -7,12 +7,10 @@ module VX_onehot_encoder #(
|
|||||||
output reg [`LOG2UP(N)-1:0] binary,
|
output reg [`LOG2UP(N)-1:0] binary,
|
||||||
output reg valid
|
output reg valid
|
||||||
);
|
);
|
||||||
integer i;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
valid = 1'b0;
|
valid = 1'b0;
|
||||||
binary = `LOG2UP(N)'(0);
|
binary = `LOG2UP(N)'(0);
|
||||||
for (i = 0; i < N; i++) begin
|
for (integer i = 0; i < N; i++) begin
|
||||||
if (onehot[i]) begin
|
if (onehot[i]) begin
|
||||||
valid = 1'b1;
|
valid = 1'b1;
|
||||||
binary = `LOG2UP(N)'(i);
|
binary = `LOG2UP(N)'(i);
|
||||||
|
|||||||
@@ -6,13 +6,11 @@ module VX_priority_encoder #(
|
|||||||
input wire [N-1:0] data_in,
|
input wire [N-1:0] data_in,
|
||||||
output reg [`LOG2UP(N)-1:0] data_out,
|
output reg [`LOG2UP(N)-1:0] data_out,
|
||||||
output reg valid_out
|
output reg valid_out
|
||||||
);
|
);
|
||||||
integer i;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
data_out = 0;
|
data_out = 0;
|
||||||
valid_out = 0;
|
valid_out = 0;
|
||||||
for (i = N-1; i >= 0; i = i - 1) begin
|
for (integer i = N-1; i >= 0; i = i - 1) begin
|
||||||
if (data_in[i]) begin
|
if (data_in[i]) begin
|
||||||
data_out = `LOG2UP(N)'(i);
|
data_out = `LOG2UP(N)'(i);
|
||||||
valid_out = 1;
|
valid_out = 1;
|
||||||
|
|||||||
@@ -26,12 +26,10 @@ module VX_rr_arbiter #(
|
|||||||
reg [`CLOG2(N)-1:0] state;
|
reg [`CLOG2(N)-1:0] state;
|
||||||
reg [N-1:0] grant_onehot_r;
|
reg [N-1:0] grant_onehot_r;
|
||||||
|
|
||||||
integer i, j;
|
|
||||||
|
|
||||||
always @(*) begin
|
always @(*) begin
|
||||||
for (i = 0; i < N; i++) begin
|
for (integer i = 0; i < N; i++) begin
|
||||||
grant_table[i] = `CLOG2(N)'(i);
|
grant_table[i] = `CLOG2(N)'(i);
|
||||||
for (j = 0; j < N; j++) begin
|
for (integer j = 0; j < N; j++) begin
|
||||||
if (requests[(i+j) % N]) begin
|
if (requests[(i+j) % N]) begin
|
||||||
grant_table[i] = `CLOG2(N)'((i+j) % N);
|
grant_table[i] = `CLOG2(N)'((i+j) % N);
|
||||||
end
|
end
|
||||||
@@ -44,8 +42,7 @@ module VX_rr_arbiter #(
|
|||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
state <= 0;
|
state <= 0;
|
||||||
end
|
end else begin
|
||||||
else begin
|
|
||||||
state <= grant_index;
|
state <= grant_index;
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -74,7 +74,6 @@ module VX_scope #(
|
|||||||
read_delta <= 0;
|
read_delta <= 0;
|
||||||
data_valid <= 0;
|
data_valid <= 0;
|
||||||
end else begin
|
end else begin
|
||||||
|
|
||||||
if (bus_write) begin
|
if (bus_write) begin
|
||||||
case (cmd_type)
|
case (cmd_type)
|
||||||
CMD_GET_VALID,
|
CMD_GET_VALID,
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ module VX_shift_register #(
|
|||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
entries <= '0;
|
entries <= (DEPTH * DATAW)'(0);
|
||||||
end else begin
|
end else begin
|
||||||
if (enable) begin
|
if (enable) begin
|
||||||
entries <= in;
|
entries <= in;
|
||||||
@@ -28,7 +28,7 @@ module VX_shift_register #(
|
|||||||
|
|
||||||
always @(posedge clk) begin
|
always @(posedge clk) begin
|
||||||
if (reset) begin
|
if (reset) begin
|
||||||
entries <= '0;
|
entries <= (DEPTH * DATAW)'(0);
|
||||||
end else begin
|
end else begin
|
||||||
if (enable) begin
|
if (enable) begin
|
||||||
entries <= {entries[DEPTH-2:0], in};
|
entries <= {entries[DEPTH-2:0], in};
|
||||||
|
|||||||
4
hw/syn/quartus/cache/Makefile
vendored
4
hw/syn/quartus/cache/Makefile
vendored
@@ -9,9 +9,9 @@ DEVICE = 10AX115N3F40E2SG
|
|||||||
|
|
||||||
# Executable Configuration
|
# Executable Configuration
|
||||||
SYN_ARGS = --parallel --read_settings_files=on
|
SYN_ARGS = --parallel --read_settings_files=on
|
||||||
FIT_ARGS = --part=$(DEVICE) --read_settings_files=on
|
FIT_ARGS = --parallel --part=$(DEVICE) --read_settings_files=on
|
||||||
ASM_ARGS =
|
ASM_ARGS =
|
||||||
STA_ARGS = --do_report_timing
|
STA_ARGS = --parallel --do_report_timing
|
||||||
|
|
||||||
# Build targets
|
# Build targets
|
||||||
all: $(PROJECT).sta.rpt
|
all: $(PROJECT).sta.rpt
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user