Merge branch 'master' of https://github.gatech.edu/casl/Vortex
This commit is contained in:
@@ -213,7 +213,7 @@ extern int vx_dump_perf(vx_device_h device, FILE* stream) {
|
||||
uint64_t dcache_miss_w_per_core;
|
||||
ret |= vx_csr_get_l(device, core_id, CSR_MPM_DCACHE_MISS_W, CSR_MPM_DCACHE_MISS_W_H, &dcache_miss_w_per_core);
|
||||
int dcache_write_hit_ratio = (int)((1.0 - (double(dcache_miss_w_per_core) / double(dcache_writes_per_core))) * 100);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache wrire misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
|
||||
if (num_cores > 1) fprintf(stream, "PERF: core%d: dcache write misses=%ld (hit ratio=%d%%)\n", core_id, dcache_miss_w_per_core, dcache_write_hit_ratio);
|
||||
dcache_write_misses += dcache_miss_w_per_core;
|
||||
// bank_stalls
|
||||
uint64_t dcache_bank_st_per_core;
|
||||
|
||||
@@ -185,7 +185,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
*value = IMPLEMENTATION_ID;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = NUM_CORES;
|
||||
*value = NUM_CORES * NUM_CLUSTERS;
|
||||
break;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
*value = NUM_WARPS;
|
||||
|
||||
@@ -12,8 +12,8 @@ CXXFLAGS += -DDUMP_PERF_STATS
|
||||
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=2 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=4 -DL2_ENABLE=1
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
#CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=2 -DL2_ENABLE=0
|
||||
CONFIGS ?= -DNUM_CLUSTERS=1 -DNUM_CORES=1
|
||||
|
||||
CXXFLAGS += $(CONFIGS)
|
||||
|
||||
@@ -21,7 +21,7 @@ LDFLAGS += -shared -pthread
|
||||
#LDFLAGS += -dynamiclib -pthread
|
||||
|
||||
SRCS = vortex.cpp ../common/vx_utils.cpp
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/instr.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
SRCS += $(SIMX_DIR)/util.cpp $(SIMX_DIR)/args.cpp $(SIMX_DIR)/mem.cpp $(SIMX_DIR)/warp.cpp $(SIMX_DIR)/core.cpp $(SIMX_DIR)/decode.cpp $(SIMX_DIR)/execute.cpp
|
||||
|
||||
# Debugging
|
||||
ifdef DEBUG
|
||||
|
||||
@@ -144,19 +144,18 @@ private:
|
||||
void run() {
|
||||
vortex::ArchDef arch("rv32i", NUM_CORES, NUM_WARPS, NUM_THREADS);
|
||||
vortex::Decoder decoder(arch);
|
||||
vortex::MemoryUnit mu(PAGE_SIZE, arch.getWordSize(), true);
|
||||
vortex::MemoryUnit mu(PAGE_SIZE, arch.wsize(), true);
|
||||
mu.attach(ram_, 0);
|
||||
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores(NUM_CORES);
|
||||
for (size_t i = 0; i < NUM_CORES; ++i) {
|
||||
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu);
|
||||
std::vector<std::shared_ptr<vortex::Core>> cores(arch.num_cores());
|
||||
for (int i = 0; i < arch.num_cores(); ++i) {
|
||||
cores[i] = std::make_shared<vortex::Core>(arch, decoder, mu, i);
|
||||
}
|
||||
|
||||
bool running;
|
||||
|
||||
do {
|
||||
running = false;
|
||||
for (size_t i = 0; i < NUM_CORES; ++i) {
|
||||
for (int i = 0; i < arch.num_cores(); ++i) {
|
||||
if (!cores[i]->running())
|
||||
continue;
|
||||
running = true;
|
||||
@@ -236,7 +235,7 @@ extern int vx_dev_caps(vx_device_h hdevice, unsigned caps_id, unsigned *value) {
|
||||
*value = IMPLEMENTATION_ID;
|
||||
break;
|
||||
case VX_CAPS_MAX_CORES:
|
||||
*value = NUM_CORES;
|
||||
*value = NUM_CORES * NUM_CLUSTERS;
|
||||
break;
|
||||
case VX_CAPS_MAX_WARPS:
|
||||
*value = NUM_WARPS;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
`endif
|
||||
|
||||
`ifndef NUM_CORES
|
||||
`define NUM_CORES 4
|
||||
`define NUM_CORES 1
|
||||
`endif
|
||||
|
||||
`ifndef NUM_WARPS
|
||||
@@ -235,11 +235,6 @@
|
||||
|
||||
// Pipeline Queues ////////////////////////////////////////////////////////////
|
||||
|
||||
// Size of instruction queue
|
||||
`ifndef IBUF_SIZE
|
||||
`define IBUF_SIZE 4
|
||||
`endif
|
||||
|
||||
// Size of LSU Request Queue
|
||||
`ifndef LSUQ_SIZE
|
||||
`define LSUQ_SIZE 8
|
||||
|
||||
@@ -7,7 +7,7 @@ module VX_csr_data #(
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_memsys_if perf_memsys_if,
|
||||
VX_perf_pipeline_if perf_pipeline_if,
|
||||
`endif
|
||||
|
||||
@@ -123,61 +123,61 @@ module VX_csr_data #(
|
||||
`ifdef PERF_ENABLE
|
||||
// PERF: pipeline
|
||||
`CSR_MPM_IBUF_ST : read_data_r = perf_pipeline_if.ibf_stalls[31:0];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = perf_pipeline_if.ibf_stalls[63:32];
|
||||
`CSR_MPM_IBUF_ST_H : read_data_r = 32'(perf_pipeline_if.ibf_stalls[43:32]);
|
||||
`CSR_MPM_SCRB_ST : read_data_r = perf_pipeline_if.scb_stalls[31:0];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = perf_pipeline_if.scb_stalls[63:32];
|
||||
`CSR_MPM_SCRB_ST_H : read_data_r = 32'(perf_pipeline_if.scb_stalls[43:32]);
|
||||
`CSR_MPM_ALU_ST : read_data_r = perf_pipeline_if.alu_stalls[31:0];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = perf_pipeline_if.alu_stalls[63:32];
|
||||
`CSR_MPM_ALU_ST_H : read_data_r = 32'(perf_pipeline_if.alu_stalls[43:32]);
|
||||
`CSR_MPM_LSU_ST : read_data_r = perf_pipeline_if.lsu_stalls[31:0];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = perf_pipeline_if.lsu_stalls[63:32];
|
||||
`CSR_MPM_LSU_ST_H : read_data_r = 32'(perf_pipeline_if.lsu_stalls[43:32]);
|
||||
`CSR_MPM_CSR_ST : read_data_r = perf_pipeline_if.csr_stalls[31:0];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = perf_pipeline_if.csr_stalls[63:32];
|
||||
`CSR_MPM_CSR_ST_H : read_data_r = 32'(perf_pipeline_if.csr_stalls[43:32]);
|
||||
`CSR_MPM_FPU_ST : read_data_r = perf_pipeline_if.fpu_stalls[31:0];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = perf_pipeline_if.fpu_stalls[63:32];
|
||||
`CSR_MPM_FPU_ST_H : read_data_r = 32'(perf_pipeline_if.fpu_stalls[43:32]);
|
||||
`CSR_MPM_GPU_ST : read_data_r = perf_pipeline_if.gpu_stalls[31:0];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = perf_pipeline_if.gpu_stalls[63:32];
|
||||
`CSR_MPM_GPU_ST_H : read_data_r = 32'(perf_pipeline_if.gpu_stalls[43:32]);
|
||||
// PERF: icache
|
||||
`CSR_MPM_ICACHE_READS : read_data_r = perf_memsys_if.icache_reads[31:0];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = perf_memsys_if.icache_reads[63:32];
|
||||
`CSR_MPM_ICACHE_READS_H : read_data_r = 32'(perf_memsys_if.icache_reads[43:32]);
|
||||
`CSR_MPM_ICACHE_MISS_R : read_data_r = perf_memsys_if.icache_read_misses[31:0];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = perf_memsys_if.icache_read_misses[63:32];
|
||||
`CSR_MPM_ICACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.icache_read_misses[43:32]);
|
||||
`CSR_MPM_ICACHE_PIPE_ST : read_data_r = perf_memsys_if.icache_pipe_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = perf_memsys_if.icache_pipe_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.icache_pipe_stalls[43:32]);
|
||||
`CSR_MPM_ICACHE_CRSP_ST : read_data_r = perf_memsys_if.icache_crsp_stalls[31:0];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = perf_memsys_if.icache_crsp_stalls[63:32];
|
||||
`CSR_MPM_ICACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.icache_crsp_stalls[43:32]);
|
||||
// PERF: dcache
|
||||
`CSR_MPM_DCACHE_READS : read_data_r = perf_memsys_if.dcache_reads[31:0];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = perf_memsys_if.dcache_reads[63:32];
|
||||
`CSR_MPM_DCACHE_READS_H : read_data_r = 32'(perf_memsys_if.dcache_reads[43:32]);
|
||||
`CSR_MPM_DCACHE_WRITES : read_data_r = perf_memsys_if.dcache_writes[31:0];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = perf_memsys_if.dcache_writes[63:32];
|
||||
`CSR_MPM_DCACHE_WRITES_H : read_data_r = 32'(perf_memsys_if.dcache_writes[43:32]);
|
||||
`CSR_MPM_DCACHE_MISS_R : read_data_r = perf_memsys_if.dcache_read_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = perf_memsys_if.dcache_read_misses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_R_H : read_data_r = 32'(perf_memsys_if.dcache_read_misses[43:32]);
|
||||
`CSR_MPM_DCACHE_MISS_W : read_data_r = perf_memsys_if.dcache_write_misses[31:0];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = perf_memsys_if.dcache_write_misses[63:32];
|
||||
`CSR_MPM_DCACHE_MISS_W_H : read_data_r = 32'(perf_memsys_if.dcache_write_misses[43:32]);
|
||||
`CSR_MPM_DCACHE_BANK_ST : read_data_r = perf_memsys_if.dcache_bank_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = perf_memsys_if.dcache_bank_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_BANK_ST_H : read_data_r = 32'(perf_memsys_if.dcache_bank_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_MSHR_ST : read_data_r = perf_memsys_if.dcache_mshr_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = perf_memsys_if.dcache_mshr_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_MSHR_ST_H : read_data_r = 32'(perf_memsys_if.dcache_mshr_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_PIPE_ST : read_data_r = perf_memsys_if.dcache_pipe_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = perf_memsys_if.dcache_pipe_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_PIPE_ST_H : read_data_r = 32'(perf_memsys_if.dcache_pipe_stalls[43:32]);
|
||||
`CSR_MPM_DCACHE_CRSP_ST : read_data_r = perf_memsys_if.dcache_crsp_stalls[31:0];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = perf_memsys_if.dcache_crsp_stalls[63:32];
|
||||
`CSR_MPM_DCACHE_CRSP_ST_H : read_data_r = 32'(perf_memsys_if.dcache_crsp_stalls[43:32]);
|
||||
// PERF: smem
|
||||
`CSR_MPM_SMEM_READS : read_data_r = perf_memsys_if.smem_reads[31:0];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = perf_memsys_if.smem_reads[63:32];
|
||||
`CSR_MPM_SMEM_READS_H : read_data_r = 32'(perf_memsys_if.smem_reads[43:32]);
|
||||
`CSR_MPM_SMEM_WRITES : read_data_r = perf_memsys_if.smem_writes[31:0];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = perf_memsys_if.smem_writes[63:32];
|
||||
`CSR_MPM_SMEM_WRITES_H : read_data_r = 32'(perf_memsys_if.smem_writes[43:32]);
|
||||
`CSR_MPM_SMEM_BANK_ST : read_data_r = perf_memsys_if.smem_bank_stalls[31:0];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = perf_memsys_if.smem_bank_stalls[63:32];
|
||||
`CSR_MPM_SMEM_BANK_ST_H : read_data_r = 32'(perf_memsys_if.smem_bank_stalls[43:32]);
|
||||
// PERF: DRAM
|
||||
`CSR_MPM_DRAM_READS : read_data_r = perf_memsys_if.dram_reads[31:0];
|
||||
`CSR_MPM_DRAM_READS_H : read_data_r = perf_memsys_if.dram_reads[63:32];
|
||||
`CSR_MPM_DRAM_READS_H : read_data_r = 32'(perf_memsys_if.dram_reads[43:32]);
|
||||
`CSR_MPM_DRAM_WRITES : read_data_r = perf_memsys_if.dram_writes[31:0];
|
||||
`CSR_MPM_DRAM_WRITES_H : read_data_r = perf_memsys_if.dram_writes[63:32];
|
||||
`CSR_MPM_DRAM_WRITES_H : read_data_r = 32'(perf_memsys_if.dram_writes[43:32]);
|
||||
`CSR_MPM_DRAM_ST : read_data_r = perf_memsys_if.dram_stalls[31:0];
|
||||
`CSR_MPM_DRAM_ST_H : read_data_r = perf_memsys_if.dram_stalls[63:32];
|
||||
`CSR_MPM_DRAM_ST_H : read_data_r = 32'(perf_memsys_if.dram_stalls[43:32]);
|
||||
`CSR_MPM_DRAM_LAT : read_data_r = perf_memsys_if.dram_latency[31:0];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = perf_memsys_if.dram_latency[63:32];
|
||||
`CSR_MPM_DRAM_LAT_H : read_data_r = 32'(perf_memsys_if.dram_latency[43:32]);
|
||||
`endif
|
||||
|
||||
`CSR_SATP : read_data_r = 32'(csr_satp);
|
||||
@@ -195,9 +195,9 @@ module VX_csr_data #(
|
||||
`CSR_PMPADDR0 : read_data_r = 32'(csr_pmpaddr[0]);
|
||||
|
||||
`CSR_CYCLE : read_data_r = csr_cycle[31:0];
|
||||
`CSR_CYCLE_H : read_data_r = csr_cycle[63:32];
|
||||
`CSR_CYCLE_H : read_data_r = 32'(csr_cycle[43:32]);
|
||||
`CSR_INSTRET : read_data_r = csr_instret[31:0];
|
||||
`CSR_INSTRET_H : read_data_r = csr_instret[63:32];
|
||||
`CSR_INSTRET_H : read_data_r = 32'(csr_instret[43:32]);
|
||||
|
||||
`CSR_MVENDORID : read_data_r = `VENDOR_ID;
|
||||
`CSR_MARCHID : read_data_r = `ARCHITECTURE_ID;
|
||||
|
||||
@@ -45,8 +45,7 @@ module VX_csr_io_arb (
|
||||
// responses
|
||||
wire csr_io_rsp_ready;
|
||||
VX_skid_buffer #(
|
||||
.DATAW (32),
|
||||
.BUFFERED (1)
|
||||
.DATAW (32)
|
||||
) csr_io_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -39,8 +39,7 @@ module VX_databus_arb (
|
||||
&& (core_req_if.addr[i][REQ_ADDRW-1:SMEM_ASHIFT-REQ_ASHIFT] < (32-SMEM_ASHIFT)'(`SHARED_MEM_BASE_ADDR >> SMEM_ASHIFT));
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (1)
|
||||
.DATAW (REQ_DATAW)
|
||||
) cache_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -53,8 +52,7 @@ module VX_databus_arb (
|
||||
);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (REQ_DATAW),
|
||||
.BUFFERED (1)
|
||||
.DATAW (REQ_DATAW)
|
||||
) smem_out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -18,20 +18,15 @@ module VX_decode #(
|
||||
`UNUSED_VAR (clk)
|
||||
`UNUSED_VAR (reset)
|
||||
|
||||
reg [`EX_BITS-1:0] ex_type;
|
||||
reg [`OP_BITS-1:0] op_type;
|
||||
reg [`MOD_BITS-1:0] op_mod;
|
||||
reg [31:0] imm;
|
||||
reg use_rd, use_rs1, use_rs2, use_rs3, use_PC, use_imm;
|
||||
reg rd_fp, rs1_fp, rs2_fp;
|
||||
reg is_join, is_wstall;
|
||||
|
||||
wire [31:0] instr = ifetch_rsp_if.instr;
|
||||
|
||||
reg [`ALU_BITS-1:0] alu_op;
|
||||
reg [`BR_BITS-1:0] br_op;
|
||||
reg [`MUL_BITS-1:0] mul_op;
|
||||
reg [`LSU_BITS-1:0] lsu_op;
|
||||
reg [`CSR_BITS-1:0] csr_op;
|
||||
reg [`FPU_BITS-1:0] fpu_op;
|
||||
reg [`GPU_BITS-1:0] gpu_op;
|
||||
|
||||
reg [19:0] upper_imm;
|
||||
reg [31:0] jalx_offset;
|
||||
reg [31:0] src2_imm;
|
||||
|
||||
wire [6:0] opcode = instr[6:0];
|
||||
wire [2:0] func3 = instr[14:12];
|
||||
wire [6:0] func7 = instr[31:25];
|
||||
@@ -42,360 +37,378 @@ module VX_decode #(
|
||||
wire [4:0] rs2 = instr[24:20];
|
||||
wire [4:0] rs3 = instr[31:27];
|
||||
|
||||
// opcode types
|
||||
wire is_rtype = (opcode == `INST_R);
|
||||
wire is_ltype = (opcode == `INST_L);
|
||||
wire is_itype = (opcode == `INST_I);
|
||||
wire is_stype = (opcode == `INST_S);
|
||||
wire is_btype = (opcode == `INST_B);
|
||||
wire is_jal = (opcode == `INST_JAL);
|
||||
wire is_jalr = (opcode == `INST_JALR);
|
||||
wire is_lui = (opcode == `INST_LUI);
|
||||
wire is_auipc = (opcode == `INST_AUIPC);
|
||||
wire is_jals = (opcode == `INST_SYS) && (func3 == 0);
|
||||
wire is_csr = (opcode == `INST_SYS) && (func3 != 0);
|
||||
wire is_gpu = (opcode == `INST_GPU);
|
||||
|
||||
// upper immediate
|
||||
|
||||
wire [19:0] upper_imm = {func7, rs2, rs1, func3};
|
||||
wire [11:0] alu_imm = ((func3 == 3'h1) || (func3 == 3'h5)) ? {{7{1'b0}}, rs2} : u_12;
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_LUI: upper_imm = {func7, rs2, rs1, func3};
|
||||
`INST_AUIPC: upper_imm = {func7, rs2, rs1, func3};
|
||||
default: upper_imm = 20'h0;
|
||||
endcase
|
||||
end
|
||||
|
||||
// I-type immediate
|
||||
ex_type = `EX_NOP;
|
||||
op_type = 'x;
|
||||
op_mod = 'x;
|
||||
imm = 'x;
|
||||
use_rd = 0;
|
||||
use_rs1 = 0;
|
||||
use_rs2 = 0;
|
||||
use_rs3 = 0;
|
||||
use_PC = 0;
|
||||
use_imm = 0;
|
||||
rd_fp = 0;
|
||||
rs1_fp = 0;
|
||||
rs2_fp = 0;
|
||||
is_join = 0;
|
||||
is_wstall = 0;
|
||||
|
||||
wire alu_shift_i = (func3 == 3'h1) || (func3 == 3'h5);
|
||||
wire [11:0] alu_shift_imm = {{7{1'b0}}, rs2};
|
||||
wire [11:0] alu_imm = alu_shift_i ? alu_shift_imm : u_12;
|
||||
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_I: src2_imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
`INST_S,
|
||||
`INST_FS: src2_imm = {{20{func7[6]}}, func7, rd};
|
||||
`INST_L,
|
||||
`INST_FL: src2_imm = {{20{u_12[11]}}, u_12};
|
||||
`INST_B: src2_imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
default: src2_imm = 'x;
|
||||
endcase
|
||||
end
|
||||
|
||||
// JAL
|
||||
|
||||
wire [20:0] jal_imm = {instr[31], instr[19:12], instr[20], instr[30:21], 1'b0};
|
||||
wire [31:0] jal_offset = {{11{jal_imm[20]}}, jal_imm};
|
||||
wire [11:0] jalr_imm = {func7, rs2};
|
||||
wire [31:0] jalr_offset = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
|
||||
always @(*) begin
|
||||
case (opcode)
|
||||
`INST_JAL: jalx_offset = jal_offset;
|
||||
`INST_JALR: jalx_offset = jalr_offset;
|
||||
default: jalx_offset = 32'd4;
|
||||
endcase
|
||||
end
|
||||
|
||||
// BRANCH
|
||||
|
||||
wire is_br = (is_btype || is_jal || is_jalr || is_jals);
|
||||
|
||||
always @(*) begin
|
||||
br_op = `BR_OTHER;
|
||||
case (opcode)
|
||||
`INST_B: begin
|
||||
case (opcode)
|
||||
`INST_I: begin
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: br_op = `BR_EQ;
|
||||
3'h1: br_op = `BR_NE;
|
||||
3'h4: br_op = `BR_LT;
|
||||
3'h5: br_op = `BR_GE;
|
||||
3'h6: br_op = `BR_LTU;
|
||||
3'h7: br_op = `BR_GEU;
|
||||
default:;
|
||||
3'h0: op_type = `OP_BITS'(`ALU_ADD);
|
||||
3'h1: op_type = `OP_BITS'(`ALU_SLL);
|
||||
3'h2: op_type = `OP_BITS'(`ALU_SLT);
|
||||
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
|
||||
3'h4: op_type = `OP_BITS'(`ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
|
||||
3'h6: op_type = `OP_BITS'(`ALU_OR);
|
||||
3'h7: op_type = `OP_BITS'(`ALU_AND);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 0;
|
||||
imm = {{20{alu_imm[11]}}, alu_imm};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
end
|
||||
`INST_R: begin
|
||||
ex_type = `EX_ALU;
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (func7[0]) begin
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`MUL_MUL);
|
||||
3'h1: op_type = `OP_BITS'(`MUL_MULH);
|
||||
3'h2: op_type = `OP_BITS'(`MUL_MULHSU);
|
||||
3'h3: op_type = `OP_BITS'(`MUL_MULHU);
|
||||
3'h4: op_type = `OP_BITS'(`MUL_DIV);
|
||||
3'h5: op_type = `OP_BITS'(`MUL_DIVU);
|
||||
3'h6: op_type = `OP_BITS'(`MUL_REM);
|
||||
3'h7: op_type = `OP_BITS'(`MUL_REMU);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 2;
|
||||
end else
|
||||
`endif
|
||||
begin
|
||||
case (func3)
|
||||
3'h0: op_type = (func7[5]) ? `OP_BITS'(`ALU_SUB) : `OP_BITS'(`ALU_ADD);
|
||||
3'h1: op_type = `OP_BITS'(`ALU_SLL);
|
||||
3'h2: op_type = `OP_BITS'(`ALU_SLT);
|
||||
3'h3: op_type = `OP_BITS'(`ALU_SLTU);
|
||||
3'h4: op_type = `OP_BITS'(`ALU_XOR);
|
||||
3'h5: op_type = (func7[5]) ? `OP_BITS'(`ALU_SRA) : `OP_BITS'(`ALU_SRL);
|
||||
3'h6: op_type = `OP_BITS'(`ALU_OR);
|
||||
3'h7: op_type = `OP_BITS'(`ALU_AND);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 0;
|
||||
end
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
end
|
||||
`INST_LUI: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_LUI);
|
||||
op_mod = 0;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
end
|
||||
`INST_AUIPC: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`ALU_AUIPC);
|
||||
op_mod = 0;
|
||||
imm = {upper_imm, 12'(0)};
|
||||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
end
|
||||
`INST_JAL: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`BR_JAL);
|
||||
op_mod = 1;
|
||||
imm = {{11{jal_imm[20]}}, jal_imm};
|
||||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
`INST_JALR: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_type = `OP_BITS'(`BR_JALR);
|
||||
op_mod = 1;
|
||||
imm = {{20{jalr_imm[11]}}, jalr_imm};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
`INST_B: begin
|
||||
ex_type = `EX_ALU;
|
||||
case (func3)
|
||||
3'h0: op_type = `OP_BITS'(`BR_EQ);
|
||||
3'h1: op_type = `OP_BITS'(`BR_NE);
|
||||
3'h4: op_type = `OP_BITS'(`BR_LT);
|
||||
3'h5: op_type = `OP_BITS'(`BR_GE);
|
||||
3'h6: op_type = `OP_BITS'(`BR_LTU);
|
||||
3'h7: op_type = `OP_BITS'(`BR_GEU);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
imm = {{20{instr[31]}}, instr[7], instr[30:25], instr[11:8], 1'b0};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
`INST_SYS : begin
|
||||
if (func3 == 0) begin
|
||||
ex_type = `EX_ALU;
|
||||
case (u_12)
|
||||
12'h000: op_type = `OP_BITS'(`BR_ECALL);
|
||||
12'h001: op_type = `OP_BITS'(`BR_EBREAK);
|
||||
12'h302: op_type = `OP_BITS'(`BR_MRET);
|
||||
12'h102: op_type = `OP_BITS'(`BR_SRET);
|
||||
12'h7B2: op_type = `OP_BITS'(`BR_DRET);
|
||||
default:;
|
||||
endcase
|
||||
op_mod = 1;
|
||||
imm = 32'd4;
|
||||
use_rd = 1;
|
||||
use_PC = 1;
|
||||
use_imm = 1;
|
||||
end else begin
|
||||
ex_type = `EX_CSR;
|
||||
case (func3[1:0])
|
||||
2'h0: op_type = `OP_BITS'(`CSR_RW);
|
||||
2'h1: op_type = `OP_BITS'(`CSR_RW);
|
||||
2'h2: op_type = `OP_BITS'(`CSR_RS);
|
||||
2'h3: op_type = `OP_BITS'(`CSR_RC);
|
||||
default:;
|
||||
endcase
|
||||
imm = 32'(u_12);
|
||||
use_rd = 1;
|
||||
use_rs1 = !func3[2];
|
||||
use_imm = func3[2];
|
||||
end
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FL,
|
||||
`endif
|
||||
`INST_L: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b0, func3});
|
||||
imm = {{20{u_12[11]}}, u_12};
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
rd_fp = (opcode == `INST_FL);
|
||||
`endif
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FS,
|
||||
`endif
|
||||
`INST_S: begin
|
||||
ex_type = `EX_LSU;
|
||||
op_type = `OP_BITS'({1'b1, func3});
|
||||
imm = {{20{func7[6]}}, func7, rd};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
`ifdef EXT_F_ENABLE
|
||||
rs2_fp = (opcode == `INST_FS);
|
||||
`endif
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
`INST_FMADD,
|
||||
`INST_FMSUB,
|
||||
`INST_FNMSUB,
|
||||
`INST_FNMADD: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_type = `OP_BITS'(opcode[3:0]);
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
use_rs3 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
end
|
||||
`INST_FCI: begin
|
||||
ex_type = `EX_FPU;
|
||||
op_mod = func3;
|
||||
use_rd = 1;
|
||||
case (func7)
|
||||
7'h00, // FADD
|
||||
7'h04, // FSUB
|
||||
7'h08, // FMUL
|
||||
7'h0C: // FDIV
|
||||
begin
|
||||
op_type = `OP_BITS'(func7[3:0]);
|
||||
use_rd = 1;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
end
|
||||
7'h2C: begin
|
||||
op_type = `OP_BITS'(`FPU_SQRT);
|
||||
use_rs1 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
end
|
||||
7'h50: begin
|
||||
op_type = `OP_BITS'(`FPU_CMP);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
end
|
||||
7'h60: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTWUS) : `OP_BITS'(`FPU_CVTWS);
|
||||
use_rs1 = 1;
|
||||
rs1_fp = 1;
|
||||
end
|
||||
7'h68: begin
|
||||
op_type = (instr[20]) ? `OP_BITS'(`FPU_CVTSWU) : `OP_BITS'(`FPU_CVTSW);
|
||||
use_rs1 = 1;
|
||||
rd_fp = 1;
|
||||
end
|
||||
7'h10: begin
|
||||
// FSGNJ=0, FSGNJN=1, FSGNJX=2
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = {1'b0, func3[1:0]};
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rd_fp = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
end
|
||||
7'h14: begin
|
||||
// FMIN=3, FMAX=4
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = func3[0] ? 4 : 3;
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
rs1_fp = 1;
|
||||
rs2_fp = 1;
|
||||
end
|
||||
7'h70: begin
|
||||
if (func3[0]) begin
|
||||
// FCLASS
|
||||
op_type = `OP_BITS'(`FPU_CLASS);
|
||||
end else begin
|
||||
// FMV.X.W=5
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = 5;
|
||||
end
|
||||
use_rs1 = 1;
|
||||
rs1_fp = 1;
|
||||
end
|
||||
7'h78: begin
|
||||
// FMV.W.X=6
|
||||
op_type = `OP_BITS'(`FPU_MISC);
|
||||
op_mod = 6;
|
||||
rd_fp = 1;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_JAL: br_op = `BR_JAL;
|
||||
`INST_JALR: br_op = `BR_JALR;
|
||||
`INST_SYS: begin
|
||||
if (is_jals && u_12 == 12'h000) br_op = `BR_ECALL;
|
||||
if (is_jals && u_12 == 12'h001) br_op = `BR_EBREAK;
|
||||
if (is_jals && u_12 == 12'h302) br_op = `BR_MRET;
|
||||
if (is_jals && u_12 == 12'h102) br_op = `BR_SRET;
|
||||
if (is_jals && u_12 == 12'h7B2) br_op = `BR_DRET;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// ALU
|
||||
|
||||
always @(*) begin
|
||||
alu_op = `ALU_OTHER;
|
||||
if (is_lui) begin
|
||||
alu_op = `ALU_LUI;
|
||||
end else if (is_auipc) begin
|
||||
alu_op = `ALU_AUIPC;
|
||||
end else if (is_itype || is_rtype) begin
|
||||
case (func3)
|
||||
3'h0: alu_op = (is_rtype && func7 == 7'h20) ? `ALU_SUB : `ALU_ADD;
|
||||
3'h1: alu_op = `ALU_SLL;
|
||||
3'h2: alu_op = `ALU_SLT;
|
||||
3'h3: alu_op = `ALU_SLTU;
|
||||
3'h4: alu_op = `ALU_XOR;
|
||||
3'h5: alu_op = (func7 == 7'h0) ? `ALU_SRL : `ALU_SRA;
|
||||
3'h6: alu_op = `ALU_OR;
|
||||
3'h7: alu_op = `ALU_AND;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
|
||||
// CSR
|
||||
|
||||
wire is_csr_imm = is_csr && (func3[2] == 1);
|
||||
|
||||
always @(*) begin
|
||||
csr_op = `CSR_OTHER;
|
||||
case (func3[1:0])
|
||||
2'h1: csr_op = `CSR_RW;
|
||||
2'h2: csr_op = `CSR_RS;
|
||||
2'h3: csr_op = `CSR_RC;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
// MUL
|
||||
`ifdef EXT_M_ENABLE
|
||||
wire is_mul = is_rtype && (func7 == 7'h1);
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
case (func3)
|
||||
3'h0: mul_op = `MUL_MUL;
|
||||
3'h1: mul_op = `MUL_MULH;
|
||||
3'h2: mul_op = `MUL_MULHSU;
|
||||
3'h3: mul_op = `MUL_MULHU;
|
||||
3'h4: mul_op = `MUL_DIV;
|
||||
3'h5: mul_op = `MUL_DIVU;
|
||||
3'h6: mul_op = `MUL_REM;
|
||||
3'h7: mul_op = `MUL_REMU;
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`else
|
||||
wire is_mul = 0;
|
||||
always @(*) begin
|
||||
mul_op = `MUL_MUL;
|
||||
end
|
||||
`endif
|
||||
|
||||
// FPU
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire is_fl = (opcode == `INST_FL) && ((func3 == 2));
|
||||
wire is_fs = (opcode == `INST_FS) && ((func3 == 2));
|
||||
wire is_fci = (opcode == `INST_FCI);
|
||||
wire is_fmadd = (opcode == `INST_FMADD);
|
||||
wire is_fmsub = (opcode == `INST_FMSUB);
|
||||
wire is_fnmsub = (opcode == `INST_FNMSUB);
|
||||
wire is_fnmadd = (opcode == `INST_FNMADD);
|
||||
|
||||
wire is_fcmp = is_fci && (func7 == 7'h50); // compare
|
||||
wire is_fcvti = is_fci && (func7 == 7'h60); // convert to int
|
||||
wire is_fcvtf = is_fci && (func7 == 7'h68); // convert to float
|
||||
wire is_fmvw_clss = is_fci && (func7 == 7'h70); // move to int + class
|
||||
wire is_fmvx = is_fci && (func7 == 7'h78); // move to float
|
||||
wire is_fr4 = is_fmadd || is_fmsub || is_fnmsub || is_fnmadd;
|
||||
wire is_fpu_no_mem = is_fci || is_fr4;
|
||||
wire is_fpu = is_fl || is_fs || is_fci || is_fr4;
|
||||
|
||||
reg [`MOD_BITS-1:0] frm;
|
||||
reg is_fsqrt;
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = `FPU_MISC;
|
||||
frm = func3;
|
||||
is_fsqrt = 0;
|
||||
if (is_fr4) begin
|
||||
case ({is_fmadd, is_fmsub, is_fnmsub, is_fnmadd})
|
||||
4'b1000: fpu_op = `FPU_MADD;
|
||||
4'b0100: fpu_op = `FPU_MSUB;
|
||||
4'b0010: fpu_op = `FPU_NMSUB;
|
||||
4'b0001: fpu_op = `FPU_NMADD;
|
||||
default:;
|
||||
endcase
|
||||
end else begin
|
||||
case (func7)
|
||||
7'h00: fpu_op = `FPU_ADD;
|
||||
7'h04: fpu_op = `FPU_SUB;
|
||||
7'h08: fpu_op = `FPU_MUL;
|
||||
7'h0C: fpu_op = `FPU_DIV;
|
||||
7'h10: begin
|
||||
fpu_op = `FPU_MISC;
|
||||
frm = func3[1] ? 3'b010 : {2'b0, func3[0]};
|
||||
end
|
||||
7'h14: begin
|
||||
fpu_op = `FPU_MISC;
|
||||
frm = (func3 == 3'h0) ? 3'b011 : 3'b100;
|
||||
end
|
||||
7'h2C: begin
|
||||
fpu_op = `FPU_SQRT;
|
||||
is_fsqrt = 1;
|
||||
end
|
||||
7'h50: fpu_op = `FPU_CMP; // wb to intReg
|
||||
7'h60: fpu_op = (instr[20]) ? `FPU_CVTWUS : `FPU_CVTWS; // doesn't need rs2, and read rs1 from fpReg, WB to intReg
|
||||
7'h68: fpu_op = (instr[20]) ? `FPU_CVTSWU : `FPU_CVTSW; // doesn't need rs2, and read rs1 from intReg
|
||||
7'h70: begin
|
||||
fpu_op = (func3 == 3'h0) ? `FPU_MISC : `FPU_CLASS;
|
||||
frm = (func3 == 3'h0) ? 5 : func3;
|
||||
end
|
||||
7'h78: begin fpu_op = `FPU_MISC; frm = 6; end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
end
|
||||
`else
|
||||
wire is_fl = 0;
|
||||
wire is_fs = 0;
|
||||
wire is_fci = 0;
|
||||
wire is_fcvti = 0;
|
||||
wire is_fcvtf = 0;
|
||||
wire is_fmvw_clss = 0;
|
||||
wire is_fmvx = 0;
|
||||
wire is_fr4 = 0;
|
||||
wire is_fpu = 0;
|
||||
wire is_fpu_no_mem= 0;
|
||||
wire [2:0] frm = 0;
|
||||
wire is_fsqrt = 0;
|
||||
|
||||
always @(*) begin
|
||||
fpu_op = `FPU_MISC;
|
||||
end
|
||||
`endif
|
||||
|
||||
// LSU
|
||||
|
||||
wire is_lsu = (is_ltype || is_stype || is_fl || is_fs);
|
||||
always @(*) begin
|
||||
lsu_op = (is_fl || is_fs) ? `LSU_SW : func3;
|
||||
end
|
||||
|
||||
// GPU
|
||||
|
||||
reg is_gpu_bar, is_qpu_spawn;
|
||||
|
||||
always @(*) begin
|
||||
gpu_op = `GPU_OTHER;
|
||||
is_gpu_bar = 0;
|
||||
is_qpu_spawn = 0;
|
||||
case (func3)
|
||||
3'h0: gpu_op = `GPU_TMC;
|
||||
3'h1: begin
|
||||
gpu_op = `GPU_WSPAWN;
|
||||
is_qpu_spawn = 1;
|
||||
end
|
||||
3'h2: gpu_op = `GPU_SPLIT;
|
||||
3'h3: gpu_op = `GPU_JOIN;
|
||||
3'h4: begin
|
||||
gpu_op = `GPU_BAR;
|
||||
is_gpu_bar = 1;
|
||||
`endif
|
||||
`INST_GPU: begin
|
||||
ex_type = `EX_GPU;
|
||||
case (func3)
|
||||
3'h0: begin
|
||||
op_type = `OP_BITS'(`GPU_TMC);
|
||||
use_rs1 = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
3'h1: begin
|
||||
op_type = `OP_BITS'(`GPU_WSPAWN);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
end
|
||||
3'h2: begin
|
||||
op_type = `OP_BITS'(`GPU_SPLIT);
|
||||
use_rs1 = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
3'h3: begin
|
||||
op_type = `OP_BITS'(`GPU_JOIN);
|
||||
is_join = 1;
|
||||
end
|
||||
3'h4: begin
|
||||
op_type = `OP_BITS'(`GPU_BAR);
|
||||
use_rs1 = 1;
|
||||
use_rs2 = 1;
|
||||
is_wstall = 1;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// disable write to integer register r0
|
||||
wire use_rd_qual = use_rd && (rd_fp || (rd != 0));
|
||||
|
||||
wire use_rd = (is_fl || is_fci || is_fr4)
|
||||
|| ((is_itype || is_rtype || is_lui || is_auipc || is_csr || is_jal || is_jalr || is_jals || is_ltype) && (rd != 0));
|
||||
// EX_ALU needs rs1=0 for LUI operation
|
||||
wire [4:0] rs1_qual = (opcode == `INST_LUI) ? 5'h0 : rs1;
|
||||
|
||||
wire use_rs1 = is_fpu
|
||||
|| is_gpu
|
||||
|| (is_jalr || is_btype || is_ltype || is_stype || is_itype || is_rtype || !is_csr_imm || is_gpu);
|
||||
|
||||
wire use_rs2 = (is_fpu && ~(is_fl || is_fsqrt || is_fcvti || is_fcvtf || is_fmvw_clss || is_fmvx))
|
||||
|| (is_gpu && (is_gpu_bar || is_qpu_spawn))
|
||||
|| (is_btype || is_stype || is_rtype);
|
||||
|
||||
wire use_rs3 = is_fr4;
|
||||
|
||||
wire [4:0] rs1_qual = is_lui ? 5'h0 : rs1;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign decode_if.valid = ifetch_rsp_if.valid;
|
||||
|
||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||
assign decode_if.tmask = ifetch_rsp_if.tmask;
|
||||
assign decode_if.PC = ifetch_rsp_if.PC;
|
||||
|
||||
assign decode_if.ex_type = is_gpu ? `EX_GPU :
|
||||
is_csr ? `EX_CSR :
|
||||
is_fpu_no_mem ? `EX_FPU :
|
||||
is_lsu ? `EX_LSU :
|
||||
(is_br || is_rtype || is_itype || is_lui || is_auipc) ? `EX_ALU :
|
||||
`EX_NOP;
|
||||
|
||||
assign decode_if.op_type = is_gpu ? `OP_BITS'(gpu_op) :
|
||||
is_csr ? `OP_BITS'(csr_op) :
|
||||
is_mul ? `OP_BITS'(mul_op) :
|
||||
is_fpu_no_mem ? `OP_BITS'(fpu_op) :
|
||||
is_lsu ? `OP_BITS'(lsu_op) :
|
||||
is_br ? `OP_BITS'(br_op) :
|
||||
`OP_BITS'(alu_op);
|
||||
|
||||
assign decode_if.wb = use_rd && (decode_if.ex_type != `EX_NOP);
|
||||
assign decode_if.valid = ifetch_rsp_if.valid;
|
||||
assign decode_if.wid = ifetch_rsp_if.wid;
|
||||
assign decode_if.tmask = ifetch_rsp_if.tmask;
|
||||
assign decode_if.PC = ifetch_rsp_if.PC;
|
||||
assign decode_if.ex_type = ex_type;
|
||||
assign decode_if.op_type = op_type;
|
||||
assign decode_if.op_mod = op_mod;
|
||||
assign decode_if.wb = use_rd_qual;
|
||||
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire rd_is_fp = is_fpu && ~(is_fcmp || is_fcvti || is_fmvw_clss);
|
||||
wire rs1_is_fp = is_fr4 || (is_fci && ~(is_fcvtf || is_fmvx));
|
||||
wire rs2_is_fp = is_fs || is_fr4 || is_fci;
|
||||
|
||||
assign decode_if.rd = {rd_is_fp, rd};
|
||||
assign decode_if.rs1 = {rs1_is_fp, rs1_qual};
|
||||
assign decode_if.rs2 = {rs2_is_fp, rs2};
|
||||
assign decode_if.rs3 = {1'b1, rs3};
|
||||
assign decode_if.rd = {rd_fp, rd};
|
||||
assign decode_if.rs1 = {rs1_fp, rs1_qual};
|
||||
assign decode_if.rs2 = {rs2_fp, rs2};
|
||||
assign decode_if.rs3 = {1'b1, rs3};
|
||||
`else
|
||||
assign decode_if.rd = rd;
|
||||
assign decode_if.rs1 = rs1_qual;
|
||||
assign decode_if.rs1 = rs1;
|
||||
assign decode_if.rs2 = rs2;
|
||||
assign decode_if.rs3 = rs3;
|
||||
`endif
|
||||
|
||||
assign decode_if.imm = imm;
|
||||
assign decode_if.rs1_is_PC = use_PC;
|
||||
assign decode_if.rs2_is_imm = use_imm;
|
||||
|
||||
assign decode_if.used_regs = (`NUM_REGS'(use_rd) << decode_if.rd)
|
||||
| (`NUM_REGS'(use_rs1) << decode_if.rs1)
|
||||
| (`NUM_REGS'(use_rs2) << decode_if.rs2)
|
||||
| (`NUM_REGS'(use_rs3) << decode_if.rs3);
|
||||
|
||||
assign decode_if.imm = (is_lui || is_auipc) ? {upper_imm, 12'(0)} :
|
||||
(is_jal || is_jalr || is_jals) ? jalx_offset :
|
||||
is_csr ? 32'(u_12) :
|
||||
src2_imm;
|
||||
|
||||
assign decode_if.rs1_is_PC = is_auipc || is_btype || is_jal || is_jals;
|
||||
assign decode_if.rs2_is_imm = is_itype || is_lui || is_auipc || is_csr_imm || is_br;
|
||||
|
||||
wire [`MOD_BITS-1:0] alu_mod = {1'b0, is_mul, is_br};
|
||||
assign decode_if.op_mod = is_fpu_no_mem ? frm : alu_mod;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
wire decode_fire_unqual = ifetch_rsp_if.valid && decode_if.ready;
|
||||
wire ifetch_rsp_fire = ifetch_rsp_if.valid && ifetch_rsp_if.ready;
|
||||
|
||||
assign join_if.valid = decode_fire_unqual && is_gpu && (gpu_op == `GPU_JOIN);
|
||||
assign join_if.valid = ifetch_rsp_fire && is_join;
|
||||
assign join_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
assign wstall_if.valid = decode_fire_unqual && (is_btype
|
||||
|| is_jal
|
||||
|| is_jalr
|
||||
|| (is_gpu && (gpu_op == `GPU_TMC
|
||||
|| gpu_op == `GPU_SPLIT
|
||||
|| gpu_op == `GPU_BAR)));
|
||||
assign wstall_if.valid = ifetch_rsp_fire && is_wstall;
|
||||
assign wstall_if.wid = ifetch_rsp_if.wid;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
assign ifetch_rsp_if.ready = decode_if.ready;
|
||||
|
||||
`ifdef DBG_PRINT_PIPELINE
|
||||
@@ -405,7 +418,7 @@ module VX_decode #(
|
||||
print_ex_type(decode_if.ex_type);
|
||||
$write(", op=");
|
||||
print_ex_op(decode_if.ex_type, decode_if.op_type, decode_if.op_mod);
|
||||
$write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm);
|
||||
$write(", mod=%0d, tmask=%b, wb=%b, rd=%0d, rs1=%0d, rs2=%0d, rs3=%0d, imm=%0h, use_pc=%b, use_imm=%b, use_regs=%b\n", decode_if.op_mod, decode_if.tmask, decode_if.wb, decode_if.rd, decode_if.rs1, decode_if.rs2, decode_if.rs3, decode_if.imm, decode_if.rs1_is_PC, decode_if.rs2_is_imm, decode_if.used_regs);
|
||||
end
|
||||
end
|
||||
`endif
|
||||
|
||||
@@ -132,12 +132,22 @@
|
||||
`define IS_DIV_OP(x) x[2]
|
||||
`define IS_MUL_MOD(x) x[1]
|
||||
|
||||
`define LSU_SB 3'h0
|
||||
`define LSU_SH 3'h1
|
||||
`define LSU_SW 3'h2
|
||||
`define LSU_UB 3'h4
|
||||
`define LSU_UH 3'h5
|
||||
`define LSU_BITS 3
|
||||
`define FMT_B 3'b000
|
||||
`define FMT_H 3'b001
|
||||
`define FMT_W 3'b010
|
||||
`define FMT_BU 3'b100
|
||||
`define FMT_HU 3'b101
|
||||
|
||||
`define LSU_LB 4'b0000
|
||||
`define LSU_LH 4'b0001
|
||||
`define LSU_LW 4'b0010
|
||||
`define LSU_LBU 4'b0100
|
||||
`define LSU_LHU 4'b0101
|
||||
`define LSU_SB 4'b1000
|
||||
`define LSU_SH 4'b1001
|
||||
`define LSU_SW 4'b1010
|
||||
`define LSU_BITS 4
|
||||
`define LSU_FMT(x) x[2:0]
|
||||
`define LSU_WSIZE(x) x[1:0]
|
||||
`define LSU_OP(x) x[`LSU_BITS-1:0]
|
||||
|
||||
@@ -149,21 +159,21 @@
|
||||
`define CSR_OP(x) x[`CSR_BITS-1:0]
|
||||
|
||||
`define FPU_ADD 4'h0
|
||||
`define FPU_SUB 4'h1
|
||||
`define FPU_MUL 4'h2
|
||||
`define FPU_DIV 4'h3
|
||||
`define FPU_SQRT 4'h4
|
||||
`define FPU_MADD 4'h5
|
||||
`define FPU_MSUB 4'h6
|
||||
`define FPU_NMSUB 4'h7
|
||||
`define FPU_NMADD 4'h8
|
||||
`define FPU_CVTWS 4'h9 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'hA // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'hB // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hC // FCVT.S.WU
|
||||
`define FPU_CLASS 4'hD
|
||||
`define FPU_CMP 4'hE
|
||||
`define FPU_MISC 4'hF // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_SUB 4'h4
|
||||
`define FPU_MUL 4'h8
|
||||
`define FPU_DIV 4'hC
|
||||
`define FPU_CVTWS 4'h1 // FCVT.W.S
|
||||
`define FPU_CVTWUS 4'h5 // FCVT.WU.S
|
||||
`define FPU_CVTSW 4'h9 // FCVT.S.W
|
||||
`define FPU_CVTSWU 4'hD // FCVT.S.WU
|
||||
`define FPU_SQRT 4'h2
|
||||
`define FPU_CLASS 4'h6
|
||||
`define FPU_CMP 4'hA
|
||||
`define FPU_MISC 4'hE // SGNJ, SGNJN, SGNJX, FMIN, FMAX, MVXW, MVWX
|
||||
`define FPU_MADD 4'h3
|
||||
`define FPU_MSUB 4'h7
|
||||
`define FPU_NMSUB 4'hB
|
||||
`define FPU_NMADD 4'hF
|
||||
`define FPU_BITS 4
|
||||
`define FPU_OP(x) x[`FPU_BITS-1:0]
|
||||
|
||||
|
||||
@@ -14,8 +14,8 @@ module VX_ibuffer #(
|
||||
VX_decode_if ibuf_deq_if
|
||||
);
|
||||
localparam DATAW = `NUM_THREADS + 32 + `EX_BITS + `OP_BITS + `FRM_BITS + 1 + (`NR_BITS * 4) + 32 + 1 + 1 + `NUM_REGS;
|
||||
localparam SIZE = `IBUF_SIZE;
|
||||
localparam ADDRW = $clog2(SIZE+1);
|
||||
localparam SIZE = 3;
|
||||
localparam ADDRW = $clog2(SIZE);
|
||||
localparam NWARPSW = $clog2(`NUM_WARPS+1);
|
||||
|
||||
reg [`NUM_WARPS-1:0][ADDRW-1:0] used_r;
|
||||
@@ -39,22 +39,17 @@ module VX_ibuffer #(
|
||||
wire push = writing && !is_slot0;
|
||||
wire pop = reading && !alm_empty_r[i];
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (SIZE),
|
||||
.BUFFERED (1)
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW)
|
||||
) queue (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (push),
|
||||
.pop (pop),
|
||||
.data_in (q_data_in),
|
||||
.data_out (q_data_prev[i]),
|
||||
`UNUSED_PIN (empty),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (alm_full),
|
||||
`UNUSED_PIN (size)
|
||||
.valid_in (push),
|
||||
.data_in (q_data_in),
|
||||
.ready_out(pop),
|
||||
.data_out (q_data_prev[i]),
|
||||
`UNUSED_PIN (ready_in),
|
||||
`UNUSED_PIN (valid_out)
|
||||
);
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -69,7 +64,7 @@ module VX_ibuffer #(
|
||||
empty_r[i] <= 0;
|
||||
if (used_r[i] == 1)
|
||||
alm_empty_r[i] <= 0;
|
||||
if (used_r[i] == ADDRW'(SIZE))
|
||||
if (used_r[i] == ADDRW'(SIZE-1))
|
||||
full_r[i] <= 1;
|
||||
end
|
||||
end else if (reading) begin
|
||||
|
||||
@@ -38,8 +38,7 @@ module VX_instr_demux (
|
||||
wire alu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_ALU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `ALU_BITS + `MOD_BITS + 32 + 1 + 1 + `NR_BITS + 1 + `NT_BITS + (2 * `NUM_THREADS * 32))
|
||||
) alu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -56,8 +55,7 @@ module VX_instr_demux (
|
||||
wire lsu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_LSU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `LSU_BITS + 32 + `NR_BITS + 1 + (2 * `NUM_THREADS * 32))
|
||||
) lsu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -74,8 +72,7 @@ module VX_instr_demux (
|
||||
wire csr_req_valid = execute_if.valid && (execute_if.ex_type == `EX_CSR);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32),
|
||||
.BUFFERED (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `CSR_BITS + `CSR_ADDR_BITS + `NR_BITS + 1 + 1 + `NR_BITS + 32)
|
||||
) csr_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -93,8 +90,7 @@ module VX_instr_demux (
|
||||
wire fpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_FPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32)),
|
||||
.BUFFERED (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + `FPU_BITS + `MOD_BITS + `NR_BITS + 1 + (3 * `NUM_THREADS * 32))
|
||||
) fpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -115,8 +111,7 @@ module VX_instr_demux (
|
||||
wire gpu_req_valid = execute_if.valid && (execute_if.ex_type == `EX_GPU);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32)),
|
||||
.BUFFERED (1)
|
||||
.DATAW (`NW_BITS + `NUM_THREADS + 32 + 32 + `GPU_BITS + `NR_BITS + 1 + (`NUM_THREADS * 32 + 32))
|
||||
) gpu_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -121,14 +121,14 @@ module VX_issue #(
|
||||
`SCOPE_ASSIGN (writeback_eop, writeback_if.eop);
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
reg [63:0] perf_ibf_stalls;
|
||||
reg [63:0] perf_scb_stalls;
|
||||
reg [63:0] perf_alu_stalls;
|
||||
reg [63:0] perf_lsu_stalls;
|
||||
reg [63:0] perf_csr_stalls;
|
||||
reg [63:0] perf_gpu_stalls;
|
||||
reg [43:0] perf_ibf_stalls;
|
||||
reg [43:0] perf_scb_stalls;
|
||||
reg [43:0] perf_alu_stalls;
|
||||
reg [43:0] perf_lsu_stalls;
|
||||
reg [43:0] perf_csr_stalls;
|
||||
reg [43:0] perf_gpu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
reg [63:0] perf_fpu_stalls;
|
||||
reg [43:0] perf_fpu_stalls;
|
||||
`endif
|
||||
|
||||
always @(posedge clk) begin
|
||||
@@ -144,26 +144,26 @@ module VX_issue #(
|
||||
`endif
|
||||
end else begin
|
||||
if (decode_if.valid & !decode_if.ready) begin
|
||||
perf_ibf_stalls <= perf_ibf_stalls + 64'd1;
|
||||
perf_ibf_stalls <= perf_ibf_stalls + 44'd1;
|
||||
end
|
||||
if (ibuf_deq_if.valid & scoreboard_delay) begin
|
||||
perf_scb_stalls <= perf_scb_stalls + 64'd1;
|
||||
perf_scb_stalls <= perf_scb_stalls + 44'd1;
|
||||
end
|
||||
if (alu_req_if.valid & !alu_req_if.ready) begin
|
||||
perf_alu_stalls <= perf_alu_stalls + 64'd1;
|
||||
perf_alu_stalls <= perf_alu_stalls + 44'd1;
|
||||
end
|
||||
if (lsu_req_if.valid & !lsu_req_if.ready) begin
|
||||
perf_lsu_stalls <= perf_lsu_stalls + 64'd1;
|
||||
perf_lsu_stalls <= perf_lsu_stalls + 44'd1;
|
||||
end
|
||||
if (csr_req_if.valid & !csr_req_if.ready) begin
|
||||
perf_csr_stalls <= perf_csr_stalls + 64'd1;
|
||||
perf_csr_stalls <= perf_csr_stalls + 44'd1;
|
||||
end
|
||||
if (gpu_req_if.valid & !gpu_req_if.ready) begin
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 64'd1;
|
||||
perf_gpu_stalls <= perf_gpu_stalls + 44'd1;
|
||||
end
|
||||
`ifdef EXT_F_ENABLE
|
||||
if (fpu_req_if.valid & !fpu_req_if.ready) begin
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 64'd1;
|
||||
perf_fpu_stalls <= perf_fpu_stalls + 44'd1;
|
||||
end
|
||||
`endif
|
||||
end
|
||||
|
||||
@@ -69,6 +69,8 @@ module VX_lsu_unit #(
|
||||
wire rsp_wb;
|
||||
wire [`LSU_BITS-1:0] rsp_type;
|
||||
wire rsp_is_dup;
|
||||
|
||||
`UNUSED_VAR (rsp_type)
|
||||
|
||||
reg [`LSUQ_SIZE-1:0][`NUM_THREADS-1:0] rsp_rem_mask;
|
||||
reg [`NUM_THREADS-1:0] rsp_rem_mask_n;
|
||||
@@ -220,11 +222,11 @@ module VX_lsu_unit #(
|
||||
end
|
||||
|
||||
always @(*) begin
|
||||
case (rsp_type)
|
||||
`LSU_SB: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0]));
|
||||
`LSU_SH: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0]));
|
||||
`LSU_UB: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0]));
|
||||
`LSU_UH: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0]));
|
||||
case (`LSU_FMT(rsp_type))
|
||||
`FMT_B: rsp_data[i] = 32'(signed'(rsp_data_shifted[7:0]));
|
||||
`FMT_H: rsp_data[i] = 32'(signed'(rsp_data_shifted[15:0]));
|
||||
`FMT_BU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[7:0]));
|
||||
`FMT_HU: rsp_data[i] = 32'(unsigned'(rsp_data_shifted[15:0]));
|
||||
default: rsp_data[i] = rsp_data_shifted;
|
||||
endcase
|
||||
end
|
||||
|
||||
@@ -323,19 +323,22 @@ end else begin
|
||||
assign perf_memsys_if.smem_bank_stalls = 0;
|
||||
end
|
||||
|
||||
reg [63:0] perf_dram_lat_per_cycle;
|
||||
reg [43:0] perf_dram_lat_per_cycle;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
perf_dram_lat_per_cycle <= 0;
|
||||
end else begin
|
||||
perf_dram_lat_per_cycle <= perf_dram_lat_per_cycle +
|
||||
64'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
|
||||
44'($signed(2'((dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready) && !(dram_rsp_if.valid && dram_rsp_if.ready)) -
|
||||
2'((dram_rsp_if.valid && dram_rsp_if.ready) && !(dram_req_if.valid && !dram_req_if.rw && dram_req_if.ready))));
|
||||
end
|
||||
end
|
||||
|
||||
reg [63:0] perf_dram_reads, perf_dram_writes, perf_dram_lat, perf_dram_stalls;
|
||||
reg [43:0] perf_dram_reads;
|
||||
reg [43:0] perf_dram_writes;
|
||||
reg [43:0] perf_dram_lat;
|
||||
reg [43:0] perf_dram_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
@@ -345,13 +348,13 @@ end
|
||||
perf_dram_stalls <= 0;
|
||||
end else begin
|
||||
if (dram_req_if.valid && dram_req_if.ready && !dram_req_if.rw) begin
|
||||
perf_dram_reads <= perf_dram_reads + 64'd1;
|
||||
perf_dram_reads <= perf_dram_reads + 44'd1;
|
||||
end
|
||||
if (dram_req_if.valid && dram_req_if.ready && dram_req_if.rw) begin
|
||||
perf_dram_writes <= perf_dram_writes + 64'd1;
|
||||
perf_dram_writes <= perf_dram_writes + 44'd1;
|
||||
end
|
||||
if (dram_req_if.valid && !dram_req_if.ready) begin
|
||||
perf_dram_stalls <= perf_dram_stalls + 64'd1;
|
||||
perf_dram_stalls <= perf_dram_stalls + 44'd1;
|
||||
end
|
||||
perf_dram_lat <= perf_dram_lat + perf_dram_lat_per_cycle;
|
||||
end
|
||||
|
||||
@@ -72,11 +72,14 @@ task print_ex_op (
|
||||
end
|
||||
`EX_LSU: begin
|
||||
case (`LSU_BITS'(op_type))
|
||||
`LSU_LB: $write("LB");
|
||||
`LSU_LH: $write("LH");
|
||||
`LSU_LW: $write("LW");
|
||||
`LSU_LBU:$write("LBU");
|
||||
`LSU_LHU:$write("LHU");
|
||||
`LSU_SB: $write("SB");
|
||||
`LSU_SH: $write("SH");
|
||||
`LSU_SW: $write("SW");
|
||||
`LSU_UB: $write("UB");
|
||||
`LSU_UH: $write("UH");
|
||||
default: $write("?");
|
||||
endcase
|
||||
end
|
||||
|
||||
3
hw/rtl/cache/VX_bank.v
vendored
3
hw/rtl/cache/VX_bank.v
vendored
@@ -488,8 +488,7 @@ module VX_bank #(
|
||||
end
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS),
|
||||
.BUFFERED (1)
|
||||
.DATAW (CORE_TAG_WIDTH + (1 + `WORD_WIDTH + `REQS_BITS) * NUM_PORTS)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
31
hw/rtl/cache/VX_cache.v
vendored
31
hw/rtl/cache/VX_cache.v
vendored
@@ -399,7 +399,8 @@ module VX_cache #(
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
// per cycle: core_reads, core_writes
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle, perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_reads_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_core_writes_per_cycle;
|
||||
reg [($clog2(NUM_REQS+1)-1):0] perf_crsp_stall_per_cycle;
|
||||
|
||||
assign perf_core_reads_per_cycle = $countones(core_req_valid & core_req_ready & ~core_req_rw);
|
||||
@@ -422,13 +423,13 @@ module VX_cache #(
|
||||
assign perf_mshr_stall_per_cycle = $countones(perf_mshr_stall_per_bank);
|
||||
assign perf_pipe_stall_per_cycle = $countones(perf_pipe_stall_per_bank);
|
||||
|
||||
reg [63:0] perf_core_reads;
|
||||
reg [63:0] perf_core_writes;
|
||||
reg [63:0] perf_read_misses;
|
||||
reg [63:0] perf_write_misses;
|
||||
reg [63:0] perf_mshr_stalls;
|
||||
reg [63:0] perf_pipe_stalls;
|
||||
reg [63:0] perf_crsp_stalls;
|
||||
reg [43:0] perf_core_reads;
|
||||
reg [43:0] perf_core_writes;
|
||||
reg [43:0] perf_read_misses;
|
||||
reg [43:0] perf_write_misses;
|
||||
reg [43:0] perf_mshr_stalls;
|
||||
reg [43:0] perf_pipe_stalls;
|
||||
reg [43:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
@@ -440,13 +441,13 @@ module VX_cache #(
|
||||
perf_pipe_stalls <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + 64'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses+ 64'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + 64'(perf_mshr_stall_per_cycle);
|
||||
perf_pipe_stalls <= perf_pipe_stalls + 64'(perf_pipe_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle);
|
||||
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
|
||||
perf_read_misses <= perf_read_misses + 44'(perf_read_miss_per_cycle);
|
||||
perf_write_misses <= perf_write_misses+ 44'(perf_write_miss_per_cycle);
|
||||
perf_mshr_stalls <= perf_mshr_stalls + 44'(perf_mshr_stall_per_cycle);
|
||||
perf_pipe_stalls <= perf_pipe_stalls + 44'(perf_pipe_stall_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
6
hw/rtl/cache/VX_cache_core_req_bank_sel.v
vendored
@@ -22,7 +22,7 @@ module VX_cache_core_req_bank_sel #(
|
||||
input wire reset,
|
||||
|
||||
`ifdef PERF_ENABLE
|
||||
output wire [63:0] bank_stalls,
|
||||
output wire [43:0] bank_stalls,
|
||||
`endif
|
||||
|
||||
input wire [NUM_REQS-1:0] core_req_valid,
|
||||
@@ -303,13 +303,13 @@ module VX_cache_core_req_bank_sel #(
|
||||
end
|
||||
end
|
||||
|
||||
reg [63:0] bank_stalls_r;
|
||||
reg [43:0] bank_stalls_r;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
bank_stalls_r <= 0;
|
||||
end else begin
|
||||
bank_stalls_r <= bank_stalls_r + 64'($countones(core_req_sel_r & ~core_req_ready));
|
||||
bank_stalls_r <= bank_stalls_r + 44'($countones(core_req_sel_r & ~core_req_ready));
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
6
hw/rtl/cache/VX_cache_core_rsp_merge.v
vendored
@@ -98,8 +98,7 @@ module VX_cache_core_rsp_merge #(
|
||||
wire core_rsp_valid_any = (| per_bank_core_rsp_valid);
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH)),
|
||||
.BUFFERED (1)
|
||||
.DATAW (NUM_REQS + CORE_TAG_WIDTH + (NUM_REQS *`WORD_WIDTH))
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -147,8 +146,7 @@ module VX_cache_core_rsp_merge #(
|
||||
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH),
|
||||
.BUFFERED (1)
|
||||
.DATAW (CORE_TAG_WIDTH + `WORD_WIDTH)
|
||||
) pipe_reg (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
15
hw/rtl/cache/VX_shared_mem.v
vendored
15
hw/rtl/cache/VX_shared_mem.v
vendored
@@ -205,8 +205,7 @@ module VX_shared_mem #(
|
||||
wire crsq_in_valid = ~creq_empty && ~core_rsp_rw;
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH),
|
||||
.BUFFERED (1)
|
||||
.DATAW (NUM_BANKS * (1 + `WORD_WIDTH) + CORE_TAG_WIDTH)
|
||||
) core_rsp_req (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
@@ -248,9 +247,9 @@ module VX_shared_mem #(
|
||||
assign perf_crsp_stall_per_cycle = $countones(core_rsp_valid & ~core_rsp_ready);
|
||||
end
|
||||
|
||||
reg [63:0] perf_core_reads;
|
||||
reg [63:0] perf_core_writes;
|
||||
reg [63:0] perf_crsp_stalls;
|
||||
reg [43:0] perf_core_reads;
|
||||
reg [43:0] perf_core_writes;
|
||||
reg [43:0] perf_crsp_stalls;
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
@@ -258,9 +257,9 @@ module VX_shared_mem #(
|
||||
perf_core_writes <= 0;
|
||||
perf_crsp_stalls <= 0;
|
||||
end else begin
|
||||
perf_core_reads <= perf_core_reads + 64'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 64'(perf_core_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 64'(perf_crsp_stall_per_cycle);
|
||||
perf_core_reads <= perf_core_reads + 44'(perf_core_reads_per_cycle);
|
||||
perf_core_writes <= perf_core_writes + 44'(perf_core_writes_per_cycle);
|
||||
perf_crsp_stalls <= perf_crsp_stalls + 44'(perf_crsp_stall_per_cycle);
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -5,14 +5,14 @@
|
||||
|
||||
interface VX_perf_cache_if ();
|
||||
|
||||
wire [63:0] reads;
|
||||
wire [63:0] writes;
|
||||
wire [63:0] read_misses;
|
||||
wire [63:0] write_misses;
|
||||
wire [63:0] bank_stalls;
|
||||
wire [63:0] mshr_stalls;
|
||||
wire [63:0] pipe_stalls;
|
||||
wire [63:0] crsp_stalls;
|
||||
wire [43:0] reads;
|
||||
wire [43:0] writes;
|
||||
wire [43:0] read_misses;
|
||||
wire [43:0] write_misses;
|
||||
wire [43:0] bank_stalls;
|
||||
wire [43:0] mshr_stalls;
|
||||
wire [43:0] pipe_stalls;
|
||||
wire [43:0] crsp_stalls;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -5,28 +5,28 @@
|
||||
|
||||
interface VX_perf_memsys_if ();
|
||||
|
||||
wire [63:0] icache_reads;
|
||||
wire [63:0] icache_read_misses;
|
||||
wire [63:0] icache_pipe_stalls;
|
||||
wire [63:0] icache_crsp_stalls;
|
||||
wire [43:0] icache_reads;
|
||||
wire [43:0] icache_read_misses;
|
||||
wire [43:0] icache_pipe_stalls;
|
||||
wire [43:0] icache_crsp_stalls;
|
||||
|
||||
wire [63:0] dcache_reads;
|
||||
wire [63:0] dcache_writes;
|
||||
wire [63:0] dcache_read_misses;
|
||||
wire [63:0] dcache_write_misses;
|
||||
wire [63:0] dcache_bank_stalls;
|
||||
wire [63:0] dcache_mshr_stalls;
|
||||
wire [63:0] dcache_pipe_stalls;
|
||||
wire [63:0] dcache_crsp_stalls;
|
||||
wire [43:0] dcache_reads;
|
||||
wire [43:0] dcache_writes;
|
||||
wire [43:0] dcache_read_misses;
|
||||
wire [43:0] dcache_write_misses;
|
||||
wire [43:0] dcache_bank_stalls;
|
||||
wire [43:0] dcache_mshr_stalls;
|
||||
wire [43:0] dcache_pipe_stalls;
|
||||
wire [43:0] dcache_crsp_stalls;
|
||||
|
||||
wire [63:0] smem_reads;
|
||||
wire [63:0] smem_writes;
|
||||
wire [63:0] smem_bank_stalls;
|
||||
wire [43:0] smem_reads;
|
||||
wire [43:0] smem_writes;
|
||||
wire [43:0] smem_bank_stalls;
|
||||
|
||||
wire [63:0] dram_reads;
|
||||
wire [63:0] dram_writes;
|
||||
wire [63:0] dram_stalls;
|
||||
wire [63:0] dram_latency;
|
||||
wire [43:0] dram_reads;
|
||||
wire [43:0] dram_writes;
|
||||
wire [43:0] dram_stalls;
|
||||
wire [43:0] dram_latency;
|
||||
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -4,14 +4,14 @@
|
||||
`include "VX_define.vh"
|
||||
|
||||
interface VX_perf_pipeline_if ();
|
||||
wire [63:0] ibf_stalls;
|
||||
wire [63:0] scb_stalls;
|
||||
wire [63:0] lsu_stalls;
|
||||
wire [63:0] csr_stalls;
|
||||
wire [63:0] alu_stalls;
|
||||
wire [63:0] gpu_stalls;
|
||||
wire [43:0] ibf_stalls;
|
||||
wire [43:0] scb_stalls;
|
||||
wire [43:0] lsu_stalls;
|
||||
wire [43:0] csr_stalls;
|
||||
wire [43:0] alu_stalls;
|
||||
wire [43:0] gpu_stalls;
|
||||
`ifdef EXT_F_ENABLE
|
||||
wire [63:0] fpu_stalls;
|
||||
wire [43:0] fpu_stalls;
|
||||
`endif
|
||||
endinterface
|
||||
|
||||
|
||||
@@ -93,9 +93,10 @@ module VX_fifo_queue #(
|
||||
end
|
||||
if (SIZE > 2) begin
|
||||
used_r <= used_r + ADDRW'($signed(2'(push) - 2'(pop)));
|
||||
end else begin // (SIZE == 2);
|
||||
end else begin
|
||||
// (SIZE == 2);
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
used_r <= used_r ^ (push ^ pop);
|
||||
used_r <= used_r ^ (push ^ pop);
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
end
|
||||
@@ -105,7 +106,7 @@ module VX_fifo_queue #(
|
||||
|
||||
if (0 == BUFFERED) begin
|
||||
|
||||
reg [1:0][DATAW-1:0] shift_reg;
|
||||
reg [DATAW-1:0] shift_reg [1:0];
|
||||
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
|
||||
@@ -94,33 +94,43 @@ module VX_skid_buffer #(
|
||||
|
||||
end else begin
|
||||
|
||||
wire q_push = valid_in && ready_in;
|
||||
wire q_pop = valid_out && ready_out;
|
||||
reg [DATAW-1:0] shift_reg [1:0];
|
||||
reg valid_out_r, ready_in_r, rd_ptr_r;
|
||||
|
||||
wire q_empty, q_full;
|
||||
wire push = valid_in && ready_in;
|
||||
wire pop = valid_out_r && ready_out;
|
||||
|
||||
VX_fifo_queue #(
|
||||
.DATAW (DATAW),
|
||||
.SIZE (2),
|
||||
.BUFFERED (BUFFERED),
|
||||
.FASTRAM (FASTRAM)
|
||||
) fifo (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
.push (q_push),
|
||||
.pop (q_pop),
|
||||
.data_in (data_in),
|
||||
.data_out (data_out),
|
||||
.empty (q_empty),
|
||||
.alm_full (q_full),
|
||||
`UNUSED_PIN (full),
|
||||
`UNUSED_PIN (alm_empty),
|
||||
`UNUSED_PIN (size)
|
||||
);
|
||||
always @(posedge clk) begin
|
||||
if (reset) begin
|
||||
valid_out_r <= 0;
|
||||
ready_in_r <= 1;
|
||||
rd_ptr_r <= 1;
|
||||
end else begin
|
||||
if (push) begin
|
||||
if (!pop) begin
|
||||
ready_in_r <= rd_ptr_r;
|
||||
valid_out_r <= 1;
|
||||
end
|
||||
end else if (pop) begin
|
||||
ready_in_r <= 1;
|
||||
valid_out_r <= rd_ptr_r;
|
||||
end
|
||||
`IGNORE_WARNINGS_BEGIN
|
||||
rd_ptr_r <= rd_ptr_r ^ (push ^ pop);
|
||||
`IGNORE_WARNINGS_END
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = !q_full;
|
||||
assign valid_out = !q_empty;
|
||||
always @(posedge clk) begin
|
||||
if (push) begin
|
||||
shift_reg[1] <= shift_reg[0];
|
||||
shift_reg[0] <= data_in;
|
||||
end
|
||||
end
|
||||
|
||||
assign ready_in = ready_in_r;
|
||||
assign valid_out = valid_out_r;
|
||||
assign data_out = shift_reg[rd_ptr_r];
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -92,8 +92,7 @@ module VX_stream_arbiter #(
|
||||
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED),
|
||||
.BUFFERED (1)
|
||||
.PASSTHRU (!BUFFERED)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -40,8 +40,7 @@ module VX_stream_demux #(
|
||||
for (genvar i = 0; i < NUM_REQS; i++) begin
|
||||
VX_skid_buffer #(
|
||||
.DATAW (DATAW),
|
||||
.PASSTHRU (!BUFFERED),
|
||||
.BUFFERED (1)
|
||||
.PASSTHRU (!BUFFERED)
|
||||
) out_buffer (
|
||||
.clk (clk),
|
||||
.reset (reset),
|
||||
|
||||
@@ -13,7 +13,7 @@ RTL_DIR = ../hw/rtl
|
||||
|
||||
PROJECT = simX
|
||||
|
||||
SRCS = util.cpp args.cpp mem.cpp core.cpp warp.cpp instr.cpp decode.cpp execute.cpp simX.cpp
|
||||
SRCS = util.cpp args.cpp mem.cpp warp.cpp core.cpp decode.cpp execute.cpp main.cpp
|
||||
|
||||
# Debugigng
|
||||
ifdef DEBUG
|
||||
@@ -27,6 +27,9 @@ all: $(PROJECT)
|
||||
$(PROJECT): $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) $^ $(LDFLAGS) -o $@
|
||||
|
||||
run: $(PROJECT)
|
||||
./$(PROJECT)
|
||||
|
||||
.depend: $(SRCS)
|
||||
$(CXX) $(CXXFLAGS) -MM $^ > .depend;
|
||||
|
||||
|
||||
160
simX/archdef.h
160
simX/archdef.h
@@ -11,156 +11,56 @@ namespace vortex {
|
||||
|
||||
class ArchDef {
|
||||
public:
|
||||
struct Undefined {};
|
||||
|
||||
ArchDef(const std::string &s,
|
||||
ArchDef(const std::string &/*arch*/,
|
||||
int num_cores,
|
||||
int num_warps,
|
||||
int num_threads) {
|
||||
std::istringstream iss(s.c_str());
|
||||
wordSize_ = 4;
|
||||
encChar_ = 'w';
|
||||
numRegs_ = 32;
|
||||
numPRegs_ = 0;
|
||||
numCores_ = num_cores;
|
||||
numWarps_ = num_warps;
|
||||
numThreads_ = num_threads;
|
||||
extent_ = EXT_END;
|
||||
int num_threads) {
|
||||
wsize_ = 4;
|
||||
vsize_ = 16;
|
||||
num_regs_ = 32;
|
||||
num_csrs_ = 4096;
|
||||
num_cores_ = num_cores;
|
||||
num_warps_ = num_warps;
|
||||
num_threads_ = num_threads;
|
||||
}
|
||||
|
||||
operator std::string () const {
|
||||
if (extent_ == EXT_NULL)
|
||||
return "";
|
||||
|
||||
std::ostringstream oss;
|
||||
if (extent_ >= EXT_WORDSIZE) oss << wordSize_;
|
||||
if (extent_ >= EXT_ENC ) oss << encChar_;
|
||||
if (extent_ >= EXT_REGS ) oss << numRegs_;
|
||||
if (extent_ >= EXT_PREGS ) oss << '/' << numPRegs_;
|
||||
if (extent_ >= EXT_THREADS ) oss << '/' << numThreads_;
|
||||
if (extent_ >= EXT_WARPS ) oss << '/' << numWarps_;
|
||||
if (extent_ >= EXT_CORES ) oss << '/' << numCores_;
|
||||
|
||||
return oss.str();
|
||||
int wsize() const {
|
||||
return wsize_;
|
||||
}
|
||||
|
||||
bool operator==(const ArchDef &r) const {
|
||||
Extent minExtent(r.extent_ > extent_ ? extent_ : r.extent_);
|
||||
|
||||
// Can't be equal if we can't specify a binary encoding at all.
|
||||
if (minExtent < EXT_PREGS)
|
||||
return false;
|
||||
|
||||
if (minExtent >= EXT_WORDSIZE) {
|
||||
if (wordSize_!=r.wordSize_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_ENC) {
|
||||
if (encChar_ != r.encChar_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_REGS) {
|
||||
if (numRegs_ != r.numRegs_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_PREGS) {
|
||||
if (numPRegs_ != r.numPRegs_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_THREADS) {
|
||||
if (numThreads_ != r.numThreads_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_WARPS) {
|
||||
if (numWarps_ != r.numWarps_)
|
||||
return false;
|
||||
}
|
||||
|
||||
if (minExtent >= EXT_CORES) {
|
||||
if (numCores_ != r.numCores_)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
int vsize() const {
|
||||
return vsize_;
|
||||
}
|
||||
|
||||
bool operator!=(const ArchDef &r) const {
|
||||
return !(*this == r);
|
||||
int num_regs() const {
|
||||
return num_regs_;
|
||||
}
|
||||
|
||||
Size getWordSize() const {
|
||||
if (extent_ < EXT_WORDSIZE)
|
||||
throw Undefined();
|
||||
return wordSize_;
|
||||
int num_csrs() const {
|
||||
return num_csrs_;
|
||||
}
|
||||
|
||||
char getEncChar() const {
|
||||
if ((extent_ < EXT_ENC) || (encChar_ == 'x'))
|
||||
throw Undefined();
|
||||
return encChar_;
|
||||
int num_threads() const {
|
||||
return num_threads_;
|
||||
}
|
||||
|
||||
RegNum getNumRegs() const {
|
||||
if (extent_ < EXT_REGS)
|
||||
throw Undefined();
|
||||
return numRegs_;
|
||||
int num_warps() const {
|
||||
return num_warps_;
|
||||
}
|
||||
|
||||
RegNum getNumPRegs() const {
|
||||
if (extent_ < EXT_PREGS)
|
||||
throw Undefined();
|
||||
return numPRegs_;
|
||||
}
|
||||
|
||||
ThdNum getNumThreads() const {
|
||||
if (extent_ < EXT_THREADS)
|
||||
throw Undefined();
|
||||
return numThreads_;
|
||||
}
|
||||
|
||||
ThdNum getNumWarps() const {
|
||||
if (extent_ < EXT_WARPS)
|
||||
throw Undefined();
|
||||
return numWarps_;
|
||||
}
|
||||
|
||||
ThdNum getNumCores() const {
|
||||
if (extent_ < EXT_CORES)
|
||||
throw Undefined();
|
||||
return numCores_;
|
||||
}
|
||||
|
||||
bool is_cpu_mode() const {
|
||||
return cpu_mode_;
|
||||
int num_cores() const {
|
||||
return num_cores_;
|
||||
}
|
||||
|
||||
private:
|
||||
enum Extent {
|
||||
EXT_NULL,
|
||||
EXT_WORDSIZE,
|
||||
EXT_ENC,
|
||||
EXT_REGS,
|
||||
EXT_PREGS,
|
||||
EXT_THREADS,
|
||||
EXT_WARPS,
|
||||
EXT_CORES,
|
||||
EXT_END
|
||||
};
|
||||
|
||||
Extent extent_;
|
||||
Size wordSize_;
|
||||
ThdNum numThreads_;
|
||||
ThdNum numWarps_;
|
||||
ThdNum numCores_;
|
||||
RegNum numRegs_;
|
||||
ThdNum numPRegs_;
|
||||
char encChar_;
|
||||
bool cpu_mode_;
|
||||
int wsize_;
|
||||
int vsize_;
|
||||
int num_regs_;
|
||||
int num_csrs_;
|
||||
int num_threads_;
|
||||
int num_warps_;
|
||||
int num_cores_;
|
||||
};
|
||||
|
||||
}
|
||||
386
simX/core.cpp
386
simX/core.cpp
@@ -1,10 +1,7 @@
|
||||
#include <iostream>
|
||||
#include <iomanip>
|
||||
#include <string.h>
|
||||
|
||||
// #define USE_DEBUG 7
|
||||
// #define PRINT_ACTIVE_THREADS
|
||||
|
||||
#include <assert.h>
|
||||
#include "types.h"
|
||||
#include "util.h"
|
||||
#include "archdef.h"
|
||||
@@ -14,21 +11,25 @@
|
||||
#include "debug.h"
|
||||
|
||||
#define INIT_TRACE(trace_inst) \
|
||||
trace_inst.valid_inst = false; \
|
||||
trace_inst.pc = 0; \
|
||||
trace_inst.valid = false; \
|
||||
trace_inst.PC = 0; \
|
||||
trace_inst.wid = schedule_w_; \
|
||||
trace_inst.rs1 = -1; \
|
||||
trace_inst.rs2 = -1; \
|
||||
trace_inst.rd = -1; \
|
||||
trace_inst.vs1 = -1; \
|
||||
trace_inst.vs2 = -1; \
|
||||
trace_inst.vd = -1; \
|
||||
trace_inst.irs1 = -1; \
|
||||
trace_inst.irs2 = -1; \
|
||||
trace_inst.frs1 = -1; \
|
||||
trace_inst.frs2 = -1; \
|
||||
trace_inst.frs3 = -1; \
|
||||
trace_inst.frd = -1; \
|
||||
trace_inst.ird = -1; \
|
||||
trace_inst.vrs1 = -1; \
|
||||
trace_inst.vrs2 = -1; \
|
||||
trace_inst.vrd = -1; \
|
||||
trace_inst.is_lw = false; \
|
||||
trace_inst.is_sw = false; \
|
||||
if (trace_inst.mem_addresses != NULL) \
|
||||
free(trace_inst.mem_addresses); \
|
||||
trace_inst.mem_addresses = (unsigned *)malloc(32 * sizeof(unsigned)); \
|
||||
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++) \
|
||||
for (int tid = 0; tid < arch_.num_threads(); tid++) \
|
||||
trace_inst.mem_addresses[tid] = 0xdeadbeef; \
|
||||
trace_inst.mem_stall_cycles = 0; \
|
||||
trace_inst.fetch_stall_cycles = 0; \
|
||||
@@ -37,18 +38,22 @@
|
||||
trace_inst.stalled = false;
|
||||
|
||||
#define CPY_TRACE(drain, source) \
|
||||
drain.valid_inst = source.valid_inst; \
|
||||
drain.pc = source.pc; \
|
||||
drain.valid = source.valid; \
|
||||
drain.PC = source.PC; \
|
||||
drain.wid = source.wid; \
|
||||
drain.rs1 = source.rs1; \
|
||||
drain.rs2 = source.rs2; \
|
||||
drain.rd = source.rd; \
|
||||
drain.vs1 = source.vs1; \
|
||||
drain.vs2 = source.vs2; \
|
||||
drain.vd = source.vd; \
|
||||
drain.irs1 = source.irs1; \
|
||||
drain.irs2 = source.irs2; \
|
||||
drain.ird = source.ird; \
|
||||
drain.frs1 = source.frs1; \
|
||||
drain.frs2 = source.frs2; \
|
||||
drain.frs3 = source.frs3; \
|
||||
drain.frd = source.frd; \
|
||||
drain.vrs1 = source.vrs1; \
|
||||
drain.vrs2 = source.vrs2; \
|
||||
drain.vrd = source.vrd; \
|
||||
drain.is_lw = source.is_lw; \
|
||||
drain.is_sw = source.is_sw; \
|
||||
for (ThdNum tid = 0; tid < arch_.getNumThreads(); tid++)\
|
||||
for (int tid = 0; tid < arch_.num_threads(); tid++) \
|
||||
drain.mem_addresses[tid] = source.mem_addresses[tid]; \
|
||||
drain.mem_stall_cycles = source.mem_stall_cycles; \
|
||||
drain.fetch_stall_cycles = source.fetch_stall_cycles; \
|
||||
@@ -60,17 +65,17 @@ using namespace vortex;
|
||||
|
||||
void printTrace(trace_inst_t *trace, const char *stage_name) {
|
||||
__unused(trace, stage_name);
|
||||
D(3, stage_name << ": valid=" << trace->valid_inst);
|
||||
D(3, stage_name << ": PC=" << std::hex << trace->pc << std::dec);
|
||||
D(3, stage_name << ": wid=" << trace->wid);
|
||||
D(3, stage_name << ": rd=" << trace->rd << ", rs1=" << trace->rs1 << ", trs2=" << trace->rs2);
|
||||
D(3, stage_name << ": is_lw=" << trace->is_lw);
|
||||
D(3, stage_name << ": is_sw=" << trace->is_sw);
|
||||
D(3, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
|
||||
D(3, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
|
||||
D(3, stage_name << ": stall_warp=" << trace->stall_warp);
|
||||
D(3, stage_name << ": wspawn=" << trace->wspawn);
|
||||
D(3, stage_name << ": stalled=" << trace->stalled);
|
||||
D(4, stage_name << ": valid=" << trace->valid);
|
||||
D(4, stage_name << ": PC=" << std::hex << trace->PC << std::dec);
|
||||
D(4, stage_name << ": wid=" << trace->wid);
|
||||
D(4, stage_name << ": rd=" << trace->ird << ", rs1=" << trace->irs1 << ", trs2=" << trace->irs2);
|
||||
D(4, stage_name << ": is_lw=" << trace->is_lw);
|
||||
D(4, stage_name << ": is_sw=" << trace->is_sw);
|
||||
D(4, stage_name << ": fetch_stall_cycles=" << trace->fetch_stall_cycles);
|
||||
D(4, stage_name << ": mem_stall_cycles=" << trace->mem_stall_cycles);
|
||||
D(4, stage_name << ": stall_warp=" << trace->stall_warp);
|
||||
D(4, stage_name << ": wspawn=" << trace->wspawn);
|
||||
D(4, stage_name << ": stalled=" << trace->stalled);
|
||||
}
|
||||
|
||||
Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
@@ -79,8 +84,7 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
, decoder_(decoder)
|
||||
, mem_(mem)
|
||||
, steps_(0)
|
||||
, num_instructions_(0) {
|
||||
release_warp_ = false;
|
||||
, num_insts_(0) {
|
||||
foundSchedule_ = true;
|
||||
schedule_w_ = 0;
|
||||
|
||||
@@ -98,28 +102,17 @@ Core::Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id)
|
||||
INIT_TRACE(inst_in_lsu_);
|
||||
INIT_TRACE(inst_in_wb_);
|
||||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
stalled_warps_[i] = false;
|
||||
for (int j = 0; j < 32; j++) {
|
||||
renameTable_[i][j] = true;
|
||||
}
|
||||
iRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
|
||||
fRenameTable_.resize(arch.num_warps(), std::vector<bool>(arch.num_regs(), false));
|
||||
vRenameTable_.resize(arch.num_regs(), false);
|
||||
|
||||
stalled_warps_.resize(arch.num_warps(), false);
|
||||
|
||||
for (int i = 0; i < arch_.num_warps(); ++i) {
|
||||
warps_.emplace_back(this, i);
|
||||
}
|
||||
|
||||
for (int i = 0; i < 32; i++) {
|
||||
vecRenameTable_[i] = true;
|
||||
}
|
||||
|
||||
for (unsigned i = 0; i < arch_.getNumWarps(); ++i) {
|
||||
warps_.push_back(Warp(this, i));
|
||||
}
|
||||
|
||||
warps_[0].setActiveThreads(1);
|
||||
warps_[0].setSpawned(true);
|
||||
}
|
||||
|
||||
bool Core::interrupt(Word r0) {
|
||||
warps_[0].interrupt(r0);
|
||||
return false;
|
||||
warps_[0].setTmask(0, true);
|
||||
}
|
||||
|
||||
Core::~Core() {
|
||||
@@ -130,32 +123,20 @@ void Core::step() {
|
||||
D(3, "###########################################################");
|
||||
|
||||
steps_++;
|
||||
D(3, "cycle: " << steps_);
|
||||
D(3, std::dec << "Core" << id_ << ": cycle: " << steps_);
|
||||
|
||||
DPH(3, "stalled warps:");
|
||||
for (ThdNum widd = 0; widd < arch_.getNumWarps(); widd++) {
|
||||
DPN(3, " " << stalled_warps_[widd]);
|
||||
for (int i = 0; i < arch_.num_warps(); i++) {
|
||||
DPN(3, " " << stalled_warps_[i]);
|
||||
}
|
||||
DPN(3, "\n");
|
||||
|
||||
// cout << "About to call writeback" << std::endl;
|
||||
this->writeback();
|
||||
// cout << "About to call load_store" << std::endl;
|
||||
this->load_store();
|
||||
// cout << "About to call execute_unit" << std::endl;
|
||||
this->execute_unit();
|
||||
// cout << "About to call scheduler" << std::endl;
|
||||
this->scheduler();
|
||||
// cout << "About to call decode" << std::endl;
|
||||
this->decode();
|
||||
// D(3, "About to call fetch" << std::flush);
|
||||
this->fetch();
|
||||
// D(3, "Finished fetch" << std::flush);
|
||||
|
||||
if (release_warp_) {
|
||||
release_warp_ = false;
|
||||
stalled_warps_[release_warp_num_] = false;
|
||||
}
|
||||
|
||||
DPN(3, std::flush);
|
||||
}
|
||||
@@ -166,10 +147,8 @@ void Core::warpScheduler() {
|
||||
for (size_t wid = 0; wid < warps_.size(); ++wid) {
|
||||
// round robin scheduling
|
||||
next_warp = (next_warp + 1) % warps_.size();
|
||||
|
||||
bool has_active_threads = (warps_[next_warp].getActiveThreads() > 0);
|
||||
bool has_active_threads = warps_[next_warp].active();
|
||||
bool stalled = stalled_warps_[next_warp];
|
||||
|
||||
if (has_active_threads && !stalled) {
|
||||
foundSchedule_ = true;
|
||||
break;
|
||||
@@ -179,35 +158,28 @@ void Core::warpScheduler() {
|
||||
}
|
||||
|
||||
void Core::fetch() {
|
||||
|
||||
// D(-1, "Found schedule: " << foundSchedule_);
|
||||
|
||||
if ((!inst_in_scheduler_.stalled)
|
||||
&& (inst_in_fetch_.fetch_stall_cycles == 0)) {
|
||||
// CPY_TRACE(inst_in_decode_, inst_in_fetch_);
|
||||
// if (warps_[schedule_w_].activeThreads)
|
||||
{
|
||||
INIT_TRACE(inst_in_fetch_);
|
||||
INIT_TRACE(inst_in_fetch_);
|
||||
|
||||
if (foundSchedule_) {
|
||||
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
|
||||
if (foundSchedule_) {
|
||||
auto active_threads_b = warps_[schedule_w_].getActiveThreads();
|
||||
num_insts_ = num_insts_ + warps_[schedule_w_].getActiveThreads();
|
||||
|
||||
num_instructions_ = num_instructions_ + warps_[schedule_w_].getActiveThreads();
|
||||
warps_[schedule_w_].step(&inst_in_fetch_);
|
||||
warps_[schedule_w_].step(&inst_in_fetch_);
|
||||
|
||||
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
|
||||
}
|
||||
|
||||
this->getCacheDelays(&inst_in_fetch_);
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
stalled_warps_[inst_in_fetch_.wid] = true;
|
||||
}
|
||||
auto active_threads_a = warps_[schedule_w_].getActiveThreads();
|
||||
if (active_threads_b != active_threads_a) {
|
||||
D(3, "** warp #" << schedule_w_ << " active threads changed from " << active_threads_b << " to " << active_threads_a);
|
||||
}
|
||||
|
||||
this->getCacheDelays(&inst_in_fetch_);
|
||||
|
||||
if (inst_in_fetch_.stall_warp) {
|
||||
stalled_warps_[inst_in_fetch_.wid] = true;
|
||||
}
|
||||
this->warpScheduler();
|
||||
}
|
||||
this->warpScheduler();
|
||||
} else {
|
||||
inst_in_fetch_.stalled = false;
|
||||
if (inst_in_fetch_.fetch_stall_cycles > 0)
|
||||
@@ -223,7 +195,6 @@ void Core::decode() {
|
||||
CPY_TRACE(inst_in_decode_, inst_in_fetch_);
|
||||
INIT_TRACE(inst_in_fetch_);
|
||||
}
|
||||
//printTrace(&inst_in_decode_, "Decode");
|
||||
}
|
||||
|
||||
void Core::scheduler() {
|
||||
@@ -231,136 +202,162 @@ void Core::scheduler() {
|
||||
CPY_TRACE(inst_in_scheduler_, inst_in_decode_);
|
||||
INIT_TRACE(inst_in_decode_);
|
||||
}
|
||||
//printTrace(&inst_in_scheduler_, "Scheduler");
|
||||
}
|
||||
|
||||
void Core::load_store() {
|
||||
if ((inst_in_lsu_.mem_stall_cycles > 0) || (inst_in_lsu_.stalled)) {
|
||||
if ((inst_in_lsu_.mem_stall_cycles > 0) || inst_in_lsu_.stalled) {
|
||||
// LSU currently busy
|
||||
if ((inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)) {
|
||||
inst_in_scheduler_.stalled = true;
|
||||
}
|
||||
} else {
|
||||
// LSU not busy
|
||||
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) {
|
||||
// Scheduler has LSU inst
|
||||
bool scheduler_srcs_ready = true;
|
||||
if (inst_in_scheduler_.rs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
|
||||
}
|
||||
if (!inst_in_scheduler_.is_lw && !inst_in_scheduler_.is_sw)
|
||||
return;
|
||||
|
||||
if (inst_in_scheduler_.rs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2];
|
||||
}
|
||||
// Scheduler has LSU inst
|
||||
bool scheduler_srcs_busy = false;
|
||||
|
||||
if (inst_in_scheduler_.vs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
|
||||
}
|
||||
if (inst_in_scheduler_.vs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
|
||||
}
|
||||
if (inst_in_scheduler_.irs1 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
|
||||
}
|
||||
|
||||
if (scheduler_srcs_ready) {
|
||||
if (inst_in_scheduler_.rd != -1)
|
||||
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
|
||||
if (inst_in_scheduler_.rd != -1)
|
||||
vecRenameTable_[inst_in_scheduler_.vd] = false;
|
||||
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
} else {
|
||||
inst_in_scheduler_.stalled = true;
|
||||
// INIT_TRACE(inst_in_lsu_);
|
||||
}
|
||||
} else {
|
||||
// INIT_TRACE(inst_in_lsu_);
|
||||
if (inst_in_scheduler_.irs2 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs3 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
|
||||
}
|
||||
if (inst_in_scheduler_.vrs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
|
||||
}
|
||||
|
||||
if (scheduler_srcs_busy) {
|
||||
inst_in_scheduler_.stalled = true;
|
||||
} else {
|
||||
if (inst_in_scheduler_.ird > 0)
|
||||
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
|
||||
|
||||
if (inst_in_scheduler_.frd >= 0)
|
||||
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
|
||||
|
||||
if (inst_in_scheduler_.vrd >= 0)
|
||||
vRenameTable_[inst_in_scheduler_.vrd] = true;
|
||||
|
||||
CPY_TRACE(inst_in_lsu_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
}
|
||||
}
|
||||
|
||||
if (inst_in_lsu_.mem_stall_cycles > 0)
|
||||
inst_in_lsu_.mem_stall_cycles--;
|
||||
|
||||
//printTrace(&inst_in_lsu_, "LSU");
|
||||
}
|
||||
|
||||
void Core::execute_unit() {
|
||||
// EXEC is always not busy
|
||||
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw) {
|
||||
// Not an execute instruction
|
||||
// INIT_TRACE(inst_in_exe_);
|
||||
} else {
|
||||
bool scheduler_srcs_ready = true;
|
||||
if (inst_in_scheduler_.rs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1];
|
||||
// cout << "Rename RS1: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs1] << " wid: " << inst_in_scheduler_.wid << '\n';
|
||||
}
|
||||
if (inst_in_scheduler_.is_lw || inst_in_scheduler_.is_sw)
|
||||
return;
|
||||
|
||||
bool scheduler_srcs_busy = false;
|
||||
|
||||
if (inst_in_scheduler_.rs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2];
|
||||
// cout << "Rename RS2: " << inst_in_scheduler_.rs1 << " is " << renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rs2] << " wid: " << inst_in_scheduler_.wid << '\n';
|
||||
}
|
||||
|
||||
// cout << "About to check vs*\n" << std::flush;
|
||||
if (inst_in_scheduler_.vs1 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs1];
|
||||
}
|
||||
if (inst_in_scheduler_.vs2 > 0) {
|
||||
scheduler_srcs_ready = scheduler_srcs_ready && vecRenameTable_[inst_in_scheduler_.vs2];
|
||||
}
|
||||
// cout << "Finished sources\n" << std::flush;
|
||||
|
||||
if (scheduler_srcs_ready) {
|
||||
if (inst_in_scheduler_.rd != -1) {
|
||||
// cout << "rename setting rd: " << inst_in_scheduler_.rd << " to not useabel wid: " << inst_in_scheduler_.wid << '\n';
|
||||
renameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.rd] = false;
|
||||
}
|
||||
|
||||
// cout << "About to check vector wb: " << inst_in_scheduler_.vd << "\n" << std::flush;
|
||||
if (inst_in_scheduler_.vd != -1) {
|
||||
vecRenameTable_[inst_in_scheduler_.vd] = false;
|
||||
}
|
||||
// cout << "Finished wb checking" << "\n" << std::flush;
|
||||
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
// cout << "Finished trace copying and clearning" << "\n" << std::flush;
|
||||
} else {
|
||||
D(3, "Execute: srcs not ready!");
|
||||
inst_in_scheduler_.stalled = true;
|
||||
// INIT_TRACE(inst_in_exe_);
|
||||
}
|
||||
if (inst_in_scheduler_.irs1 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs1];
|
||||
}
|
||||
|
||||
//printTrace(&inst_in_exe_, "EXE");
|
||||
// INIT_TRACE(inst_in_exe_);
|
||||
if (inst_in_scheduler_.irs2 > 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.irs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs1];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs2];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frs3 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frs3];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrs1 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs1];
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrs2 >= 0) {
|
||||
scheduler_srcs_busy = scheduler_srcs_busy || vRenameTable_[inst_in_scheduler_.vrs2];
|
||||
}
|
||||
|
||||
if (scheduler_srcs_busy) {
|
||||
D(3, "Execute: srcs not ready!");
|
||||
inst_in_scheduler_.stalled = true;
|
||||
} else {
|
||||
if (inst_in_scheduler_.ird > 0) {
|
||||
iRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.ird] = true;
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.frd >= 0) {
|
||||
fRenameTable_[inst_in_scheduler_.wid][inst_in_scheduler_.frd] = true;
|
||||
}
|
||||
|
||||
if (inst_in_scheduler_.vrd >= 0) {
|
||||
vRenameTable_[inst_in_scheduler_.vrd] = true;
|
||||
}
|
||||
|
||||
CPY_TRACE(inst_in_exe_, inst_in_scheduler_);
|
||||
INIT_TRACE(inst_in_scheduler_);
|
||||
}
|
||||
}
|
||||
|
||||
void Core::writeback() {
|
||||
if (inst_in_wb_.rd > 0)
|
||||
renameTable_[inst_in_wb_.wid][inst_in_wb_.rd] = true;
|
||||
if (inst_in_wb_.vd > 0)
|
||||
vecRenameTable_[inst_in_wb_.vd] = true;
|
||||
if (inst_in_wb_.ird > 0) {
|
||||
iRenameTable_[inst_in_wb_.wid][inst_in_wb_.ird] = false;
|
||||
}
|
||||
|
||||
if (inst_in_wb_.frd >= 0) {
|
||||
fRenameTable_[inst_in_wb_.wid][inst_in_wb_.frd] = false;
|
||||
}
|
||||
|
||||
if (inst_in_wb_.vrd >= 0) {
|
||||
vRenameTable_[inst_in_wb_.vrd] = false;
|
||||
}
|
||||
|
||||
if (inst_in_wb_.stall_warp) {
|
||||
stalled_warps_[inst_in_wb_.wid] = false;
|
||||
// release_warp_ = true;
|
||||
// release_warp_num_ = inst_in_wb_.wid;
|
||||
}
|
||||
|
||||
INIT_TRACE(inst_in_wb_);
|
||||
|
||||
bool serviced_exe = false;
|
||||
if ((inst_in_exe_.rd > 0) || (inst_in_exe_.stall_warp)) {
|
||||
if ((inst_in_exe_.ird > 0)
|
||||
|| (inst_in_exe_.frd >= 0)
|
||||
|| (inst_in_exe_.vrd >= 0)
|
||||
|| (inst_in_exe_.stall_warp)) {
|
||||
CPY_TRACE(inst_in_wb_, inst_in_exe_);
|
||||
INIT_TRACE(inst_in_exe_);
|
||||
serviced_exe = true;
|
||||
// cout << "WRITEBACK SERVICED EXE\n";
|
||||
}
|
||||
|
||||
if (inst_in_lsu_.is_sw) {
|
||||
INIT_TRACE(inst_in_lsu_);
|
||||
} else {
|
||||
if (((inst_in_lsu_.rd > 0) || (inst_in_lsu_.vd > 0)) && (inst_in_lsu_.mem_stall_cycles == 0)) {
|
||||
if (((inst_in_lsu_.ird > 0)
|
||||
|| (inst_in_lsu_.frd >= 0)
|
||||
|| (inst_in_lsu_.vrd >= 0))
|
||||
&& (inst_in_lsu_.mem_stall_cycles == 0)) {
|
||||
if (serviced_exe) {
|
||||
D(3, "$$$$$$$$$$$$$$$$$$$$ Stalling LSU because EXE is being used");
|
||||
// Stalling LSU because EXE is busy
|
||||
inst_in_lsu_.stalled = true;
|
||||
} else {
|
||||
CPY_TRACE(inst_in_wb_, inst_in_lsu_);
|
||||
@@ -371,27 +368,28 @@ void Core::writeback() {
|
||||
}
|
||||
|
||||
void Core::getCacheDelays(trace_inst_t *trace_inst) {
|
||||
trace_inst->fetch_stall_cycles += 3;
|
||||
trace_inst->fetch_stall_cycles += 1;
|
||||
if (trace_inst->is_sw || trace_inst->is_lw) {
|
||||
trace_inst->mem_stall_cycles += 5;
|
||||
trace_inst->mem_stall_cycles += 3;
|
||||
}
|
||||
}
|
||||
|
||||
bool Core::running() const {
|
||||
bool stages_have_valid = inst_in_fetch_.valid_inst
|
||||
|| inst_in_decode_.valid_inst
|
||||
|| inst_in_scheduler_.valid_inst
|
||||
|| inst_in_lsu_.valid_inst
|
||||
|| inst_in_exe_.valid_inst
|
||||
|| inst_in_wb_.valid_inst;
|
||||
bool stages_have_valid = inst_in_fetch_.valid
|
||||
|| inst_in_decode_.valid
|
||||
|| inst_in_scheduler_.valid
|
||||
|| inst_in_lsu_.valid
|
||||
|| inst_in_exe_.valid
|
||||
|| inst_in_wb_.valid;
|
||||
|
||||
if (stages_have_valid)
|
||||
return true;
|
||||
|
||||
for (unsigned i = 0; i < warps_.size(); ++i)
|
||||
if (warps_[i].running()) {
|
||||
for (unsigned i = 0; i < warps_.size(); ++i) {
|
||||
if (warps_[i].active()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
16
simX/core.h
16
simX/core.h
@@ -21,7 +21,6 @@ public:
|
||||
Core(const ArchDef &arch, Decoder &decoder, MemoryUnit &mem, Word id = 0);
|
||||
~Core();
|
||||
|
||||
bool interrupt(Word r0);
|
||||
bool running() const;
|
||||
|
||||
void getCacheDelays(trace_inst_t *);
|
||||
@@ -61,8 +60,8 @@ public:
|
||||
return interruptEntry_;
|
||||
}
|
||||
|
||||
unsigned long num_instructions() const {
|
||||
return num_instructions_;
|
||||
unsigned long num_insts() const {
|
||||
return num_insts_;
|
||||
}
|
||||
|
||||
unsigned long num_steps() const {
|
||||
@@ -71,9 +70,10 @@ public:
|
||||
|
||||
private:
|
||||
|
||||
bool renameTable_[32][32];
|
||||
bool vecRenameTable_[32];
|
||||
bool stalled_warps_[32];
|
||||
std::vector<std::vector<bool>> iRenameTable_;
|
||||
std::vector<std::vector<bool>> fRenameTable_;
|
||||
std::vector<bool> vRenameTable_;
|
||||
std::vector<bool> stalled_warps_;
|
||||
bool foundSchedule_;
|
||||
|
||||
Word id_;
|
||||
@@ -84,10 +84,8 @@ private:
|
||||
std::unordered_map<Word, std::set<Warp *>> barriers_;
|
||||
int schedule_w_;
|
||||
uint64_t steps_;
|
||||
uint64_t num_instructions_;
|
||||
uint64_t num_insts_;
|
||||
Word interruptEntry_;
|
||||
bool release_warp_;
|
||||
int release_warp_num_;
|
||||
|
||||
trace_inst_t inst_in_fetch_;
|
||||
trace_inst_t inst_in_decode_;
|
||||
|
||||
12
simX/debug.h
12
simX/debug.h
@@ -1,6 +1,8 @@
|
||||
#pragma once
|
||||
|
||||
//#define USE_DEBUG 9
|
||||
#define USE_DEBUG 3
|
||||
#define DEBUG_HEADER << "DEBUG "
|
||||
//#define DEBUG_HEADER << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": "
|
||||
|
||||
#ifdef USE_DEBUG
|
||||
|
||||
@@ -11,13 +13,13 @@
|
||||
|
||||
#define D(lvl, x) do { \
|
||||
if ((lvl) <= USE_DEBUG) { \
|
||||
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x << std::endl; \
|
||||
std::cout DEBUG_HEADER << x << std::endl; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define DPH(lvl, x) do { \
|
||||
if ((lvl) <= USE_DEBUG) { \
|
||||
std::cout << "DEBUG " << __FILE__ << ':' << std::dec << __LINE__ << ": " << x; \
|
||||
std::cout DEBUG_HEADER << x; \
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
@@ -27,10 +29,6 @@
|
||||
} \
|
||||
} while(0)
|
||||
|
||||
#define D_RAW(x) do { \
|
||||
std::cout << x; \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define DX(x)
|
||||
|
||||
176
simX/decode.cpp
176
simX/decode.cpp
@@ -38,7 +38,14 @@ static const std::unordered_map<int, struct InstTableEntry_t> sc_instTable = {
|
||||
{Opcode::GPGPU, {"gpgpu" , false, InstType::R_TYPE}},
|
||||
{Opcode::VSET_ARITH, {"vsetvl", false, InstType::V_TYPE}},
|
||||
{Opcode::VL, {"vl" , false, InstType::V_TYPE}},
|
||||
{Opcode::VS, {"vs" , false, InstType::V_TYPE}}
|
||||
{Opcode::VS, {"vs" , false, InstType::V_TYPE}},
|
||||
{Opcode::FL, {"fl" , false, InstType::I_TYPE }},
|
||||
{Opcode::FS, {"fs" , false, InstType::S_TYPE }},
|
||||
{Opcode::FCI, {"fci" , false, InstType::R_TYPE }},
|
||||
{Opcode::FMADD, {"fma" , false, InstType::R4_TYPE }},
|
||||
{Opcode::FMSUB, {"fms" , false, InstType::R4_TYPE }},
|
||||
{Opcode::FMNMADD, {"fmnma" , false, InstType::R4_TYPE }},
|
||||
{Opcode::FMNMSUB, {"fmnms" , false, InstType::R4_TYPE }}
|
||||
};
|
||||
|
||||
std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
|
||||
@@ -47,9 +54,10 @@ std::ostream &vortex::operator<<(std::ostream &os, Instr &instr) {
|
||||
}
|
||||
|
||||
Decoder::Decoder(const ArchDef &arch) {
|
||||
inst_s_ = arch.getWordSize() * 8;
|
||||
inst_s_ = arch.wsize() * 8;
|
||||
opcode_s_ = 7;
|
||||
reg_s_ = 5;
|
||||
func2_s_ = 2;
|
||||
func3_s_ = 3;
|
||||
mop_s_ = 3;
|
||||
vmask_s_ = 1;
|
||||
@@ -60,6 +68,8 @@ Decoder::Decoder(const ArchDef &arch) {
|
||||
shift_rs1_ = opcode_s_ + reg_s_ + func3_s_;
|
||||
shift_rs2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_;
|
||||
shift_func7_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
|
||||
shift_func2_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
|
||||
shift_rs3_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + func2_s_;
|
||||
shift_j_u_immed_ = opcode_s_ + reg_s_;
|
||||
shift_s_b_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_;
|
||||
shift_i_immed_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_;
|
||||
@@ -71,6 +81,7 @@ Decoder::Decoder(const ArchDef &arch) {
|
||||
shift_vset_ = opcode_s_ + reg_s_ + func3_s_ + reg_s_ + reg_s_ + 6;
|
||||
|
||||
reg_mask_ = 0x1f;
|
||||
func2_mask_ = 0x2;
|
||||
func3_mask_ = 0x7;
|
||||
func6_mask_ = 0x3f;
|
||||
func7_mask_ = 0x7f;
|
||||
@@ -83,7 +94,11 @@ Decoder::Decoder(const ArchDef &arch) {
|
||||
v_imm_mask_ = 0x7ff;
|
||||
}
|
||||
|
||||
std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, trace_inst_t *trace_inst) {
|
||||
std::shared_ptr<Instr> Decoder::decode(
|
||||
const std::vector<Byte> &v,
|
||||
Size &idx,
|
||||
trace_inst_t *trace_inst)
|
||||
{
|
||||
Word code(readWord(v, idx, inst_s_ / 8));
|
||||
|
||||
// std::cout << "code: " << (int) code << " v: " << v << " indx: " << idx << "\n";
|
||||
@@ -96,59 +111,66 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
||||
Word imeed, dest_bits, imm_bits, bit_11, bits_4_1, bit_10_5,
|
||||
bit_12, bits_19_12, bits_10_1, bit_20, unordered, func3;
|
||||
|
||||
InstType curInstType = sc_instTable.at(op).iType;
|
||||
if (op == Opcode::FL || op == Opcode::FS) {
|
||||
// need to find out whether it is vector or floating point inst
|
||||
Word width_bits = (code >> shift_func3_) & func3_mask_;
|
||||
if ((width_bits == 0x1) || (width_bits == 0x2)
|
||||
|| (width_bits == 0x3) || (width_bits == 0x4)) {
|
||||
curInstType = (op == Opcode::FL) ? InstType::I_TYPE : InstType::S_TYPE;
|
||||
}
|
||||
}
|
||||
|
||||
// std::cout << "op: " << std::hex << op << " what " << sc_instTable[op].iType << "\n";
|
||||
switch (sc_instTable.at(op).iType) {
|
||||
switch (curInstType) {
|
||||
case InstType::N_TYPE:
|
||||
break;
|
||||
|
||||
case InstType::R_TYPE:
|
||||
instr->setPred((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
if (op == Opcode::FCI) {
|
||||
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
|
||||
} else {
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
instr->setFunc7((code >> shift_func7_) & func7_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::I_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
if (op == Opcode::FCI || op == Opcode::FL) {
|
||||
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
} else {
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
}
|
||||
instr->setFunc7((code >> shift_func7_) & func7_mask_);
|
||||
func3 = (code >> shift_func3_) & func3_mask_;
|
||||
instr->setFunc3(func3);
|
||||
|
||||
if ((func3 == 5) && (op != L_INST)) {
|
||||
// std::cout << "func7: " << func7 << "\n";
|
||||
if ((func3 == 5) && (op != L_INST) && (op != Opcode::FL)) {
|
||||
instr->setSrcImm(signExt(((code >> shift_rs2_) & reg_mask_), 5, reg_mask_));
|
||||
} else {
|
||||
instr->setSrcImm(signExt(code >> shift_i_immed_, 12, i_imm_mask_));
|
||||
}
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::S_TYPE:
|
||||
// std::cout << "************STORE\n";
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
if (op == Opcode::FS) {
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
|
||||
dest_bits = (code >> shift_rd_) & reg_mask_;
|
||||
imm_bits = (code >> shift_s_b_immed_ & func7_mask_);
|
||||
imeed = (imm_bits << reg_s_) | dest_bits;
|
||||
// std::cout << "ENC: store imeed: " << imeed << "\n";
|
||||
instr->setSrcImm(signExt(imeed, 12, s_imm_mask_));
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::B_TYPE:
|
||||
@@ -165,51 +187,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
||||
bit_12 = imm_bits >> 6;
|
||||
|
||||
imeed = 0 | (bits_4_1 << 1) | (bit_10_5 << 5) | (bit_11 << 11) | (bit_12 << 12);
|
||||
|
||||
instr->setSrcImm(signExt(imeed, 13, b_imm_mask_));
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::U_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcImm(signExt(code >> shift_j_u_immed_, 20, u_imm_mask_));
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::J_TYPE:
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
|
||||
// [20 | 10:1 | 11 | 19:12]
|
||||
|
||||
unordered = code >> shift_j_u_immed_;
|
||||
|
||||
bits_19_12 = unordered & 0xff;
|
||||
bit_11 = (unordered >> 8) & 0x1;
|
||||
bits_10_1 = (unordered >> 9) & 0x3ff;
|
||||
bit_20 = (unordered >> 19) & 0x1;
|
||||
|
||||
imeed = 0 | (bits_10_1 << 1) | (bit_11 << 11) | (bits_19_12 << 12) | (bit_20 << 20);
|
||||
|
||||
if (bit_20) {
|
||||
imeed |= ~j_imm_mask_;
|
||||
}
|
||||
|
||||
instr->setSrcImm(imeed);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case InstType::V_TYPE:
|
||||
D(3, "Entered here: instr type = vector" << op);
|
||||
switch (op) {
|
||||
case Opcode::VSET_ARITH: //TODO: arithmetic ops
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
|
||||
func3 = (code >> shift_func3_) & func3_mask_;
|
||||
instr->setFunc3(func3);
|
||||
D(3, "Entered here: instr type = vector");
|
||||
@@ -228,53 +233,34 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
||||
instr->setVsew((immed >> 2) & 0x3);
|
||||
D(3, "sew " << ((immed >> 2) & 0x3));
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
}
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
} else {
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_) & 0x1);
|
||||
instr->setFunc6((code >> shift_func6_) & func6_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->rs2 = ((code >> shift_rs2_) & reg_mask_);
|
||||
trace_inst->rd = ((code >> shift_rd_) & reg_mask_);
|
||||
}
|
||||
break;
|
||||
|
||||
case Opcode::VL:
|
||||
D(3, "vector load instr");
|
||||
instr->setDestReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setDestVReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_));
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
trace_inst->vd = ((code >> shift_rd_) & reg_mask_);
|
||||
//trace_inst->vs2 = ((code>>shift_rs2_) & reg_mask_);
|
||||
break;
|
||||
|
||||
case Opcode::VS:
|
||||
instr->setVs3((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setVlsWidth((code >> shift_func3_) & func3_mask_);
|
||||
instr->setSrcReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcVReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setVmask((code >> shift_vmask_));
|
||||
instr->setVmop((code >> shift_vmop_) & func3_mask_);
|
||||
instr->setVnf((code >> shift_vnf_) & func3_mask_);
|
||||
|
||||
trace_inst->valid_inst = true;
|
||||
trace_inst->rs1 = ((code >> shift_rs1_) & reg_mask_);
|
||||
//trace_inst->vd = ((code>>shift_rd_) & reg_mask_);
|
||||
trace_inst->vs1 = ((code >> shift_rd_) & reg_mask_); //vs3
|
||||
break;
|
||||
|
||||
default:
|
||||
@@ -282,11 +268,49 @@ std::shared_ptr<Instr> Decoder::decode(const std::vector<Byte> &v, Size &idx, tr
|
||||
std::abort();
|
||||
}
|
||||
break;
|
||||
case R4_TYPE:
|
||||
// RT: add R4_TYPE decoder
|
||||
instr->setDestFReg((code >> shift_rd_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs1_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs2_) & reg_mask_);
|
||||
instr->setSrcFReg((code >> shift_rs3_) & reg_mask_);
|
||||
instr->setFunc3((code >> shift_func3_) & func3_mask_);
|
||||
break;
|
||||
default:
|
||||
std::cout << "Unrecognized argument class in word decoder.\n";
|
||||
std::abort();
|
||||
}
|
||||
|
||||
if (curInstType != InstType::N_TYPE) {
|
||||
trace_inst->valid = true;
|
||||
if (instr->hasRDest()) {
|
||||
if (instr->is_FpDest()) {
|
||||
trace_inst->frd = instr->getRDest();
|
||||
} else if (instr->is_VDest()) {
|
||||
trace_inst->vrd = instr->getRDest();
|
||||
} else {
|
||||
trace_inst->ird = instr->getRDest();
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < instr->getNRSrc(); ++i) {
|
||||
if (instr->is_FpSrc(i)) {
|
||||
if (i == 0) trace_inst->frs1 = instr->getRSrc(i);
|
||||
else if (i == 1) trace_inst->frs2 = instr->getRSrc(i);
|
||||
else if (i == 2) trace_inst->frs3 = instr->getRSrc(i);
|
||||
else std::abort();
|
||||
} else if (instr->is_VSrc(i)) {
|
||||
if (i == 0) trace_inst->vrs1 = instr->getRSrc(i);
|
||||
else if (i == 1) trace_inst->vrs2 = instr->getRSrc(i);
|
||||
else std::abort();
|
||||
} else {
|
||||
if (i == 0) trace_inst->irs1 = instr->getRSrc(i);
|
||||
else if (i == 1) trace_inst->irs2 = instr->getRSrc(i);
|
||||
else std::abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
D(2, "Decoded instr 0x" << std::hex << code << " into: " << instr << std::flush);
|
||||
|
||||
return instr;
|
||||
|
||||
@@ -21,11 +21,14 @@ private:
|
||||
Word inst_s_;
|
||||
Word opcode_s_;
|
||||
Word reg_s_;
|
||||
Word func2_s_;
|
||||
Word func3_s_;
|
||||
Word shift_opcode_;
|
||||
Word shift_rd_;
|
||||
Word shift_rs1_;
|
||||
Word shift_rs2_;
|
||||
Word shift_rs3_;
|
||||
Word shift_func2_;
|
||||
Word shift_func3_;
|
||||
Word shift_func7_;
|
||||
Word shift_j_u_immed_;
|
||||
@@ -33,6 +36,7 @@ private:
|
||||
Word shift_i_immed_;
|
||||
|
||||
Word reg_mask_;
|
||||
Word func2_mask_;
|
||||
Word func3_mask_;
|
||||
Word func6_mask_;
|
||||
Word func7_mask_;
|
||||
|
||||
2524
simX/execute.cpp
2524
simX/execute.cpp
File diff suppressed because it is too large
Load Diff
@@ -1,23 +0,0 @@
|
||||
#include <iostream>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "instr.h"
|
||||
|
||||
using namespace vortex;
|
||||
|
||||
void Instr::setVlmul(Word lmul) {
|
||||
vlmul_ = std::pow(2, lmul);
|
||||
}
|
||||
|
||||
void Instr::setVsew(Word sew) {
|
||||
vsew_ = std::pow(2, 3+sew);
|
||||
}
|
||||
|
||||
void Instr::setVediv(Word ediv) {
|
||||
vediv_ = std::pow(2,ediv);
|
||||
}
|
||||
75
simX/instr.h
75
simX/instr.h
@@ -25,6 +25,14 @@ enum Opcode {
|
||||
VSET_ARITH= 0x57,
|
||||
VL = 0x7,
|
||||
VS = 0x27,
|
||||
// F-Extension
|
||||
FL = 0x7,
|
||||
FS = 0x27,
|
||||
FCI = 0x53,
|
||||
FMADD = 0x43,
|
||||
FMSUB = 0x47,
|
||||
FMNMSUB = 0x4b,
|
||||
FMNMADD = 0x4f,
|
||||
};
|
||||
|
||||
enum InstType {
|
||||
@@ -35,55 +43,60 @@ enum InstType {
|
||||
B_TYPE,
|
||||
U_TYPE,
|
||||
J_TYPE,
|
||||
V_TYPE
|
||||
V_TYPE,
|
||||
R4_TYPE
|
||||
};
|
||||
|
||||
class Instr {
|
||||
public:
|
||||
Instr()
|
||||
: predicated_(false)
|
||||
: opcode_(Opcode::NOP)
|
||||
, nRsrc_(0)
|
||||
, nPsrc_(0)
|
||||
, hasImmSrc_(false)
|
||||
, hasRDest_(false)
|
||||
, hasPDest_(false)
|
||||
, is_FpDest_(false)
|
||||
, is_VDest_(false)
|
||||
, is_FpSrc_(0)
|
||||
, is_VSrc_(0)
|
||||
, func2_(0)
|
||||
, func3_(0)
|
||||
, func7_(0)
|
||||
{}
|
||||
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &, Instr &);
|
||||
|
||||
/* Setters used to "craft" the instruction. */
|
||||
void setOpcode(Opcode opcode) { opcode_ = opcode; }
|
||||
void setPred(RegNum pReg) { predicated_ = true; pred_ = pReg; }
|
||||
void setDestReg(RegNum destReg) { hasRDest_ = true; rdest_ = destReg; }
|
||||
void setSrcReg(RegNum srcReg) { rsrc_[nRsrc_++] = srcReg; }
|
||||
void setDestReg(int destReg) { hasRDest_ = true; rdest_ = destReg; }
|
||||
void setSrcReg(int srcReg) { rsrc_[nRsrc_++] = srcReg; }
|
||||
void setDestFReg(int destReg) { hasRDest_ = true; is_FpDest_ = true; rdest_ = destReg; }
|
||||
void setSrcFReg(int srcReg) { is_FpSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
|
||||
void setDestVReg(int destReg) { hasRDest_ = true; is_VDest_ = true; rdest_ = destReg; }
|
||||
void setSrcVReg(int srcReg) { is_VSrc_ |= (1 << nRsrc_); rsrc_[nRsrc_++] = srcReg; }
|
||||
void setFunc3(Word func3) { func3_ = func3; }
|
||||
void setFunc7(Word func7) { func7_ = func7; }
|
||||
void setSrcImm(Word srcImm) { hasImmSrc_ = true; immsrc_ = srcImm; }
|
||||
void setVsetImm(Word vset_imm) { if(vset_imm) vsetImm_ = true; else vsetImm_ = false; }
|
||||
void setVsetImm(Word vset_imm) { if (vset_imm) vsetImm_ = true; else vsetImm_ = false; }
|
||||
void setVlsWidth(Word width) { vlsWidth_ = width; }
|
||||
void setVmop(Word mop) { vMop_ = mop; }
|
||||
void setVnf(Word nf) { vNf_ = nf; }
|
||||
void setVmask(Word mask) { vmask_ = mask; }
|
||||
void setVs3(Word vs) { vs3_ = vs; }
|
||||
void setVlmul(Word lmul);
|
||||
void setVsew(Word sew);
|
||||
void setVediv(Word ediv);
|
||||
void setVlmul(Word lmul) { vlmul_ = 1 << lmul; }
|
||||
void setVsew(Word sew) { vsew_ = 1 << (3+sew); }
|
||||
void setVediv(Word ediv) { vediv_ = 1 << ediv; }
|
||||
void setFunc6(Word func6) { func6_ = func6; }
|
||||
void setPrivileged(bool privileged) { privileged_ = privileged; }
|
||||
|
||||
/* Getters used by encoders. */
|
||||
Opcode getOpcode() const { return opcode_; }
|
||||
Word getFunc3() const { return func3_; }
|
||||
Word getFunc6() const { return func6_; }
|
||||
Word getFunc7() const { return func7_; }
|
||||
RegNum getNRSrc() const { return nRsrc_; }
|
||||
RegNum getRSrc(RegNum i) const { return rsrc_[i]; }
|
||||
int getNRSrc() const { return nRsrc_; }
|
||||
int getRSrc(int i) const { return rsrc_[i]; }
|
||||
bool hasRDest() const { return hasRDest_; }
|
||||
RegNum getRDest() const { return rdest_; }
|
||||
bool hasPDest() const { return hasPDest_; }
|
||||
RegNum getPDest() const { return pdest_; }
|
||||
bool hasPred() const { return predicated_; }
|
||||
RegNum getPred() const { return pred_; }
|
||||
int getRDest() const { return rdest_; }
|
||||
bool hasImm() const { return hasImmSrc_; }
|
||||
Word getImm() const { return immsrc_; }
|
||||
bool getVsetImm() const { return vsetImm_; }
|
||||
@@ -95,7 +108,12 @@ public:
|
||||
Word getVlmul() const { return vlmul_; }
|
||||
Word getVsew() const { return vsew_; }
|
||||
Word getVediv() const { return vediv_; }
|
||||
bool getPrivileged() const { return privileged_; }
|
||||
|
||||
bool is_FpDest() const { return is_FpDest_; }
|
||||
bool is_FpSrc(int i) const { return (is_FpSrc_ >> i) & 0x1; }
|
||||
|
||||
bool is_VDest() const { return is_VDest_; }
|
||||
bool is_VSrc(int i) const { return (is_VSrc_ >> i) & 0x1; }
|
||||
|
||||
private:
|
||||
|
||||
@@ -104,20 +122,19 @@ private:
|
||||
};
|
||||
|
||||
Opcode opcode_;
|
||||
bool predicated_;
|
||||
RegNum pred_;
|
||||
int nRsrc_;
|
||||
int nPsrc_;
|
||||
RegNum rsrc_[MAX_REG_SOURCES];
|
||||
bool hasImmSrc_;
|
||||
bool hasRDest_;
|
||||
bool is_FpDest_;
|
||||
bool is_VDest_;
|
||||
int is_FpSrc_;
|
||||
int is_VSrc_;
|
||||
Word immsrc_;
|
||||
Word func2_;
|
||||
Word func3_;
|
||||
Word func7_;
|
||||
bool hasRDest_;
|
||||
bool hasPDest_;
|
||||
RegNum rdest_;
|
||||
RegNum pdest_;
|
||||
bool privileged_;
|
||||
int rsrc_[MAX_REG_SOURCES];
|
||||
int rdest_;
|
||||
|
||||
//Vector
|
||||
bool vsetImm_;
|
||||
|
||||
@@ -15,8 +15,8 @@ using namespace vortex;
|
||||
|
||||
int main(int argc, char **argv) {
|
||||
|
||||
std::string archString("rv32i");
|
||||
int num_cores(1);
|
||||
std::string archString("rv32imf");
|
||||
int num_cores(NUM_CORES * NUM_CLUSTERS);
|
||||
int num_warps(NUM_WARPS);
|
||||
int num_threads(NUM_THREADS);
|
||||
std::string imgFileName;
|
||||
@@ -36,19 +36,19 @@ int main(int argc, char **argv) {
|
||||
|
||||
if (showHelp || imgFileName.empty()) {
|
||||
std::cout << "Vortex emulator command line arguments:\n"
|
||||
" -i, --image <filename> Program RAM image\n"
|
||||
" -i, --image <filename> Program RAM image\n"
|
||||
" -c, --cores <num> Number of cores\n"
|
||||
" -w, --warps <num> Number of warps\n"
|
||||
" -t, --threads <num> Number of threads\n"
|
||||
" -a, --arch <arch string> Architecture string\n"
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
" -s, --stats Print stats on exit.\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
ArchDef arch(archString, num_cores, num_warps, num_threads);
|
||||
|
||||
Decoder decoder(arch);
|
||||
MemoryUnit mu(4096, arch.getWordSize(), true);
|
||||
MemoryUnit mu(4096, arch.wsize(), true);
|
||||
|
||||
RAM old_ram;
|
||||
old_ram.loadHexImpl(imgFileName.c_str());
|
||||
@@ -59,7 +59,7 @@ int main(int argc, char **argv) {
|
||||
|
||||
std::vector<std::shared_ptr<Core>> cores(num_cores);
|
||||
for (int i = 0; i < num_cores; ++i) {
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu);
|
||||
cores[i] = std::make_shared<Core>(arch, decoder, mu, i);
|
||||
}
|
||||
|
||||
bool running;
|
||||
24
simX/trace.h
24
simX/trace.h
@@ -5,21 +5,27 @@ namespace vortex {
|
||||
|
||||
struct trace_inst_t {
|
||||
// Warp step
|
||||
bool valid_inst;
|
||||
unsigned pc;
|
||||
bool valid;
|
||||
unsigned PC;
|
||||
|
||||
// Core scheduler
|
||||
int wid;
|
||||
|
||||
// Encoder
|
||||
int rs1;
|
||||
int rs2;
|
||||
int rd;
|
||||
int irs1;
|
||||
int irs2;
|
||||
int ird;
|
||||
|
||||
//Encoder
|
||||
int vs1;
|
||||
int vs2;
|
||||
int vd;
|
||||
// Floating-point
|
||||
int frs1;
|
||||
int frs2;
|
||||
int frs3;
|
||||
int frd;
|
||||
|
||||
// Vector extension
|
||||
int vrs1;
|
||||
int vrs2;
|
||||
int vrd;
|
||||
|
||||
// Instruction execute
|
||||
bool is_lw;
|
||||
|
||||
10
simX/types.h
10
simX/types.h
@@ -1,20 +1,18 @@
|
||||
#pragma once
|
||||
|
||||
#include <stdint.h>
|
||||
#include <bitset>
|
||||
#include <VX_config.h>
|
||||
|
||||
namespace vortex {
|
||||
|
||||
typedef uint8_t Byte;
|
||||
typedef uint32_t Word;
|
||||
typedef uint32_t Word_u;
|
||||
typedef int32_t Word_s;
|
||||
|
||||
typedef Word_u Addr;
|
||||
typedef Word_u Size;
|
||||
typedef uint32_t Addr;
|
||||
typedef uint32_t Size;
|
||||
|
||||
typedef unsigned RegNum;
|
||||
typedef unsigned ThdNum;
|
||||
typedef std::bitset<32> ThreadMask;
|
||||
|
||||
enum MemFlags {
|
||||
RD_USR = 1,
|
||||
|
||||
@@ -12,15 +12,15 @@ Word vortex::signExt(Word w, Size bit, Word mask) {
|
||||
return w;
|
||||
}
|
||||
|
||||
void vortex::wordToBytes(Byte *b, Word_u w, Size wordSize) {
|
||||
void vortex::wordToBytes(Byte *b, Word w, Size wordSize) {
|
||||
while (wordSize--) {
|
||||
*(b++) = w & 0xff;
|
||||
w >>= 8;
|
||||
}
|
||||
}
|
||||
|
||||
Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
|
||||
Word_u w = 0;
|
||||
Word vortex::bytesToWord(const Byte *b, Size wordSize) {
|
||||
Word w = 0;
|
||||
b += wordSize-1;
|
||||
while (wordSize--) {
|
||||
w <<= 8;
|
||||
@@ -29,15 +29,15 @@ Word_u vortex::bytesToWord(const Byte *b, Size wordSize) {
|
||||
return w;
|
||||
}
|
||||
|
||||
Word_u vortex::flagsToWord(bool r, bool w, bool x) {
|
||||
Word_u word = 0;
|
||||
Word vortex::flagsToWord(bool r, bool w, bool x) {
|
||||
Word word = 0;
|
||||
if (r) word |= RD_USR;
|
||||
if (w) word |= WR_USR;
|
||||
if (x) word |= EX_USR;
|
||||
return word;
|
||||
}
|
||||
|
||||
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word_u f) {
|
||||
void vortex::wordToFlags(bool &r, bool &w, bool &x, Word f) {
|
||||
r = f & RD_USR;
|
||||
w = f & WR_USR;
|
||||
x = f & EX_USR;
|
||||
@@ -49,10 +49,10 @@ Byte vortex::readByte(const std::vector<Byte> &b, Size &n) {
|
||||
return b[n++];
|
||||
}
|
||||
|
||||
Word_u vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
|
||||
Word vortex::readWord(const std::vector<Byte> &b, Size &n, Size wordSize) {
|
||||
if (b.size() - n < wordSize)
|
||||
throw std::out_of_range("out of range");
|
||||
Word_u w(0);
|
||||
Word w(0);
|
||||
n += wordSize;
|
||||
// std::cout << "wordSize: " << wordSize << "\n";
|
||||
for (Size i = 0; i < wordSize; i++) {
|
||||
|
||||
10
simX/util.h
10
simX/util.h
@@ -12,13 +12,13 @@ void unused(Args&&...) {}
|
||||
|
||||
Word signExt(Word w, Size bit, Word mask);
|
||||
|
||||
Word_u bytesToWord(const Byte *b, Size wordSize);
|
||||
void wordToBytes(Byte *b, Word_u w, Size wordSize);
|
||||
Word_u flagsToWord(bool r, bool w, bool x);
|
||||
void wordToFlags(bool &r, bool &w, bool &x, Word_u f);
|
||||
Word bytesToWord(const Byte *b, Size wordSize);
|
||||
void wordToBytes(Byte *b, Word w, Size wordSize);
|
||||
Word flagsToWord(bool r, bool w, bool x);
|
||||
void wordToFlags(bool &r, bool &w, bool &x, Word f);
|
||||
|
||||
Byte readByte(const std::vector<Byte> &b, Size &n);
|
||||
Word_u readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
|
||||
Word readWord(const std::vector<Byte> &b, Size &n, Size wordSize);
|
||||
void writeByte(std::vector<Byte> &p, Size &n, Byte b);
|
||||
void writeWord(std::vector<Byte> &p, Size &n, Size wordSize, Word w);
|
||||
|
||||
|
||||
102
simX/warp.cpp
102
simX/warp.cpp
@@ -2,6 +2,7 @@
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "instr.h"
|
||||
@@ -11,120 +12,71 @@ using namespace vortex;
|
||||
|
||||
Warp::Warp(Core *core, Word id)
|
||||
: id_(id)
|
||||
, active_(false)
|
||||
, core_(core)
|
||||
, pc_(0x80000000)
|
||||
, shadowPc_(0)
|
||||
, activeThreads_(0)
|
||||
, shadowActiveThreads_(0)
|
||||
, shadowReg_(core_->arch().getNumRegs())
|
||||
, VLEN_(1024)
|
||||
, interruptEnable_(true)
|
||||
, shadowInterruptEnable_(false)
|
||||
, supervisorMode_(true)
|
||||
, shadowSupervisorMode_(false)
|
||||
, spawned_(false)
|
||||
, PC_(0x80000000)
|
||||
, steps_(0)
|
||||
, insts_(0)
|
||||
, loads_(0)
|
||||
, stores_(0) {
|
||||
D(3, "Creating a new thread with PC: " << std::hex << pc_);
|
||||
/* Build the register file. */
|
||||
Word regNum(0);
|
||||
for (Word j = 0; j < core_->arch().getNumThreads(); ++j) {
|
||||
regFile_.push_back(std::vector<Reg<Word>>(0));
|
||||
for (Word i = 0; i < core_->arch().getNumRegs(); ++i) {
|
||||
regFile_[j].push_back(Reg<Word>(id, regNum++));
|
||||
}
|
||||
|
||||
bool act = false;
|
||||
if (j == 0)
|
||||
act = true;
|
||||
tmask_.push_back(act);
|
||||
shadowTmask_.push_back(act);
|
||||
}
|
||||
tmask_.reset();
|
||||
|
||||
for (Word i = 0; i < (1 << 12); i++) {
|
||||
csrs_.push_back(Reg<uint16_t>(id, regNum++));
|
||||
}
|
||||
|
||||
/* Set initial register contents. */
|
||||
regFile_[0][0] = (core_->arch().getNumThreads() << (core_->arch().getWordSize() * 8 / 2)) | id;
|
||||
iRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
|
||||
fRegFile_.resize(core_->arch().num_threads(), std::vector<Word>(core_->arch().num_regs(), 0));
|
||||
vRegFile_.resize(core_->arch().num_regs(), std::vector<Byte>(core_->arch().vsize(), 0));
|
||||
csrs_.resize(core_->arch().num_csrs());
|
||||
}
|
||||
|
||||
void Warp::step(trace_inst_t *trace_inst) {
|
||||
assert(tmask_.any());
|
||||
|
||||
Size fetchPos(0);
|
||||
Size decPos;
|
||||
Size wordSize(core_->arch().getWordSize());
|
||||
Size wordSize(core_->arch().wsize());
|
||||
std::vector<Byte> fetchBuffer(wordSize);
|
||||
|
||||
if (activeThreads_ == 0)
|
||||
return;
|
||||
|
||||
++steps_;
|
||||
|
||||
D(3, "current PC=0x" << std::hex << pc_);
|
||||
D(3, "current PC=0x" << std::hex << PC_);
|
||||
|
||||
// std::cout << "pc: " << std::hex << pc << "\n";
|
||||
trace_inst->pc = pc_;
|
||||
// std::cout << "PC: " << std::hex << PC << "\n";
|
||||
trace_inst->PC = PC_;
|
||||
|
||||
/* Fetch and decode. */
|
||||
if (wordSize < sizeof(pc_))
|
||||
pc_ &= ((1ll << (wordSize * 8)) - 1);
|
||||
if (wordSize < sizeof(PC_))
|
||||
PC_ &= ((1ll << (wordSize * 8)) - 1);
|
||||
|
||||
unsigned fetchSize = 4;
|
||||
fetchBuffer.resize(fetchSize);
|
||||
Word fetched = core_->mem().fetch(pc_ + fetchPos, supervisorMode_);
|
||||
Word fetched = core_->mem().fetch(PC_ + fetchPos, 0);
|
||||
writeWord(fetchBuffer, fetchPos, fetchSize, fetched);
|
||||
|
||||
decPos = 0;
|
||||
std::shared_ptr<Instr> instr = core_->decoder().decode(fetchBuffer, decPos, trace_inst);
|
||||
|
||||
// Update pc
|
||||
pc_ += decPos;
|
||||
// Update PC
|
||||
PC_ += decPos;
|
||||
|
||||
// Execute
|
||||
this->execute(*instr, trace_inst);
|
||||
|
||||
// At Debug Level 3, print debug info after each instruction.
|
||||
D(3, "Register state:");
|
||||
for (unsigned i = 0; i < regFile_[0].size(); ++i) {
|
||||
D_RAW(" %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
for (unsigned j = 0; j < (activeThreads_); ++j)
|
||||
D_RAW(' ' << std::setfill('0') << std::setw(8) << std::hex << regFile_[j][i] << std::setfill(' ') << ' ');
|
||||
D_RAW('(' << shadowReg_[i] << ')' << std::endl);
|
||||
D(4, "Register state:");
|
||||
for (int i = 0; i < core_->arch().num_regs(); ++i) {
|
||||
DPN(4, " %r" << std::setfill('0') << std::setw(2) << std::dec << i << ':');
|
||||
for (int j = 0; j < core_->arch().num_threads(); ++j) {
|
||||
DPN(4, ' ' << std::setfill('0') << std::setw(8) << std::hex << iRegFile_[j][i] << std::setfill(' ') << ' ');
|
||||
}
|
||||
DPN(4, std::endl);
|
||||
}
|
||||
|
||||
DPH(3, "Thread mask:");
|
||||
for (unsigned i = 0; i < tmask_.size(); ++i)
|
||||
for (int i = 0; i < core_->arch().num_threads(); ++i)
|
||||
DPN(3, " " << tmask_[i]);
|
||||
DPN(3, "\n");
|
||||
}
|
||||
|
||||
bool Warp::interrupt(Word r0) {
|
||||
if (!interruptEnable_)
|
||||
return false;
|
||||
|
||||
shadowActiveThreads_ = activeThreads_;
|
||||
shadowTmask_ = tmask_;
|
||||
shadowInterruptEnable_ = interruptEnable_; /* For traps. */
|
||||
shadowSupervisorMode_ = supervisorMode_;
|
||||
|
||||
for (Word i = 0; i < regFile_[0].size(); ++i)
|
||||
shadowReg_[i] = regFile_[0][i];
|
||||
|
||||
for (Word i = 0; i < regFile_.size(); ++i)
|
||||
tmask_[i] = 1;
|
||||
|
||||
shadowPc_ = pc_;
|
||||
activeThreads_ = 1;
|
||||
interruptEnable_ = false;
|
||||
supervisorMode_ = true;
|
||||
regFile_[0][0] = r0;
|
||||
pc_ = core_->interruptEntry();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Warp::printStats() const {
|
||||
std::cout << "Steps : " << steps_ << std::endl
|
||||
<< "Insts : " << insts_ << std::endl
|
||||
|
||||
160
simX/warp.h
160
simX/warp.h
@@ -7,69 +7,25 @@
|
||||
|
||||
namespace vortex {
|
||||
|
||||
template <typename T>
|
||||
class Reg {
|
||||
public:
|
||||
Reg()
|
||||
: value_(0), cpuId_(0), regNum_(0) {}
|
||||
Reg(Word c, Word n)
|
||||
: value_(0), cpuId_(c), regNum_(n) {}
|
||||
Reg(Word c, Word n, T v)
|
||||
: value_(v), cpuId_(c), regNum_(n) {}
|
||||
|
||||
const T &value() const {
|
||||
return value_;
|
||||
}
|
||||
|
||||
Reg &operator=(T r) {
|
||||
if (regNum_) {
|
||||
value_ = r;
|
||||
doWrite();
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
operator T() const {
|
||||
doRead();
|
||||
return value_;
|
||||
}
|
||||
|
||||
void trunc(Size s) {
|
||||
Word mask((~0ull >> (sizeof(Word) - s) * 8));
|
||||
value_ &= mask;
|
||||
}
|
||||
|
||||
private:
|
||||
T value_;
|
||||
Word cpuId_, regNum_;
|
||||
|
||||
void doWrite() const {}
|
||||
void doRead() const {}
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
struct DomStackEntry {
|
||||
DomStackEntry(
|
||||
unsigned p,
|
||||
const std::vector<std::vector<Reg<Word>>> &m,
|
||||
std::vector<bool> &tm,
|
||||
Word pc
|
||||
) : pc(pc)
|
||||
, fallThrough(false)
|
||||
, uni(false) {
|
||||
for (unsigned i = 0; i < m.size(); ++i) {
|
||||
tmask.push_back(!bool(m[i][p]) && tm[i]);
|
||||
}
|
||||
}
|
||||
DomStackEntry(const ThreadMask &tmask, Word PC)
|
||||
: tmask(tmask)
|
||||
, PC(PC)
|
||||
, fallThrough(false)
|
||||
, unanimous(false)
|
||||
{}
|
||||
|
||||
DomStackEntry(const std::vector<bool> &tmask)
|
||||
: tmask(tmask), fallThrough(true), uni(false) {}
|
||||
DomStackEntry(const ThreadMask &tmask)
|
||||
: tmask(tmask)
|
||||
, PC(0)
|
||||
, fallThrough(true)
|
||||
, unanimous(false)
|
||||
{}
|
||||
|
||||
std::vector<bool> tmask;
|
||||
Word pc;
|
||||
ThreadMask tmask;
|
||||
Word PC;
|
||||
bool fallThrough;
|
||||
bool uni;
|
||||
bool unanimous;
|
||||
};
|
||||
|
||||
struct vtype {
|
||||
@@ -86,13 +42,13 @@ class trace_inst_t;
|
||||
class Warp {
|
||||
public:
|
||||
Warp(Core *core, Word id = 0);
|
||||
|
||||
bool active() const {
|
||||
return tmask_.any();
|
||||
}
|
||||
|
||||
void step(trace_inst_t *);
|
||||
|
||||
bool interrupt(Word r0);
|
||||
|
||||
bool running() const {
|
||||
return (activeThreads_ != 0);
|
||||
std::size_t getActiveThreads() const {
|
||||
return tmask_.count();
|
||||
}
|
||||
|
||||
void printStats() const;
|
||||
@@ -105,78 +61,40 @@ public:
|
||||
return id_;
|
||||
}
|
||||
|
||||
Word get_pc() const {
|
||||
return pc_;
|
||||
Word getPC() const {
|
||||
return PC_;
|
||||
}
|
||||
|
||||
void set_pc(Word pc) {
|
||||
pc_ = pc;
|
||||
void setPC(Word PC) {
|
||||
PC_ = PC;
|
||||
}
|
||||
|
||||
void setActiveThreads(Size activeThreads) {
|
||||
activeThreads_ = activeThreads;
|
||||
}
|
||||
|
||||
Size getActiveThreads() const {
|
||||
return activeThreads_;
|
||||
}
|
||||
|
||||
void setSpawned(bool spawned) {
|
||||
spawned_ = spawned;
|
||||
}
|
||||
|
||||
void setSupervisorMode(bool supervisorMode) {
|
||||
supervisorMode_ = supervisorMode;
|
||||
}
|
||||
|
||||
bool getSupervisorMode() const {
|
||||
return supervisorMode_;
|
||||
}
|
||||
|
||||
void setTmask(size_t index, bool value) {
|
||||
tmask_[index] = value;
|
||||
}
|
||||
|
||||
void step(trace_inst_t *);
|
||||
|
||||
private:
|
||||
|
||||
void execute(Instr &instr, trace_inst_t *);
|
||||
|
||||
struct MemAccess {
|
||||
MemAccess(bool w, Word a)
|
||||
: wr(w), addr(a) {}
|
||||
bool wr;
|
||||
Word addr;
|
||||
};
|
||||
|
||||
std::vector<MemAccess> memAccesses_;
|
||||
|
||||
Word id_;
|
||||
bool active_;
|
||||
Core *core_;
|
||||
Word pc_;
|
||||
Word shadowPc_;
|
||||
Size activeThreads_;
|
||||
Size shadowActiveThreads_;
|
||||
std::vector<std::vector<Reg<Word>>> regFile_;
|
||||
std::vector<Reg<uint16_t>> csrs_;
|
||||
|
||||
std::vector<bool> tmask_;
|
||||
std::vector<bool> shadowTmask_;
|
||||
|
||||
Word PC_;
|
||||
ThreadMask tmask_;
|
||||
|
||||
std::vector<std::vector<Word>> iRegFile_;
|
||||
std::vector<std::vector<Word>> fRegFile_;
|
||||
std::vector<std::vector<Byte>> vRegFile_;
|
||||
std::vector<Word> csrs_;
|
||||
std::stack<DomStackEntry> domStack_;
|
||||
|
||||
std::vector<Word> shadowReg_;
|
||||
|
||||
struct vtype vtype_; // both of them are XLEN WIDE
|
||||
int vl_; // both of them are XLEN WIDE
|
||||
Word VLEN_; // total vector length
|
||||
|
||||
std::vector<std::vector<Reg<char *>>> vregFile_; // 32 vector registers
|
||||
|
||||
bool interruptEnable_;
|
||||
bool shadowInterruptEnable_;
|
||||
bool supervisorMode_;
|
||||
bool shadowSupervisorMode_;
|
||||
bool spawned_;
|
||||
|
||||
struct vtype vtype_;
|
||||
int vl_;
|
||||
|
||||
unsigned long steps_;
|
||||
unsigned long insts_;
|
||||
unsigned long loads_;
|
||||
|
||||
Reference in New Issue
Block a user