8Warp 32Threads for GTCAD synthesis

This commit is contained in:
fares
2019-11-21 23:51:11 -05:00
9 changed files with 271 additions and 87 deletions

View File

@@ -1,4 +1,3 @@
`include "VX_define.v"
module VX_alu(
@@ -13,6 +12,71 @@ module VX_alu(
);
`ifdef SYN_FUNC
wire which_in2;
wire[31:0] ALU_in1;
wire[31:0] ALU_in2;
wire[63:0] ALU_in1_mult;
wire[63:0] ALU_in2_mult;
wire[31:0] upper_immed;
wire[31:0] div_result;
wire[31:0] rem_result;
assign which_in2 = in_rs2_src == `RS2_IMMED;
assign ALU_in1 = in_1;
assign ALU_in2 = which_in2 ? in_itype_immed : in_2;
assign upper_immed = {in_upper_immed, {12{1'b0}}};
//always @(posedge `MUL) begin
/* verilator lint_off UNUSED */
wire[63:0] alu_in1_signed = {{32{ALU_in1[31]}}, ALU_in1};
wire[63:0] alu_in2_signed = {{32{ALU_in2[31]}}, ALU_in2};
assign ALU_in1_mult = (in_alu_op == `MULHU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in1} : alu_in1_signed;
assign ALU_in2_mult = (in_alu_op == `MULHU || in_alu_op == `MULHSU || in_alu_op == `DIVU || in_alu_op == `REMU) ? {32'b0, ALU_in2} : alu_in2_signed;
wire[63:0] mult_result = ALU_in1_mult * ALU_in2_mult;
/* verilator lint_on UNUSED */
always @(in_alu_op or ALU_in1 or ALU_in2) begin
case(in_alu_op)
`ADD: out_alu_result = $signed(ALU_in1) + $signed(ALU_in2);
`SUB: out_alu_result = $signed(ALU_in1) - $signed(ALU_in2);
`SLLA: out_alu_result = ALU_in1 << ALU_in2[4:0];
`SLT: out_alu_result = ($signed(ALU_in1) < $signed(ALU_in2)) ? 32'h1 : 32'h0;
`SLTU: out_alu_result = ALU_in1 < ALU_in2 ? 32'h1 : 32'h0;
`XOR: out_alu_result = ALU_in1 ^ ALU_in2;
`SRL: out_alu_result = ALU_in1 >> ALU_in2[4:0];
`SRA: out_alu_result = $signed(ALU_in1) >>> ALU_in2[4:0];
`OR: out_alu_result = ALU_in1 | ALU_in2;
`AND: out_alu_result = ALU_in2 & ALU_in1;
`SUBU: out_alu_result = (ALU_in1 >= ALU_in2) ? 32'h0 : 32'hffffffff;
`LUI_ALU: out_alu_result = upper_immed;
`AUIPC_ALU: out_alu_result = $signed(in_curr_PC) + $signed(upper_immed);
`MUL: out_alu_result = mult_result[31:0];
`MULH: out_alu_result = mult_result[63:32];
`MULHSU: out_alu_result = mult_result[63:32];
`MULHU: out_alu_result = mult_result[63:32];
`DIV: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : $signed($signed(ALU_in1) / $signed(ALU_in2));
`DIVU: out_alu_result = (ALU_in2 == 0) ? 32'hffffffff : ALU_in1 / ALU_in2;
`REM: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : $signed($signed(ALU_in1) % $signed(ALU_in2));
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
default: out_alu_result = 32'h0;
endcase // in_alu_op
end
`else
wire which_in2;
wire[31:0] ALU_in1;
@@ -69,7 +133,7 @@ module VX_alu(
`REMU: out_alu_result = (ALU_in2 == 0) ? ALU_in1 : ALU_in1 % ALU_in2;
default: out_alu_result = 32'h0;
endcase // in_alu_op
end
end
`endif
endmodule // VX_alu

View File

@@ -1,11 +1,10 @@
`include "./VX_define_synth.v"
`define NT 4
`define NT_M1 (`NT-1)
// NW_M1 is actually log2(NW)
//`define NW_M1 (4-1)
`define NW 8
`define NW_M1 (`CLOG2(`NW))
// Uncomment the below line if NW=1
@@ -13,6 +12,7 @@
// `define SYN 1
// `define ASIC 1
// `define SYN_FUNC 1
`define NUM_BARRIERS 4

2
rtl/VX_define_synth.v Normal file
View File

@@ -0,0 +1,2 @@
`define NT 32
`define NW 8

View File

@@ -85,83 +85,87 @@ module VX_gpr (
wire[`NT_M1:0][31:0] to_write = (VX_writeback_inter.rd != 0) ? VX_writeback_inter.write_data : 0;
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_a),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(VX_gpr_read.rs1),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(to_write),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
genvar curr_base_thread;
for (curr_base_thread = 0; curr_base_thread < 'NT; curr_base_thread=curr_base_thread+4)
begin
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 first_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_a[(curr_base_thread+3):(curr_base_thread)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_1),
.AA(VX_gpr_read.rs1[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_b),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(VX_gpr_read.rs2),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask),
.AB(VX_writeback_inter.rd),
.DB(to_write),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
/* verilator lint_off PINCONNECTEMPTY */
rf2_32x128_wm1 second_ram (
.CENYA(),
.AYA(),
.CENYB(),
.WENYB(),
.AYB(),
.QA(temp_b[(curr_base_thread+3):(curr_base_thread)]),
.SOA(),
.SOB(),
.CLKA(clk),
.CENA(cena_2),
.AA(VX_gpr_read.rs2[(curr_base_thread+3):(curr_base_thread)]),
.CLKB(clk),
.CENB(cenb),
.WENB(write_bit_mask[(curr_base_thread+3):(curr_base_thread)]),
.AB(VX_writeback_inter.rd[(curr_base_thread+3):(curr_base_thread)]),
.DB(to_write[(curr_base_thread+3):(curr_base_thread)]),
.EMAA(3'b011),
.EMASA(1'b0),
.EMAB(3'b011),
.TENA(1'b1),
.TCENA(1'b0),
.TAA(5'b0),
.TENB(1'b1),
.TCENB(1'b0),
.TWENB(128'b0),
.TAB(5'b0),
.TDB(128'b0),
.RET1N(1'b1),
.SIA(2'b0),
.SEA(1'b0),
.DFTRAMBYP(1'b0),
.SIB(2'b0),
.SEB(1'b0),
.COLLDISN(1'b1)
);
/* verilator lint_on PINCONNECTEMPTY */
end
`endif

View File

@@ -304,9 +304,15 @@ module VX_d_cache
// 0;
wire[1:0] byte_select = bank_addr[1:0];
wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
`ifdef SYN_FUNC
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = 0;
wire[IND_SIZE_END:IND_SIZE_START] cache_index = 0;
`else
wire[OFFSET_SIZE_END:OFFSET_SIZE_START] cache_offset = bank_addr[ADDR_OFFSET_END:ADDR_OFFSET_START];
wire[IND_SIZE_END:IND_SIZE_START] cache_index = bank_addr[ADDR_IND_END:ADDR_IND_START];
wire[TAG_SIZE_END:TAG_SIZE_START] cache_tag = bank_addr[ADDR_TAG_END:ADDR_TAG_START];
`endif
wire normal_valid_in = valid_per_bank[bank_id];

View File

@@ -7,6 +7,7 @@ SRC = \
vortex_dpi.cpp \
vortex_tb.v \
../VX_define.v \
../VX_define_synth.v \
../interfaces/VX_branch_response_inter.v \
../interfaces/VX_csr_req_inter.v \
../interfaces/VX_csr_wb_inter.v \