integer reduction unit
This commit is contained in:
@@ -115,7 +115,7 @@
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
`define INST_OP_BITS 4
|
||||
`define INST_MOD_BITS 3
|
||||
`define INST_MOD_BITS 4
|
||||
`define INST_FMT_BITS 2
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
@@ -140,6 +140,7 @@
|
||||
`define INST_ALU_IS_BR(mod) mod[0]
|
||||
`define INST_ALU_IS_M(mod) mod[1]
|
||||
`define INST_ALU_IS_W(mod) mod[2]
|
||||
`define INST_ALU_IS_RED(mod) mod[3]
|
||||
|
||||
`define INST_BR_EQ 4'b0000
|
||||
`define INST_BR_NE 4'b0010
|
||||
@@ -176,6 +177,17 @@
|
||||
`define INST_M_SIGNED_A(op) (op[1:0] != 1)
|
||||
`define INST_M_IS_REM(op) op[1]
|
||||
|
||||
// Reduction op_type encodings (INST_RED_BITS wide).
// Bits [2:0] select the operation; bit 3 marks the unsigned variant of
// add/min/max (ADDU/MINU/MAXU differ from ADD/MIN/MAX only in bit 3).
// AND/OR/XOR have no signed/unsigned distinction and use bit 3 = 0.
`define INST_RED_ADD 4'b0000
|
||||
`define INST_RED_ADDU 4'b1000
|
||||
`define INST_RED_MIN 4'b0001
|
||||
`define INST_RED_MINU 4'b1001
|
||||
`define INST_RED_MAX 4'b0010
|
||||
`define INST_RED_MAXU 4'b1010
|
||||
`define INST_RED_AND 4'b0011
|
||||
`define INST_RED_OR 4'b0100
|
||||
`define INST_RED_XOR 4'b0101
|
||||
// Width in bits of the reduction op_type field.
`define INST_RED_BITS 4
|
||||
|
||||
`define INST_FMT_B 3'b000
|
||||
`define INST_FMT_H 3'b001
|
||||
`define INST_FMT_W 3'b010
|
||||
|
||||
@@ -33,7 +33,7 @@ module VX_alu_unit #(
|
||||
localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES);
|
||||
localparam PID_WIDTH = `UP(PID_BITS);
|
||||
localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1;
|
||||
localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED;
|
||||
localparam RSP_ARB_SIZE = 2 + `EXT_M_ENABLED;
|
||||
localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS);
|
||||
|
||||
VX_execute_if #(
|
||||
@@ -60,12 +60,13 @@ module VX_alu_unit #(
|
||||
for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin
|
||||
|
||||
wire is_muldiv_op;
|
||||
wire is_reduce_op;
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) int_execute_if();
|
||||
|
||||
assign int_execute_if.valid = execute_if[block_idx].valid && ~is_muldiv_op;
|
||||
assign int_execute_if.valid = execute_if[block_idx].valid && ~is_muldiv_op && ~is_reduce_op;
|
||||
assign int_execute_if.data = execute_if[block_idx].data;
|
||||
|
||||
VX_commit_if #(
|
||||
@@ -86,6 +87,31 @@ module VX_alu_unit #(
|
||||
.commit_if (int_commit_if)
|
||||
);
|
||||
|
||||
assign is_reduce_op = `INST_ALU_IS_RED(execute_if[block_idx].data.op_mod);
|
||||
|
||||
VX_execute_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) red_execute_if();
|
||||
|
||||
assign red_execute_if.valid = execute_if[block_idx].valid && is_reduce_op;
|
||||
assign red_execute_if.data = execute_if[block_idx].data;
|
||||
|
||||
VX_commit_if #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) red_commit_if();
|
||||
|
||||
`RESET_RELAY(red_reset, reset);
|
||||
|
||||
VX_reduce_unit #(
|
||||
.CORE_ID(CORE_ID),
|
||||
.NUM_LANES(NUM_LANES)
|
||||
) reduce_unit (
|
||||
.clk(clk),
|
||||
.reset(red_reset),
|
||||
.execute_if(red_execute_if),
|
||||
.commit_if(red_commit_if)
|
||||
);
|
||||
|
||||
`ifdef EXT_M_ENABLE
|
||||
|
||||
assign is_muldiv_op = `INST_ALU_IS_M(execute_if[block_idx].data.op_mod);
|
||||
@@ -96,7 +122,7 @@ module VX_alu_unit #(
|
||||
.NUM_LANES (NUM_LANES)
|
||||
) mdv_execute_if();
|
||||
|
||||
assign mdv_execute_if.valid = execute_if[block_idx].valid && is_muldiv_op;
|
||||
assign mdv_execute_if.valid = execute_if[block_idx].valid && is_muldiv_op && ~is_reduce_op;
|
||||
assign mdv_execute_if.data = execute_if[block_idx].data;
|
||||
|
||||
VX_commit_if #(
|
||||
@@ -113,12 +139,12 @@ module VX_alu_unit #(
|
||||
.commit_if (mdv_commit_if)
|
||||
);
|
||||
|
||||
assign execute_if[block_idx].ready = is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready;
|
||||
assign execute_if[block_idx].ready = is_reduce_op ? red_execute_if.ready : (is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready);
|
||||
|
||||
`else
|
||||
|
||||
assign is_muldiv_op = 0;
|
||||
assign execute_if[block_idx].ready = int_execute_if.ready;
|
||||
assign execute_if[block_idx].ready = is_reduce_op ? red_execute_if.ready : int_execute_if.ready;
|
||||
|
||||
`endif
|
||||
|
||||
@@ -135,19 +161,22 @@ module VX_alu_unit #(
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.valid,
|
||||
`endif
|
||||
int_commit_if.valid
|
||||
int_commit_if.valid,
|
||||
red_commit_if.valid
|
||||
}),
|
||||
.ready_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.ready,
|
||||
`endif
|
||||
int_commit_if.ready
|
||||
int_commit_if.ready,
|
||||
red_commit_if.ready
|
||||
}),
|
||||
.data_in ({
|
||||
`ifdef EXT_M_ENABLE
|
||||
mdv_commit_if.data,
|
||||
`endif
|
||||
int_commit_if.data
|
||||
int_commit_if.data,
|
||||
red_commit_if.data
|
||||
}),
|
||||
.data_out (commit_block_if[block_idx].data),
|
||||
.valid_out (commit_block_if[block_idx].valid),
|
||||
|
||||
@@ -505,6 +505,34 @@ module VX_decode #(
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
`INST_EXT3: begin
|
||||
ex_type = `EX_ALU;
|
||||
op_mod[3] = 1;
|
||||
`USED_IREG(rs1);
|
||||
`USED_IREG(rd);
|
||||
|
||||
case (func7[5:0])
|
||||
6'h0: begin
|
||||
op_type = func7[6] ? `INST_RED_ADDU : `INST_RED_ADD;
|
||||
end
|
||||
6'h1: begin
|
||||
op_type = func7[6] ? `INST_RED_MINU : `INST_RED_MIN;
|
||||
end
|
||||
6'h2: begin
|
||||
op_type = func7[6] ? `INST_RED_MAXU : `INST_RED_MAX;
|
||||
end
|
||||
6'h3: begin
|
||||
op_type = `INST_RED_AND;
|
||||
end
|
||||
6'h4: begin
|
||||
op_type = `INST_RED_OR;
|
||||
end
|
||||
6'h5: begin
|
||||
op_type = `INST_RED_XOR;
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
default:;
|
||||
endcase
|
||||
end
|
||||
|
||||
283
hw/rtl/core/VX_reduce_unit.sv
Normal file
283
hw/rtl/core/VX_reduce_unit.sv
Normal file
@@ -0,0 +1,283 @@
|
||||
`include "VX_define.vh"
|
||||
`include "VX_platform.vh"
|
||||
|
||||
|
||||
// Copyright © 2019-2023
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
|
||||
`include "VX_platform.vh"
|
||||
|
||||
// Combinational, mask-aware reduction tree.
//
// Recursively splits the N input lanes into a lower and an upper half,
// reduces each half with a nested instance, and merges the two partial
// results according to op_type. A half whose mask bits are all zero does
// not contribute: the other half's result is forwarded unchanged.
//
// Parameters:
//   DATAW_IN  - width of each input lane
//   DATAW_OUT - width of the reduced result (defaults to DATAW_IN)
//   N         - number of input lanes
// Ports:
//   data_in   - N packed lanes of DATAW_IN bits
//   mask      - per-lane enable; masked-out lanes are ignored
//   op_type   - one of the `INST_RED_* encodings
//   data_out  - reduced value
module VX_reduce_ext #(
    parameter DATAW_IN  = 1,
    parameter DATAW_OUT = DATAW_IN,
    parameter N         = 1
) (
    input wire [N-1:0][DATAW_IN-1:0]    data_in,
    input wire [N-1:0]                  mask,
    input wire [`INST_RED_BITS-1:0]     op_type,
    output wire [DATAW_OUT-1:0]         data_out
);
    if (N == 1) begin
        // Leaf: a single lane is passed through (resized to DATAW_OUT).
        // Masking of a lone lane is resolved by the parent level.
        `UNUSED_VAR(mask)
        `UNUSED_VAR(op_type)
        assign data_out = DATAW_OUT'(data_in[0]);
    end else begin
        // Lower half gets floor(N/2) lanes, upper half gets the rest.
        localparam int N_LO = N / 2;
        localparam int N_HI = N - N_LO;

        wire [N_LO-1:0][DATAW_IN-1:0] lo_lanes;
        wire [N_HI-1:0][DATAW_IN-1:0] hi_lanes;
        wire [N_LO-1:0]               lo_mask;
        wire [N_HI-1:0]               hi_mask;
        wire [DATAW_OUT-1:0]          lo_result;
        wire [DATAW_OUT-1:0]          hi_result;

        assign lo_lanes = data_in[N_LO-1:0];
        assign hi_lanes = data_in[N-1:N_LO];
        assign lo_mask  = mask[N_LO-1:0];
        assign hi_mask  = mask[N-1:N_LO];

        // A half participates only if at least one of its lanes is enabled.
        wire lo_active = (| lo_mask);
        wire hi_active = (| hi_mask);

        VX_reduce_ext #(
            .DATAW_IN  (DATAW_IN),
            .DATAW_OUT (DATAW_OUT),
            .N         (N_LO)
        ) reduce_A (
            .data_in  (lo_lanes),
            .mask     (lo_mask),
            .op_type  (op_type),
            .data_out (lo_result)
        );

        VX_reduce_ext #(
            .DATAW_IN  (DATAW_IN),
            .DATAW_OUT (DATAW_OUT),
            .N         (N_HI)
        ) reduce_B (
            .data_in  (hi_lanes),
            .mask     (hi_mask),
            .op_type  (op_type),
            .data_out (hi_result)
        );

        // Shared comparisons for the min/max variants.
        wire lo_lt_hi_signed   = ($signed(lo_result) < $signed(hi_result));
        wire lo_lt_hi_unsigned = (lo_result < hi_result);

        logic [DATAW_OUT-1:0] merged;

        always @(*) begin
            case (op_type)
            `INST_RED_ADD,
            `INST_RED_ADDU: merged = lo_result + hi_result;
            `INST_RED_MIN:  merged = lo_lt_hi_signed   ? lo_result : hi_result;
            `INST_RED_MINU: merged = lo_lt_hi_unsigned ? lo_result : hi_result;
            `INST_RED_MAX:  merged = lo_lt_hi_signed   ? hi_result : lo_result;
            `INST_RED_MAXU: merged = lo_lt_hi_unsigned ? hi_result : lo_result;
            `INST_RED_AND:  merged = lo_result & hi_result;
            `INST_RED_OR:   merged = lo_result | hi_result;
            `INST_RED_XOR:  merged = lo_result ^ hi_result;
            default:        merged = lo_result;
            endcase
        end

        // Only merge when both halves are active; otherwise forward the
        // active half. If neither half is active the value is a don't-care
        // and the upper half's result is forwarded.
        assign data_out = (~lo_active) ? hi_result
                        : (~hi_active) ? lo_result
                        : merged;
    end

endmodule
|
||||
|
||||
// Warp-level integer reduction unit.
//
// Reduces rs1 across the enabled lanes of every packet of an instruction
// and commits the reduced value, replicated to all lanes, once per packet.
//
// State machine:
//   IDLE       - wait for the first packet and seed the accumulator with
//                its lane reduction.
//   ACCUMULATE - fold each further packet into the accumulator until the
//                packet flagged eop arrives.
//   BROADCAST  - issue one commit per recorded packet (tmask/pid/sop/eop
//                come from tmask_pid_store, data is the accumulator).
//   FINISH     - acknowledge the eop packet and return to IDLE.
//
// The eop packet is deliberately NOT acknowledged until FINISH: its
// uuid/wid/PC/wb/rd fields are read straight from execute_if while the
// commits are being generated.
//
// Parameters:
//   CORE_ID   - unused here
//   NUM_LANES - lanes processed per packet
module VX_reduce_unit #(
    parameter CORE_ID   = 0,
    parameter NUM_LANES = 1
) (
    input wire clk,
    input wire reset,

    VX_execute_if.slave execute_if,
    VX_commit_if.master commit_if
);
    `UNUSED_PARAM(CORE_ID)

    localparam NUM_PACKETS = `NUM_THREADS / NUM_LANES;
    localparam PID_BITS    = `CLOG2(`NUM_THREADS / NUM_LANES);
    localparam PID_WIDTH   = `UP(PID_BITS);
    // One extra bit so the counter can hold the value NUM_PACKETS itself.
    localparam SIZEW       = PID_BITS + 1;

    localparam IDLE       = 2'b00;
    localparam ACCUMULATE = 2'b01;
    localparam BROADCAST  = 2'b10;
    localparam FINISH     = 2'b11;

    logic [1:0]       state, state_n;
    logic [`XLEN-1:0] accumulator, accumulator_n, reduced_accumulator;
    // Number of packet descriptors currently held in tmask_pid_store.
    logic [SIZEW-1:0] size, size_n;

    wire [(NUM_LANES * `XLEN)-1:0] broadcasted_accumulator;
    assign broadcasted_accumulator = {NUM_LANES{accumulator}};

    wire eop;
    wire [NUM_LANES-1:0][`XLEN-1:0] data_in;
    wire [`XLEN-1:0] data_out;

    assign eop     = execute_if.data.eop;
    assign data_in = execute_if.data.rs1_data;

    wire  execute_if_valid;
    logic execute_if_ready;
    logic commit_if_valid;
    wire  commit_if_ready;

    assign execute_if_valid = execute_if.valid;
    assign execute_if.ready = execute_if_ready;

    logic store_tmask_pid;
    logic read_tmask_pid;
    wire [PID_WIDTH-1:0] stored_pid;
    wire [NUM_LANES-1:0] stored_tmask;
    wire stored_sop;
    wire stored_eop;

    always @(*) begin
        state_n          = state;
        accumulator_n    = accumulator;
        execute_if_ready = 1'b0;
        commit_if_valid  = 1'b0;
        store_tmask_pid  = 1'b0;
        read_tmask_pid   = 1'b0;

        case (state)
        IDLE: begin
            if (execute_if_valid) begin
                accumulator_n   = data_out;
                store_tmask_pid = 1'b1;
                if (eop) begin
                    // single-packet instruction: hold it and broadcast
                    state_n = BROADCAST;
                end else begin
                    execute_if_ready = 1'b1;
                    state_n = ACCUMULATE;
                end
            end
        end
        ACCUMULATE: begin
            execute_if_ready = 1'b1;
            // eop and the packet payload are only meaningful while valid is
            // asserted, so every action here is qualified by it.
            if (execute_if_valid) begin
                accumulator_n   = reduced_accumulator;
                store_tmask_pid = 1'b1;
                if (eop) begin
                    // hold the last packet on the interface until FINISH
                    execute_if_ready = 1'b0;
                    state_n = BROADCAST;
                end
            end
        end
        BROADCAST: begin
            commit_if_valid = 1'b1;
            // pop one descriptor each time the output buffer accepts a commit
            read_tmask_pid  = commit_if_ready;
        end
        FINISH: begin
            // release the held eop packet
            execute_if_ready = 1'b1;
            if (execute_if_valid) begin
                state_n = IDLE;
            end
        end
        endcase

        // The occupancy update must come AFTER the case statement: the
        // store/read strobes are only final once the state logic has run.
        if (store_tmask_pid) begin
            size_n = size + SIZEW'(1);
        end else if (read_tmask_pid) begin
            size_n = size - SIZEW'(1);
        end else begin
            size_n = size;
        end

        // Leave BROADCAST once the last recorded packet has been committed.
        if (state == BROADCAST && size_n == '0) begin
            state_n = FINISH;
        end
    end

    always @(posedge clk) begin
        if (reset) begin
            accumulator <= '0;
            state       <= IDLE;
            size        <= '0;
        end else begin
            accumulator <= accumulator_n;
            state       <= state_n;
            size        <= size_n;
        end
    end

    // Reduce the enabled lanes of the current packet.
    VX_reduce_ext #(
        .DATAW_IN (`XLEN),
        .N        (NUM_LANES)
    ) reducer (
        .data_in  (data_in),
        .mask     (execute_if.data.tmask),
        .op_type  (execute_if.data.op_type),
        .data_out (data_out)
    );

    // Fold the current packet's result into the running accumulator.
    VX_reduce_ext #(
        .DATAW_IN (`XLEN),
        .N        (2)
    ) accumulator_reducer (
        .data_in  ({accumulator, data_out}),
        .mask     (2'b11),
        .op_type  (execute_if.data.op_type),
        .data_out (reduced_accumulator)
    );

    // Records {tmask, pid, sop, eop} of every packet seen, so that one
    // commit per packet can be replayed during BROADCAST.
    VX_elastic_buffer #(
        .DATAW (NUM_LANES + PID_WIDTH + 1 + 1),
        .SIZE  (NUM_PACKETS)
    ) tmask_pid_store (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (store_tmask_pid),
        `UNUSED_PIN(ready_in),
        .data_in   ({execute_if.data.tmask, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
        .data_out  ({stored_tmask, stored_pid, stored_sop, stored_eop}),
        .ready_out (read_tmask_pid),
        `UNUSED_PIN(valid_out)
    );

    // Commit-side buffer decoupling the state machine from commit_if.
    VX_elastic_buffer #(
        .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + (`XLEN * NUM_LANES) + PID_WIDTH + 1 + 1)
    ) output_buffer (
        .clk       (clk),
        .reset     (reset),
        .valid_in  (commit_if_valid),
        .ready_in  (commit_if_ready),
        .data_in   ({execute_if.data.uuid, execute_if.data.wid, stored_tmask, execute_if.data.PC, execute_if.data.wb, execute_if.data.rd, broadcasted_accumulator, stored_pid, stored_sop, stored_eop}),
        .data_out  ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.wb, commit_if.data.rd, commit_if.data.data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
        .ready_out (commit_if.ready),
        .valid_out (commit_if.valid)
    );

endmodule
|
||||
@@ -1,19 +1,23 @@
|
||||
# Top-level driver for the kernel tests: each target simply forwards to the
# same target in every test sub-directory.
# None of these targets produce a file of the same name.
.PHONY: all run-simx run-rtlsim clean

# Build every kernel test.
all:
	$(MAKE) -C conform
	$(MAKE) -C hello
	$(MAKE) -C fibonacci
	$(MAKE) -C reductions

# Run every kernel test via its run-simx target.
run-simx:
	$(MAKE) -C conform run-simx
	$(MAKE) -C hello run-simx
	$(MAKE) -C fibonacci run-simx
	$(MAKE) -C reductions run-simx

# Run every kernel test via its run-rtlsim target.
run-rtlsim:
	$(MAKE) -C conform run-rtlsim
	$(MAKE) -C hello run-rtlsim
	$(MAKE) -C fibonacci run-rtlsim
	$(MAKE) -C reductions run-rtlsim

# Remove build products of every kernel test.
clean:
	$(MAKE) -C conform clean
	$(MAKE) -C hello clean
	$(MAKE) -C fibonacci clean
	$(MAKE) -C reductions clean
|
||||
|
||||
5
tests/kernel/reductions/Makefile
Normal file
5
tests/kernel/reductions/Makefile
Normal file
@@ -0,0 +1,5 @@
|
||||
# Build description for the "reductions" kernel test.
# PROJECT and SRCS are set before including ../common.mk, which presumably
# consumes them to name and build the test (TODO confirm against common.mk).
PROJECT = reductions
|
||||
|
||||
SRCS = main.cpp
|
||||
|
||||
include ../common.mk
|
||||
216
tests/kernel/reductions/main.cpp
Normal file
216
tests/kernel/reductions/main.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
// Major opcode used by the reduction instructions.
#define RISCV_CUSTOM2 0x5B

// func7 values of the reduction instructions.
// The low bits select the operation; 0x40 (func7 bit 6) is set for the
// unsigned variant of add/min/max.
#define ADD_FUNC7   0x00
#define ADDU_FUNC7  0x40
#define MIN_FUNC7   0x01
#define MINU_FUNC7  0x41
#define MAX_FUNC7   0x02
#define MAXU_FUNC7  0x42
#define AND_FUNC7   0x03
#define OR_FUNC7    0x04
#define XOR_FUNC7   0x05
|
||||
|
||||
/*
 * For reference, the func7 decoding performed by the INST_EXT3 case of the
 * RTL decoder (VX_decode): func7[5:0] selects the operation and func7[6]
 * selects the unsigned variant of add/min/max.
 *
 *   6'h0: begin
 *       op_type = func7[6] ? `INST_RED_ADDU : `INST_RED_ADD;
 *   end
 *   6'h1: begin
 *       op_type = func7[6] ? `INST_RED_MINU : `INST_RED_MIN;
 *   end
 *   6'h2: begin
 *       op_type = func7[6] ? `INST_RED_MAXU : `INST_RED_MAX;
 *   end
 *   6'h3: begin
 *       op_type = `INST_RED_AND;
 *   end
 *   6'h4: begin
 *       op_type = `INST_RED_OR;
 *   end
 *   6'h5: begin
 *       op_type = `INST_RED_XOR;
 *   end
 */
|
||||
|
||||
#include <vx_intrinsics.h>
|
||||
#include <stdio.h>
|
||||
#include <vx_print.h>
|
||||
|
||||
// Operands for the add-reduction test: thread `tid` contributes x[tid].
// NOTE(review): only four entries, so the test assumes at most four threads
// are active when indexing by thread id — confirm against the configuration.
int x[4] = {3, 7, 2, 5};
|
||||
// Result slot: every test_*_reduce() stores its reduced value here and
// main() prints it. Initialised to -1.
int y = -1;
|
||||
|
||||
inline int vx_add_reduce(int v) {
|
||||
int ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(ADD_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline int vx_min_reduce(int v) {
|
||||
int ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(MIN_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline unsigned vx_minu_reduce(unsigned v) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(MINU_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline int vx_max_reduce(int v) {
|
||||
int ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(MAX_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline unsigned vx_maxu_reduce(unsigned v) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(MAXU_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
inline unsigned vx_and_reduce(unsigned v) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(AND_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline unsigned vx_or_reduce(unsigned v) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(OR_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
inline unsigned vx_xor_reduce(unsigned v) {
|
||||
unsigned ret;
|
||||
asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(XOR_FUNC7));
|
||||
return ret;
|
||||
}
|
||||
|
||||
void test_add_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
int v = x[tid];
|
||||
int reduced = vx_add_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
// Operands for the min/max tests, indexed by thread id. The signed tests
// read the same entries converted to int, so (unsigned)-1 and (unsigned)-2
// act as the largest values for the unsigned reductions and as negative
// values for the signed ones.
unsigned unsigned_vector[4] = {(unsigned)-1, 0, (unsigned)-2, 5};
|
||||
|
||||
void test_min_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
int v = unsigned_vector[tid];
|
||||
int reduced = vx_min_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
void test_max_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
int v = unsigned_vector[tid];
|
||||
int reduced = vx_max_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
void test_minu_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
unsigned v = unsigned_vector[tid];
|
||||
unsigned reduced = vx_minu_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
void test_maxu_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
unsigned v = unsigned_vector[tid];
|
||||
unsigned reduced = vx_maxu_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
// 32-bit patterns for the and/or/xor reduction tests, indexed by thread id.
unsigned bit_vectors[4] = {0b11010110000111001100010100100110, 0b10010100011010001010000000001110, 0b10001001010111110001110000000010, 0b00010011010100101101110111001111};
|
||||
|
||||
void test_and_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
unsigned v = bit_vectors[tid];
|
||||
unsigned reduced = vx_and_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
void test_or_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
unsigned v = bit_vectors[tid];
|
||||
unsigned reduced = vx_or_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
void test_xor_reduce() {
|
||||
vx_tmc(-1);
|
||||
int tid = vx_thread_id();
|
||||
unsigned v = bit_vectors[tid];
|
||||
unsigned reduced = vx_xor_reduce(v);
|
||||
vx_tmc(1);
|
||||
|
||||
y = reduced;
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
int expected;
|
||||
|
||||
test_add_reduce();
|
||||
vx_printf("add reduce result: %d\n", y);
|
||||
vx_printf("expected: %d\n", x[0] + x[1] + x[2] + x[3]);
|
||||
|
||||
test_min_reduce();
|
||||
vx_printf("min reduce result: %d\n", y);
|
||||
expected = MIN((int)unsigned_vector[0], MIN((int)unsigned_vector[1], MIN((int)unsigned_vector[2], (int)unsigned_vector[3])));
|
||||
vx_printf("expected: %d\n", expected);
|
||||
|
||||
test_max_reduce();
|
||||
vx_printf("max reduce result: %d\n", y);
|
||||
expected = MAX((int)unsigned_vector[0], MAX((int)unsigned_vector[1], MAX((int)unsigned_vector[2], (int)unsigned_vector[3])));
|
||||
vx_printf("expected: %d\n", expected);
|
||||
|
||||
test_minu_reduce();
|
||||
vx_printf("minu reduce result: %d\n", y);
|
||||
expected = MIN(unsigned_vector[0], MIN(unsigned_vector[1], MIN(unsigned_vector[2], unsigned_vector[3])));
|
||||
vx_printf("expected: %d\n", expected);
|
||||
|
||||
test_maxu_reduce();
|
||||
vx_printf("maxu reduce result: %d\n", y);
|
||||
expected = MAX(unsigned_vector[0], MAX(unsigned_vector[1], MAX(unsigned_vector[2], unsigned_vector[3])));
|
||||
vx_printf("expected: %d\n", expected);
|
||||
|
||||
test_and_reduce();
|
||||
vx_printf("and reduce result: %d\n", y);
|
||||
vx_printf("expected: %d\n", bit_vectors[0] & bit_vectors[1] & bit_vectors[2] & bit_vectors[3]);
|
||||
|
||||
|
||||
test_or_reduce();
|
||||
vx_printf("or reduce result: %d\n", y);
|
||||
vx_printf("expected: %d\n", bit_vectors[0] | bit_vectors[1] | bit_vectors[2] | bit_vectors[3]);
|
||||
|
||||
test_xor_reduce();
|
||||
vx_printf("xor reduce result: %d\n", y);
|
||||
vx_printf("expected: %d\n", bit_vectors[0] ^ bit_vectors[1] ^ bit_vectors[2] ^ bit_vectors[3]);
|
||||
|
||||
|
||||
return 0;
|
||||
}
|
||||
Reference in New Issue
Block a user