diff --git a/hw/rtl/VX_define.vh b/hw/rtl/VX_define.vh index 996c769d..9ddeeeea 100644 --- a/hw/rtl/VX_define.vh +++ b/hw/rtl/VX_define.vh @@ -115,7 +115,7 @@ /////////////////////////////////////////////////////////////////////////////// `define INST_OP_BITS 4 -`define INST_MOD_BITS 3 +`define INST_MOD_BITS 4 `define INST_FMT_BITS 2 /////////////////////////////////////////////////////////////////////////////// @@ -140,6 +140,7 @@ `define INST_ALU_IS_BR(mod) mod[0] `define INST_ALU_IS_M(mod) mod[1] `define INST_ALU_IS_W(mod) mod[2] +`define INST_ALU_IS_RED(mod) mod[3] `define INST_BR_EQ 4'b0000 `define INST_BR_NE 4'b0010 @@ -176,6 +177,17 @@ `define INST_M_SIGNED_A(op) (op[1:0] != 1) `define INST_M_IS_REM(op) op[1] +`define INST_RED_ADD 4'b0000 +`define INST_RED_ADDU 4'b1000 +`define INST_RED_MIN 4'b0001 +`define INST_RED_MINU 4'b1001 +`define INST_RED_MAX 4'b0010 +`define INST_RED_MAXU 4'b1010 +`define INST_RED_AND 4'b0011 +`define INST_RED_OR 4'b0100 +`define INST_RED_XOR 4'b0101 +`define INST_RED_BITS 4 + `define INST_FMT_B 3'b000 `define INST_FMT_H 3'b001 `define INST_FMT_W 3'b010 diff --git a/hw/rtl/core/VX_alu_unit.sv b/hw/rtl/core/VX_alu_unit.sv index d2b38cf4..7546f4b3 100644 --- a/hw/rtl/core/VX_alu_unit.sv +++ b/hw/rtl/core/VX_alu_unit.sv @@ -33,7 +33,7 @@ module VX_alu_unit #( localparam PID_BITS = `CLOG2(`NUM_THREADS / NUM_LANES); localparam PID_WIDTH = `UP(PID_BITS); localparam RSP_ARB_DATAW= `UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + `NR_BITS + 1 + NUM_LANES * `XLEN + PID_WIDTH + 1 + 1; - localparam RSP_ARB_SIZE = 1 + `EXT_M_ENABLED; + localparam RSP_ARB_SIZE = 2 + `EXT_M_ENABLED; localparam PARTIAL_BW = (BLOCK_SIZE != `ISSUE_WIDTH) || (NUM_LANES != `NUM_THREADS); VX_execute_if #( @@ -60,12 +60,13 @@ module VX_alu_unit #( for (genvar block_idx = 0; block_idx < BLOCK_SIZE; ++block_idx) begin wire is_muldiv_op; + wire is_reduce_op; VX_execute_if #( .NUM_LANES (NUM_LANES) ) int_execute_if(); - assign int_execute_if.valid = execute_if[block_idx].valid 
&& ~is_muldiv_op; + assign int_execute_if.valid = execute_if[block_idx].valid && ~is_muldiv_op && ~is_reduce_op; assign int_execute_if.data = execute_if[block_idx].data; VX_commit_if #( @@ -86,6 +87,31 @@ module VX_alu_unit #( .commit_if (int_commit_if) ); + assign is_reduce_op = `INST_ALU_IS_RED(execute_if[block_idx].data.op_mod); + + VX_execute_if #( + .NUM_LANES (NUM_LANES) + ) red_execute_if(); + + assign red_execute_if.valid = execute_if[block_idx].valid && is_reduce_op; + assign red_execute_if.data = execute_if[block_idx].data; + + VX_commit_if #( + .NUM_LANES (NUM_LANES) + ) red_commit_if(); + + `RESET_RELAY(red_reset, reset); + + VX_reduce_unit #( + .CORE_ID(CORE_ID), + .NUM_LANES(NUM_LANES) + ) reduce_unit ( + .clk(clk), + .reset(red_reset), + .execute_if(red_execute_if), + .commit_if(red_commit_if) + ); + `ifdef EXT_M_ENABLE assign is_muldiv_op = `INST_ALU_IS_M(execute_if[block_idx].data.op_mod); @@ -96,7 +122,7 @@ module VX_alu_unit #( .NUM_LANES (NUM_LANES) ) mdv_execute_if(); - assign mdv_execute_if.valid = execute_if[block_idx].valid && is_muldiv_op; + assign mdv_execute_if.valid = execute_if[block_idx].valid && is_muldiv_op && ~is_reduce_op; assign mdv_execute_if.data = execute_if[block_idx].data; VX_commit_if #( @@ -113,12 +139,12 @@ module VX_alu_unit #( .commit_if (mdv_commit_if) ); - assign execute_if[block_idx].ready = is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready; + assign execute_if[block_idx].ready = is_reduce_op ? red_execute_if.ready : (is_muldiv_op ? mdv_execute_if.ready : int_execute_if.ready); `else assign is_muldiv_op = 0; - assign execute_if[block_idx].ready = int_execute_if.ready; + assign execute_if[block_idx].ready = is_reduce_op ? 
red_execute_if.ready : int_execute_if.ready; `endif @@ -135,19 +161,22 @@ module VX_alu_unit #( `ifdef EXT_M_ENABLE mdv_commit_if.valid, `endif - int_commit_if.valid + int_commit_if.valid, + red_commit_if.valid }), .ready_in ({ `ifdef EXT_M_ENABLE mdv_commit_if.ready, `endif - int_commit_if.ready + int_commit_if.ready, + red_commit_if.ready }), .data_in ({ `ifdef EXT_M_ENABLE mdv_commit_if.data, `endif - int_commit_if.data + int_commit_if.data, + red_commit_if.data }), .data_out (commit_block_if[block_idx].data), .valid_out (commit_block_if[block_idx].valid), diff --git a/hw/rtl/core/VX_decode.sv b/hw/rtl/core/VX_decode.sv index 0a6b00ec..42cd7ffc 100644 --- a/hw/rtl/core/VX_decode.sv +++ b/hw/rtl/core/VX_decode.sv @@ -505,6 +505,34 @@ module VX_decode #( default:; endcase end + `INST_EXT3: begin + ex_type = `EX_ALU; + op_mod[3] = 1; + `USED_IREG(rs1); + `USED_IREG(rd); + + case (func7[5:0]) + 6'h0: begin + op_type = func7[6] ? `INST_RED_ADDU : `INST_RED_ADD; + end + 6'h1: begin + op_type = func7[6] ? `INST_RED_MINU : `INST_RED_MIN; + end + 6'h2: begin + op_type = func7[6] ? `INST_RED_MAXU : `INST_RED_MAX; + end + 6'h3: begin + op_type = `INST_RED_AND; + end + 6'h4: begin + op_type = `INST_RED_OR; + end + 6'h5: begin + op_type = `INST_RED_XOR; + end + default:; + endcase + end default:; endcase end diff --git a/hw/rtl/core/VX_reduce_unit.sv b/hw/rtl/core/VX_reduce_unit.sv new file mode 100644 index 00000000..37610bcf --- /dev/null +++ b/hw/rtl/core/VX_reduce_unit.sv @@ -0,0 +1,283 @@ +`include "VX_define.vh" +`include "VX_platform.vh" + + +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// VX_reduce_ext: combinational tree that folds N lanes into one value using op_type.
+// Lanes whose mask bit is 0 are left out; with no active lane the output is not meaningful.
+module VX_reduce_ext #(
+    parameter DATAW_IN  = 1,
+    parameter DATAW_OUT = DATAW_IN,
+    parameter N         = 1
+) (
+    input wire [N-1:0][DATAW_IN-1:0]  data_in,
+    input wire [N-1:0]                mask,
+    input wire [`INST_RED_BITS-1:0]   op_type,
+    output wire [DATAW_OUT-1:0]       data_out
+);
+    if (N == 1) begin
+        `UNUSED_VAR(op_type)
+        `UNUSED_VAR(mask)
+        assign data_out = DATAW_OUT'(data_in[0]); // leaf: the parent decides whether this lane counts
+    end else begin
+        localparam int N_A = N / 2;
+        localparam int N_B = N - N_A;
+
+        wire [N_A-1:0][DATAW_IN-1:0] in_A;
+        wire [N_B-1:0][DATAW_IN-1:0] in_B;
+        wire [DATAW_OUT-1:0] out_A, out_B;
+
+        wire [N_A-1:0] mask_A;
+        wire [N_B-1:0] mask_B;
+        wire any_A, any_B;
+
+        for (genvar i = 0; i < N_A; i++) begin
+            assign in_A[i] = data_in[i];
+        end
+
+        for (genvar i = 0; i < N_B; i++) begin
+            assign in_B[i] = data_in[N_A + i];
+        end
+
+        assign mask_A = mask[N_A-1:0];
+        assign mask_B = mask[N-1:N_A];
+        assign any_A = |mask_A;
+        assign any_B = |mask_B;
+
+        VX_reduce_ext #(
+            .DATAW_IN  (DATAW_IN),
+            .DATAW_OUT (DATAW_OUT),
+            .N         (N_A)
+        ) reduce_A (
+            .data_in  (in_A),
+            .mask     (mask_A),
+            .op_type  (op_type),
+            .data_out (out_A)
+        );
+
+        VX_reduce_ext #(
+            .DATAW_IN  (DATAW_IN),
+            .DATAW_OUT (DATAW_OUT),
+            .N         (N_B)
+        ) reduce_B (
+            .data_in  (in_B),
+            .mask     (mask_B),
+            .op_type  (op_type),
+            .data_out (out_B)
+        );
+
+        logic [DATAW_OUT-1:0] _data_out;
+
+        always @(*) begin
+            case (op_type)
+                `INST_RED_ADD:  _data_out = out_A + out_B;
+                `INST_RED_ADDU: _data_out = out_A + out_B;
+                `INST_RED_MIN:  _data_out = ($signed(out_A) < $signed(out_B)) ? out_A : out_B;
+                `INST_RED_MINU: _data_out = (out_A < out_B) ? out_A : out_B;
+                `INST_RED_MAX:  _data_out = ($signed(out_A) < $signed(out_B)) ? out_B : out_A;
+                `INST_RED_MAXU: _data_out = (out_A < out_B) ? out_B : out_A;
+                `INST_RED_AND:  _data_out = out_A & out_B;
+                `INST_RED_OR:   _data_out = out_A | out_B;
+                `INST_RED_XOR:  _data_out = out_A ^ out_B;
+                default:        _data_out = out_A;
+            endcase
+        end
+
+        // if both sides are masked out, then it doesn't matter what we output
+        assign data_out = (any_A && any_B) ? _data_out : (any_A ? out_A : out_B);
+    end
+endmodule
+
+// VX_reduce_unit: reduces rs1 across the active lanes of all packets of one instruction and
+// commits the accumulated value to every lane of each packet (tmask/pid replayed per packet).
+module VX_reduce_unit #(
+    parameter CORE_ID   = 0,
+    parameter NUM_LANES = 1
+) (
+    input wire              clk,
+    input wire              reset,
+
+    VX_execute_if.slave     execute_if,
+    VX_commit_if.master     commit_if
+);
+    `UNUSED_PARAM(CORE_ID)
+
+    localparam NUM_PACKETS = `NUM_THREADS / NUM_LANES;
+    localparam PID_BITS    = `CLOG2(`NUM_THREADS / NUM_LANES);
+    localparam PID_WIDTH   = `UP(PID_BITS);
+
+    logic [`XLEN-1:0] accumulator, accumulator_n, reduced_accumulator;
+    wire [(NUM_LANES * `XLEN)-1:0] broadcasted_accumulator;
+
+    assign broadcasted_accumulator = {NUM_LANES{accumulator}}; // same value on every lane
+
+    wire eop;
+    wire [NUM_LANES-1:0][`XLEN-1:0] data_in;
+    wire [`XLEN-1:0] data_out; // reduction of the packet currently at the input
+
+    assign eop = execute_if.data.eop;
+    assign data_in = execute_if.data.rs1_data;
+
+    logic execute_if_valid;
+    logic execute_if_ready;
+    logic commit_if_valid;
+    logic commit_if_ready;
+
+    wire execute_if_fire;
+    wire commit_if_fire;
+
+    assign execute_if_valid = execute_if.valid;
+    assign execute_if.ready = execute_if_ready;
+
+    assign execute_if_fire = execute_if.ready && execute_if.valid;
+    assign commit_if_fire = commit_if_ready && commit_if_valid;
+
+    logic store_tmask_pid; // push the current packet's {tmask, pid, sop, eop}
+    logic read_tmask_pid;  // pop one stored entry
+    wire [PID_WIDTH-1:0] stored_pid;
+    wire [NUM_LANES-1:0] stored_tmask;
+    wire stored_sop;
+    wire stored_eop;
+
+    logic [PID_BITS:0] size, size_n; // number of stored entries not yet broadcast
+
+    // FSM: IDLE waits for the first packet. ACCUMULATE folds in packets until eop, recording each
+    // packet's tmask/pid/sop/eop. BROADCAST commits the result once per recorded packet. FINISH
+    // pops the eop packet, which is held at the input so uuid/wid/PC/wb/rd stay valid for the commits.
+    localparam IDLE       = 2'b00;
+    localparam ACCUMULATE = 2'b01;
+    localparam BROADCAST  = 2'b10;
+    localparam FINISH     = 2'b11;
+
+    logic [1:0] state, state_n;
+
+    always @(*) begin
+        state_n = state;
+        accumulator_n = accumulator;
+        execute_if_ready = '0;
+        commit_if_valid = '0;
+        store_tmask_pid = '0;
+        read_tmask_pid = '0;
+        size_n = size; // updated below, at the exact point where an entry is pushed or popped
+
+        case (state)
+            IDLE: begin
+                if (execute_if_valid) begin
+                    accumulator_n = data_out;
+                    store_tmask_pid = '1;
+                    size_n = size + 1;
+                    if (eop) begin
+                        state_n = BROADCAST; // single-packet op: keep the packet at the input
+                    end
+                    else begin
+                        execute_if_ready = '1;
+                        state_n = ACCUMULATE;
+                    end
+                end
+            end
+            ACCUMULATE: begin
+                execute_if_ready = ~eop; // the eop packet is not consumed until FINISH
+                if (execute_if_valid) begin // eop/data are only meaningful while valid is high
+                    accumulator_n = reduced_accumulator;
+                    store_tmask_pid = '1;
+                    size_n = size + 1;
+                    if (eop) begin
+                        state_n = BROADCAST;
+                    end
+                end
+            end
+            BROADCAST: begin
+                commit_if_valid = '1;
+                if (commit_if_fire) begin
+                    read_tmask_pid = '1;
+                    size_n = size - 1;
+                end
+                if (size_n == '0) begin // leave only once every recorded packet was accepted
+                    state_n = FINISH;
+                end
+            end
+            FINISH: begin
+                execute_if_ready = '1;
+                if (execute_if_fire) begin
+                    state_n = IDLE;
+                end
+            end
+        endcase
+    end
+
+    always @(posedge clk) begin
+        if (reset) begin
+            accumulator <= '0;
+            state <= IDLE;
+            size <= '0;
+        end
+        else begin
+            accumulator <= accumulator_n;
+            state <= state_n;
+            size <= size_n;
+        end
+    end
+
+    VX_reduce_ext #(
+        .DATAW_IN (`XLEN),
+        .N        (NUM_LANES)
+    ) reducer (
+        .data_in  (data_in),
+        .mask     (execute_if.data.tmask),
+        .op_type  (execute_if.data.op_type),
+        .data_out (data_out)
+    );
+
+    VX_reduce_ext #(
+        .DATAW_IN (`XLEN),
+        .N        (2)
+    ) accumulator_reducer (
+        .data_in  ({accumulator, data_out}),
+        .mask     (2'b11),
+        .op_type  (execute_if.data.op_type),
+        .data_out (reduced_accumulator)
+    );
+
+    VX_elastic_buffer #(
+        .DATAW (NUM_LANES + PID_WIDTH + 1 + 1),
+        .SIZE  (NUM_PACKETS)
+    ) tmask_pid_store (
+        .clk       (clk),
+        .reset     (reset),
+
+        .valid_in  (store_tmask_pid),
+        `UNUSED_PIN(ready_in),
+        .data_in   ({execute_if.data.tmask, execute_if.data.pid, execute_if.data.sop, execute_if.data.eop}),
+
+        .data_out  ({stored_tmask, stored_pid, stored_sop, stored_eop}),
+        .ready_out (read_tmask_pid),
+        `UNUSED_PIN(valid_out)
+    );
+
+    VX_elastic_buffer #(
+        .DATAW (`UUID_WIDTH + `NW_WIDTH + NUM_LANES + `XLEN + 1 + `NR_BITS + (`XLEN * NUM_LANES) + PID_WIDTH + 1 + 1)
+    ) output_buffer (
+        .clk       (clk),
+        .reset     (reset),
+        .valid_in  (commit_if_valid),
+        .ready_in  (commit_if_ready),
+        .data_in   ({execute_if.data.uuid, execute_if.data.wid, stored_tmask, execute_if.data.PC, execute_if.data.wb, execute_if.data.rd, broadcasted_accumulator, stored_pid, stored_sop, stored_eop}),
+
+        .data_out  ({commit_if.data.uuid, commit_if.data.wid, commit_if.data.tmask, commit_if.data.PC, commit_if.data.wb, commit_if.data.rd, commit_if.data.data, commit_if.data.pid, commit_if.data.sop, commit_if.data.eop}),
+        .ready_out (commit_if.ready),
+        .valid_out (commit_if.valid)
+    );
+
+endmodule
diff --git a/tests/kernel/Makefile b/tests/kernel/Makefile
index ab4fdd07..f7c46754 100644
--- a/tests/kernel/Makefile
+++ b/tests/kernel/Makefile
@@ -1,19 +1,23 @@
 all:
 	$(MAKE) -C conform
 	$(MAKE) -C hello
-	$(MAKE) -C fibonacci
+	$(MAKE) -C fibonacci
+	$(MAKE) -C reductions
 
 run-simx:
 	$(MAKE) -C conform run-simx
 	$(MAKE) -C hello run-simx
 	$(MAKE) -C fibonacci run-simx
+	$(MAKE) -C reductions run-simx
 
 run-rtlsim:
 	$(MAKE) -C conform run-rtlsim
 	$(MAKE) -C hello run-rtlsim
 	$(MAKE) -C fibonacci run-rtlsim
+	$(MAKE) -C reductions run-rtlsim
 
 clean:
 	$(MAKE) -C conform clean
 	$(MAKE) -C hello clean
 	$(MAKE) -C fibonacci clean
+	$(MAKE) -C reductions clean
diff --git a/tests/kernel/reductions/Makefile b/tests/kernel/reductions/Makefile
new file mode 100644
index 00000000..76e96c46
--- /dev/null
+++ b/tests/kernel/reductions/Makefile
@@ -0,0 +1,5 @@
+PROJECT = reductions
+
+SRCS = main.cpp
+
+include ../common.mk
diff --git
a/tests/kernel/reductions/main.cpp b/tests/kernel/reductions/main.cpp
new file mode 100644
index 00000000..edde1da4
--- /dev/null
+++ b/tests/kernel/reductions/main.cpp
@@ -0,0 +1,201 @@
+#define RISCV_CUSTOM2 0x5B
+#define ADD_FUNC7  0b0000000
+#define ADDU_FUNC7 0b1000000
+#define MIN_FUNC7  0b0000001
+#define MINU_FUNC7 0b1000001
+#define MAX_FUNC7  0b0000010
+#define MAXU_FUNC7 0b1000010
+#define AND_FUNC7  0b0000011
+#define OR_FUNC7   0b0000100
+#define XOR_FUNC7  0b0000101
+
+/*
+ * func7 encoding, mirroring the `INST_EXT3 case in VX_decode.sv:
+ *   func7[5:0]: 0 = add, 1 = min, 2 = max, 3 = and, 4 = or, 5 = xor
+ *   func7[6]  : 1 selects the unsigned variant of add/min/max
+ */
+
+// NOTE(review): header names reconstructed -- vx_tmc/vx_thread_id and vx_printf are expected
+// to come from the Vortex kernel headers below; confirm against the kernel include directory.
+#include <stdio.h>
+#include <vx_intrinsics.h>
+#include <vx_print.h>
+
+// Fallbacks in case none of the headers above provides MIN/MAX.
+#ifndef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+#endif
+
+int x[4] = {3, 7, 2, 5};
+int y = -1;
+
+// Defines `type name(type v)`: issues one R-type instruction on opcode RISCV_CUSTOM2 with
+// func3 = 0, rs1 = v, rs2 = x0 and the given func7, and returns the value written to rd.
+#define DEFINE_REDUCE(name, type, func7) \
+    inline type name(type v) { \
+        type ret; \
+        asm volatile (".insn r %2, 0, %3, %0, %1, x0" : "=r"(ret) : "r"(v), "i"(RISCV_CUSTOM2), "i"(func7)); \
+        return ret; \
+    }
+
+DEFINE_REDUCE(vx_add_reduce,  int,      ADD_FUNC7)
+DEFINE_REDUCE(vx_min_reduce,  int,      MIN_FUNC7)
+DEFINE_REDUCE(vx_minu_reduce, unsigned, MINU_FUNC7)
+DEFINE_REDUCE(vx_max_reduce,  int,      MAX_FUNC7)
+DEFINE_REDUCE(vx_maxu_reduce, unsigned, MAXU_FUNC7)
+DEFINE_REDUCE(vx_and_reduce,  unsigned, AND_FUNC7)
+DEFINE_REDUCE(vx_or_reduce,   unsigned, OR_FUNC7)
+DEFINE_REDUCE(vx_xor_reduce,  unsigned, XOR_FUNC7)
+
+// Each test_* below calls vx_tmc(-1) (presumably: enable every thread of the warp), loads the
+// element selected by the thread id, runs one reduction, calls vx_tmc(1) and stores the result in y.
+// NOTE(review): the 4-element inputs are indexed by thread id -- assumes 4 threads per warp.
+
+void test_add_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    int v = x[tid];
+    int reduced = vx_add_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+unsigned unsigned_vector[4] = {(unsigned)-1, 0, (unsigned)-2, 5};
+
+void test_min_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    int v = unsigned_vector[tid];
+    int reduced = vx_min_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+void test_max_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    int v = unsigned_vector[tid];
+    int reduced = vx_max_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+void test_minu_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    unsigned v = unsigned_vector[tid];
+    unsigned reduced = vx_minu_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+void test_maxu_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    unsigned v = unsigned_vector[tid];
+    unsigned reduced = vx_maxu_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+unsigned bit_vectors[4] = {0b11010110000111001100010100100110, 0b10010100011010001010000000001110, 0b10001001010111110001110000000010, 0b00010011010100101101110111001111};
+
+void test_and_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    unsigned v = bit_vectors[tid];
+    unsigned reduced = vx_and_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+void test_or_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    unsigned v = bit_vectors[tid];
+    unsigned reduced = vx_or_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+void test_xor_reduce() {
+    vx_tmc(-1);
+    int tid = vx_thread_id();
+    unsigned v = bit_vectors[tid];
+    unsigned reduced = vx_xor_reduce(v);
+    vx_tmc(1);
+
+    y = reduced;
+}
+
+int main()
+{
+    int expected;
+    int errors = 0; // number of reductions whose result differs from the host-computed value
+
+    test_add_reduce();
+    expected = x[0] + x[1] + x[2] + x[3];
+    vx_printf("add reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_min_reduce();
+    expected = MIN((int)unsigned_vector[0], MIN((int)unsigned_vector[1], MIN((int)unsigned_vector[2], (int)unsigned_vector[3])));
+    vx_printf("min reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_max_reduce();
+    expected = MAX((int)unsigned_vector[0], MAX((int)unsigned_vector[1], MAX((int)unsigned_vector[2], (int)unsigned_vector[3])));
+    vx_printf("max reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_minu_reduce();
+    expected = (int)MIN(unsigned_vector[0], MIN(unsigned_vector[1], MIN(unsigned_vector[2], unsigned_vector[3])));
+    vx_printf("minu reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_maxu_reduce();
+    expected = (int)MAX(unsigned_vector[0], MAX(unsigned_vector[1], MAX(unsigned_vector[2], unsigned_vector[3])));
+    vx_printf("maxu reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_and_reduce();
+    expected = (int)(bit_vectors[0] & bit_vectors[1] & bit_vectors[2] & bit_vectors[3]);
+    vx_printf("and reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_or_reduce();
+    expected = (int)(bit_vectors[0] | bit_vectors[1] | bit_vectors[2] | bit_vectors[3]);
+    vx_printf("or reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    test_xor_reduce();
+    expected = (int)(bit_vectors[0] ^ bit_vectors[1] ^ bit_vectors[2] ^ bit_vectors[3]);
+    vx_printf("xor reduce result: %d\n", y);
+    vx_printf("expected: %d\n", expected);
+    errors += (y != expected);
+
+    if (errors == 0) {
+        vx_printf("Passed!\n");
+    } else {
+        vx_printf("Failed! errors=%d\n", errors);
+    }
+
+    return errors; // non-zero when any reduction mismatched
+}