vortex/kernel/vx_include/vx_front.c


#include "vx_front.h"
#include "../vx_os/vx_back/vx_back.h"

// -------------------------- Matrix Multiplication --------------------------

static mat_mult_arg_t mat_mult_args;

void _vx_mat_mult(unsigned, unsigned);
void vx_sq_mat_mult(void * x, void * y, void * z, unsigned mat_dim)
{
	mat_mult_args.x = x;
	mat_mult_args.y = y;
	mat_mult_args.z = z;
	mat_mult_args.mat_dim = mat_dim;

	unsigned num_avail_threads = vx_available_threads();

	unsigned off = (mat_dim/num_avail_threads);

	if ((mat_dim%num_avail_threads) != 0)
	{
		off += 1;
	}

	vx_printf("Offset: ", off);


	mat_mult_args.offset = off;

	if (mat_dim >= num_avail_threads)
	{
		vx_spawnWarps(mat_dim, num_avail_threads, _vx_mat_mult, (void *) (&mat_mult_args));
	}
	else
	{
		vx_spawnWarps(mat_dim, mat_dim, _vx_mat_mult, (void *) (&mat_mult_args));
	}

	unsigned num_avail_warps = vx_available_warps();

	if (mat_dim > num_avail_warps)
	{
		vx_wait_for_warps(num_avail_warps);
	}
	else
	{
		vx_wait_for_warps(mat_dim);
	}
}

void _vx_mat_mult(unsigned tid, unsigned wid)
{
	mat_mult_arg_t * args = (mat_mult_arg_t *) vx_get_arg_struct();

	unsigned * x_ptr = args->x;
	unsigned * y_ptr = args->y;
	unsigned * z_ptr = args->z;

	unsigned off = args->offset;

	unsigned i_index = off * tid;

	if (off == 0)
	{
		off = 1;
		i_index = tid;
	}

	unsigned mat_dim = args->mat_dim;

	for (int iter = 0; iter < off; ++iter)
	{
		unsigned total = 0;
		for (unsigned place = 0; place < mat_dim; ++place)
		{
			unsigned x_i = (wid * mat_dim)   + place;
			unsigned y_i = (mat_dim * place) + i_index;

			total += (x_ptr[x_i] * y_ptr[y_i]);
		}

			int final_i = (wid * mat_dim) + i_index;
			unsigned cond = i_index < mat_dim;
			__if(cond)
			{
				z_ptr[final_i] = total;
				i_index++;
			}
			__else
			__end_if
	}

	// for (int z = 0; z < ((1000 * wid) + 1000); z++);
	return;
}


static mat_r_arg_t mat_r_args;
// -------------------------- Matrix Addition --------------------------
void _vx_mat_add(unsigned, unsigned);
void vx_mat_add(void * x, void * y, void * z, unsigned num_rows, unsigned num_cols)
{
	mat_r_args.x        = x;
	mat_r_args.y        = y;
	mat_r_args.z        = z;
	mat_r_args.num_cols = num_cols;
	mat_r_args.num_rows = num_rows;


	unsigned num_avail_threads = vx_available_threads();

	unsigned off = (num_cols/num_avail_threads);

	if ((num_cols%num_avail_threads) != 0)
	{
		off += 1;
	}


	mat_r_args.offset = off;

	if (num_cols >= num_avail_threads)
	{
		vx_spawnWarps(num_rows, num_avail_threads, _vx_mat_add, (void *) (&mat_r_args));
	}
	else
	{
		vx_spawnWarps(num_rows, num_cols, _vx_mat_add, (void *) (&mat_r_args));
	}

	unsigned num_avail_warps = vx_available_warps();

	if (num_rows > num_avail_warps)
	{
		vx_wait_for_warps(num_avail_warps);
	}
	else
	{
		vx_wait_for_warps(num_rows);
	}
}

void _vx_mat_add(unsigned tid, unsigned wid)
{
	// vx_print_str("*");
	// for (int z = 0; z < ((wid * 1000) + 1000); z++);

	mat_r_arg_t * args = (mat_r_arg_t *) vx_get_arg_struct();

	unsigned * x_ptr = args->x;
	unsigned * y_ptr = args->y;
	unsigned * z_ptr = args->z;

	unsigned off = args->offset;

	unsigned i_index = off * tid;

	if (off == 0)
	{
		off = 1;
		i_index = tid;
	}

	unsigned num_cols = args->num_cols;

	for (int iter = 0; iter < off; ++iter)
	{
		int final_i = (wid * num_cols) + i_index;
		unsigned cond = i_index < num_cols;
		__if(cond)
		{
			z_ptr[final_i] = x_ptr[final_i] + y_ptr[final_i];
			i_index++;
		}
		__else
		__end_if
	}
	return;

}


// -------------------------- Matrix Subtraction --------------------------
void _vx_mat_sub(unsigned, unsigned);
void vx_mat_sub(void * x, void * y, void * z, unsigned num_rows, unsigned num_cols)
{
	mat_r_args.x        = x;
	mat_r_args.y        = y;
	mat_r_args.z        = z;
	mat_r_args.num_cols = num_cols;
	mat_r_args.num_rows = num_rows;

	unsigned num_avail_threads = vx_available_threads();

	unsigned off = (num_cols/num_avail_threads);

	if ((num_cols%num_avail_threads) != 0)
	{
		off += 1;
	}


	mat_r_args.offset = off;

	if (num_cols >= num_avail_threads)
	{
		vx_spawnWarps(num_rows, num_avail_threads, _vx_mat_sub, (void *) (&mat_r_args));
	}
	else
	{
		vx_spawnWarps(num_rows, num_cols, _vx_mat_sub, (void *) (&mat_r_args));
	}

	unsigned num_avail_warps = vx_available_warps();

	if (num_rows > num_avail_warps)
	{
		vx_wait_for_warps(num_avail_warps);
	}
	else
	{
		vx_wait_for_warps(num_rows);
	}
}

void _vx_mat_sub(unsigned tid, unsigned wid)
{
	// vx_print_str("*");
	// for (int z = 0; z < ((wid * 1000) + 1000); z++);

	mat_r_arg_t * args = (mat_r_arg_t *) vx_get_arg_struct();

	unsigned * x_ptr = args->x;
	unsigned * y_ptr = args->y;
	unsigned * z_ptr = args->z;

	unsigned off = args->offset;

	unsigned i_index = off * tid;

	if (off == 0)
	{
		off = 1;
		i_index = tid;
	}

	unsigned num_cols = args->num_cols;

	for (int iter = 0; iter < off; ++iter)
	{
		int final_i = (wid * num_cols) + i_index;
		unsigned cond = i_index < num_cols;
		__if(cond)
		{
			z_ptr[final_i] = x_ptr[final_i] - y_ptr[final_i];
			i_index++;
		}
		__else
		__end_if
	}
	return;

}


static mat_e_arg_t mat_e_args;
// --------------------------------------------------------------

void _vx_e_mat_add(unsigned, unsigned);
void vx_e_mat_add(void * x, void * scal, void * z, unsigned num_rows, unsigned num_cols)
{
	mat_e_args.x        = x;
	mat_e_args.scal     = scal;
	mat_e_args.z        = z;
	mat_e_args.num_cols = num_cols;
	mat_e_args.num_rows = num_rows;


	unsigned num_avail_threads = vx_available_threads();

	unsigned off = (num_cols/num_avail_threads);

	if ((num_cols%num_avail_threads) != 0)
	{
		off += 1;
	}

	mat_e_args.offset = off;

	if (num_cols >= num_avail_threads)
	{
		vx_spawnWarps(num_rows, num_avail_threads, _vx_e_mat_add, (void *) (&mat_e_args));
	}
	else
	{
		vx_spawnWarps(num_rows, num_cols, _vx_e_mat_add, (void *) (&mat_e_args));
	}

	unsigned num_avail_warps = vx_available_warps();

	if (num_rows > num_avail_warps)
	{
		vx_wait_for_warps(num_avail_warps);
	}
	else
	{
		vx_wait_for_warps(num_rows);
	}
}

void _vx_e_mat_add(unsigned tid, unsigned wid)
{
	// vx_print_str("*");
	// for (int z = 0; z < ((wid * 1000) + 1000); z++);

	mat_e_arg_t * args = (mat_e_arg_t *) vx_get_arg_struct();

	unsigned * x_ptr = args->x;
	unsigned scal    = *((unsigned *) args->scal);

	unsigned * z_ptr = args->z;

	unsigned off = args->offset;

	unsigned i_index = off * tid;

	if (off == 0)
	{
		off = 1;
		i_index = tid;
	}

	unsigned num_cols = args->num_cols;

	for (int iter = 0; iter < off; ++iter)
	{
		int final_i = (wid * num_cols) + i_index;
		unsigned cond = i_index < num_cols;
		__if(cond)
		{
			z_ptr[final_i] = x_ptr[final_i] + scal;
			i_index++;
		}
		__else
		__end_if
	}
	return;

}

void _vx_e_mat_mult(unsigned, unsigned);
void vx_e_mat_mult(void * x, void * scal, void * z, unsigned num_rows, unsigned num_cols)
{
	mat_e_args.x        = x;
	mat_e_args.scal     = scal;
	mat_e_args.z        = z;
	mat_e_args.num_cols = num_cols;
	mat_e_args.num_rows = num_rows;


	unsigned num_avail_threads = vx_available_threads();

	unsigned off = (num_cols/num_avail_threads);

	if ((num_cols%num_avail_threads) != 0)
	{
		off += 1;
	}

	mat_e_args.offset = off;

	if (num_cols >= num_avail_threads)
	{
		vx_spawnWarps(num_rows, num_avail_threads, _vx_e_mat_mult, (void *) (&mat_e_args));
	}
	else
	{
		vx_spawnWarps(num_rows, num_cols, _vx_e_mat_mult, (void *) (&mat_e_args));
	}

	unsigned num_avail_warps = vx_available_warps();

	if (num_rows > num_avail_warps)
	{
		vx_wait_for_warps(num_avail_warps);
	}
	else
	{
		vx_wait_for_warps(num_rows);
	}
}

void _vx_e_mat_mult(unsigned tid, unsigned wid)
{
	// vx_print_str("*");
	// for (int z = 0; z < ((wid * 1000) + 1000); z++);

	mat_e_arg_t * args = (mat_e_arg_t *) vx_get_arg_struct();

	unsigned * x_ptr = args->x;
	unsigned scal    = *((unsigned *) args->scal);

	unsigned * z_ptr = args->z;

	unsigned off = args->offset;

	unsigned i_index = off * tid;

	if (off == 0)
	{
		off = 1;
		i_index = tid;
	}

	unsigned num_cols = args->num_cols;

	for (int iter = 0; iter < off; ++iter)
	{
		int final_i = (wid * num_cols) + i_index;
		unsigned cond = i_index < num_cols;
		__if(cond)
		{
			z_ptr[final_i] = x_ptr[final_i] * scal;
			i_index++;
		}
		__else
		__end_if
	}
	return;

}