/*
 * compute_flow.cpp
 *
 *  Created on: Sep 7, 2012
 *      Author: fogelson
 */

#include "compute_flow.h"

#include <stdexcept>

/**
 * Register the flow-evaluation routines of this file with the Python module
 * that is currently being defined.
 *
 * Python name                 C++ function
 * compute_flow_cpp            compute_flow
 * compute_flow_mp_cpp         compute_flow_mp
 * compute_arbitrary_flow      compute_arbitrary_flow
 * compute_arbitrary_flow_mp   compute_arbitrary_flow_mp
 */
void export_compute_flow()
{
	// Grid-based evaluators.
	def("compute_flow_cpp", compute_flow);
	def("compute_flow_mp_cpp", compute_flow_mp);

	// Evaluators for an arbitrary list of points.
	def("compute_arbitrary_flow", compute_arbitrary_flow);

	// The OpenMP variant receives (args, kwargs) directly, so it is wrapped
	// with raw_function; at least 3 positional arguments are required.
	def("compute_arbitrary_flow_mp", raw_function(compute_arbitrary_flow_mp, 3));
}

/**
 * Evaluate velocity and pressure at an arbitrary list of points by summing
 * the contribution of every point force (serial version).
 *
 * NOTE(review): the per-force formulas have the shape of 2-D regularized
 * Stokeslets with regularization parameter e -- confirm against the
 * derivation used by the project.
 *
 * @param points  Python tuple; points[0] and points[1] are extracted as
 *                bz_1_ptr and read as the x and y coordinates of the
 *                evaluation points. The number of points is the length of
 *                points[0].
 * @param pf      Python object with an integer attribute nF and 1-D array
 *                attributes xF, yF, fF, gF; entries 0..nF-1 of each are read.
 * @param setup   Python object with double attributes e and mu.
 * @return        Python tuple (u, v, p) of newly created 1-D arrays holding
 *                one value per evaluation point.
 * @throws std::invalid_argument if points[1] is shorter than points[0], or if
 *         any of the pf arrays holds fewer than nF entries (Boost.Python
 *         reports this exception to Python as ValueError).
 */
tuple compute_arbitrary_flow(tuple points, object pf, object setup){
	const int nF = extract<int>(pf.attr("nF"));
	const double e = extract<double>(setup.attr("e"));
	const double mu = extract<double>(setup.attr("mu"));

	// Evaluation points.
	bz_1_ptr x_bz_ptr = extract<bz_1_ptr>(points[0])();
	bz_1_ptr y_bz_ptr = extract<bz_1_ptr>(points[1])();

	Array<double,1> & x = *x_bz_ptr.get(),
			& y = *y_bz_ptr.get();

	const int N = x.length(0);
	if(y.length(0) < N){
		// y(n) is read for every n < N; refuse instead of reading out of bounds.
		throw std::invalid_argument("compute_arbitrary_flow: points[1] has fewer entries than points[0]");
	}

	// Point forces: positions (xF, yF) and components (fF, gF).
	bz_1_ptr xF_bz_ptr = extract<bz_1_ptr>(pf.attr("xF"))();
	bz_1_ptr yF_bz_ptr = extract<bz_1_ptr>(pf.attr("yF"))();
	bz_1_ptr fF_bz_ptr = extract<bz_1_ptr>(pf.attr("fF"))();
	bz_1_ptr gF_bz_ptr = extract<bz_1_ptr>(pf.attr("gF"))();

	Array<double,1> & xF = *xF_bz_ptr.get(),
			& yF = *yF_bz_ptr.get(),
			& fF = *fF_bz_ptr.get(),
			& gF = *gF_bz_ptr.get();

	if(xF.length(0) < nF || yF.length(0) < nF || fF.length(0) < nF || gF.length(0) < nF){
		throw std::invalid_argument("compute_arbitrary_flow: pf.xF, pf.yF, pf.fF and pf.gF must each hold at least nF entries");
	}

	// Output arrays, one entry per evaluation point, created on the Python side.
	int NX[] = {N};

	object u_py = create_array_for_py<1>(NX);
	object v_py = create_array_for_py<1>(NX);
	object p_py = create_array_for_py<1>(NX);

	bz_1_ptr u_bz_ptr = extract<bz_1_ptr>(u_py)();
	bz_1_ptr v_bz_ptr = extract<bz_1_ptr>(v_py)();
	bz_1_ptr p_bz_ptr = extract<bz_1_ptr>(p_py)();

	Array<double,1> & u = *u_bz_ptr.get(),
			& v = *v_bz_ptr.get(),
			& p = *p_bz_ptr.get();

	u = 0;
	v = 0;
	p = 0;

	// Loop-invariant coefficients. They are written so that every expression
	// below is evaluated in the same order as the original inline form.
	const double e2 = pow2(e);
	const double pressureCoef = 1/(2*pi);
	const double fourPiMu = 4*pi*mu;
	const double velocityCoef = 1.0/fourPiMu;

	for(int n = 0; n < N; n++){
		const double xn = x(n);
		const double yn = y(n);

		double Ucurr = 0;
		double Vcurr = 0;
		double Pcurr = 0;

		for(int k = 0; k < nF; k++){
			// Displacement from force k to the evaluation point.
			const double xMxK = xn - xF(k);
			const double yMyK = yn - yF(k);

			const double f_dot_xMxK = fF(k)*xMxK + gF(k)*yMyK;
			const double r2 = pow2(xMxK) + pow2(yMyK);
			const double root_r2Pe2 = sqrt(r2 + e2);

			const double velFactor1 = log(root_r2Pe2 + e) - (e*(root_r2Pe2 + 2*e))/((root_r2Pe2 + e)*root_r2Pe2);
			const double velFactor2 = (root_r2Pe2 + 2*e)/((pow2(root_r2Pe2+e))*root_r2Pe2);

			Pcurr = Pcurr + pressureCoef*f_dot_xMxK*((r2 + 2*e2 + e*root_r2Pe2)/((root_r2Pe2 + e)*pow(root_r2Pe2,3.0)));

			Ucurr = Ucurr + (-fF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*xMxK*velFactor2;

			Vcurr = Vcurr + (-gF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*yMyK*velFactor2;
		}

		p(n) += Pcurr;
		u(n) += Ucurr;
		v(n) += Vcurr;
	}

	return make_tuple(u_py, v_py, p_py);
}


/**
 * OpenMP variant of the arbitrary-point evaluator, written for use with
 * Boost.Python's raw_function: it receives the positional and keyword
 * arguments of the Python call directly.
 *
 * For each evaluation point the sum over the point forces is distributed over
 * threads with an OpenMP reduction, so the order in which contributions are
 * added (and therefore the last bits of the result) may vary between runs.
 *
 * NOTE(review): a parallel region is opened once per evaluation point; if the
 * number of points is large and nF is small, parallelising the outer loop
 * would likely be cheaper -- left unchanged here.
 *
 * @param args    Positional arguments: args[0] is a tuple whose items 0 and 1
 *                are extracted as bz_1_ptr (x and y coordinates of the
 *                evaluation points); args[1] is the force object (integer
 *                attribute nF, 1-D array attributes xF, yF, fF, gF); args[2]
 *                is the setup object (double attributes e and mu).
 * @param kwargs  Optional boolean keywords U, V and P. They are converted to
 *                bool but are not consulted afterwards: all three fields are
 *                always computed.
 * @return        Python tuple (u, v, p) of newly created 1-D arrays holding
 *                one value per evaluation point.
 * @throws std::invalid_argument if the y array is shorter than the x array,
 *         or if any of the force arrays holds fewer than nF entries
 *         (Boost.Python reports this exception to Python as ValueError).
 */
tuple compute_arbitrary_flow_mp(tuple args, dict kwargs){
	tuple points = extract<tuple>(args[0]);
	object pf = args[1], setup = args[2];

	// Keyword flags. NOTE(review): parsed for interface compatibility (a value
	// that cannot be converted to bool still raises), but nothing below uses
	// them -- decide whether they should skip the corresponding field.
	bool U = true, V = true, P = true;

	if(kwargs.has_key("U")){
		U = extract<bool>(kwargs.get("U"));
	}
	if(kwargs.has_key("V")){
		V = extract<bool>(kwargs.get("V"));
	}
	if(kwargs.has_key("P")){
		P = extract<bool>(kwargs.get("P"));
	}
	(void)U;
	(void)V;
	(void)P;

	const int nF = extract<int>(pf.attr("nF"));
	const double e = extract<double>(setup.attr("e"));
	const double mu = extract<double>(setup.attr("mu"));

	// Evaluation points.
	bz_1_ptr x_bz_ptr = extract<bz_1_ptr>(points[0])();
	bz_1_ptr y_bz_ptr = extract<bz_1_ptr>(points[1])();

	Array<double,1> & x = *x_bz_ptr.get(),
			& y = *y_bz_ptr.get();

	const int N = x.length(0);
	if(y.length(0) < N){
		// y(n) is read for every n < N; refuse instead of reading out of bounds.
		throw std::invalid_argument("compute_arbitrary_flow_mp: points[1] has fewer entries than points[0]");
	}

	// Point forces: positions (xF, yF) and components (fF, gF).
	bz_1_ptr xF_bz_ptr = extract<bz_1_ptr>(pf.attr("xF"))();
	bz_1_ptr yF_bz_ptr = extract<bz_1_ptr>(pf.attr("yF"))();
	bz_1_ptr fF_bz_ptr = extract<bz_1_ptr>(pf.attr("fF"))();
	bz_1_ptr gF_bz_ptr = extract<bz_1_ptr>(pf.attr("gF"))();

	Array<double,1> & xF = *xF_bz_ptr.get(),
			& yF = *yF_bz_ptr.get(),
			& fF = *fF_bz_ptr.get(),
			& gF = *gF_bz_ptr.get();

	if(xF.length(0) < nF || yF.length(0) < nF || fF.length(0) < nF || gF.length(0) < nF){
		throw std::invalid_argument("compute_arbitrary_flow_mp: pf.xF, pf.yF, pf.fF and pf.gF must each hold at least nF entries");
	}

	// Output arrays, one entry per evaluation point, created on the Python side.
	int NX[] = {N};

	object u_py = create_array_for_py<1>(NX);
	object v_py = create_array_for_py<1>(NX);
	object p_py = create_array_for_py<1>(NX);

	bz_1_ptr u_bz_ptr = extract<bz_1_ptr>(u_py)();
	bz_1_ptr v_bz_ptr = extract<bz_1_ptr>(v_py)();
	bz_1_ptr p_bz_ptr = extract<bz_1_ptr>(p_py)();

	Array<double,1> & u = *u_bz_ptr.get(),
			& v = *v_bz_ptr.get(),
			& p = *p_bz_ptr.get();

	u = 0;
	v = 0;
	p = 0;

	// Loop-invariant coefficients. They are written so that every expression
	// below is evaluated in the same order as the original inline form.
	const double e2 = pow2(e);
	const double pressureCoef = 1/(2*pi);
	const double fourPiMu = 4*pi*mu;
	const double velocityCoef = 1.0/fourPiMu;

	for(int n = 0; n < N; n++){
		const double xn = x(n);
		const double yn = y(n);

		double Ucurr = 0;
		double Vcurr = 0;
		double Pcurr = 0;

		// All temporaries are declared inside the loop body and are therefore
		// private to each thread; only the three accumulators are reduced.
		#pragma omp parallel for default(shared) schedule(dynamic,100) reduction(+:Pcurr,Ucurr,Vcurr)
		for(int k = 0; k < nF; k++){
			// Displacement from force k to the evaluation point.
			const double xMxK = xn - xF(k);
			const double yMyK = yn - yF(k);

			const double f_dot_xMxK = fF(k)*xMxK + gF(k)*yMyK;
			const double r2 = pow2(xMxK) + pow2(yMyK);
			const double root_r2Pe2 = sqrt(r2 + e2);

			const double velFactor1 = log(root_r2Pe2 + e) - (e*(root_r2Pe2 + 2*e))/((root_r2Pe2 + e)*root_r2Pe2);
			const double velFactor2 = (root_r2Pe2 + 2*e)/((pow2(root_r2Pe2+e))*root_r2Pe2);

			Pcurr = Pcurr + pressureCoef*f_dot_xMxK*((r2 + 2*e2 + e*root_r2Pe2)/((root_r2Pe2 + e)*pow(root_r2Pe2,3.0)));

			Ucurr = Ucurr + (-fF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*xMxK*velFactor2;

			Vcurr = Vcurr + (-gF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*yMyK*velFactor2;
		}

		p(n) += Pcurr;
		u(n) += Ucurr;
		v(n) += Vcurr;
	}

	return make_tuple(u_py, v_py, p_py);
}

/**
 * Evaluate velocity and pressure on a 2-D grid by summing the contribution of
 * every point force (serial version).
 *
 * NOTE(review): the per-force formulas have the shape of 2-D regularized
 * Stokeslets with regularization parameter e -- confirm against the
 * derivation used by the project.
 *
 * @param pf     Python object with an integer attribute nF and 1-D array
 *               attributes xF, yF, fF, gF; entries 0..nF-1 of each are read.
 * @param fg     Python object with 2-D array attributes X, Y (grid
 *               coordinates, read) and U, V, P. U, V and P are set to zero
 *               and then filled for every index (i,j) within the extents
 *               of X.
 * @param setup  Python object with double attributes e and mu.
 * @throws std::invalid_argument if Y, U, V or P is smaller than X along
 *         either dimension, or if any of the pf arrays holds fewer than nF
 *         entries (Boost.Python reports this exception to Python as
 *         ValueError). Nothing is modified in that case.
 */
void compute_flow(object pf, object fg, object setup){
	const int nF = extract<int>(pf.attr("nF"));
	const double e = extract<double>(setup.attr("e"));
	const double mu = extract<double>(setup.attr("mu"));

	// Point forces: positions (xF, yF) and components (fF, gF).
	bz_1_ptr xF_bz_ptr = extract<bz_1_ptr>(pf.attr("xF"))();
	bz_1_ptr yF_bz_ptr = extract<bz_1_ptr>(pf.attr("yF"))();
	bz_1_ptr fF_bz_ptr = extract<bz_1_ptr>(pf.attr("fF"))();
	bz_1_ptr gF_bz_ptr = extract<bz_1_ptr>(pf.attr("gF"))();

	Array<double,1> & xF = *xF_bz_ptr.get(),
			& yF = *yF_bz_ptr.get(),
			& fF = *fF_bz_ptr.get(),
			& gF = *gF_bz_ptr.get();

	if(xF.length(0) < nF || yF.length(0) < nF || fF.length(0) < nF || gF.length(0) < nF){
		throw std::invalid_argument("compute_flow: pf.xF, pf.yF, pf.fF and pf.gF must each hold at least nF entries");
	}

	// Grid coordinates (input) and fields (output).
	bz_2_ptr X_bz_ptr = extract<bz_2_ptr>(fg.attr("X"))();
	bz_2_ptr Y_bz_ptr = extract<bz_2_ptr>(fg.attr("Y"))();
	bz_2_ptr U_bz_ptr = extract<bz_2_ptr>(fg.attr("U"))();
	bz_2_ptr V_bz_ptr = extract<bz_2_ptr>(fg.attr("V"))();
	bz_2_ptr P_bz_ptr = extract<bz_2_ptr>(fg.attr("P"))();

	Array<double,2> & X = *X_bz_ptr.get(),
			& Y = *Y_bz_ptr.get(),
			& U = *U_bz_ptr.get(),
			& V = *V_bz_ptr.get(),
			& P = *P_bz_ptr.get();

	// The loops below index every array with the extents of X.
	const int nX = X.length(0);
	const int nY = X.length(1);

	if(Y.length(0) < nX || Y.length(1) < nY
			|| U.length(0) < nX || U.length(1) < nY
			|| V.length(0) < nX || V.length(1) < nY
			|| P.length(0) < nX || P.length(1) < nY){
		throw std::invalid_argument("compute_flow: fg.Y, fg.U, fg.V and fg.P must be at least as large as fg.X");
	}

	U = 0;
	V = 0;
	P = 0;

	// Loop-invariant coefficients. They are written so that every expression
	// below is evaluated in the same order as the original inline form.
	const double e2 = pow2(e);
	const double pressureCoef = 1/(2*pi);
	const double fourPiMu = 4*pi*mu;
	const double velocityCoef = 1.0/fourPiMu;

	for(int i = 0; i < nX; i++){
		for(int j = 0; j < nY; j++){
			const double xij = X(i,j);
			const double yij = Y(i,j);

			double Ucurr = 0;
			double Vcurr = 0;
			double Pcurr = 0;

			for(int k = 0; k < nF; k++){
				// Displacement from force k to the grid point.
				const double xMxK = xij - xF(k);
				const double yMyK = yij - yF(k);

				const double f_dot_xMxK = fF(k)*xMxK + gF(k)*yMyK;
				const double r2 = pow2(xMxK) + pow2(yMyK);
				const double root_r2Pe2 = sqrt(r2 + e2);

				const double velFactor1 = log(root_r2Pe2 + e) - (e*(root_r2Pe2 + 2*e))/((root_r2Pe2 + e)*root_r2Pe2);
				const double velFactor2 = (root_r2Pe2 + 2*e)/((pow2(root_r2Pe2+e))*root_r2Pe2);

				Pcurr = Pcurr + pressureCoef*f_dot_xMxK*((r2 + 2*e2 + e*root_r2Pe2)/((root_r2Pe2 + e)*pow(root_r2Pe2,3.0)));

				Ucurr = Ucurr + (-fF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*xMxK*velFactor2;

				Vcurr = Vcurr + (-gF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*yMyK*velFactor2;
			}

			P(i,j) += Pcurr;
			U(i,j) += Ucurr;
			V(i,j) += Vcurr;
		}
	}
}

/**
 * OpenMP variant of the grid evaluator: for each grid point the sum over the
 * point forces is distributed over threads with an OpenMP reduction, so the
 * order in which contributions are added (and therefore the last bits of the
 * result) may vary between runs.
 *
 * NOTE(review): a parallel region is opened once per grid point; if the grid
 * is large and nF is small, parallelising the (i,j) loops would likely be
 * cheaper -- left unchanged here.
 *
 * @param pf     Python object with an integer attribute nF and 1-D array
 *               attributes xF, yF, fF, gF; entries 0..nF-1 of each are read.
 * @param fg     Python object with 2-D array attributes X, Y (grid
 *               coordinates, read) and U, V, P. U, V and P are set to zero
 *               and then filled for every index (i,j) within the extents
 *               of X.
 * @param setup  Python object with double attributes e and mu.
 * @throws std::invalid_argument if Y, U, V or P is smaller than X along
 *         either dimension, or if any of the pf arrays holds fewer than nF
 *         entries (Boost.Python reports this exception to Python as
 *         ValueError). Nothing is modified in that case.
 */
void compute_flow_mp(object pf, object fg, object setup){
	const int nF = extract<int>(pf.attr("nF"));
	const double e = extract<double>(setup.attr("e"));
	const double mu = extract<double>(setup.attr("mu"));

	// Point forces: positions (xF, yF) and components (fF, gF).
	bz_1_ptr xF_bz_ptr = extract<bz_1_ptr>(pf.attr("xF"))();
	bz_1_ptr yF_bz_ptr = extract<bz_1_ptr>(pf.attr("yF"))();
	bz_1_ptr fF_bz_ptr = extract<bz_1_ptr>(pf.attr("fF"))();
	bz_1_ptr gF_bz_ptr = extract<bz_1_ptr>(pf.attr("gF"))();

	Array<double,1> & xF = *xF_bz_ptr.get(),
			& yF = *yF_bz_ptr.get(),
			& fF = *fF_bz_ptr.get(),
			& gF = *gF_bz_ptr.get();

	if(xF.length(0) < nF || yF.length(0) < nF || fF.length(0) < nF || gF.length(0) < nF){
		throw std::invalid_argument("compute_flow_mp: pf.xF, pf.yF, pf.fF and pf.gF must each hold at least nF entries");
	}

	// Grid coordinates (input) and fields (output).
	bz_2_ptr X_bz_ptr = extract<bz_2_ptr>(fg.attr("X"))();
	bz_2_ptr Y_bz_ptr = extract<bz_2_ptr>(fg.attr("Y"))();
	bz_2_ptr U_bz_ptr = extract<bz_2_ptr>(fg.attr("U"))();
	bz_2_ptr V_bz_ptr = extract<bz_2_ptr>(fg.attr("V"))();
	bz_2_ptr P_bz_ptr = extract<bz_2_ptr>(fg.attr("P"))();

	Array<double,2> & X = *X_bz_ptr.get(),
			& Y = *Y_bz_ptr.get(),
			& U = *U_bz_ptr.get(),
			& V = *V_bz_ptr.get(),
			& P = *P_bz_ptr.get();

	// The loops below index every array with the extents of X.
	const int nX = X.length(0);
	const int nY = X.length(1);

	if(Y.length(0) < nX || Y.length(1) < nY
			|| U.length(0) < nX || U.length(1) < nY
			|| V.length(0) < nX || V.length(1) < nY
			|| P.length(0) < nX || P.length(1) < nY){
		throw std::invalid_argument("compute_flow_mp: fg.Y, fg.U, fg.V and fg.P must be at least as large as fg.X");
	}

	U = 0;
	V = 0;
	P = 0;

	// Loop-invariant coefficients. They are written so that every expression
	// below is evaluated in the same order as the original inline form.
	const double e2 = pow2(e);
	const double pressureCoef = 1/(2*pi);
	const double fourPiMu = 4*pi*mu;
	const double velocityCoef = 1.0/fourPiMu;

	for(int i = 0; i < nX; i++){
		for(int j = 0; j < nY; j++){
			const double xij = X(i,j);
			const double yij = Y(i,j);

			double Ucurr = 0;
			double Vcurr = 0;
			double Pcurr = 0;

			// All temporaries are declared inside the loop body and are
			// therefore private to each thread; only the three accumulators
			// are reduced.
			#pragma omp parallel for default(shared) schedule(dynamic,100) reduction(+:Pcurr,Ucurr,Vcurr)
			for(int k = 0; k < nF; k++){
				// Displacement from force k to the grid point.
				const double xMxK = xij - xF(k);
				const double yMyK = yij - yF(k);

				const double f_dot_xMxK = fF(k)*xMxK + gF(k)*yMyK;
				const double r2 = pow2(xMxK) + pow2(yMyK);
				const double root_r2Pe2 = sqrt(r2 + e2);

				const double velFactor1 = log(root_r2Pe2 + e) - (e*(root_r2Pe2 + 2*e))/((root_r2Pe2 + e)*root_r2Pe2);
				const double velFactor2 = (root_r2Pe2 + 2*e)/((pow2(root_r2Pe2+e))*root_r2Pe2);

				Pcurr = Pcurr + pressureCoef*f_dot_xMxK*((r2 + 2*e2 + e*root_r2Pe2)/((root_r2Pe2 + e)*pow(root_r2Pe2,3.0)));

				Ucurr = Ucurr + (-fF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*xMxK*velFactor2;

				Vcurr = Vcurr + (-gF(k)/fourPiMu)*velFactor1 + velocityCoef*f_dot_xMxK*yMyK*velFactor2;
			}

			P(i,j) += Pcurr;
			U(i,j) += Ucurr;
			V(i,j) += Vcurr;
		}
	}
}
