#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "graphicsMath.h"

// calculates the length of v
double VectorLength(Vector_t v) {
  //  double length = sqrt(v[0] * v[0] + v[1] * v[1] + v[2] * v[2]);

  long double vector;
  float *vectorP = (float *) &vector;
  int i;
  double length;


  for(i=0 ; i<4 ; i++){
    vectorP[i] = (float) v[i];
  }

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %%xmm0, %%xmm1"  
		   "\n\t mulps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"
		   
		   
		   : "=m" (vector)
		   :
		   );
    
    length = (double) sqrt(vectorP[0]+vectorP[1]+vectorP[2]);
  
  return(length);
}


// normalizes v to length 1
void VectorNormalize(Vector_t v) {
  long double vector, copy;
  float *vectorP = (float *) &vector;
  float *copyP = (float *) &copy;
  int i;
  float length;


  for(i=0 ; i<4 ; i++){
    vectorP[i] = (float) v[i];
	copyP[i] = (float) v[i];
  }

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %%xmm0, %%xmm1"
		   "\n\t mulps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"
		   
		   
		   : "=m" (vector)
		   :
		   );
    
    length = (float) sqrt(vectorP[0]+vectorP[1]+vectorP[2]);
	//or just length = VectorLength(v);

    for(i=0; i<4; i++){
      vectorP[i] = length;
    }

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %1, %%xmm1"
		   "\n\t divps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"		   		   
		   : "=m" (copy)
		   : "m" (vector)
		   );

  v[0] = (double) copyP[0];
  v[1] = (double) copyP[1];
  v[2] = (double) copyP[2];
  v[3] = 1.0;
}

// sets vector v to be the normalized vector created by x, y, and z
void VectorSetNormalized(double x, double y, double z, Vector_t v) { 

  v[0] = x;
  v[1] = y;
  v[2] = z;
  v[3] = 1.0;

  VectorNormalize(v);

}

// sets v to be vector a crossed with vector b
void VectorCross(Vector_t a, Vector_t b, Vector_t v) {
  long double t0, t1, t2, t3;
  float * t0p = (float *) &t0;
  float * t1p = (float *) &t1;
  float * t2p = (float *) &t2;
  float * t3p = (float *) &t3;

  t0p[0] = (float) a[1];
  t0p[1] = (float) a[2];
  t0p[2] = (float) a[0];
  t1p[0] = (float) b[2];
  t1p[1] = (float) b[0];
  t1p[2] = (float) b[1];
  t2p[0] = (float) a[2];
  t2p[1] = (float) a[0];
  t2p[2] = (float) a[1];
  t3p[0] = (float) b[1];
  t3p[1] = (float) b[2];
  t3p[2] = (float) b[0];    

   __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %1, %%xmm1"
		   "\n\t movaps %2, %%xmm2"
		   "\n\t movaps %3, %%xmm3"
		   "\n\t mulps %%xmm1, %%xmm0"
		   "\n\t mulps %%xmm3, %%xmm2"
		   "\n\t subps %%xmm2, %%xmm0"
		   "\n\t movaps %%xmm0, %0"		   		   
		   : "=m" (t0)
		   : "m" (t1), "m" (t2), "m" (t3)
		   );

   v[0] = (double) t0p[0];
   v[1] = (double) t0p[1];
   v[2] = (double) t0p[2];
   v[3] = 1.0;

}

// copies from to to
void VectorCopy(Vector_t to, Vector_t from) {
  to[0] = from[0];
  to[1] = from[1];
  to[2] = from[2];
  to[3] = from[3];
}

// initializes a position using homogeneous coordinates
void VectorInit(Vector_t v) {
  v[0] = v[1] = v[2] = 0.0;
  v[3] = 1.0;
}

// makes a homogeneous vector out of x, y, and z
void MakeVector(double x, double y, double z, Vector_t v) {
  v[0] = x;
  v[1] = y;
  v[2] = z;
  v[3] = 1.0;
}

// prints out a vector nicely. Use stdout or stderr to send it to the console.
void VectorPrint(Vector_t v, FILE *fp) {
  fprintf(fp, "\n(%.3f, %.3f, %.3f, %.3f)\n\n", v[0], v[1], v[2], v[3]);
}

// Adds two vectors and returns the result in t = v1 + v2
void VectorAdd(Vector_t v1, Vector_t v2, Vector_t t) {
  long double t0, t1;
  float * t0p = (float *) &t0;
  float * t1p = (float *) &t1;

  t0p[0] = (float) v1[0];
  t0p[1] = (float) v1[1];
  t0p[2] = (float) v1[2];
  t1p[0] = (float) v2[0];
  t1p[1] = (float) v2[1];
  t1p[2] = (float) v2[2];

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %1, %%xmm1"
		   "\n\t addps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"		   		   
		   : "=m" (t0)
		   : "m" (t1)
		   );
  t[0] = t0p[0];
  t[1] = t0p[1];
  t[2] = t0p[2];
  t[3] = 1.0;
}

// Subtracts two vectors and returns the result in t = v1 - v2
void VectorSub(Vector_t v1, Vector_t v2, Vector_t t) {
  long double t0, t1;
  float * t0p = (float *) &t0;
  float * t1p = (float *) &t1;

  t0p[0] = (float) v1[0];
  t0p[1] = (float) v1[1];
  t0p[2] = (float) v1[2];
  t1p[0] = (float) v2[0];
  t1p[1] = (float) v2[1];
  t1p[2] = (float) v2[2];

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %1, %%xmm1"
		   "\n\t subps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"		   		   
		   : "=m" (t0)
		   : "m" (t1)
		   );
  t[0] = t0p[0];
  t[1] = t0p[1];
  t[2] = t0p[2];
  t[3] = 1.0;
}

// Multiples a vector by a scaler
void VectorMulScalar(Vector_t v, double d) {
  long double t0, t1;
  float * t0p = (float *) &t0;
  float * t1p = (float *) &t1;

  t0p[0] = (float) v[0];
  t0p[1] = (float) v[1];
  t0p[2] = (float) v[2];
  t1p[0] = (float) d;
  t1p[1] = (float) d;
  t1p[2] = (float) d;

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %1, %%xmm1"
		   "\n\t mulps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"		   		   
		   : "=m" (t0)
		   : "m" (t1)
		   );
  v[0] = t0p[0];
  v[1] = t0p[1];
  v[2] = t0p[2];
  v[3] = 1.0;
  /*
  v[0] = v[0]*d;
  v[1] = v[1]*d;
  v[2] = v[2]*d;
  v[3] = 1.0;*/
}

// returns the dot product of two vectors
double VectorDot(Vector_t v1, Vector_t v2) {
  long double t0, t1;
  float * t0p = (float *) &t0;
  float * t1p = (float *) &t1;

  t0p[0] = (float) v1[0];
  t0p[1] = (float) v1[1];
  t0p[2] = (float) v1[2];
  t1p[0] = (float) v2[0];
  t1p[1] = (float) v2[1];
  t1p[2] = (float) v2[2];

  __asm__ volatile(
		   "\n\t movaps %0, %%xmm0"
		   "\n\t movaps %1, %%xmm1"
		   "\n\t mulps %%xmm1, %%xmm0"
		   "\n\t movaps %%xmm0, %0"		   		   
		   : "=m" (t0)
		   : "m" (t1)
		   );  

  return (double) (t0p[0] + t0p[1] + t0p[2]);
}

