#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "graphicsMath.h"
#include "vector.c"
#include <sys/timeb.h>


#define COUNT 100

// Prints a matrix to a stream in a nice format. Pass in stdout or
// stderr to send it to the console
void MatrixPrint(Matrix_t m, FILE *fp) {
  int i;

  for(i=0;i<4;i++) {
    fprintf(fp, "| %5.3f %5.3f %5.3f %5.3f |\n", m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]);
  }
  printf("\n");
}

void MatrixPrintD(Matrix_t m, FILE *fp) {
  int i;

  for(i=0;i<4;i++) {
    fprintf(fp, "| %5.6f %5.6f %5.6f %5.6f |\n", m[i*4], m[i*4+1], m[i*4+2], m[i*4+3]);
  }
  printf("\n");
}

// Sets all entries in a matrix to a single value
void MatrixSetValue(double val, Matrix_t m) {
  long i;

  for(i=0;i<MatrixSize;i++)
    m[i] = val;
}

// Sets a matrix to identity
void MatrixSetIdentity(Matrix_t m) {
  m[1] = 0;
  m[2] = 0;
  m[3] = 0;
  m[4] = 0;
  m[6] = 0;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = m[5] = m[10] = m[15] = 1.0;
}

// Returns the value at row r, column c with bounds checking
double MatrixGet(short r, short c, Matrix_t m) {
  if(r >= 0 && r < 4) {
    if(c >= 0 && c < 4)
      return(m[(r<<2) + c]);
  }
  fprintf(stderr,"Illegal matrix get (%hd, %hd)\n", r, c);
  return(0.0);
}

// Sets the value at row r, column c with bounds checking
void MatrixSet(short r, short c, double val, Matrix_t m) {
  if(r >= 0 && r < 4) {
    if(c >= 0 && c < 4) {
      m[(r<<2) + c] = val;
      return;
    }
  }
  fprintf(stderr,"Illegal matrix set (%hd, %hd)\n", r, c);
}

// Transposes a matrix in place (modifies the input)
void MatrixTranspose(Matrix_t m) {
  Matrix_t t;

  t[1] = m[4];
  t[2] = m[8];
  t[3] = m[12];
  t[4] = m[1];

  t[6] = m[9];
  t[7] = m[13];
  t[8] = m[2];
  t[9] = m[6];

  t[11] = m[14];
  t[12] = m[3];
  t[13] = m[7];
  t[14] = m[11];

  m[1] = t[1];
  m[2] = t[2];
  m[3] = t[3];
  m[4] = t[4];
  m[6] = t[6];
  m[7] = t[7];
  m[8] = t[8];
  m[9] = t[9];
  m[11] = t[11];
  m[12] = t[12];
  m[13] = t[13];
  m[14] = t[14];
}

// Copies the second matrix to the first
void MatrixCopy(Matrix_t to, Matrix_t from) {

  to[0] = from[0];
  to[1] = from[1];
  to[2] = from[2];
  to[3] = from[3];
  to[4] = from[4];
  to[5] = from[5];
  to[6] = from[6];
  to[7] = from[7];
  to[8] = from[8];
  to[9] = from[9];
  to[10] = from[10];
  to[11] = from[11];
  to[12] = from[12];
  to[13] = from[13];
  to[14] = from[14];
  to[15] = from[15];
}

// Executes the matrix multiplication left * right => result
// The result matrix can be one of the operands.
void MatrixMultiply(Matrix_t left, Matrix_t right, Matrix_t result) {

  long double lefty0, lefty1, lefty2, lefty3;
  long double righty0, righty1, righty2, righty3;
  long double tmp0, tmp1, tmp2, tmp3;

  float * left0p = (float *) &lefty0;
  float * left1p = (float *) &lefty1;
  float * left2p = (float *) &lefty2;
  float * left3p = (float *) &lefty3;

  float * right0p = (float *) &righty0;
  float * right1p = (float *) &righty1;
  float * right2p = (float *) &righty2;
  float * right3p = (float *) &righty3;

  float * tmp0p = (float *) &tmp0;
  float * tmp1p = (float *) &tmp1;
  float * tmp2p = (float *) &tmp2;
  float * tmp3p = (float *) &tmp3;

  //Set up the vectors to dot together
  //First matrix's vectors are rows
  left0p[0] = (float) left[0];
  left0p[1] = (float) left[1];
  left0p[2] = (float) left[2];
  left0p[3] = (float) left[3];
  left1p[0] = (float) left[4];
  left1p[1] = (float) left[5];
  left1p[2] = (float) left[6];
  left1p[3] = (float) left[7];
  left2p[0] = (float) left[8];
  left2p[1] = (float) left[9];
  left2p[2] = (float) left[10];
  left2p[3] = (float) left[11];
  left3p[0] = (float) left[12];
  left3p[1] = (float) left[13];
  left3p[2] = (float) left[14];
  left3p[3] = (float) left[15];

  //Second matrix's vectors are columns
  right0p[0] = (float) right[0];
  right0p[1] = (float) right[4];
  right0p[2] = (float) right[8];
  right0p[3] = (float) right[12];
  right1p[0] = (float) right[1];
  right1p[1] = (float) right[5];
  right1p[2] = (float) right[9];
  right1p[3] = (float) right[13];
  right2p[0] = (float) right[2];
  right2p[1] = (float) right[6];
  right2p[2] = (float) right[10];
  right2p[3] = (float) right[14];
  right3p[0] = (float) right[3];
  right3p[1] = (float) right[7];
  right3p[2] = (float) right[11];
  right3p[3] = (float) right[15];

  //first row of result (multiplying by righty0)
  __asm__ volatile(
		   "\n\t movaps %4, %%xmm7"
		   "\n\t movaps %5, %%xmm0"
		   "\n\t mulps %%xmm7, %%xmm0"
		   "\n\t movaps %%xmm0, %0"

		   "\n\t movaps %6, %%xmm1"
		   "\n\t mulps %%xmm7, %%xmm1"
		   "\n\t movaps %%xmm1, %1"

		   "\n\t movaps %7, %%xmm2"
		   "\n\t mulps %%xmm7, %%xmm2"
		   "\n\t movaps %%xmm2, %2"

		   "\n\t movaps %8, %%xmm3"
		   "\n\t mulps %%xmm7, %%xmm3"
		   "\n\t movaps %%xmm3, %3"

		   : "=m" (tmp0), "=m" (tmp1), "=m" (tmp2), "=m" (tmp3)
		   : "m" (righty0), "m" (lefty0), "m" (lefty1), "m" (lefty2), 
		   "m" (lefty3)
		   );


  result[0] = (double) (tmp0p[0] + tmp0p[1] + tmp0p[2] + tmp0p[3]);
  result[4] = (double) (tmp1p[0] + tmp1p[1] + tmp1p[2] + tmp1p[3]);
  result[8] = (double) (tmp2p[0] + tmp2p[1] + tmp2p[2] + tmp2p[3]);
  result[12] = (double) (tmp3p[0] + tmp3p[1] + tmp3p[2] + tmp3p[3]);

  //second row of result (multiplying by righty1)
  __asm__ volatile(
		   "\n\t movaps %4, %%xmm7"
		   "\n\t movaps %5, %%xmm0"
		   "\n\t mulps %%xmm7, %%xmm0"
		   "\n\t movaps %%xmm0, %0"

		   "\n\t movaps %6, %%xmm1"
		   "\n\t mulps %%xmm7, %%xmm1"
		   "\n\t movaps %%xmm1, %1"

		   "\n\t movaps %7, %%xmm2"
		   "\n\t mulps %%xmm7, %%xmm2"
		   "\n\t movaps %%xmm2, %2"

		   "\n\t movaps %8, %%xmm3"
		   "\n\t mulps %%xmm7, %%xmm3"
		   "\n\t movaps %%xmm3, %3"

		   : "=m" (tmp0), "=m" (tmp1), "=m" (tmp2), "=m" (tmp3)
		   : "m" (righty1), "m" (lefty0), "m" (lefty1), "m" (lefty2), 
		   "m" (lefty3)
		   );


  result[1] = (double) (tmp0p[0] + tmp0p[1] + tmp0p[2] + tmp0p[3]);
  result[5] = (double) (tmp1p[0] + tmp1p[1] + tmp1p[2] + tmp1p[3]);
  result[9] = (double) (tmp2p[0] + tmp2p[1] + tmp2p[2] + tmp2p[3]);
  result[13] = (double) (tmp3p[0] + tmp3p[1] + tmp3p[2] + tmp3p[3]);  

  //third row of result (multiplying by righty2)
  __asm__ volatile(
		   "\n\t movaps %4, %%xmm7"
		   "\n\t movaps %5, %%xmm0"
		   "\n\t mulps %%xmm7, %%xmm0"
		   "\n\t movaps %%xmm0, %0"

		   "\n\t movaps %6, %%xmm1"
		   "\n\t mulps %%xmm7, %%xmm1"
		   "\n\t movaps %%xmm1, %1"

		   "\n\t movaps %7, %%xmm2"
		   "\n\t mulps %%xmm7, %%xmm2"
		   "\n\t movaps %%xmm2, %2"

		   "\n\t movaps %8, %%xmm3"
		   "\n\t mulps %%xmm7, %%xmm3"
		   "\n\t movaps %%xmm3, %3"

		   : "=m" (tmp0), "=m" (tmp1), "=m" (tmp2), "=m" (tmp3)
		   : "m" (righty2), "m" (lefty0), "m" (lefty1), "m" (lefty2), 
		   "m" (lefty3)
		   );


  result[2] = (double) (tmp0p[0] + tmp0p[1] + tmp0p[2] + tmp0p[3]);
  result[6] = (double) (tmp1p[0] + tmp1p[1] + tmp1p[2] + tmp1p[3]);
  result[10] = (double) (tmp2p[0] + tmp2p[1] + tmp2p[2] + tmp2p[3]);
  result[14] = (double) (tmp3p[0] + tmp3p[1] + tmp3p[2] + tmp3p[3]);

  //last row of result (multiplying by righty3)
  __asm__ volatile(
		   "\n\t movaps %4, %%xmm7"
		   "\n\t movaps %5, %%xmm0"
		   "\n\t mulps %%xmm7, %%xmm0"
		   "\n\t movaps %%xmm0, %0"

		   "\n\t movaps %6, %%xmm1"
		   "\n\t mulps %%xmm7, %%xmm1"
		   "\n\t movaps %%xmm1, %1"

		   "\n\t movaps %7, %%xmm2"
		   "\n\t mulps %%xmm7, %%xmm2"
		   "\n\t movaps %%xmm2, %2"

		   "\n\t movaps %8, %%xmm3"
		   "\n\t mulps %%xmm7, %%xmm3"
		   "\n\t movaps %%xmm3, %3"

		   : "=m" (tmp0), "=m" (tmp1), "=m" (tmp2), "=m" (tmp3)
		   : "m" (righty3), "m" (lefty0), "m" (lefty1), "m" (lefty2), 
		   "m" (lefty3)
		   );


  result[3] = (double) (tmp0p[0] + tmp0p[1] + tmp0p[2] + tmp0p[3]);
  result[7] = (double) (tmp1p[0] + tmp1p[1] + tmp1p[2] + tmp1p[3]);
  result[11] = (double) (tmp2p[0] + tmp2p[1] + tmp2p[2] + tmp2p[3]);
  result[15] = (double) (tmp3p[0] + tmp3p[1] + tmp3p[2] + tmp3p[3]);
}

// Transforms a vector by the matrix: result = M * V
void VectorTransform(Matrix_t left, Vector_t right, Vector_t result) {

  long double v, r0, r1, r2, r3;
  long double tmp0, tmp1, tmp2, tmp3;
  float *vp = (float *) &v;
  float *r0p = (float *) &r0;
  float *r1p = (float *) &r1;
  float *r2p = (float *) &r2;
  float *r3p = (float *) &r3;
  float *tmp0p = (float *) &tmp0;
  float *tmp1p = (float *) &tmp1;
  float *tmp2p = (float *) &tmp2;
  float *tmp3p = (float *) &tmp3;

  vp[0] = (float) right[0];
  vp[1] = (float) right[1];
  vp[2] = (float) right[2];
  vp[3] = (float) right[3];
  r0p[0] = (float) left[0];
  r0p[1] = (float) left[1];
  r0p[2] = (float) left[2];
  r0p[3] = (float) left[3];
  r1p[0] = (float) left[4];
  r1p[1] = (float) left[5];
  r1p[2] = (float) left[6];
  r1p[3] = (float) left[7];
  r2p[0] = (float) left[8];
  r2p[1] = (float) left[9];
  r2p[2] = (float) left[10];
  r2p[3] = (float) left[11];
  r3p[0] = (float) left[12];
  r3p[1] = (float) left[13];
  r3p[2] = (float) left[14];
  r3p[3] = (float) left[15];

  __asm__ volatile(
		   "\n\t movaps %4, %%xmm7"
		   "\n\t movaps %5, %%xmm0"
		   "\n\t mulps %%xmm7, %%xmm0"
		   "\n\t movaps %%xmm0, %0"

		   "\n\t movaps %6, %%xmm1"
		   "\n\t mulps %%xmm7, %%xmm1"
		   "\n\t movaps %%xmm1, %1"

		   "\n\t movaps %7, %%xmm2"
		   "\n\t mulps %%xmm7, %%xmm2"
		   "\n\t movaps %%xmm2, %2"

		   "\n\t movaps %8, %%xmm3"
		   "\n\t mulps %%xmm7, %%xmm3"
		   "\n\t movaps %%xmm3, %3"

		   : "=m" (tmp0), "=m" (tmp1), "=m" (tmp2), "=m" (tmp3)
		   : "m" (v), "m" (r0), "m" (r1), "m" (r2), "m" (r3)
		   );

  result[0] = (double) (tmp0p[0] + tmp0p[1] + tmp0p[2] + tmp0p[3]);
  result[1] = (double) (tmp1p[0] + tmp1p[1] + tmp1p[2] + tmp1p[3]);
  result[2] = (double) (tmp2p[0] + tmp2p[1] + tmp2p[2] + tmp2p[3]);
  result[3] = (double) (tmp3p[0] + tmp3p[1] + tmp3p[2] + tmp3p[3]);
}

// Applies a 2D scale to the left of the input matrix  r = S * r
void MatrixScale2D(double sx, double sy, Matrix_t r) {
  Matrix_t m;

  m[1] = 0;
  m[2] = 0;
  m[3] = 0;
  m[4] = 0;
  m[6] = 0;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = sx; 
  m[5] = sy; 
  m[10] = m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// Applies a 2D rotation to the left of the input matrix r  = R * r
void MatrixRotate2D(double costh, double sinth, Matrix_t r) {
  Matrix_t m;

  m[1] = -sinth;
  m[2] = 0;
  m[3] = 0;
  m[4] = sinth;
  m[6] = 0;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = costh; 
  m[5] = costh; 
  m[10] = m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// Applies a 2D translation to the left of the input matrix r = T * r
void MatrixTranslate2D(double tx, double ty, Matrix_t r) {
  Matrix_t m;

  m[1] = 0;
  m[2] = 0;
  m[3] = tx;
  m[4] = 0;
  m[6] = 0;
  m[7] = ty;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = m[5] = m[10] = m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// Applies a 3D scale to the left of the input matrix r = S * r
void MatrixScale3D(double sx, double sy, double sz, Matrix_t r) {
  Matrix_t m;


  m[1] = 0;
  m[2] = 0;
  m[3] = 0;
  m[4] = 0;
  m[6] = 0;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = sx; 
  m[5] = sy;
  m[10] = sz;
  m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// applies a rotation about Z to the left of the input matrix r = Rz * r;
void MatrixRotateZ(double costh, double sinth, Matrix_t r) {
  Matrix_t m;



  m[1] = -sinth;
  m[2] = 0;
  m[3] = 0;
  m[4] = sinth;
  m[6] = 0;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = costh; 
  m[5] = costh;
  m[10] = 1.0;
  m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// applies a rotation about X to the left of the input matrix r = Rx * r;
void MatrixRotateX(double costh, double sinth, Matrix_t r) {
  Matrix_t m;


  m[1] = 0;
  m[2] = 0;
  m[3] = 0;
  m[4] = 0;
  m[6] = -sinth;
  m[7] = 0;
  m[8] = 0;
  m[9] = sinth;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = 1.0; 
  m[5] = costh;
  m[10] = costh;
  m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// applies a rotation about Y to the left of the input matrix r = Ry * r;
void MatrixRotateY(double costh, double sinth, Matrix_t r) {
  Matrix_t m;


  m[1] = 0;
  m[2] = sinth;
  m[3] = 0;
  m[4] = 0;
  m[6] = 0;
  m[7] = 0;
  m[8] = -sinth;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = costh; 
  m[5] = 1.0;
  m[10] = costh;
  m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// applies a coordinate system alignment based on the orthonormal
// vectors x, y, and z:  r = Rxyz * r
void MatrixRotateXYZ(Vector_t x, Vector_t y, Vector_t z, Matrix_t r) {
  Vector_t xbar;
  Vector_t ybar;
  Vector_t zbar;
  Matrix_t m;

  MatrixSetIdentity(m);
  VectorSetNormalized(x[0], x[1], x[2], xbar);
  VectorSetNormalized(y[0], y[1], y[2], ybar);
  VectorSetNormalized(z[0], z[1], z[2], zbar);

  m[0] = xbar[0]; m[1] = xbar[1]; m[2] = xbar[2];
  m[4] = ybar[0]; m[5] = ybar[1]; m[6] = ybar[2];
  m[8] = zbar[0]; m[9] = zbar[1]; m[10] = zbar[2];

  MatrixMultiply(m, r, r);
}

// applies a 3D translation to the left of the input matrix: r = T * r
void MatrixTranslate3D(double tx, double ty, double tz, Matrix_t r) {
  Matrix_t m;


  m[1] = 0;
  m[2] = 0;
  m[3] = tx;
  m[4] = 0;
  m[6] = 0;
  m[7] = ty;
  m[8] = 0;
  m[9] = 0;
  m[11] = tz;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = 1.0; 
  m[5] = 1.0;
  m[10] = 1.0;
  m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

// applies a perspective projection matrix to the left of the input matrix: r = P * r
void MatrixPerspective(double d, Matrix_t r) {
  Matrix_t m;


  m[1] = 0;
  m[2] = 0;
  m[3] = 0;
  m[4] = 0;
  m[6] = 0;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 1.0/d;

  m[0] = 1.0; 
  m[5] = 1.0;
  m[10] = 1.0;
  m[15] = 0.0;

  MatrixMultiply(m, r, r);
}

// applies a Z shear to the left of the input matrix: r = Shz * r
void MatrixShearZ(double a, double b, Matrix_t r) {
  Matrix_t m;
 

  m[1] = 0;
  m[2] = a;
  m[3] = 0;
  m[4] = 0;
  m[6] = b;
  m[7] = 0;
  m[8] = 0;
  m[9] = 0;
  m[11] = 0;
  m[12] = 0;
  m[13] = 0;
  m[14] = 0;

  m[0] = 1.0; 
  m[5] = 1.0;
  m[10] = 1.0;
  m[15] = 1.0;

  MatrixMultiply(m, r, r);
}

void MatrixMultiplyOld(Matrix_t left, Matrix_t right, Matrix_t result) {
  Matrix_t t;
  double sum;
  int i,j,k;
    
  for(i=0;i<4;i++) {
    for(j=0;j<4;j++) {
      for(sum=0.0,k=0;k<4;k++)
	sum += left[(i<<2) + k] * right[(k<<2) + j];
      t[(i<<2) + j] = sum;
    }
  }

  for(i=0;i<MatrixSize;i++)
    result[i] = t[i];
}


void SetTest1(Matrix_t m) {
  m[0] = 0.25001;
  m[1] = 0.25001;
  m[2] = 0.25001;
  m[3] = 0.25001;
  m[4] = 0.25001;
  m[5] = 0.25001;
  m[6] = 0.25001;
  m[7] = 0.25001;
  m[8] = 0.25001;
  m[9] = 0.25001;
  m[10] = 0.25001;
  m[11] = 0.25001;
  m[12] = 0.25001;
  m[13] = 0.25001;
  m[14] = 0.25001;
  m[15] = 0.25001;

}

void SetTest2(Matrix_t m) {
  m[0] = 0.25001;
  m[1] = 0.25001;
  m[2] = 0.25001;
  m[3] = 0.25001;
  m[4] = 0.25001;
  m[5] = 0.25001;
  m[6] = 0.25001;
  m[7] = 0.25001;
  m[8] = 0.25001;
  m[9] = 0.25001;
  m[10] = 0.25001;
  m[11] = 0.25001;
  m[12] = 0.25001;
  m[13] = 0.25001;
  m[14] = 0.25001;
  m[15] = 0.25001;
}


void SetTest3(Matrix_t m) {
  m[0] = 1.4;
  m[1] = 120.6;
  m[2] = 46.2;
  m[3] = 124.8;
  m[4] = 1664.7;
  m[5] = 94.2;
  m[6] = 97.4;
  m[7] = 1.2;
  m[8] = 2;
  m[9] = 3;
  m[10] = 5;
  m[11] = 8;
  m[12] = 9;
  m[13] = 156.2;
  m[14] = 98.5;
  m[15] = 31.1;
}


void SetTest4(Matrix_t m) {
  m[0] = 5.6;
  m[1] = 78.4;
  m[2] = 953.8;
  m[3] = 75.5;
  m[4] = 6.3;
  m[5] = 96.4;
  m[6] = 5742.6;
  m[7] = 127.9;
  m[8] = 134.0;
  m[9] = 97;
  m[10] = 24.1;
  m[11] = 98.5;
  m[12] = 647.235;
  m[13] = 21.1;
  m[14] = 95.7;
  m[15] = 6;
}

void SetTest5(Matrix_t m) {
  m[0] = 1;
  m[1] = 2;
  m[2] = 3;
  m[3] = 4;
  m[4] = 5;
  m[5] = 6;
  m[6] = 7;
  m[7] = 8;
  m[8] = 9;
  m[9] = 10;
  m[10] = 11;
  m[11] = 12;
  m[12] = 13;
  m[13] = 14;
  m[14] = 15;
  m[15] = 16;
}


void SetTest6(Matrix_t m) {
  m[0] = 0;
  m[1] = 1;
  m[2] = 2;
  m[3] = 3;
  m[4] = 4;
  m[5] = 5;
  m[6] = 6;
  m[7] = 7;
  m[8] = 8;
  m[9] = 9;
  m[10] = 10;
  m[11] = 11;
  m[12] = 12;
  m[13] = 13;
  m[14] = 14;
  m[15] = 15;
}


int main() {
  Matrix_t m, n, o;
  Vector_t v, r;
  struct timeb tstart;
  struct timeb tend;
  int i;
  unsigned short millis;
  time_t seconds;
  double d1, d2, d3;
  float f1, f2, f3;
  int count;

  while(1){
  printf("What number? ");
  scanf("%d", &count);

  if(count == 0)
    return;
  

  SetTest1(m);
  SetTest2(n);

  ftime(&tstart);
  for(i=0;i<count;i++){
    MatrixMultiplyOld(m,n,o);
    MatrixMultiplyOld(n,o,m);
  }
  
  ftime(&tend);
  seconds = tend.time - tstart.time;
  if(tend.millitm >= tstart.millitm){
    millis = tend.millitm - tstart.millitm;
  }
  else {
    millis = tend.millitm - tstart.millitm + 1000;
    seconds--;
  }
  // printf("C time = %ld sec and %d ms\n",seconds, millis);
  printf("C time = %ld ms\n", 1000*seconds + (long) millis);
  MatrixPrintD(m, stdout);
 
  SetTest1(m);
  SetTest2(n);

  ftime(&tstart);
  for(i=0;i<count;i++){
    MatrixMultiply(m,n,o);
    MatrixMultiply(n,o,m);
  }
    
  ftime(&tend);
  seconds = tend.time - tstart.time;
  if(tend.millitm >= tstart.millitm){
    millis = tend.millitm - tstart.millitm;
  }
  else {
    millis = tend.millitm - tstart.millitm + 1000;
    seconds--;
  }
  //printf("SSE time = %ld sec and %d ms\n",seconds, millis);
  printf("SSE time = %ld ms\n", 1000*seconds + (long) millis);
  MatrixPrintD(m, stdout);
  }

  /*
  d1 = 1.0;
  d2 = 1.00001;
  d3 = 1.00001;
  ftime(&tstart);
  for(i=0;i<500000;i++){
    d1 = d2*d3;
    d3 = d1*d2;
  }
    
  ftime(&tend);
  seconds = tend.time - tstart.time;
  if(tend.millitm >= tstart.millitm){
    millis = tend.millitm - tstart.millitm;
  }
  else {
    millis = tend.millitm - tstart.millitm + 1000;
    seconds--;
  }
  printf("Double time = %ld.%d\n",seconds, millis);
  printf("%f\n", d3);

  f1 = 1.0;
  f2 = 1.00001;
  f3 = 1.00001;
  ftime(&tstart);
  for(i=0;i<500000;i++){
    f1 = f2*f3;
    f3 = f1*f2;
  }
    
  ftime(&tend);
  seconds = tend.time - tstart.time;
  if(tend.millitm >= tstart.millitm){
    millis = tend.millitm - tstart.millitm;
  }
  else {
    millis = tend.millitm - tstart.millitm + 1000;
    seconds--;
  }
  printf("Float time = %ld.%d\n",seconds, millis);
  printf("%f\n", f3);
  */

  return 0;
}

