// Fixed addresses used by main.
// MEM0_ADDR: base of the memory holding the images; main reads the input
//            image at MEM0_ADDR + 54 and the reference image at
//            MEM0_ADDR + 200000 + 54.
// BMP0_ADDR: main writes marker bytes (value 255) starting at this address.
// DMA0_ADDR: struct DMAControlRegister is overlaid at this address.
#define MEM0_ADDR  (0x07000000)
#define BMP0_ADDR  (0x08000000)
#define DMA0_ADDR  (0x09000000)

#define R_WIDTH 100   // reference image width, in pixels
#define R_HEIGHT 50  // reference image height, in pixels
#define I_WIDTH 128   // input image width, in pixels
#define I_HEIGHT 128  // input image height, in pixels

// LOOPFLAG_ADDR: one-byte counter that main increments and then polls until
//                it equals NUMBER_OF_PROCESSORS.
// CORRELATIONS_ADDR: start of the int table of correlation scores, one entry
//                    per candidate position.
#define LOOPFLAG_ADDR (0x0700c500)
#define CORRELATIONS_ADDR (0x0700c502)
#define NUMBER_OF_PROCESSORS 5

// Sampling strides used when accumulating the correlation sums:
// every WSEIDO-th pixel of a row and every HSEIDO-th row are visited.
#define WSEIDO 50
#define HSEIDO 20

// Register block of the DMA controller; main overlays it at DMA0_ADDR.
// main writes the fields in the order source, destination, length, command.
struct DMAControlRegister{
  unsigned int source;       // main writes 54 here
  unsigned int destination;  // main writes 0 here
  unsigned int length;       // main writes I_WIDTH * I_HEIGHT * 3 (bytes of one 24-bit image)
  unsigned int command;      // main writes 1 here, last; presumably starts the transfer — TODO confirm
};


// BMP header laid out without padding (54 bytes with 2-byte short and
// 4-byte int). Field names follow the Windows BITMAPFILEHEADER
// (bf*/offBits) and BITMAPINFOHEADER (bi*) conventions.
// The whole struct is packed, which has the same effect as packing
// every member individually.
struct bitMapHeader{
  // file header part (14 bytes)
  unsigned short bfType;
  unsigned int   bfSize;
  unsigned short bfReserved1;
  unsigned short bfReserved2;
  unsigned int   offBits;

  // info header part (40 bytes)
  unsigned int   biSize;
  unsigned int   biWidth;
  unsigned int   biHeight;
  unsigned short biPlanes;
  unsigned short biBitCount;
  unsigned int   biCompression;
  unsigned int   biSizeImage;
  unsigned int   biXPelsPerMeter;
  unsigned int   biYPelsPerMeter;
  unsigned int   biClrUsed;
  unsigned int   biClrImportant;
} __attribute__((packed));


// One 24-bit pixel stored as three bytes in blue, green, red order.
struct color{
  unsigned char B;  // blue component
  unsigned char G;  // green component
  unsigned char R;  // red component
};


// Square root by Newton's iteration, without using the math library.
// Returns 0 for x <= 0. Otherwise the iteration starts from max(x, 1),
// which is not below the true root, and each step averages the estimate
// with x / estimate. The loop stops at the first step that fails to
// decrease the estimate, and the estimate from before that step is returned.
float sqrtf_mics(float x){
  float estimate, previous;

  if(x <= 0.0)
    return 0.0;

  estimate = (x > 1) ? x : 1;

  for(;;){
    previous = estimate;
    estimate = (x / previous + previous) * 0.5;
    if(!(estimate < previous))
      break;
  }

  return previous;
}


// Absolute value of x: strictly positive inputs are returned unchanged,
// everything else is returned negated.
float fabs_mics(float x){
  if(x > 0)
    return x;
  return -x;
}

// Maps v to a single decimal digit: the largest integer d in 1..9 with
// v >= d, or 0 when v is below 1. Values of 9 or more all yield 9.
int getKeta(float v){
  int digit;

  for(digit = 9; digit > 0; --digit){
    if(v >= digit)
      return digit;
  }

  return 0;
}


int main(){
  int i,j,k,point = -1,rect[4],testcounter,testheight,testwidth,testindex,tempIndex;
  float a,b,c,tmp,totalR,totalI,test1,correlation = 0, tempc;
  int *correlations, correlationInt;
  unsigned char *input,*reference,*target[R_HEIGHT],*c1;
  int man, sen, hyaku, juu, iti, waitingProcessors = 0;
  struct color line = {0, 0, 255},pic;
  struct DMAControlRegister *controlRegister0 = (struct DMAControlRegister*)DMA0_ADDR;


  //loopflag = false
  *((unsigned char*)LOOPFLAG_ADDR) = (unsigned char)waitingProcessors;
  correlations = (int *)CORRELATIONS_ADDR;

  controlRegister0->source = (unsigned int)54;
  controlRegister0->destination = (unsigned int)0;
  controlRegister0->length = (unsigned int)(I_WIDTH * I_HEIGHT * 3);
  controlRegister0->command = (unsigned int)1;


  input = (unsigned char*)(MEM0_ADDR + 54);
  reference = (unsigned char*)(MEM0_ADDR + 200000 + 54);


  c = totalR = 0;
  for(i = 0; i < R_HEIGHT * R_WIDTH * 3; i += R_WIDTH * 3 * HSEIDO)
    {
      for(j = 0; j < R_WIDTH * 3; j += (3 * WSEIDO))
	{
	  totalR += reference[i + j];
	  c += reference[i + j] * reference[i + j];
	}
    }
  c = c * R_WIDTH * R_HEIGHT - totalR * totalR;

  c1 = (unsigned char*)(BMP0_ADDR);
  for(k = 0; k < 458; ++k)
    {
      a = b = 0;
      totalI = 0;

      for(i = 0; i < 7; ++i)
	{
	  *c1++ = 255;
	  c1 += 2;
	}

      testheight = k / (I_WIDTH - R_WIDTH + 1);
      testwidth = k % (I_WIDTH - R_WIDTH + 1);

      for(i = 0; i < R_HEIGHT; ++i)
	{
	  target[i] = &input[(testheight + i) * I_WIDTH * 3 + testwidth * 3];
	}


      for(i = 0; i < R_HEIGHT; i += (1 * HSEIDO))
	{
	  for(j = 0; j < R_WIDTH * 3; j += 3 * WSEIDO)
	    {
	      test1 = target[i][j];
	      totalI += test1;
	      a += test1 * reference[i * R_WIDTH * 3 + j];
	      b += test1 * test1;
	    }
	}

      a = a * R_WIDTH * R_HEIGHT - totalI * totalR;
      b = b * R_WIDTH * R_HEIGHT - totalI * totalI;

      tempc = fabs_mics(a / sqrtf_mics((float)(b * c)) * 10000);

      man = getKeta(tempc / 10000);
      sen = getKeta((tempc - man * 10000) / 1000);
      hyaku = getKeta((tempc - man * 10000 - sen * 1000) / 100);
      juu = getKeta((tempc - man * 10000 - sen * 1000 - hyaku * 100) / 10);
      iti = getKeta((tempc - man * 10000 - sen * 1000 - hyaku * 100 - juu * 10));
      
      correlations[k] = man * 10000 + sen * 1000 + hyaku * 100 + juu * 10 + iti;
  }

  //Ʊ˥줿Ф
  waitingProcessors = *((unsigned char*)LOOPFLAG_ADDR);
  *((unsigned char*)LOOPFLAG_ADDR) = ((unsigned char)waitingProcessors + 1);

  while( *((unsigned char*)LOOPFLAG_ADDR) != NUMBER_OF_PROCESSORS ){
  }

  waitingProcessors = 0;
  *((unsigned char*)LOOPFLAG_ADDR) = (unsigned char)waitingProcessors;

  correlationInt = 0;
  for(i = 0; i < 2291; ++i){
        if(correlationInt < correlations[i]){
          correlationInt = correlations[i];
	  point = i;
	}
  }


  rect[0] = (point / (I_WIDTH - R_WIDTH + 1)) * I_WIDTH + (point % (I_WIDTH - R_WIDTH + 1));
  rect[1] = rect[0] + I_WIDTH * R_HEIGHT + R_WIDTH;
  rect[2] = rect[0] + I_WIDTH * R_HEIGHT;
  rect[3] = rect[0] + R_WIDTH;


  c1 = input;
  for(i = 0; i < 3276; ++i)
    {
      if(((rect[0] <= i && i <= rect[1]) && ((i % I_WIDTH == rect[0] % I_WIDTH) || (i % I_WIDTH == rect[1] % I_WIDTH))) || (rect[0] <= i && i <= rect[3]) || (rect[2] <= i && i <= rect[1]))
	{
	  *c1++ = 0;
	  *c1++ = 0;
	  *c1++ = 255;
	}
      else
	c1 += 3;
    }

  //Ʊ˥줿Ф
  waitingProcessors = *((unsigned char*)LOOPFLAG_ADDR);
  *((unsigned char*)LOOPFLAG_ADDR) = ((unsigned char)waitingProcessors + 1);

  while( *((unsigned char*)LOOPFLAG_ADDR) != NUMBER_OF_PROCESSORS ){
  }

  controlRegister0->source = (unsigned int)54;
  controlRegister0->destination = (unsigned int)0;
  controlRegister0->length = (unsigned int)(I_WIDTH * I_HEIGHT * 3);
  controlRegister0->command = (unsigned int)1;

  return 0;
}
