// --*- C++ -*------x---------------------------------------------------------
// $Id: clustervectors.cc,v 1.2 2007/08/14 18:26:44 bindewae Exp $
//
// Program:         - 
//
// Author:          Eckart Bindewald
//
// Project name:    -
//
// Date:            $Date: 2007/08/14 18:26:44 $
//
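// Description:     Clusters input vectors (or a precomputed distance matrix)
//                  by single linkage and prints the clusters found.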
// 
// -----------------x-------------------x-------------------x-----------------

#include <iostream>
#include <fstream>
#include <string>
#include <Vec.h>
#include <vector>
#include <debug.h>
#include <GetArg.h>
#include <FileName.h>
#include <vectornumerics.h>
#include <clusterAlgorithms.h>

/** Writes the one-line usage summary of the program to the given stream. */
void
helpOutput(ostream& os)
{
  // Kept as a single named constant so the synopsis is easy to spot and edit.
  const char* const usageLine =
    "usage: clustervectors (-i inputfile|--matrix matrixfile) [--if 0|1]";

  os << usageLine << endl;
}

/** Echoes the command line the program was called with: each argument is
 *  written followed by a single blank, and the line is ended with endl. */
void
parameterOutput(ostream& os, int argc, char** argv)
{
  int remaining = argc;
  for (char** current = argv; remaining > 0; ++current, --remaining) {
    os << *current << " ";
  }
  os << endl;
}

/* Untested routine, disabled. Use readPlainMatrix instead.
Vec<Vec<double> > 
readMatrix(istream& is) {
  Vec<Vec<double> > matrix;
  while (is) {
    string line = getLine(is);
    vector<string> words = getTokens(line, " ");
    if (words.size() > 0) {
      Vec<double> v;
      v.clear();
      for (int i = 0; i < words.size(); ++i) {
	v.push_back(stod(words[i]));
      }
      matrix.push_back(v);
    }
  }
  return matrix;
}
*/

/** Reads a matrix in the format that is also used by the program "R".
 *  Moved to vectornumerics.h; the former implementation is kept below for reference. */
/*
Vec<Vec<double> >
readPlainMatrix(istream& is)
{
  Vec<Vec<double > > result;
  while (is) {
    string line = getLine(is);
    Vec<string> words = getTokens(line);
    if (words.size() > 0) {
      Vec<double> row(words.size());
      for (unsigned int i = 0; i < words.size(); ++i) {
	row[i] = stod(words[i]);
	if (!isDefined(row[i])){
	  row[i] = 0.0;
	}
      }
      result.push_back(row);
    }
  }
  return result;
}
*/

/** Reads a symmetric matrix that is stored without its diagonal.
 *
 *  Input layout: line number i (counted from 0) holds the entries of matrix
 *  row i+1, starting at column 0. Every value is mirrored to the transposed
 *  position, the diagonal is left at 0.0. The dimension of the result is the
 *  number of tokens of the last non-blank line plus one.
 *
 *  Values for which isDefined() is false are stored as 0.0.
 *  Blank lines are skipped (they still count for the row numbering);
 *  trailing blank lines are ignored when the dimension is determined.
 *
 *  @param is  stream to read the matrix from (read until exhausted by getLines)
 *  @return    square symmetric matrix; a 1x1 zero matrix if the input contains
 *             no data at all
 *
 *  A row that does not fit into the matrix is reported through ERROR_IF.
 */
Vec<Vec<double> >
readDistanceMatrix(istream& is)
{
  vector<string> lines = getLines(is);

  // Ignore trailing blank lines: otherwise the dimension below would be
  // derived from an empty line and all rows would fall outside the matrix.
  vector<string>::size_type numLines = lines.size();
  while ((numLines > 0) && (getTokens(lines[numLines - 1]).size() == 0)) {
    --numLines;
  }
  if (numLines == 0) {
    return Vec<Vec<double> >(1, Vec<double>(1, 0.0)); // only one element in matrix
  }

  Vec<Vec<double> >::size_type dim = getTokens(lines[numLines - 1]).size() + 1;
  Vec<Vec<double > > result(dim, Vec<double>(dim, 0.0));

  for (vector<string>::size_type i = 0; i < numLines; ++i) {
    Vec<string> words = getTokens(lines[i]);
    if (words.size() == 0) {
      continue; // blank line: nothing to store for this row
    }
    // Row i+1 and all its columns must lie inside the dim x dim matrix.
    ERROR_IF((i + 1 >= dim) || (words.size() > dim),
             "Row of distance matrix does not fit triangular layout!");
    for (unsigned int j = 0; j < words.size(); ++j) {
      // Parse the j-th token of this line (the original code mistakenly
      // parsed words[i] for every column).
      double value = stod(words[j]);
      if (!isDefined(value)) {
        value = 0.0;
      }
      result[i+1][j] = value;
      result[j][i+1] = value; // symmetry
    }
  }
  return result;
}


int
main(int argc, char ** argv)
{

  bool helpMode;
  int argcFile = 0;
  char ** argvFile = 0;
  unsigned int verboseLevel = 1;
  int inputFormat = 0;
  double cutoff = 5.0;
  string commandFileName;
  string inputFileName;
  string logFileName; //  = "mainprogramtemplate.log";
  string rootDir = ".";
  string matrixFileName;

  getArg("-help", helpMode, argc, argv);

  if ((argc < 2) || helpMode)  {
    helpOutput(cout);
    exit(0);
  }

  getArg("-root", rootDir, argc, argv, rootDir);
  addSlash(rootDir);

  getArg("-commands", commandFileName, argc, argv, commandFileName);
  addPathIfRelative(commandFileName, rootDir);

  if (commandFileName.size() > 0) {
    ifstream commandFile(commandFileName.c_str());
    if (!commandFile) {
      if (isPresent("-commands", argc, argv)) {
	ERROR_IF(!commandFile, "Error opening command file.");
      }
      else {
	cerr << "Warning: Could not find command file: " + commandFileName 
	     << endl;
      }
    }
    else {
      argvFile = streamToCommands(commandFile, argcFile, 
				  string("mainprogramtemplate"));
    }
    commandFile.close();
  }

  getArg("d", cutoff, argcFile, argvFile, cutoff);
  getArg("d", cutoff, argc, argv, cutoff);
  getArg("i", inputFileName, argc, argv, inputFileName);
  getArg("-if", inputFormat, argcFile, argvFile, inputFormat);
  getArg("-if", inputFormat, argc, argv, inputFormat);
  getArg("-log", logFileName, argc, argv, logFileName);
  getArg("-log", logFileName, argcFile, argvFile, logFileName);
  getArg("-matrix", matrixFileName, argcFile, argvFile, matrixFileName);
  getArg("-matrix", matrixFileName, argc, argv, matrixFileName);
  addPathIfRelative(logFileName, rootDir);
  getArg("-verbose", verboseLevel, argcFile, argvFile, verboseLevel);
  getArg("-verbose", verboseLevel, argc, argv, verboseLevel);


  if (logFileName.size() > 0) {
    ofstream logFile(logFileName.c_str(), ios::app);
    parameterOutput(logFile, argc, argv);
    if (argcFile > 1) {
      logFile << "Parameters from command file: ";
      parameterOutput(logFile, argcFile, argvFile);
    }
    logFile.close();
  }


  /***************** MAIN PROGRAM *****************************/
  
  Vec<Vec<double> > dataVecs;
  Vec<Vec<double> > distances;
  if (inputFileName.size() > 0) {
    ifstream inputFile(inputFileName.c_str());
    ERROR_IF(!inputFile, "Error opening input file!");
    inputFile >> dataVecs;
    inputFile.close();
    if (verboseLevel > 0) {
      cout << "The input data is: " << dataVecs << endl;
    }
    Vec<double> distanceRow(dataVecs.size(), 0.0);
    distances = Vec<Vec<double> >(dataVecs.size(), distanceRow);
    for (unsigned int i = 1; i < dataVecs.size(); ++i) {
      for (unsigned int j = 0; j < i; ++j) {
	distances[i][j] = euclidianDistance(dataVecs[i], dataVecs[j]);
	distances[j][i] = distances[i][j];
      }
    }
  }
  else if (matrixFileName.size() > 0) {
    ifstream matrixFile(matrixFileName.c_str());
    ERROR_IF(!matrixFile, "Error opening matrix file!");
    switch (inputFormat) {
      case 0: distances = readPlainMatrix(matrixFile);
	break;
      case 1: distances = readDistanceMatrix(matrixFile);
	break;
    default:
      ERROR("Unknown matrix input format!");
    }

    if (verboseLevel > 1) {
      cout << "The input data matrix is: " << distances << endl;
    }    
  }

  if (verboseLevel > 1) {
    cout << "The distance matrix is: " << distances << endl;
  }

  Vec<Vec<unsigned int> > clusters = singleLinkage(distances, cutoff);
  
  // sort individual clusters:
//   for (Vec<Vec<unsigned int> >::size_type i = 0; i < clusters.size(); ++i) {
//     sort(clusters[i].begin(), clusters[i].end());
//   }

  // find largest cluster
  unsigned int bestIds = 0;
  unsigned  int bestSize = clusters[0].size();
  for (unsigned int i = 1; i < clusters.size(); ++i) {
    if (clusters[i].size() > bestSize) {
      bestSize = clusters[i].size();
      bestIds = i;
    }
  }

  // find lowest line index in best cluster:
  // find largest cluster
  unsigned int bestIds2 = 0;
  unsigned  int lowest = clusters[bestIds][0];
  for (unsigned int i = 1; i < clusters[bestIds].size(); ++i) {
    if (clusters[bestIds][i] < lowest) {
      lowest = clusters[bestIds][i];
      bestIds2 = i;
    }
  }

  cout << "Found clusters: " << clusters.size() << endl;
  for (Vec<Vec<unsigned int> >::size_type i = 0; i < clusters.size(); ++i) {
    cout << clusters[i];
  }

  if (lowest < dataVecs.size()) {
    cout << "Representative of largest cluster: " << lowest << " " 
	 << dataVecs[lowest] << endl;
  }
  return 0;
}
