// --*- C++ -*------x---------------------------------------------------------
// $Id: alignedit2.cc,v 1.47 2011/10/27 21:33:28 bindewae Exp $
//
// Program:         - 
//
// Author:          Eckart Bindewald
//
// Project name:    -
//
// Date:            $Date: 2011/10/27 21:33:28 $
//
// Description:     
// 
// -----------------x-------------------x-------------------x-----------------

#include <iostream>
#include <fstream>
#include <string>
#include <Vec.h>
#include <debug.h>
#include <GetArg.h>
#include <FileName.h>
#include <SequenceAlignment.h>
#include <SequenceAlignmentTools.h>
#include <SimpleSequenceAlignment.h>
#include <sequencestat.h>
#include <vectornumerics.h>
#include <CompensationScorer.h>
#include <SequenceMutator.h>
#include <Random.h>
#include <Stem.h>
#include <stemhelp.h>
#include <SequenceAlignmentIOTools.h>
#include <NucleotideTools.h>

#define ALIGNEDIT_VERSION "1.1.0"

/** version history
 * 1.0.1 added output format 36: start positions; imagining that all sequences are concatenated.
 * 1.1.0 added output format 37: fraction-complementary matrix; improved help output
 */

double const DNA_MATCH_SCORE = 2.0; // 2 bit per matched position
unsigned int const FASTA_BLOCK_LENGTH = 80;

/** Prints the program version followed by the list of command line options to os.
 *  This function only emits text; it does not parse or validate any option. */
void
helpOutput(ostream& os)
{
  os << "alignedit2 version " << ALIGNEDIT_VERSION << endl;
  os << "usage: alignedit2 -i filename" << endl
     << "--absolute indices" << endl
     << "--add-polya length  : add n A characters at end of sequence" << endl
     << "--bootstrap 0|1  : bootstrap resampling" << endl
     << "--clip-after n   : remove all but first n sequences" << endl
     << "--clip-before n  : remove first n-1 sequences" << endl
     << "--collapse id    : collapse alignment with respect to sequence n" << endl
     << "--collapse-name name    : collapse alignment with respect to sequence with this name" << endl
     << "--columns subset : indices of alignment columns to keep. Example: --columns 3-10,20-23,30-" << endl
     << "--conform number : shorten sequence names to this many characters"
     << endl
     << "--cons-weight weight" << endl
     << "--del-gaps 0|1|2   : delete all gap characters or all gap columns" << endl
     << "--fill 0|1    : if set to one, fill all sequences with gaps to match longest sequence of alignment." << endl
     << "--gaps n : maximum number of gaps. Use with option --remove-empty 2" << endl
     << "--i2 filename : input of second alignment" << endl
     << "-if formatid  : file format of input file 1: fasta " << endl
     << "--improve-accept 0|1" << endl
     << "--improve mode : 0: no action 1: find improvable columns 3: improve regions read from region file (--optimize-region-file)" << endl
     << "--improve-inflimit value" << endl
     << "--improve-limit1 value" << endl
     << "--improve-limit2 value" << endl
     << "--improve-glimit value" << endl
     << "--iter n  : number of iterations in optimization" << endl
     << "-l value  : lower bound for sequence similarity" << endl
     << "--lengths interval : filter such that all sequences have length specified in interval. Example: --length 45-76 or --length 10-20,40-55" << endl
     << "--make-first name : sequence with this name will be made first sequence." << endl
     << "--match-weight value" << endl
     << "--motif string : define required sequence motif" << endl
     << "--motif-list filename : file name with list of sequence motifs. Use with option --of 23" << endl
     << "--motif-pos position : start position of required sequence motif"
     << endl
     << "--names-edit 0|1|2|3 : 0:nothing, 1:numbers, 2:replace space with underscore, 3: copy names from alignment 2 (--i2 filename) to main alignment" << endl
     << "--names-keep filename" << endl
     << "-o outputfile : output filename" << endl
     << "--of formatid  : file format of output file 1: FASTA; 3: pairs; 4: ALN; 9: Cove SLX; 10: Stockholm; 12: sequence lines; 13: Zuker; 14: size; 15: Phylip; 16: names; 17: first words; 18: conservation matrix; 19: MSF; 20: Clustal; 21: GC content; 22: lengths; 23: motif scores; 24: purine content; 25: FASTA; 26: dinucleotide content; 27: dinucleotide frequencies; 28: average similarities; 29: window GC content; 30: Zuker, 32: consensus; 33: Tabular; 34: MPGAFold; 35: Zuker(3); 36: Starts; 37: fraction-complementary matrix" << endl
    // << "--optimize mode : algorithm id for optimzation " << endl
     << "--op command  : operation between two alignments. Implemented: overwrite, paste , fuse, complement. Use with option --i2 filename" << endl
     << "--op2 parameter  : parameter for operation between two alignments. Implemented: paste . Use with option --i2 filename" << endl
     << "--optimize-region min1 max1 min2 max2 : region to be optimized" << endl
     << "--optimize-region-file filename : stems define regions to be optimized" << endl
     << "--pick subsetstring   : define which sequences to keep, for example --pick 3-5,14,23" << endl
     << "--prune-pos n : keep region from here (use in combination with --prune-len)" << endl
     << "--prune-wings n : remove trailing gaps with respect to this sequence" << endl
     << "--pairs positions  : pairs of columns to be printed" << endl
     << "--pairs-match-limit frac : remove all sequences, that have not at least this fraction of position pairs a Watson-Crick pair" << endl
     << "--prune-len n : keep region from here (use in combination with --prune-pos)" << endl
     << "--prune-polya 0|1  : if set to one, remove poly-A signal of first sequence" << endl
     << "--relative indices" << endl
     << "--remove-empty 0|1|2" << endl
     << "--remove-pick subsetstring   : define which sequences to remove, for example --remove-pick 3-5,14,23" << endl
     << "--rename filename : read list of new sequences names, renames current alignment with that list" << endl
     << "--replace alphabet" << endl
     << "--sample n    : size of random sample of sequences." << endl
     << "--scramble 0|1|2|3|4|5 : randomize alignment: 0: not, 1: shuffle; 2: di-nucleotide preserving shuffle; 3: shuffle, keeping dinucleotide composition the same, do not shuffle columns; 4: scramble individual sequences even if different lengths 5: completely new nucleotides according to alphabet (equal probabilities)" << endl
     << "--seed number  : random number seed. 0: no seed" << endl
     << "--seq-min number  : minimum number of sequences for thinning" << endl
     << "--structure-matrix filename : matrix of secondary structure probabilities. Residues above --structure-threshold will be changed to X (they are not available for readout)" << endl
     << "--structure-thresh threshold : threshold for matrix of secondary structure probabilities. Residues above --structure-threshold will be changed to X (they are not available for readout)" << endl
     << "--subset string  : define subset like for example -2,3-5,7-9,14"
     << endl
     << "--subset-file filename  : filename with sequence subset definition"
     << endl
     << "--thin 0|1" << endl
     << "--tu 0|1 : if set to one, convert T to U" << endl
     << "-u value  : upper bound for sequence similarity" << endl
     << "--ut 0|1 : if set to one, convert U to T" << endl;
}

/** Echoes the command line the program was started with:
 *  each of the argc entries of argv is written followed by one blank,
 *  and the line is terminated with endl. */
void
parameterOutput(ostream& os, int argc, char** argv)
{
  int argIndex = 0;
  while (argIndex < argc)
    {
      os << argv[argIndex] << " ";
      ++argIndex;
    }
  os << endl;
}

/** Returns the number of allowed pairs (as counted by scorer.countAllowedPairs)
 *  between alignment columns pos1 and pos2, divided by the number of sequences. */
double
fractionMatching(const SequenceAlignment& ali, const CompensationScorer& scorer,
		 unsigned int pos1, unsigned int pos2)
{
  const double numSequences = static_cast<double>(ali.size());
  return scorer.countAllowedPairs(ali.getColumn(pos1), ali.getColumn(pos2)) / numSequences;
}

/** Returns the number of allowed pairs between the equally long strings s1 and s2
 *  (as counted by scorer.countAllowedPairs) divided by their length.
 *  Empty strings yield 0.0 instead of the undefined quotient 0/0. */
double
fractionMatching(const string& s1, const string& s2, 
		 const CompensationScorer& scorer)
{
  PRECOND(s1.size() == s2.size());
  if (s1.size() == 0) {
    return 0.0; // nothing to compare; avoids division by zero
  }
  return scorer.countAllowedPairs(s1, s2)/static_cast<double>(s1.size());
}

/** Like fractionMatching for two strings, but the denominator only counts the
 *  positions that scorer.countGaps does not report as gaps (gap character GAP_CHAR).
 *  Returns 0.0 when the gap count is not smaller than the string length. */
double
fractionMatchingNoGaps(const string& s1, const string& s2, 
		       const CompensationScorer& scorer)
{
  PRECOND(s1.size() == s2.size());
  unsigned int allowedCount = scorer.countAllowedPairs(s1, s2);
  unsigned int gapCount = scorer.countGaps(s1, s2, GAP_CHAR);
  if (gapCount < s1.size()) {
    return static_cast<double>(allowedCount) / (s1.size()-gapCount);
  }
  return 0.0;
}

/** Builds a symmetric matrix with one row and column per alignment column.
 *  Entry (i, j), i != j, is fractionMatching of alignment columns i and j;
 *  the diagonal is left at 0.0. */
Vec<Vec<double> > 
computeFractionMatchingMatrix(const SequenceAlignment& ali,
			      const CompensationScorer& scorer) {
  typedef Vec<Vec<double> >::size_type index_type;
  Vec<Vec<double> > fractions(ali.getLength(), Vec<double>(ali.getLength(), 0.0));
  for (index_type row = 1; row < ali.getLength(); ++row) {
    const string rowColumn = ali.getColumn(row);
    for (index_type col = 0; col < row; ++col) {
      const double frac = fractionMatching(rowColumn, ali.getColumn(col), scorer);
      fractions[row][col] = frac;
      fractions[col][row] = frac; // mirror into the upper triangle
    }
  }
  return fractions;
}

double
fractionMatchingNoGaps(const SequenceAlignment& ali, const CompensationScorer& scorer,
		       unsigned int pos1, unsigned int pos2)
{
  string s1 = ali.getColumn(pos1);
  string s2 = ali.getColumn(pos2);
  return fractionMatchingNoGaps(s1, s2, scorer);
}

/** Computes the fraction of allowed pairs for a single sequence.
 *  pairs holds position pairs as consecutive entries (pairs[0],pairs[1]), (pairs[2],pairs[3]), ...;
 *  a trailing unpaired entry is ignored. The characters found at the first and second
 *  positions are collected into two strings that are passed to fractionMatching.
 *  Returns 0.0 if pairs contains no complete pair. Raises an error via ERROR_IF
 *  if a position lies outside of s. Diagnostic lines are written to cout. */
double
computeFractionMatching(const string& s, 
			const Vec<unsigned int>& pairs,
			const CompensationScorer& scorer)
{
  unsigned int numPairs = pairs.size()/2;
  if (numPairs == 0) {
    return 0.0;
  }
  string s1(numPairs, 'X');
  string s2(numPairs, 'X');
  cout << s1 << " " << pairs << endl;
  for (unsigned int i = 0; i < numPairs; ++i) {
    // i*2+1 < pairs.size() always holds here because numPairs == pairs.size()/2
    unsigned int p1 = pairs[i*2];
    unsigned int p2 = pairs[i*2+1];
    ERROR_IF(p1 >= s.size(),
	     "Too large sequence position in computeFractionMatching!");
    ERROR_IF(p2 >= s.size(),
	     "Too large sequence position in computeFractionMatching!");
    s1[i] = s[p1];
    s2[i] = s[p2];
  }
  cout << "# testing " << s1 << " " << s2 << endl;
  return fractionMatching(s1, s2, scorer);
}

/** Removes every sequence whose computeFractionMatching value for the given
 *  position pairs is below matchMin. Does nothing if pairs is empty or
 *  matchMin <= 0. Rows are visited from last to first so that removals do not
 *  shift the indices still to be visited. Writes one diagnostic line per sequence to cout. */
void
removeNonMatching(SequenceAlignment& ali,
		  const Vec<unsigned int>& pairs,
		  const CompensationScorer& scorer,
		  double matchMin) {
  if ((pairs.size() == 0) || (matchMin <= 0.0)) {
    return;
  }
  int row = static_cast<int>(ali.size());
  while (--row >= 0) {
    double matchingFraction = computeFractionMatching(ali.getSequence(row), pairs,
						      scorer);
    cout << "# Fraction matching of sequence : " << row + 1 << " " << matchingFraction
	 << endl;
    if (matchingFraction < matchMin) {
      ali.removeSequence(row);
    }
  }
}

/** Prints, for every sequence, its name and the characters found at each position pair,
 *  followed by one statistics line per pair (compensation score, fraction matching,
 *  fraction matching without gaps, shuffle score) and the averages of these four values.
 *  pairs holds the positions as consecutive entries; a trailing unpaired entry is ignored.
 *  Nothing is printed for an empty alignment or if there is no complete pair.
 *  Raises an error via ERROR_IF if a position lies outside of a sequence.
 *  A "# comparing" diagnostic block per pair is written to cout, not to os. */
void
printAlignmentPairs(ostream& os, 
		    const SequenceAlignment& ali, 
		    const Vec<unsigned int>& pairs,
		    const CompensationScorer& scorer)
{
  unsigned int npairs = pairs.size()/2; // number of complete pairs
  if ((ali.size() == 0) || (npairs == 0)) {
    return;
  }
  string templateString(ali.size(), 'X');
  // col1[k] / col2[k]: characters of all sequences at the first / second position of pair k
  Vec<string> col1(npairs, templateString);
  Vec<string> col2(npairs, templateString);
  for (unsigned int i = 0; i < ali.size(); ++i) {
    os << ali.getName(i) << "\t";
    const string s = ali.getSequence(i); // fetched once per row and reused for all pairs
    for (unsigned int j = 0; j + 1 < pairs.size(); j +=2) {
      unsigned int p1 = pairs[j];
      unsigned int p2 = pairs[j+1];
      unsigned pairCount = j/2;
      ERROR_IF(p1 >= s.size(),
	       "Too large sequence position in printAlignmentPairs!");
      ERROR_IF(p2 >= s.size(),
	       "Too large sequence position in printAlignmentPairs!");
      os << s[p1] << " ";
      col1[pairCount][i] = s[p1];
      col2[pairCount][i] = s[p2];
      os << s[p2] << "  ";
    }
    os << endl;
  }
  Vec<double> avgResults(4, 0.0);
  for (unsigned int j = 0; j < npairs; ++j) {
    cout << "# comparing  :" << pairs[j*2] + 1 << " " << pairs[j*2+1] + 1 << endl
	 << col1[j] << endl << col2[j] << endl;
    double mutInf = scorer.compensationScore(col1[j], col2[j], Vec<double>(ali.size(), 1.0));
    double fracMatch = fractionMatching(col1[j], col2[j], scorer);
    double fracMatchNoGaps = fractionMatchingNoGaps(col1[j], col2[j], scorer);
    double shuffleScore = scorer.averageRandomRenamedAllowedPairs(col1[j], col2[j]);
    os << mutInf << " " << fracMatch << " "
       <<  fracMatchNoGaps << " " <<  shuffleScore<< endl;
    avgResults[0] += mutInf;
    avgResults[1] += fracMatch;
    avgResults[2] += fracMatchNoGaps;
    avgResults[3] += shuffleScore;
  }
  for (unsigned int i = 0; i < avgResults.size(); ++i) {
    avgResults[i] /= npairs;
  }
  os << "Average values: " << avgResults;
}

/*
void
printAlignmentPairs(ostream& os, 
		    const Alignment& ali, 
		    const Vec<unsigned int>& pairs,
		    const CompensationScorer& scorer)
{
  for (unsigned int i = 0; i + 1< pairs.size(); i +=2) {
    os << pairs[i] << " " << pairs[i+1] << endl;
    printAlignmentPair(os, ali, pairs[i], pairs[i+1],
		       scorer);
    os << endl;
  }
}

*/

/** Writes the alignment in ALN layout: a "CLUSTAL" header line, an empty line, then
 *  blocks of up to 60 alignment columns. Each row of a block consists of the sequence
 *  name cut or blank-padded to 16 characters, the residues of the block, a blank and the
 *  number of the last column of the block. Each block ends with a line of dots
 *  (one per column, indented by 16 blanks) and an empty line. */
void
writeALN(ostream& os, const SequenceAlignment& ali) {
  const int nameWidth = 16;  // number of characters reserved for the name
  const int blockWidth = 60; // alignment columns per block
  os << "CLUSTAL" << endl;
  os << endl;
  const int totalColumns = ali.getLength();
  for (int start = 0; start < totalColumns; start += blockWidth) {
    int width = totalColumns - start;
    if (width > blockWidth) {
      width = blockWidth;
    }
    for (unsigned int row = 0; row < ali.size(); ++row) {
      const string segment = ali.getSequence(row).substr(start, width);
      string label = ali.getName(row);
      if (static_cast<int>(label.size()) > nameWidth) {
	label = label.substr(0, nameWidth);
      }
      const int padding = nameWidth - static_cast<int>(label.size());
      os << label;
      os << string(padding, ' ');
      os << segment << " " << start + width << endl;
    }
    os << string(nameWidth, ' ');
    os << string(width, '.');
    os << endl << endl;
  }

}


/** Variant of the ALN writer with a 20 character name field: a "CLUSTAL" header line,
 *  an empty line, then blocks of up to 60 alignment columns. Names are cut to at most
 *  19 characters and blank-padded to 20, so at least one blank separates name and
 *  residues. Each row ends with a blank and the number of the last column of the block;
 *  each block ends with a line of dots (indented by 20 blanks) and an empty line. */
void
writeALN2(ostream& os, const SequenceAlignment& ali) {
  const int nameField = 20;              // total width of the name field
  const int maxNameLen = nameField - 1;  // longest name kept
  const int blockWidth = 60;             // alignment columns per block
  os << "CLUSTAL" << endl;
  os << endl;
  const int totalColumns = ali.getLength();
  int start = 0;
  while (start < totalColumns) {
    const int remaining = totalColumns - start;
    const int width = (remaining > blockWidth) ? blockWidth : remaining;
    for (unsigned int row = 0; row < ali.size(); ++row) {
      const string segment = ali.getSequence(row).substr(start, width);
      string label = ali.getName(row);
      if (static_cast<int>(label.size()) > maxNameLen) {
	label = label.substr(0, maxNameLen);
      }
      os << label;
      os << string(nameField - static_cast<int>(label.size()), ' ');
      os << segment << " " << start + width << endl;
    }
    os << string(nameField, ' ');
    os << string(width, '.');
    os << endl << endl;
    start += blockWidth;
  }

}

/** Writes the "slx" format used by Sean Eddy's Cove program: one line per sequence,
 *  consisting of the name cut or blank-padded to exactly 10 characters, directly
 *  followed by the sequence in which '-' and '.' are replaced by blanks. */
void
writeSlx(ostream& os, const SequenceAlignment& ali) 
{
  const string::size_type nameWidth = 10;
  for (unsigned int row = 0; row < ali.size(); ++row) {
    string label = ali.getName(row);
    label.resize(nameWidth, ' '); // cuts longer names, pads shorter ones with blanks
    string residues = ali.getSequence(row);
    residues = translate(residues, '-', ' ');
    residues = translate(residues, '.', ' ');
    os << label << residues << endl;
  }
}


/** Writes the alignment in Stockholm format: the header line "# STOCKHOLM 1.0",
 *  one "name sequence" line per sequence and the terminating "//" line.
 *  The header carries the leading "# " that the Stockholm format requires. */
void
writeStockholm(ostream& os, const SequenceAlignment& ali)
{
  os << "# STOCKHOLM 1.0" << endl;
  for (unsigned int i = 0; i < ali.size(); ++i) {
    os << ali.getName(i) << " " << ali.getSequence(i) << endl;
  }
  os << "//" << endl; // end of alignment
}

/** Writes each sequence on its own line; names are not written. */
void
writeSequenceLines(ostream& os,
		   const SequenceAlignment& ali)
{
  unsigned int row = 0;
  while (row < ali.size()) {
    os << ali.getSequence(row) << endl;
    ++row;
  }
}

/** Writes the first sequence of the alignment as three lines:
 *  a ";" line, the name, and the sequence. Only sequence 0 is written. */
void
writeZuker(ostream& os,
	   const SequenceAlignment& ali)
{
  const unsigned int first = 0; // only the first sequence is exported
  os << ";" << endl;
  os << ali.getName(first) << endl;
  os << ali.getSequence(first) << endl;
}

/** Writes the first sequence of the alignment as three lines:
 *  the name, the name once more, and the sequence. No ";" line is written. */
void
writeZuker2(ostream& os,
	    const SequenceAlignment& ali)
{
  const unsigned int first = 0; // only the first sequence is exported
  for (int repeat = 0; repeat < 2; ++repeat) {
    os << ali.getName(first) << endl; // the name line appears twice
  }
  os << ali.getSequence(first) << endl;
}

/** Writes the first sequence of the alignment as three lines: a ";" line, the name,
 *  and the sequence immediately followed by the character "1". */
void
writeZuker3(ostream& os,
	   const SequenceAlignment& ali)
{
  const unsigned int first = 0; // only the first sequence is exported
  os << ";" << endl;
  os << ali.getName(first) << endl;
  os << ali.getSequence(first);
  os << "1" << endl; // trailing terminator character
}

/** Returns a copy of s that is exactly len characters long:
 *  longer input is cut after len characters, shorter input is padded with blanks on the right. */
string
stringToLength(const string& s, unsigned int len)
{
  string result = s;
  result.resize(len, ' '); // truncates or blank-pads in a single step
  POSTCOND(result.size() == len);
  return result;
}

/** Writes Phylip format as read by programs like dnaml: a header line with the number of
 *  sequences and the alignment length (tab separated), then one line per sequence made of
 *  the name cut or padded to 10 characters, directly followed by the sequence. */
void
writePhylip(ostream& os,
	    const SequenceAlignment& ali)
{
  const unsigned int nameWidth = 10;
  os << ali.size() << "\t" << ali.getLength() << endl;
  unsigned int row = 0;
  while (row < ali.size()) {
    os << stringToLength(ali.getName(row), nameWidth)
       << ali.getSequence(row) << endl;
    ++row;
  }
}

/** Writes the name of every sequence, one name per line. */
void
writeNames(ostream& os,
	   const SequenceAlignment& ali) {
  unsigned int row = 0;
  while (row < ali.size()) {
    os << ali.getName(row) << endl;
    ++row;
  }
}

/** Writes the length of every stored sequence string, one number per line.
 *  Mainly meaningful for unaligned sequences. */
void
writeLengths(ostream& os,
	   const SequenceAlignment& ali) {
  unsigned int row = 0;
  while (row < ali.size()) {
    os << ali.getSequence(row).size() << endl;
    ++row;
  }
}

/** Writes, one per line, the first token of every sequence name as returned by getTokens.
 *  NOTE(review): element 0 is accessed unconditionally, so a name without any token is
 *  presumably not supported - confirm against getTokens. */
void
writeNamesFirstWord(ostream& os,
	   const SequenceAlignment& ali) {
  unsigned int row = 0;
  while (row < ali.size()) {
    os << getTokens(ali.getName(row))[0] << endl;
    ++row;
  }
}

/** Generates a square matrix with one row/column per alignment column. Each column gets a
 *  score from scorer.singleEntropy3 (all sequences weighted 1.0); matrix entry (i, j) is the
 *  mean of the scores of columns i and j. */
Vec<Vec<double> >
generateConservationMatrix(const SequenceAlignment& ali,
			   const CompensationScorer& scorer)
{
  typedef SequenceAlignment::sequence_size_type column_index;
  Vec<double> unitWeights(ali.size(), 1.0);
  Vec<double> columnScores(ali.getLength(), 0);
  Vec<Vec<double> > means(ali.getLength(), Vec<double>(ali.getLength(), 0.0));
  for (column_index col = 0; col < ali.getLength(); ++col) {
    columnScores[col] = scorer.singleEntropy3(ali.getColumn(col), unitWeights);
  }
  // the mean is symmetric in its two arguments, so fill both triangles at once
  for (column_index a = 0; a < ali.getLength(); ++a) {
    for (column_index b = 0; b <= a; ++b) {
      const double mean = 0.5 * (columnScores[a] + columnScores[b]);
      means[a][b] = mean;
      means[b][a] = mean;
    }
  }
  return means;
}

/** writes mean sequence conservation (sum of single column entropies */
void
writeConservationMatrix(ostream& os,
			   const SequenceAlignment& ali,
			   const CompensationScorer& scorer)
{
  Vec<Vec<double> > matrix = generateConservationMatrix(ali, scorer);
  writeMatrix(os, matrix);
}

/** Scores the occurrences of motif in seq: every (possibly overlapping) occurrence
 *  contributes motif.size() * matchScore. Occurrences that end exactly at the last
 *  character of seq are included. Returns 0.0 for an empty motif or a motif longer than seq. */
double
computeMotifScore(const string& seq,
		  const string& motif,
		  double matchScore) {
  if (motif.empty() || (motif.size() > seq.size())) {
    return 0.0;
  }
  double sum = 0.0;
  const string::size_type lastStart = seq.size() - motif.size(); // last start position that still fits
  for (string::size_type i = 0; i <= lastStart; ++i) {
    // compare in place; no temporary substring is created
    if (seq.compare(i, motif.size(), motif) == 0) {
      sum += motif.size() * matchScore;
    }
  }
  return sum;
}

double
computeMotifScore(const string& seq,
		  const Vec<string>& motifs,
		  double matchScore) {
  double sum = 0.0;
  for (Vec<string>::size_type i = 0; i < motifs.size(); ++i) {
    sum += computeMotifScore(seq, motifs[i], matchScore);
  }
  return sum;
}


void
writeMotifScores(ostream& os,
		 const SequenceAlignment& ali,
		 const Vec<string>& motifs) {
  for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
    os << (i+1) << " " << computeMotifScore(ali.getSequence(i), motifs, DNA_MATCH_SCORE) << endl;
  }
}

/** Writes six blank-separated values for the string s (no line break):
 *  purine count, pyrimidine count, their sum, the length of s, purines / (purines + pyrimidines)
 *  and purines / length. Counts come from NucleotideTools. Either fraction is reported
 *  as 0 when its denominator is zero. */
void
writePurineContent(ostream& os, const string& s) {
  NucleotideTools::size_type numPurines = NucleotideTools::countPurines(s);
  NucleotideTools::size_type numPyrimidines = NucleotideTools::countPyrimidines(s);
  string::size_type n = s.size();
  NucleotideTools::size_type sumNuc = (numPurines + numPyrimidines);
  double purineFrac1 = 0;
  if (sumNuc > 0) {
    purineFrac1 = numPurines / static_cast<double>(sumNuc);
  }
  double purineFrac2 = 0;
  if (n > 0) { // empty input: avoid 0/0
    purineFrac2 = numPurines / static_cast<double>(n);
  }
  os << numPurines << " " << numPyrimidines << " " << sumNuc << " " << n << " " << purineFrac1 << " " << purineFrac2;
}

void
writePurineContent(ostream& os,
		   const SequenceAlignment& ali) {
  for (SequenceAlignment::size_type i = 0; i < ali.getLength(); ++i) {
    os << (i+1) << " ";
    writePurineContent(os, ali.getColumn(i));
    os << endl;
  }
}		   

void 
writeDinucleotideContent(ostream& os,
			 const SequenceAlignment& ali,
			 const string& alphabet) {
  SequenceAlignment::sequence_size_type numChars = 0;
  SequenceAlignment::sequence_size_type numGaps = 0;
  ali.countCharacters(numChars, numGaps);
  for (string::size_type i = 0; i < alphabet.size(); ++i) {
    char c1 = alphabet[i];
    SequenceAlignment::size_type n1 = ali.countCharacter(alphabet[i]);
    double p1 = static_cast<double>(n1 + 1) / (numChars + alphabet.size());
    os << "Single: " << c1 << " " << p1 << endl;
    for (string::size_type j = 0; j < alphabet.size(); ++j) {
      char c2 = alphabet[j];
      SequenceAlignment::size_type n = ali.countDiCharacter(alphabet[i], alphabet[j]);
      SequenceAlignment::size_type n2 = ali.countCharacter(alphabet[j]);
      double p = static_cast<double>(n + 1) / (numChars - 1.0 + (alphabet.size()*alphabet.size()));
      double p2 = static_cast<double>(n2 + 1) / (numChars + alphabet.size());
      double frac = p / (p1 * p2);
      os << "Di: " << c1 << c2 << " " << frac << " " << p << " " << p1 << " " << p2 << endl;
    }
  }
}

/** Returns the percentage (0..100) of identical positions between the equally long strings
 *  s1 and s2. Positions where both strings carry gapChar are skipped; all other positions
 *  are compared case-insensitively. Returns 0.0 if no position remains to be compared. */
double
computePercentIdentical(const string& s1, const string& s2, char gapChar)
{
  PRECOND(s1.size() == s2.size());
  unsigned int counter = 0; // identical positions
  unsigned int len = 0;     // compared positions
  for (string::size_type i = 0; i < s1.size(); ++i) {
    if ( (s1[i] == gapChar) && (s2[i] == gapChar) ) {
      continue;
    }
    ++len;
    // toupper is only defined for unsigned char values (and EOF), hence the casts
    if (toupper(static_cast<unsigned char>(s1[i]))
	== toupper(static_cast<unsigned char>(s2[i]))) {
      ++counter;
    }
  }
  if (len == 0) {
    return 0.0;
  }
  return 100.0 * counter / static_cast<double>(len);
}

/** Returns the mean of computePercentIdentical over all pairs of different sequences.
 *  Returns 0.0 if the alignment has fewer than two sequences (no pair to average). */
double
computeAverageSimilarity(const SequenceAlignment& ali, char gapChar)
{
  double sum = 0.0;
  unsigned int counter = 0;
  for (int i = static_cast<int>(ali.size()) - 1; i >= 0; --i) {
    for (int j = i - 1; j >= 0; --j) {
      double sim = computePercentIdentical(ali.getSequence(i), ali.getSequence(j), gapChar);
      sum += sim;
      ++counter;
    }
  }
  if (counter == 0) {
    return 0.0; // avoids 0/0
  }
  return sum / counter;
}

/** Writes one sequence as a record: a ";name" line, a line with a blank, the sequence length
 *  and "    ..", then the sequence with a line break after every 60th character, and a
 *  final line break (which yields an empty line when the length is a multiple of 60). */
void
writeMPGAFold(ostream& os,  const string& sequence, const string& name) {
  const string::size_type lineWidth = 60;
  os << ";" << name << endl;
  os << " " << sequence.size() << "    .." << endl;
  for (string::size_type start = 0; start < sequence.size(); start += lineWidth) {
    os << sequence.substr(start, lineWidth);
    if ((start + lineWidth) <= sequence.size()) { // a full line was written
      os << endl;
    }
  }
  os << endl;
}

void
writeMPGAFold(ostream& os,  const SequenceAlignment& ali) { 
  for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
    writeMPGAFold(os, ali.getSequence(i), ali.getName(i));
  }
}

void
writeStarts(ostream& os, const SequenceAlignment& ali, string::size_type base) {
  PRECOND(base == 0 || base == 1); // one-based or zero-based indices
  string::size_type pos = 0;
  for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
    os << (pos + base);
    if ((i + 1) < ali.size()) {
      os << " ";
    }
    pos += ali.getSequence(i).size();
  }
  os << endl;
}

void
saveAlignment(ostream& os, const SequenceAlignment& ali,
	      const Vec<unsigned int>& pairs,
	      int format,
	      const CompensationScorer& scorer,
	      const string& alphabet,
	      const Vec<string>& motifs)
{
  switch (format) {
  case 1: // fasta format:
    ali.writeFasta(os);
    break;
  case 2:
    ERROR("Sorry, format \"printable\" not supported anymore.");
    // ali.savePrintable(os);
    break;
  case 3:
    ERROR_IF(pairs.size() == 0,
	     "No pairs defined!");
    printAlignmentPairs(os, ali, pairs, scorer);
    break;
  case 4: // Clustal ALN format:
    // writeALN2(os, ali);
    writeALN2(os, ali);
    break;
  case 9: // Cove slx format:
    writeSlx(os, ali);
    break;
  case 10: // Stckholm alignment format:
    writeStockholm(os, ali);
    break;
  case 12: // one sequence per line, ignore names
    writeSequenceLines(os, ali);
    break;
  case 13:
    writeZuker(os, ali);
    break;
  case 14: // write number of sequences and length:
    cout << ali.size() << " " << ali.getLength() << endl;
    break;
  case 15: // sequence format for phylip package
    writePhylip(os, ali);
    break;
  case 16:  // only write names
    writeNames(os, ali);
    break;
  case 17:  // only first word of names
    writeNamesFirstWord(os, ali);
    break;
  case 18:
    writeConservationMatrix(os, ali, scorer);
    break;
  case 19: // MSF formate
    SequenceAlignmentIOTools::writeMsf(os, ali, alphabet);
    break;
  case 20: // Clustal W format
    SequenceAlignmentIOTools::writeClustal(os, ali, alphabet);
    break;
  case 21:
    os << (100.0 * SequenceAlignmentTools::computeGCContent(ali)) << endl;
    break;
  case 22:  // only write lengths
    writeLengths(os, ali);
    break;
  case 23: // output of motif scores
    ERROR_IF(motifs.size() == 0,
	     "No motifs defined!");
    writeMotifScores(os, ali, motifs);
    break;
  case 24:
    writePurineContent(os, ali);
    break;
  case 25:
    SequenceAlignmentIOTools::writeFasta(os, ali, FASTA_BLOCK_LENGTH);
    break;
  case 26:
    writeDinucleotideContent(os, ali, alphabet);
    break;
  case 27:
    os << SequenceAlignmentTools::computeDinucleotideFrequencies(ali, alphabet) << endl; 
    break;
  case 28:
    os << computeAverageSimilarity(ali, GAP_CHAR) << endl;
    break;
  case 29: {
    SequenceAlignment::size_type winLen = 20;
    Vec<double> values = SequenceAlignmentTools::computeWindowGCContent(ali, 20);
    for (Vec<double>::size_type i = 0; i < values.size(); ++i) {
      os << (i+1) << " " << ((i * winLen) + 1) << " " << (100.0 * values[i]) << endl;
    }
  }
    break;
  case 30:
    writeZuker2(os, ali);
    break;

  case 31:
    ERROR("Unknown output file format!");
    // SequenceAlignmentIOTools.writeStanford(os, ali, alphabet);
    break;
  case 32:
    os << ">consensus" << endl;
    os << SequenceAlignmentTools::computeConsensusSequence(ali) << endl;
    break;
  case 33: {
    for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
      os << ali.getName(i) << "\t" << ali.getSequence(i) << endl;
    }
  }
    break;
  case 34:
    writeMPGAFold(os, ali);
    break;
  case 35:
    writeZuker3(os, ali);
    break;
  case 36:
    writeStarts(os, ali, 1);
    break;
  case 37:
    writeMatrix(os, computeFractionMatchingMatrix(ali, scorer));
    break;
  default:
    ERROR("Unknown output file format!");
  }
}



/** Scans all pairs of sequences and returns the highest percent identity found.
 *  ibest and jbest receive the indices (ibest > jbest) of the first pair in scan order
 *  that reaches this value. With fewer than two sequences -1.0 is returned and
 *  ibest / jbest are left unchanged. The alignment itself is not modified. */
double
findMostSimilar(SequenceAlignment& ali, unsigned int& ibest, unsigned int& jbest, char gapChar)
{
  double bestSimi = -1.0;
  for (int i = static_cast<int>(ali.size()) - 1; i >= 0; --i) {
    for (int j = i - 1; j >= 0; --j) {
      double sim = computePercentIdentical(ali.getSequence(i), ali.getSequence(j), gapChar);
      if (sim > bestSimi) {
	bestSimi = sim;
	ibest = i;
	jbest = j;
      }
    }
  }
  return bestSimi;
}

/** Scans all pairs of sequences and returns the lowest percent identity found.
 *  ibest and jbest receive the indices (ibest > jbest) of the first pair in scan order
 *  that reaches this value. With fewer than two sequences 999.0 is returned and
 *  ibest / jbest are left unchanged. The alignment itself is not modified. */
double
findLeastSimilar(SequenceAlignment& ali, unsigned int& ibest, unsigned int& jbest, char gapChar)
{
  double bestSimi = 999.0;
  for (int i = static_cast<int>(ali.size()) - 1; i >= 0; --i) {
    for (int j = i - 1; j >= 0; --j) {
      double sim = computePercentIdentical(ali.getSequence(i), ali.getSequence(j), gapChar);
      if (sim < bestSimi) {
	bestSimi = sim;
	ibest = i;
	jbest = j;
      }
    }
  }
  return bestSimi;
}




/** Removes sequences that are more than similarityMax percent identical to another one.
 *  Pass 1 removes every sequence (except sequence 0) that is too similar to sequence 0.
 *  Pass 2 removes every remaining sequence that is too similar to any sequence with a
 *  lower index. Sequences are visited from last to first so removals do not shift the
 *  indices still to be visited. Both passes return as soon as the alignment holds fewer
 *  than seqMinNum sequences (checked before each comparison). */
void
removeTooSimilar(SequenceAlignment& ali, double similarityMax, int seqMinNum, char gapChar)
{
  for (int i = static_cast<int>(ali.size()) - 1; i >= 1; --i) {
    if (static_cast<int>(ali.size()) < seqMinNum) {
      return;
    }
    double sim = computePercentIdentical(ali.getSequence(i), ali.getSequence(0), gapChar);
    if (sim > similarityMax) {
      ali.removeSequence(i);
    }
  }
  for (int i = static_cast<int>(ali.size()) - 1; i >= 1; --i) {
    for (int j = 0; j < i; ++j) {
      if (static_cast<int>(ali.size()) < seqMinNum) {
	return;
      }
      double sim = computePercentIdentical(ali.getSequence(i), 
					   ali.getSequence(j), gapChar);
      if (sim > similarityMax) {
	ali.removeSequence(i);
	break; // sequence i is gone; continue with the next lower index
      }
    }
  }
}

/** Removes sequences that are too dissimilar.
 *  Pass 1 removes every sequence (except sequence 0) whose percent identity with
 *  sequence 0 is below similarityMin. Pass 2 removes every remaining sequence that has
 *  no sequence with a lower index reaching at least similarityMin. Sequences are visited
 *  from last to first so removals do not shift the indices still to be visited. */
void
removeTooFar(SequenceAlignment& ali, double similarityMin, char gapChar)
{
  for (int i = static_cast<int>(ali.size()) - 1; i >= 1; --i) {
    double sim = computePercentIdentical(ali.getSequence(i), ali.getSequence(0), gapChar);
    if (sim < similarityMin) {
      ali.removeSequence(i);
    }
  }
  for (int i = static_cast<int>(ali.size()) - 1; i >= 1; --i) {
    bool found = false; // must start fresh for every sequence
    for (int j = 0; j < i; ++j) {
      double sim = computePercentIdentical(ali.getSequence(i), 
					   ali.getSequence(j), gapChar);
      if (sim >= similarityMin) {
	found = true;
	break;
      }
    }
    if (!found) {
      ali.removeSequence(i);
    }
  }
}


/** Thins the alignment down to at most seqMinNum sequences.
 *  All sequence pairs are ranked by percent identity and processed in descending rank order
 *  (as defined by the RankedSolution5 comparison; presumably highest similarity first).
 *  For each pair of which neither member is marked yet, the member with the higher index
 *  is marked for deletion, until no more than seqMinNum unmarked sequences remain or all
 *  pairs are used up. Marked sequences are then removed, highest index first. */
void
thinSequences(SequenceAlignment& ali, int seqMinNum, char gapChar)
{
  typedef RankedSolution5<unsigned int, unsigned int> ScoredPair;
  Vec<ScoredPair> scored((ali.size() * (ali.size()-1))/2);
  unsigned int filled = 0;
  for (unsigned int a = 0; a < ali.size(); ++a) {
    for (unsigned int b = a + 1; b < ali.size(); ++b) {
      double sim = computePercentIdentical(ali.getSequence(a), 
					   ali.getSequence(b), gapChar);
      scored[filled] = ScoredPair(sim, a, b);
      ++filled;
    }
  }
  sort(scored.begin(), scored.end()); // ascending; walked from the back below
  Vec<unsigned int> marked;
  for (unsigned int k = scored.size(); k > 0; --k) {
    int remaining = static_cast<int>(ali.size()) - static_cast<int>(marked.size());
    if (remaining <= seqMinNum) {
      break;
    }
    unsigned int a = scored[k-1].second;
    unsigned int b = scored[k-1].third;
    // skip pairs of which one member is already marked
    bool aKept = (findFirstIndex(marked, a) >= marked.size());
    bool bKept = (findFirstIndex(marked, b) >= marked.size());
    if (aKept && bKept) {
      marked.push_back(b); // add highest sequence index
    }
  }
  sort(marked.begin(), marked.end());
  for (unsigned int k = marked.size(); k > 0; --k) { // delete highest ids first
    ERROR_IF(marked[k-1] >= ali.size(),
	     "Internal error in line 416!");
    ali.removeSequence(marked[k-1]);
  }
  ERROR_IF(static_cast<int>(ali.size()) > seqMinNum, "Internal error in line 418!");
}


/** Removes every sequence whose length differs from the alignment length.
 *  The reference length is ali.getLength(), read once before any sequence is removed.
 *  Sequences are visited from last to first so removals do not shift the indices
 *  still to be visited. */
void
removeBadLength(SequenceAlignment& ali)
{
  unsigned int len = ali.getLength();
  for (int i = static_cast<int>(ali.size()) - 1; i >= 0; --i) {
    if (ali.getSequence(i).size() != len) {
      ali.removeSequence(i);
    }
  }
}

/** Removes every sequence whose length is not contained in allowedLengths.
 *  Sequences are visited from last to first so removals do not shift the indices
 *  still to be visited. */
void
removeBadLength(SequenceAlignment& ali, const Vec<string::size_type>& allowedLengths)
{
  int row = static_cast<int>(ali.size());
  while (row > 0) {
    --row;
    // findFirstIndex returns a value >= size() if the length is not listed
    const bool lengthAllowed =
      (findFirstIndex(allowedLengths, ali.getSequence(row).size()) < allowedLengths.size());
    if (!lengthAllowed) {
      ali.removeSequence(row);
    }
  }
}

/** Shortens a sequence name to at most conformNameLength characters.
 *
 *  Steps, in order:
 *   - '/' and '|' are translated to '_' (always, even if no shortening is needed);
 *   - if conformNameLength < 1 or the name already fits, it is returned;
 *   - '.', '_' and ' ' are stripped; if the name fits now, it is returned;
 *   - otherwise the name is cut and the string form of counter is appended so
 *     that the result has exactly conformNameLength characters.
 *
 *  A name of exactly conformNameLength characters counts as fitting. This
 *  matches conformAlignmentNames, which only advances the counter for names
 *  strictly longer than the limit.
 *
 *  @param s                 original name
 *  @param conformNameLength maximum name length; values < 1 disable shortening
 *  @param counter           number used as distinguishing postfix
 *  @return the conformed name
 */
string
conformAlignmentName(const string& s, 
		     int conformNameLength, 
		     unsigned int counter)
{
  // translate "/" and "|" into "_"
  string result = translate(s, '/', '_');
  result = translate(result, '|', '_');
  if ((conformNameLength < 1) || (static_cast<int>(result.size()) <= conformNameLength)) {
    return result;
  }
  // delete all points, spaces and underscores if necessary:
  result = removeFromString(result, '.');
  result = removeFromString(result, '_');
  result = removeFromString(result, ' ');
  if (static_cast<int>(result.size()) <= conformNameLength) {
    return result;
  }
  string postfix = uitos(counter); // "~" + 
  // the postfix must leave room for at least one character of the name
  ERROR_IF(static_cast<int>(postfix.size()) >= conformNameLength,
	   "Cannot normalize alignment name!");
  result = result.substr(0, conformNameLength - postfix.size()) + postfix;
  return result;
}

/** Applies conformAlignmentName to every sequence name of the alignment.
 *
 *  Nothing happens for conformNameLength < 1. The counter handed to
 *  conformAlignmentName is the running number of names seen so far that are
 *  longer than conformNameLength.
 */
void
conformAlignmentNames(SequenceAlignment& ali,
		      int conformNameLength)
{
  if (conformNameLength >= 1) {
    unsigned int tooLongCount = 0;
    for (unsigned int seqIdx = 0; seqIdx < ali.size(); ++seqIdx) {
      const string oldName = ali.getName(seqIdx);
      const bool tooLong = static_cast<int>(oldName.size()) > conformNameLength;
      if (tooLong) {
	++tooLongCount;
      }
      const string newName =
	conformAlignmentName(oldName, conformNameLength, tooLongCount);
      // the sequence itself is written back unchanged, only the name differs
      ali.setSequence(ali.getSequence(seqIdx), newName, seqIdx);
    }
  }
}

/** Removes all sequences whose index is not contained in sampleOrig.
 *
 *  @param ali        alignment that is modified in place
 *  @param sampleOrig indices of the sequences to keep (any order)
 */
void
removeNotSampleSequences(SequenceAlignment& ali, 
			 const Vec<unsigned int>& sampleOrig)
{
  // The membership test is a linear search, so the index list needs neither
  // copying nor sorting. Iterating from the back keeps lower indices valid
  // while sequences are removed.
  for (int i = static_cast<int>(ali.size()) - 1; i >= 0; --i) {
    if (findFirstIndex(sampleOrig, static_cast<unsigned int>(i)) >= sampleOrig.size()) {
      ali.removeSequence(i);
    }
  }
}

/** Removes all sequences whose index is contained in sampleOrig.
 *
 *  @param ali        alignment that is modified in place
 *  @param sampleOrig indices of the sequences to remove (any order);
 *                    an index listed several times is removed only once
 */
void
removeSampleSequences(SequenceAlignment& ali, 
		      const Vec<unsigned int>& sampleOrig)
{
  Vec<unsigned int> sample = sampleOrig;
  sort(sample.begin(), sample.end());
  reverse(sample.begin(), sample.end()); // highest indices first
  for (unsigned int i = 0; i < sample.size(); ++i) {
    // after sorting, duplicates are adjacent; removing the same index twice
    // would delete the sequence that moved into the freed slot
    if ((i > 0) && (sample[i] == sample[i - 1])) {
      continue;
    }
    ali.removeSequence(sample[i]);
  }
}

/** Deletes alignment column col.
 *
 *  Raises ERROR if collapseId is not a valid sequence index, col is not a
 *  valid column, or the alignment has fewer than two columns. collapseId is
 *  only validated here; the column is deleted for all sequences.
 */
void
collapseAlignment(SequenceAlignment& ali, 
		  unsigned int collapseId, 
		  unsigned int col)
{
  if ((collapseId < ali.size()) && (col < ali.getLength())
      && (ali.getLength() >= 2)) {
    // deletePositionInStems(referenceStems, ali.getSequence(collapseId), col); // order important!
    ali.deleteColumn(col);
    return;
  }
  ERROR("Internal error in line 2584!");
}

/** Deletes every alignment column in which sequence collapseId carries
 *  gapChar. Does nothing if collapseId is not a valid sequence index. */
void
collapseAlignment(SequenceAlignment& ali, 
		  unsigned int collapseId, 
		  char gapChar)
{
  if (collapseId < ali.size()) {
    // columns are visited right to left so that deletions do not shift the
    // columns that are still to be inspected
    int col = ali.getLength();
    while (--col >= 0) {
      const bool gapHere = (ali.getSequence(collapseId)[col] == gapChar);
      if (gapHere) {
	collapseAlignment(ali, collapseId, static_cast<unsigned int>(col));
      }
    }
  }
}


/** Pads the end of every sequence with gapChar until all sequences are as
 *  long as the longest one. */
void
fillAliGaps(SequenceAlignment& ali, char gapChar)
{
  // pass 1: determine the longest sequence
  unsigned int longest = 0;
  for (unsigned int row = 0; row < ali.size(); ++row) {
    const string::size_type rowLen = ali.getSequence(row).size();
    if (rowLen > longest) {
      longest = rowLen;
    }
  }
  // pass 2: append the missing number of gap characters
  for (unsigned int row = 0; row < ali.size(); ++row) {
    const string current = ali.getSequence(row);
    if (current.size() >= longest) {
      continue;
    }
    const string padding((longest - current.size()), gapChar);
    ali.setSequence(current + padding, ali.getName(row), row);
  }
}

/** Returns true if s contains no character other than gapChar
 *  (an empty string therefore yields true). */
bool
isOnlyGap(const string& s, char gapChar)
{
  return s.find_first_not_of(gapChar) == string::npos;
}

/** Removes every sequence that consists solely of gapChar. */
void
removeGapOnlySequences(SequenceAlignment& ali, char gapChar)
{
  // last to first, so removals do not disturb indices still to be checked
  int row = static_cast<int>(ali.size());
  while (--row >= 0) {
    if (!isOnlyGap(ali.getSequence(row), gapChar)) {
      continue;
    }
    ali.removeSequence(static_cast<unsigned int>(row));
  }
}

/** Keeps only the sequences whose index is listed in subsetIndices.
 *  Every removed sequence is reported on cout with its 1-based number. */
void
getAlignmentSubset(SequenceAlignment& ali, 
		   const Vec<unsigned int>& subsetIndices)
{
  int row = static_cast<int>(ali.size());
  while (--row >= 0) {
    const unsigned int rowId = static_cast<unsigned int>(row);
    if (findFirstIndex(subsetIndices, rowId) < subsetIndices.size()) {
      continue; // index is part of the subset: keep the sequence
    }
    cout << "# Deleting sequence " << row + 1 
	 << " because it was not found in subset!" << endl;
    ali.removeSequence(rowId);
  }
}

/** Replaces every sequence by getSubset(sequence, subsetIndices), i.e. by
 *  the selection of its characters given by subsetIndices. Names are kept. */
void
getAlignmentColumnSubset(SequenceAlignment& ali, 
			 const Vec<unsigned int>& subsetIndices)
{
  unsigned int row = 0;
  while (row < ali.size()) {
    string full = ali.getSequence(row);
    string reduced = getSubset(full, subsetIndices);
    ali.setSequence(reduced, ali.getName(row), row);
    ++row;
  }
}

/** Fraction of character c among the non-gap characters of s.
 *
 *  @return (number of positions equal to c) / (number of positions that are
 *          not GAP_CHAR); 0.0 if s has no non-gap position
 */
double 
letterFracNoGap(const string& s,
		char c)
{
  unsigned int hits = 0;
  unsigned int nonGaps = 0;
  for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
    hits += (*it == c) ? 1 : 0;
    nonGaps += (*it != GAP_CHAR) ? 1 : 0;
  }
  if (nonGaps == 0) {
    return 0.0;
  }
  return static_cast<double>(hits)/nonGaps;
}

/** Decides whether the column pair (pos1, pos2) is worth an improvement
 *  attempt.
 *
 *  The pair qualifies if
 *   - fractionMatchingNoGaps lies in [fracLimMin, 1.0),
 *   - neither column has a non-gap fraction of 'G' or of 'U' above gFracMax,
 *   - and at least two different allowed pair types occur among the rows.
 */
bool
isImprovableCandidate(const SequenceAlignment& ali, const CompensationScorer& scorer,
		      unsigned int pos1, unsigned int pos2,
		      double fracLimMin,
		      double gFracMax)
{
  const double frac = fractionMatchingNoGaps(ali, scorer, pos1, pos2);
  if (!((frac >= fracLimMin) && (frac < 1.0))) {
    return false;
  }
  string col1 = ali.getColumn(pos1);
  string col2 = ali.getColumn(pos2);
  // columns dominated by G or U are rejected (U treated like G, see original note
  // "as promiscous as G")
  const char dominantLetters[2] = { 'G', 'U' };
  for (int k = 0; k < 2; ++k) {
    if ((letterFracNoGap(col1, dominantLetters[k]) >  gFracMax)
	|| (letterFracNoGap(col2, dominantLetters[k]) >  gFracMax)) {
      return false;
    }
  }
  // look for a second allowed pair type that differs from the first one found
  bool haveFirstPair = false;
  unsigned int firstPairId = 0;
  for (unsigned int row = 0; row < ali.size(); ++row) {
    const unsigned int pairId = scorer.numAllowedPair(col1[row], col2[row]);
    if (!(pairId < scorer.getAllowedPairs().size())) {
      continue; // not an allowed pair
    }
    if (!haveFirstPair) {
      haveFirstPair = true;
      firstPairId = pairId;
    }
    else if (pairId != firstPairId) {
      // second allowed pair found!
      return true;
    }
  }
  return false;
}

/** Returns the |offset|-th non-gap character to the right (offset > 0) or to
 *  the left (offset < 0) of position pos; gap characters are skipped while
 *  counting. offset == 0 returns s[pos] itself.
 *
 *  @return the neighbor character, or gapChar if s has no such neighbor
 */
char
getSequenceNeighbor(const string& s, int pos, int offset, char gapChar)
{
  if (offset == 0) {
    return s[pos];
  }
  const int step = (offset > 0) ? 1 : -1;
  const int wanted = (offset > 0) ? offset : -offset;
  const int len = static_cast<int>(s.size());
  int seen = 0;
  for (int k = pos + step; (k >= 0) && (k < len); k += step) {
    if (s[k] == gapChar) {
      continue;
    }
    if (++seen >= wanted) {
      return s[k];
    }
  }
  return gapChar; // no real neighbor found
}

/** Tests whether positions pos1 and pos2 of sequence form an allowed pair,
 *  either directly or after replacing one of the two characters by a
 *  non-gap neighbor at distance 1..range.
 *
 *  For each distance the candidates are tried in this order: right and left
 *  neighbor of pos2 (pos1 fixed), then right and left neighbor of pos1
 *  (pos2 fixed).
 *
 *  @param c1 out: first character of the pair that was found; the original
 *            character at pos1 if nothing matched
 *  @param c2 out: second character of the pair that was found; the original
 *            character at pos2 if nothing matched
 *  @return true if an allowed pair was found
 */
bool
isRelaxedMatching(const string& sequence, unsigned int pos1, unsigned int pos2, char& c1, char& c2, 
		  const CompensationScorer& scorer, int range, char gapChar)
{
  const char own1 = sequence[pos1];
  const char own2 = sequence[pos2];
  c1 = own1;
  c2 = own2;
  if (scorer.isAllowedPair(own1, own2)) {
    return true;
  }
  for (int dist = 1; dist <= range; ++dist) {
    // vary the partner taken from around pos2: first +dist, then -dist
    for (int sign = 1; sign >= -1; sign -= 2) {
      c1 = own1;
      c2 = getSequenceNeighbor(sequence, pos2, sign * dist, gapChar);
      if (scorer.isAllowedPair(c1, c2)) {
	return true;
      }
    }
    // vary the partner taken from around pos1: first +dist, then -dist
    for (int sign = 1; sign >= -1; sign -= 2) {
      c1 = getSequenceNeighbor(sequence, pos1, sign * dist, gapChar);
      c2 = own2;
      if (scorer.isAllowedPair(c1, c2)) {
	return true;
      }
    }
  }
  // nothing matched: report the unmodified characters
  c1 = own1;
  c2 = own2;
  return false;
}

/** returns true if pair of columns is a good candidate for improvement: not all matching, but at least to different matching pairs */
bool
improveColumns(const SequenceAlignment& ali, const CompensationScorer& scorer,
	       unsigned int pos1, unsigned int pos2,
	       int range,
	       double fracLimMin,
	       double fracLimMin2,
	       double mutInfMin,
	       string& s1,
	       string& s2)
{
  string s1Orig = ali.getColumn(pos1);
  string s2Orig = ali.getColumn(pos2);
  s1 = s1Orig;
  s2 = s2Orig;
  Vec<double> wVec(s1.size(), 1.0);
  double mutInfOrig = scorer.pairwiseEntropy3(s1Orig, s2Orig, wVec);
  char c1, c2;
  unsigned int count = 0;
  double fracOrig = fractionMatching(ali, scorer, pos1, pos2);
  for (unsigned int i = 0; i < ali.size(); ++i) {
    if (isRelaxedMatching(ali.getSequence(i), pos1, pos2, c1, c2, scorer, range, GAP_CHAR)) {
      s1[i] = c1;
      s2[i] = c2;
      ++count;
    }
  }
  double fracFinal = static_cast<double>(count)/ali.size();
  double mutInfFinal = scorer.pairwiseEntropy3(s1, s2, wVec);
  if ((fracFinal <= fracOrig) || (fracFinal < fracLimMin2)
      || (mutInfFinal <= mutInfOrig) || (mutInfFinal < mutInfMin)) {
    return false;
  }

  cout << "Improved columns " << pos1+1 << " " << pos2+1 << " from " 
       << fracOrig << " to " << fracFinal << endl;
  cout << s1 << endl << s2 << endl;
  cout << scorer.singleEntropy3(s1, wVec) << " "
       << scorer.singleEntropy3(s2, wVec) << " "
       << scorer.pairwiseEntropy3(s1, s2, wVec) << endl;
  cout << "Original columns: single information values and mutual information" 
       << endl;
  cout << s1Orig << endl << s2Orig << endl;
  cout << scorer.singleEntropy3(s1Orig, wVec) << " "
       << scorer.singleEntropy3(s2Orig, wVec) << " "
       << scorer.pairwiseEntropy3(s1Orig, s2Orig, wVec) << endl;
  return true;
}

/** Scans all pairs of the given column indices for improvable column pairs.
 *
 *  Pairs (i, j) with i + 3 >= j are skipped as too close. Candidates are
 *  selected with isImprovableCandidate and evaluated with improveColumns,
 *  both on the unmodified input alignment. If acceptFlag is set, accepted
 *  columns are stored in a working copy. At the end the working copy is
 *  assigned back to aliOrig (also when nothing was accepted).
 */
void
findImprovableColumns(SequenceAlignment& aliOrig, const CompensationScorer& scorer,
		      int range, double fracLimMin, double fracLimMin2, 
		      double gFracMax, double mutInfMin,
		      bool acceptFlag, const Vec<unsigned int>& indices)
{
  SimpleSequenceAlignment working;
  working.copy(aliOrig);
  string newCol1, newCol2;
  for (unsigned int first = 0; first < indices.size(); ++first) {
    const unsigned int colA = indices[first];
    for (unsigned int second = first+1; second < indices.size(); ++second) {
      const unsigned int colB = indices[second];
      if ((colA+3 >= colB)) {
	continue; // too close
      }
      if (!isImprovableCandidate(aliOrig, scorer, colA, colB, fracLimMin, gFracMax)) {
	continue;
      }
      if (!improveColumns(aliOrig, scorer, 
			  colA, colB, range, fracLimMin, fracLimMin2,
			  mutInfMin, newCol1, newCol2)) {
	continue;
      }
      if (!acceptFlag) {
	continue;
      }
      cout << "Accepting changed columns!" << endl;
      working.setColumn(newCol1, colA);
      working.setColumn(newCol2, colB);
    }
  }
  aliOrig = working;
}

/** Converts alignment column pos into a 0-based position within the ungapped
 *  sequence seqId.
 *
 *  @return number of non-gap characters before column pos, or -1 if the
 *          sequence carries GAP_CHAR at column pos
 */
int
relativePosition(const SequenceAlignment& ali, 
		 int pos,
		 unsigned int seqId)
{
  PRECOND((pos >= 0) && (pos < static_cast<int>(ali.getLength())));
  const string& seq = ali.getSequence(seqId);
  if (seq[pos] == GAP_CHAR) {
    return -1; // invalid position
  }
  // counting the letters in front of pos is the same as pos minus the gaps
  int lettersBefore = 0;
  for (int col = 0; col < pos; ++col) {
    if (seq[col] != GAP_CHAR) {
      ++lettersBefore;
    }
  }
  return lettersBefore;
}

/** Converts a 0-based position within the ungapped sequence seqId into the
 *  0-based alignment column that holds this character. This is the inverse
 *  of relativePosition: relativePosition(ali, absolutePosition(ali, p, id), id)
 *  equals p for every valid p.
 *
 *  @param pos   0-based index of the character, gaps not counted
 *  @param seqId index of the sequence in the alignment
 *  @return 0-based alignment column, or -1 if the sequence has fewer than
 *          pos + 1 non-gap characters
 */
int
absolutePosition(const SequenceAlignment& ali, 
		 int pos,
		 unsigned int seqId)
{
  PRECOND((pos >= 0) && (pos < static_cast<int>(ali.getLength())));
  const string& s = ali.getSequence(seqId);
  int lettersSeen = 0; // number of non-gap characters left of column i
  for (int i = 0; i < static_cast<int>(s.size()); ++i) {
    if (s[i] == GAP_CHAR) {
      continue; // gap columns never correspond to a sequence character
    }
    if (lettersSeen == pos) {
      return i;
    }
    ++lettersSeen;
  }
  return -1; // invalid: sequence not that long
}

/** Writes, for each (sequence, position) pair in indices, the alignment
 *  column computed by absolutePosition.
 *
 *  @param os      stream that receives the report
 *  @param ali     alignment to query
 *  @param indices flat list of pairs: indices[2k] is the 0-based sequence
 *                 index, indices[2k+1] the 0-based character position; a
 *                 trailing unpaired entry is ignored
 */
void
writeAbsolutePositions(ostream& os, 
		       const SequenceAlignment& ali,
		       const Vec<int>& indices)
{
  for (unsigned int i = 0; i + 1< indices.size(); i += 2) {
    // first index is sequence, second index is position:
    const int seqId = indices[i];
    const int pos = indices[i+1];
    // the name belongs to the sequence index, not to the loop counter
    os << "# Position of character " << pos + 1 << " of sequence " << seqId + 1 << " "
       << ali.getName(seqId) << " in alignment: " << absolutePosition(ali, pos, seqId) << endl;
  }
}

/** Writes, for each (sequence, column) pair in indices, the position within
 *  the sequence computed by relativePosition.
 *
 *  @param os      stream that receives the report
 *  @param ali     alignment to query
 *  @param indices flat list of pairs: indices[2k] is the 0-based sequence
 *                 index, indices[2k+1] the 0-based alignment column; a
 *                 trailing unpaired entry is ignored
 */
void
writeRelativePositions(ostream& os, 
		       const SequenceAlignment& ali,
		       const Vec<int>& indices)
{
  for (unsigned int i = 0; i + 1< indices.size(); i += 2) {
    // first index is sequence, second index is position:
    const int seqId = indices[i];
    const int col = indices[i+1];
    // the name belongs to the sequence index, not to the loop counter
    os << "# Position of alignment column " << col + 1 << " of sequence " << seqId + 1 << " "
       << ali.getName(seqId) << " in sequence: " << relativePosition(ali, col, seqId) << endl;
  }
}

/** Returns a copy of s in which every character that is neither GAP_CHAR nor
 *  contained in alphabet has been replaced by GAP_CHAR. */
string
conformAlignmentCharacters(const string& s,
			   const string& alphabet)
{
  string cleaned(s);
  for (string::size_type k = 0; k < cleaned.size(); ++k) {
    const char ch = s[k];
    if (ch == GAP_CHAR) {
      continue;
    }
    if (alphabet.find(ch) == string::npos) {
      // not found in alphabet!
      cleaned[k] = GAP_CHAR;
    }
  }
  return cleaned;
}

/** Applies the string version of conformAlignmentCharacters to every
 *  sequence of the alignment; names stay unchanged. */
void
conformAlignmentCharacters(SequenceAlignment& ali, 
			   const string& alphabet)
{
  unsigned int row = 0;
  while (row < ali.size()) {
    string cleaned = conformAlignmentCharacters(ali.getSequence(row), alphabet);
    ali.setSequence(cleaned, ali.getName(row), row);
    ++row;
  }
}

/* Conservation score of two regions: [p1Min,p1Max] and [p2Min, p2Max].
 *
 * Column p1Min+i is combined with column p2Max-i for i = 0 .. len-1, where
 * len = p1Max-p1Min+1. For each such pair the singleEntropy3 values of both
 * columns are added; the sum is divided by (2 * len) and then multiplied by
 * 0.5. p2Min is not used.
 *
 * Returns 0.0 for an empty region (len <= 0), like scoreRegionsMatching,
 * instead of dividing by zero.
 */
double
scoreRegionsConservation(const SequenceAlignment& ali, 
			 const Vec<double>& wVec,
			 const CompensationScorer& scorer, 
			 int p1Min,
			 int p1Max,
			 int p2Min,
			 int p2Max)
{
  int len = p1Max-p1Min+1;
  if (len <= 0) {
    return 0.0; // nothing to score; avoids 0/0
  }
  double score = 0.0;
  int startPos, stopPos;
  double term;
  for (int i = 0; i < len; ++i) {
    startPos = p1Min+i; // walks the first region left to right
    stopPos = p2Max-i;  // walks the second region right to left
    term = scorer.singleEntropy3(ali.getColumn(startPos), wVec)
      + scorer.singleEntropy3(ali.getColumn(stopPos), wVec);
    score += term;
  }
  score /= (2 * len); // average over all 2*len columns that were visited
  score *= 0.5;
  return score;
}


/* Matching score of two regions: [p1Min,p1Max] and [p2Min, p2Max].
 *
 * Character p1Min+i of s is paired with character p2Max-i for
 * i = 0 .. len-1 (len = p1Max-p1Min+1). Returns the fraction of these pairs
 * that scorer.isAllowedPair accepts, or 0.0 if len <= 0. p2Min is not used.
 */
double
scoreRegionsMatching(const string& s, 
		     const CompensationScorer& scorer, 
		     int p1Min,
		     int p1Max,
		     int p2Min,
		     int p2Max)
{
  const int len = p1Max-p1Min+1;
  if (len < 1) {
    return 0.0;
  }
  unsigned int allowedPairs = 0;
  int left = p1Min;
  int right = p2Max;
  // left runs forward through region 1 while right runs backward through region 2
  for (int remaining = len; remaining > 0; --remaining, ++left, --right) {
    if (scorer.isAllowedPair(s[left], s[right])) {
      ++allowedPairs;
    }
  }
  return static_cast<double>(allowedPairs)/static_cast<double>(len);
}


/* Combined score of two regions: [p1Min,p1Max] and [p2Min, p2Max].
 *
 * Returns wMatch * scoreRegionsMatching(sequence seqId, ...)
 *       + wCons  * scoreRegionsConservation(alignment, ...).
 * A term whose weight is exactly 0.0 is not evaluated at all.
 */
double
scoreRegions(const SequenceAlignment& ali, 
	     const Vec<double>& wVec,
	     unsigned int seqId,
	     const CompensationScorer& scorer, 
	     int p1Min,
	     int p1Max,
	     int p2Min,
	     int p2Max,
	     double wMatch,
	     double wCons)
{
  const bool useMatching = (wMatch != 0.0);
  const bool useConservation = (wCons != 0.0);
  double total = 0.0;
  if (useMatching) {
    const double matching = scoreRegionsMatching(ali.getSequence(seqId), scorer,
						 p1Min, p1Max, p2Min, p2Max );
    total += wMatch * matching;
  }
  if (useConservation) {
    const double conservation = scoreRegionsConservation(ali, wVec, scorer, 
							 p1Min, p1Max, p2Min, p2Max);
    total += wCons * conservation;
  }
  return total;
}

/** Inserts shiftMax gap columns at column pos and shifts all bookkeeping
 *  that refers to columns at or right of pos by shiftMax: the start and stop
 *  of every stem and every entry of indexMapping. */
void
addRegionPadding(SequenceAlignment& ali, 
		 int pos,
		 Vec<Stem>& stems, 
		 int shiftMax,
		 Vec<unsigned int>& indexMapping)
{
  int inserted = 0;
  while (inserted < shiftMax) {
    ali.insertGap(pos);
    ++inserted;
  }
  for (unsigned int s = 0; s < stems.size(); ++s) {
    Stem& stem = stems[s];
    if (stem.getStop() >= pos) {
      stem.setStop(stem.getStop()+shiftMax);
    }
    if (stem.getStart() >= pos) {
      stem.setStart(stem.getStart()+shiftMax);
    }
  }
  for (unsigned int m = 0; m < indexMapping.size(); ++m) {
    const bool rightOfInsertion = static_cast<int>(indexMapping[m]) >= pos;
    if (rightOfInsertion) {
      indexMapping[m] += shiftMax;
    }
  }
}

/** Pads both strands of every stem with shiftMax gap columns on each side.
 *
 *  For each stem four insertions are made, in this order: right after the
 *  stop position, at the beginning of the stop-side strand, at the start
 *  position, and right after the start-side strand. Every insertion updates
 *  the stems and the index mapping, so later insertions already use the
 *  shifted coordinates; the order of the calls therefore matters.
 *
 *  @return mapping that is initialised with generateStair(ali.getLength())
 *          and whose entries were shifted along with every insertion
 */
Vec<unsigned int>
addRegionPadding(SequenceAlignment& ali, 
		 Vec<Stem>& stems, 
		 int shiftMax)
{
  Vec<unsigned int> indexMapping = generateStair(ali.getLength());
  for (unsigned int i = 0; i < stems.size(); ++i) {
    addRegionPadding(ali, stems[i].getStop()+1, stems, shiftMax, indexMapping);
    addRegionPadding(ali, stems[i].getStop()+1-stems[i].getLength(), stems, shiftMax, indexMapping);
    addRegionPadding(ali, stems[i].getStart(), stems, shiftMax, indexMapping);
    addRegionPadding(ali, stems[i].getStart()+stems[i].getLength(), stems, shiftMax, indexMapping);
  }
  return indexMapping;
}

/** Randomized optimization of the two regions [p1Min,p1Max] and
 *  [p2Min,p2Max] of the alignment.
 *
 *  In each of iterMax iterations a random sequence is chosen. The content of
 *  both regions, each widened by shiftMax columns on either side, is handed
 *  to a SequenceMutator; every variant it delivers is written into the
 *  sequence and scored with scoreRegions. If the best variant scores higher
 *  than the unmodified sequence it is stored in the alignment, otherwise the
 *  original sequence is restored.
 *
 *  The caller must make sure that p1Min - shiftMax and p2Min - shiftMax are
 *  valid string positions (substr is called with them).
 *  delMax and insMax are not used by this function.
 */
void
optimizeRegions(SequenceAlignment& ali, 
		const Vec<double>& wVec,
		const CompensationScorer& scorer, 
		int p1Min,
		int p1Max,
		int p2Min,
		int p2Max,
		int shiftMax,
		int delMax,
		int insMax,
		unsigned int iterMax,
		double wMatch,
		double wCons,
		int verboseLevel)
{
  cout << "# Starting optimizeRegions "
       << wMatch << " " << wCons << endl;
    
  Random& rnd = Random::getInstance();

  unsigned int seqId = 0;
  // start and length of the two windows, widened by shiftMax on both sides
  int p1MinH = p1Min - shiftMax;
  int p2MinH = p2Min - shiftMax;
  int p1LH = p1Max - p1Min + (2 * shiftMax) + 1;
  int p2LH = p2Max - p2Min + (2 * shiftMax) + 1;
  for (unsigned int iter = 0; iter < iterMax; ++iter) {    
    // choose sequence to optimize:
    seqId = rnd.getRand(ali.size());
    string oldSequence = ali.getSequence(seqId);
    string reg1 = oldSequence.substr(p1MinH, p1LH);
    string reg2 = oldSequence.substr(p2MinH, p2LH);
    SequenceMutator mutator1(reg1);
    // NOTE(review): mutator2 is created once per iteration and not re-created
    // for each variant of region 1; presumably it is exhausted after the first
    // pass of the inner loop, so later region-1 variants are never scored.
    // Confirm against SequenceMutator before changing.
    SequenceMutator mutator2(reg2);
    string bestCurrSequence = oldSequence;
    double oldScore = scoreRegions(ali, wVec, seqId , scorer, 
				   p1Min, p1Max, p2Min, p2Max,
				   wMatch, wCons);
    double bestCurrScore = oldScore;
    cout << "# Iteration " << iter + 1 << " : optimizing sequence " << seqId
	 << " original score: " << oldScore << " sequences: " << reg1 << " " << reg2 << endl;

    while (mutator1.hasNext()) {
      mutator1.nextSequence();
      string s1 = mutator1.getSequenceCurr();
      string sWork = oldSequence;
      for (unsigned int i = 0; i < s1.size(); ++i) {
	sWork[p1MinH+i] = s1[i];
      }
      while (mutator2.hasNext()) {
	mutator2.nextSequence();
	string s2 = mutator2.getSequenceCurr();
	for (unsigned int i = 0; i < s2.size(); ++i) {
	  sWork[p2MinH+i] = s2[i];
	}
	// the candidate has to be stored in the alignment because the
	// conservation part of the score is computed from alignment columns
	ali.setSequence(sWork, ali.getName(seqId), seqId);
	double score = scoreRegions(ali, wVec, seqId,
				    scorer, p1Min, p1Max, p2Min, p2Max,
				    wMatch, wCons);
	if (verboseLevel > 1) {
	  cout << "# Trying patches : " << s1 << " " << s2 << " " 
	       << reverseString(s2) << " " << score << endl;
	}
	if (score > bestCurrScore) {
	  bestCurrScore = score;
	  bestCurrSequence = sWork;
	}
      }
    }
    if (bestCurrScore > oldScore) {
      cout << "# Accepting step! " << bestCurrScore << " " << seqId << endl;
      // the alignment still holds the last candidate that was tried, which is
      // not necessarily the best one: store the best candidate explicitly
      ali.setSequence(bestCurrSequence, ali.getName(seqId), seqId);
    }
    else {
      cout << "# Not accepting step! " << bestCurrScore << " " << seqId << endl;
      ali.setSequence(oldSequence, ali.getName(seqId), seqId);
    }
  }
}

/** Runs the region-based optimizeRegions for every stem.
 *
 *  The two regions of a stem are [start, start+length-1] and
 *  [stop-length+1, stop]. Stems whose start position is smaller than
 *  shiftMax are skipped with a message, because the widened window would
 *  begin before the first column.
 */
void
optimizeRegions(SequenceAlignment& ali, 
		const Vec<double>& wVec,
		const CompensationScorer& scorer, 
		const Vec<Stem>& stems,
		int shiftMax,
		int delMax,
		int insMax,
		unsigned int iterMax,
		double wMatch,
		double wCons,
		int verboseLevel)
{
  cout << "# Optimizing alignment according to defined stems!" << endl;
  for (unsigned int k = 0; k < stems.size(); ++k) {
    const Stem& stem = stems[k];
    const int span = stem.getLength() - 1;
    const int start1 = stem.getStart();
    const int stop1 = stem.getStart() + span;
    const int start2 = stem.getStop() - span;
    const int stop2 = stem.getStop();
    if (start1 >= shiftMax) {
      cout << "# Optimizing in region of stem: " << stem << " " 
	   << start1 + 1 << " " << stop1 + 1 << " " << start2 + 1 << " " << stop2 + 1 << endl;
      optimizeRegions(ali, wVec, scorer, start1, stop1, start2, stop2, shiftMax, delMax, insMax, iterMax, wMatch, wCons, verboseLevel);
    }
    else {
      cout << "# Ignoring stem : " << stem 
	   << " because start position is smaller than potential shifts"
	   << endl;
    }
  }
}

/** Scrambles the alignment: the columns are put into a random order and the
 *  characters within each column are shuffled as well. The result is built
 *  in a copy and then written back column by column. */
void
scrambleAlignment(SequenceAlignment& ali)
{
  SimpleSequenceAlignment shuffled;
  shuffled.copy(ali); // careful, different types!
  Vec<unsigned int> columnOrder = generateStair(ali.getLength());
  Random& rnd = Random::getInstance();
  random_shuffle(columnOrder.begin(), columnOrder.end(), rnd);
  unsigned int target = 0;
  while (target < columnOrder.size()) {
    // take a randomly chosen source column and permute its rows
    string column = ali.getColumn(columnOrder[target]);
    random_shuffle(column.begin(), column.end(), rnd);
    shuffled.setColumn(column, target);
    ++target;
  }
  for (unsigned int col = 0; col < ali.getLength(); ++col) {
    ali.setColumn(shuffled.getColumn(col), col);
  }
}

/** Shuffles the characters of every sequence individually; works for
 *  sequences of different lengths. */
void
scrambleAlignmentSequences(SequenceAlignment& ali)
{
  Random& rnd = Random::getInstance();
  unsigned int row = 0;
  while (row < ali.size()) {
    string shuffled = ali.getSequence(row);
    random_shuffle(shuffled.begin(), shuffled.end(), rnd);
    ali.setSequence(shuffled, row);
    ++row;
  }
}

/** scrambles alignment sequences, even if they have different lengths */
string
randomSequence(const string& sOrig, const string& alphabet) {
  Random& rnd = Random::getInstance();
  string s = sOrig;
  for (string::size_type i = 0; i < s.size(); ++i) {
    s[i] = alphabet[rnd.getRand(alphabet.size())];
  }
  ERROR_IF(s.size() != sOrig.size(), "Internal error in randomSequence routine: result sequence has wrong length.");
  return s;
}


/** Replaces every sequence by a random sequence of the same length over the
 *  given alphabet (see randomSequence). */
void
randomAlignmentSequences(SequenceAlignment& ali, const string& alphabet)
{
  unsigned int row = 0;
  while (row < ali.size()) {
    string replacement = randomSequence(ali.getSequence(row), alphabet);
    ali.setSequence(replacement, row);
    ++row;
  }
}

/** Replaces, throughout the alignment, every character that is neither
 *  gapChar nor part of alphabet by gapChar. All 256 char values are tried. */
void
replaceBadCharacters(SequenceAlignment& ali, 
		     const string& alphabet, 
		     char gapChar)
{
  for (int code = 0; code < 256; ++code) {
    const char c = static_cast<char>(code);
    const bool keep = (c == gapChar) || (alphabet.find(c) < alphabet.size());
    if (!keep) {
      // replace if not part of standard alphabet:
      ali.replace(c, gapChar);
    }
  }
}

/** Returns true if s has at least one non-gap character in each of the two
 *  stem regions: positions start .. start+length-1 and positions
 *  stop-length+1 .. stop. */
bool
checkSequenceOk(const string& s, 
		const Stem& stem,
		char gapChar)
{
  bool startSideHasChar = false;
  for (int k = 0; (k < stem.getLength()) && !startSideHasChar; ++k) {
    startSideHasChar = (s[stem.getStart()+k] != gapChar);
  }
  if (!startSideHasChar) {
    return false;
  }
  bool stopSideHasChar = false;
  for (int k = 0; (k < stem.getLength()) && !stopSideHasChar; ++k) {
    stopSideHasChar = (s[stem.getStop()-k] != gapChar);
  }
  return stopSideHasChar;
}

/** Returns true if the number of gap characters of s within the two stem
 *  regions (start .. start+length-1 and stop-length+1 .. stop, counted
 *  together) does not exceed maxAllowedGaps. */
bool
checkSequenceGapsOk(const string& s, 
		    const Stem& stem,
		    char gapChar,
		    unsigned int maxAllowedGaps)
{
  unsigned int gaps = 0;
  for (int k = 0; k < stem.getLength(); ++k) {
    // one position of each strand per step
    if (s[stem.getStart()+k] == gapChar) {
      ++gaps;
    }
    if (s[stem.getStop()-k] == gapChar) {
      ++gaps;
    }
  }
  return !(gaps > maxAllowedGaps);
}


/** Removes every sequence for which checkSequenceOk fails for at least one
 *  stem, i.e. which has only gaps in one of the two regions of that stem.
 *  Each removal is reported on cout. */
void
removeEmptySequences(SequenceAlignment& ali, 
		     const Vec<Stem>& stems, char gapChar)

{  
  for (int j = ali.size()-1; j >= 0; --j) {
    for (unsigned int i = 0; i < stems.size(); ++i) {
      if (!checkSequenceOk(ali.getSequence(j), stems[i], gapChar)) {
	cout << "# Removing sequence: " << j + 1 << " " << ali.getName(j) << " "
	     << ali.getSequence(j) << endl;
	ali.removeSequence(static_cast<unsigned int>(j));
	// sequence j no longer exists: checking the remaining stems would
	// inspect (and possibly remove) whatever sequence moved to index j,
	// or read past the end if j was the last sequence
	break;
      }
    }
  }
}

/** Removes every sequence for which checkSequenceGapsOk fails for at least
 *  one stem, i.e. which has more than maxAllowedGaps gap characters in the
 *  regions of that stem. Each removal is reported on cout. */
void
removeGapSequences(SequenceAlignment& ali, 
		   const Vec<Stem>& stems, char gapChar, unsigned int maxAllowedGaps)

{  
  for (int j = ali.size()-1; j >= 0; --j) {
    for (unsigned int i = 0; i < stems.size(); ++i) {
      if (!checkSequenceGapsOk(ali.getSequence(j), stems[i], gapChar, maxAllowedGaps)) {
	cout << "# Removing sequence: " << j + 1 << " " << ali.getName(j) << " "
	     << ali.getSequence(j) << endl;
	ali.removeSequence(static_cast<unsigned int>(j));
	// sequence j no longer exists: checking the remaining stems would
	// inspect (and possibly remove) whatever sequence moved to index j,
	// or read past the end if j was the last sequence
	break;
      }
    }
  }
}

/** Removes every sequence whose name is not exactly equal to one of the
 *  entries of keepNames. */
void
keepAliNames(SequenceAlignment& ali, const Vec<string>& keepNames)
{
  int row = ali.size();
  while (--row >= 0) {
    const string rowName = ali.getName(row);
    // linear search for an identical name
    unsigned int k = 0;
    while ((k < keepNames.size()) && (rowName != keepNames[k])) {
      ++k;
    }
    const bool listed = (k < keepNames.size());
    if (!listed) {
      ali.removeSequence(row);
    }
  }
}

/** Overrides every sequence name with "s" followed by uitos(index), where
 *  index is the 0-based position of the sequence in the alignment.
 *  NOTE(review): the previous comment promised "s1, s2, s3, ..." but the
 *  numbering produced here starts with index 0 - confirm which is intended. */
void
replaceAliNamesWithNumbers(SequenceAlignment& ali)
{
  const string namePrefix("s");
  unsigned int row = 0;
  while (row < ali.size()) {
    ali.setName( (namePrefix + uitos(row)) , row);
    ++row;
  }
}

/** Replaces all spaces in the sequence names with underscores. */
void
replaceAliNamesSpacesWithUnderscore(SequenceAlignment& ali)
{
  for (unsigned int i = 0; i < ali.size(); ++i) {
    string s = ali.getName(i);
    s = translate(s, ' ', '_');
    ali.setName(s, i);
  }
}

/** Deletes every column for which ali.isGapColumn reports true. */
void
deleteGapColumns(SequenceAlignment& ali)
{
  // right to left: a deletion does not move the columns still to be checked
  int col = ali.getLength();
  while (--col >= 0) {
    if (!ali.isGapColumn(col)) {
      continue;
    }
    ali.deleteColumn(col);
  }
}

/** Removes all sequences that do not carry motifString at position motifPos.
 *
 *  A sequence matches if the characters starting at motifPos (at most
 *  motifString.size() of them) are equal to motifString. Sequences shorter
 *  than motifPos cannot contain the motif and are removed as well, instead
 *  of raising std::out_of_range.
 *
 *  @param ali         alignment that is modified in place
 *  @param motifString motif that must be present
 *  @param motifPos    0-based position at which the motif must start
 */
void
removeMotifMismatches(SequenceAlignment& ali, 
		      const string& motifString, 
		      unsigned int motifPos)
{
  int nn = ali.size();
  for (int i = nn-1; i >= 0; --i) {
    const string& seq = ali.getSequence(i);
    // the length test must come first: compare(pos, ...) throws for pos > size()
    const bool mismatch = (seq.size() < motifPos)
      || (seq.compare(motifPos, motifString.size(), motifString) != 0);
    if (mismatch) {
      ali.removeSequence(static_cast<unsigned int>(i));
    }
  }
}

/** reads plain set of sequences, each line is one sequence */
void
readPlainSequences(istream& is, 
		   SequenceAlignment& ali)
{
  // ali.clear();
  Vec<string> sequences;
  while (is) {
    string line = getLine(is);
    line = removeWhiteSpaceFromString(line);
    if (line.size() > 0) {
      upperCase(line);
      sequences.push_back(line);
    }
  }
  for (unsigned int i = 0; i < sequences.size(); ++i) {
    string name = "seq" + uitos(i+1);
    ali.addSequence(sequences[i], name);
  }
}

/** Bootstrap resampling: returns an alignment with as many sequences as ali,
 *  each slot filled with a randomly drawn sequence (and its name) of ali.
 *  Sequences are drawn with replacement. */
SimpleSequenceAlignment
bootstrapResample(const SequenceAlignment& ali)
{
  Random& rnd = Random::getInstance();
  SimpleSequenceAlignment resampled;
  resampled.copy(ali);
  unsigned int slot = 0;
  while (slot < ali.size()) {
    const unsigned int pick = rnd.getRand(ali.size());
    resampled.setSequence(ali.getSequence(pick),ali.getName(pick), slot);
    ++slot;
  }
  return resampled;
}

/** Returns true if c is one of the two gap symbols '.' or '-'. */
bool 
isGap(char c) {
  switch (c) {
  case '.':
  case '-':
    return true;
  default:
    return false;
  }
}

/** Determines the part of s that remains after stripping leading and
 *  trailing gap characters (as defined by isGap).
 *
 *  @param prunePos out: number of leading gap characters, i.e. index of the
 *                  first non-gap character (s.size() if there is none)
 *  @param pruneLen out: length of the remaining part; for a non-empty string
 *                  consisting only of gaps this is negative (-s.size())
 */
void
computePrunePosition(const string& s, int& prunePos, int& pruneLen)
{
  const int len = static_cast<int>(s.size());
  // skip gaps from the left
  int head = 0;
  while ((head < len) && isGap(s[head])) {
    ++head;
  }
  // skip gaps from the right; tail is one past the last non-gap character
  int tail = len;
  while ((tail > 0) && isGap(s[tail - 1])) {
    --tail;
  }
  prunePos = head;
  pruneLen = tail - head;
}

/** Returns an alignment in which sequence i is sequence i of ali followed by
 *  sequence i of ali2; the name is "<name1> CONCATENATED <name2>".
 *  Both alignments must have the same number of sequences. */
SimpleSequenceAlignment
pasteAlignments(const SimpleSequenceAlignment& ali,
		const SimpleSequenceAlignment& ali2) {
  PRECOND(ali.size() == ali2.size());
  SimpleSequenceAlignment joined(ali);
  for (SequenceAlignment::size_type row = 0; row < ali.size(); ++row) {
    string joinedName = ali.getName(row);
    joinedName += " CONCATENATED ";
    joinedName += ali2.getName(row);
    string joinedSeq = ali.getSequence(row);
    joinedSeq += ali2.getSequence(row);
    joined.setSequence(joinedSeq, joinedName, row);
  }
  return joined;
}

/** Returns a copy of s1 in which the characters starting at offset have been
 *  overwritten with s2.
 *
 *  The result always has the length of s1: characters of s2 that would land
 *  before the first or behind the last position of s1 (negative offset, or
 *  offset + s2.size() > s1.size()) are dropped instead of being written out
 *  of bounds.
 *
 *  @param s1     base string
 *  @param s2     string written over s1
 *  @param offset position in s1 that receives s2[0]
 */
string
overwriteSequences(const string& s1,
		   const string& s2, int offset) 
{
  string result = s1;
  for (string::size_type i = 0; i < s2.size(); ++i) {
    // signed arithmetic, so that a negative offset does not wrap around
    const string::difference_type target =
      static_cast<string::difference_type>(i) + offset;
    if (target < 0) {
      continue; // still left of the string
    }
    if (static_cast<string::size_type>(target) >= result.size()) {
      break; // everything from here on lies right of the string
    }
    result[static_cast<string::size_type>(target)] = s2[i];
  }
  return result;
}

/** Returns a copy of ali in which sequence i has been overwritten, starting
 *  at column offset, with sequence i of ali2 (see overwriteSequences). The
 *  name becomes "<name1> SPLICED <name2>".
 *
 *  ali2 must provide a sequence for every sequence of ali; additional
 *  sequences of ali2 are ignored.
 */
SimpleSequenceAlignment
overwriteAlignments(const SimpleSequenceAlignment& ali,
		    const SimpleSequenceAlignment& ali2,
		    int offset) {
  // every index used below is applied to ali2 as well
  PRECOND(ali2.size() >= ali.size());
  SimpleSequenceAlignment newAli = ali;
  for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
    string newName = ali.getName(i) + " SPLICED " + ali2.getName(i);
    string newSeq = overwriteSequences(ali.getSequence(i), ali2.getSequence(i), offset);
    newAli.setSequence(newSeq, newName, i);
  }
  return newAli;
}

/** Searches ali2 for a fusion partner of sequence n of ali1.
 * Two sequences are partners if the first token of their names is identical;
 * tokens are obtained with getTokens(name, ". ", false).
 *
 * @return index of the first partner in ali2, or ali2.size() if none exists
 */
unsigned int
findBedFuseCandidate(unsigned int n, 
		     const SequenceAlignment& ali1, 
		     const SequenceAlignment& ali2)
{
  const string wanted = getTokens(ali1.getName(n), ". ", false)[0];
  unsigned int idx = 0;
  while (idx < ali2.size()) {
    const string candidate = getTokens(ali2.getName(idx), ". ", false)[0];
    if (wanted == candidate) {
      return idx;
    }
    ++idx;
  }
  return ali2.size();
}

/** Fuses sequence n1 of ali1 with sequence n2 of ali2 and stores the result
 *  at index n1 of ali1: the two sequences are joined with numberGaps gap
 *  characters in between, the two names are joined with " | ". */
void
fuseBedSequences(unsigned int n1, 
		 SequenceAlignment& ali1, 
		 unsigned int n2,
		 const SequenceAlignment& ali2,
		 unsigned int numberGaps)
{
  const string spacer(numberGaps, GAP_CHAR);
  string fusedSeq = ali1.getSequence(n1);
  fusedSeq += spacer;
  fusedSeq += ali2.getSequence(n2);
  string fusedName = ali1.getName(n1);
  fusedName += " | ";
  fusedName += ali2.getName(n2);
  ali1.setSequence(fusedSeq, fusedName, n1);
}

/** Fuses two alignments whose sequence names are in BED format
 *  (chr start length).
 *
 *  Every sequence of ali1 is joined with its partner from ali2 (see
 *  findBedFuseCandidate), separated by numberGaps gap characters. A sequence
 *  of ali1 without partner is extended by gaps; a sequence of ali2 that was
 *  not used as a partner is appended with gaps in front. In both cases the
 *  padding includes the numberGaps spacer, so that all rows of the result
 *  have the same length as the fused rows
 *  (ali1.getLength() + numberGaps + ali2.getLength()).
 *
 *  @return the fused alignment
 */
SimpleSequenceAlignment
fuseBedAlignments(const SimpleSequenceAlignment& ali1,
		  const SimpleSequenceAlignment& ali2,
		  unsigned int numberGaps) {
  SimpleSequenceAlignment newAli = ali1;
  unsigned int len1 = ali1.getLength();
  unsigned int len2 = ali2.getLength();
  string gapString(numberGaps, GAP_CHAR); // spacer between the two parts
  string gapString1(len1, GAP_CHAR);      // stands in for a missing ali1 part
  string gapString2(len2, GAP_CHAR);      // stands in for a missing ali2 part
  Vec<int> fuseFlags(ali2.size(), 0);     // 1: sequence of ali2 was used as partner
  for (SequenceAlignment::size_type i = 0; i < ali1.size(); ++i) {
    unsigned int fuseCandidate = findBedFuseCandidate(i, newAli, ali2);
    if (fuseCandidate < ali2.size()) {
      fuseFlags[fuseCandidate] = 1;
      fuseBedSequences(i, newAli, fuseCandidate, ali2, numberGaps);
    }
    else {
      cout << "# Could not find partner for " << ali1.getName(i) << endl;
      newAli.setSequence(ali1.getSequence(i) + gapString + gapString2 , i); // add gaps
    }
  }
  // add all sequences that could not be found:
  for (SequenceAlignment::size_type i = 0; i < fuseFlags.size(); ++i) {
    if (fuseFlags[i] == 0) {
      newAli.addSequence(gapString1 + gapString + ali2.getSequence(i), ali2.getName(i)); // add gaps
    }
  }
  return newAli;
}


/** copies names from ali2 to ali */
void
copyAliNames(const SequenceAlignment& ali2,
	     SequenceAlignment& ali) {
  PRECOND(ali.size() == ali2.size());
  for (SequenceAlignment::size_type i = 0; i < ali.size(); ++i) {
    ali.setName(ali2.getName(i), i);
  }
}

/** Computes the length to keep when a trailing poly-A tail is pruned.
 *
 *  The sequence is compared via upperCase. If it is empty or does not end
 *  with 'A', 0 is returned (no pruning). Otherwise the sequence is scanned
 *  from its last index i downwards (while i > prunePos + 2) for the first i
 *  at which neither s[i-2] nor s[i-1] is 'A'; the result is i - prunePos.
 *  If no such index exists, a warning is written to cerr and 0 is returned.
 *
 *  @param sOrig    sequence to inspect
 *  @param prunePos in/out: start position of the kept region; a negative
 *                  value is set to 0
 *  @return length of the region to keep, or 0 for "no pruning"
 */
int
computePolyAPruneLength(const string& sOrig,
			int& prunePos) {
  string s = sOrig;
  upperCase(s);
  // the empty check must come first: s[s.size()-1] is invalid for ""
  if (s.empty() || (s[s.size()-1] != 'A')) {
    return 0; // no pruning
  }
  if (prunePos < 0) {
    prunePos = 0;
  }
  for (int i = s.size()-1; i > (prunePos + 2); --i) {
    char c1 = s[i-2];
    char c2 = s[i-1];
    if ((c1 != 'A') && (c2 != 'A')) {
      return (i-prunePos);
    }
  }
  cerr << "Warning: sequence consists of only A!?\n" << endl;
  return 0; // all A !?
}

/** Renames the sequences of the alignment with the provided names: sequence
 *  i receives newNames[i]. If the two counts differ, only the first
 *  min(newNames.size(), ali.size()) sequences are renamed. */
void
renameSequences(SequenceAlignment& ali, const Vec<string>& newNames) {
  SequenceAlignment::size_type row = 0;
  while ((row < newNames.size()) && (row < ali.size())) {
    ali.setName(newNames[row], row);
    ++row;
  }
}

/** Returns a copy of s in which every position i with v[i] > thresh has been
 *  replaced by 'X'.
 *
 *  Only positions that exist in both s and v are considered, so a value
 *  vector that is longer than the sequence does not cause writes behind the
 *  end of the string.
 *
 *  @param s      sequence
 *  @param v      one value per sequence position
 *  @param thresh positions with a value strictly above this are hidden
 */
string
hideStructuredResidues(const string& s, const Vec<double>& v, double thresh) {
  string result = s;
  for (Vec<double>::size_type i = 0; (i < v.size()) && (i < result.size()); ++i) {
    if (v[i] > thresh) {
      result[i] = 'X';
    }
  }
  return result;
}

/** Applies the string version of hideStructuredResidues to every sequence,
 *  using row i of matrix as the values for sequence i. */
void
hideStructuredResidues(SequenceAlignment& ali,
		       const Vec<Vec<double> >& matrix,
		       double thresh) {
  SequenceAlignment::size_type row = 0;
  while (row < ali.size()) {
    string masked = hideStructuredResidues(ali.getSequence(row), matrix[row], thresh);
    ali.setSequence(masked, row);
    ++row;
  }
}

// Appends "length" copies of character c to the end of every sequence of the
// alignment (called from main with c == 'A' to attach a poly-A tail).
void addCharacters(SequenceAlignment& ali, char c, unsigned int length) {
  const string suffix(length, c);
  ASSERT(suffix.size() == length);
  SequenceAlignment::size_type idx = 0;
  while (idx < ali.size()) {
    ali.setSequence(ali.getSequence(idx) + suffix, idx);
    ++idx;
  }
}

int
main(int argc, char ** argv)
{
  bool helpMode;
  int argcFile = 0;
  char ** argvFile = 0;
  char gapChar = '-';
  unsigned int addPolyALen = 0;
  int bootstrapMode = 0; // if true perform bootstrap resampling
  int clipAfter = -1;
  int clipBefore = -1;
  int collapseId = 0; // collapse alignment with respect to sequence n
  string aliOperation = "";
  string aliOperation2 = "";
  int conformNameLength = -1; // if greater zero: prune sequence name lengths
  int improveAcceptMode = 0;
  // int optimizeMode = 0;
  int p1Min = 0;
  int p1Max = 0;
  int p2Min = 0;
  int p2Max = 0;
  int shiftMax = 1;
  int delMax = 1;
  int insMax = 1;
  int paddingMode = 0;
  int prunePolyAMode = 0;
  int range = 1;
  int scrambleMode = 0;
  int seed = 0; // seed for random number generator
  unsigned int gapsAllowedMax = 1;
  unsigned int ibest = 0;
  unsigned int jbest = 0;
  unsigned int motifPos = 0;
  unsigned int sampleSize = 0; // if greater zero, take only random subset
  unsigned int shuffleIterations = 5;
  int verboseLevel = 1;
  int inputFileFormat = 1;
  int checkDuplicate = 0; // 1; can be expensive, not default anymore
  int checkLength = 0; // 1; // can be expensive, not default anymore
  int delGapMode = 0;
  int fillMode = 0; // if set to one, extend all sequences to longest sequence but introducing gaps
  int improveMode = 0;
  int iterMax = 100;
  int nameEditMode = 0;
  int outputFileFormat = 1;
  // int outputLineLength = -1; // no line control
  int prunePos = -1;
  int pruneLen = -1;
  int pruneWings = -1; // remove all columns corresponding to tailing gaps in specified seqeunce
  int removeEmptySequenceMode = 0;
  int seqMinNum = 0;
  
  int thinMode = 0;
  // int referenceId = -1;
  int uToTMode = 0;
  int tToUMode = 0;
  double fracLimMin = 0.85;
  double fracLimMin2 = 0.95;
  double gFracMax = 0.9;
  double mutInfMin = 0.1;
  double pairsMatchLimit = 0.0;
  double shuffleNormLimit = 0.01; // not more than 1% deviation of dinucleotide frequencies
  double simiMax = 100.0;
  double simiMin = 0.0;
  double structureMatrixThreshold = 0.5;
  string allowedLengthsString;
  string alphabet = "ACGU";
  double wMatch = 1.0; // weight of sequence basepairings
  double wCons = 1.0; // weight of sequence conservation
  string collapseName;
  string columnSubsetString;
  string commandFileName;
  string inputFileName;
  string inputFileName2;
  string logFileName; //  = "mainprogramtemplate.log";
  string makeFirstName;
  string motifString;
  string motifListName;
  string nameKeepFileName;
  string outputFileName;
  string pickSubsetString;
  string regionFileName;
  string removePickSubsetString;
  string renameFileName;
  string replaceAlphabet;
  string rootDir = ".";
  string structureMatrixFileName;
  string subsetFileName;
  string subsetString;
  SimpleSequenceAlignment ali, ali2;
  CompensationScorer scorer;
  Random& rnd = Random::getInstance();
  Vec<unsigned int> aliOutPairs;
  Vec<unsigned int> columnSubsetIndices, subsetIndices;
  Vec<Vec<double> > structureMatrix;
  Vec<int> absoluteIndices;
  Vec<unsigned int> pickSubsetIndices, removePickSubsetIndices;
  Vec<int> relativeIndices;
  Vec<int> region;
  Vec<string> keepNames;
  Vec<string> motifs;

  Vec<Stem> regionStems;
  getArg("-help", helpMode, argc, argv);

  if ((argc < 2) || helpMode)  {
    helpOutput(cout);
    exit(0);
  }

  getArg("-root", rootDir, argc, argv, rootDir);
  addSlash(rootDir);

  getArg("-algorithm", scorer.algorithm, argc,argv, scorer.algorithm);
  getArg("-bootstrap", bootstrapMode, argc, argv, bootstrapMode);
  getArg("-commands", commandFileName, argc, argv, commandFileName);
  addPathIfRelative(commandFileName, rootDir);

  if (commandFileName.size() > 0) {
    ifstream commandFile(commandFileName.c_str());
    if (!commandFile) {
      if (isPresent("-commands", argc, argv)) {
	ERROR_IF(!commandFile, "Error opening command file.");
      }
      else {
	cerr << "Warning: Could not find command file: " + commandFileName 
	     << endl;
      }
    }
    else {
      argvFile = streamToCommands(commandFile, argcFile, 
				  string("mainprogramtemplate"));
    }
    commandFile.close();
  }


  getArg("-absolute", absoluteIndices, argc, argv);
  getArg("-add-polya", addPolyALen, argcFile, argvFile);
  getArg("-add-polya", addPolyALen, argc, argv);
  convert2InternalCounting(absoluteIndices);
  getArg("-check-dup", checkDuplicate, argcFile, argvFile, checkDuplicate);
  getArg("-check-dup", checkDuplicate, argc, argv, checkDuplicate);
  getArg("-check-length", checkLength, argcFile, argvFile, checkLength);
  getArg("-check-length", checkLength, argc, argv, checkLength);
  getArg("-collapse", collapseId, argcFile, argvFile, collapseId);
  getArg("-collapse", collapseId, argc, argv, collapseId);
  --collapseId; // internal counting starts at zero
  getArg("-collapse-name", collapseName, argcFile, argvFile, collapseName);
  getArg("-collapse-name", collapseName, argc, argv, collapseName);
  getArg("-columns", columnSubsetString ,argc, argv, columnSubsetString);
  columnSubsetIndices = parseStringToVector(columnSubsetString);
  sort(columnSubsetIndices.begin(), columnSubsetIndices.end());
  convert2InternalCounting(columnSubsetIndices);
  getArg("-conform", conformNameLength, argcFile, argvFile, conformNameLength);
  getArg("-conform", conformNameLength, argc, argv, conformNameLength);
  getArg("-cons-weight", wCons, argcFile, argvFile, wCons);
  getArg("-cons-weight", wCons, argc, argv, wCons);
  getArg("-del", insMax, argcFile, argvFile, delMax);
  getArg("-del", insMax, argc, argv, delMax);
  getArg("-del-gaps", delGapMode, argcFile, argvFile, delGapMode);
  getArg("-del-gaps", delGapMode, argc, argv, delGapMode);
  getArg("-clip-after", clipAfter, argc, argv, clipAfter);
  // --clipAfter; // internal counting
  getArg("-clip-before", clipBefore, argc, argv, clipBefore);
  --clipBefore;
  getArg("-fill", fillMode, argcFile, argvFile, fillMode);
  getArg("-fill", fillMode, argc, argv, fillMode);
  getArg("-gaps", gapsAllowedMax, argcFile, argvFile, gapsAllowedMax);
  getArg("-gaps", gapsAllowedMax, argc, argv, gapsAllowedMax);
  getArg("i", inputFileName, argcFile, argvFile, inputFileName);
  getArg("i", inputFileName, argc, argv, inputFileName);
  getArg("-i2", inputFileName2, argcFile, argvFile, inputFileName2);
  getArg("-i2", inputFileName2, argc, argv, inputFileName2);

  getArg("-if", inputFileFormat, argcFile, argvFile, inputFileFormat);
  getArg("-if", inputFileFormat, argc, argv, inputFileFormat);
  getArg("-improve", improveMode, argcFile, argvFile, improveMode);
  getArg("-improve", improveMode, argc, argv, improveMode);
  getArg("-improve-accept", improveAcceptMode, argcFile, argvFile,
	 improveAcceptMode);
  getArg("-improve-accept", improveAcceptMode, argc, argv,
	 improveAcceptMode);
  getArg("-improve-limit1", fracLimMin, argcFile, argvFile, fracLimMin);
  getArg("-improve-limit1", fracLimMin, argc, argv, fracLimMin);
  getArg("-improve-limit2", fracLimMin2, argcFile, argvFile, fracLimMin2);
  getArg("-improve-limit2", fracLimMin2, argc, argv, fracLimMin2);
  getArg("-improve-glimit", gFracMax, argcFile, argvFile, gFracMax);
  getArg("-improve-glimit", gFracMax, argc, argv, gFracMax);
  getArg("-improve-inflimit", mutInfMin, argcFile, argvFile, mutInfMin);
  getArg("-improve-inflimit", mutInfMin, argc, argv, mutInfMin);
  getArg("-ins", insMax, argcFile, argvFile, insMax);
  getArg("-ins", insMax, argc, argv, insMax);
  getArg("-iter", iterMax, argcFile, argvFile, iterMax);
  getArg("-iter", iterMax, argc, argv, iterMax);
  getArg("-lengths", allowedLengthsString, argcFile, argvFile, allowedLengthsString);
  getArg("-lengths", allowedLengthsString, argc, argv, allowedLengthsString);
  getArg("-match-weight", wMatch, argcFile, argvFile, wMatch);
  getArg("-match-weight", wMatch, argc, argv, wMatch);
  getArg("-motif", motifString, argc, argv, motifString);
  getArg("-motif-pos", motifPos, argc, argv, motifPos);
  if (motifPos > 0) {
    --motifPos;
  }
  getArg("-motif-list", motifListName, argcFile, argvFile, motifListName);
  getArg("-motif-list", motifListName, argc, argv, motifListName);
  getArg("-names-edit", nameEditMode, argcFile, argvFile, nameEditMode);
  getArg("-names-edit", nameEditMode, argc, argv, nameEditMode);
  getArg("-names-keep", nameKeepFileName, argc, argv, nameKeepFileName);
  getArg("l", simiMin, argcFile, argvFile, simiMin);
  getArg("l", simiMin, argc, argv, simiMin);

  getArg("-of", outputFileFormat, argcFile, argvFile, outputFileFormat);
  getArg("-of", outputFileFormat, argc, argv, outputFileFormat);
  getArg("-op", aliOperation, argcFile, argvFile, aliOperation);
  getArg("-op", aliOperation, argc, argv, aliOperation);
  getArg("-op2", aliOperation2, argcFile, argvFile, aliOperation2);
  getArg("-op2", aliOperation2, argc, argv, aliOperation2);
  getArg("-padding", paddingMode, argcFile, argvFile, paddingMode);
  getArg("-padding", paddingMode, argc, argv, paddingMode);
  getArg("-pairs", aliOutPairs, argc, argv);
  convert2InternalCounting(aliOutPairs);
  getArg("-pairs-match-limit", pairsMatchLimit, argc, argv, pairsMatchLimit);
  getArg("-prune-polya", prunePolyAMode, argc, argv, prunePolyAMode);
  getArg("-prune-pos", prunePos, argc, argv, prunePos);
  --prunePos; // internal counting starts at zero
  getArg("-prune-len", pruneLen, argc, argv, pruneLen);
  getArg("-prune-wings", pruneWings, argc, argv, pruneWings);
  --pruneWings; // convert to internal counting
  getArg("-log", logFileName, argc, argv, logFileName);
  getArg("-log", logFileName, argcFile, argvFile, logFileName);
  addPathIfRelative(logFileName, rootDir);
  getArg("-make-first", makeFirstName, argcFile, argvFile, makeFirstName);
  getArg("-make-first", makeFirstName, argc, argv, makeFirstName);
  getArg("o", outputFileName, argc, argv, outputFileName);
  // getArg("-optimize", optimizeMode, argc, argv, optimizeMode);
  getArg("-optimize-region", region, argc, argv);
  ERROR_IF((region.size() != 0) && (region.size() != 4),
	   "Region specifier must consist of 4 entries");
  convert2InternalCounting(region);
  if (region.size() == 4) {
    p1Min = region[0];
    p1Max = region[1];
    p2Min = region[2];
    p2Max = region[3];
  }
  getArg("-relative", relativeIndices, argc, argv);
  convert2InternalCounting(relativeIndices);
  getArg("-optimize-region-file", regionFileName, argc, argv);
  getArg("-pick", pickSubsetString, argc, argv, pickSubsetString);
  pickSubsetIndices = parseStringToVector(pickSubsetString);
  sort(pickSubsetIndices.begin(), pickSubsetIndices.end());
  convert2InternalCounting(pickSubsetIndices);
  getArg("-replace", replaceAlphabet, argcFile, argvFile, replaceAlphabet);
  getArg("-replace", replaceAlphabet, argc, argv, replaceAlphabet);
  getArg("-remove-empty", removeEmptySequenceMode, argcFile, argvFile, removeEmptySequenceMode);
  getArg("-remove-empty", removeEmptySequenceMode, argc, argv, removeEmptySequenceMode);
  getArg("-remove-pick", removePickSubsetString, argc, argv, removePickSubsetString);
  removePickSubsetIndices = parseStringToVector(removePickSubsetString);
  sort(removePickSubsetIndices.begin(), removePickSubsetIndices.end());
  convert2InternalCounting(removePickSubsetIndices);
  getArg("-rename", renameFileName, argcFile, argvFile);
  getArg("-rename", renameFileName, argc, argv);
  getArg("-sample", sampleSize, argcFile, argvFile, sampleSize);
  getArg("-sample", sampleSize, argc, argv, sampleSize);
  getArg("-scramble", scrambleMode, argcFile, argvFile, scrambleMode);
  getArg("-scramble", scrambleMode, argc, argv, scrambleMode);
  getArg("-seed", seed, argcFile, argvFile, seed);
  getArg("-seed", seed, argc, argv, seed);
  if (seed > 0) {
    rnd.resetWithSeed(seed);
  }
  getArg("-shift", shiftMax, argcFile, argvFile, shiftMax);
  getArg("-shift", shiftMax, argc, argv, shiftMax);
  getArg("-structure-matrix", structureMatrixFileName, argcFile, argvFile, structureMatrixFileName);
  getArg("-structure-matrix", structureMatrixFileName,  argc, argv, structureMatrixFileName);
  getArg("-structure-thresh", structureMatrixThreshold, argcFile, argvFile, structureMatrixThreshold);
  getArg("-structure-thresh", structureMatrixThreshold,  argc, argv, structureMatrixThreshold);
  getArg("-subset", subsetString ,argc, argv, subsetString);
  subsetIndices = parseStringToVector(subsetString);
  sort(subsetIndices.begin(), subsetIndices.end());
  convert2InternalCounting(subsetIndices);
  getArg("-subset-file", subsetFileName, argc, argv, subsetFileName);
  getArg("-thin", thinMode, argc, argv, thinMode);
  getArg("-tu", tToUMode, argcFile, argvFile, tToUMode);
  getArg("-tu", tToUMode, argc, argv, tToUMode);
  getArg("u", simiMax, argcFile, argvFile, simiMax);
  getArg("u", simiMax, argc, argv, simiMax);
  getArg("-seq-min", seqMinNum, argcFile, argvFile, seqMinNum);
  getArg("-seq-min", seqMinNum, argc, argv, seqMinNum);
  getArg("-ut", uToTMode, argcFile, argvFile, uToTMode);
  getArg("-ut", uToTMode, argc, argv, uToTMode);
  getArg("-verbose", verboseLevel, argcFile, argvFile, verboseLevel);
  getArg("-verbose", verboseLevel, argc, argv, verboseLevel);

  if (logFileName.size() > 0) {
    ofstream logFile(logFileName.c_str(), ios::app);
    parameterOutput(logFile, argc, argv);
    if (argcFile > 1) {
      logFile << "Parameters from command file: ";
      parameterOutput(logFile, argcFile, argvFile);
    }
    logFile.close();
  }

  /***************** MAIN PROGRAM *****************************/
  
  if (verboseLevel > 1) {
    cout << "# Programs called with parameters: " << endl;
    parameterOutput(cout, argc, argv);
  }

  ifstream inputFile(inputFileName.c_str());
  ERROR_IF(!inputFile, "Error reading input file!");

  switch (inputFileFormat) {
  case 1: // fasta format:
    ali.readFasta(inputFile);
    break;
  case 2: // Mase format:
    ERROR("Sorry, format not supported anymore!");
    // ali.loadMase(inputFile);
    break;
  case 20: // readh plean:
    readPlainSequences(inputFile, ali);
    break;
  default:
    ERROR("Unknown input file format!");
  }
  
  inputFile.close();

  if (verboseLevel > 0) {
    cout << "# Alignment with " << ali.size() << " sequences and length " 
	 << ali.getLength() << " read." << endl;
  }

  if (inputFileName2.size() > 0) {
    ifstream inputFile2(inputFileName2.c_str());
    ERROR_IF(!inputFile2, "Error reading input file 2!");
    
    switch (inputFileFormat) {
    case 1: // fasta format:
      ali2.readFasta(inputFile2);
      break;
    case 2: // Mase format:
      ERROR("Sorry, format not supported anymore!");
      // ali.loadMase(inputFile);
      break;
    case 20: // readh plean:
      readPlainSequences(inputFile2, ali2);
      break;
    default:
      ERROR("Unknown input file format!");
    }
    if (verboseLevel > 0) {
      cout << "# Second alignment with " << ali2.size() << " sequences and length " 
	   << ali2.getLength() << " read." << endl;
    }
    inputFile2.close();
  }

  if (renameFileName.size() > 0) {
    ifstream inputFile(renameFileName.c_str());
    Vec<string> newNames;
    string s;
    while (inputFile) {
      inputFile >> s;
      if (s.size() > 0) {
	newNames.push_back(s);
      }
    }
    inputFile.close();
    if (newNames.size() != ali.size()) {
      cout << "# Warning: Alignment size and number of new names do not match: " << ali.size() << " " 
	   << newNames.size() << endl;
    }
    renameSequences(ali, newNames);
  }

  if (addPolyALen > 0) {
    addCharacters(ali, 'A', addPolyALen); // adds poly A signal to each sequence
  }

  if (structureMatrixFileName.size() > 0) {
    ifstream structureMatrixFile(structureMatrixFileName.c_str());
    ERROR_IF(!structureMatrixFile, "Error opening structure matrix file: " + structureMatrixFileName);
    structureMatrix = readPlainMatrix(structureMatrixFile);
    if (structureMatrix.size() != ali.size()) {
      cout << "# Number of read sequences and rows in structure matrix: " << ali.size() << " " << structureMatrix.size()
	   << endl;
    }
    ERROR_IF(structureMatrix.size() != ali.size(),
	     "The structure matrix has not the same number of rows as number of alignment sequences!");
    ERROR_IF(structureMatrix[0].size() != ali.getLength(),
	     "The structure matrix has not the same number of residues as the alignment sequences!");
    structureMatrixFile.close();
  }

  if (structureMatrix.size() > 0) {
    hideStructuredResidues(ali, structureMatrix, structureMatrixThreshold);
  }

  if (aliOperation.size() > 0) {
    if (aliOperation.compare(string("complement")) == 0) {
      ali.transformReverseComplement(); // generates reverse complement alignment (transforms "+" strand into "-" strand)
    }
    else if (aliOperation.compare(string("paste")) == 0) {
      cout << "# Pasting alignments!" << endl;
      ERROR_IF(ali.size() != ali2.size(), "Alignments must have same number of sequences!");
      ali = pasteAlignments(ali, ali2);
    }
    else if (aliOperation.compare(string("overwrite")) == 0) {
      cout << "# Overwrite alignments!" << endl;
      ERROR_IF(ali.size() != ali2.size(), "Alignments must have same number of sequences!");
      ERROR_IF(aliOperation2.size() == 0, "You have to specify the offset position with --op2 position");
      int index = stoi(aliOperation2);
      ali = overwriteAlignments(ali, ali2, index);
    }
    else if (aliOperation.compare(string("fuse")) == 0) {
      cout << "# Fusing alignments!" << endl;
      unsigned int numberGaps = 0;
      if (aliOperation2.size() > 0) { // this allows to add gap characters between fused sequences
	numberGaps = stoui(aliOperation2);
      }
      ali = fuseBedAlignments(ali, ali2, numberGaps);
    }
    else {
      ERROR("Unknown command for option --op");
    }
  }

  Vec<double> weightVec(ali.size(), 1.0);
  // ali.setWeights(weightVec);

  if (makeFirstName.size() > 0) {
    int foundNameId = ali.findSequenceByName(makeFirstName);
    ERROR_IF(foundNameId >= static_cast<int>(ali.size()),
	     "Error: Could not find specified sequence name to be made first sequence!");
    if (verboseLevel > 0) {
      cout << "# Swapping sequences with names: " 
	   << ali.getName(0) << " and " << ali.getName(foundNameId) << endl;
    }
    if (foundNameId != 0) {
      ali.swapSequences(0, foundNameId);
    }
  }
  
  if (bootstrapMode) {
    if (verboseLevel > 0) {
      cout << "# Performing bootstrap resampling of sequences!" << endl;
    }
    ali = bootstrapResample(ali);
  }

  if (regionFileName.size() > 0) {
    ifstream regionFile(regionFileName.c_str());
    ERROR_IF(!regionFile, "Error opening region file!");
    regionStems = readStems(regionFile);
    regionFile.close();
    if (verboseLevel > 0) {
      cout << "# The following stems where read for optimization: " << endl 
	   << regionStems << endl;
    }
  }

  if (nameKeepFileName.size() > 0) {
    ifstream nameKeepFile(nameKeepFileName.c_str());
    ERROR_IF(!nameKeepFile, "Error opening name keep file!");
    string name;
    while(nameKeepFile) {
      nameKeepFile >> name;
      if (name.size() > 0) {
	if (name[0] == '>') {
	  ERROR_IF(name.size()==1, "Sequence name consists only of > character.");
	  keepNames.push_back(name.substr(1, name.size()-1));
	}
	else {
	  keepNames.push_back(name);
	}
      }
    }
    nameKeepFile.close();
    keepAliNames(ali, keepNames);
  }

  if (nameEditMode > 0) {
    if (verboseLevel > 0) {
      cout << "# Changing sequence names according to mode: " << nameEditMode
	   << endl;
    }
    switch (nameEditMode) {
    case 0:
      break; // do nothing
    case 1:
      replaceAliNamesWithNumbers(ali);
      break;
    case 2:
      replaceAliNamesSpacesWithUnderscore(ali);
      break;
    case 3:
      ERROR_IF(ali2.size() != ali.size(),
	       "Alignments specified with -i and --i2 must have same number of sequences!");
      copyAliNames(ali2, ali); // names from ali2 are copied to ali
      break;
    default: ERROR("Unknow name edit mode!");
    }
  }
  if (ali.size() > 0) {
    if (columnSubsetIndices.size() > 0) {
      if (verboseLevel > 0) {
	cout << "# Using following subset of columns: " 
	     << externalCounting(columnSubsetIndices) << endl;
      }
      ERROR_IF(columnSubsetIndices[columnSubsetIndices.size()-1] >= ali.getLength(),
	       "Column subset indices too large!");
      getAlignmentColumnSubset(ali, columnSubsetIndices);
    }
  }
  if (subsetFileName.size() > 0) {
    ifstream subsetFile(subsetFileName.c_str());
    ERROR_IF(!subsetFile, "Error opening subset file!");
    subsetFile >> subsetIndices;
    subsetFile.close();
    if (verboseLevel > 0) {
      cout << "# Read subset indices: " << subsetIndices << endl;
    }
    convert2InternalCounting(subsetIndices);
    getAlignmentSubset(ali, subsetIndices);    
  }

  // adjust sequences:
  if (verboseLevel > 0) {
    cout << "# Converting to upper case letters." << endl;
  }
  ali.upperCaseSequences();
  if (tToUMode) {
    ali.replace('T', 'U'); // replace DNA alphabet to RNA
  }
  else if (uToTMode) {
    ali.replace('U', 'T'); // replace DNA alphabet to RNA
  }
  ali.replace('.', gapChar); // replace "." with "-"
  if (replaceAlphabet.size() > 0) {
    if (verboseLevel > 0) {
      cout << "# Replacing all characters than are non in " << replaceAlphabet
	   << " with gap characters." << endl;
    }
    replaceBadCharacters(ali, replaceAlphabet, gapChar);
  }
  if (conformNameLength > 0) {
    if (verboseLevel > 0) {
      cout << "# Conforming alignment names to length " 
	   << conformNameLength << " !" << endl;
    }
    conformAlignmentNames(ali, conformNameLength);
    conformAlignmentCharacters(ali, alphabet);
  }
  if (fillMode) {
    if (verboseLevel > 0) {
      cout << "# Conforming sequence lengths to longest sequence but filling ends with gaps." << endl;
    }
    fillAliGaps(ali, gapChar);

    if (verboseLevel > 0) {
      cout << "# Alignment with " << ali.size() << " sequences and length " 
	   << ali.getLength() << " after filling end gaps." << endl;
    }
    ali.writeFasta(cout);
  }
  if (absoluteIndices.size() > 0) {
    if (verboseLevel > 0) {
      cout << "# Output of absolute positions: " << endl;
    }
    writeAbsolutePositions(cout, ali, absoluteIndices);
    cout << endl;
  }
  if (relativeIndices.size() > 0) {
    if (verboseLevel > 0) {
      cout << "# Output of relative positions: " << endl;
    }
    writeRelativePositions(cout, ali, relativeIndices);
  }
  if ((clipAfter >= 0) && (clipAfter < static_cast<int>(ali.size()))) {
    if (verboseLevel > 0) {
      cout << "# Clipping after " << clipAfter << " sequences." << endl;
    }
    ali.clipAfter(clipAfter);
    if (verboseLevel > 0) {
      cout << "# Result size: " << ali.size() << endl;
    }
  }
//   if (clipBefore > 0) {
//     alignmentClipBefore(ali, clipBefore);
//   }
  if (checkLength != 0) {
    removeBadLength(ali);
    if (verboseLevel > 0) {
      cout << "# Alignment with " << ali.size() << " sequences and length " 
	   << ali.getLength() << " after removing sequences with bad length." << endl;
    }
  }
  if (allowedLengthsString.size() > 0) {
    Vec<unsigned int> allowedLengthsOrig = parseStringToVector(allowedLengthsString);
    Vec<string::size_type> allowedLengths(allowedLengthsOrig.size(), 1);
    for (Vec<string::size_type>::size_type i = 0; i < allowedLengths.size(); ++i) {
      allowedLengths[i] = allowedLengthsOrig[i];
    }
    removeBadLength(ali, allowedLengths);
    if (verboseLevel > 0) {
      cout << "# Alignment with " << ali.size() << " sequences and length " 
	   << ali.getLength() << " after removing sequences with bad length." << endl;
    }
  }

 // remove all columns corresponding to tailing gaps in specified seqeunce
  if (pruneWings >= 0) {
    ERROR_IF((prunePos >= 0) && (pruneLen > 0),
	     "option --prune-wings seqnumber is not compatible with --prune-pos ; --prune-len");
    // determin prunePos and pruneLen
    ERROR_IF(pruneWings >= static_cast<int>(ali.size()),
	     "--prune-wings seqid called with too large sequence id!");
    computePrunePosition(ali.getSequence(pruneWings), prunePos, pruneLen);
    if (verboseLevel > 0) {
      cout << "# Computed prune-wings positions of sequence " << (pruneWings+1)
	   << " : " << (prunePos + 1) << " " << pruneLen << endl;
    }
    ERROR_IF(pruneLen <= 0,
	     "--prune-wings Sequence contains only gaps!");
  }

  if (prunePolyAMode != 0) {
    pruneLen = computePolyAPruneLength(ali.getSequence(0), prunePos);
    if (pruneLen > 0) {
      if (verboseLevel > 0) {
	cout << "# Computed poly-A signal position: " << (pruneLen + 1) << endl;
      }
    }

  }

  if ((prunePos >= 0) && (pruneLen > 0)) {
    if (verboseLevel > 0) {
      cout << "# Pruning: leaving only positions between " 
	   << prunePos << " and " << prunePos + pruneLen << endl;
    }
    ali.prune(prunePos, pruneLen);
    if (verboseLevel > 0) {
      cout << "# Alignment with " << ali.size() << " sequences and length " 
	   << ali.getLength() << " remains after pruning." << endl;
    }

  }
  if (removeEmptySequenceMode) {
    if (verboseLevel > 0) {
      cout << "# Removing sequences not compatible with region file!" << endl;
    }
    switch (removeEmptySequenceMode) {
    case 1:
      removeEmptySequences(ali, regionStems, gapChar);
      break;
    case 2:
      removeGapSequences(ali, regionStems, gapChar, gapsAllowedMax);
      break;
    default:
      ERROR("Unknown remove empty sequence mode!");
    }
    if (verboseLevel > 0) {
      cout << "# Number of sequences after removeEmptySequenceMode:" << ali.size() << endl;
    }
  }
  if (checkDuplicate) {
    cleanAlignment(ali);
    if (verboseLevel > 0) {
      cout << "# Alignment with " << ali.size() << " sequences and length " 
	   << ali.getLength() << " after removing sequences with duplicate." << endl;
    }
  }
  // ali.outputLineLength = outputLineLength;

  if (collapseName.size() > 0) {
    collapseId = ali.findSequenceByName(collapseName);
    ERROR_IF(collapseId >= static_cast<int>(ali.size()),
	     "Error: Could not find specified sequence name for collapsing!");
  }
  if (collapseId >= 0) {
    if (verboseLevel > 0) {
      cout << "# Collapsing gaps with respect to sequence "
	   << collapseId + 1 << " " << ali.getName(collapseId) << endl;
    }
    collapseAlignment(ali, collapseId, gapChar);
    if (verboseLevel > 0) {
      cout << "# New length of alignment after collapsing: " << ali.getLength() << endl;
    }
  }
  // delete gaps if wanted (default: OFF)
  switch (delGapMode) {
  case 0:
    break; // do nothing
  case 1: 
    if (verboseLevel > 0) {
      cout << "# Deleting all gap characters in alignment!" << endl;
    }
    ali.deleteGaps();
    break;
  case 2: 
    if (verboseLevel > 0) {
      cout << "# Deleting columns with all gap characters in alignment!" << endl;
    }
    deleteGapColumns(ali);
    break;
  default: ERROR("Unknown del-gap mode!");
  }
  switch (scrambleMode) {
  case 0: 
    break; // do nothing
  case 1: 
    if (verboseLevel > 0) {
      cout << "# Scrambling all characters in alignment!" << endl;
    }
    scrambleAlignment(ali);
    break; 
  case 2: 
    {
      Vec<Vec<double> > fOrig = SequenceAlignmentTools::computeDinucleotideFrequencies(ali, alphabet);
      if (verboseLevel > 0) {
	cout << "# Di-nucleotide frequencies before shuffle: " 
	     << fOrig << endl;
      }
      // shuffle, keeping dinucleotide composition the same, shuffle columns 
      SequenceAlignmentTools::size_type numTrials = 
	SequenceAlignmentTools::dinucleotideShuffle(ali, alphabet, shuffleNormLimit, shuffleIterations, true);
      if (verboseLevel > 0) {
	cout << "# Number of successful columns swaps: " << numTrials << endl;
	Vec<Vec<double> > fNew = SequenceAlignmentTools::computeDinucleotideFrequencies(ali, alphabet);
	cout << "# Di-nucleotide frequencies after shuffle: " 
	     << fNew << endl;
	cout << "# Frequency check: " << shuffleNormLimit << " " 
	     << SequenceAlignmentTools::checkFrequenciesOk(fOrig, fNew, shuffleNormLimit) << endl;
      }
    }
    break;
  case 3:
    {
      // shuffle, keeping dinucleotide composition the same, do not shuffle columns
      SequenceAlignmentTools::size_type numTrials = 
	SequenceAlignmentTools::dinucleotideShuffle(ali, alphabet, shuffleNormLimit, shuffleIterations, false);
      if (verboseLevel > 0) {
	cout << "# Number of successful columns swaps: " << numTrials << endl;
      }
    }
    break;
  case 4:
    scrambleAlignmentSequences(ali);
    break;
  case 5:
    randomAlignmentSequences(ali, alphabet);
    break;
  default:
    ERROR("Unknown alignmnet shuffling mode!");
 }
//   if (referenceId >= 0) {
//     if (verboseLevel > 0) {
//       cout << "Deleting all gaps with respect to sequence " 
// 	   << referenceId + 1 << " " << ali.getName(referenceId) 
// 	   << endl;
//     }
//     ERROR("Sorry, purgeTargetInsertions not supported anymore! Use --collapse instead of --reference ", 
// 	  exception);
//     // ali.purgeTargetInsertions(referenceId);
//   }
  if (verboseLevel > 0) {
    cout << "# size before removeGapOnlySequence: " << ali.size() << endl;
  }
  removeGapOnlySequences(ali, gapChar);
  if (verboseLevel > 0) {
    cout << "# size after removeGapOnlySequence: " << ali.size() << endl;
    cout << "# Checking for alignment consistency!" << endl;
  }
  if (ali.hasEvenLengths()) {
    if (verboseLevel > 1) {
      cout << "# Average sequence similarity: " << 3 << " " << computeAverageSimilarity(ali, gapChar) << endl;
      cout << "# Most similar sequence pair: " << findMostSimilar(ali, ibest, jbest, gapChar)
	   << endl;
      cout << ">" << ali.getName(ibest) << endl;
      cout << ali.getSequence(ibest) << endl;
      cout << ">" << ali.getName(jbest) << endl;
      cout << ali.getSequence(jbest) << endl;
      
      cout << "# Least similar sequence pair: " << findLeastSimilar(ali, ibest, jbest, gapChar)
	   << endl;
      cout << ">" << ali.getName(ibest) << endl;
      cout << ali.getSequence(ibest) << endl;
      cout << ">" << ali.getName(jbest) << endl;
      cout << ali.getSequence(jbest) << endl;
    }  
    if (simiMax < 100.0) {
      if (verboseLevel > 0) {
	cout << "# Number of sequences before removeTooSimilar: " << ali.size() << endl;
      }
      removeTooSimilar(ali, simiMax, seqMinNum, gapChar);
      if (verboseLevel > 0) {
	cout << "# Number of sequences after removeTooSimilar: " << ali.size() << endl;
      }
    }
    if (simiMin > 0.0) {
      if (verboseLevel > 0) {
	cout << "# Number of sequences before removeTooFar: " << ali.size() << endl;
      }
      removeTooFar(ali, simiMin, gapChar);
      if (verboseLevel > 0) {
	cout << "# Number of sequences after removeTooFar: " << ali.size() << endl;
      }
    }
    if (thinMode > 0) {
      switch (thinMode) {
      case 1:
	ERROR_IF(seqMinNum < 1, 
		 "Minimum number of sequences must be specified in thinning mode!");
	thinSequences(ali, seqMinNum, gapChar);
	break;
      default: ERROR("Unknown thin mode!");
      }
    }

  }
  else {
    if (verboseLevel > 0) {
      cout << "# Sequences have different lenghts! Skipping sequence comparison."
	   << endl;
    }
  }
  if (pairsMatchLimit > 0.0) {
    if (verboseLevel > 0) {
      cout << "# Number of sequences before removing non-matching region: " << ali.size() << endl;
    }
    removeNonMatching(ali, aliOutPairs, scorer, pairsMatchLimit);
    if (verboseLevel > 0) {
      cout << "# Number of sequences after removing non-matching region: " << ali.size() << endl;
    }
  }
  if (sampleSize > 0) {
    removeNotSampleSequences(ali, generateRandomIndexSubset( sampleSize, ali.size(), 0));
    if (verboseLevel > 0) {
      cout << "# Removed non-sample sequences! Remaining size: " << ali.size() << endl;
    }
  }
  if (pickSubsetIndices.size() > 0) {
    removeNotSampleSequences(ali, pickSubsetIndices);
    if (verboseLevel > 0) {
      cout << "# Removed not-picked sequences other than pickSubsetString" << endl;
    }
  }
  if (removePickSubsetIndices.size() > 0) {
    removeSampleSequences(ali, removePickSubsetIndices);
    if (verboseLevel > 0) {
      cout << "# Removed picked sequences" << endl;
    }
  }
  if (motifString.size() > 0) {
    if (verboseLevel > 0) {
      cout << "# Removing sequences that do not have motif " 
	   << motifString << " at position " << (motifPos + 1) << endl;
    }
    removeMotifMismatches(ali, motifString, motifPos);
  }
  if (motifListName.size() > 0) {
    ifstream motifFile(motifListName.c_str());
    ERROR_IF(!motifFile, "Error opening motif file!");
    motifFile >> motifs;
    motifFile.close();
  }
  if (subsetIndices.size() == 0) {
    subsetIndices = generateStair(ali.getLength());
  }
  else {
    if (verboseLevel > 0) {
      cout << "# Using subset: " << externalCounting(subsetIndices) << endl;
    }
  }
  Vec<unsigned int> indexMapping = generateStair(ali.getLength());
  if (paddingMode) {
    if (verboseLevel > 0) {
      cout << "# Inserting gaps around regions to be optimized." << endl;
    }
    ERROR_IF(regionStems.size() == 0, "No stems defined for optimization!");
    // indexMapping has length of old alignment
    indexMapping = addRegionPadding(ali, regionStems, shiftMax);
    // adjust aliOutPairs, they might have changed because of padding:
    for (unsigned int i = 0; i < aliOutPairs.size(); ++i) {
      aliOutPairs[i] = indexMapping[aliOutPairs[i]];
    }

  }
  switch (improveMode) {
  case 0: // do nothing
    break;
  case 1:
    if (verboseLevel > 0) {
      cout << "# Calling findImprovableColumns!" << endl;
    }
    findImprovableColumns(ali, scorer, range, fracLimMin, fracLimMin2, 
			  gFracMax, mutInfMin, improveAcceptMode,
			  subsetIndices);
    break;
  case 2:
    if (verboseLevel > 0) {
      cout << "# Calling optimizeRegion!" << endl;
    }
    optimizeRegions(ali, weightVec, scorer,
		    p1Min,p1Max,p2Min,p2Max,shiftMax,delMax,insMax,
		    iterMax, wMatch, wCons, verboseLevel);
    break;
  case 3:
    if (verboseLevel > 0) {
      cout << "# Calling optimizeRegion!" << endl;
    }
    optimizeRegions(ali, weightVec, scorer,
		    regionStems, shiftMax,delMax,insMax,
		    iterMax, wMatch, wCons, verboseLevel);
    break;
  default:
    break;
  }
  if (outputFileName.size() > 0) {
    if (verboseLevel > 0) {
      cout << "# Saving output file to : " << outputFileName << endl;
    }
    ofstream outputFile(outputFileName.c_str());
    ERROR_IF(!outputFile, "Error opening output file!");
    
    saveAlignment(outputFile, ali, aliOutPairs, outputFileFormat,
		  scorer, alphabet, motifs);

    outputFile.close();
  }
  else {
    saveAlignment(cout, ali, aliOutPairs, outputFileFormat, scorer, alphabet, motifs);
  }
//   else {
//     saveAlignment(cout, ali, outputFileFormat);
//   }
  return 0;
}
