// Copyright 2011 Martin C. Frith

// Find clusters in 1-dimensional data, using the method described in
// MC Frith et al. Genome Res. 2008 18(1):1-12.

/*
 * Copyright 2012 K.K.DNAFORM
 * This file is part of idr_paraclu program.
 * Idr_paraclu is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, any later version.
 *
 * Idr_paraclu is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Idr_paraclu. If not, see <http://www.gnu.org/licenses/>.
 *
 * FILE:
 * 		paraclu.cc
 * USAGE:
 * 		paraclu tpm ctssfile
 * 			tpm			TPM used for a threshold (We recommend 0.1)
 * 			ctssfile	CTSS file created by make_ctss3.sh
 * DESCRIPTION:
 * 		Parametric clustering for the CAGE peaks.
 * CREATED:
 * 		2011.08.18		Copied the source code by Frith.
 * REVISION:
 *
 */
#include <algorithm>  // max, min
#include <cassert>
#include <cstdlib>  // EXIT_SUCCESS, EXIT_FAILURE
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>

// Coordinate of a site along a sequence.
typedef unsigned Position;
// Numeric value attached to a site.
typedef double Value;

// One data point of the 1-dimensional input: a position and its value.
struct Site {
  Position position;
  Value value;

  Site(Position pos, Value val)
      : position(pos),
        value(val) {}
};

// Read-only iterator into the site list.
typedef std::vector<Site>::const_iterator Ci;

// Sentinel meaning "no density found yet" in weakestPrefix/weakestSuffix;
// its negation is the starting minDensity handed to writeClusters.
const double infinity = 1e100;

// ******* [ADD] : 2011/11/29 : START
// Sum of every value in the input file. Accumulated by getWholeTagCount and
// used by writeClusters as the divisor when normalizing a cluster's density.
double		wholeCnt = 0.0;
// ******* [ADD] : 2011/11/29 : END

// Sites of the sequence/strand group currently being read; processOneStream
// keeps their positions strictly increasing.
std::vector<Site> sites;
// Sequence name of the group held in `sites`.
std::string seqname;
// Strand of the group held in `sites`.
char strand;

// Results of the last weakestPrefix call: the prefix [beg, minPrefix) with
// the lowest density, and that density (stays `infinity` if none exists).
Ci minPrefix;
double minPrefixDensity;
// Results of the last weakestSuffix call: the suffix [minSuffix, end) with
// the lowest density, and that density (stays `infinity` if none exists).
Ci minSuffix;
double minSuffixDensity;

// Running sum used by weakestPrefix/weakestSuffix; once either returns it
// holds the sum of all values in the range that was scanned.
Value totalValue;

// Threshold parsed from the command line by parseMinValue; writeClusters
// only prints clusters whose normalized density is at least this large.
Value minValue;

// Finds the least dense proper prefix of the non-empty range [beg, end).
//
// For every split point `cut` after the first site, the prefix [beg, cut) is
// scored as (sum of its values) / (cut->position - beg->position).  The
// lowest score goes to minPrefixDensity and the matching split point to
// minPrefix.  With a single site there is no split point: minPrefixDensity
// stays `infinity` and minPrefix is not modified.
//
// On return totalValue holds the sum of all values in [beg, end).
void weakestPrefix(Ci beg, Ci end) {
  assert(beg < end);

  const Position start = beg->position;
  minPrefixDensity = infinity;
  totalValue = beg->value;

  for (Ci cut = beg + 1; cut < end; ++cut) {
    // totalValue currently covers the sites strictly before `cut`.
    const double prefixDensity = totalValue / (cut->position - start);
    if (prefixDensity < minPrefixDensity) {
      minPrefixDensity = prefixDensity;
      minPrefix = cut;
    }
    totalValue += cut->value;
  }
}

// Finds the least dense proper suffix of the non-empty range [beg, end).
//
// Mirror image of weakestPrefix: for every split point `cut` after the first
// site, the suffix [cut, end) is scored as (sum of its values) /
// (last position - position of the site just before `cut`).  The lowest
// score goes to minSuffixDensity and the matching split point to minSuffix.
// With a single site minSuffixDensity stays `infinity` and minSuffix is not
// modified.
//
// On return totalValue holds the sum of all values in [beg, end).
void weakestSuffix(Ci beg, Ci end) {
  assert(beg < end);

  const Ci last = end - 1;
  const Position finish = last->position;
  minSuffixDensity = infinity;
  totalValue = last->value;

  for (Ci cut = last; cut > beg; --cut) {
    const Ci before = cut - 1;
    // totalValue currently covers the sites in [cut, end).
    const double suffixDensity = totalValue / (finish - before->position);
    if (suffixDensity < minSuffixDensity) {
      minSuffixDensity = suffixDensity;
      minSuffix = cut;
    }
    totalValue += before->value;
  }
}

// Recursively reports the clusters contained in the site range [beg, end).
//
// minDensity is the density at which this range came into existence (the
// caller's split density); maxDensity, computed here, is the density at
// which the range itself falls apart, i.e. the smaller of its weakest-prefix
// and weakest-suffix densities.
//
// A line is printed to std::cout when maxDensity > minDensity AND the
// normalized density of the range is at least minValue.  The printed columns
// are: seqname, strand, first position, last position, span width
// (last - first + 1; changed on 2011/11/16 from the number of sites), sum of
// values, minDensity, maxDensity.
//
// The range is then split at the weaker of the two split points and both
// halves are processed recursively with the raised minimum density.
void writeClusters(Ci beg, Ci end, double minDensity) {
  if (beg == end) return;

  const Position firstPos = beg->position;
  const Position lastPos = (end - 1)->position;

  // Also leaves the sum of the whole range in totalValue, used just below.
  weakestPrefix(beg, end);

  // Normalized density used for the minValue filter (introduced 2011/08/18,
  // rescaled by wholeCnt on 2011/11/29): value sum per unit of span, divided
  // by the file-wide total and multiplied by 10 * 1000000.  A range covering
  // a single position has zero span and uses (totalValue - 1) in place of
  // the per-span ratio.
  const Value region = lastPos - firstPos;
  const Value density = (region > 0)
      ? totalValue / region / wholeCnt * 10 * 1000000
      : ( totalValue - 1 ) / wholeCnt * 10 * 1000000;

  // Recomputes totalValue (same sites, summed from the other end).
  weakestSuffix(beg, end);

  const double maxDensity = std::min(minPrefixDensity, minSuffixDensity);

  const bool survivesSomeDensity = maxDensity > minDensity;
  const bool passesThreshold = density >= minValue;
  if (survivesSomeDensity && passesThreshold) {
    std::cout << seqname << "\t" << strand << "\t"
              << std::setprecision(20)
              << firstPos << "\t" << lastPos << "\t"
              << (lastPos - firstPos + 1) << "\t"
              << totalValue << "\t"
              << std::setprecision(3)
              << minDensity << "\t" << maxDensity << "\n";
  }

  if (maxDensity < infinity) {
    // Pick the split point now: the recursive calls overwrite the globals.
    Ci split;
    if (minPrefixDensity < minSuffixDensity) {
      split = minPrefix;
    } else {
      split = minSuffix;
    }
    const double childMinDensity = std::max(minDensity, maxDensity);
    writeClusters(beg, split, childMinDensity);
    writeClusters(split, end, childMinDensity);
  }
}

void processOneStream(std::istream &stream) {
  std::string newSeqname;
  char newStrand;
  Position p;
  Value v;
  while (stream >> newSeqname >> newStrand >> p >> v) {
    if (newSeqname == seqname && newStrand == strand) {
      if (p > sites.back().position)
        sites.push_back(Site(p, v));
      else if (p == sites.back().position)
        sites.back().value += v;
      else
        throw std::runtime_error("unsorted input");
    } else {
      writeClusters(sites.begin(), sites.end(), -infinity);
      sites.clear();
      seqname = newSeqname;
      strand = newStrand;
      sites.push_back(Site(p, v));
    }
  }
  if (!stream.eof()) throw std::runtime_error("bad input");
  writeClusters(sites.begin(), sites.end(), -infinity);
}

// Clusters the records found in `fileName`; the name "-" selects standard
// input.  Throws std::runtime_error if the file cannot be opened.
void processOneFile(const std::string &fileName) {
  if (fileName == "-") {
    processOneStream(std::cin);
    return;
  }

  std::ifstream input(fileName.c_str());
  if (input.fail()) {
    throw std::runtime_error("can't open file: " + fileName);
  }
  processOneStream(input);
}

// ******* [ADD] : 2011/11/29 : START
// Adds the value column of every record in `fileName` to the global
// wholeCnt, which writeClusters uses to normalize densities.
//
// The file is read a second time by processOneFile, so standard input ("-")
// cannot be used here.
//
// Throws std::runtime_error when:
//   - fileName is "-",
//   - the file cannot be opened,
//   - reading stops before end of file (malformed record).  Without this
//     check a truncated total would silently be used for normalization.
void getWholeTagCount( const std::string &fileName )
{
	if( fileName == "-" ) {
		throw std::runtime_error( "you can't use stdin." );
	}
	std::ifstream	ifs( fileName.c_str() );
	if( !ifs ) {
		throw std::runtime_error( "can't open file : " + fileName );
	}
	std::string		newSeqname;
	char			newStrand;
	Position		p;
	Value			v;
	while( ifs >> newSeqname >> newStrand >> p >> v ) {
		wholeCnt += v;
	}
	// Same end-of-input test as processOneStream: a stop that is not at
	// EOF means a record failed to parse.
	if( !ifs.eof() ) {
		throw std::runtime_error( "bad input" );
	}
}
// ******* [ADD] : 2011/11/29 : END

// Parses `s` as a number and stores it in the global minValue.
// Throws std::runtime_error("bad minValue: ...") unless `s` consists of
// exactly one number, optionally followed by whitespace.
void parseMinValue(const std::string &s) {
  std::istringstream parser(s);

  bool ok = !(parser >> minValue).fail();
  if (ok) {
    // Skip trailing whitespace; anything left over makes the input invalid.
    parser >> std::ws;
    ok = parser.eof();
  }

  if (!ok) {
    throw std::runtime_error("bad minValue: " + s);
  }
}

// Entry point.  Expects exactly two arguments: the minValue threshold and
// the input file name.  Prints a header line, sums the file's values
// (getWholeTagCount), then clusters the file (processOneFile).
// Any std::exception is reported on std::cerr and turns into EXIT_FAILURE.
int main(int argc, char **argv)
{
  try {
    if (argc != 3) {
      throw std::runtime_error("I need a minValue and a fileName");
    }
    const std::string threshold = argv[1];
    const std::string inputName = argv[2];

    parseMinValue(threshold);
    std::cout << "# sequence, strand, start, end, sites, sum of values, min d, max d\n";
    // The file is read twice: first for the total, then for clustering.
    getWholeTagCount(inputName);
    processOneFile(inputName);
    return EXIT_SUCCESS;
  } catch (const std::exception &err) {
    std::cerr << "paraclu: " << err.what() << '\n';
    return EXIT_FAILURE;
  }
}
