/*
 PrefixSpan: An efficient algorithm for sequential pattern mining

 $Id: prefixspan.cpp,v 1.8 2002/04/03 13:35:23 taku-ku Exp $;

 Copyright (C) 2002 Taku Kudo  All rights reserved.
 This is free software with ABSOLUTELY NO WARRANTY.

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; either version 2 of the License, or
 (at your option) any later version.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
 02111-1307, USA
*/

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <strstream>
#include <utility>
#include <vector>

using namespace std;

// PrefixSpan-style sequential pattern miner over sequences of items of type T.
//
// Usage: construct with the mining parameters, feed whitespace-separated
// sequences (one per line) through read(), then call run() to write every
// reported pattern to an output stream.
//
// A pattern is grown one item at a time.  From the last matched position of
// each supporting sequence the next item is either
//   * the immediate neighbor (position + 1), or
//   * a "skip": an item whose distance g from that neighbor position
//     satisfies mingap <= g <= maxgap.
// A skip is stored in `pattern` as a dummy entry with frequency 0 followed by
// the real entry, so one skip adds two elements to `pattern`.
template <class T> class PrefixSpan {
private:
  typedef std::pair <unsigned int, int>  Occurrence;     // (sequence id, last matched position)
  typedef std::vector <Occurrence>       Projection;     // occurrences supporting a pattern
  typedef std::map <T, Projection>       Counter;        // candidate item -> its occurrences
  typedef std::map <T, int>              FirstPosition;  // item -> first position in one sequence

  std::vector < std::vector <T> >             transaction;  // input sequences
  std::vector < std::pair <T, unsigned int> > pattern;      // current pattern: (item, frequency)
  unsigned int minsup;    // minimum number of supporting occurrences for an extension
  unsigned int minpat;    // patterns with fewer entries than this are not reported
  unsigned int maxpat;    // extension stops once pattern.size() - skipcount reaches this
  unsigned int mingap;    // smallest allowed skip distance
  unsigned int maxgap;    // largest allowed skip distance
  unsigned int minskip;   // plain-text output is suppressed when fewer skips were used
  unsigned int maxskip;   // maximum number of skips allowed in one pattern
  bool all;               // report every frequent pattern, not only non-extendable ones
  bool where;             // emit <pattern> blocks that also list supporting sequence ids
  std::string delimiter;  // separator between item and frequency in the output
  bool verbose;           // print the number of sequences before mining
  std::ostream *os;       // output stream of the current run (set by run())
  unsigned int skipcount; // number of skips in the current pattern
  bool skipped;           // scratch flag used while printing skip markers

  // Writes the current pattern (and, in `where` mode, the ids of the
  // supporting sequences in `projected`) to *os.
  void report (const Projection &projected)
  {
    if (minpat > pattern.size ()) return;

    // In `all` mode the root call arrives with an empty pattern.  With
    // minpat == 0 that would read pattern[size - 1] on an empty vector, and
    // there is no frequency to print anyway.
    if (all && pattern.empty ()) return;

    if (where) {
      *os << "<pattern>" << '\n';

      if (all) {
        *os << "<freq>" << pattern.back ().second << "</freq>" << '\n';
        *os << "<what>";
        for (std::size_t i = 0; i < pattern.size (); ++i)
          *os << (i ? " " : "") << pattern[i].first;
      } else {
        *os << "<what>";
        for (std::size_t i = 0; i < pattern.size (); ++i)
          *os << (i ? " " : "") << pattern[i].first
              << delimiter << pattern[i].second;
      }
      *os << "</what>" << '\n';

      *os << "<where>";
      for (std::size_t i = 0; i < projected.size (); ++i)
        *os << (i ? " " : "") << projected[i].first;
      *os << "</where>" << '\n';

      *os << "</pattern>" << '\n';
      return;
    }

    // Plain-text mode: only patterns that used at least `minskip` skips.
    if (minskip > skipcount) return;

    if (all) {
      *os << pattern.back ().second;
      for (std::size_t i = 0; i < pattern.size (); ++i)
        *os << " " << pattern[i].first;
    } else {
      skipped = false;
      for (std::size_t i = 0; i < pattern.size (); ++i) {
        if (! skipped && pattern[i].second == 0) {
          // Frequency 0 marks the dummy entry of a skip: print it as two
          // delimiters instead of an item.
          *os << (i ? " " : "") << delimiter << delimiter;
          skipped = true;
        } else {
          *os << (i ? " " : "") << pattern[i].first
              << delimiter << pattern[i].second;
          skipped = false;
        }
      }
    }
    *os << '\n';
  }

  // Removes every candidate supported by fewer than `minsup` occurrences.
  void prune (Counter &counter) const
  {
    for (typename Counter::iterator it = counter.begin (); it != counter.end (); ) {
      if (it->second.size () < minsup)
        counter.erase (it++);  // post-increment keeps a valid iterator across the erase
      else
        ++it;
    }
  }

  // Extends the current pattern recursively.
  //   projected: occurrences (sequence id, last matched position) of the pattern.
  //   init:      true only for the root call, where every later item of a
  //              sequence is a candidate rather than just neighbor/skip items.
  void project (const Projection &projected, bool init = false)
  {
    if (all) report (projected);

    Counter ncounter;  // root or neighbor extensions
    Counter scounter;  // skip extensions

    for (std::size_t i = 0; i < projected.size (); ++i) {
      const unsigned int id = projected[i].first;
      const std::vector <T> &items = transaction[id];
      const unsigned int size = static_cast <unsigned int> (items.size ());
      // Position right after the last match; the root uses -1, giving 0.
      const unsigned int next = static_cast <unsigned int> (projected[i].second + 1);

      // Each item is counted once per occurrence, at its first position;
      // map::insert leaves an already recorded position untouched.
      FirstPosition ntmp;
      FirstPosition stmp;

      if (init) {
        for (unsigned int j = next; j < size; ++j)
          ntmp.insert (std::make_pair (items[j], static_cast <int> (j)));
      } else {
        if (next < size)
          ntmp.insert (std::make_pair (items[next], static_cast <int> (next)));

        // Unsigned arithmetic on purpose: it wraps exactly like the original
        // mixed int/unsigned expression did.
        const unsigned int lo = next + mingap;
        const unsigned int hi = next + maxgap;
        for (unsigned int j = next + 1; j < size; ++j) {
          if (j >= lo && j <= hi)
            stmp.insert (std::make_pair (items[j], static_cast <int> (j)));
        }
      }

      for (typename FirstPosition::const_iterator k = ntmp.begin (); k != ntmp.end (); ++k)
        ncounter[k->first].push_back (Occurrence (id, k->second));
      for (typename FirstPosition::const_iterator k = stmp.begin (); k != stmp.end (); ++k)
        scounter[k->first].push_back (Occurrence (id, k->second));
    }

    prune (ncounter);
    prune (scounter);

    // Without `all`, a pattern is reported only when it has no frequent extension.
    if (! all && ncounter.empty () && scounter.empty ()) {
      report (projected);
      return;
    }

    for (typename Counter::iterator l = ncounter.begin (); l != ncounter.end (); ++l) {
      if (pattern.size () - skipcount < maxpat) {
        pattern.push_back (std::make_pair (l->first,
                                           static_cast <unsigned int> (l->second.size ())));
        project (l->second);
        pattern.pop_back ();
      }
    }

    for (typename Counter::iterator l = scounter.begin (); l != scounter.end (); ++l) {
      if (pattern.size () - skipcount < maxpat) {
        skipcount += 1;
        if (skipcount <= maxskip) {
          // Dummy entry (frequency 0) marking the skip, then the real item.
          pattern.push_back (std::make_pair (l->first, 0u));
          pattern.push_back (std::make_pair (l->first,
                                             static_cast <unsigned int> (l->second.size ())));
          project (l->second);
          pattern.pop_back ();
          pattern.pop_back ();
        }
        skipcount -= 1;
      }
    }
  }

public:
  // Stores the mining parameters; see the member comments for their meaning.
  PrefixSpan (unsigned int _minsup = 1,
              unsigned int _minpat = 1,
              unsigned int _maxpat = 0x0fffffff,
              unsigned int _mingap = 1,
              unsigned int _maxgap = 0x0fffffff,
              unsigned int _minskip = 0,
              unsigned int _maxskip = 0x0fffffff,
              bool _all = false,
              bool _where = false,
              std::string _delimiter = "/",
              bool _verbose = false):
    minsup (_minsup), minpat (_minpat), maxpat (_maxpat),
    mingap (_mingap), maxgap (_maxgap),
    minskip (_minskip), maxskip (_maxskip),
    all (_all), where (_where), delimiter (_delimiter), verbose (_verbose),
    os (0), skipcount (0), skipped (false) {}

  ~PrefixSpan () {}

  // Reads one sequence per line from `is`; items are separated by whitespace
  // and parsed with operator>> for T.  An empty line yields an empty sequence.
  // Returns `is`.
  std::istream& read (std::istream &is)
  {
    std::string line;
    std::vector <T> tmp;
    T item;
    while (std::getline (is, line)) {
      tmp.clear ();
      std::istringstream fields (line);
      while (fields >> item) tmp.push_back (item);
      transaction.push_back (tmp);
    }

    return is;
  }

  // Mines all sequences read so far and writes the patterns to `_os`.
  // Returns `_os`.
  std::ostream& run (std::ostream &_os)
  {
    os = &_os;
    if (verbose) *os << transaction.size () << '\n';

    // Every sequence supports the empty pattern; -1 means "nothing matched yet".
    Projection root;
    root.reserve (transaction.size ());
    for (unsigned int i = 0; i < transaction.size (); ++i)
      root.push_back (Occurrence (i, -1));

    skipcount = 0;
    project (root, true);
    return *os;
  }

  // Discards the stored sequences and the current pattern.
  void clear ()
  {
    transaction.clear ();
    pattern.clear ();
  }
};

int main (int argc, char **argv)
{
  extern char *optarg;
  unsigned int minsup = 1;
  unsigned int minpat = 1;
  unsigned int maxpat = 0x0fffffff;
  unsigned int mingap = 1;
  unsigned int maxgap = 0x0fffffff;
  unsigned int minskip = 0;
  unsigned int maxskip = 0x0fffffff;
  bool all = false;
  bool where = false;
  string delimiter = "/";
  bool verbose = false;
  string type = "string"; 

  int opt;
  while ((opt = getopt(argc, argv, "awvt:M:m:L:d:s:S:g:G:")) != -1) {
    switch(opt) {
    case 'a':
      all = true;
      break;
    case 'w':
      where = true;
      break;
    case 'v':
      verbose = true;
      break;
    case 'm':
      minsup = atoi (optarg);
      break;
    case 'M':
      minpat = atoi (optarg);
      break;
    case 'L':
      maxpat = atoi (optarg);
      break;
    case 's':
      minskip = atoi (optarg);
      break;
    case 'S':
      maxskip = atoi (optarg);
      break;
    case 'g':
      mingap = atoi (optarg);
      break;
    case 'G':
      maxgap = atoi (optarg);
      break;
    case 't':
      type = string (optarg); 
      break;
    case 'd':
      delimiter = string (optarg);
      break;
    default:
      cout << "Usage: " << argv[0] 
	   << " [-m minsup] [-M minpat] [-L maxpat] [-s minskip] [-S maxskip] [-g mingap] [-G maxgap] [-a] [-w] [-v] [-t type] [-d delimiter] < data .." << endl;
      return -1;
    }
  }
   
  if (type == "int") { 
     PrefixSpan<unsigned int> prefixspan (minsup, minpat, maxpat, mingap, maxgap, minskip, maxskip, all, where, delimiter, verbose);
     prefixspan.read (cin);
     prefixspan.run  (cout);
  }else if (type == "short") {
     PrefixSpan<unsigned short> prefixspan (minsup, minpat, maxpat, mingap, maxgap, minskip, maxskip, all, where, delimiter, verbose);
     prefixspan.read (cin);
     prefixspan.run  (cout);
  } else if (type == "char") {
     PrefixSpan<unsigned char> prefixspan (minsup, minpat, maxpat, mingap, maxgap, minskip, maxskip, all, where, delimiter, verbose);
     prefixspan.read (cin);
     prefixspan.run  (cout);
  } else if (type == "string") {
     PrefixSpan<string> prefixspan (minsup, minpat, maxpat, mingap, maxgap, minskip, maxskip, all, where, delimiter, verbose);
     prefixspan.read (cin);
     prefixspan.run  (cout);
  } else { 
     cerr << "Unknown Item Type: " << type << " : choose from [string|int|short|char]" << endl;
     return -1;
  }

  return 0;
}
