//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////
//
// Program        : Cross Reference
//
// Author         : Richard E. Pattis
//                  Computer Science Department
//                  Carnegie Mellon University
//                  5000 Forbes Avenue
//                  Pittsburgh, PA 15213-3891
//                  e-mail: pattis@cs.cmu.edu
//
// Maintainer     : Author
//
//
// Description:
//
//   This program prompts for a file name, reads each of the words it contains
// (stripping out punctuation), and produces a cross reference (or concordance)
// of the text: it prints every word in the text (sorted alphabetically, case
// insensitive), followed by all the lines that it appears on (each line number
// appears just once for a word, even if that word appears multiple times on the
// line).
//
// This program is written with the Map and List collection classes. The data
// structure is modeled by: Map[String] -> List[Integer*].
//
//   Note that this program uses non-generic collections (the only ones available
// before Java 1.5). As a result, Eclipse marks various lines with warnings: mostly
// those which add values to collections, whose types are unknown since generics
// are not used. Left-click a warning to see the generic type parameters that are
// not provided in this code. See GenericApplication for a translation.
//
// Known Bugs     : None
//
// Future Plans   : Update to Java 1.5 generics
//
// Program History:
//  10/08/04: R. Pattis - Operational for 15-200
//
//////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////


import edu.cmu.cs.pattis.cs151xx.Timer;
import edu.cmu.cs.pattis.cs151xx.TypedBufferReader;
import edu.cmu.cs.pattis.cs151xx.TypedBufferWriter;

import java.util.Iterator;
import java.util.Map;
import java.util.HashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.Collections;

import java.util.Comparator;
import java.io.EOFException;



public class Application
{

  /**
   * Prompts for a text file, builds a cross reference of its words, and writes
   * the result to "<input file name>.xref.txt".
   *
   * The cross reference is stored as Map[String] -> List[Integer*]: each
   * distinct word (case sensitive) maps to the list of line numbers on which
   * it was read, with each line number recorded at most once per word. Words
   * are written in case-insensitive alphabetical order; a short summary
   * (word count, line-entry count, average, elapsed time, output file name)
   * is printed to the console.
   *
   * Raw (non-generic) collections are used on purpose; see the file header.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(String[] args)
  {
    //Get input/output files; the second constructor argument lists the
    //characters (white-space and punctuation) used as token-separators
    TypedBufferReader input  = new TypedBufferReader("Enter name of file to XREF",
                                                     " \":?&/,.;+-*/()=!{}<>[]");
    TypedBufferWriter output = new TypedBufferWriter(input.getFileName()+".xref.txt");


    //Build the cross reference by reading all the words in the file
    //(timing this process); the loop ends when readString throws EOFException
    Map   xref = new HashMap();
    Timer t    = new Timer();
    t.start();
    for (;;)
      try {
        String word  = input.readString();
        List   lines = (List)xref.get(word);
        if (lines == null) {
          //First occurrence of this word: start its (empty) list of lines
          lines = new ArrayList();
          xref.put(word,lines);
        }

        //word is in map; lines refers to its associated list; add current line
        //if not already in the list. Only the last entry is checked: this
        //assumes line numbers never decrease while reading, so a repeat of
        //the current line can only be the most recently added entry.
        Integer newLine = new Integer(input.getLineNumber());
        if (lines.isEmpty() || !lines.get(lines.size()-1).equals(newLine))
          lines.add(newLine);
      } catch (EOFException eofe) {break;}
    t.stop();
    input.close();


    //Sort the words (keys of the map) alphabetically, ignoring case. Words
    //that differ only by case (e.g., "The" and "the") are distinct keys that
    //tie under a case-insensitive comparison; they are further ordered by a
    //case-sensitive comparison so the output does not depend on the
    //iteration order of the HashMap.
    List allWords = new ArrayList(xref.keySet());
    Collections.sort(
                allWords,
                new Comparator() {
                  public int compare (Object o1, Object o2)
                  {
                    String s1 = (String)o1;
                    String s2 = (String)o2;
                    int ignoringCase = s1.compareToIgnoreCase(s2);
                    return ignoringCase != 0 ? ignoringCase : s1.compareTo(s2);
                  }
                });

    //Print the cross references in a file (sorted alphabetically by word):
    //each word on its own line, then its line numbers indented on the next
    //line (comma separated), then a blank line
    int lineEntries = 0;
    for (Iterator words = allWords.iterator(); words.hasNext(); /*see body*/) {
      String word       = (String)words.next();
      List   references = (List)(xref.get(word));
      lineEntries += references.size();
      output.print(word + "\n  ");
      for (Iterator i = references.iterator(); i.hasNext();  /*see body*/)
        output.print( i.next() + (i.hasNext() ? ", " : "\n\n"));
    }
    output.close();


    //Final output to the console: time and file name to check.
    //The average uses integer division (truncated); it is reported as 0 when
    //the file contained no words, which would otherwise divide by zero.
    int wordCount = xref.size();
    int average   = (wordCount == 0 ? 0 : lineEntries/wordCount);
    System.out.println("Processed " + wordCount + " words and "
                       + lineEntries + " line entries for an average of "
                       + average + " lines/word");
    System.out.println("Time for processing = " + t.getElapsed() + " seconds");
    System.out.println("See " + output.getFileName());
  }
}