package org.genemania.dw.tools;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeMap;

import org.genemania.dw.util.DefParams;
import org.genemania.dw.util.GenUtil;

/**
 * Parser for Batch Entrez tool output (nucleotide and protein).
 * Specific for the GI output for the BIND project.
 * However, it can serve as a prototype for this type of annotation parsing.
 * Class designed as a utility.
 *
 * The 'standard' format of an entry in such a file ('Brief' format) is like:
 * 15: M68910 Enterococcus faec...[gi:148326]
 * - One exception if the record was replaced/renamed.
 * - All other cases reflect a deprecation of the entry of interest.
 * - To get a cumulative listing of all calls to the read () method, do not
 * specify the target type with each call.
 * Instead of processing the output of the Batch Entrez file all in one
 * 'runtime', it is processed in batches, and the output of the earlier
 * processing is read from the dumped file.
 *
 * @author rashadbadrawi
 */

public class BatchEntrezReader {

    //Target types selecting which per-database map an ID pair is filed under.
    protected static final String LOOKUP_NUC = "NUC";
    protected static final String LOOKUP_PRO = "PRO";
    //Trailing annotation: the replacement ID follows on the next line.
    private static final String REPLACE_ANNOT = "The record has been replaced by";
    //Trailing annotation: the entry is still valid and is mapped as usual.
    private static final String RENAME_ANNOT =
                 "Chain renamed from (blank) to A during PDB remediation 2007";

    //All state below is static and shared by every caller; nothing here is
    //synchronized. It accumulates across read () calls until clear ().
    //source ID -> mapped ID, regardless of target type.
    private static final TreeMap <String, String> IDMap = new TreeMap <String, String> ();
    //mapped ID -> source ID(s), several source IDs joined by GenUtil.SEMICOLON.
    private static final TreeMap <String, String> IDMapRev = new TreeMap <String, String> ();
    //source ID -> mapped ID, only for pairs added with LOOKUP_NUC.
    private static final TreeMap <String, String> IDNucMap = new TreeMap <String, String> ();
    //source ID -> mapped ID, only for pairs added with LOOKUP_PRO.
    private static final TreeMap <String, String> IDProMap = new TreeMap <String, String> ();
    //distinct trailing annotations seen (the 'replaced' one is never recorded).
    private static final ArrayList <String> annotList = new ArrayList <String> ();
    private static int stCnt, replacedCnt, renamedCnt, removedCnt;

    private BatchEntrezReader () {}                     //no instances allowed

    /**
     * Parses one Batch Entrez 'Brief' output file and adds the ID pairs found
     * in it to the shared static maps.
     *
     * Malformed lines are reported on System.err and skipped; they do not
     * abort the parse.
     *
     * @param inputFileName file to parse; checked with GenUtil.validateString.
     * @param propFileName  not used by this method.
     * @param sourceType    ID type expected inside the brackets (the text
     *                      before the colon, e.g. 'gi'); a mismatch is reported
     *                      as ERROR 4 but the entry is still processed.
     * @param targetType    LOOKUP_NUC or LOOKUP_PRO; any other value (including
     *                      null) is reported as 'Error 6' and only the
     *                      cumulative map is populated.
     * @return the live (not copied) map for the target type, or the cumulative
     *         map if the target type is not recognized.
     * @throws IOException if the file cannot be opened, read or closed.
     */
    public static TreeMap <String, String> read (String inputFileName, String
       propFileName, String sourceType, String targetType) throws IOException {

        GenUtil.validateString (inputFileName);
        GenUtil.validateString (sourceType);
        BufferedReader br = new BufferedReader (new FileReader (inputFileName));
        try {
            String line;
            //Set only while a 'replaced' entry awaits its continuation line.
            String ID1Str = null, ID2Str = null;
            while ((line = br.readLine ()) != null) {
                if (line.trim().length() == 0) {
                    continue;
                }
                //a replacement entry cont'd
                if (line.indexOf (GenUtil.COLON) < 0) {
                    String replacementID = line.trim ();
                    //sanity check - 1
                    if (replacementID.indexOf (GenUtil.SPACE) != -1) {
                        System.err.println ("ERROR 1: Bad format for parsed file: " +
                                            replacementID);
                    }
                    //sanity check - 2a: there must be a pending replaced entry.
                    if (ID1Str == null || ID2Str == null) {
                        System.err.println ("ERROR 2: Bad format for parsed file: " +
                                "replacement ID without a preceding replaced entry: " +
                                replacementID);
                        continue;
                    }
                    //sanity check - 2b: a replacement should differ from the original.
                    if (ID1Str.equals (replacementID)) {
                        System.err.println ("ERROR 2: Bad format for parsed file: " +
                                            ID1Str + " " + replacementID);
                    }
                    addMappedIDs (ID2Str, replacementID, targetType);
                    ID1Str = null;
                    ID2Str = null;
                    continue;
                }
                //a new entry
                //sanity check - 3
                int colonIdx = line.indexOf (GenUtil.COLON);
                String entryNum = line.substring (0, colonIdx);
                try {
                    Integer.parseInt (entryNum);
                } catch (NumberFormatException nfo) {
                    System.err.println ("ERROR 3: Bad format for parsed file.");
                    nfo.printStackTrace ();
                    continue;
                }
                line = line.substring (colonIdx + 1).trim ();
                int spaceIdx = line.indexOf (GenUtil.SPACE);
                //The ID sits in the LAST bracket pair; the description before
                //it may contain brackets of its own.
                int leftIdx = line.lastIndexOf (GenUtil.LEFT_BRACKET);
                int rightIdx = line.lastIndexOf (GenUtil.RIGHT_BRACKET);
                if (spaceIdx < 0 || leftIdx < 0 || rightIdx < leftIdx) {
                    System.err.println ("ERROR 3: Bad format for parsed file: " +
                                        line);
                    ID1Str = null;
                    ID2Str = null;
                    continue;
                }
                String bracketed = line.substring (leftIdx + 1, rightIdx);
                int typeSepIdx = bracketed.indexOf (GenUtil.COLON);
                if (typeSepIdx < 0) {
                    System.err.println ("ERROR 3: Bad format for parsed file: " +
                                        line);
                    ID1Str = null;
                    ID2Str = null;
                    continue;
                }
                ID1Str = line.substring (0, spaceIdx).trim ();
                //sanity check - 4
                String IDTypeParsed = bracketed.substring (0, typeSepIdx);
                if (!IDTypeParsed.equals (sourceType)) {
                    System.err.println ("ERROR 4: ID types not matched: " +
                                        IDTypeParsed + " " + sourceType);
                }
                ID2Str = bracketed.substring (typeSepIdx + 1).trim ();
                //Whatever follows the ID's closing bracket is the annotation.
                String annot = line.substring (rightIdx + 1).trim ();
                if (annot.length () == 0) {                 //standard entry
                    addMappedIDs (ID2Str, ID1Str, targetType);
                    BatchEntrezReader.stCnt++;
                    ID1Str = null;
                    ID2Str = null;
                } else if (annot.equals (BatchEntrezReader.REPLACE_ANNOT)) {
                    //Keep ID1Str/ID2Str: the next line carries the new ID.
                    BatchEntrezReader.replacedCnt++;
                    continue;
                } else if (annot.equals (BatchEntrezReader.RENAME_ANNOT)) {
                    addMappedIDs (ID2Str, ID1Str, targetType);
                    BatchEntrezReader.renamedCnt++;
                    ID1Str = null;
                    ID2Str = null;
                } else {                                    //deprecated
                    BatchEntrezReader.removedCnt++;
                    ID1Str = null;
                    ID2Str = null;
                }
                if (annot.length () > 0 &&
                    !BatchEntrezReader.annotList.contains (annot)) {
                    BatchEntrezReader.annotList.add (annot);
                }
            }
            return getMappedIDs (targetType);
        } finally {
            br.close ();
        }
    }

    /**
     * Records one source ID / mapped ID pair in the cumulative map, the
     * reverse map and, if the target type is recognized, the matching
     * per-database map. Duplicate source IDs are reported and overwritten.
     *
     * @param ID1        source ID (the key of the forward maps).
     * @param ID2        mapped ID.
     * @param targetType LOOKUP_NUC, LOOKUP_PRO, or anything else for
     *                   'cumulative only' (reported as 'Error 6').
     */
    private static void addMappedIDs (String ID1, String ID2, String targetType) {

        if (IDMap.containsKey (ID1)) {
            System.err.println ("ERROR 5: ID listed twice: " + ID1);
        }
        IDMap.put (ID1, ID2);
        //The reverse map accumulates every source ID seen for a mapped ID.
        //Build that value in its own variable so the forward maps below are
        //still keyed by the plain source ID.
        String existingID1 = IDMapRev.get (ID2);
        String revValue = ID1;
        if (existingID1 != null) {
            revValue = existingID1 + GenUtil.SEMICOLON + ID1;
        }
        IDMapRev.put (ID2, revValue);
        if (BatchEntrezReader.LOOKUP_NUC.equals (targetType)) {
            if (IDNucMap.containsKey (ID1)) {
                System.err.println ("ERROR 5: ID listed twice: " + ID1);
            }
            IDNucMap.put (ID1, ID2);
        } else if (BatchEntrezReader.LOOKUP_PRO.equals (targetType)) {
            if (IDProMap.containsKey (ID1)) {
                System.err.println ("ERROR 5: ID listed twice: " + ID1);
            }
            IDProMap.put (ID1, ID2);
        } else {
            System.err.println ("Error 6: Unknown target type: " +  targetType);
        }
    }

    /**
     * @return the live cumulative map (source ID to mapped ID) built by all
     *         read () calls since the last clear ().
     */
    public static TreeMap <String, String> getMappedIDs () {

        return BatchEntrezReader.IDMap;
    }

    /**
     * @param targetType LOOKUP_NUC or LOOKUP_PRO.
     * @return the live map for the given target type; for any other value
     *         'Error 6' is reported and the cumulative map is returned.
     */
    public static TreeMap <String, String> getMappedIDs (String targetType) {

        if (BatchEntrezReader.LOOKUP_NUC.equals (targetType)) {
            return BatchEntrezReader.IDNucMap;
        } else if (BatchEntrezReader.LOOKUP_PRO.equals (targetType)) {
            return BatchEntrezReader.IDProMap;
        } else {
            System.err.println ("Error 6: Unknown target type: " +  targetType);
            return BatchEntrezReader.IDMap;
        }
    }

    /**
     * Loads the supplementary mapping dumped earlier (file name taken from
     * DefParams.SUPP_FILE1_NAME_PROP) and re-keys it by the source IDs of the
     * protein map: each key of the file is looked up among the mapped protein
     * IDs that start with one of IdentifierMapperService.REFSEQ_PRO_PREFIX_ARR.
     *
     * Keys with no matching protein entry are reported ('Error 8') and
     * skipped. Any exception is printed and whatever was collected so far is
     * returned, so the result may be empty or partial but is never null.
     *
     * @return source ID mapped to the list of supplementary IDs.
     */
    public static TreeMap <String, ArrayList <String>> getSupMappedIDs () {

        TreeMap <String, ArrayList <String>> supMap = new TreeMap
                                               <String, ArrayList <String>> ();
        try {
            //load the supplementary mapping dumped earlier.
            String supOutputFileName =
                       DefParams.getDefaultProp (DefParams.SUPP_FILE1_NAME_PROP);
            TreeMap <String, ArrayList <String>> loadedMap =
                                             loadSupFile (supOutputFileName);
            //refine the existing pros and reverse.
            TreeMap <String, String> IDProRefined = reverseRefSeqProteins ();
            //match both
            for (String key : loadedMap.keySet ()) {
                String newKey = IDProRefined.get (key);
                if (newKey == null) {
                    System.err.println ("Error 8: Inconsistent results. Missing: "
                            + key + " " + newKey);
                    //A null key cannot go into a TreeMap; skip this entry
                    //rather than abort the remaining ones.
                    continue;
                }
                ArrayList <String> tempList = supMap.get (newKey);
                if (tempList == null) {
                    tempList = new ArrayList <String> ();
                    supMap.put (newKey, tempList);
                }
                tempList.addAll (loadedMap.get (key));
            }
        } catch (Exception e) {
            e.printStackTrace ();
        }
        return supMap;
    }

    /**
     * Reads a two-column, GenUtil.TAB separated file into a map from the
     * first column to all second-column values listed for it. Lines with
     * fewer than two columns are skipped (and reported unless blank).
     */
    private static TreeMap <String, ArrayList <String>> loadSupFile (
                                     String fileName) throws IOException {

        TreeMap <String, ArrayList <String>> loadedMap =
                                 new TreeMap <String, ArrayList <String>> ();
        BufferedReader br = new BufferedReader (new FileReader (fileName));
        try {
            String line;
            while ((line = br.readLine ()) != null) {
                String tempArr [] = line.split (GenUtil.TAB);
                if (tempArr.length < 2) {
                    if (line.trim ().length () > 0) {
                        System.err.println ("Error 9: Bad format for " +
                                            "supplementary file: " + line);
                    }
                    continue;
                }
                ArrayList <String> tempList = loadedMap.get (tempArr [0]);
                if (tempList == null) {
                    tempList = new ArrayList <String> ();
                    loadedMap.put (tempArr [0], tempList);
                }
                tempList.add (tempArr [1]);
            }
        } finally {
            br.close ();
        }
        return loadedMap;
    }

    /**
     * Builds mapped ID -> source ID for those protein map entries whose
     * mapped ID carries a RefSeq protein prefix. If several source IDs share
     * a mapped ID, 'Error 7' is reported and the last one wins.
     */
    private static TreeMap <String, String> reverseRefSeqProteins () {

        TreeMap <String, String> refined = new TreeMap <String, String> ();
        for (String sourceID : IDProMap.keySet ()) {
            String accn = IDProMap.get (sourceID);
            if (!isRefSeqProtein (accn)) {
                continue;
            }
            if (refined.containsKey (accn)) {
                System.err.println ("Error 7: Duplicate entries for " + accn);
            }
            refined.put (accn, sourceID);                   //reversed
        }
        return refined;
    }

    /** True if the ID starts with any entry of REFSEQ_PRO_PREFIX_ARR. */
    private static boolean isRefSeqProtein (String accn) {

        for (int i = 0; i < IdentifierMapperService.REFSEQ_PRO_PREFIX_ARR.length; i++) {
            if (accn.startsWith (IdentifierMapperService.REFSEQ_PRO_PREFIX_ARR [i])) {
                return true;
            }
        }
        return false;
    }

    /**
     * Discards everything accumulated by earlier read () calls: all maps,
     * the annotation list and the entry counters.
     */
    public static void clear () {

        BatchEntrezReader.IDMap.clear ();
        BatchEntrezReader.IDMapRev.clear ();
        BatchEntrezReader.IDNucMap.clear ();
        BatchEntrezReader.IDProMap.clear ();
        BatchEntrezReader.annotList.clear ();
        //Keep the counters in step with the (now empty) maps.
        BatchEntrezReader.stCnt = 0;
        BatchEntrezReader.replacedCnt = 0;
        BatchEntrezReader.renamedCnt = 0;
        BatchEntrezReader.removedCnt = 0;
    }

    /**
     * Debugging aid: prints every distinct annotation collected by read ().
     * Not called anywhere in this class.
     */
    private static void dumpAllAnnots () {

        System.out.println ("CVs for all annot types: " + BatchEntrezReader.annotList.size());
        for (String annot : BatchEntrezReader.annotList) {
            System.out.println (annot);
        }
    }

    //mainly for testing, takes an ID type and two file arguments, first file
    //is against the nuc DB, and second against the protein DB.
    public static void main (String args []) {

       PrintWriter log = GenUtil.getDefaultLog ();
       String usageMsg = "Usage: BatchEntrezReader IDType inputFileName1  " +
                                 "inputFileName2";
       String warnMsg = "WARNING: Missing command line args, using defaults";
       if (args == null || args.length == 0) {
           log.println (warnMsg);
           log.println (usageMsg);
           System.out.println (warnMsg);
           System.out.println (usageMsg);
           args = DefParams.getCommandLineArgs (BatchEntrezReader.class.getName ());
       }
       try {
            //All three arguments are required; fail with the usage text
            //rather than an index-out-of-bounds stack trace.
            if (args == null || args.length < 3) {
                log.println (usageMsg);
                System.out.println (usageMsg);
                return;
            }
            BatchEntrezReader.read (args [1], null, args [0], BatchEntrezReader.LOOKUP_NUC);
            BatchEntrezReader.read (args [2], null, args [0], BatchEntrezReader.LOOKUP_PRO);
            System.out.println ("Counts: Total: " + BatchEntrezReader.IDMap.size());
            System.out.println ("Found: " + (BatchEntrezReader.stCnt + BatchEntrezReader.renamedCnt));
            System.out.println ("Replaced: " + BatchEntrezReader.replacedCnt);
            System.out.println ("Removed: " + BatchEntrezReader.removedCnt);
       } catch (Throwable e) {
           e.printStackTrace ();
           e.printStackTrace (log);
       } finally {
           log.flush ();
           log.close ();
       }
   }
}
