package org.genemania.dw.tools;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeMap;
import org.genemania.dw.entity.EnsemblGene;
import org.genemania.dw.entity.EntrezGene;
import org.genemania.dw.entity.ExtResource;
import org.genemania.dw.entity.ExtResourceGene;
import org.genemania.dw.entity.TAIRGene;
import org.genemania.dw.util.DWUtil;
import org.genemania.dw.util.DefParams;
import org.genemania.dw.util.GenUtil;

/**
 * Identifier mapping service in DW. Serves as a command line tool for
 * generating ID mapping tables and validating them, and can be easily
 * wrapped by a front end as well. 
 * 
 * Currently maps Ensembl to Entrez, Entrez to TAIR, vice versa,
 * and Entrez to Ecoli.
 * No getters are used for the attributes.
 * Optionally, the user can set the output file name (not in bulk mode).
 * Default file name generated: mappingType + '_' + speciesName.
 * If the command line arguments start with ALL, the only other argument read
 * is saveLocal (i.e. true/false). This bulk mode option generates all
 * 'required' mappings.
 * The 'saveLocal' option indirectly means generating unique GMIDs. If the IVFix
 * flag is off, however, leftovers are not saved.
 *
 * @author rashadbadrawi
 */

public class IdentifierMapper {

   //standard
   public static final String ENS_TO_ENTREZ  = "ENSEMBL_ENTREZ";
   public static final String ENTREZ_TO_ENS  = "ENTREZ_ENSEMBL";
   //special
   public static final String ENTREZ_TO_TAIR = "ENTREZ_TAIR";
   public static final String ENTREZ_ECOLI = "ENTREZ_ECOLI";
   public static final String TAIR_TO_ENTREZ = "TAIR_ENTREZ";

   private static ArrayList <String> mappingTypesList;
   private static PrintWriter log;

   private String fileName;
   private String mappingType;
   private String speciesName;
   private boolean saveLocal;
   private IdentifierValidator IDValidator;

   static {
       mappingTypesList = new ArrayList <String> ();
       mappingTypesList.add (ENS_TO_ENTREZ);
       mappingTypesList.add (ENTREZ_TO_ENS);
       mappingTypesList.add (ENTREZ_TO_TAIR);
       mappingTypesList.add (ENTREZ_ECOLI);
       mappingTypesList.add (TAIR_TO_ENTREZ);
   }

   public IdentifierMapper () {

       log = GenUtil.getDefaultLog();
       IDValidator = new IdentifierValidator ();
   }
   
   private void setMappingType (String mappingType) {
       
       if (!mappingTypesList.contains (mappingType)) {
           throw new IllegalArgumentException ("Unsupported identifier " +
                   "mapping: " + mappingType);
       } 
       this.mappingType = mappingType;
   }
   
   private void setFileName (String fileName) {
       
       GenUtil.validateString(fileName);
       this.fileName = fileName;
   }
   
   private void setSpeciesName (String speciesName) {
       
       if (!DWUtil.isSupportedSpecies(speciesName)) {
           throw new IllegalArgumentException ("Unsupported species: " +
                    speciesName);
       } 
       this.speciesName = speciesName;
   }
   
   private void setSaveLocal (boolean saveLocal) {
       
       this.saveLocal = saveLocal;    
   }

   public void map (boolean saveLocal) throws IOException, SQLException {

       for (int i = 0; i < mappingTypesList.size (); i++) {
           String currentMappingType = mappingTypesList.get (i);
           //additional section added to turn off 'bonus mapping'
           if (currentMappingType.equals (IdentifierMapper.ENTREZ_TO_ENS) ||
               currentMappingType.equals (IdentifierMapper.TAIR_TO_ENTREZ)) {
               continue;
           }
           //end additional section
           ArrayList <String> speciesList = DWUtil.getSpeciesAbbNames();
           for (int j = 0; j < speciesList.size (); j++) {
               String currentSpeciesName = speciesList.get (j);
               if (currentSpeciesName.equals (DWUtil.SP_CRESS) &&
                   (!currentMappingType.equals (IdentifierMapper.TAIR_TO_ENTREZ) &&
                    !currentMappingType.equals (IdentifierMapper.ENTREZ_TO_TAIR))) {
                   continue;
               }
               if ((currentMappingType.equals (IdentifierMapper.TAIR_TO_ENTREZ) ||
                    currentMappingType.equals (IdentifierMapper.ENTREZ_TO_TAIR)) &&
                   !currentSpeciesName.equals (DWUtil.SP_CRESS)) {
                   continue;
               }
               if (currentSpeciesName.equals (DWUtil.SP_ECOLI) &&
                   !currentMappingType.equals (IdentifierMapper.ENTREZ_ECOLI)) {
                   continue;
               }
               if (currentMappingType.equals (IdentifierMapper.ENTREZ_ECOLI) &&
                   !currentSpeciesName.equals (DWUtil.SP_ECOLI)) {
                   continue;
               }
               System.out.println (currentMappingType + " " + currentSpeciesName);
               try {
                   map (currentMappingType, currentSpeciesName, saveLocal);
               } catch (Exception e) {
                   e.printStackTrace();
                   continue;
               }
           }
       }
   }

   public void map (String mappingType, String speciesName, boolean saveLocal)
           throws IOException, SQLException {
       
       String defPathName = DefParams.getDefaultProp(DefParams.TOOLS_PATH_PROP);
       String defFileName = defPathName + mappingType + GenUtil.UNDERSCORE + 
                            speciesName;
       log.println ("Using default output file name:");
       log.println (defFileName);
       map (mappingType, speciesName, saveLocal, defFileName);
   }
   
   public void map (String mappingType, String speciesName, boolean
           saveLocal, String fileName) throws IOException, SQLException {
  
       setMappingType(mappingType);
       setFileName (fileName);
       setSpeciesName (speciesName);
       setSaveLocal (saveLocal);
       if (speciesName.equals (DWUtil.SP_CRESS) &&
           (!mappingType.equals (IdentifierMapper.TAIR_TO_ENTREZ) &&    
            !mappingType.equals (IdentifierMapper.ENTREZ_TO_TAIR))) {
           throw new IllegalArgumentException 
                ("Can only map between Entrez and TAIR for the 'At' species");    
       }
       if ((mappingType.equals (IdentifierMapper.TAIR_TO_ENTREZ) ||    
            mappingType.equals (IdentifierMapper.ENTREZ_TO_TAIR)) &&
            !speciesName.equals (DWUtil.SP_CRESS)) {
           throw new IllegalArgumentException 
                   ("Can only map between Entrez and TAIR for the 'At' species");
       }
       if (speciesName.equals (DWUtil.SP_ECOLI) &&
           !mappingType.equals (IdentifierMapper.ENTREZ_ECOLI)) {
           throw new IllegalArgumentException 
                ("Can only map from Entrez for the 'Ec' species");    
       }
       if (mappingType.equals (IdentifierMapper.ENTREZ_ECOLI) &&
            !speciesName.equals (DWUtil.SP_ECOLI)) {
           throw new IllegalArgumentException 
                   ("Can only map from Entrez for the 'Ec' species");
       }
       if (IdentifierMapper.ENS_TO_ENTREZ.equals (mappingType) ||
           IdentifierMapper.ENTREZ_TO_ENS.equals(mappingType)) {
           mapEnsemblEntrez ();
       } 
       if (IdentifierMapper.ENTREZ_TO_TAIR.equals (mappingType) ||
           IdentifierMapper.TAIR_TO_ENTREZ.equals (mappingType)) {
           mapEntrezTAIR ();
       }
       if (IdentifierMapper.ENTREZ_ECOLI.equals (mappingType)) {
           mapEntrezEcoli ();
       }
   }
   
   private void mapEnsemblEntrez () throws IOException, SQLException {
       
       BufferedWriter bw = new BufferedWriter (new FileWriter (fileName));
       TreeMap <String, ExtResource> leftOversMap;
       TreeMap <String, ExtResource> ensMap;
       TreeMap <String, ExtResource> entMap;
       if (IdentifierMapper.ENS_TO_ENTREZ.equals (mappingType)) { 
           ensMap = EnsemblGene.loadAllExt (speciesName);
           entMap = EntrezGene.loadAllExt (speciesName);
           //complement (ensMap, entMap);
           //complement (entMap, ensMap);
           leftOversMap = validateNSave (ensMap, entMap);
           bw.write (FFColumns.GMID_HEADER + GenUtil.TAB +
                     FFColumns.ENS_GENE_ID_HEADER + GenUtil.TAB +
                     FFColumns.PROTEIN_CODING_HEADER + GenUtil.TAB +
                     FFColumns.GENE_NAME_HEADER + GenUtil.TAB +
                     FFColumns.ENS_TRANS_ID_HEADER + GenUtil.TAB +
                     FFColumns.ENS_PRO_ID_HEADER + GenUtil.TAB +
                     FFColumns.UNIPROT_ID_HEADER + GenUtil.TAB);
           if (DWUtil.SP_MOUSE.equals (speciesName)) {
               bw.write (FFColumns.MGD_ID_HEADER + GenUtil.TAB);
           }
           bw.write (FFColumns.ENT_GENE_ID_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_MRNA_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_PRO_HEADER + GenUtil.TAB +
                     FFColumns.SYN_HEADER + GenUtil.TAB +
                     FFColumns.DEFN_HEADER);
           dumpEnsemblEntrez (bw, ensMap, leftOversMap);
       } else if (IdentifierMapper.ENTREZ_TO_ENS.equals(mappingType)) {
           //additional section added to override ID generation option for
           //'bonus' mappings
           setSaveLocal(false);
           System.out.println ("No persistence for 'bonus' mappings.");
          //end addition section
           ensMap = EnsemblGene.loadAllExt (speciesName);
           entMap = EntrezGene.loadAllExt (speciesName);
           //complement (ensMap, entMap);
           //complement (entMap, ensMap);
           leftOversMap = validateNSave (entMap, ensMap);
           bw.write (FFColumns.GMID_HEADER + GenUtil.TAB +
                     FFColumns.ENT_GENE_ID_HEADER + GenUtil.TAB +
                     FFColumns.PROTEIN_CODING_HEADER + GenUtil.TAB +
                     FFColumns.ENT_GENE_NAME_HEADER + GenUtil.TAB +
                     FFColumns.UNIPROT_ID_HEADER + GenUtil.TAB);
           if (DWUtil.SP_MOUSE.equals (speciesName)) {
               bw.write (FFColumns.MGD_ID_HEADER + GenUtil.TAB);
           }
           bw.write (FFColumns.ENS_GENE_ID_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_MRNA_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_PRO_HEADER + GenUtil.TAB +
                     FFColumns.SYN_HEADER + GenUtil.TAB +
                     FFColumns.DEFN_HEADER);
           dumpEnsemblEntrez (bw, entMap, leftOversMap);
       }
       //attempt cleaning up
       ensMap = null;
       entMap = null;
       leftOversMap = null;
       System.gc();
   }
   
   private void dumpEnsemblEntrez (BufferedWriter bw, TreeMap <String, 
           ExtResource> extResMap, TreeMap <String, ExtResource>
           leftOversMap) throws IOException {
    
       bw.newLine ();
       Iterator iterator = extResMap.keySet().iterator();
       while (iterator.hasNext()) {
           String extGeneID = (String)iterator.next();
           ExtResource extResGene = extResMap.get (extGeneID);
           if (saveLocal &&
               !ExtResource.STATUS_CURRENT.equals (extResGene.getStatus())) {
               continue;
           }
           bw.write (extResGene.toString());
           bw.newLine();
       }                     
       bw.flush ();
       if (!IDValidator.isFixing()) {
           bw.close ();
           return;
       }
       IDValidator.validate (leftOversMap, null, IdentifierValidator.VALID_TYPE_SHARED);
       iterator = leftOversMap.keySet().iterator();
       while (iterator.hasNext()) {
           String extGeneID = (String)iterator.next();
           ExtResourceGene extResGene = (ExtResourceGene)leftOversMap.get (extGeneID);
           if (saveLocal &&
               !ExtResource.STATUS_CURRENT.equals (extResGene.getStatus())) {
               continue;
           }
           if (extResGene instanceof EnsemblGene) {
               bw.write (extResGene.toStringLeftOver (ExtResource.RES_ENTREZ));
           } else {                 // if (extResGene instanceof EntrezGene) {
               bw.write (extResGene.toStringLeftOver (ExtResource.RES_ENSEMBL));
           }
           bw.newLine();
       }
       bw.flush ();
       bw.close ();
   }

   private void mapEntrezTAIR () throws IOException, SQLException {

       TreeMap <String, ExtResource> leftOversMap;
       BufferedWriter bw = new BufferedWriter (new FileWriter (fileName));
       Iterator iterator;
       TreeMap <String, ExtResource> entMap;
       TreeMap <String, ExtResource> tairMap;
       if (IdentifierMapper.ENTREZ_TO_TAIR.equals (mappingType)) {
           entMap = EntrezGene.loadAllExt (speciesName);
           tairMap =  TAIRGene.loadAllExt ();
           //complement (entMap, tairMap);
           //complement (tairMap, entMap);
           leftOversMap = validateNSave (entMap, tairMap);
           bw.write (FFColumns.GMID_HEADER + GenUtil.TAB +
                     FFColumns.ENT_GENE_ID_HEADER + GenUtil.TAB +
                     FFColumns.PROTEIN_CODING_HEADER + GenUtil.TAB +
                     FFColumns.GENE_NAME_HEADER + GenUtil.TAB +
                     FFColumns.UNIPROT_ID_HEADER + GenUtil.TAB +
                     FFColumns.TAIR_LOCUS_ID_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_MRNA_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_PRO_HEADER + GenUtil.TAB +
                     FFColumns.SYN_HEADER + GenUtil.TAB +
                     FFColumns.DEFN_HEADER);
           bw.newLine();
           iterator = entMap.keySet().iterator();
           while (iterator.hasNext()) {
               String entGeneID = (String)iterator.next();
               ExtResource entGene = entMap.get (entGeneID);
               if (saveLocal &&
                   !ExtResource.STATUS_CURRENT.equals (entGene.getStatus())) {
                   continue;
               }
               bw.write (((EntrezGene)entGene).toStringTAIR());
               bw.newLine();
           }
           bw.flush();
           dumpEntrezTAIRLeftOvers (bw, leftOversMap);
       } else if (IdentifierMapper.TAIR_TO_ENTREZ.equals (mappingType)) {
           //additional section added to override ID generation option for
           //'bonus' mappings
           setSaveLocal(false);
           System.out.println ("No persistence for 'bonus' mappings.");
          //end addition section
           entMap = EntrezGene.loadAllExt (speciesName);
           tairMap =  TAIRGene.loadAllExt ();
           //complement (entMap, tairMap);
           //complement (tairMap, entMap);
           leftOversMap = validateNSave (tairMap, entMap);
           bw.write (FFColumns.GMID_HEADER + GenUtil.TAB +
                     FFColumns.TAIR_LOCUS_ID_HEADER + GenUtil.TAB +
                     FFColumns.PROTEIN_CODING_HEADER + GenUtil.TAB +
                     FFColumns.TAIR_LOCUS_NAME_HEADER + GenUtil.TAB +
                     FFColumns.UNIPROT_ID_HEADER + GenUtil.TAB +
                     FFColumns.ENT_GENE_ID_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_MRNA_HEADER + GenUtil.TAB +
                     FFColumns.REFSEQ_PRO_HEADER + GenUtil.TAB +
                     FFColumns.SYN_HEADER + GenUtil.TAB +
                     FFColumns.DEFN_HEADER);
           bw.newLine();
           iterator = tairMap.keySet().iterator();
           while (iterator.hasNext()) {
               String tairGeneID = (String)iterator.next();
               TAIRGene tairGene = (TAIRGene)tairMap.get (tairGeneID);
               if (saveLocal &&
                   !ExtResource.STATUS_CURRENT.equals (tairGene.getStatus())) {
                   continue;
               }
               bw.write (tairGene.toString ());
               bw.newLine();
           }
           bw.flush ();
           dumpEntrezTAIRLeftOvers (bw, leftOversMap);
       }
       bw.flush ();
       bw.close ();
       //attempt cleaning up
       entMap = null;
       tairMap = null;
       leftOversMap = null;
       System.gc();
   }

   private void dumpEntrezTAIRLeftOvers (BufferedWriter bw, TreeMap <String,
           ExtResource> leftOversMap) throws IOException {

       if (!IDValidator.isFixing()) {
           return;
       }
       Iterator iterator = leftOversMap.keySet().iterator();
       while (iterator.hasNext()) {
           String extResID = (String)iterator.next();
           ExtResourceGene extResGene = (ExtResourceGene)leftOversMap.get (extResID);
           if (saveLocal &&
               !ExtResource.STATUS_CURRENT.equals (extResGene.getStatus())) {
               continue;
           }
           if (extResGene instanceof EntrezGene) {
               bw.write (extResGene.toStringLeftOver (ExtResource.RES_TAIR));
           } else {                  // if (extResGene instanceof TAIRGene) {
               bw.write (extResGene.toStringLeftOver (ExtResource.RES_ENTREZ));
           }
           bw.newLine();
       }
   }

   private void mapEntrezEcoli () throws IOException, SQLException {
       
       BufferedWriter bw = new BufferedWriter (new FileWriter (fileName));
       TreeMap <String, ExtResource> entMap =
                                    EntrezGene.loadAllExt (speciesName);
       validateNSave (entMap, null);
       Iterator iterator;
       bw.write (FFColumns.GMID_HEADER + GenUtil.TAB +
                 FFColumns.ENT_GENE_ID_HEADER + GenUtil.TAB +
                 FFColumns.PROTEIN_CODING_HEADER + GenUtil.TAB +
                 FFColumns.ENT_GENE_NAME_HEADER + GenUtil.TAB +
                 FFColumns.UNIPROT_ID_HEADER + GenUtil.TAB +    //no xref here.
                 FFColumns.REFSEQ_MRNA_HEADER + GenUtil.TAB +
                 FFColumns.REFSEQ_PRO_HEADER + GenUtil.TAB +
                 FFColumns.SYN_HEADER + GenUtil.TAB +
                 FFColumns.DEFN_HEADER);
       bw.newLine();
       iterator = entMap.keySet().iterator();
       while (iterator.hasNext()) {
           String entGeneID = (String)iterator.next();
           ExtResource entGene = entMap.get (entGeneID);
           if (saveLocal &&
               !ExtResource.STATUS_CURRENT.equals (entGene.getStatus())) {
               continue;
           }
           bw.write (((EntrezGene)entGene).toStringEcoli());
           bw.newLine();
       }
       bw.flush();
       bw.close();
       //attempt cleaning up
       entMap = null;
       System.gc();
   }

   /*
   private void complement (TreeMap <String, ExtResource>
           extMap1, TreeMap <String, ExtResource> extMap2) {
       
       Iterator iterator1 = extMap1.keySet().iterator();
       while (iterator1.hasNext()) {
           ExtResource extGene = extMap1.get ((String)iterator1.next());
           TreeMap <String, ExtResource> tempMap =
           extGene.getXRef(extMap2.get (extMap2.firstKey()).getSource());
           if (tempMap == null) {       //i.e. no XRefs from that resource
               continue;
           }
           Iterator iterator2 = tempMap.keySet().iterator();
           while (iterator2.hasNext()) {
               String xRefGeneID = (String)iterator2.next();
               ExtResource xRefGene = extMap2.get (xRefGeneID);
               //overwrite existing
               if (xRefGene != null) {              
                   tempMap.put (xRefGeneID, xRefGene);
               } else {                                 //handled in validation
                   System.out.println ("Xref not found: " + xRefGeneID);
               }
           }
           extGene.setXRef(extMap2.get (extMap2.firstKey()).getSource(), tempMap);
       }
   }
   */
    
   private TreeMap <String, ExtResource>
                validateNSave (TreeMap <String, ExtResource> extResMap1,
                               TreeMap <String, ExtResource> extResMap2)
                               throws SQLException {

       if (extResMap2 != null) {
           IDValidator.validate (extResMap1, extResMap2);
       } else {
           IDValidator.validate (extResMap1, null, IdentifierValidator.VALID_TYPE_SHARED);
       }
       TreeMap <String, ExtResource> leftOversMap = new TreeMap
                                                    <String, ExtResource> ();
       if (extResMap2 != null) {
           leftOversMap = IDValidator.getLeftOvers();
       }
       if (saveLocal) {
            System.out.println ("Saving Summary - primary resource. ");
            //Section to override the
            ExtResourceGene.saveAll (extResMap1);
            if (leftOversMap.size() > 0 && IDValidator.isFixing()) {
                System.out.println ("Saving Summary - leftovers. ");
                ExtResourceGene.saveAll(leftOversMap);
            }
            System.out.println ("Saving done. ");
       }

       return leftOversMap;
   }

   public void dumpValidation () throws IOException {

       this.IDValidator.dump(true);
   }

   public static void main (String args []) {

       log = GenUtil.getDefaultLog ();
       log.println (GenUtil.getTimeStamp ());
       GenUtil.registerStart ();
       String usageMsg = "Usage: IdentifierMapper mappingType [" +
                          ENS_TO_ENTREZ + "|" + ENTREZ_TO_ENS + "|"
                           + ENTREZ_TO_TAIR + "|" + TAIR_TO_ENTREZ + 
                           "|" + ENTREZ_ECOLI + "]" +
                          "\n species [" + DWUtil.SP_HUMAN + "|" +
                          DWUtil.SP_MOUSE + "|" + DWUtil.SP_RAT + "|" +
                          DWUtil.SP_WORM  + "|" + DWUtil.SP_YEAST + "|" +
                          DWUtil.SP_FLY  + "|" + DWUtil.SP_ECOLI + "|" +
                          DWUtil.SP_CRESS + "]" +
                          " saveLocal [true|false] outputFileName OR \n" +
                          GenUtil.ALL + " saveLocal [true|false]";
       String warnMsg = "WARNING: Missing command line args, using defaults";
       if (args == null || args.length == 0) {
           log.println (warnMsg);
           log.println (usageMsg);
           System.out.println (warnMsg);
           System.out.println (usageMsg);
           args = DefParams.getCommandLineArgs (IdentifierMapper.class.getName ());
       }
       try {
            IdentifierMapper mapper = new IdentifierMapper ();
            if (GenUtil.ALL.equals (args [0])) {
                mapper.map (Boolean.parseBoolean (args[1]));
            } else {
                if (args.length == 4) {
                    mapper.map (args [0], args [1], 
                                Boolean.parseBoolean (args[2]), args [3]);
                } else {
                    mapper.map (args [0], args [1], Boolean.parseBoolean (args[2]));
                }
            }
            mapper.dumpValidation();
       } catch (Throwable e) {
           e.printStackTrace ();
           e.printStackTrace (log);
       } finally {
           log.println (GenUtil.getExecTimeStr ());
           log.flush ();
           log.close ();
       }
   }
}
