package org.baderlab.pdzsvmstruct.analysis;

import org.baderlab.pdzsvmstruct.data.manager.DataFileManager;
import org.baderlab.pdzsvmstruct.data.DataLoader;
import org.baderlab.pdzsvmstruct.utils.IRefIndexInteractome;
import org.baderlab.pdzsvmstruct.utils.Constants;
import org.baderlab.pdzsvmstruct.utils.PDZBaseUtils;
import org.baderlab.pdzsvmstruct.utils.PDZSVMUtils;
import org.baderlab.brain.ProteinProfile;
import org.biojava.bio.seq.Sequence;

import java.util.*;
import java.io.*;

/**
 * Copyright (c) 2011 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVMStruct.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVMStruct.  If not, see <http://www.gnu.org/licenses/>.
 */
/**
 * Annotates per-domain prediction files with validation information and
 * writes the annotated copies to the STRUCT-Validate directory.
 *
 * Input files are named {@code <domainName>.predictions.txt} and are read as
 * tab separated lines: column 0 is written back as the decision value,
 * column 1 is the peptide and column 2 is a comma separated list of
 * transcript ids.
 *
 * Output lines are tab separated:
 * {@code token, peptide, decision value, validation label, transcripts}.
 *
 * All input and output locations are resolved relative to
 * {@code DataFileManager.OUTPUT_ROOT_DIR + "/ScanTest/Predictions/"}.
 */
public class ValidatePredictions {

    // Sub-directories of <OUTPUT_ROOT_DIR>/ScanTest/Predictions/
    private static final String HUMAN_DIR = "STRUCT-Id-Human";
    private static final String FLY_DIR = "STRUCT-Id-Fly";
    private static final String WORM_DIR = "STRUCT-Id-Worm";
    private static final String VALIDATE_DIR = "STRUCT-Validate";

    // Every prediction file is named <domainName> + this suffix
    private static final String PREDICTIONS_SUFFIX = ".predictions.txt";

    public ValidatePredictions()
    {

    }

    /**
     * Runs the validation for the worm predictions.
     *
     * @param args not used
     */
    public static void main(String[] args)
    {
        ValidatePredictions vp = new ValidatePredictions();
        vp.validate(Constants.WORM);
    }

    /**
     * Builds the path (with trailing slash) of a predictions sub-directory.
     * The root is read on every call rather than cached in a constant.
     */
    private static String predictionsDir(String subDir)
    {
        return DataFileManager.OUTPUT_ROOT_DIR + "/ScanTest/Predictions/" + subDir + "/";
    }

    /**
     * Builds the path of the prediction file of a domain inside a predictions sub-directory.
     */
    private static String predictionsFile(String subDir, String domainName)
    {
        return predictionsDir(subDir) + domainName + PREDICTIONS_SUFFIX;
    }

    /**
     * Returns the input sub-directory for a non-human organism:
     * the worm directory for Constants.WORM, the fly directory otherwise.
     */
    private static String organismDir(String organism)
    {
        return organism.equals(Constants.WORM) ? WORM_DIR : FLY_DIR;
    }

    /**
     * Closes a stream, ignoring any failure. Only used from finally blocks
     * where a more relevant exception may already be on its way to the caller.
     */
    private static void closeQuietly(Closeable stream)
    {
        if (stream == null)
            return;
        try
        {
            stream.close();
        }
        catch (IOException ignored)
        {
            // nothing useful can be done when closing fails during cleanup
        }
    }

    /**
     * Reads a prediction file into memory.
     *
     * Lines with fewer than two tab separated columns are skipped instead of
     * aborting the whole file. A missing transcript column is treated as an
     * empty transcript list.
     *
     * @param fileName path of the prediction file
     * @return one {decision value, peptide, transcripts} array per usable line
     * @throws IOException if the file cannot be opened or read
     */
    private static List<String[]> readPredictions(String fileName) throws IOException
    {
        List<String[]> records = new ArrayList<String[]>();
        BufferedReader br = new BufferedReader(new FileReader(new File(fileName)));
        try
        {
            String line;
            while ((line = br.readLine()) != null)
            {
                String[] cols = line.split("\t");
                if (cols.length < 2)
                    continue;
                String transcripts = cols.length > 2 ? cols[2] : "";
                records.add(new String[] { cols[0], cols[1], transcripts });
            }
        }
        finally
        {
            br.close();
        }
        return records;
    }

    /**
     * Formats a comma separated transcript list for output: every non-empty,
     * trimmed id is prefixed and followed by a single space.
     */
    private static String formatTranscripts(String transcripts, String prefix)
    {
        StringBuilder sb = new StringBuilder();
        String[] ids = transcripts.split(",");
        for (int i = 0; i < ids.length; i++)
        {
            String id = ids[i].trim();
            if (id.length() == 0)
                continue;
            sb.append(prefix).append(id).append(' ');
        }
        return sb.toString();
    }

    /**
     * Collects the distinct sequence strings of a profile.
     *
     * @param profile profile to read, may be null
     * @return the distinct sequence strings, empty when profile is null
     */
    private static Set<String> sequenceStrings(ProteinProfile profile)
    {
        Set<String> result = new HashSet<String>();
        if (profile == null)
            return result;
        Collection seqs = profile.getSequenceMap();
        for (Iterator it = seqs.iterator(); it.hasNext();)
        {
            Sequence seq = (Sequence)it.next();
            result.add(seq.seqString());
        }
        return result;
    }

    /**
     * Finds the human predictions of a domain that are confirmed by the
     * given interactome lookup.
     *
     * The protein name passed to the lookup is the (converted) domain name
     * up to its first '-', or the whole name when it contains no '-'.
     *
     * @param domainName name of the domain; also names the prediction file
     * @param iRefIndex  lookup used to confirm (protein, transcript) pairs
     * @return map from peptide to a comma separated list of the 1-based
     *         positions, within the peptide's transcript list, of the
     *         confirmed transcripts; empty if the file cannot be read
     */
    private Map<String, String> getValidatedStructHits(String domainName,IRefIndexInteractome iRefIndex)
    {
        HashMap geneNameToProteinNameMap = PDZSVMUtils.getGeneNameToProteinNameMap();

        String convertName = (String)geneNameToProteinNameMap.get(domainName);
        if (convertName==null)
            convertName = domainName;
        int ix = convertName.indexOf('-');
        // substring(0,-1) would throw for names without a '-'
        String proteinName = ix < 0 ? convertName : convertName.substring(0,ix);

        Map<String, String> map = new HashMap<String, String>();
        try
        {
            List<String[]> records = readPredictions(predictionsFile(HUMAN_DIR, domainName));
            for (String[] record : records)
            {
                String peptide = record[1];
                String[] ids = record[2].split(",");
                for (int i = 0; i < ids.length; i++)
                {
                    String transcriptId = ids[i].trim();
                    if (transcriptId.length() == 0)
                        continue;
                    if (iRefIndex.lookupEnsemblId(proteinName, transcriptId))
                    {
                        String index = Integer.toString(i+1);
                        String previous = map.get(peptide);
                        map.put(peptide, previous == null ? index : previous + "," + index);
                    }
                }
            }
        }
        catch(Exception e)
        {
            // best effort: keep whatever was collected, but do not hide the failure
            System.err.println("Could not collect interactome hits for " + domainName + ": " + e);
        }
        return map;
    }

    /**
     * Finds the human predictions of a domain whose peptide is contained in
     * the list returned by {@code pdzbase.getPDZBaseIntList(domainName)}.
     * Only lines with at least one non-empty transcript id are considered.
     *
     * @param domainName name of the domain; also names the prediction file
     * @param pdzbase    source of the known peptides of the domain
     * @return the matching peptides, one entry per matching line; empty if
     *         nothing is known for the domain or the file cannot be read
     */
    private List<String> getValidatedStructHits(String domainName,PDZBaseUtils pdzbase)
    {
        List<String> list = new ArrayList<String>();
        List cTermList = pdzbase.getPDZBaseIntList(domainName);
        if (cTermList == null)
            return list;
        Set known = new HashSet(cTermList);

        try
        {
            List<String[]> records = readPredictions(predictionsFile(HUMAN_DIR, domainName));
            for (String[] record : records)
            {
                String peptide = record[1];
                boolean hasTranscript = formatTranscripts(record[2], "").length() > 0;
                if (hasTranscript && known.contains(peptide))
                    list.add(peptide);
            }
        }
        catch(Exception e)
        {
            // best effort: keep whatever was collected, but do not hide the failure
            System.err.println("Could not collect PDZBase hits for " + domainName + ": " + e);
        }
        return list;
    }

    /**
     * Splits the fly or worm predictions of a domain into those found in the
     * positive profile and those found in the negative profile.
     *
     * @param organism    Constants.WORM selects the worm directory, any
     *                    other value the fly directory
     * @param domainName  name of the domain; also names the prediction file
     * @param posProfile  profile with the positive sequences, may be null
     * @param negProfile  profile with the negative sequences, may be null
     * @return two lists: [0] predicted peptides present in posProfile,
     *         [1] predicted peptides present in negProfile; both empty if
     *         the file cannot be read
     */
    private List[] getValidatedStructHits(String organism, String domainName, ProteinProfile posProfile, ProteinProfile negProfile)
    {
        Set<String> validPos = sequenceStrings(posProfile);
        Set<String> validNeg = sequenceStrings(negProfile);

        List<String> validHitsList = new ArrayList<String>();
        List<String> validFalseHitsList = new ArrayList<String>();
        try
        {
            List<String[]> records = readPredictions(predictionsFile(organismDir(organism), domainName));
            for (String[] record : records)
            {
                String peptide = record[1];
                if (validPos.contains(peptide))
                    validHitsList.add(peptide);
                if (validNeg.contains(peptide))
                    validFalseHitsList.add(peptide);
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        return new List[] { validHitsList, validFalseHitsList };
    }

    /**
     * Validates the predictions of every domain of an organism.
     *
     * For Constants.HUMAN every {@code *.predictions.txt} file in the human
     * predictions directory is processed. For any other organism the test
     * profiles are loaded (fly profiles for Constants.FLY, worm profiles
     * otherwise) and every domain with a positive profile is processed.
     *
     * @param organism one of the organism constants in Constants
     */
    public void validate(String organism)
    {
        if (organism.equals(Constants.HUMAN))
        {
            IRefIndexInteractome iRefIndex = new IRefIndexInteractome(organism);
            PDZBaseUtils pdzbase = new PDZBaseUtils(organism);
            File dir = new File(predictionsDir(HUMAN_DIR));
            File[] files= dir.listFiles();
            if (files == null)
            {
                // listFiles() returns null when dir is missing or not readable
                System.err.println("Cannot list prediction files in " + dir);
                return;
            }

            for (int i=0; i < files.length;i++)
            {
                File file = files[i];
                String filename = file.getName();
                if (!file.isFile() || !filename.endsWith(PREDICTIONS_SUFFIX))
                    continue;
                // strip the suffix rather than cutting at the first '.'
                String domainName = filename.substring(0, filename.length() - PREDICTIONS_SUFFIX.length());
                if (domainName.equals(""))
                    continue;
                System.out.println(domainName);
                validateHumanDomain(domainName, iRefIndex, pdzbase);
            }
        }
        else
        {
            DataLoader dl = new DataLoader();
            if (organism.equals(Constants.FLY))
            {
                dl.loadFlyPDBTest();
            }
            else
            {
                dl.loadWormPDBTest(Constants.CHEN_WORM_PDB);
            }
            List posTestProfileList = dl.getPosTestProfileList();
            List negTestProfileList = dl.getNegTestProfileList();
            HashMap posProfileMap = PDZSVMUtils.profileListToHashMap(posTestProfileList);
            HashMap negProfileMap = PDZSVMUtils.profileListToHashMap(negTestProfileList);
            Set keys = posProfileMap.keySet();
            List domainNameList = new ArrayList(keys);
            for(int i = 0; i < domainNameList.size();i++)
            {
                String domainName = (String)domainNameList.get(i);

                ProteinProfile posProfile  = (ProteinProfile)posProfileMap.get(domainName);
                // may be null: not every domain has a negative profile
                ProteinProfile negProfile  = (ProteinProfile)negProfileMap.get(domainName);
                System.out.println(domainName);
                validateFlyWormDomain(organism, domainName, posProfile, negProfile);

            }
        }

    }

    /**
     * Writes the annotated prediction file of one fly or worm domain.
     *
     * Peptides found in posProfile get token "*" and label "PM", peptides
     * found only in negProfile get token "X" and label "PM", all others get
     * token " " and label "-". Fly transcript ids are prefixed with
     * "FLYBASE:"; empty transcript ids are omitted.
     *
     * @param organism    Constants.WORM selects the worm input directory,
     *                    any other value the fly directory
     * @param domainName  name of the domain; names the input and output file
     * @param posProfile  profile with the positive sequences, may be null
     * @param negProfile  profile with the negative sequences, may be null
     */
    public void validateFlyWormDomain(String organism, String domainName, ProteinProfile posProfile, ProteinProfile negProfile)
    {
        String inFileName = predictionsFile(organismDir(organism), domainName);
        String outFileName = predictionsFile(VALIDATE_DIR, domainName);

        List[] expHitsArray = getValidatedStructHits(organism, domainName, posProfile, negProfile);
        Set expHits = new HashSet(expHitsArray[0]);
        Set expFalseHits = new HashSet(expHitsArray[1]);
        String prefix = organism.equals(Constants.FLY) ? "FLYBASE:" : "";

        BufferedWriter bw = null;
        try
        {
            // read first so that no output file is created for a missing input
            List<String[]> records = readPredictions(inFileName);
            bw = new BufferedWriter(new FileWriter(new File(outFileName)));
            for (String[] record : records)
            {
                String decValue = record[0];
                String peptide = record[1];
                String transcriptString = formatTranscripts(record[2], prefix);

                String validToken = " ";
                String validString = "-";
                if (expHits.contains(peptide))
                {
                    validString = "PM";
                    validToken = "*";
                }
                else if (expFalseHits.contains(peptide))
                {
                    validString = "PM";
                    validToken = "X";
                }

                String outString = validToken +"\t" + peptide + "\t" + decValue + "\t" + validString+"\t" + transcriptString;
                bw.write(outString+"\n");
            }
            // close on the normal path so that a failing flush is reported
            bw.close();
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            closeQuietly(bw);
        }
    }

    /**
     * Writes the annotated prediction file of one human domain.
     *
     * The label starts with "PB" for peptides returned by the PDZBase check
     * and gets " IR(positions)" appended for peptides confirmed by the
     * interactome lookup, where positions are the 1-based indices of the
     * confirmed transcripts. Labelled peptides get token "*", all others get
     * token " " and label "-". Transcript ids are prefixed with "Ensembl:";
     * empty transcript ids are omitted.
     *
     * @param domainName name of the domain; names the input and output file
     * @param iRefIndex  lookup used to confirm (protein, transcript) pairs
     * @param pdzbase    source of the known peptides of the domain
     */
    public void validateHumanDomain(String domainName, IRefIndexInteractome iRefIndex, PDZBaseUtils pdzbase)
    {
        Map<String, String> ppiStructHitsMap = getValidatedStructHits(domainName, iRefIndex);
        Set<String> pdzBaseStructHits = new HashSet<String>(getValidatedStructHits(domainName, pdzbase));

        String inFileName = predictionsFile(HUMAN_DIR, domainName);
        String outFileName = predictionsFile(VALIDATE_DIR, domainName);

        BufferedWriter bw = null;
        try
        {
            // read first so that no output file is created for a missing input
            List<String[]> records = readPredictions(inFileName);
            bw = new BufferedWriter(new FileWriter(new File(outFileName)));
            for (String[] record : records)
            {
                String decValue = record[0];
                String peptide = record[1];
                String transcriptString = formatTranscripts(record[2], "Ensembl:");

                String validString = "";
                if (pdzBaseStructHits.contains(peptide))
                {
                    validString = "PB";
                }
                String transIxString = ppiStructHitsMap.get(peptide);
                if (transIxString != null)
                {
                    validString = validString + " IR("+transIxString+")";
                }
                String validToken = " ";
                if (!validString.equals(""))
                {
                    validToken = "*";
                }
                else
                {
                    validString = "-";
                }
                String outString = validToken +"\t" + peptide + "\t" + decValue + "\t" + validString+"\t" + transcriptString;
                bw.write(outString+"\n");
            }
            // close on the normal path so that a failing flush is reported
            bw.close();
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            closeQuietly(bw);
        }
    }


    /*public void validateAgainstExp(String organism)
    {
        HashMap geneNameToProteinNameMap = PDZSVMUtils.getGeneNameToProteinNameMap();

        DataLoader dl = new DataLoader();
        if (organism.equals(Constants.FLY))
        {
            dl.loadFlyPDBTest();
        }
        else
        {
            dl.loadWormPDBTest(Constants.CHEN_WORM_PDB);
        }
        List posTestProfileList = dl.getPosTestProfileList();
        List negTestProfileList = dl.getNegTestProfileList();
        HashMap posProfileMap = PDZSVMUtils.profileListToHashMap(posTestProfileList);
        HashMap negProfileMap = PDZSVMUtils.profileListToHashMap(negTestProfileList);

        Set domainKeys = posProfileMap.keySet();
        List domainNameList = new ArrayList(domainKeys);
        Collections.sort(domainNameList);

        HashMap bothDomainIntMap = new HashMap();
        HashMap structDomainIntMap = new HashMap();
        HashMap cmDomainIntMap = new HashMap();

        HashMap bothDomainFalseIntMap = new HashMap();
        HashMap structDomainFalseIntMap = new HashMap();
        HashMap cmDomainFalseIntMap = new HashMap();

        HashMap domainNameToNumPosMap = new HashMap();
        HashMap domainNameToNumNegMap = new HashMap();

        for (int ii=0; ii < domainNameList.size();ii++)
        {
            String domainName = (String)domainNameList.get(ii);
            ProteinProfile posProfile = (ProteinProfile) posProfileMap.get(domainName);
            ProteinProfile negProfile = (ProteinProfile) negProfileMap.get(domainName);
            if (negProfile == null)
                continue;

            int numPos = posProfile.getSequenceMap().size();
            int numNeg = negProfile.getSequenceMap().size();
            domainNameToNumPosMap.put(domainName, numPos);
            domainNameToNumNegMap.put(domainName, numNeg);

            List[] hits = getValidatedStructHits(organism, domainName, posProfile, negProfile);
            List structHits = hits[0];
            List structFalseHits = hits[1];

            hits = getValidatedCMHits(organism, domainName, posProfile, negProfile);
            List cmHits = hits[0];
            List cmFalseHits = hits[1];

            // sort hits out
            for (int j = 0; j < structHits.size();j++)
            {
                String hit = (String)structHits.get(j);
                if (cmHits.contains(hit))
                {

                    List hitList = (List)bothDomainIntMap.get(domainName);
                    if (hitList==null)
                        hitList = new ArrayList();
                    if (!hitList.contains(hit))
                    {
                        hitList.add(hit);
                        bothDomainIntMap.put(domainName,hitList);
                    }

                }
                else
                {
                    List hitList = (List)structDomainIntMap.get(domainName);
                    if (hitList==null)
                        hitList = new ArrayList();
                    if (!hitList.contains(hit))
                    {
                        hitList.add(hit);
                        structDomainIntMap.put(domainName,hitList);
                    }
                }
            }

            for (int j = 0; j < cmHits.size();j++)
            {
                String hit = (String)cmHits.get(j);
                if (!structHits.contains(hit))
                {

                    List hitList = (List)cmDomainIntMap.get(domainName);
                    if (hitList==null)
                        hitList = new ArrayList();
                    if (!hitList.contains(hit))
                    {
                        hitList.add(hit);
                        cmDomainIntMap.put(domainName,hitList);
                    }
                }
            }


            // sort hits out
            for (int j = 0; j < structFalseHits.size();j++)
            {
                String hit = (String)structFalseHits.get(j);
                if (cmFalseHits.contains(hit))
                {

                    List hitList = (List)bothDomainFalseIntMap.get(domainName);
                    if (hitList==null)
                        hitList = new ArrayList();
                    if (!hitList.contains(hit))
                    {
                        hitList.add(hit);
                        bothDomainFalseIntMap.put(domainName,hitList);
                    }

                }
                else
                {
                    List hitList = (List)structDomainFalseIntMap.get(domainName);
                    if (hitList==null)
                        hitList = new ArrayList();
                    if (!hitList.contains(hit))
                    {
                        hitList.add(hit);
                        structDomainFalseIntMap.put(domainName,hitList);
                    }
                }
            }

            for (int j = 0; j < cmFalseHits.size();j++)
            {
                String hit = (String)cmFalseHits.get(j);
                if (!structFalseHits.contains(hit))
                {

                    List hitList = (List)cmDomainFalseIntMap.get(domainName);
                    if (hitList==null)
                        hitList = new ArrayList();
                    if (!hitList.contains(hit))
                    {
                        hitList.add(hit);
                        cmDomainFalseIntMap.put(domainName,hitList);
                    }
                }
            }

        }

        String type = "STRUCT";
        Predictor p = getPredictorType(type);
        DataUtils d = new DataUtils(p);

        for (int ii=0;ii<domainNameList.size();ii++)
        {
            String domainName = (String)domainNameList.get(ii);
            String convertName = (String)geneNameToProteinNameMap.get(domainName);
            if (convertName==null)
                convertName = domainName;

            List structHitList = getStructHits(organism, domainName);
            List cmHitList = getCMHits(organism, domainName);

            int numStructHits = 0;
            int numCMHits = 0;
            int numBothHits = 0;

            for (int i=0; i < structHitList.size();i++)
            {
                String structHit = (String)structHitList.get(i);
                if (cmHitList.contains(structHit))
                {
                    numBothHits = numBothHits+1;
                }
                else
                {
                    numStructHits = numStructHits +1;
                }
            }
            for (int i=0; i < cmHitList.size();i++)
            {
                String cmHit = (String)cmHitList.get(i);
                if (!structHitList.contains(cmHit))
                {
                    numCMHits = numCMHits +1;
                }
            }


            int numValid = (Integer)domainNameToNumPosMap.get(domainName);
            int numValidNeg = (Integer)domainNameToNumNegMap.get(domainName);
            int numBoth = 0;
            int numStruct = 0;
            int numCM = 0;
            int numBothFalse = 0;
            int numStructFalse = 0;
            int numCMFalse = 0;
            List bothHitList = (List)bothDomainIntMap.get(domainName);
            if (bothHitList!=null)
                numBoth = bothHitList.size();
            structHitList = (List)structDomainIntMap.get(domainName);
            if (structHitList !=null)
                numStruct = structHitList.size();
            cmHitList = (List)cmDomainIntMap.get(domainName);
            if (cmHitList !=null)
                numCM = cmHitList.size();

            List bothHitFalseList = (List)bothDomainFalseIntMap.get(domainName);
            if (bothHitFalseList!=null)
                numBothFalse = bothHitFalseList.size();
            List structHitFalseList = (List)structDomainFalseIntMap.get(domainName);
            if (structHitFalseList !=null)
                numStructFalse = structHitFalseList.size();
            List cmHitFalseList = (List)cmDomainFalseIntMap.get(domainName);
            if (cmHitFalseList !=null)
                numCMFalse = cmHitFalseList.size();

            int numAll = numBoth + numCM + numStruct;
            double nnSim16 = 0.0;
            String nnName16 = "";
            try
            {
                DataUtils.SimInfo simInfo16 = d.seqSimInfo(domainName, organism, svm_parameter.CHEN_CONTACTMAP);
                nnSim16 = simInfo16.nnSim;
                nnName16 = simInfo16.nnName;

            }
            catch(Exception e)
            {
            }

            System.out.println(domainName +"\t" +numValid + "\t" + numAll + "\t" + numStructHits + "\t" + numCMHits + "\t" + numBothHits + "\t" + numStruct + "\t" + numCM+"\t"+numBoth  + "\t" + numValidNeg + "\t" +numStructFalse + "\t" + numCMFalse + "\t" + numBothFalse +"\t" +nnSim16+"\t" +  nnName16);
        }
    }
    */
}
