package org.baderlab.pdzsvmstruct.analysis;

import org.baderlab.pdzsvmstruct.utils.PDZSVMUtils;
import org.baderlab.pdzsvmstruct.utils.Constants;
import org.baderlab.pdzsvmstruct.data.manager.DataFileManager;
import org.baderlab.brain.ProteinProfile;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.db.HashSequenceDB;

import java.util.List;
import java.util.HashMap;
import java.util.ArrayList;
import java.util.Locale;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.io.*;

/**
 * Copyright (c) 2011 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVMStruct.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVMStruct.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Post-processes per-domain prediction files: annotates each predicted peptide
 * with the names it maps to in a proteome FASTA file ({@link #makeIdFiles()})
 * and builds sequence logos from the annotated files ({@link #makeLogoFiles()}).
 *
 * Prediction files are tab-separated text with a decision value in the first
 * column and a peptide in the second column; the part of the file name before
 * the first '.' is used as the domain name.
 */
public class ProteomeId
{
    // Lookup filled by makeIdFiles() from PDZSVMUtils.genomeSeqToTranscriptNameMap.
    // As used by addIds(): keys are peptide strings, values are Lists of names.
    private HashMap seqToTranscriptIDMap;

    public ProteomeId()
    {
    }

    /**
     * Reads every file in OUTPUT_ROOT_DIR/ScanTest/Predictions/STRUCT-Id/, builds a
     * profile from the peptides it lists and saves it as a logo under
     * OUTPUT_ROOT_DIR/ScanTest/Logos/STRUCT/.
     *
     * Files that cannot be read or parsed are reported on stdout and skipped;
     * files whose name has no non-empty prefix before the first '.' are skipped.
     */
    public void makeLogoFiles()
    {
        File dir = new File(DataFileManager.OUTPUT_ROOT_DIR+"/ScanTest/Predictions/STRUCT-Id/");
        File[] files = dir.listFiles();
        if (files == null)
        {
            // listFiles() returns null when the path is not a readable directory.
            System.out.println("Cannot list prediction files in: " + dir);
            return;
        }
        String organism = Constants.HUMAN;
        for (int i=0; i < files.length;i++)
        {
            File file = files[i];

            String filename = file.getName();
            System.out.println(filename);

            String domainName = domainNameOf(filename);
            if (domainName == null)
                continue;

            try
            {
                // The decision values are parsed only to validate the file format.
                List<Double> decValues = new ArrayList<Double>();
                List<String> peptides = new ArrayList<String>();
                readPredictions(file, decValues, peptides);

                HashSequenceDB seqDB = new HashSequenceDB();
                int ix = 0;
                for (int p = 0; p < peptides.size(); p++)
                {
                    String peptide = peptides.get(p);
                    try
                    {
                        // The running index keeps sequence names unique when a
                        // peptide occurs more than once in the file.
                        Sequence seq = ProteinTools.createProteinSequence(peptide,peptide+ix);
                        ix= ix+1;
                        seqDB.addSequence(seq);
                    }
                    catch(Exception e)
                    {
                        // A single bad peptide must not prevent the logo from being built.
                        System.out.println("Exception: " + e);
                    }
                }

                if (seqDB.ids().size()>0)
                {
                    ProteinProfile predProfile = PDZSVMUtils.makeProfile(
                            domainName,
                            0,
                            "",
                            organism,
                            seqDB);

                    System.out.println("Saving logos for "+predProfile.getName()+"...");
                    PDZSVMUtils.saveAsLogo(predProfile, DataFileManager.OUTPUT_ROOT_DIR+"/ScanTest/Logos/STRUCT/",Constants.CLASS_YES);
                }
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
                e.printStackTrace();
            }
        }
    }

    /**
     * Loads the proteome lookup for the (currently hard-coded, human) organism and,
     * for every file in OUTPUT_ROOT_DIR/ScanTest/Predictions/STRUCT/, writes an
     * annotated copy via {@link #addIds(String, String, List, List)}.
     *
     * Nothing is processed when the prediction directory cannot be listed or the
     * lookup cannot be loaded, because no output could be produced in either case.
     */
    public void makeIdFiles()
    {
        File dir = new File(DataFileManager.OUTPUT_ROOT_DIR+"/ScanTest/Predictions/STRUCT/");
        File[] files = dir.listFiles();
        if (files == null)
        {
            // listFiles() returns null when the path is not a readable directory.
            System.out.println("Cannot list prediction files in: " + dir);
            return;
        }
        String organism = Constants.HUMAN;

        String fastaFileName = proteomeFastaFor(organism);
        try
        {
            // NOTE(review): the second argument (5) is presumably the peptide length
            // used as the lookup key - confirm against PDZSVMUtils.
            seqToTranscriptIDMap =  PDZSVMUtils.genomeSeqToTranscriptNameMap(fastaFileName, 5);
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
            return;
        }

        for (int i=0; i < files.length;i++)
        {
            File file = files[i];

            String filename = file.getName();
            System.out.println(filename);

            String domainName = domainNameOf(filename);
            if (domainName == null)
                continue;

            try
            {
                List<Double> posPredDecValueList = new ArrayList<Double>();
                List<String> posPredPeptideList = new ArrayList<String>();
                readPredictions(file, posPredDecValueList, posPredPeptideList);
                addIds(domainName, organism, posPredDecValueList, posPredPeptideList);
            }
            catch(Exception e)
            {
                System.out.println("Exception: " + e);
                e.printStackTrace();
            }
        }
    }

    /**
     * Writes OUTPUT_ROOT_DIR/ScanTest/Predictions/STRUCT-Id/&lt;domainName&gt;.predictions.txt
     * with one line per prediction: decision value (at most three decimals),
     * peptide, and the comma-separated names found for the peptide in the lookup
     * loaded by {@link #makeIdFiles()}.
     *
     * A peptide without any name in the lookup is still written, with an empty
     * third column, and reported on stdout. Failures are reported on stdout and
     * never thrown to the caller.
     *
     * @param domainName          name used to build the output file name
     * @param organism            not used by this method
     * @param posPredDecValueList decision values (Double), parallel to posPredPeptideList
     * @param posPredPeptideList  peptides (String), parallel to posPredDecValueList
     */
    public void addIds(String domainName, String organism, List posPredDecValueList, List posPredPeptideList)
    {
        if (seqToTranscriptIDMap == null)
        {
            System.out.println("Exception: transcript ID map has not been loaded; cannot write IDs for " + domainName);
            return;
        }
        try
        {
            // Fixed symbols so the decimal separator is always '.', which is what
            // Double.parseDouble expects when makeLogoFiles() reads the file back.
            DecimalFormat format = new DecimalFormat("#.###", new DecimalFormatSymbols(Locale.US));
            // The whole content is built before the file is opened so that a failure
            // while formatting does not leave a truncated output file behind.
            StringBuilder outString = new StringBuilder();
            for (int i=0; i < posPredDecValueList.size();i++)
            {
                Double decValueKey = (Double)posPredDecValueList.get(i);
                String pepString = (String)posPredPeptideList.get(i);
                List nameList = (List)seqToTranscriptIDMap.get(pepString);

                outString.append(format.format(decValueKey)).append("\t").append(pepString).append("\t");
                if (nameList == null || nameList.isEmpty())
                {
                    System.out.println("No transcript IDs found for peptide: " + pepString);
                }
                else
                {
                    for (int j = 0; j < nameList.size();j++)
                    {
                        if (j > 0)
                            outString.append(", ");
                        outString.append(nameList.get(j));
                    }
                }
                outString.append("\n");
            }

            File outFile = new File(DataFileManager.OUTPUT_ROOT_DIR+"/ScanTest/Predictions/STRUCT-Id/"+domainName+".predictions.txt");
            BufferedWriter bw = new BufferedWriter(new FileWriter(outFile));
            try
            {
                bw.write(outString.toString());
            }
            finally
            {
                bw.close();
            }
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
    }

    // Returns the part of filename before its first '.', or null when there is no
    // '.' or the name starts with one (e.g. hidden files), i.e. no usable prefix.
    private static String domainNameOf(String filename)
    {
        int ixs = filename.indexOf('.');
        if (ixs <= 0)
            return null;
        return filename.substring(0,ixs);
    }

    // Returns the proteome FASTA path for the given organism, or "" when the
    // organism is not one of the known constants.
    private static String proteomeFastaFor(String organism)
    {
        String proteomeDir = DataFileManager.DATA_ROOT_DIR+"/Data/Proteomes/";
        if (organism == null)
            return "";
        if (organism.equals(Constants.HUMAN))
            return proteomeDir+"Homo_sapiens.GRCh37.56.pep.all.fa";
        if (organism.equals(Constants.MOUSE))
            return proteomeDir+"Mus_musculus.NCBIM37.59.pep.all.fa";
        if (organism.equals(Constants.FLY))
            return proteomeDir+"Drosophila_melanogaster.BDGP5.13.56.pep.all.fa";
        if (organism.equals(Constants.WORM))
            return proteomeDir+"Caenorhabditis_elegans.WS200.56.pep.all.fa";
        return "";
    }

    // Reads a tab-separated prediction file, appending column 0 (parsed as a double)
    // to decValues and column 1 to peptides. Blank lines are skipped; a non-blank
    // line with fewer than two columns or a non-numeric first column makes the
    // whole read fail. The reader is closed on every path.
    private static void readPredictions(File file, List<Double> decValues, List<String> peptides) throws IOException
    {
        BufferedReader br = new BufferedReader(new FileReader(file));
        try
        {
            String line;
            int lineNumber = 0;
            while((line=br.readLine())!=null)
            {
                lineNumber++;
                if (line.trim().length()==0)
                    continue;
                String[] splitLine = line.split("\t");
                if (splitLine.length < 2)
                    throw new IOException("Malformed prediction line " + lineNumber + " in " + file.getName() + ": " + line);
                decValues.add(Double.parseDouble(splitLine[0]));
                peptides.add(splitLine[1]);
            }
        }
        finally
        {
            br.close();
        }
    }

    /**
     * Command-line entry point: generates the ID-annotated prediction files.
     */
    public static void main(String[]args)
    {
        ProteomeId p = new ProteomeId();
        p.makeIdFiles();
        // Logo generation reads the files written by makeIdFiles(); enable when needed.
        //p.makeLogoFiles();
    }
}
