package org.baderlab.pdzsvm.data.utils;

import org.baderlab.pdzsvm.utils.Constants;
import org.baderlab.pdzsvm.data.manager.DataFileManager;

import java.util.*;
import java.io.*;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * One-off data preparation utility for UniProt PDZ domain files.
 *
 * Each private method is a stand-alone processing step that reads files below
 * {@code DataFileManager.DATA_ROOT_DIR} and either prints its result to standard
 * output or writes a new file. The constructor runs exactly one step; switch the
 * step by editing the constructor.
 *
 * Steps that need the PDZ UniProt id list or the iRefIndex-filtered id list load
 * them on demand, so the steps may be invoked in any order.
 */
public class UniprotPDZDomainData {
    /** Domains whose conditional E-value is above this threshold are ignored by {@link #parseHmmerOutFile()}. */
    private static final double MAX_CONDITIONAL_E_VALUE = 0.0000000001;

    /**
     * Line range (exclusive start, inclusive end, 1-based) of PF00595.full that
     * {@link #parsePfamAlignmentFile()} treats as per-sequence accession annotations.
     * NOTE(review): these values are tied to one specific download of the file - confirm
     * before using a different release.
     */
    private static final int PFAM_ANNOTATION_START_EXCLUSIVE = 54;
    private static final int PFAM_ANNOTATION_END_INCLUSIVE = 13513;

    /** UniProt ids found in the PDZ FASTA file; null until {@link #loadPDZUniprotIds()} has run. */
    private List<String> pdzUniprotIdList;
    /** Subset of {@link #pdzUniprotIdList} that occurs in at least one iRefIndex interaction; null until loaded. */
    private List<String> pdzUniprotiRefIndexIdList;
    /** UniProt id -> number of iRefIndex interactions containing that id; null until loaded. */
    private Map<String, Integer> pdzUniprotToNumiRefIntMap;

    /**
     * Runs the currently selected processing step ({@link #make213PDZScanFile()}).
     */
    public UniprotPDZDomainData()
    {
        // Other available steps (enable the one that is needed):
        //loadPDZUniprotIds();
        //loadPDZiRefIndexInteractions();
        //makeFilteredPDZUniprotFastaFile2();
        //parseHmmerOutFile();
        //makeHumanScanChen16File();
        make213PDZScanFile();
    }

    /**
     * Counts, for every PDZ UniProt id, the iRefIndex interactions that contain it and
     * records the ids with at least one interaction. Prints those ids followed by the
     * total interaction count and the number of such proteins.
     */
    private void loadPDZiRefIndexInteractions()
    {
        // The id list is a prerequisite; load it here instead of failing with a NullPointerException.
        if (pdzUniprotIdList == null)
            loadPDZUniprotIds();

        pdzUniprotToNumiRefIntMap = new HashMap<String, Integer>();
        pdzUniprotiRefIndexIdList = new ArrayList<String>();

        IRefIndexInteractome irefIndex = new IRefIndexInteractome(Constants.HUMAN);
        Map<?, ?> interactions = irefIndex.getInteractions();
        // The map keys are the interactions themselves, each one a set of participants.
        List<Set<?>> interactionSets = new ArrayList<Set<?>>();
        for (Object key : interactions.keySet())
            interactionSets.add((Set<?>) key);

        Set<String> alreadyListed = new HashSet<String>();
        for (String pdzUniprotId : pdzUniprotIdList)
        {
            int numPDZInteractions = 0;
            for (Set<?> interaction : interactionSets)
            {
                if (interaction.contains(pdzUniprotId))
                    numPDZInteractions++;
            }
            if (numPDZInteractions > 0 && alreadyListed.add(pdzUniprotId))
                pdzUniprotiRefIndexIdList.add(pdzUniprotId);
            pdzUniprotToNumiRefIntMap.put(pdzUniprotId, numPDZInteractions);
        }

        int totalInts = 0;
        for (Map.Entry<String, Integer> entry : pdzUniprotToNumiRefIntMap.entrySet())
        {
            int numInts = entry.getValue();
            if (numInts > 0)
            {
                System.out.println(entry.getKey());
                totalInts += numInts;
            }
        }
        System.out.println("Total interactions: " + totalInts);
        System.out.println("Total PDZ proteins: " + pdzUniprotiRefIndexIdList.size());
    }

    /**
     * Writes the PDZ FASTA records whose UniProt id is listed (one id per line) in
     * filterPDZDomains.txt to uniprot-irefindex-domain-pdz-filtered.fasta.
     */
    private void makeFilteredPDZUniprotFastaFile2()
    {
        BufferedReader filterReader = null;
        try
        {
            Set<String> filterIds = new HashSet<String>();
            String filterByFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/filterPDZDomains.txt";
            filterReader = new BufferedReader(new FileReader(new File(filterByFileName)));
            String line;
            while ((line = filterReader.readLine()) != null)
            {
                filterIds.add(line);
            }

            String outFastaFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/uniprot-irefindex-domain-pdz-filtered.fasta";
            writeFilteredFasta(filterIds, outFastaFileName);
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            closeQuietly(filterReader);
        }
    }

    /**
     * Writes the PDZ FASTA records whose UniProt id takes part in at least one iRefIndex
     * interaction to uniprot-irefindex-domain-pdz.fasta.
     */
    private void makeFilteredPDZUniprotFastaFile()
    {
        try
        {
            // Load the prerequisite id list on demand rather than failing after the output was truncated.
            if (pdzUniprotiRefIndexIdList == null)
                loadPDZiRefIndexInteractions();

            String outFastaFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/uniprot-irefindex-domain-pdz.fasta";
            writeFilteredFasta(new HashSet<String>(pdzUniprotiRefIndexIdList), outFastaFileName);
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
        }
    }

    /**
     * Copies every record of uniprot-domain-pdz.fasta whose header id is in
     * {@code idsToKeep} (header line plus all following sequence lines) to the given file.
     *
     * @param idsToKeep        UniProt ids of the records to copy
     * @param outFastaFileName path of the FASTA file to create or overwrite
     * @throws IOException if reading or writing fails
     * @throws IllegalArgumentException if a header line has no second '|'-separated field
     */
    private void writeFilteredFasta(Set<String> idsToKeep, String outFastaFileName) throws IOException
    {
        String pdzUniprotFastaFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/uniprot-domain-pdz.fasta";
        BufferedReader br = null;
        BufferedWriter bw = null;
        try
        {
            // Open the input first so that a missing input does not truncate an existing output file.
            br = new BufferedReader(new FileReader(new File(pdzUniprotFastaFileName)));
            bw = new BufferedWriter(new FileWriter(new File(outFastaFileName)));

            boolean print = false;
            String line;
            while ((line = br.readLine()) != null)
            {
                if (line.startsWith(">"))
                    print = idsToKeep.contains(extractUniprotId(line));
                if (print)
                    bw.write(line + "\n");
            }
            // Close explicitly on the success path so that a failing flush is reported.
            bw.close();
        }
        finally
        {
            closeQuietly(br);
            closeQuietly(bw);
        }
    }

    /**
     * Collects the distinct UniProt ids from the headers of uniprot-domain-pdz.fasta into
     * {@link #pdzUniprotIdList} and prints how many were found. On failure the list is
     * left empty and the exception is printed.
     */
    private void loadPDZUniprotIds()
    {
        pdzUniprotIdList = new ArrayList<String>();
        String pdzUniprotFastaFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/uniprot-domain-pdz.fasta";
        BufferedReader br = null;
        try
        {
            br = new BufferedReader(new FileReader(new File(pdzUniprotFastaFileName)));
            Set<String> uniqueIds = new HashSet<String>();
            String line;
            while ((line = br.readLine()) != null)
            {
                if (line.startsWith(">"))
                    uniqueIds.add(extractUniprotId(line));
            }
            pdzUniprotIdList.addAll(uniqueIds);
            System.out.println("num pdz uniprotids: " + pdzUniprotIdList.size());
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            closeQuietly(br);
        }
    }

    /**
     * Command line entry point; constructing the object runs the selected step.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        new UniprotPDZDomainData();
    }

    /**
     * Extracts domain sequences from Data/Hmmer/out.txt and prints them in FASTA form
     * ({@code >id-domainNumber name} followed by the gap-free sequence).
     *
     * A line starting with {@code >>} selects the current protein (second '|' field of its
     * second token). A line starting with {@code "  == "} introduces one domain; its tenth
     * whitespace-separated token is parsed as the conditional E-value, and the fourth line
     * after it is taken as the aligned target sequence line.
     */
    private void parseHmmerOutFile()
    {
        String hmmerOutFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Hmmer/out.txt";
        BufferedReader br = null;
        try
        {
            Map<String, List<String>> uniProtIdToDomainSeqMap = new HashMap<String, List<String>>();
            br = new BufferedReader(new FileReader(new File(hmmerOutFileName)));
            String uniprotId = "";
            String line;
            while ((line = br.readLine()) != null)
            {
                if (line.startsWith(">>"))
                {
                    String[] splitLine = line.split("\\s+");
                    uniprotId = splitLine[1].split("\\|")[1];
                    if (!uniProtIdToDomainSeqMap.containsKey(uniprotId))
                        uniProtIdToDomainSeqMap.put(uniprotId, new ArrayList<String>());
                    continue;
                }
                if (!line.startsWith("  == "))
                    continue;

                double condEValue = Double.parseDouble(line.split("\\s+")[9]);
                if (condEValue > MAX_CONDITIONAL_E_VALUE)
                    continue;

                // Skip three lines; the fourth one carries the target sequence.
                String targetLine = null;
                for (int i = 0; i < 4; i++)
                    targetLine = br.readLine();
                if (targetLine == null)
                    throw new EOFException("Unexpected end of file inside alignment block: " + hmmerOutFileName);

                String[] targetFields = targetLine.split("\\s+");
                String[] idSplit = targetFields[1].split("\\|");
                String id = idSplit[1];
                String name = idSplit[2];
                String finalSeq = targetFields[3].replace("-", "");

                List<String> domainSeqList = uniProtIdToDomainSeqMap.get(uniprotId);
                if (domainSeqList == null)
                {
                    // A domain line appeared before any ">>" line.
                    domainSeqList = new ArrayList<String>();
                    uniProtIdToDomainSeqMap.put(uniprotId, domainSeqList);
                }
                // Locale-independent upper-casing: a Turkish default locale would turn 'i' into a dotted capital.
                domainSeqList.add(finalSeq.toUpperCase(Locale.ENGLISH));
                // Number the domain by how many accepted domains this protein has so far.
                int domainNum = domainSeqList.size();

                System.out.println(">" + id + "-" + domainNum + " " + name);
                System.out.println(finalSeq);
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            closeQuietly(br);
        }
    }

    /**
     * Reads Data/Pfam/PF00595.full and prints, for every iRefIndex PDZ protein that has a
     * HUMAN entry in the file, its entry name and aligned sequence in FASTA form.
     *
     * Lines inside the annotation range map an accession (fourth token, version suffix
     * removed) to an entry name (second token up to '/'). Every other line with exactly
     * two tokens is treated as {@code name/range alignedSequence}; when a name occurs more
     * than once the last aligned sequence wins.
     */
    private void parsePfamAlignmentFile()
    {
        String pfamFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Pfam/PF00595.full";
        BufferedReader br = null;
        try
        {
            // Load the prerequisite id list up front so that its console output precedes this step's output.
            if (pdzUniprotiRefIndexIdList == null)
                loadPDZiRefIndexInteractions();

            Map<String, String> uniprotToNameMap = new HashMap<String, String>();
            Map<String, String> nameToAlignedSeqMap = new HashMap<String, String>();
            Set<String> addedNames = new HashSet<String>();

            br = new BufferedReader(new FileReader(new File(pfamFileName)));
            int numLines = 0;
            String line;
            while ((line = br.readLine()) != null)
            {
                numLines++;
                String[] splitLine = line.split("\\s+");
                if (numLines > PFAM_ANNOTATION_START_EXCLUSIVE && numLines <= PFAM_ANNOTATION_END_INCLUSIVE)
                {
                    String name = splitLine[1].split("/")[0];
                    String org = name.split("_")[1];
                    if (!org.equals("HUMAN"))
                        continue;
                    if (addedNames.add(name))
                    {
                        String accession = splitLine[3];
                        int dot = accession.indexOf('.');
                        // Accept accessions without a version suffix instead of aborting.
                        String uniprotId = dot >= 0 ? accession.substring(0, dot) : accession;
                        System.out.println(name + " " + uniprotId + " ");
                        uniprotToNameMap.put(uniprotId, name);
                    }
                }
                else
                {
                    if (splitLine.length != 2)
                        continue;
                    int slash = splitLine[0].indexOf('/');
                    if (slash < 0)
                        continue; // not a "name/range sequence" line
                    String id = splitLine[0].substring(0, slash);
                    String alignedSeq = splitLine[1];
                    System.out.println(id + ">" + alignedSeq);
                    nameToAlignedSeqMap.put(id, alignedSeq.toUpperCase(Locale.ENGLISH));
                }
            }

            System.out.println("here");
            for (String uniprotId : pdzUniprotiRefIndexIdList)
            {
                String name = uniprotToNameMap.get(uniprotId);
                if (name == null)
                    continue;
                String alignedSeq = nameToAlignedSeqMap.get(name);
                if (alignedSeq == null)
                {
                    // Do not emit a record whose sequence would be the literal text "null".
                    System.err.println("No aligned sequence found for " + name);
                    continue;
                }
                System.out.println(">" + name);
                System.out.println(alignedSeq);
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
            e.printStackTrace();
        }
        finally
        {
            closeQuietly(br);
        }
    }

    /**
     * Pairs every binding-site sequence of PDZ-domain-bs-manualfix.fa with the full domain
     * sequence of the same id from uniprot-irefindex-domainseq-pdz-nophage-plus-seed.fa and
     * prints {@code >fullSequence} followed by the binding-site sequence. The number of
     * records read from each file is printed first.
     */
    private void makeHumanScanChen16File()
    {
        try
        {
            String idToBindingSiteSeqFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/PDZ-domain-bs-manualfix.fa";
            String idToFullSeqFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/uniprot-irefindex-domainseq-pdz-nophage-plus-seed.fa";

            // Binding-site ids carry a "/..." suffix that is dropped to match the full-sequence ids.
            Map<String, String> idToBindingSiteSeqMap = readFastaSequences(idToBindingSiteSeqFileName, true);
            Map<String, String> idToFullSeqMap = readFastaSequences(idToFullSeqFileName, false);

            System.out.println(idToBindingSiteSeqMap.size());

            System.out.println(idToFullSeqMap.size());

            for (Map.Entry<String, String> entry : idToBindingSiteSeqMap.entrySet())
            {
                String fullSeq = idToFullSeqMap.get(entry.getKey());
                if (fullSeq == null)
                {
                    // Do not emit a ">null" record when the id is missing from the full-sequence file.
                    System.err.println("No full sequence found for " + entry.getKey());
                    continue;
                }
                System.out.println(">" + fullSeq);
                System.out.println(entry.getValue());
            }
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
            e.printStackTrace();
        }
    }

    /**
     * Converts uniprot-irefindex-domainseq-pdz-all.fa to a tab-separated listing on standard
     * output. For each record it prints {@code #name-num} and
     * {@code H<TAB>name-num<TAB>num<TAB>sequence}, where name is the second header token up
     * to its first underscore and num is the third header token.
     */
    private void make213PDZScanFile()
    {
        BufferedReader br = null;
        try
        {
            String allPDZFileName = DataFileManager.DATA_ROOT_DIR + "/Data/Uniprot/uniprot-irefindex-domainseq-pdz-all.fa";
            br = new BufferedReader(new FileReader(new File(allPDZFileName)));
            String domainName = null; // null until the first header has been read
            String domainNum = null;
            StringBuilder seq = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null)
            {
                if (line.startsWith(">"))
                {
                    if (domainName != null)
                        printScanRecord(domainName, domainNum, seq.toString());
                    seq.setLength(0);

                    String[] splitLine = line.split("\\s+");
                    if (splitLine.length < 3)
                        throw new IOException("Malformed header, expected at least three fields: " + line);
                    domainNum = splitLine[2];
                    String proteinName = splitLine[1];
                    int underScore = proteinName.indexOf('_');
                    // Keep the whole token when it has no underscore instead of aborting.
                    if (underScore >= 0)
                        proteinName = proteinName.substring(0, underScore);
                    domainName = proteinName + "-" + domainNum;
                }
                else if (domainName != null)
                {
                    seq.append(line);
                }
            }
            // Flush the last record; nothing is printed for a file without any header.
            if (domainName != null)
                printScanRecord(domainName, domainNum, seq.toString());
        }
        catch (Exception e)
        {
            System.out.println("Exception: " + e);
            e.printStackTrace();
        }
        finally
        {
            closeQuietly(br);
        }
    }

    /** Prints one record of the listing produced by {@link #make213PDZScanFile()}. */
    private static void printScanRecord(String domainName, String domainNum, String seq)
    {
        System.out.println("#" + domainName);
        System.out.println("H\t" + domainName + "\t" + domainNum + "\t" + seq);
    }

    /**
     * Reads a FASTA file into an id -> sequence map. The id is the first whitespace-separated
     * token of the header without the leading '>'; sequence lines are concatenated. Lines
     * before the first header are ignored.
     *
     * @param fileName        path of the FASTA file
     * @param dropSlashSuffix when true, the id is cut at its first '/'
     * @return map from id to concatenated sequence; empty when the file has no header
     * @throws IOException if the file cannot be read
     */
    private static Map<String, String> readFastaSequences(String fileName, boolean dropSlashSuffix) throws IOException
    {
        Map<String, String> idToSeq = new HashMap<String, String>();
        BufferedReader br = null;
        try
        {
            br = new BufferedReader(new FileReader(new File(fileName)));
            String id = null;
            StringBuilder seq = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null)
            {
                if (line.startsWith(">"))
                {
                    if (id != null)
                        idToSeq.put(id, seq.toString());
                    seq.setLength(0);

                    String token = line.split("\\s+")[0];
                    if (dropSlashSuffix)
                        token = token.split("/")[0];
                    id = token.substring(1);
                }
                else if (id != null)
                {
                    seq.append(line);
                }
            }
            if (id != null)
                idToSeq.put(id, seq.toString());
        }
        finally
        {
            closeQuietly(br);
        }
        return idToSeq;
    }

    /**
     * Returns the second '|'-separated field of the first token of a FASTA header line.
     *
     * @param headerLine a line starting with '>'
     * @return the id field
     * @throws IllegalArgumentException if the first token has fewer than two '|'-separated fields
     */
    private static String extractUniprotId(String headerLine)
    {
        String[] fields = headerLine.split("\\s+")[0].split("\\|");
        if (fields.length < 2)
            throw new IllegalArgumentException("Malformed FASTA header, expected db|id|name: " + headerLine);
        return fields[1];
    }

    /** Closes the resource if it is non-null; a failure while closing is deliberately ignored. */
    private static void closeQuietly(Closeable resource)
    {
        if (resource == null)
            return;
        try
        {
            resource.close();
        }
        catch (IOException ignored)
        {
            // Nothing useful can be done here; the primary error, if any, was already reported.
        }
    }
}
