package org.baderlab.pdzsvm.predictor.BRAIN;

import org.baderlab.brain.*;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.Sequence;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.*;

import org.baderlab.pdzsvm.evaluation.Prediction;
import org.baderlab.pdzsvm.utils.PDZSVMUtils;
import org.baderlab.pdzsvm.data.manager.DataFileManager;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Bader's Brain predictor, which implements a position weight matrix (PWM)
 * together with an automated way of calculating the PWM cutoff score.
 * This approach was used in the Tonikian paper:
 * Tonikian, R. et al. (2008) A Specificity Map for the PDZ Domain Family,
 *   PLoS Biol., 6, 2043-2059.
 * NOTE: NOT USED IN THE PAPER (presumably the PDZSVM paper -- confirm).
 */
public class BrainPredictor {
    /** Length passed to the database search parameters for every search. */
    private static final int SEARCH_LENGTH = 5;

    /** Upper bound of the score range explored when looking for an automatic cutoff. */
    private static final int MAX_SCORE_THRESHOLD = 100;

    // Instantiated in the constructor; not otherwise referenced by this class.
    private BrainAlgorithm alg;
    private String proteomeFastaFileName = "";
    // Default parameter set (score threshold 1.0) built in the constructor.
    private BrainParameterSet params;
    // Most recently opened database search; it is closed at the end of runProfileSearch.
    private ProteinDatabaseSearch search = null;

    /**
     * Creates a predictor that searches the given proteome.
     *
     * @param proteomeFastaFileName path of the FASTA file to search
     */
    public BrainPredictor(String proteomeFastaFileName)
    {
        alg = new BrainAlgorithm();
        this.proteomeFastaFileName = proteomeFastaFileName;
        params = createSearchParameters(proteomeFastaFileName, 1.0);
    }

    /**
     * Builds the parameter set shared by the constructor and {@link #scan(List)}:
     * fuzz factor 0.01, FASTA database, C-terminal normalized search using
     * SCORE_TYPE_NORM_PROBABILITY and length 5.
     */
    private static BrainParameterSet createSearchParameters(String fastaFileName, double scoreThreshold)
    {
        BrainParameterSet parameterSet = new BrainParameterSet();
        parameterSet.setFuzzFactor(0.01);
        parameterSet.setDatabaseFileName(new File(fastaFileName));
        parameterSet.setDatabaseFormat("FASTA");
        parameterSet.setScoreThreshold(scoreThreshold);
        ProteinDatabaseSearchParams dbparams = new ProteinDatabaseSearchParams(ProteinTerminus.C);
        dbparams.setNormalized(true);
        dbparams.setScoreType(ProteinDatabaseSearchParams.SCORE_TYPE_NORM_PROBABILITY);
        dbparams.setLength(SEARCH_LENGTH);
        parameterSet.setSearchParams(dbparams);
        return parameterSet;
    }

    /**
     * No-op: this predictor has no training step.
     */
    public void train()
    {

    }

    /**
     * Finds a score threshold for the sequence search, one per profile.
     * <p>
     * The cutoff rule is to stop when the number of hits at the current score
     * is greater than the cumulative number of hits at all lower scores
     * (not inclusive: the last batch of hits is not added). The rule was
     * created for phage display results of PDZ domains, where an exponential
     * score increase is observed.
     * <p>
     * The database, format, fuzz factor, terminus and normalization flag are
     * copied from {@code inputParams}; score type and length are fixed by this
     * method, and sequences are not saved during this pre-scan.
     *
     * @param profileList list of profiles to find thresholds for
     * @param inputParams parameters describing the database to search; must not be null
     * @return fixed-size list of {@code Double} thresholds, index-aligned with
     *         {@code profileList}; 0.0 where no cutoff was found or the search
     *         could not be run
     */
    public List findAutoScoreThreshold(List profileList, BrainParameterSet inputParams) {
        BrainParameterSet internalParams = new BrainParameterSet();
        internalParams.setDatabaseFileName(inputParams.getDatabaseFileName());
        internalParams.setDatabaseFormat(inputParams.getDatabaseFormat());
        internalParams.setScoreThreshold(MAX_SCORE_THRESHOLD);
        internalParams.setFuzzFactor(inputParams.getFuzzFactor());
        ProteinDatabaseSearchParams dbparams = new ProteinDatabaseSearchParams(inputParams.getSearchParams().getTerminus());
        dbparams.setNormalized(inputParams.getSearchParams().isNormalized());
        dbparams.setDontSaveSequences(true);
        dbparams.setScoreType(ProteinDatabaseSearchParams.SCORE_TYPE_NORM_PROBABILITY);
        dbparams.setLength(SEARCH_LENGTH);
        internalParams.setSearchParams(dbparams);

        // Start every profile at 0.0 ("no cutoff found") so that no entry is
        // ever left null, even for profiles without a matching result set.
        Double[] scoreThresholdArray = new Double[profileList.size()];
        Arrays.fill(scoreThresholdArray, Double.valueOf(0.0));

        MultiSequenceSearchResultSet searchResults = runProfileSearch(profileList, null, internalParams);
        if (searchResults == null) {
            // The search could not be run (the error was already reported).
            return Arrays.asList(scoreThresholdArray);
        }

        Collection results = searchResults.getAllResultSets();
        for (Iterator iterator = results.iterator(); iterator.hasNext();) {
            SequenceSearchResultSet sequenceSearchResultSet = (SequenceSearchResultSet) iterator.next();

            // Look the profile up once; skip result sets whose profile is not in the list.
            int profileIndex = profileList.indexOf(sequenceSearchResultSet.getProfile());
            if (profileIndex < 0) {
                continue;
            }

            int[] histogram = sequenceSearchResultSet.getScoreHistogram(MAX_SCORE_THRESHOLD);
            int cumulativeHitCount = 0;
            for (int i = 0; i < histogram.length; i++) {
                int hitsAtScore = histogram[i];
                if ((cumulativeHitCount > 1) && (hitsAtScore > cumulativeHitCount)) {
                    // The current bin outnumbers everything before it: cut off just below it.
                    scoreThresholdArray[profileIndex] = Double.valueOf((double) (i - 1));
                    break;
                }
                cumulativeHitCount += hitsAtScore;
            }
        }

        return Arrays.asList(scoreThresholdArray);
    }

    /**
     * Runs {@link #predict(ProteinProfile, BrainParameterSet)} for every
     * profile against the proteome given at construction time.
     *
     * @param profileList list of {@code ProteinProfile} objects
     * @return the predictions of all profiles, in profile order
     */
    public List scan(List profileList)
    {
        BrainParameterSet paramsLocal = createSearchParameters(proteomeFastaFileName, MAX_SCORE_THRESHOLD);

        List predictionList = new ArrayList();
        for (int i = 0; i < profileList.size(); i++)
        {
            ProteinProfile profile = (ProteinProfile) profileList.get(i);
            // Accumulate; assigning here would keep only the last profile's predictions.
            predictionList.addAll(predict(profile, paramsLocal));
        }
        return predictionList;
    }

    /**
     * Predicts binders for a single profile: determines the automatic score
     * threshold, searches the database with it, and creates one
     * {@code Prediction} per distinct hit sequence. The value passed as the
     * prediction score is the hit's score minus the result set's threshold.
     * <p>
     * The threshold list and the distinct hit sequences are printed to
     * standard output.
     *
     * @param profile     profile to search with
     * @param paramsLocal parameters describing the database to search; must not be null
     * @return list of {@code Prediction} objects; empty if the search could not be run
     */
    public List predict(ProteinProfile profile, BrainParameterSet paramsLocal)
    {
        List predictionList = new ArrayList();

        List profileList = new ArrayList();
        profileList.add(profile);
        List scoreThresholdList = findAutoScoreThreshold(profileList, paramsLocal);
        System.out.println(scoreThresholdList);

        // Search with the caller's parameters, consistent with the threshold pre-scan above.
        MultiSequenceSearchResultSet searchResults = runProfileSearch(profileList, scoreThresholdList, paramsLocal);
        if (searchResults == null) {
            // The search could not be run (the error was already reported).
            return predictionList;
        }

        Collection results = searchResults.getAllResultSets();
        for (Object resultObject : results)
        {
            SequenceSearchResultSet resultSet = (SequenceSearchResultSet) resultObject;
            TreeMap scoreToSeqMap = resultSet.getScoreToSequenceMap();
            double scoreThreshold = resultSet.getScoreThreshold();

            // Insertion-ordered set: constant-time duplicate check, and it prints
            // in the same "[a, b, ...]" form as a list.
            Set nonRedSeqSet = new LinkedHashSet();
            for (Object entryObject : scoreToSeqMap.entrySet())
            {
                Map.Entry entry = (Map.Entry) entryObject;
                Double decValue = (Double) entry.getKey();
                Double score = decValue - scoreThreshold;

                List hitList = (List) entry.getValue();
                for (int k = 0; k < hitList.size(); k++)
                {
                    Hit hit = (Hit) hitList.get(k);
                    Sequence seq = hit.getSequence();
                    String seqString = seq.seqString();
                    // add() returns false when the sequence was already reported.
                    if (nonRedSeqSet.add(seqString))
                    {
                        String organismLong = profile.getOrganism();
                        String organism = PDZSVMUtils.organismLongToShortForm(organismLong);
                        String method = PDZSVMUtils.methodShortToLongForm(profile.getExperimentalMethod());
                        Prediction prediction = new Prediction(1.0, 1, score, profile.getName(), profile.getDomainSequence(), seqString, organism, method);
                        predictionList.add(prediction);
                    }
                }
            }
            System.out.println(nonRedSeqSet);
        }
        return predictionList;
    }

    /**
     * Run a profile search from a project file or protein (list of peptides),
     * using this predictor's default parameters.
     *
     * @return a result set for each profile defined in the input file, or
     *         null if the search could not be run
     */
    public MultiSequenceSearchResultSet runProfileSearch() {
        return (runProfileSearch(null, null, null));
    }

    /**
     * Run a profile search from a project file or protein (list of peptides)
     *
     * @param profileList        profiles to search with; if null they are read
     *                           from the profile file of {@code internalParams}
     * @param scoreThresholdList one threshold per profile; if null the score
     *                           threshold of {@code internalParams} is used for
     *                           every profile
     * @param internalParams     search parameters; if null this predictor's
     *                           default parameters are used
     * @return a result set for each profile, or null if the database was not
     *         specified, could not be opened, or the search failed
     */
    public MultiSequenceSearchResultSet runProfileSearch(List profileList, List scoreThresholdList, BrainParameterSet internalParams) {
        MultiSequenceSearchResultSet searchResults = null;

        if (internalParams == null) {
            internalParams = params;
        }

        //get codon bias file (expects null if no bias file set)
        File codonBiasFile = params.getCodonBiasFile();

        //get unique peptides flag (expects default to be set in 'params')
        boolean uniquePeptides = params.getUniquePeptides();

        //read profile file - could be a project or single profile (list of peptides)
        if (profileList == null) {
            profileList = PeptideToProfileReader.readPeptidesAsProfiles(internalParams.getProfileFile(), internalParams.getFuzzFactor(),
                    codonBiasFile, uniquePeptides, false);
        }
        //if no score list set, use the one in params for all profiles
        if (scoreThresholdList == null) {
            scoreThresholdList = new ArrayList(profileList.size());
            for (int i = 0; i < profileList.size(); i++) {
                scoreThresholdList.add(Double.valueOf(internalParams.getScoreThreshold()));
            }
        }

        //set-up a database search
        if ((internalParams.getDatabaseFileName() == null) || (internalParams.getDatabaseFormat() == null)) {
            System.err.println("Database filename or format not specified. Can't continue.");
            return (null);
        }
        // Forget any previously used (and already closed) search.
        search = null;
        ProteinDatabaseSearch dbSearch;
        try {
            // Open the database that was validated above, i.e. the one from internalParams.
            dbSearch = new ProteinDatabaseSearch(internalParams.getDatabaseFileName().toString(), internalParams.getDatabaseFormat());
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return (null);
        } catch (BioException e) {
            e.printStackTrace();
            return (null);
        }
        search = dbSearch;

        //search the database, always releasing it afterwards
        try {
            searchResults = dbSearch.multiProfileSearchDB(profileList, scoreThresholdList, internalParams.getSearchParams());
        } catch (BioException e) {
            e.printStackTrace();
        } finally {
            try {
                dbSearch.close();
            } catch (IOException closeException) {
                closeException.printStackTrace();
            }
        }

        return searchResults;
    }

    /**
     * Scans the human Ensembl proteome with the profiles of the human phage
     * display project file and prints every prediction. Both files are
     * resolved relative to {@code DataFileManager.DATA_ROOT_DIR}.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        String humanSidhuPhageProjectFile = DataFileManager.DATA_ROOT_DIR + "/Data/Human/PDZ/ProjectFiles/projectFileHuman-orig.txt";
        String humanEnsemblFile = DataFileManager.DATA_ROOT_DIR + "/Data/Ensembl/Homo_sapiens.GRCh37.56.pep.all.fa";

        String codonBiasFile = DataFileManager.NNK_CODON_BIAS_FILENAME;
        BrainPredictor bp = new BrainPredictor(humanEnsemblFile);

        List profileList = PDZSVMUtils.readProteinProfileList(humanSidhuPhageProjectFile, codonBiasFile, 5);
        List predictionList = bp.scan(profileList);
        for (int j = 0; j < predictionList.size(); j++)
        {
            System.out.println(predictionList.get(j));
        }
    }

}
