package org.baderlab.pdzsvm.predictor.nn;

import java.util.List;
import org.baderlab.brain.ProteinProfile;
import org.baderlab.pdzsvm.utils.PDZSVMUtils;
import org.baderlab.pdzsvm.encoding.Chen16FeatureEncoding;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Static helpers for nearest-neighbour lookups based on sequence identity.
 *
 * Domain sequences are first converted with {@link Chen16FeatureEncoding}
 * (the resulting string is referred to as the "binding site" sequence in this
 * class) and then compared with {@code PDZSVMUtils.identity}.
 *
 * In every search, ties are resolved in favour of the candidate that appears
 * last in the supplied list, because the comparison used is {@code >=}.
 */
public class NN {

    /**
     * Finds the training profile whose encoded domain sequence has the highest
     * identity to the encoded test domain sequence.
     *
     * @param testDomainSeq    domain sequence of the test domain
     * @param organism         organism identifier passed to the encoder for the test domain
     *                         (presumably the short form, since training organisms are
     *                         converted to short form before encoding - confirm with callers)
     * @param trainProfileList list whose elements are {@link ProteinProfile} instances
     * @return the most similar training profile, or {@code null} if the list is empty
     */
    public static ProteinProfile getNNBindingSiteSeqProfile(String testDomainSeq, String organism, List trainProfileList)
    {
        Chen16FeatureEncoding enc = new Chen16FeatureEncoding();
        String testDomainSeqBS = enc.getFeatures(testDomainSeq, organism);

        ProteinProfile closestProfile = null;
        // Double.MIN_VALUE is the smallest POSITIVE double, so using it as the
        // starting point rejected every candidate with a similarity of 0.0.
        double maxSim = Double.NEGATIVE_INFINITY;

        for (int i = 0; i < trainProfileList.size(); i++)
        {
            ProteinProfile trainProfile = (ProteinProfile) trainProfileList.get(i);
            String trainDomainSeq = trainProfile.getDomainSequence();
            String trainOrganism = PDZSVMUtils.organismLongToShortForm(trainProfile.getOrganism());
            String trainDomainSeqBS = enc.getFeatures(trainDomainSeq, trainOrganism);

            double bindingSiteSim = PDZSVMUtils.identity(testDomainSeqBS, trainDomainSeqBS);
            if (bindingSiteSim >= maxSim)
            {
                closestProfile = trainProfile;
                maxSim = bindingSiteSim;
            }
        }

        return closestProfile;
    }

    /**
     * Computes the identity between the encoded domain sequences of two profiles.
     *
     * Each profile's organism name is converted with
     * {@code PDZSVMUtils.organismLongToShortForm} before encoding.
     *
     * @param profile1 first profile
     * @param profile2 second profile
     * @return the value of {@code PDZSVMUtils.identity} for the two encoded sequences
     */
    public static double getIdentity(ProteinProfile profile1, ProteinProfile profile2)
    {
        Chen16FeatureEncoding enc = new Chen16FeatureEncoding();

        String domainSeq1 = profile1.getDomainSequence();
        String domainSeq2 = profile2.getDomainSequence();
        String organism1 = PDZSVMUtils.organismLongToShortForm(profile1.getOrganism());
        String organism2 = PDZSVMUtils.organismLongToShortForm(profile2.getOrganism());

        String domainSeqBS1 = enc.getFeatures(domainSeq1, organism1);
        String domainSeqBS2 = enc.getFeatures(domainSeq2, organism2);

        return PDZSVMUtils.identity(domainSeqBS1, domainSeqBS2);
    }

    /**
     * Finds the sequence in {@code intSeqList} with the highest identity to
     * {@code refSeqString}. Called by NNPredictor for binding site seq sim.
     *
     * @param refSeqString reference sequence to compare against
     * @param intSeqList   list whose elements are {@code String} sequences
     * @return an {@link NNInfo} holding the best-matching sequence and its similarity;
     *         if no candidate was selected (e.g. the list is empty) the sequence is
     *         {@code ""} and the similarity is {@code 0.0}
     */
    public static NNInfo getNNBindingSiteSeqInfo(String refSeqString, List intSeqList)
    {
        // Start below every possible score; Double.MIN_VALUE is a tiny positive
        // number and would wrongly exclude candidates with a similarity of 0.0.
        double maxSim = Double.NEGATIVE_INFINITY;
        String maxIntSeqString = null;

        for (int i = 0; i < intSeqList.size(); i++)
        {
            String intSeqString = (String) intSeqList.get(i);
            double sim = PDZSVMUtils.identity(intSeqString, refSeqString);
            if (sim >= maxSim)
            {
                maxIntSeqString = intSeqString;
                maxSim = sim;
            }
        }

        // True as soon as any candidate passed the comparison above, even if
        // that candidate itself was a null list element.
        boolean found = maxSim != Double.NEGATIVE_INFINITY;

        NNInfo nn = new NNInfo();
        nn.sim = found ? maxSim : 0.0;
        nn.intSeqString = found ? maxIntSeqString : "";
        return nn;
    }

    /**
     * Returns the highest identity between the encoded test domain sequence and
     * the encoded domain sequence of any training profile.
     * Called by Sequence Similarity Analysis.
     *
     * @param allTrainProfileList list whose elements are {@link ProteinProfile} instances
     * @param testDomainSeq       domain sequence of the test domain
     * @param organism            organism identifier passed to the encoder for the test domain
     * @return the maximum similarity found, or {@code 0.0} if nothing was compared
     */
    public static double getDomainBindingSiteSeqSim(List allTrainProfileList, String testDomainSeq, String organism)
    {
        Chen16FeatureEncoding enc = new Chen16FeatureEncoding();
        String domain16Seq1 = enc.getFeatures(testDomainSeq, organism);

        // See getNNBindingSiteSeqInfo: Double.MIN_VALUE is positive, not the lowest double.
        double maxSim = Double.NEGATIVE_INFINITY;
        for (int i = 0; i < allTrainProfileList.size(); i++)
        {
            ProteinProfile profile = (ProteinProfile) allTrainProfileList.get(i);

            String domainSeq2 = profile.getDomainSequence();
            String organism2 = PDZSVMUtils.organismLongToShortForm(profile.getOrganism());
            String domain16Seq2 = enc.getFeatures(domainSeq2, organism2);

            double sim = PDZSVMUtils.identity(domain16Seq1, domain16Seq2);
            if (sim >= maxSim)
            {
                maxSim = sim;
            }
        }

        // An average similarity used to be accumulated here but was never
        // returned or read, so that dead computation has been removed.
        return maxSim == Double.NEGATIVE_INFINITY ? 0.0 : maxSim;
    }
}
