package org.baderlab.pdzsvmstruct.utils;

import org.baderlab.pdzsvmstruct.encoding.Features;
import org.baderlab.pdzsvmstruct.encoding.DomainFeatureEncoding;
import org.baderlab.pdzsvmstruct.data.Datum;
import org.baderlab.pdzsvmstruct.data.Data;
import org.baderlab.pdzsvmstruct.predictor.Predictor;
import org.baderlab.brain.ProteinProfile;

import java.util.*;

import libsvm.svm_parameter;

/**
 * Copyright (c) 2011 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVMStruct.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVMStruct.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * Nearest-neighbour similarity helpers that compare a query domain against
 * the training data held by a {@link Predictor}.
 *
 * Two notions of similarity are offered: binding site sequence identity
 * ({@link #seqSimInfo}) and an RBF kernel over scaled domain feature vectors
 * ({@link #structSimInfo}).
 */
public class DataUtils
{
    /** Value passed as the gamma argument of KernelUtils.rbf in structSimInfo. */
    private static final double RBF_GAMMA = 0.02489353418393197;

    // Elements are cast to ProteinProfile when read.
    private final List<?> posTrainProfileList;
    // Captured from the predictor but not read by any method in this class.
    private final List<?> negTrainProfileList;
    private final Data trainData;

    /**
     * Captures the training data and the positive/negative training profile
     * lists of the given predictor.
     *
     * @param p predictor supplying the training data
     */
    public DataUtils(Predictor p)
    {
        trainData = p.getTrainData();
        posTrainProfileList = p.getPosTrainProfileList();
        negTrainProfileList = p.getNegTrainProfileList();
    }

    /**
     * Finds the positive training profile whose binding site sequence has the
     * highest identity to the binding site sequence of the query domain.
     * On ties the profile that appears last in the training list wins.
     *
     * @param name     name of the query domain
     * @param org1     organism passed to BindingSiteUtils for the query domain
     * @param encoding encoding passed to BindingSiteUtils for each training domain
     * @return the nearest neighbour's name, its identity to the query, and the
     *         size of the training data list looked up for that neighbour
     */
    public SimInfo seqSimInfo(String name, String org1,int encoding)
    {
        // NOTE(review): the query domain always uses SIDHU_CONTACTMAP while the
        // training domains use the 'encoding' argument - confirm this asymmetry
        // is intentional before changing it.
        BindingSiteUtils queryBindingSite = new BindingSiteUtils(org1,svm_parameter.SIDHU_CONTACTMAP);
        String testDomainSeq = queryBindingSite.getBindingSiteSequence(name);

        double nnSim = 0.0;
        String nnDomainName = "";
        String nnOrganism = "";
        for (Object entry : posTrainProfileList)
        {
            ProteinProfile profile = (ProteinProfile)entry;
            String org = PDZSVMUtils.organismLongToShortForm(profile.getOrganism());
            BindingSiteUtils trainBindingSite = new BindingSiteUtils(org,encoding);

            String domainSeq = trainBindingSite.getBindingSiteSequence(profile.getName());
            double sim = PDZSVMUtils.identity(domainSeq, testDomainSeq);
            // '>=' (not '>') so that later profiles replace earlier ones on ties.
            if (sim >= nnSim)
            {
                nnSim = sim;
                nnDomainName = profile.getName();
                nnOrganism = org;
            }
        }

        int domainNum = trainData.lookupDomainNum(nnDomainName,nnOrganism);
        List<?> nnDataList = trainData.lookupDataList(domainNum);

        SimInfo ssInfo = new SimInfo();
        ssInfo.nnName = nnDomainName;
        ssInfo.nnSim = nnSim;
        ssInfo.numNN = nnDataList.size();
        return ssInfo;
    }

    /**
     * Result holder for a nearest-neighbour lookup.
     */
    public class SimInfo
    {
        /** Similarity between the query and its nearest neighbour. */
        public double nnSim = 0.0;
        /** Size of the training data list looked up for the nearest neighbour. */
        public int numNN = 0;
        /** Name of the nearest neighbour. */
        public String nnName = "";
        /** Not assigned by any method in this class; stays at 0.0. */
        public double templateSim = 0.0;

    }

    /**
     * Finds the training domain whose feature vector has the highest RBF
     * kernel value against the scaled feature vector of the query domain.
     * On ties the domain that appears last in the map's key iteration wins.
     *
     * @param domainName name of the query domain
     * @param organism   organism of the query domain
     * @return the nearest neighbour's name (taken from the first entry of its
     *         training data list), its kernel similarity, and the size of that
     *         list; {@code null} if the query features cannot be encoded or
     *         no training domain can be selected as a neighbour
     */
    public SimInfo structSimInfo(String domainName, String organism)
    {
        List<String> organismList = new ArrayList<String>();
        organismList.add(organism);
        DomainFeatureEncoding enc = new DomainFeatureEncoding(organismList);

        Features queryFt;
        try
        {
            queryFt = enc.encodeFeatures(domainName, organism);
        }
        catch(Exception e)
        {
            // Best effort: an encoding failure is reported to the caller as null.
            return null;
        }
        queryFt = Features.scaleFeatures(queryFt,0,1,trainData.getMaxMin());
        double[] queryArray = queryFt.getFeatureValuesAsDoubleArray();

        Map<?, ?> domainEncMap = trainData.getDomainEncToNumMap();

        // Start below every possible similarity. Double.MIN_VALUE is the
        // smallest POSITIVE double, so a kernel value that underflows to 0.0
        // would never have been selected with it as the seed.
        double nnSim = Double.NEGATIVE_INFINITY;
        Features nnFt = null;
        for (Object key : domainEncMap.keySet())
        {
            Features candidate = (Features)key;
            double sim = KernelUtils.rbf(queryArray,candidate.getFeatureValuesAsDoubleArray(),RBF_GAMMA);
            // '>=' (not '>') so that later keys replace earlier ones on ties.
            if (sim >= nnSim)
            {
                nnSim = sim;
                nnFt = candidate;
            }
        }

        if (nnFt == null)
        {
            // Empty training map (or only NaN similarities): nothing to report.
            return null;
        }

        int domainNum = (Integer)domainEncMap.get(nnFt);
        List<?> dataListAll = trainData.lookupDataList(domainNum);
        Datum first = (Datum)dataListAll.get(0);

        SimInfo ssInfo = new SimInfo();
        ssInfo.nnName = first.name;
        ssInfo.nnSim = nnSim;
        ssInfo.numNN = dataListAll.size();
        return ssInfo;
    }

}
