package org.baderlab.pdzsvm.predictor.svm;

import libsvm.*;

import java.util.*;
import java.util.List;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;

import org.baderlab.pdzsvm.data.Data;
import org.baderlab.pdzsvm.evaluation.Prediction;
import org.baderlab.pdzsvm.evaluation.Evaluation;
import org.baderlab.pdzsvm.data.*;
import org.baderlab.pdzsvm.encoding.*;
import org.baderlab.pdzsvm.validation.ValidationParameters;
import org.baderlab.pdzsvm.validation.ValidationFoldOutput;
import org.baderlab.pdzsvm.utils.PDZSVMUtils;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * SVM class which encodes data and loads it into a format that is compatible
 * with LibSVM, and converts LibSVM results into a format PDZSVM can use.  This
 * is the class that calls the LibSVM functions.
 */
public class SVM
{

    /**
     * Creates an SVM instance.  The constructor performs no initialisation;
     * all other methods visible in this class are static.
     */
    public SVM()
    {
    }

    /**
     * Converts a list of data points into a LibSVM problem.
     *
     * For every {@link Datum} in {@code dataSet} the encoded feature pair is
     * fetched from {@code data}, each feature value is parsed as a double and
     * stored in an {@code svm_node} with a 1-based index, and the class label
     * is taken from {@code Datum.classToInt()}.  The per-instance annotation
     * arrays of the problem (names, sequences, organism, method, genomic flag)
     * are filled from the datum and from the raw domain/peptide maps.
     *
     * @param data    source of the encoded features and of the raw
     *                domain/peptide feature maps
     * @param dataSet list of {@link Datum} objects to load
     * @return the populated problem, with one row per element of {@code dataSet}
     */
    public static svm_problem loadData(Data data, List dataSet)
    {
        svm_problem problem = new svm_problem();
        System.out.println("\n\tLoading data (size: " +dataSet.size() +")...");

        List<Double> labels = new ArrayList<Double>();
        List<svm_node[]> rows = new ArrayList<svm_node[]>();

        // First pass: labels and feature vectors.
        for (Object item : dataSet)
        {
            Datum datum = (Datum) item;
            Features encodedPair = data.getEncodedDataPair(datum);

            labels.add(Double.valueOf((double) datum.classToInt()));

            int featureCount = encodedPair.numFeatures();
            svm_node[] row = new svm_node[featureCount];
            List values = encodedPair.getFeatureValues();
            for (int f = 0; f < featureCount; f++)
            {
                svm_node node = new svm_node();
                node.index = f + 1;   // node indices are 1-based
                node.value = Double.parseDouble(values.get(f).toString());
                row[f] = node;
            }
            rows.add(row);
        }

        int count = labels.size();
        problem.l = count;
        problem.x = rows.toArray(new svm_node[count][]);
        problem.y = new double[count];
        for (int r = 0; r < count; r++)
        {
            problem.y[r] = labels.get(r);
        }

        problem.names = new String[count];
        problem.domainSeqFull = new String[count];
        problem.peptideSeq = new String[count];
        problem.organismShort = new String[count];
        problem.methodShort = new String[count];
        problem.genomic = new boolean[count];

        HashMap rawDomainByNum = data.getDomainNumToRawMap();
        HashMap rawPeptideByNum = data.getPeptideNumToRawMap();

        // Second pass: per-instance annotations carried alongside the vectors.
        for (int r = 0; r < dataSet.size(); r++)
        {
            Datum datum = (Datum) dataSet.get(r);
            Features rawDomain = (Features) rawDomainByNum.get(datum.domainNum);
            Features rawPeptide = (Features) rawPeptideByNum.get(datum.peptideNum);

            problem.names[r] = datum.name;
            problem.domainSeqFull[r] = rawDomain.toUndelimitedString();
            problem.peptideSeq[r] = rawPeptide.toUndelimitedString();
            problem.organismShort[r] = datum.organism;
            problem.methodShort[r] = datum.expMethod;
            problem.genomic[r] = true;
        }
        return problem;
    }
    /**
     * Builds a LibSVM problem from {@code data} using the binary contact-map
     * ("GROUP2020") encoding.
     *
     * For every data point the domain sequence is reduced to its contact
     * residues by {@link Chen16FeatureEncoding#getFeatures}, the peptide is
     * truncated to its last five residues, and each (domain position, peptide
     * position) pair listed in {@code enc.contactPositions} contributes one
     * node with value 1.0 whose index identifies the residue pair at that
     * contact.
     *
     * @param data data whose list and raw domain/peptide maps are encoded
     * @return the populated problem, one row per element of the data list
     * @throws IllegalStateException if the encoder returns no contact sequence
     *         for a domain (previously this surfaced as a bare
     *         NullPointerException a few lines later)
     */
    private static svm_problem load2020ContactData(Data data)
    {
        System.out.println("\tPerforming GROUP2020 encoding...");
        Chen16FeatureEncoding enc = new Chen16FeatureEncoding();

        int numContacts = Chen16FeatureEncoding.numContacts;
        List dataList = data.getDataList();
        HashMap domainNumToRawMap = data.getDomainNumToRawMap();
        HashMap peptideNumToRawMap = data.getPeptideNumToRawMap();

        // Residue alphabet used to number residue pairs; loop-invariant.
        final String alphabet = "ACDEFGHIKLMNPQRSTVWY-";

        List<Double> vy = new ArrayList<Double>();
        List<svm_node[]> vx = new ArrayList<svm_node[]>();

        for (int i = 0; i < dataList.size(); i++)
        {
            Datum dt = (Datum) dataList.get(i);

            vy.add(Double.valueOf((double) dt.classToInt()));

            svm_node[] x = new svm_node[numContacts];
            Features domainAAFeatures = (Features) domainNumToRawMap.get(dt.domainNum);
            Features peptideAAFeatures = (Features) peptideNumToRawMap.get(dt.peptideNum);

            String domainSeq = domainAAFeatures.getFeatureValuesAsUndelimitedString();
            String peptideSeq = peptideAAFeatures.getFeatureValuesAsUndelimitedString();

            // Only the last five peptide residues take part in the contacts.
            int len = peptideSeq.length();
            peptideSeq = peptideSeq.substring(len - 5, len);
            String organism = dt.organism;
            String domain16Seq = enc.getFeatures(domainSeq, organism);

            if (domain16Seq == null)
            {
                // Fail with a message that names the offending domain instead
                // of letting charAt() below throw a NullPointerException.
                System.out.println(dt.name +" is null");
                throw new IllegalStateException(
                        "Chen16FeatureEncoding.getFeatures returned null for domain "
                        + dt.name + " (organism: " + organism + ")");
            }

            // Go through all contact positions and encode each residue pair.
            for (int j = 0; j < enc.contactPositions.length; j++)
            {
                int[] posPair = enc.contactPositions[j];
                char domainRes = domain16Seq.charAt(posPair[0]);
                char peptideRes = peptideSeq.charAt(posPair[1]);

                // An unknown residue ('X') on one side is replaced by the
                // residue on the other side.
                if (peptideRes == 'X')
                {
                    peptideRes = domainRes;
                }
                if (domainRes == 'X')
                {
                    System.out.println("domainRes is X -> " + peptideRes);
                    domainRes = peptideRes;
                }

                x[j] = new svm_node();
                x[j].value = 1.0;
                x[j].index = contactFeatureIndex(j,
                        alphabet.indexOf(domainRes),
                        alphabet.indexOf(peptideRes),
                        alphabet.length());
            }

            vx.add(x);
        }

        svm_problem prob = new svm_problem();
        prob.l = dataList.size();
        prob.x = new svm_node[prob.l][];
        prob.y = new double[prob.l];
        for (int i = 0; i < prob.l; i++)
        {
            prob.x[i] = vx.get(i);
            prob.y[i] = vy.get(i);
        }

        prob.names = new String[prob.l];
        prob.domainSeqFull = new String[prob.l];
        prob.peptideSeq = new String[prob.l];
        prob.organismShort = new String[prob.l];
        prob.methodShort = new String[prob.l];
        prob.genomic = new boolean[prob.l];
        for (int i = 0; i < dataList.size(); i++)
        {
            Datum dt = (Datum) dataList.get(i);
            Features domainFeatures = (Features) domainNumToRawMap.get(dt.domainNum);
            Features peptideFeatures = (Features) peptideNumToRawMap.get(dt.peptideNum);

            prob.names[i] = dt.name;
            prob.domainSeqFull[i] = domainFeatures.toUndelimitedString();
            prob.peptideSeq[i] = peptideFeatures.toUndelimitedString();
            prob.organismShort[i] = dt.organism;
            prob.methodShort[i] = dt.expMethod;
            prob.genomic[i] = true;
        }
        return prob;
    }

    /**
     * Computes the node index for one residue pair at one contact position.
     *
     * This is the closed form of the former nested 21x21 search: the pair
     * (domainIx, peptideIx) is numbered row-major starting at 1 and offset by
     * 400 per contact.  If either residue is not in the alphabet the index
     * is 0, as before.
     *
     * NOTE(review): the per-contact stride is 400 while the alphabet yields
     * alphabetSize * alphabetSize (441) combinations, so pairs involving the
     * last alphabet entries can produce an index inside the next contact's
     * range, and indices within a row are then not strictly ascending (the
     * LibSVM README asks for ascending indices).  The stride is left untouched
     * here because changing it changes the feature encoding -- confirm the
     * intended behaviour before altering it.
     *
     * @param contact      zero-based contact position
     * @param domainIx     alphabet index of the domain residue, or -1
     * @param peptideIx    alphabet index of the peptide residue, or -1
     * @param alphabetSize number of symbols in the residue alphabet
     * @return the node index, or 0 when either residue is unknown
     */
    private static int contactFeatureIndex(int contact, int domainIx, int peptideIx, int alphabetSize)
    {
        if (domainIx < 0 || peptideIx < 0)
        {
            return 0;
        }
        return contact * 400 + domainIx * alphabetSize + peptideIx + 1;
    }
    /**
     * Runs k-fold cross validation on the training data.
     *
     * The data is loaded according to {@code svmParams.data_encoding} and
     * handed to {@code svm.svm_cross_validation_x}, which fills a list with
     * one entry per fold.  Each fold entry is a list of {@code double[]} rows;
     * element 0 of a row is used as the decision value, element 1 (after
     * {@code zeroToOne}) as the first {@link Prediction} argument and
     * element 2 (after {@code zeroToOne}) as the actual class.
     *
     * @param trainData   data to cross validate on
     * @param svmParams   SVM parameters, including the data encoding
     * @param validParams validation parameters; {@code k} is the fold count
     * @return map from fold index (0-based) to the list of predictions of
     *         that fold
     */
    public static HashMap kFoldCrossValidation(Data trainData, svm_parameter svmParams, ValidationParameters validParams)
    {
        System.out.println("\n\t"+validParams.k+" fold cross validation...");

        // If no known encoding matches, the problem stays null, exactly as
        // in the branch-per-encoding form.
        svm_problem problem = null;
        if (svmParams.data_encoding == svm_parameter.CONTACTMAP2020)
        {
            problem = load2020ContactData(trainData);
        }
        else if (svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL
                || svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
        {
            problem = loadData(trainData, trainData.getDataList());
        }

        HashMap resultsByFold = new HashMap();

        List foldOutputs = new ArrayList();
        svm.svm_cross_validation_x(problem, svmParams, validParams.k, foldOutputs);

        for (int fold = 0; fold < foldOutputs.size(); fold++)
        {
            List rows = (ArrayList) foldOutputs.get(fold);
            List foldPredictions = new ArrayList();

            for (Object row : rows)
            {
                double[] values = (double[]) row;
                double predicted = PDZSVMUtils.zeroToOne(values[1]);
                double decisionValue = values[0];
                int actual = (int) PDZSVMUtils.zeroToOne(values[2]);

                foldPredictions.add(new Prediction(predicted, actual, decisionValue));
            }
            resultsByFold.put(fold, foldPredictions);
        }

        return resultsByFold;
    }
    /**
     * Cross validation that holds out a percentage of domains and a
     * percentage of peptides at the same time.
     *
     * In each of the {@code k} folds the domain keys and the peptide keys are
     * shuffled and the first {@code d}% / {@code p}% of them (percentages
     * taken of the raw-map sizes) are held out.  The test set is every data
     * point of a held-out domain plus every data point of a held-out peptide
     * whose domain is not held out.  The training set is every data point
     * whose domain and peptide are both not held out.  Folds whose test set
     * lacks positives or negatives are skipped.
     *
     * @param validParams             supplies {@code k}, {@code d} and {@code p}
     * @param trainData               source of the raw domain/peptide maps
     * @param svmParams               SVM parameters, including the data encoding
     * @param domainNumToDataListMap  domain number to list of its data points
     * @param peptideNumToDataListMap peptide number to list of its data points
     * @return map from fold index (0-based) to that fold's predictions, or
     *         {@code null} when {@code d} or {@code p} is zero
     */
    public static HashMap leaveKMOutCrossValidation(ValidationParameters validParams, Data trainData, svm_parameter svmParams, HashMap domainNumToDataListMap, HashMap peptideNumToDataListMap)
    {
        int foldCount = validParams.k;
        int d = validParams.d;
        int p = validParams.p;
        double domainFraction = (double)d/100;
        double peptideFraction = (double)p/100;

        System.out.println("\tLeave ["+d+","+p+"]% "+ ValidationParameters.CV_STRING[ValidationParameters.DOMAIN_PEPTIDE]+ " out cross validation...");
        HashMap resultsByFold = new HashMap();

        List domainKeys = new ArrayList(domainNumToDataListMap.keySet());
        Collections.sort(domainKeys);

        List peptideKeys = new ArrayList(peptideNumToDataListMap.keySet());
        Collections.sort(peptideKeys);

        HashMap rawDomainByNum = trainData.getDomainNumToRawMap();
        HashMap rawPeptideByNum = trainData.getPeptideNumToRawMap();

        int domainsPerFold = (int)((double)rawDomainByNum.size()*domainFraction);
        int peptidesPerFold = (int)((double)rawPeptideByNum.size()*peptideFraction);

        System.out.println("\tNum in d Fold: " + domainsPerFold);
        System.out.println("\tNum in p Fold: " + peptidesPerFold);

        if (d == 0)
        {
            System.out.println("\td cannot be zero...");
            return null;
        }
        System.out.println("\tNum folds: " + foldCount);

        if (p == 0)
        {
            System.out.println("\tp cannot be zero...");
            return null;
        }

        for (int fold = 0; fold < foldCount; fold++)
        {
            // ---- held-out domains and all of their data points ----
            Collections.shuffle(domainKeys);
            List heldOutDomains = domainKeys.subList(0, domainsPerFold);

            List domainTestData = new ArrayList();
            for (Object key : heldOutDomains)
            {
                int domainNum = (Integer) key;
                domainTestData.addAll((List) domainNumToDataListMap.get(domainNum));
            }

            List heldOutDomainNames = new ArrayList();
            for (Object item : domainTestData)
            {
                Datum dt = (Datum) item;
                if (!heldOutDomainNames.contains(dt.name))
                {
                    heldOutDomainNames.add(dt.name);
                }
            }

            // ---- held-out peptides; data points of held-out domains are
            //      already in the domain test set and are not added again ----
            Collections.shuffle(peptideKeys);
            List heldOutPeptides = peptideKeys.subList(0, peptidesPerFold);

            List peptideTestData = new ArrayList();
            for (Object key : heldOutPeptides)
            {
                int peptideNum = (Integer) key;
                List peptideRecords = (List) peptideNumToDataListMap.get(peptideNum);
                for (Object item : peptideRecords)
                {
                    Datum dt = (Datum) item;
                    if (heldOutDomains.contains(dt.domainNum))
                    {
                        continue;
                    }
                    peptideTestData.add(dt);
                }
            }

            List heldOutPeptideNums = new ArrayList();
            for (Object item : peptideTestData)
            {
                Datum dt = (Datum) item;
                if (!heldOutPeptideNums.contains(dt.peptideNum))
                {
                    heldOutPeptideNums.add(dt.peptideNum);
                }
            }

            // ---- assemble and encode the test fold ----
            List testRecords = new ArrayList(domainTestData);
            testRecords.addAll(peptideTestData);

            Data testFoldData = new Data();
            testFoldData.addRawData(testRecords, rawDomainByNum, rawPeptideByNum);
            if (svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
            {
                AtchleyFactorFeatureEncoding atchley = new AtchleyFactorFeatureEncoding();
                testFoldData.encodeData(atchley, atchley);
            }
            else if (svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
            {
                BinarySequenceFeatureEncoding binary = new BinarySequenceFeatureEncoding();
                testFoldData.encodeData(binary, binary);
            }

            List testDomainNames = testFoldData.getDomainNames();
            List testPeptideNums = testFoldData.getPeptideNums();
            String foldHeader = "\n\t===== Fold " + (fold+1) +" " +heldOutDomainNames.toString() + " x " + heldOutPeptideNums.toString() + " =====";
            String foldContents = "\tAll: " + testDomainNames.toString() + " x " + testPeptideNums.toString();

            boolean oneClassOnly = testFoldData.getNumPositive() == 0 || testFoldData.getNumNegative()==0;
            if (oneClassOnly)
            {
                System.out.println(foldHeader);
                System.out.println(foldContents);
                System.out.println("\tNo positive or negative data...");
                continue;
            }

            // ---- training fold: neither the domain nor the peptide of a
            //      data point may be held out ----
            List trainRecords = new ArrayList();
            for (Object key : domainKeys)
            {
                int domainNum = (Integer) key;
                if (heldOutDomains.contains(domainNum))
                {
                    continue;
                }
                List domainRecords = (List) domainNumToDataListMap.get(domainNum);
                for (Object item : domainRecords)
                {
                    Datum dt = (Datum) item;
                    if (!heldOutPeptides.contains(dt.peptideNum))
                    {
                        trainRecords.add(dt);
                    }
                }
            }

            System.out.println(foldHeader);
            System.out.println(foldContents);

            Data trainFoldData = new Data();
            trainFoldData.addRawData(trainRecords, rawDomainByNum, rawPeptideByNum);
            if (svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
            {
                AtchleyFactorFeatureEncoding atchley = new AtchleyFactorFeatureEncoding();
                trainFoldData.encodeData(atchley, atchley);
            }
            else if (svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
            {
                BinarySequenceFeatureEncoding binary = new BinarySequenceFeatureEncoding();
                trainFoldData.encodeData(binary, binary);
            }

            svm_model foldModel = SVM.train(trainFoldData, svmParams);
            List foldPredictions = SVM.predict(trainFoldData, testFoldData, foldModel, svmParams);
            Evaluation foldEvaluation = new Evaluation(foldPredictions);
            System.out.println(foldEvaluation.toString());

            resultsByFold.put(fold, foldPredictions);
        } // for all folds (k)
        return resultsByFold;
    }


    /**
     * Cross validation that holds out whole domains or whole peptides.
     *
     * The grouping (domain or peptide) is chosen by {@code validParams.type};
     * the matching percentage ({@code d} or {@code p}) decides the mode:
     * <ul>
     *   <li>percentage 0: leave-one-out, one fold per group key, in sorted
     *       key order;</li>
     *   <li>otherwise: {@code k} folds, each holding out a random selection of
     *       that percentage of the groups (percentage taken of the raw-map
     *       size).</li>
     * </ul>
     * Folds whose test set lacks positives or negatives, or that produce no
     * predictions, are skipped.  When both {@code d} and {@code p} are zero the
     * per-fold output of {@link ValidationFoldOutput} is written to
     * {@code <outputDir>/<predictorName>_LOOV_ByDomain.txt} or
     * {@code ..._LOOV_ByPeptide.txt}.
     *
     * Changes from the earlier revision: a leftover {@code numFolds = 1}
     * override (which made every run stop after the first fold) and a debug
     * loop hard-coded to peptide number 99 were removed; the report writer is
     * now always closed; fold count and fold size are capped at the number of
     * groups present in {@code numToDataListMap}.
     *
     * @param validParams      validation parameters (k, d, p, type, output
     *                         directory and predictor name)
     * @param trainData        source of the raw domain/peptide maps
     * @param svmParams        SVM parameters, including the data encoding
     * @param numToDataListMap group number (domain or peptide) to the list of
     *                         its data points
     * @return map from fold index (0-based) to that fold's predictions;
     *         skipped folds have no entry
     */
    public static HashMap leaveKOutCrossValidation(ValidationParameters validParams, Data trainData, svm_parameter svmParams, HashMap numToDataListMap)
    {
        int k = validParams.k;
        int type = validParams.type;
        boolean byDomain = (type == ValidationParameters.DOMAIN);
        int dp = byDomain ? validParams.d : validParams.p;
        double percent = (double)dp/100;

        StringBuilder seqMismatchPerformanceOutput = new StringBuilder();
        System.out.println("\tLeave "+dp+"% "+ValidationParameters.CV_STRING[type]+" out cross validation...");

        List numKeyList = new ArrayList(numToDataListMap.keySet());
        Collections.sort(numKeyList);
        HashMap domainNumToRawMap = trainData.getDomainNumToRawMap();
        HashMap peptideNumToRawMap = trainData.getPeptideNumToRawMap();

        int num = byDomain ? domainNumToRawMap.size() : peptideNumToRawMap.size();

        // The raw maps may list more groups than actually have data points;
        // never request more groups than numKeyList can supply.
        int numInFold = Math.min((int)((double)num*(percent)), numKeyList.size());

        System.out.println("\tNum in fold: " + numInFold );
        HashMap kFoldResultsMap = new HashMap();

        int numFolds = (dp == 0) ? Math.min(num, numKeyList.size()) : k;
        System.out.println("\tNum Folds: " + numFolds);

        for (int i = 0; i < numFolds; i++)
        {
            String testName;
            List testNumKeyList;
            List testFoldDataList = new ArrayList();
            if (dp == 0)
            {
                // Leave-one-out: the i-th group is the test fold.
                int testFoldNum = (Integer)numKeyList.get(i);
                testNumKeyList = new ArrayList();
                testNumKeyList.add(testFoldNum);
                List numToDataList = (List) numToDataListMap.get(testFoldNum);
                testFoldDataList.addAll(numToDataList);
                for (Object item : numToDataList)
                {
                    System.out.println((Datum) item);
                }
                Datum testDt = (Datum)testFoldDataList.get(0);

                Features ft = (Features)peptideNumToRawMap.get(testDt.peptideNum);
                String peptide = ft.toUndelimitedString();
                if (byDomain)
                    testName = "["+testDt.name +"]";
                else
                    testName = "["+peptide+"]";
            }
            else
            {
                // Randomly pick numInFold groups from numKeyList.
                Collections.shuffle(numKeyList);
                testNumKeyList = numKeyList.subList(0,numInFold);

                for (Object key : testNumKeyList)
                {
                    int testFoldNum = (Integer) key;
                    testFoldDataList.addAll((List) numToDataListMap.get(testFoldNum));
                }

                // Distinct domain names / peptide sequences, for the fold header.
                List testNameList = new ArrayList();
                for (Object item : testFoldDataList)
                {
                    Datum testDt = (Datum) item;
                    Features ft = (Features)peptideNumToRawMap.get(testDt.peptideNum);
                    String peptide = ft.toUndelimitedString();
                    String testNameTemp = byDomain ? testDt.name : peptide;

                    if (!testNameList.contains(testNameTemp))
                        testNameList.add(testNameTemp);
                }
                testName = testNameList.toString();
            }

            Data testFoldData = new Data();
            testFoldData.addRawData(testFoldDataList, domainNumToRawMap, peptideNumToRawMap);
            encodeFoldData(testFoldData, svmParams);

            if (testFoldData.getNumPositive() == 0 || testFoldData.getNumNegative()==0)
            {
                System.out.println("\n\t===== Fold " + (i+1) +" " +testName+ " =====");
                System.out.println("\tNo positive or negative data, skipping...");
                continue;
            }

            // Training fold: every group that is not held out.
            List trainFoldDataList = new ArrayList();
            for (Object key : numKeyList)
            {
                int trainFoldNum = (Integer) key;
                if (!testNumKeyList.contains(trainFoldNum))
                {
                    trainFoldDataList.addAll((List) numToDataListMap.get(trainFoldNum));
                }
            }
            System.out.println("\n\t===== Fold " + (i+1) +" "+testName+" =====");
            Data trainFoldData = new Data();
            trainFoldData.addRawData(trainFoldDataList, domainNumToRawMap, peptideNumToRawMap);
            encodeFoldData(trainFoldData, svmParams);

            svm_model svmModel = SVM.train(trainFoldData, svmParams);
            List predictionList = SVM.predict(trainFoldData, testFoldData, svmModel, svmParams);
            if (predictionList.size()==0)
                continue;
            Evaluation eval = new Evaluation(predictionList);
            System.out.println(eval.toString());

            kFoldResultsMap.put(i,predictionList);

            ValidationFoldOutput ss = new ValidationFoldOutput(trainFoldData);
            if (byDomain)
                seqMismatchPerformanceOutput.append(ss.byDomain(predictionList));
            else
                seqMismatchPerformanceOutput.append(ss.byPeptide(predictionList));
        }

        System.out.println("Valid params: " + validParams.d + "\t" + validParams.p);
        if (validParams.d==0 && validParams.p ==0)
        {
            writeLeaveOneOutReport(validParams, byDomain, seqMismatchPerformanceOutput.toString());
        }
        return kFoldResultsMap;
    }

    /**
     * Encodes a fold's data with the feature encoding selected by
     * {@code svmParams.data_encoding}; other encodings are left untouched.
     */
    private static void encodeFoldData(Data foldData, svm_parameter svmParams)
    {
        if (svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
        {
            AtchleyFactorFeatureEncoding fe = new AtchleyFactorFeatureEncoding();
            foldData.encodeData(fe,fe);
        }
        else if (svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
        {
            BinarySequenceFeatureEncoding sfe = new BinarySequenceFeatureEncoding();
            foldData.encodeData(sfe,sfe);
        }
    }

    /**
     * Writes the leave-one-out report to the output directory, reporting (not
     * propagating) any failure and always closing the writer.
     */
    private static void writeLeaveOneOutReport(ValidationParameters validParams, boolean byDomain, String report)
    {
        String fileName;
        if (byDomain)
            fileName = validParams.predictorName + "_LOOV_ByDomain.txt";
        else
            fileName = validParams.predictorName + "_LOOV_ByPeptide.txt";
        System.out.println("Output to: " + validParams.outputDir+"/"+fileName);

        BufferedWriter bw = null;
        try
        {
            bw = new BufferedWriter(new FileWriter(new File(validParams.outputDir+"/"+fileName)));
            bw.write(report);
        }
        catch(Exception e)
        {
            System.out.println("Exception: " + e);
        }
        finally
        {
            if (bw != null)
            {
                try
                {
                    // close() also flushes; report a failure the same way.
                    bw.close();
                }
                catch(Exception e)
                {
                    System.out.println("Exception: " + e);
                }
            }
        }
    }


    /**
     * Entry point for the leave-out cross validation schemes.
     *
     * Groups the training data points by domain number and by peptide number,
     * then dispatches on {@code validParams.type}: DOMAIN and PEPTIDE go to
     * {@link #leaveKOutCrossValidation} with the matching grouping,
     * DOMAIN_PEPTIDE goes to {@link #leaveKMOutCrossValidation} with both.
     *
     * @param trainData   data to cross validate on
     * @param svmParams   SVM parameters
     * @param validParams validation parameters; {@code type} selects the scheme
     * @return the result map of the selected scheme, or an empty map when the
     *         type matches none of the three schemes
     */
    public static HashMap leaveOutCrossValidation(Data trainData, svm_parameter svmParams, ValidationParameters validParams)
    {
        List allRecords = trainData.getDataList();

        System.out.println("\n\tLeave out cross validation...");
        HashMap recordsByDomain = new HashMap();
        HashMap recordsByPeptide = new HashMap();

        for (Object item : allRecords)
        {
            Datum dt = (Datum) item;
            int domainNum = dt.domainNum;
            int peptideNum = dt.peptideNum;

            List sameDomain = (List) recordsByDomain.get(domainNum);
            if (sameDomain == null)
            {
                sameDomain = new ArrayList();
                recordsByDomain.put(domainNum, sameDomain);
            }
            sameDomain.add(dt);

            List samePeptide = (List) recordsByPeptide.get(peptideNum);
            if (samePeptide == null)
            {
                samePeptide = new ArrayList();
                recordsByPeptide.put(peptideNum, samePeptide);
            }
            samePeptide.add(dt);
        }

        if (validParams.type == ValidationParameters.DOMAIN)
        {
            return leaveKOutCrossValidation(validParams,  trainData, svmParams, recordsByDomain);
        }
        if (validParams.type == ValidationParameters.PEPTIDE)
        {
            return leaveKOutCrossValidation(validParams, trainData, svmParams, recordsByPeptide);
        }
        if (validParams.type == ValidationParameters.DOMAIN_PEPTIDE)
        {
            return leaveKMOutCrossValidation(validParams, trainData, svmParams, recordsByDomain, recordsByPeptide);
        }
        return new HashMap();
    }


    /**
     * Trains an SVM model on the given data.
     *
     * Before training, the class weights on {@code svmParams} are overwritten:
     * label -1 gets weight 1.0 and label 1 gets the ratio of positive to
     * negative training examples.  The data is then loaded according to
     * {@code svmParams.data_encoding} and passed to {@code svm.svm_train}.
     *
     * @param trainData training data
     * @param svmParams SVM parameters; its {@code weight}, {@code nr_weight}
     *                  and {@code weight_label} fields are modified
     * @return the trained model, or {@code null} if training threw an
     *         exception (the stack trace is printed)
     */
    public static svm_model train(Data trainData, svm_parameter svmParams)
    {
        System.out.println("\tTraining...\n");
        System.out.println("\t=== TRAINING DATA ===");
        trainData.printSummary();
        System.out.println();

        double positiveToNegativeRatio = (double)trainData.getNumPositive()/trainData.getNumNegative();
        svmParams.weight = new double[]{ 1.0, positiveToNegativeRatio };
        svmParams.nr_weight = 2;
        svmParams.weight_label = new int[]{-1,1};

        // Stays null when the encoding matches none of the known constants.
        svm_problem problem = null;
        if (svmParams.data_encoding == svm_parameter.CONTACTMAP2020)
        {
            System.out.println("\tLoading binary contact map training data");
            problem = load2020ContactData(trainData);
        }
        else if (svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
        {
            System.out.println("\tLoading physicochemcial training data");
            problem = loadData(trainData, trainData.getDataList());
        }
        else if (svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
        {
            System.out.println("\tLoading binary sequence training data");
            problem = loadData(trainData, trainData.getDataList());
        }

        svm_model trainedModel = null;
        try
        {
            svmParams.print();
            trainedModel = svm.svm_train(problem, svmParams);
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }

        System.out.println("\tFinished training...\n");
        return trainedModel;
    }


    /**
     * Predicts every instance of the test data with the given model.
     *
     * The test data is loaded according to {@code svmParams.data_encoding};
     * each instance is passed to {@code svm.svm_predict_x}, whose result
     * element 0 is used as the decision value and element 1 (after
     * {@code zeroToOne}) as the predicted class.  If the test data contains
     * both positives and negatives and the ROC AUC of the predictions is
     * below 0.5, the sign of every decision value is flipped.
     *
     * @param trainData not used by this method
     * @param testData  data to predict
     * @param svmModel  trained model
     * @param svmParams SVM parameters, including the data encoding
     * @return list of {@link Prediction} objects, one per test instance
     */
    public static List predict(Data trainData, Data testData, svm_model svmModel, svm_parameter svmParams)
    {
        System.out.println("\tPredicting...\n");
        System.out.println("\t=== TESTING DATA ===");
        testData.printSummary();
        System.out.println();

        // Stays null when the encoding matches none of the known constants.
        svm_problem problem = null;
        if (svmParams.data_encoding == svm_parameter.CONTACTMAP2020)
        {
            System.out.println("\tLoading binary contact map test data");
            problem = load2020ContactData(testData);
        }
        else if (svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
        {
            System.out.println("\tLoading physicochemical test data");
            problem = loadData(testData, testData.getDataList());
        }
        else if (svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
        {
            System.out.println("\tLoading binary sequence test data");
            problem = loadData(testData, testData.getDataList());
        }

        List results = new ArrayList();
        for (int row = 0; row < problem.l; row++)
        {
            double[] output = svm.svm_predict_x(svmModel, problem.x[row]);
            double decisionValue = output[0];
            double predictedClass = PDZSVMUtils.zeroToOne(output[1]);
            int actualClass = (int)PDZSVMUtils.zeroToOne(problem.y[row]);

            results.add(new Prediction(predictedClass, actualClass, decisionValue,
                    problem.names[row],
                    problem.domainSeqFull[row],
                    problem.peptideSeq[row],
                    problem.organismShort[row],
                    problem.methodShort[row]));
        }

        System.out.println("\tFinished predicting...");

        // AUC is only meaningful when both classes are present.
        boolean hasBothClasses = testData.getNumPositive() >0 && testData.getNumNegative() > 0;
        if (!hasBothClasses)
        {
            return results;
        }

        Evaluation evaluation = new Evaluation(results);
        double rocAUC = evaluation.getROCAUC();
        // Note: this message is printed whether or not any sign is flipped.
        System.out.println("\tFlipping signs...");
        if (rocAUC<0.5)
        {
            for (Object item : results)
            {
                Prediction prediction = (Prediction) item;
                double decisionValue = prediction.getDecValue();
                prediction.setDecValue(-decisionValue);
            }
        }
        System.out.println();
        return results;
    }

}
