package org.baderlab.pdzsvm.predictor.svm;

import org.baderlab.pdzsvm.data.*;

import java.util.*;
import java.util.List;

import libsvm.svm_parameter;
import libsvm.svm_model;
import org.baderlab.pdzsvm.predictor.Predictor;
import org.baderlab.pdzsvm.evaluation.Evaluation;
import org.baderlab.pdzsvm.encoding.*;
import weka.core.Instances;
import org.baderlab.pdzsvm.validation.ValidationParameters;
import org.baderlab.pdzsvm.utils.Constants;

/**
 * Copyright (c) 2010 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVM.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVM.  If not, see <http://www.gnu.org/licenses/>.
 */

/**
 * SVM predictor that encodes its data with either the binary sequence or the
 * Atchley factor (physicochemical) feature encoding, as described in the paper.
 * TODO: This should be implemented as a generic SVM predictor that is not tied
 * to any specific feature encoding.
 */
public class GlobalSVMPredictor extends Predictor {

    /** SVM settings supplied by the caller; {@code data_encoding} selects the feature encoding. */
    private final svm_parameter svmParams;
    /** Labelled training profiles; encoded in the constructor when non-empty. */
    private final Data trainData;
    /** Labelled test profiles built by the most recent {@link #predict} call. */
    private Data testData;
    /** Model returned by {@code SVM.train}; not assigned until {@link #train()} runs. */
    private svm_model svmModel;


    /**
     * Collects the positive and negative training profiles into a single data
     * set and, when that set is non-empty, encodes it with the feature encoding
     * named by {@code svmParams.data_encoding}. A summary of the training data
     * is printed at the end in every case.
     *
     * @param posTrainProfileList training profiles added with {@code Constants.CLASS_YES}
     * @param negTrainProfileList training profiles added with {@code Constants.CLASS_NO}
     * @param svmParams           SVM settings, kept for training, validation and prediction
     */
    public GlobalSVMPredictor(List posTrainProfileList,
                              List negTrainProfileList,
                              svm_parameter svmParams)
    {
        super(posTrainProfileList, negTrainProfileList);

        this.svmParams = svmParams;

        trainData = new Data();
        addLabelledProfiles(trainData, posTrainProfileList, negTrainProfileList);

        // Nothing is encoded (and predictorName is left untouched) when there is
        // no training data or the encoding is neither of the two handled here.
        final boolean hasTrainData = !trainData.isEmpty();
        if (hasTrainData && svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
        {
            AtchleyFactorFeatureEncoding atchleyEncoding = new AtchleyFactorFeatureEncoding();
            trainData.encodeBindingSiteData(atchleyEncoding, atchleyEncoding);
            predictorName = "SVM PC";
        }
        else if (hasTrainData && svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
        {
            BinarySequenceFeatureEncoding binaryEncoding = new BinarySequenceFeatureEncoding();
            trainData.encodeBindingSiteData(binaryEncoding, binaryEncoding);
            predictorName = "SVM BSQ";
        }

        trainData.printSummary();
    }

    /**
     * Adds the positive profiles, then the negative profiles, to {@code target}.
     *
     * @param target           data set that receives the profiles
     * @param positiveProfiles profiles added with {@code Constants.CLASS_YES}
     * @param negativeProfiles profiles added with {@code Constants.CLASS_NO}
     */
    private static void addLabelledProfiles(Data target, List positiveProfiles, List negativeProfiles)
    {
        target.addRawData(positiveProfiles, Constants.CLASS_YES);
        target.addRawData(negativeProfiles, Constants.CLASS_NO);
    }

    /**
     * @return the SVM parameter object that was passed to the constructor
     */
    public svm_parameter getSVMParams()
    {
        return svmParams;
    }

    /**
     * Delegates to {@code SVM.kFoldCrossValidation} using the training data and
     * SVM parameters held by this predictor.
     *
     * @param validParams validation settings forwarded unchanged
     * @return the map returned by {@code SVM.kFoldCrossValidation}
     */
    public HashMap kFoldCrossValidation(ValidationParameters validParams)
    {
        return SVM.kFoldCrossValidation(trainData, svmParams, validParams);
    }

    /**
     * Delegates to {@code SVM.leaveOutCrossValidation} using the training data
     * and SVM parameters held by this predictor.
     *
     * @param validParams validation settings forwarded unchanged
     * @return the map returned by {@code SVM.leaveOutCrossValidation}
     */
    public HashMap leaveOutCrossValidation(ValidationParameters validParams)
    {
        return SVM.leaveOutCrossValidation(trainData, svmParams, validParams);
    }

    /**
     * Prints a progress message and stores the model that {@code SVM.train}
     * returns for the training data.
     */
    public void train ()
    {
        System.out.println("\n\tTraining ...");
        svmModel = SVM.train(trainData, svmParams);
    }

    /**
     * Builds a test data set from the given profiles, encodes it when non-empty
     * with the encoding named by the SVM parameters, and passes it to
     * {@code SVM.predict} together with the training data and the stored model.
     *
     * NOTE(review): the training data is encoded with
     * {@code encodeBindingSiteData} whereas the test data is encoded with
     * {@code encodeData} - confirm that both yield compatible feature vectors.
     *
     * @param posTestProfileList test profiles added with {@code Constants.CLASS_YES}
     * @param negTestProfileList test profiles added with {@code Constants.CLASS_NO}
     * @return the prediction list returned by {@code SVM.predict}
     */
    public List predict(List posTestProfileList, List negTestProfileList)
    {
        testData = new Data();
        addLabelledProfiles(testData, posTestProfileList, negTestProfileList);

        final boolean hasTestData = !testData.isEmpty();
        if (hasTestData && svmParams.data_encoding == svm_parameter.PHYSICOCHEMICAL)
        {
            AtchleyFactorFeatureEncoding atchleyEncoding = new AtchleyFactorFeatureEncoding();
            testData.encodeData(atchleyEncoding, atchleyEncoding);
        }
        else if (hasTestData && svmParams.data_encoding == svm_parameter.BINARY_SEQUENCE)
        {
            BinarySequenceFeatureEncoding binaryEncoding = new BinarySequenceFeatureEncoding();
            testData.encodeData(binaryEncoding, binaryEncoding);
        }

        System.out.println("\n\tPredicting ...");
        predictionList = SVM.predict(trainData, testData, svmModel, svmParams);
        return predictionList;
    }

    /**
     * @return the positive count reported by the training data set
     */
    public int getNumTrainPositive()
    {
        return trainData.getNumPositive();
    }

    /**
     * @return the negative count reported by the training data set
     */
    public int getNumTrainNegative()
    {
        return trainData.getNumNegative();
    }

    /**
     * Example driver: loads the mouse and human training sets and the fly test
     * set, trains a predictor with the physicochemical encoding, predicts the
     * test set, prints an evaluation summary and plots the resulting curves.
     *
     * @param args not used
     */
    public static void main(String[] args)
    {
        DataLoader loader = new DataLoader();
        loader.loadMouseChenTrain();
        loader.loadHumanTrain(Constants.PWM);

        // Alternative test sets that were previously used here:
        //   loader.loadWormTest(Constants.PROTEIN_MICROARRAY);  "PM WORM G" / "PMWormG"
        //   loader.loadMouseTest("ORPHAN");                     "MOUSE ORPHAN PM" / "PMMouseOrphanG"
        loader.loadFlyTest();
        String testName = "FLY PM";
        String dirName = "PMFlyG"; // not read anywhere in this method

        List trainPositives = loader.getPosTrainProfileList();
        List trainNegatives = loader.getNegTrainProfileList();

        List testPositives = loader.getPosTestProfileList();
        List testNegatives = loader.getNegTestProfileList();

        svm_parameter params = new svm_parameter();
        params.setDefaults();
        params.data_encoding = svm_parameter.PHYSICOCHEMICAL;
        params.C = Math.exp(5);
        params.gamma = Math.exp(-Math.log(2)-2);
        // Settings previously used for the binary sequence encoding:
        //   params.data_encoding = svm_parameter.BINARY_SEQUENCE;
        //   params.C = Math.exp(2);
        //   params.gamma = Math.exp(-Math.log(2)-2);

        GlobalSVMPredictor predictor =
                new GlobalSVMPredictor(trainPositives, trainNegatives, params);

        String predictorName = predictor.getPredictorName();
        predictor.train();
        List predictions = predictor.predict(testPositives, testNegatives);

        Evaluation evaluation = new Evaluation(predictions);

        // plotCurves takes parallel lists; each holds the single entry for this predictor.
        List rocAucValues = new ArrayList();
        rocAucValues.add(evaluation.getROCAUC());
        List prAucValues = new ArrayList();
        prAucValues.add(evaluation.getPRAUC());
        List curveLabels = new ArrayList();
        curveLabels.add(predictorName);
        Instances curve = evaluation.getCurve(1);
        List curves = new ArrayList();
        curves.add(curve);

        System.out.println();
        System.out.println("=== Summary " +testName+ " ("+predictorName+") ===");
        System.out.println(evaluation.toString());

        plotCurves(curves, rocAucValues, prAucValues, curveLabels, predictorName + " (" +testName+")");
    }

}
