package org.baderlab.pdzsvmstruct.data;

import org.biojava.bio.seq.Sequence;
import java.util.*;

import org.baderlab.brain.ProteinProfile;
import org.baderlab.pdzsvmstruct.encoding.*;
import org.baderlab.pdzsvmstruct.utils.PDZSVMUtils;
import org.baderlab.pdzsvmstruct.utils.Constants;

/**
 * Copyright (c) 2011 University of Toronto
 * Code written by: Shirley Hui
 * Authors: Shirley Hui, Gary Bader
 *
 * This file is part of PDZSVMStruct.
 *
 * PDZSVM is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * PDZSVM is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  The software and
 * documentation provided hereunder is on an "as is" basis, and the
 * University of Toronto has no obligations to provide maintenance,
 * support, updates, enhancements or modifications.  In no event shall
 * the University of Toronto be liable to any party for direct, indirect,
 * special, incidental or consequential damages, including lost profits,
 * arising out of the use of this software and its documentation, even if
 * the University of Toronto has been advised of the possibility of such
 * damage. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with PDZSVMStruct.  If not, see <http://www.gnu.org/licenses/>.
 */

public class Data
{
    // Mandatory
    // Raw (amino-acid encoded) Features <-> Integer id lookups for domains and peptides.
    private HashMap domainRawToNumMap;
    private HashMap peptideRawToNumMap;
    private HashMap domainNumToRawMap;
    private HashMap peptideNumToRawMap;

    // Interaction records; elements are cast to Datum wherever the list is read.
    private List dataList;
    // Encoded Features -> Integer id lookups, populated when data is encoded.
    private HashMap domainEncToNumMap;
    private HashMap peptideEncToNumMap;

    // Integer id -> encoded Features lookups (inverse direction of the two maps above).
    private HashMap domainNumToEncMap;
    private HashMap peptideNumToEncMap;

    // Domain number -> binding site sequence String returned by enc.getFeatures(name, organism).
    private HashMap domainNumToBindingSiteSeqMap = null;


    // Running counts of interactions whose class label equals / does not equal Constants.CLASS_YES.
    private int numPos;
    private int numNeg;
    // ProteinProfile objects handed to addRawData(List, String), split by class label.
    private List posProfileList;
    private List negProfileList;
    // When true, encodeData() also prints the domain encoding summary line.
    private boolean debug = false;
    // Distinct organism names collected while data is added.
    private List organismList;

    // Lazily built caches; see getDomainNumToNamesMap() and getDomainNumToOrganismMap().
    private HashMap domainNumToNamesMap = null;
    private HashMap domainNumToOrganismMap = null;

    // Per-feature maximum/minimum over the encoded domain features; computed on the first getMaxMin() call.
    private double[] maxFeatureValues = null;
    private double[] minFeatureValues = null;

    // Encoder used to look up a domain's binding site sequence from its name and organism.
    private Sidhu10FeatureEncoding enc = new Sidhu10FeatureEncoding();

    /**
     * Creates an empty data set: all lookup maps and lists start out empty and
     * the positive/negative interaction counters start at zero.
     */
    public Data()
    {
        // Interaction records and their class counters.
        this.dataList = new ArrayList();
        this.numPos = 0;
        this.numNeg = 0;

        // Raw feature lookups, both directions.
        this.domainRawToNumMap = new HashMap();
        this.domainNumToRawMap = new HashMap();
        this.peptideRawToNumMap = new HashMap();
        this.peptideNumToRawMap = new HashMap();

        // Encoded feature lookups, both directions.
        this.domainEncToNumMap = new HashMap();
        this.domainNumToEncMap = new HashMap();
        this.peptideEncToNumMap = new HashMap();
        this.peptideNumToEncMap = new HashMap();

        this.domainNumToBindingSiteSeqMap = new HashMap();

        this.posProfileList = new ArrayList();
        this.negProfileList = new ArrayList();
        this.organismList = new ArrayList();
    }

    /**
     * Replaces the domain number -> encoded Features map and rebuilds the
     * inverse map (encoded Features -> domain number) from its entries.
     *
     * @param domainNumToEncMap map whose keys are Integer domain numbers and
     *                          whose values are Features
     */
    public void setDomainNumToEncMap(HashMap domainNumToEncMap)
    {
        this.domainNumToEncMap = domainNumToEncMap;

        HashMap encToNum = new HashMap();
        for (Object entryObj : domainNumToEncMap.entrySet())
        {
            Map.Entry entry = (Map.Entry) entryObj;
            Integer domainNumKey = (Integer) entry.getKey();
            Features encoded = (Features) entry.getValue();
            encToNum.put(encoded, domainNumKey);
        }
        this.domainEncToNumMap = encToNum;
    }

    /**
     * Prints the positive, negative and total interaction counts to standard
     * output, followed by a blank line.
     */
    public void printSummary()
    {
        final int positives = getNumPositive();
        final int negatives = getNumNegative();
        final int total = getNumInteractions();

        System.out.println("\t=== SUMMARY (# Interactions) ===");
        System.out.println("\tPositive: " + positives);
        System.out.println("\tNegative: " + negatives);
        System.out.println("\tTotal: " + total);
        System.out.println();
    }
    /**
     * Looks up the binding site sequence stored for a domain number.
     *
     * @param domainNum domain number
     * @return the stored sequence, or null if none was recorded for that number
     */
    public String getDomainBindingSiteSeq(int domainNum)
    {
        Object bindingSiteSeq = domainNumToBindingSiteSeqMap.get(domainNum);
        return (String) bindingSiteSeq;
    }

    /**
     * Returns the organism of the first interaction record that carries the
     * given domain number.
     *
     * @param domainNum domain number to search for
     * @return that record's organism, or the empty string if no record matches
     */
    public String getOrganism(int domainNum)
    {
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            if (datum.domainNum == domainNum)
            {
                return datum.organism;
            }
        }
        return "";
    }
    /**
     * Returns the domain number -> domain name map, building it from the
     * interaction records on first use.
     *
     * The map is cached after the first call, so records added later are not
     * reflected. When several records share a domain number, the name of the
     * last such record is the one kept.
     *
     * @return the cached map
     */
    public HashMap getDomainNumToNamesMap()
    {
        if (domainNumToNamesMap != null)
        {
            return domainNumToNamesMap;
        }

        domainNumToNamesMap = new HashMap();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            domainNumToNamesMap.put(datum.domainNum, datum.name);
        }
        return domainNumToNamesMap;
    }
    /**
     * Returns the domain number -> organism map, building it from the
     * interaction records on first use.
     *
     * The map is cached after the first call, so records added later are not
     * reflected. When several records share a domain number, the organism of
     * the last such record is the one kept.
     *
     * @return the cached map
     */
    public HashMap getDomainNumToOrganismMap()
    {
        if (domainNumToOrganismMap != null)
        {
            return domainNumToOrganismMap;
        }

        domainNumToOrganismMap = new HashMap();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            domainNumToOrganismMap.put(datum.domainNum, datum.organism);
        }
        return domainNumToOrganismMap;
    }

    /**
     * Returns the length of the combined encoded feature vector, measured on
     * the first interaction record: the number of encoded domain features plus
     * the number of encoded peptide features.
     *
     * A side with no encoding for that record contributes 0, which happens for
     * example when data was encoded without a peptide encoding. This keeps the
     * method usable by getEncodedDataPairAsDoubles(), which itself tolerates
     * an empty peptide map.
     *
     * @return the combined feature count, or 0 if there are no records
     */
    public int getNumFeatures()
    {
        List dataList = getDataList();
        if (dataList.isEmpty())
        {
            // Nothing to measure; previously this threw IndexOutOfBoundsException.
            return 0;
        }

        Datum first = (Datum) dataList.get(0);
        Features domainFeats = (Features) getDomainNumToEncMap().get(first.domainNum);
        Features peptideFeats = (Features) getPeptideNumToEncMap().get(first.peptideNum);

        int numFeatures = 0;
        if (domainFeats != null)
        {
            numFeatures += domainFeats.numFeatures();
        }
        if (peptideFeats != null)
        {
            numFeatures += peptideFeats.numFeatures();
        }
        return numFeatures;
    }

    /**
     * Returns the per-feature maximum and minimum over all encoded domain
     * feature vectors.
     *
     * The two arrays are computed on the first call and cached in
     * maxFeatureValues / minFeatureValues; later calls return the same array
     * instances. Their length is taken from the first encoded domain.
     *
     * @return a new list holding the max array at index 0 and the min array
     *         at index 1
     */
    public List getMaxMin()
    {
        boolean notYetComputed = (maxFeatureValues == null) || (minFeatureValues == null);
        if (notYetComputed)
        {
            List domainNums = new ArrayList(domainNumToEncMap.keySet());

            Features firstEncoded = (Features) domainNumToEncMap.get(domainNums.get(0));
            int width = firstEncoded.numFeatures();
            maxFeatureValues = new double[width];
            minFeatureValues = new double[width];

            // Start from the extremes so the first observed value always replaces them.
            for (int col = 0; col < maxFeatureValues.length; col++)
            {
                maxFeatureValues[col] = -Double.MAX_VALUE;
                minFeatureValues[col] = Double.MAX_VALUE;
            }

            Iterator numIt = domainNums.iterator();
            while (numIt.hasNext())
            {
                Integer domainNum = (Integer) numIt.next();
                Features encoded = (Features) domainNumToEncMap.get(domainNum);
                double[] values = encoded.getFeatureValuesAsDoubleArray();
                for (int col = 0; col < values.length; col++)
                {
                    double v = values[col];
                    maxFeatureValues[col] = Math.max(maxFeatureValues[col], v);
                    minFeatureValues[col] = Math.min(minFeatureValues[col], v);
                }
            }
        }

        List maxThenMin = new ArrayList();
        maxThenMin.add(maxFeatureValues);
        maxThenMin.add(minFeatureValues);
        return maxThenMin;
    }
    
    /**
     * Linearly rescales every encoded domain feature into [lower, upper] using
     * the supplied per-feature maxima and minima, replacing the stored
     * encodings with the scaled ones.
     *
     * A feature whose max equals its min is left unchanged. Values exactly at
     * the min or max map exactly to lower or upper. The encoded -> number map
     * is cleared and rebuilt from the scaled features.
     *
     * @param lower      lower bound of the target range
     * @param upper      upper bound of the target range
     * @param maxMinList list with the double[] of maxima at index 0 and the
     *                   double[] of minima at index 1
     */
    public  void scaleData( double lower, double upper, List maxMinList)
    {
        Set domainNumSet = domainNumToEncMap.keySet();
        domainEncToNumMap.clear();
        // Snapshot the keys: the map is written to while we walk them.
        List domainNums = new ArrayList(domainNumSet);
        double[] featureMax = (double[]) maxMinList.get(0);
        double[] featureMin = (double[]) maxMinList.get(1);

        for (Object numObj : domainNums)
        {
            Integer domainNum = (Integer) numObj;
            Features unscaled = (Features) domainNumToEncMap.get(domainNum);
            double[] rawValues = unscaled.getFeatureValuesAsDoubleArray();

            List scaledValues = new ArrayList(rawValues.length);
            for (int col = 0; col < rawValues.length; col++)
            {
                double value = rawValues[col];
                if (featureMax[col] != featureMin[col])
                {
                    if (value == featureMin[col])
                    {
                        value = lower;
                    }
                    else if (value == featureMax[col])
                    {
                        value = upper;
                    }
                    else
                    {
                        value = lower + (upper-lower) *
                                (value-featureMin[col])/
                                (featureMax[col]-featureMin[col]);
                    }
                }
                // Single-valued features fall through with their value untouched.
                scaledValues.add(value);
            }

            Features scaled = new Features();
            scaled.addFeatureValues(scaledValues);
            domainNumToEncMap.put(domainNum, scaled);
            domainEncToNumMap.put(scaled, domainNum);
        }
    }

    /**
     * Encodes every known domain with a name/organism based domain encoding
     * and, optionally, every known peptide with a sequence based encoding.
     * The results are stored in the number <-> encoded Features maps and a
     * summary is printed to standard output.
     *
     * A domain whose encoding comes back null is reported and left out of the
     * encoded maps. The summary no longer dereferences a null encoding: the
     * reported feature counts come from the last successfully encoded domain
     * and peptide, or 0 when there is none.
     *
     * @param domainEncoding  encoding applied to (domain name, organism); must not be null
     * @param peptideEncoding encoding applied to each peptide's undelimited
     *                        string, or null to skip peptide encoding
     */
    public void encodeBindingSiteStructureData(DomainFeatureEncoding domainEncoding, FeatureEncoding peptideEncoding)
    {
        System.out.println("\tEncoding binding site STRUCTURE data...");

        // Encodings already computed, keyed by name+organism, so that raw entries
        // sharing a name and organism are encoded only once.
        HashMap domainOrganismToEncMap = new HashMap();
        // Last domain encoding that succeeded; used only for the summary.
        Features lastEncodedDomain = null;

        Iterator it = domainRawToNumMap.keySet().iterator();
        while(it.hasNext())
        {
            Features features = (Features)it.next();
            Integer domainNum = (Integer)domainRawToNumMap.get(features);

            // Find the name and organism of the first record using this domain number.
            String domainName = "";
            String organism = "";
            for (int i =0; i < dataList.size();i++)
            {
                Datum dt = (Datum) dataList.get(i);
                if (dt.domainNum == domainNum.intValue())
                {
                    domainName = dt.name;
                    organism = dt.organism;
                    break;
                }
            }

            String key = domainName + organism;
            Features encodedDomainFeatures = (Features)domainOrganismToEncMap.get(key);
            if (encodedDomainFeatures == null)
            {
                encodedDomainFeatures = domainEncoding.encodeFeatures(domainName,organism);
                domainOrganismToEncMap.put(key,encodedDomainFeatures);
            }

            if (encodedDomainFeatures == null)
            {
                System.out.println(domainName + ", " + domainNum + "," + features.toUndelimitedString() + "-> NULL");
                continue;
            }

            domainNumToEncMap.put(domainNum,encodedDomainFeatures);
            domainEncToNumMap.put(encodedDomainFeatures,domainNum);
            lastEncodedDomain = encodedDomainFeatures;
        }

        int numDomainEncFeatures = 0;
        if (lastEncodedDomain != null)
            numDomainEncFeatures = lastEncodedDomain.numFeatures();

        int numPeptideEncFeatures = 0;
        if (peptideEncoding != null)
        {
            Features lastEncodedPeptide = null;
            it = peptideRawToNumMap.keySet().iterator();
            while(it.hasNext())
            {
                Features features = (Features)it.next();
                Integer id = (Integer)peptideRawToNumMap.get(features);
                Features encodedPeptideFeatures = peptideEncoding.encodeFeatures(features.toUndelimitedString());
                peptideNumToEncMap.put(id,encodedPeptideFeatures);
                peptideEncToNumMap.put(encodedPeptideFeatures,id);
                if (encodedPeptideFeatures != null)
                    lastEncodedPeptide = encodedPeptideFeatures;
            }

            // With no peptides (or no successful encoding) the count stays 0
            // instead of failing with a NullPointerException.
            if (lastEncodedPeptide != null)
                numPeptideEncFeatures = lastEncodedPeptide.numFeatures();
        }

        System.out.println("\tDomain encoding used: " + domainEncoding.getEncodingName()+ ", Num Features: " + numDomainEncFeatures);
        if (peptideEncoding != null)
            System.out.println("\tPeptide encoding used: " + peptideEncoding.getEncodingName()+ ", Num Features: " + numPeptideEncFeatures);
        else
            System.out.println("\tPeptide encoding used: None, Num Features: 0");

        System.out.println("\tDomain Map/Encoded Domain List Size:" + domainRawToNumMap.size() + "-" + domainNumToEncMap.size());
        System.out.println("\tPeptide Raw Map/Peptide Encoded List Size:" + peptideRawToNumMap.size() + "-" + peptideEncToNumMap.size());
        System.out.println("\tNumber of encoded features: " + numDomainEncFeatures + ", " + numPeptideEncFeatures);
        System.out.println();
    }

    /**
     * Encodes every known domain and, optionally, every known peptide from its
     * undelimited raw string, storing the results in the number <-> encoded
     * Features maps and printing a summary to standard output.
     *
     * A domain whose encoding comes back null is reported and left out of the
     * encoded maps. Domain and peptide feature counts are tracked separately,
     * so the peptide summary line reports the peptide encoding's own count (0
     * when nothing was encoded) rather than reusing the domain's value or
     * failing on a null reference.
     *
     * @param domainEncoding  encoding applied to each domain; must not be null
     * @param peptideEncoding encoding applied to each peptide, or null to skip
     *                        peptide encoding and the peptide summary lines
     */
    public void encodeData(FeatureEncoding domainEncoding, FeatureEncoding peptideEncoding)
    {
        System.out.println("\n\tEncoding data...");

        // Last domain encoding that succeeded; used only for the summary.
        Features lastEncodedDomain = null;
        Iterator it = domainRawToNumMap.keySet().iterator();
        while(it.hasNext())
        {
            Features features = (Features)it.next();
            Integer id = (Integer)domainRawToNumMap.get(features);

            Features encodedFeatures = domainEncoding.encodeFeatures(features.toUndelimitedString());
            if (encodedFeatures == null)
            {
                // Look up the domain's name only to make the failure message readable.
                String domainName = "";
                for (int i =0; i < dataList.size();i++)
                {
                    Datum dt = (Datum) dataList.get(i);
                    if (dt.domainNum == id.intValue())
                    {
                        domainName = dt.name;
                        break;
                    }
                }
                System.out.println(domainName + ", " + id + "," + features.toUndelimitedString() + "-> NULL");
                continue;
            }

            domainNumToEncMap.put(id,encodedFeatures);
            domainEncToNumMap.put(encodedFeatures,id);
            lastEncodedDomain = encodedFeatures;
        }

        int numEncFeatures = 0;
        if (lastEncodedDomain != null)
            numEncFeatures = lastEncodedDomain.numFeatures();

        if (debug)
            System.out.println("\tDomain encoding used: " + domainEncoding.getEncodingName() + ", Num Features: " + numEncFeatures);

        if (peptideEncoding != null)
        {
            Features lastEncodedPeptide = null;
            it = peptideRawToNumMap.keySet().iterator();
            while(it.hasNext())
            {
                Features features = (Features)it.next();
                Integer id = (Integer)peptideRawToNumMap.get(features);
                Features encodedPeptide = peptideEncoding.encodeFeatures(features.toUndelimitedString());
                peptideNumToEncMap.put(id,encodedPeptide);
                peptideEncToNumMap.put(encodedPeptide,id);
                if (encodedPeptide != null)
                    lastEncodedPeptide = encodedPeptide;
            }

            int numPeptideEncFeatures = 0;
            if (lastEncodedPeptide != null)
                numPeptideEncFeatures = lastEncodedPeptide.numFeatures();

            System.out.println("\tPeptide encoding used: " + peptideEncoding.getEncodingName()+ ", Num Features: " + numPeptideEncFeatures);

            System.out.println("\tDomain Map/Encoded Domain List Size:" + domainRawToNumMap.size() + "-" + domainEncToNumMap.size());
            System.out.println("\tPeptide Raw Map/Peptide Encoded List Size:" + peptideRawToNumMap.size() + "-" + peptideEncToNumMap.size());
            System.out.println("\t Number of encoded features: " + numEncFeatures);
        }
    }

    /**
     * @return the running count of positive interactions
     */
    public int getNumPositive()
    {
        return this.numPos;
    }
    /**
     * @return the running count of negative interactions
     */
    public int getNumNegative()
    {
        return this.numNeg;
    }
    /**
     * @return the sum of the positive and negative interaction counts
     */
    public int getNumInteractions()
    {
        int total = numPos + numNeg;
        return total;
    }
    /**
     * @return true if the interaction record list contains no elements
     */
    public boolean isEmpty()
    {
        return this.dataList.isEmpty();
    }
    /**
     * @return the number of entries in the raw domain Features -> number map
     */
    public int getNumDomains()
    {
        return this.domainRawToNumMap.size();
    }
    /**
     * @return the number of entries in the raw peptide Features -> number map
     */
    public int getNumPeptides()
    {
        return this.peptideRawToNumMap.size();
    }
    /**
     * @return the internal raw domain Features -> number map (not a copy)
     */
    public HashMap getDomainRawToNumMap()
    {
        return this.domainRawToNumMap;
    }

    /**
     * @return the internal raw peptide Features -> number map (not a copy)
     */
    public HashMap getPeptideRawToNumMap()
    {
        return this.peptideRawToNumMap;
    }
    /**
     * @return the internal domain number -> raw Features map (not a copy)
     */
    public HashMap getDomainNumToRawMap()
    {
        return this.domainNumToRawMap;
    }
    /**
     * @return the internal peptide number -> raw Features map (not a copy)
     */
    public HashMap getPeptideNumToRawMap()
    {
        return this.peptideNumToRawMap;
    }
    /**
     * @return the internal list of interaction records (not a copy)
     */
    public List getDataList()
    {
        return this.dataList;
    }
    /**
     * Collects one interaction record per distinct domain name among the
     * records carrying the given domain number; for each name the first
     * matching record in list order is kept.
     *
     * @param domainNum domain number to search for
     * @return a new list of the selected records, possibly empty
     */
    public List lookupDomainInfo(int domainNum)
    {
        List firstPerName = new ArrayList();
        Set namesSeen = new HashSet();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            if (datum.domainNum != domainNum)
            {
                continue;
            }
            // add() reports whether the name was new to the set.
            if (namesSeen.add(datum.name))
            {
                firstPerName.add(datum);
            }
        }
        return firstPerName;
    }
    /**
     * Finds the domain number of the first interaction record whose name and
     * organism both equal the given values.
     *
     * @param domainName domain name to match
     * @param organism   organism to match
     * @return the matching record's domain number, or -1 if none matches
     */
    public int lookupDomainNum (String domainName, String organism)
    {
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            if (!datum.name.equals(domainName))
            {
                continue;
            }
            if (datum.organism.equals(organism))
            {
                return datum.domainNum;
            }
        }
        return -1;
    }
    /**
     * Collects every interaction record carrying the given domain number.
     *
     * @param domainNum domain number to search for
     * @return a new list of matching records in their original order
     */
    public List lookupDataList(int domainNum)
    {
        List matches = new ArrayList();
        Iterator records = dataList.iterator();
        while (records.hasNext())
        {
            Datum datum = (Datum) records.next();
            if (datum.domainNum == domainNum)
            {
                matches.add(datum);
            }
        }
        return matches;
    }
    /**
     * Collects every interaction record that carries the given domain number
     * and whose class label equals the given label.
     *
     * @param domainNum domain number to search for
     * @param posorneg  class label to match against each record's posneg
     * @return a new list of matching records in their original order
     */
    public List getDataList(int domainNum, String posorneg)
    {
        List matches = new ArrayList();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            boolean sameDomain = (datum.domainNum == domainNum);
            if (sameDomain && datum.posneg.equals(posorneg))
            {
                matches.add(datum);
            }
        }
        return matches;
    }

    /**
     * @return the internal encoded domain Features -> number map (not a copy)
     */
    public HashMap getDomainEncToNumMap()
    {
        return this.domainEncToNumMap;
    }
    /**
     * @return the internal encoded peptide Features -> number map (not a copy)
     */
    public HashMap getPeptideEncToNumMap()
    {
        return this.peptideEncToNumMap;
    }

    /**
     * @return the internal domain number -> encoded Features map (not a copy)
     */
    public HashMap getDomainNumToEncMap()
    {
        return this.domainNumToEncMap;
    }
    /**
     * @return the internal peptide number -> encoded Features map (not a copy)
     */
    public HashMap getPeptideNumToEncMap()
    {
        return this.peptideNumToEncMap;
    }
    /**
     * Finds a stored protein profile by name and organism.
     *
     * The positive profile list is searched when posorneg equals
     * Constants.CLASS_YES, otherwise the negative list. The given organism is
     * converted with PDZSVMUtils.organismShortToLongForm before it is compared
     * with each profile's organism.
     *
     * @param name     profile name to match
     * @param organism organism, passed through organismShortToLongForm
     * @param posorneg class label selecting which profile list to search
     * @return the first matching profile, or null if none matches
     */
    public ProteinProfile lookupProfile(String name, String organism, String posorneg)
    {
        List candidates = posorneg.equals(Constants.CLASS_YES) ? posProfileList : negProfileList;

        for (Object candidate : candidates)
        {
            ProteinProfile profile = (ProteinProfile) candidate;
            if (!profile.getName().equals(name))
            {
                continue;
            }
            if (profile.getOrganism().equals(PDZSVMUtils.organismShortToLongForm(organism)))
            {
                return profile;
            }
        }
        return null;
    }

    // Makes new data
    /**
     * Adopts the given list of interaction records as this object's data list
     * and registers the raw features of every record.
     *
     * For each record the raw domain and peptide Features are looked up in the
     * supplied maps and stored in both directions, the domain's binding site
     * sequence is fetched once per domain number, the positive or negative
     * counter is incremented, and the record's organism is added to the
     * organism list if it is new. The counters are incremented, not reset.
     *
     * @param dataList           list of Datum records; stored by reference
     * @param domainNumToRawMap  domain number -> raw Features
     * @param peptideNumToRawMap peptide number -> raw Features
     */
    public void addRawData(List dataList, HashMap domainNumToRawMap, HashMap peptideNumToRawMap)
    {
        this.dataList = dataList;

        Iterator records = dataList.iterator();
        while (records.hasNext())
        {
            Datum datum = (Datum) records.next();
            final int dNum = datum.domainNum;
            final int pNum = datum.peptideNum;

            Features rawDomain = (Features) domainNumToRawMap.get(dNum);
            Features rawPeptide = (Features) peptideNumToRawMap.get(pNum);

            // Register raw features in both lookup directions.
            this.domainRawToNumMap.put(rawDomain, dNum);
            this.peptideRawToNumMap.put(rawPeptide, pNum);
            this.domainNumToRawMap.put(dNum, rawDomain);
            this.peptideNumToRawMap.put(pNum, rawPeptide);

            // The binding site sequence is fetched only once per domain number.
            if (!domainNumToBindingSiteSeqMap.containsKey(dNum))
            {
                domainNumToBindingSiteSeqMap.put(dNum, enc.getFeatures(datum.name, datum.organism));
            }

            if (datum.posneg.equals(Constants.CLASS_YES))
            {
                numPos++;
            }
            else
            {
                numNeg++;
            }

            if (!organismList.contains(datum.organism))
            {
                organismList.add(datum.organism);
            }
        }
    }
    // Makes new data
    /**
     * Adopts the given list of interaction records as this object's data list
     * and registers both the raw and the already-encoded features of every
     * record.
     *
     * For each record the raw and encoded Features are looked up in the
     * supplied maps and stored in both directions, the domain's binding site
     * sequence is fetched once per domain number, the positive or negative
     * counter is incremented, and the record's organism is added to the
     * organism list if it is new. The counters are incremented, not reset.
     *
     * @param dataList           list of Datum records; stored by reference
     * @param domainNumToRawMap  domain number -> raw Features
     * @param peptideNumToRawMap peptide number -> raw Features
     * @param domainNumToEncMap  domain number -> encoded Features
     * @param peptideNumToEncMap peptide number -> encoded Features
     */
    public void addRawData(List dataList, HashMap domainNumToRawMap, HashMap peptideNumToRawMap,
                           HashMap domainNumToEncMap, HashMap peptideNumToEncMap)
    {
        this.dataList = dataList;

        Iterator records = dataList.iterator();
        while (records.hasNext())
        {
            Datum datum = (Datum) records.next();
            final int dNum = datum.domainNum;
            final int pNum = datum.peptideNum;

            Features rawDomain = (Features) domainNumToRawMap.get(dNum);
            Features rawPeptide = (Features) peptideNumToRawMap.get(pNum);
            Features encodedDomain = (Features) domainNumToEncMap.get(dNum);
            Features encodedPeptide = (Features) peptideNumToEncMap.get(pNum);

            // Raw features, both directions.
            this.domainRawToNumMap.put(rawDomain, dNum);
            this.peptideRawToNumMap.put(rawPeptide, pNum);
            this.domainNumToRawMap.put(dNum, rawDomain);
            this.peptideNumToRawMap.put(pNum, rawPeptide);

            // Encoded features, both directions.
            this.domainNumToEncMap.put(dNum, encodedDomain);
            this.peptideNumToEncMap.put(pNum, encodedPeptide);
            this.domainEncToNumMap.put(encodedDomain, dNum);
            this.peptideEncToNumMap.put(encodedPeptide, pNum);

            // The binding site sequence is fetched only once per domain number.
            if (!domainNumToBindingSiteSeqMap.containsKey(dNum))
            {
                domainNumToBindingSiteSeqMap.put(dNum, enc.getFeatures(datum.name, datum.organism));
            }

            if (datum.posneg.equals(Constants.CLASS_YES))
            {
                numPos++;
            }
            else
            {
                numNeg++;
            }

            if (!organismList.contains(datum.organism))
            {
                organismList.add(datum.organism);
            }
        }
    }
    // Makes new data
    /**
     * Adds the full cross product of the given domains and peptides as
     * interaction records that all share one experimental method, organism
     * and class label.
     *
     * Domains and peptides are numbered by their position in the supplied
     * lists and their amino-acid encoded Features are stored in the raw
     * lookup maps. The positive or negative interaction counter is advanced
     * by the number of records added, as the other addRawData overloads do,
     * so getNumInteractions() and printSummary() agree with the data list.
     *
     * @param domainList      domain sequences (Strings)
     * @param peptideList     peptide sequences (Strings)
     * @param domainNamesList domain names, parallel to domainList
     * @param expMethod       experimental method stored on every record
     * @param organism        organism stored on every record; its
     *                        organismLongToShortForm is added to the organism list
     * @param posneg          class label stored on every record
     */
    public void addRawData(List domainList, List peptideList, List domainNamesList, String expMethod, String organism, String posneg)
    {
        AminoAcidFeatureEncoding rawAAEncoding = new AminoAcidFeatureEncoding();

        for (int i=0;i < peptideList.size();i++)
        {
            String peptide = (String)peptideList.get(i);
            Features peptideAAFeatures = rawAAEncoding.encodeFeatures(peptide);
            peptideNumToRawMap.put(i,peptideAAFeatures);
            peptideRawToNumMap.put(peptideAAFeatures, i);
        }
        for (int i=0;i < domainList.size();i++)
        {
            String domain = (String)domainList.get(i);
            Features domainAAFeatures = rawAAEncoding.encodeFeatures(domain);
            domainNumToRawMap.put(i,domainAAFeatures);
            domainRawToNumMap.put(domainAAFeatures, i);
        }

        int numAdded = 0;
        for (int i=0; i < domainList.size();i++)
        {
            String name = (String)domainNamesList.get(i);

            for (int j=0; j < peptideList.size();j++)
            {
                Datum dh = new Datum(name, expMethod, organism, posneg, i, j);
                dataList.add(dh);
                numAdded = numAdded + 1;
                // The binding site sequence is fetched only once per domain number.
                if (!domainNumToBindingSiteSeqMap.containsKey(dh.domainNum))
                {
                    String domainBSSeq = enc.getFeatures(dh.name, dh.organism);
                    domainNumToBindingSiteSeqMap.put(dh.domainNum, domainBSSeq);
                }
            }
        }

        // Keep the class counters in step with the records just added.
        if (Constants.CLASS_YES.equals(posneg))
            numPos = numPos + numAdded;
        else
            numNeg = numNeg + numAdded;

        // Same organism for every record: register it once, and only when at
        // least one domain was supplied (as before).
        if (!domainList.isEmpty())
        {
            String shortOrganism = PDZSVMUtils.organismLongToShortForm(organism);
            if (!organismList.contains(shortOrganism))
                organismList.add(shortOrganism);
        }
    }


    /**
     * Adds every domain-peptide interaction described by the given protein
     * profiles, all labelled with the given class.
     *
     * The profiles are appended to the positive list when posneg equals
     * Constants.CLASS_YES, otherwise to the negative list. A profile with no
     * peptide sequences is reported and skipped. Domains and peptides are
     * amino-acid encoded; one not seen before is numbered with the current
     * size of its raw map. The class counters are updated once all profiles
     * have been processed, and the number -> raw maps are then rebuilt from
     * the raw -> number maps.
     *
     * @param proteinProfileList list of ProteinProfile objects
     * @param posneg             class label applied to every interaction
     */
    public void addRawData(List proteinProfileList, String posneg)
    {
        List targetProfiles = posneg.equals(Constants.CLASS_YES) ? posProfileList : negProfileList;
        targetProfiles.addAll(proteinProfileList);

        AminoAcidFeatureEncoding aaEncoder = new AminoAcidFeatureEncoding();
        int positivesSeen = 0;
        int negativesSeen = 0;

        for (Object profileObj : proteinProfileList)
        {
            ProteinProfile profile = (ProteinProfile) profileObj;

            String name = profile.getName();
            String organism = profile.getOrganism();
            String expMethod = PDZSVMUtils.methodLongToShortForm(profile.getExperimentalMethod());
            String domainSequence = profile.getDomainSequence();
            Collection peptideCollection = profile.getSequenceMap();
            Iterator peptides = peptideCollection.iterator();

            // A profile without peptides contributes nothing, not even its domain.
            if (!peptides.hasNext())
            {
                System.out.println("\tProfile " +  profile.getName() + " has no peptides.  Not added.");
                continue;
            }

            // Number the domain, reusing the number of an identical raw encoding.
            Features domainAAFeatures = aaEncoder.encodeFeatures(domainSequence);
            Integer domainNumInteger = (Integer) domainRawToNumMap.get(domainAAFeatures);
            if (domainNumInteger == null)
            {
                domainNumInteger = Integer.valueOf(domainRawToNumMap.size());
                domainRawToNumMap.put(domainAAFeatures, domainNumInteger);
            }
            int domainNum = domainNumInteger.intValue();

            while (peptides.hasNext())
            {
                Sequence peptideSeq = (Sequence) peptides.next();
                String peptideSequence = peptideSeq.seqString();

                // Number the peptide the same way as the domain.
                Features peptideAAFeatures = aaEncoder.encodeFeatures(peptideSequence);
                Integer peptideNumInteger = (Integer) peptideRawToNumMap.get(peptideAAFeatures);
                if (peptideNumInteger == null)
                {
                    peptideNumInteger = Integer.valueOf(peptideRawToNumMap.size());
                    peptideRawToNumMap.put(peptideAAFeatures, peptideNumInteger);
                }
                int peptideNum = peptideNumInteger.intValue();

                Datum dh = new Datum(name, expMethod, organism, posneg, domainNum, peptideNum);
                if (dh.posneg.equals(Constants.CLASS_YES))
                {
                    positivesSeen++;
                }
                else
                {
                    negativesSeen++;
                }

                dataList.add(dh);

                // The binding site sequence is fetched only once per domain number.
                if (!domainNumToBindingSiteSeqMap.containsKey(dh.domainNum))
                {
                    domainNumToBindingSiteSeqMap.put(dh.domainNum, enc.getFeatures(dh.name, dh.organism));
                }
                if (!organismList.contains(PDZSVMUtils.organismLongToShortForm(organism)))
                {
                    organismList.add(PDZSVMUtils.organismLongToShortForm(organism));
                }
            }
        }

        numPos += positivesSeen;
        numNeg += negativesSeen;

        // Rebuild the inverse lookups from the (possibly grown) raw maps.
        domainNumToRawMap = reverseMap(domainRawToNumMap);
        peptideNumToRawMap = reverseMap(peptideRawToNumMap);
    }
    /**
     * Builds the inverse of a Features -> Integer map.
     *
     * @param keyToDataMap map whose keys are Features and whose values are Integers
     * @return a new map from each Integer value to its Features key
     */
    private static HashMap reverseMap(HashMap keyToDataMap)
    {
        HashMap inverted = new HashMap();
        for (Object entryObj : keyToDataMap.entrySet())
        {
            Map.Entry entry = (Map.Entry) entryObj;
            Features keyFeatures = (Features) entry.getKey();
            Integer dataNum = (Integer) entry.getValue();
            inverted.put(dataNum, keyFeatures);
        }
        return inverted;
    }

    /**
     * @return a new list of the distinct experimental methods found in the
     *         interaction records, in order of first appearance
     */
    public List getExpMethods()
    {
        // An insertion-ordered set gives first-appearance order without duplicates.
        Set distinctMethods = new LinkedHashSet();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            distinctMethods.add(datum.expMethod);
        }
        return new ArrayList(distinctMethods);
    }
    /**
     * @return a new list of the distinct domain names found in the
     *         interaction records, in order of first appearance
     */
    public List getDomainNames()
    {
        // An insertion-ordered set gives first-appearance order without duplicates.
        Set distinctNames = new LinkedHashSet();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            distinctNames.add(datum.name);
        }
        return new ArrayList(distinctNames);
    }
    /**
     * @return a new list of the distinct peptide numbers found in the
     *         interaction records, in order of first appearance
     */
    public List getPeptideNums()
    {
        // An insertion-ordered set gives first-appearance order without duplicates.
        Set distinctNums = new LinkedHashSet();
        for (Object record : dataList)
        {
            Datum datum = (Datum) record;
            distinctNums.add(datum.peptideNum);
        }
        return new ArrayList(distinctNums);
    }

    /**
     * @return the internal list of organisms collected while data was added
     *         (not a copy)
     */
    public List getOrganismList()
    {
        return this.organismList;
    }
    /**
     * Returns the raw domain and peptide Features of an interaction record.
     *
     * @param dt interaction record
     * @return a two-element array: the raw domain Features at index 0 and the
     *         raw peptide Features at index 1; a slot is null when its raw map
     *         is empty or holds no entry for the record's number
     */
    public Features[] getRawDataPair(Datum dt)
    {
        final int dNum = dt.domainNum;
        final int pNum = dt.peptideNum;

        HashMap rawDomains = getDomainNumToRawMap();
        HashMap rawPeptides = getPeptideNumToRawMap();

        Features[] pair = new Features[2];
        if (rawDomains.size() > 0)
        {
            pair[0] = (Features) rawDomains.get(dNum);
        }
        if (rawPeptides.size() > 0)
        {
            pair[1] = (Features) rawPeptides.get(pNum);
        }
        return pair;
    }

    /**
     * Builds one Features object holding the encoded domain features followed
     * by the encoded peptide features of an interaction record. A side whose
     * encoded map is empty is left out.
     *
     * @param dt interaction record
     * @return a new Features object with the available encodings added
     */
    public  Features getEncodedDataPair(Datum dt)
    {
        final int dNum = dt.domainNum;
        final int pNum = dt.peptideNum;

        Features combined = new Features();
        if (!domainNumToEncMap.isEmpty())
        {
            Features encodedDomain = (Features) domainNumToEncMap.get(dNum);
            combined.addFeatures(encodedDomain);
        }
        if (!peptideNumToEncMap.isEmpty())
        {
            Features encodedPeptide = (Features) peptideNumToEncMap.get(pNum);
            combined.addFeatures(encodedPeptide);
        }
        return combined;
    }

    /**
     * Returns the encoded domain feature values followed by the encoded
     * peptide feature values of an interaction record as one array. The array
     * length is getNumFeatures(). A side whose encoded map is empty
     * contributes no values.
     *
     * @param dt interaction record
     * @return a new array with the concatenated feature values
     */
    public  double[] getEncodedDataPairAsDoubles(Datum dt)
    {
        final int dNum = dt.domainNum;
        final int pNum = dt.peptideNum;
        double[] combined = new double[getNumFeatures()];

        // Next free slot in the combined array.
        int offset = 0;
        if (!domainNumToEncMap.isEmpty())
        {
            Features encodedDomain = (Features) domainNumToEncMap.get(dNum);
            double[] domainValues = encodedDomain.getFeatureValuesAsDoubleArray();
            for (double v : domainValues)
            {
                combined[offset++] = v;
            }
        }
        if (!peptideNumToEncMap.isEmpty())
        {
            Features encodedPeptide = (Features) peptideNumToEncMap.get(pNum);
            double[] peptideValues = encodedPeptide.getFeatureValuesAsDoubleArray();
            for (double v : peptideValues)
            {
                combined[offset++] = v;
            }
        }
        return combined;
    }

    /**
     * Manual driver: loads training profiles through DataLoader, adds the
     * positive and negative profile lists to a fresh Data object and prints
     * the interaction summary.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        DataLoader loader = new DataLoader();
        loader.loadMousePDBTrain();
        loader.loadSidhuHumanPDBTrain(Constants.SIDHU_HUMAN_G_PDB, Constants.PHAGE_DISPLAY);
        loader.loadSidhuHumanPDBTrain(Constants.SIDHU_HUMAN_G_PDB, Constants.HOMOLOGY_MODEL);

        List positives = loader.getPosTrainProfileList();
        List negatives = loader.getNegTrainProfileList();

        Data training = new Data();
        training.addRawData(positives, Constants.CLASS_YES);
        training.addRawData(negatives, Constants.CLASS_NO);
        training.printSummary();
    }

    
    /**
     * Dumps the data set to standard output: every interaction record, every
     * raw domain and raw peptide in ascending number order, and the number of
     * encoded domains and peptides.
     *
     * The encoded features themselves are not printed. The former loops that
     * fetched them by index and discarded the result have been removed.
     */
    public void print()
    {
        System.out.println("\tPrint Data...");
        System.out.println("\tTotal number instances: " + dataList.size());
        for (Object record : dataList)
        {
            Datum dh = (Datum) record;
            System.out.println(dh);
        }

        System.out.println("\tTotal number domains: " + domainRawToNumMap.size());
        List domainNumList = new ArrayList(domainNumToRawMap.keySet());
        Collections.sort(domainNumList);
        for (Object key : domainNumList)
        {
            int domainNum = (Integer) key;
            Features domainFeatures = (Features) domainNumToRawMap.get(domainNum);
            System.out.println(domainNum + "\t" + domainFeatures.toString());
        }

        System.out.println("\tTotal number peptides: " + peptideRawToNumMap.size());
        List peptideNumList = new ArrayList(peptideNumToRawMap.keySet());
        Collections.sort(peptideNumList);
        for (Object key : peptideNumList)
        {
            int peptideNum = (Integer) key;
            Features peptideFeatures = (Features) peptideNumToRawMap.get(peptideNum);
            System.out.println(peptideNum + "\t" + peptideFeatures.toString());
        }

        System.out.println("\tTotal number Encoded domains: " + domainEncToNumMap.size());
        System.out.println("\tTotal number Encoded peptides: " + peptideEncToNumMap.size());
    }
}