/*
 * Decompiled with CFR 0.152.
 */
package org.baderlab.brain;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.baderlab.brain.ProteinTerminus;
import org.biojava.bio.BioException;
import org.biojava.bio.dist.Distribution;
import org.biojava.bio.dist.DistributionFactory;
import org.biojava.bio.dist.SimpleDistributionTrainerContext;
import org.biojava.bio.seq.ProteinTools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.AtomicSymbol;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;

/**
 * Computes and prints residue statistics for the terminal regions of the
 * protein sequences in a sequence database.
 *
 * <p>Two statistics are supported: single-residue frequencies (from the
 * database given to the constructor) and residue-pair frequencies at a fixed
 * spacing (from a FASTA file named at call time). Results are written to
 * {@code System.out}; problems are reported on {@code System.err}.
 */
public class ProteinDatabaseDistribution {
    /**
     * One-letter residue codes that index the rows and columns of the pair
     * count table. 'U' is counted but left out of printed tables.
     */
    private static final String AMINO_ACIDS = "ACDEFGHIKLMNPQRSTUVWY";

    /**
     * Sequences read lazily from the database opened by the constructor.
     * The iterator is consumed by {@link #calcAADistributionSearchDB}.
     */
    SequenceIterator database = null;

    /**
     * Opens a protein sequence database for single-residue statistics.
     *
     * <p>The underlying reader stays open after construction because the
     * sequence iterator reads from it on demand; it is only closed here when
     * the iterator could not be created.
     *
     * @param dbFileName path of the sequence database file
     * @param dbFormat   format name passed to {@code SeqIOTools.fileToBiojava}
     * @throws FileNotFoundException if {@code dbFileName} cannot be opened
     * @throws BioException          if the sequence iterator cannot be created
     */
    public ProteinDatabaseDistribution(String dbFileName, String dbFormat) throws FileNotFoundException, BioException {
        BufferedReader br = new BufferedReader(new FileReader(dbFileName));
        boolean opened = false;
        try {
            this.database = (SequenceIterator)SeqIOTools.fileToBiojava(dbFormat, "PROTEIN", br);
            opened = true;
        }
        finally {
            // Do not leak the file handle when the iterator could not be built.
            if (!opened) {
                ProteinDatabaseDistribution.closeQuietly(br);
            }
        }
    }

    /**
     * Closes a reader, reporting (but not propagating) any I/O failure.
     *
     * @param reader the reader to close; {@code null} is ignored
     */
    private static void closeQuietly(BufferedReader reader) {
        if (reader == null) {
            return;
        }
        try {
            reader.close();
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Converts a table of counts into a table of relative frequencies by
     * dividing every cell by {@code totalPairs}.
     *
     * <p>No special case is made for {@code totalPairs == 0}: the floating
     * point division then yields {@code NaN} for zero counts.
     *
     * @param countsTable counts per (row, column) cell; may be empty
     * @param totalPairs  the divisor applied to every cell
     * @return a new table with the same shape as {@code countsTable}
     */
    private double[][] countsTableToDistribution(long[][] countsTable, long totalPairs) {
        double[][] distribution = new double[countsTable.length][];
        for (int i = 0; i < countsTable.length; ++i) {
            // Size each row from its own length so empty or ragged tables are safe.
            distribution[i] = new double[countsTable[i].length];
            for (int j = 0; j < countsTable[i].length; ++j) {
                distribution[i][j] = (double)countsTable[i][j] / (double)totalPairs;
            }
        }
        return distribution;
    }

    /**
     * Maps the first {@code span} characters of a residue string to their
     * positions in {@link #AMINO_ACIDS}.
     *
     * @param residues the residue string
     * @param span     number of leading characters to convert
     * @return the index of each residue, or {@code null} if any of them is
     *         not listed in {@link #AMINO_ACIDS}
     */
    private static int[] toResidueIndexes(String residues, int span) {
        int[] indexes = new int[span];
        for (int i = 0; i < span; ++i) {
            int index = AMINO_ACIDS.indexOf(residues.charAt(i));
            if (index < 0) {
                return null;
            }
            indexes[i] = index;
        }
        return indexes;
    }

    /**
     * Counts residue pairs separated by exactly {@code numGapsInPair}
     * positions within the terminus of every sequence of {@code searchDB}.
     *
     * <p>A terminus is skipped entirely when it contains 'X' or, within the
     * counted region, any residue that is not in {@link #AMINO_ACIDS}.
     * Records that cannot be read are reported and skipped.
     *
     * @param searchDB      sequences to scan; consumed by this call
     * @param length        number of terminal residues to examine
     * @param terminus      which terminus to extract, as understood by
     *                      {@code ProteinTerminus.getSequenceTerminus}
     * @param numGapsInPair number of positions between the two residues of a
     *                      pair (0 means adjacent); negative values match nothing
     * @return a three element array: the {@code long[][]} counts table, the
     *         total number of pairs as a {@code Long}, and the residue list
     *         {@code String} that labels the table's rows and columns
     */
    private Object[] calcOrder2PairCount(SequenceIterator searchDB, int length, ProteinTerminus terminus, int numGapsInPair) {
        int alphabetSize = AMINO_ACIDS.length();
        long[][] countsTable = new long[alphabetSize][alphabetSize];
        long totalPairs = 0L;
        // Distance between the two members of a pair; below 1 nothing can match.
        int offset = numGapsInPair + 1;
        while (searchDB.hasNext()) {
            Sequence sequenceFromDB;
            try {
                sequenceFromDB = searchDB.nextSequence();
            }
            catch (BioException e) {
                System.err.println("Can't read next sequence from database.");
                // Skip the unreadable record instead of reusing the previous
                // sequence (or dereferencing null on the first record).
                continue;
            }
            Sequence sequenceToSearch = ProteinTerminus.getSequenceTerminus(sequenceFromDB, length, terminus);
            String sequenceToSearchString = sequenceToSearch.seqString();
            if (sequenceToSearchString.indexOf('X') >= 0 || offset < 1) {
                continue;
            }
            // Never read past the end of a terminus shorter than requested.
            int span = Math.max(0, Math.min(length, sequenceToSearchString.length()));
            int[] residueIndexes = ProteinDatabaseDistribution.toResidueIndexes(sequenceToSearchString, span);
            if (residueIndexes == null) {
                // Residue outside the table's alphabet: skip, as is done for 'X'.
                continue;
            }
            // For a fixed spacing each position pairs with exactly one other.
            for (int i = 0; i < span - offset; ++i) {
                countsTable[residueIndexes[i]][residueIndexes[i + offset]]++;
                ++totalPairs;
            }
        }
        return new Object[]{countsTable, Long.valueOf(totalPairs), AMINO_ACIDS};
    }

    /**
     * Prints a tab separated table to {@code System.out}: a header line of
     * column labels followed by one labelled line per row. Rows and columns
     * labelled 'U' are omitted.
     *
     * @param distribution the values to print, indexed [row][column]
     * @param aaList       one label character per row/column index; must be at
     *                     least as long as the number of rows and columns
     */
    public void printDistribution(double[][] distribution, String aaList) {
        StringBuilder header = new StringBuilder();
        for (int i = 0; i < aaList.length(); ++i) {
            if (aaList.charAt(i) == 'U') {
                continue;
            }
            header.append('\t').append(aaList.charAt(i));
        }
        header.append('\n');
        System.out.print(header);
        for (int i = 0; i < distribution.length; ++i) {
            if (aaList.charAt(i) == 'U') {
                continue;
            }
            StringBuilder row = new StringBuilder();
            row.append(aaList.charAt(i)).append('\t');
            boolean firstValue = true;
            for (int j = 0; j < distribution[i].length; ++j) {
                if (aaList.charAt(j) == 'U') {
                    continue;
                }
                // Separate printed values only, so no tab trails the last one.
                if (!firstValue) {
                    row.append('\t');
                }
                row.append(distribution[i][j]);
                firstValue = false;
            }
            row.append('\n');
            System.out.print(row);
        }
    }

    /**
     * Prints, for every gap size from 0 to {@code length - 2}, the frequency
     * table of residue pairs found at that spacing in the termini of the
     * sequences in a FASTA file.
     *
     * <p>The file is re-read once per gap size. If it cannot be opened or
     * parsed, the error is printed and the method returns without processing
     * the remaining gap sizes.
     *
     * @param fastaDatabaseFileName path of the FASTA protein database
     * @param length                number of terminal residues to examine
     * @param terminus              which terminus to extract
     */
    public void calcPairDistributionSearchDB(String fastaDatabaseFileName, int length, ProteinTerminus terminus) {
        for (int gap = 0; gap <= length - 2; ++gap) {
            BufferedReader br;
            try {
                br = new BufferedReader(new FileReader(fastaDatabaseFileName));
            }
            catch (FileNotFoundException e) {
                e.printStackTrace();
                // Every later pass would fail the same way.
                return;
            }
            try {
                SequenceIterator searchDB;
                try {
                    searchDB = (SequenceIterator)SeqIOTools.fileToBiojava("FASTA", "PROTEIN", br);
                }
                catch (BioException e) {
                    e.printStackTrace();
                    return;
                }
                Object[] returnValue = this.calcOrder2PairCount(searchDB, length, terminus, gap);
                long[][] countsTable = (long[][])returnValue[0];
                long totalPairs = (Long)returnValue[1];
                String aaList = (String)returnValue[2];
                double[][] distribution = this.countsTableToDistribution(countsTable, totalPairs);
                System.out.println("Gap:" + gap);
                this.printDistribution(distribution, aaList);
            }
            finally {
                // Release the file even when counting or printing fails.
                ProteinDatabaseDistribution.closeQuietly(br);
            }
        }
    }

    /**
     * Prints the single-residue frequency distribution over the termini of
     * all sequences remaining in the database opened by the constructor, one
     * "token TAB weight" line per symbol (the "U" token is omitted).
     *
     * <p>The database iterator is consumed by this call. Unreadable records
     * and symbols that cannot be tokenized are reported and skipped; any other
     * failure is printed as a stack trace.
     *
     * @param length   number of terminal residues to examine
     * @param terminus which terminus to extract
     */
    public void calcAADistributionSearchDB(int length, ProteinTerminus terminus) {
        try {
            SimpleDistributionTrainerContext dtc = new SimpleDistributionTrainerContext();
            Distribution proteinDist = DistributionFactory.DEFAULT.createDistribution((Alphabet)ProteinTools.getAlphabet());
            dtc.registerDistribution(proteinDist);
            while (this.database.hasNext()) {
                Sequence sequenceFromDB;
                try {
                    sequenceFromDB = this.database.nextSequence();
                }
                catch (BioException e) {
                    System.err.println("Can't read next sequence from database.");
                    // Do not count the previous sequence a second time.
                    continue;
                }
                Sequence sequenceToSearch = ProteinTerminus.getSequenceTerminus(sequenceFromDB, length, terminus);
                // Sequence positions are addressed from 1 to length() here.
                for (int j = 1; j <= sequenceToSearch.length(); ++j) {
                    dtc.addCount(proteinDist, sequenceToSearch.symbolAt(j), 1.0);
                }
            }
            dtc.train();
            SymbolTokenization st;
            try {
                st = ProteinTools.getAlphabet().getTokenization("token");
            }
            catch (BioException e) {
                System.err.println("Unable to get symboltokenization");
                // Nothing can be printed without a tokenization.
                return;
            }
            for (Symbol sym : (FiniteAlphabet)proteinDist.getAlphabet()) {
                String token;
                try {
                    token = st.tokenizeSymbol((Symbol)((AtomicSymbol)sym));
                }
                catch (IllegalSymbolException e) {
                    System.err.println("Unable to convert symbol to token.");
                    // Avoid printing this weight under the previous symbol's token.
                    continue;
                }
                if (token.equalsIgnoreCase("U")) {
                    continue;
                }
                System.out.println(token + "\t" + proteinDist.getWeight(sym));
            }
            System.out.println("\n");
        }
        catch (Exception ex) {
            ex.printStackTrace();
        }
    }
}

