package org.genemania.dw.tools;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Iterator;
import java.util.TreeMap;

import org.genemania.dw.entity.OBOTerm;
import org.genemania.dw.util.DefParams;
import org.genemania.dw.util.GenUtil;

/**
 * Simple local tool that handles ontology files, mainly geared towards PSI-MI
 * CV (controlled vocabulary) but can be tweaked for others as well.
 * When reading the file, the intro section at the top, and trailing type-defs
 * at the bottom, are ignored.
 *
 * For PSI-MI OBO:
 * - Double quotes are not removed from definition.
 * - Definition is assumed not to have CRs.
 * - The 'OBSOLETE' tag is not parsed from the definition (if it exists).
 * - Only two subset types exist, and no namespaces.
 * - Only two syn types exist.
 * - xrefs are saved generically (i.e. no distinction for validation regexp,
 *   etc.).
 *
 * General:
 * - Syns are assumed to be in double quotes.
 * - No tracking for replacement of obsolete entries.
 * - NO support for Replaced_by, alt_id, and consider tags.
 *
 * @author rashadbadrawi
 */

public class OBOReader {

    //general tags used in the obo flat file - partial list
    protected static final String DEF_NS_TAG = "default-namespace:";
    protected static final String TERM_TAG = "[Term]";
    protected static final String ID_TAG = "id:";
    protected static final String NAME_TAG = "name:";
    protected static final String DEF_TAG = "def:";
    protected static final String SUBSET_TAG = "subset:";
    protected static final String SYNONYM_TAG = "synonym:";
    protected static final String XREF_TAG = "xref:";
    protected static final String ISOBSOLETE_TAG = "is_obsolete:";
    protected static final String ISA_TAG = "is_a:";
    protected static final String RELATIONSHIP_TAG = "relationship:";
    protected static final String TYPE_DEF_TAG = "[Typedef]";
    protected static final String NAMESPACE_TAG = "namespace:";
    protected static final String COMMENT_TAG = "comment:";

    //synonym scope keywords looked for in PSI-MI synonym lines
    protected static final String SYN_EXACT = "EXACT";
    protected static final String SYN_NARROW = "NARROW";
    protected static final String SYN_RELATED = "RELATED";
    //tags that are recognized (so they are not reported) but not stored
    protected static final String REPLACED_BY_TAG = "replaced_by:";
    protected static final String CONSIDER_TAG = "consider:";
    protected static final String ALT_TAG = "alt_id:";
    //public static final String DISJOINT_TAG = "disjoint_from:";
    //public static final String REPLACED_BY_PSIMI_TAG =  "\nOBSOLETE:";

    //PSI-MI specific tags
    public static final String PSIMI_TAG = "MI:";

    private static PrintWriter log = GenUtil.getDefaultLog();
    //only valid while the constructor is running; closed and cleared after
    private BufferedReader br;

    /**
     * Reads the default OBO file, whose name is looked up through
     * DefParams.OBO_FILE_PROP, and stores its terms in OBOContainer.
     *
     * @param source ontology source label attached to every parsed term
     */
    public OBOReader (String source) {

        this (source, DefParams.getDefaultProp(DefParams.OBO_FILE_PROP));
    }

    /**
     * Reads the given OBO file and stores its terms in OBOContainer.
     * An IOException is reported on stderr and in the log, and is not
     * rethrown. The file is always closed before the constructor returns.
     *
     * NOTE(review): FileReader decodes with the platform default charset -
     * confirm that this matches the encoding of the OBO files in use.
     *
     * @param source      ontology source label attached to every parsed term
     * @param OBOFileName path of the OBO flat file to read
     */
    public OBOReader (String source, String OBOFileName) {

        try {
            br = new BufferedReader (new FileReader (OBOFileName));
            readTerms (source);
        } catch (IOException ioe) {
            System.err.println ("Unable to read OBO file.");
            ioe.printStackTrace (log);
            ioe.printStackTrace();
        } finally {
            closeReader ();              //also runs on runtime exceptions
        }
    }

    /**
     * Closes the underlying reader, if one was opened. A failure to close is
     * reported but never propagated, so it cannot mask a parsing error.
     */
    private void closeReader () {

        if (br == null) {
            return;
        }
        try {
            br.close ();
        } catch (IOException ioe) {
            System.err.println ("Unable to close OBO file.");
            ioe.printStackTrace (log);
        } finally {
            br = null;
        }
    }

    /**
     * Parses the open file line by line. Header lines before the first
     * [Term] stanza are skipped, except for the default namespace. Parsing
     * stops at the first [Typedef] stanza found after the terms. Each
     * completed term is handed to OBOContainer.
     *
     * @param source ontology source label attached to every parsed term
     * @throws IOException if reading from the file fails
     */
    private void readTerms (String source) throws IOException {

        System.out.println ("Begin reading terms: " + source);
        String line;
        boolean inTerms = false;         //true once the first [Term] is seen
        String defNS = null;
        OBOTerm term = newTerm (source);
        while ((line = br.readLine ()) != null) {
            line = line.trim ();
            if (!inTerms && line.startsWith (DEF_NS_TAG)) {
                defNS = tagValue (line, DEF_NS_TAG);
                continue;
            }
            if (line.startsWith (TERM_TAG)) {
                if (defNS != null) {
                    term.setDefNS (defNS);
                }
                if (!inTerms) {
                    inTerms = true;
                } else {                               //add parsed term
                    OBOContainer.add (term);
                    term = newTerm (source);
                }
                continue;
            }
            if (!inTerms) {                            //skip other headers
                continue;
            }
            if (line.startsWith (TYPE_DEF_TAG)) {      //skip typedefs at the end
                break;
            }
            parseTermLine (term, source, line);
        }
        //Only store the last term if at least one [Term] stanza was read;
        //otherwise 'term' is just the blank placeholder created above.
        if (inTerms) {
            if (defNS != null) {
                term.setDefNS (defNS);
            }
            OBOContainer.add (term);                  //overwritten, if needed
        }
        System.out.println ("Done reading terms: " + source);
    }

    /**
     * Creates an empty term tagged with the given source.
     */
    private static OBOTerm newTerm (String source) {

        OBOTerm term = new OBOTerm ();
        term.setSource (source);
        return term;
    }

    /**
     * Returns the trimmed text following the given leading tag on a line.
     */
    private static String tagValue (String line, String tag) {

        return line.substring (tag.length ()).trim ();
    }

    /**
     * Removes the ID space prefix from an identifier: the "MI:" prefix for
     * PSI-MI sources when present, otherwise everything up to and including
     * the first colon. Values without a prefix are returned trimmed, so a
     * short or unprefixed value can no longer cause an index error.
     *
     * @param source ontology source label
     * @param rawID  identifier text as found in the file
     * @return the identifier without its prefix
     */
    private static String stripIDPrefix (String source, String rawID) {

        if (OBOTerm.SOURCE_PSI_MI.equals (source) &&
            rawID.startsWith (PSIMI_TAG)) {
            return rawID.substring (PSIMI_TAG.length ()).trim ();
        }
        if (rawID.contains (GenUtil.COLON)) {
            return rawID.substring (rawID.indexOf (GenUtil.COLON) + 1).trim ();
        }
        return rawID.trim ();
    }

    /**
     * Applies a single tag line of a [Term] stanza to the term being built.
     * Empty lines are ignored; lines with an unknown tag are reported on
     * stderr. The alt_id, consider and replaced_by tags are recognized but
     * their values are not stored.
     *
     * @param term   term currently being populated
     * @param source ontology source label
     * @param line   trimmed line from the file
     */
    private void parseTermLine (OBOTerm term, String source, String line) {

        if (line.startsWith (ID_TAG)) {
            //for non PSI-MI sources everything after the colon is kept
            term.setID (stripIDPrefix (source, tagValue (line, ID_TAG)));
        } else if (line.startsWith (NAME_TAG)) {
            term.setName (tagValue (line, NAME_TAG));
        } else if (line.startsWith (DEF_TAG)) {
            term.setDefinition (tagValue (line, DEF_TAG));
        } else if (line.startsWith (COMMENT_TAG)) {
            term.setComment (tagValue (line, COMMENT_TAG));
        } else if (line.startsWith (SUBSET_TAG)) {
            term.addSubset (tagValue (line, SUBSET_TAG));
        } else if (line.startsWith (NAMESPACE_TAG)) {
            term.addNamespace (tagValue (line, NAMESPACE_TAG));
        } else if (line.startsWith (ALT_TAG) ||
                   line.startsWith (CONSIDER_TAG) ||
                   line.startsWith (REPLACED_BY_TAG)) {
            //known tags, intentionally not supported yet
        } else if (line.startsWith (SYNONYM_TAG)) {
            readSyn (term, source, tagValue (line, SYNONYM_TAG));
        } else if (line.startsWith (ISA_TAG)) {   //e.g.:is_a: MI:2160 ! logs
            //Isolate the ID token first, so that a colon inside the
            //trailing '! comment' is never mistaken for the ID separator.
            String target = tagValue (line, ISA_TAG).split (GenUtil.SPACE) [0];
            String ID = stripIDPrefix (source, target);
            if (ID.length () == 0) {
                System.err.println ("Warning - malformed is_a: " + line);
            } else {
                term.addRelationship (OBOTerm.RELATION_ISA, ID);
            }
        } else if (line.startsWith (RELATIONSHIP_TAG)) {
            //expected layout: relationship: <type> <ID> [! comment]
            String tempArr [] = tagValue (line, RELATIONSHIP_TAG)
                                                     .split (GenUtil.SPACE);
            if (tempArr.length < 2) {
                System.err.println ("Warning - malformed relationship: " + line);
            } else {
                String type = tempArr [0].trim ();
                String ID = stripIDPrefix (source, tempArr [1]);
                term.addRelationship (type, ID);
            }
        } else if (line.startsWith (ISOBSOLETE_TAG)) {
            term.setIsObsolete (Boolean.parseBoolean (
                                          tagValue (line, ISOBSOLETE_TAG)));
        } else if (line.startsWith (XREF_TAG)) {
            term.addXref (tagValue (line, XREF_TAG));
        } else if (line.length () > 0) {
            System.err.println ("Unsupported tag: " + line);
        }
    }

    /**
     * Extracts the double-quoted synonym text from a synonym line and adds
     * it to the term. For PSI-MI sources the synonym is additionally filed
     * under its PSI-MI synonym type, and warnings are printed when the scope
     * keyword or the synonym type is not recognized. A line without a
     * complete pair of double quotes is reported and skipped.
     *
     * @param term   term to add the synonym to
     * @param source ontology source label
     * @param synStr text following the synonym tag, already trimmed
     * @return the same term instance that was passed in
     */
    private OBOTerm readSyn (OBOTerm term, String source, String synStr) {

        //syn is in double quotes
        int openQuote = synStr.indexOf ("\"");
        int closeQuote = openQuote < 0 ? -1
                                       : synStr.indexOf ("\"", openQuote + 1);
        if (closeQuote < 0) {
            System.err.println ("Warning - malformed syn: " + synStr + " in " + source);
            return term;
        }
        String syn = synStr.substring (openQuote + 1, closeQuote);
        if (OBOTerm.SOURCE_PSI_MI.equals (source)) {
            if (!synStr.contains (SYN_EXACT) && !synStr.contains (SYN_NARROW) &&
                !synStr.contains (SYN_RELATED)) {
                System.err.println ("Warning - syn format: " + synStr + " in " + source);
            }
            term.addSyn (syn);
            if (synStr.contains (OBOTerm.SYN_PSI_MI_ALT)) {
                term.addPSIMISyn(OBOTerm.SYN_PSI_MI_ALT, syn);
            } else if (synStr.contains (OBOTerm.SYN_PSI_MI_SHORT)) {
                term.addPSIMISyn(OBOTerm.SYN_PSI_MI_SHORT, syn);
            } else {
                System.err.println ("Warning - unsupported syn type: " + synStr);
            }
        } else {
            //System.err.println ("Warning: Unsupported OBO source for syns: " + source);
            term.addSyn (syn);
        }

        return term;
    }

    /**
     * Prints a tab separated header followed by every term held in
     * OBOContainer, to both stdout and the log. The extra PSI-MI synonym
     * column is included when the first term comes from the PSI-MI source.
     * With no terms loaded only the basic header is printed.
     */
    //for debugging purposes
    public void dumpAll () {

        TreeMap <String, OBOTerm> termsMap = OBOContainer.getAll();
        String headerStr = "Source" + GenUtil.TAB + "ID"
                + GenUtil.TAB + "Name" + GenUtil.TAB + "Definition"
                + GenUtil.TAB + "Comment" + GenUtil.TAB + "Is Obsolete"
                + GenUtil.TAB + "Subset" + GenUtil.TAB + "Namespace"
                + GenUtil.TAB + "Xref" + GenUtil.TAB + "Synonyms"
                + GenUtil.TAB + "Relationship";
        //firstKey() throws on an empty map, so check for content first
        if (!termsMap.isEmpty () && OBOTerm.SOURCE_PSI_MI.equals (
            termsMap.get (termsMap.firstKey()).getSource())) {
            headerStr += GenUtil.TAB + "PSI-MI Synonyms Details";
        }
        System.out.println (headerStr);
        log.println (headerStr);
        for (OBOTerm term : termsMap.values ()) {     //sorted by map key
            String termStr = term.toString ();
            log.println (termStr);
            System.out.println (termStr);
        }
    }

    /**
     * Command line driver: OBOReader OBOFileSource [OBOFileName].
     * Without arguments the defaults registered in DefParams are used; with
     * only the source given, the default OBO file is read. All terms are
     * dumped once reading is done, and the log is closed on exit.
     *
     * @param args command line arguments
     */
    //simple driver
    public static final void main (String args []) {

        log = GenUtil.getDefaultLog();
        log.println(GenUtil.getTimeStamp());
        GenUtil.registerStart();
        String usageMsg = "Usage: OBOReader OBOFileSource OBOFileName";
        String warnMsg = "WARNING: Missing command line args, using defaults";
        if (args == null || args.length < 1) {
            log.println(warnMsg);
            log.println(usageMsg);
            System.out.println(warnMsg);
            System.out.println(usageMsg);
            args = DefParams.getCommandLineArgs(OBOReader.class.getName());
        }
        try {
            OBOReader oboReader;
            if (args.length > 1) {
                oboReader = new OBOReader (args [0], args [1]);
            } else {                     //file name omitted: use the default
                oboReader = new OBOReader (args [0]);
            }
            oboReader.dumpAll ();
        } catch (Throwable e) {
            e.printStackTrace();
            e.printStackTrace(log);
        } finally {
            log.println(GenUtil.getExecTimeStr());
            log.flush();
            log.close();
        }
    }
}

