/*
 * Decompiled with CFR 0.152.
 */
package AceJet;

import AceJet.Ace;
import AceJet.AceDocument;
import AceJet.AceEntity;
import AceJet.AceEntityMention;
import AceJet.AceEntityName;
import AceJet.AceTimex;
import AceJet.AceTimexMention;
import AceJet.Gazetteer;
import Jet.JetTest;
import Jet.Lisp.FeatureSet;
import Jet.Refres.Resolve;
import Jet.Tipster.Annotation;
import Jet.Tipster.Document;
import Jet.Tipster.ExternalDocument;
import Jet.Tipster.Span;
import Jet.Zoner.SentenceSplitter;
import Jet.Zoner.SpecialZoner;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.Vector;

/**
 * Reads ACE source documents ({@code .sgm}) together with their APF annotation files and
 * writes each document back out with the selected annotations added inline.
 *
 * <p>Can be run from the command line (see {@link #main}) or used as a library through
 * {@link #processDocument} / {@link #addAnnotations}. All configuration is held in static
 * fields, so the class is not safe for concurrent use with differing settings.
 *
 * <p>Which annotations are added is controlled by the flag set (see {@link #setFlag}):
 * {@code sentences}, {@code timex}, {@code mentions}, {@code names}; the {@code types} and
 * {@code extents} flags only add extra features to the {@code mention} annotations.
 */
public class APFtoXML {
    /** Flags accepted by {@link #setFlag}. */
    private static final String[] VALID_FLAGS = {
        "sentences", "timex", "mentions", "extents", "types", "names"
    };

    /** Path of the file listing the document names to process, one per line. */
    static String fileList;
    /** Directory holding the source and APF files; always ends with "/" after {@link #init}. */
    static String ACEdir;
    /** Directory the annotated documents are written to; always ends with "/" after {@link #init}. */
    static String outputDir;
    /** ACE evaluation year ("2002" .. "2005"); defaults to "2005". */
    static String year = "2005";
    /** File extension (without the dot) of the APF files. */
    static String apfExtension;
    /** File extension (without the dot) of the generated files. */
    static String outputExtension;
    /** Currently enabled annotation flags. */
    static Set<String> flags = new HashSet<String>();
    /** Number of documents handed to {@link #processFile} so far. */
    static int docCount = 0;
    /** Gazetteer used for year 2004 only; stays {@code null} for other years. */
    static Gazetteer gazetteer;
    /** Lower-cased heads of PRE mentions that could not be classified (year 2004 only). */
    static TreeSet<String> unknownPre = new TreeSet<String>();
    /** Pre-dictionary: maps a name (text after the second character of a line) to its one-character code. */
    static HashMap<String, String> preDict = new HashMap<String, String>();

    /**
     * Command-line entry point: parses the arguments, processes every document in the file
     * list and, for year 2004, prints the PRE mention heads that could not be classified.
     *
     * @param stringArray command-line arguments, as described by the usage message
     * @throws IOException if the gazetteer or the file list cannot be read
     */
    public static void main(String[] stringArray) throws IOException {
        APFtoXML.init(stringArray);
        APFtoXML.processFileList(fileList);
        if ("2004".equals(year)) {
            System.out.println("\nUnclassified items:  " + unknownPre.size());
            for (String unclassified : unknownPre) {
                System.out.println(unclassified);
            }
        }
    }

    /**
     * Parses the command-line arguments into the static configuration fields.
     *
     * <p>Expected layout: {@code year apf-directory output-directory filelist apf-extension
     * output-extension [gazetteer pre-dictionary] flag ...}. The gazetteer and pre-dictionary
     * arguments are present only for year 2004. At least one flag is required. On any
     * argument error a message is printed and the JVM exits with status 1.
     *
     * @param stringArray command-line arguments
     * @throws IOException propagated from loading the gazetteer
     */
    public static void init(String[] stringArray) throws IOException {
        if (stringArray.length == 0) {
            APFtoXML.argErr();
        }
        JetTest.encoding = "UTF-8";
        year = stringArray[0];
        AceDocument.ace2004 = false;
        AceDocument.ace2005 = false;

        boolean is2004 = "2004".equals(year);
        boolean is2005 = "2005".equals(year);
        if (!is2004 && !is2005 && !"2002".equals(year) && !"2003".equals(year)) {
            System.err.println("Invalid year:  must be 2002-2005");
            System.exit(1);
        }

        // Validate the argument count BEFORE touching the optional 2004 arguments, so that a
        // short command line yields the usage message rather than an index exception.
        int firstFlagIndex = is2004 ? 8 : 6;
        if (stringArray.length <= firstFlagIndex) {
            APFtoXML.argErr();
        }

        if (is2004) {
            gazetteer = new Gazetteer();
            gazetteer.load(stringArray[6]);
            APFtoXML.loadPreDict(stringArray[7]);
            AceDocument.ace2004 = true;
        } else if (is2005) {
            AceDocument.ace2004 = true;
            AceDocument.ace2005 = true;
        }

        ACEdir = APFtoXML.withTrailingSlash(stringArray[1]);
        outputDir = APFtoXML.withTrailingSlash(stringArray[2]);
        fileList = stringArray[3];
        apfExtension = stringArray[4];
        outputExtension = stringArray[5];
        for (int i = firstFlagIndex; i < stringArray.length; ++i) {
            APFtoXML.setFlag(stringArray[i]);
        }
    }

    /**
     * Enables one annotation flag. An unrecognised flag prints an error and exits the JVM
     * with status 1.
     *
     * @param string the flag name; one of sentences, timex, mentions, extents, types, names
     */
    public static void setFlag(String string) {
        for (String valid : VALID_FLAGS) {
            if (valid.equals(string)) {
                flags.add(string);
                return;
            }
        }
        System.err.println("APFtoXML:  invalid flag");
        System.err.println("possible flags:  sentences timex mentions extents types names");
        System.exit(1);
    }

    /** Disables all annotation flags. */
    public static void clearFlags() {
        flags.clear();
    }

    /** Prints the usage message and exits the JVM with status 1. */
    private static void argErr() {
        System.err.println("APFtoXML arguments:");
        System.err.println("  year apf-directory  output-directory  filelist apf-extension output-extension [gazetteer pre-dictionary] flag ...");
        System.err.println("gazetteer and pre-dictionary needed for year = 2004");
        System.err.println("possible flags:  sentences timex mentions extents types names");
        System.exit(1);
    }

    /** Returns {@code directory} unchanged if it ends with "/", otherwise with "/" appended. */
    private static String withTrailingSlash(String directory) {
        return directory.endsWith("/") ? directory : directory + "/";
    }

    /**
     * Builds a span with the same start as {@code source} and an end one position later.
     * NOTE(review): presumably converts an inclusive APF end offset into the exclusive end
     * used by document spans -- confirm against AceDocument.
     */
    private static Span extendEndByOne(Span source) {
        return new Span(source.start(), source.end() + 1);
    }

    /**
     * Loads the pre-dictionary into {@link #preDict}. Each usable line contributes one entry:
     * the first character is the value, the text from the third character on is the key
     * (the second character is ignored). I/O failures are reported on stderr and otherwise
     * ignored, leaving whatever was loaded so far.
     *
     * @param dictionaryPath path of the pre-dictionary file
     */
    private static void loadPreDict(String dictionaryPath) {
        try {
            BufferedReader reader = new BufferedReader(new FileReader(dictionaryPath));
            try {
                String line;
                while ((line = reader.readLine()) != null) {
                    // Blank or one-character lines cannot be split and would make
                    // substring() throw; skip them instead of aborting the load.
                    if (line.length() < 2) {
                        continue;
                    }
                    preDict.put(line.substring(2), line.substring(0, 1));
                }
            } finally {
                reader.close();
            }
        } catch (IOException ioException) {
            System.err.print("Unable to load dictionary due to exception: ");
            System.err.println(ioException);
        }
    }

    /**
     * Processes every document named in the file list, one name per line. Blank lines are
     * skipped. A failure in one document is reported and does not stop the run.
     *
     * @param listPath path of the file list
     * @throws IOException if the file list itself cannot be opened or read
     */
    private static void processFileList(String listPath) throws IOException {
        BufferedReader reader = new BufferedReader(new FileReader(listPath));
        try {
            String documentName;
            while ((documentName = reader.readLine()) != null) {
                if (documentName.trim().length() == 0) {
                    continue;
                }
                APFtoXML.processFileAndCatchError(documentName);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Runs {@link #processFile} and reports any exception on stderr (message plus stack
     * trace) instead of propagating it.
     *
     * @param string document name, without directory or extension
     */
    public static void processFileAndCatchError(String string) {
        try {
            APFtoXML.processFile(string);
        } catch (Exception exception) {
            System.err.println("Error: " + exception.toString());
            exception.printStackTrace();
        }
    }

    /**
     * Annotates a single document: opens {@code ACEdir + name + ".sgm"}, reads the matching
     * APF file {@code ACEdir + name + "." + apfExtension}, adds the enabled annotations and
     * saves the result as {@code name + "." + outputExtension} in {@link #outputDir}.
     *
     * @param string document name, without directory or extension
     */
    public static void processFile(String string) {
        System.out.println("\nProcessing document " + ++docCount + ": " + string);
        String sourcePath = ACEdir + string + ".sgm";
        String apfPath = ACEdir + string + "." + apfExtension;

        ExternalDocument source = new ExternalDocument("sgml", sourcePath);
        source.setAllTags(true);
        if ("2003".equals(year) || "2004".equals(year)) {
            source.setEmptyTags(new String[]{"TURN"});
        }
        source.open();

        AceDocument aceDocument = new AceDocument(sourcePath, apfPath);
        APFtoXML.addAnnotations(source, aceDocument);
        source.setSGMLwrapMargin(0);
        source.saveAs(outputDir, string + "." + outputExtension);
    }

    /**
     * Adds the enabled annotations to {@code document} and returns its SGML rendering.
     *
     * @param document    the document to annotate (modified in place)
     * @param aceDocument the APF annotations for that document
     * @return the result of {@code document.writeSGML(null)} as a string
     */
    public static String processDocument(Document document, AceDocument aceDocument) {
        APFtoXML.addAnnotations(document, aceDocument);
        return document.writeSGML(null).toString();
    }

    /**
     * Adds every annotation type whose flag is set, in the order sentences, timex, mentions,
     * names. For year 2004 the gazetteer's monocase setting is first synchronised with
     * whether the document is all lower case.
     *
     * @param document    the document to annotate (modified in place)
     * @param aceDocument the APF annotations for that document
     */
    public static void addAnnotations(Document document, AceDocument aceDocument) {
        boolean allLowerCase = Ace.allLowerCase(document);
        if ("2004".equals(year)) {
            gazetteer.setMonocase(allLowerCase);
        }
        if (flags.contains("sentences")) {
            APFtoXML.addSentences(document);
        }
        if (flags.contains("timex")) {
            APFtoXML.addTimexTags(document, aceDocument);
        }
        if (flags.contains("mentions")) {
            APFtoXML.addMentionTags(document, aceDocument);
        }
        if (flags.contains("names")) {
            APFtoXML.addENAMEXtags(document, aceDocument);
        }
    }

    /**
     * Splits every {@code TEXT} zone of the document into sentences, numbers the resulting
     * {@code sentence} annotations "SENT-1", "SENT-2", ... through their {@code ID} feature,
     * and removes the {@code dateline} and {@code textBreak} annotations.
     * Does nothing beyond zoning if the document has no {@code TEXT} annotation.
     *
     * @param document the document to annotate (modified in place)
     */
    static void addSentences(Document document) {
        SpecialZoner.findSpecialZones(document);
        Vector<Annotation> textZones = document.annotationsOfType("TEXT");
        if (textZones == null) {
            System.out.println("No <TEXT> in document");
            return;
        }
        for (Annotation textZone : textZones) {
            Span zoneSpan = textZone.span();
            // Re-evaluated for every zone, exactly as before, in case splitting affects it.
            Ace.monocase = Ace.allLowerCase(document);
            SentenceSplitter.split(document, zoneSpan);
        }
        Vector<Annotation> sentences = document.annotationsOfType("sentence");
        if (sentences != null) {
            int sentenceNumber = 0;
            for (Annotation sentence : sentences) {
                sentence.put("ID", "SENT-" + ++sentenceNumber);
            }
        }
        document.removeAnnotationsOfType("dateline");
        document.removeAnnotationsOfType("textBreak");
        document.shrink("sentence");
    }

    /**
     * Adds one {@code timex2} annotation per time expression, placed on the extent of the
     * expression's first mention. The features {@code val}, {@code anchor_val},
     * {@code anchor_dir}, {@code set} and {@code mod} are included only when non-empty.
     * Time expressions without any mention are skipped.
     *
     * @param document    the document to annotate (modified in place)
     * @param aceDocument the APF annotations for that document
     */
    static void addTimexTags(Document document, AceDocument aceDocument) {
        ArrayList<AceTimex> timeExpressions = aceDocument.timeExpressions;
        for (AceTimex timex : timeExpressions) {
            if (timex.mentions.isEmpty()) {
                // Nothing to anchor the annotation to; get(0) would throw.
                continue;
            }
            AceTimexMention firstMention = (AceTimexMention)timex.mentions.get(0);
            FeatureSet features = new FeatureSet();
            if (timex.val != null && !timex.val.equals("")) {
                features.put("val", timex.val);
            }
            if (timex.anchorVal != null && !timex.anchorVal.equals("")) {
                features.put("anchor_val", timex.anchorVal);
            }
            if (timex.anchorDir != null && !timex.anchorDir.equals("")) {
                features.put("anchor_dir", timex.anchorDir);
            }
            if (timex.set != null && !timex.set.equals("")) {
                features.put("set", timex.set);
            }
            if (timex.mod != null && !timex.mod.equals("")) {
                features.put("mod", timex.mod);
            }
            document.annotate("timex2", APFtoXML.extendEndByOne(firstMention.extent), features);
        }
    }

    /**
     * Adds an {@code ENAMEX} annotation, with the entity type as {@code TYPE}, for every
     * name of every entity. For year 2004, mentions of type "PRE" are additionally tagged on
     * their head when the gazetteer recognises the head as a nationality or location, or the
     * pre-dictionary maps it to "N"; PRE heads with no pre-dictionary entry at all are
     * reported and collected in {@link #unknownPre}.
     *
     * @param document    the document to annotate (modified in place)
     * @param aceDocument the APF annotations for that document
     */
    static void addENAMEXtags(Document document, AceDocument aceDocument) {
        ArrayList<AceEntity> entities = aceDocument.entities;
        boolean is2004 = "2004".equals(year);
        for (AceEntity entity : entities) {
            for (Object nameItem : entity.names) {
                AceEntityName name = (AceEntityName)nameItem;
                document.annotate("ENAMEX", APFtoXML.extendEndByOne(name.extent), new FeatureSet("TYPE", entity.type));
            }
            if (!is2004) {
                continue;
            }
            for (Object mentionItem : entity.mentions) {
                AceEntityMention mention = (AceEntityMention)mentionItem;
                // Only PRE mentions are considered; filter first to avoid needless lookups.
                if (!mention.type.equals("PRE")) {
                    continue;
                }
                String normalizedHead = Resolve.normalizeName(mention.headText);
                String[] headTokens = Gazetteer.splitAtWS(normalizedHead);
                String dictionaryCode = preDict.get(normalizedHead.toLowerCase());
                if (gazetteer.isNationality(headTokens) || gazetteer.isLocation(headTokens) || "N".equals(dictionaryCode)) {
                    document.annotate("ENAMEX", APFtoXML.extendEndByOne(mention.head), new FeatureSet("TYPE", entity.type));
                    continue;
                }
                if (dictionaryCode != null) {
                    // Known to the pre-dictionary with a code other than "N": deliberately untagged.
                    continue;
                }
                System.out.println("Unclassified PRE: " + mention.text + " {" + mention.headText + ")");
                unknownPre.add(normalizedHead.toLowerCase());
            }
        }
    }

    /**
     * Adds a {@code mention} annotation on the head of every entity mention whose head has a
     * non-negative start. Each annotation carries an {@code entity} feature holding the
     * index of the entity within the document's entity list. With the {@code types} flag the
     * first three characters of the entity type (and the subtype, if any) are added; with
     * the {@code extents} flag the XML-escaped mention text, newlines replaced by spaces, is
     * added as {@code extent}.
     *
     * @param document    the document to annotate (modified in place)
     * @param aceDocument the APF annotations for that document
     */
    static void addMentionTags(Document document, AceDocument aceDocument) {
        ArrayList<AceEntity> entities = aceDocument.entities;
        boolean includeTypes = flags.contains("types");
        boolean includeExtents = flags.contains("extents");
        for (int entityIndex = 0; entityIndex < entities.size(); ++entityIndex) {
            AceEntity entity = entities.get(entityIndex);
            for (Object mentionItem : entity.mentions) {
                AceEntityMention mention = (AceEntityMention)mentionItem;
                Span head = mention.head;
                if (head.start() < 0) {
                    continue;
                }
                FeatureSet features = new FeatureSet("entity", Integer.valueOf(entityIndex));
                if (includeTypes) {
                    String type = entity.type;
                    // Guard against types shorter than three characters, which would
                    // otherwise make substring() throw.
                    features.put("type", type.length() > 3 ? type.substring(0, 3) : type);
                    if (entity.subtype != null) {
                        features.put("subtype", entity.subtype);
                    }
                }
                if (includeExtents) {
                    String singleLineText = mention.text.replace('\n', ' ');
                    features.put("extent", AceEntityMention.addXmlEscapes(singleLineText));
                }
                document.annotate("mention", APFtoXML.extendEndByOne(head), features);
            }
        }
    }
}

