/*
 * Decompiled with CFR 0.152.
 */
package tratz.pos.train;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.util.List;
import java.util.Set;
import java.util.zip.GZIPOutputStream;
import tratz.cmdline.CommandLineOptions;
import tratz.cmdline.CommandLineOptionsParser;
import tratz.cmdline.ParsedCommandLine;
import tratz.jwni.WordNet;
import tratz.parse.io.SentenceReader;
import tratz.parse.types.Parse;
import tratz.parse.types.Token;
import tratz.pos.featgen.PosFeatureGenerator;

/**
 * Command-line tool that converts parsed sentences into part-of-speech
 * tagger training instances.
 *
 * <p>For every token of every sentence read from the input files, one line is
 * written to a gzip-compressed output file. Each line has the form
 * {@code -1<SEP>pos<SEP>feat1<SEP>feat2<SEP>...<SEP>} where {@code <SEP>} is
 * the control character U+0018, {@code pos} is the value of
 * {@link Token#getPos()} and the features come from the configured
 * {@link PosFeatureGenerator}.
 */
public class PosTagTrainingDataGenerator {
    public static final String OPT_INPUT_FILES = "input";
    public static final String OPT_SENTENCE_READER = "sentencereader";
    public static final String OPT_OUTPUT_FILE = "output";
    public static final String OPT_FEAT_GEN = "featuregenerator";
    public static final String OPT_WN_DIR = "wndir";

    /** Separator written between the fields of each output line (U+0018). */
    private static final char FIELD_SEPARATOR = '\u0018';

    /** Progress is reported to stderr once per this many sentences. */
    private static final int PROGRESS_INTERVAL = 100;

    /**
     * Builds the set of command-line options understood by {@link #main}.
     *
     * @return the option definitions for input, sentence reader, output,
     *         feature generator and WordNet directory
     */
    private static CommandLineOptions createOptions() {
        CommandLineOptions options = new CommandLineOptions();
        options.addOption(OPT_INPUT_FILES, "file(s)", "the input files for training");
        options.addOption(OPT_SENTENCE_READER, "classname", "name of the sentence reader class (must implement " + SentenceReader.class.getName() + ")");
        options.addOption(OPT_OUTPUT_FILE, "file", "the output file");
        options.addOption(OPT_FEAT_GEN, "classname", "name of the feature generation class (must implement " + PosFeatureGenerator.class.getName() + ")");
        options.addOption(OPT_WN_DIR, "file", "the dictionary (dict) directory of WordNet");
        return options;
    }

    /**
     * Entry point. Parses the command line, instantiates the configured
     * sentence reader and feature generator by class name, and writes the
     * training instances for all input files to the gzip output file.
     *
     * <p>The input option may name several files joined with
     * {@link File#pathSeparator}; they are processed in the order given.
     *
     * @param args command-line arguments (see {@link #createOptions()})
     * @throws IOException if writing the output file failed
     * @throws Exception   on any other failure (bad class name, unreadable
     *                     input, reader or feature generator error)
     */
    public static void main(String[] args) throws Exception {
        ParsedCommandLine cmdLine = new CommandLineOptionsParser().parseOptions(PosTagTrainingDataGenerator.createOptions(), args);
        String dataFiles = cmdLine.getStringValue(OPT_INPUT_FILES);
        String sentenceReaderClass = cmdLine.getStringValue(OPT_SENTENCE_READER);
        String outputFile = cmdLine.getStringValue(OPT_OUTPUT_FILE);
        String featureGeneratorClass = cmdLine.getStringValue(OPT_FEAT_GEN);
        String wordNetDir = cmdLine.getStringValue(OPT_WN_DIR);

        // The instance is intentionally discarded.
        // NOTE(review): presumably the constructor registers WordNet globally
        // for use by feature generators -- confirm against tratz.jwni.WordNet.
        new WordNet(new File(wordNetDir));

        System.err.println("Input files: " + dataFiles);

        // getDeclaredConstructor().newInstance() replaces the deprecated
        // Class.newInstance(), which rethrows checked constructor exceptions
        // without declaring them.
        PosFeatureGenerator featGen = (PosFeatureGenerator)Class.forName(featureGeneratorClass).getDeclaredConstructor().newInstance();
        SentenceReader sentenceReader = (SentenceReader)Class.forName(sentenceReaderClass).getDeclaredConstructor().newInstance();

        // NOTE(review): both the output writer and the input readers use the
        // platform default charset; left unchanged to keep existing output
        // byte-compatible.
        PrintWriter writer = new PrintWriter(new OutputStreamWriter(new GZIPOutputStream(new FileOutputStream(outputFile))));
        try {
            for (String dataFile : dataFiles.split(File.pathSeparator)) {
                System.err.println("Reading from: " + dataFile);
                // try-with-resources closes the reader even when reading or
                // feature generation throws.
                try (BufferedReader reader = new BufferedReader(new FileReader(dataFile))) {
                    writeTrainingInstances(reader, sentenceReader, featGen, writer);
                }
            }
        } finally {
            // Always close so the gzip stream is released even on failure.
            writer.close();
        }

        // PrintWriter swallows IOExceptions (including ones raised while
        // closing); surface them so a truncated file is not mistaken for
        // a successful run.
        if (writer.checkError()) {
            throw new IOException("Failed writing training data to " + outputFile);
        }
    }

    /**
     * Reads sentences from {@code reader} until the sentence reader returns
     * {@code null} and writes one training line per token to {@code writer}.
     * Prints the running sentence index to stderr every
     * {@link #PROGRESS_INTERVAL} sentences, starting with 0.
     */
    private static void writeTrainingInstances(BufferedReader reader, SentenceReader sentenceReader, PosFeatureGenerator featGen, PrintWriter writer) throws Exception {
        int snum = 0;
        Parse parse;
        while ((parse = sentenceReader.readSentence(reader)) != null) {
            List<Token> tokens = parse.getSentence().getTokens();
            int numTokens = tokens.size();
            for (int i = 0; i < numTokens; ++i) {
                Token t = tokens.get(i);
                Set<String> feats = featGen.getFeats(tokens, i);
                writer.print("-1");
                writer.print(FIELD_SEPARATOR);
                writer.print(t.getPos());
                writer.print(FIELD_SEPARATOR);
                for (String feat : feats) {
                    writer.print(feat);
                    writer.print(FIELD_SEPARATOR);
                }
                writer.println();
            }
            if (snum % PROGRESS_INTERVAL == 0) {
                System.err.println(snum);
            }
            ++snum;
        }
    }
}

