/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.cmdline.tokenizer;

import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import opennlp.tools.cmdline.AbstractTrainerTool;
import opennlp.tools.cmdline.CmdLineUtil;
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.params.TrainingToolParams;
import opennlp.tools.cmdline.tokenizer.TrainingParams;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.ml.TrainerFactory;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.model.ModelUtil;

/**
 * Command-line tool that trains a {@link TokenizerModel} from a stream of
 * {@link TokenSample}s and writes the resulting model to a file.
 */
public final class TokenizerTrainerTool
        extends AbstractTrainerTool<TokenSample, TokenizerTrainerTool.TrainerToolParams> {

    /** Creates the tool, binding it to {@link TokenSample} samples and {@link TrainerToolParams}. */
    public TokenizerTrainerTool() {
        super(TokenSample.class, TrainerToolParams.class);
    }

    /** {@inheritDoc} */
    @Override
    public String getShortDescription() {
        return "trainer for the learnable tokenizer";
    }

    /**
     * Loads the abbreviation dictionary from the given file.
     *
     * @param f the dictionary file, or {@code null} when no dictionary was specified
     * @return the loaded dictionary, or {@code null} if {@code f} is {@code null}
     * @throws IOException if the file cannot be opened or read
     */
    static Dictionary loadDict(File f) throws IOException {
        if (f == null) {
            return null;
        }
        CmdLineUtil.checkInputFile("abb dict", f);
        // Close the stream once the dictionary is built; the previous code leaked the
        // file handle both on success and when the Dictionary constructor threw.
        try (FileInputStream in = new FileInputStream(f)) {
            return new Dictionary(in);
        }
    }

    /**
     * Runs the training: resolves the training parameters, trains the tokenizer model
     * on the sample stream and writes the model to the configured output file.
     *
     * @param format the sample format name, forwarded to the superclass
     * @param args the command-line arguments, forwarded to the superclass
     * @throws TerminateToolException if the training parameters file is reported invalid by
     *         {@link TrainerFactory#isValid}, if the configured trainer is not an event model
     *         trainer, or if an {@link IOException} occurs while loading the dictionary or
     *         training
     */
    @Override
    public void run(String format, String[] args) {
        super.run(format, args);

        // Read the parameters only after super.run(...) has processed the arguments.
        final TrainerToolParams toolParams = (TrainerToolParams) this.params;

        this.mlParams = CmdLineUtil.loadTrainingParameters(toolParams.getParams(), false);
        if (this.mlParams == null) {
            // No parameters were loaded: fall back to the default training parameters.
            this.mlParams = ModelUtil.createDefaultTrainingParameters();
        } else {
            if (!TrainerFactory.isValid(this.mlParams)) {
                throw new TerminateToolException(1,
                        "Training parameters file '" + toolParams.getParams() + "' is invalid!");
            }
            if (!TrainerFactory.TrainerType.EVENT_MODEL_TRAINER.equals(
                    TrainerFactory.getTrainerType(this.mlParams))) {
                throw new TerminateToolException(1, "Sequence training is not supported!");
            }
        }

        final File modelOutFile = toolParams.getModel();
        CmdLineUtil.checkOutputFile("tokenizer model", modelOutFile);

        final TokenizerModel model;
        try {
            Dictionary dict = loadDict(toolParams.getAbbDict());
            TokenizerFactory tokFactory = TokenizerFactory.create(
                    toolParams.getFactory(),
                    toolParams.getLang(),
                    dict,
                    toolParams.getAlphaNumOpt(),
                    null);
            model = TokenizerME.train(this.sampleStream, tokFactory, this.mlParams);
        } catch (IOException e) {
            throw this.createTerminationIOException(e);
        } finally {
            try {
                this.sampleStream.close();
            } catch (IOException ignored) {
                // Deliberately best-effort: a failure to close the input sample stream
                // must not mask a training error or prevent the model from being written.
            }
        }

        CmdLineUtil.writeModel("tokenizer", modelOutFile, model);
    }

    /**
     * Parameter set of this tool: the union of the tokenizer-specific
     * {@link TrainingParams} and the generic {@link TrainingToolParams}.
     */
    interface TrainerToolParams extends TrainingParams, TrainingToolParams {
    }
}

