/*
 * Decompiled with CFR 0.152.
 */
package de.dfki.lt.tools.tokenizer.output;

import de.dfki.lt.tools.tokenizer.FileTools;
import de.dfki.lt.tools.tokenizer.JTok;
import de.dfki.lt.tools.tokenizer.annotate.AnnotatedString;
import de.dfki.lt.tools.tokenizer.exceptions.ProcessingException;
import de.dfki.lt.tools.tokenizer.output.Token;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Static helpers that render the annotations of a tokenized
 * {@link AnnotatedString} as an XML document.
 *
 * <p>The generated structure is a {@value #XML_DOCUMENT} root containing
 * {@value #XML_PARAGRAPH} elements, which contain {@value #XML_TEXT_UNIT}
 * elements, which in turn contain one {@value #XML_TOKEN} element per
 * annotated run of the input.
 */
public class XMLOutputter {
    private static final Logger LOG = LoggerFactory.getLogger(XMLOutputter.class);

    /** Name of the root element. */
    public static final String XML_DOCUMENT = "Document";
    /** Name of the paragraph element. */
    public static final String XML_PARAGRAPH = "p";
    /** Name of the text unit element. */
    public static final String XML_TEXT_UNIT = "tu";
    /** Name of the id attribute set on text unit elements. */
    public static final String ID_ATT = "id";
    /** Name of the token element. */
    public static final String XML_TOKEN = "Token";
    /** Name of the token attribute holding the token's surface string. */
    public static final String IMAGE_ATT = "string";
    /** Name of the token attribute holding the result of {@code Token.applyPtbFormat}. */
    public static final String PTB_ATT = "ptb";
    /** Name of the token attribute holding the token class. */
    public static final String TOK_TYPE_ATT = "type";
    /** Name of the token attribute holding the token's start index in the input. */
    public static final String OFFSET_ATT = "offset";
    /** Name of the token attribute holding the length of the token's surface string. */
    public static final String LENGTH_ATT = "length";

    /** Annotation key under which the token class of a run is looked up. */
    private static final String CLASS_ANNO = "class";
    /** Annotation key under which a border marker of a run is looked up. */
    private static final String BORDER_ANNO = "border";

    /**
     * Builds a DOM document from the annotations of the given input.
     *
     * <p>The input is walked run by run along the {@code "class"} annotation.
     * Every run that carries a class annotation becomes a token element. A run
     * that carries any {@code "border"} annotation starts a new text unit; a run
     * whose border annotation equals {@value #XML_PARAGRAPH} additionally starts
     * a new paragraph. Empty text units and paragraphs are never emitted.
     *
     * @param input the annotated string to convert
     * @return the generated DOM document
     * @throws ProcessingException if no document builder can be created
     */
    public static Document createXMLDocument(AnnotatedString input) {
        Document doc = newEmptyDocument();
        Element root = doc.createElement(XML_DOCUMENT);
        doc.appendChild(root);

        int tuId = 0;
        Element p = doc.createElement(XML_PARAGRAPH);
        Element tu = doc.createElement(XML_TEXT_UNIT);
        tu.setAttribute(ID_ATT, Integer.toString(tuId));

        char c = input.setIndex(0);
        // '\uffff' is presumably the iterator's end-of-text sentinel
        // (it matches java.text.CharacterIterator.DONE) - TODO confirm
        while (c != '\uffff') {
            int tokenStart = input.getRunStart(CLASS_ANNO);
            int tokenEnd = input.getRunLimit(CLASS_ANNO);
            Object tokenClass = input.getAnnotation(CLASS_ANNO);
            if (null != tokenClass) {
                String type = (String) tokenClass;
                String image = input.substring(tokenStart, tokenEnd);
                Element xmlToken = createTokenElement(doc, image, type, tokenStart);

                Object border = input.getAnnotation(BORDER_ANNO);
                // any border closes the current text unit, provided it has content
                if (null != border && tu.hasChildNodes()) {
                    p.appendChild(tu);
                    tu = doc.createElement(XML_TEXT_UNIT);
                    tuId++;
                    tu.setAttribute(ID_ATT, Integer.toString(tuId));
                }
                // a paragraph border also closes the current paragraph; compare by
                // value, since the annotation need not be the identical String instance
                if (XML_PARAGRAPH.equals(border) && p.hasChildNodes()) {
                    root.appendChild(p);
                    p = doc.createElement(XML_PARAGRAPH);
                }
                tu.appendChild(xmlToken);
            }
            c = input.setIndex(tokenEnd);
        }

        // flush the trailing text unit and paragraph
        if (tu.hasChildNodes()) {
            p.appendChild(tu);
        }
        if (p.hasChildNodes()) {
            root.appendChild(p);
        }
        return doc;
    }

    /**
     * Converts the given input to XML and writes it to a file.
     *
     * @param input the annotated string to convert
     * @param anEncoding the character encoding used for the file content and
     *     declared as the transformer's output encoding
     * @param aFileName the name of the file to write
     * @throws ProcessingException if the transformation fails or the file
     *     cannot be written
     */
    public static void createXMLFile(AnnotatedString input, String anEncoding, String aFileName) {
        Document doc = XMLOutputter.createXMLDocument(input);
        // both streams are managed resources so that the file handle is released
        // even when the encoding is unsupported or the transformation fails
        try (OutputStream fileOut = new FileOutputStream(aFileName);
                Writer out = new OutputStreamWriter(fileOut, anEncoding)) {
            writeDocument(doc, out, anEncoding);
        }
        catch (TransformerException te) {
            // NOTE(review): only the message is forwarded; no ProcessingException
            // constructor accepting a cause is visible from this file
            throw new ProcessingException(te.getMessage());
        }
        catch (IOException ioe) {
            throw new ProcessingException(ioe.getMessage());
        }
    }

    /**
     * Converts the given input to XML and returns it as a string.
     *
     * @param input the annotated string to convert
     * @return the indented XML serialization of the generated document
     * @throws ProcessingException if the transformation fails
     */
    public static String createXMLString(AnnotatedString input) {
        Document doc = XMLOutputter.createXMLDocument(input);
        StringWriter out = new StringWriter();
        try {
            writeDocument(doc, out, null);
        }
        catch (TransformerException te) {
            throw new ProcessingException(te.getMessage());
        }
        return out.toString();
    }

    /**
     * Command line entry point: reads a file, tokenizes it and prints the XML
     * result to standard output.
     *
     * @param args the file name, the language and optionally the encoding used
     *     to read the file (ISO-8859-1 when omitted)
     */
    public static void main(String[] args) {
        if (args.length != 2 && args.length != 3) {
            System.out.println("This method needs two arguments:\n- a file name for the document to tokenize\n- the language of the document\n- an optional encoding to use (default is ISO-8859-1)\nSupported languages are: de, en, it");
            System.exit(1);
        }
        String encoding = args.length == 3 ? args[2] : "ISO-8859-1";
        String text = null;
        try {
            text = FileTools.readFileAsString(new File(args[0]), encoding);
        }
        catch (IOException ioe) {
            System.err.println(ioe.toString());
            System.exit(1);
        }
        try {
            JTok testTok = new JTok();
            AnnotatedString result = testTok.tokenize(text, args[1]);
            System.out.println(XMLOutputter.createXMLString(result));
        }
        catch (IOException e) {
            LOG.error(e.getLocalizedMessage(), e);
        }
    }

    /** Creates a new, empty DOM document, wrapping configuration failures. */
    private static Document newEmptyDocument() {
        try {
            DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            return builder.newDocument();
        }
        catch (ParserConfigurationException pce) {
            throw new ProcessingException(pce.getMessage());
        }
    }

    /** Creates a token element carrying the image, PTB form, type, offset and length. */
    private static Element createTokenElement(Document doc, String image, String type, int offset) {
        Element xmlToken = doc.createElement(XML_TOKEN);
        xmlToken.setAttribute(IMAGE_ATT, image);
        String ptbImage = Token.applyPtbFormat(image, type);
        // the ptb attribute is only written when a PTB form was returned
        if (null != ptbImage) {
            xmlToken.setAttribute(PTB_ATT, ptbImage);
        }
        xmlToken.setAttribute(TOK_TYPE_ATT, type);
        xmlToken.setAttribute(OFFSET_ATT, Integer.toString(offset));
        xmlToken.setAttribute(LENGTH_ATT, Integer.toString(image.length()));
        return xmlToken;
    }

    /**
     * Serializes the document with indentation to the given writer; the output
     * encoding property is only set when {@code encoding} is not {@code null}.
     */
    private static void writeDocument(Document doc, Writer out, String encoding)
            throws TransformerException {
        Transformer transformer = TransformerFactory.newInstance().newTransformer();
        transformer.setOutputProperty("indent", "yes");
        if (null != encoding) {
            transformer.setOutputProperty("encoding", encoding);
        }
        transformer.transform(new DOMSource(doc), new StreamResult(out));
    }
}

