package smart_gs.transcription_tool.tesseract;

import java.io.File;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/**
 * Converts an {@link HocrDocument} into a line-segmentation XML file.
 *
 * <p>The generated document has the following shape:
 * <pre>
 * segmentInfo
 *   source          (text: hocrDoc.getFileName())
 *   lineDirection   (text: "Horizontal")
 *   segments
 *     line id=...   (one per entry of hocrDoc.getLines())
 *       vertexes
 *         vertex    (four per bounding box, each with x and y)
 *       words       (only when word detection is enabled)
 *         word id=... (text: word.getText(), followed by its own vertexes)
 * </pre>
 */
public class Hocr2LineSegXmlConverter {

	/** Number of corner points written for each bounding box. */
	private static final int VERTEX_COUNT = 4;

	/**
	 * Builds the segmentation XML for {@code hocrDoc} and writes it to the
	 * file located at {@code out}.
	 *
	 * <p>XML set-up and transformation failures do not propagate to the
	 * caller: they are reported on standard error and the method returns
	 * normally, so the output file may be missing or incomplete afterwards.
	 *
	 * @param hocrDoc                the document whose file name and lines are exported
	 * @param out                    path of the XML file to write
	 * @param isWordDetectionEnabled whether a {@code words} element is emitted for
	 *                               every line; {@code null} is treated as {@code false}
	 */
	public static void convert(HocrDocument hocrDoc, String out, Boolean isWordDetectionEnabled) {
		// The flag is a boxed Boolean; using it directly in a condition would
		// throw a NullPointerException on auto-unboxing when it is null.
		boolean includeWords = Boolean.TRUE.equals(isWordDetectionEnabled);
		try {
			Document doc = buildDocument(hocrDoc, includeWords);
			Transformer transformer = TransformerFactory.newInstance().newTransformer();
			transformer.transform(new DOMSource(doc), new StreamResult(new File(out)));
		} catch (ParserConfigurationException e) {
			reportFailure(out, e);
		} catch (TransformerException e) {
			// Also covers TransformerConfigurationException, which is a subclass.
			reportFailure(out, e);
		}
	}

	/** Creates the complete DOM tree (root, header elements and all line segments). */
	private static Document buildDocument(HocrDocument hocrDoc, boolean includeWords)
			throws ParserConfigurationException {
		DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
		Document doc = docBuilder.newDocument();

		Element rootElement = doc.createElement("segmentInfo");
		doc.appendChild(rootElement);
		appendTextElement(doc, rootElement, "source", hocrDoc.getFileName());
		appendTextElement(doc, rootElement, "lineDirection", "Horizontal");

		Element segments = doc.createElement("segments");
		rootElement.appendChild(segments);
		for (HocrLine line : hocrDoc.getLines()) {
			segments.appendChild(generateLineElement(doc, line, includeWords));
		}
		return doc;
	}

	/** Appends a child element named {@code tagName} holding a single text node to {@code parent}. */
	private static void appendTextElement(Document doc, Element parent, String tagName, String text) {
		Element element = doc.createElement(tagName);
		element.appendChild(doc.createTextNode(text));
		parent.appendChild(element);
	}

	/** Creates the {@code line} element for one line, optionally including its words. */
	private static Element generateLineElement(Document doc, HocrLine line, boolean includeWords) {
		Element lineElement = doc.createElement("line");
		lineElement.setAttribute("id", line.getId());
		lineElement.appendChild(generateVertexesElement(doc, line.getBbox().toVertexes()));
		if (includeWords) {
			Element wordsElement = doc.createElement("words");
			lineElement.appendChild(wordsElement);
			for (HocrWord word : line.getWords()) {
				wordsElement.appendChild(generateWordElement(doc, word));
			}
		}
		return lineElement;
	}

	/** Creates the {@code word} element: id attribute, the word text, then its vertexes. */
	private static Element generateWordElement(Document doc, HocrWord word) {
		Element wordElement = doc.createElement("word");
		wordElement.setAttribute("id", word.getId());
		// The text must be set before the vertexes child is appended:
		// setTextContent replaces all existing children of the element.
		wordElement.setTextContent(word.getText());
		wordElement.appendChild(generateVertexesElement(doc, word.getBbox().toVertexes()));
		return wordElement;
	}

	/**
	 * Creates a {@code vertexes} element containing {@link #VERTEX_COUNT}
	 * {@code vertex} children, each with an {@code x} and a {@code y} element.
	 *
	 * @param doc      document used as element factory
	 * @param vertexes corner points; the first {@link #VERTEX_COUNT} rows are read,
	 *                 with index 0 of each row written as x and index 1 as y
	 * @return the populated, not yet attached, {@code vertexes} element
	 */
	private static Element generateVertexesElement(Document doc, int[][] vertexes) {
		Element vertexesElement = doc.createElement("vertexes");
		for (int i = 0; i < VERTEX_COUNT; i++) {
			Element vertex = doc.createElement("vertex");
			vertexesElement.appendChild(vertex);
			appendTextElement(doc, vertex, "x", String.valueOf(vertexes[i][0]));
			appendTextElement(doc, vertex, "y", String.valueOf(vertexes[i][1]));
		}
		return vertexesElement;
	}

	/** Reports a failed conversion on standard error without interrupting the caller. */
	private static void reportFailure(String out, Exception e) {
		System.err.println("Failed to write line segmentation XML to " + out);
		e.printStackTrace();
	}

}
