001 package org.maltparser.core.propagation.spec;
002
003 import java.io.IOException;
004 import java.net.URL;
005
006 import javax.xml.parsers.DocumentBuilder;
007 import javax.xml.parsers.DocumentBuilderFactory;
008 import javax.xml.parsers.ParserConfigurationException;
009
010 import org.maltparser.core.exception.MaltChainedException;
011 import org.maltparser.core.propagation.PropagationException;
012 import org.w3c.dom.Element;
013 import org.w3c.dom.NodeList;
014 import org.xml.sax.SAXException;
015
016 /**
017 * @author Johan Hall
018 *
019 */
020 public class PropagationSpecsReader {
021 public PropagationSpecsReader() { }
022
023 public void load(URL url, PropagationSpecs propagationSpecs) throws MaltChainedException {
024 try {
025 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
026 DocumentBuilder db = dbf.newDocumentBuilder();
027 Element root = null;
028
029 root = db.parse(url.openStream()).getDocumentElement();
030
031 if (root == null) {
032 throw new PropagationException("The propagation specification file '"+url.getFile()+"' cannot be found. ");
033 }
034
035 readPropagationSpecs(root, propagationSpecs);
036 } catch (IOException e) {
037 throw new PropagationException("The propagation specification file '"+url.getFile()+"' cannot be found. ", e);
038 } catch (ParserConfigurationException e) {
039 throw new PropagationException("Problem parsing the file "+url.getFile()+". ", e);
040 } catch (SAXException e) {
041 throw new PropagationException("Problem parsing the file "+url.getFile()+". ", e);
042 }
043 }
044
045 private void readPropagationSpecs(Element propagations, PropagationSpecs propagationSpecs) throws MaltChainedException {
046 NodeList propagationList = propagations.getElementsByTagName("propagation");
047 for (int i = 0; i < propagationList.getLength(); i++) {
048 readPropagationSpec((Element)propagationList.item(i), propagationSpecs);
049 }
050 }
051
052 private void readPropagationSpec(Element propagation, PropagationSpecs propagationSpecs) throws MaltChainedException {
053 int nFrom = propagation.getElementsByTagName("from").getLength();
054 if (nFrom < 1 && nFrom > 1) {
055 throw new PropagationException("Propagation specification wrongly formatted: Number of 'from' elements is '"+nFrom+"', must be 1.");
056 }
057
058 int nTo = propagation.getElementsByTagName("to").getLength();
059 if (nTo < 1 && nTo > 1) {
060 throw new PropagationException("Propagation specification wrongly formatted: Number of 'to' elements is '"+nTo+"', must be 1.");
061 }
062
063 int nFor = propagation.getElementsByTagName("for").getLength();
064 if (nFor > 1) {
065 throw new PropagationException("Propagation specification wrongly formatted: Number of 'for' elements is '"+nFor+"', at most 1.");
066 }
067
068 int nOver = propagation.getElementsByTagName("over").getLength();
069 if (nOver > 1) {
070 throw new PropagationException("Propagation specification wrongly formatted: Number of 'over' elements is '"+nOver+"',at most 1.");
071 }
072 String fromText = ((Element)propagation.getElementsByTagName("from").item(0)).getTextContent().trim();
073 if (fromText.length() == 0) {
074 throw new PropagationException("Propagation specification wrongly formatted: The 'from' element is empty");
075 }
076 String toText = ((Element)propagation.getElementsByTagName("to").item(0)).getTextContent().trim();
077 if (toText.length() == 0) {
078 throw new PropagationException("Propagation specification wrongly formatted: The 'to' element is empty");
079 }
080 String forText = "";
081 if (nFor != 0) {
082 forText = ((Element)propagation.getElementsByTagName("for").item(0)).getTextContent().trim();
083 }
084 String overText = "";
085 if (nOver != 0) {
086 overText = ((Element)propagation.getElementsByTagName("over").item(0)).getTextContent().trim();
087 }
088 propagationSpecs.add(new PropagationSpec(fromText, toText, forText, overText));
089 }
090 }