001 package org.maltparser.core.syntaxgraph.reader;
002
003 import java.text.SimpleDateFormat;
004 import java.util.Date;
005 import java.util.LinkedHashMap;
006 import java.util.SortedMap;
007 import java.util.TreeMap;
008
009 import org.maltparser.core.helper.Util;
010 import org.maltparser.core.symbol.SymbolTable;
011 import org.maltparser.core.symbol.SymbolTableHandler;
012 /**
013 *
014 *
015 * @author Johan Hall
016 */
017 public class TigerXMLHeader {
018 public enum Domain {
019 T, // feature for terminal nodes
020 NT, // feature for nonterminal nodes
021 FREC, //feature for both
022 EL, // edge label (same as "edgelabel" in TigerXML schema)
023 SEL // secondary edge Label (same as "secedgelabel" in TigerXML schema)
024 };
025
026 private String corpusID;
027 private String corpusVersion;
028 private String external;
029 private String metaName;
030 private String metaAuthor;
031 private String metaDescription;
032 private String metaInDate;
033 private String metaFormat;
034 private String metaHistory;
035 private SymbolTableHandler symbolTableHandler;
036 private FeatureEdgeLabel edgeLabels;
037 private FeatureEdgeLabel secEdgeLabels;
038 private LinkedHashMap<String,FeatureEdgeLabel> features;
039
040 public TigerXMLHeader(SymbolTableHandler symbolTableHandler) {
041 setSymbolTableHandler(symbolTableHandler);
042 features = new LinkedHashMap<String,FeatureEdgeLabel>();
043 }
044
045 public boolean isTigerXMLWritable() {
046 return true;
047 //return features.size() > 0;
048 }
049
050 public void addFeature(String featureName, String domainName) {
051 if (!features.containsKey(featureName)) {
052 features.put(featureName, new FeatureEdgeLabel(featureName, domainName));
053 }
054 }
055
056 public void addFeatureValue(String featureName, String name) {
057 addFeatureValue(featureName, name, "\t");
058 }
059
060 public void addFeatureValue(String featureName, String name, String desc) {
061 if (features.containsKey(featureName)) {
062 if (desc == null || desc.length() == 0) {
063 features.get(featureName).addValue(name, "\t");
064 } else {
065 features.get(featureName).addValue(name, desc);
066 }
067 }
068 }
069
070 public void addEdgeLabelValue(String name) {
071 addEdgeLabelValue(name, "\t");
072 }
073
074 public void addEdgeLabelValue(String name, String desc) {
075 if (edgeLabels == null) {
076 edgeLabels = new FeatureEdgeLabel("edgelabel", Domain.EL);
077 }
078 if (desc == null || desc.length() == 0) {
079 edgeLabels.addValue(name, "\t");
080 } else {
081 edgeLabels.addValue(name, desc);
082 }
083 }
084
085 public void addSecEdgeLabelValue(String name) {
086 addSecEdgeLabelValue(name, "\t");
087 }
088
089 public void addSecEdgeLabelValue(String name, String desc) {
090 if (secEdgeLabels == null) {
091 secEdgeLabels = new FeatureEdgeLabel("secedgelabel", Domain.SEL);
092 }
093 if (desc == null || desc.length() == 0) {
094 secEdgeLabels.addValue(name, "\t");
095 } else {
096 secEdgeLabels.addValue(name, desc);
097 }
098 }
099
100 public String getCorpusID() {
101 return corpusID;
102 }
103
104 public void setCorpusID(String corpusID) {
105 this.corpusID = corpusID;
106 }
107
108 public String getCorpusVersion() {
109 return corpusVersion;
110 }
111
112 public void setCorpusVersion(String corpusVersion) {
113 this.corpusVersion = corpusVersion;
114 }
115
116 public void setExternal(String external) {
117 this.external = external;
118 }
119
120 public String getExternal() {
121 return external;
122 }
123
124 public void setMeta(String metaElement, String value) {
125 if (metaElement.equals("name")) { setMetaName(value); }
126 if (metaElement.equals("author")) { setMetaAuthor(value); }
127 if (metaElement.equals("description")) { setMetaDescription(value); }
128 if (metaElement.equals("date")) { setMetaInDate(value); }
129 if (metaElement.equals("format")) { setMetaFormat(value); }
130 if (metaElement.equals("history")) { setMetaHistory(value); }
131 }
132
133 public String getMetaName() {
134 return metaName;
135 }
136
137 public void setMetaName(String metaName) {
138 this.metaName = metaName;
139 }
140
141 public String getMetaAuthor() {
142 return metaAuthor;
143 }
144
145 public void setMetaAuthor(String metaAuthor) {
146 this.metaAuthor = metaAuthor;
147 }
148
149 public String getMetaDescription() {
150 return metaDescription;
151 }
152
153 public void setMetaDescription(String metaDescription) {
154 this.metaDescription = metaDescription;
155 }
156
157 public String getMetaInDate() {
158 return metaInDate;
159 }
160
161 public String getMetaCurrentDate() {
162 return getMetaCurrentDate("yyyy-MM-dd HH:mm:ss");
163 }
164
165 public String getMetaCurrentDate(String format) {
166 return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
167 }
168
169 public void setMetaInDate(String metaInDate) {
170 this.metaInDate = metaInDate;
171 }
172
173 public String getMetaFormat() {
174 return metaFormat;
175 }
176
177 public void setMetaFormat(String metaFormat) {
178 this.metaFormat = metaFormat;
179 }
180
181 public String getMetaHistory() {
182 return metaHistory;
183 }
184
185 public void setMetaHistory(String metaHistory) {
186 this.metaHistory = metaHistory;
187 }
188
189 public SymbolTableHandler getSymbolTableHandler() {
190 return symbolTableHandler;
191 }
192
193 protected void setSymbolTableHandler(SymbolTableHandler symbolTableHandler) {
194 this.symbolTableHandler = symbolTableHandler;
195 }
196
197 public String toTigerXML() {
198 final StringBuilder sb = new StringBuilder();
199
200 if (getCorpusVersion() == null) {
201 sb.append("<corpus id=\"");
202 sb.append(((getCorpusID() == null)?"GeneratedByMaltParser":getCorpusID()));
203 sb.append("\">\n");
204 } else {
205 sb.append("<corpus id=\"");
206 sb.append(((getCorpusID() == null)?"GeneratedByMaltParser":getCorpusID()));
207 sb.append("\" version=\"");
208 sb.append(getCorpusVersion());
209 sb.append("\">\n");
210 }
211 sb.append(" <head>\n");
212 sb.append(" <meta>\n");
213 sb.append(" <name>");
214 sb.append(((getMetaName() == null)?"GeneratedByMaltParser":Util.xmlEscape(getMetaName())));
215 sb.append("</name>\n");
216 sb.append(" <author>MaltParser</author>\n");
217 sb.append(" <date>");
218 sb.append(getMetaCurrentDate());
219 sb.append("</date>\n");
220
221 sb.append(" <description>");
222 sb.append(Util.xmlEscape("Unfortunately, you have to add the annotations header data yourself. Maybe in later releases this will be fixed. "));
223 sb.append("</description>\n");
224
225 // if (getMetaDescription() != null) {
226 // sb.append(" <description>");
227 // sb.append(Util.xmlEscape(getMetaDescription()));
228 // sb.append("</description>\n");
229 // }
230 // if (getMetaFormat() != null) {
231 // sb.append(" <format>");
232 // sb.append(Util.xmlEscape(getMetaFormat()));
233 // sb.append("</format>\n");
234 // }
235 // if (getMetaHistory() != null) {
236 // sb.append(" <history>");
237 // sb.append(Util.xmlEscape(getMetaHistory()));
238 // sb.append("</history>\n");
239 // }
240 sb.append(" </meta>\n");
241 sb.append(" <annotation/>\n");
242 // sb.append(" <annotation>\n");
243 // for (String name : features.keySet()) {
244 // sb.append(features.get(name).toTigerXML());
245 // }
246 // if (edgeLabels != null) {
247 // sb.append(edgeLabels.toTigerXML());
248 // }
249 // if (secEdgeLabels != null) {
250 // sb.append(secEdgeLabels.toTigerXML());
251 // }
252 // sb.append(" </annotation>\n");
253 sb.append(" </head>\n");
254 sb.append(" <body>\n");
255 return sb.toString();
256 }
257
258 public String toString() {
259 return toTigerXML();
260 }
261
262 protected class FeatureEdgeLabel {
263 private String name;
264 private Domain domain;
265 // values: key mapped to \t (tab) indicates that the description part is missing
266 private SortedMap<String, String> values;
267 private SymbolTable table;
268
269 public FeatureEdgeLabel(String name, String domainName) {
270 setName(name);
271 setDomain(domainName);
272 }
273
274 public FeatureEdgeLabel(String name, Domain domain) {
275 setName(name);
276 setDomain(domain);
277 }
278
279 public String getName() {
280 return name;
281 }
282
283 public void setName(String name) {
284 this.name = name;
285 }
286
287 public void setDomain(String domainName) {
288 domain = Domain.valueOf(domainName);
289 }
290
291 public void setDomain(Domain domain) {
292 this.domain = domain;
293 }
294
295 public String getDomainName() {
296 return domain.toString();
297 }
298
299 public Domain getDomain() {
300 return domain;
301 }
302
303 public SymbolTable getTable() {
304 return table;
305 }
306
307 public void setTable(SymbolTable table) {
308 this.table = table;
309 }
310
311 public void addValue(String name) {
312 addValue(name, "\t");
313 }
314
315 public void addValue(String name, String desc) {
316 if (values == null) {
317 values = new TreeMap<String,String>();
318 }
319 values.put(name, desc);
320 }
321
322 public String toTigerXML() {
323 final StringBuilder sb = new StringBuilder();
324 if (domain == Domain.T || domain == Domain.FREC || domain == Domain.NT) {
325 sb.append(" <feature domain=\"");
326 sb.append(getDomainName());
327 sb.append("\" name=\"");
328 sb.append(getName());
329 sb.append((values == null)?"\" />\n":"\">\n");
330 }
331 if (domain == Domain.EL) {
332 sb.append((values != null)?" <edgelabel>\n":" <edgelabel />\n");
333 }
334 if (domain == Domain.SEL) {
335 sb.append((values != null)?" <secedgelabel>\n":" <secedgelabel />\n");
336 }
337 if (values != null) {
338 for (String name : values.keySet()) {
339 sb.append(" <value name=\"");
340 sb.append(name);
341 if (values.get(name).equals("\t")) {
342 sb.append("\" />\n");
343 } else {
344 sb.append("\">");
345 sb.append(Util.xmlEscape(values.get(name)));
346 sb.append("</value>\n");
347 }
348 }
349 }
350 if (domain == Domain.T || domain == Domain.FREC || domain == Domain.NT) {
351 if (values != null) {
352 sb.append(" </feature>\n");
353 }
354 }
355 if (domain == Domain.EL && values != null) {
356 sb.append(" </edgelabel>\n");
357 }
358 if (domain == Domain.SEL && values != null) {
359 sb.append(" </secedgelabel>\n");
360 }
361 return sb.toString();
362 }
363
364 public String toString() {
365 return toTigerXML();
366 }
367 }
368 }
369
370
371