001 package org.maltparser.core.syntaxgraph.writer;
002
003 import java.io.BufferedWriter;
004 import java.io.FileNotFoundException;
005 import java.io.FileOutputStream;
006 import java.io.IOException;
007 import java.io.OutputStream;
008 import java.io.OutputStreamWriter;
009 import java.io.UnsupportedEncodingException;
010 import java.util.Iterator;
011
012 import org.maltparser.core.exception.MaltChainedException;
013 import org.maltparser.core.io.dataformat.ColumnDescription;
014 import org.maltparser.core.io.dataformat.DataFormatException;
015 import org.maltparser.core.io.dataformat.DataFormatInstance;
016 import org.maltparser.core.syntaxgraph.DependencyStructure;
017 import org.maltparser.core.syntaxgraph.TokenStructure;
018 import org.maltparser.core.syntaxgraph.node.TokenNode;
019 /**
020 *
021 *
022 * @author Johan Hall
023 */
024 public class TabWriter implements SyntaxGraphWriter {
025 private BufferedWriter writer;
026 private DataFormatInstance dataFormatInstance;
027 private final StringBuilder output;
028 private boolean closeStream = true;
029 // private String ID = "ID";
030 // private String IGNORE_COLUMN_SIGN = "_";
031 private final char TAB = '\t';
032 private final char NEWLINE = '\n';
033
034
035 public TabWriter() {
036 output = new StringBuilder();
037 }
038
039 public void open(String fileName, String charsetName) throws MaltChainedException {
040 try {
041 open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName));
042 } catch (FileNotFoundException e) {
043 throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
044 } catch (UnsupportedEncodingException e) {
045 throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
046 }
047 }
048
049 public void open(OutputStream os, String charsetName) throws MaltChainedException {
050 try {
051 if (os == System.out || os == System.err) {
052 closeStream = false;
053 }
054 open(new OutputStreamWriter(os, charsetName));
055 } catch (UnsupportedEncodingException e) {
056 throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
057 }
058 }
059
060 private void open(OutputStreamWriter osw) throws MaltChainedException {
061 setWriter(new BufferedWriter(osw));
062 }
063
064 public void writeProlog() throws MaltChainedException {
065
066 }
067
068 public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
069 if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
070 return;
071 }
072 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
073
074 for (int i : syntaxGraph.getTokenIndices()) {
075 try {
076 ColumnDescription column = null;
077 while (columns.hasNext()) {
078 column = columns.next();
079
080 if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) {
081 TokenNode node = syntaxGraph.getTokenNode(i);
082 if (!column.getName().equals("ID")) {
083 if (node.hasLabel(column.getSymbolTable())) {
084 output.append(node.getLabelSymbol(column.getSymbolTable()));
085 if (output.length() != 0) {
086 writer.write(output.toString());
087 } else {
088 writer.write('_');
089 }
090 } else {
091 writer.write('_');
092 }
093 } else {
094 writer.write(Integer.toString(i));
095 }
096 } else if (column.getCategory() == ColumnDescription.HEAD && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
097 if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead()) {
098 writer.write(Integer.toString(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHead().getIndex()));
099 } else {
100 writer.write(Integer.toString(0));
101 }
102
103 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
104 if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead() && ((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHeadEdgeLabel(column.getSymbolTable())) {
105 output.append(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHeadEdgeLabelSymbol(column.getSymbolTable()));
106 } else {
107 output.append(((DependencyStructure)syntaxGraph).getDefaultRootEdgeLabelSymbol(column.getSymbolTable()));
108 }
109
110 if (output.length() != 0) {
111 writer.write(output.toString());
112 }
113 } else {
114 writer.write(column.getDefaultOutput());
115 }
116 if (columns.hasNext()) {
117 writer.write(TAB);
118 }
119 output.setLength(0);
120 }
121 writer.write(NEWLINE);
122 columns = dataFormatInstance.iterator();
123 } catch (IOException e) {
124 close();
125 throw new DataFormatException("Could not write to the output file. ", e);
126 }
127 }
128
129 try {
130 writer.write('\n');
131 writer.flush();
132 } catch (IOException e) {
133 close();
134 throw new DataFormatException("Could not write to the output file. ", e);
135 }
136 }
137
138 public void writeEpilog() throws MaltChainedException {
139
140 }
141
142 public BufferedWriter getWriter() {
143 return writer;
144 }
145
146 public void setWriter(BufferedWriter writer) throws MaltChainedException {
147 close();
148 this.writer = writer;
149 }
150
151 public DataFormatInstance getDataFormatInstance() {
152 return dataFormatInstance;
153 }
154
155 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
156 this.dataFormatInstance = dataFormatInstance;
157 }
158
159 public String getOptions() {
160 return null;
161 }
162
163 public void setOptions(String optionString) throws MaltChainedException {
164
165 }
166
167 public void close() throws MaltChainedException {
168 try {
169 if (writer != null) {
170 writer.flush();
171 if (closeStream) {
172 writer.close();
173 }
174 writer = null;
175 }
176 } catch (IOException e) {
177 throw new DataFormatException("Could not close the output file. ", e);
178 }
179
180 }
181 }