5194 |
04 Dec 09 |
nicklas |
1 |
/** |
5194 |
04 Dec 09 |
nicklas |
$Id$ |
5194 |
04 Dec 09 |
nicklas |
3 |
|
5194 |
04 Dec 09 |
nicklas |
Copyright (C) 2009 Nicklas Nordborg |
5194 |
04 Dec 09 |
nicklas |
5 |
|
5194 |
04 Dec 09 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5194 |
04 Dec 09 |
nicklas |
Available at http://base.thep.lu.se/ |
5194 |
04 Dec 09 |
nicklas |
8 |
|
5194 |
04 Dec 09 |
nicklas |
BASE is free software; you can redistribute it and/or |
5194 |
04 Dec 09 |
nicklas |
modify it under the terms of the GNU General Public License |
5194 |
04 Dec 09 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5194 |
04 Dec 09 |
nicklas |
of the License, or (at your option) any later version. |
5194 |
04 Dec 09 |
nicklas |
13 |
|
5194 |
04 Dec 09 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5194 |
04 Dec 09 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5194 |
04 Dec 09 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5194 |
04 Dec 09 |
nicklas |
GNU General Public License for more details. |
5194 |
04 Dec 09 |
nicklas |
18 |
|
5194 |
04 Dec 09 |
nicklas |
You should have received a copy of the GNU General Public License |
5194 |
04 Dec 09 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5194 |
04 Dec 09 |
nicklas |
21 |
*/ |
5194 |
04 Dec 09 |
nicklas |
22 |
package net.sf.basedb.util.bfs; |
5194 |
04 Dec 09 |
nicklas |
23 |
|
5319 |
20 Apr 10 |
nicklas |
24 |
import java.io.FileInputStream; |
5194 |
04 Dec 09 |
nicklas |
25 |
import java.io.IOException; |
5194 |
04 Dec 09 |
nicklas |
26 |
import java.io.InputStream; |
5194 |
04 Dec 09 |
nicklas |
27 |
import java.util.regex.Pattern; |
5194 |
04 Dec 09 |
nicklas |
28 |
|
5236 |
05 Feb 10 |
nicklas |
29 |
import net.sf.basedb.core.File; |
5225 |
29 Jan 10 |
nicklas |
30 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
5319 |
20 Apr 10 |
nicklas |
31 |
import net.sf.basedb.util.FileUtil; |
5194 |
04 Dec 09 |
nicklas |
32 |
import net.sf.basedb.util.encode.EncoderDecoder; |
5194 |
04 Dec 09 |
nicklas |
33 |
import net.sf.basedb.util.encode.TabCrLfEncoderDecoder; |
5194 |
04 Dec 09 |
nicklas |
34 |
import net.sf.basedb.util.parser.FlatFileParser; |
5194 |
04 Dec 09 |
nicklas |
35 |
|
5194 |
04 Dec 09 |
nicklas |
36 |
/** |
5221 |
22 Jan 10 |
nicklas |
Parser implementation that parses BFS data files. Before parsing |
5221 |
22 Jan 10 |
nicklas |
is started the stream to parse must be specified by calling |
5221 |
22 Jan 10 |
nicklas |
{@link #setInputStream(InputStream)}. The actual parsing can be done |
5221 |
22 Jan 10 |
nicklas |
in two different ways: |
5194 |
04 Dec 09 |
nicklas |
<ul> |
5221 |
22 Jan 10 |
nicklas |
<li>"Manual" parsing, using the {@link #parseToBof()} and {@link #nextData()} |
5221 |
22 Jan 10 |
nicklas |
methods. |
5221 |
22 Jan 10 |
nicklas |
<li>Event-based parsing using the {@link #parse(EventHandler)} method. |
5221 |
22 Jan 10 |
nicklas |
This parser issues the following events: |
5221 |
22 Jan 10 |
nicklas |
<ul> |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #DATA_EVENT}: For each data line that is parsed. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #END_OF_FILE_EVENT}: When the end of the file has been reached. |
5221 |
22 Jan 10 |
nicklas |
</ul> |
5221 |
22 Jan 10 |
nicklas |
50 |
|
5221 |
22 Jan 10 |
nicklas |
The {@link MatrixModel} class implements a simple event handler that |
5221 |
22 Jan 10 |
nicklas |
collects the information from the data file and provides methods |
5221 |
22 Jan 10 |
nicklas |
for accessing it, though it is recommended only for small files. |
5194 |
04 Dec 09 |
nicklas |
</ul> |
5194 |
04 Dec 09 |
nicklas |
55 |
|
5194 |
04 Dec 09 |
nicklas |
<p> |
5194 |
04 Dec 09 |
nicklas |
This class may be subclassed to provide customized behaviour. |
5194 |
04 Dec 09 |
nicklas |
58 |
|
5194 |
04 Dec 09 |
nicklas |
@author Nicklas |
5194 |
04 Dec 09 |
nicklas |
@version 2.15 |
5194 |
04 Dec 09 |
nicklas |
@base.modified $Date$ |
5194 |
04 Dec 09 |
nicklas |
62 |
*/ |
5194 |
04 Dec 09 |
nicklas |
63 |
public class DataParser |
5224 |
27 Jan 10 |
nicklas |
64 |
implements BfsParser |
5194 |
04 Dec 09 |
nicklas |
65 |
{ |
5194 |
04 Dec 09 |
nicklas |
66 |
|
5194 |
04 Dec 09 |
nicklas |
67 |
|
5194 |
04 Dec 09 |
nicklas |
68 |
/** |
5225 |
29 Jan 10 |
nicklas |
Event type that is issued for each data line. The event data is a |
5194 |
04 Dec 09 |
nicklas |
string array with that data. |
5194 |
04 Dec 09 |
nicklas |
71 |
*/ |
5194 |
04 Dec 09 |
nicklas |
72 |
public static final EventType<String[]> DATA_EVENT = |
5194 |
04 Dec 09 |
nicklas |
73 |
new EventType<String[]>("data.data"); |
5194 |
04 Dec 09 |
nicklas |
74 |
|
5194 |
04 Dec 09 |
nicklas |
75 |
/** |
5194 |
04 Dec 09 |
nicklas |
Event type that is issued when the end-of-file had been reached. |
5194 |
04 Dec 09 |
nicklas |
No event data is submitted. |
5194 |
04 Dec 09 |
nicklas |
78 |
*/ |
5194 |
04 Dec 09 |
nicklas |
79 |
public static final EventType<Object> END_OF_FILE_EVENT = |
5194 |
04 Dec 09 |
nicklas |
80 |
new EventType<Object>("data.end"); |
5224 |
27 Jan 10 |
nicklas |
81 |
|
5224 |
27 Jan 10 |
nicklas |
82 |
/** |
5224 |
27 Jan 10 |
nicklas |
Utility method for creating a data parser when you have an |
5224 |
27 Jan 10 |
nicklas |
input stream. |
5224 |
27 Jan 10 |
nicklas |
@param in The input stream the parser should read from |
5224 |
27 Jan 10 |
nicklas |
@param filename Optional, the name of the file the input stream |
5224 |
27 Jan 10 |
nicklas |
is reading from |
5224 |
27 Jan 10 |
nicklas |
88 |
*/ |
5225 |
29 Jan 10 |
nicklas |
89 |
public static DataParser create(InputStream in, String filename, long fileSize) |
5224 |
27 Jan 10 |
nicklas |
90 |
{ |
5224 |
27 Jan 10 |
nicklas |
91 |
DataParser parser = new DataParser(); |
5224 |
27 Jan 10 |
nicklas |
92 |
parser.setFilename(filename); |
5224 |
27 Jan 10 |
nicklas |
93 |
parser.setInputStream(in); |
5225 |
29 Jan 10 |
nicklas |
94 |
parser.setFileSize(fileSize); |
5224 |
27 Jan 10 |
nicklas |
95 |
return parser; |
5224 |
27 Jan 10 |
nicklas |
96 |
} |
5224 |
27 Jan 10 |
nicklas |
97 |
|
5236 |
05 Feb 10 |
nicklas |
98 |
/** |
5236 |
05 Feb 10 |
nicklas |
Utility method for creating a data parser for a file in the BASE |
5236 |
05 Feb 10 |
nicklas |
file system. |
5236 |
05 Feb 10 |
nicklas |
@param file The file in the BASE file system |
5236 |
05 Feb 10 |
nicklas |
102 |
*/ |
5236 |
05 Feb 10 |
nicklas |
103 |
public static DataParser create(File file) |
5236 |
05 Feb 10 |
nicklas |
104 |
{ |
5236 |
05 Feb 10 |
nicklas |
105 |
return create(file.getDownloadStream(0), file.getName(), file.getSize()); |
5236 |
05 Feb 10 |
nicklas |
106 |
} |
5236 |
05 Feb 10 |
nicklas |
107 |
|
5319 |
20 Apr 10 |
nicklas |
108 |
/** |
5319 |
20 Apr 10 |
nicklas |
Utility method for creating a data parser for a file in the local |
5319 |
20 Apr 10 |
nicklas |
file system. |
5319 |
20 Apr 10 |
nicklas |
@param file The file in the local file system |
5319 |
20 Apr 10 |
nicklas |
112 |
*/ |
5319 |
20 Apr 10 |
nicklas |
113 |
public static DataParser create(java.io.File file) |
5319 |
20 Apr 10 |
nicklas |
114 |
throws IOException |
5319 |
20 Apr 10 |
nicklas |
115 |
{ |
5319 |
20 Apr 10 |
nicklas |
116 |
return create(new FileInputStream(file), file.getName(), file.length()); |
5319 |
20 Apr 10 |
nicklas |
117 |
} |
5319 |
20 Apr 10 |
nicklas |
118 |
|
5319 |
20 Apr 10 |
nicklas |
119 |
|
5194 |
04 Dec 09 |
nicklas |
120 |
private final EncoderDecoder decoder; |
5221 |
22 Jan 10 |
nicklas |
121 |
private InputStream in; |
5221 |
22 Jan 10 |
nicklas |
122 |
private String filename; |
5225 |
29 Jan 10 |
nicklas |
123 |
private long fileSize = -1; |
5221 |
22 Jan 10 |
nicklas |
124 |
private FlatFileParser ffp; |
5221 |
22 Jan 10 |
nicklas |
125 |
private int numColumns = -1; |
5194 |
04 Dec 09 |
nicklas |
126 |
|
5194 |
04 Dec 09 |
nicklas |
127 |
/** |
5194 |
04 Dec 09 |
nicklas |
Create a new data parser. |
5194 |
04 Dec 09 |
nicklas |
129 |
*/ |
5194 |
04 Dec 09 |
nicklas |
130 |
public DataParser() |
5194 |
04 Dec 09 |
nicklas |
131 |
{ |
5194 |
04 Dec 09 |
nicklas |
132 |
this.decoder = new TabCrLfEncoderDecoder(true); |
5194 |
04 Dec 09 |
nicklas |
133 |
} |
5194 |
04 Dec 09 |
nicklas |
134 |
|
5194 |
04 Dec 09 |
nicklas |
135 |
/** |
5194 |
04 Dec 09 |
nicklas |
Decode an encoded value. Values are encoded/decoded |
5194 |
04 Dec 09 |
nicklas |
with a {@link TabCrLfEncoderDecoder}. |
5194 |
04 Dec 09 |
nicklas |
138 |
|
5194 |
04 Dec 09 |
nicklas |
@param value The encoded value |
5194 |
04 Dec 09 |
nicklas |
@return The decoded value |
5194 |
04 Dec 09 |
nicklas |
141 |
*/ |
5194 |
04 Dec 09 |
nicklas |
142 |
public String decodeValue(String value) |
5194 |
04 Dec 09 |
nicklas |
143 |
{ |
5194 |
04 Dec 09 |
nicklas |
144 |
return decoder.decode(value); |
5194 |
04 Dec 09 |
nicklas |
145 |
} |
5194 |
04 Dec 09 |
nicklas |
146 |
|
5194 |
04 Dec 09 |
nicklas |
147 |
/** |
5221 |
22 Jan 10 |
nicklas |
Get the file name that this parser is reading from. |
5221 |
22 Jan 10 |
nicklas |
@return The file name or null if not known |
5221 |
22 Jan 10 |
nicklas |
150 |
*/ |
5224 |
27 Jan 10 |
nicklas |
151 |
@Override |
5221 |
22 Jan 10 |
nicklas |
152 |
public String getFilename() |
5221 |
22 Jan 10 |
nicklas |
153 |
{ |
5221 |
22 Jan 10 |
nicklas |
154 |
return filename; |
5221 |
22 Jan 10 |
nicklas |
155 |
} |
5221 |
22 Jan 10 |
nicklas |
156 |
|
5221 |
22 Jan 10 |
nicklas |
157 |
/** |
5221 |
22 Jan 10 |
nicklas |
Set the file name that this parser is reading from. |
5221 |
22 Jan 10 |
nicklas |
159 |
*/ |
5221 |
22 Jan 10 |
nicklas |
160 |
public void setFilename(String filename) |
5221 |
22 Jan 10 |
nicklas |
161 |
{ |
5221 |
22 Jan 10 |
nicklas |
162 |
this.filename = filename; |
5221 |
22 Jan 10 |
nicklas |
163 |
} |
5221 |
22 Jan 10 |
nicklas |
164 |
|
5221 |
22 Jan 10 |
nicklas |
165 |
/** |
5225 |
29 Jan 10 |
nicklas |
Get the size in bytes of the file that this parser is reading from. |
5225 |
29 Jan 10 |
nicklas |
@return The size or -1 if not known |
5225 |
29 Jan 10 |
nicklas |
168 |
*/ |
5225 |
29 Jan 10 |
nicklas |
169 |
@Override |
5225 |
29 Jan 10 |
nicklas |
170 |
public long getFileSize() |
5225 |
29 Jan 10 |
nicklas |
171 |
{ |
5225 |
29 Jan 10 |
nicklas |
172 |
return fileSize; |
5225 |
29 Jan 10 |
nicklas |
173 |
} |
5225 |
29 Jan 10 |
nicklas |
174 |
|
5225 |
29 Jan 10 |
nicklas |
175 |
/** |
5225 |
29 Jan 10 |
nicklas |
Set the size of the file or -1 if not known. |
5225 |
29 Jan 10 |
nicklas |
177 |
*/ |
5225 |
29 Jan 10 |
nicklas |
178 |
public void setFileSize(long fileSize) |
5225 |
29 Jan 10 |
nicklas |
179 |
{ |
5225 |
29 Jan 10 |
nicklas |
180 |
this.fileSize = fileSize; |
5225 |
29 Jan 10 |
nicklas |
181 |
} |
5225 |
29 Jan 10 |
nicklas |
182 |
|
5319 |
20 Apr 10 |
nicklas |
183 |
@Override |
5319 |
20 Apr 10 |
nicklas |
184 |
public void close() |
5319 |
20 Apr 10 |
nicklas |
185 |
{ |
5319 |
20 Apr 10 |
nicklas |
186 |
if (in != null) FileUtil.close(in); |
5319 |
20 Apr 10 |
nicklas |
187 |
} |
5319 |
20 Apr 10 |
nicklas |
188 |
|
5225 |
29 Jan 10 |
nicklas |
189 |
/** |
5221 |
22 Jan 10 |
nicklas |
Set the input stream that should be parsed. This will also reset the |
5221 |
22 Jan 10 |
nicklas |
parser. All information from a previously parsed file is lost. |
5221 |
22 Jan 10 |
nicklas |
@param in The stream to parse (in UTF-8 format) |
5221 |
22 Jan 10 |
nicklas |
193 |
*/ |
5221 |
22 Jan 10 |
nicklas |
194 |
public void setInputStream(InputStream in) |
5221 |
22 Jan 10 |
nicklas |
195 |
{ |
5319 |
20 Apr 10 |
nicklas |
196 |
close(); |
5221 |
22 Jan 10 |
nicklas |
197 |
this.in = in; |
5221 |
22 Jan 10 |
nicklas |
198 |
this.ffp = null; |
5221 |
22 Jan 10 |
nicklas |
199 |
this.numColumns = -1; |
5221 |
22 Jan 10 |
nicklas |
200 |
} |
5221 |
22 Jan 10 |
nicklas |
201 |
|
5221 |
22 Jan 10 |
nicklas |
202 |
/** |
5224 |
27 Jan 10 |
nicklas |
Set the number of data columns that are expected to be found in this |
5224 |
27 Jan 10 |
nicklas |
file. |
5224 |
27 Jan 10 |
nicklas |
@param numColumns The number of columns or -1 to let the parser |
5224 |
27 Jan 10 |
nicklas |
automatically detect this on the first data line |
5224 |
27 Jan 10 |
nicklas |
207 |
*/ |
5224 |
27 Jan 10 |
nicklas |
208 |
public void setNumColumns(int numColumns) |
5224 |
27 Jan 10 |
nicklas |
209 |
{ |
5224 |
27 Jan 10 |
nicklas |
210 |
this.numColumns = numColumns; |
5224 |
27 Jan 10 |
nicklas |
211 |
} |
5224 |
27 Jan 10 |
nicklas |
212 |
|
5224 |
27 Jan 10 |
nicklas |
213 |
/** |
5194 |
04 Dec 09 |
nicklas |
Parse the input stream and notify the specified event handler with events. |
5194 |
04 Dec 09 |
nicklas |
The event handler is responsible for keeping track of and storing the |
5194 |
04 Dec 09 |
nicklas |
data of interest. |
5194 |
04 Dec 09 |
nicklas |
217 |
|
5194 |
04 Dec 09 |
nicklas |
@param handler An event handler |
5194 |
04 Dec 09 |
nicklas |
@throws IOException If there is an error reading the stream data |
5194 |
04 Dec 09 |
nicklas |
@throws NullPointerException If the stream or event handler is null |
5194 |
04 Dec 09 |
nicklas |
221 |
*/ |
5221 |
22 Jan 10 |
nicklas |
222 |
public void parse(EventHandler handler) |
5194 |
04 Dec 09 |
nicklas |
223 |
throws IOException |
5194 |
04 Dec 09 |
nicklas |
224 |
{ |
5194 |
04 Dec 09 |
nicklas |
225 |
if (handler == null) throw new NullPointerException("handler"); |
5221 |
22 Jan 10 |
nicklas |
226 |
parseToBof(); |
5194 |
04 Dec 09 |
nicklas |
227 |
|
5194 |
04 Dec 09 |
nicklas |
// Repeat as long as there is data in the file |
5221 |
22 Jan 10 |
nicklas |
229 |
String[] data = nextData(); |
5221 |
22 Jan 10 |
nicklas |
230 |
while (data != null) |
5194 |
04 Dec 09 |
nicklas |
231 |
{ |
5225 |
29 Jan 10 |
nicklas |
232 |
ThreadSignalHandler.checkInterrupted(); |
5194 |
04 Dec 09 |
nicklas |
233 |
handleData(handler, data); |
5221 |
22 Jan 10 |
nicklas |
234 |
data = nextData(); |
5194 |
04 Dec 09 |
nicklas |
235 |
} |
5194 |
04 Dec 09 |
nicklas |
236 |
handleEndOfFile(handler); |
5194 |
04 Dec 09 |
nicklas |
237 |
} |
5194 |
04 Dec 09 |
nicklas |
238 |
|
5194 |
04 Dec 09 |
nicklas |
239 |
/** |
5221 |
22 Jan 10 |
nicklas |
Initialize parsing of the data file. This method can only |
5221 |
22 Jan 10 |
nicklas |
be called once on a given input stream. To parse a second file, call |
5221 |
22 Jan 10 |
nicklas |
{@link #setInputStream(InputStream)} again to reset the parser. |
5221 |
22 Jan 10 |
nicklas |
243 |
|
5221 |
22 Jan 10 |
nicklas |
@throws IOException If the header line is not found |
5221 |
22 Jan 10 |
nicklas |
or if there is any other IO error |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If the parsing has already started |
5221 |
22 Jan 10 |
nicklas |
@throws NullPointerException If no input stream has been specified |
5221 |
22 Jan 10 |
nicklas |
248 |
*/ |
5221 |
22 Jan 10 |
nicklas |
249 |
public void parseToBof() |
5221 |
22 Jan 10 |
nicklas |
250 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
251 |
{ |
5221 |
22 Jan 10 |
nicklas |
252 |
if (in == null) throw new NullPointerException("inputStream"); |
5221 |
22 Jan 10 |
nicklas |
253 |
if (ffp != null) |
5221 |
22 Jan 10 |
nicklas |
254 |
{ |
5221 |
22 Jan 10 |
nicklas |
255 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
256 |
"' has already started. Call setInputStream() to reset the parser."); |
5221 |
22 Jan 10 |
nicklas |
257 |
} |
5221 |
22 Jan 10 |
nicklas |
258 |
ffp = createFlatFileParser(in); |
5221 |
22 Jan 10 |
nicklas |
259 |
} |
5221 |
22 Jan 10 |
nicklas |
260 |
|
5221 |
22 Jan 10 |
nicklas |
261 |
/** |
5221 |
22 Jan 10 |
nicklas |
Get the next data line in the file. |
5221 |
22 Jan 10 |
nicklas |
263 |
|
5221 |
22 Jan 10 |
nicklas |
@return An array with the data, or null if no more data is found |
5221 |
22 Jan 10 |
nicklas |
@throws IOException If there is an IO error or if the data is invalid |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If not {@link #parseToBof()} has been called |
5221 |
22 Jan 10 |
nicklas |
267 |
*/ |
5221 |
22 Jan 10 |
nicklas |
268 |
public String[] nextData() |
5221 |
22 Jan 10 |
nicklas |
269 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
270 |
{ |
5221 |
22 Jan 10 |
nicklas |
271 |
if (ffp == null) |
5221 |
22 Jan 10 |
nicklas |
272 |
{ |
5221 |
22 Jan 10 |
nicklas |
273 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
274 |
"' hasn't started. Call parseToBof() to begin parsing."); |
5221 |
22 Jan 10 |
nicklas |
275 |
} |
5221 |
22 Jan 10 |
nicklas |
276 |
if (!ffp.hasMoreData()) return null; |
5221 |
22 Jan 10 |
nicklas |
277 |
|
5221 |
22 Jan 10 |
nicklas |
278 |
String[] data = ffp.nextData().data(); |
5221 |
22 Jan 10 |
nicklas |
279 |
if (numColumns == -1) |
5221 |
22 Jan 10 |
nicklas |
280 |
{ |
5221 |
22 Jan 10 |
nicklas |
281 |
numColumns = data.length; |
5221 |
22 Jan 10 |
nicklas |
282 |
} |
5221 |
22 Jan 10 |
nicklas |
283 |
else if (numColumns != data.length) |
5221 |
22 Jan 10 |
nicklas |
284 |
{ |
5221 |
22 Jan 10 |
nicklas |
285 |
throw new IOException("Expected " + numColumns + " columns on line " + |
5221 |
22 Jan 10 |
nicklas |
286 |
ffp.getParsedLines() + " in file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
287 |
"'; found " + data.length + " columns"); |
5221 |
22 Jan 10 |
nicklas |
288 |
} |
5221 |
22 Jan 10 |
nicklas |
289 |
|
5221 |
22 Jan 10 |
nicklas |
// Decode the values |
5221 |
22 Jan 10 |
nicklas |
291 |
for (int j = 1; j < numColumns; ++j) |
5221 |
22 Jan 10 |
nicklas |
292 |
{ |
5221 |
22 Jan 10 |
nicklas |
293 |
data[j] = decodeValue(data[j]); |
5221 |
22 Jan 10 |
nicklas |
294 |
} |
5221 |
22 Jan 10 |
nicklas |
295 |
return data; |
5221 |
22 Jan 10 |
nicklas |
296 |
} |
5221 |
22 Jan 10 |
nicklas |
297 |
|
5224 |
27 Jan 10 |
nicklas |
298 |
@Override |
5224 |
27 Jan 10 |
nicklas |
299 |
public int getCurrentLine() |
5224 |
27 Jan 10 |
nicklas |
300 |
{ |
5224 |
27 Jan 10 |
nicklas |
301 |
return ffp == null ? -1 : ffp.getParsedLines(); |
5224 |
27 Jan 10 |
nicklas |
302 |
} |
5224 |
27 Jan 10 |
nicklas |
303 |
|
5225 |
29 Jan 10 |
nicklas |
304 |
@Override |
5225 |
29 Jan 10 |
nicklas |
305 |
public long getParsedBytes() |
5225 |
29 Jan 10 |
nicklas |
306 |
{ |
5225 |
29 Jan 10 |
nicklas |
307 |
return ffp == null ? -1 : ffp.getParsedBytes(); |
5225 |
29 Jan 10 |
nicklas |
308 |
} |
5225 |
29 Jan 10 |
nicklas |
309 |
|
5221 |
22 Jan 10 |
nicklas |
310 |
/** |
5194 |
04 Dec 09 |
nicklas |
Create a new flat file parser that can parse BFS annotation |
5194 |
04 Dec 09 |
nicklas |
files. This method should set all regular expressions that |
5194 |
04 Dec 09 |
nicklas |
are needed to parse the stream (assumed to be UTF-8). |
5194 |
04 Dec 09 |
nicklas |
314 |
*/ |
5194 |
04 Dec 09 |
nicklas |
315 |
protected FlatFileParser createFlatFileParser(InputStream in) |
5194 |
04 Dec 09 |
nicklas |
316 |
{ |
5194 |
04 Dec 09 |
nicklas |
317 |
FlatFileParser ffp = new FlatFileParser(); |
5194 |
04 Dec 09 |
nicklas |
318 |
ffp.setDataSplitterRegexp(Pattern.compile("\\t")); |
5194 |
04 Dec 09 |
nicklas |
319 |
ffp.setMinDataColumns(1); |
5194 |
04 Dec 09 |
nicklas |
320 |
ffp.setMaxDataColumns(0); // = any number of columns |
5194 |
04 Dec 09 |
nicklas |
321 |
ffp.setMaxUnknownLines(0); |
5194 |
04 Dec 09 |
nicklas |
322 |
ffp.setInputStream(in, "UTF-8"); |
5194 |
04 Dec 09 |
nicklas |
323 |
return ffp; |
5194 |
04 Dec 09 |
nicklas |
324 |
} |
5194 |
04 Dec 09 |
nicklas |
325 |
|
5194 |
04 Dec 09 |
nicklas |
326 |
/** |
5194 |
04 Dec 09 |
nicklas |
Handle the data event. The default implemention sends |
5194 |
04 Dec 09 |
nicklas |
a {@link #DATA_EVENT} to the event handler |
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5194 |
04 Dec 09 |
nicklas |
@param data An array with the data |
5194 |
04 Dec 09 |
nicklas |
331 |
*/ |
5194 |
04 Dec 09 |
nicklas |
332 |
protected void handleData(EventHandler handler, String[] data) |
5194 |
04 Dec 09 |
nicklas |
333 |
{ |
5224 |
27 Jan 10 |
nicklas |
334 |
handler.handleEvent(DATA_EVENT, data, this); |
5194 |
04 Dec 09 |
nicklas |
335 |
} |
5194 |
04 Dec 09 |
nicklas |
336 |
|
5194 |
04 Dec 09 |
nicklas |
337 |
/** |
5194 |
04 Dec 09 |
nicklas |
Handle the end-of-file event. The default implementation sends an |
5194 |
04 Dec 09 |
nicklas |
{@link #END_OF_FILE_EVENT} notification to the event handler. |
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5194 |
04 Dec 09 |
nicklas |
341 |
*/ |
5194 |
04 Dec 09 |
nicklas |
342 |
protected void handleEndOfFile(EventHandler handler) |
5194 |
04 Dec 09 |
nicklas |
343 |
{ |
5224 |
27 Jan 10 |
nicklas |
344 |
handler.handleEvent(END_OF_FILE_EVENT, null, this); |
5194 |
04 Dec 09 |
nicklas |
345 |
} |
5194 |
04 Dec 09 |
nicklas |
346 |
|
5194 |
04 Dec 09 |
nicklas |
347 |
} |