5193 |
27 Nov 09 |
nicklas |
1 |
/** |
5193 |
27 Nov 09 |
nicklas |
$Id$ |
5193 |
27 Nov 09 |
nicklas |
3 |
|
5193 |
27 Nov 09 |
nicklas |
Copyright (C) 2009 Nicklas Nordborg |
5193 |
27 Nov 09 |
nicklas |
5 |
|
5193 |
27 Nov 09 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5193 |
27 Nov 09 |
nicklas |
Available at http://base.thep.lu.se/ |
5193 |
27 Nov 09 |
nicklas |
8 |
|
5193 |
27 Nov 09 |
nicklas |
BASE is free software; you can redistribute it and/or |
5193 |
27 Nov 09 |
nicklas |
modify it under the terms of the GNU General Public License |
5193 |
27 Nov 09 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5193 |
27 Nov 09 |
nicklas |
of the License, or (at your option) any later version. |
5193 |
27 Nov 09 |
nicklas |
13 |
|
5193 |
27 Nov 09 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5193 |
27 Nov 09 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5193 |
27 Nov 09 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5193 |
27 Nov 09 |
nicklas |
GNU General Public License for more details. |
5193 |
27 Nov 09 |
nicklas |
18 |
|
5193 |
27 Nov 09 |
nicklas |
You should have received a copy of the GNU General Public License |
5193 |
27 Nov 09 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5193 |
27 Nov 09 |
nicklas |
21 |
*/ |
5193 |
27 Nov 09 |
nicklas |
22 |
package net.sf.basedb.util.bfs; |
5193 |
27 Nov 09 |
nicklas |
23 |
|
5319 |
20 Apr 10 |
nicklas |
24 |
import java.io.FileInputStream; |
5193 |
27 Nov 09 |
nicklas |
25 |
import java.io.IOException; |
5193 |
27 Nov 09 |
nicklas |
26 |
import java.io.InputStream; |
5193 |
27 Nov 09 |
nicklas |
27 |
import java.util.HashSet; |
5193 |
27 Nov 09 |
nicklas |
28 |
import java.util.List; |
5193 |
27 Nov 09 |
nicklas |
29 |
import java.util.Set; |
5193 |
27 Nov 09 |
nicklas |
30 |
import java.util.regex.Pattern; |
5193 |
27 Nov 09 |
nicklas |
31 |
|
5224 |
27 Jan 10 |
nicklas |
32 |
import net.sf.basedb.core.File; |
5225 |
29 Jan 10 |
nicklas |
33 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
5319 |
20 Apr 10 |
nicklas |
34 |
import net.sf.basedb.util.FileUtil; |
5193 |
27 Nov 09 |
nicklas |
35 |
import net.sf.basedb.util.Values; |
5193 |
27 Nov 09 |
nicklas |
36 |
import net.sf.basedb.util.encode.EncoderDecoder; |
5193 |
27 Nov 09 |
nicklas |
37 |
import net.sf.basedb.util.encode.TabCrLfEncoderDecoder; |
5193 |
27 Nov 09 |
nicklas |
38 |
import net.sf.basedb.util.parser.FlatFileParser; |
5193 |
27 Nov 09 |
nicklas |
39 |
|
5193 |
27 Nov 09 |
nicklas |
40 |
/** |
5221 |
22 Jan 10 |
nicklas |
Parser implementation that parses a BFS annotation files. Before parsing |
5221 |
22 Jan 10 |
nicklas |
is started the stream to parse must be specified by calling |
5221 |
22 Jan 10 |
nicklas |
{@link #setInputStream(InputStream)}. The actual parsing can be done |
5221 |
22 Jan 10 |
nicklas |
in two different ways: |
5193 |
27 Nov 09 |
nicklas |
<ul> |
5221 |
22 Jan 10 |
nicklas |
<li>"Manual" parsing, using the {@link #parseToBof()} and {@link #nextData()} |
5221 |
22 Jan 10 |
nicklas |
methods. |
5221 |
22 Jan 10 |
nicklas |
<li>Event-based parsing using the {@link #parse(EventHandler)} method. |
5221 |
22 Jan 10 |
nicklas |
This parser issues the following events: |
5221 |
22 Jan 10 |
nicklas |
<ul> |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #HEADER_EVENT}: When the header-line (should be the first line) |
5221 |
22 Jan 10 |
nicklas |
has been found. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #DATA_EVENT}: For each data line that is parsed. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #END_OF_FILE_EVENT}: When the end of the file has been reached. |
5221 |
22 Jan 10 |
nicklas |
</ul> |
5221 |
22 Jan 10 |
nicklas |
56 |
|
5221 |
22 Jan 10 |
nicklas |
The {@link AnnotationModel} class implements a simple event handler that |
5221 |
22 Jan 10 |
nicklas |
collects the information from the annotations file and provides methods |
5221 |
22 Jan 10 |
nicklas |
for accessing it. |
5193 |
27 Nov 09 |
nicklas |
</ul> |
5193 |
27 Nov 09 |
nicklas |
61 |
|
5193 |
27 Nov 09 |
nicklas |
<p> |
5193 |
27 Nov 09 |
nicklas |
This class may be subclassed to provide customized behaviour. |
5193 |
27 Nov 09 |
nicklas |
64 |
|
5193 |
27 Nov 09 |
nicklas |
@author Nicklas |
5193 |
27 Nov 09 |
nicklas |
@version 2.15 |
5193 |
27 Nov 09 |
nicklas |
@base.modified $Date$ |
5193 |
27 Nov 09 |
nicklas |
68 |
*/ |
5193 |
27 Nov 09 |
nicklas |
69 |
public class AnnotationParser |
5224 |
27 Jan 10 |
nicklas |
70 |
implements BfsParser |
5193 |
27 Nov 09 |
nicklas |
71 |
{ |
5193 |
27 Nov 09 |
nicklas |
72 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued when the header line is found. |
5193 |
27 Nov 09 |
nicklas |
The event data is a string array with the headers, |
5193 |
27 Nov 09 |
nicklas |
including the ID header. |
5193 |
27 Nov 09 |
nicklas |
76 |
*/ |
5193 |
27 Nov 09 |
nicklas |
77 |
public static final EventType<String[]> HEADER_EVENT = |
5193 |
27 Nov 09 |
nicklas |
78 |
new EventType<String[]>("annotation.header"); |
5193 |
27 Nov 09 |
nicklas |
79 |
|
5193 |
27 Nov 09 |
nicklas |
80 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event thep that is issued for each data line. The event data is a |
5193 |
27 Nov 09 |
nicklas |
string array with that data, including the ID column. |
5193 |
27 Nov 09 |
nicklas |
83 |
*/ |
5193 |
27 Nov 09 |
nicklas |
84 |
public static final EventType<String[]> DATA_EVENT = |
5193 |
27 Nov 09 |
nicklas |
85 |
new EventType<String[]>("annotation.data"); |
5193 |
27 Nov 09 |
nicklas |
86 |
|
5193 |
27 Nov 09 |
nicklas |
87 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued when the end-of-file had been reached. |
5193 |
27 Nov 09 |
nicklas |
No event data is submitted. |
5193 |
27 Nov 09 |
nicklas |
90 |
*/ |
5193 |
27 Nov 09 |
nicklas |
91 |
public static final EventType<Object> END_OF_FILE_EVENT = |
5193 |
27 Nov 09 |
nicklas |
92 |
new EventType<Object>("annotation.end"); |
5224 |
27 Jan 10 |
nicklas |
93 |
|
5224 |
27 Jan 10 |
nicklas |
94 |
/** |
5224 |
27 Jan 10 |
nicklas |
Utility method for creating an annotation parser when you have an |
5224 |
27 Jan 10 |
nicklas |
input stream. |
5224 |
27 Jan 10 |
nicklas |
@param in The input stream the parser should read from |
5224 |
27 Jan 10 |
nicklas |
@param filename Optional, the name of the file the input stream |
5224 |
27 Jan 10 |
nicklas |
is reading from |
5225 |
29 Jan 10 |
nicklas |
@param size The size of the file in bytes, or -1 if not |
5225 |
29 Jan 10 |
nicklas |
known |
5224 |
27 Jan 10 |
nicklas |
102 |
*/ |
5225 |
29 Jan 10 |
nicklas |
103 |
public static AnnotationParser create(InputStream in, String filename, long size) |
5224 |
27 Jan 10 |
nicklas |
104 |
{ |
5224 |
27 Jan 10 |
nicklas |
105 |
AnnotationParser parser = new AnnotationParser(); |
5224 |
27 Jan 10 |
nicklas |
106 |
parser.setFilename(filename); |
5224 |
27 Jan 10 |
nicklas |
107 |
parser.setInputStream(in); |
5225 |
29 Jan 10 |
nicklas |
108 |
parser.setFileSize(size); |
5224 |
27 Jan 10 |
nicklas |
109 |
return parser; |
5224 |
27 Jan 10 |
nicklas |
110 |
} |
5224 |
27 Jan 10 |
nicklas |
111 |
|
5224 |
27 Jan 10 |
nicklas |
112 |
/** |
5224 |
27 Jan 10 |
nicklas |
Utility method for creating an annotation parser for a file in the BASE |
5224 |
27 Jan 10 |
nicklas |
file system. |
5224 |
27 Jan 10 |
nicklas |
@param file The file in the BASE file system |
5224 |
27 Jan 10 |
nicklas |
116 |
*/ |
5224 |
27 Jan 10 |
nicklas |
117 |
public static AnnotationParser create(File file) |
5224 |
27 Jan 10 |
nicklas |
118 |
{ |
5225 |
29 Jan 10 |
nicklas |
119 |
return create(file.getDownloadStream(0), file.getName(), file.getSize()); |
5224 |
27 Jan 10 |
nicklas |
120 |
} |
5224 |
27 Jan 10 |
nicklas |
121 |
|
5319 |
20 Apr 10 |
nicklas |
122 |
/** |
5319 |
20 Apr 10 |
nicklas |
Utility method for creating an annotation parser for a file in the local |
5319 |
20 Apr 10 |
nicklas |
file system. |
5319 |
20 Apr 10 |
nicklas |
@param file The file in the local file system |
5319 |
20 Apr 10 |
nicklas |
126 |
*/ |
5319 |
20 Apr 10 |
nicklas |
127 |
public static AnnotationParser create(java.io.File file) |
5319 |
20 Apr 10 |
nicklas |
128 |
throws IOException |
5319 |
20 Apr 10 |
nicklas |
129 |
{ |
5319 |
20 Apr 10 |
nicklas |
130 |
return create(new FileInputStream(file), file.getName(), file.length()); |
5319 |
20 Apr 10 |
nicklas |
131 |
} |
5319 |
20 Apr 10 |
nicklas |
132 |
|
5319 |
20 Apr 10 |
nicklas |
133 |
|
5193 |
27 Nov 09 |
nicklas |
134 |
private final EncoderDecoder decoder; |
5221 |
22 Jan 10 |
nicklas |
135 |
private InputStream in; |
5221 |
22 Jan 10 |
nicklas |
136 |
private String filename; |
5225 |
29 Jan 10 |
nicklas |
137 |
private long fileSize = -1; |
5221 |
22 Jan 10 |
nicklas |
138 |
private FlatFileParser ffp; |
5221 |
22 Jan 10 |
nicklas |
139 |
private int numColumns = -1; |
5221 |
22 Jan 10 |
nicklas |
140 |
private Set<Integer> usedIds; |
5193 |
27 Nov 09 |
nicklas |
141 |
|
5193 |
27 Nov 09 |
nicklas |
142 |
/** |
5193 |
27 Nov 09 |
nicklas |
Create a new annotation parser. |
5193 |
27 Nov 09 |
nicklas |
144 |
*/ |
5193 |
27 Nov 09 |
nicklas |
145 |
public AnnotationParser() |
5193 |
27 Nov 09 |
nicklas |
146 |
{ |
5193 |
27 Nov 09 |
nicklas |
147 |
this.decoder = new TabCrLfEncoderDecoder(true); |
5193 |
27 Nov 09 |
nicklas |
148 |
} |
5193 |
27 Nov 09 |
nicklas |
149 |
|
5193 |
27 Nov 09 |
nicklas |
150 |
/** |
5193 |
27 Nov 09 |
nicklas |
Decode an encoded value. Values are encoded/decoded |
5193 |
27 Nov 09 |
nicklas |
with a {@link TabCrLfEncoderDecoder}. |
5193 |
27 Nov 09 |
nicklas |
153 |
|
5193 |
27 Nov 09 |
nicklas |
@param value The encoded value |
5193 |
27 Nov 09 |
nicklas |
@return The decoded value |
5193 |
27 Nov 09 |
nicklas |
156 |
*/ |
5193 |
27 Nov 09 |
nicklas |
157 |
public String decodeValue(String value) |
5193 |
27 Nov 09 |
nicklas |
158 |
{ |
5193 |
27 Nov 09 |
nicklas |
159 |
return decoder.decode(value); |
5193 |
27 Nov 09 |
nicklas |
160 |
} |
5193 |
27 Nov 09 |
nicklas |
161 |
|
5193 |
27 Nov 09 |
nicklas |
162 |
/** |
5221 |
22 Jan 10 |
nicklas |
Get the file name that this parser is reading from. |
5221 |
22 Jan 10 |
nicklas |
@return The file name or null if not known |
5221 |
22 Jan 10 |
nicklas |
165 |
*/ |
5224 |
27 Jan 10 |
nicklas |
166 |
@Override |
5221 |
22 Jan 10 |
nicklas |
167 |
public String getFilename() |
5221 |
22 Jan 10 |
nicklas |
168 |
{ |
5221 |
22 Jan 10 |
nicklas |
169 |
return filename; |
5221 |
22 Jan 10 |
nicklas |
170 |
} |
5221 |
22 Jan 10 |
nicklas |
171 |
|
5221 |
22 Jan 10 |
nicklas |
172 |
/** |
5221 |
22 Jan 10 |
nicklas |
Set the file name that this parser is reading from. |
5221 |
22 Jan 10 |
nicklas |
174 |
*/ |
5221 |
22 Jan 10 |
nicklas |
175 |
public void setFilename(String filename) |
5221 |
22 Jan 10 |
nicklas |
176 |
{ |
5221 |
22 Jan 10 |
nicklas |
177 |
this.filename = filename; |
5221 |
22 Jan 10 |
nicklas |
178 |
} |
5221 |
22 Jan 10 |
nicklas |
179 |
|
5221 |
22 Jan 10 |
nicklas |
180 |
/** |
5225 |
29 Jan 10 |
nicklas |
Get the size in bytes of the file that this parser is reading from. |
5225 |
29 Jan 10 |
nicklas |
@return The size or -1 if not known |
5225 |
29 Jan 10 |
nicklas |
183 |
*/ |
5225 |
29 Jan 10 |
nicklas |
184 |
@Override |
5225 |
29 Jan 10 |
nicklas |
185 |
public long getFileSize() |
5225 |
29 Jan 10 |
nicklas |
186 |
{ |
5225 |
29 Jan 10 |
nicklas |
187 |
return fileSize; |
5225 |
29 Jan 10 |
nicklas |
188 |
} |
5225 |
29 Jan 10 |
nicklas |
189 |
|
5225 |
29 Jan 10 |
nicklas |
190 |
/** |
5225 |
29 Jan 10 |
nicklas |
Set the size of the file or -1 if not known. |
5225 |
29 Jan 10 |
nicklas |
192 |
*/ |
5225 |
29 Jan 10 |
nicklas |
193 |
public void setFileSize(long fileSize) |
5225 |
29 Jan 10 |
nicklas |
194 |
{ |
5225 |
29 Jan 10 |
nicklas |
195 |
this.fileSize = fileSize; |
5225 |
29 Jan 10 |
nicklas |
196 |
} |
5225 |
29 Jan 10 |
nicklas |
197 |
|
5319 |
20 Apr 10 |
nicklas |
198 |
@Override |
5319 |
20 Apr 10 |
nicklas |
199 |
public void close() |
5319 |
20 Apr 10 |
nicklas |
200 |
{ |
5319 |
20 Apr 10 |
nicklas |
201 |
if (in != null) FileUtil.close(in); |
5319 |
20 Apr 10 |
nicklas |
202 |
} |
5319 |
20 Apr 10 |
nicklas |
203 |
|
5225 |
29 Jan 10 |
nicklas |
204 |
/** |
5221 |
22 Jan 10 |
nicklas |
Set the input stream that should be parsed. This will also reset the |
5221 |
22 Jan 10 |
nicklas |
parser. All information from a previously parsed file is lost. |
5221 |
22 Jan 10 |
nicklas |
@param in The stream to parse (in UTF-8 format) |
5221 |
22 Jan 10 |
nicklas |
208 |
*/ |
5221 |
22 Jan 10 |
nicklas |
209 |
public void setInputStream(InputStream in) |
5221 |
22 Jan 10 |
nicklas |
210 |
{ |
5319 |
20 Apr 10 |
nicklas |
211 |
close(); |
5221 |
22 Jan 10 |
nicklas |
212 |
this.in = in; |
5221 |
22 Jan 10 |
nicklas |
213 |
this.ffp = null; |
5221 |
22 Jan 10 |
nicklas |
214 |
this.usedIds = null; |
5221 |
22 Jan 10 |
nicklas |
215 |
this.numColumns = -1; |
5221 |
22 Jan 10 |
nicklas |
216 |
} |
5221 |
22 Jan 10 |
nicklas |
217 |
|
5221 |
22 Jan 10 |
nicklas |
218 |
/** |
5193 |
27 Nov 09 |
nicklas |
Parse the input stream and notify the specified event handler with events. |
5193 |
27 Nov 09 |
nicklas |
The event handler is responsible for keeping track of and storing the |
5193 |
27 Nov 09 |
nicklas |
data of interest. |
5193 |
27 Nov 09 |
nicklas |
222 |
|
5193 |
27 Nov 09 |
nicklas |
@param handler An event handler |
5193 |
27 Nov 09 |
nicklas |
@throws IOException If there is an error reading the stream data |
5193 |
27 Nov 09 |
nicklas |
@throws NullPointerException If the stream or event handler is null |
5193 |
27 Nov 09 |
nicklas |
226 |
*/ |
5221 |
22 Jan 10 |
nicklas |
227 |
public void parse(EventHandler handler) |
5193 |
27 Nov 09 |
nicklas |
228 |
throws IOException |
5193 |
27 Nov 09 |
nicklas |
229 |
{ |
5193 |
27 Nov 09 |
nicklas |
230 |
if (handler == null) throw new NullPointerException("handler"); |
5193 |
27 Nov 09 |
nicklas |
231 |
|
5221 |
22 Jan 10 |
nicklas |
232 |
String[] decoded = parseToBof(); |
5221 |
22 Jan 10 |
nicklas |
233 |
handleHeader(handler, decoded); |
5221 |
22 Jan 10 |
nicklas |
234 |
|
5221 |
22 Jan 10 |
nicklas |
// Repeat as long as there is data in the file |
5221 |
22 Jan 10 |
nicklas |
236 |
String[] data = nextData(); |
5221 |
22 Jan 10 |
nicklas |
237 |
while (data != null) |
5221 |
22 Jan 10 |
nicklas |
238 |
{ |
5225 |
29 Jan 10 |
nicklas |
239 |
ThreadSignalHandler.checkInterrupted(); |
5221 |
22 Jan 10 |
nicklas |
240 |
handleData(handler, data); |
5221 |
22 Jan 10 |
nicklas |
241 |
data = nextData(); |
5221 |
22 Jan 10 |
nicklas |
242 |
} |
5221 |
22 Jan 10 |
nicklas |
243 |
handleEndOfFile(handler); |
5221 |
22 Jan 10 |
nicklas |
244 |
} |
5193 |
27 Nov 09 |
nicklas |
245 |
|
5221 |
22 Jan 10 |
nicklas |
246 |
/** |
5221 |
22 Jan 10 |
nicklas |
Parse the header line of the annotations file. This method can only |
5221 |
22 Jan 10 |
nicklas |
be called once on a given input stream. To parse a second file, call |
5221 |
22 Jan 10 |
nicklas |
{@link #setInputStream(InputStream)} again to reset the parser. |
5221 |
22 Jan 10 |
nicklas |
250 |
|
5221 |
22 Jan 10 |
nicklas |
@return An array with the column headers |
5221 |
22 Jan 10 |
nicklas |
@throws IOException If the header line is not found |
5221 |
22 Jan 10 |
nicklas |
or if there is any other IO error |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If the parsing has already started |
5221 |
22 Jan 10 |
nicklas |
@throws NullPointerException If no input stream has been specified |
5221 |
22 Jan 10 |
nicklas |
256 |
*/ |
5221 |
22 Jan 10 |
nicklas |
257 |
public String[] parseToBof() |
5221 |
22 Jan 10 |
nicklas |
258 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
259 |
{ |
5221 |
22 Jan 10 |
nicklas |
260 |
if (in == null) throw new NullPointerException("inputStream"); |
5221 |
22 Jan 10 |
nicklas |
261 |
if (ffp != null) |
5221 |
22 Jan 10 |
nicklas |
262 |
{ |
5221 |
22 Jan 10 |
nicklas |
263 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
264 |
"' has already started. Call setInputStream() to reset the parser."); |
5221 |
22 Jan 10 |
nicklas |
265 |
} |
5221 |
22 Jan 10 |
nicklas |
266 |
ffp = createFlatFileParser(in); |
5193 |
27 Nov 09 |
nicklas |
// Parse to the data header |
5193 |
27 Nov 09 |
nicklas |
268 |
if (ffp.parseHeaders() != FlatFileParser.LineType.DATA_HEADER) |
5193 |
27 Nov 09 |
nicklas |
269 |
{ |
5193 |
27 Nov 09 |
nicklas |
270 |
throw new IOException("Can't find data header on first line"); |
5193 |
27 Nov 09 |
nicklas |
271 |
} |
5221 |
22 Jan 10 |
nicklas |
// Decode the headers |
5193 |
27 Nov 09 |
nicklas |
273 |
List<String> headers = ffp.getColumnHeaders(); |
5221 |
22 Jan 10 |
nicklas |
274 |
numColumns = headers.size(); |
5193 |
27 Nov 09 |
nicklas |
275 |
String[] decoded = new String[numColumns]; |
5193 |
27 Nov 09 |
nicklas |
276 |
int i = 0; |
5193 |
27 Nov 09 |
nicklas |
277 |
for (String h : headers) |
5193 |
27 Nov 09 |
nicklas |
278 |
{ |
5193 |
27 Nov 09 |
nicklas |
279 |
decoded[i] = decodeValue(h); |
5193 |
27 Nov 09 |
nicklas |
280 |
i++; |
5193 |
27 Nov 09 |
nicklas |
281 |
} |
5221 |
22 Jan 10 |
nicklas |
// Things that we must keep track of |
5221 |
22 Jan 10 |
nicklas |
283 |
usedIds = new HashSet<Integer>(); |
5221 |
22 Jan 10 |
nicklas |
284 |
return decoded; |
5221 |
22 Jan 10 |
nicklas |
285 |
} |
5221 |
22 Jan 10 |
nicklas |
286 |
|
5221 |
22 Jan 10 |
nicklas |
287 |
/** |
5221 |
22 Jan 10 |
nicklas |
Get the next data line in the annotations file. |
5193 |
27 Nov 09 |
nicklas |
289 |
|
5221 |
22 Jan 10 |
nicklas |
@return An array with the data, or null if no more data is found |
5221 |
22 Jan 10 |
nicklas |
@throws IOException If there is an IO error or if the data is invalid |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If not {@link #parseToBof()} has been called |
5221 |
22 Jan 10 |
nicklas |
293 |
*/ |
5221 |
22 Jan 10 |
nicklas |
294 |
public String[] nextData() |
5221 |
22 Jan 10 |
nicklas |
295 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
296 |
{ |
5221 |
22 Jan 10 |
nicklas |
297 |
if (ffp == null) |
5193 |
27 Nov 09 |
nicklas |
298 |
{ |
5221 |
22 Jan 10 |
nicklas |
299 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
300 |
"' hasn't started. Call parseToBof() to begin parsing."); |
5221 |
22 Jan 10 |
nicklas |
301 |
} |
5221 |
22 Jan 10 |
nicklas |
302 |
|
5221 |
22 Jan 10 |
nicklas |
303 |
if (!ffp.hasMoreData()) return null; |
5221 |
22 Jan 10 |
nicklas |
304 |
|
5221 |
22 Jan 10 |
nicklas |
305 |
String[] data = ffp.nextData().data(); |
5221 |
22 Jan 10 |
nicklas |
// Verify number of columns |
5221 |
22 Jan 10 |
nicklas |
307 |
if (data.length != numColumns) |
5221 |
22 Jan 10 |
nicklas |
308 |
{ |
5221 |
22 Jan 10 |
nicklas |
309 |
throw new IOException("Expected " + numColumns + " columns on line " + |
5221 |
22 Jan 10 |
nicklas |
310 |
ffp.getParsedLines() + " in file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
311 |
"'; found " + data.length + " columns"); |
5221 |
22 Jan 10 |
nicklas |
312 |
} |
5193 |
27 Nov 09 |
nicklas |
313 |
|
5221 |
22 Jan 10 |
nicklas |
// Verify that id is a unique positive integer |
5221 |
22 Jan 10 |
nicklas |
315 |
Integer id = Values.getInteger(data[0], 0); |
5221 |
22 Jan 10 |
nicklas |
316 |
if (id <= 0) |
5221 |
22 Jan 10 |
nicklas |
317 |
{ |
5221 |
22 Jan 10 |
nicklas |
318 |
throw new IOException("ID must be > 0 on line " + ffp.getParsedLines() + " in file '" + |
5221 |
22 Jan 10 |
nicklas |
319 |
getFilename() + "': " + data[0]); |
5221 |
22 Jan 10 |
nicklas |
320 |
} |
5221 |
22 Jan 10 |
nicklas |
321 |
if (usedIds.contains(id)) |
5221 |
22 Jan 10 |
nicklas |
322 |
{ |
5221 |
22 Jan 10 |
nicklas |
323 |
throw new IOException("Duplicate ID on line " + ffp.getParsedLines() + " in file '" + |
5221 |
22 Jan 10 |
nicklas |
324 |
getFilename() + ": " + id); |
5221 |
22 Jan 10 |
nicklas |
325 |
} |
5221 |
22 Jan 10 |
nicklas |
326 |
usedIds.add(id); |
5193 |
27 Nov 09 |
nicklas |
327 |
|
5221 |
22 Jan 10 |
nicklas |
// Decode the values |
5221 |
22 Jan 10 |
nicklas |
329 |
for (int j = 1; j < numColumns; ++j) |
5221 |
22 Jan 10 |
nicklas |
330 |
{ |
5221 |
22 Jan 10 |
nicklas |
331 |
data[j] = decodeValue(data[j]); |
5193 |
27 Nov 09 |
nicklas |
332 |
} |
5221 |
22 Jan 10 |
nicklas |
333 |
return data; |
5193 |
27 Nov 09 |
nicklas |
334 |
} |
5193 |
27 Nov 09 |
nicklas |
335 |
|
5224 |
27 Jan 10 |
nicklas |
336 |
@Override |
5224 |
27 Jan 10 |
nicklas |
337 |
public int getCurrentLine() |
5224 |
27 Jan 10 |
nicklas |
338 |
{ |
5224 |
27 Jan 10 |
nicklas |
339 |
return ffp == null ? -1 : ffp.getParsedLines(); |
5224 |
27 Jan 10 |
nicklas |
340 |
} |
5224 |
27 Jan 10 |
nicklas |
341 |
|
5225 |
29 Jan 10 |
nicklas |
342 |
@Override |
5225 |
29 Jan 10 |
nicklas |
343 |
public long getParsedBytes() |
5225 |
29 Jan 10 |
nicklas |
344 |
{ |
5225 |
29 Jan 10 |
nicklas |
345 |
return ffp == null ? -1 : ffp.getParsedBytes(); |
5225 |
29 Jan 10 |
nicklas |
346 |
} |
5225 |
29 Jan 10 |
nicklas |
347 |
|
5193 |
27 Nov 09 |
nicklas |
348 |
/** |
5193 |
27 Nov 09 |
nicklas |
Create a new flat file parser that can parse BFS annotation |
5193 |
27 Nov 09 |
nicklas |
files. This method should set all regular expressions that |
5193 |
27 Nov 09 |
nicklas |
are needed to parse the stream (assumed to be UTF-8). |
5193 |
27 Nov 09 |
nicklas |
352 |
*/ |
5193 |
27 Nov 09 |
nicklas |
353 |
protected FlatFileParser createFlatFileParser(InputStream in) |
5193 |
27 Nov 09 |
nicklas |
354 |
{ |
5193 |
27 Nov 09 |
nicklas |
355 |
FlatFileParser ffp = new FlatFileParser(); |
5193 |
27 Nov 09 |
nicklas |
356 |
ffp.setDataHeaderRegexp(Pattern.compile("ID.*")); |
5193 |
27 Nov 09 |
nicklas |
357 |
ffp.setDataSplitterRegexp(Pattern.compile("\\t")); |
5193 |
27 Nov 09 |
nicklas |
358 |
ffp.setMaxUnknownLines(0); |
5193 |
27 Nov 09 |
nicklas |
359 |
ffp.setInputStream(in, "UTF-8"); |
5193 |
27 Nov 09 |
nicklas |
360 |
return ffp; |
5193 |
27 Nov 09 |
nicklas |
361 |
} |
5193 |
27 Nov 09 |
nicklas |
362 |
|
5193 |
27 Nov 09 |
nicklas |
363 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the header-found event. The default implemention sends |
5193 |
27 Nov 09 |
nicklas |
a {@link #HEADER_EVENT} to the event handler |
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5193 |
27 Nov 09 |
nicklas |
@param headers An array with the column headers |
5193 |
27 Nov 09 |
nicklas |
368 |
*/ |
5193 |
27 Nov 09 |
nicklas |
369 |
protected void handleHeader(EventHandler handler, String[] headers) |
5193 |
27 Nov 09 |
nicklas |
370 |
{ |
5224 |
27 Jan 10 |
nicklas |
371 |
handler.handleEvent(HEADER_EVENT, headers, this); |
5193 |
27 Nov 09 |
nicklas |
372 |
} |
5193 |
27 Nov 09 |
nicklas |
373 |
|
5193 |
27 Nov 09 |
nicklas |
374 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the data event. The default implemention sends |
5193 |
27 Nov 09 |
nicklas |
a {@link #DATA_EVENT} to the event handler |
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5193 |
27 Nov 09 |
nicklas |
@param data An array with the data |
5193 |
27 Nov 09 |
nicklas |
379 |
*/ |
5193 |
27 Nov 09 |
nicklas |
380 |
protected void handleData(EventHandler handler, String[] data) |
5193 |
27 Nov 09 |
nicklas |
381 |
{ |
5224 |
27 Jan 10 |
nicklas |
382 |
handler.handleEvent(DATA_EVENT, data, this); |
5193 |
27 Nov 09 |
nicklas |
383 |
} |
5193 |
27 Nov 09 |
nicklas |
384 |
|
5193 |
27 Nov 09 |
nicklas |
385 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the end-of-file event. The default implementation sends an |
5193 |
27 Nov 09 |
nicklas |
{@link #END_OF_FILE_EVENT} notification to the event handler. |
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5193 |
27 Nov 09 |
nicklas |
389 |
*/ |
5193 |
27 Nov 09 |
nicklas |
390 |
protected void handleEndOfFile(EventHandler handler) |
5193 |
27 Nov 09 |
nicklas |
391 |
{ |
5224 |
27 Jan 10 |
nicklas |
392 |
handler.handleEvent(END_OF_FILE_EVENT, null, this); |
5193 |
27 Nov 09 |
nicklas |
393 |
} |
5193 |
27 Nov 09 |
nicklas |
394 |
|
5193 |
27 Nov 09 |
nicklas |
395 |
} |