/**
	$Id$

	Copyright (C) 2009 Nicklas Nordborg

	This file is part of BASE - BioArray Software Environment.
	Available at http://base.thep.lu.se/

	BASE is free software; you can redistribute it and/or
	modify it under the terms of the GNU General Public License
	as published by the Free Software Foundation; either version 3
	of the License, or (at your option) any later version.

	BASE is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with BASE. If not, see <http://www.gnu.org/licenses/>.
*/
package net.sf.basedb.util.bfs;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Pattern;

import net.sf.basedb.core.File;
import net.sf.basedb.core.signal.ThreadSignalHandler;
import net.sf.basedb.util.FileUtil;
import net.sf.basedb.util.encode.EncoderDecoder;
import net.sf.basedb.util.encode.TabCrLfEncoderDecoder;
import net.sf.basedb.util.parser.FlatFileParser;

/** |
5221 |
22 Jan 10 |
nicklas |
Parser implementation that parses a BFS metadata file. Before parsing |
5221 |
22 Jan 10 |
nicklas |
is started the stream to parse must be specified by calling |
5221 |
22 Jan 10 |
nicklas |
{@link #setInputStream(InputStream)}. The actual parsing can be done |
5221 |
22 Jan 10 |
nicklas |
in two different ways: |
5193 |
27 Nov 09 |
nicklas |
43 |
|
5193 |
27 Nov 09 |
nicklas |
<ul> |
5221 |
22 Jan 10 |
nicklas |
<li>"Manual" parsing, using the {@link #parseToBof()}, {@link #nextSection()} |
5221 |
22 Jan 10 |
nicklas |
and {@link #nextEntry()} methods. |
5221 |
22 Jan 10 |
nicklas |
<li>Event-based parsing using the {@link #parse(EventHandler)} method. |
5221 |
22 Jan 10 |
nicklas |
This parser issues the following events: |
5221 |
22 Jan 10 |
nicklas |
<ul> |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #BOF_EVENT}: When the beginning-of-file marker is found. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #SECTION_EVENT}: When the start of a new section is found. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #ENTRY_EVENT}: For each entry in a section. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #SECTION_END_EVENT}: After the last entry in a section. |
5221 |
22 Jan 10 |
nicklas |
<li>{@link #END_OF_FILE_EVENT}: When the end of the file has been reached. |
5221 |
22 Jan 10 |
nicklas |
</ul> |
5221 |
22 Jan 10 |
nicklas |
The {@link MetadataModel} class implements a simple event handler that |
5221 |
22 Jan 10 |
nicklas |
collects the information from the metadata file and provides methods |
5221 |
22 Jan 10 |
nicklas |
for accessing it. |
5193 |
27 Nov 09 |
nicklas |
</ul> |
5221 |
22 Jan 10 |
nicklas |
60 |
|
5193 |
27 Nov 09 |
nicklas |
<p> |
5193 |
27 Nov 09 |
nicklas |
This class may be subclassed to provide customized behaviour. |
5193 |
27 Nov 09 |
nicklas |
63 |
|
5193 |
27 Nov 09 |
nicklas |
@author Nicklas |
5193 |
27 Nov 09 |
nicklas |
@version 2.15 |
5193 |
27 Nov 09 |
nicklas |
@base.modified $Date$ |
5193 |
27 Nov 09 |
nicklas |
67 |
*/ |
5193 |
27 Nov 09 |
nicklas |
68 |
public class MetadataParser |
5224 |
27 Jan 10 |
nicklas |
69 |
implements BfsParser |
5193 |
27 Nov 09 |
nicklas |
70 |
{ |
5193 |
27 Nov 09 |
nicklas |
71 |
|
5193 |
27 Nov 09 |
nicklas |
72 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued when the beginning-of-file marker is found. |
5193 |
27 Nov 09 |
nicklas |
The event data is a string with the BFS subtype, or null if no subtype |
5193 |
27 Nov 09 |
nicklas |
was defined. |
5193 |
27 Nov 09 |
nicklas |
76 |
*/ |
5193 |
27 Nov 09 |
nicklas |
77 |
public static final EventType<String> BOF_EVENT = |
5193 |
27 Nov 09 |
nicklas |
78 |
new EventType<String>("metadata.bof"); |
5193 |
27 Nov 09 |
nicklas |
79 |
|
5193 |
27 Nov 09 |
nicklas |
80 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued for each new section that is found in the file. |
5193 |
27 Nov 09 |
nicklas |
The event data is a string with the name (decoded) of the section. |
5193 |
27 Nov 09 |
nicklas |
83 |
*/ |
5193 |
27 Nov 09 |
nicklas |
84 |
public static final EventType<String> SECTION_EVENT = |
5193 |
27 Nov 09 |
nicklas |
85 |
new EventType<String>("metadata.section"); |
5193 |
27 Nov 09 |
nicklas |
86 |
|
5193 |
27 Nov 09 |
nicklas |
87 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued for each entry in a section in the order they |
5193 |
27 Nov 09 |
nicklas |
are present in the file. The event data is a {@link MetadataModel.SectionEntry} |
5193 |
27 Nov 09 |
nicklas |
object with the decoded key and values. |
5193 |
27 Nov 09 |
nicklas |
91 |
*/ |
5193 |
27 Nov 09 |
nicklas |
92 |
public static final EventType<MetadataModel.SectionEntry> ENTRY_EVENT = |
5193 |
27 Nov 09 |
nicklas |
93 |
new EventType<MetadataModel.SectionEntry>("metadata.entry"); |
5193 |
27 Nov 09 |
nicklas |
94 |
|
5193 |
27 Nov 09 |
nicklas |
95 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued at the end of each section in the file. |
5193 |
27 Nov 09 |
nicklas |
No event data is submitted. |
5193 |
27 Nov 09 |
nicklas |
98 |
*/ |
5193 |
27 Nov 09 |
nicklas |
99 |
public static final EventType<String> SECTION_END_EVENT = |
5193 |
27 Nov 09 |
nicklas |
100 |
new EventType<String>("metadata.section.end"); |
5193 |
27 Nov 09 |
nicklas |
101 |
|
5193 |
27 Nov 09 |
nicklas |
102 |
/** |
5193 |
27 Nov 09 |
nicklas |
Event type that is issued when the end-of-file had been reached. |
5193 |
27 Nov 09 |
nicklas |
No event data is submitted. |
5193 |
27 Nov 09 |
nicklas |
105 |
*/ |
5193 |
27 Nov 09 |
nicklas |
106 |
public static final EventType<Object> END_OF_FILE_EVENT = |
5193 |
27 Nov 09 |
nicklas |
107 |
new EventType<Object>("metadata.end"); |
5193 |
27 Nov 09 |
nicklas |
108 |
|
5193 |
27 Nov 09 |
nicklas |
109 |
|
5224 |
27 Jan 10 |
nicklas |
110 |
/** |
5224 |
27 Jan 10 |
nicklas |
Utility method for creating a metadata parser when you have an |
5224 |
27 Jan 10 |
nicklas |
input stream. |
5224 |
27 Jan 10 |
nicklas |
@param in The input stream the parser should read from |
5224 |
27 Jan 10 |
nicklas |
@param filename Optional, the name of the file the input stream |
5224 |
27 Jan 10 |
nicklas |
is reading from |
5224 |
27 Jan 10 |
nicklas |
116 |
*/ |
5225 |
29 Jan 10 |
nicklas |
117 |
public static MetadataParser create(InputStream in, String filename, long fileSize) |
5224 |
27 Jan 10 |
nicklas |
118 |
{ |
5224 |
27 Jan 10 |
nicklas |
119 |
MetadataParser parser = new MetadataParser(); |
5224 |
27 Jan 10 |
nicklas |
120 |
parser.setFilename(filename); |
5224 |
27 Jan 10 |
nicklas |
121 |
parser.setInputStream(in); |
5225 |
29 Jan 10 |
nicklas |
122 |
parser.setFileSize(fileSize); |
5224 |
27 Jan 10 |
nicklas |
123 |
return parser; |
5224 |
27 Jan 10 |
nicklas |
124 |
} |
5224 |
27 Jan 10 |
nicklas |
125 |
|
5224 |
27 Jan 10 |
nicklas |
126 |
/** |
5224 |
27 Jan 10 |
nicklas |
Utility method for creating a metadata parser for a file in the BASE |
5224 |
27 Jan 10 |
nicklas |
file system. |
5224 |
27 Jan 10 |
nicklas |
@param file The file in the BASE file system |
5224 |
27 Jan 10 |
nicklas |
130 |
*/ |
5224 |
27 Jan 10 |
nicklas |
131 |
public static MetadataParser create(File file) |
5224 |
27 Jan 10 |
nicklas |
132 |
{ |
5225 |
29 Jan 10 |
nicklas |
133 |
return create(file.getDownloadStream(0), file.getName(), file.getSize()); |
5224 |
27 Jan 10 |
nicklas |
134 |
} |
5224 |
27 Jan 10 |
nicklas |
135 |
|
5319 |
20 Apr 10 |
nicklas |
136 |
/** |
5319 |
20 Apr 10 |
nicklas |
Utility method for creating a metadata parser for a file in the local |
5319 |
20 Apr 10 |
nicklas |
file system. |
5319 |
20 Apr 10 |
nicklas |
@param file The file in the local file system |
5319 |
20 Apr 10 |
nicklas |
140 |
*/ |
5319 |
20 Apr 10 |
nicklas |
141 |
public static MetadataParser create(java.io.File file) |
5319 |
20 Apr 10 |
nicklas |
142 |
throws IOException |
5319 |
20 Apr 10 |
nicklas |
143 |
{ |
5319 |
20 Apr 10 |
nicklas |
144 |
return create(new FileInputStream(file), file.getName(), file.length()); |
5319 |
20 Apr 10 |
nicklas |
145 |
} |
5319 |
20 Apr 10 |
nicklas |
146 |
|
5193 |
27 Nov 09 |
nicklas |
147 |
private final EncoderDecoder decoder; |
5221 |
22 Jan 10 |
nicklas |
148 |
private InputStream in; |
5221 |
22 Jan 10 |
nicklas |
149 |
private String filename; |
5225 |
29 Jan 10 |
nicklas |
150 |
private long fileSize; |
5221 |
22 Jan 10 |
nicklas |
151 |
private FlatFileParser ffp; |
5221 |
22 Jan 10 |
nicklas |
152 |
private List<MetadataModel.SectionEntry> sectionEntries; |
5193 |
27 Nov 09 |
nicklas |
153 |
|
5193 |
27 Nov 09 |
nicklas |
154 |
/** |
5193 |
27 Nov 09 |
nicklas |
Create a new metadata parser. |
5193 |
27 Nov 09 |
nicklas |
156 |
*/ |
5193 |
27 Nov 09 |
nicklas |
157 |
public MetadataParser() |
5193 |
27 Nov 09 |
nicklas |
158 |
{ |
5193 |
27 Nov 09 |
nicklas |
159 |
this.decoder = new TabCrLfEncoderDecoder(true); |
5193 |
27 Nov 09 |
nicklas |
160 |
} |
5193 |
27 Nov 09 |
nicklas |
161 |
|
5193 |
27 Nov 09 |
nicklas |
162 |
/** |
5193 |
27 Nov 09 |
nicklas |
Decode an encoded value. Metadata values are encoded/decoded |
5193 |
27 Nov 09 |
nicklas |
with a {@link TabCrLfEncoderDecoder}. |
5193 |
27 Nov 09 |
nicklas |
165 |
|
5193 |
27 Nov 09 |
nicklas |
@param value The encoded value |
5193 |
27 Nov 09 |
nicklas |
@return The decoded value |
5193 |
27 Nov 09 |
nicklas |
168 |
*/ |
5193 |
27 Nov 09 |
nicklas |
169 |
public String decodeValue(String value) |
5193 |
27 Nov 09 |
nicklas |
170 |
{ |
5193 |
27 Nov 09 |
nicklas |
171 |
return decoder.decode(value); |
5193 |
27 Nov 09 |
nicklas |
172 |
} |
5193 |
27 Nov 09 |
nicklas |
173 |
|
5193 |
27 Nov 09 |
nicklas |
174 |
/** |
5221 |
22 Jan 10 |
nicklas |
Get the file name that this writer is printing to. |
5221 |
22 Jan 10 |
nicklas |
@return The file name or null if not known |
5221 |
22 Jan 10 |
nicklas |
177 |
*/ |
5224 |
27 Jan 10 |
nicklas |
178 |
@Override |
5221 |
22 Jan 10 |
nicklas |
179 |
public String getFilename() |
5221 |
22 Jan 10 |
nicklas |
180 |
{ |
5221 |
22 Jan 10 |
nicklas |
181 |
return filename; |
5221 |
22 Jan 10 |
nicklas |
182 |
} |
5221 |
22 Jan 10 |
nicklas |
183 |
|
5221 |
22 Jan 10 |
nicklas |
184 |
/** |
5221 |
22 Jan 10 |
nicklas |
Set the file name that this writer is printing to. |
5221 |
22 Jan 10 |
nicklas |
186 |
*/ |
5221 |
22 Jan 10 |
nicklas |
187 |
public void setFilename(String filename) |
5221 |
22 Jan 10 |
nicklas |
188 |
{ |
5221 |
22 Jan 10 |
nicklas |
189 |
this.filename = filename; |
5221 |
22 Jan 10 |
nicklas |
190 |
} |
5221 |
22 Jan 10 |
nicklas |
191 |
|
5221 |
22 Jan 10 |
nicklas |
192 |
/** |
5225 |
29 Jan 10 |
nicklas |
Get the size in bytes of the file that this parser is reading from. |
5225 |
29 Jan 10 |
nicklas |
@return The size or -1 if not known |
5225 |
29 Jan 10 |
nicklas |
195 |
*/ |
5225 |
29 Jan 10 |
nicklas |
196 |
@Override |
5225 |
29 Jan 10 |
nicklas |
197 |
public long getFileSize() |
5225 |
29 Jan 10 |
nicklas |
198 |
{ |
5225 |
29 Jan 10 |
nicklas |
199 |
return fileSize; |
5225 |
29 Jan 10 |
nicklas |
200 |
} |
5225 |
29 Jan 10 |
nicklas |
201 |
|
5225 |
29 Jan 10 |
nicklas |
202 |
/** |
5225 |
29 Jan 10 |
nicklas |
Set the size of the file or -1 if not known. |
5225 |
29 Jan 10 |
nicklas |
204 |
*/ |
5225 |
29 Jan 10 |
nicklas |
205 |
public void setFileSize(long fileSize) |
5225 |
29 Jan 10 |
nicklas |
206 |
{ |
5225 |
29 Jan 10 |
nicklas |
207 |
this.fileSize = fileSize; |
5225 |
29 Jan 10 |
nicklas |
208 |
} |
5225 |
29 Jan 10 |
nicklas |
209 |
|
5225 |
29 Jan 10 |
nicklas |
210 |
|
5319 |
20 Apr 10 |
nicklas |
211 |
@Override |
5319 |
20 Apr 10 |
nicklas |
212 |
public void close() |
5319 |
20 Apr 10 |
nicklas |
213 |
{ |
5319 |
20 Apr 10 |
nicklas |
214 |
if (in != null) FileUtil.close(in); |
5319 |
20 Apr 10 |
nicklas |
215 |
} |
5319 |
20 Apr 10 |
nicklas |
216 |
|
5225 |
29 Jan 10 |
nicklas |
217 |
/** |
5221 |
22 Jan 10 |
nicklas |
Set the input stream that should be parsed. This will also reset the |
5221 |
22 Jan 10 |
nicklas |
parser. All information from a previously parsed file is lost. |
5221 |
22 Jan 10 |
nicklas |
@param in The stream to parse (in UTF-8 format) |
5221 |
22 Jan 10 |
nicklas |
221 |
*/ |
5221 |
22 Jan 10 |
nicklas |
222 |
public void setInputStream(InputStream in) |
5221 |
22 Jan 10 |
nicklas |
223 |
{ |
5319 |
20 Apr 10 |
nicklas |
224 |
close(); |
5221 |
22 Jan 10 |
nicklas |
225 |
this.in = in; |
5221 |
22 Jan 10 |
nicklas |
226 |
this.ffp = null; |
5221 |
22 Jan 10 |
nicklas |
227 |
this.sectionEntries = null; |
5221 |
22 Jan 10 |
nicklas |
228 |
} |
5221 |
22 Jan 10 |
nicklas |
229 |
|
5221 |
22 Jan 10 |
nicklas |
230 |
/** |
5193 |
27 Nov 09 |
nicklas |
Parse the input stream and notify the specified event handler with events. |
5193 |
27 Nov 09 |
nicklas |
The event handler is responsible for keeping track of and storing the |
5193 |
27 Nov 09 |
nicklas |
data of interest. |
5193 |
27 Nov 09 |
nicklas |
234 |
|
5193 |
27 Nov 09 |
nicklas |
@param handler An event handler |
5193 |
27 Nov 09 |
nicklas |
@throws IOException If there is an error reading the stream data |
5193 |
27 Nov 09 |
nicklas |
@throws NullPointerException If the stream or event handler is null |
5193 |
27 Nov 09 |
nicklas |
238 |
*/ |
5221 |
22 Jan 10 |
nicklas |
239 |
public void parse(EventHandler handler) |
5193 |
27 Nov 09 |
nicklas |
240 |
throws IOException |
5193 |
27 Nov 09 |
nicklas |
241 |
{ |
5193 |
27 Nov 09 |
nicklas |
242 |
if (handler == null) throw new NullPointerException("handler"); |
5193 |
27 Nov 09 |
nicklas |
243 |
|
5221 |
22 Jan 10 |
nicklas |
244 |
String subType = parseToBof(); |
5221 |
22 Jan 10 |
nicklas |
245 |
handleBof(handler, subType); |
5221 |
22 Jan 10 |
nicklas |
246 |
|
5221 |
22 Jan 10 |
nicklas |
// Repeat as long as there are more sections in the file |
5221 |
22 Jan 10 |
nicklas |
248 |
String section = nextSection(); |
5221 |
22 Jan 10 |
nicklas |
249 |
while (section != null) |
5221 |
22 Jan 10 |
nicklas |
250 |
{ |
5225 |
29 Jan 10 |
nicklas |
251 |
ThreadSignalHandler.checkInterrupted(); |
5221 |
22 Jan 10 |
nicklas |
252 |
handleSection(handler, section); |
5221 |
22 Jan 10 |
nicklas |
253 |
MetadataModel.SectionEntry entry = nextEntry(); |
5221 |
22 Jan 10 |
nicklas |
254 |
while (entry != null) |
5221 |
22 Jan 10 |
nicklas |
255 |
{ |
5221 |
22 Jan 10 |
nicklas |
256 |
handleEntry(handler, entry); |
5221 |
22 Jan 10 |
nicklas |
257 |
entry = nextEntry(); |
5221 |
22 Jan 10 |
nicklas |
258 |
} |
5221 |
22 Jan 10 |
nicklas |
259 |
handleSectionEnd(handler, section); |
5221 |
22 Jan 10 |
nicklas |
260 |
section = nextSection(); |
5221 |
22 Jan 10 |
nicklas |
261 |
} |
5221 |
22 Jan 10 |
nicklas |
262 |
handleEndOfFile(handler); |
5221 |
22 Jan 10 |
nicklas |
263 |
} |
5221 |
22 Jan 10 |
nicklas |
264 |
|
5221 |
22 Jan 10 |
nicklas |
265 |
/** |
5221 |
22 Jan 10 |
nicklas |
Parse the start of the file until the beginning-of-file marker |
5221 |
22 Jan 10 |
nicklas |
(BFSfile) is found. This method can only be called once on |
5221 |
22 Jan 10 |
nicklas |
a given input stream. To parse a second file, call {@link |
5221 |
22 Jan 10 |
nicklas |
#setInputStream(InputStream)} again to reset the parser. |
5221 |
22 Jan 10 |
nicklas |
270 |
|
5221 |
22 Jan 10 |
nicklas |
@return The subtype of the BFS or null if no subtype is found |
5221 |
22 Jan 10 |
nicklas |
@throws IOException If the beginning-of-file marker is not found |
5221 |
22 Jan 10 |
nicklas |
or if there is any other IO error |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If the parsing has already started |
5221 |
22 Jan 10 |
nicklas |
@throws NullPointerException If no input stream has been specified |
5221 |
22 Jan 10 |
nicklas |
276 |
*/ |
5221 |
22 Jan 10 |
nicklas |
277 |
public String parseToBof() |
5221 |
22 Jan 10 |
nicklas |
278 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
279 |
{ |
5221 |
22 Jan 10 |
nicklas |
280 |
if (in == null) throw new NullPointerException("inputStream"); |
5221 |
22 Jan 10 |
nicklas |
281 |
if (ffp != null) |
5221 |
22 Jan 10 |
nicklas |
282 |
{ |
5221 |
22 Jan 10 |
nicklas |
283 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
284 |
"' has already started. Call setInputStream() to reset the parser."); |
5221 |
22 Jan 10 |
nicklas |
285 |
} |
5221 |
22 Jan 10 |
nicklas |
286 |
ffp = createFlatFileParser(in); |
5193 |
27 Nov 09 |
nicklas |
// Parse to the beginning-of-file marker |
5193 |
27 Nov 09 |
nicklas |
288 |
if (!ffp.parseToBof()) |
5193 |
27 Nov 09 |
nicklas |
289 |
{ |
5193 |
27 Nov 09 |
nicklas |
290 |
throw new IOException("Can't find 'BFSformat' in file"); |
5193 |
27 Nov 09 |
nicklas |
291 |
} |
5221 |
22 Jan 10 |
nicklas |
292 |
return decodeValue(ffp.getBofType()); |
5221 |
22 Jan 10 |
nicklas |
293 |
} |
5221 |
22 Jan 10 |
nicklas |
294 |
|
5221 |
22 Jan 10 |
nicklas |
295 |
/** |
5221 |
22 Jan 10 |
nicklas |
Parse to the next section in the metadata file. This method can be called |
5221 |
22 Jan 10 |
nicklas |
repeatedly until null is returned which indicates that the end of file has |
5221 |
22 Jan 10 |
nicklas |
been reached. |
5193 |
27 Nov 09 |
nicklas |
299 |
|
5221 |
22 Jan 10 |
nicklas |
@return The name of the section, or null if no more sections could be found |
5221 |
22 Jan 10 |
nicklas |
@throws IOException If there is an IO error during the parsing |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If the parsing has not been started. |
5221 |
22 Jan 10 |
nicklas |
{@link #parseToBof()} must be called before this method is called. |
5221 |
22 Jan 10 |
nicklas |
304 |
*/ |
5221 |
22 Jan 10 |
nicklas |
305 |
public String nextSection() |
5221 |
22 Jan 10 |
nicklas |
306 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
307 |
{ |
5221 |
22 Jan 10 |
nicklas |
308 |
if (ffp == null) |
5193 |
27 Nov 09 |
nicklas |
309 |
{ |
5221 |
22 Jan 10 |
nicklas |
310 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
311 |
"' hasn't started. Call parseToBof() to begin parsing."); |
5221 |
22 Jan 10 |
nicklas |
312 |
} |
5221 |
22 Jan 10 |
nicklas |
313 |
if (!ffp.hasMoreSections()) return null; |
5221 |
22 Jan 10 |
nicklas |
314 |
|
5221 |
22 Jan 10 |
nicklas |
315 |
FlatFileParser.Line sectionLine = ffp.nextSection(); |
5221 |
22 Jan 10 |
nicklas |
316 |
String section = decodeValue(sectionLine.name()); |
5221 |
22 Jan 10 |
nicklas |
317 |
ffp.parseHeaders(); |
5221 |
22 Jan 10 |
nicklas |
318 |
sectionEntries = new LinkedList<MetadataModel.SectionEntry>(); |
5221 |
22 Jan 10 |
nicklas |
319 |
for (FlatFileParser.Line l : ffp.getLines()) |
5221 |
22 Jan 10 |
nicklas |
320 |
{ |
5221 |
22 Jan 10 |
nicklas |
321 |
if (l.type() == FlatFileParser.LineType.HEADER) |
5193 |
27 Nov 09 |
nicklas |
322 |
{ |
5221 |
22 Jan 10 |
nicklas |
323 |
String key = decodeValue(l.name()); |
5221 |
22 Jan 10 |
nicklas |
324 |
String[] values = l.value().split("\\t", -1); |
5221 |
22 Jan 10 |
nicklas |
325 |
for (int i = 0; i < values.length; ++i) |
5193 |
27 Nov 09 |
nicklas |
326 |
{ |
5221 |
22 Jan 10 |
nicklas |
327 |
values[i] = decodeValue(values[i]); |
5193 |
27 Nov 09 |
nicklas |
328 |
} |
5221 |
22 Jan 10 |
nicklas |
329 |
sectionEntries.add(new MetadataModel.SectionEntry(key, values)); |
5193 |
27 Nov 09 |
nicklas |
330 |
} |
5193 |
27 Nov 09 |
nicklas |
331 |
} |
5221 |
22 Jan 10 |
nicklas |
332 |
return section; |
5193 |
27 Nov 09 |
nicklas |
333 |
} |
5193 |
27 Nov 09 |
nicklas |
334 |
|
5193 |
27 Nov 09 |
nicklas |
335 |
/** |
5221 |
22 Jan 10 |
nicklas |
Get the next entry in the current section. This method can be called |
5221 |
22 Jan 10 |
nicklas |
repeatedly until null is returned which indicates that the end of the |
5221 |
22 Jan 10 |
nicklas |
section has been reached. |
5221 |
22 Jan 10 |
nicklas |
339 |
|
5221 |
22 Jan 10 |
nicklas |
@return The key and values or the entry, or null if no more entries could be found |
5221 |
22 Jan 10 |
nicklas |
@throws IOException If there is an IO error during the parsing |
5221 |
22 Jan 10 |
nicklas |
@throws IllegalStateException If the parsing has not been started. |
5221 |
22 Jan 10 |
nicklas |
{@link #parseToBof()} must be called before this method is called. |
5221 |
22 Jan 10 |
nicklas |
344 |
*/ |
5221 |
22 Jan 10 |
nicklas |
345 |
public MetadataModel.SectionEntry nextEntry() |
5221 |
22 Jan 10 |
nicklas |
346 |
throws IOException |
5221 |
22 Jan 10 |
nicklas |
347 |
{ |
5221 |
22 Jan 10 |
nicklas |
348 |
if (ffp == null) |
5221 |
22 Jan 10 |
nicklas |
349 |
{ |
5221 |
22 Jan 10 |
nicklas |
350 |
throw new IllegalStateException("Parsing of file '" + getFilename() + |
5221 |
22 Jan 10 |
nicklas |
351 |
"' hasn't started. Call parseToBof() to begin parsing."); |
5221 |
22 Jan 10 |
nicklas |
352 |
} |
5221 |
22 Jan 10 |
nicklas |
353 |
if (sectionEntries == null || sectionEntries.size() == 0) return null; |
5221 |
22 Jan 10 |
nicklas |
354 |
MetadataModel.SectionEntry entry = sectionEntries.remove(0); |
5221 |
22 Jan 10 |
nicklas |
355 |
return entry; |
5221 |
22 Jan 10 |
nicklas |
356 |
} |
5221 |
22 Jan 10 |
nicklas |
357 |
|
5224 |
27 Jan 10 |
nicklas |
358 |
@Override |
5224 |
27 Jan 10 |
nicklas |
359 |
public int getCurrentLine() |
5224 |
27 Jan 10 |
nicklas |
360 |
{ |
5224 |
27 Jan 10 |
nicklas |
361 |
return ffp == null ? -1 : ffp.getParsedLines(); |
5224 |
27 Jan 10 |
nicklas |
362 |
} |
5224 |
27 Jan 10 |
nicklas |
363 |
|
5225 |
29 Jan 10 |
nicklas |
364 |
@Override |
5225 |
29 Jan 10 |
nicklas |
365 |
public long getParsedBytes() |
5225 |
29 Jan 10 |
nicklas |
366 |
{ |
5225 |
29 Jan 10 |
nicklas |
367 |
return ffp == null ? -1 : ffp.getParsedBytes(); |
5225 |
29 Jan 10 |
nicklas |
368 |
} |
5225 |
29 Jan 10 |
nicklas |
369 |
|
5221 |
22 Jan 10 |
nicklas |
370 |
/** |
5193 |
27 Nov 09 |
nicklas |
Create a new flat file parser that can parse BFS metadata |
5193 |
27 Nov 09 |
nicklas |
files. This method should set all regular expressions that |
5193 |
27 Nov 09 |
nicklas |
are needed to parse the stream (assumed to be UTF-8). |
5193 |
27 Nov 09 |
nicklas |
374 |
*/ |
5193 |
27 Nov 09 |
nicklas |
375 |
protected FlatFileParser createFlatFileParser(InputStream in) |
5193 |
27 Nov 09 |
nicklas |
376 |
{ |
5193 |
27 Nov 09 |
nicklas |
377 |
FlatFileParser ffp = new FlatFileParser(); |
5193 |
27 Nov 09 |
nicklas |
378 |
ffp.setBofMarkerRegexp(Pattern.compile("BFSformat\t?(.*)")); |
5193 |
27 Nov 09 |
nicklas |
379 |
ffp.setIgnoreRegexp(Pattern.compile("#.*|\\s*")); |
5193 |
27 Nov 09 |
nicklas |
380 |
ffp.setSectionRegexp(Pattern.compile("\\[(.*)\\]\\s*")); |
5193 |
27 Nov 09 |
nicklas |
381 |
ffp.setHeaderRegexp(Pattern.compile("(.+?)\\t(.*)")); |
5193 |
27 Nov 09 |
nicklas |
382 |
ffp.setInputStream(in, "UTF-8"); |
5193 |
27 Nov 09 |
nicklas |
383 |
return ffp; |
5193 |
27 Nov 09 |
nicklas |
384 |
} |
5193 |
27 Nov 09 |
nicklas |
385 |
|
5193 |
27 Nov 09 |
nicklas |
386 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the beginning-of-file marker event. The default |
5193 |
27 Nov 09 |
nicklas |
implementation sends a {@link #BOF_EVENT} notification to the |
5193 |
27 Nov 09 |
nicklas |
event handler. |
5193 |
27 Nov 09 |
nicklas |
390 |
|
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5193 |
27 Nov 09 |
nicklas |
@param subtype The decoded subtype or null if no subtype was specified in the file |
5193 |
27 Nov 09 |
nicklas |
393 |
*/ |
5193 |
27 Nov 09 |
nicklas |
394 |
protected void handleBof(EventHandler handler, String subtype) |
5193 |
27 Nov 09 |
nicklas |
395 |
{ |
5224 |
27 Jan 10 |
nicklas |
396 |
handler.handleEvent(BOF_EVENT, subtype, this); |
5193 |
27 Nov 09 |
nicklas |
397 |
} |
5193 |
27 Nov 09 |
nicklas |
398 |
|
5193 |
27 Nov 09 |
nicklas |
399 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the new section event. The default implementation sends a |
5193 |
27 Nov 09 |
nicklas |
{@link #SECTION_EVENT} notification to the event handler. |
5193 |
27 Nov 09 |
nicklas |
402 |
|
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5193 |
27 Nov 09 |
nicklas |
@param name The decoded name of the section |
5193 |
27 Nov 09 |
nicklas |
405 |
*/ |
5193 |
27 Nov 09 |
nicklas |
406 |
protected void handleSection(EventHandler handler, String name) |
5193 |
27 Nov 09 |
nicklas |
407 |
{ |
5224 |
27 Jan 10 |
nicklas |
408 |
handler.handleEvent(SECTION_EVENT, name, this); |
5193 |
27 Nov 09 |
nicklas |
409 |
} |
5193 |
27 Nov 09 |
nicklas |
410 |
|
5193 |
27 Nov 09 |
nicklas |
411 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the section entry event. The default implementation sends a |
5193 |
27 Nov 09 |
nicklas |
{@link #ENTRY_EVENT} notification to the event handler. |
5193 |
27 Nov 09 |
nicklas |
414 |
|
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5221 |
22 Jan 10 |
nicklas |
@param entry Key and values from the section entry |
5193 |
27 Nov 09 |
nicklas |
417 |
*/ |
5221 |
22 Jan 10 |
nicklas |
418 |
protected void handleEntry(EventHandler handler, MetadataModel.SectionEntry entry) |
5193 |
27 Nov 09 |
nicklas |
419 |
{ |
5224 |
27 Jan 10 |
nicklas |
420 |
handler.handleEvent(ENTRY_EVENT, entry, this); |
5193 |
27 Nov 09 |
nicklas |
421 |
} |
5193 |
27 Nov 09 |
nicklas |
422 |
|
5193 |
27 Nov 09 |
nicklas |
423 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the end-of-section event. The default implementation sends a |
5193 |
27 Nov 09 |
nicklas |
{@link #SECTION_END_EVENT} notification to the event handler. |
5193 |
27 Nov 09 |
nicklas |
426 |
|
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5221 |
22 Jan 10 |
nicklas |
@param name The decoded name of the section |
5193 |
27 Nov 09 |
nicklas |
429 |
*/ |
5221 |
22 Jan 10 |
nicklas |
430 |
protected void handleSectionEnd(EventHandler handler, String name) |
5193 |
27 Nov 09 |
nicklas |
431 |
{ |
5224 |
27 Jan 10 |
nicklas |
432 |
handler.handleEvent(SECTION_END_EVENT, name, this); |
5193 |
27 Nov 09 |
nicklas |
433 |
} |
5193 |
27 Nov 09 |
nicklas |
434 |
|
5193 |
27 Nov 09 |
nicklas |
435 |
|
5193 |
27 Nov 09 |
nicklas |
436 |
/** |
5193 |
27 Nov 09 |
nicklas |
Handle the end-of-file event. The default implementation sends an |
5193 |
27 Nov 09 |
nicklas |
{@link #END_OF_FILE_EVENT} notification to the event handler. |
5221 |
22 Jan 10 |
nicklas |
@param handler The event handler from the {@link #parse(EventHandler)} method |
5193 |
27 Nov 09 |
nicklas |
440 |
*/ |
5193 |
27 Nov 09 |
nicklas |
441 |
protected void handleEndOfFile(EventHandler handler) |
5193 |
27 Nov 09 |
nicklas |
442 |
{ |
5224 |
27 Jan 10 |
nicklas |
443 |
handler.handleEvent(END_OF_FILE_EVENT, null, this); |
5193 |
27 Nov 09 |
nicklas |
444 |
} |
5193 |
27 Nov 09 |
nicklas |
445 |
} |