29 |
22 Feb 05 |
nicklas |
1 |
/* |
192 |
17 Mar 05 |
jari |
$Id$ |
2203 |
28 Apr 06 |
nicklas |
3 |
|
4889 |
06 Apr 09 |
nicklas |
Copyright (C) 2005 Johan Enell, Jari Häkkinen, Nicklas Nordborg |
3675 |
16 Aug 07 |
jari |
Copyright (C) 2006 Johan Enell, Nicklas Nordborg, Martin Svensson, Gregory Vincic |
3675 |
16 Aug 07 |
jari |
Copyright (C) 2007 Johan Enell, Nicklas Nordborg |
2203 |
28 Apr 06 |
nicklas |
7 |
|
2203 |
28 Apr 06 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
2203 |
28 Apr 06 |
nicklas |
Available at http://base.thep.lu.se/ |
2203 |
28 Apr 06 |
nicklas |
10 |
|
2203 |
28 Apr 06 |
nicklas |
BASE is free software; you can redistribute it and/or modify it |
2203 |
28 Apr 06 |
nicklas |
under the terms of the GNU General Public License as published by |
4479 |
05 Sep 08 |
jari |
the Free Software Foundation; either version 3 of the License, or |
2203 |
28 Apr 06 |
nicklas |
(at your option) any later version. |
2203 |
28 Apr 06 |
nicklas |
15 |
|
2203 |
28 Apr 06 |
nicklas |
BASE is distributed in the hope that it will be useful, but |
2203 |
28 Apr 06 |
nicklas |
WITHOUT ANY WARRANTY; without even the implied warranty of |
2203 |
28 Apr 06 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
2203 |
28 Apr 06 |
nicklas |
General Public License for more details. |
2203 |
28 Apr 06 |
nicklas |
20 |
|
29 |
22 Feb 05 |
nicklas |
You should have received a copy of the GNU General Public License |
4515 |
11 Sep 08 |
jari |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
29 |
22 Feb 05 |
nicklas |
23 |
*/ |
2203 |
28 Apr 06 |
nicklas |
24 |
package net.sf.basedb.util.parser; |
29 |
22 Feb 05 |
nicklas |
25 |
|
29 |
22 Feb 05 |
nicklas |
26 |
import java.io.InputStream; |
29 |
22 Feb 05 |
nicklas |
27 |
import java.io.InputStreamReader; |
29 |
22 Feb 05 |
nicklas |
28 |
import java.io.BufferedReader; |
29 |
22 Feb 05 |
nicklas |
29 |
import java.io.IOException; |
2992 |
01 Dec 06 |
enell |
30 |
import java.nio.charset.Charset; |
3023 |
12 Dec 06 |
nicklas |
31 |
import java.text.NumberFormat; |
29 |
22 Feb 05 |
nicklas |
32 |
import java.util.regex.Pattern; |
29 |
22 Feb 05 |
nicklas |
33 |
import java.util.regex.Matcher; |
3911 |
06 Nov 07 |
nicklas |
34 |
import java.util.regex.PatternSyntaxException; |
7655 |
19 Mar 19 |
nicklas |
35 |
|
7664 |
20 Mar 19 |
nicklas |
36 |
import org.apache.poi.ss.usermodel.Cell; |
7655 |
19 Mar 19 |
nicklas |
37 |
|
2655 |
22 Sep 06 |
nicklas |
38 |
import java.util.Arrays; |
2203 |
28 Apr 06 |
nicklas |
39 |
import java.util.LinkedList; |
29 |
22 Feb 05 |
nicklas |
40 |
import java.util.Map; |
29 |
22 Feb 05 |
nicklas |
41 |
import java.util.HashMap; |
29 |
22 Feb 05 |
nicklas |
42 |
import java.util.List; |
29 |
22 Feb 05 |
nicklas |
43 |
import java.util.ArrayList; |
1368 |
21 Sep 05 |
nicklas |
44 |
import java.util.Set; |
1368 |
21 Sep 05 |
nicklas |
45 |
import java.util.Collections; |
7655 |
19 Mar 19 |
nicklas |
46 |
import java.util.Date; |
29 |
22 Feb 05 |
nicklas |
47 |
|
2262 |
16 May 06 |
nicklas |
48 |
import net.sf.basedb.core.BaseException; |
2992 |
01 Dec 06 |
enell |
49 |
import net.sf.basedb.core.Config; |
7664 |
20 Mar 19 |
nicklas |
50 |
import net.sf.basedb.core.Type; |
4128 |
05 Feb 08 |
nicklas |
51 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
4095 |
21 Jan 08 |
enell |
52 |
import net.sf.basedb.util.InputStreamTracker; |
7655 |
19 Mar 19 |
nicklas |
53 |
import net.sf.basedb.util.NumberFormatUtil; |
7655 |
19 Mar 19 |
nicklas |
54 |
import net.sf.basedb.util.Values; |
7627 |
08 Mar 19 |
nicklas |
55 |
import net.sf.basedb.util.charset.CharsetUtil; |
7655 |
19 Mar 19 |
nicklas |
56 |
import net.sf.basedb.util.excel.XlsxToCsvUtil; |
7656 |
19 Mar 19 |
nicklas |
57 |
import net.sf.basedb.util.excel.XlsxToCsvUtil.SheetInfo; |
7655 |
19 Mar 19 |
nicklas |
58 |
import net.sf.basedb.util.formatter.DateFormatter; |
7655 |
19 Mar 19 |
nicklas |
59 |
import net.sf.basedb.util.formatter.Formatter; |
7655 |
19 Mar 19 |
nicklas |
60 |
import net.sf.basedb.util.formatter.NumberFormatFormatter; |
5899 |
06 Dec 11 |
nicklas |
61 |
import net.sf.basedb.util.jep.JepFunction; |
2262 |
16 May 06 |
nicklas |
62 |
|
29 |
22 Feb 05 |
nicklas |
63 |
/** |
29 |
22 Feb 05 |
nicklas |
64 |
|
7667 |
21 Mar 19 |
nicklas |
This class can be used to parse data from flat text files and from |
7667 |
21 Mar 19 |
nicklas |
Excel workbooks in xlsx format. If the file is a text file it must |
7667 |
21 Mar 19 |
nicklas |
follow a few simple rules: |
29 |
22 Feb 05 |
nicklas |
<ul> |
29 |
22 Feb 05 |
nicklas |
<li>Data must be organised into columns, with one record per line |
29 |
22 Feb 05 |
nicklas |
<li>Each data column must be separated by some special character |
29 |
22 Feb 05 |
nicklas |
or character sequence not occurring in the data, for example |
29 |
22 Feb 05 |
nicklas |
a tab or a comma. Data in fixed-size columns cannot be parsed. |
29 |
22 Feb 05 |
nicklas |
<li>Data may optionally be preceded by a data header, i.e. |
29 |
22 Feb 05 |
nicklas |
the names of the columns |
29 |
22 Feb 05 |
nicklas |
<li>The data header may optionally be preceded by file headers. |
29 |
22 Feb 05 |
nicklas |
A file header is something that can be split in a name-value |
29 |
22 Feb 05 |
nicklas |
pair. |
29 |
22 Feb 05 |
nicklas |
<li>The file may contain comments, which are ignored by the parser |
1616 |
15 Nov 05 |
enell |
<li>The file may contain sections, where each section can contain a |
1616 |
15 Nov 05 |
enell |
header and/or a data part |
29 |
22 Feb 05 |
nicklas |
</ul> |
29 |
22 Feb 05 |
nicklas |
<p> |
29 |
22 Feb 05 |
nicklas |
<b>Example</b><br> |
29 |
22 Feb 05 |
nicklas |
<pre class="code"> |
29 |
22 Feb 05 |
nicklas |
# Example of a parsable file, not actual format of a GenePix file |
1616 |
15 Nov 05 |
enell |
section info |
29 |
22 Feb 05 |
nicklas |
Type=GenePix Results 1.3 |
29 |
22 Feb 05 |
nicklas |
DateTime=2002/09/04 13:59:48 |
29 |
22 Feb 05 |
nicklas |
Scanner=GenePix 4000B [83306] |
1616 |
15 Nov 05 |
enell |
90 |
|
1616 |
15 Nov 05 |
enell |
section data |
29 |
22 Feb 05 |
nicklas |
Block Column Row Name ID |
29 |
22 Feb 05 |
nicklas |
1 1 1 "Ly68_Lymphocyte antigen 68" "M000205_01" |
29 |
22 Feb 05 |
nicklas |
1 2 1 "Bag1_Bcl2-associated athanogene 1" "M000209_01" |
29 |
22 Feb 05 |
nicklas |
1 3 1 "Rps16_Ribosomal protein S16" "M000213_01" |
29 |
22 Feb 05 |
nicklas |
1 4 1 "Col4a1_Procollagen, type IV, alpha 1" "M000229_01" |
29 |
22 Feb 05 |
nicklas |
1 5 1 "Ace_Angiotensin converting enzyme" "M000233_01" |
29 |
22 Feb 05 |
nicklas |
1 6 1 "Cd5_CD5 antigen" "M000237_01" |
29 |
22 Feb 05 |
nicklas |
1 7 1 "Psme1_Protease (prosome, macropain) 28 s" |
29 |
22 Feb 05 |
nicklas |
</pre> |
29 |
22 Feb 05 |
nicklas |
101 |
|
29 |
22 Feb 05 |
nicklas |
<p> |
7667 |
21 Mar 19 |
nicklas |
If the file is an Excel file the first sheet is automatically converted to a |
7667 |
21 Mar 19 |
nicklas |
tab-separated text file by default. To use a different sheet call the |
7667 |
21 Mar 19 |
nicklas |
{@link #setExcelSheet(String)} method with the name or index of the sheet. |
7667 |
21 Mar 19 |
nicklas |
Initial parsing and regular expression matching is always done against the |
7667 |
21 Mar 19 |
nicklas |
text representation of the selected sheet. Note that empty cells on the top |
7667 |
21 Mar 19 |
nicklas |
and left are usually cut away. When retrieving values via the {@link Data} |
7667 |
21 Mar 19 |
nicklas |
class it will typically go directly to the mapped cell from the Excel sheet |
7667 |
21 Mar 19 |
nicklas |
and get the value, which means that numeric and date values don't have to |
7667 |
21 Mar 19 |
nicklas |
be converted to and from strings if not needed. For example, if a date |
7667 |
21 Mar 19 |
nicklas |
value is requested and the mapped cell is a date, that will be used as it is, |
7667 |
21 Mar 19 |
nicklas |
but if the mapped cell is a string, the same parsers that are used for CSV |
7667 |
21 Mar 19 |
nicklas |
files are used to convert the string to a date. |
7667 |
21 Mar 19 |
nicklas |
115 |
|
7667 |
21 Mar 19 |
nicklas |
<p> |
29 |
22 Feb 05 |
nicklas |
<b>How to use</b><br> |
29 |
22 Feb 05 |
nicklas |
The parsing is controlled by regular expressions. Start by |
29 |
22 Feb 05 |
nicklas |
creating a new <code>FlatFileParser</code> object. Use the |
29 |
22 Feb 05 |
nicklas |
various <code>set</code> methods to provide the regular expressions |
29 |
22 Feb 05 |
nicklas |
used to match the data/headers. |
29 |
22 Feb 05 |
nicklas |
<p> |
2992 |
01 Dec 06 |
enell |
Use the {@link #setInputStream(InputStream, String)} |
2317 |
24 May 06 |
nicklas |
method to specify a file to parse, and {@link #parseHeaders()} to start the |
2317 |
24 May 06 |
nicklas |
parsing. Note! Even if you know that |
29 |
22 Feb 05 |
nicklas |
the file doesn't contain any headers, you should always call this |
2317 |
24 May 06 |
nicklas |
method since the parser must initialize itself. If there are sections |
2317 |
24 May 06 |
nicklas |
in the file use {@link #nextSection()} first to control which section |
2317 |
24 May 06 |
nicklas |
you are parsing from. |
29 |
22 Feb 05 |
nicklas |
<p> |
2317 |
24 May 06 |
nicklas |
When the headers have been found use the {@link #hasMoreData()} |
2317 |
24 May 06 |
nicklas |
and {@link #nextData()} methods in a loop to read all |
1616 |
15 Nov 05 |
enell |
data from the section. |
29 |
22 Feb 05 |
nicklas |
<p> |
29 |
22 Feb 05 |
nicklas |
<b>Example</b><br> |
29 |
22 Feb 05 |
nicklas |
<pre class="code"> |
29 |
22 Feb 05 |
nicklas |
FlatFileParser ffp = new FlatFileParser(); |
29 |
22 Feb 05 |
nicklas |
ffp.setHeaderRegexp(Pattern.compile("(.*)=(.*)")); |
29 |
22 Feb 05 |
nicklas |
ffp.setDataHeaderRegexp(Pattern.compile("Block\\tColumn\\tRow\\tName\\tID")); |
29 |
22 Feb 05 |
nicklas |
ffp.setDataSplitterRegexp(Pattern.compile("\\t")); |
29 |
22 Feb 05 |
nicklas |
ffp.setIgnoreRegexp(Pattern.compile("#.*")); |
29 |
22 Feb 05 |
nicklas |
ffp.setMinDataColumns(5); |
29 |
22 Feb 05 |
nicklas |
ffp.setMaxDataColumns(5); |
2992 |
01 Dec 06 |
enell |
ffp.setInputStream(FileUtil.getInputStream(path_to_file), Config.getCharset()); |
29 |
22 Feb 05 |
nicklas |
ffp.parseHeaders(); |
6898 |
12 May 15 |
nicklas |
for (int i = 0; i < ffp.getLineCount(); i++) |
29 |
22 Feb 05 |
nicklas |
147 |
{ |
29 |
22 Feb 05 |
nicklas |
FlatFileParser.Line line = ffp.getLine(i); |
29 |
22 Feb 05 |
nicklas |
System.out.println(i+":"+line.type()+":"+line.line()); |
29 |
22 Feb 05 |
nicklas |
150 |
} |
29 |
22 Feb 05 |
nicklas |
int i = 0; |
29 |
22 Feb 05 |
nicklas |
while (ffp.hasMoreData()) |
29 |
22 Feb 05 |
nicklas |
153 |
{ |
29 |
22 Feb 05 |
nicklas |
FlatFileParser.Data data = ffp.nextData(); |
29 |
22 Feb 05 |
nicklas |
System.out.println((i++)+":"+data.columns()+":"+data.line()); |
29 |
22 Feb 05 |
nicklas |
156 |
} |
29 |
22 Feb 05 |
nicklas |
</pre> |
29 |
22 Feb 05 |
nicklas |
158 |
|
2317 |
24 May 06 |
nicklas |
<p> |
2317 |
24 May 06 |
nicklas |
<b>Mapping column values</b><br> |
2317 |
24 May 06 |
nicklas |
With the <code>FlatFileParser.Data</code> object you can only |
7667 |
21 Mar 19 |
nicklas |
access the data by column index (0-based). Another approach is to |
7667 |
21 Mar 19 |
nicklas |
use {@link Mapper}:s. A mapper takes a string template and inserts the |
7667 |
21 Mar 19 |
nicklas |
values of the data columns where you specify. Here are some examples: |
2317 |
24 May 06 |
nicklas |
<pre class="code"> |
2317 |
24 May 06 |
nicklas |
\1\ |
2317 |
24 May 06 |
nicklas |
\row\ |
2317 |
24 May 06 |
nicklas |
Row: \row\, Col:\col\ |
2317 |
24 May 06 |
nicklas |
=2 * col('Radius') |
2317 |
24 May 06 |
nicklas |
</pre> |
2317 |
24 May 06 |
nicklas |
171 |
|
7667 |
21 Mar 19 |
nicklas |
The result can be retrieved either as a string, as a numeric value |
7667 |
21 Mar 19 |
nicklas |
or as a date. It is even possible to create expressions that |
2317 |
24 May 06 |
nicklas |
do a calculation on the value before it is returned. |
2317 |
24 May 06 |
nicklas |
See the {@link #getMapper(String)} method for more information. |
2317 |
24 May 06 |
nicklas |
176 |
|
1616 |
15 Nov 05 |
enell |
@author Nicklas, Enell |
29 |
22 Feb 05 |
nicklas |
@version 2.0 |
2317 |
24 May 06 |
nicklas |
@base.modified $Date$ |
29 |
22 Feb 05 |
nicklas |
180 |
*/ |
29 |
22 Feb 05 |
nicklas |
181 |
public class FlatFileParser |
29 |
22 Feb 05 |
nicklas |
182 |
{ |
29 |
22 Feb 05 |
nicklas |
183 |
|
29 |
22 Feb 05 |
nicklas |
184 |
/** |
29 |
22 Feb 05 |
nicklas |
The default value for the number of unknown lines |
29 |
22 Feb 05 |
nicklas |
in a row that may be encountered by the {@link |
29 |
22 Feb 05 |
nicklas |
#parseHeaders() parseHeaders} method before it |
29 |
22 Feb 05 |
nicklas |
gives up. |
29 |
22 Feb 05 |
nicklas |
@see #setMaxUnknownLines(int) setMaxUnknownLines |
29 |
22 Feb 05 |
nicklas |
190 |
*/ |
29 |
22 Feb 05 |
nicklas |
191 |
public static final int DEFAULT_MAX_UNKNOWN_LINES = 100; |
29 |
22 Feb 05 |
nicklas |
192 |
|
29 |
22 Feb 05 |
nicklas |
193 |
|
29 |
22 Feb 05 |
nicklas |
194 |
/** |
2203 |
28 Apr 06 |
nicklas |
Pattern used to find column mappings in a string, ie. abc \col\ def |
2203 |
28 Apr 06 |
nicklas |
196 |
*/ |
2203 |
28 Apr 06 |
nicklas |
197 |
private static final Pattern findColumn = Pattern.compile("\\\\([^\\\\]+)\\\\"); |
2203 |
28 Apr 06 |
nicklas |
198 |
|
2203 |
28 Apr 06 |
nicklas |
199 |
/** |
29 |
22 Feb 05 |
nicklas |
Reads from the given input stream |
29 |
22 Feb 05 |
nicklas |
201 |
*/ |
29 |
22 Feb 05 |
nicklas |
202 |
private BufferedReader reader; |
29 |
22 Feb 05 |
nicklas |
203 |
|
29 |
22 Feb 05 |
nicklas |
204 |
/** |
4095 |
21 Jan 08 |
enell |
For keeping track of the number of bytes parsed. |
4095 |
21 Jan 08 |
enell |
206 |
*/ |
4095 |
21 Jan 08 |
enell |
207 |
private InputStreamTracker tracker; |
4095 |
21 Jan 08 |
enell |
208 |
|
4095 |
21 Jan 08 |
enell |
209 |
/** |
7656 |
19 Mar 19 |
nicklas |
Name of the Excel sheet to parse if the file is an Excel file. |
7656 |
19 Mar 19 |
nicklas |
211 |
*/ |
7664 |
20 Mar 19 |
nicklas |
212 |
private String excelSheetName; |
7656 |
19 Mar 19 |
nicklas |
213 |
|
7656 |
19 Mar 19 |
nicklas |
214 |
/** |
7673 |
26 Mar 19 |
nicklas |
Flag to indicate if only a single (=false) or all (=true) Excel sheets should |
7673 |
26 Mar 19 |
nicklas |
be parsed in one go. |
7673 |
26 Mar 19 |
nicklas |
217 |
*/ |
7673 |
26 Mar 19 |
nicklas |
218 |
private boolean parseAllExcelSheets; |
7673 |
26 Mar 19 |
nicklas |
219 |
|
7673 |
26 Mar 19 |
nicklas |
220 |
/** |
7673 |
26 Mar 19 |
nicklas |
Excel workbook that has been loaded. |
7673 |
26 Mar 19 |
nicklas |
222 |
*/ |
7673 |
26 Mar 19 |
nicklas |
223 |
private XlsxToCsvUtil excelWorkbook; |
7673 |
26 Mar 19 |
nicklas |
224 |
|
7673 |
26 Mar 19 |
nicklas |
225 |
/** |
7664 |
20 Mar 19 |
nicklas |
Excel sheet that is currently being parsed. |
7664 |
20 Mar 19 |
nicklas |
227 |
*/ |
7664 |
20 Mar 19 |
nicklas |
228 |
private XlsxToCsvUtil.SheetInfo excelSheet; |
7664 |
20 Mar 19 |
nicklas |
229 |
|
7664 |
20 Mar 19 |
nicklas |
230 |
/** |
7676 |
27 Mar 19 |
nicklas |
The value of the parsedLines when the current Excel sheet is parsed. |
7676 |
27 Mar 19 |
nicklas |
Used to find the correct row number in the current sheet (=parsedLines - excelParsedLinesOffset) |
7676 |
27 Mar 19 |
nicklas |
233 |
*/ |
7676 |
27 Mar 19 |
nicklas |
234 |
private int excelParsedLinesOffset; |
7676 |
27 Mar 19 |
nicklas |
235 |
|
7676 |
27 Mar 19 |
nicklas |
236 |
/** |
5193 |
27 Nov 09 |
nicklas |
The regular expression for matching the beginning-of-file marker |
5193 |
27 Nov 09 |
nicklas |
238 |
*/ |
5193 |
27 Nov 09 |
nicklas |
239 |
private Pattern bofMarker; |
5193 |
27 Nov 09 |
nicklas |
240 |
|
5193 |
27 Nov 09 |
nicklas |
241 |
/** |
29 |
22 Feb 05 |
nicklas |
The regular expression for matching a header line. |
29 |
22 Feb 05 |
nicklas |
243 |
*/ |
29 |
22 Feb 05 |
nicklas |
244 |
private Pattern header; |
29 |
22 Feb 05 |
nicklas |
245 |
|
29 |
22 Feb 05 |
nicklas |
246 |
/** |
1616 |
15 Nov 05 |
enell |
The regular expression for matching the first line of a section. |
1616 |
15 Nov 05 |
enell |
The expression must have one capturing group. |
1616 |
15 Nov 05 |
enell |
249 |
*/ |
1616 |
15 Nov 05 |
enell |
250 |
private Pattern section; |
1616 |
15 Nov 05 |
enell |
251 |
|
1616 |
15 Nov 05 |
enell |
252 |
/** |
29 |
22 Feb 05 |
nicklas |
The regular expression for matching the data header line. |
1616 |
15 Nov 05 |
enell |
The expression must have two capturing groups. |
29 |
22 Feb 05 |
nicklas |
255 |
*/ |
58 |
01 Mar 05 |
nicklas |
256 |
private Pattern dataHeader; |
29 |
22 Feb 05 |
nicklas |
257 |
|
29 |
22 Feb 05 |
nicklas |
258 |
/** |
29 |
22 Feb 05 |
nicklas |
The regular expression for splitting a data line. |
29 |
22 Feb 05 |
nicklas |
260 |
*/ |
58 |
01 Mar 05 |
nicklas |
261 |
private Pattern dataSplitter; |
29 |
22 Feb 05 |
nicklas |
262 |
|
29 |
22 Feb 05 |
nicklas |
263 |
/** |
2203 |
28 Apr 06 |
nicklas |
If quotes should be trimmed from data values or not. Default true. |
2203 |
28 Apr 06 |
nicklas |
Quotes are double or single quotes. |
2203 |
28 Apr 06 |
nicklas |
266 |
*/ |
2203 |
28 Apr 06 |
nicklas |
267 |
private boolean trimQuotes = true; |
2203 |
28 Apr 06 |
nicklas |
268 |
|
2203 |
28 Apr 06 |
nicklas |
269 |
/** |
7738 |
14 Aug 19 |
nicklas |
If white space should be trimmed from data values or not. |
7738 |
14 Aug 19 |
nicklas |
Default is false. |
7738 |
14 Aug 19 |
nicklas |
@since 3.15.1 |
7738 |
14 Aug 19 |
nicklas |
273 |
*/ |
7738 |
14 Aug 19 |
nicklas |
274 |
private boolean trimWhiteSpace; |
7738 |
14 Aug 19 |
nicklas |
275 |
|
7738 |
14 Aug 19 |
nicklas |
276 |
/** |
29 |
22 Feb 05 |
nicklas |
The regular expression for matching the data footer line. |
29 |
22 Feb 05 |
nicklas |
278 |
*/ |
58 |
01 Mar 05 |
nicklas |
279 |
private Pattern dataFooter; |
29 |
22 Feb 05 |
nicklas |
280 |
|
29 |
22 Feb 05 |
nicklas |
281 |
/** |
29 |
22 Feb 05 |
nicklas |
The minimum number of allowed data columns for a line to be considered |
29 |
22 Feb 05 |
nicklas |
a data line. |
29 |
22 Feb 05 |
nicklas |
284 |
*/ |
58 |
01 Mar 05 |
nicklas |
285 |
private int minDataColumns; |
29 |
22 Feb 05 |
nicklas |
286 |
|
29 |
22 Feb 05 |
nicklas |
287 |
/** |
29 |
22 Feb 05 |
nicklas |
The maximum number of allowed data columns for a line to be considered |
29 |
22 Feb 05 |
nicklas |
a data line. |
29 |
22 Feb 05 |
nicklas |
290 |
*/ |
58 |
01 Mar 05 |
nicklas |
291 |
private int maxDataColumns; |
29 |
22 Feb 05 |
nicklas |
292 |
|
29 |
22 Feb 05 |
nicklas |
293 |
/** |
29 |
22 Feb 05 |
nicklas |
The regular expression for matching a comment line. |
29 |
22 Feb 05 |
nicklas |
295 |
*/ |
29 |
22 Feb 05 |
nicklas |
296 |
private Pattern ignore; |
29 |
22 Feb 05 |
nicklas |
297 |
|
29 |
22 Feb 05 |
nicklas |
298 |
/** |
29 |
22 Feb 05 |
nicklas |
The maximum number of unknown lines to parse before giving up. |
29 |
22 Feb 05 |
nicklas |
300 |
*/ |
58 |
01 Mar 05 |
nicklas |
301 |
private int maxUnknownLines = DEFAULT_MAX_UNKNOWN_LINES; |
29 |
22 Feb 05 |
nicklas |
302 |
|
29 |
22 Feb 05 |
nicklas |
303 |
/** |
2539 |
17 Aug 06 |
nicklas |
If <code>null</code> should be returned for empty columns (instead of an empty string). |
29 |
22 Feb 05 |
nicklas |
305 |
*/ |
58 |
01 Mar 05 |
nicklas |
306 |
private boolean emptyIsNull = true; |
3472 |
11 Jun 07 |
nicklas |
307 |
|
3472 |
11 Jun 07 |
nicklas |
308 |
/** |
3472 |
11 Jun 07 |
nicklas |
If <code>null</code> should be returned if a (numeric) value can't be parsed. |
3472 |
11 Jun 07 |
nicklas |
310 |
*/ |
3472 |
11 Jun 07 |
nicklas |
311 |
private boolean useNullIfException = false; |
29 |
22 Feb 05 |
nicklas |
312 |
|
29 |
22 Feb 05 |
nicklas |
313 |
/** |
4104 |
28 Jan 08 |
nicklas |
If non-existing columns should be ignored (true) or result in an |
4104 |
28 Jan 08 |
nicklas |
exception (false) |
4104 |
28 Jan 08 |
nicklas |
316 |
*/ |
4104 |
28 Jan 08 |
nicklas |
317 |
private boolean ignoreNonExistingColumns = false; |
4104 |
28 Jan 08 |
nicklas |
318 |
|
4104 |
28 Jan 08 |
nicklas |
319 |
/** |
2539 |
17 Aug 06 |
nicklas |
If <code>null</code> should be returned for the string NULL (ignoring case) |
2539 |
17 Aug 06 |
nicklas |
or not. |
2539 |
17 Aug 06 |
nicklas |
322 |
*/ |
2539 |
17 Aug 06 |
nicklas |
323 |
private boolean nullIsNull = true; |
2539 |
17 Aug 06 |
nicklas |
324 |
|
2539 |
17 Aug 06 |
nicklas |
325 |
/** |
3023 |
12 Dec 06 |
nicklas |
The default number formatter to use for creating mappers. |
3023 |
12 Dec 06 |
nicklas |
327 |
*/ |
3023 |
12 Dec 06 |
nicklas |
328 |
private NumberFormat numberFormat = null; |
3023 |
12 Dec 06 |
nicklas |
329 |
|
3023 |
12 Dec 06 |
nicklas |
330 |
/** |
7655 |
19 Mar 19 |
nicklas |
The default date formatter to use when creating mappers. |
7655 |
19 Mar 19 |
nicklas |
332 |
*/ |
7655 |
19 Mar 19 |
nicklas |
333 |
private Formatter<Date> dateFormat = null; |
7655 |
19 Mar 19 |
nicklas |
334 |
|
7655 |
19 Mar 19 |
nicklas |
335 |
/** |
7655 |
19 Mar 19 |
nicklas |
The default timestamp format to use when creating mappers. |
7655 |
19 Mar 19 |
nicklas |
337 |
*/ |
7655 |
19 Mar 19 |
nicklas |
338 |
private Formatter<Date> timestampFormat = null; |
7655 |
19 Mar 19 |
nicklas |
339 |
|
7655 |
19 Mar 19 |
nicklas |
340 |
/** |
5193 |
27 Nov 09 |
nicklas |
The value that was captured by the bofMarker pattern. |
5193 |
27 Nov 09 |
nicklas |
342 |
*/ |
5193 |
27 Nov 09 |
nicklas |
343 |
private String bofType; |
5193 |
27 Nov 09 |
nicklas |
344 |
|
5193 |
27 Nov 09 |
nicklas |
345 |
/** |
29 |
22 Feb 05 |
nicklas |
List of lines parsed by the {@link #parseHeaders()} method. |
29 |
22 Feb 05 |
nicklas |
347 |
*/ |
29 |
22 Feb 05 |
nicklas |
348 |
private List<Line> lines; |
29 |
22 Feb 05 |
nicklas |
349 |
|
29 |
22 Feb 05 |
nicklas |
350 |
/** |
29 |
22 Feb 05 |
nicklas |
The total number of lines parsed so far. |
29 |
22 Feb 05 |
nicklas |
352 |
*/ |
58 |
01 Mar 05 |
nicklas |
353 |
private int parsedLines; |
29 |
22 Feb 05 |
nicklas |
354 |
|
29 |
22 Feb 05 |
nicklas |
355 |
/** |
7673 |
26 Mar 19 |
nicklas |
The number of sections parsed so far. |
7673 |
26 Mar 19 |
nicklas |
357 |
*/ |
7673 |
26 Mar 19 |
nicklas |
358 |
private int parsedSections; |
7673 |
26 Mar 19 |
nicklas |
359 |
|
7673 |
26 Mar 19 |
nicklas |
360 |
/** |
1248 |
06 Sep 05 |
nicklas |
The total number of parsed characters so far. |
1248 |
06 Sep 05 |
nicklas |
362 |
*/ |
1248 |
06 Sep 05 |
nicklas |
363 |
private long parsedCharacters; |
1248 |
06 Sep 05 |
nicklas |
364 |
|
1248 |
06 Sep 05 |
nicklas |
365 |
/** |
3618 |
01 Aug 07 |
nicklas |
The number of data lines parsed in the current section so far. |
3618 |
01 Aug 07 |
nicklas |
This value is reset at each new section. |
3618 |
01 Aug 07 |
nicklas |
368 |
*/ |
3618 |
01 Aug 07 |
nicklas |
369 |
private int parsedDataLines; |
3618 |
01 Aug 07 |
nicklas |
370 |
|
3618 |
01 Aug 07 |
nicklas |
371 |
/** |
29 |
22 Feb 05 |
nicklas |
Map of header lines parsed by the {@link #parseHeaders()} method. |
6898 |
12 May 15 |
nicklas |
The map contains name -> value pairs |
29 |
22 Feb 05 |
nicklas |
374 |
*/ |
2203 |
28 Apr 06 |
nicklas |
375 |
private Map<String, String> headers; |
1790 |
18 Jan 06 |
enell |
376 |
|
1616 |
15 Nov 05 |
enell |
377 |
/** |
2655 |
22 Sep 06 |
nicklas |
List of the column names found by splitting the data header using the |
2655 |
22 Sep 06 |
nicklas |
data splitter regexp. |
2203 |
28 Apr 06 |
nicklas |
380 |
*/ |
2655 |
22 Sep 06 |
nicklas |
381 |
private List<String> columnHeaders; |
2203 |
28 Apr 06 |
nicklas |
382 |
|
2203 |
28 Apr 06 |
nicklas |
383 |
/** |
1958 |
09 Feb 06 |
gregory |
The line that last matched the {@link #section}. |
1616 |
15 Nov 05 |
enell |
385 |
*/ |
1616 |
15 Nov 05 |
enell |
386 |
private Line nextSection; |
29 |
22 Feb 05 |
nicklas |
387 |
|
29 |
22 Feb 05 |
nicklas |
388 |
/** |
29 |
22 Feb 05 |
nicklas |
The next available data line as parsed by the {@link #hasMoreData()} |
29 |
22 Feb 05 |
nicklas |
method. |
29 |
22 Feb 05 |
nicklas |
391 |
*/ |
58 |
01 Mar 05 |
nicklas |
392 |
private Data nextData; |
29 |
22 Feb 05 |
nicklas |
393 |
|
29 |
22 Feb 05 |
nicklas |
394 |
/** |
2655 |
22 Sep 06 |
nicklas |
Number of ignored lines in the {@link #nextData()} method. |
2655 |
22 Sep 06 |
nicklas |
396 |
*/ |
2655 |
22 Sep 06 |
nicklas |
397 |
private int ignoredLines; |
2655 |
22 Sep 06 |
nicklas |
398 |
/** |
2655 |
22 Sep 06 |
nicklas |
Number of unknown lines in the {@link #nextData()} method. |
2655 |
22 Sep 06 |
nicklas |
400 |
*/ |
2655 |
22 Sep 06 |
nicklas |
401 |
private int unknownLines; |
2655 |
22 Sep 06 |
nicklas |
402 |
|
2655 |
22 Sep 06 |
nicklas |
403 |
/** |
2655 |
22 Sep 06 |
nicklas |
If unknown or ignored lines should be kept. |
2655 |
22 Sep 06 |
nicklas |
@see #getSkippedLines() |
2655 |
22 Sep 06 |
nicklas |
406 |
*/ |
2655 |
22 Sep 06 |
nicklas |
407 |
private boolean keepSkippedLines = false; |
2655 |
22 Sep 06 |
nicklas |
408 |
|
2655 |
22 Sep 06 |
nicklas |
409 |
/** |
2655 |
22 Sep 06 |
nicklas |
List for keeping ignored and unknown lines in the {@link #nextData()} method. |
2655 |
22 Sep 06 |
nicklas |
411 |
*/ |
2655 |
22 Sep 06 |
nicklas |
412 |
private List<Line> skippedLines; |
2655 |
22 Sep 06 |
nicklas |
413 |
|
2655 |
22 Sep 06 |
nicklas |
414 |
/** |
29 |
22 Feb 05 |
nicklas |
Create a new <code>FlatFileParser</code> object. |
29 |
22 Feb 05 |
nicklas |
416 |
*/ |
29 |
22 Feb 05 |
nicklas |
417 |
public FlatFileParser()
{
	// A new parser starts with both progress counters at zero.
	this.parsedCharacters = 0L;
	this.parsedLines = 0;
}
29 |
22 Feb 05 |
nicklas |
422 |
|
29 |
22 Feb 05 |
nicklas |
423 |
/** |
5193 |
27 Nov 09 |
nicklas |
Set a regular expression that matches a beginning-of-file |
5193 |
27 Nov 09 |
nicklas |
marker. This property should be set before starting to parse |
5193 |
27 Nov 09 |
nicklas |
the file (otherwise it is ignored). The first method call that |
5193 |
27 Nov 09 |
nicklas |
causes the parsing to be started will invoke {@link #parseToBof()} |
5193 |
27 Nov 09 |
nicklas |
(can also be invoked manually). |
5193 |
27 Nov 09 |
nicklas |
<p> |
5193 |
27 Nov 09 |
nicklas |
The regular expression may contain a single capturing group. The |
5193 |
27 Nov 09 |
nicklas |
matched value is returned by {@link #getBofType()}. |
5193 |
27 Nov 09 |
nicklas |
432 |
|
5193 |
27 Nov 09 |
nicklas |
@param regexp A regular expression |
5193 |
27 Nov 09 |
nicklas |
@since 2.15 |
5193 |
27 Nov 09 |
nicklas |
435 |
*/ |
5193 |
27 Nov 09 |
nicklas |
436 |
public void setBofMarkerRegexp(Pattern regexp) |
5193 |
27 Nov 09 |
nicklas |
437 |
{ |
5193 |
27 Nov 09 |
nicklas |
438 |
this.bofMarker = regexp; |
5193 |
27 Nov 09 |
nicklas |
439 |
} |
5193 |
27 Nov 09 |
nicklas |
440 |
|
5193 |
27 Nov 09 |
nicklas |
441 |
/** |
1616 |
15 Nov 05 |
enell |
Set a regular expression that can be matched against a header. |
29 |
22 Feb 05 |
nicklas |
The regular expression must contain two capturing |
29 |
22 Feb 05 |
nicklas |
groups, the first should capture the name and the second the value |
29 |
22 Feb 05 |
nicklas |
of the header. For example, the file contains headers like: |
29 |
22 Feb 05 |
nicklas |
<pre class="code"> |
29 |
22 Feb 05 |
nicklas |
"Type=GenePix Results 1.3" |
29 |
22 Feb 05 |
nicklas |
"DateTime=2002/09/04 13:59:48" |
29 |
22 Feb 05 |
nicklas |
</pre> |
29 |
22 Feb 05 |
nicklas |
To match this we can use the following regular expression: |
29 |
22 Feb 05 |
nicklas |
<code>"(.*)=(.*)"</code>. |
29 |
22 Feb 05 |
nicklas |
452 |
|
29 |
22 Feb 05 |
nicklas |
@param regexp A regular expression |
29 |
22 Feb 05 |
nicklas |
454 |
*/ |
29 |
22 Feb 05 |
nicklas |
455 |
public void setHeaderRegexp(Pattern regexp) |
29 |
22 Feb 05 |
nicklas |
456 |
{ |
29 |
22 Feb 05 |
nicklas |
457 |
this.header = regexp; |
29 |
22 Feb 05 |
nicklas |
458 |
} |
29 |
22 Feb 05 |
nicklas |
459 |
|
29 |
22 Feb 05 |
nicklas |
460 |
/** |
1616 |
15 Nov 05 |
enell |
Set a regular expression that can be matched against the section |
1616 |
15 Nov 05 |
enell |
line. For example, the file contains a section like: |
1616 |
15 Nov 05 |
enell |
<pre class="code"> |
1616 |
15 Nov 05 |
enell |
section FileInformation |
1616 |
15 Nov 05 |
enell |
</pre> |
1616 |
15 Nov 05 |
enell |
To match this we can use the following regular expression: |
1616 |
15 Nov 05 |
enell |
<code>section (.*)</code>. This will match to anything |
1616 |
15 Nov 05 |
enell |
that starts with "section ". The section name will be in the |
1616 |
15 Nov 05 |
enell |
capturing group. |
1616 |
15 Nov 05 |
enell |
470 |
|
1616 |
15 Nov 05 |
enell |
@param regexp A regular expression |
1616 |
15 Nov 05 |
enell |
472 |
*/ |
1616 |
15 Nov 05 |
enell |
473 |
public void setSectionRegexp(Pattern regexp) |
1616 |
15 Nov 05 |
enell |
474 |
{ |
1616 |
15 Nov 05 |
enell |
475 |
this.section = regexp; |
1616 |
15 Nov 05 |
enell |
476 |
} |
1616 |
15 Nov 05 |
enell |
477 |
|
1616 |
15 Nov 05 |
enell |
478 |
/** |
29 |
22 Feb 05 |
nicklas |
Set a regular expression that can be matched against the data |
29 |
22 Feb 05 |
nicklas |
header. For example, the file contains a data header like: |
29 |
22 Feb 05 |
nicklas |
<pre class="code"> |
29 |
22 Feb 05 |
nicklas |
"Block"{tab}"Column"{tab}"Row"{tab}"Name"{tab}"ID" ...and so on |
29 |
22 Feb 05 |
nicklas |
</pre> |
29 |
22 Feb 05 |
nicklas |
To match this we can use the following regular expression: |
29 |
22 Feb 05 |
nicklas |
<code>"(.*?)"(\t"(.*?)")</code>. This will match to anything |
29 |
22 Feb 05 |
nicklas |
that has at least two columns. We could also be more specific and use: |
29 |
22 Feb 05 |
nicklas |
<code>"Block"\t"Column"\t"Row"\t"Name"\t"ID"...</code> |
29 |
22 Feb 05 |
nicklas |
488 |
|
29 |
22 Feb 05 |
nicklas |
@param regexp A regular expression |
29 |
22 Feb 05 |
nicklas |
490 |
*/ |
29 |
22 Feb 05 |
nicklas |
491 |
public void setDataHeaderRegexp(Pattern regexp) |
29 |
22 Feb 05 |
nicklas |
492 |
{ |
58 |
01 Mar 05 |
nicklas |
493 |
this.dataHeader = regexp; |
29 |
22 Feb 05 |
nicklas |
494 |
} |
29 |
22 Feb 05 |
nicklas |
495 |
|
29 |
22 Feb 05 |
nicklas |
496 |
/** |
29 |
22 Feb 05 |
nicklas |
Set a regular expression that is used to split a data line into |
2302 |
22 May 06 |
nicklas |
columns. To split on tabs we use: <code>\t</code>. This regular expression is |
2302 |
22 May 06 |
nicklas |
also used to split the data header line into column names, which can then be used |
2302 |
22 May 06 |
nicklas |
in the {@link #getMapper(String)} method. |
29 |
22 Feb 05 |
nicklas |
501 |
|
29 |
22 Feb 05 |
nicklas |
@param regexp A regular expression |
29 |
22 Feb 05 |
nicklas |
@see #setMinDataColumns(int) setMinDataColumns |
29 |
22 Feb 05 |
nicklas |
@see #setMaxDataColumns(int) setMaxDataColumns |
29 |
22 Feb 05 |
nicklas |
505 |
*/ |
29 |
22 Feb 05 |
nicklas |
506 |
public void setDataSplitterRegexp(Pattern regexp) |
29 |
22 Feb 05 |
nicklas |
507 |
{ |
58 |
01 Mar 05 |
nicklas |
508 |
this.dataSplitter = regexp; |
29 |
22 Feb 05 |
nicklas |
509 |
} |
2203 |
28 Apr 06 |
nicklas |
510 |
|
2203 |
28 Apr 06 |
nicklas |
511 |
/** |
2203 |
28 Apr 06 |
nicklas |
Set if quotes around each data value should be removed or not. A quote is |
2203 |
28 Apr 06 |
nicklas |
either a double quote (") or a single quote ('). The default setting of |
2203 |
28 Apr 06 |
nicklas |
this option is true. |
2203 |
28 Apr 06 |
nicklas |
@param trimQuotes TRUE to remove quotes, FALSE to keep them |
2203 |
28 Apr 06 |
nicklas |
516 |
*/ |
2203 |
28 Apr 06 |
nicklas |
517 |
public void setTrimQuotes(boolean trimQuotes) |
2203 |
28 Apr 06 |
nicklas |
518 |
{ |
2203 |
28 Apr 06 |
nicklas |
519 |
this.trimQuotes = trimQuotes; |
2203 |
28 Apr 06 |
nicklas |
520 |
} |
7738 |
14 Aug 19 |
nicklas |
521 |
|
7738 |
14 Aug 19 |
nicklas |
522 |
/** |
7738 |
14 Aug 19 |
nicklas |
Set a flag indicating if white-space should be trimmed from |
7738 |
14 Aug 19 |
nicklas |
start and end of data values. The default setting is false. |
7738 |
14 Aug 19 |
nicklas |
@param trimWhiteSpace TRUE to remove white-space, FALSE to keep them |
7738 |
14 Aug 19 |
nicklas |
@since 3.15.1 |
7738 |
14 Aug 19 |
nicklas |
527 |
*/ |
7738 |
14 Aug 19 |
nicklas |
528 |
public void setTrimWhiteSpace(boolean trimWhiteSpace) |
7738 |
14 Aug 19 |
nicklas |
529 |
{ |
7738 |
14 Aug 19 |
nicklas |
530 |
this.trimWhiteSpace = trimWhiteSpace; |
7738 |
14 Aug 19 |
nicklas |
531 |
} |
29 |
22 Feb 05 |
nicklas |
532 |
|
29 |
22 Feb 05 |
nicklas |
533 |
/** |
29 |
22 Feb 05 |
nicklas |
Set the minimum number of columns a data line must contain in |
29 |
22 Feb 05 |
nicklas |
order for it to be counted as a data line. |
29 |
22 Feb 05 |
nicklas |
536 |
|
29 |
22 Feb 05 |
nicklas |
@param columns The minimum number of columns |
29 |
22 Feb 05 |
nicklas |
538 |
*/ |
29 |
22 Feb 05 |
nicklas |
539 |
public void setMinDataColumns(int columns) |
29 |
22 Feb 05 |
nicklas |
540 |
{ |
58 |
01 Mar 05 |
nicklas |
541 |
this.minDataColumns = columns; |
29 |
22 Feb 05 |
nicklas |
542 |
} |
29 |
22 Feb 05 |
nicklas |
543 |
/** |
29 |
22 Feb 05 |
nicklas |
Set the maximum number of columns a data line can contain in |
2655 |
22 Sep 06 |
nicklas |
order for it to be counted as a data line. |
29 |
22 Feb 05 |
nicklas |
546 |
|
29 |
22 Feb 05 |
nicklas |
@param columns The maximum number of columns, or 0 for an |
2655 |
22 Sep 06 |
nicklas |
unlimited number, or -1 to disable counting the number of columns |
29 |
22 Feb 05 |
nicklas |
549 |
*/ |
29 |
22 Feb 05 |
nicklas |
550 |
public void setMaxDataColumns(int columns) |
29 |
22 Feb 05 |
nicklas |
551 |
{ |
58 |
01 Mar 05 |
nicklas |
552 |
this.maxDataColumns = columns; |
29 |
22 Feb 05 |
nicklas |
553 |
} |
29 |
22 Feb 05 |
nicklas |
554 |
|
29 |
22 Feb 05 |
nicklas |
555 |
/** |
29 |
22 Feb 05 |
nicklas |
Set a regular expression that can be matched against a |
29 |
22 Feb 05 |
nicklas |
data footer. If a line matching this pattern is found while |
29 |
22 Feb 05 |
nicklas |
looking for data with the {@link #hasMoreData() hasMoreData} |
29 |
22 Feb 05 |
nicklas |
method it will exit and no more data will be returned. |
29 |
22 Feb 05 |
nicklas |
560 |
|
29 |
22 Feb 05 |
nicklas |
@param regexp A regular expression |
29 |
22 Feb 05 |
nicklas |
562 |
*/ |
29 |
22 Feb 05 |
nicklas |
563 |
public void setDataFooterRegexp(Pattern regexp) |
29 |
22 Feb 05 |
nicklas |
564 |
{ |
58 |
01 Mar 05 |
nicklas |
565 |
this.dataFooter = regexp; |
29 |
22 Feb 05 |
nicklas |
566 |
} |
29 |
22 Feb 05 |
nicklas |
567 |
|
29 |
22 Feb 05 |
nicklas |
568 |
/** |
29 |
22 Feb 05 |
nicklas |
Set a regular expression that is used to match a line that should |
29 |
22 Feb 05 |
nicklas |
be ignored. For example, the file may contain comments starting |
29 |
22 Feb 05 |
nicklas |
with a #: <code>\#.*</code> |
29 |
22 Feb 05 |
nicklas |
572 |
|
29 |
22 Feb 05 |
nicklas |
@param regexp A regular expression |
29 |
22 Feb 05 |
nicklas |
574 |
*/ |
29 |
22 Feb 05 |
nicklas |
575 |
public void setIgnoreRegexp(Pattern regexp) |
29 |
22 Feb 05 |
nicklas |
576 |
{ |
29 |
22 Feb 05 |
nicklas |
577 |
this.ignore = regexp; |
29 |
22 Feb 05 |
nicklas |
578 |
} |
29 |
22 Feb 05 |
nicklas |
579 |
|
29 |
22 Feb 05 |
nicklas |
580 |
/** |
29 |
22 Feb 05 |
nicklas |
The number of unknown lines in a row that can be parsed by |
29 |
22 Feb 05 |
nicklas |
the {@link #parseHeaders() parseHeaders} method before it gives |
29 |
22 Feb 05 |
nicklas |
up. The default value is specified by {#link #DEFAULT_MAX_UNKNOWN_LINES}. |
29 |
22 Feb 05 |
nicklas |
This value is ignored while parsing data. |
29 |
22 Feb 05 |
nicklas |
585 |
|
29 |
22 Feb 05 |
nicklas |
@param lines The number of lines |
29 |
22 Feb 05 |
nicklas |
587 |
*/ |
29 |
22 Feb 05 |
nicklas |
588 |
public void setMaxUnknownLines(int lines) |
29 |
22 Feb 05 |
nicklas |
589 |
{ |
58 |
01 Mar 05 |
nicklas |
590 |
this.maxUnknownLines = lines; |
29 |
22 Feb 05 |
nicklas |
591 |
} |
29 |
22 Feb 05 |
nicklas |
592 |
|
29 |
22 Feb 05 |
nicklas |
593 |
/** |
2539 |
17 Aug 06 |
nicklas |
Specify if <code>null</code> values should be returned instead of empty strings |
29 |
22 Feb 05 |
nicklas |
for columns that doesn't contain any value. |
58 |
01 Mar 05 |
nicklas |
@param emptyIsNull TRUE to return null, FALSE to return an empty string |
29 |
22 Feb 05 |
nicklas |
597 |
*/ |
58 |
01 Mar 05 |
nicklas |
598 |
public void setUseNullIfEmpty(boolean emptyIsNull) |
29 |
22 Feb 05 |
nicklas |
599 |
{ |
58 |
01 Mar 05 |
nicklas |
600 |
this.emptyIsNull = emptyIsNull; |
29 |
22 Feb 05 |
nicklas |
601 |
} |
2539 |
17 Aug 06 |
nicklas |
602 |
|
2539 |
17 Aug 06 |
nicklas |
603 |
/** |
2539 |
17 Aug 06 |
nicklas |
Specify if <code>null</code> values should be returned for strings |
2539 |
17 Aug 06 |
nicklas |
having the value "NULL" (ignoring case). |
2539 |
17 Aug 06 |
nicklas |
@param nullIsNull TRUE to return null, FALSE to return the original string value |
2539 |
17 Aug 06 |
nicklas |
607 |
*/ |
2539 |
17 Aug 06 |
nicklas |
608 |
public void setUseNullIfNull(boolean nullIsNull) |
2539 |
17 Aug 06 |
nicklas |
609 |
{ |
2539 |
17 Aug 06 |
nicklas |
610 |
this.nullIsNull = nullIsNull; |
2539 |
17 Aug 06 |
nicklas |
611 |
} |
29 |
22 Feb 05 |
nicklas |
612 |
|
29 |
22 Feb 05 |
nicklas |
613 |
/** |
2655 |
22 Sep 06 |
nicklas |
If the {@link #nextData()} and {@link #hasMoreData()} methods should |
2655 |
22 Sep 06 |
nicklas |
keep information of lines that was skipped because they matched the |
2655 |
22 Sep 06 |
nicklas |
ignore pattern or could be interpreted as data lines. The default is |
2655 |
22 Sep 06 |
nicklas |
FALSE. The number of lines that was skipped is always available regardless |
2655 |
22 Sep 06 |
nicklas |
of this setting. |
2655 |
22 Sep 06 |
nicklas |
619 |
|
2655 |
22 Sep 06 |
nicklas |
@param keep TRUE to keep line information, FALSE to not |
2655 |
22 Sep 06 |
nicklas |
@see #getSkippedLines() |
2655 |
22 Sep 06 |
nicklas |
@see #getIgnoredLines() |
2655 |
22 Sep 06 |
nicklas |
@see #getUnknownLines() |
2655 |
22 Sep 06 |
nicklas |
@see #getNumSkippedLines() |
2655 |
22 Sep 06 |
nicklas |
625 |
*/ |
2655 |
22 Sep 06 |
nicklas |
626 |
public void setKeepSkippedLines(boolean keep) |
2655 |
22 Sep 06 |
nicklas |
627 |
{ |
2655 |
22 Sep 06 |
nicklas |
628 |
this.keepSkippedLines = keep; |
2655 |
22 Sep 06 |
nicklas |
629 |
} |
2655 |
22 Sep 06 |
nicklas |
630 |
|
2655 |
22 Sep 06 |
nicklas |
631 |
/** |
7673 |
26 Mar 19 |
nicklas |
If this flag is set and the source file is an Excel file, |
7673 |
26 Mar 19 |
nicklas |
then all sheets will be parsed unless a named sheet |
7673 |
26 Mar 19 |
nicklas |
specified. Each sheet is handled like a section with the sheet |
7673 |
26 Mar 19 |
nicklas |
name inside brackets ([name]). The regular expression for |
7673 |
26 Mar 19 |
nicklas |
detecting a section is automatically updated to match this |
7673 |
26 Mar 19 |
nicklas |
pattern. |
7673 |
26 Mar 19 |
nicklas |
@since 3.15 |
7673 |
26 Mar 19 |
nicklas |
639 |
*/ |
7673 |
26 Mar 19 |
nicklas |
640 |
public void setParseAllExcelSheets(boolean parseAllExcelSheets) |
7673 |
26 Mar 19 |
nicklas |
641 |
{ |
7673 |
26 Mar 19 |
nicklas |
642 |
this.parseAllExcelSheets = parseAllExcelSheets; |
7673 |
26 Mar 19 |
nicklas |
643 |
} |
7673 |
26 Mar 19 |
nicklas |
644 |
|
7673 |
26 Mar 19 |
nicklas |
645 |
/** |
7673 |
26 Mar 19 |
nicklas |
@see #setParseAllExcelSheets(boolean) |
7673 |
26 Mar 19 |
nicklas |
@since 3.15 |
7673 |
26 Mar 19 |
nicklas |
648 |
*/ |
7673 |
26 Mar 19 |
nicklas |
649 |
public boolean getParseAllExcelSheets() |
7673 |
26 Mar 19 |
nicklas |
650 |
{ |
7673 |
26 Mar 19 |
nicklas |
651 |
return parseAllExcelSheets; |
7673 |
26 Mar 19 |
nicklas |
652 |
} |
7673 |
26 Mar 19 |
nicklas |
653 |
|
7673 |
26 Mar 19 |
nicklas |
654 |
/** |
7656 |
19 Mar 19 |
nicklas |
Set the name of Excel worksheet to parse if the given file is an Excel |
7656 |
19 Mar 19 |
nicklas |
file, otherwise this is ignored. |
7656 |
19 Mar 19 |
nicklas |
@since 3.15 |
7656 |
19 Mar 19 |
nicklas |
658 |
*/ |
7656 |
19 Mar 19 |
nicklas |
659 |
public void setExcelSheet(String name) |
7656 |
19 Mar 19 |
nicklas |
660 |
{ |
7664 |
20 Mar 19 |
nicklas |
661 |
this.excelSheetName = name; |
7656 |
19 Mar 19 |
nicklas |
662 |
} |
7656 |
19 Mar 19 |
nicklas |
663 |
|
7656 |
19 Mar 19 |
nicklas |
664 |
/** |
7656 |
19 Mar 19 |
nicklas |
Get the name of the Excel sheet that should be or is parsed. |
7656 |
19 Mar 19 |
nicklas |
@since 3.15 |
7656 |
19 Mar 19 |
nicklas |
667 |
*/ |
7656 |
19 Mar 19 |
nicklas |
668 |
public String getExcelSheet() |
7656 |
19 Mar 19 |
nicklas |
669 |
{ |
7664 |
20 Mar 19 |
nicklas |
670 |
return excelSheetName; |
7656 |
19 Mar 19 |
nicklas |
671 |
} |
7656 |
19 Mar 19 |
nicklas |
672 |
|
7656 |
19 Mar 19 |
nicklas |
673 |
/** |
7738 |
14 Aug 19 |
nicklas |
If the input stream that is being parsed is an Excel |
7738 |
14 Aug 19 |
nicklas |
document, this method returns information about it. |
7738 |
14 Aug 19 |
nicklas |
@return An XlsxToCsvUtil object or null if the stream is not an Excel document |
7738 |
14 Aug 19 |
nicklas |
@since 3.15.1 |
7738 |
14 Aug 19 |
nicklas |
678 |
*/ |
7738 |
14 Aug 19 |
nicklas |
679 |
public XlsxToCsvUtil getCurrentExcelWorkbook() |
7738 |
14 Aug 19 |
nicklas |
680 |
{ |
7738 |
14 Aug 19 |
nicklas |
681 |
return excelWorkbook; |
7738 |
14 Aug 19 |
nicklas |
682 |
} |
7738 |
14 Aug 19 |
nicklas |
683 |
|
7738 |
14 Aug 19 |
nicklas |
684 |
/** |
7738 |
14 Aug 19 |
nicklas |
If the input stream that is being parsed is an Excel |
7738 |
14 Aug 19 |
nicklas |
document, this method returns information about the current |
7738 |
14 Aug 19 |
nicklas |
worksheet. |
7738 |
14 Aug 19 |
nicklas |
@return An XlsxToCsvUtil.SheetInfo object or null if the stream is not an Excel document |
7738 |
14 Aug 19 |
nicklas |
@since 3.15.1 |
7738 |
14 Aug 19 |
nicklas |
690 |
*/ |
7738 |
14 Aug 19 |
nicklas |
691 |
public XlsxToCsvUtil.SheetInfo getCurrentSheet() |
7738 |
14 Aug 19 |
nicklas |
692 |
{ |
7738 |
14 Aug 19 |
nicklas |
693 |
return excelSheet; |
7738 |
14 Aug 19 |
nicklas |
694 |
} |
7738 |
14 Aug 19 |
nicklas |
695 |
|
7738 |
14 Aug 19 |
nicklas |
696 |
/** |
7655 |
19 Mar 19 |
nicklas |
Set the input stream that will be parsed. The stream can be either a |
7655 |
19 Mar 19 |
nicklas |
text CSV-like stream or an Excel workbook (xlsx). |
7655 |
19 Mar 19 |
nicklas |
699 |
|
7655 |
19 Mar 19 |
nicklas |
If the stream is an Excel workbook the following apply: |
7655 |
19 Mar 19 |
nicklas |
701 |
|
7655 |
19 Mar 19 |
nicklas |
* If no date format has been specified, yyyy-MM-dd is used |
7832 |
24 Jun 20 |
nicklas |
* If no timestamp format has been specified, yyyy-MM-dd HH:mm:ss is used |
7655 |
19 Mar 19 |
nicklas |
* If no number format has been specified, 'dot' is used |
7655 |
19 Mar 19 |
nicklas |
* The data splitter regular expression is changed to \\t |
7673 |
26 Mar 19 |
nicklas |
* The section regular expression is changed to [.*] (if the {@link #getParseAllExcelSheets()} flag is set |
7655 |
19 Mar 19 |
nicklas |
707 |
|
29 |
22 Feb 05 |
nicklas |
@param in The <code>InputStream</code> |
7655 |
19 Mar 19 |
nicklas |
@param charsetOrSheetName If CSV, the name of the character set to |
7655 |
19 Mar 19 |
nicklas |
use when parsing the file, or null to use the default charset specified by |
2992 |
01 Dec 06 |
enell |
{@link Config#getCharset()} |
7655 |
19 Mar 19 |
nicklas |
If Excel, the name or index of the worksheet in the workbook, the default is |
7673 |
26 Mar 19 |
nicklas |
to parse the first sheet (index=0) or the whole workbook if the {@link #getParseAllExcelSheets()} flag is set |
2992 |
01 Dec 06 |
enell |
@since 2.1.1 |
29 |
22 Feb 05 |
nicklas |
715 |
*/ |
7655 |
19 Mar 19 |
nicklas |
716 |
public void setInputStream(InputStream in, String charsetOrSheetName) |
29 |
22 Feb 05 |
nicklas |
717 |
{ |
7655 |
19 Mar 19 |
nicklas |
718 |
in = XlsxToCsvUtil.prepareForCheck(in); |
7655 |
19 Mar 19 |
nicklas |
719 |
String charsetName = charsetOrSheetName; |
7889 |
04 Dec 20 |
nicklas |
720 |
InputStreamTracker.ScaleFactor scaleFactor = null; |
7655 |
19 Mar 19 |
nicklas |
721 |
if (XlsxToCsvUtil.isAnExcelFile(in)) |
7655 |
19 Mar 19 |
nicklas |
722 |
{ |
7673 |
26 Mar 19 |
nicklas |
723 |
excelWorkbook = new XlsxToCsvUtil(); |
7655 |
19 Mar 19 |
nicklas |
724 |
if (dateFormat == null) dateFormat = new DateFormatter("yyyy-MM-dd"); |
7832 |
24 Jun 20 |
nicklas |
725 |
if (timestampFormat == null) timestampFormat = new DateFormatter("yyyy-MM-dd HH:mm:ss"); |
7655 |
19 Mar 19 |
nicklas |
726 |
if (numberFormat == null) numberFormat = NumberFormatUtil.getNumberFormat('.', (char)0); |
7673 |
26 Mar 19 |
nicklas |
727 |
excelWorkbook.setDateFormat(dateFormat); |
7673 |
26 Mar 19 |
nicklas |
728 |
excelWorkbook.setTimestampFormat(timestampFormat); |
7673 |
26 Mar 19 |
nicklas |
729 |
excelWorkbook.setNumberFormat(new NumberFormatFormatter(numberFormat)); |
7673 |
26 Mar 19 |
nicklas |
730 |
excelWorkbook.setEvaluateFormulas(true); |
7655 |
19 Mar 19 |
nicklas |
731 |
setDataSplitterRegexp(Pattern.compile("\\t")); |
7655 |
19 Mar 19 |
nicklas |
732 |
charsetName = "UTF-8"; |
7889 |
04 Dec 20 |
nicklas |
733 |
InputStreamTracker tmpTracker = new InputStreamTracker(in); |
7655 |
19 Mar 19 |
nicklas |
734 |
try |
7655 |
19 Mar 19 |
nicklas |
735 |
{ |
7889 |
04 Dec 20 |
nicklas |
736 |
excelWorkbook.readWorkbook(tmpTracker); |
7655 |
19 Mar 19 |
nicklas |
737 |
} |
7655 |
19 Mar 19 |
nicklas |
738 |
catch (IOException ex) |
7655 |
19 Mar 19 |
nicklas |
739 |
{ |
7655 |
19 Mar 19 |
nicklas |
740 |
throw new RuntimeException(ex); |
7655 |
19 Mar 19 |
nicklas |
741 |
} |
7889 |
04 Dec 20 |
nicklas |
742 |
long allBytes = tmpTracker.getNumRead(); |
7673 |
26 Mar 19 |
nicklas |
743 |
|
7673 |
26 Mar 19 |
nicklas |
744 |
if (parseAllExcelSheets) |
7655 |
19 Mar 19 |
nicklas |
745 |
{ |
7673 |
26 Mar 19 |
nicklas |
746 |
setSectionRegexp(Pattern.compile("\\[(.*)\\]")); |
7673 |
26 Mar 19 |
nicklas |
747 |
in = excelWorkbook.getWorkbookAsCsv(); |
7889 |
04 Dec 20 |
nicklas |
// Apply a curved scale factor since we don't know how many bytes |
7889 |
04 Dec 20 |
nicklas |
// we get after converting all sheets to CSV |
7889 |
04 Dec 20 |
nicklas |
750 |
scaleFactor = new InputStreamTracker.CurvedScaleFactor(allBytes); |
7655 |
19 Mar 19 |
nicklas |
751 |
} |
7655 |
19 Mar 19 |
nicklas |
752 |
else |
7655 |
19 Mar 19 |
nicklas |
753 |
{ |
7673 |
26 Mar 19 |
nicklas |
754 |
List<String> sheets = excelWorkbook.getSheetNames(); |
7673 |
26 Mar 19 |
nicklas |
755 |
if (sheets.contains(charsetOrSheetName)) |
7673 |
26 Mar 19 |
nicklas |
756 |
{ |
7673 |
26 Mar 19 |
nicklas |
757 |
excelSheet = excelWorkbook.getSheetAsCsv(charsetOrSheetName); |
7673 |
26 Mar 19 |
nicklas |
758 |
} |
7673 |
26 Mar 19 |
nicklas |
759 |
else if (sheets.contains(excelSheetName)) |
7673 |
26 Mar 19 |
nicklas |
760 |
{ |
7673 |
26 Mar 19 |
nicklas |
761 |
excelSheet = excelWorkbook.getSheetAsCsv(excelSheetName); |
7673 |
26 Mar 19 |
nicklas |
762 |
} |
7673 |
26 Mar 19 |
nicklas |
763 |
else |
7673 |
26 Mar 19 |
nicklas |
764 |
{ |
7673 |
26 Mar 19 |
nicklas |
// Or try to convert to index with 0 as default |
7673 |
26 Mar 19 |
nicklas |
766 |
excelSheet = excelWorkbook.getSheetAsCsv(Values.getInt(charsetOrSheetName)); |
7673 |
26 Mar 19 |
nicklas |
767 |
} |
7673 |
26 Mar 19 |
nicklas |
768 |
excelSheetName = excelSheet.getName(); |
7673 |
26 Mar 19 |
nicklas |
769 |
in = excelSheet.parseToCsv(); |
7889 |
04 Dec 20 |
nicklas |
770 |
int sheetBytes = excelSheet.getSize(); |
7889 |
04 Dec 20 |
nicklas |
// Apply a scale factor so that when we have parsed the sheet it seems like we have parsed the original input stream |
7889 |
04 Dec 20 |
nicklas |
772 |
scaleFactor = new InputStreamTracker.LinearScaleFactor((float)allBytes / (float)sheetBytes); |
7655 |
19 Mar 19 |
nicklas |
773 |
} |
7655 |
19 Mar 19 |
nicklas |
774 |
} |
7627 |
08 Mar 19 |
nicklas |
775 |
Charset cs = CharsetUtil.getCharset(charsetName); |
7889 |
04 Dec 20 |
nicklas |
776 |
this.tracker = new InputStreamTracker(in, scaleFactor); |
4095 |
21 Jan 08 |
enell |
777 |
this.reader = new BufferedReader(new InputStreamReader(tracker, cs)); |
29 |
22 Feb 05 |
nicklas |
778 |
} |
29 |
22 Feb 05 |
nicklas |
779 |
|
29 |
22 Feb 05 |
nicklas |
780 |
/** |
5193 |
27 Nov 09 |
nicklas |
Parse the file until the beginning-of-file marker is found. If no |
5193 |
27 Nov 09 |
nicklas |
regular expression has been set with {@link #setBofMarkerRegexp(Pattern)} |
5193 |
27 Nov 09 |
nicklas |
or if the parsing of the file has already started, this method call is |
5193 |
27 Nov 09 |
nicklas |
ignored. |
5193 |
27 Nov 09 |
nicklas |
785 |
|
5193 |
27 Nov 09 |
nicklas |
@return TRUE if this call resulted in parsing and the BOF marker was found, |
5193 |
27 Nov 09 |
nicklas |
FALSE otherwise |
5193 |
27 Nov 09 |
nicklas |
@since 2.15 |
5193 |
27 Nov 09 |
nicklas |
789 |
*/ |
5193 |
27 Nov 09 |
nicklas |
790 |
public boolean parseToBof() |
5193 |
27 Nov 09 |
nicklas |
791 |
throws IOException |
5193 |
27 Nov 09 |
nicklas |
792 |
{ |
5193 |
27 Nov 09 |
nicklas |
793 |
if (bofMarker == null || parsedLines > 0) return false; |
5193 |
27 Nov 09 |
nicklas |
794 |
|
5193 |
27 Nov 09 |
nicklas |
795 |
boolean done = false; |
5193 |
27 Nov 09 |
nicklas |
796 |
boolean matched = false; |
5193 |
27 Nov 09 |
nicklas |
797 |
while (!done) |
5193 |
27 Nov 09 |
nicklas |
798 |
{ |
5193 |
27 Nov 09 |
nicklas |
799 |
ThreadSignalHandler.checkInterrupted(); |
5193 |
27 Nov 09 |
nicklas |
800 |
String line = reader.readLine(); |
5193 |
27 Nov 09 |
nicklas |
801 |
parsedLines++; |
5193 |
27 Nov 09 |
nicklas |
802 |
if (line == null) |
5193 |
27 Nov 09 |
nicklas |
803 |
{ |
5193 |
27 Nov 09 |
nicklas |
804 |
done = true; |
5193 |
27 Nov 09 |
nicklas |
805 |
} |
5193 |
27 Nov 09 |
nicklas |
806 |
else |
5193 |
27 Nov 09 |
nicklas |
807 |
{ |
5193 |
27 Nov 09 |
nicklas |
808 |
parsedCharacters += line.length(); |
5193 |
27 Nov 09 |
nicklas |
809 |
Matcher m = bofMarker.matcher(line); |
5193 |
27 Nov 09 |
nicklas |
810 |
if (m.matches()) |
5193 |
27 Nov 09 |
nicklas |
811 |
{ |
5193 |
27 Nov 09 |
nicklas |
812 |
bofType = m.groupCount() > 0 ? m.group(1) : m.group(); |
5193 |
27 Nov 09 |
nicklas |
813 |
matched = true; |
5193 |
27 Nov 09 |
nicklas |
814 |
done = true; |
5193 |
27 Nov 09 |
nicklas |
815 |
} |
5193 |
27 Nov 09 |
nicklas |
816 |
} |
5193 |
27 Nov 09 |
nicklas |
817 |
} |
5193 |
27 Nov 09 |
nicklas |
818 |
return matched; |
5193 |
27 Nov 09 |
nicklas |
819 |
} |
5193 |
27 Nov 09 |
nicklas |
820 |
|
5193 |
27 Nov 09 |
nicklas |
821 |
/** |
5193 |
27 Nov 09 |
nicklas |
Get the value captured by the BOF marker regular expression. If |
5193 |
27 Nov 09 |
nicklas |
no capturing groups was specified in the pattern this value is the |
5193 |
27 Nov 09 |
nicklas |
string that matched the entire pattern. |
5193 |
27 Nov 09 |
nicklas |
825 |
|
5193 |
27 Nov 09 |
nicklas |
@return The matched value, or null if BOF matching has not been done |
5193 |
27 Nov 09 |
nicklas |
@since 2.15 |
5193 |
27 Nov 09 |
nicklas |
828 |
*/ |
5193 |
27 Nov 09 |
nicklas |
829 |
public String getBofType() |
5193 |
27 Nov 09 |
nicklas |
830 |
{ |
5193 |
27 Nov 09 |
nicklas |
831 |
return bofType; |
5193 |
27 Nov 09 |
nicklas |
832 |
} |
5193 |
27 Nov 09 |
nicklas |
833 |
|
5193 |
27 Nov 09 |
nicklas |
834 |
/** |
29 |
22 Feb 05 |
nicklas |
Start parsing the input stream. The parser will read a single |
29 |
22 Feb 05 |
nicklas |
line at a time. Each line is checked in the following order: |
29 |
22 Feb 05 |
nicklas |
<ol> |
1627 |
17 Nov 05 |
enell |
<li>Does it match the {@link #setSectionRegexp(Pattern) section} |
1627 |
17 Nov 05 |
enell |
regular expression? |
29 |
22 Feb 05 |
nicklas |
<li>Does it match the {@link #setHeaderRegexp(Pattern) header} |
29 |
22 Feb 05 |
nicklas |
regular expression? |
29 |
22 Feb 05 |
nicklas |
<li>Does it match the {@link #setDataHeaderRegexp(Pattern) data header} |
29 |
22 Feb 05 |
nicklas |
regular expression? |
29 |
22 Feb 05 |
nicklas |
<li>Does it match the {@link #setIgnoreRegexp(Pattern) comment} |
29 |
22 Feb 05 |
nicklas |
regular expression? |
29 |
22 Feb 05 |
nicklas |
<li>Can it be split by the {@link #setDataSplitterRegexp(Pattern) data} |
29 |
22 Feb 05 |
nicklas |
regular expression into the appropriate number of columns? |
29 |
22 Feb 05 |
nicklas |
</ol> |
29 |
22 Feb 05 |
nicklas |
849 |
|
29 |
22 Feb 05 |
nicklas |
The first expression that matches stops the processing of that line. |
29 |
22 Feb 05 |
nicklas |
If the line matched a header or comment the parser continues with |
29 |
22 Feb 05 |
nicklas |
the next line. If the line matched the data header or data, the |
29 |
22 Feb 05 |
nicklas |
method returns. If none of the above is true the line is recorded |
1273 |
07 Sep 05 |
nicklas |
as {@link LineType#UNKNOWN} and processing is continued |
29 |
22 Feb 05 |
nicklas |
with the next line. If too many unkown lines in a row has been found |
29 |
22 Feb 05 |
nicklas |
the method also returns. This should be considered as a failure to |
29 |
22 Feb 05 |
nicklas |
parse the specified file. |
29 |
22 Feb 05 |
nicklas |
<p> |
29 |
22 Feb 05 |
nicklas |
The method returns the type of the last line that was parsed as |
29 |
22 Feb 05 |
nicklas |
follows: |
29 |
22 Feb 05 |
nicklas |
<ul> |
1627 |
17 Nov 05 |
enell |
<li>{@link LineType#SECTION}: The last line |
1627 |
17 Nov 05 |
enell |
was a section. Header, data header or data may |
1627 |
17 Nov 05 |
enell |
follow this line. |
1273 |
07 Sep 05 |
nicklas |
<li>{@link LineType#DATA_HEADER}: The last line |
29 |
22 Feb 05 |
nicklas |
was the data header. It is expected that data should |
29 |
22 Feb 05 |
nicklas |
follow. |
1273 |
07 Sep 05 |
nicklas |
<li>{@link LineType#DATA}: The last line |
29 |
22 Feb 05 |
nicklas |
was a data line. More data may follow. |
1273 |
07 Sep 05 |
nicklas |
<li>{@link LineType#UNKNOWN}: The last line |
29 |
22 Feb 05 |
nicklas |
was of unknown format. The file could not be parsed. |
29 |
22 Feb 05 |
nicklas |
</ul> |
29 |
22 Feb 05 |
nicklas |
873 |
|
1273 |
07 Sep 05 |
nicklas |
@return The {@link LineType} of the last parsed line |
4020 |
29 Nov 07 |
martin |
@throws IOException If reading the file fails. |
29 |
22 Feb 05 |
nicklas |
876 |
*/ |
1273 |
07 Sep 05 |
nicklas |
877 |
public LineType parseHeaders() |
29 |
22 Feb 05 |
nicklas |
878 |
throws IOException |
29 |
22 Feb 05 |
nicklas |
879 |
{ |
5193 |
27 Nov 09 |
nicklas |
880 |
if (parsedLines == 0) parseToBof(); |
1275 |
08 Sep 05 |
nicklas |
881 |
lines = new ArrayList<Line>(maxUnknownLines); |
29 |
22 Feb 05 |
nicklas |
882 |
headers = new HashMap<String,String>(); |
29 |
22 Feb 05 |
nicklas |
883 |
|
2317 |
24 May 06 |
nicklas |
884 |
nextData = null; |
2317 |
24 May 06 |
nicklas |
885 |
nextSection = null; |
58 |
01 Mar 05 |
nicklas |
886 |
int unknownLines = 0; |
29 |
22 Feb 05 |
nicklas |
887 |
boolean done = false; |
29 |
22 Feb 05 |
nicklas |
888 |
while (!done) |
29 |
22 Feb 05 |
nicklas |
889 |
{ |
4128 |
05 Feb 08 |
nicklas |
890 |
ThreadSignalHandler.checkInterrupted(); |
29 |
22 Feb 05 |
nicklas |
891 |
String line = reader.readLine(); |
58 |
01 Mar 05 |
nicklas |
892 |
parsedLines++; |
1275 |
08 Sep 05 |
nicklas |
893 |
if (line == null) |
29 |
22 Feb 05 |
nicklas |
894 |
{ |
1275 |
08 Sep 05 |
nicklas |
895 |
done = true; |
29 |
22 Feb 05 |
nicklas |
896 |
} |
1275 |
08 Sep 05 |
nicklas |
897 |
else |
1275 |
08 Sep 05 |
nicklas |
898 |
{ |
1275 |
08 Sep 05 |
nicklas |
899 |
parsedCharacters += line.length(); |
1275 |
08 Sep 05 |
nicklas |
900 |
boolean matched = false; |
1616 |
15 Nov 05 |
enell |
901 |
|
1616 |
15 Nov 05 |
enell |
// Check if the line is a section |
1616 |
15 Nov 05 |
enell |
903 |
if (section != null) |
1616 |
15 Nov 05 |
enell |
904 |
{ |
1616 |
15 Nov 05 |
enell |
905 |
Matcher m = section.matcher(line); |
1616 |
15 Nov 05 |
enell |
906 |
if (m.matches() && (m.groupCount() == 1)) |
1616 |
15 Nov 05 |
enell |
907 |
{ |
1616 |
15 Nov 05 |
enell |
908 |
matched = true; |
1616 |
15 Nov 05 |
enell |
909 |
done = true; |
2540 |
17 Aug 06 |
nicklas |
910 |
nextSection = new Line(parsedLines, line, m.group(1), LineType.SECTION); |
1627 |
17 Nov 05 |
enell |
911 |
lines.add(nextSection); |
7673 |
26 Mar 19 |
nicklas |
912 |
if (parseAllExcelSheets) |
7673 |
26 Mar 19 |
nicklas |
913 |
{ |
7673 |
26 Mar 19 |
nicklas |
914 |
excelSheet = excelWorkbook.getSheetAsCsv(parsedSections); |
7673 |
26 Mar 19 |
nicklas |
915 |
excelSheetName = excelSheet.getName(); |
7676 |
27 Mar 19 |
nicklas |
916 |
excelParsedLinesOffset = parsedLines; |
7673 |
26 Mar 19 |
nicklas |
917 |
} |
7673 |
26 Mar 19 |
nicklas |
918 |
parsedSections++; |
1616 |
15 Nov 05 |
enell |
919 |
} |
1616 |
15 Nov 05 |
enell |
920 |
} |
1616 |
15 Nov 05 |
enell |
921 |
|
1275 |
08 Sep 05 |
nicklas |
// Check if the line is a header |
1616 |
15 Nov 05 |
enell |
923 |
if (header != null && !matched) |
29 |
22 Feb 05 |
nicklas |
924 |
{ |
1275 |
08 Sep 05 |
nicklas |
925 |
Matcher m = header.matcher(line); |
1275 |
08 Sep 05 |
nicklas |
926 |
if (m.matches() && (m.groupCount() == 2)) |
1275 |
08 Sep 05 |
nicklas |
927 |
{ |
1275 |
08 Sep 05 |
nicklas |
928 |
matched = true; |
1275 |
08 Sep 05 |
nicklas |
929 |
String name = m.group(1); |
2539 |
17 Aug 06 |
nicklas |
930 |
String value = convertToNull(m.group(2)); |
1275 |
08 Sep 05 |
nicklas |
931 |
headers.put(name, value); |
2540 |
17 Aug 06 |
nicklas |
932 |
lines.add(new Line(parsedLines, line, name, value)); |
1275 |
08 Sep 05 |
nicklas |
933 |
} |
29 |
22 Feb 05 |
nicklas |
934 |
} |
29 |
22 Feb 05 |
nicklas |
935 |
|
1275 |
08 Sep 05 |
nicklas |
// Check if the line is a comment |
1289 |
08 Sep 05 |
nicklas |
937 |
if (ignore != null && !matched) |
29 |
22 Feb 05 |
nicklas |
938 |
{ |
1275 |
08 Sep 05 |
nicklas |
939 |
Matcher m = ignore.matcher(line); |
1275 |
08 Sep 05 |
nicklas |
940 |
if (m.matches()) |
1275 |
08 Sep 05 |
nicklas |
941 |
{ |
1275 |
08 Sep 05 |
nicklas |
942 |
matched = true; |
2540 |
17 Aug 06 |
nicklas |
943 |
lines.add(new Line(parsedLines, line, LineType.IGNORED)); |
1275 |
08 Sep 05 |
nicklas |
944 |
} |
1275 |
08 Sep 05 |
nicklas |
945 |
} |
1275 |
08 Sep 05 |
nicklas |
946 |
|
1275 |
08 Sep 05 |
nicklas |
// Check if the line matches the data header |
1289 |
08 Sep 05 |
nicklas |
948 |
if (dataHeader != null && !matched) |
1275 |
08 Sep 05 |
nicklas |
949 |
{ |
58 |
01 Mar 05 |
nicklas |
950 |
Matcher m = dataHeader.matcher(line); |
29 |
22 Feb 05 |
nicklas |
951 |
if (m.matches()) |
29 |
22 Feb 05 |
nicklas |
952 |
{ |
29 |
22 Feb 05 |
nicklas |
953 |
matched = true; |
1289 |
08 Sep 05 |
nicklas |
954 |
done = true; |
2540 |
17 Aug 06 |
nicklas |
955 |
lines.add(new Line(parsedLines, line, LineType.DATA_HEADER)); |
2203 |
28 Apr 06 |
nicklas |
956 |
|
2203 |
28 Apr 06 |
nicklas |
957 |
if (dataSplitter != null) |
2203 |
28 Apr 06 |
nicklas |
958 |
{ |
2203 |
28 Apr 06 |
nicklas |
959 |
String[] columns = trimQuotes(dataSplitter.split(line, -1)); |
2655 |
22 Sep 06 |
nicklas |
960 |
columnHeaders = Arrays.asList(columns); |
2203 |
28 Apr 06 |
nicklas |
961 |
} |
29 |
22 Feb 05 |
nicklas |
962 |
} |
29 |
22 Feb 05 |
nicklas |
963 |
} |
1275 |
08 Sep 05 |
nicklas |
964 |
|
1275 |
08 Sep 05 |
nicklas |
// Try the data splitter and check the number of columns |
2655 |
22 Sep 06 |
nicklas |
966 |
if (dataHeader == null && dataSplitter != null && !matched && maxDataColumns >= 0) |
29 |
22 Feb 05 |
nicklas |
967 |
{ |
2203 |
28 Apr 06 |
nicklas |
968 |
String[] columns = trimQuotes(dataSplitter.split(line, -1)); |
58 |
01 Mar 05 |
nicklas |
969 |
if ((columns.length >= minDataColumns) && |
58 |
01 Mar 05 |
nicklas |
970 |
((columns.length <= maxDataColumns) || (maxDataColumns == 0))) |
29 |
22 Feb 05 |
nicklas |
971 |
{ |
29 |
22 Feb 05 |
nicklas |
972 |
matched = true; |
29 |
22 Feb 05 |
nicklas |
973 |
done = true; |
2540 |
17 Aug 06 |
nicklas |
974 |
lines.add(new Line(parsedLines, line, LineType.DATA)); |
3618 |
01 Aug 07 |
nicklas |
975 |
parsedDataLines = 1; |
7738 |
14 Aug 19 |
nicklas |
976 |
nextData = new Data(parsedLines, parsedDataLines, line, columns, emptyIsNull, nullIsNull, trimWhiteSpace); |
29 |
22 Feb 05 |
nicklas |
977 |
} |
29 |
22 Feb 05 |
nicklas |
978 |
} |
29 |
22 Feb 05 |
nicklas |
979 |
|
1275 |
08 Sep 05 |
nicklas |
// The line was unknown |
1275 |
08 Sep 05 |
nicklas |
981 |
if (!matched) |
1275 |
08 Sep 05 |
nicklas |
982 |
{ |
1275 |
08 Sep 05 |
nicklas |
983 |
unknownLines++; |
1275 |
08 Sep 05 |
nicklas |
984 |
if (unknownLines > maxUnknownLines) done = true; |
2540 |
17 Aug 06 |
nicklas |
985 |
lines.add(new Line(parsedLines, line, LineType.UNKNOWN)); |
1275 |
08 Sep 05 |
nicklas |
986 |
} |
1275 |
08 Sep 05 |
nicklas |
987 |
else |
1275 |
08 Sep 05 |
nicklas |
988 |
{ |
1275 |
08 Sep 05 |
nicklas |
989 |
unknownLines = 0; |
1275 |
08 Sep 05 |
nicklas |
990 |
} |
29 |
22 Feb 05 |
nicklas |
991 |
} |
29 |
22 Feb 05 |
nicklas |
992 |
} |
1309 |
08 Sep 05 |
enell |
993 |
return getLineCount() == 0 ? LineType.UNKNOWN : getLine(getLineCount()-1).type(); |
29 |
22 Feb 05 |
nicklas |
994 |
} |
1616 |
15 Nov 05 |
enell |
995 |
|
2539 |
17 Aug 06 |
nicklas |
996 |
private String convertToNull(String value) |
2539 |
17 Aug 06 |
nicklas |
997 |
{ |
2539 |
17 Aug 06 |
nicklas |
998 |
return emptyIsNull && value.length() == 0 || |
2539 |
17 Aug 06 |
nicklas |
999 |
nullIsNull && "NULL".equalsIgnoreCase(value) ? null : value; |
2539 |
17 Aug 06 |
nicklas |
1000 |
} |
2539 |
17 Aug 06 |
nicklas |
1001 |
|
29 |
22 Feb 05 |
nicklas |
1002 |
/** |
1368 |
21 Sep 05 |
nicklas |
Get the names of all headers found by the {@link #parseHeaders()} |
1368 |
21 Sep 05 |
nicklas |
method. To get the value of a header, use the {@link #getHeader(String)} |
1368 |
21 Sep 05 |
nicklas |
method. |
1368 |
21 Sep 05 |
nicklas |
1006 |
*/ |
1368 |
21 Sep 05 |
nicklas |
1007 |
public Set<String> getHeaderNames() |
1368 |
21 Sep 05 |
nicklas |
1008 |
{ |
1368 |
21 Sep 05 |
nicklas |
1009 |
return Collections.unmodifiableSet(headers.keySet()); |
1368 |
21 Sep 05 |
nicklas |
1010 |
} |
1368 |
21 Sep 05 |
nicklas |
1011 |
|
1368 |
21 Sep 05 |
nicklas |
1012 |
/** |
29 |
22 Feb 05 |
nicklas |
Get the value of the header with the specified name. This method |
1368 |
21 Sep 05 |
nicklas |
should only be used after {@link #parseHeaders()} has |
1368 |
21 Sep 05 |
nicklas |
been completed. |
29 |
22 Feb 05 |
nicklas |
1016 |
|
29 |
22 Feb 05 |
nicklas |
@param name The name of the header |
29 |
22 Feb 05 |
nicklas |
@return The value of the header, or null if it was not found |
1368 |
21 Sep 05 |
nicklas |
@see #getLine(int) |
29 |
22 Feb 05 |
nicklas |
1020 |
*/ |
29 |
22 Feb 05 |
nicklas |
1021 |
public String getHeader(String name) |
29 |
22 Feb 05 |
nicklas |
1022 |
{ |
29 |
22 Feb 05 |
nicklas |
1023 |
return headers.get(name); |
29 |
22 Feb 05 |
nicklas |
1024 |
} |
29 |
22 Feb 05 |
nicklas |
1025 |
|
29 |
22 Feb 05 |
nicklas |
1026 |
/** |
1368 |
21 Sep 05 |
nicklas |
Get the number of lines that the {@link #parseHeaders()} |
29 |
22 Feb 05 |
nicklas |
method parsed. |
29 |
22 Feb 05 |
nicklas |
1029 |
|
29 |
22 Feb 05 |
nicklas |
@return The number of lines parsed |
29 |
22 Feb 05 |
nicklas |
1031 |
*/ |
29 |
22 Feb 05 |
nicklas |
1032 |
public int getLineCount() |
29 |
22 Feb 05 |
nicklas |
1033 |
{ |
29 |
22 Feb 05 |
nicklas |
1034 |
return lines.size(); |
29 |
22 Feb 05 |
nicklas |
1035 |
} |
29 |
22 Feb 05 |
nicklas |
1036 |
|
29 |
22 Feb 05 |
nicklas |
1037 |
/** |
1368 |
21 Sep 05 |
nicklas |
Get the line with the specified number. This method |
1368 |
21 Sep 05 |
nicklas |
should only be used after {@link #parseHeaders()} has |
1368 |
21 Sep 05 |
nicklas |
been completed. |
29 |
22 Feb 05 |
nicklas |
@param index The line number, starting at 0 |
29 |
22 Feb 05 |
nicklas |
@return A <code>Line</code> object |
1368 |
21 Sep 05 |
nicklas |
@see #getHeader(String) |
29 |
22 Feb 05 |
nicklas |
1044 |
*/ |
29 |
22 Feb 05 |
nicklas |
1045 |
public Line getLine(int index) |
29 |
22 Feb 05 |
nicklas |
1046 |
{ |
29 |
22 Feb 05 |
nicklas |
1047 |
return lines.get(index); |
29 |
22 Feb 05 |
nicklas |
1048 |
} |
2409 |
21 Jun 06 |
enell |
1049 |
|
2409 |
21 Jun 06 |
enell |
1050 |
/** |
2409 |
21 Jun 06 |
enell |
Get the lines read by {@link #parseHeaders()}. |
2601 |
29 Aug 06 |
martin |
@return The lines in the order that they have been read. |
2409 |
21 Jun 06 |
enell |
1053 |
*/ |
2409 |
21 Jun 06 |
enell |
1054 |
public List<Line> getLines() |
2409 |
21 Jun 06 |
enell |
1055 |
{ |
2409 |
21 Jun 06 |
enell |
1056 |
return new ArrayList<Line>(lines); |
2409 |
21 Jun 06 |
enell |
1057 |
} |
29 |
22 Feb 05 |
nicklas |
1058 |
|
29 |
22 Feb 05 |
nicklas |
1059 |
/** |
2203 |
28 Apr 06 |
nicklas |
Get all column headers that were found by splitting the line matching the |
2317 |
24 May 06 |
nicklas |
{@link #setDataHeaderRegexp(Pattern)} pattern using the |
2317 |
24 May 06 |
nicklas |
{@link #setDataSplitterRegexp(Pattern)} pattern. This method should only be called |
2317 |
24 May 06 |
nicklas |
after {@link #parseHeaders()} has been called. |
2203 |
28 Apr 06 |
nicklas |
1064 |
|
2655 |
22 Sep 06 |
nicklas |
@return A list containing the column headers, or null if no headers have been |
2203 |
28 Apr 06 |
nicklas |
found |
2317 |
24 May 06 |
nicklas |
1067 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1068 |
public List<String> getColumnHeaders() |
2203 |
28 Apr 06 |
nicklas |
1069 |
{ |
2203 |
28 Apr 06 |
nicklas |
1070 |
if (columnHeaders != null) |
2203 |
28 Apr 06 |
nicklas |
1071 |
{ |
2655 |
22 Sep 06 |
nicklas |
1072 |
return Collections.unmodifiableList(columnHeaders); |
2203 |
28 Apr 06 |
nicklas |
1073 |
} |
2203 |
28 Apr 06 |
nicklas |
1074 |
return null; |
2203 |
28 Apr 06 |
nicklas |
1075 |
} |
2203 |
28 Apr 06 |
nicklas |
1076 |
|
2203 |
28 Apr 06 |
nicklas |
1077 |
/** |
2317 |
24 May 06 |
nicklas |
Get the index of a column header with a given name. This method should only be called |
2655 |
22 Sep 06 |
nicklas |
after {@link #parseHeaders()} has been called. If more than one header |
2655 |
22 Sep 06 |
nicklas |
with the same name exists the index of the first is returned. |
2317 |
24 May 06 |
nicklas |
1081 |
|
2317 |
24 May 06 |
nicklas |
@param name The name of the column header |
2317 |
24 May 06 |
nicklas |
@return The index, or null if no header with that name exists |
3911 |
06 Nov 07 |
nicklas |
@see #findColumnHeaderIndex(String) |
2317 |
24 May 06 |
nicklas |
1085 |
*/ |
2317 |
24 May 06 |
nicklas |
1086 |
public Integer getColumnHeaderIndex(String name) |
2317 |
24 May 06 |
nicklas |
1087 |
{ |
2655 |
22 Sep 06 |
nicklas |
1088 |
int index = columnHeaders.indexOf(name); |
2655 |
22 Sep 06 |
nicklas |
1089 |
return index >= 0 ? index : null; |
2317 |
24 May 06 |
nicklas |
1090 |
} |
2317 |
24 May 06 |
nicklas |
1091 |
|
2317 |
24 May 06 |
nicklas |
1092 |
/** |
3911 |
06 Nov 07 |
nicklas |
Find the index of a column header using a regular expression for pattern |
3911 |
06 Nov 07 |
nicklas |
matching. This method should only be called after {@link #parseHeaders()} |
3911 |
06 Nov 07 |
nicklas |
has been called. If more than one header matches the regular expression |
3911 |
06 Nov 07 |
nicklas |
only the first one found is returned. |
3911 |
06 Nov 07 |
nicklas |
1097 |
|
3911 |
06 Nov 07 |
nicklas |
@param regex The regular expression used to match the header names |
3911 |
06 Nov 07 |
nicklas |
@return The index, or null if no header is matching the regular expression |
3911 |
06 Nov 07 |
nicklas |
or if the string is not a valid regular expression |
3911 |
06 Nov 07 |
nicklas |
@see #getColumnHeaderIndex(String) |
3911 |
06 Nov 07 |
nicklas |
@since 2.5 |
3911 |
06 Nov 07 |
nicklas |
1103 |
*/ |
3911 |
06 Nov 07 |
nicklas |
1104 |
public Integer findColumnHeaderIndex(String regex) |
3911 |
06 Nov 07 |
nicklas |
1105 |
{ |
3911 |
06 Nov 07 |
nicklas |
1106 |
try |
3911 |
06 Nov 07 |
nicklas |
1107 |
{ |
3911 |
06 Nov 07 |
nicklas |
1108 |
Pattern p = Pattern.compile(regex); |
3911 |
06 Nov 07 |
nicklas |
1109 |
int index = 0; |
3911 |
06 Nov 07 |
nicklas |
1110 |
for (String column : columnHeaders) |
3911 |
06 Nov 07 |
nicklas |
1111 |
{ |
3911 |
06 Nov 07 |
nicklas |
1112 |
if (p.matcher(column).matches()) return index; |
3911 |
06 Nov 07 |
nicklas |
1113 |
++index; |
3911 |
06 Nov 07 |
nicklas |
1114 |
} |
3911 |
06 Nov 07 |
nicklas |
1115 |
} |
3911 |
06 Nov 07 |
nicklas |
1116 |
catch (PatternSyntaxException ex) |
3911 |
06 Nov 07 |
nicklas |
1117 |
{} |
3911 |
06 Nov 07 |
nicklas |
1118 |
return null; |
3911 |
06 Nov 07 |
nicklas |
1119 |
} |
3911 |
06 Nov 07 |
nicklas |
1120 |
|
3911 |
06 Nov 07 |
nicklas |
1121 |
/** |
3023 |
12 Dec 06 |
nicklas |
Set the default number format to use when creating mappers. |
3023 |
12 Dec 06 |
nicklas |
@param numberFormat The number format to use, or null to parse |
3023 |
12 Dec 06 |
nicklas |
numbers with Float.valueOf or Double.valueOf |
3023 |
12 Dec 06 |
nicklas |
@since 2.2 |
3023 |
12 Dec 06 |
nicklas |
@see #getMapper(String) |
3023 |
12 Dec 06 |
nicklas |
@see #getMapper(String, NumberFormat) |
3023 |
12 Dec 06 |
nicklas |
1128 |
*/ |
3023 |
12 Dec 06 |
nicklas |
1129 |
public void setDefaultNumberFormat(NumberFormat numberFormat) |
3023 |
12 Dec 06 |
nicklas |
1130 |
{ |
3023 |
12 Dec 06 |
nicklas |
1131 |
this.numberFormat = numberFormat; |
3023 |
12 Dec 06 |
nicklas |
1132 |
} |
3023 |
12 Dec 06 |
nicklas |
1133 |
|
3023 |
12 Dec 06 |
nicklas |
1134 |
/** |
3023 |
12 Dec 06 |
nicklas |
Get the default number format. |
3023 |
12 Dec 06 |
nicklas |
@return The number format, or null if none has been specified |
3023 |
12 Dec 06 |
nicklas |
@since 2.2 |
3023 |
12 Dec 06 |
nicklas |
1138 |
*/ |
3023 |
12 Dec 06 |
nicklas |
1139 |
public NumberFormat getDefaultNumberFormat() |
3023 |
12 Dec 06 |
nicklas |
1140 |
{ |
3023 |
12 Dec 06 |
nicklas |
1141 |
return numberFormat; |
3023 |
12 Dec 06 |
nicklas |
1142 |
} |
3023 |
12 Dec 06 |
nicklas |
1143 |
|
3023 |
12 Dec 06 |
nicklas |
1144 |
/** |
7655 |
19 Mar 19 |
nicklas |
Set the default date format to use when creating mappers. |
7655 |
19 Mar 19 |
nicklas |
If null, xxx is used. |
7655 |
19 Mar 19 |
nicklas |
@since 3.15 |
7655 |
19 Mar 19 |
nicklas |
1148 |
*/ |
7655 |
19 Mar 19 |
nicklas |
1149 |
public void setDefaultDateFormat(Formatter<Date> dateFormat) |
7655 |
19 Mar 19 |
nicklas |
1150 |
{ |
7655 |
19 Mar 19 |
nicklas |
1151 |
this.dateFormat = dateFormat; |
7655 |
19 Mar 19 |
nicklas |
1152 |
} |
7655 |
19 Mar 19 |
nicklas |
1153 |
|
7655 |
19 Mar 19 |
nicklas |
1154 |
/** |
7655 |
19 Mar 19 |
nicklas |
Get the default date format. |
7655 |
19 Mar 19 |
nicklas |
@since 3.15 |
7655 |
19 Mar 19 |
nicklas |
1157 |
*/ |
7655 |
19 Mar 19 |
nicklas |
1158 |
public Formatter<Date> getDefaultDateFormat() |
7655 |
19 Mar 19 |
nicklas |
1159 |
{ |
7655 |
19 Mar 19 |
nicklas |
1160 |
return dateFormat; |
7655 |
19 Mar 19 |
nicklas |
1161 |
} |
7655 |
19 Mar 19 |
nicklas |
1162 |
|
7655 |
19 Mar 19 |
nicklas |
1163 |
/** |
7655 |
19 Mar 19 |
nicklas |
Set the default timestamp format to use when creating mappers. |
7655 |
19 Mar 19 |
nicklas |
If null, xxx is used. |
7655 |
19 Mar 19 |
nicklas |
@since 3.15 |
7655 |
19 Mar 19 |
nicklas |
1167 |
*/ |
7655 |
19 Mar 19 |
nicklas |
1168 |
public void setDefaultTimestampFormat(Formatter<Date> timestampFormat) |
7655 |
19 Mar 19 |
nicklas |
1169 |
{ |
7655 |
19 Mar 19 |
nicklas |
1170 |
this.timestampFormat = timestampFormat; |
7655 |
19 Mar 19 |
nicklas |
1171 |
} |
7655 |
19 Mar 19 |
nicklas |
1172 |
|
7655 |
19 Mar 19 |
nicklas |
1173 |
/** |
7655 |
19 Mar 19 |
nicklas |
Get the default timestamp format. |
7655 |
19 Mar 19 |
nicklas |
@since 3.15 |
7655 |
19 Mar 19 |
nicklas |
1176 |
*/ |
7655 |
19 Mar 19 |
nicklas |
1177 |
public Formatter<Date> getDefaultTimestampFormat() |
7655 |
19 Mar 19 |
nicklas |
1178 |
{ |
7655 |
19 Mar 19 |
nicklas |
1179 |
return timestampFormat; |
7655 |
19 Mar 19 |
nicklas |
1180 |
} |
7655 |
19 Mar 19 |
nicklas |
1181 |
|
7655 |
19 Mar 19 |
nicklas |
1182 |
/** |
3472 |
11 Jun 07 |
nicklas |
Specify if <code>null</code> should be returned if a (numeric) |
3472 |
11 Jun 07 |
nicklas |
value can't be parsed. If this setting is set to TRUE all mappers |
3472 |
11 Jun 07 |
nicklas |
created by one of the {@link #getMapper(String)} methods are wrapped |
3472 |
11 Jun 07 |
nicklas |
in a {@link NullIfExceptionMapper}. It is not possible to log |
3472 |
11 Jun 07 |
nicklas |
or get information about the exception. |
3472 |
11 Jun 07 |
nicklas |
@param useNullIfException TRUE to return null, FALSE to throw an exception |
3472 |
11 Jun 07 |
nicklas |
@since 2.4 |
3023 |
12 Dec 06 |
nicklas |
1190 |
*/ |
3472 |
11 Jun 07 |
nicklas |
1191 |
public void setUseNullIfException(boolean useNullIfException) |
3472 |
11 Jun 07 |
nicklas |
1192 |
{ |
3472 |
11 Jun 07 |
nicklas |
1193 |
this.useNullIfException = useNullIfException; |
3472 |
11 Jun 07 |
nicklas |
1194 |
} |
3472 |
11 Jun 07 |
nicklas |
1195 |
|
3472 |
11 Jun 07 |
nicklas |
1196 |
/** |
7668 |
21 Mar 19 |
nicklas |
@since 3.15 |
7668 |
21 Mar 19 |
nicklas |
1198 |
*/ |
7668 |
21 Mar 19 |
nicklas |
1199 |
public boolean getUseNullIfException() |
7668 |
21 Mar 19 |
nicklas |
1200 |
{ |
7668 |
21 Mar 19 |
nicklas |
1201 |
return useNullIfException; |
7668 |
21 Mar 19 |
nicklas |
1202 |
} |
7668 |
21 Mar 19 |
nicklas |
1203 |
|
7668 |
21 Mar 19 |
nicklas |
1204 |
|
7668 |
21 Mar 19 |
nicklas |
1205 |
/** |
4104 |
28 Jan 08 |
nicklas |
Specify if trying to create a mapper with one of the {@link #getMapper(String)} |
4104 |
28 Jan 08 |
nicklas |
methods for an expression which references a non-existing column should |
4104 |
28 Jan 08 |
nicklas |
result in an exception or be ignored. |
4104 |
28 Jan 08 |
nicklas |
@param ignoreNonExistingColumns TRUE to ignore, or FALSE to throw an exception |
4104 |
28 Jan 08 |
nicklas |
@since 2.6 |
4104 |
28 Jan 08 |
nicklas |
1211 |
*/ |
4104 |
28 Jan 08 |
nicklas |
1212 |
public void setIgnoreNonExistingColumns(boolean ignoreNonExistingColumns) |
4104 |
28 Jan 08 |
nicklas |
1213 |
{ |
4104 |
28 Jan 08 |
nicklas |
1214 |
this.ignoreNonExistingColumns = ignoreNonExistingColumns; |
4104 |
28 Jan 08 |
nicklas |
1215 |
} |
4104 |
28 Jan 08 |
nicklas |
1216 |
|
4104 |
28 Jan 08 |
nicklas |
1217 |
/** |
3472 |
11 Jun 07 |
nicklas |
Get a mapper using the default number format. |
3472 |
11 Jun 07 |
nicklas |
@see #getMapper(String, NumberFormat, boolean) |
3472 |
11 Jun 07 |
nicklas |
1220 |
*/ |
3023 |
12 Dec 06 |
nicklas |
1221 |
public Mapper getMapper(String expression) |
3023 |
12 Dec 06 |
nicklas |
1222 |
{ |
3472 |
11 Jun 07 |
nicklas |
1223 |
return getMapper(expression, numberFormat, useNullIfException); |
3023 |
12 Dec 06 |
nicklas |
1224 |
} |
3023 |
12 Dec 06 |
nicklas |
1225 |
|
5899 |
06 Dec 11 |
nicklas |
1226 |
public Mapper getMapper(String expression, JepFunction... functions) |
5899 |
06 Dec 11 |
nicklas |
1227 |
{ |
5899 |
06 Dec 11 |
nicklas |
1228 |
return getMapper(expression, numberFormat, useNullIfException, functions); |
5899 |
06 Dec 11 |
nicklas |
1229 |
} |
5899 |
06 Dec 11 |
nicklas |
1230 |
|
3023 |
12 Dec 06 |
nicklas |
1231 |
/** |
3472 |
11 Jun 07 |
nicklas |
Get a mapper using the default number format. |
3472 |
11 Jun 07 |
nicklas |
@see #getMapper(String, NumberFormat, boolean) |
3472 |
11 Jun 07 |
nicklas |
@since 2.4 |
3472 |
11 Jun 07 |
nicklas |
1235 |
*/ |
3472 |
11 Jun 07 |
nicklas |
1236 |
public Mapper getMapper(String expression, boolean nullIfException) |
3472 |
11 Jun 07 |
nicklas |
1237 |
{ |
3472 |
11 Jun 07 |
nicklas |
1238 |
return getMapper(expression, numberFormat, nullIfException); |
3472 |
11 Jun 07 |
nicklas |
1239 |
} |
3472 |
11 Jun 07 |
nicklas |
1240 |
|
3472 |
11 Jun 07 |
nicklas |
1241 |
/** |
3472 |
11 Jun 07 |
nicklas |
Get a mapper using a specific number format. |
3472 |
11 Jun 07 |
nicklas |
@see #getMapper(String, NumberFormat, boolean) |
3472 |
11 Jun 07 |
nicklas |
@since 2.2 |
3472 |
11 Jun 07 |
nicklas |
1245 |
*/ |
3472 |
11 Jun 07 |
nicklas |
1246 |
public Mapper getMapper(String expression, NumberFormat numberFormat) |
3472 |
11 Jun 07 |
nicklas |
1247 |
{ |
3472 |
11 Jun 07 |
nicklas |
1248 |
return getMapper(expression, numberFormat, useNullIfException); |
3472 |
11 Jun 07 |
nicklas |
1249 |
} |
3472 |
11 Jun 07 |
nicklas |
1250 |
|
3472 |
11 Jun 07 |
nicklas |
1251 |
/** |
5899 |
06 Dec 11 |
nicklas |
@see #getMapper(String, NumberFormat, boolean, JepFunction...) |
5899 |
06 Dec 11 |
nicklas |
@since 2.4 |
5899 |
06 Dec 11 |
nicklas |
1254 |
*/ |
5899 |
06 Dec 11 |
nicklas |
1255 |
public Mapper getMapper(String expression, NumberFormat numberFormat, boolean nullIfException) |
5899 |
06 Dec 11 |
nicklas |
1256 |
{ |
5899 |
06 Dec 11 |
nicklas |
1257 |
return getMapper(expression, numberFormat, nullIfException, (JepFunction[])null); |
5899 |
06 Dec 11 |
nicklas |
1258 |
} |
7667 |
21 Mar 19 |
nicklas |
1259 |
|
7667 |
21 Mar 19 |
nicklas |
1260 |
/** |
7667 |
21 Mar 19 |
nicklas |
Get a mapper using the default date format. |
7667 |
21 Mar 19 |
nicklas |
@since 3.15 |
7667 |
21 Mar 19 |
nicklas |
1263 |
*/ |
7667 |
21 Mar 19 |
nicklas |
1264 |
public Mapper getDateMapper(String expression) |
7667 |
21 Mar 19 |
nicklas |
1265 |
{ |
7667 |
21 Mar 19 |
nicklas |
1266 |
return getMapper(expression, dateFormat, useNullIfException); |
7667 |
21 Mar 19 |
nicklas |
1267 |
} |
7667 |
21 Mar 19 |
nicklas |
1268 |
|
7667 |
21 Mar 19 |
nicklas |
1269 |
/** |
7667 |
21 Mar 19 |
nicklas |
Get a mapper using the default timestamp format. |
7667 |
21 Mar 19 |
nicklas |
@since 3.15 |
7667 |
21 Mar 19 |
nicklas |
1272 |
*/ |
7667 |
21 Mar 19 |
nicklas |
1273 |
public Mapper getTimestampMapper(String expression) |
7667 |
21 Mar 19 |
nicklas |
1274 |
{ |
7667 |
21 Mar 19 |
nicklas |
1275 |
return getMapper(expression, timestampFormat, useNullIfException); |
7667 |
21 Mar 19 |
nicklas |
1276 |
} |
5899 |
06 Dec 11 |
nicklas |
1277 |
|
5899 |
06 Dec 11 |
nicklas |
1278 |
/** |
7667 |
21 Mar 19 |
nicklas |
Get a mapper using the specified date format. |
7667 |
21 Mar 19 |
nicklas |
@since 3.15 |
7667 |
21 Mar 19 |
nicklas |
1281 |
*/ |
7667 |
21 Mar 19 |
nicklas |
1282 |
public Mapper getMapper(String expression, Formatter<Date> dateFormat, boolean nullIfException) |
7667 |
21 Mar 19 |
nicklas |
1283 |
{ |
7667 |
21 Mar 19 |
nicklas |
1284 |
return getMapper(expression, numberFormat, dateFormat, nullIfException, (JepFunction[])null); |
7667 |
21 Mar 19 |
nicklas |
1285 |
} |
7667 |
21 Mar 19 |
nicklas |
1286 |
|
7667 |
21 Mar 19 |
nicklas |
1287 |
public Mapper getMapper(String expression, NumberFormat numberFormat, boolean nullIfException, JepFunction... functions) |
7667 |
21 Mar 19 |
nicklas |
1288 |
{ |
7668 |
21 Mar 19 |
nicklas |
1289 |
return getMapper(expression, numberFormat, dateFormat, nullIfException, functions); |
7667 |
21 Mar 19 |
nicklas |
1290 |
} |
7667 |
21 Mar 19 |
nicklas |
1291 |
|
7667 |
21 Mar 19 |
nicklas |
1292 |
/** |
2317 |
24 May 06 |
nicklas |
Create a mapper object that maps an expression string to a value. |
2203 |
28 Apr 06 |
nicklas |
An expression string is a regular string which contains placeholders |
2203 |
28 Apr 06 |
nicklas |
where the data column values will be inserted. For example: |
2203 |
28 Apr 06 |
nicklas |
<pre class="code"> |
2203 |
28 Apr 06 |
nicklas |
\1\ |
2203 |
28 Apr 06 |
nicklas |
\row\ |
2203 |
28 Apr 06 |
nicklas |
Row: \row\, Col:\col\ |
2203 |
28 Apr 06 |
nicklas |
</pre> |
2317 |
24 May 06 |
nicklas |
1301 |
|
2317 |
24 May 06 |
nicklas |
It is also possible to use expressions that are evaluated dynamically. |
2317 |
24 May 06 |
nicklas |
<pre class="code"> |
2317 |
24 May 06 |
nicklas |
=2 * col('Radius') |
2317 |
24 May 06 |
nicklas |
</pre> |
3911 |
06 Nov 07 |
nicklas |
If no column that is matching the exact name is found the placeholder |
3911 |
06 Nov 07 |
nicklas |
is interpreted as a regular expression which is checked against each of |
3911 |
06 Nov 07 |
nicklas |
the column headers. In all cases, the first column header found is used |
3911 |
06 Nov 07 |
nicklas |
if there are multiple matches. |
3911 |
06 Nov 07 |
nicklas |
<p> |
2203 |
28 Apr 06 |
nicklas |
If the expression is null, a mapper returning en empty string is returned, |
3911 |
06 Nov 07 |
nicklas |
unless the {@link #setUseNullIfEmpty(boolean)} has been activated. In that |
2203 |
28 Apr 06 |
nicklas |
case the mapper returns null. |
2203 |
28 Apr 06 |
nicklas |
1314 |
|
2203 |
28 Apr 06 |
nicklas |
@param expression The string containing the mapping expression |
3023 |
12 Dec 06 |
nicklas |
@param numberFormat The number format the mapper should use for |
3023 |
12 Dec 06 |
nicklas |
parsing numbers, or null to use Float.valueOf or Double.valueOf |
7667 |
21 Mar 19 |
nicklas |
@param dateFormat The date format the mapper should use for |
7667 |
21 Mar 19 |
nicklas |
parsing dates, or null to use Type.DATE.parseString() |
3616 |
01 Aug 07 |
nicklas |
@param nullIfException TRUE to return a null value instead of throwing |
3472 |
11 Jun 07 |
nicklas |
an exception when a value can't be parsed. |
5899 |
06 Dec 11 |
nicklas |
@param functions Optional array with Jep functions that should be |
5899 |
06 Dec 11 |
nicklas |
included in the parser |
2203 |
28 Apr 06 |
nicklas |
@return A mapper object |
7667 |
21 Mar 19 |
nicklas |
@since 3.15 |
2203 |
28 Apr 06 |
nicklas |
1326 |
*/ |
7667 |
21 Mar 19 |
nicklas |
1327 |
public Mapper getMapper(String expression, NumberFormat numberFormat, Formatter<Date> dateFormat, boolean nullIfException, JepFunction... functions) |
2203 |
28 Apr 06 |
nicklas |
1328 |
{ |
2203 |
28 Apr 06 |
nicklas |
1329 |
Mapper mapper = null; |
2203 |
28 Apr 06 |
nicklas |
1330 |
if (expression == null || expression.length() == 0) |
2203 |
28 Apr 06 |
nicklas |
1331 |
{ |
7667 |
21 Mar 19 |
nicklas |
1332 |
mapper = new ConstantMapper(emptyIsNull ? null : "", numberFormat, dateFormat, nullIfException); |
2203 |
28 Apr 06 |
nicklas |
1333 |
} |
2203 |
28 Apr 06 |
nicklas |
1334 |
else if (expression.startsWith("=")) |
2203 |
28 Apr 06 |
nicklas |
1335 |
{ |
7667 |
21 Mar 19 |
nicklas |
1336 |
mapper = new JepMapper(expression.substring(1), columnHeaders, numberFormat, dateFormat, ignoreNonExistingColumns, functions); |
3472 |
11 Jun 07 |
nicklas |
1337 |
if (nullIfException) mapper = new NullIfExceptionMapper(mapper); |
2203 |
28 Apr 06 |
nicklas |
1338 |
} |
2203 |
28 Apr 06 |
nicklas |
1339 |
else |
2203 |
28 Apr 06 |
nicklas |
1340 |
{ |
2203 |
28 Apr 06 |
nicklas |
1341 |
List<Mapper> mappers = new LinkedList<Mapper>(); |
2203 |
28 Apr 06 |
nicklas |
1342 |
Matcher m = findColumn.matcher(expression); |
2203 |
28 Apr 06 |
nicklas |
1343 |
int nextStart = 0; |
2203 |
28 Apr 06 |
nicklas |
1344 |
while (m.find()) |
2203 |
28 Apr 06 |
nicklas |
1345 |
{ |
2203 |
28 Apr 06 |
nicklas |
1346 |
int matchStart = m.start(); |
2203 |
28 Apr 06 |
nicklas |
1347 |
if (matchStart > nextStart) |
2203 |
28 Apr 06 |
nicklas |
1348 |
{ |
7667 |
21 Mar 19 |
nicklas |
1349 |
mappers.add(new ConstantMapper(expression.substring(nextStart, matchStart), numberFormat, dateFormat, nullIfException)); |
2203 |
28 Apr 06 |
nicklas |
1350 |
} |
2203 |
28 Apr 06 |
nicklas |
1351 |
try |
2203 |
28 Apr 06 |
nicklas |
1352 |
{ |
2203 |
28 Apr 06 |
nicklas |
1353 |
int column = Integer.parseInt(m.group(1)); |
7667 |
21 Mar 19 |
nicklas |
1354 |
mappers.add(new ColumnMapper(column, null, numberFormat, dateFormat, nullIfException)); |
2203 |
28 Apr 06 |
nicklas |
1355 |
} |
2203 |
28 Apr 06 |
nicklas |
1356 |
catch (NumberFormatException ex) |
2203 |
28 Apr 06 |
nicklas |
1357 |
{ |
2203 |
28 Apr 06 |
nicklas |
1358 |
if (columnHeaders != null) |
2203 |
28 Apr 06 |
nicklas |
1359 |
{ |
2203 |
28 Apr 06 |
nicklas |
1360 |
String name = m.group(1); |
3911 |
06 Nov 07 |
nicklas |
1361 |
Integer column = getColumnHeaderIndex(name); |
3911 |
06 Nov 07 |
nicklas |
1362 |
if (column == null) column = findColumnHeaderIndex(name); |
3911 |
06 Nov 07 |
nicklas |
1363 |
if (column == null) |
2262 |
16 May 06 |
nicklas |
1364 |
{ |
4104 |
28 Jan 08 |
nicklas |
1365 |
if (!ignoreNonExistingColumns) |
4104 |
28 Jan 08 |
nicklas |
1366 |
{ |
4104 |
28 Jan 08 |
nicklas |
1367 |
throw new BaseException("Column '" + name + "' not found in column headers."); |
4104 |
28 Jan 08 |
nicklas |
1368 |
} |
2262 |
16 May 06 |
nicklas |
1369 |
} |
4104 |
28 Jan 08 |
nicklas |
1370 |
else |
4104 |
28 Jan 08 |
nicklas |
1371 |
{ |
7667 |
21 Mar 19 |
nicklas |
1372 |
mappers.add(new ColumnMapper(column, name, numberFormat, dateFormat, nullIfException)); |
4104 |
28 Jan 08 |
nicklas |
1373 |
} |
2203 |
28 Apr 06 |
nicklas |
1374 |
} |
2203 |
28 Apr 06 |
nicklas |
1375 |
} |
2203 |
28 Apr 06 |
nicklas |
1376 |
nextStart = m.end(); |
2203 |
28 Apr 06 |
nicklas |
1377 |
} |
2203 |
28 Apr 06 |
nicklas |
1378 |
if (nextStart < expression.length()) |
2203 |
28 Apr 06 |
nicklas |
1379 |
{ |
7667 |
21 Mar 19 |
nicklas |
1380 |
mappers.add(new ConstantMapper(expression.substring(nextStart), numberFormat, dateFormat, nullIfException)); |
2203 |
28 Apr 06 |
nicklas |
1381 |
} |
4104 |
28 Jan 08 |
nicklas |
1382 |
if (mappers.size() == 0) |
2203 |
28 Apr 06 |
nicklas |
1383 |
{ |
7667 |
21 Mar 19 |
nicklas |
1384 |
mapper = new ConstantMapper(emptyIsNull ? null : "", numberFormat, dateFormat, nullIfException); |
4104 |
28 Jan 08 |
nicklas |
1385 |
} |
4104 |
28 Jan 08 |
nicklas |
1386 |
else if (mappers.size() == 1) |
4104 |
28 Jan 08 |
nicklas |
1387 |
{ |
2203 |
28 Apr 06 |
nicklas |
1388 |
mapper = mappers.get(0); |
2203 |
28 Apr 06 |
nicklas |
1389 |
} |
2203 |
28 Apr 06 |
nicklas |
1390 |
else |
2203 |
28 Apr 06 |
nicklas |
1391 |
{ |
7667 |
21 Mar 19 |
nicklas |
1392 |
mapper = new CompoundMapper(mappers, numberFormat, dateFormat, nullIfException); |
2203 |
28 Apr 06 |
nicklas |
1393 |
} |
2203 |
28 Apr 06 |
nicklas |
1394 |
} |
2203 |
28 Apr 06 |
nicklas |
1395 |
return mapper; |
2203 |
28 Apr 06 |
nicklas |
1396 |
} |
2203 |
28 Apr 06 |
nicklas |
1397 |
|
2203 |
28 Apr 06 |
nicklas |
1398 |
/** |
29 |
22 Feb 05 |
nicklas |
Check if the input stream contains more data. If it is unknown |
29 |
22 Feb 05 |
nicklas |
if there is more data or not, this method will start reading more |
29 |
22 Feb 05 |
nicklas |
lines from the stream. Each line is checked in the following order: |
29 |
22 Feb 05 |
nicklas |
<ol> |
29 |
22 Feb 05 |
nicklas |
<li>Does it match the {@link #setDataFooterRegexp(Pattern) data footer} |
3416 |
31 May 07 |
enell |
regular expression?</li> |
1616 |
15 Nov 05 |
enell |
<li>Does it match the {@link #setSectionRegexp(Pattern) section} |
3416 |
31 May 07 |
enell |
regular expression?</li> |
6458 |
19 May 14 |
nicklas |
<li>Does it match the {@link #setIgnoreRegexp(Pattern) ignore} regular |
6458 |
19 May 14 |
nicklas |
expression?</li> |
3416 |
31 May 07 |
enell |
<li>Can it be split by the {@link #setDataSplitterRegexp(Pattern) data} |
3416 |
31 May 07 |
enell |
regular expression into the appropriate number of columns?</li> |
29 |
22 Feb 05 |
nicklas |
</ol> |
6458 |
19 May 14 |
nicklas |
If the first or second check is true, FALSE is returned no more data may be |
6458 |
19 May 14 |
nicklas |
retrieved, but a section may be retrived with the {@link #nextSection() nextSection} |
6458 |
19 May 14 |
nicklas |
method. If the third check is true, the line is ignored and the processing |
6458 |
19 May 14 |
nicklas |
continues with the next line. If the fourth check is true, TRUE is |
6458 |
19 May 14 |
nicklas |
returned and the data may be retrieved with the {@link #nextData() nextData} method. |
29 |
22 Feb 05 |
nicklas |
1417 |
|
29 |
22 Feb 05 |
nicklas |
@return TRUE if there is more data, FALSE otherwise |
29 |
22 Feb 05 |
nicklas |
@see #nextData() nextData |
29 |
22 Feb 05 |
nicklas |
@throws IOException If there is an error reading from the input stream |
29 |
22 Feb 05 |
nicklas |
1421 |
*/ |
29 |
22 Feb 05 |
nicklas |
1422 |
public boolean hasMoreData() |
29 |
22 Feb 05 |
nicklas |
1423 |
throws IOException |
29 |
22 Feb 05 |
nicklas |
1424 |
{ |
2655 |
22 Sep 06 |
nicklas |
1425 |
if (dataSplitter == null) return false; |
2494 |
08 Aug 06 |
nicklas |
1426 |
if (nextData == null && nextSection == null) |
29 |
22 Feb 05 |
nicklas |
1427 |
{ |
5193 |
27 Nov 09 |
nicklas |
1428 |
if (parsedLines == 0) parseToBof(); |
2655 |
22 Sep 06 |
nicklas |
1429 |
ignoredLines = 0; |
2655 |
22 Sep 06 |
nicklas |
1430 |
unknownLines = 0; |
2655 |
22 Sep 06 |
nicklas |
1431 |
if (keepSkippedLines) skippedLines = new LinkedList<Line>(); |
29 |
22 Feb 05 |
nicklas |
1432 |
boolean done = false; |
29 |
22 Feb 05 |
nicklas |
1433 |
while (!done) |
29 |
22 Feb 05 |
nicklas |
1434 |
{ |
4128 |
05 Feb 08 |
nicklas |
1435 |
ThreadSignalHandler.checkInterrupted(); |
29 |
22 Feb 05 |
nicklas |
1436 |
String line = reader.readLine(); |
29 |
22 Feb 05 |
nicklas |
1437 |
|
29 |
22 Feb 05 |
nicklas |
// End of file? |
29 |
22 Feb 05 |
nicklas |
1439 |
if (line == null) |
29 |
22 Feb 05 |
nicklas |
1440 |
{ |
29 |
22 Feb 05 |
nicklas |
1441 |
done = true; |
29 |
22 Feb 05 |
nicklas |
1442 |
} |
29 |
22 Feb 05 |
nicklas |
1443 |
else |
29 |
22 Feb 05 |
nicklas |
1444 |
{ |
1616 |
15 Nov 05 |
enell |
1445 |
parsedLines++; |
3618 |
01 Aug 07 |
nicklas |
1446 |
parsedDataLines++; |
1256 |
06 Sep 05 |
nicklas |
1447 |
parsedCharacters += line.length(); |
29 |
22 Feb 05 |
nicklas |
1448 |
|
29 |
22 Feb 05 |
nicklas |
// Check if it matches the data footer regexp |
3416 |
31 May 07 |
enell |
1450 |
if (!done && dataFooter != null) |
29 |
22 Feb 05 |
nicklas |
1451 |
{ |
2402 |
21 Jun 06 |
enell |
1452 |
Matcher m = dataFooter.matcher(line); |
2539 |
17 Aug 06 |
nicklas |
1453 |
done = m.matches(); |
29 |
22 Feb 05 |
nicklas |
1454 |
} |
1616 |
15 Nov 05 |
enell |
1455 |
|
1616 |
15 Nov 05 |
enell |
// Check if it matches the section regexp |
3416 |
31 May 07 |
enell |
1457 |
if (!done && section != null) |
1616 |
15 Nov 05 |
enell |
1458 |
{ |
1616 |
15 Nov 05 |
enell |
1459 |
Matcher m = section.matcher(line); |
1616 |
15 Nov 05 |
enell |
1460 |
if (m.matches() && (m.groupCount() == 1)) |
1616 |
15 Nov 05 |
enell |
1461 |
{ |
1616 |
15 Nov 05 |
enell |
1462 |
done = true; |
3618 |
01 Aug 07 |
nicklas |
1463 |
parsedDataLines = 0; |
2540 |
17 Aug 06 |
nicklas |
1464 |
nextSection = new Line(parsedLines, line, m.group(1), LineType.SECTION); |
1627 |
17 Nov 05 |
enell |
1465 |
lines.add(nextSection); |
7673 |
26 Mar 19 |
nicklas |
1466 |
if (parseAllExcelSheets) |
7673 |
26 Mar 19 |
nicklas |
1467 |
{ |
7673 |
26 Mar 19 |
nicklas |
1468 |
excelSheet = excelWorkbook.getSheetAsCsv(parsedSections); |
7673 |
26 Mar 19 |
nicklas |
1469 |
excelSheetName = excelSheet.getName(); |
7676 |
27 Mar 19 |
nicklas |
1470 |
excelParsedLinesOffset = parsedLines; |
7673 |
26 Mar 19 |
nicklas |
1471 |
} |
7673 |
26 Mar 19 |
nicklas |
1472 |
parsedSections++; |
1616 |
15 Nov 05 |
enell |
1473 |
} |
1616 |
15 Nov 05 |
enell |
1474 |
} |
6458 |
19 May 14 |
nicklas |
1475 |
|
6458 |
19 May 14 |
nicklas |
// Check if it is a comment line |
6458 |
19 May 14 |
nicklas |
1477 |
if (!done && ignore != null) |
6458 |
19 May 14 |
nicklas |
1478 |
{ |
6458 |
19 May 14 |
nicklas |
1479 |
Matcher m = ignore.matcher(line); |
6458 |
19 May 14 |
nicklas |
1480 |
if (m.matches()) |
6458 |
19 May 14 |
nicklas |
1481 |
{ |
6458 |
19 May 14 |
nicklas |
1482 |
ignoredLines++; |
6458 |
19 May 14 |
nicklas |
1483 |
if (keepSkippedLines) skippedLines.add(new Line(parsedLines, line, LineType.IGNORED)); |
6458 |
19 May 14 |
nicklas |
1484 |
|
6458 |
19 May 14 |
nicklas |
// Skip to next line |
6458 |
19 May 14 |
nicklas |
/* #### CONTINUE-STATEMENT #### */ |
6458 |
19 May 14 |
nicklas |
1487 |
continue; |
6458 |
19 May 14 |
nicklas |
1488 |
} |
6458 |
19 May 14 |
nicklas |
1489 |
} |
6458 |
19 May 14 |
nicklas |
1490 |
|
2655 |
22 Sep 06 |
nicklas |
1491 |
if (!done) |
2655 |
22 Sep 06 |
nicklas |
1492 |
{ |
3416 |
31 May 07 |
enell |
// Check if it is a data line |
3416 |
31 May 07 |
enell |
1494 |
String[] columns = trimQuotes(dataSplitter.split(line, -1)); |
3416 |
31 May 07 |
enell |
1495 |
if ((columns.length >= minDataColumns) && |
3416 |
31 May 07 |
enell |
1496 |
((columns.length <= maxDataColumns) || (maxDataColumns <= 0))) |
3416 |
31 May 07 |
enell |
1497 |
{ |
3416 |
31 May 07 |
enell |
1498 |
done = true; |
7664 |
20 Mar 19 |
nicklas |
1499 |
if (excelSheet != null) |
7664 |
20 Mar 19 |
nicklas |
1500 |
{ |
7738 |
14 Aug 19 |
nicklas |
1501 |
nextData = new ExcelData(excelSheet, excelParsedLinesOffset, parsedLines, parsedDataLines, line, columns, emptyIsNull, nullIsNull, trimWhiteSpace); |
7664 |
20 Mar 19 |
nicklas |
1502 |
} |
7664 |
20 Mar 19 |
nicklas |
1503 |
else |
7664 |
20 Mar 19 |
nicklas |
1504 |
{ |
7738 |
14 Aug 19 |
nicklas |
1505 |
nextData = new Data(parsedLines, parsedDataLines, line, columns, emptyIsNull, nullIsNull, trimWhiteSpace); |
7664 |
20 Mar 19 |
nicklas |
1506 |
} |
3416 |
31 May 07 |
enell |
1507 |
} |
3416 |
31 May 07 |
enell |
1508 |
} |
3416 |
31 May 07 |
enell |
1509 |
|
3416 |
31 May 07 |
enell |
1510 |
if (!done) |
3416 |
31 May 07 |
enell |
1511 |
{ |
2655 |
22 Sep 06 |
nicklas |
1512 |
unknownLines++; |
2655 |
22 Sep 06 |
nicklas |
1513 |
if (keepSkippedLines) skippedLines.add(new Line(parsedLines, line, LineType.UNKNOWN)); |
2655 |
22 Sep 06 |
nicklas |
1514 |
} |
29 |
22 Feb 05 |
nicklas |
1515 |
} |
29 |
22 Feb 05 |
nicklas |
1516 |
} |
29 |
22 Feb 05 |
nicklas |
1517 |
} |
58 |
01 Mar 05 |
nicklas |
1518 |
return nextData != null; |
29 |
22 Feb 05 |
nicklas |
1519 |
} |
29 |
22 Feb 05 |
nicklas |
1520 |
|
29 |
22 Feb 05 |
nicklas |
1521 |
/** |
2203 |
28 Apr 06 |
nicklas |
Remove enclosing quotes (" or ') around all columns. |
2203 |
28 Apr 06 |
nicklas |
@param columns The columns |
2203 |
28 Apr 06 |
nicklas |
@return The trimmed columns |
2203 |
28 Apr 06 |
nicklas |
1525 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1526 |
public String[] trimQuotes(String[] columns) |
2203 |
28 Apr 06 |
nicklas |
1527 |
{ |
2203 |
28 Apr 06 |
nicklas |
1528 |
if (trimQuotes) |
2203 |
28 Apr 06 |
nicklas |
1529 |
{ |
2203 |
28 Apr 06 |
nicklas |
1530 |
for (int i = 0; i < columns.length; ++i) |
2203 |
28 Apr 06 |
nicklas |
1531 |
{ |
2203 |
28 Apr 06 |
nicklas |
1532 |
String c = columns[i]; |
2203 |
28 Apr 06 |
nicklas |
1533 |
if (c.length() >= 2) |
2203 |
28 Apr 06 |
nicklas |
1534 |
{ |
2203 |
28 Apr 06 |
nicklas |
// If start and end with " or ' --> remove them |
2203 |
28 Apr 06 |
nicklas |
1536 |
if ((c.startsWith("\"") && c.endsWith("\"")) || (c.startsWith("'") && c.endsWith("'"))) |
2203 |
28 Apr 06 |
nicklas |
1537 |
{ |
2203 |
28 Apr 06 |
nicklas |
1538 |
columns[i] = c.substring(1, c.length() - 1); |
2203 |
28 Apr 06 |
nicklas |
1539 |
} |
2203 |
28 Apr 06 |
nicklas |
1540 |
} |
2203 |
28 Apr 06 |
nicklas |
1541 |
} |
2203 |
28 Apr 06 |
nicklas |
1542 |
} |
2203 |
28 Apr 06 |
nicklas |
1543 |
return columns; |
2203 |
28 Apr 06 |
nicklas |
1544 |
} |
2203 |
28 Apr 06 |
nicklas |
1545 |
|
2203 |
28 Apr 06 |
nicklas |
1546 |
/** |
1248 |
06 Sep 05 |
nicklas |
Get the number of parsed lines so far. |
1248 |
06 Sep 05 |
nicklas |
1548 |
*/ |
1248 |
06 Sep 05 |
nicklas |
1549 |
public int getParsedLines() |
1248 |
06 Sep 05 |
nicklas |
1550 |
{ |
1248 |
06 Sep 05 |
nicklas |
1551 |
return parsedLines; |
1248 |
06 Sep 05 |
nicklas |
1552 |
} |
1248 |
06 Sep 05 |
nicklas |
1553 |
|
1248 |
06 Sep 05 |
nicklas |
1554 |
/** |
7673 |
26 Mar 19 |
nicklas |
Get the number of found sections so far. |
7673 |
26 Mar 19 |
nicklas |
@since 3.15 |
7673 |
26 Mar 19 |
nicklas |
1557 |
*/ |
7673 |
26 Mar 19 |
nicklas |
1558 |
public int getParsedSections() |
7673 |
26 Mar 19 |
nicklas |
1559 |
{ |
7673 |
26 Mar 19 |
nicklas |
1560 |
return parsedSections; |
7673 |
26 Mar 19 |
nicklas |
1561 |
} |
7673 |
26 Mar 19 |
nicklas |
1562 |
|
7673 |
26 Mar 19 |
nicklas |
1563 |
/** |
3618 |
01 Aug 07 |
nicklas |
Get the number of parsed data lines so far in the current section. |
3618 |
01 Aug 07 |
nicklas |
This value is reset for each new section. |
3618 |
01 Aug 07 |
nicklas |
1566 |
*/ |
3618 |
01 Aug 07 |
nicklas |
1567 |
public int getParsedDataLines() |
3618 |
01 Aug 07 |
nicklas |
1568 |
{ |
3618 |
01 Aug 07 |
nicklas |
1569 |
return parsedDataLines; |
3618 |
01 Aug 07 |
nicklas |
1570 |
} |
3618 |
01 Aug 07 |
nicklas |
1571 |
|
3618 |
01 Aug 07 |
nicklas |
1572 |
/** |
1248 |
06 Sep 05 |
nicklas |
Get the number of parsed characters so far. This value may or may not |
1248 |
06 Sep 05 |
nicklas |
correspond to the number of parsed bytes depending on the character |
1248 |
06 Sep 05 |
nicklas |
set of the file. |
4095 |
21 Jan 08 |
enell |
@see #getParsedBytes() |
1248 |
06 Sep 05 |
nicklas |
1577 |
*/ |
1248 |
06 Sep 05 |
nicklas |
1578 |
public long getParsedCharacters() |
1248 |
06 Sep 05 |
nicklas |
1579 |
{ |
1248 |
06 Sep 05 |
nicklas |
1580 |
return parsedCharacters; |
1248 |
06 Sep 05 |
nicklas |
1581 |
} |
1248 |
06 Sep 05 |
nicklas |
1582 |
|
1248 |
06 Sep 05 |
nicklas |
1583 |
/** |
4095 |
21 Jan 08 |
enell |
Get the number of parsed bytes so far. This value may or may not |
4095 |
21 Jan 08 |
enell |
correspond to the number of parsed characters depending on the character |
4095 |
21 Jan 08 |
enell |
set of the file. |
4095 |
21 Jan 08 |
enell |
@since 2.5.1 |
4095 |
21 Jan 08 |
enell |
@see #getParsedCharacters() |
4095 |
21 Jan 08 |
enell |
1589 |
*/ |
4095 |
21 Jan 08 |
enell |
1590 |
public long getParsedBytes() |
4095 |
21 Jan 08 |
enell |
1591 |
{ |
4095 |
21 Jan 08 |
enell |
1592 |
return tracker == null ? 0 : tracker.getNumRead(); |
4095 |
21 Jan 08 |
enell |
1593 |
} |
4095 |
21 Jan 08 |
enell |
1594 |
|
4095 |
21 Jan 08 |
enell |
1595 |
/** |
29 |
22 Feb 05 |
nicklas |
Get the next available data. |
29 |
22 Feb 05 |
nicklas |
@return A <code>Data</code> object, or null if there is no more data |
4020 |
29 Nov 07 |
martin |
@throws IOException If there is an error reading from the input stream. |
29 |
22 Feb 05 |
nicklas |
@see #hasMoreData() hasMoreData |
29 |
22 Feb 05 |
nicklas |
1600 |
*/ |
29 |
22 Feb 05 |
nicklas |
1601 |
public Data nextData() |
29 |
22 Feb 05 |
nicklas |
1602 |
throws IOException |
29 |
22 Feb 05 |
nicklas |
1603 |
{ |
58 |
01 Mar 05 |
nicklas |
1604 |
Data currentData = null; |
29 |
22 Feb 05 |
nicklas |
1605 |
if (hasMoreData()) |
29 |
22 Feb 05 |
nicklas |
1606 |
{ |
58 |
01 Mar 05 |
nicklas |
1607 |
currentData = nextData; |
58 |
01 Mar 05 |
nicklas |
1608 |
nextData = null; |
29 |
22 Feb 05 |
nicklas |
1609 |
} |
58 |
01 Mar 05 |
nicklas |
1610 |
return currentData; |
29 |
22 Feb 05 |
nicklas |
1611 |
} |
1627 |
17 Nov 05 |
enell |
1612 |
|
1627 |
17 Nov 05 |
enell |
1613 |
/** |
2655 |
22 Sep 06 |
nicklas |
Get the number of lines that the last call to {@link #nextData()} or |
2655 |
22 Sep 06 |
nicklas |
{@link #hasMoreData()} ignored because they matched the ignore regular |
2655 |
22 Sep 06 |
nicklas |
expression. |
2655 |
22 Sep 06 |
nicklas |
@return The number of ignored lines |
2655 |
22 Sep 06 |
nicklas |
@see #setIgnoreRegexp(Pattern) |
2655 |
22 Sep 06 |
nicklas |
@see #setKeepSkippedLines(boolean) |
2655 |
22 Sep 06 |
nicklas |
1620 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1621 |
public int getIgnoredLines() |
2655 |
22 Sep 06 |
nicklas |
1622 |
{ |
2655 |
22 Sep 06 |
nicklas |
1623 |
return ignoredLines; |
2655 |
22 Sep 06 |
nicklas |
1624 |
} |
2655 |
22 Sep 06 |
nicklas |
1625 |
|
2655 |
22 Sep 06 |
nicklas |
1626 |
/** |
2655 |
22 Sep 06 |
nicklas |
Get the number of lines that the last call to {@link #nextData()} or |
2655 |
22 Sep 06 |
nicklas |
{@link #hasMoreData()} ignored because they couldn't be interpreted as |
2655 |
22 Sep 06 |
nicklas |
data lines. |
2655 |
22 Sep 06 |
nicklas |
@return The number of unknown lines |
2655 |
22 Sep 06 |
nicklas |
@see #setKeepSkippedLines(boolean) |
2655 |
22 Sep 06 |
nicklas |
1632 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1633 |
public int getUnknownLines() |
2655 |
22 Sep 06 |
nicklas |
1634 |
{ |
2655 |
22 Sep 06 |
nicklas |
1635 |
return unknownLines; |
2655 |
22 Sep 06 |
nicklas |
1636 |
} |
2655 |
22 Sep 06 |
nicklas |
1637 |
|
2655 |
22 Sep 06 |
nicklas |
1638 |
/** |
2655 |
22 Sep 06 |
nicklas |
Get the number of lines that the last call to {@link #nextData()} or |
2655 |
22 Sep 06 |
nicklas |
{@link #hasMoreData()} ignored because they matched the ignore regular |
2655 |
22 Sep 06 |
nicklas |
expression or couldn't be interpreted as data lines. |
2655 |
22 Sep 06 |
nicklas |
@return The number of ignored or unknown lines |
2655 |
22 Sep 06 |
nicklas |
@see #getIgnoredLines() |
2655 |
22 Sep 06 |
nicklas |
@see #getUnknownLines() |
2655 |
22 Sep 06 |
nicklas |
@see #getSkippedLines() |
2655 |
22 Sep 06 |
nicklas |
1646 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1647 |
public int getNumSkippedLines() |
2655 |
22 Sep 06 |
nicklas |
1648 |
{ |
2655 |
22 Sep 06 |
nicklas |
1649 |
return ignoredLines + unknownLines; |
2655 |
22 Sep 06 |
nicklas |
1650 |
} |
2655 |
22 Sep 06 |
nicklas |
1651 |
|
2655 |
22 Sep 06 |
nicklas |
1652 |
/** |
2655 |
22 Sep 06 |
nicklas |
Get the lines that were skipped during the last call to {@link #nextData()} or |
2655 |
22 Sep 06 |
nicklas |
{@link #hasMoreData()}. The list is only available if the {@link #setKeepSkippedLines(boolean)} |
2655 |
22 Sep 06 |
nicklas |
has been set to true (default is false). |
2655 |
22 Sep 06 |
nicklas |
@return A list with the skipped lines |
2655 |
22 Sep 06 |
nicklas |
@see #setKeepSkippedLines(boolean) |
2655 |
22 Sep 06 |
nicklas |
1658 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1659 |
public List<Line> getSkippedLines() |
2655 |
22 Sep 06 |
nicklas |
1660 |
{ |
2655 |
22 Sep 06 |
nicklas |
1661 |
return skippedLines; |
2655 |
22 Sep 06 |
nicklas |
1662 |
} |
2655 |
22 Sep 06 |
nicklas |
1663 |
|
2655 |
22 Sep 06 |
nicklas |
1664 |
/** |
1627 |
17 Nov 05 |
enell |
Check if the input stream contains more sections. If it is unknown |
1627 |
17 Nov 05 |
enell |
if there are more sections or not, this method will start reading more |
1627 |
17 Nov 05 |
enell |
lines from the stream. Each line is checked if it matches the |
1627 |
17 Nov 05 |
enell |
{@link #setSectionRegexp(Pattern) section} regular expression. The |
1627 |
17 Nov 05 |
enell |
parser will continue until a section line is found or end of file |
1627 |
17 Nov 05 |
enell |
is reached. If the method returns TRUE the section may be retrieved with |
2317 |
24 May 06 |
nicklas |
the {@link #nextSection()} method. If the {@link |
2317 |
24 May 06 |
nicklas |
#setSectionRegexp(Pattern) section} regular expression isn't specified |
2317 |
24 May 06 |
nicklas |
the method returns FALSE and won't parse any line. |
1627 |
17 Nov 05 |
enell |
1674 |
|
1627 |
17 Nov 05 |
enell |
@return TRUE if there are more sections, FALSE otherwise |
2317 |
24 May 06 |
nicklas |
@see #nextSection() |
1627 |
17 Nov 05 |
enell |
@throws IOException If there is an error reading from the input stream |
1627 |
17 Nov 05 |
enell |
1678 |
*/ |
1616 |
15 Nov 05 |
enell |
1679 |
public boolean hasMoreSections() |
1616 |
15 Nov 05 |
enell |
1680 |
throws IOException |
1616 |
15 Nov 05 |
enell |
1681 |
{ |
1627 |
17 Nov 05 |
enell |
1682 |
if (section == null) |
1627 |
17 Nov 05 |
enell |
1683 |
{ |
1627 |
17 Nov 05 |
enell |
1684 |
return false; |
1627 |
17 Nov 05 |
enell |
1685 |
} |
1616 |
15 Nov 05 |
enell |
1686 |
if (nextSection == null) |
1616 |
15 Nov 05 |
enell |
1687 |
{ |
5193 |
27 Nov 09 |
nicklas |
1688 |
if (parsedLines == 0) parseToBof(); |
1616 |
15 Nov 05 |
enell |
1689 |
boolean done = false; |
1616 |
15 Nov 05 |
enell |
1690 |
while (!done) |
1616 |
15 Nov 05 |
enell |
1691 |
{ |
4128 |
05 Feb 08 |
nicklas |
1692 |
ThreadSignalHandler.checkInterrupted(); |
1616 |
15 Nov 05 |
enell |
1693 |
String line = reader.readLine(); |
1616 |
15 Nov 05 |
enell |
1694 |
|
2409 |
21 Jun 06 |
enell |
// End of file |
1616 |
15 Nov 05 |
enell |
1696 |
if (line == null) |
1616 |
15 Nov 05 |
enell |
1697 |
{ |
1616 |
15 Nov 05 |
enell |
1698 |
done = true; |
1616 |
15 Nov 05 |
enell |
1699 |
} |
1616 |
15 Nov 05 |
enell |
1700 |
else |
1616 |
15 Nov 05 |
enell |
1701 |
{ |
1616 |
15 Nov 05 |
enell |
1702 |
parsedLines++; |
1616 |
15 Nov 05 |
enell |
1703 |
parsedCharacters += line.length(); |
1616 |
15 Nov 05 |
enell |
1704 |
Matcher m = section.matcher(line); |
1790 |
18 Jan 06 |
enell |
1705 |
if (m.matches() && m.groupCount() == 1) |
1616 |
15 Nov 05 |
enell |
1706 |
{ |
1616 |
15 Nov 05 |
enell |
1707 |
done = true; |
2540 |
17 Aug 06 |
nicklas |
1708 |
nextSection = new Line(parsedLines, line, m.group(1), LineType.SECTION); |
3618 |
01 Aug 07 |
nicklas |
1709 |
parsedDataLines = 0; |
7673 |
26 Mar 19 |
nicklas |
1710 |
if (parseAllExcelSheets) |
7673 |
26 Mar 19 |
nicklas |
1711 |
{ |
7673 |
26 Mar 19 |
nicklas |
1712 |
excelSheet = excelWorkbook.getSheetAsCsv(parsedSections); |
7673 |
26 Mar 19 |
nicklas |
1713 |
excelSheetName = excelSheet.getName(); |
7676 |
27 Mar 19 |
nicklas |
1714 |
excelParsedLinesOffset = parsedLines; |
7673 |
26 Mar 19 |
nicklas |
1715 |
} |
7673 |
26 Mar 19 |
nicklas |
1716 |
parsedSections++; |
1616 |
15 Nov 05 |
enell |
1717 |
} |
1616 |
15 Nov 05 |
enell |
1718 |
} |
1616 |
15 Nov 05 |
enell |
1719 |
} |
1616 |
15 Nov 05 |
enell |
1720 |
} |
1616 |
15 Nov 05 |
enell |
1721 |
return nextSection != null; |
1616 |
15 Nov 05 |
enell |
1722 |
} |
1616 |
15 Nov 05 |
enell |
1723 |
|
2317 |
24 May 06 |
nicklas |
1724 |
/** |
2317 |
24 May 06 |
nicklas |
Get the next line that matches the {@link #setSectionRegexp(Pattern) section} |
2317 |
24 May 06 |
nicklas |
regular expression. |
2317 |
24 May 06 |
nicklas |
@return The line that matched the regular expression |
2317 |
24 May 06 |
nicklas |
@see #hasMoreSections() |
2317 |
24 May 06 |
nicklas |
1729 |
*/ |
1616 |
15 Nov 05 |
enell |
1730 |
public Line nextSection() |
1616 |
15 Nov 05 |
enell |
1731 |
throws IOException |
1616 |
15 Nov 05 |
enell |
1732 |
{ |
2409 |
21 Jun 06 |
enell |
1733 |
if (section == null) |
2409 |
21 Jun 06 |
enell |
1734 |
{ |
2409 |
21 Jun 06 |
enell |
1735 |
return null; |
2409 |
21 Jun 06 |
enell |
1736 |
} |
1616 |
15 Nov 05 |
enell |
1737 |
Line currentSection = null; |
1616 |
15 Nov 05 |
enell |
1738 |
if (hasMoreSections()) |
1616 |
15 Nov 05 |
enell |
1739 |
{ |
1616 |
15 Nov 05 |
enell |
1740 |
currentSection = nextSection; |
1616 |
15 Nov 05 |
enell |
1741 |
nextSection = null; |
1616 |
15 Nov 05 |
enell |
1742 |
} |
1616 |
15 Nov 05 |
enell |
1743 |
return currentSection; |
1616 |
15 Nov 05 |
enell |
1744 |
} |
2317 |
24 May 06 |
nicklas |
1745 |
|
2317 |
24 May 06 |
nicklas |
1746 |
/** |
2317 |
24 May 06 |
nicklas |
Represents the type of a line matched or unmatched by the parser. |
2317 |
24 May 06 |
nicklas |
1748 |
*/ |
1273 |
07 Sep 05 |
nicklas |
1749 |
public enum LineType |
1273 |
07 Sep 05 |
nicklas |
1750 |
{ |
1273 |
07 Sep 05 |
nicklas |
1751 |
/** |
1273 |
07 Sep 05 |
nicklas |
The parsed line is a header line, matching the |
1273 |
07 Sep 05 |
nicklas |
regular expression specified by {@link #setHeaderRegexp(Pattern)}. |
1273 |
07 Sep 05 |
nicklas |
1754 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1755 |
HEADER("Header"), |
1273 |
07 Sep 05 |
nicklas |
1756 |
|
1273 |
07 Sep 05 |
nicklas |
1757 |
/** |
1273 |
07 Sep 05 |
nicklas |
The parsed line is the data header line, matching the |
1273 |
07 Sep 05 |
nicklas |
regular expression specified by {@link #setDataHeaderRegexp(Pattern)}. |
1273 |
07 Sep 05 |
nicklas |
1760 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1761 |
DATA_HEADER("Data header"), |
1273 |
07 Sep 05 |
nicklas |
1762 |
|
1273 |
07 Sep 05 |
nicklas |
1763 |
/** |
1273 |
07 Sep 05 |
nicklas |
The parsed line is a data line, which could be split |
1273 |
07 Sep 05 |
nicklas |
into the specified number of columns by {@link #setDataSplitterRegexp(Pattern)}. |
1273 |
07 Sep 05 |
nicklas |
@see #setMinDataColumns(int) |
1273 |
07 Sep 05 |
nicklas |
@see #setMaxDataColumns(int) |
1273 |
07 Sep 05 |
nicklas |
1768 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1769 |
DATA("Data"), |
1273 |
07 Sep 05 |
nicklas |
1770 |
|
1273 |
07 Sep 05 |
nicklas |
1771 |
/** |
1616 |
15 Nov 05 |
enell |
The parsed line is the line starting a section, matching the |
1616 |
15 Nov 05 |
enell |
regular expression specified by {@link #setSectionRegexp(Pattern)}. |
1616 |
15 Nov 05 |
enell |
1774 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1775 |
SECTION("Section start"), |
1616 |
15 Nov 05 |
enell |
1776 |
|
1616 |
15 Nov 05 |
enell |
1777 |
/** |
1273 |
07 Sep 05 |
nicklas |
The parsed line is the data footer line, matching the |
1273 |
07 Sep 05 |
nicklas |
regular expression specified by {@link #setDataFooterRegexp(Pattern)}. |
1273 |
07 Sep 05 |
nicklas |
1780 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1781 |
DATA_FOOTER("Data footer"), |
1273 |
07 Sep 05 |
nicklas |
1782 |
|
1273 |
07 Sep 05 |
nicklas |
1783 |
/** |
1273 |
07 Sep 05 |
nicklas |
The parsed line was ignored since it matched the |
1273 |
07 Sep 05 |
nicklas |
regular expression specified by {@link #setIgnoreRegexp(Pattern)}. |
1273 |
07 Sep 05 |
nicklas |
1786 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1787 |
IGNORED("Ignored"), |
1273 |
07 Sep 05 |
nicklas |
1788 |
|
1616 |
15 Nov 05 |
enell |
1789 |
|
1273 |
07 Sep 05 |
nicklas |
1790 |
/** |
1273 |
07 Sep 05 |
nicklas |
The parsed line is of an unknown type. |
1273 |
07 Sep 05 |
nicklas |
1792 |
*/ |
2655 |
22 Sep 06 |
nicklas |
1793 |
UNKNOWN("Unknown"); |
2655 |
22 Sep 06 |
nicklas |
1794 |
|
2655 |
22 Sep 06 |
nicklas |
1795 |
private final String title; |
2655 |
22 Sep 06 |
nicklas |
1796 |
private LineType(String title) |
2655 |
22 Sep 06 |
nicklas |
1797 |
{ |
2655 |
22 Sep 06 |
nicklas |
1798 |
this.title = title; |
2655 |
22 Sep 06 |
nicklas |
1799 |
} |
1273 |
07 Sep 05 |
nicklas |
1800 |
|
6127 |
14 Sep 12 |
nicklas |
1801 |
@Override |
2655 |
22 Sep 06 |
nicklas |
1802 |
public String toString() |
2655 |
22 Sep 06 |
nicklas |
1803 |
{ |
2655 |
22 Sep 06 |
nicklas |
1804 |
return title; |
2655 |
22 Sep 06 |
nicklas |
1805 |
} |
1273 |
07 Sep 05 |
nicklas |
1806 |
} |
1273 |
07 Sep 05 |
nicklas |
1807 |
|
29 |
22 Feb 05 |
nicklas |
1808 |
/** |
29 |
22 Feb 05 |
nicklas |
This class holds data about a line parsed by the {@link |
29 |
22 Feb 05 |
nicklas |
FlatFileParser#parseHeaders()} method. |
29 |
22 Feb 05 |
nicklas |
1811 |
|
29 |
22 Feb 05 |
nicklas |
@author Nicklas |
29 |
22 Feb 05 |
nicklas |
@version 2.0 |
29 |
22 Feb 05 |
nicklas |
@see FlatFileParser#getLine(int) |
29 |
22 Feb 05 |
nicklas |
1815 |
*/ |
29 |
22 Feb 05 |
nicklas |
1816 |
public static class Line |
29 |
22 Feb 05 |
nicklas |
1817 |
{ |
2540 |
17 Aug 06 |
nicklas |
1818 |
private final int lineNo; |
29 |
22 Feb 05 |
nicklas |
1819 |
private final String line; |
1273 |
07 Sep 05 |
nicklas |
1820 |
private final LineType type; |
29 |
22 Feb 05 |
nicklas |
1821 |
private final String name; |
29 |
22 Feb 05 |
nicklas |
1822 |
private final String value; |
29 |
22 Feb 05 |
nicklas |
1823 |
|
29 |
22 Feb 05 |
nicklas |
1824 |
/** |
1616 |
15 Nov 05 |
enell |
Create a new line object. |
29 |
22 Feb 05 |
nicklas |
1826 |
*/ |
2540 |
17 Aug 06 |
nicklas |
1827 |
private Line(int lineNo, String line, LineType type) |
29 |
22 Feb 05 |
nicklas |
1828 |
{ |
2540 |
17 Aug 06 |
nicklas |
1829 |
this.lineNo = lineNo; |
29 |
22 Feb 05 |
nicklas |
1830 |
this.line = line; |
29 |
22 Feb 05 |
nicklas |
1831 |
this.type = type; |
29 |
22 Feb 05 |
nicklas |
1832 |
this.name = null; |
29 |
22 Feb 05 |
nicklas |
1833 |
this.value = null; |
29 |
22 Feb 05 |
nicklas |
1834 |
} |
29 |
22 Feb 05 |
nicklas |
1835 |
/** |
1616 |
15 Nov 05 |
enell |
Create a new section line object. |
1616 |
15 Nov 05 |
enell |
1837 |
*/ |
2540 |
17 Aug 06 |
nicklas |
1838 |
private Line(int lineNo, String line, String name, LineType type) |
1616 |
15 Nov 05 |
enell |
1839 |
{ |
2540 |
17 Aug 06 |
nicklas |
1840 |
this.lineNo = lineNo; |
1616 |
15 Nov 05 |
enell |
1841 |
this.line = line; |
1627 |
17 Nov 05 |
enell |
1842 |
this.type = type; |
1616 |
15 Nov 05 |
enell |
1843 |
this.name = name; |
1616 |
15 Nov 05 |
enell |
1844 |
this.value = null; |
1616 |
15 Nov 05 |
enell |
1845 |
} |
1616 |
15 Nov 05 |
enell |
1846 |
/** |
29 |
22 Feb 05 |
nicklas |
Create a new header line object. |
29 |
22 Feb 05 |
nicklas |
1848 |
*/ |
2540 |
17 Aug 06 |
nicklas |
1849 |
private Line(int lineNo, String line, String name, String value) |
29 |
22 Feb 05 |
nicklas |
1850 |
{ |
2540 |
17 Aug 06 |
nicklas |
1851 |
this.lineNo = lineNo; |
29 |
22 Feb 05 |
nicklas |
1852 |
this.line = line; |
1273 |
07 Sep 05 |
nicklas |
1853 |
this.type = LineType.HEADER; |
29 |
22 Feb 05 |
nicklas |
1854 |
this.name = name; |
29 |
22 Feb 05 |
nicklas |
1855 |
this.value = value; |
29 |
22 Feb 05 |
nicklas |
1856 |
} |
29 |
22 Feb 05 |
nicklas |
1857 |
|
6127 |
14 Sep 12 |
nicklas |
1858 |
@Override |
2655 |
22 Sep 06 |
nicklas |
1859 |
public String toString() |
2655 |
22 Sep 06 |
nicklas |
1860 |
{ |
2655 |
22 Sep 06 |
nicklas |
1861 |
return "Line " + lineNo + " [" + type + "]: " + line; |
2655 |
22 Sep 06 |
nicklas |
1862 |
} |
2655 |
22 Sep 06 |
nicklas |
1863 |
|
29 |
22 Feb 05 |
nicklas |
1864 |
/** |
2540 |
17 Aug 06 |
nicklas |
The line number of this line in the input stream. |
2540 |
17 Aug 06 |
nicklas |
@return The line number |
2540 |
17 Aug 06 |
nicklas |
1867 |
*/ |
2540 |
17 Aug 06 |
nicklas |
1868 |
public int lineNo() |
2540 |
17 Aug 06 |
nicklas |
1869 |
{ |
2540 |
17 Aug 06 |
nicklas |
1870 |
return lineNo; |
2540 |
17 Aug 06 |
nicklas |
1871 |
} |
2540 |
17 Aug 06 |
nicklas |
1872 |
|
2540 |
17 Aug 06 |
nicklas |
1873 |
/** |
29 |
22 Feb 05 |
nicklas |
The entire line as it was read from the input stream. |
29 |
22 Feb 05 |
nicklas |
@return A string |
29 |
22 Feb 05 |
nicklas |
1876 |
*/ |
29 |
22 Feb 05 |
nicklas |
1877 |
public String line() |
29 |
22 Feb 05 |
nicklas |
1878 |
{ |
29 |
22 Feb 05 |
nicklas |
1879 |
return line; |
29 |
22 Feb 05 |
nicklas |
1880 |
} |
29 |
22 Feb 05 |
nicklas |
1881 |
|
29 |
22 Feb 05 |
nicklas |
1882 |
/** |
29 |
22 Feb 05 |
nicklas |
The type of the line as determined by the {@link |
29 |
22 Feb 05 |
nicklas |
FlatFileParser#parseHeaders()} method. The value is |
1273 |
07 Sep 05 |
nicklas |
one of the values in the {@link LineType} enumeration. |
29 |
22 Feb 05 |
nicklas |
@return A code indicating the type of the line |
29 |
22 Feb 05 |
nicklas |
1887 |
*/ |
1273 |
07 Sep 05 |
nicklas |
1888 |
public LineType type() |
29 |
22 Feb 05 |
nicklas |
1889 |
{ |
29 |
22 Feb 05 |
nicklas |
1890 |
return type; |
29 |
22 Feb 05 |
nicklas |
1891 |
} |
29 |
22 Feb 05 |
nicklas |
1892 |
/** |
29 |
22 Feb 05 |
nicklas |
The name of the header if the line is a header line, or the name of the section if the line is a section line. |
29 |
22 Feb 05 |
nicklas |
@return The name of the header or section, or null if it is neither a header line nor a section line |
29 |
22 Feb 05 |
nicklas |
1895 |
*/ |
29 |
22 Feb 05 |
nicklas |
1896 |
public String name() |
29 |
22 Feb 05 |
nicklas |
1897 |
{ |
29 |
22 Feb 05 |
nicklas |
1898 |
return name; |
29 |
22 Feb 05 |
nicklas |
1899 |
} |
29 |
22 Feb 05 |
nicklas |
1900 |
/** |
29 |
22 Feb 05 |
nicklas |
The value of the header if the line is a header line. |
29 |
22 Feb 05 |
nicklas |
@return The value of the header, or null if it isn't a header line |
29 |
22 Feb 05 |
nicklas |
1903 |
*/ |
29 |
22 Feb 05 |
nicklas |
1904 |
public String value() |
29 |
22 Feb 05 |
nicklas |
1905 |
{ |
29 |
22 Feb 05 |
nicklas |
1906 |
return value; |
29 |
22 Feb 05 |
nicklas |
1907 |
} |
29 |
22 Feb 05 |
nicklas |
1908 |
} |
29 |
22 Feb 05 |
nicklas |
1909 |
|
29 |
22 Feb 05 |
nicklas |
1910 |
/** |
29 |
22 Feb 05 |
nicklas |
This class holds data about a line parsed by the {@link |
29 |
22 Feb 05 |
nicklas |
FlatFileParser#hasMoreData()} method. |
29 |
22 Feb 05 |
nicklas |
1913 |
|
29 |
22 Feb 05 |
nicklas |
@author Nicklas |
29 |
22 Feb 05 |
nicklas |
@version 2.0 |
29 |
22 Feb 05 |
nicklas |
@see FlatFileParser#nextData() |
29 |
22 Feb 05 |
nicklas |
1917 |
*/ |
29 |
22 Feb 05 |
nicklas |
1918 |
public static class Data |
29 |
22 Feb 05 |
nicklas |
1919 |
{ |
58 |
01 Mar 05 |
nicklas |
1920 |
private final int lineNo; |
3618 |
01 Aug 07 |
nicklas |
1921 |
private final int dataLineNo; |
29 |
22 Feb 05 |
nicklas |
1922 |
private final String[] result; |
29 |
22 Feb 05 |
nicklas |
1923 |
private final String line; |
58 |
01 Mar 05 |
nicklas |
1924 |
private final boolean emptyIsNull; |
2539 |
17 Aug 06 |
nicklas |
1925 |
private final boolean nullIsNull; |
7738 |
14 Aug 19 |
nicklas |
1926 |
private final boolean trimWhiteSpace; |
29 |
22 Feb 05 |
nicklas |
1927 |
|
29 |
22 Feb 05 |
nicklas |
1928 |
/** |
29 |
22 Feb 05 |
nicklas |
Create a new data object. |
29 |
22 Feb 05 |
nicklas |
1930 |
*/ |
7738 |
14 Aug 19 |
nicklas |
1931 |
private Data(int lineNo, int dataLineNo, String line, String[] columns, boolean emptyIsNull, boolean nullIsNull, boolean trimWhiteSpace) |
29 |
22 Feb 05 |
nicklas |
1932 |
{ |
58 |
01 Mar 05 |
nicklas |
1933 |
this.lineNo = lineNo; |
3618 |
01 Aug 07 |
nicklas |
1934 |
this.dataLineNo = dataLineNo; |
29 |
22 Feb 05 |
nicklas |
1935 |
this.line = line; |
29 |
22 Feb 05 |
nicklas |
1936 |
this.result = columns; |
58 |
01 Mar 05 |
nicklas |
1937 |
this.emptyIsNull = emptyIsNull; |
2539 |
17 Aug 06 |
nicklas |
1938 |
this.nullIsNull = nullIsNull; |
7738 |
14 Aug 19 |
nicklas |
1939 |
this.trimWhiteSpace = trimWhiteSpace; |
29 |
22 Feb 05 |
nicklas |
1940 |
} |
2655 |
22 Sep 06 |
nicklas |
1941 |
|
6127 |
14 Sep 12 |
nicklas |
1942 |
@Override |
2655 |
22 Sep 06 |
nicklas |
1943 |
public String toString() |
2655 |
22 Sep 06 |
nicklas |
1944 |
{ |
2655 |
22 Sep 06 |
nicklas |
1945 |
return "Line " + lineNo +" [Data]: " + line; |
2655 |
22 Sep 06 |
nicklas |
1946 |
} |
2655 |
22 Sep 06 |
nicklas |
1947 |
|
29 |
22 Feb 05 |
nicklas |
1948 |
/** |
29 |
22 Feb 05 |
nicklas |
The line number of the data in the input stream. |
29 |
22 Feb 05 |
nicklas |
@return The line number |
29 |
22 Feb 05 |
nicklas |
1951 |
*/ |
29 |
22 Feb 05 |
nicklas |
1952 |
public int lineNo() |
29 |
22 Feb 05 |
nicklas |
1953 |
{ |
58 |
01 Mar 05 |
nicklas |
1954 |
return lineNo; |
29 |
22 Feb 05 |
nicklas |
1955 |
} |
29 |
22 Feb 05 |
nicklas |
1956 |
/** |
3618 |
01 Aug 07 |
nicklas |
The line number of the data in the current section. |
3618 |
01 Aug 07 |
nicklas |
@return The line number |
3618 |
01 Aug 07 |
nicklas |
1959 |
*/ |
3618 |
01 Aug 07 |
nicklas |
1960 |
public int dataLineNo() |
3618 |
01 Aug 07 |
nicklas |
1961 |
{ |
3618 |
01 Aug 07 |
nicklas |
1962 |
return dataLineNo; |
3618 |
01 Aug 07 |
nicklas |
1963 |
} |
3618 |
01 Aug 07 |
nicklas |
1964 |
/** |
29 |
22 Feb 05 |
nicklas |
The entire line as it was read from the input stream. |
29 |
22 Feb 05 |
nicklas |
@return A string |
29 |
22 Feb 05 |
nicklas |
1967 |
*/ |
29 |
22 Feb 05 |
nicklas |
1968 |
public String line() |
29 |
22 Feb 05 |
nicklas |
1969 |
{ |
29 |
22 Feb 05 |
nicklas |
1970 |
return line; |
29 |
22 Feb 05 |
nicklas |
1971 |
} |
29 |
22 Feb 05 |
nicklas |
1972 |
/** |
29 |
22 Feb 05 |
nicklas |
The number of columns as determined by the |
29 |
22 Feb 05 |
nicklas |
{@link FlatFileParser#setDataSplitterRegexp(Pattern)} |
29 |
22 Feb 05 |
nicklas |
regular expression. |
29 |
22 Feb 05 |
nicklas |
1976 |
|
29 |
22 Feb 05 |
nicklas |
@return The number of columns in the data line |
29 |
22 Feb 05 |
nicklas |
1978 |
*/ |
29 |
22 Feb 05 |
nicklas |
1979 |
public int columns() |
29 |
22 Feb 05 |
nicklas |
1980 |
{ |
29 |
22 Feb 05 |
nicklas |
1981 |
return result.length; |
29 |
22 Feb 05 |
nicklas |
1982 |
} |
29 |
22 Feb 05 |
nicklas |
1983 |
/** |
29 |
22 Feb 05 |
nicklas |
Get the value in the specified column. |
29 |
22 Feb 05 |
nicklas |
@param index The index for the column, starting at 0 |
29 |
22 Feb 05 |
nicklas |
@return The value in the column |
7664 |
20 Mar 19 |
nicklas |
@deprecated In 3.15, use {@link #getString(int)} instead or one of the |
7664 |
20 Mar 19 |
nicklas |
other getXxx() methods to return a given data type directly |
29 |
22 Feb 05 |
nicklas |
1989 |
*/ |
7664 |
20 Mar 19 |
nicklas |
1990 |
@Deprecated |
29 |
22 Feb 05 |
nicklas |
1991 |
public String get(int index) |
29 |
22 Feb 05 |
nicklas |
1992 |
{ |
7664 |
20 Mar 19 |
nicklas |
1993 |
return getString(index); |
29 |
22 Feb 05 |
nicklas |
1994 |
} |
5193 |
27 Nov 09 |
nicklas |
1995 |
|
5193 |
27 Nov 09 |
nicklas |
1996 |
/** |
5193 |
27 Nov 09 |
nicklas |
The data line as an array of strings. |
5193 |
27 Nov 09 |
nicklas |
@since 2.15 |
5193 |
27 Nov 09 |
nicklas |
1999 |
*/ |
5193 |
27 Nov 09 |
nicklas |
2000 |
public String[] data() |
5193 |
27 Nov 09 |
nicklas |
2001 |
{ |
5193 |
27 Nov 09 |
nicklas |
2002 |
return result; |
5193 |
27 Nov 09 |
nicklas |
2003 |
} |
7664 |
20 Mar 19 |
nicklas |
2004 |
|
7664 |
20 Mar 19 |
nicklas |
2005 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the value in the specified column. |
7664 |
20 Mar 19 |
nicklas |
@param index The index for the column, starting at 0 |
7664 |
20 Mar 19 |
nicklas |
@return The value in the column |
7664 |
20 Mar 19 |
nicklas |
2009 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2010 |
public String getString(int index) |
7664 |
20 Mar 19 |
nicklas |
2011 |
{ |
7738 |
14 Aug 19 |
nicklas |
2012 |
return fixString(result[index]); |
7664 |
20 Mar 19 |
nicklas |
2013 |
} |
7664 |
20 Mar 19 |
nicklas |
2014 |
|
7664 |
20 Mar 19 |
nicklas |
2015 |
/** |
7738 |
14 Aug 19 |
nicklas |
Fix the value string according to options |
7738 |
14 Aug 19 |
nicklas |
for 'emptyIsNull', 'nullIsNull', etc. |
7738 |
14 Aug 19 |
nicklas |
@since 3.15.1 |
7738 |
14 Aug 19 |
nicklas |
2019 |
*/ |
7738 |
14 Aug 19 |
nicklas |
2020 |
protected String fixString(String value) |
7738 |
14 Aug 19 |
nicklas |
2021 |
{ |
7874 |
22 Oct 20 |
nicklas |
2022 |
if (value != null && trimWhiteSpace) value = value.strip(); |
7738 |
14 Aug 19 |
nicklas |
2023 |
|
7738 |
14 Aug 19 |
nicklas |
2024 |
return value == null || |
7738 |
14 Aug 19 |
nicklas |
2025 |
emptyIsNull && value.length() == 0 || |
7738 |
14 Aug 19 |
nicklas |
2026 |
nullIsNull && "NULL".equalsIgnoreCase(value) |
7738 |
14 Aug 19 |
nicklas |
2027 |
? null : value; |
7738 |
14 Aug 19 |
nicklas |
2028 |
} |
7738 |
14 Aug 19 |
nicklas |
2029 |
|
7738 |
14 Aug 19 |
nicklas |
2030 |
/** |
7665 |
20 Mar 19 |
nicklas |
Shortcut for getInt(index, null, true) |
7665 |
20 Mar 19 |
nicklas |
2032 |
*/ |
7665 |
20 Mar 19 |
nicklas |
2033 |
public Integer getInt(int index) |
7665 |
20 Mar 19 |
nicklas |
2034 |
{ |
7665 |
20 Mar 19 |
nicklas |
2035 |
return getInt(index, null, true); |
7665 |
20 Mar 19 |
nicklas |
2036 |
} |
7665 |
20 Mar 19 |
nicklas |
2037 |
|
7665 |
20 Mar 19 |
nicklas |
2038 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the value as an integer. The default implementation parses |
7664 |
20 Mar 19 |
nicklas |
the string value with the given parser. If the source file is |
7664 |
20 Mar 19 |
nicklas |
an Excel file, the numeric value may be retrieved directly. |
7664 |
20 Mar 19 |
nicklas |
@since 3.15 |
7664 |
20 Mar 19 |
nicklas |
2043 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2044 |
public Integer getInt(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2045 |
{ |
7738 |
14 Aug 19 |
nicklas |
2046 |
return (Integer)Type.INT.parseString(getString(index), parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2047 |
} |
7664 |
20 Mar 19 |
nicklas |
2048 |
|
7664 |
20 Mar 19 |
nicklas |
2049 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the value as a long. The default implementation parses |
7664 |
20 Mar 19 |
nicklas |
the string value with the given parser. If the source file is |
7664 |
20 Mar 19 |
nicklas |
an Excel file, the numeric value may be retrieved directly. |
7664 |
20 Mar 19 |
nicklas |
@since 3.15 |
7664 |
20 Mar 19 |
nicklas |
2054 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2055 |
public Long getLong(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2056 |
{ |
7738 |
14 Aug 19 |
nicklas |
2057 |
return (Long)Type.LONG.parseString(getString(index), parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2058 |
} |
7664 |
20 Mar 19 |
nicklas |
2059 |
|
7664 |
20 Mar 19 |
nicklas |
2060 |
/** |
7665 |
20 Mar 19 |
nicklas |
Shortcut for getFloat(index, null, true) |
7665 |
20 Mar 19 |
nicklas |
2062 |
*/ |
7665 |
20 Mar 19 |
nicklas |
2063 |
public Float getFloat(int index) |
7665 |
20 Mar 19 |
nicklas |
2064 |
{ |
7665 |
20 Mar 19 |
nicklas |
2065 |
return getFloat(index, null, true); |
7665 |
20 Mar 19 |
nicklas |
2066 |
} |
7665 |
20 Mar 19 |
nicklas |
2067 |
|
7665 |
20 Mar 19 |
nicklas |
2068 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the value as a float. The default implementation parses |
7664 |
20 Mar 19 |
nicklas |
the string value with the given parser. If the source file is |
7664 |
20 Mar 19 |
nicklas |
an Excel file, the numeric value may be retrieved directly. |
7664 |
20 Mar 19 |
nicklas |
@since 3.15 |
7664 |
20 Mar 19 |
nicklas |
2073 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2074 |
public Float getFloat(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2075 |
{ |
7738 |
14 Aug 19 |
nicklas |
2076 |
return (Float)Type.FLOAT.parseString(getString(index), parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2077 |
} |
7664 |
20 Mar 19 |
nicklas |
2078 |
|
7664 |
20 Mar 19 |
nicklas |
2079 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the value as a double. The default implementation parses |
7664 |
20 Mar 19 |
nicklas |
the string value with the given parser. If the source file is |
7664 |
20 Mar 19 |
nicklas |
an Excel file, the numeric value may be retrieved directly. |
7664 |
20 Mar 19 |
nicklas |
@since 3.15 |
7664 |
20 Mar 19 |
nicklas |
2084 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2085 |
public Double getDouble(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2086 |
{ |
7738 |
14 Aug 19 |
nicklas |
2087 |
return (Double)Type.DOUBLE.parseString(getString(index), parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2088 |
} |
7664 |
20 Mar 19 |
nicklas |
2089 |
|
7664 |
20 Mar 19 |
nicklas |
2090 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the value as a date. The default implementation parses |
7664 |
20 Mar 19 |
nicklas |
the string value with the given parser. If the source file is |
7664 |
20 Mar 19 |
nicklas |
an Excel file, the date value may be retrieved directly. |
7664 |
20 Mar 19 |
nicklas |
@since 3.15 |
7664 |
20 Mar 19 |
nicklas |
2095 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2096 |
public Date getDate(int index, Formatter<Date> parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2097 |
{ |
7738 |
14 Aug 19 |
nicklas |
2098 |
return (Date)Type.DATE.parseString(getString(index), parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2099 |
} |
29 |
22 Feb 05 |
nicklas |
2100 |
} |
2203 |
28 Apr 06 |
nicklas |
2101 |
|
7664 |
20 Mar 19 |
nicklas |
2102 |
/** |
7664 |
20 Mar 19 |
nicklas |
Subclass that is used to return data when the source file is an Excel |
7664 |
20 Mar 19 |
nicklas |
file. It will forward all get() methods to retrieve the data directly |
7664 |
20 Mar 19 |
nicklas |
from the Cell item in Excel. |
7664 |
20 Mar 19 |
nicklas |
2106 |
*/ |
7664 |
20 Mar 19 |
nicklas |
2107 |
static class ExcelData |
7664 |
20 Mar 19 |
nicklas |
2108 |
extends Data |
7664 |
20 Mar 19 |
nicklas |
2109 |
{ |
7664 |
20 Mar 19 |
nicklas |
2110 |
|
7664 |
20 Mar 19 |
nicklas |
2111 |
private final SheetInfo sheet; |
7664 |
20 Mar 19 |
nicklas |
2112 |
private final int rowOffset; |
7664 |
20 Mar 19 |
nicklas |
2113 |
private final int colOffset; |
7664 |
20 Mar 19 |
nicklas |
2114 |
|
7664 |
20 Mar 19 |
nicklas |
2115 |
/** |
7664 |
20 Mar 19 |
nicklas |
Create a new data object. |
7664 |
20 Mar 19 |
nicklas |
2117 |
*/ |
7738 |
14 Aug 19 |
nicklas |
2118 |
ExcelData(SheetInfo sheet, int lineOffset, int lineNo, int dataLineNo, String line, String[] columns, boolean emptyIsNull, boolean nullIsNull, boolean trimWhiteSpace) |
7664 |
20 Mar 19 |
nicklas |
2119 |
{ |
7738 |
14 Aug 19 |
nicklas |
2120 |
super(lineNo, dataLineNo, line, columns, emptyIsNull, nullIsNull, trimWhiteSpace); |
7664 |
20 Mar 19 |
nicklas |
2121 |
this.sheet = sheet; |
7676 |
27 Mar 19 |
nicklas |
2122 |
this.rowOffset = sheet.getFirstRow()-1-lineOffset; |
7664 |
20 Mar 19 |
nicklas |
2123 |
this.colOffset = sheet.getFirstColumn(); |
7664 |
20 Mar 19 |
nicklas |
2124 |
} |
7664 |
20 Mar 19 |
nicklas |
2125 |
|
7664 |
20 Mar 19 |
nicklas |
2126 |
@Override |
7664 |
20 Mar 19 |
nicklas |
2127 |
public String getString(int index) |
7664 |
20 Mar 19 |
nicklas |
2128 |
{ |
7664 |
20 Mar 19 |
nicklas |
2129 |
Cell c = sheet.getCell(lineNo()+rowOffset, index+colOffset); |
7738 |
14 Aug 19 |
nicklas |
2130 |
return fixString(sheet.getCellValueAsString(c)); |
7664 |
20 Mar 19 |
nicklas |
2131 |
} |
7664 |
20 Mar 19 |
nicklas |
2132 |
|
7664 |
20 Mar 19 |
nicklas |
2133 |
@Override |
7664 |
20 Mar 19 |
nicklas |
2134 |
public Integer getInt(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2135 |
{ |
7664 |
20 Mar 19 |
nicklas |
2136 |
Double d = getDouble(index, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2137 |
return d == null ? null : d.intValue(); |
7664 |
20 Mar 19 |
nicklas |
2138 |
} |
7664 |
20 Mar 19 |
nicklas |
2139 |
|
7664 |
20 Mar 19 |
nicklas |
2140 |
@Override |
7664 |
20 Mar 19 |
nicklas |
2141 |
public Long getLong(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2142 |
{ |
7664 |
20 Mar 19 |
nicklas |
2143 |
Double d = getDouble(index, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2144 |
return d == null ? null : d.longValue(); |
7664 |
20 Mar 19 |
nicklas |
2145 |
} |
7664 |
20 Mar 19 |
nicklas |
2146 |
|
7664 |
20 Mar 19 |
nicklas |
2147 |
@Override |
7664 |
20 Mar 19 |
nicklas |
2148 |
public Float getFloat(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2149 |
{ |
7664 |
20 Mar 19 |
nicklas |
2150 |
Double d = getDouble(index, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2151 |
return d == null ? null : d.floatValue(); |
7664 |
20 Mar 19 |
nicklas |
2152 |
} |
7664 |
20 Mar 19 |
nicklas |
2153 |
|
7664 |
20 Mar 19 |
nicklas |
2154 |
@Override |
7664 |
20 Mar 19 |
nicklas |
2155 |
public Double getDouble(int index, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2156 |
{ |
7664 |
20 Mar 19 |
nicklas |
2157 |
Cell c = sheet.getCell(lineNo()+rowOffset, index+colOffset); |
7664 |
20 Mar 19 |
nicklas |
2158 |
return sheet.getCellValueAsNumber(c, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2159 |
} |
7664 |
20 Mar 19 |
nicklas |
2160 |
|
7664 |
20 Mar 19 |
nicklas |
2161 |
@Override |
7664 |
20 Mar 19 |
nicklas |
2162 |
public Date getDate(int index, Formatter<Date> parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
2163 |
{ |
7664 |
20 Mar 19 |
nicklas |
2164 |
Cell c = sheet.getCell(lineNo()+rowOffset, index+colOffset); |
7664 |
20 Mar 19 |
nicklas |
2165 |
return sheet.getCellValueAsDate(c, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
2166 |
} |
7664 |
20 Mar 19 |
nicklas |
2167 |
|
7664 |
20 Mar 19 |
nicklas |
2168 |
} |
29 |
22 Feb 05 |
nicklas |
2169 |
} |