7654 |
15 Mar 19 |
nicklas |
1 |
package net.sf.basedb.util.excel; |
7654 |
15 Mar 19 |
nicklas |
2 |
|
7654 |
15 Mar 19 |
nicklas |
3 |
import java.io.ByteArrayInputStream; |
7654 |
15 Mar 19 |
nicklas |
4 |
import java.io.IOException; |
7654 |
15 Mar 19 |
nicklas |
5 |
import java.io.InputStream; |
7654 |
15 Mar 19 |
nicklas |
6 |
import java.nio.charset.Charset; |
7714 |
21 May 19 |
nicklas |
7 |
import java.nio.charset.StandardCharsets; |
7664 |
20 Mar 19 |
nicklas |
8 |
import java.text.NumberFormat; |
7654 |
15 Mar 19 |
nicklas |
9 |
import java.util.ArrayList; |
7654 |
15 Mar 19 |
nicklas |
10 |
import java.util.Date; |
7673 |
26 Mar 19 |
nicklas |
11 |
import java.util.HashMap; |
7654 |
15 Mar 19 |
nicklas |
12 |
import java.util.Iterator; |
7654 |
15 Mar 19 |
nicklas |
13 |
import java.util.List; |
7673 |
26 Mar 19 |
nicklas |
14 |
import java.util.Map; |
7654 |
15 Mar 19 |
nicklas |
15 |
|
7655 |
19 Mar 19 |
nicklas |
16 |
import org.apache.poi.poifs.filesystem.FileMagic; |
7738 |
14 Aug 19 |
nicklas |
17 |
import org.apache.poi.ss.formula.eval.NotImplementedException; |
7738 |
14 Aug 19 |
nicklas |
18 |
import org.apache.poi.ss.formula.eval.NotImplementedFunctionException; |
7654 |
15 Mar 19 |
nicklas |
19 |
import org.apache.poi.ss.usermodel.Cell; |
7654 |
15 Mar 19 |
nicklas |
20 |
import org.apache.poi.ss.usermodel.CellType; |
7654 |
15 Mar 19 |
nicklas |
21 |
import org.apache.poi.ss.usermodel.DataFormatter; |
7654 |
15 Mar 19 |
nicklas |
22 |
import org.apache.poi.ss.usermodel.DateUtil; |
7738 |
14 Aug 19 |
nicklas |
23 |
import org.apache.poi.ss.usermodel.FormulaError; |
7654 |
15 Mar 19 |
nicklas |
24 |
import org.apache.poi.ss.usermodel.FormulaEvaluator; |
7654 |
15 Mar 19 |
nicklas |
25 |
import org.apache.poi.ss.usermodel.Row; |
7654 |
15 Mar 19 |
nicklas |
26 |
import org.apache.poi.ss.usermodel.Sheet; |
7654 |
15 Mar 19 |
nicklas |
27 |
import org.apache.poi.ss.usermodel.Workbook; |
7654 |
15 Mar 19 |
nicklas |
28 |
import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
7654 |
15 Mar 19 |
nicklas |
29 |
|
7654 |
15 Mar 19 |
nicklas |
30 |
import net.sf.basedb.core.File; |
7664 |
20 Mar 19 |
nicklas |
31 |
import net.sf.basedb.core.Type; |
7654 |
15 Mar 19 |
nicklas |
32 |
import net.sf.basedb.util.FileUtil; |
7673 |
26 Mar 19 |
nicklas |
33 |
import net.sf.basedb.util.InputStreamCombiner; |
7654 |
15 Mar 19 |
nicklas |
34 |
import net.sf.basedb.util.encode.EncoderDecoder; |
7654 |
15 Mar 19 |
nicklas |
35 |
import net.sf.basedb.util.encode.ToSpaceEncoderDecoder; |
7654 |
15 Mar 19 |
nicklas |
36 |
import net.sf.basedb.util.formatter.Formatter; |
7654 |
15 Mar 19 |
nicklas |
37 |
|
7654 |
15 Mar 19 |
nicklas |
38 |
/** |
7654 |
15 Mar 19 |
nicklas |
Utility class for converting Excel worksheets to CSV-formatted InputStreams. |
7654 |
15 Mar 19 |
nicklas |
40 |
|
7654 |
15 Mar 19 |
nicklas |
NOTE! If the Excel worksheet contains numbers and/or date values it is recommended |
7654 |
15 Mar 19 |
nicklas |
that a number and/or date formatter is specified. If not, the values will be converted |
7654 |
15 Mar 19 |
nicklas |
to text using the formatting specification from the Excel workbook which may depende on |
7654 |
15 Mar 19 |
nicklas |
locale settings and other stuff (=the results may be unpredicatable). So far we only |
7654 |
15 Mar 19 |
nicklas |
support for one formatter of each type. Future use cases may require different formatters |
7654 |
15 Mar 19 |
nicklas |
for different columns, but we will wait with this until it is needed. |
7654 |
15 Mar 19 |
nicklas |
47 |
|
7654 |
15 Mar 19 |
nicklas |
@since 3.15 |
7654 |
15 Mar 19 |
nicklas |
49 |
*/ |
7654 |
15 Mar 19 |
nicklas |
50 |
public class XlsxToCsvUtil |
7654 |
15 Mar 19 |
nicklas |
51 |
{ |
7654 |
15 Mar 19 |
nicklas |
52 |
|
7654 |
15 Mar 19 |
nicklas |
53 |
/** |
7654 |
15 Mar 19 |
nicklas |
The MIME type for Excel workbook files. |
7654 |
15 Mar 19 |
nicklas |
55 |
*/ |
7654 |
15 Mar 19 |
nicklas |
56 |
public static final String XLSX_MIME_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"; |
7654 |
15 Mar 19 |
nicklas |
57 |
|
7654 |
15 Mar 19 |
nicklas |
58 |
|
7654 |
15 Mar 19 |
nicklas |
59 |
/** |
7654 |
15 Mar 19 |
nicklas |
Check the MIME type and file extension to see if the file could be an Excel file. |
7654 |
15 Mar 19 |
nicklas |
The file contents is not checked. |
7654 |
15 Mar 19 |
nicklas |
62 |
*/ |
7654 |
15 Mar 19 |
nicklas |
63 |
public static boolean seemsLikeAnExcelFile(File file) |
7654 |
15 Mar 19 |
nicklas |
64 |
{ |
7654 |
15 Mar 19 |
nicklas |
65 |
if (file == null) return false; |
7654 |
15 Mar 19 |
nicklas |
66 |
return (XLSX_MIME_TYPE.equals(file.getMimeType()) || file.getName().endsWith(".xlsx")); |
7654 |
15 Mar 19 |
nicklas |
67 |
} |
7654 |
15 Mar 19 |
nicklas |
68 |
|
7655 |
19 Mar 19 |
nicklas |
69 |
/** |
7655 |
19 Mar 19 |
nicklas |
Check if the given stream is an Excel file by looking at the first few bytes. |
7655 |
19 Mar 19 |
nicklas |
The stream must support seek(). Call {@link #prepareForCheck(InputStream)} |
7655 |
19 Mar 19 |
nicklas |
before calling this method if it is not sure. |
7655 |
19 Mar 19 |
nicklas |
73 |
*/ |
7655 |
19 Mar 19 |
nicklas |
74 |
public static boolean isAnExcelFile(InputStream in) |
7655 |
19 Mar 19 |
nicklas |
75 |
{ |
7655 |
19 Mar 19 |
nicklas |
76 |
try |
7655 |
19 Mar 19 |
nicklas |
77 |
{ |
7655 |
19 Mar 19 |
nicklas |
78 |
return FileMagic.valueOf(in) == FileMagic.OOXML; |
7655 |
19 Mar 19 |
nicklas |
79 |
} |
7655 |
19 Mar 19 |
nicklas |
80 |
catch (Exception ex) |
7655 |
19 Mar 19 |
nicklas |
81 |
{} |
7655 |
19 Mar 19 |
nicklas |
82 |
return false; |
7655 |
19 Mar 19 |
nicklas |
83 |
} |
7655 |
19 Mar 19 |
nicklas |
84 |
|
7655 |
19 Mar 19 |
nicklas |
85 |
/** |
7655 |
19 Mar 19 |
nicklas |
Prepare the input stream for checking with |
7655 |
19 Mar 19 |
nicklas |
{@link #isAnExcelFile(InputStream)}. If the given stream |
7655 |
19 Mar 19 |
nicklas |
supports seek() it is returned as it is, otherwise it is |
7655 |
19 Mar 19 |
nicklas |
wrapped by a buffered stream. |
7655 |
19 Mar 19 |
nicklas |
90 |
*/ |
7655 |
19 Mar 19 |
nicklas |
91 |
public static InputStream prepareForCheck(InputStream in) |
7655 |
19 Mar 19 |
nicklas |
92 |
{ |
7655 |
19 Mar 19 |
nicklas |
93 |
return FileMagic.prepareToCheckMagic(in); |
7655 |
19 Mar 19 |
nicklas |
94 |
} |
7655 |
19 Mar 19 |
nicklas |
95 |
|
7654 |
15 Mar 19 |
nicklas |
96 |
private Charset charset; |
7654 |
15 Mar 19 |
nicklas |
97 |
private String columnSeparator; |
7654 |
15 Mar 19 |
nicklas |
98 |
private String rowSeparator; |
7654 |
15 Mar 19 |
nicklas |
99 |
private boolean writeTrailingColumns; |
7654 |
15 Mar 19 |
nicklas |
100 |
private boolean evaluateFormulas; |
7654 |
15 Mar 19 |
nicklas |
101 |
private EncoderDecoder encoder; |
7654 |
15 Mar 19 |
nicklas |
102 |
private Formatter<Date> dateFormat; |
7655 |
19 Mar 19 |
nicklas |
103 |
private Formatter<Date> timestampFormat; |
7654 |
15 Mar 19 |
nicklas |
104 |
private Formatter<Number> numberFormat; |
7654 |
15 Mar 19 |
nicklas |
105 |
|
7654 |
15 Mar 19 |
nicklas |
106 |
private Workbook workbook; |
7654 |
15 Mar 19 |
nicklas |
107 |
private List<String> sheetNames; |
7673 |
26 Mar 19 |
nicklas |
108 |
private Map<String, SheetInfo> sheets; |
7654 |
15 Mar 19 |
nicklas |
109 |
|
7654 |
15 Mar 19 |
nicklas |
110 |
/** |
7654 |
15 Mar 19 |
nicklas |
Create a new instance using defult settings. |
7654 |
15 Mar 19 |
nicklas |
112 |
*/ |
7654 |
15 Mar 19 |
nicklas |
113 |
public XlsxToCsvUtil() |
7654 |
15 Mar 19 |
nicklas |
114 |
{ |
7654 |
15 Mar 19 |
nicklas |
115 |
this.columnSeparator = "\t"; |
7654 |
15 Mar 19 |
nicklas |
116 |
this.rowSeparator = "\n"; |
7654 |
15 Mar 19 |
nicklas |
117 |
this.writeTrailingColumns = true; |
7654 |
15 Mar 19 |
nicklas |
118 |
this.evaluateFormulas = false; |
7714 |
21 May 19 |
nicklas |
119 |
this.charset = StandardCharsets.UTF_8; |
7654 |
15 Mar 19 |
nicklas |
120 |
this.encoder = new ToSpaceEncoderDecoder(); |
7673 |
26 Mar 19 |
nicklas |
121 |
this.sheets = new HashMap<>(); |
7654 |
15 Mar 19 |
nicklas |
122 |
} |
7654 |
15 Mar 19 |
nicklas |
123 |
|
7654 |
15 Mar 19 |
nicklas |
124 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the current column separator. The default is {tab}. |
7654 |
15 Mar 19 |
nicklas |
126 |
*/ |
7654 |
15 Mar 19 |
nicklas |
127 |
public String getColumnSeparator() |
7654 |
15 Mar 19 |
nicklas |
128 |
{ |
7654 |
15 Mar 19 |
nicklas |
129 |
return columnSeparator; |
7654 |
15 Mar 19 |
nicklas |
130 |
} |
7654 |
15 Mar 19 |
nicklas |
131 |
/** |
7654 |
15 Mar 19 |
nicklas |
Set the column separator. |
7654 |
15 Mar 19 |
nicklas |
133 |
*/ |
7654 |
15 Mar 19 |
nicklas |
134 |
public void setColumnSeparator(String columnSeparator) |
7654 |
15 Mar 19 |
nicklas |
135 |
{ |
7654 |
15 Mar 19 |
nicklas |
136 |
this.columnSeparator = columnSeparator; |
7654 |
15 Mar 19 |
nicklas |
137 |
} |
7654 |
15 Mar 19 |
nicklas |
138 |
|
7654 |
15 Mar 19 |
nicklas |
139 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the current row separator. The default is {newline}. |
7654 |
15 Mar 19 |
nicklas |
141 |
*/ |
7654 |
15 Mar 19 |
nicklas |
142 |
public String getRowSeparator() |
7654 |
15 Mar 19 |
nicklas |
143 |
{ |
7654 |
15 Mar 19 |
nicklas |
144 |
return rowSeparator; |
7654 |
15 Mar 19 |
nicklas |
145 |
} |
7654 |
15 Mar 19 |
nicklas |
146 |
/** |
7654 |
15 Mar 19 |
nicklas |
Set the row separator. |
7654 |
15 Mar 19 |
nicklas |
148 |
*/ |
7654 |
15 Mar 19 |
nicklas |
149 |
public void setRowSeparator(String rowSeparator) |
7654 |
15 Mar 19 |
nicklas |
150 |
{ |
7654 |
15 Mar 19 |
nicklas |
151 |
this.rowSeparator = rowSeparator; |
7654 |
15 Mar 19 |
nicklas |
152 |
} |
7654 |
15 Mar 19 |
nicklas |
153 |
|
7654 |
15 Mar 19 |
nicklas |
154 |
/** |
7654 |
15 Mar 19 |
nicklas |
If TRUE, trailing empty columns are written to the CSV file so |
7654 |
15 Mar 19 |
nicklas |
that all lines have the same number of columns. If FALSE, |
7654 |
15 Mar 19 |
nicklas |
trailing emtpy columns are skipped. The default is TRUE. |
7654 |
15 Mar 19 |
nicklas |
158 |
*/ |
7654 |
15 Mar 19 |
nicklas |
159 |
public boolean getWriteTrailingColumns() |
7654 |
15 Mar 19 |
nicklas |
160 |
{ |
7654 |
15 Mar 19 |
nicklas |
161 |
return writeTrailingColumns; |
7654 |
15 Mar 19 |
nicklas |
162 |
} |
7654 |
15 Mar 19 |
nicklas |
163 |
|
7654 |
15 Mar 19 |
nicklas |
164 |
public void setWriteTrailingColumns(boolean writeTrailingColumns) |
7654 |
15 Mar 19 |
nicklas |
165 |
{ |
7654 |
15 Mar 19 |
nicklas |
166 |
this.writeTrailingColumns = writeTrailingColumns; |
7654 |
15 Mar 19 |
nicklas |
167 |
} |
7654 |
15 Mar 19 |
nicklas |
168 |
|
7654 |
15 Mar 19 |
nicklas |
169 |
/** |
7654 |
15 Mar 19 |
nicklas |
If TRUE, formulas are evaluated and the result is written |
7654 |
15 Mar 19 |
nicklas |
to the CSV file. If FALSE, the formula expression is written. |
7654 |
15 Mar 19 |
nicklas |
The default is FALSE. |
7654 |
15 Mar 19 |
nicklas |
173 |
*/ |
7654 |
15 Mar 19 |
nicklas |
174 |
public boolean getEvaluateFormulas() |
7654 |
15 Mar 19 |
nicklas |
175 |
{ |
7654 |
15 Mar 19 |
nicklas |
176 |
return evaluateFormulas; |
7654 |
15 Mar 19 |
nicklas |
177 |
} |
7654 |
15 Mar 19 |
nicklas |
178 |
|
7654 |
15 Mar 19 |
nicklas |
179 |
public void setEvaluateFormulas(boolean evaluateFormulas) |
7654 |
15 Mar 19 |
nicklas |
180 |
{ |
7654 |
15 Mar 19 |
nicklas |
181 |
this.evaluateFormulas = evaluateFormulas; |
7654 |
15 Mar 19 |
nicklas |
182 |
} |
7654 |
15 Mar 19 |
nicklas |
183 |
|
7654 |
15 Mar 19 |
nicklas |
184 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the current character set. The default is UTF-8. |
7654 |
15 Mar 19 |
nicklas |
186 |
*/ |
7654 |
15 Mar 19 |
nicklas |
187 |
public Charset getCharset() |
7654 |
15 Mar 19 |
nicklas |
188 |
{ |
7654 |
15 Mar 19 |
nicklas |
189 |
return charset; |
7654 |
15 Mar 19 |
nicklas |
190 |
} |
7654 |
15 Mar 19 |
nicklas |
191 |
|
7654 |
15 Mar 19 |
nicklas |
192 |
/** |
7654 |
15 Mar 19 |
nicklas |
Set the character set to use for the text. |
7654 |
15 Mar 19 |
nicklas |
194 |
*/ |
7654 |
15 Mar 19 |
nicklas |
195 |
public void setCharset(Charset charset) |
7654 |
15 Mar 19 |
nicklas |
196 |
{ |
7654 |
15 Mar 19 |
nicklas |
197 |
this.charset = charset; |
7654 |
15 Mar 19 |
nicklas |
198 |
} |
7654 |
15 Mar 19 |
nicklas |
199 |
|
7654 |
15 Mar 19 |
nicklas |
200 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the current encoder. The default is the ToSpaceEncoderDecoder. |
7654 |
15 Mar 19 |
nicklas |
202 |
*/ |
7654 |
15 Mar 19 |
nicklas |
203 |
public EncoderDecoder getEncoder() |
7654 |
15 Mar 19 |
nicklas |
204 |
{ |
7654 |
15 Mar 19 |
nicklas |
205 |
return encoder; |
7654 |
15 Mar 19 |
nicklas |
206 |
} |
7654 |
15 Mar 19 |
nicklas |
207 |
|
7654 |
15 Mar 19 |
nicklas |
208 |
/** |
7654 |
15 Mar 19 |
nicklas |
Set the encoder to use for converting text values so that they |
7654 |
15 Mar 19 |
nicklas |
do not conflict with column or row separators. |
7654 |
15 Mar 19 |
nicklas |
211 |
*/ |
7654 |
15 Mar 19 |
nicklas |
212 |
public void setEncoder(EncoderDecoder encoder) |
7654 |
15 Mar 19 |
nicklas |
213 |
{ |
7654 |
15 Mar 19 |
nicklas |
214 |
this.encoder = encoder; |
7654 |
15 Mar 19 |
nicklas |
215 |
} |
7654 |
15 Mar 19 |
nicklas |
216 |
|
7654 |
15 Mar 19 |
nicklas |
217 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the date formatter that is used for formatting cells with date-like values. |
7654 |
15 Mar 19 |
nicklas |
If null, the default implementation is used (which may not be predicatable). |
7654 |
15 Mar 19 |
nicklas |
220 |
*/ |
7654 |
15 Mar 19 |
nicklas |
221 |
public Formatter<Date> getDateFormat() |
7654 |
15 Mar 19 |
nicklas |
222 |
{ |
7654 |
15 Mar 19 |
nicklas |
223 |
return dateFormat; |
7654 |
15 Mar 19 |
nicklas |
224 |
} |
7654 |
15 Mar 19 |
nicklas |
225 |
|
7654 |
15 Mar 19 |
nicklas |
226 |
/** |
7654 |
15 Mar 19 |
nicklas |
Set a date formatter to use for formatting cells with date-like values. |
7654 |
15 Mar 19 |
nicklas |
If null, the default implementation is used (which may not be predicatable). |
7654 |
15 Mar 19 |
nicklas |
229 |
*/ |
7654 |
15 Mar 19 |
nicklas |
230 |
public void setDateFormat(Formatter<Date> dateFormat) |
7654 |
15 Mar 19 |
nicklas |
231 |
{ |
7654 |
15 Mar 19 |
nicklas |
232 |
this.dateFormat = dateFormat; |
7654 |
15 Mar 19 |
nicklas |
233 |
} |
7654 |
15 Mar 19 |
nicklas |
234 |
|
7654 |
15 Mar 19 |
nicklas |
235 |
/** |
7655 |
19 Mar 19 |
nicklas |
Get the date formatter that is used for formatting cells with timestamp-like values. |
7655 |
19 Mar 19 |
nicklas |
If null, the default implementation is used (which may not be predicatable). |
7655 |
19 Mar 19 |
nicklas |
238 |
*/ |
7655 |
19 Mar 19 |
nicklas |
239 |
public Formatter<Date> getTimestampFormat() |
7655 |
19 Mar 19 |
nicklas |
240 |
{ |
7655 |
19 Mar 19 |
nicklas |
241 |
return timestampFormat; |
7655 |
19 Mar 19 |
nicklas |
242 |
} |
7655 |
19 Mar 19 |
nicklas |
243 |
|
7655 |
19 Mar 19 |
nicklas |
244 |
/** |
7655 |
19 Mar 19 |
nicklas |
Set a date formatter to use for formatting cells with timestamp-like values. |
7655 |
19 Mar 19 |
nicklas |
If null, the default implementation is used (which may not be predicatable). |
7655 |
19 Mar 19 |
nicklas |
247 |
*/ |
7655 |
19 Mar 19 |
nicklas |
248 |
public void setTimestampFormat(Formatter<Date> timestampFormat) |
7655 |
19 Mar 19 |
nicklas |
249 |
{ |
7655 |
19 Mar 19 |
nicklas |
250 |
this.timestampFormat = timestampFormat; |
7655 |
19 Mar 19 |
nicklas |
251 |
} |
7655 |
19 Mar 19 |
nicklas |
252 |
|
7655 |
19 Mar 19 |
nicklas |
253 |
|
7655 |
19 Mar 19 |
nicklas |
254 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the formatter that is used for formatting cells with numeric values. |
7654 |
15 Mar 19 |
nicklas |
If null, the default implementation is used (which may not be predicatable). |
7654 |
15 Mar 19 |
nicklas |
257 |
*/ |
7654 |
15 Mar 19 |
nicklas |
258 |
public Formatter<Number> getNumberFormat() |
7654 |
15 Mar 19 |
nicklas |
259 |
{ |
7654 |
15 Mar 19 |
nicklas |
260 |
return numberFormat; |
7654 |
15 Mar 19 |
nicklas |
261 |
} |
7654 |
15 Mar 19 |
nicklas |
262 |
|
7654 |
15 Mar 19 |
nicklas |
263 |
/** |
7654 |
15 Mar 19 |
nicklas |
Set a formatter that is used for formatting cells with numeric values. |
7654 |
15 Mar 19 |
nicklas |
If null, the default implementation is used (which may not be predicatable). |
7654 |
15 Mar 19 |
nicklas |
266 |
*/ |
7654 |
15 Mar 19 |
nicklas |
267 |
public void setNumberFormat(Formatter<Number> numberFormat) |
7654 |
15 Mar 19 |
nicklas |
268 |
{ |
7654 |
15 Mar 19 |
nicklas |
269 |
this.numberFormat = numberFormat; |
7654 |
15 Mar 19 |
nicklas |
270 |
} |
7654 |
15 Mar 19 |
nicklas |
271 |
|
7654 |
15 Mar 19 |
nicklas |
272 |
/** |
7654 |
15 Mar 19 |
nicklas |
Read an Excel workbook from the stream. |
7654 |
15 Mar 19 |
nicklas |
274 |
*/ |
7654 |
15 Mar 19 |
nicklas |
275 |
public void readWorkbook(InputStream xlsx) |
7654 |
15 Mar 19 |
nicklas |
276 |
throws IOException |
7654 |
15 Mar 19 |
nicklas |
277 |
{ |
7654 |
15 Mar 19 |
nicklas |
278 |
this.workbook = new XSSFWorkbook(xlsx); |
7654 |
15 Mar 19 |
nicklas |
279 |
this.sheetNames = new ArrayList<>(workbook.getNumberOfSheets()); |
7654 |
15 Mar 19 |
nicklas |
280 |
Iterator<Sheet> it = workbook.sheetIterator(); |
7654 |
15 Mar 19 |
nicklas |
281 |
while (it.hasNext()) sheetNames.add(it.next().getSheetName()); |
7654 |
15 Mar 19 |
nicklas |
282 |
} |
7654 |
15 Mar 19 |
nicklas |
283 |
|
7654 |
15 Mar 19 |
nicklas |
284 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the current workbook. |
7654 |
15 Mar 19 |
nicklas |
286 |
*/ |
7654 |
15 Mar 19 |
nicklas |
287 |
public Workbook getWorkbook() |
7654 |
15 Mar 19 |
nicklas |
288 |
{ |
7654 |
15 Mar 19 |
nicklas |
289 |
return workbook; |
7654 |
15 Mar 19 |
nicklas |
290 |
} |
7654 |
15 Mar 19 |
nicklas |
291 |
|
7654 |
15 Mar 19 |
nicklas |
292 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the number of sheets in the workbook. |
7654 |
15 Mar 19 |
nicklas |
294 |
*/ |
7654 |
15 Mar 19 |
nicklas |
295 |
public int getNumSheets() |
7654 |
15 Mar 19 |
nicklas |
296 |
{ |
7654 |
15 Mar 19 |
nicklas |
297 |
return workbook.getNumberOfSheets(); |
7654 |
15 Mar 19 |
nicklas |
298 |
} |
7654 |
15 Mar 19 |
nicklas |
299 |
|
7654 |
15 Mar 19 |
nicklas |
300 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the names of the sheets in the workbook. |
7654 |
15 Mar 19 |
nicklas |
302 |
*/ |
7654 |
15 Mar 19 |
nicklas |
303 |
public List<String> getSheetNames() |
7654 |
15 Mar 19 |
nicklas |
304 |
{ |
7654 |
15 Mar 19 |
nicklas |
305 |
return sheetNames; |
7654 |
15 Mar 19 |
nicklas |
306 |
} |
7654 |
15 Mar 19 |
nicklas |
307 |
|
7654 |
15 Mar 19 |
nicklas |
308 |
/** |
7654 |
15 Mar 19 |
nicklas |
Convert the given named sheet to a CSV InputStream. |
7654 |
15 Mar 19 |
nicklas |
310 |
*/ |
7654 |
15 Mar 19 |
nicklas |
311 |
public SheetInfo getSheetAsCsv(String name) |
7654 |
15 Mar 19 |
nicklas |
312 |
{ |
7673 |
26 Mar 19 |
nicklas |
313 |
SheetInfo si = sheets.get(name); |
7673 |
26 Mar 19 |
nicklas |
314 |
if (si == null) |
7673 |
26 Mar 19 |
nicklas |
315 |
{ |
7673 |
26 Mar 19 |
nicklas |
316 |
Sheet sheet = workbook.getSheet(name); |
7673 |
26 Mar 19 |
nicklas |
317 |
if (sheet == null) throw new IllegalArgumentException("No sheet with name: " + name); |
7673 |
26 Mar 19 |
nicklas |
318 |
si = new SheetInfo(sheet); |
7673 |
26 Mar 19 |
nicklas |
319 |
sheets.put(name, si); |
7673 |
26 Mar 19 |
nicklas |
320 |
} |
7673 |
26 Mar 19 |
nicklas |
321 |
return si; |
7654 |
15 Mar 19 |
nicklas |
322 |
} |
7654 |
15 Mar 19 |
nicklas |
323 |
|
7654 |
15 Mar 19 |
nicklas |
324 |
/** |
7654 |
15 Mar 19 |
nicklas |
Convert the given sheet to an CSV InputStream. The first sheet has index = 0. |
7654 |
15 Mar 19 |
nicklas |
326 |
*/ |
7654 |
15 Mar 19 |
nicklas |
327 |
public SheetInfo getSheetAsCsv(int index) |
7654 |
15 Mar 19 |
nicklas |
328 |
{ |
7654 |
15 Mar 19 |
nicklas |
329 |
Sheet sheet = workbook.getSheetAt(index); |
7673 |
26 Mar 19 |
nicklas |
330 |
SheetInfo si = sheets.get(sheet.getSheetName()); |
7673 |
26 Mar 19 |
nicklas |
331 |
if (si == null) |
7673 |
26 Mar 19 |
nicklas |
332 |
{ |
7673 |
26 Mar 19 |
nicklas |
333 |
si = new SheetInfo(sheet); |
7673 |
26 Mar 19 |
nicklas |
334 |
sheets.put(si.getName(), si); |
7673 |
26 Mar 19 |
nicklas |
335 |
} |
7673 |
26 Mar 19 |
nicklas |
336 |
return si; |
7654 |
15 Mar 19 |
nicklas |
337 |
} |
7654 |
15 Mar 19 |
nicklas |
338 |
|
7654 |
15 Mar 19 |
nicklas |
339 |
/** |
7673 |
26 Mar 19 |
nicklas |
Convert all sheets in the workbook to a single CSV-like stream. Each sheet |
7703 |
11 Apr 19 |
nicklas |
is converted as if {@link SheetInfo#parseToCsv()} has been called. To |
7703 |
11 Apr 19 |
nicklas |
separate the sheets each section is started with [Name-of-sheet] and |
7703 |
11 Apr 19 |
nicklas |
then followed by the sheet data. |
7673 |
26 Mar 19 |
nicklas |
344 |
*/ |
7673 |
26 Mar 19 |
nicklas |
345 |
public InputStream getWorkbookAsCsv() |
7673 |
26 Mar 19 |
nicklas |
346 |
{ |
7673 |
26 Mar 19 |
nicklas |
347 |
return new WorkbookStream(); |
7673 |
26 Mar 19 |
nicklas |
348 |
} |
7673 |
26 Mar 19 |
nicklas |
349 |
|
7673 |
26 Mar 19 |
nicklas |
350 |
|
7673 |
26 Mar 19 |
nicklas |
351 |
/** |
7654 |
15 Mar 19 |
nicklas |
Closes the current workbook. It is safe to do this before |
7654 |
15 Mar 19 |
nicklas |
reading the CSV streams to the end. |
7654 |
15 Mar 19 |
nicklas |
354 |
*/ |
7654 |
15 Mar 19 |
nicklas |
355 |
public void close() |
7654 |
15 Mar 19 |
nicklas |
356 |
{ |
7654 |
15 Mar 19 |
nicklas |
357 |
FileUtil.close(workbook); |
7654 |
15 Mar 19 |
nicklas |
358 |
} |
7654 |
15 Mar 19 |
nicklas |
359 |
|
7654 |
15 Mar 19 |
nicklas |
360 |
/** |
7654 |
15 Mar 19 |
nicklas |
Represents a single sheet in a workbook. Use it to get access to a CSV Input stream |
7654 |
15 Mar 19 |
nicklas |
that is generated from the table data. It can also be used to get access to the |
7654 |
15 Mar 19 |
nicklas |
actual underlying Cell element for reading data directly. |
7654 |
15 Mar 19 |
nicklas |
364 |
*/ |
7654 |
15 Mar 19 |
nicklas |
365 |
public class SheetInfo |
7654 |
15 Mar 19 |
nicklas |
366 |
{ |
7654 |
15 Mar 19 |
nicklas |
367 |
private final Sheet sheet; |
7664 |
20 Mar 19 |
nicklas |
368 |
private final DataFormatter dataFormat; |
7664 |
20 Mar 19 |
nicklas |
369 |
private final FormulaEvaluator formulaEval; |
7654 |
15 Mar 19 |
nicklas |
370 |
|
7654 |
15 Mar 19 |
nicklas |
371 |
private final int firstRow; |
7654 |
15 Mar 19 |
nicklas |
372 |
private final int lastRow; |
7654 |
15 Mar 19 |
nicklas |
373 |
|
7654 |
15 Mar 19 |
nicklas |
374 |
private final int firstColumn; |
7654 |
15 Mar 19 |
nicklas |
375 |
private final int lastColumn; |
7654 |
15 Mar 19 |
nicklas |
376 |
|
7654 |
15 Mar 19 |
nicklas |
377 |
private byte[] csvBytes; |
7654 |
15 Mar 19 |
nicklas |
378 |
|
7654 |
15 Mar 19 |
nicklas |
379 |
SheetInfo(Sheet sheet) |
7654 |
15 Mar 19 |
nicklas |
380 |
{ |
7654 |
15 Mar 19 |
nicklas |
381 |
this.sheet = sheet; |
7664 |
20 Mar 19 |
nicklas |
382 |
this.dataFormat = new DataFormatter(); |
7995 |
09 Aug 21 |
nicklas |
383 |
if (evaluateFormulas) |
7995 |
09 Aug 21 |
nicklas |
384 |
{ |
7995 |
09 Aug 21 |
nicklas |
385 |
this.formulaEval = workbook.getCreationHelper().createFormulaEvaluator(); |
7995 |
09 Aug 21 |
nicklas |
386 |
formulaEval.setIgnoreMissingWorkbooks(true); |
7995 |
09 Aug 21 |
nicklas |
387 |
} |
7995 |
09 Aug 21 |
nicklas |
388 |
else |
7995 |
09 Aug 21 |
nicklas |
389 |
{ |
7995 |
09 Aug 21 |
nicklas |
390 |
this.formulaEval = null; |
7995 |
09 Aug 21 |
nicklas |
391 |
} |
7654 |
15 Mar 19 |
nicklas |
392 |
|
7654 |
15 Mar 19 |
nicklas |
393 |
this.firstRow = sheet.getFirstRowNum(); |
7654 |
15 Mar 19 |
nicklas |
394 |
int maxRow = sheet.getLastRowNum(); // Note! index of last row that contains data! |
7654 |
15 Mar 19 |
nicklas |
395 |
int lastRowWithData = firstRow; |
7654 |
15 Mar 19 |
nicklas |
396 |
|
7654 |
15 Mar 19 |
nicklas |
// To make sure that columns align between rows, we must find the span of column indexes |
7654 |
15 Mar 19 |
nicklas |
398 |
int minCol = Integer.MAX_VALUE; |
7654 |
15 Mar 19 |
nicklas |
399 |
int maxCol = -1; |
7654 |
15 Mar 19 |
nicklas |
400 |
for (int rowNo = firstRow; rowNo <= maxRow; rowNo++) |
7654 |
15 Mar 19 |
nicklas |
401 |
{ |
7654 |
15 Mar 19 |
nicklas |
402 |
Row row = sheet.getRow(rowNo); |
7654 |
15 Mar 19 |
nicklas |
403 |
if (row == null || row.getPhysicalNumberOfCells() == 0) continue; // Empty row -- skip it |
7654 |
15 Mar 19 |
nicklas |
404 |
minCol = Math.min(minCol, row.getFirstCellNum()); |
7671 |
21 Mar 19 |
nicklas |
405 |
int maxRowCol = row.getLastCellNum(); // Note! getLastCellNum() returns index of last cell+1 that contains data! |
7654 |
15 Mar 19 |
nicklas |
406 |
|
7654 |
15 Mar 19 |
nicklas |
// Sometimes there are a lot of empty or otherwise unused cells to the right on each row... |
7654 |
15 Mar 19 |
nicklas |
// We try to skip null, BLANK and ERROR cells and FORMULA if we don't evaluate formulas |
7654 |
15 Mar 19 |
nicklas |
409 |
CellType cellType = null; |
7654 |
15 Mar 19 |
nicklas |
410 |
do |
7654 |
15 Mar 19 |
nicklas |
411 |
{ |
7671 |
21 Mar 19 |
nicklas |
412 |
maxRowCol--; |
7671 |
21 Mar 19 |
nicklas |
413 |
if (maxRowCol < 0) break; |
7671 |
21 Mar 19 |
nicklas |
414 |
Cell c = row.getCell(maxRowCol); |
7654 |
15 Mar 19 |
nicklas |
415 |
cellType = c == null ? CellType.BLANK : c.getCellType(); |
7654 |
15 Mar 19 |
nicklas |
416 |
if (cellType == CellType.FORMULA && evaluateFormulas) |
7654 |
15 Mar 19 |
nicklas |
417 |
{ |
7654 |
15 Mar 19 |
nicklas |
418 |
cellType = c.getCachedFormulaResultType(); |
7654 |
15 Mar 19 |
nicklas |
419 |
} |
7654 |
15 Mar 19 |
nicklas |
420 |
} while (cellType == CellType.BLANK || cellType == CellType.ERROR || cellType == CellType.FORMULA); |
7654 |
15 Mar 19 |
nicklas |
421 |
|
7654 |
15 Mar 19 |
nicklas |
// Ok, found data on this row |
7671 |
21 Mar 19 |
nicklas |
423 |
if (maxRowCol >= 0) lastRowWithData = rowNo; |
7671 |
21 Mar 19 |
nicklas |
424 |
maxCol = Math.max(maxCol, maxRowCol); |
7654 |
15 Mar 19 |
nicklas |
425 |
} |
7654 |
15 Mar 19 |
nicklas |
426 |
this.lastRow = lastRowWithData; |
7654 |
15 Mar 19 |
nicklas |
427 |
this.firstColumn = minCol; |
7654 |
15 Mar 19 |
nicklas |
428 |
this.lastColumn = maxCol; |
7654 |
15 Mar 19 |
nicklas |
429 |
} |
7654 |
15 Mar 19 |
nicklas |
430 |
|
7654 |
15 Mar 19 |
nicklas |
431 |
/** |
7656 |
19 Mar 19 |
nicklas |
Get the name of the sheet. |
7656 |
19 Mar 19 |
nicklas |
433 |
*/ |
7656 |
19 Mar 19 |
nicklas |
434 |
public String getName() |
7656 |
19 Mar 19 |
nicklas |
435 |
{ |
7656 |
19 Mar 19 |
nicklas |
436 |
return sheet.getSheetName(); |
7656 |
19 Mar 19 |
nicklas |
437 |
} |
7656 |
19 Mar 19 |
nicklas |
438 |
|
7656 |
19 Mar 19 |
nicklas |
439 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the row number for the first row that contains data. |
7654 |
15 Mar 19 |
nicklas |
441 |
*/ |
7654 |
15 Mar 19 |
nicklas |
442 |
public int getFirstRow() |
7654 |
15 Mar 19 |
nicklas |
443 |
{ |
7654 |
15 Mar 19 |
nicklas |
444 |
return firstRow; |
7654 |
15 Mar 19 |
nicklas |
445 |
} |
7654 |
15 Mar 19 |
nicklas |
446 |
|
7654 |
15 Mar 19 |
nicklas |
447 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the row number for the last row that contains data. |
7654 |
15 Mar 19 |
nicklas |
449 |
*/ |
7654 |
15 Mar 19 |
nicklas |
450 |
public int getLastRow() |
7654 |
15 Mar 19 |
nicklas |
451 |
{ |
7654 |
15 Mar 19 |
nicklas |
452 |
return lastRow; |
7654 |
15 Mar 19 |
nicklas |
453 |
} |
7654 |
15 Mar 19 |
nicklas |
454 |
|
7654 |
15 Mar 19 |
nicklas |
455 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the column number for the first column that contains data. |
7654 |
15 Mar 19 |
nicklas |
457 |
*/ |
7654 |
15 Mar 19 |
nicklas |
458 |
public int getFirstColumn() |
7654 |
15 Mar 19 |
nicklas |
459 |
{ |
7654 |
15 Mar 19 |
nicklas |
460 |
return firstColumn; |
7654 |
15 Mar 19 |
nicklas |
461 |
} |
7654 |
15 Mar 19 |
nicklas |
462 |
|
7654 |
15 Mar 19 |
nicklas |
463 |
/** |
7654 |
15 Mar 19 |
nicklas |
Get the column number for the last column that contains data. |
7654 |
15 Mar 19 |
nicklas |
465 |
*/ |
7654 |
15 Mar 19 |
nicklas |
466 |
public int getLastColumn() |
7654 |
15 Mar 19 |
nicklas |
467 |
{ |
7654 |
15 Mar 19 |
nicklas |
468 |
return lastColumn; |
7654 |
15 Mar 19 |
nicklas |
469 |
} |
7654 |
15 Mar 19 |
nicklas |
470 |
|
7654 |
15 Mar 19 |
nicklas |
471 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the cell at the specified row and column. |
7664 |
20 Mar 19 |
nicklas |
@return A Cell instance or null |
7664 |
20 Mar 19 |
nicklas |
474 |
*/ |
7664 |
20 Mar 19 |
nicklas |
475 |
public Cell getCell(int row, int col) |
7664 |
20 Mar 19 |
nicklas |
476 |
{ |
7664 |
20 Mar 19 |
nicklas |
477 |
Row r = sheet.getRow(row); |
7664 |
20 Mar 19 |
nicklas |
478 |
return r == null ? null : r.getCell(col); |
7664 |
20 Mar 19 |
nicklas |
479 |
} |
7664 |
20 Mar 19 |
nicklas |
480 |
|
7664 |
20 Mar 19 |
nicklas |
481 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the the cell value as a string. Numeric cells are |
7664 |
20 Mar 19 |
nicklas |
are checked if the seem to have a date-like format syntax |
7664 |
20 Mar 19 |
nicklas |
(with or without time). If so, the value is formatted with |
7664 |
20 Mar 19 |
nicklas |
the date or timestamp formatter, otherwise with the number |
7664 |
20 Mar 19 |
nicklas |
formatter. If the required formatter has not been set, the |
7664 |
20 Mar 19 |
nicklas |
value is formatted with the built-in formatter from Excel. |
7664 |
20 Mar 19 |
nicklas |
488 |
*/ |
7664 |
20 Mar 19 |
nicklas |
489 |
public String getCellValueAsString(Cell cell) |
7664 |
20 Mar 19 |
nicklas |
490 |
{ |
7664 |
20 Mar 19 |
nicklas |
491 |
if (cell == null) return null; |
7664 |
20 Mar 19 |
nicklas |
492 |
|
7664 |
20 Mar 19 |
nicklas |
493 |
String value = null; |
7664 |
20 Mar 19 |
nicklas |
494 |
CellType cellType = cell.getCellType(); |
7738 |
14 Aug 19 |
nicklas |
495 |
CellTypeWithInfo info = null; |
7664 |
20 Mar 19 |
nicklas |
496 |
if (cellType == CellType.FORMULA && evaluateFormulas) |
7664 |
20 Mar 19 |
nicklas |
497 |
{ |
7738 |
14 Aug 19 |
nicklas |
498 |
info = evaluateFormula(cell); |
7738 |
14 Aug 19 |
nicklas |
499 |
cellType = info.cellType; |
7664 |
20 Mar 19 |
nicklas |
500 |
} |
7664 |
20 Mar 19 |
nicklas |
501 |
if (cellType == CellType.NUMERIC) |
7664 |
20 Mar 19 |
nicklas |
502 |
{ |
7664 |
20 Mar 19 |
nicklas |
503 |
boolean isDate = DateUtil.isCellDateFormatted(cell); |
7664 |
20 Mar 19 |
nicklas |
504 |
if (isDate) |
7664 |
20 Mar 19 |
nicklas |
505 |
{ |
7664 |
20 Mar 19 |
nicklas |
506 |
String formatString = cell.getCellStyle().getDataFormatString(); |
7664 |
20 Mar 19 |
nicklas |
507 |
boolean hasTimePart = hasTimePart(formatString); |
7664 |
20 Mar 19 |
nicklas |
508 |
boolean hasDatePart = hasDatePart(formatString); |
7664 |
20 Mar 19 |
nicklas |
509 |
Date d = cell.getDateCellValue(); |
7664 |
20 Mar 19 |
nicklas |
510 |
if (hasDatePart && hasTimePart) |
7664 |
20 Mar 19 |
nicklas |
511 |
{ |
7664 |
20 Mar 19 |
nicklas |
512 |
if (timestampFormat != null) value = timestampFormat.format(d); |
7664 |
20 Mar 19 |
nicklas |
513 |
} |
7664 |
20 Mar 19 |
nicklas |
514 |
else if (hasDatePart) |
7664 |
20 Mar 19 |
nicklas |
515 |
{ |
7664 |
20 Mar 19 |
nicklas |
516 |
if (dateFormat != null) value = dateFormat.format(d); |
7664 |
20 Mar 19 |
nicklas |
517 |
} |
7664 |
20 Mar 19 |
nicklas |
518 |
else |
7664 |
20 Mar 19 |
nicklas |
519 |
{ |
7664 |
20 Mar 19 |
nicklas |
// Only time part -- let it be handled by the DataFormat |
7664 |
20 Mar 19 |
nicklas |
521 |
} |
7664 |
20 Mar 19 |
nicklas |
522 |
} |
7664 |
20 Mar 19 |
nicklas |
523 |
else |
7664 |
20 Mar 19 |
nicklas |
524 |
{ |
7664 |
20 Mar 19 |
nicklas |
525 |
if (numberFormat != null) |
7664 |
20 Mar 19 |
nicklas |
526 |
{ |
7664 |
20 Mar 19 |
nicklas |
527 |
value = numberFormat.format(cell.getNumericCellValue()); |
7664 |
20 Mar 19 |
nicklas |
528 |
} |
7664 |
20 Mar 19 |
nicklas |
529 |
} |
7664 |
20 Mar 19 |
nicklas |
530 |
} |
7975 |
07 Jun 21 |
nicklas |
531 |
else if (cellType == CellType.ERROR) |
7975 |
07 Jun 21 |
nicklas |
532 |
{ |
8026 |
15 Dec 21 |
nicklas |
533 |
value = FormulaError.forInt(cell.getErrorCellValue()).getString()+"#"; |
7975 |
07 Jun 21 |
nicklas |
534 |
} |
7664 |
20 Mar 19 |
nicklas |
535 |
if (value == null) |
7664 |
20 Mar 19 |
nicklas |
536 |
{ |
7664 |
20 Mar 19 |
nicklas |
537 |
value = dataFormat.formatCellValue(cell, formulaEval); |
7664 |
20 Mar 19 |
nicklas |
538 |
} |
7738 |
14 Aug 19 |
nicklas |
539 |
if (info != null && info.info != null) value += info.info; |
7664 |
20 Mar 19 |
nicklas |
540 |
return value; |
7664 |
20 Mar 19 |
nicklas |
541 |
} |
7664 |
20 Mar 19 |
nicklas |
542 |
|
7664 |
20 Mar 19 |
nicklas |
543 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the cell value as a number. If the cell is a numeric cell, the value is returned |
7664 |
20 Mar 19 |
nicklas |
directly, otherwise the given parser is used to parse the string data from cell, |
7664 |
20 Mar 19 |
nicklas |
via {@link Type#parseString(String, NumberFormat, boolean)} |
7664 |
20 Mar 19 |
nicklas |
547 |
*/ |
7664 |
20 Mar 19 |
nicklas |
548 |
public Double getCellValueAsNumber(Cell cell, NumberFormat parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
549 |
{ |
7664 |
20 Mar 19 |
nicklas |
550 |
if (cell == null) return null; |
7664 |
20 Mar 19 |
nicklas |
551 |
|
7664 |
20 Mar 19 |
nicklas |
552 |
Double value = null; |
7664 |
20 Mar 19 |
nicklas |
553 |
CellType cellType = cell.getCellType(); |
7664 |
20 Mar 19 |
nicklas |
554 |
if (cellType == CellType.FORMULA && evaluateFormulas) |
7664 |
20 Mar 19 |
nicklas |
555 |
{ |
7738 |
14 Aug 19 |
nicklas |
556 |
cellType = evaluateFormula(cell).cellType; |
7664 |
20 Mar 19 |
nicklas |
557 |
} |
7664 |
20 Mar 19 |
nicklas |
558 |
if (cellType == CellType.NUMERIC) |
7664 |
20 Mar 19 |
nicklas |
559 |
{ |
7664 |
20 Mar 19 |
nicklas |
560 |
value = cell.getNumericCellValue(); |
7664 |
20 Mar 19 |
nicklas |
561 |
} |
7975 |
07 Jun 21 |
nicklas |
562 |
else if (cellType != CellType.ERROR) |
7664 |
20 Mar 19 |
nicklas |
563 |
{ |
7664 |
20 Mar 19 |
nicklas |
564 |
String s = dataFormat.formatCellValue(cell, formulaEval); |
7664 |
20 Mar 19 |
nicklas |
565 |
value = (Double)Type.DOUBLE.parseString(s, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
566 |
} |
7664 |
20 Mar 19 |
nicklas |
567 |
return value; |
7664 |
20 Mar 19 |
nicklas |
568 |
} |
7664 |
20 Mar 19 |
nicklas |
569 |
|
7664 |
20 Mar 19 |
nicklas |
570 |
/** |
7664 |
20 Mar 19 |
nicklas |
Get the cell value as a date. If the cell is a numeric cell, the value is returned |
7664 |
20 Mar 19 |
nicklas |
directly as if it was a date (the style format string is NOT checked), |
7664 |
20 Mar 19 |
nicklas |
otherwise the given parser is used to parse the string data from cell, |
7664 |
20 Mar 19 |
nicklas |
via {@link Type#parseString(String, Formatter, boolean)} |
7664 |
20 Mar 19 |
nicklas |
575 |
*/ |
7664 |
20 Mar 19 |
nicklas |
576 |
public Date getCellValueAsDate(Cell cell, Formatter<Date> parser, boolean nullIfException) |
7664 |
20 Mar 19 |
nicklas |
577 |
{ |
7664 |
20 Mar 19 |
nicklas |
578 |
if (cell == null) return null; |
7664 |
20 Mar 19 |
nicklas |
579 |
|
7664 |
20 Mar 19 |
nicklas |
580 |
Date value = null; |
7664 |
20 Mar 19 |
nicklas |
581 |
CellType cellType = cell.getCellType(); |
7664 |
20 Mar 19 |
nicklas |
582 |
if (cellType == CellType.FORMULA && evaluateFormulas) |
7664 |
20 Mar 19 |
nicklas |
583 |
{ |
7738 |
14 Aug 19 |
nicklas |
584 |
cellType = evaluateFormula(cell).cellType; |
7664 |
20 Mar 19 |
nicklas |
585 |
} |
7664 |
20 Mar 19 |
nicklas |
586 |
if (cellType == CellType.NUMERIC) |
7664 |
20 Mar 19 |
nicklas |
587 |
{ |
7664 |
20 Mar 19 |
nicklas |
588 |
value = cell.getDateCellValue(); |
7664 |
20 Mar 19 |
nicklas |
589 |
} |
7975 |
07 Jun 21 |
nicklas |
590 |
else if (cellType != CellType.ERROR) |
7664 |
20 Mar 19 |
nicklas |
591 |
{ |
7664 |
20 Mar 19 |
nicklas |
592 |
String s = dataFormat.formatCellValue(cell, formulaEval); |
7664 |
20 Mar 19 |
nicklas |
593 |
value = (Date)Type.DATE.parseString(s, parser, nullIfException); |
7664 |
20 Mar 19 |
nicklas |
594 |
} |
7664 |
20 Mar 19 |
nicklas |
595 |
return value; |
7664 |
20 Mar 19 |
nicklas |
596 |
} |
7664 |
20 Mar 19 |
nicklas |
597 |
|
7738 |
14 Aug 19 |
nicklas |
598 |
private CellTypeWithInfo evaluateFormula(Cell cell) |
7738 |
14 Aug 19 |
nicklas |
599 |
{ |
7738 |
14 Aug 19 |
nicklas |
600 |
CellTypeWithInfo ct = new CellTypeWithInfo(); |
7738 |
14 Aug 19 |
nicklas |
601 |
try |
7738 |
14 Aug 19 |
nicklas |
602 |
{ |
7738 |
14 Aug 19 |
nicklas |
603 |
ct.cellType = formulaEval.evaluateFormulaCell(cell); |
8026 |
15 Dec 21 |
nicklas |
604 |
if (ct.cellType == CellType.ERROR) ct.info = cell.getCellFormula(); |
7738 |
14 Aug 19 |
nicklas |
605 |
} |
7738 |
14 Aug 19 |
nicklas |
606 |
catch (NotImplementedException ex) |
7738 |
14 Aug 19 |
nicklas |
607 |
{ |
7738 |
14 Aug 19 |
nicklas |
608 |
cell.setCellErrorValue(FormulaError.NAME.getCode()); |
7738 |
14 Aug 19 |
nicklas |
609 |
ct.cellType = CellType.ERROR; |
8026 |
15 Dec 21 |
nicklas |
610 |
ct.info = ex.getMessage(); |
7738 |
14 Aug 19 |
nicklas |
611 |
if (ex.getCause() instanceof NotImplementedFunctionException) |
7738 |
14 Aug 19 |
nicklas |
612 |
{ |
7738 |
14 Aug 19 |
nicklas |
613 |
NotImplementedFunctionException nof = (NotImplementedFunctionException)ex.getCause(); |
7738 |
14 Aug 19 |
nicklas |
614 |
ct.info = nof.getFunctionName(); |
7738 |
14 Aug 19 |
nicklas |
615 |
} |
7738 |
14 Aug 19 |
nicklas |
616 |
} |
7738 |
14 Aug 19 |
nicklas |
617 |
catch (RuntimeException ex) |
7738 |
14 Aug 19 |
nicklas |
618 |
{ |
7738 |
14 Aug 19 |
nicklas |
619 |
cell.setCellErrorValue(FormulaError.VALUE.getCode()); |
7738 |
14 Aug 19 |
nicklas |
620 |
ct.cellType = CellType.ERROR; |
7975 |
07 Jun 21 |
nicklas |
621 |
ct.info = ex.getMessage(); |
7738 |
14 Aug 19 |
nicklas |
622 |
} |
7738 |
14 Aug 19 |
nicklas |
623 |
return ct; |
7738 |
14 Aug 19 |
nicklas |
624 |
} |
7738 |
14 Aug 19 |
nicklas |
625 |
|
7664 |
20 Mar 19 |
nicklas |
626 |
/** |
7654 |
15 Mar 19 |
nicklas |
Convert the sheet to a CSV and get an InputStream for reading the data. |
7654 |
15 Mar 19 |
nicklas |
Note that the CSV will only contain data for the range of cells between |
7654 |
15 Mar 19 |
nicklas |
firstRow, lastRow, firstColumn and lastColumn. |
7654 |
15 Mar 19 |
nicklas |
630 |
*/ |
7654 |
15 Mar 19 |
nicklas |
631 |
public InputStream parseToCsv() |
7654 |
15 Mar 19 |
nicklas |
632 |
{ |
7654 |
15 Mar 19 |
nicklas |
633 |
if (csvBytes == null) parseToBytes(); |
7654 |
15 Mar 19 |
nicklas |
634 |
return new ByteArrayInputStream(csvBytes); |
7654 |
15 Mar 19 |
nicklas |
635 |
} |
7654 |
15 Mar 19 |
nicklas |
636 |
|
7889 |
04 Dec 20 |
nicklas |
637 |
/** |
7889 |
04 Dec 20 |
nicklas |
Get the size of the CSV in bytes. |
7889 |
04 Dec 20 |
nicklas |
@since 3.17.1 |
7889 |
04 Dec 20 |
nicklas |
640 |
*/ |
7889 |
04 Dec 20 |
nicklas |
641 |
public int getSize() |
7889 |
04 Dec 20 |
nicklas |
642 |
{ |
7889 |
04 Dec 20 |
nicklas |
643 |
if (csvBytes == null) parseToBytes(); |
7889 |
04 Dec 20 |
nicklas |
644 |
return csvBytes.length; |
7889 |
04 Dec 20 |
nicklas |
645 |
} |
7889 |
04 Dec 20 |
nicklas |
646 |
|
7654 |
15 Mar 19 |
nicklas |
647 |
private void parseToBytes() |
7654 |
15 Mar 19 |
nicklas |
648 |
{ |
7654 |
15 Mar 19 |
nicklas |
649 |
StringBuilder out = new StringBuilder(); |
7654 |
15 Mar 19 |
nicklas |
650 |
for (int rowNo = firstRow; rowNo <= lastRow; rowNo++) |
7654 |
15 Mar 19 |
nicklas |
651 |
{ |
7654 |
15 Mar 19 |
nicklas |
652 |
Row row = sheet.getRow(rowNo); |
7664 |
20 Mar 19 |
nicklas |
653 |
if (row == null || row.getPhysicalNumberOfCells() == 0) |
7654 |
15 Mar 19 |
nicklas |
654 |
{ |
7664 |
20 Mar 19 |
nicklas |
655 |
if (writeTrailingColumns) |
7654 |
15 Mar 19 |
nicklas |
656 |
{ |
7664 |
20 Mar 19 |
nicklas |
657 |
for (int colNo = firstColumn; colNo < lastColumn; colNo++) |
7654 |
15 Mar 19 |
nicklas |
658 |
{ |
7664 |
20 Mar 19 |
nicklas |
659 |
out.append(columnSeparator); |
7654 |
15 Mar 19 |
nicklas |
660 |
} |
7654 |
15 Mar 19 |
nicklas |
661 |
} |
7654 |
15 Mar 19 |
nicklas |
662 |
} |
7664 |
20 Mar 19 |
nicklas |
663 |
else |
7664 |
20 Mar 19 |
nicklas |
664 |
{ |
7664 |
20 Mar 19 |
nicklas |
665 |
int lastRowCol = writeTrailingColumns ? lastColumn : row.getLastCellNum()-1; // Note! getLastCellNum() returns index of last cell+1 that contains data! |
7664 |
20 Mar 19 |
nicklas |
666 |
for (int colNo = firstColumn; colNo <= lastRowCol; colNo++) |
7664 |
20 Mar 19 |
nicklas |
667 |
{ |
7664 |
20 Mar 19 |
nicklas |
668 |
if (colNo > firstColumn) out.append(columnSeparator); |
7664 |
20 Mar 19 |
nicklas |
669 |
Cell cell = row.getCell(colNo); |
7664 |
20 Mar 19 |
nicklas |
670 |
String value = getCellValueAsString(cell); |
7664 |
20 Mar 19 |
nicklas |
671 |
out.append(value == null ? "" : encoder.encode(value)); |
7664 |
20 Mar 19 |
nicklas |
672 |
} |
7664 |
20 Mar 19 |
nicklas |
673 |
} |
7654 |
15 Mar 19 |
nicklas |
674 |
out.append(rowSeparator); |
7671 |
21 Mar 19 |
nicklas |
675 |
} |
7654 |
15 Mar 19 |
nicklas |
676 |
csvBytes = out.toString().getBytes(charset); |
7654 |
15 Mar 19 |
nicklas |
677 |
} |
7654 |
15 Mar 19 |
nicklas |
678 |
} |
7654 |
15 Mar 19 |
nicklas |
679 |
|
7655 |
19 Mar 19 |
nicklas |
680 |
private boolean hasTimePart(String formatString) |
7655 |
19 Mar 19 |
nicklas |
681 |
{ |
7655 |
19 Mar 19 |
nicklas |
682 |
if (formatString == null) return false; |
7655 |
19 Mar 19 |
nicklas |
683 |
return formatString.contains("S") || formatString.contains("H"); |
7655 |
19 Mar 19 |
nicklas |
684 |
} |
7654 |
15 Mar 19 |
nicklas |
685 |
|
7655 |
19 Mar 19 |
nicklas |
686 |
private boolean hasDatePart(String formatString) |
7655 |
19 Mar 19 |
nicklas |
687 |
{ |
7655 |
19 Mar 19 |
nicklas |
688 |
if (formatString == null) return false; |
7655 |
19 Mar 19 |
nicklas |
689 |
return formatString.contains("Y") || formatString.contains("D"); |
7655 |
19 Mar 19 |
nicklas |
690 |
} |
7655 |
19 Mar 19 |
nicklas |
691 |
|
7673 |
26 Mar 19 |
nicklas |
692 |
/** |
7673 |
26 Mar 19 |
nicklas |
Input stream implementation that combines streams from all sheets in the current workbook. |
7673 |
26 Mar 19 |
nicklas |
694 |
*/ |
7673 |
26 Mar 19 |
nicklas |
695 |
class WorkbookStream |
7673 |
26 Mar 19 |
nicklas |
696 |
extends InputStream |
7673 |
26 Mar 19 |
nicklas |
697 |
{ |
7673 |
26 Mar 19 |
nicklas |
698 |
private SheetInfo currentSheet; |
7673 |
26 Mar 19 |
nicklas |
699 |
private InputStream currentStream; |
7673 |
26 Mar 19 |
nicklas |
700 |
private int nextIndex; |
7673 |
26 Mar 19 |
nicklas |
701 |
|
7673 |
26 Mar 19 |
nicklas |
702 |
WorkbookStream() |
7673 |
26 Mar 19 |
nicklas |
703 |
{ |
7673 |
26 Mar 19 |
nicklas |
704 |
this.nextIndex = 0; |
7673 |
26 Mar 19 |
nicklas |
705 |
nextSheet(); |
7673 |
26 Mar 19 |
nicklas |
706 |
} |
7673 |
26 Mar 19 |
nicklas |
707 |
|
7673 |
26 Mar 19 |
nicklas |
708 |
/** |
7673 |
26 Mar 19 |
nicklas |
Switch to the next worksheet. |
7673 |
26 Mar 19 |
nicklas |
710 |
*/ |
7673 |
26 Mar 19 |
nicklas |
711 |
private void nextSheet() |
7673 |
26 Mar 19 |
nicklas |
712 |
{ |
7673 |
26 Mar 19 |
nicklas |
713 |
FileUtil.close(currentStream); |
7673 |
26 Mar 19 |
nicklas |
714 |
if (nextIndex < getNumSheets()) |
7673 |
26 Mar 19 |
nicklas |
715 |
{ |
7673 |
26 Mar 19 |
nicklas |
716 |
currentSheet = getSheetAsCsv(nextIndex); |
7673 |
26 Mar 19 |
nicklas |
717 |
currentStream = getPrefixedStream(currentSheet); |
7673 |
26 Mar 19 |
nicklas |
718 |
nextIndex++; |
7673 |
26 Mar 19 |
nicklas |
719 |
} |
7673 |
26 Mar 19 |
nicklas |
720 |
else |
7673 |
26 Mar 19 |
nicklas |
721 |
{ |
7673 |
26 Mar 19 |
nicklas |
722 |
currentSheet = null; |
7673 |
26 Mar 19 |
nicklas |
723 |
currentStream = null; |
7673 |
26 Mar 19 |
nicklas |
724 |
} |
7673 |
26 Mar 19 |
nicklas |
725 |
} |
7673 |
26 Mar 19 |
nicklas |
726 |
|
7673 |
26 Mar 19 |
nicklas |
727 |
/** |
7673 |
26 Mar 19 |
nicklas |
Create a combined stream by adding a "prefix": [sheet-name] |
7673 |
26 Mar 19 |
nicklas |
and then the regular CSV stream for the given worksheet. |
7673 |
26 Mar 19 |
nicklas |
730 |
*/ |
7673 |
26 Mar 19 |
nicklas |
731 |
private InputStream getPrefixedStream(SheetInfo sheet) |
7673 |
26 Mar 19 |
nicklas |
732 |
{ |
7673 |
26 Mar 19 |
nicklas |
733 |
String prefix = "["+encoder.encode(sheet.getName())+"]"+rowSeparator; |
7673 |
26 Mar 19 |
nicklas |
734 |
return new InputStreamCombiner( |
7673 |
26 Mar 19 |
nicklas |
735 |
new ByteArrayInputStream(prefix.getBytes(charset)), |
7673 |
26 Mar 19 |
nicklas |
736 |
sheet.parseToCsv()); |
7673 |
26 Mar 19 |
nicklas |
737 |
} |
7673 |
26 Mar 19 |
nicklas |
738 |
|
7673 |
26 Mar 19 |
nicklas |
739 |
|
7673 |
26 Mar 19 |
nicklas |
740 |
@Override |
7673 |
26 Mar 19 |
nicklas |
741 |
public int available() |
7673 |
26 Mar 19 |
nicklas |
742 |
throws IOException |
7673 |
26 Mar 19 |
nicklas |
743 |
{ |
7673 |
26 Mar 19 |
nicklas |
744 |
return currentStream != null ? currentStream.available() : 0; |
7673 |
26 Mar 19 |
nicklas |
745 |
} |
7673 |
26 Mar 19 |
nicklas |
746 |
|
7673 |
26 Mar 19 |
nicklas |
747 |
@Override |
7673 |
26 Mar 19 |
nicklas |
748 |
public void close() |
7673 |
26 Mar 19 |
nicklas |
749 |
throws IOException |
7673 |
26 Mar 19 |
nicklas |
750 |
{ |
7673 |
26 Mar 19 |
nicklas |
751 |
if (currentStream != null) currentStream.close(); |
7673 |
26 Mar 19 |
nicklas |
752 |
currentStream = null; |
7673 |
26 Mar 19 |
nicklas |
753 |
currentSheet = null; |
7673 |
26 Mar 19 |
nicklas |
754 |
} |
7673 |
26 Mar 19 |
nicklas |
755 |
|
7673 |
26 Mar 19 |
nicklas |
756 |
@Override |
7673 |
26 Mar 19 |
nicklas |
757 |
public int read() |
7673 |
26 Mar 19 |
nicklas |
758 |
throws IOException |
7673 |
26 Mar 19 |
nicklas |
759 |
{ |
7673 |
26 Mar 19 |
nicklas |
760 |
int result = -1; |
7673 |
26 Mar 19 |
nicklas |
761 |
while (result == -1 && currentStream != null) |
7673 |
26 Mar 19 |
nicklas |
762 |
{ |
7673 |
26 Mar 19 |
nicklas |
763 |
result = currentStream.read(); |
7673 |
26 Mar 19 |
nicklas |
764 |
if (result == -1) nextSheet(); |
7673 |
26 Mar 19 |
nicklas |
765 |
} |
7673 |
26 Mar 19 |
nicklas |
766 |
return result; |
7673 |
26 Mar 19 |
nicklas |
767 |
} |
7673 |
26 Mar 19 |
nicklas |
768 |
|
7673 |
26 Mar 19 |
nicklas |
769 |
@Override |
7673 |
26 Mar 19 |
nicklas |
770 |
public int read(byte[] b, int off, int len) |
7673 |
26 Mar 19 |
nicklas |
771 |
throws IOException |
7673 |
26 Mar 19 |
nicklas |
772 |
{ |
7673 |
26 Mar 19 |
nicklas |
773 |
int result = -1; |
7673 |
26 Mar 19 |
nicklas |
774 |
while (result == -1 && currentStream != null) |
7673 |
26 Mar 19 |
nicklas |
775 |
{ |
7673 |
26 Mar 19 |
nicklas |
776 |
result = currentStream.read(b, off, len); |
7673 |
26 Mar 19 |
nicklas |
777 |
if (result == -1) nextSheet(); |
7673 |
26 Mar 19 |
nicklas |
778 |
} |
7673 |
26 Mar 19 |
nicklas |
779 |
return result; |
7673 |
26 Mar 19 |
nicklas |
780 |
} |
7673 |
26 Mar 19 |
nicklas |
781 |
|
7673 |
26 Mar 19 |
nicklas |
782 |
} |
7673 |
26 Mar 19 |
nicklas |
783 |
|
7738 |
14 Aug 19 |
nicklas |
// Used to return some error information if formula evaluation fails |
7738 |
14 Aug 19 |
nicklas |
785 |
static class CellTypeWithInfo |
7738 |
14 Aug 19 |
nicklas |
786 |
{ |
7738 |
14 Aug 19 |
nicklas |
787 |
CellType cellType; |
7738 |
14 Aug 19 |
nicklas |
788 |
String info; |
7738 |
14 Aug 19 |
nicklas |
789 |
} |
7738 |
14 Aug 19 |
nicklas |
790 |
|
7654 |
15 Mar 19 |
nicklas |
791 |
} |