5227 |
01 Feb 10 |
nicklas |
1 |
/** |
5227 |
01 Feb 10 |
nicklas |
$Id$ |
5227 |
01 Feb 10 |
nicklas |
3 |
|
5227 |
01 Feb 10 |
nicklas |
Copyright (C) 2010 Nicklas Nordborg |
5227 |
01 Feb 10 |
nicklas |
5 |
|
5227 |
01 Feb 10 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5227 |
01 Feb 10 |
nicklas |
Available at http://base.thep.lu.se/ |
5227 |
01 Feb 10 |
nicklas |
8 |
|
5227 |
01 Feb 10 |
nicklas |
BASE is free software; you can redistribute it and/or |
5227 |
01 Feb 10 |
nicklas |
modify it under the terms of the GNU General Public License |
5227 |
01 Feb 10 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5227 |
01 Feb 10 |
nicklas |
of the License, or (at your option) any later version. |
5227 |
01 Feb 10 |
nicklas |
13 |
|
5227 |
01 Feb 10 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5227 |
01 Feb 10 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5227 |
01 Feb 10 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5227 |
01 Feb 10 |
nicklas |
GNU General Public License for more details. |
5227 |
01 Feb 10 |
nicklas |
18 |
|
5227 |
01 Feb 10 |
nicklas |
You should have received a copy of the GNU General Public License |
5227 |
01 Feb 10 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5227 |
01 Feb 10 |
nicklas |
21 |
*/ |
5227 |
01 Feb 10 |
nicklas |
22 |
package net.sf.basedb.util.importer.spotdata; |
5227 |
01 Feb 10 |
nicklas |
23 |
|
5227 |
01 Feb 10 |
nicklas |
24 |
import java.io.IOException; |
5227 |
01 Feb 10 |
nicklas |
25 |
import java.util.ArrayList; |
5227 |
01 Feb 10 |
nicklas |
26 |
import java.util.Collections; |
5227 |
01 Feb 10 |
nicklas |
27 |
import java.util.List; |
5227 |
01 Feb 10 |
nicklas |
28 |
|
5238 |
09 Feb 10 |
nicklas |
29 |
import net.sf.basedb.core.AnyToAny; |
5319 |
20 Apr 10 |
nicklas |
30 |
import net.sf.basedb.core.BasicItem; |
5227 |
01 Feb 10 |
nicklas |
31 |
import net.sf.basedb.core.BioAssay; |
5227 |
01 Feb 10 |
nicklas |
32 |
import net.sf.basedb.core.BioAssaySet; |
5227 |
01 Feb 10 |
nicklas |
33 |
import net.sf.basedb.core.DbControl; |
5238 |
09 Feb 10 |
nicklas |
34 |
import net.sf.basedb.core.File; |
5227 |
01 Feb 10 |
nicklas |
35 |
import net.sf.basedb.core.IntensityTransform; |
5227 |
01 Feb 10 |
nicklas |
36 |
import net.sf.basedb.core.InvalidDataException; |
5227 |
01 Feb 10 |
nicklas |
37 |
import net.sf.basedb.core.InvalidUseOfNullException; |
5227 |
01 Feb 10 |
nicklas |
38 |
import net.sf.basedb.core.PositionBatcher; |
5227 |
01 Feb 10 |
nicklas |
39 |
import net.sf.basedb.core.ProgressReporter; |
5227 |
01 Feb 10 |
nicklas |
40 |
import net.sf.basedb.core.SpotBatcher; |
5227 |
01 Feb 10 |
nicklas |
41 |
import net.sf.basedb.core.Transformation; |
5405 |
10 Sep 10 |
nicklas |
42 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
5227 |
01 Feb 10 |
nicklas |
43 |
import net.sf.basedb.util.ChainedProgressReporter; |
5227 |
01 Feb 10 |
nicklas |
44 |
import net.sf.basedb.util.Values; |
5227 |
01 Feb 10 |
nicklas |
45 |
import net.sf.basedb.util.bfs.AnnotationModel; |
5227 |
01 Feb 10 |
nicklas |
46 |
import net.sf.basedb.util.bfs.AnnotationParser; |
5227 |
01 Feb 10 |
nicklas |
47 |
import net.sf.basedb.util.bfs.BfsParser; |
5227 |
01 Feb 10 |
nicklas |
48 |
import net.sf.basedb.util.bfs.DataParser; |
5227 |
01 Feb 10 |
nicklas |
49 |
import net.sf.basedb.util.bfs.EventHandler; |
5227 |
01 Feb 10 |
nicklas |
50 |
import net.sf.basedb.util.bfs.EventType; |
5238 |
09 Feb 10 |
nicklas |
51 |
import net.sf.basedb.util.bfs.ExtraFileImporter; |
5227 |
01 Feb 10 |
nicklas |
52 |
import net.sf.basedb.util.bfs.InputStreamLocator; |
5227 |
01 Feb 10 |
nicklas |
53 |
import net.sf.basedb.util.bfs.MetadataModel; |
5227 |
01 Feb 10 |
nicklas |
54 |
import net.sf.basedb.util.bfs.MetadataParser; |
5227 |
01 Feb 10 |
nicklas |
55 |
import net.sf.basedb.util.bfs.MultiEventHandler; |
5227 |
01 Feb 10 |
nicklas |
56 |
import net.sf.basedb.util.bfs.MetadataModel.Section; |
5227 |
01 Feb 10 |
nicklas |
57 |
|
5227 |
01 Feb 10 |
nicklas |
58 |
/** |
5227 |
01 Feb 10 |
nicklas |
Imports spot data from a serial or matrix BFS. Before it |
5227 |
01 Feb 10 |
nicklas |
can be used various configuration properties must be set. |
5227 |
01 Feb 10 |
nicklas |
<ul> |
5227 |
01 Feb 10 |
nicklas |
<li>An open DbControl: {@link #setDbControl(DbControl)} |
5227 |
01 Feb 10 |
nicklas |
<li>The parent transformation: {@link #setTransformation(Transformation)} |
5227 |
01 Feb 10 |
nicklas |
<li>A parser for the metadata file: {@link #setMetadataParser(MetadataParser)} |
5227 |
01 Feb 10 |
nicklas |
<li>A locator implementation for locating files that are referenced in the |
5227 |
01 Feb 10 |
nicklas |
metadata file: {@link #setInputStreamLocator(InputStreamLocator)} |
5227 |
01 Feb 10 |
nicklas |
</ul> |
5227 |
01 Feb 10 |
nicklas |
<p> |
5227 |
01 Feb 10 |
nicklas |
69 |
|
5227 |
01 Feb 10 |
nicklas |
The import is started with {@link #doImport()}. This will start by parsing the |
5227 |
01 Feb 10 |
nicklas |
metadata file. Additional files that are referenced in the [files] section |
5238 |
09 Feb 10 |
nicklas |
are opened via the {@link InputStreamLocator}. File entries that start with 'x-' |
5238 |
09 Feb 10 |
nicklas |
are considered to be extra files that should be attached to the child bioassay |
5238 |
09 Feb 10 |
nicklas |
set. But this is only done if an {@link ExtraFileImporter} has been specified. |
5227 |
01 Feb 10 |
nicklas |
<p> |
5227 |
01 Feb 10 |
nicklas |
76 |
|
5227 |
01 Feb 10 |
nicklas |
The section [settings] is used to control some aspects of the parsing. If |
5227 |
01 Feb 10 |
nicklas |
the data is going into the same datacube the section is not needed. If a |
5227 |
01 Feb 10 |
nicklas |
new datacube is needed the importer needs to know how to map the spot data |
5227 |
01 Feb 10 |
nicklas |
to positions/reporters and to assays. We support three cases: |
5227 |
01 Feb 10 |
nicklas |
<ol> |
5227 |
01 Feb 10 |
nicklas |
<li>The data uses the same datacube as the parent |
5227 |
01 Feb 10 |
nicklas |
<li>The data needs a new datacube but there is a one-to-one relation |
5227 |
01 Feb 10 |
nicklas |
with the parent assays |
5227 |
01 Feb 10 |
nicklas |
<li>The data needs a new datacube and there may be more than one |
5227 |
01 Feb 10 |
nicklas |
parent assay for each child assay |
5227 |
01 Feb 10 |
nicklas |
</ol> |
5227 |
01 Feb 10 |
nicklas |
88 |
|
5227 |
01 Feb 10 |
nicklas |
<p> |
5227 |
01 Feb 10 |
nicklas |
A new datacube is created by including a <code>new-data-cube</code> |
5227 |
01 Feb 10 |
nicklas |
entry in the settings section. Multiple parents per assay is enabled |
5227 |
01 Feb 10 |
nicklas |
by including a <code>multi-assay-parents</code> entry. |
5227 |
01 Feb 10 |
nicklas |
93 |
|
5227 |
01 Feb 10 |
nicklas |
<p> |
5240 |
09 Feb 10 |
nicklas |
The child bioassay set is by default assumed to use the same {@link IntensityTransform} |
5240 |
09 Feb 10 |
nicklas |
as the parent bioassay set. If the imported data uses a different transform |
5240 |
09 Feb 10 |
nicklas |
this should be specified by the <code>transform</code> setting. Allowed |
5240 |
09 Feb 10 |
nicklas |
values are those defined by the IntensityTransform enumeration |
5240 |
09 Feb 10 |
nicklas |
(<code>NONE</code>, <code>LOG2</code> or <code>LOG10</code>). |
5240 |
09 Feb 10 |
nicklas |
100 |
|
5240 |
09 Feb 10 |
nicklas |
<p> |
5227 |
01 Feb 10 |
nicklas |
The importer needs 'rdata' and 'pdata' files in all cases. The ID column in the |
5227 |
01 Feb 10 |
nicklas |
rdata file should be the position number. If the data is using the same datacube |
5227 |
01 Feb 10 |
nicklas |
(case 1) no more columns are needed. The order of the positions doesn't matter |
5227 |
01 Feb 10 |
nicklas |
as long as it matches the spot data files. All positions must be unique. If the data |
5227 |
01 Feb 10 |
nicklas |
needs a new datacube (case 2 and 3) the rdata file also needs one of 'External ID' |
5227 |
01 Feb 10 |
nicklas |
or 'Internal ID' columns which are used to map positions to reporters. |
5227 |
01 Feb 10 |
nicklas |
<p> |
5227 |
01 Feb 10 |
nicklas |
109 |
|
5227 |
01 Feb 10 |
nicklas |
The meaning of the ID column in the pdata file depends on the case. In case 1 |
5227 |
01 Feb 10 |
nicklas |
and 2 the ID is the internal ID of the parent assay. No more columns are needed. |
5227 |
01 Feb 10 |
nicklas |
In case 3, the ID column is just a unique positive integer. In this case, the |
5227 |
01 Feb 10 |
nicklas |
pdata file also needs a second column 'Parent ID', which should be a comma-separated |
5227 |
01 Feb 10 |
nicklas |
list of the internal assay ID values of the parent assays. |
5227 |
01 Feb 10 |
nicklas |
115 |
|
5227 |
01 Feb 10 |
nicklas |
<p> |
5227 |
01 Feb 10 |
nicklas |
The [sdata] section lists all spot data columns. The importer is looking for |
5227 |
01 Feb 10 |
nicklas |
entries of the form 'Ch N' where N is 1, 2, ... etc. Exactly one entry for each |
5227 |
01 Feb 10 |
nicklas |
channel is required. |
5227 |
01 Feb 10 |
nicklas |
120 |
|
5227 |
01 Feb 10 |
nicklas |
@author Nicklas |
5227 |
01 Feb 10 |
nicklas |
@version 2.15 |
5227 |
01 Feb 10 |
nicklas |
@base.modified $Date$ |
5227 |
01 Feb 10 |
nicklas |
124 |
*/ |
5227 |
01 Feb 10 |
nicklas |
125 |
public class BfsImporter |
5227 |
01 Feb 10 |
nicklas |
126 |
{ |
5319 |
20 Apr 10 |
nicklas |
127 |
private boolean autoCloseParsers; |
5227 |
01 Feb 10 |
nicklas |
128 |
private DbControl dc; |
5227 |
01 Feb 10 |
nicklas |
129 |
private ProgressReporter progress; |
5227 |
01 Feb 10 |
nicklas |
130 |
private MetadataParser metadataParser; |
5227 |
01 Feb 10 |
nicklas |
131 |
private Transformation transformation; |
5227 |
01 Feb 10 |
nicklas |
132 |
private InputStreamLocator locator; |
5238 |
09 Feb 10 |
nicklas |
133 |
private ExtraFileImporter extraFileImporter; |
5227 |
01 Feb 10 |
nicklas |
134 |
|
5227 |
01 Feb 10 |
nicklas |
135 |
/**
	Create a new importer. Automatic closing of parsers is enabled
	until it is changed with {@link #setAutoCloseParsers(boolean)}.
*/
public BfsImporter()
{
	autoCloseParsers = true;
}
5227 |
01 Feb 10 |
nicklas |
142 |
|
5227 |
01 Feb 10 |
nicklas |
143 |
/* |
5227 |
01 Feb 10 |
nicklas |
public configuration properties |
5227 |
01 Feb 10 |
nicklas |
145 |
------------------------------- |
5227 |
01 Feb 10 |
nicklas |
146 |
*/ |
5227 |
01 Feb 10 |
nicklas |
147 |
/** |
5319 |
20 Apr 10 |
nicklas |
If this option is set then all parsers are automatically closed |
5319 |
20 Apr 10 |
nicklas |
when all data has been read to them. This setting is enabled by default. |
5319 |
20 Apr 10 |
nicklas |
150 |
*/ |
5319 |
20 Apr 10 |
nicklas |
151 |
public void setAutoCloseParsers(boolean autoClose) |
5319 |
20 Apr 10 |
nicklas |
152 |
{ |
5319 |
20 Apr 10 |
nicklas |
153 |
this.autoCloseParsers = autoClose; |
5319 |
20 Apr 10 |
nicklas |
154 |
} |
5319 |
20 Apr 10 |
nicklas |
155 |
|
5319 |
20 Apr 10 |
nicklas |
156 |
/** |
5227 |
01 Feb 10 |
nicklas |
Set's the DbControl that should be used to get data from the database. |
5227 |
01 Feb 10 |
nicklas |
158 |
*/ |
5227 |
01 Feb 10 |
nicklas |
159 |
public void setDbControl(DbControl dc) |
5227 |
01 Feb 10 |
nicklas |
160 |
{ |
5227 |
01 Feb 10 |
nicklas |
161 |
this.dc = dc; |
5227 |
01 Feb 10 |
nicklas |
162 |
} |
5227 |
01 Feb 10 |
nicklas |
163 |
|
5227 |
01 Feb 10 |
nicklas |
164 |
/** |
5227 |
01 Feb 10 |
nicklas |
Get the current DbControl. |
5227 |
01 Feb 10 |
nicklas |
166 |
*/ |
5227 |
01 Feb 10 |
nicklas |
167 |
public DbControl getDbControl() |
5227 |
01 Feb 10 |
nicklas |
168 |
{ |
5227 |
01 Feb 10 |
nicklas |
169 |
return dc; |
5227 |
01 Feb 10 |
nicklas |
170 |
} |
5227 |
01 Feb 10 |
nicklas |
171 |
|
5227 |
01 Feb 10 |
nicklas |
172 |
/** |
5227 |
01 Feb 10 |
nicklas |
Set the progress reporter that is used to report progress. Call |
5227 |
01 Feb 10 |
nicklas |
{@link #setProgress(int, String)} to update the current status. |
5227 |
01 Feb 10 |
nicklas |
175 |
*/ |
5227 |
01 Feb 10 |
nicklas |
176 |
public void setProgressReporter(ProgressReporter progress) |
5227 |
01 Feb 10 |
nicklas |
177 |
{ |
5227 |
01 Feb 10 |
nicklas |
178 |
this.progress = progress; |
5227 |
01 Feb 10 |
nicklas |
179 |
} |
5227 |
01 Feb 10 |
nicklas |
180 |
|
5227 |
01 Feb 10 |
nicklas |
181 |
/** |
5227 |
01 Feb 10 |
nicklas |
Get the progress reporter. |
5227 |
01 Feb 10 |
nicklas |
183 |
*/ |
5227 |
01 Feb 10 |
nicklas |
184 |
public ProgressReporter getProgressReporter() |
5227 |
01 Feb 10 |
nicklas |
185 |
{ |
5227 |
01 Feb 10 |
nicklas |
186 |
return progress; |
5227 |
01 Feb 10 |
nicklas |
187 |
} |
5227 |
01 Feb 10 |
nicklas |
188 |
|
5227 |
01 Feb 10 |
nicklas |
189 |
/** |
5227 |
01 Feb 10 |
nicklas |
Set the destination transformation. A child bioassay set is created. |
5227 |
01 Feb 10 |
nicklas |
@param transformation The transformation |
5227 |
01 Feb 10 |
nicklas |
192 |
*/ |
5227 |
01 Feb 10 |
nicklas |
193 |
public void setTransformation(Transformation transformation) |
5227 |
01 Feb 10 |
nicklas |
194 |
{ |
5227 |
01 Feb 10 |
nicklas |
195 |
this.transformation = transformation; |
5227 |
01 Feb 10 |
nicklas |
196 |
} |
5227 |
01 Feb 10 |
nicklas |
197 |
|
5227 |
01 Feb 10 |
nicklas |
198 |
/** |
5227 |
01 Feb 10 |
nicklas |
Get the destination transformation. |
5227 |
01 Feb 10 |
nicklas |
200 |
*/ |
5227 |
01 Feb 10 |
nicklas |
201 |
public Transformation getTransformation() |
5227 |
01 Feb 10 |
nicklas |
202 |
{ |
5227 |
01 Feb 10 |
nicklas |
203 |
return transformation; |
5227 |
01 Feb 10 |
nicklas |
204 |
} |
5227 |
01 Feb 10 |
nicklas |
205 |
|
5227 |
01 Feb 10 |
nicklas |
206 |
/** |
5227 |
01 Feb 10 |
nicklas |
Set the metadata file parser. The metadata file should contains |
5227 |
01 Feb 10 |
nicklas |
information about other files and their contents. |
5227 |
01 Feb 10 |
nicklas |
209 |
*/ |
5227 |
01 Feb 10 |
nicklas |
210 |
public void setMetadataParser(MetadataParser parser) |
5227 |
01 Feb 10 |
nicklas |
211 |
{ |
5227 |
01 Feb 10 |
nicklas |
212 |
this.metadataParser = parser; |
5227 |
01 Feb 10 |
nicklas |
213 |
} |
5227 |
01 Feb 10 |
nicklas |
214 |
|
5227 |
01 Feb 10 |
nicklas |
215 |
public MetadataParser getMetadataParser() |
5227 |
01 Feb 10 |
nicklas |
216 |
{ |
5227 |
01 Feb 10 |
nicklas |
217 |
return metadataParser; |
5227 |
01 Feb 10 |
nicklas |
218 |
} |
5227 |
01 Feb 10 |
nicklas |
219 |
|
5227 |
01 Feb 10 |
nicklas |
220 |
/** |
5227 |
01 Feb 10 |
nicklas |
Set the input stream locator that is responsible for opening files |
5227 |
01 Feb 10 |
nicklas |
that are referenced from the metadata file. |
5227 |
01 Feb 10 |
nicklas |
223 |
*/ |
5227 |
01 Feb 10 |
nicklas |
224 |
public void setInputStreamLocator(InputStreamLocator locator) |
5227 |
01 Feb 10 |
nicklas |
225 |
{ |
5227 |
01 Feb 10 |
nicklas |
226 |
this.locator = locator; |
5227 |
01 Feb 10 |
nicklas |
227 |
} |
5238 |
09 Feb 10 |
nicklas |
228 |
|
5238 |
09 Feb 10 |
nicklas |
229 |
public void setExtraFileImporter(ExtraFileImporter extraFileImporter) |
5238 |
09 Feb 10 |
nicklas |
230 |
{ |
5238 |
09 Feb 10 |
nicklas |
231 |
this.extraFileImporter = extraFileImporter; |
5238 |
09 Feb 10 |
nicklas |
232 |
} |
5227 |
01 Feb 10 |
nicklas |
233 |
|
5227 |
01 Feb 10 |
nicklas |
234 |
/* |
5227 |
01 Feb 10 |
nicklas |
Helper methods |
5227 |
01 Feb 10 |
nicklas |
236 |
-------------- |
5227 |
01 Feb 10 |
nicklas |
237 |
*/ |
5227 |
01 Feb 10 |
nicklas |
238 |
/** |
5227 |
01 Feb 10 |
nicklas |
Update the progress of the export. |
5227 |
01 Feb 10 |
nicklas |
@see ProgressReporter |
5227 |
01 Feb 10 |
nicklas |
241 |
*/ |
5227 |
01 Feb 10 |
nicklas |
242 |
protected void setProgress(int percent, String message) |
5227 |
01 Feb 10 |
nicklas |
243 |
{ |
5227 |
01 Feb 10 |
nicklas |
244 |
if (progress != null) |
5227 |
01 Feb 10 |
nicklas |
245 |
{ |
5227 |
01 Feb 10 |
nicklas |
246 |
progress.display(percent, message); |
5227 |
01 Feb 10 |
nicklas |
247 |
} |
5227 |
01 Feb 10 |
nicklas |
248 |
} |
5227 |
01 Feb 10 |
nicklas |
249 |
// ---------------------------- |
5227 |
01 Feb 10 |
nicklas |
250 |
|
5227 |
01 Feb 10 |
nicklas |
251 |
/** |
5227 |
01 Feb 10 |
nicklas |
Start the import. |
5227 |
01 Feb 10 |
nicklas |
@return The created bioassay set or null if the BASEfile didn't contain |
5227 |
01 Feb 10 |
nicklas |
spot data |
5227 |
01 Feb 10 |
nicklas |
255 |
*/ |
5227 |
01 Feb 10 |
nicklas |
256 |
public BioAssaySet doImport() |
5227 |
01 Feb 10 |
nicklas |
257 |
throws IOException |
5227 |
01 Feb 10 |
nicklas |
258 |
{ |
5227 |
01 Feb 10 |
nicklas |
// Check required configuration parameters |
5227 |
01 Feb 10 |
nicklas |
260 |
MetadataParser metadataParser = getMetadataParser(); |
5227 |
01 Feb 10 |
nicklas |
261 |
DbControl dc = getDbControl(); |
5227 |
01 Feb 10 |
nicklas |
262 |
Transformation t = getTransformation(); |
5227 |
01 Feb 10 |
nicklas |
263 |
if (metadataParser == null) throw new InvalidUseOfNullException("metadataParser"); |
5227 |
01 Feb 10 |
nicklas |
264 |
if (dc == null) throw new InvalidUseOfNullException("dbControl"); |
5227 |
01 Feb 10 |
nicklas |
265 |
if (t == null) throw new InvalidUseOfNullException("transformation"); |
5227 |
01 Feb 10 |
nicklas |
266 |
|
5227 |
01 Feb 10 |
nicklas |
// Get optional and derived parameters |
5227 |
01 Feb 10 |
nicklas |
268 |
BioAssaySet parent = t.getSource(); |
5227 |
01 Feb 10 |
nicklas |
269 |
int numChannels = parent.getRawDataType().getChannels(); |
5227 |
01 Feb 10 |
nicklas |
270 |
String metadataFile = metadataParser.getFilename(); |
5240 |
09 Feb 10 |
nicklas |
271 |
IntensityTransform transform = parent.getIntensityTransform(); |
5227 |
01 Feb 10 |
nicklas |
272 |
ChainedProgressReporter chainedProgress = progress == null ? |
5227 |
01 Feb 10 |
nicklas |
273 |
null : new ChainedProgressReporter(progress); |
5227 |
01 Feb 10 |
nicklas |
274 |
|
5227 |
01 Feb 10 |
nicklas |
275 |
setProgress(0, "Initializing BFS importer"); |
5227 |
01 Feb 10 |
nicklas |
276 |
|
5227 |
01 Feb 10 |
nicklas |
// Parse the metadata file |
5227 |
01 Feb 10 |
nicklas |
278 |
MetadataModel metadata = new MetadataModel(); |
5227 |
01 Feb 10 |
nicklas |
279 |
metadataParser.parse(metadata); |
5319 |
20 Apr 10 |
nicklas |
280 |
if (autoCloseParsers) metadataParser.close(); |
5319 |
20 Apr 10 |
nicklas |
281 |
|
5227 |
01 Feb 10 |
nicklas |
// Serial or matrix file? |
5227 |
01 Feb 10 |
nicklas |
283 |
boolean isSerial = "serial".equals(metadata.getSubtype()); |
5319 |
20 Apr 10 |
nicklas |
284 |
|
5319 |
20 Apr 10 |
nicklas |
285 |
Section sdata = metadata.getSection("sdata"); |
5319 |
20 Apr 10 |
nicklas |
286 |
boolean hasSdata = sdata != null; |
5319 |
20 Apr 10 |
nicklas |
287 |
BioAssaySet child = null; |
5227 |
01 Feb 10 |
nicklas |
288 |
|
5319 |
20 Apr 10 |
nicklas |
289 |
if (hasSdata) |
5227 |
01 Feb 10 |
nicklas |
290 |
{ |
5319 |
20 Apr 10 |
nicklas |
291 |
MetadataModel.Section settings = metadata.getSection("settings"); |
5319 |
20 Apr 10 |
nicklas |
292 |
boolean newDataCube = false; |
5319 |
20 Apr 10 |
nicklas |
293 |
boolean multiAssayParents = false; |
5319 |
20 Apr 10 |
nicklas |
294 |
|
5319 |
20 Apr 10 |
nicklas |
295 |
if (settings != null) |
5227 |
01 Feb 10 |
nicklas |
296 |
{ |
5319 |
20 Apr 10 |
nicklas |
297 |
newDataCube = Values.getBoolean(settings.getValue("new-data-cube")); |
5319 |
20 Apr 10 |
nicklas |
298 |
if (newDataCube) |
5319 |
20 Apr 10 |
nicklas |
299 |
{ |
5319 |
20 Apr 10 |
nicklas |
300 |
multiAssayParents = Values.getBoolean(settings.getValue("multi-assay-parents")); |
5319 |
20 Apr 10 |
nicklas |
301 |
} |
5319 |
20 Apr 10 |
nicklas |
302 |
String intensityTransform = settings.getValue("transform"); |
5319 |
20 Apr 10 |
nicklas |
303 |
if (intensityTransform != null) |
5319 |
20 Apr 10 |
nicklas |
304 |
{ |
5319 |
20 Apr 10 |
nicklas |
305 |
transform = IntensityTransform.valueOf(intensityTransform.toUpperCase()); |
5319 |
20 Apr 10 |
nicklas |
306 |
} |
5227 |
01 Feb 10 |
nicklas |
307 |
} |
5319 |
20 Apr 10 |
nicklas |
308 |
|
5319 |
20 Apr 10 |
nicklas |
// Parse the assay annotations file |
5319 |
20 Apr 10 |
nicklas |
310 |
String pdataFile = metadata.getFile("pdata"); |
5319 |
20 Apr 10 |
nicklas |
311 |
if (pdataFile == null) |
5240 |
09 Feb 10 |
nicklas |
312 |
{ |
5319 |
20 Apr 10 |
nicklas |
313 |
throw new InvalidDataException("Missing 'pdata' entry in [files] section in metadata file: " + metadataFile); |
5240 |
09 Feb 10 |
nicklas |
314 |
} |
5319 |
20 Apr 10 |
nicklas |
315 |
AnnotationParser pdataParser = AnnotationParser.create( |
5319 |
20 Apr 10 |
nicklas |
316 |
locator.getInputStream(pdataFile), pdataFile, locator.getSize(pdataFile)); |
5319 |
20 Apr 10 |
nicklas |
317 |
AnnotationModel pdataModel = new AnnotationModel(); |
5319 |
20 Apr 10 |
nicklas |
318 |
pdataParser.parse(pdataModel); |
5319 |
20 Apr 10 |
nicklas |
319 |
if (autoCloseParsers) pdataParser.close(); |
5227 |
01 Feb 10 |
nicklas |
320 |
|
5319 |
20 Apr 10 |
nicklas |
321 |
int parentIndex = pdataModel.getColumnIndex("Parent ID"); |
5319 |
20 Apr 10 |
nicklas |
322 |
if (multiAssayParents && parentIndex == -1) |
5227 |
01 Feb 10 |
nicklas |
323 |
{ |
5319 |
20 Apr 10 |
nicklas |
324 |
throw new InvalidDataException("Missing column 'Parent ID' in rdata file: " + pdataFile); |
5227 |
01 Feb 10 |
nicklas |
325 |
} |
5319 |
20 Apr 10 |
nicklas |
326 |
|
5319 |
20 Apr 10 |
nicklas |
327 |
|
5319 |
20 Apr 10 |
nicklas |
// Create child bioassay set and bioassays |
5319 |
20 Apr 10 |
nicklas |
329 |
int numAssays = pdataModel.getRowCount(); |
5319 |
20 Apr 10 |
nicklas |
330 |
int nameIndex = pdataModel.getColumnIndex("Name"); |
5590 |
16 Mar 11 |
nicklas |
331 |
ThreadSignalHandler.checkInterrupted(); |
5319 |
20 Apr 10 |
nicklas |
332 |
setProgress(5, "Creating " + numAssays + " child assays"); |
5319 |
20 Apr 10 |
nicklas |
333 |
|
5319 |
20 Apr 10 |
nicklas |
334 |
child = t.newProduct(newDataCube ? "new" : null, "new", false); |
5319 |
20 Apr 10 |
nicklas |
335 |
child.setIntensityTransform(transform); |
5319 |
20 Apr 10 |
nicklas |
336 |
dc.saveItem(child); |
5319 |
20 Apr 10 |
nicklas |
337 |
BioAssay[] children = new BioAssay[numAssays]; |
5319 |
20 Apr 10 |
nicklas |
338 |
for (int i = 0; i < numAssays; ++i) |
5227 |
01 Feb 10 |
nicklas |
339 |
{ |
5319 |
20 Apr 10 |
nicklas |
340 |
BioAssay ba; |
5319 |
20 Apr 10 |
nicklas |
341 |
if (multiAssayParents) |
5227 |
01 Feb 10 |
nicklas |
342 |
{ |
5319 |
20 Apr 10 |
nicklas |
343 |
String tmp = pdataModel.getData(i, parentIndex); |
5319 |
20 Apr 10 |
nicklas |
344 |
Integer[] parentIds = Values.getInt(tmp.split(",")); |
5319 |
20 Apr 10 |
nicklas |
345 |
List<BioAssay> parents = new ArrayList<BioAssay>(); |
5319 |
20 Apr 10 |
nicklas |
346 |
for (int parentAssayId : parentIds) |
5319 |
20 Apr 10 |
nicklas |
347 |
{ |
5319 |
20 Apr 10 |
nicklas |
348 |
parents.add(BioAssay.getById(dc, parentAssayId)); |
5319 |
20 Apr 10 |
nicklas |
349 |
} |
5319 |
20 Apr 10 |
nicklas |
350 |
ba = child.newBioAssay(parents); |
5227 |
01 Feb 10 |
nicklas |
351 |
} |
5227 |
01 Feb 10 |
nicklas |
352 |
else |
5227 |
01 Feb 10 |
nicklas |
353 |
{ |
5319 |
20 Apr 10 |
nicklas |
354 |
int parentAssayId = pdataModel.getRowId(i); |
5319 |
20 Apr 10 |
nicklas |
355 |
if (newDataCube) |
5319 |
20 Apr 10 |
nicklas |
356 |
{ |
5319 |
20 Apr 10 |
nicklas |
357 |
ba = child.newBioAssay(Collections.singleton(BioAssay.getById(dc, parentAssayId))); |
5319 |
20 Apr 10 |
nicklas |
358 |
} |
5319 |
20 Apr 10 |
nicklas |
359 |
else |
5319 |
20 Apr 10 |
nicklas |
360 |
{ |
5319 |
20 Apr 10 |
nicklas |
361 |
ba = child.newBioAssay(BioAssay.getById(dc, parentAssayId)); |
5319 |
20 Apr 10 |
nicklas |
362 |
} |
5227 |
01 Feb 10 |
nicklas |
363 |
} |
5319 |
20 Apr 10 |
nicklas |
364 |
if (nameIndex >= 0) ba.setName(pdataModel.getData(i, nameIndex)); |
5319 |
20 Apr 10 |
nicklas |
365 |
children[i] = ba; |
5319 |
20 Apr 10 |
nicklas |
366 |
dc.saveItem(ba); |
5227 |
01 Feb 10 |
nicklas |
367 |
} |
5227 |
01 Feb 10 |
nicklas |
368 |
|
5319 |
20 Apr 10 |
nicklas |
// Reporter annotations file has the position values for each row |
5319 |
20 Apr 10 |
nicklas |
370 |
String rdataFile = metadata.getFile("rdata"); |
5319 |
20 Apr 10 |
nicklas |
371 |
if (rdataFile == null) |
5319 |
20 Apr 10 |
nicklas |
372 |
{ |
5319 |
20 Apr 10 |
nicklas |
373 |
throw new InvalidDataException("Missing 'rdata' entry in [files] section in metadata file: " + metadataFile); |
5319 |
20 Apr 10 |
nicklas |
374 |
} |
5319 |
20 Apr 10 |
nicklas |
375 |
AnnotationParser rdataParser = AnnotationParser.create( |
5319 |
20 Apr 10 |
nicklas |
376 |
locator.getInputStream(rdataFile), rdataFile, locator.getSize(rdataFile)); |
5319 |
20 Apr 10 |
nicklas |
377 |
AnnotationModel rdataModel = new AnnotationModel(); |
5319 |
20 Apr 10 |
nicklas |
378 |
rdataModel.setHeaders(rdataParser.parseToBof()); |
5319 |
20 Apr 10 |
nicklas |
379 |
int posIndex = rdataModel.getColumnIndex("ID"); |
5319 |
20 Apr 10 |
nicklas |
380 |
int externalIdIndex = rdataModel.getColumnIndex("External ID"); |
5319 |
20 Apr 10 |
nicklas |
381 |
int internalIdIndex = rdataModel.getColumnIndex("Internal ID"); |
5319 |
20 Apr 10 |
nicklas |
382 |
if (posIndex == -1) |
5319 |
20 Apr 10 |
nicklas |
383 |
{ |
5319 |
20 Apr 10 |
nicklas |
384 |
throw new InvalidDataException("Missing column 'ID' in rdata file: " + rdataFile); |
5319 |
20 Apr 10 |
nicklas |
385 |
} |
5319 |
20 Apr 10 |
nicklas |
386 |
if (newDataCube && externalIdIndex == -1 && internalIdIndex == -1) |
5319 |
20 Apr 10 |
nicklas |
387 |
{ |
5319 |
20 Apr 10 |
nicklas |
388 |
throw new InvalidDataException("Missing 'External ID' and " + |
5319 |
20 Apr 10 |
nicklas |
389 |
"'Internal ID' columns in rdata file: " + rdataFile); |
5319 |
20 Apr 10 |
nicklas |
390 |
} |
5227 |
01 Feb 10 |
nicklas |
391 |
|
5319 |
20 Apr 10 |
nicklas |
// Load 'sdata' information |
5319 |
20 Apr 10 |
nicklas |
393 |
int numSdata = sdata.getEntryCount(); |
5319 |
20 Apr 10 |
nicklas |
394 |
int[] chIndex = new int[numChannels]; |
5319 |
20 Apr 10 |
nicklas |
395 |
for (int i = 0; i < numChannels; ++i) |
5227 |
01 Feb 10 |
nicklas |
396 |
{ |
5319 |
20 Apr 10 |
nicklas |
397 |
chIndex[i] = sdata.getIndex("Ch " + (i+1)); |
5319 |
20 Apr 10 |
nicklas |
398 |
if (chIndex[i] == -1) |
5319 |
20 Apr 10 |
nicklas |
399 |
{ |
5319 |
20 Apr 10 |
nicklas |
400 |
throw new InvalidDataException("Missing 'Ch "+ (i+1) + |
5319 |
20 Apr 10 |
nicklas |
401 |
"' entry in [sdata] section in file: " + metadataFile); |
5319 |
20 Apr 10 |
nicklas |
402 |
} |
5227 |
01 Feb 10 |
nicklas |
403 |
} |
5227 |
01 Feb 10 |
nicklas |
404 |
|
5319 |
20 Apr 10 |
nicklas |
// Initialize a synchronized parser and load data files |
5590 |
16 Mar 11 |
nicklas |
406 |
ThreadSignalHandler.checkInterrupted(); |
5319 |
20 Apr 10 |
nicklas |
407 |
setProgress(10, "Parsing " + numSdata + " spot data files..."); |
5319 |
20 Apr 10 |
nicklas |
408 |
SynchronizedSpotDataParser mainParser = new SynchronizedSpotDataParser(); |
5319 |
20 Apr 10 |
nicklas |
409 |
mainParser.setMasterParser(rdataParser); |
5319 |
20 Apr 10 |
nicklas |
410 |
int numSdataFiles = isSerial ? numAssays : numSdata; |
5319 |
20 Apr 10 |
nicklas |
411 |
int numDataColumns = isSerial ? numSdata : numAssays; |
5319 |
20 Apr 10 |
nicklas |
412 |
for (int i = 1; i <= numSdataFiles; ++i) |
5227 |
01 Feb 10 |
nicklas |
413 |
{ |
5319 |
20 Apr 10 |
nicklas |
414 |
String sdataFile = metadata.getFile("sdata" + i); |
5319 |
20 Apr 10 |
nicklas |
415 |
if (sdataFile == null) |
5319 |
20 Apr 10 |
nicklas |
416 |
{ |
5319 |
20 Apr 10 |
nicklas |
417 |
throw new InvalidDataException("Missing 'sdata" + i + |
5319 |
20 Apr 10 |
nicklas |
418 |
"' entry in [files] section in metadata file: " + metadataFile); |
5319 |
20 Apr 10 |
nicklas |
419 |
} |
5319 |
20 Apr 10 |
nicklas |
420 |
DataParser dp = DataParser.create( |
5319 |
20 Apr 10 |
nicklas |
421 |
locator.getInputStream(sdataFile), sdataFile, locator.getSize(sdataFile)); |
5319 |
20 Apr 10 |
nicklas |
422 |
dp.setNumColumns(numDataColumns); |
5319 |
20 Apr 10 |
nicklas |
423 |
dp.parseToBof(); |
5319 |
20 Apr 10 |
nicklas |
424 |
mainParser.addDataParser(dp); |
5227 |
01 Feb 10 |
nicklas |
425 |
} |
5227 |
01 Feb 10 |
nicklas |
426 |
|
5319 |
20 Apr 10 |
nicklas |
// Set up event handlers |
5319 |
20 Apr 10 |
nicklas |
428 |
SpotBatcher spotBatcher = child.getSpotBatcher(); |
5319 |
20 Apr 10 |
nicklas |
429 |
MultiEventHandler masterEventHandler = new MultiEventHandler(); |
5319 |
20 Apr 10 |
nicklas |
430 |
PositionBatcher posBatcher = null; |
5319 |
20 Apr 10 |
nicklas |
431 |
if (newDataCube) |
5319 |
20 Apr 10 |
nicklas |
432 |
{ |
5319 |
20 Apr 10 |
nicklas |
433 |
posBatcher = child.getPositionBatcher(); |
5319 |
20 Apr 10 |
nicklas |
434 |
} |
5319 |
20 Apr 10 |
nicklas |
// The position handler extracts the position value and is used by other handlers |
5319 |
20 Apr 10 |
nicklas |
436 |
PositionEventHandler posHandler = |
5319 |
20 Apr 10 |
nicklas |
437 |
new PositionEventHandler(0, posIndex, posBatcher, internalIdIndex, externalIdIndex); |
5319 |
20 Apr 10 |
nicklas |
438 |
masterEventHandler.addEventHandler(posHandler); |
5319 |
20 Apr 10 |
nicklas |
// The spot intensity handler extracts spot intensity values for all channels |
5319 |
20 Apr 10 |
nicklas |
440 |
if (isSerial) |
5319 |
20 Apr 10 |
nicklas |
441 |
{ |
5319 |
20 Apr 10 |
nicklas |
442 |
masterEventHandler.addEventHandler(new SerialSpotIntensityEventHandler(spotBatcher, posHandler, children, chIndex)); |
5319 |
20 Apr 10 |
nicklas |
443 |
} |
5319 |
20 Apr 10 |
nicklas |
444 |
else |
5319 |
20 Apr 10 |
nicklas |
445 |
{ |
5319 |
20 Apr 10 |
nicklas |
446 |
masterEventHandler.addEventHandler(new MatrixSpotIntensityEventHandler(spotBatcher, posHandler, children, chIndex)); |
5319 |
20 Apr 10 |
nicklas |
447 |
} |
5319 |
20 Apr 10 |
nicklas |
448 |
if (chainedProgress != null) |
5319 |
20 Apr 10 |
nicklas |
449 |
{ |
5319 |
20 Apr 10 |
nicklas |
450 |
chainedProgress.setRange(15, 90); |
5319 |
20 Apr 10 |
nicklas |
451 |
masterEventHandler.addEventHandler( |
5319 |
20 Apr 10 |
nicklas |
452 |
new ProgressEventHandler(chainedProgress, rdataParser.getFileSize())); |
5319 |
20 Apr 10 |
nicklas |
453 |
} |
5319 |
20 Apr 10 |
nicklas |
454 |
|
5319 |
20 Apr 10 |
nicklas |
// Start parsing |
5319 |
20 Apr 10 |
nicklas |
456 |
mainParser.parse(masterEventHandler); |
5319 |
20 Apr 10 |
nicklas |
457 |
if (autoCloseParsers) mainParser.close(); |
5227 |
01 Feb 10 |
nicklas |
458 |
} |
5227 |
01 Feb 10 |
nicklas |
459 |
|
5238 |
09 Feb 10 |
nicklas |
// Import extra files |
5238 |
09 Feb 10 |
nicklas |
461 |
if (extraFileImporter != null) |
5238 |
09 Feb 10 |
nicklas |
462 |
{ |
5238 |
09 Feb 10 |
nicklas |
463 |
MetadataModel.Section files = metadata.getSection("files"); |
5238 |
09 Feb 10 |
nicklas |
464 |
if (chainedProgress != null) |
5238 |
09 Feb 10 |
nicklas |
465 |
{ |
5238 |
09 Feb 10 |
nicklas |
466 |
chainedProgress.setRange(90, 100); |
5238 |
09 Feb 10 |
nicklas |
467 |
} |
5238 |
09 Feb 10 |
nicklas |
468 |
int fileCount = files.getEntryCount(); |
5319 |
20 Apr 10 |
nicklas |
469 |
BasicItem linkWithFile = child == null ? t : child; |
5238 |
09 Feb 10 |
nicklas |
470 |
for (int i = 0; i < fileCount; ++i) |
5238 |
09 Feb 10 |
nicklas |
471 |
{ |
5590 |
16 Mar 11 |
nicklas |
472 |
ThreadSignalHandler.checkInterrupted(); |
5238 |
09 Feb 10 |
nicklas |
473 |
MetadataModel.SectionEntry entry = files.getEntry(i); |
5238 |
09 Feb 10 |
nicklas |
474 |
if (entry.getKey().startsWith("x-")) |
5238 |
09 Feb 10 |
nicklas |
475 |
{ |
5238 |
09 Feb 10 |
nicklas |
476 |
String filename = entry.getValue(); |
5238 |
09 Feb 10 |
nicklas |
477 |
if (chainedProgress != null) |
5238 |
09 Feb 10 |
nicklas |
478 |
{ |
5238 |
09 Feb 10 |
nicklas |
479 |
chainedProgress.display((100 * i) / fileCount, "Importing file: " + filename); |
5238 |
09 Feb 10 |
nicklas |
480 |
} |
5238 |
09 Feb 10 |
nicklas |
481 |
File file = extraFileImporter.importExtraFile(filename, locator); |
5238 |
09 Feb 10 |
nicklas |
482 |
if (file != null) |
5238 |
09 Feb 10 |
nicklas |
483 |
{ |
5319 |
20 Apr 10 |
nicklas |
484 |
AnyToAny ata = AnyToAny.getNew(dc, linkWithFile, file, entry.getKey().replace("x-", ""), false); |
5238 |
09 Feb 10 |
nicklas |
485 |
dc.saveItem(ata); |
5238 |
09 Feb 10 |
nicklas |
486 |
} |
5238 |
09 Feb 10 |
nicklas |
487 |
} |
5238 |
09 Feb 10 |
nicklas |
488 |
} |
5238 |
09 Feb 10 |
nicklas |
489 |
} |
5319 |
20 Apr 10 |
nicklas |
490 |
|
5319 |
20 Apr 10 |
nicklas |
// Final check: did we import anything? |
5319 |
20 Apr 10 |
nicklas |
492 |
if (!hasSdata && extraFileImporter == null) |
5319 |
20 Apr 10 |
nicklas |
493 |
{ |
5319 |
20 Apr 10 |
nicklas |
494 |
throw new InvalidDataException("Missing [sdata] section and " + |
5319 |
20 Apr 10 |
nicklas |
495 |
"extra file importer in metadata file:" + metadataFile); |
5319 |
20 Apr 10 |
nicklas |
496 |
} |
5227 |
01 Feb 10 |
nicklas |
497 |
return child; |
5227 |
01 Feb 10 |
nicklas |
498 |
} |
5227 |
01 Feb 10 |
nicklas |
499 |
|
5227 |
01 Feb 10 |
nicklas |
500 |
static class ProgressEventHandler |
5227 |
01 Feb 10 |
nicklas |
501 |
implements EventHandler |
5227 |
01 Feb 10 |
nicklas |
502 |
{ |
5227 |
01 Feb 10 |
nicklas |
503 |
|
5227 |
01 Feb 10 |
nicklas |
504 |
private ProgressReporter progress; |
5227 |
01 Feb 10 |
nicklas |
505 |
private long totalBytes; |
5232 |
04 Feb 10 |
nicklas |
506 |
private long nextReport; |
5232 |
04 Feb 10 |
nicklas |
507 |
private long reportInterval; |
5227 |
01 Feb 10 |
nicklas |
508 |
|
5227 |
01 Feb 10 |
nicklas |
509 |
ProgressEventHandler(ProgressReporter progress, long totalBytes) |
5227 |
01 Feb 10 |
nicklas |
510 |
{ |
5227 |
01 Feb 10 |
nicklas |
511 |
this.progress = progress; |
5227 |
01 Feb 10 |
nicklas |
512 |
this.totalBytes = totalBytes; |
5232 |
04 Feb 10 |
nicklas |
513 |
this.nextReport = 0; |
5232 |
04 Feb 10 |
nicklas |
514 |
this.reportInterval = Math.max(totalBytes / 20, 100); |
5227 |
01 Feb 10 |
nicklas |
515 |
} |
5227 |
01 Feb 10 |
nicklas |
516 |
|
6875 |
20 Apr 15 |
nicklas |
517 |
@SuppressWarnings("rawtypes") |
5227 |
01 Feb 10 |
nicklas |
518 |
@Override |
5227 |
01 Feb 10 |
nicklas |
519 |
public void handleEvent(EventType eventType, Object eventData, BfsParser parser) |
5227 |
01 Feb 10 |
nicklas |
520 |
{ |
5227 |
01 Feb 10 |
nicklas |
521 |
if (eventType == SynchronizedSpotDataParser.DATA_EVENT) |
5227 |
01 Feb 10 |
nicklas |
522 |
{ |
5227 |
01 Feb 10 |
nicklas |
523 |
long parsedBytes = parser.getParsedBytes(); |
5232 |
04 Feb 10 |
nicklas |
524 |
if (parsedBytes > nextReport) |
5232 |
04 Feb 10 |
nicklas |
525 |
{ |
5232 |
04 Feb 10 |
nicklas |
526 |
nextReport = parsedBytes + reportInterval; |
5329 |
29 Apr 10 |
nicklas |
527 |
int percent = totalBytes == -1 ? -1 : (int)((100 * parsedBytes) / totalBytes); |
5329 |
29 Apr 10 |
nicklas |
528 |
progress.display(percent, "Importing spot data. Parsed " + parser.getCurrentLine() + |
5232 |
04 Feb 10 |
nicklas |
529 |
" lines so far."); |
5232 |
04 Feb 10 |
nicklas |
530 |
} |
5227 |
01 Feb 10 |
nicklas |
531 |
} |
5227 |
01 Feb 10 |
nicklas |
532 |
} |
5227 |
01 Feb 10 |
nicklas |
533 |
|
5227 |
01 Feb 10 |
nicklas |
534 |
} |
5227 |
01 Feb 10 |
nicklas |
535 |
|
5227 |
01 Feb 10 |
nicklas |
536 |
} |