5093 |
10 Sep 09 |
nicklas |
1 |
/** |
5093 |
10 Sep 09 |
nicklas |
$Id$ |
5093 |
10 Sep 09 |
nicklas |
3 |
|
5093 |
10 Sep 09 |
nicklas |
Copyright (C) 2009 Nicklas Nordborg |
5093 |
10 Sep 09 |
nicklas |
5 |
|
5093 |
10 Sep 09 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5093 |
10 Sep 09 |
nicklas |
Available at http://base.thep.lu.se/ |
5093 |
10 Sep 09 |
nicklas |
8 |
|
5093 |
10 Sep 09 |
nicklas |
BASE is free software; you can redistribute it and/or |
5093 |
10 Sep 09 |
nicklas |
modify it under the terms of the GNU General Public License |
5093 |
10 Sep 09 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5093 |
10 Sep 09 |
nicklas |
of the License, or (at your option) any later version. |
5093 |
10 Sep 09 |
nicklas |
13 |
|
5093 |
10 Sep 09 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5093 |
10 Sep 09 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5093 |
10 Sep 09 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5093 |
10 Sep 09 |
nicklas |
GNU General Public License for more details. |
5093 |
10 Sep 09 |
nicklas |
18 |
|
5093 |
10 Sep 09 |
nicklas |
You should have received a copy of the GNU General Public License |
5093 |
10 Sep 09 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5093 |
10 Sep 09 |
nicklas |
21 |
*/ |
5093 |
10 Sep 09 |
nicklas |
22 |
package net.sf.basedb.util.importer.spotdata; |
5093 |
10 Sep 09 |
nicklas |
23 |
|
5093 |
10 Sep 09 |
nicklas |
24 |
import java.io.IOException; |
5093 |
10 Sep 09 |
nicklas |
25 |
import java.util.List; |
5093 |
10 Sep 09 |
nicklas |
26 |
|
5093 |
10 Sep 09 |
nicklas |
27 |
import net.sf.basedb.core.BioAssay; |
5093 |
10 Sep 09 |
nicklas |
28 |
import net.sf.basedb.core.BioAssaySet; |
5093 |
10 Sep 09 |
nicklas |
29 |
import net.sf.basedb.core.DbControl; |
5093 |
10 Sep 09 |
nicklas |
30 |
import net.sf.basedb.core.ExtraValueType; |
5093 |
10 Sep 09 |
nicklas |
31 |
import net.sf.basedb.core.Job; |
5093 |
10 Sep 09 |
nicklas |
32 |
import net.sf.basedb.core.SpotBatcher; |
5093 |
10 Sep 09 |
nicklas |
33 |
import net.sf.basedb.core.SpotExtraValueBatcher; |
5405 |
10 Sep 10 |
nicklas |
34 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
5093 |
10 Sep 09 |
nicklas |
35 |
import net.sf.basedb.util.basefile.BaseFileParser; |
5093 |
10 Sep 09 |
nicklas |
36 |
import net.sf.basedb.util.basefile.BaseFileSectionParser; |
5093 |
10 Sep 09 |
nicklas |
37 |
import net.sf.basedb.util.importer.spotdata.BaseFileInfo.SpotSectionInfo; |
5093 |
10 Sep 09 |
nicklas |
38 |
import net.sf.basedb.util.parser.FlatFileParser; |
5093 |
10 Sep 09 |
nicklas |
39 |
|
5093 |
10 Sep 09 |
nicklas |
40 |
/** |
5093 |
10 Sep 09 |
nicklas |
41 |
|
5093 |
10 Sep 09 |
nicklas |
The second pass of the 'spots' section extracts spot and extra value |
5093 |
10 Sep 09 |
nicklas |
data. |
5093 |
10 Sep 09 |
nicklas |
44 |
|
5093 |
10 Sep 09 |
nicklas |
@author Nicklas |
5093 |
10 Sep 09 |
nicklas |
@version 2.14 |
5093 |
10 Sep 09 |
nicklas |
@base.modified $Date$ |
5093 |
10 Sep 09 |
nicklas |
48 |
*/ |
5093 |
10 Sep 09 |
nicklas |
49 |
public class SecondPassSectionSpotsParser |
5093 |
10 Sep 09 |
nicklas |
50 |
implements BaseFileSectionParser |
5093 |
10 Sep 09 |
nicklas |
51 |
{ |
5093 |
10 Sep 09 |
nicklas |
52 |
|
5093 |
10 Sep 09 |
nicklas |
53 |
private final DbControl dc; |
5093 |
10 Sep 09 |
nicklas |
54 |
private final BaseFileInfo info; |
5093 |
10 Sep 09 |
nicklas |
55 |
private final BioAssaySet child; |
5093 |
10 Sep 09 |
nicklas |
56 |
private final int linesToParse; |
5093 |
10 Sep 09 |
nicklas |
57 |
private final int progressReportInterval; |
5093 |
10 Sep 09 |
nicklas |
58 |
private int nextProgressReport; |
5093 |
10 Sep 09 |
nicklas |
59 |
private final int channels; |
5093 |
10 Sep 09 |
nicklas |
60 |
|
5093 |
10 Sep 09 |
nicklas |
61 |
private int sectionCount; |
5093 |
10 Sep 09 |
nicklas |
62 |
|
5093 |
10 Sep 09 |
nicklas |
63 |
/** |
5093 |
10 Sep 09 |
nicklas |
Creates a new 'section spot' parser for the first pass. This |
5093 |
10 Sep 09 |
nicklas |
parser will only extract header and reporter/position mapping |
5093 |
10 Sep 09 |
nicklas |
information. |
5093 |
10 Sep 09 |
nicklas |
67 |
|
5093 |
10 Sep 09 |
nicklas |
@param dc A DbControl to use for database access |
5093 |
10 Sep 09 |
nicklas |
@param info Information about the file we are parsing |
5093 |
10 Sep 09 |
nicklas |
@param child The new child bioassay set that we are importing data to |
5093 |
10 Sep 09 |
nicklas |
71 |
*/ |
5093 |
10 Sep 09 |
nicklas |
72 |
public SecondPassSectionSpotsParser(DbControl dc, BaseFileInfo info, |
5093 |
10 Sep 09 |
nicklas |
73 |
BioAssaySet child, int linesToParse) |
5093 |
10 Sep 09 |
nicklas |
74 |
{ |
5093 |
10 Sep 09 |
nicklas |
75 |
this.dc = dc; |
5093 |
10 Sep 09 |
nicklas |
76 |
this.info = info; |
5093 |
10 Sep 09 |
nicklas |
77 |
this.child = child; |
5093 |
10 Sep 09 |
nicklas |
78 |
this.linesToParse = linesToParse; |
5093 |
10 Sep 09 |
nicklas |
79 |
this.progressReportInterval = Math.max(linesToParse / 50, 50); |
5093 |
10 Sep 09 |
nicklas |
80 |
this.channels = child.getRawDataType().getChannels(); |
5093 |
10 Sep 09 |
nicklas |
81 |
this.nextProgressReport = progressReportInterval; |
5093 |
10 Sep 09 |
nicklas |
82 |
} |
5093 |
10 Sep 09 |
nicklas |
83 |
|
5093 |
10 Sep 09 |
nicklas |
84 |
/* |
5093 |
10 Sep 09 |
nicklas |
From the BaseFileSectionParser interface |
5093 |
10 Sep 09 |
nicklas |
86 |
---------------------------------------- |
5093 |
10 Sep 09 |
nicklas |
87 |
*/ |
6875 |
20 Apr 15 |
nicklas |
88 |
@SuppressWarnings({"unchecked", "rawtypes"}) |
5093 |
10 Sep 09 |
nicklas |
89 |
@Override |
5093 |
10 Sep 09 |
nicklas |
90 |
public void parseSection(BaseFileParser parser, FlatFileParser ffp) |
5093 |
10 Sep 09 |
nicklas |
91 |
throws IOException |
5093 |
10 Sep 09 |
nicklas |
92 |
{ |
5093 |
10 Sep 09 |
nicklas |
93 |
++sectionCount; |
5093 |
10 Sep 09 |
nicklas |
94 |
|
5093 |
10 Sep 09 |
nicklas |
95 |
ffp.parseHeaders(); |
5093 |
10 Sep 09 |
nicklas |
96 |
|
5093 |
10 Sep 09 |
nicklas |
// Get information about this spot section that was extracted |
5093 |
10 Sep 09 |
nicklas |
// in the first pass |
5093 |
10 Sep 09 |
nicklas |
99 |
SpotSectionInfo ssInfo = info.getSpotSectionInfo(sectionCount); |
5093 |
10 Sep 09 |
nicklas |
100 |
int positionIndex = ssInfo.getPositionIndex(); |
5093 |
10 Sep 09 |
nicklas |
101 |
int numAssayFields = ssInfo.getAssayFields().size(); |
5093 |
10 Sep 09 |
nicklas |
102 |
|
5093 |
10 Sep 09 |
nicklas |
// Get the data cube column numbers for each child assay |
5093 |
10 Sep 09 |
nicklas |
104 |
List<Integer> assays = ssInfo.getAssays(); |
5093 |
10 Sep 09 |
nicklas |
105 |
short[] dataCubeColumns = new short[assays.size()]; |
5093 |
10 Sep 09 |
nicklas |
106 |
for (int i = 0; i < assays.size(); ++i) |
5093 |
10 Sep 09 |
nicklas |
107 |
{ |
5093 |
10 Sep 09 |
nicklas |
108 |
BioAssay child = info.getChildAssay(assays.get(i)).getChild(); |
5093 |
10 Sep 09 |
nicklas |
109 |
dataCubeColumns[i] = child.getDataCubeColumnNo(); |
5093 |
10 Sep 09 |
nicklas |
110 |
} |
5093 |
10 Sep 09 |
nicklas |
111 |
|
5093 |
10 Sep 09 |
nicklas |
// Prepare the spot intensity parser and the spot batcher |
5093 |
10 Sep 09 |
nicklas |
113 |
SpotIntensityParser spiParser = ssInfo.getSpotIntensityParser(); |
5096 |
14 Sep 09 |
nicklas |
114 |
spiParser.beginSection(parser, ffp, ssInfo.getAssayFields()); |
5093 |
10 Sep 09 |
nicklas |
115 |
SpotBatcher spotBatcher = child.getSpotBatcher(); |
5093 |
10 Sep 09 |
nicklas |
116 |
float[] intensities = new float[channels]; |
5093 |
10 Sep 09 |
nicklas |
117 |
|
5093 |
10 Sep 09 |
nicklas |
// Prepare extra value parsers |
5093 |
10 Sep 09 |
nicklas |
119 |
ExtraFloatParser[] extraParsers = null; |
5093 |
10 Sep 09 |
nicklas |
120 |
SpotExtraValueBatcher<Float>[] extraBatchers = null; |
5093 |
10 Sep 09 |
nicklas |
121 |
if (ssInfo.getExtraFloatParsers() != null) |
5093 |
10 Sep 09 |
nicklas |
122 |
{ |
5093 |
10 Sep 09 |
nicklas |
123 |
List<ExtraFloatParser> efp = ssInfo.getExtraFloatParsers(); |
5093 |
10 Sep 09 |
nicklas |
124 |
extraParsers = new ExtraFloatParser[efp.size()]; |
5093 |
10 Sep 09 |
nicklas |
125 |
extraBatchers = new SpotExtraValueBatcher[efp.size()]; |
5093 |
10 Sep 09 |
nicklas |
126 |
Job job = child.getTransformation().getJob(); |
5093 |
10 Sep 09 |
nicklas |
127 |
for (int i = 0; i < efp.size(); ++i) |
5093 |
10 Sep 09 |
nicklas |
128 |
{ |
5093 |
10 Sep 09 |
nicklas |
129 |
ExtraFloatParser p = efp.get(i); |
5093 |
10 Sep 09 |
nicklas |
130 |
extraParsers[i] = p; |
5093 |
10 Sep 09 |
nicklas |
131 |
ExtraValueType type = info.getExtraFloat(p.getId()); |
5093 |
10 Sep 09 |
nicklas |
132 |
extraBatchers[i] = child.getSpotExtraValueBatcher(Float.class, type, job); |
5093 |
10 Sep 09 |
nicklas |
133 |
} |
5093 |
10 Sep 09 |
nicklas |
134 |
} |
5093 |
10 Sep 09 |
nicklas |
135 |
boolean hasExtraValues = extraParsers != null; |
5093 |
10 Sep 09 |
nicklas |
136 |
|
5093 |
10 Sep 09 |
nicklas |
// Parse data and check if each position has same reporter as before |
5093 |
10 Sep 09 |
nicklas |
138 |
FlatFileParser.Data data = null; |
5093 |
10 Sep 09 |
nicklas |
139 |
while ((data = ffp.nextData()) != null) |
5093 |
10 Sep 09 |
nicklas |
140 |
{ |
5405 |
10 Sep 10 |
nicklas |
141 |
ThreadSignalHandler.checkInterrupted(); |
5093 |
10 Sep 09 |
nicklas |
142 |
int parsedLines = ffp.getParsedLines(); |
5093 |
10 Sep 09 |
nicklas |
143 |
if (parsedLines >= nextProgressReport) |
5093 |
10 Sep 09 |
nicklas |
144 |
{ |
5093 |
10 Sep 09 |
nicklas |
145 |
nextProgressReport = parsedLines + progressReportInterval; |
5093 |
10 Sep 09 |
nicklas |
146 |
parser.setProgress(parsedLines, "Importing spot data (second pass): " + |
5093 |
10 Sep 09 |
nicklas |
147 |
parsedLines + " of " + linesToParse + " lines"); |
5093 |
10 Sep 09 |
nicklas |
148 |
} |
5093 |
10 Sep 09 |
nicklas |
149 |
|
5093 |
10 Sep 09 |
nicklas |
150 |
int offset = ssInfo.getFirstAssayFieldIndex(); |
7665 |
20 Mar 19 |
nicklas |
151 |
int position = data.getInt(positionIndex); |
5093 |
10 Sep 09 |
nicklas |
152 |
|
5093 |
10 Sep 09 |
nicklas |
153 |
for (short dataCubeColumn : dataCubeColumns) |
5093 |
10 Sep 09 |
nicklas |
154 |
{ |
5093 |
10 Sep 09 |
nicklas |
155 |
if (spiParser.setIntensities(data, intensities, offset)) |
5093 |
10 Sep 09 |
nicklas |
156 |
{ |
5093 |
10 Sep 09 |
nicklas |
157 |
spotBatcher.insert(dataCubeColumn, position, intensities); |
5093 |
10 Sep 09 |
nicklas |
158 |
if (hasExtraValues) |
5093 |
10 Sep 09 |
nicklas |
159 |
{ |
5093 |
10 Sep 09 |
nicklas |
160 |
for (int i = 0; i < extraParsers.length; ++i) |
5093 |
10 Sep 09 |
nicklas |
161 |
{ |
5093 |
10 Sep 09 |
nicklas |
162 |
float value = extraParsers[i].getValue(data, offset); |
5093 |
10 Sep 09 |
nicklas |
163 |
if (!Float.isNaN(value)) |
5093 |
10 Sep 09 |
nicklas |
164 |
{ |
5093 |
10 Sep 09 |
nicklas |
165 |
extraBatchers[i].insert(dataCubeColumn, position, value); |
5093 |
10 Sep 09 |
nicklas |
166 |
} |
5093 |
10 Sep 09 |
nicklas |
167 |
} |
5093 |
10 Sep 09 |
nicklas |
168 |
} |
5093 |
10 Sep 09 |
nicklas |
169 |
} |
5093 |
10 Sep 09 |
nicklas |
170 |
offset += numAssayFields; |
5093 |
10 Sep 09 |
nicklas |
171 |
} |
5093 |
10 Sep 09 |
nicklas |
172 |
} |
5093 |
10 Sep 09 |
nicklas |
173 |
} |
5093 |
10 Sep 09 |
nicklas |
174 |
// ---------------------------------------------------- |
5093 |
10 Sep 09 |
nicklas |
175 |
|
5093 |
10 Sep 09 |
nicklas |
176 |
|
5093 |
10 Sep 09 |
nicklas |
177 |
} |