5623 |
06 May 11 |
nicklas |
1 |
/** |
5623 |
06 May 11 |
nicklas |
$Id$ |
5623 |
06 May 11 |
nicklas |
3 |
|
5623 |
06 May 11 |
nicklas |
Copyright (C) 2011 Nicklas Nordborg |
5623 |
06 May 11 |
nicklas |
5 |
|
5623 |
06 May 11 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5623 |
06 May 11 |
nicklas |
Available at http://base.thep.lu.se/ |
5623 |
06 May 11 |
nicklas |
8 |
|
5623 |
06 May 11 |
nicklas |
BASE is free software; you can redistribute it and/or |
5623 |
06 May 11 |
nicklas |
modify it under the terms of the GNU General Public License |
5623 |
06 May 11 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5623 |
06 May 11 |
nicklas |
of the License, or (at your option) any later version. |
5623 |
06 May 11 |
nicklas |
13 |
|
5623 |
06 May 11 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5623 |
06 May 11 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5623 |
06 May 11 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5623 |
06 May 11 |
nicklas |
GNU General Public License for more details. |
5623 |
06 May 11 |
nicklas |
18 |
|
5623 |
06 May 11 |
nicklas |
You should have received a copy of the GNU General Public License |
5623 |
06 May 11 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5623 |
06 May 11 |
nicklas |
21 |
*/ |
5623 |
06 May 11 |
nicklas |
22 |
package net.sf.basedb.util.affymetrix; |
5623 |
06 May 11 |
nicklas |
23 |
|
5623 |
06 May 11 |
nicklas |
24 |
import affymetrix.fusion.cdf.FusionCDFData; |
5623 |
06 May 11 |
nicklas |
25 |
import net.sf.basedb.core.ArrayDesign; |
5623 |
06 May 11 |
nicklas |
26 |
import net.sf.basedb.core.BaseException; |
5623 |
06 May 11 |
nicklas |
27 |
import net.sf.basedb.core.File; |
5623 |
06 May 11 |
nicklas |
28 |
import net.sf.basedb.core.InvalidDataException; |
5623 |
06 May 11 |
nicklas |
29 |
import net.sf.basedb.core.InvalidUseOfNullException; |
5623 |
06 May 11 |
nicklas |
30 |
import net.sf.basedb.core.ItemNotFoundException; |
5623 |
06 May 11 |
nicklas |
31 |
import net.sf.basedb.core.ProgressReporter; |
5623 |
06 May 11 |
nicklas |
32 |
import net.sf.basedb.core.Reporter; |
5623 |
06 May 11 |
nicklas |
33 |
import net.sf.basedb.core.ReporterBatcher; |
5623 |
06 May 11 |
nicklas |
34 |
import net.sf.basedb.core.ReporterType; |
5623 |
06 May 11 |
nicklas |
35 |
import net.sf.basedb.core.data.ReporterData; |
5623 |
06 May 11 |
nicklas |
36 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
5623 |
06 May 11 |
nicklas |
37 |
|
5623 |
06 May 11 |
nicklas |
38 |
/** |
5623 |
06 May 11 |
nicklas |
Helper class for working with Affymetrix CDF files. |
5623 |
06 May 11 |
nicklas |
40 |
|
5623 |
06 May 11 |
nicklas |
@author Nicklas |
5623 |
06 May 11 |
nicklas |
@since 3.0 |
5623 |
06 May 11 |
nicklas |
@base.modified $Date$ |
5623 |
06 May 11 |
nicklas |
44 |
*/ |
5623 |
06 May 11 |
nicklas |
45 |
public class CdfValidator |
5623 |
06 May 11 |
nicklas |
46 |
{ |
5623 |
06 May 11 |
nicklas |
47 |
|
5623 |
06 May 11 |
nicklas |
48 |
/** |
5623 |
06 May 11 |
nicklas |
Create a new instance. |
5623 |
06 May 11 |
nicklas |
50 |
*/ |
5623 |
06 May 11 |
nicklas |
51 |
public CdfValidator() |
5623 |
06 May 11 |
nicklas |
52 |
{} |
5623 |
06 May 11 |
nicklas |
53 |
|
5623 |
06 May 11 |
nicklas |
54 |
/** |
5623 |
06 May 11 |
nicklas |
Load a CDF file using the Affymetric Fusion SDK. This method |
5623 |
06 May 11 |
nicklas |
checks that the file exists, and read all headers. |
5623 |
06 May 11 |
nicklas |
57 |
|
5623 |
06 May 11 |
nicklas |
@param cdfFile The file to load |
5623 |
06 May 11 |
nicklas |
@return A FusionCDFData object representing the CDF file |
5623 |
06 May 11 |
nicklas |
@throws ItemNotFoundException If the actual file is not on the server |
5623 |
06 May 11 |
nicklas |
@throws InvalidDataException If the file is not a CDF file |
5623 |
06 May 11 |
nicklas |
62 |
*/ |
5623 |
06 May 11 |
nicklas |
63 |
public FusionCDFData loadCdfFile(File cdfFile) |
5623 |
06 May 11 |
nicklas |
64 |
throws ItemNotFoundException, InvalidDataException |
5623 |
06 May 11 |
nicklas |
65 |
{ |
5623 |
06 May 11 |
nicklas |
66 |
if (cdfFile == null) throw new InvalidUseOfNullException("cdfFile"); |
5623 |
06 May 11 |
nicklas |
67 |
if (!cdfFile.getLocation().isDownloadable()) |
5623 |
06 May 11 |
nicklas |
68 |
{ |
5623 |
06 May 11 |
nicklas |
69 |
throw new ItemNotFoundException("Can't download file '" + |
5623 |
06 May 11 |
nicklas |
70 |
cdfFile.getName() + "'; location=" + cdfFile.getLocation()); |
5623 |
06 May 11 |
nicklas |
71 |
} |
5623 |
06 May 11 |
nicklas |
72 |
FusionCDFData cdf = new FusionCDFData(); |
5623 |
06 May 11 |
nicklas |
73 |
cdf.setFileName(cdfFile.getName()); |
5623 |
06 May 11 |
nicklas |
74 |
cdf.setInputStream(cdfFile.getDownloadStream(0)); |
5623 |
06 May 11 |
nicklas |
75 |
if (!cdf.readHeader()) |
5623 |
06 May 11 |
nicklas |
76 |
{ |
5623 |
06 May 11 |
nicklas |
77 |
throw new InvalidDataException("Could not read CDF file '" + |
5623 |
06 May 11 |
nicklas |
78 |
cdfFile.getPath()+"': " + cdf.getError()); |
5623 |
06 May 11 |
nicklas |
79 |
} |
5623 |
06 May 11 |
nicklas |
80 |
return cdf; |
5623 |
06 May 11 |
nicklas |
81 |
} |
5623 |
06 May 11 |
nicklas |
82 |
|
5623 |
06 May 11 |
nicklas |
83 |
/** |
5623 |
06 May 11 |
nicklas |
Copy metadata from the CDF file to the array design. |
5623 |
06 May 11 |
nicklas |
This sets the number of features to the number of |
5623 |
06 May 11 |
nicklas |
probesets. |
5623 |
06 May 11 |
nicklas |
87 |
|
5623 |
06 May 11 |
nicklas |
@param cdf The CDF file |
5623 |
06 May 11 |
nicklas |
@param design The array design |
5623 |
06 May 11 |
nicklas |
90 |
*/ |
5623 |
06 May 11 |
nicklas |
91 |
public void copyMetadata(FusionCDFData cdf, ArrayDesign design) |
5623 |
06 May 11 |
nicklas |
92 |
{ |
5623 |
06 May 11 |
nicklas |
93 |
design.setNumFileFeatures(cdf.getHeader().getNumProbeSets()); |
5623 |
06 May 11 |
nicklas |
94 |
} |
5623 |
06 May 11 |
nicklas |
95 |
|
5623 |
06 May 11 |
nicklas |
96 |
/** |
5623 |
06 May 11 |
nicklas |
Reset metadata on the given array design. This set the |
5623 |
06 May 11 |
nicklas |
number of features to 0. |
5623 |
06 May 11 |
nicklas |
@param design The array design |
5623 |
06 May 11 |
nicklas |
100 |
*/ |
5623 |
06 May 11 |
nicklas |
101 |
public void resetMetadata(ArrayDesign design) |
5623 |
06 May 11 |
nicklas |
102 |
{ |
5623 |
06 May 11 |
nicklas |
103 |
design.setNumFileFeatures(0); |
5623 |
06 May 11 |
nicklas |
104 |
} |
5623 |
06 May 11 |
nicklas |
105 |
|
5623 |
06 May 11 |
nicklas |
106 |
/** |
5623 |
06 May 11 |
nicklas |
Verify if probesets in a CDF file exists as reporters and |
5623 |
06 May 11 |
nicklas |
optionally create those that doesn't exists. This method will clear |
5623 |
06 May 11 |
nicklas |
the CDF file, read it, and then clear it again. The calling code is |
5623 |
06 May 11 |
nicklas |
responsible for committing the transaction. |
5623 |
06 May 11 |
nicklas |
111 |
|
5623 |
06 May 11 |
nicklas |
@param cdf The CDF file |
5623 |
06 May 11 |
nicklas |
@param batcher A reporter batcher used to check and insert reporters |
5623 |
06 May 11 |
nicklas |
@param create TRUE to create non-existing reporters, FALSE to throw |
5623 |
06 May 11 |
nicklas |
an ItemNotFoundException |
5623 |
06 May 11 |
nicklas |
@param type The reporter type to assign to new reporters (if create is true) |
5623 |
06 May 11 |
nicklas |
@param progress An optional progress reporter |
5623 |
06 May 11 |
nicklas |
@return The number of created reporter |
5623 |
06 May 11 |
nicklas |
@throws ItemNotFoundException If create is FALSE and a reporter isn't found |
5623 |
06 May 11 |
nicklas |
@throws BaseException If there is another error |
5623 |
06 May 11 |
nicklas |
@since 2.4 |
5623 |
06 May 11 |
nicklas |
122 |
*/ |
5623 |
06 May 11 |
nicklas |
123 |
public int checkIfReportersExists(FusionCDFData cdf, ReporterBatcher batcher, |
5623 |
06 May 11 |
nicklas |
124 |
boolean create, ReporterType type, ProgressReporter progress) |
5623 |
06 May 11 |
nicklas |
125 |
throws ItemNotFoundException, BaseException |
5623 |
06 May 11 |
nicklas |
126 |
{ |
5623 |
06 May 11 |
nicklas |
127 |
if (batcher == null) throw new InvalidUseOfNullException("batcher"); |
5623 |
06 May 11 |
nicklas |
128 |
if (cdf == null) throw new InvalidUseOfNullException("cdf"); |
5623 |
06 May 11 |
nicklas |
129 |
|
5623 |
06 May 11 |
nicklas |
130 |
final boolean hasType = type != null; |
5623 |
06 May 11 |
nicklas |
131 |
final boolean hasProgress = progress != null; |
5623 |
06 May 11 |
nicklas |
132 |
int numCreated = 0; |
5623 |
06 May 11 |
nicklas |
133 |
|
5623 |
06 May 11 |
nicklas |
134 |
if (hasProgress) progress.display(5, "Reading CDF file..."); |
5623 |
06 May 11 |
nicklas |
135 |
if (!cdf.read()) |
5623 |
06 May 11 |
nicklas |
136 |
{ |
5623 |
06 May 11 |
nicklas |
137 |
throw new BaseException("Error reading CDF file: " + cdf.getError()); |
5623 |
06 May 11 |
nicklas |
138 |
} |
5623 |
06 May 11 |
nicklas |
139 |
int numProbesets = cdf.getHeader().getNumProbeSets(); |
5623 |
06 May 11 |
nicklas |
140 |
int progressInterval = numProbesets / 50; |
5623 |
06 May 11 |
nicklas |
141 |
if (progressInterval < 10) progressInterval = 10; |
5623 |
06 May 11 |
nicklas |
142 |
|
5623 |
06 May 11 |
nicklas |
143 |
int index = 0; |
5623 |
06 May 11 |
nicklas |
144 |
while (index < numProbesets) |
5623 |
06 May 11 |
nicklas |
145 |
{ |
5623 |
06 May 11 |
nicklas |
146 |
ThreadSignalHandler.checkInterrupted(); |
5623 |
06 May 11 |
nicklas |
147 |
String probesetId = cdf.getProbeSetName(index); |
5623 |
06 May 11 |
nicklas |
148 |
if (!batcher.exists(probesetId, true, true)) |
5623 |
06 May 11 |
nicklas |
149 |
{ |
5623 |
06 May 11 |
nicklas |
150 |
if (create) |
5623 |
06 May 11 |
nicklas |
151 |
{ |
5623 |
06 May 11 |
nicklas |
152 |
ReporterData reporter = Reporter.getNew(probesetId); |
5623 |
06 May 11 |
nicklas |
153 |
if (hasType) Reporter.setReporterType(reporter, type); |
5623 |
06 May 11 |
nicklas |
154 |
reporter.setName(probesetId); |
5623 |
06 May 11 |
nicklas |
155 |
batcher.insert(reporter); |
5623 |
06 May 11 |
nicklas |
156 |
++numCreated; |
5623 |
06 May 11 |
nicklas |
157 |
} |
5623 |
06 May 11 |
nicklas |
158 |
else |
5623 |
06 May 11 |
nicklas |
159 |
{ |
5623 |
06 May 11 |
nicklas |
160 |
throw new ItemNotFoundException("Reporter[externalId=" + probesetId+"]"); |
5623 |
06 May 11 |
nicklas |
161 |
} |
5623 |
06 May 11 |
nicklas |
162 |
} |
5623 |
06 May 11 |
nicklas |
163 |
++index; |
5623 |
06 May 11 |
nicklas |
164 |
if (hasProgress && (index % progressInterval == 0)) |
5623 |
06 May 11 |
nicklas |
165 |
{ |
5623 |
06 May 11 |
nicklas |
166 |
int percent = 5 + (int)(90L * (long)index / numProbesets); |
5623 |
06 May 11 |
nicklas |
167 |
int remain = numProbesets - index; |
5623 |
06 May 11 |
nicklas |
168 |
String created = create ? "; " + numCreated + " created" : ""; |
5623 |
06 May 11 |
nicklas |
169 |
String msg = "Checking probsets (" + index + " done" + created + "; " + remain + " remaining)..."; |
5623 |
06 May 11 |
nicklas |
170 |
progress.display(percent, msg); |
5623 |
06 May 11 |
nicklas |
171 |
} |
5623 |
06 May 11 |
nicklas |
172 |
} |
5623 |
06 May 11 |
nicklas |
173 |
batcher.flush(); |
5623 |
06 May 11 |
nicklas |
174 |
cdf.clear(); |
5623 |
06 May 11 |
nicklas |
175 |
if (hasProgress) progress.display(100, "Checked: " + numProbesets + "; created: " + numCreated); |
5623 |
06 May 11 |
nicklas |
176 |
return numCreated; |
5623 |
06 May 11 |
nicklas |
177 |
} |
5623 |
06 May 11 |
nicklas |
178 |
|
5623 |
06 May 11 |
nicklas |
179 |
} |