5093 |
10 Sep 09 |
nicklas |
1 |
/** |
5093 |
10 Sep 09 |
nicklas |
$Id$ |
5093 |
10 Sep 09 |
nicklas |
3 |
|
5093 |
10 Sep 09 |
nicklas |
Copyright (C) 2009 Nicklas Nordborg |
5093 |
10 Sep 09 |
nicklas |
5 |
|
5093 |
10 Sep 09 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5093 |
10 Sep 09 |
nicklas |
Available at http://base.thep.lu.se/ |
5093 |
10 Sep 09 |
nicklas |
8 |
|
5093 |
10 Sep 09 |
nicklas |
BASE is free software; you can redistribute it and/or |
5093 |
10 Sep 09 |
nicklas |
modify it under the terms of the GNU General Public License |
5093 |
10 Sep 09 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5093 |
10 Sep 09 |
nicklas |
of the License, or (at your option) any later version. |
5093 |
10 Sep 09 |
nicklas |
13 |
|
5093 |
10 Sep 09 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5093 |
10 Sep 09 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5093 |
10 Sep 09 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5093 |
10 Sep 09 |
nicklas |
GNU General Public License for more details. |
5093 |
10 Sep 09 |
nicklas |
18 |
|
5093 |
10 Sep 09 |
nicklas |
You should have received a copy of the GNU General Public License |
5093 |
10 Sep 09 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5093 |
10 Sep 09 |
nicklas |
21 |
*/ |
5093 |
10 Sep 09 |
nicklas |
22 |
package net.sf.basedb.util.importer.spotdata; |
5093 |
10 Sep 09 |
nicklas |
23 |
|
5093 |
10 Sep 09 |
nicklas |
24 |
import java.io.IOException; |
5093 |
10 Sep 09 |
nicklas |
25 |
import java.util.List; |
5093 |
10 Sep 09 |
nicklas |
26 |
|
5093 |
10 Sep 09 |
nicklas |
27 |
import net.sf.basedb.core.AnyToAny; |
5093 |
10 Sep 09 |
nicklas |
28 |
import net.sf.basedb.core.DbControl; |
5093 |
10 Sep 09 |
nicklas |
29 |
import net.sf.basedb.core.ItemNotFoundException; |
5093 |
10 Sep 09 |
nicklas |
30 |
import net.sf.basedb.core.ReporterBatcher; |
5093 |
10 Sep 09 |
nicklas |
31 |
import net.sf.basedb.core.ReporterList; |
5093 |
10 Sep 09 |
nicklas |
32 |
import net.sf.basedb.core.Transformation; |
5093 |
10 Sep 09 |
nicklas |
33 |
import net.sf.basedb.core.data.ReporterData; |
5405 |
10 Sep 10 |
nicklas |
34 |
import net.sf.basedb.core.signal.ThreadSignalHandler; |
5093 |
10 Sep 09 |
nicklas |
35 |
import net.sf.basedb.util.Values; |
5093 |
10 Sep 09 |
nicklas |
36 |
import net.sf.basedb.util.basefile.BaseFileParser; |
5093 |
10 Sep 09 |
nicklas |
37 |
import net.sf.basedb.util.basefile.BaseFileSectionParser; |
5093 |
10 Sep 09 |
nicklas |
38 |
import net.sf.basedb.util.parser.FlatFileParser; |
5093 |
10 Sep 09 |
nicklas |
39 |
|
5093 |
10 Sep 09 |
nicklas |
40 |
/** |
5093 |
10 Sep 09 |
nicklas |
Parses the 'reporterlist' section of a BASEfile. This section |
5093 |
10 Sep 09 |
nicklas |
is optional. If it exists it must have 'reporterId' and, optionally, 'score' |
5093 |
10 Sep 09 |
nicklas |
data columns in the 'columns' header. The name of the new reporter |
5093 |
10 Sep 09 |
nicklas |
list can be set by the 'name' header. If this doesn't exist a |
5093 |
10 Sep 09 |
nicklas |
name is generated automatically. |
5096 |
14 Sep 09 |
nicklas |
<p> |
5096 |
14 Sep 09 |
nicklas |
NOTE! The column names can be redefined by calling |
5096 |
14 Sep 09 |
nicklas |
{@link BaseFileParser#setRedefinedColumnName(String, String, String)} |
5093 |
10 Sep 09 |
nicklas |
49 |
|
5093 |
10 Sep 09 |
nicklas |
@author Nicklas |
5093 |
10 Sep 09 |
nicklas |
@version 2.14 |
5093 |
10 Sep 09 |
nicklas |
@base.modified $Date$ |
5093 |
10 Sep 09 |
nicklas |
53 |
*/ |
5093 |
10 Sep 09 |
nicklas |
54 |
public class SectionReporterListParser |
5093 |
10 Sep 09 |
nicklas |
55 |
implements BaseFileSectionParser |
5093 |
10 Sep 09 |
nicklas |
56 |
{ |
5093 |
10 Sep 09 |
nicklas |
57 |
|
5093 |
10 Sep 09 |
nicklas |
58 |
private final DbControl dc; |
5093 |
10 Sep 09 |
nicklas |
59 |
private final BaseFileInfo info; |
5093 |
10 Sep 09 |
nicklas |
60 |
private final Transformation transformation; |
5093 |
10 Sep 09 |
nicklas |
61 |
|
5093 |
10 Sep 09 |
nicklas |
62 |
private int sectionCount; |
5093 |
10 Sep 09 |
nicklas |
63 |
private final String totalBytes; |
5093 |
10 Sep 09 |
nicklas |
64 |
private final long progressReportInterval; |
5093 |
10 Sep 09 |
nicklas |
65 |
private long nextProgressReport; |
5093 |
10 Sep 09 |
nicklas |
66 |
|
5093 |
10 Sep 09 |
nicklas |
67 |
public SectionReporterListParser(DbControl dc, BaseFileInfo info, Transformation transformation) |
5093 |
10 Sep 09 |
nicklas |
68 |
{ |
5093 |
10 Sep 09 |
nicklas |
69 |
this.dc = dc; |
5093 |
10 Sep 09 |
nicklas |
70 |
this.info = info; |
5093 |
10 Sep 09 |
nicklas |
71 |
this.transformation = transformation; |
5093 |
10 Sep 09 |
nicklas |
72 |
this.progressReportInterval = info.getSize() / 100; |
5093 |
10 Sep 09 |
nicklas |
73 |
this.nextProgressReport = progressReportInterval; |
5093 |
10 Sep 09 |
nicklas |
74 |
this.totalBytes = Values.formatBytes(info.getSize()); |
5093 |
10 Sep 09 |
nicklas |
75 |
} |
5093 |
10 Sep 09 |
nicklas |
76 |
|
5093 |
10 Sep 09 |
nicklas |
77 |
/* |
5093 |
10 Sep 09 |
nicklas |
From the BaseFileSectionParser interface |
5093 |
10 Sep 09 |
nicklas |
79 |
----------------------------------------- |
5093 |
10 Sep 09 |
nicklas |
80 |
*/ |
5093 |
10 Sep 09 |
nicklas |
81 |
@Override |
5093 |
10 Sep 09 |
nicklas |
82 |
public void parseSection(BaseFileParser parser, FlatFileParser ffp) |
5093 |
10 Sep 09 |
nicklas |
83 |
throws IOException |
5093 |
10 Sep 09 |
nicklas |
84 |
{ |
5093 |
10 Sep 09 |
nicklas |
85 |
++sectionCount; |
5093 |
10 Sep 09 |
nicklas |
86 |
String filename = info.getName(); |
5093 |
10 Sep 09 |
nicklas |
87 |
|
5093 |
10 Sep 09 |
nicklas |
// Parse the headers and verify that they meet the requirements |
5093 |
10 Sep 09 |
nicklas |
89 |
ffp.parseHeaders(); |
5093 |
10 Sep 09 |
nicklas |
90 |
int line = ffp.getParsedLines(); |
5093 |
10 Sep 09 |
nicklas |
91 |
List<String> columns = parser.getRequiredHeader(ffp, "columns", "\\t", "reporterlist", filename); |
5096 |
14 Sep 09 |
nicklas |
92 |
int reporterIndex = parser.getRequiredIndex(columns, |
5096 |
14 Sep 09 |
nicklas |
93 |
parser.getRedefinedColumnName("reporterlist", "reporterId"), "columns", "reporterlist", line, filename); |
5096 |
14 Sep 09 |
nicklas |
94 |
int scoreIndex = columns.indexOf(parser.getRedefinedColumnName("reporterlist", "score")); |
5093 |
10 Sep 09 |
nicklas |
95 |
|
5093 |
10 Sep 09 |
nicklas |
96 |
String name = Values.getStringOrNull(ffp.getHeader("name")); |
5093 |
10 Sep 09 |
nicklas |
97 |
if (name == null) name = transformation.getName() + "." + sectionCount; |
5093 |
10 Sep 09 |
nicklas |
98 |
|
5093 |
10 Sep 09 |
nicklas |
99 |
ReporterList list = ReporterList.getNew(dc); |
5093 |
10 Sep 09 |
nicklas |
100 |
list.setName(name); |
5093 |
10 Sep 09 |
nicklas |
101 |
list.setExperiment(transformation.getExperiment()); |
5093 |
10 Sep 09 |
nicklas |
102 |
|
5093 |
10 Sep 09 |
nicklas |
103 |
FlatFileParser.Data data; |
5093 |
10 Sep 09 |
nicklas |
104 |
ReporterBatcher reporters = ReporterBatcher.getNew(dc); |
5093 |
10 Sep 09 |
nicklas |
105 |
|
5093 |
10 Sep 09 |
nicklas |
106 |
while ((data = ffp.nextData()) != null) |
5093 |
10 Sep 09 |
nicklas |
107 |
{ |
5405 |
10 Sep 10 |
nicklas |
108 |
ThreadSignalHandler.checkInterrupted(); |
5093 |
10 Sep 09 |
nicklas |
109 |
long parsedBytes = ffp.getParsedBytes(); |
5093 |
10 Sep 09 |
nicklas |
110 |
if (parsedBytes >= nextProgressReport) |
5093 |
10 Sep 09 |
nicklas |
111 |
{ |
5093 |
10 Sep 09 |
nicklas |
112 |
nextProgressReport = parsedBytes + progressReportInterval; |
5093 |
10 Sep 09 |
nicklas |
113 |
parser.setProgress(parsedBytes, "Importing reporter list '" + name + "': " + |
5093 |
10 Sep 09 |
nicklas |
114 |
Values.formatBytes(parsedBytes) + " of " + totalBytes); |
5093 |
10 Sep 09 |
nicklas |
115 |
} |
5093 |
10 Sep 09 |
nicklas |
116 |
|
7665 |
20 Mar 19 |
nicklas |
117 |
String reporterId = Values.getStringOrNull(data.getString(reporterIndex)); |
5093 |
10 Sep 09 |
nicklas |
// Ignore missing reporters |
5093 |
10 Sep 09 |
nicklas |
119 |
if (reporterId == null) continue; |
5093 |
10 Sep 09 |
nicklas |
120 |
|
5093 |
10 Sep 09 |
nicklas |
121 |
ReporterData reporter = reporters.getByExternalId(reporterId, false); |
5093 |
10 Sep 09 |
nicklas |
122 |
if (reporter == null) |
5093 |
10 Sep 09 |
nicklas |
123 |
{ |
5093 |
10 Sep 09 |
nicklas |
124 |
throw new ItemNotFoundException("Reporter[externalId=" + reporterId + |
5093 |
10 Sep 09 |
nicklas |
125 |
"] in section 'reporterlist' at line " + |
5093 |
10 Sep 09 |
nicklas |
126 |
ffp.getParsedLines() + " in file '" +filename + "'"); |
5093 |
10 Sep 09 |
nicklas |
127 |
} |
5093 |
10 Sep 09 |
nicklas |
// If no score is provided we use the line number as some kind of 'rank' |
5093 |
10 Sep 09 |
nicklas |
129 |
Float score = scoreIndex == -1 ? |
7665 |
20 Mar 19 |
nicklas |
130 |
data.dataLineNo() : data.getFloat(scoreIndex); |
5093 |
10 Sep 09 |
nicklas |
131 |
list.addReporter(reporter, score); |
5093 |
10 Sep 09 |
nicklas |
132 |
} |
5093 |
10 Sep 09 |
nicklas |
133 |
|
5093 |
10 Sep 09 |
nicklas |
134 |
dc.saveItem(list); |
5093 |
10 Sep 09 |
nicklas |
135 |
AnyToAny any2Any = AnyToAny.getNew(dc, transformation, list, "reporterList." + sectionCount, false); |
5093 |
10 Sep 09 |
nicklas |
136 |
dc.saveItem(any2Any); |
5093 |
10 Sep 09 |
nicklas |
137 |
} |
5093 |
10 Sep 09 |
nicklas |
138 |
// ----------------------------------------- |
5093 |
10 Sep 09 |
nicklas |
139 |
} |