5093 |
10 Sep 09 |
nicklas |
1 |
/** |
5093 |
10 Sep 09 |
nicklas |
$Id$ |
5093 |
10 Sep 09 |
nicklas |
3 |
|
5093 |
10 Sep 09 |
nicklas |
Copyright (C) 2009 Nicklas Nordborg |
5093 |
10 Sep 09 |
nicklas |
5 |
|
5093 |
10 Sep 09 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5093 |
10 Sep 09 |
nicklas |
Available at http://base.thep.lu.se/ |
5093 |
10 Sep 09 |
nicklas |
8 |
|
5093 |
10 Sep 09 |
nicklas |
BASE is free software; you can redistribute it and/or |
5093 |
10 Sep 09 |
nicklas |
modify it under the terms of the GNU General Public License |
5093 |
10 Sep 09 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5093 |
10 Sep 09 |
nicklas |
of the License, or (at your option) any later version. |
5093 |
10 Sep 09 |
nicklas |
13 |
|
5093 |
10 Sep 09 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5093 |
10 Sep 09 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5093 |
10 Sep 09 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5093 |
10 Sep 09 |
nicklas |
GNU General Public License for more details. |
5093 |
10 Sep 09 |
nicklas |
18 |
|
5093 |
10 Sep 09 |
nicklas |
You should have received a copy of the GNU General Public License |
5093 |
10 Sep 09 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5093 |
10 Sep 09 |
nicklas |
21 |
*/ |
5093 |
10 Sep 09 |
nicklas |
22 |
package net.sf.basedb.util.importer.spotdata; |
5093 |
10 Sep 09 |
nicklas |
23 |
|
5093 |
10 Sep 09 |
nicklas |
24 |
import java.io.IOException; |
5093 |
10 Sep 09 |
nicklas |
25 |
import java.util.Arrays; |
5093 |
10 Sep 09 |
nicklas |
26 |
import java.util.HashSet; |
5093 |
10 Sep 09 |
nicklas |
27 |
import java.util.Iterator; |
5093 |
10 Sep 09 |
nicklas |
28 |
import java.util.List; |
5093 |
10 Sep 09 |
nicklas |
29 |
import java.util.Set; |
5093 |
10 Sep 09 |
nicklas |
30 |
|
5093 |
10 Sep 09 |
nicklas |
31 |
import net.sf.basedb.core.BaseException; |
5093 |
10 Sep 09 |
nicklas |
32 |
import net.sf.basedb.core.DbControl; |
5093 |
10 Sep 09 |
nicklas |
33 |
import net.sf.basedb.util.Values; |
5093 |
10 Sep 09 |
nicklas |
34 |
import net.sf.basedb.util.basefile.BaseFileParser; |
5093 |
10 Sep 09 |
nicklas |
35 |
import net.sf.basedb.util.basefile.BaseFileSectionParser; |
5093 |
10 Sep 09 |
nicklas |
36 |
import net.sf.basedb.util.importer.spotdata.BaseFileInfo.ChildBioAssay; |
5093 |
10 Sep 09 |
nicklas |
37 |
import net.sf.basedb.util.parser.FlatFileParser; |
5093 |
10 Sep 09 |
nicklas |
38 |
|
5093 |
10 Sep 09 |
nicklas |
39 |
/** |
5093 |
10 Sep 09 |
nicklas |
Parses the 'assays' section of a BASEfile with bioassay set spot |
5093 |
10 Sep 09 |
nicklas |
data. This section is optional, but if it exists, it must define |
5093 |
10 Sep 09 |
nicklas |
'id' and 'name' columns of child bioassays. The 'id' must be the |
5093 |
10 Sep 09 |
nicklas |
same as the id of a parent bioassay unless a 'parents' column is |
5093 |
10 Sep 09 |
nicklas |
present. One or more parents can be given for each child. Separate |
5093 |
10 Sep 09 |
nicklas |
the values with a /. |
5096 |
14 Sep 09 |
nicklas |
<p> |
5096 |
14 Sep 09 |
nicklas |
NOTE! The column names can be redefined by calling {@link |
5096 |
14 Sep 09 |
nicklas |
BaseFileParser#setRedefinedColumnName(String, String, String)} |
5096 |
14 Sep 09 |
nicklas |
49 |
|
5093 |
10 Sep 09 |
nicklas |
<pre class="code"> |
5093 |
10 Sep 09 |
nicklas |
section assays |
5093 |
10 Sep 09 |
nicklas |
columns id\tname\tparents |
5093 |
10 Sep 09 |
nicklas |
count 2 |
5093 |
10 Sep 09 |
nicklas |
54 |
% |
5093 |
10 Sep 09 |
nicklas |
100 The first child 1/2/3 |
5093 |
10 Sep 09 |
nicklas |
101 The second child 4/5/6 |
5093 |
10 Sep 09 |
nicklas |
</pre> |
5093 |
10 Sep 09 |
nicklas |
58 |
|
5093 |
10 Sep 09 |
nicklas |
@author Nicklas |
5093 |
10 Sep 09 |
nicklas |
@version 2.14 |
5093 |
10 Sep 09 |
nicklas |
@base.modified $Date$ |
5093 |
10 Sep 09 |
nicklas |
62 |
*/ |
5093 |
10 Sep 09 |
nicklas |
63 |
public class SectionAssaysParser |
5093 |
10 Sep 09 |
nicklas |
64 |
implements BaseFileSectionParser |
5093 |
10 Sep 09 |
nicklas |
65 |
{ |
5093 |
10 Sep 09 |
nicklas |
66 |
|
5093 |
10 Sep 09 |
nicklas |
67 |
private final DbControl dc; |
5093 |
10 Sep 09 |
nicklas |
68 |
private final BaseFileInfo info; |
5093 |
10 Sep 09 |
nicklas |
69 |
|
5093 |
10 Sep 09 |
nicklas |
70 |
|
5093 |
10 Sep 09 |
nicklas |
71 |
/** |
5093 |
10 Sep 09 |
nicklas |
Creates a new 'section assays' parser. |
5093 |
10 Sep 09 |
nicklas |
@param dc The DbControl to use for database access |
5093 |
10 Sep 09 |
nicklas |
@param info Information collector for the file we are parsing |
5093 |
10 Sep 09 |
nicklas |
75 |
*/ |
5093 |
10 Sep 09 |
nicklas |
76 |
public SectionAssaysParser(DbControl dc, BaseFileInfo info) |
5093 |
10 Sep 09 |
nicklas |
77 |
{ |
5093 |
10 Sep 09 |
nicklas |
78 |
this.dc = dc; |
5093 |
10 Sep 09 |
nicklas |
79 |
this.info = info; |
5093 |
10 Sep 09 |
nicklas |
80 |
} |
5093 |
10 Sep 09 |
nicklas |
81 |
|
5093 |
10 Sep 09 |
nicklas |
82 |
/* |
5093 |
10 Sep 09 |
nicklas |
From the BaseFileSectionParser interface |
5093 |
10 Sep 09 |
nicklas |
84 |
----------------------------------------- |
5093 |
10 Sep 09 |
nicklas |
85 |
*/ |
5093 |
10 Sep 09 |
nicklas |
86 |
@Override |
5093 |
10 Sep 09 |
nicklas |
87 |
public void parseSection(BaseFileParser parser, FlatFileParser ffp) |
5093 |
10 Sep 09 |
nicklas |
88 |
throws IOException |
5093 |
10 Sep 09 |
nicklas |
89 |
{ |
5093 |
10 Sep 09 |
nicklas |
90 |
String filename = info.getName(); |
5093 |
10 Sep 09 |
nicklas |
91 |
|
5093 |
10 Sep 09 |
nicklas |
// Parse the headers and verify that they meet the requirements |
5093 |
10 Sep 09 |
nicklas |
93 |
ffp.parseHeaders(); |
5093 |
10 Sep 09 |
nicklas |
94 |
int line = ffp.getParsedLines(); |
5093 |
10 Sep 09 |
nicklas |
95 |
List<String> columns = parser.getRequiredHeader(ffp, "columns", "\\t", "assays", filename); |
5096 |
14 Sep 09 |
nicklas |
96 |
int idIndex = parser.getRequiredIndex(columns, |
5096 |
14 Sep 09 |
nicklas |
97 |
parser.getRedefinedColumnName("assays", "id"), "columns", "assays", line, filename); |
5096 |
14 Sep 09 |
nicklas |
98 |
int nameIndex = parser.getRequiredIndex(columns, |
5096 |
14 Sep 09 |
nicklas |
99 |
parser.getRedefinedColumnName("assays", "name"), "columns", "assays", line, filename); |
5096 |
14 Sep 09 |
nicklas |
100 |
int parentIndex = columns.indexOf(parser.getRedefinedColumnName("assays", "parents")); |
5093 |
10 Sep 09 |
nicklas |
101 |
|
5093 |
10 Sep 09 |
nicklas |
// Store for future reference |
5093 |
10 Sep 09 |
nicklas |
103 |
boolean hasParentAssaysMapping = parentIndex != -1; |
5093 |
10 Sep 09 |
nicklas |
104 |
info.setAssaysHaveParentAssaysMapping(hasParentAssaysMapping); |
5093 |
10 Sep 09 |
nicklas |
105 |
|
5093 |
10 Sep 09 |
nicklas |
// Read data part of the 'assays' section |
5093 |
10 Sep 09 |
nicklas |
// Each line has ID, NAME and PARENTS (optional) |
5093 |
10 Sep 09 |
nicklas |
108 |
ffp.setMinDataColumns(columns.size()); |
5093 |
10 Sep 09 |
nicklas |
109 |
FlatFileParser.Data data; |
5093 |
10 Sep 09 |
nicklas |
110 |
while ((data = ffp.nextData()) != null) |
5093 |
10 Sep 09 |
nicklas |
111 |
{ |
5093 |
10 Sep 09 |
nicklas |
112 |
line = ffp.getParsedLines(); |
5093 |
10 Sep 09 |
nicklas |
113 |
|
7665 |
20 Mar 19 |
nicklas |
114 |
int assayId = data.getInt(idIndex); |
5093 |
10 Sep 09 |
nicklas |
115 |
ChildBioAssay child = new ChildBioAssay(); |
5093 |
10 Sep 09 |
nicklas |
116 |
child.setFileId(assayId); |
7665 |
20 Mar 19 |
nicklas |
117 |
child.setName(data.getString(nameIndex)); |
5093 |
10 Sep 09 |
nicklas |
118 |
if (hasParentAssaysMapping) |
5093 |
10 Sep 09 |
nicklas |
119 |
{ |
7665 |
20 Mar 19 |
nicklas |
120 |
Set<Integer> parents = new HashSet<Integer>(Arrays.asList(Values.getInt(data.getString(parentIndex).split("/")))); |
5093 |
10 Sep 09 |
nicklas |
121 |
Iterator<Integer> it = parents.iterator(); |
5093 |
10 Sep 09 |
nicklas |
122 |
while (it.hasNext()) |
5093 |
10 Sep 09 |
nicklas |
123 |
{ |
5093 |
10 Sep 09 |
nicklas |
// The given parent must already exist in the database |
5093 |
10 Sep 09 |
nicklas |
125 |
info.checkBioAssay(dc, it.next(), line); |
5093 |
10 Sep 09 |
nicklas |
126 |
} |
5093 |
10 Sep 09 |
nicklas |
127 |
child.setParents(parents); |
5093 |
10 Sep 09 |
nicklas |
128 |
} |
5093 |
10 Sep 09 |
nicklas |
129 |
else |
5093 |
10 Sep 09 |
nicklas |
130 |
{ |
5093 |
10 Sep 09 |
nicklas |
// If no explicit parent mapping is defined, the id of the child must |
5093 |
10 Sep 09 |
nicklas |
// reference the parent bioassay |
5093 |
10 Sep 09 |
nicklas |
133 |
info.checkBioAssay(dc, assayId, line); |
5093 |
10 Sep 09 |
nicklas |
134 |
} |
5093 |
10 Sep 09 |
nicklas |
135 |
|
5093 |
10 Sep 09 |
nicklas |
136 |
if (!info.addChildAssay(child)) |
5093 |
10 Sep 09 |
nicklas |
137 |
{ |
5093 |
10 Sep 09 |
nicklas |
138 |
throw new BaseException("Duplicate assay id '" + assayId + |
5093 |
10 Sep 09 |
nicklas |
139 |
"' found in section 'assays' at line " + line + |
5093 |
10 Sep 09 |
nicklas |
140 |
" in file '" + filename + "'"); |
5093 |
10 Sep 09 |
nicklas |
141 |
} |
5093 |
10 Sep 09 |
nicklas |
142 |
} |
5093 |
10 Sep 09 |
nicklas |
143 |
} |
5093 |
10 Sep 09 |
nicklas |
144 |
// ------------------------------------------- |
5093 |
10 Sep 09 |
nicklas |
145 |
|
5093 |
10 Sep 09 |
nicklas |
146 |
|
5093 |
10 Sep 09 |
nicklas |
147 |
} |