5764 |
27 Sep 11 |
nicklas |
1 |
/** |
5764 |
27 Sep 11 |
nicklas |
$Id$ |
5764 |
27 Sep 11 |
nicklas |
3 |
|
5764 |
27 Sep 11 |
nicklas |
Copyright (C) 2011 Nicklas Nordborg |
5764 |
27 Sep 11 |
nicklas |
5 |
|
5764 |
27 Sep 11 |
nicklas |
This file is part of BASE - BioArray Software Environment. |
5764 |
27 Sep 11 |
nicklas |
Available at http://base.thep.lu.se/ |
5764 |
27 Sep 11 |
nicklas |
8 |
|
5764 |
27 Sep 11 |
nicklas |
BASE is free software; you can redistribute it and/or |
5764 |
27 Sep 11 |
nicklas |
modify it under the terms of the GNU General Public License |
5764 |
27 Sep 11 |
nicklas |
as published by the Free Software Foundation; either version 3 |
5764 |
27 Sep 11 |
nicklas |
of the License, or (at your option) any later version. |
5764 |
27 Sep 11 |
nicklas |
13 |
|
5764 |
27 Sep 11 |
nicklas |
BASE is distributed in the hope that it will be useful, |
5764 |
27 Sep 11 |
nicklas |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
5764 |
27 Sep 11 |
nicklas |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
5764 |
27 Sep 11 |
nicklas |
GNU General Public License for more details. |
5764 |
27 Sep 11 |
nicklas |
18 |
|
5764 |
27 Sep 11 |
nicklas |
You should have received a copy of the GNU General Public License |
5764 |
27 Sep 11 |
nicklas |
along with BASE. If not, see <http://www.gnu.org/licenses/>. |
5764 |
27 Sep 11 |
nicklas |
21 |
*/ |
5764 |
27 Sep 11 |
nicklas |
22 |
package net.sf.basedb.util.gtf; |
5764 |
27 Sep 11 |
nicklas |
23 |
|
5764 |
27 Sep 11 |
nicklas |
24 |
import java.io.BufferedReader; |
5764 |
27 Sep 11 |
nicklas |
25 |
import java.io.IOException; |
5764 |
27 Sep 11 |
nicklas |
26 |
import java.io.InputStream; |
5764 |
27 Sep 11 |
nicklas |
27 |
import java.io.InputStreamReader; |
5764 |
27 Sep 11 |
nicklas |
28 |
|
5764 |
27 Sep 11 |
nicklas |
29 |
import net.sf.basedb.core.ArrayDesign; |
5764 |
27 Sep 11 |
nicklas |
30 |
import net.sf.basedb.core.DataFileType; |
5764 |
27 Sep 11 |
nicklas |
31 |
import net.sf.basedb.core.DbControl; |
5764 |
27 Sep 11 |
nicklas |
32 |
import net.sf.basedb.core.File; |
5764 |
27 Sep 11 |
nicklas |
33 |
import net.sf.basedb.core.FileSetMember; |
5764 |
27 Sep 11 |
nicklas |
34 |
import net.sf.basedb.core.InvalidDataException; |
5764 |
27 Sep 11 |
nicklas |
35 |
import net.sf.basedb.core.InvalidRelationException; |
6880 |
21 Apr 15 |
nicklas |
36 |
import net.sf.basedb.util.FileUtil; |
5764 |
27 Sep 11 |
nicklas |
37 |
import net.sf.basedb.util.fileset.SingleFileValidationAction; |
5764 |
27 Sep 11 |
nicklas |
38 |
|
5764 |
27 Sep 11 |
nicklas |
39 |
/** |
5764 |
27 Sep 11 |
nicklas |
Action for validation of GTF files in a file set. The |
5764 |
27 Sep 11 |
nicklas |
GTF file must be connected with an {@link ArrayDesign}. |
5764 |
27 Sep 11 |
nicklas |
<p> |
5764 |
27 Sep 11 |
nicklas |
43 |
|
5764 |
27 Sep 11 |
nicklas |
The validation uses the {@link GtfInputStream}. The number of lines |
5764 |
27 Sep 11 |
nicklas |
to parse is configurable. If the entire file is parsed, it will |
5764 |
27 Sep 11 |
nicklas |
set the {@link ArrayDesign#getNumFileFeatures()} value to the number |
5764 |
27 Sep 11 |
nicklas |
of unique transcript_id values found in the file. |
5764 |
27 Sep 11 |
nicklas |
48 |
|
5764 |
27 Sep 11 |
nicklas |
@author Nicklas |
5764 |
27 Sep 11 |
nicklas |
@since 3.0 |
5764 |
27 Sep 11 |
nicklas |
@base.modified $Date$ |
5764 |
27 Sep 11 |
nicklas |
52 |
*/ |
5764 |
27 Sep 11 |
nicklas |
53 |
public class GtfValidationAction |
5764 |
27 Sep 11 |
nicklas |
54 |
extends SingleFileValidationAction |
5764 |
27 Sep 11 |
nicklas |
55 |
{ |
5764 |
27 Sep 11 |
nicklas |
56 |
|
5764 |
27 Sep 11 |
nicklas |
57 |
private final DbControl dc; |
5764 |
27 Sep 11 |
nicklas |
58 |
private final ArrayDesign design; |
5764 |
27 Sep 11 |
nicklas |
59 |
private final int linesToParse; |
5764 |
27 Sep 11 |
nicklas |
60 |
|
5764 |
27 Sep 11 |
nicklas |
61 |
/** |
5764 |
27 Sep 11 |
nicklas |
Create a new action. |
5764 |
27 Sep 11 |
nicklas |
@param dc An open DbControl |
5764 |
27 Sep 11 |
nicklas |
@param design The array design the GTF file is connected with |
5764 |
27 Sep 11 |
nicklas |
@param linesToParse The maximum number of lines to parse from the GTF file. |
5764 |
27 Sep 11 |
nicklas |
Use {@link Integer#MAX_VALUE} to ensure that the entire file is parsed. |
5764 |
27 Sep 11 |
nicklas |
The lowest value is 10 |
5764 |
27 Sep 11 |
nicklas |
68 |
*/ |
5764 |
27 Sep 11 |
nicklas |
69 |
public GtfValidationAction(DbControl dc, ArrayDesign design, int linesToParse) |
5764 |
27 Sep 11 |
nicklas |
70 |
{ |
5764 |
27 Sep 11 |
nicklas |
71 |
super(DataFileType.REF_SEQ_GTF); |
5764 |
27 Sep 11 |
nicklas |
72 |
this.dc = dc; |
5764 |
27 Sep 11 |
nicklas |
73 |
this.design = design; |
5764 |
27 Sep 11 |
nicklas |
74 |
this.linesToParse = linesToParse < 10 ? 10 : linesToParse; |
5764 |
27 Sep 11 |
nicklas |
75 |
} |
5764 |
27 Sep 11 |
nicklas |
76 |
|
5764 |
27 Sep 11 |
nicklas |
77 |
/* |
5764 |
27 Sep 11 |
nicklas |
From the ValidationAction interface |
5764 |
27 Sep 11 |
nicklas |
79 |
----------------------------------- |
5764 |
27 Sep 11 |
nicklas |
80 |
*/ |
5764 |
27 Sep 11 |
nicklas |
81 |
@Override |
5764 |
27 Sep 11 |
nicklas |
82 |
public void validateAndExtractMetadata() |
5764 |
27 Sep 11 |
nicklas |
83 |
throws InvalidDataException, InvalidRelationException |
5764 |
27 Sep 11 |
nicklas |
84 |
{ |
5764 |
27 Sep 11 |
nicklas |
85 |
FileSetMember gtfMember = getAcceptedFile(); |
5764 |
27 Sep 11 |
nicklas |
86 |
File gtf = gtfMember.getFile(); |
5764 |
27 Sep 11 |
nicklas |
87 |
|
6880 |
21 Apr 15 |
nicklas |
88 |
InputStream in = null; |
6880 |
21 Apr 15 |
nicklas |
89 |
BufferedReader reader = null; |
5764 |
27 Sep 11 |
nicklas |
90 |
String charset = gtf.getCharacterSet(); |
5764 |
27 Sep 11 |
nicklas |
91 |
if (charset == null) charset = "ISO-8859-1"; |
5764 |
27 Sep 11 |
nicklas |
92 |
try |
5764 |
27 Sep 11 |
nicklas |
93 |
{ |
6880 |
21 Apr 15 |
nicklas |
94 |
in = gtf.getDownloadStream(0); |
5764 |
27 Sep 11 |
nicklas |
95 |
GtfInputStream gtfStream = new GtfInputStream(in, charset, false); |
6880 |
21 Apr 15 |
nicklas |
96 |
reader = new BufferedReader(new InputStreamReader(gtfStream, charset)); |
5764 |
27 Sep 11 |
nicklas |
97 |
|
5764 |
27 Sep 11 |
nicklas |
// Read 10 lines |
5764 |
27 Sep 11 |
nicklas |
99 |
int lineNo = 0; |
5764 |
27 Sep 11 |
nicklas |
100 |
while (reader.readLine() != null) |
5764 |
27 Sep 11 |
nicklas |
101 |
{ |
5764 |
27 Sep 11 |
nicklas |
102 |
++lineNo; |
5764 |
27 Sep 11 |
nicklas |
103 |
if (lineNo == linesToParse) break; |
5764 |
27 Sep 11 |
nicklas |
104 |
} |
5764 |
27 Sep 11 |
nicklas |
105 |
|
5764 |
27 Sep 11 |
nicklas |
// If we reached the end, use the real number, 0 otherwise |
5764 |
27 Sep 11 |
nicklas |
107 |
design.setNumFileFeatures(lineNo == linesToParse ? 0 : gtfStream.getNumUniqueTranscriptIds()); |
5764 |
27 Sep 11 |
nicklas |
108 |
} |
5764 |
27 Sep 11 |
nicklas |
109 |
catch (IOException ex) |
5764 |
27 Sep 11 |
nicklas |
110 |
{ |
5764 |
27 Sep 11 |
nicklas |
111 |
throw new InvalidDataException(ex); |
5764 |
27 Sep 11 |
nicklas |
112 |
} |
6880 |
21 Apr 15 |
nicklas |
113 |
finally |
6880 |
21 Apr 15 |
nicklas |
114 |
{ |
6880 |
21 Apr 15 |
nicklas |
115 |
FileUtil.close(reader); |
6880 |
21 Apr 15 |
nicklas |
116 |
FileUtil.close(in); |
6880 |
21 Apr 15 |
nicklas |
117 |
} |
5764 |
27 Sep 11 |
nicklas |
118 |
} |
5764 |
27 Sep 11 |
nicklas |
119 |
|
5764 |
27 Sep 11 |
nicklas |
120 |
@Override |
5764 |
27 Sep 11 |
nicklas |
121 |
public void resetMetadata() |
5764 |
27 Sep 11 |
nicklas |
122 |
{ |
5764 |
27 Sep 11 |
nicklas |
123 |
design.setNumFileFeatures(0); |
5764 |
27 Sep 11 |
nicklas |
124 |
} |
5764 |
27 Sep 11 |
nicklas |
125 |
// ----------------------------------- |
5764 |
27 Sep 11 |
nicklas |
126 |
|
5764 |
27 Sep 11 |
nicklas |
127 |
} |