779 |
05 Mar 07 |
peter |
// $Id$ |
779 |
05 Mar 07 |
peter |
2 |
|
779 |
05 Mar 07 |
peter |
3 |
/* |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2007 Peter Johansson |
4359 |
23 Aug 23 |
peter |
Copyright (C) 2008 Jari Häkkinen, Peter Johansson |
3439 |
20 Nov 15 |
peter |
Copyright (C) 2011, 2012, 2013, 2015 Peter Johansson |
779 |
05 Mar 07 |
peter |
7 |
|
1437 |
25 Aug 08 |
peter |
This file is part of the yat library, http://dev.thep.lu.se/yat |
779 |
05 Mar 07 |
peter |
9 |
|
779 |
05 Mar 07 |
peter |
The yat library is free software; you can redistribute it and/or |
779 |
05 Mar 07 |
peter |
modify it under the terms of the GNU General Public License as |
1486 |
09 Sep 08 |
jari |
published by the Free Software Foundation; either version 3 of the |
779 |
05 Mar 07 |
peter |
License, or (at your option) any later version. |
779 |
05 Mar 07 |
peter |
14 |
|
779 |
05 Mar 07 |
peter |
The yat library is distributed in the hope that it will be useful, |
779 |
05 Mar 07 |
peter |
but WITHOUT ANY WARRANTY; without even the implied warranty of |
779 |
05 Mar 07 |
peter |
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
779 |
05 Mar 07 |
peter |
General Public License for more details. |
779 |
05 Mar 07 |
peter |
19 |
|
779 |
05 Mar 07 |
peter |
You should have received a copy of the GNU General Public License |
1487 |
10 Sep 08 |
jari |
along with yat. If not, see <http://www.gnu.org/licenses/>. |
779 |
05 Mar 07 |
peter |
22 |
*/ |
779 |
05 Mar 07 |
peter |
23 |
|
2881 |
18 Nov 12 |
peter |
24 |
#include <config.h> |
2881 |
18 Nov 12 |
peter |
25 |
|
1246 |
17 Mar 08 |
peter |
26 |
#include "Suite.h" |
1246 |
17 Mar 08 |
peter |
27 |
|
822 |
18 Mar 07 |
peter |
28 |
#include "yat/classifier/DataLookupWeighted1D.h" |
2708 |
14 Mar 12 |
peter |
29 |
#include "yat/random/random.h" |
779 |
05 Mar 07 |
peter |
30 |
#include "yat/classifier/Target.h" |
2708 |
14 Mar 12 |
peter |
31 |
#include "yat/statistics/Averager.h" |
2556 |
20 Aug 11 |
peter |
32 |
#include "yat/statistics/Fisher.h" |
779 |
05 Mar 07 |
peter |
33 |
#include "yat/statistics/ROC.h" |
822 |
18 Mar 07 |
peter |
34 |
#include "yat/statistics/utility.h" |
1120 |
21 Feb 08 |
peter |
35 |
#include "yat/utility/Vector.h" |
779 |
05 Mar 07 |
peter |
36 |
|
2708 |
14 Mar 12 |
peter |
37 |
#include <gsl/gsl_cdf.h> |
2708 |
14 Mar 12 |
peter |
38 |
|
2556 |
20 Aug 11 |
peter |
39 |
#include <cassert> |
779 |
05 Mar 07 |
peter |
40 |
#include <cmath> |
2708 |
14 Mar 12 |
peter |
41 |
#include <deque> |
779 |
05 Mar 07 |
peter |
42 |
#include <fstream> |
779 |
05 Mar 07 |
peter |
43 |
#include <iostream> |
2708 |
14 Mar 12 |
peter |
44 |
#include <set> |
2708 |
14 Mar 12 |
peter |
45 |
#include <vector> |
779 |
05 Mar 07 |
peter |
46 |
|
779 |
05 Mar 07 |
peter |
47 |
using namespace theplu::yat; |
779 |
05 Mar 07 |
peter |
48 |
|
2601 |
30 Oct 11 |
peter |
49 |
void test_empty(test::Suite&); |
2708 |
14 Mar 12 |
peter |
50 |
void test_p_approx_weighted(test::Suite& suite); |
2708 |
14 Mar 12 |
peter |
51 |
void test_p_approx_with_ties(test::Suite& suite); |
2708 |
14 Mar 12 |
peter |
52 |
void test_p_approx(test::Suite& suite); |
2708 |
14 Mar 12 |
peter |
53 |
void test_p_double_weights(test::Suite& suite); |
2593 |
30 Oct 11 |
peter |
54 |
void test_p_exact(test::Suite& suite); |
2708 |
14 Mar 12 |
peter |
55 |
void test_p_exact_weighted(test::Suite& suite); |
2556 |
20 Aug 11 |
peter |
56 |
void test_p_exact_with_ties(test::Suite& suite); |
2708 |
14 Mar 12 |
peter |
57 |
void test_p_with_weights(test::Suite& suite); |
2718 |
12 Apr 12 |
peter |
58 |
void test_p_with_weights_and_ties(test::Suite& suite); |
2720 |
12 Apr 12 |
peter |
59 |
void test_remove(test::Suite& suite); |
2708 |
14 Mar 12 |
peter |
60 |
void test_ties(test::Suite& suite); |
2549 |
12 Aug 11 |
peter |
61 |
|
1246 |
17 Mar 08 |
peter |
62 |
int main(int argc, char* argv[]) |
2707 |
13 Mar 12 |
peter |
63 |
{ |
1246 |
17 Mar 08 |
peter |
64 |
test::Suite suite(argc, argv); |
779 |
05 Mar 07 |
peter |
65 |
|
1246 |
17 Mar 08 |
peter |
66 |
suite.err() << "testing ROC" << std::endl; |
1120 |
21 Feb 08 |
peter |
67 |
utility::Vector value(31); |
779 |
05 Mar 07 |
peter |
68 |
std::vector<std::string> label(31,"negative"); |
2707 |
13 Mar 12 |
peter |
69 |
for (size_t i=0; i<16; i++) |
779 |
05 Mar 07 |
peter |
70 |
label[i] = "positive"; |
779 |
05 Mar 07 |
peter |
71 |
classifier::Target target(label); |
2707 |
13 Mar 12 |
peter |
72 |
for (size_t i=0; i<value.size(); i++) |
779 |
05 Mar 07 |
peter |
73 |
value(i)=i; |
779 |
05 Mar 07 |
peter |
74 |
statistics::ROC roc; |
1145 |
25 Feb 08 |
peter |
75 |
add(roc, value.begin(), value.end(), target); |
821 |
18 Mar 07 |
peter |
76 |
double area = roc.area(); |
1246 |
17 Mar 08 |
peter |
77 |
if (!suite.equal(area,0.0)){ |
2707 |
13 Mar 12 |
peter |
78 |
suite.err() << "test_roc: area is " << area << " should be 0.0" |
2707 |
13 Mar 12 |
peter |
79 |
<< std::endl; |
1246 |
17 Mar 08 |
peter |
80 |
suite.add(false); |
779 |
05 Mar 07 |
peter |
81 |
} |
779 |
05 Mar 07 |
peter |
82 |
target.set_binary(0,false); |
779 |
05 Mar 07 |
peter |
83 |
target.set_binary(1,true); |
821 |
18 Mar 07 |
peter |
84 |
roc.reset(); |
1145 |
25 Feb 08 |
peter |
85 |
add(roc, value.begin(), value.end(), target); |
821 |
18 Mar 07 |
peter |
86 |
area = roc.area(); |
1246 |
17 Mar 08 |
peter |
87 |
if (!suite.equal(area,1.0)){ |
2707 |
13 Mar 12 |
peter |
88 |
suite.err() << "test_roc: area is " << area << " should be 1.0" |
2707 |
13 Mar 12 |
peter |
89 |
<< std::endl; |
1246 |
17 Mar 08 |
peter |
90 |
suite.add(false); |
779 |
05 Mar 07 |
peter |
91 |
} |
2707 |
13 Mar 12 |
peter |
92 |
|
3006 |
01 Apr 13 |
peter |
93 |
double p = roc.p_right(); |
821 |
18 Mar 07 |
peter |
94 |
double p2 = roc.p_value(); |
779 |
05 Mar 07 |
peter |
95 |
double p_matlab = 0.00000115; |
1246 |
17 Mar 08 |
peter |
96 |
if (!(p/p_matlab < 1.01 && p/p_matlab > 0.99)){ |
1246 |
17 Mar 08 |
peter |
97 |
suite.err() << "get_p_approx: p-value not correct" << std::endl; |
1246 |
17 Mar 08 |
peter |
98 |
suite.err() << p << " expected " << p_matlab << std::endl; |
1246 |
17 Mar 08 |
peter |
99 |
suite.add(false); |
779 |
05 Mar 07 |
peter |
100 |
} |
1246 |
17 Mar 08 |
peter |
101 |
if (!(p2==2*p)) { |
1246 |
17 Mar 08 |
peter |
102 |
suite.add(false); |
3006 |
01 Apr 13 |
peter |
103 |
suite.err() << "Two-sided P-value should equal 2 * p_right.\n"; |
821 |
18 Mar 07 |
peter |
104 |
} |
779 |
05 Mar 07 |
peter |
105 |
roc.minimum_size() = 20; |
3006 |
01 Apr 13 |
peter |
106 |
p = roc.p_right(); |
821 |
18 Mar 07 |
peter |
107 |
p2 = roc.p_value(); |
1246 |
17 Mar 08 |
peter |
108 |
if (!( p < 1e-8 && p > 1e-9) ){ |
1246 |
17 Mar 08 |
peter |
109 |
suite.err() << "get_p_exact: p-value not correct" << std::endl; |
1246 |
17 Mar 08 |
peter |
110 |
suite.add(false); |
779 |
05 Mar 07 |
peter |
111 |
} |
1246 |
17 Mar 08 |
peter |
112 |
if (!( p2==2*p)) { |
1246 |
17 Mar 08 |
peter |
113 |
suite.add(false); |
3006 |
01 Apr 13 |
peter |
114 |
suite.err() << "Two-sided P-value should equal 2 * p_right.\n"; |
821 |
18 Mar 07 |
peter |
115 |
} |
2707 |
13 Mar 12 |
peter |
116 |
|
822 |
18 Mar 07 |
peter |
117 |
classifier::DataLookupWeighted1D dlw(target.size(),1.3); |
1145 |
25 Feb 08 |
peter |
118 |
add(roc, dlw.begin(), dlw.end(), target); |
2549 |
12 Aug 11 |
peter |
119 |
test_ties(suite); |
2556 |
20 Aug 11 |
peter |
120 |
test_p_approx_with_ties(suite); |
2556 |
20 Aug 11 |
peter |
121 |
test_p_exact_with_ties(suite); |
2593 |
30 Oct 11 |
peter |
122 |
test_p_approx(suite); |
2593 |
30 Oct 11 |
peter |
123 |
test_p_exact(suite); |
2601 |
30 Oct 11 |
peter |
124 |
test_empty(suite); |
2708 |
14 Mar 12 |
peter |
125 |
test_p_with_weights(suite); |
2720 |
12 Apr 12 |
peter |
126 |
test_remove(suite); |
1246 |
17 Mar 08 |
peter |
127 |
return suite.return_value(); |
779 |
05 Mar 07 |
peter |
128 |
} |
2549 |
12 Aug 11 |
peter |
129 |
|
2556 |
20 Aug 11 |
peter |
130 |
|
2556 |
20 Aug 11 |
peter |
131 |
void test_p_exact_with_ties(test::Suite& suite) |
2556 |
20 Aug 11 |
peter |
132 |
{ |
2556 |
20 Aug 11 |
peter |
133 |
suite.out() << "test p exact with ties\n"; |
2556 |
20 Aug 11 |
peter |
134 |
statistics::ROC roc; |
2556 |
20 Aug 11 |
peter |
135 |
/* |
2707 |
13 Mar 12 |
peter |
+++-- 6 |
2556 |
20 Aug 11 |
peter |
++-+- 5 4.5 *** our case *** |
2556 |
20 Aug 11 |
peter |
+-++- 4 4.5 |
2556 |
20 Aug 11 |
peter |
++--+ 4 3.5 |
2556 |
20 Aug 11 |
peter |
+-+-+ 3 3.5 |
2556 |
20 Aug 11 |
peter |
+--++ 2 2 |
2556 |
20 Aug 11 |
peter |
-+++- 3 3 |
2556 |
20 Aug 11 |
peter |
-++-+ 2 2 |
2595 |
30 Oct 11 |
peter |
-+-++ 1 0.5 *** our second case *** |
2556 |
20 Aug 11 |
peter |
--+++ 0 0.5 |
2556 |
20 Aug 11 |
peter |
146 |
*/ |
2556 |
20 Aug 11 |
peter |
147 |
roc.add(2, true); |
2556 |
20 Aug 11 |
peter |
148 |
roc.add(1, true); |
2556 |
20 Aug 11 |
peter |
149 |
roc.add(1, false); |
2556 |
20 Aug 11 |
peter |
150 |
roc.add(0, true); |
2556 |
20 Aug 11 |
peter |
151 |
roc.add(-1, false); |
2556 |
20 Aug 11 |
peter |
152 |
roc.area(); |
3006 |
01 Apr 13 |
peter |
153 |
if (!suite.equal(roc.p_right(), 3.0/10.0)) { |
2585 |
25 Oct 11 |
peter |
154 |
suite.add(false); |
3006 |
01 Apr 13 |
peter |
155 |
suite.out() << " p_right: expected 0.3\n"; |
2556 |
20 Aug 11 |
peter |
156 |
} |
2556 |
20 Aug 11 |
peter |
157 |
else |
2585 |
25 Oct 11 |
peter |
158 |
suite.add(true); |
2556 |
20 Aug 11 |
peter |
159 |
if (!suite.equal(roc.p_value(), 5.0/10.0)) { |
2585 |
25 Oct 11 |
peter |
160 |
suite.add(false); |
2585 |
25 Oct 11 |
peter |
161 |
suite.out() << " (two-sided) p_value: expected 0.5\n"; |
2556 |
20 Aug 11 |
peter |
162 |
} |
2556 |
20 Aug 11 |
peter |
163 |
else |
2585 |
25 Oct 11 |
peter |
164 |
suite.add(true); |
2595 |
30 Oct 11 |
peter |
165 |
|
2595 |
30 Oct 11 |
peter |
166 |
suite.out() << "test p exact with ties II\n"; |
2595 |
30 Oct 11 |
peter |
167 |
roc.reset(); |
2595 |
30 Oct 11 |
peter |
168 |
roc.add(2, false); |
2595 |
30 Oct 11 |
peter |
169 |
roc.add(1, true); |
2595 |
30 Oct 11 |
peter |
170 |
roc.add(1, false); |
2595 |
30 Oct 11 |
peter |
171 |
roc.add(0, true); |
2595 |
30 Oct 11 |
peter |
172 |
roc.add(-1, true); |
2595 |
30 Oct 11 |
peter |
173 |
suite.add(suite.equal(roc.area(), 0.5/6)); |
3006 |
01 Apr 13 |
peter |
174 |
if (!suite.add(suite.equal(roc.p_right(), 10.0/10.0))) |
3006 |
01 Apr 13 |
peter |
175 |
suite.out() << " p_right: expected 0.3\n"; |
2595 |
30 Oct 11 |
peter |
176 |
if (!suite.add(suite.equal(roc.p_value(), 3.0/10.0))) |
2595 |
30 Oct 11 |
peter |
177 |
suite.out() << " (two-sided) p_value: expected 0.5\n"; |
2556 |
20 Aug 11 |
peter |
178 |
} |
2556 |
20 Aug 11 |
peter |
179 |
|
2556 |
20 Aug 11 |
peter |
180 |
|
2556 |
20 Aug 11 |
peter |
181 |
void test_p_approx_with_ties(test::Suite& suite) |
2556 |
20 Aug 11 |
peter |
182 |
{ |
2556 |
20 Aug 11 |
peter |
183 |
suite.out() << "test p approx with ties\n"; |
2556 |
20 Aug 11 |
peter |
184 |
statistics::ROC roc; |
2556 |
20 Aug 11 |
peter |
185 |
for (size_t i=0; i<100; ++i) { |
2556 |
20 Aug 11 |
peter |
186 |
roc.add(1, i<60); |
2556 |
20 Aug 11 |
peter |
187 |
roc.add(0, i<40); |
2556 |
20 Aug 11 |
peter |
188 |
} |
2556 |
20 Aug 11 |
peter |
189 |
suite.add(suite.equal(roc.area(), 0.6)); |
2556 |
20 Aug 11 |
peter |
// Having only two data values, 0 and 1, data can be represented as |
2556 |
20 Aug 11 |
peter |
// a 2x2 contigency table, and ROC test is same as Fisher's exact |
2556 |
20 Aug 11 |
peter |
// test. |
2556 |
20 Aug 11 |
peter |
193 |
statistics::Fisher fisher; |
2556 |
20 Aug 11 |
peter |
194 |
fisher.oddsratio(60, 40, 40, 60); |
2556 |
20 Aug 11 |
peter |
195 |
suite.add(suite.equal_fix(roc.p_value(), fisher.p_value(), 0.0002)); |
2556 |
20 Aug 11 |
peter |
196 |
} |
2556 |
20 Aug 11 |
peter |
197 |
|
2549 |
12 Aug 11 |
peter |
198 |
void test_ties(test::Suite& suite) |
2549 |
12 Aug 11 |
peter |
199 |
{ |
2549 |
12 Aug 11 |
peter |
200 |
suite.out() << "test ties\n"; |
2549 |
12 Aug 11 |
peter |
201 |
statistics::ROC roc; |
2549 |
12 Aug 11 |
peter |
202 |
for (size_t i=0; i<20; ++i) |
2549 |
12 Aug 11 |
peter |
203 |
roc.add(10.0, i<10); |
2551 |
12 Aug 11 |
peter |
204 |
if (!suite.add(suite.equal(roc.area(), 0.5))) { |
2549 |
12 Aug 11 |
peter |
205 |
suite.err() << "error: roc with ties: area: " << roc.area() << "\n"; |
2549 |
12 Aug 11 |
peter |
206 |
} |
2549 |
12 Aug 11 |
peter |
207 |
} |
2593 |
30 Oct 11 |
peter |
208 |
|
2593 |
30 Oct 11 |
peter |
209 |
void test_p_exact(test::Suite& suite) |
2593 |
30 Oct 11 |
peter |
210 |
{ |
2593 |
30 Oct 11 |
peter |
211 |
suite.out() << "test_p_exact\n"; |
2593 |
30 Oct 11 |
peter |
212 |
statistics::ROC roc; |
2593 |
30 Oct 11 |
peter |
213 |
for (size_t i=0; i<9; ++i) |
2593 |
30 Oct 11 |
peter |
214 |
roc.add(i, i<5); |
3006 |
01 Apr 13 |
peter |
215 |
if (roc.p_right()<0.5) { |
2595 |
30 Oct 11 |
peter |
216 |
suite.add(false); |
2707 |
13 Mar 12 |
peter |
217 |
suite.err() << "error: expected p-value>0.5\n found: " |
3006 |
01 Apr 13 |
peter |
218 |
<< roc.p_right() << "\n"; |
2593 |
30 Oct 11 |
peter |
219 |
} |
2593 |
30 Oct 11 |
peter |
220 |
} |
2593 |
30 Oct 11 |
peter |
221 |
|
2593 |
30 Oct 11 |
peter |
222 |
|
2593 |
30 Oct 11 |
peter |
223 |
void test_p_approx(test::Suite& suite) |
2593 |
30 Oct 11 |
peter |
224 |
{ |
2593 |
30 Oct 11 |
peter |
225 |
suite.out() << "test_p_approx\n"; |
2593 |
30 Oct 11 |
peter |
226 |
statistics::ROC roc; |
2593 |
30 Oct 11 |
peter |
227 |
for (size_t i=0; i<100; ++i) |
2593 |
30 Oct 11 |
peter |
228 |
roc.add(i, i<50); |
3006 |
01 Apr 13 |
peter |
229 |
if (roc.p_right()<0.5) { |
2593 |
30 Oct 11 |
peter |
230 |
suite.add(false); |
2707 |
13 Mar 12 |
peter |
231 |
suite.err() << "error: expected p-value>0.5\n found: " |
3006 |
01 Apr 13 |
peter |
232 |
<< roc.p_right() << "\n"; |
2593 |
30 Oct 11 |
peter |
233 |
} |
3439 |
20 Nov 15 |
peter |
234 |
if (roc.p_value() > 1.0) { |
3439 |
20 Nov 15 |
peter |
235 |
suite.err() << "error: expected p-value <= 1\n found: " |
3439 |
20 Nov 15 |
peter |
236 |
<< roc.p_value() << "\n"; |
3439 |
20 Nov 15 |
peter |
237 |
suite.add(false); |
3439 |
20 Nov 15 |
peter |
238 |
} |
2593 |
30 Oct 11 |
peter |
239 |
} |
2601 |
30 Oct 11 |
peter |
240 |
|
2601 |
30 Oct 11 |
peter |
241 |
|
2601 |
30 Oct 11 |
peter |
242 |
void test_empty(test::Suite& suite) |
2601 |
30 Oct 11 |
peter |
243 |
{ |
2601 |
30 Oct 11 |
peter |
244 |
suite.err() << "test empty\n"; |
2707 |
13 Mar 12 |
peter |
// testing bug #669 |
2601 |
30 Oct 11 |
peter |
246 |
statistics::ROC roc; |
2601 |
30 Oct 11 |
peter |
247 |
roc.p_value(); |
2601 |
30 Oct 11 |
peter |
248 |
roc.area(); |
2601 |
30 Oct 11 |
peter |
249 |
suite.err() << "test empty done\n"; |
2601 |
30 Oct 11 |
peter |
250 |
} |
2708 |
14 Mar 12 |
peter |
251 |
|
2708 |
14 Mar 12 |
peter |
252 |
|
2708 |
14 Mar 12 |
peter |
253 |
void test_p_with_weights(test::Suite& suite) |
2708 |
14 Mar 12 |
peter |
254 |
{ |
2708 |
14 Mar 12 |
peter |
255 |
suite.out() << "test p with weights\n"; |
2708 |
14 Mar 12 |
peter |
256 |
test_p_double_weights(suite); |
2708 |
14 Mar 12 |
peter |
257 |
test_p_exact_weighted(suite); |
2708 |
14 Mar 12 |
peter |
258 |
test_p_approx_weighted(suite); |
2718 |
12 Apr 12 |
peter |
259 |
test_p_with_weights_and_ties(suite); |
2708 |
14 Mar 12 |
peter |
260 |
} |
2708 |
14 Mar 12 |
peter |
261 |
|
2708 |
14 Mar 12 |
peter |
262 |
|
2718 |
12 Apr 12 |
peter |
263 |
void test_p_with_weights_and_ties(test::Suite& suite) |
2718 |
12 Apr 12 |
peter |
264 |
{ |
2718 |
12 Apr 12 |
peter |
265 |
suite.out() << "test p with weights and ties\n"; |
2718 |
12 Apr 12 |
peter |
266 |
statistics::ROC roc; |
2718 |
12 Apr 12 |
peter |
267 |
roc.add(10, true, 1.0); |
2718 |
12 Apr 12 |
peter |
268 |
roc.add(10, false, 3.0); |
2718 |
12 Apr 12 |
peter |
269 |
roc.add(20, true, 2.0); |
2718 |
12 Apr 12 |
peter |
270 |
roc.add(30, true, 1.0); |
2718 |
12 Apr 12 |
peter |
271 |
if (!suite.equal(roc.area(), 0.875)) { |
2718 |
12 Apr 12 |
peter |
272 |
suite.add(false); |
2718 |
12 Apr 12 |
peter |
273 |
suite.out() << "roc area: " << roc.area() << "\n"; |
2718 |
12 Apr 12 |
peter |
274 |
} |
3006 |
01 Apr 13 |
peter |
275 |
double p = roc.p_right(); |
2718 |
12 Apr 12 |
peter |
276 |
if (!suite.equal(p, 8.0/24.0)) { |
2718 |
12 Apr 12 |
peter |
277 |
suite.add(false); |
3006 |
01 Apr 13 |
peter |
278 |
suite.out() << "p_right() failed\n"; |
2718 |
12 Apr 12 |
peter |
279 |
} |
2718 |
12 Apr 12 |
peter |
280 |
p = roc.p_value(); |
2718 |
12 Apr 12 |
peter |
281 |
if (!suite.equal(p, (8.0+6.0)/24.0)) { |
2718 |
12 Apr 12 |
peter |
282 |
suite.add(false); |
2718 |
12 Apr 12 |
peter |
283 |
suite.out() << "p_value() failed\n"; |
2718 |
12 Apr 12 |
peter |
284 |
} |
2718 |
12 Apr 12 |
peter |
285 |
} |
2718 |
12 Apr 12 |
peter |
286 |
|
2718 |
12 Apr 12 |
peter |
287 |
|
2708 |
14 Mar 12 |
peter |
288 |
void test_p_double_weights(test::Suite& suite) |
2708 |
14 Mar 12 |
peter |
289 |
{ |
2708 |
14 Mar 12 |
peter |
290 |
suite.out() << "test p with double weights\n"; |
2708 |
14 Mar 12 |
peter |
291 |
std::vector<double> w(5,1.0); |
2708 |
14 Mar 12 |
peter |
292 |
w[0]=0.1; |
2708 |
14 Mar 12 |
peter |
293 |
w[4]=10; |
2708 |
14 Mar 12 |
peter |
294 |
std::vector<double> x(5); |
2708 |
14 Mar 12 |
peter |
295 |
for (size_t i=0; i<x.size(); ++i) |
2708 |
14 Mar 12 |
peter |
296 |
x[i] = i; |
2708 |
14 Mar 12 |
peter |
297 |
statistics::ROC roc; |
2708 |
14 Mar 12 |
peter |
298 |
statistics::ROC roc2; |
2708 |
14 Mar 12 |
peter |
299 |
for (size_t i=0; i<x.size(); ++i) { |
2708 |
14 Mar 12 |
peter |
300 |
roc.add(x[i], i<2, w[i]); |
2708 |
14 Mar 12 |
peter |
301 |
roc2.add(x[i], i<2, 2*w[i]); |
2708 |
14 Mar 12 |
peter |
302 |
} |
2708 |
14 Mar 12 |
peter |
303 |
if (!suite.equal(roc.area(), roc2.area())) { |
2708 |
14 Mar 12 |
peter |
304 |
suite.add(false); |
2708 |
14 Mar 12 |
peter |
305 |
suite.err() << "area failed\n"; |
2708 |
14 Mar 12 |
peter |
306 |
} |
3006 |
01 Apr 13 |
peter |
307 |
if (!suite.equal(roc.p_right(), roc2.p_right())) { |
2708 |
14 Mar 12 |
peter |
308 |
suite.add(false); |
2708 |
14 Mar 12 |
peter |
309 |
suite.err() << "exact p failed\n"; |
2708 |
14 Mar 12 |
peter |
310 |
} |
2708 |
14 Mar 12 |
peter |
311 |
roc.minimum_size() = 0; |
2708 |
14 Mar 12 |
peter |
312 |
roc2.minimum_size() = 0; |
3006 |
01 Apr 13 |
peter |
313 |
if (!suite.equal(roc.p_right(), roc2.p_right())) { |
2709 |
15 Mar 12 |
peter |
314 |
suite.add(false); |
2708 |
14 Mar 12 |
peter |
315 |
suite.err() << "approximative p failed\n"; |
2708 |
14 Mar 12 |
peter |
316 |
} |
2708 |
14 Mar 12 |
peter |
317 |
} |
2708 |
14 Mar 12 |
peter |
318 |
|
2708 |
14 Mar 12 |
peter |
319 |
|
2708 |
14 Mar 12 |
peter |
320 |
void test_p_exact_weighted(test::Suite& suite) |
2708 |
14 Mar 12 |
peter |
321 |
{ |
2708 |
14 Mar 12 |
peter |
322 |
suite.out() << "test p exact weighted\n"; |
2708 |
14 Mar 12 |
peter |
323 |
std::vector<double> x(10); |
2708 |
14 Mar 12 |
peter |
324 |
std::vector<std::pair<bool, double> > w(10); |
2708 |
14 Mar 12 |
peter |
325 |
for (size_t i=0; i<x.size(); ++i) { |
2708 |
14 Mar 12 |
peter |
326 |
x[i] = i; |
2708 |
14 Mar 12 |
peter |
327 |
w[i].first = false; |
2708 |
14 Mar 12 |
peter |
328 |
w[i].second = 1.0; |
2708 |
14 Mar 12 |
peter |
329 |
} |
2708 |
14 Mar 12 |
peter |
330 |
w[3].first = true; |
2708 |
14 Mar 12 |
peter |
331 |
w[7].first = true; |
2708 |
14 Mar 12 |
peter |
332 |
w[8].first = true; |
2708 |
14 Mar 12 |
peter |
333 |
w[1].second = 10; |
2708 |
14 Mar 12 |
peter |
334 |
w[7].second = 10; |
2708 |
14 Mar 12 |
peter |
335 |
w[9].second = 0.1; |
2708 |
14 Mar 12 |
peter |
336 |
|
2708 |
14 Mar 12 |
peter |
337 |
statistics::ROC roc; |
2708 |
14 Mar 12 |
peter |
338 |
for (size_t i=0; i<x.size(); ++i) |
2708 |
14 Mar 12 |
peter |
339 |
roc.add(x[i], w[i].first, w[i].second); |
2712 |
16 Mar 12 |
peter |
340 |
roc.minimum_size() = 100; |
2708 |
14 Mar 12 |
peter |
341 |
|
2708 |
14 Mar 12 |
peter |
342 |
std::sort(w.begin(), w.end()); |
2708 |
14 Mar 12 |
peter |
343 |
unsigned long perm = 0; |
2708 |
14 Mar 12 |
peter |
344 |
unsigned long k = 0; |
2718 |
12 Apr 12 |
peter |
345 |
unsigned long k2 = 0; |
2708 |
14 Mar 12 |
peter |
346 |
while (true) { |
2708 |
14 Mar 12 |
peter |
347 |
++perm; |
2708 |
14 Mar 12 |
peter |
348 |
statistics::ROC roc2; |
2708 |
14 Mar 12 |
peter |
349 |
for (size_t i=0; i<x.size(); ++i) |
2708 |
14 Mar 12 |
peter |
350 |
roc2.add(x[i], w[i].first, w[i].second); |
2708 |
14 Mar 12 |
peter |
351 |
if (roc2.area() >= roc.area()) |
2708 |
14 Mar 12 |
peter |
352 |
++k; |
2718 |
12 Apr 12 |
peter |
353 |
if (roc2.area() <= 1-roc.area()+1e-10) |
2718 |
12 Apr 12 |
peter |
354 |
++k2; |
2708 |
14 Mar 12 |
peter |
355 |
|
2708 |
14 Mar 12 |
peter |
356 |
if (!next_permutation(w.begin(), w.end())) |
2708 |
14 Mar 12 |
peter |
357 |
break; |
2708 |
14 Mar 12 |
peter |
358 |
} |
3006 |
01 Apr 13 |
peter |
359 |
double p_value = roc.p_right(); |
2718 |
12 Apr 12 |
peter |
360 |
if (!suite.add(suite.equal(p_value, static_cast<double>(k)/perm))) { |
2708 |
14 Mar 12 |
peter |
361 |
suite.out() << "area: " << roc.area() << "\n" |
2708 |
14 Mar 12 |
peter |
362 |
<< perm << " permutations of which\n" |
2708 |
14 Mar 12 |
peter |
363 |
<< k << " with larger (or equal) area " |
2708 |
14 Mar 12 |
peter |
364 |
<< "corresponding to P=" << static_cast<double>(k)/perm << "\n" |
3006 |
01 Apr 13 |
peter |
365 |
<< "p_right() returned: " << p_value |
2708 |
14 Mar 12 |
peter |
366 |
<< "\n"; |
2708 |
14 Mar 12 |
peter |
367 |
} |
2718 |
12 Apr 12 |
peter |
368 |
p_value = roc.p_value(); |
2718 |
12 Apr 12 |
peter |
369 |
if (!suite.add(suite.equal(p_value, static_cast<double>(k+k2)/perm))) { |
2718 |
12 Apr 12 |
peter |
370 |
suite.out() << "area: " << roc.area() << "\n" |
2718 |
12 Apr 12 |
peter |
371 |
<< perm << " permutations of which\n" |
2718 |
12 Apr 12 |
peter |
372 |
<< k << " with larger (or equal) area and\n" |
2718 |
12 Apr 12 |
peter |
373 |
<< k2 << " with smaller (or equal) area\n" |
2718 |
12 Apr 12 |
peter |
374 |
<< "corresponding to P=" |
2718 |
12 Apr 12 |
peter |
375 |
<< static_cast<double>(k+k2)/perm << "\n" |
2718 |
12 Apr 12 |
peter |
376 |
<< "p_value() returned: " << p_value |
2718 |
12 Apr 12 |
peter |
377 |
<< "\n"; |
2718 |
12 Apr 12 |
peter |
378 |
} |
2708 |
14 Mar 12 |
peter |
379 |
} |
2708 |
14 Mar 12 |
peter |
380 |
|
2708 |
14 Mar 12 |
peter |
381 |
|
2708 |
14 Mar 12 |
peter |
382 |
void test_p_approx_weighted(test::Suite& suite) |
2708 |
14 Mar 12 |
peter |
383 |
{ |
2708 |
14 Mar 12 |
peter |
384 |
suite.out() << "test p approx weighted\n"; |
2709 |
15 Mar 12 |
peter |
385 |
std::vector<double> x(200); |
2709 |
15 Mar 12 |
peter |
386 |
std::vector<double> w(200, 1.0); |
2709 |
15 Mar 12 |
peter |
387 |
std::deque<bool> label(200); |
2708 |
14 Mar 12 |
peter |
388 |
|
2708 |
14 Mar 12 |
peter |
389 |
for (size_t i=0; i<x.size(); ++i) { |
2708 |
14 Mar 12 |
peter |
390 |
x[i] = i; |
2708 |
14 Mar 12 |
peter |
391 |
label[i] = i>30 && i<70; |
2709 |
15 Mar 12 |
peter |
392 |
if (i<100) |
2709 |
15 Mar 12 |
peter |
393 |
w[i] = 100.0 / (100+i); |
2709 |
15 Mar 12 |
peter |
394 |
else |
2709 |
15 Mar 12 |
peter |
395 |
w[i] = 0.0001; |
2708 |
14 Mar 12 |
peter |
396 |
} |
2708 |
14 Mar 12 |
peter |
397 |
|
2708 |
14 Mar 12 |
peter |
398 |
statistics::ROC roc; |
2708 |
14 Mar 12 |
peter |
399 |
for (size_t i=0; i<x.size(); ++i) |
2708 |
14 Mar 12 |
peter |
400 |
roc.add(x[i], label[i], w[i]); |
2708 |
14 Mar 12 |
peter |
401 |
roc.minimum_size() = 0; |
3006 |
01 Apr 13 |
peter |
402 |
double p = roc.p_right(); |
2708 |
14 Mar 12 |
peter |
403 |
|
2708 |
14 Mar 12 |
peter |
404 |
std::set<size_t> checkpoints; |
2709 |
15 Mar 12 |
peter |
405 |
size_t perm = 100000; |
2708 |
14 Mar 12 |
peter |
406 |
checkpoints.insert(10); |
2708 |
14 Mar 12 |
peter |
407 |
checkpoints.insert(100); |
2708 |
14 Mar 12 |
peter |
408 |
checkpoints.insert(1000); |
2708 |
14 Mar 12 |
peter |
409 |
checkpoints.insert(10000); |
2708 |
14 Mar 12 |
peter |
410 |
checkpoints.insert(perm); |
2708 |
14 Mar 12 |
peter |
411 |
statistics::Averager averager; |
2708 |
14 Mar 12 |
peter |
412 |
for (size_t i=1; i<=perm; ++i) { |
2736 |
29 May 12 |
peter |
413 |
theplu::yat::random::random_shuffle(x.begin(), x.end()); |
2708 |
14 Mar 12 |
peter |
414 |
statistics::ROC roc2; |
2708 |
14 Mar 12 |
peter |
415 |
for (size_t j=0; j<x.size(); ++j) |
2708 |
14 Mar 12 |
peter |
416 |
roc2.add(x[j], label[j], w[j]); |
2708 |
14 Mar 12 |
peter |
417 |
if (roc2.area()>=roc.area()) |
2708 |
14 Mar 12 |
peter |
418 |
averager.add(1.0); |
2708 |
14 Mar 12 |
peter |
419 |
else |
2708 |
14 Mar 12 |
peter |
420 |
averager.add(0.0); |
2708 |
14 Mar 12 |
peter |
421 |
if (checkpoints.find(i)!=checkpoints.end()) { |
2708 |
14 Mar 12 |
peter |
422 |
if (gsl_cdf_binomial_P(averager.sum_x(), p, averager.n())<1e-10 || |
2708 |
14 Mar 12 |
peter |
423 |
gsl_cdf_binomial_Q(averager.sum_x(), p, averager.n())<1e-10) { |
2708 |
14 Mar 12 |
peter |
424 |
suite.err() << "error: approx p value and permutation p-value " |
2708 |
14 Mar 12 |
peter |
425 |
<< "deviate more than expected\n" |
2709 |
15 Mar 12 |
peter |
426 |
<< "area: " << roc.area() << "\n" |
2708 |
14 Mar 12 |
peter |
427 |
<< "approx p: " << p << "\n" |
2708 |
14 Mar 12 |
peter |
428 |
<< "permutations: " << averager.n() << "\n" |
2708 |
14 Mar 12 |
peter |
429 |
<< "successful: " << averager.sum_x() << "\n" |
2708 |
14 Mar 12 |
peter |
430 |
<< "corresponds to P=" << averager.mean() << "\n"; |
2709 |
15 Mar 12 |
peter |
431 |
suite.add(false); |
2708 |
14 Mar 12 |
peter |
432 |
return; |
2708 |
14 Mar 12 |
peter |
433 |
} |
2708 |
14 Mar 12 |
peter |
434 |
} |
2708 |
14 Mar 12 |
peter |
435 |
} |
2708 |
14 Mar 12 |
peter |
436 |
} |
2720 |
12 Apr 12 |
peter |
437 |
|
2720 |
12 Apr 12 |
peter |
438 |
void test_remove(test::Suite& suite) |
2720 |
12 Apr 12 |
peter |
439 |
{ |
2720 |
12 Apr 12 |
peter |
440 |
using statistics::ROC; |
2720 |
12 Apr 12 |
peter |
441 |
ROC roc; |
2720 |
12 Apr 12 |
peter |
442 |
roc.add(1, true); |
2720 |
12 Apr 12 |
peter |
443 |
roc.add(2, false); |
2720 |
12 Apr 12 |
peter |
444 |
ROC roc2(roc); |
2720 |
12 Apr 12 |
peter |
445 |
if (!suite.add(suite.equal(roc.area(), roc2.area()))) |
2720 |
12 Apr 12 |
peter |
446 |
suite.out() << "test_remove failed: copy failed\n"; |
2720 |
12 Apr 12 |
peter |
447 |
roc.add(2.3, true, 1.2); |
2720 |
12 Apr 12 |
peter |
448 |
try { |
2720 |
12 Apr 12 |
peter |
449 |
roc.remove(2.3, true, 1.2); |
2720 |
12 Apr 12 |
peter |
450 |
} |
2720 |
12 Apr 12 |
peter |
451 |
catch (std::runtime_error& e) { |
2720 |
12 Apr 12 |
peter |
452 |
suite.add(false); |
2720 |
12 Apr 12 |
peter |
453 |
suite.out() << "exception what(): " << e.what() << "\n"; |
2720 |
12 Apr 12 |
peter |
454 |
} |
2720 |
12 Apr 12 |
peter |
455 |
if (!suite.add(suite.equal(roc.area(), roc2.area()))) |
2720 |
12 Apr 12 |
peter |
456 |
suite.out() << "test remove failed\n"; |
2720 |
12 Apr 12 |
peter |
457 |
try { |
2720 |
12 Apr 12 |
peter |
458 |
roc.remove(2, true); |
2720 |
12 Apr 12 |
peter |
459 |
suite.out() << "no exception thrown\n"; |
2720 |
12 Apr 12 |
peter |
460 |
suite.add(false); |
2720 |
12 Apr 12 |
peter |
461 |
} |
2720 |
12 Apr 12 |
peter |
462 |
catch (std::runtime_error& e) { |
2720 |
12 Apr 12 |
peter |
463 |
suite.add(true); |
2720 |
12 Apr 12 |
peter |
464 |
} |
2720 |
12 Apr 12 |
peter |
465 |
} |