1 #ifndef _theplu_yat_statistics_roc_
2 #define _theplu_yat_statistics_roc_
30 #include "yat/utility/yat_assert.h"
32 #include <gsl/gsl_randist.h>
41 namespace statistics {
/// Add a data value.
///
/// \param value  the data value
/// \param target boolean label attached to the value
///               (NOTE(review): presumably true marks a positive sample — confirm)
/// \param weight weight of the data value; defaults to 1.0
74 void add(
double value,
bool target,
double weight=1.0);
/// Area Under Curve, AUC.
///
/// NOTE(review): the member area_ is declared mutable, so this const
/// function may compute and store the value on demand — confirm against the
/// implementation, which is not visible here.
83 double area(
void)
const;
/// Number of samples (returned as a double).
114 double n(
void)
const;
/// Number of negative samples (returned as a double).
121 double n_neg(
void)
const;
/// Number of positive samples (returned as a double).
128 double n_pos(
void)
const;
/// NOTE(review): presumably the one-sided p-value counterpart of p_right()
/// — the description is not visible in this listing; confirm.
136 double p_left(
void)
const;
/// Takes the same (value, target, weight) arguments as add(), with weight
/// defaulting to 1.0.
///
/// NOTE(review): presumably removes a data value previously passed to add()
/// — confirm against the implementation.
219 void remove(
double value,
bool target,
double weight=1.0);
/// Multimap keyed by a double; each mapped value is a (bool, double) pair.
/// NOTE(review): presumably key = data value and pair = (target, weight),
/// matching the arguments of add() — confirm.
227 typedef std::multimap<double, std::pair<bool, double> > Map;
/// Vector of (bool, Map::mapped_type) pairs. The three-argument count()
/// fills it with one (false, mapped value) element per multimap_ entry.
228 typedef std::vector<std::pair<bool, Map::mapped_type> > Vector;
/// Takes one unnamed double and returns a double.
/// NOTE(review): presumably an approximate p-value for the given area
/// — the implementation is not visible here; confirm.
241 double get_p_approx(
double)
const;
/// NOTE(review): presumably reports whether the stored data carries
/// non-trivial weights — the implementation is not visible here; confirm.
246 bool is_weighted(
void)
const;
/// Takes an Averager (a class calculating first- and second-moment
/// averages) and returns a size_t.
/// NOTE(review): presumably the number of data points summarised by \a a
/// — confirm against the implementation.
251 size_t nof_points(
const Averager& a)
const;
/// NOTE(review): presumably the left-sided p-value for \a area in the
/// weighted case — the implementation is not visible here; confirm.
257 double p_left_weighted(
double area)
const;
/// NOTE(review): presumably the right-sided p-value for \a area in the
/// weighted case — the implementation is not visible here; confirm.
263 double p_right_weighted(
double area)
const;
/// Entry-point overload of count(). Its definition copies the mapped values
/// of multimap_ into a Vector, initialises the small_pos/small_neg weights
/// from pos_weights_/neg_weights_ and delegates to the count(Vector&, ...)
/// overload, passing threshold multiplied by small_pos*small_neg and a
/// starting sum of 0.
273 template <
typename Iterator>
274 double count(Iterator first, Iterator last,
double threshold)
const;
/// Overload taking the working Vector by reference and the Weights by
/// const reference. Its definition loops over the Vector, calls the
/// per-entry overload, and returns the accumulated result divided by the
/// number of elements counted.
279 template <
typename Iterator>
280 double count(Vector& weights, Iterator iter, Iterator last,
281 double threshold,
double sum,
const Weights& weight)
const;
/// Per-entry overload. Weights is taken by value (the definition adjusts
/// its own copy) and \a entry is the (bool, double) pair being processed.
/// The definition ends by calling the const-reference overload with the
/// next iterator.
288 template <
typename Iterator>
289 double count(Vector& weights, Iterator iter, Iterator last,
290 double threshold,
double sum, Weights weight,
291 const std::pair<bool, double>& entry)
const;
/// Recursive template helper; its definition (which names the range
/// parameters begin/end) accumulates gsl_ran_hypergeometric_pdf terms
/// multiplied by a recursive call on the remaining range.
///
/// \param first,last range of elements compared through their ->first member
/// \param block      value reduced on each recursive call
/// \param pos,neg    counts used as parameters of the hypergeometric pdf
298 template<
typename ForwardIterator>
299 double p_exact_with_ties(ForwardIterator first, ForwardIterator last,
300 double block,
unsigned int pos,
301 unsigned int neg)
const;
/// NOTE(review): presumably the exact right-sided p-value for \a area
/// — the implementation is not visible here; confirm.
306 double p_exact_right(
double area)
const;
/// NOTE(review): presumably the exact left-sided p-value for \a area
/// — the implementation is not visible here; confirm.
311 double p_exact_left(
double area)
const;
/// NOTE(review): presumably decides between the exact and the approximate
/// p-value calculation, likely using minimum_size_ — confirm.
313 bool use_exact_method(
void)
const;
/// Declared mutable, so const member functions are able to assign it.
/// NOTE(review): presumably holds the value returned by area() — confirm.
315 mutable double area_;
/// NOTE(review): presumably the value exposed through minimum_size(), the
/// threshold for p-value calculation — confirm.
317 unsigned int minimum_size_;
/// Recursive helper: for each candidate pos1 it adds the hypergeometric
/// probability of pos1 multiplied by a recursive call on the remaining
/// range [iter, end).
///
/// NOTE(review): only part of the body is visible in this listing; `n` and
/// `result` are defined on lines that are not shown.
323 template<
typename ForwardIterator>
325 ROC::p_exact_with_ties(ForwardIterator begin, ForwardIterator end,
326 double block,
unsigned int pos,
unsigned int neg)
const
333 ForwardIterator iter(begin);
// Runs while iter refers to an element whose key equals the key of the
// first element (the loop body is not visible here).
335 while (iter!=end && iter->first == begin->first) {
// Lower bound for pos1: n reduced by at most neg.
348 unsigned int pos1 = n - std::min(n, neg);
// Upper bound for pos1: limited by both n and pos.
350 unsigned int max = std::min(n, pos);
351 YAT_ASSERT(pos1<=max);
352 for ( ; pos1<=
max; ++pos1) {
// neg1 is the part of n not covered by pos1.
353 unsigned int neg1 = n-pos1;
// pos2/neg2 are what remains of pos/neg after taking pos1/neg1.
355 unsigned int pos2 = pos-pos1;
356 YAT_ASSERT(pos2<=pos);
357 unsigned int neg2 = neg-neg1;
358 YAT_ASSERT(neg2<=neg);
// Probability of pos1 under a hypergeometric distribution with
// parameters (pos, neg, n), times the recursive result with block
// reduced by pos2*neg1 + 0.5*pos1*neg1.
359 result += gsl_ran_hypergeometric_pdf(pos1, static_cast<unsigned int>(pos),
360 static_cast<unsigned int>(neg), n)
361 * p_exact_with_ties(iter, end,
362 block - pos2*neg1 - 0.5*pos1*neg1,
/// Entry-point overload of count(): copies the mapped values of multimap_
/// into a vector, initialises the small_pos/small_neg weights and delegates
/// to the count(Vector&, ...) overload.
///
/// NOTE(review): the declarations of `vec` and `w` are on lines not visible
/// in this listing.
369 template <
typename Iterator>
370 double ROC::count(Iterator first, Iterator last,
double threshold)
const
373 vec.reserve(multimap_.size());
// Every multimap_ entry is copied with its flag set to false.
375 for (Map::const_iterator i = multimap_.begin(); i!=multimap_.end(); ++i)
376 vec.push_back(std::make_pair(
false, i->second));
// The "small" weights start as sum_x() of the positive and negative
// weight accumulators.
379 w.small_pos = pos_weights_.
sum_x();
380 w.small_neg = neg_weights_.
sum_x();
// threshold is rescaled by small_pos*small_neg; the running sum starts at 0.
381 return count(vec, first, last, threshold*w.small_pos*w.small_neg, 0, w);
/// Iterates over the elements of v, adds up the results of the per-entry
/// count() overload and returns that total divided by nof_elements.
///
/// NOTE(review): the lines that declare `result`, select elements and
/// increment nof_elements are not visible in this listing.
386 template <
typename Iterator>
387 double ROC::count(ROC::Vector& v, Iterator iter, Iterator last,
388 double threshold,
double sum,
const Weights& w)
const
392 int nof_elements = 0;
393 for (ROC::Vector::iterator i=v.begin(); i!=v.end(); ++i) {
// The second member of the element is passed on as the entry to process.
397 result += count(v, iter, last, threshold, sum, w, i->second);
// nof_elements is the divisor below, so it must be non-zero here.
401 YAT_ASSERT(nof_elements);
402 return result/nof_elements;
/// Per-entry step of count(). `w` is taken by value, so the adjustments to
/// its tied_*/small_* members below stay local to this call.
///
/// NOTE(review): the declaration of `next`, the conditions guarding the
/// weight transfers and the statements executed by the two threshold checks
/// are on lines not visible in this listing.
406 template <
typename Iterator>
407 double ROC::count(Vector& weights, Iterator iter, Iterator last,
408 double threshold,
double sum, Weights w,
409 const std::pair<bool, double>& entry)
const
// Note: 10e-10 equals 1e-9. Used as slack in the threshold comparisons below.
411 double tiny = 10e-10;
414 YAT_ASSERT(next!=last);
// Move the entry's weight from small_pos to tied_pos.
419 w.tied_pos += entry.second;
420 w.small_pos -= entry.second;
// Same transfer for the negative weights.
423 w.tied_neg += entry.second;
424 w.small_neg -= entry.second;
// At the end of the range, or when the next value differs from the current
// one, the tied weights are folded into sum.
428 if (next==last || *next!=*iter) {
429 sum += 0.5*w.tied_pos*w.tied_neg + w.tied_pos * w.small_neg;
// Same expression as the increment above with tied_pos replaced by
// tied_pos+small_pos.
// NOTE(review): looks like an upper bound on the reachable sum — confirm.
436 double max_sum = sum + 0.5*(w.tied_pos+w.small_pos)*w.tied_neg +
437 (w.tied_pos+w.small_pos)*w.small_neg;
// Both checks compare against threshold-tiny.
439 if (max_sum<threshold-tiny)
441 if (sum + 0.5*w.tied_pos*(w.tied_neg+w.small_neg) >= threshold-tiny)
// Continue with the next element through the const-reference overload.
445 return count(weights, next, last, threshold, sum, w);
double area(void) const
Area Under Curve, AUC.
ROC(void)
Default constructor.
unsigned int & minimum_size(void)
threshold for p_value calculation
Class to calculate simple (first and second moments) averages.
Definition: Averager.h:46
double n_neg(void) const
number of negative samples
double p_value(void) const
Two-sided p-value.
void add(double value, bool target, double weight=1.0)
Add a data value.
double p_right(void) const
One-sided P-value.
T max(const T &a, const T &b, const T &c)
Definition: stl_utility.h:697
double p_value_one_sided(void) const
double n_pos(void) const
number of positive samples
double p_left(void) const
double n(void) const
number of samples
Receiver Operating Characteristic.
Definition: ROC.h:52
double sum_x(void) const
Definition: averager_base.h:128
void reset(void)
Set everything to zero.