1 #ifndef _theplu_yat_statistics_utility_
2 #define _theplu_yat_statistics_utility_
29 #include "Percentiler.h"
31 #include "yat/classifier/DataLookupWeighted1D.h"
32 #include "yat/classifier/Target.h"
33 #include "yat/normalizer/utility.h"
34 #include "yat/utility/concept_check.h"
35 #include "yat/utility/DataIterator.h"
37 #include "yat/utility/iterator_traits.h"
38 #include "yat/utility/sort_index.h"
39 #include "yat/utility/Vector.h"
40 #include "yat/utility/VectorBase.h"
41 #include "yat/utility/yat_assert.h"
43 #include <boost/concept_check.hpp>
44 #include <boost/iterator/iterator_concepts.hpp>
45 #include <boost/iterator/iterator_traits.hpp>
46 #include <boost/iterator/permutation_iterator.hpp>
48 #include <gsl/gsl_math.h>
49 #include <gsl/gsl_statistics_double.h>
59 namespace statistics {
70 template <
typename T,
typename ForwardIterator>
71 void add(T& o, ForwardIterator first, ForwardIterator last,
72 const classifier::Target& target);
94 template<
typename B
idirectionalIterator1,
typename B
idirectionalIterator2>
96 BidirectionalIterator1 last,
97 BidirectionalIterator2 result);
119 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
121 RandomAccessIterator last,
122 MutableRandomAccessIterator result);
137 unsigned int n2,
unsigned int t);
151 template<
typename InputIterator>
152 double entropy(InputIterator first, InputIterator last);
178 double kurtosis(
const utility::VectorBase&);
194 template <
class RandomAccessIterator>
195 double mad(RandomAccessIterator first, RandomAccessIterator last,
216 template <
class RandomAccessIterator>
217 double median(RandomAccessIterator first, RandomAccessIterator last,
270 template <
class RandomAccessIterator>
271 double percentile(RandomAccessIterator first, RandomAccessIterator last,
272 double p,
bool sorted=
false) YAT_DEPRECATE;
284 template <class RandomAccessIterator>
285 double percentile2(RandomAccessIterator first, RandomAccessIterator last,
286 double p,
bool sorted=false)
289 return percentiler(first, last);
303 template <
typename T,
typename ForwardIterator>
304 void add(T& o, ForwardIterator first, ForwardIterator last,
307 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<ForwardIterator>));
308 BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<ForwardIterator>));
310 for (
size_t i=0; first!=last; ++i, ++first)
315 template<
typename B
idirectionalIterator1,
typename B
idirectionalIterator2>
317 BidirectionalIterator1 last,
318 BidirectionalIterator2 result)
320 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<BidirectionalIterator1>));
321 BOOST_CONCEPT_ASSERT((boost_concepts::BidirectionalTraversal<BidirectionalIterator1>));
323 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<BidirectionalIterator2>));
324 BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<BidirectionalIterator2>));
325 BOOST_CONCEPT_ASSERT((boost_concepts::BidirectionalTraversal<BidirectionalIterator2>));
327 typedef typename boost::iterator_difference<BidirectionalIterator1>::type
330 diff_type n = std::distance(first, last);
333 std::advance(result, n-1);
340 x = std::min(*first * n/static_cast<double>(rank), x);
349 template<
typename RandomAccessIterator,
typename MutableRandomAccessIterator>
351 RandomAccessIterator last,
352 MutableRandomAccessIterator result)
354 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<RandomAccessIterator>));
355 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
357 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<MutableRandomAccessIterator>));
358 BOOST_CONCEPT_ASSERT((boost_concepts::WritableIterator<MutableRandomAccessIterator>));
359 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<MutableRandomAccessIterator>));
361 std::vector<size_t> idx;
364 boost::make_permutation_iterator(first, idx.end()),
365 boost::make_permutation_iterator(result, idx.begin()));
369 template<
typename InputIterator>
370 double entropy(InputIterator first, InputIterator last)
372 BOOST_CONCEPT_ASSERT((boost_concepts::ReadableIterator<InputIterator>));
373 BOOST_CONCEPT_ASSERT((boost_concepts::SinglePassIterator<InputIterator>));
374 using boost::Convertible;
375 typedef typename boost::iterator_value<InputIterator>::type T;
376 BOOST_CONCEPT_ASSERT((Convertible<T,double>));
379 for (; first != last; ++first) {
382 sum += *first * std::log(static_cast<double>(*first));
385 return -sum / N + log(N);
389 template <
class RandomAccessIterator>
390 double mad(RandomAccessIterator first, RandomAccessIterator last,
394 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
395 double m =
median(first, last, sorted);
396 typedef typename boost::iterator_value<RandomAccessIterator>::type T;
397 std::vector<T> ad(std::distance(first, last));
399 normalizer::detail::copy_weight_if_weighted(first, last, ad.begin());
403 while (first!=last) {
404 *first2 = std::abs(traits.data(first)-m);
408 return median(ad.begin(), ad.end(),
false);
412 template <
class RandomAccessIterator>
413 double median(RandomAccessIterator first, RandomAccessIterator last,
417 BOOST_CONCEPT_ASSERT((boost_concepts::RandomAccessTraversal<RandomAccessIterator>));
426 using boost::Convertible;
427 BOOST_CONCEPT_ASSERT((Convertible<typename T::value_type,double>));
439 for (
size_t c = 0; c<n.columns(); ++c)
440 rowsum(c) = std::accumulate(n.begin_column(c), n.end_column(c), 0);
443 for (
size_t r = 0; r<n.rows(); ++r)
444 colsum(r) = std::accumulate(n.begin_row(r), n.end_row(r), 0);
446 double mi = -
entropy(n.begin(), n.end());
447 mi +=
entropy(rowsum.begin(), rowsum.end());
448 mi +=
entropy(colsum.begin(), colsum.end());
454 template <
class RandomAccessIterator>
455 double percentile(RandomAccessIterator first, RandomAccessIterator last,
456 double p,
bool sorted)
458 BOOST_CONCEPT_ASSERT((boost::RandomAccessIterator<RandomAccessIterator>));
467 double j = p/100 * (std::distance(first,last)-1);
468 int i =
static_cast<int>(j);
469 return (1-j+floor(j))*first[i] + (j-floor(j))*first[i+1];
471 using std::iterator_traits;
472 typedef typename iterator_traits<RandomAccessIterator>::value_type value_t;
473 std::vector<value_t> v_copy(first, last);
474 size_t i =
static_cast<size_t>(p/100 * (v_copy.size()-1));
475 if (i+2 < v_copy.size()) {
476 std::partial_sort(v_copy.begin(), v_copy.begin()+i+2, v_copy.end());
479 std::sort(v_copy.begin(), v_copy.end());
480 return percentile(v_copy.begin(), v_copy.end(), p,
true);
void benjamini_hochberg(BidirectionalIterator1 first, BidirectionalIterator1 last, BidirectionalIterator2 result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:316
double mad(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Median absolute deviation from median.
Definition: utility.h:390
data_reference data(Iter iter) const
Definition: iterator_traits.h:440
Class for containing sample labels.
Definition: Target.h:47
double mutual_information(const T &A)
Calculates the mutual information of A.
Definition: utility.h:423
Concept check for Data Iterator.
Definition: concept_check.h:228
Definition: iterator_traits.h:412
Concept check for Container2D.
Definition: concept_check.h:59
double skewness(const utility::VectorBase &)
Computes the skewness of the data in a vector.
double pearson_p_value(double r, unsigned int n)
one-sided p-value
double median(RandomAccessIterator first, RandomAccessIterator last, bool sorted=false)
Definition: utility.h:413
double entropy(InputIterator first, InputIterator last)
Definition: utility.h:370
DataIterator.
Definition: DataIterator.h:63
This is the yat interface to GSL vector.
Definition: Vector.h:57
This is the yat interface to GSL vector.
Definition: VectorBase.h:52
bool binary(size_t i) const
Default binary is set to false for all classes except class 0.
void sort_index(InputIterator first, InputIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:146
weight_reference weight(Iter iter) const
Definition: iterator_traits.h:446
double kurtosis(const utility::VectorBase &)
Computes the kurtosis of the data in a vector.
void benjamini_hochberg_unsorted(RandomAccessIterator first, RandomAccessIterator last, MutableRandomAccessIterator result)
Benjamini Hochberg multiple test correction.
Definition: utility.h:350
void add(T &o, ForwardIterator first, ForwardIterator last, const classifier::Target &target)
Definition: utility.h:304
double cdf_hypergeometric_P(unsigned int k, unsigned int n1, unsigned int n2, unsigned int t)
double percentile2(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:285
double percentile(RandomAccessIterator first, RandomAccessIterator last, double p, bool sorted=false)
Definition: utility.h:455
Functor to calculate percentile of a range.
Definition: Percentiler.h:51