yat  0.14.5pre
qQuantileNormalizer.h
1 #ifndef _theplu_yat_normalizer_qquantile_normalizer_
2 #define _theplu_yat_normalizer_qquantile_normalizer_
3 
4 /*
5  Copyright (C) 2009 Jari Häkkinen, Peter Johansson
6  Copyright (C) 2010, 2016 Peter Johansson
7 
8  This file is part of the yat library, http://dev.thep.lu.se/yat
9 
10  The yat library is free software; you can redistribute it and/or
11  modify it under the terms of the GNU General Public License as
12  published by the Free Software Foundation; either version 3 of the
13  License, or (at your option) any later version.
14 
15  The yat library is distributed in the hope that it will be useful,
16  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18  General Public License for more details.
19 
20  You should have received a copy of the GNU General Public License
21  along with yat. If not, see <http://www.gnu.org/licenses/>.
22 */
23 
24 #include "utility.h"
25 
26 #include "yat/regression/CSplineInterpolation.h"
27 #include "yat/utility/concept_check.h"
28 #include "yat/utility/DataIterator.h"
29 #include "yat/utility/DataWeight.h"
30 #include "yat/utility/Exception.h"
31 #include "yat/utility/iterator_traits.h"
32 #include "yat/utility/sort_index.h"
33 #include "yat/utility/Vector.h"
34 #include "yat/utility/WeightIterator.h"
35 #include "yat/utility/yat_assert.h"
36 
37 #include <boost/concept_check.hpp>
38 
39 #include <algorithm>
40 #include <cmath>
41 #include <iterator>
42 #include <limits>
43 #include <numeric>
44 #include <stdexcept>
45 #include <vector>
46 
47 namespace theplu {
48 namespace yat {
49 namespace utility {
50  class VectorBase;
51 }
52 namespace normalizer {
53 
89  {
90  public:
117  template<typename ForwardIterator>
118  qQuantileNormalizer(ForwardIterator first, ForwardIterator last,
119  unsigned int Q);
120 
143  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
144  void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last,
145  RandomAccessIterator2 result) const;
146 
147  private:
148 
158  class Partitioner
159  {
160  public:
164  template<typename ForwardIterator>
165  Partitioner(ForwardIterator first, ForwardIterator last,
166  unsigned int N);
167 
173  const utility::Vector& averages(void) const;
174 
188  const utility::Vector& quantiles(void) const;
189 
193  size_t size(void) const;
194 
195  private:
196  // unweighted "constructor"
197  template<typename ForwardIterator>
198  void build(ForwardIterator first, ForwardIterator last, unsigned int N,
200  // weighted "constructor"
201  template<typename ForwardIterator>
202  void build(ForwardIterator first, ForwardIterator last, unsigned int N,
204  void init(const utility::VectorBase&, unsigned int N);
205  void init(const std::vector<utility::DataWeight>&, unsigned int N);
206 
207  utility::Vector average_;
208  utility::Vector quantiles_;
209  };
210 
211  Partitioner target_;
212 
213  // unweighted version
214  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
215  void normalize(const Partitioner& source,RandomAccessIterator1 first,
216  RandomAccessIterator1 last, RandomAccessIterator2 result,
218 
219  // weighted version
220  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
221  void normalize(const Partitioner& source,RandomAccessIterator1 first,
222  RandomAccessIterator1 last, RandomAccessIterator2 result,
224  };
225 
226 
227  // template implementations
228 
229  template<typename ForwardIterator>
231  ForwardIterator last,
232  unsigned int Q)
233  : target_(Partitioner(first, last, Q))
234  {
235  YAT_ASSERT(Q>2);
236  }
237 
238 
239  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
240  void qQuantileNormalizer::operator()(RandomAccessIterator1 first,
241  RandomAccessIterator1 last,
242  RandomAccessIterator2 result) const
243  {
244  BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator1>));
245  using boost_concepts::RandomAccessTraversal;
246  BOOST_CONCEPT_ASSERT((RandomAccessTraversal<RandomAccessIterator1>));
247  BOOST_CONCEPT_ASSERT((utility::DataIterator<RandomAccessIterator2>));
248  BOOST_CONCEPT_ASSERT((RandomAccessTraversal<RandomAccessIterator2>));
249  using boost_concepts::WritableIterator;
250  BOOST_CONCEPT_ASSERT((WritableIterator<RandomAccessIterator2>));
251 
252  Partitioner source(first, last, target_.size());
254  normalize(source, first, last, result, tag);
255  }
256 
257 
258  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
259  void
260  qQuantileNormalizer::normalize(const qQuantileNormalizer::Partitioner& source,
261  RandomAccessIterator1 first,
262  RandomAccessIterator1 last,
263  RandomAccessIterator2 result,
265  {
268  size_t N = last-first;
269  YAT_ASSERT(N >= target_.size());
270 
271  std::vector<size_t> sorted_index(last-first);
272  utility::sort_index(first, last, sorted_index);
273 
274  utility::Vector diff(source.averages());
275  diff-=target_.averages();
276  const utility::Vector& idx=target_.quantiles();
277  regression::CSplineInterpolation cspline(idx,diff);
278 
279  // linear extrapolation for first part, i.e., use first diff for
280  // all points in the first part.
281  size_t start=0;
282  size_t end = static_cast<size_t>(std::ceil(N*idx(0) - 0.5));
283  // take care of limiting case number of parts approximately equal
284  // to the number of elements in range.
285  if (end==0)
286  ++end;
287  for (size_t i=start; i<end; ++i) {
288  size_t si = sorted_index[i];
289  result[si] = first[si] - diff(0);
290  }
291 
292  using utility::yat_assert;
293 
294  // cspline interpolation for all data between the mid points of
295  // the first and last part
296  start=end;
297  end = static_cast<size_t>(std::ceil(N*idx(idx.size()-1) - 0.5));
298  if (end>=N)
299  end = N-1;
300  for ( size_t i=start; i<end; ++i) {
301  size_t si = sorted_index[i];
302 
303  YAT_ASSERT((i+0.5)/N>idx(0));
304  result[si] = first[si] - cspline.evaluate((i+0.5)/N);
305  }
306 
307  // linear extrapolation for last part, i.e., use last diff for
308  // all points in the last part.
309  for (size_t i=end ; i<N; ++i) {
310  size_t si = sorted_index[i];
311  result[si] = first[si] - diff(diff.size()-1);
312  }
313  }
314 
315 
316  template<typename RandomAccessIterator1, typename RandomAccessIterator2>
317  void qQuantileNormalizer::normalize(const Partitioner& source,
318  RandomAccessIterator1 first,
319  RandomAccessIterator1 last,
320  RandomAccessIterator2 result,
321  utility::weighted_iterator_tag tag) const
322  {
323  // copy the weights
324  detail::copy_weight_if_weighted(first, last, result);
325 
326  double total_w = std::accumulate(utility::weight_iterator(first),
327  utility::weight_iterator(last), 0.0);
328 
329  std::vector<size_t> sorted_index(last-first);
330  // Code to avoid problems with NaN (ticket:535)
331  // utility::sort_index(utility::data_iterator(first),
332  // utility::data_iterator(last), sorted_index);
333  // ... above statement replaced below code block
334  {
335  using namespace utility;
336  std::vector<double> vec;
337  vec.reserve(std::distance(first, last));
338  std::copy(utility::data_iterator(first), utility::data_iterator(last),
339  std::back_inserter(vec));
340  for (std::vector<double>::iterator i(vec.begin()); i!=vec.end(); ++i)
341  if (std::isnan(*i))
342  *i = std::numeric_limits<double>::infinity();
343  utility::sort_index(vec.begin(), vec.end(), sorted_index);
344  }
345  // end Code to avoid problems with NaN (ticket:535)
346 
347  utility::Vector diff(source.averages());
348  diff-=target_.averages();
349  const utility::Vector& idx=target_.quantiles();
350  regression::CSplineInterpolation cspline(idx,diff);
351 
352  double sum_w=0;
353  utility::iterator_traits<RandomAccessIterator1> traits1;
354  utility::iterator_traits<RandomAccessIterator2> traits2;
355  for (size_t i=0; i<sorted_index.size(); ++i) {
356  size_t si = sorted_index[i];
357  double w = (sum_w + 0.5*traits1.weight(first+si))/total_w;
358  double correction = 0;
359  if (w <= idx(0)) {
360  // linear extrapolation for first part, i.e., use first diff for
361  // all points in the first part.
362  correction = diff(0);
363  }
364  else if (w < idx(idx.size()-1) ) {
365  // cspline interpolation for all data between the mid points of
366  // the first and last part
367  correction = cspline.evaluate(w);
368  }
369  else {
370  // linear extrapolation for last part, i.e., use last diff for
371  // all points in the last part.
372  correction = diff(diff.size()-1);
373  }
374  traits2.data(result+si) = traits1.data(first+si) - correction;
375  sum_w += traits1.weight(first+si);
376  }
377  }
378 
379 
380  template<typename ForwardIterator>
381  qQuantileNormalizer::Partitioner::Partitioner(ForwardIterator first,
382  ForwardIterator last,
383  unsigned int N)
384  : average_(utility::Vector(N)), quantiles_(utility::Vector(N))
385  {
386  BOOST_CONCEPT_ASSERT((boost_concepts::ForwardTraversal<ForwardIterator>));
387  BOOST_CONCEPT_ASSERT((utility::DataIterator<ForwardIterator>));
388  build(first, last, N,
389  typename utility::weighted_iterator_traits<ForwardIterator>::type());
390  }
391 
392 
393  template<typename ForwardIterator>
394  void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
395  ForwardIterator last,
396  unsigned int N,
397  utility::unweighted_iterator_tag)
398  {
399  utility::Vector vec(std::distance(first, last));
400  std::copy(first, last, vec.begin());
401  std::sort(vec.begin(), vec.end());
402  init(vec, N);
403  }
404 
405 
406  template<typename ForwardIterator>
407  void qQuantileNormalizer::Partitioner::build(ForwardIterator first,
408  ForwardIterator last,
409  unsigned int N,
410  utility::weighted_iterator_tag)
411  {
412  using namespace utility;
413  std::vector<DataWeight> vec(first, last);
414  std::sort(vec.begin(), vec.end());
415  init(vec, N);
416  }
417 
418 
419 }}} // end of namespace normalizer, yat and thep
420 
421 #endif
Perform Q-quantile normalization.
Definition: qQuantileNormalizer.h:88
Cubic spline with natural boundary conditions.
Definition: CSplineInterpolation.h:47
Definition: iterator_traits.h:47
void operator()(RandomAccessIterator1 first, RandomAccessIterator1 last, RandomAccessIterator2 result) const
Perform the Q-quantile normalization.
Definition: qQuantileNormalizer.h:240
Definition: iterator_traits.h:55
DataIterator.
Definition: DataIterator.h:63
This is the yat interface to GSL vector.
Definition: Vector.h:57
qQuantileNormalizer(ForwardIterator first, ForwardIterator last, unsigned int Q)
Constructor.
Definition: qQuantileNormalizer.h:230
This is the yat interface to GSL vector.
Definition: VectorBase.h:52
void check_iterator_is_unweighted(Iter iter)
check (at compile time) that iterator is unweighted.
Definition: iterator_traits.h:200
void sort_index(InputIterator first, InputIterator last, std::vector< size_t > &sort_index)
Definition: sort_index.h:146
detail::unweighted_type_and< w_type1, w_type2 >::type type
return unweighted if both are unweighted
Definition: iterator_traits.h:165

Generated on Tue Sep 26 2017 02:33:29 for yat by  doxygen 1.8.5