1 | // -*- C++ -*- |
---|
2 | /*************************************************************************** |
---|
3 | * blitz/array/reduce.h Reductions of an array (or array expression) in a |
---|
4 | * single rank: sum, mean, min, minIndex, max, maxIndex, |
---|
5 | * product, count, any, all |
---|
6 | * |
---|
7 | * $Id$ |
---|
8 | * |
---|
9 | * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org> |
---|
10 | * |
---|
11 | * This file is a part of Blitz. |
---|
12 | * |
---|
13 | * Blitz is free software: you can redistribute it and/or modify |
---|
14 | * it under the terms of the GNU Lesser General Public License |
---|
15 | * as published by the Free Software Foundation, either version 3 |
---|
16 | * of the License, or (at your option) any later version. |
---|
17 | * |
---|
18 | * Blitz is distributed in the hope that it will be useful, |
---|
19 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
20 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
21 | * GNU Lesser General Public License for more details. |
---|
22 | * |
---|
23 | * You should have received a copy of the GNU Lesser General Public |
---|
24 | * License along with Blitz. If not, see <http://www.gnu.org/licenses/>. |
---|
25 | * |
---|
26 | * Suggestions: blitz-devel@lists.sourceforge.net |
---|
27 | * Bugs: blitz-support@lists.sourceforge.net |
---|
28 | * |
---|
29 | * For more information, please see the Blitz++ Home Page: |
---|
30 | * https://sourceforge.net/projects/blitz/ |
---|
31 | * |
---|
32 | ****************************************************************************/ |
---|
33 | #ifndef BZ_ARRAYREDUCE_H |
---|
34 | #define BZ_ARRAYREDUCE_H |
---|
35 | |
---|
36 | #include <blitz/reduce.h> |
---|
37 | #include <blitz/meta/vecassign.h> |
---|
38 | |
---|
39 | BZ_NAMESPACE(blitz) |
---|
40 | |
---|
41 | template<bool needIndex,bool needInit> struct _bz_ReduceReset; |
---|
42 | |
---|
43 | template<> |
---|
44 | struct _bz_ReduceReset<true,true> { |
---|
45 | template<typename T_reduction,typename T_index,typename T_expr> |
---|
46 | void operator()(T_reduction& reduce,const T_index& index,const T_expr& expr) { |
---|
47 | reduce.reset(index,expr.first_value()); |
---|
48 | } |
---|
49 | }; |
---|
50 | |
---|
51 | template<> |
---|
52 | struct _bz_ReduceReset<false,true> { |
---|
53 | template<typename T_reduction,typename T_index,typename T_expr> |
---|
54 | void operator()(T_reduction& reduce,const T_index&,const T_expr& expr) { |
---|
55 | reduce.reset(expr.first_value()); |
---|
56 | } |
---|
57 | }; |
---|
58 | |
---|
59 | template<> |
---|
60 | struct _bz_ReduceReset<true,false> { |
---|
61 | template<typename T_reduction,typename T_index,typename T_expr> |
---|
62 | void operator()(T_reduction& reduce,const T_index& index,const T_expr&) { |
---|
63 | reduce.reset(index); |
---|
64 | } |
---|
65 | }; |
---|
66 | |
---|
67 | template<> |
---|
68 | struct _bz_ReduceReset<false,false> { |
---|
69 | template<typename T_reduction,typename T_index,typename T_expr> |
---|
70 | void operator()(T_reduction& reduce,const T_index&,const T_expr&) { |
---|
71 | reduce.reset(); |
---|
72 | } |
---|
73 | }; |
---|
74 | |
---|
75 | |
---|
76 | /** Expression template class for reductions. \todo We should be able |
---|
77 | to do vectorization, at least for complete reduction. */ |
---|
78 | template<typename T_expr, int N_index, typename T_reduction> |
---|
79 | class _bz_ArrayExprReduce { |
---|
80 | |
---|
81 | public: |
---|
82 | typedef _bz_typename T_reduction::T_numtype T_numtype; |
---|
83 | |
---|
84 | // select return type |
---|
85 | typedef typename unwrapET<typename T_expr::T_result>::T_unwrapped test; |
---|
86 | typedef typename selectET<typename T_expr::T_typeprop, |
---|
87 | T_numtype, |
---|
88 | _bz_ArrayExprReduce<test, N_index, T_reduction> >::T_selected T_typeprop; |
---|
89 | typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; |
---|
90 | typedef T_numtype T_optype; |
---|
91 | |
---|
92 | typedef T_expr T_ctorArg1; |
---|
93 | typedef T_reduction T_ctorArg2; |
---|
94 | typedef int T_range_result; // dummy |
---|
95 | |
---|
96 | static const int |
---|
97 | numArrayOperands = T_expr::numArrayOperands, |
---|
98 | numTVOperands = T_expr::numTVOperands, |
---|
99 | numTMOperands = T_expr::numTMOperands, |
---|
100 | numIndexPlaceholders = T_expr::numIndexPlaceholders + 1, |
---|
101 | minWidth = simdTypes<T_numtype>::vecWidth, |
---|
102 | maxWidth = simdTypes<T_numtype>::vecWidth, |
---|
103 | rank_ = T_expr::rank_ - 1; |
---|
104 | |
---|
105 | /** Vectorization doesn't work for index expressions, so we can use |
---|
106 | a dummy here. */ |
---|
107 | template<int N> struct tvresult { |
---|
108 | typedef FastTV2Iterator<T_numtype, N> Type; |
---|
109 | }; |
---|
110 | |
---|
111 | _bz_ArrayExprReduce(const _bz_ArrayExprReduce& reduce) |
---|
112 | : reduce_(reduce.reduce_), iter_(reduce.iter_), ordering_(reduce.ordering_) { } |
---|
113 | |
---|
114 | _bz_ArrayExprReduce(T_expr expr) |
---|
115 | : iter_(expr) |
---|
116 | { computeOrdering(); } |
---|
117 | |
---|
118 | #if 0 |
---|
119 | _bz_ArrayExprReduce(T_expr expr, T_reduction reduce) |
---|
120 | : iter_(expr), reduce_(reduce) |
---|
121 | { computeOrdering(); } |
---|
122 | #endif |
---|
123 | |
---|
124 | int ascending(const int r) const { return iter_.ascending(r); } |
---|
125 | int ordering(const int r) const { return ordering_[r]; } |
---|
126 | int lbound(const int r) const { return iter_.lbound(r); } |
---|
127 | int ubound(const int r) const { return iter_.ubound(r); } |
---|
128 | RectDomain<rank_> domain() const { return iter_.domain(); } |
---|
129 | |
---|
130 | template<int N_destRank> |
---|
131 | T_numtype operator()(const TinyVector<int, N_destRank>& destIndex) const |
---|
132 | { |
---|
133 | BZPRECHECK(N_destRank == N_index, |
---|
134 | "Array reduction performed over rank " << N_index |
---|
135 | << " to produce a rank " << N_destRank << " expression." << endl |
---|
136 | << "You must reduce over rank " << N_destRank << " instead."); |
---|
137 | |
---|
138 | TinyVector<int, N_destRank + 1> index; |
---|
139 | |
---|
140 | // This metaprogram copies elements 0..N-1 of destIndex into index |
---|
141 | _bz_meta_vecAssign<N_index, 0>::assign(index, destIndex, |
---|
142 | _bz_update<int,int>()); |
---|
143 | |
---|
144 | int lbound = iter_.lbound(N_index); |
---|
145 | int ubound = iter_.ubound(N_index); |
---|
146 | |
---|
147 | BZPRECHECK((lbound != tiny(int())) && (ubound != huge(int())), |
---|
148 | "Array reduction performed over rank " << N_index |
---|
149 | << " is unbounded." << endl |
---|
150 | << "There must be an array object in the expression being reduced" |
---|
151 | << endl << "which provides a bound in rank " << N_index << "."); |
---|
152 | |
---|
153 | // If we are doing minIndex/maxIndex, initialize with lower bound |
---|
154 | |
---|
155 | _bz_ReduceReset<T_reduction::needIndex,T_reduction::needInit> reset; |
---|
156 | reset(reduce_,lbound,iter_); |
---|
157 | |
---|
158 | for (index[N_index]=lbound; index[N_index]<=ubound; ++index[N_index]) { |
---|
159 | if (!reduce_(iter_(index), index[N_index])) |
---|
160 | break; |
---|
161 | } |
---|
162 | |
---|
163 | return reduce_.result(ubound-lbound+1); |
---|
164 | } |
---|
165 | |
---|
166 | // If you have a precondition failure on these routines, it means |
---|
167 | // you are trying to use stack iteration mode on an expression |
---|
168 | // which contains an index placeholder. You must use index |
---|
169 | // iteration mode instead. |
---|
170 | |
---|
171 | int operator*() const { |
---|
172 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return 0; } |
---|
173 | int suggestStride(int) const { |
---|
174 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return 0; } |
---|
175 | |
---|
176 | void push(int) const { |
---|
177 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); } |
---|
178 | void pop(int) const { |
---|
179 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); } |
---|
180 | void advance() const { |
---|
181 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); } |
---|
182 | void advance(int) const { |
---|
183 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); } |
---|
184 | void loadStride(int) const { |
---|
185 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); } |
---|
186 | void advanceUnitStride() const { |
---|
187 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); } |
---|
188 | |
---|
189 | template<int N_rank> |
---|
190 | void moveTo(const TinyVector<int,N_rank>&) const { |
---|
191 | BZPRECHECK(0,"Stencils of reductions are not implemented"); } |
---|
192 | |
---|
193 | bool isUnitStride(int) const { |
---|
194 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return false; } |
---|
195 | bool isUnitStride() const { |
---|
196 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return false; } |
---|
197 | bool canCollapse(int,int) const { |
---|
198 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return false; } |
---|
199 | bool isStride(int,int) const { |
---|
200 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return true; } |
---|
201 | |
---|
202 | T_numtype operator[](int) const { |
---|
203 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return T_numtype(); } |
---|
204 | T_numtype fastRead(int) const { |
---|
205 | BZPRECHECK(0,"Can't use stack iteration on a reduction."); return T_numtype(); } |
---|
206 | |
---|
207 | template<int N> |
---|
208 | typename tvresult<N>::Type fastRead_tv(int) const { |
---|
209 | BZPRECHECK(0,"Can't use stack iteration on an index mapping."); |
---|
210 | return TinyVector<T_numtype, N>(); |
---|
211 | } |
---|
212 | |
---|
213 | /** Determining whether the resulting expression is aligned is |
---|
214 | difficult, so to be safe we say no. It shouldn't be attempted |
---|
215 | anyway, though. */ |
---|
216 | bool isVectorAligned(diffType offset) const { |
---|
217 | return false; } |
---|
218 | |
---|
219 | // don't know how to define these, so stencil expressions won't work |
---|
220 | T_result shift(int offset, int dim) const |
---|
221 | { BZPRECHECK(0,"Stencils of reductions are not implemented"); return T_numtype(); } |
---|
222 | T_result shift(int offset1, int dim1,int offset2, int dim2) const |
---|
223 | { BZPRECHECK(0,"Stencils of reductions are not implemented"); return T_numtype(); } |
---|
224 | void _bz_offsetData(sizeType i) { BZPRECONDITION(0); } |
---|
225 | |
---|
226 | // Unclear how to define this, and stencils don't work anyway |
---|
227 | T_range_result operator()(RectDomain<rank_> d) const |
---|
228 | { BZPRECHECK(0,"Stencils of reductions are not implemented"); |
---|
229 | return T_range_result(); } |
---|
230 | |
---|
231 | void prettyPrint(BZ_STD_SCOPE(string) &str, prettyPrintFormat& format) const |
---|
232 | { |
---|
233 | // NEEDS_WORK-- do real formatting for reductions |
---|
234 | str += "reduce[NEEDS_WORK]("; |
---|
235 | iter_.prettyPrint(str,format); |
---|
236 | str += ")"; |
---|
237 | } |
---|
238 | |
---|
239 | /** \todo do a real shape check (tricky) */ |
---|
240 | template<typename T_shape> |
---|
241 | bool shapeCheck(const T_shape&) const |
---|
242 | { |
---|
243 | return true; |
---|
244 | } |
---|
245 | |
---|
246 | // sliceinfo for expressions |
---|
247 | template<typename T1, typename T2 = nilArraySection, |
---|
248 | class T3 = nilArraySection, typename T4 = nilArraySection, |
---|
249 | class T5 = nilArraySection, typename T6 = nilArraySection, |
---|
250 | class T7 = nilArraySection, typename T8 = nilArraySection, |
---|
251 | class T9 = nilArraySection, typename T10 = nilArraySection, |
---|
252 | class T11 = nilArraySection> |
---|
253 | class SliceInfo { |
---|
254 | public: |
---|
255 | typedef typename T_expr::template SliceInfo<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11>::T_slice T_slice1; |
---|
256 | typedef _bz_ArrayExprReduce<T_slice1, N_index, T_reduction> T_slice; |
---|
257 | }; |
---|
258 | |
---|
259 | template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, |
---|
260 | typename T7, typename T8, typename T9, typename T10, typename T11> |
---|
261 | typename SliceInfo<T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11>::T_slice |
---|
262 | operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const |
---|
263 | { |
---|
264 | // for slicing reduction results, we would need to set the |
---|
265 | // dimension reduced over to Range::all(). That's not easy to do |
---|
266 | // because it requires us to change the type of one of the rn's. |
---|
267 | BZPRECONDITION(0); |
---|
268 | } |
---|
269 | |
---|
270 | private: |
---|
271 | _bz_ArrayExprReduce() { } |
---|
272 | |
---|
273 | /** Method for properly initializing the ordering values. \todo If |
---|
274 | the expression being reduced consist of arrays with different |
---|
275 | orderings, the call to iter_.ordering() will fail with a |
---|
276 | "different orderings" error. But just like it can happen that |
---|
277 | ordering values are missing from the expression, it seems |
---|
278 | equally valid that ordering is indefinite in cases where the |
---|
279 | expression has differing values. This doesn't prevent us from |
---|
280 | assigning the expression to an array, and it shouldn't prevent |
---|
281 | the expression from being used in a reduction either. (This is |
---|
282 | bug 2058441.) */ |
---|
283 | void computeOrdering() |
---|
284 | { |
---|
285 | TinyVector<bool,rank_> in_ordering; |
---|
286 | in_ordering = false; |
---|
287 | |
---|
288 | int j = 0; |
---|
289 | for (int i=0; i<rank_; ++i) { |
---|
290 | const int orderingj = iter_.ordering(i); |
---|
291 | if (orderingj != tiny(int()) && orderingj < rank_ && !in_ordering(orderingj)) { |
---|
292 | // unique value in ordering array |
---|
293 | in_ordering(orderingj) = true; |
---|
294 | ordering_(j++) = orderingj; |
---|
295 | } |
---|
296 | } |
---|
297 | |
---|
298 | // It is possible that ordering is not a permutation of 0,...,rank-1. |
---|
299 | // In that case j will be less than rank. We fill in ordering with |
---|
300 | // the unused values in decreasing order. |
---|
301 | for (int i = rank_; j < rank_; ++j) { |
---|
302 | while (in_ordering(--i)); // find an unused index |
---|
303 | ordering_(j) = i; |
---|
304 | } |
---|
305 | } |
---|
306 | |
---|
307 | T_reduction reduce_; |
---|
308 | T_expr iter_; |
---|
309 | TinyVector<int,rank_> ordering_; |
---|
310 | }; |
---|
311 | |
---|
312 | #define BZ_DECL_ARRAY_PARTIAL_REDUCE(fn,reduction) \ |
---|
313 | template<typename T_expr, int N_index> \ |
---|
314 | inline \ |
---|
315 | _bz_ArrayExpr<_bz_ArrayExprReduce<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T_expr>::T_expr, \ |
---|
316 | N_index, \ |
---|
317 | reduction<_bz_typename T_expr::T_numtype> > > \ |
---|
318 | fn(const BZ_BLITZ_SCOPE(ETBase)<T_expr>& expr, \ |
---|
319 | const IndexPlaceholder<N_index>&) \ |
---|
320 | { \ |
---|
321 | return _bz_ArrayExprReduce<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T_expr>::T_expr, \ |
---|
322 | N_index, \ |
---|
323 | reduction<_bz_typename T_expr::T_numtype> > \ |
---|
324 | (BZ_BLITZ_SCOPE(asExpr)<T_expr>::getExpr(expr.unwrap())); \ |
---|
325 | } |
---|
326 | |
---|
327 | BZ_DECL_ARRAY_PARTIAL_REDUCE(sum, ReduceSum) |
---|
328 | BZ_DECL_ARRAY_PARTIAL_REDUCE(mean, ReduceMean) |
---|
329 | BZ_DECL_ARRAY_PARTIAL_REDUCE((min), ReduceMin) |
---|
330 | BZ_DECL_ARRAY_PARTIAL_REDUCE(minIndex, ReduceMinIndex) |
---|
331 | BZ_DECL_ARRAY_PARTIAL_REDUCE((max), ReduceMax) |
---|
332 | BZ_DECL_ARRAY_PARTIAL_REDUCE(maxIndex, ReduceMaxIndex) |
---|
333 | BZ_DECL_ARRAY_PARTIAL_REDUCE(product, ReduceProduct) |
---|
334 | BZ_DECL_ARRAY_PARTIAL_REDUCE(count, ReduceCount) |
---|
335 | BZ_DECL_ARRAY_PARTIAL_REDUCE(any, ReduceAny) |
---|
336 | BZ_DECL_ARRAY_PARTIAL_REDUCE(all, ReduceAll) |
---|
337 | BZ_DECL_ARRAY_PARTIAL_REDUCE(first, ReduceFirst) |
---|
338 | BZ_DECL_ARRAY_PARTIAL_REDUCE(last, ReduceLast) |
---|
339 | |
---|
340 | /* |
---|
341 | * Complete reductions |
---|
342 | */ |
---|
343 | |
---|
344 | // Prototype of reduction functions |
---|
345 | template<typename T_expr, typename T_reduction> |
---|
346 | _bz_typename T_reduction::T_resulttype |
---|
347 | _bz_ArrayExprFullReduce(T_expr expr, T_reduction reduction); |
---|
348 | |
---|
349 | template<typename T_expr, typename T_reduction> |
---|
350 | _bz_typename T_reduction::T_resulttype |
---|
351 | _bz_reduceWithIndexTraversal(T_expr expr, T_reduction reduction); |
---|
352 | |
---|
353 | template<typename T_expr, typename T_reduction> |
---|
354 | _bz_typename T_reduction::T_resulttype |
---|
355 | _bz_reduceWithIndexVectorTraversal(T_expr expr, T_reduction reduction); |
---|
356 | |
---|
357 | #define BZ_DECL_ARRAY_FULL_REDUCE(fn,reduction) \ |
---|
358 | template<typename T_expr> \ |
---|
359 | _bz_inline_et \ |
---|
360 | _bz_typename reduction<_bz_typename T_expr::T_numtype>::T_resulttype \ |
---|
361 | fn(const BZ_BLITZ_SCOPE(ETBase)<T_expr>& expr) \ |
---|
362 | { \ |
---|
363 | return _bz_ArrayExprFullReduce \ |
---|
364 | (BZ_BLITZ_SCOPE(asExpr)<T_expr>::getExpr(expr.unwrap()), \ |
---|
365 | reduction<_bz_typename T_expr::T_numtype>()); \ |
---|
366 | } \ |
---|
367 | |
---|
368 | BZ_DECL_ARRAY_FULL_REDUCE(sum, ReduceSum) |
---|
369 | BZ_DECL_ARRAY_FULL_REDUCE(mean, ReduceMean) |
---|
370 | BZ_DECL_ARRAY_FULL_REDUCE((min), ReduceMin) |
---|
371 | BZ_DECL_ARRAY_FULL_REDUCE((max), ReduceMax) |
---|
372 | BZ_DECL_ARRAY_FULL_REDUCE((minmax), ReduceMinMax) |
---|
373 | BZ_DECL_ARRAY_FULL_REDUCE(product, ReduceProduct) |
---|
374 | BZ_DECL_ARRAY_FULL_REDUCE(count, ReduceCount) |
---|
375 | BZ_DECL_ARRAY_FULL_REDUCE(any, ReduceAny) |
---|
376 | BZ_DECL_ARRAY_FULL_REDUCE(all, ReduceAll) |
---|
377 | BZ_DECL_ARRAY_FULL_REDUCE(first, ReduceFirst) |
---|
378 | BZ_DECL_ARRAY_FULL_REDUCE(last, ReduceLast) |
---|
379 | |
---|
380 | // Special versions of complete reductions: minIndex and |
---|
381 | // maxIndex |
---|
382 | |
---|
383 | #define BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(fn,reduction) \ |
---|
384 | template<typename T_expr> \ |
---|
385 | _bz_inline_et \ |
---|
386 | _bz_typename reduction<_bz_typename T_expr::T_numtype, \ |
---|
387 | T_expr::rank_>::T_resulttype \ |
---|
388 | fn(const BZ_BLITZ_SCOPE(ETBase)<T_expr>& expr) \ |
---|
389 | { \ |
---|
390 | return _bz_reduceWithIndexVectorTraversal \ |
---|
391 | (BZ_BLITZ_SCOPE(asExpr)<T_expr>::getExpr(expr.unwrap()), \ |
---|
392 | reduction<_bz_typename T_expr::T_numtype, T_expr::rank_>()); \ |
---|
393 | } |
---|
394 | |
---|
395 | BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(minIndex, ReduceMinIndexVector) |
---|
396 | BZ_DECL_ARRAY_FULL_REDUCE_INDEXVECTOR(maxIndex, ReduceMaxIndexVector) |
---|
397 | |
---|
398 | BZ_NAMESPACE_END |
---|
399 | |
---|
400 | #include <blitz/array/reduce.cc> |
---|
401 | |
---|
402 | #endif // BZ_ARRAYREDUCE_H |
---|