1 | // -*- C++ -*- |
---|
2 | /*************************************************************************** |
---|
3 | * blitz/array/stencil-et.h Expression-template-capable stencils |
---|
4 | * |
---|
5 | * $Id: stencil-et.h,v 1.15 2011/03/25 22:41:17 julianc Exp $ |
---|
6 | * |
---|
7 | * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org> |
---|
8 | * |
---|
9 | * This file is a part of Blitz. |
---|
10 | * |
---|
11 | * Blitz is free software: you can redistribute it and/or modify |
---|
12 | * it under the terms of the GNU Lesser General Public License |
---|
13 | * as published by the Free Software Foundation, either version 3 |
---|
14 | * of the License, or (at your option) any later version. |
---|
15 | * |
---|
16 | * Blitz is distributed in the hope that it will be useful, |
---|
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
19 | * GNU Lesser General Public License for more details. |
---|
20 | * |
---|
21 | * You should have received a copy of the GNU Lesser General Public |
---|
22 | * License along with Blitz. If not, see <http://www.gnu.org/licenses/>. |
---|
23 | * |
---|
24 | * Suggestions: blitz-devel@lists.sourceforge.net |
---|
25 | * Bugs: blitz-support@lists.sourceforge.net |
---|
26 | * |
---|
27 | * For more information, please see the Blitz++ Home Page: |
---|
28 | * https://sourceforge.net/projects/blitz/ |
---|
29 | * |
---|
30 | ****************************************************************************/ |
---|
31 | #ifndef BZ_ARRAY_STENCIL_ET_MACROS_H |
---|
32 | #define BZ_ARRAY_STENCIL_ET_MACROS_H |
---|
33 | |
---|
34 | BZ_NAMESPACE(blitz) |
---|
35 | |
---|
/* This file contains the macros that were used to declare all
   stencils. The stencils themselves are now declared in the generated
   file stencil-classes.cc. You only need to include this file if you
   need to declare your own stencil ET classes. Note that this file is
   NOT necessary for declaring stencil operators that are applied with
   applyStencil, as described in the documentation. To use these
   macros to declare an ET stencil called "name", you first need to
   declare a stencil operator called "name_stencilop" using
   e.g. BZ_DECLARE_STENCIL_OPERATOR1, and then call the appropriate
   macro here.
*/
---|
47 | |
---|
48 | |
---|
/* BZ_ET_STENCIL_REDIRECT(name)

   Emits two overloads of the free function name(), one taking a
   non-const Array and one taking a const Array. Each simply forwards
   to name(arr.wrap()), i.e. to whichever overload of name() accepts
   the wrapped array.

   These explicit Array overloads are needed so that a call with a bare
   array does not match the stencil operator in stencilops.h instead.
   The return type names the single-parameter form of name_et, so this
   macro is only suitable for stencil ET classes with one template
   parameter. */
#define BZ_ET_STENCIL_REDIRECT(name) \
  template<typename T_elem, int N_dim> \
  inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T_elem,N_dim> >::T_expr::T_range_result> > \
  name(Array<T_elem,N_dim>& arr) \
  { return name(arr.wrap()); } \
  template<typename T_elem, int N_dim> \
  inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T_elem,N_dim> >::T_expr::T_range_result> > \
  name(const Array<T_elem,N_dim>& arr) \
  { return name(arr.wrap()); }
---|
61 | |
---|
62 | |
---|
/* BZ_ET_STENCIL(name, result, etresult, MINB, MAXB)

   Defines a stencil ET class name_et<P_expr, P_numtype> that applies
   the stencil operator name_stencilop to a single operand, plus the
   free functions name() that build it.

   name      identifier of the stencil; name_stencilop must already be
             declared.
   result    numeric type used for P_numtype when name() is called on a
             bare Array.
   etresult  numeric type used for P_numtype when name() is called on
             an expression (ETBase<T1>).
   MINB,
   MAXB      passed to _bz_shrinkDomain together with the operand's
             domain before the stencil expression is built
             (presumably the stencil's extent -- confirm against
             _bz_shrinkDomain).

   If you want to refer to the native type of the expression, set
   result="P_numtype" and etresult="typename T1::T_numtype". Sorry for
   that ugliness, but the two contexts define the types differently.

   Several const members below reposition iter_ and then move it back;
   this relies on the base class allowing iter_ to be modified through
   a const object (not visible in this file). */

#define BZ_ET_STENCIL(name,result, etresult, MINB, MAXB) \
  template<typename P_expr, _bz_typename P_numtype> \
  class name ## _et : public _bz_StencilExpr<P_expr, P_numtype> \
  { \
  public: \
    typedef _bz_StencilExpr<P_expr, P_numtype> T_base; \
    typedef _bz_typename T_base::T_numtype T_numtype; \
    typedef _bz_typename T_base::T_expr T_expr; \
    \
    /* if P_numtype is an ET-type, we need to return an expr */ \
    typedef typename selectET<P_numtype, \
                              T_numtype, \
                              ETBase<_bz_ArrayExpr<_bz_ArrayExprConstant<P_numtype> > > >::T_selected T_typeprop; \
    typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; \
    typedef T_numtype T_optype; \
    \
    /* dummy */ \
    template<int N> struct tvresult { \
      typedef name ## _et<typename T_expr::template tvresult<N>::Type,T_numtype> Type; \
    }; \
    \
    typedef name ## _et<_bz_typename P_expr::T_range_result, T_numtype> T_range_result; \
    \
    using T_base::iter_; \
    using T_base::rank_; \
  public: \
    name ## _et(const name ## _et& a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    name ## _et(BZ_ETPARM(T_expr) a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    /* evaluate the stencil at the current iterator position */ \
    T_result operator*() const \
    { return name ## _stencilop(iter_); } \
    \
    /* this is not really const, because we don't undo the moveTo, but \
       that should not be visible to outside. It would be if we used \
       some kind of mixed index and stack traversal, but that will \
       screw things up, const or not. */ \
    template<int N_rank2> \
    T_result operator()(const TinyVector<int, N_rank2>& i) const \
    { iter_.moveTo(i); return name ## _stencilop(iter_); } \
    \
    /* restrict the operand to domain d and wrap it in a new stencil */ \
    T_range_result operator()(const RectDomain<rank_>& d) const \
    { return T_range_result(iter_(d)); } \
    \
    T_result operator[](int i) const \
    { return name ## _stencilop(iter_[i]); } \
    \
    T_result fastRead(sizeType i) const \
    {/* this probably isn't very fast... */ \
      /* offset, evaluate, then undo the offset */ \
      iter_._bz_offsetData(i); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-i); \
      return r; \
    } \
    \
    /** This way of vectorizing won't work on stencils. */ \
    template<int N> \
    typename tvresult<N>::Type fastRead_tv(int i) const { \
      BZPRECHECK(0, "Can't vectorize stencils"); \
      return iter_.template fastRead_tv<N>(i); } \
    \
    /* evaluate the stencil at a position offset along one dimension, \
       leaving iter_ where it was */ \
    T_result shift(int offset, int dim) const \
    { \
      iter_._bz_offsetData(offset, dim); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset, dim); \
      return r; \
    } \
    \
    /* same as above, offset along two dimensions */ \
    T_result shift(int offset1, int dim1, int offset2, int dim2) const \
    { \
      iter_._bz_offsetData(offset1, dim1, offset2, dim2); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      return r; \
    } \
    \
    void prettyPrint(BZ_STD_SCOPE(string) &str, \
                     prettyPrintFormat& format) const \
    { \
      /* #name stringizes the macro argument; a macro parameter is \
         not replaced inside a string literal, so "name (stencil)" \
         would print the word "name" for every stencil */ \
      str += #name " (stencil)"; \
      str += "("; \
      iter_.prettyPrint(str, format); \
      str += ")"; \
    } \
    \
    template<typename T1, typename T2 = nilArraySection, \
             class T3 = nilArraySection, typename T4 = nilArraySection, \
             class T5 = nilArraySection, typename T6 = nilArraySection, \
             class T7 = nilArraySection, typename T8 = nilArraySection, \
             class T9 = nilArraySection, typename T10 = nilArraySection, \
             class T11 = nilArraySection> \
    class SliceInfo { \
    public: \
      typedef name ## _et<T_expr, T_numtype> T_slice; \
    }; \
    \
    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \
             typename T7, typename T8, typename T9, typename T10, typename T11> \
    name ## _et \
    operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \
    { \
      /* because stencils work inherently in several dimensions it's \
         complicated to slice the domain. slices will be changed to unit \
         ranges instead. slicing stencil result thus *never* changes \
         the rank of the expression, unlike the normal case. */ \
      return name ## _et \
        (iter_(_bz_makeRange(r1), \
               _bz_makeRange(r2), \
               _bz_makeRange(r3), \
               _bz_makeRange(r4), \
               _bz_makeRange(r5), \
               _bz_makeRange(r6), \
               _bz_makeRange(r7), \
               _bz_makeRange(r8), \
               _bz_makeRange(r9), \
               _bz_makeRange(r10), \
               _bz_makeRange(r11))); \
    } \
    \
  }; \
  /* generate an ET object from an expression */ \
  template<typename T1> \
  inline _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result, etresult> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1) \
  { \
    return _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result, etresult> > \
      (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \
  } \
  /* redirect calls with bare arrays to the main function */ \
  template<typename T, int N> \
  inline _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result, result> > \
  name(const Array<T,N>& d1) \
  { return name(d1.wrap()); } \
  \
  template<typename T, int N> \
  inline _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result, result> > \
  name(Array<T,N>& d1) \
  { return name(d1.wrap()); }
---|
218 | |
---|
219 | |
---|
/* BZ_ET_STENCIL2(name, result, etresult, MINB, MAXB)

   Defines a stencil ET class name_et2<P_expr1, P_expr2, P_numtype>
   that applies the two-operand stencil operator name_stencilop to two
   arrays or expressions of arbitrary type, plus the free functions
   name() that build it.

   name      identifier of the stencil; name_stencilop must already be
             declared.
   result    numeric type used for P_numtype in the overloads of name()
             where at least one argument is a bare Array.
   etresult  numeric type used for P_numtype when both arguments are
             expressions (ETBase<T1>, ETBase<T2>).
   MINB,
   MAXB      passed to _bz_shrinkDomain together with each operand's
             domain before the stencil expression is built
             (presumably the stencil's extent -- confirm against
             _bz_shrinkDomain).

   If you want to refer to the native type of the expression, set
   result="P_numtype" and etresult="typename T1::T_numtype". Sorry for
   that ugliness, but the two contexts define the types differently.

   Several const members below reposition iter1_/iter2_ and then move
   them back; this relies on the base class allowing the iterators to
   be modified through a const object (not visible in this file). */

#define BZ_ET_STENCIL2(name,result, etresult, MINB, MAXB) \
  template<typename P_expr1, typename P_expr2, _bz_typename P_numtype> \
  class name ## _et2 : public _bz_StencilExpr2<P_expr1, P_expr2, P_numtype> \
  { \
  public: \
    typedef _bz_StencilExpr2<P_expr1, P_expr2, P_numtype> T_base; \
    typedef _bz_typename T_base::T_numtype T_numtype; \
    typedef _bz_typename T_base::T_expr1 T_expr1; \
    typedef _bz_typename T_base::T_expr2 T_expr2; \
    \
    /* if P_numtype is an ET-type, we need to return an expr */ \
    typedef typename selectET<P_numtype, \
                              T_numtype, \
                              ETBase<_bz_ArrayExpr<_bz_ArrayExprConstant<P_numtype> > > >::T_selected T_typeprop; \
    typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; \
    typedef T_numtype T_optype; \
    \
    /* dummy */ \
    template<int N> struct tvresult { \
      typedef name ## _et2<typename T_expr1::template tvresult<N>::Type,typename T_expr2::template tvresult<N>::Type,T_numtype> Type; \
    }; \
    \
    typedef name ## _et2<_bz_typename P_expr1::T_range_result, _bz_typename P_expr2::T_range_result, T_numtype> T_range_result; \
    \
    using T_base::iter1_; \
    using T_base::iter2_; \
    using T_base::rank_; \
  public: \
    name ## _et2(const name ## _et2& a) : \
      _bz_StencilExpr2<P_expr1, P_expr2, T_numtype>(a) \
      { } \
    \
    name ## _et2(BZ_ETPARM(T_expr1) a, BZ_ETPARM(T_expr2) b) : \
      _bz_StencilExpr2<P_expr1, P_expr2, T_numtype>(a, b) \
      { } \
    /* disabled single-argument constructor, kept for reference: \
    name ## _et2(_bz_typename T_expr::T_ctorArg1 a) : \
      _bz_StencilExpr2<P_expr, T_numtype>(a) \
      { } \
    */ \
    /* evaluate the stencil at the current position of both iterators */ \
    T_result operator*() const \
    { return name ## _stencilop(iter1_, iter2_); } \
    \
    /* move both iterators to index i, then evaluate (the move is not \
       undone) */ \
    T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \
    { iter1_.moveTo(i); iter2_.moveTo(i); return name ## _stencilop(iter1_, iter2_); } \
    \
    /* restrict both operands to domain d and wrap them in a new stencil */ \
    T_range_result operator()(const RectDomain<rank_>& d) const \
    { return T_range_result(iter1_(d), iter2_(d)); } \
    \
    T_result operator[](int i) const \
    { return name ## _stencilop(iter1_[i], iter2_[i]); } \
    \
    T_result fastRead(sizeType i) const \
    {/* this probably isn't very fast... */ \
      /* offset both iterators, evaluate, then undo the offsets */ \
      iter1_._bz_offsetData(i); iter2_._bz_offsetData(i); \
      T_result r = name ## _stencilop (iter1_, iter2_); \
      iter1_._bz_offsetData(-i); iter2_._bz_offsetData(-i); \
      return r; \
    } \
    \
    /** This way of vectorizing won't work on stencils. */ \
    template<int N> \
    typename tvresult<N>::Type fastRead_tv(int i) const { \
      BZPRECHECK(0, "Can't vectorize stencils"); \
      return typename tvresult<N>::Type(iter1_.template fastRead_tv<N>(i), \
                                        iter2_.template fastRead_tv<N>(i)); } \
    \
    /* evaluate the stencil at a position offset along one dimension, \
       leaving both iterators where they were */ \
    T_result shift(int offset, int dim) const \
    { \
      iter1_._bz_offsetData(offset, dim); \
      iter2_._bz_offsetData(offset, dim); \
      T_result r = name ## _stencilop (iter1_, iter2_); \
      iter1_._bz_offsetData(-offset, dim); \
      iter2_._bz_offsetData(-offset, dim); \
      return r; \
    } \
    \
    /* same as above, offset along two dimensions */ \
    T_result shift(int offset1, int dim1, int offset2, int dim2) const \
    { \
      iter1_._bz_offsetData(offset1, dim1, offset2, dim2); \
      iter2_._bz_offsetData(offset1, dim1, offset2, dim2); \
      T_result r = name ## _stencilop (iter1_, iter2_); \
      iter1_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      iter2_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      return r; \
    } \
    \
    void prettyPrint(BZ_STD_SCOPE(string) &str, \
                     prettyPrintFormat& format) const \
    { \
      /* #name stringizes the macro argument; a macro parameter is \
         not replaced inside a string literal, so "name (stencil)" \
         would print the word "name" for every stencil */ \
      str += #name " (stencil)"; \
      str += "("; \
      iter1_.prettyPrint(str, format); \
      str += ", "; \
      iter2_.prettyPrint(str, format); \
      str += ")"; \
    } \
    \
    template<typename T1, typename T2 = nilArraySection, \
             class T3 = nilArraySection, typename T4 = nilArraySection, \
             class T5 = nilArraySection, typename T6 = nilArraySection, \
             class T7 = nilArraySection, typename T8 = nilArraySection, \
             class T9 = nilArraySection, typename T10 = nilArraySection, \
             class T11 = nilArraySection> \
    class SliceInfo { \
    public: \
      typedef name ## _et2<T_expr1, T_expr2, T_numtype> T_slice; \
    }; \
    \
    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \
             typename T7, typename T8, typename T9, typename T10, typename T11> \
    name ## _et2 \
    operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \
    { \
      /* Slice both operands with the same ranges. The only operand \
         members this class imports are iter1_ and iter2_, and the \
         constructor takes two arguments, so each operand is sliced \
         separately. */ \
      return name ## _et2 \
        (iter1_(_bz_makeRange(r1), \
                _bz_makeRange(r2), \
                _bz_makeRange(r3), \
                _bz_makeRange(r4), \
                _bz_makeRange(r5), \
                _bz_makeRange(r6), \
                _bz_makeRange(r7), \
                _bz_makeRange(r8), \
                _bz_makeRange(r9), \
                _bz_makeRange(r10), \
                _bz_makeRange(r11)), \
         iter2_(_bz_makeRange(r1), \
                _bz_makeRange(r2), \
                _bz_makeRange(r3), \
                _bz_makeRange(r4), \
                _bz_makeRange(r5), \
                _bz_makeRange(r6), \
                _bz_makeRange(r7), \
                _bz_makeRange(r8), \
                _bz_makeRange(r9), \
                _bz_makeRange(r10), \
                _bz_makeRange(r11))); \
    } \
  }; \
  \
  /* create ET object from application to expression */ \
  template<typename T1, typename T2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<T2>::T_expr::T_range_result, etresult> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1, \
       const BZ_BLITZ_SCOPE(ETBase)<T2>& d2) \
  { \
    return _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<T2>::T_expr::T_range_result, etresult> > \
      (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB)), \
       BZ_BLITZ_SCOPE(asExpr)<T2>::getExpr(d2.unwrap())(_bz_shrinkDomain(d2.unwrap().domain(),MINB, MAXB))); \
  } \
  /* matches to calls involving bare arrays (this is very annoying \
     because we have to exactly match every possible call combination \
     to ensure that this matches instead of the operator in \
     stencilops.h) */ \
  template<typename T1, typename T2, int N2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<Array<T2,N2> >::T_expr::T_range_result, result> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1, Array<T2,N2>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T1, typename T2, int N2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<Array<T2,N2> >::T_expr::T_range_result, result> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1, const Array<T2,N2>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T1, int N1, typename T2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<Array<T1,N1> >::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<T2>::T_expr::T_range_result, result> > \
  name(Array<T1,N1>& d1, const BZ_BLITZ_SCOPE(ETBase)<T2>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T1, int N1, typename T2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<Array<T1,N1> >::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<T2>::T_expr::T_range_result, result> > \
  name(const Array<T1,N1>& d1, const BZ_BLITZ_SCOPE(ETBase)<T2>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T1, int N1, typename T2, int N2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<Array<T1,N1> >::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<Array<T2,N2> >::T_expr::T_range_result, result> > \
  name(const Array<T1,N1>& d1, Array<T2,N2>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T1, int N1, typename T2, int N2> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<Array<T1,N1> >::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<Array<T2,N2> >::T_expr::T_range_result, result> > \
  name(Array<T1,N1>& d1, const Array<T2,N2>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T, int N> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result, result> > \
  name(Array<T,N>& d1, Array<T,N>& d2) \
  { return name(d1.wrap(), d2.wrap()); } \
  \
  template<typename T, int N> \
  inline _bz_ArrayExpr<name ## _et2<typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result, typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result, result> > \
  name(const Array<T,N>& d1, const Array<T,N>& d2) \
  { return name(d1.wrap(), d2.wrap()); }
---|
410 | |
---|
411 | |
---|
/* BZ_ET_STENCILM(name, result_rank, MINB, MAXB)

   Defines a stencil ET class name_et<P_expr> that applies the stencil
   operator name_stencilop to an array<P_numtype, N_rank> and yields a
   multicomponent result whose numeric type is
   TinyMatrix<multicomponent_traits<P_numtype>::T_element,
   result_rank, result_rank>. P_numtype can be a TinyVector or a
   scalar, I think.

   name         identifier of the stencil; name_stencilop must already
                be declared.
   result_rank  number of rows and of columns of the resulting
                TinyMatrix.
   MINB, MAXB   passed to _bz_shrinkDomain together with the operand's
                domain before the stencil expression is built
                (presumably the stencil's extent -- confirm against
                _bz_shrinkDomain).

   The bare-Array overloads of name() are generated through
   BZ_ET_STENCIL_REDIRECT.

   Several const members below reposition iter_ and then move it back;
   this relies on the base class allowing iter_ to be modified through
   a const object (not visible in this file). */

#define BZ_ET_STENCILM(name,result_rank, MINB, MAXB) \
  template<typename P_expr> \
  class name ## _et : public _bz_StencilExpr<P_expr, TinyMatrix<_bz_typename multicomponent_traits<typename P_expr::T_numtype>::T_element, result_rank, result_rank> > \
  { \
  public: \
    typedef _bz_StencilExpr<P_expr, TinyMatrix<_bz_typename multicomponent_traits<typename P_expr::T_numtype>::T_element, result_rank, result_rank> > T_base; \
    typedef _bz_typename T_base::T_numtype T_numtype; \
    typedef _bz_typename T_base::T_expr T_expr; \
    \
    /* there is no return type selection, as we are returning a \
       TinyMatrix. This must be returned as a FastTM2CopyIterator since \
       the output of the stencil operator is a temporary. */ \
    typedef ETBase<_bz_ArrayExpr<FastTM2CopyIterator<typename multicomponent_traits<typename P_expr::T_numtype>::T_element, result_rank, result_rank> > > T_typeprop; \
    typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; \
    typedef T_numtype T_optype; \
    \
    /* dummy */ \
    template<int N> struct tvresult { \
      typedef name ## _et<typename T_expr::template tvresult<N>::Type> Type; \
    }; \
    \
    typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \
    \
    using T_base::iter_; \
    using T_base::rank_; \
  public: \
    name ## _et(const name ## _et& a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    name ## _et(BZ_ETPARM(T_expr) a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    /* evaluate the stencil at the current iterator position */ \
    T_result operator*() const \
    { return name ## _stencilop(iter_); } \
    /* move to index i, then evaluate (the move is not undone) */ \
    T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \
    { iter_.moveTo(i); return name ## _stencilop(iter_); } \
    \
    /* restrict the operand to domain d and wrap it in a new stencil */ \
    T_range_result operator()(const RectDomain<rank_>& d) const \
    { return T_range_result(iter_(d)); } \
    \
    T_result operator[](int i) const \
    { return name ## _stencilop(iter_[i]); } \
    \
    T_result fastRead(sizeType i) const \
    {/* this probably isn't very fast... */ \
      /* offset, evaluate, then undo the offset */ \
      iter_._bz_offsetData(i); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-i); \
      return r; \
    } \
    \
    /** This way of vectorizing won't work on stencils. */ \
    template<int N> \
    typename tvresult<N>::Type fastRead_tv(int i) const { \
      BZPRECHECK(0, "Can't vectorize stencils"); \
      return iter_.template fastRead_tv<N>(i); } \
    \
    /* evaluate the stencil at a position offset along one dimension, \
       leaving iter_ where it was */ \
    T_result shift(int offset, int dim) const \
    { \
      iter_._bz_offsetData(offset, dim); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset, dim); \
      return r; \
    } \
    \
    /* same as above, offset along two dimensions */ \
    T_result shift(int offset1, int dim1, int offset2, int dim2) const \
    { \
      iter_._bz_offsetData(offset1, dim1, offset2, dim2); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      return r; \
    } \
    \
    void prettyPrint(BZ_STD_SCOPE(string) &str, \
                     prettyPrintFormat& format) const \
    { \
      /* #name stringizes the macro argument; a macro parameter is \
         not replaced inside a string literal, so "name (stencil)" \
         would print the word "name" for every stencil */ \
      str += #name " (stencil)"; \
      str += "("; \
      iter_.prettyPrint(str, format); \
      str += ")"; \
    } \
    \
    template<typename T1, typename T2 = nilArraySection, \
             class T3 = nilArraySection, typename T4 = nilArraySection, \
             class T5 = nilArraySection, typename T6 = nilArraySection, \
             class T7 = nilArraySection, typename T8 = nilArraySection, \
             class T9 = nilArraySection, typename T10 = nilArraySection, \
             class T11 = nilArraySection> \
    class SliceInfo { \
    public: \
      typedef name ## _et<T_expr> T_slice; \
    }; \
    \
    /* slicing: every argument is converted with _bz_makeRange before \
       being applied to the operand */ \
    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \
             typename T7, typename T8, typename T9, typename T10, typename T11> \
    name ## _et \
    operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \
    { \
      return name ## _et \
        (iter_(_bz_makeRange(r1), \
               _bz_makeRange(r2), \
               _bz_makeRange(r3), \
               _bz_makeRange(r4), \
               _bz_makeRange(r5), \
               _bz_makeRange(r6), \
               _bz_makeRange(r7), \
               _bz_makeRange(r8), \
               _bz_makeRange(r9), \
               _bz_makeRange(r10), \
               _bz_makeRange(r11))); \
    } \
  }; \
  /* create ET from application to expression */ \
  template<typename T1> \
  inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1) \
  { \
    return _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
      (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \
  } \
  BZ_ET_STENCIL_REDIRECT(name)
---|
544 | |
---|
545 | |
---|
/* BZ_ET_STENCILV(name, result_rank, MINB, MAXB)

   Defines a stencil ET class name_et<P_expr> that applies the stencil
   operator name_stencilop to a (scalar) array<P_numtype, N_rank> and
   yields a multicomponent result whose numeric type is
   TinyVector<P_numtype, result_rank>.

   name         identifier of the stencil; name_stencilop must already
                be declared.
   result_rank  length of the resulting TinyVector.
   MINB, MAXB   passed to _bz_shrinkDomain together with the operand's
                domain before the stencil expression is built
                (presumably the stencil's extent -- confirm against
                _bz_shrinkDomain).

   The bare-Array overloads of name() are generated through
   BZ_ET_STENCIL_REDIRECT.

   Several const members below reposition iter_ and then move it back;
   this relies on the base class allowing iter_ to be modified through
   a const object (not visible in this file). */

#define BZ_ET_STENCILV(name,result_rank, MINB, MAXB) \
  template<typename P_expr> \
  class name ## _et : public _bz_StencilExpr<P_expr, TinyVector<typename P_expr::T_numtype,result_rank> > \
  { \
  public: \
    typedef _bz_StencilExpr<P_expr, TinyVector<typename P_expr::T_numtype,result_rank> > T_base; \
    typedef _bz_typename T_base::T_numtype T_numtype; \
    typedef _bz_typename T_base::T_expr T_expr; \
    \
    /* there is no return type selection, we assume P_numtype is scalar \
       and that we are returning a TinyVector. This needs to be returned \
       as a FastTV2CopyIterator that keeps a copy of the TV it is \
       iterating over, since the result of the stencil operator is \
       a temporary. */ \
    typedef ETBase<_bz_ArrayExpr<FastTV2CopyIterator<typename P_expr::T_numtype, result_rank> > > T_typeprop; \
    typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; \
    typedef typename T_expr::T_numtype T_optype; \
    \
    /* dummy */ \
    template<int N> struct tvresult { \
      typedef name ## _et<typename T_expr::template tvresult<N>::Type> Type; \
    }; \
    \
    typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \
    \
    using T_base::iter_; \
    using T_base::rank_; \
  public: \
    name ## _et(const name ## _et& a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    name ## _et(BZ_ETPARM(T_expr) a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
      { } \
    \
    /* evaluate the stencil at the current iterator position */ \
    T_result operator*() const \
    { return name ## _stencilop(iter_); } \
    /* move to index i, then evaluate (the move is not undone) */ \
    T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \
    { iter_.moveTo(i); return name ## _stencilop(iter_); } \
    \
    /* restrict the operand to domain d and wrap it in a new stencil */ \
    T_range_result operator()(const RectDomain<rank_>& d) const \
    { return T_range_result(iter_(d)); } \
    \
    T_result operator[](int i) const \
    { return name ## _stencilop(iter_[i]); } \
    \
    T_result fastRead(sizeType i) const \
    {/* this probably isn't very fast... */ \
      /* offset, evaluate, then undo the offset */ \
      iter_._bz_offsetData(i); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-i); \
      return r; \
    } \
    \
    /** This way of vectorizing won't work on stencils. */ \
    template<int N> \
    typename tvresult<N>::Type fastRead_tv(int i) const { \
      BZPRECHECK(0, "Can't vectorize stencils"); \
      return iter_.template fastRead_tv<N>(i); } \
    \
    /* evaluate the stencil at a position offset along one dimension, \
       leaving iter_ where it was */ \
    T_result shift(int offset, int dim) const \
    { \
      iter_._bz_offsetData(offset, dim); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset, dim); \
      return r; \
    } \
    \
    /* same as above, offset along two dimensions */ \
    T_result shift(int offset1, int dim1, int offset2, int dim2) const \
    { \
      iter_._bz_offsetData(offset1, dim1, offset2, dim2); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      return r; \
    } \
    \
    void prettyPrint(BZ_STD_SCOPE(string) &str, \
                     prettyPrintFormat& format) const \
    { \
      /* #name stringizes the macro argument; a macro parameter is \
         not replaced inside a string literal, so "name (stencil)" \
         would print the word "name" for every stencil */ \
      str += #name " (stencil)"; \
      str += "("; \
      iter_.prettyPrint(str, format); \
      str += ")"; \
    } \
    \
    template<typename T1, typename T2 = nilArraySection, \
             class T3 = nilArraySection, typename T4 = nilArraySection, \
             class T5 = nilArraySection, typename T6 = nilArraySection, \
             class T7 = nilArraySection, typename T8 = nilArraySection, \
             class T9 = nilArraySection, typename T10 = nilArraySection, \
             class T11 = nilArraySection> \
    class SliceInfo { \
    public: \
      typedef name ## _et<T_expr> T_slice; \
    }; \
    \
    /* slicing: every argument is converted with _bz_makeRange before \
       being applied to the operand */ \
    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \
             typename T7, typename T8, typename T9, typename T10, typename T11> \
    name ## _et \
    operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \
    { \
      return name ## _et \
        (iter_(_bz_makeRange(r1), \
               _bz_makeRange(r2), \
               _bz_makeRange(r3), \
               _bz_makeRange(r4), \
               _bz_makeRange(r5), \
               _bz_makeRange(r6), \
               _bz_makeRange(r7), \
               _bz_makeRange(r8), \
               _bz_makeRange(r9), \
               _bz_makeRange(r10), \
               _bz_makeRange(r11))); \
    } \
  }; \
  /* create ET from application to expression */ \
  template<typename T1> \
  inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1) \
  { \
    return _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
      (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \
  } \
  BZ_ET_STENCIL_REDIRECT(name)
---|
679 | |
---|
680 | |
---|
/* Defines a stencil ET that operates on an array<P_numtype, N_rank>
   (where P_numtype presumably is a multicomponent type) and returns a
   scalar array<P_numtype::T_element, N_rank>.

   BZ_ET_STENCIL_SCA(name, MINB, MAXB) generates:
     - the class template name##_et<P_expr>, derived from
       _bz_StencilExpr<P_expr, T_element>, whose evaluators all forward
       to name##_stencilop(iter_);
     - the function template name(const ETBase<T1>&), which wraps the
       operand restricted to _bz_shrinkDomain(domain, MINB, MAXB);
     - whatever BZ_ET_STENCIL_REDIRECT(name) expands to (defined
       elsewhere in this file).
   name##_stencilop must be declared before the macro is used.

   Only block comments may be used inside the macro body: every line is
   spliced with a trailing backslash. */

#define BZ_ET_STENCIL_SCA(name, MINB, MAXB) \
  template<typename P_expr> \
  class name ## _et : public _bz_StencilExpr<P_expr, _bz_typename multicomponent_traits<typename P_expr::T_numtype>::T_element> \
  { \
  public: \
    typedef _bz_typename multicomponent_traits<typename P_expr::T_numtype>::T_element T_result; \
    typedef _bz_StencilExpr<P_expr, T_result> T_base; \
    typedef _bz_typename T_base::T_numtype T_numtype; \
    typedef _bz_typename T_base::T_expr T_expr; \
    \
    /* there is no selecting return type here. because we *know* it is \
       scalar T_result, there's no question of whether we could be doing \
       multicomponent evaluations. */ \
    typedef T_result T_typeprop; \
    typedef T_numtype T_optype; \
    \
    /* dummy */ \
    template<int N> struct tvresult { \
      typedef name ## _et<typename T_expr::template tvresult<N>::Type> Type; \
    }; \
    \
    typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \
    \
    using T_base::iter_; \
    using T_base::rank_; \
  public: \
    name ## _et(const name ## _et& a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
    { } \
    \
    name ## _et(BZ_ETPARM(T_expr) a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
    { } \
    \
    name ## _et(_bz_typename T_expr::T_ctorArg1 a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a) \
    { } \
    \
    /* Stencil value at the current position of iter_. */ \
    T_result operator*() const \
    { return name ## _stencilop(iter_); } \
    /* Moves iter_ to index i, then evaluates the stencil there. */ \
    T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \
    { iter_.moveTo(i); return name ## _stencilop(iter_); } \
    \
    /* Same stencil applied to the operand restricted to domain d. */ \
    T_range_result operator()(const RectDomain<rank_>& d) const \
    { return T_range_result(iter_(d)); } \
    \
    T_result operator[](int i) const \
    { return name ## _stencilop(iter_[i]); } \
    \
    /* Offsets iter_ by i, evaluates, then undoes the offset. */ \
    T_result fastRead(sizeType i) const \
    {/* this probably isn't very fast... */ \
      iter_._bz_offsetData(i); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-i); \
      return r; \
    } \
    \
    /** This way of vectorizing won't work on stencils. */ \
    template<int N> \
    typename tvresult<N>::Type fastRead_tv(int i) const { \
      BZPRECHECK(0, "Can't vectorize stencils"); \
      return iter_.template fastRead_tv<N>(i); } \
    \
    /* Offsets iter_ with _bz_offsetData(offset, dim), evaluates the \
       stencil, then undoes the offset. */ \
    T_result shift(int offset, int dim) const \
    { \
      iter_._bz_offsetData(offset, dim); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset, dim); \
      return r; \
    } \
    \
    /* Two-offset variant of shift(). */ \
    T_result shift(int offset1, int dim1, int offset2, int dim2) const \
    { \
      iter_._bz_offsetData(offset1, dim1, offset2, dim2); \
      T_result r = name ## _stencilop (iter_); \
      iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      return r; \
    } \
    \
    /* Appends "<name> (stencil)(<operand>)" to str. */ \
    void prettyPrint(BZ_STD_SCOPE(string) &str, \
                     prettyPrintFormat& format) const \
    { \
      /* #name stringizes the macro argument; a macro parameter is not \
         replaced inside a string literal, so "name (stencil)" would \
         print the word "name" for every stencil. */ \
      str += #name " (stencil)"; \
      str += "("; \
      iter_.prettyPrint(str, format); \
      str += ")"; \
    } \
    \
    template<typename T1, typename T2 = nilArraySection, \
             class T3 = nilArraySection, typename T4 = nilArraySection, \
             class T5 = nilArraySection, typename T6 = nilArraySection, \
             class T7 = nilArraySection, typename T8 = nilArraySection, \
             class T9 = nilArraySection, typename T10 = nilArraySection, \
             class T11 = nilArraySection> \
    class SliceInfo { \
    public: \
      typedef name ## _et<T_expr> T_slice; \
    }; \
    \
    /* Slices the operand with up to eleven range arguments and wraps \
       the result in the same stencil. */ \
    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \
             typename T7, typename T8, typename T9, typename T10, typename T11> \
    name ## _et \
    operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \
    { \
      return name ## _et \
        (iter_(_bz_makeRange(r1), \
               _bz_makeRange(r2), \
               _bz_makeRange(r3), \
               _bz_makeRange(r4), \
               _bz_makeRange(r5), \
               _bz_makeRange(r6), \
               _bz_makeRange(r7), \
               _bz_makeRange(r8), \
               _bz_makeRange(r9), \
               _bz_makeRange(r10), \
               _bz_makeRange(r11))); \
    } \
  }; \
  /* create ET from application to expression */ \
  template<typename T1> \
  inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1) \
  { \
    return _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
      (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),MINB, MAXB))); \
  } \
  BZ_ET_STENCIL_REDIRECT(name)
812 | |
---|
813 | |
---|
814 | /* Defines a stencil ET difference operator that operates on an |
---|
815 | array<P_numtype, N_rank> and returns a array<P_numtype, |
---|
816 | N_rank>. (The only significance of the "difference" aspect is that |
---|
817 | the operator is assumed to take a second argument which is the |
---|
818 | dimension to do the difference in). MINB and MAXB are integer |
---|
819 | expressions describing the extent of the operator in the operating |
---|
820 | dimension. */ |
---|
821 | |
---|
822 | #define BZ_ET_STENCIL_DIFF(name, MINB, MAXB) \ |
---|
823 | template<typename P_expr> \ |
---|
824 | class name ## _et : \ |
---|
825 | public _bz_StencilExpr<P_expr, _bz_typename P_expr::T_numtype> \ |
---|
826 | { \ |
---|
827 | public: \ |
---|
828 | typedef _bz_StencilExpr<P_expr, _bz_typename P_expr::T_numtype> T_base; \ |
---|
829 | typedef _bz_typename T_base::T_numtype T_numtype; \ |
---|
830 | typedef _bz_typename T_base::T_expr T_expr; \ |
---|
831 | \ |
---|
832 | /* select return type */ \ |
---|
833 | typedef typename unwrapET<typename T_expr::T_result>::T_unwrapped test; \ |
---|
834 | typedef typename selectET<typename T_expr::T_typeprop, \ |
---|
835 | T_numtype, \ |
---|
836 | name ##_et<test> >::T_selected T_typeprop; \ |
---|
837 | typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; \ |
---|
838 | typedef T_numtype T_optype; \ |
---|
839 | \ |
---|
840 | /* dummy */ \ |
---|
841 | template<int N> struct tvresult { \ |
---|
842 | typedef name ## _et<typename T_expr::template tvresult<N>::Type> Type; \ |
---|
843 | }; \ |
---|
844 | \ |
---|
845 | typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \ |
---|
846 | \ |
---|
847 | using T_base::iter_; \ |
---|
848 | using T_base::rank_; \ |
---|
849 | public: \ |
---|
850 | name ## _et(const name ## _et& a) : \ |
---|
851 | _bz_StencilExpr<P_expr, T_numtype>(a), dim_(a.dim_) \ |
---|
852 | { } \ |
---|
853 | \ |
---|
854 | name ## _et(BZ_ETPARM(T_expr) a, int dim) : \ |
---|
855 | _bz_StencilExpr<P_expr, T_numtype>(a), dim_(dim) \ |
---|
856 | { } \ |
---|
857 | \ |
---|
858 | name ## _et(_bz_typename T_expr::T_ctorArg1 a, int dim) : \ |
---|
859 | _bz_StencilExpr<P_expr, T_numtype>(a), dim_(dim) \ |
---|
860 | { } \ |
---|
861 | \ |
---|
862 | T_result operator*() const \ |
---|
863 | { return name ## _stencilop(iter_, dim_); } \ |
---|
864 | T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \ |
---|
865 | { iter_.moveTo(i); return name ## _stencilop(iter_, dim_); } \ |
---|
866 | \ |
---|
867 | T_range_result operator()(const RectDomain<rank_>& d) const \ |
---|
868 | { return T_range_result(iter_(d), dim_); } \ |
---|
869 | \ |
---|
870 | T_result operator[](int i) const \ |
---|
871 | { return name ## _stencilop(iter_[i], dim_); } \ |
---|
872 | \ |
---|
873 | T_result fastRead(sizeType i) const \ |
---|
874 | {/* this probably isn't very fast... */ \ |
---|
875 | iter_._bz_offsetData(i); \ |
---|
876 | T_result r = name ## _stencilop (iter_, dim_); \ |
---|
877 | iter_._bz_offsetData(-i); \ |
---|
878 | return r; \ |
---|
879 | } \ |
---|
880 | \ |
---|
881 | /** This way of vectorizing won't work on stencils. */ \ |
---|
882 | template<int N> \ |
---|
883 | typename tvresult<N>::Type fastRead_tv(int i) const { \ |
---|
884 | BZPRECHECK(0, "Can't vectorize stencils"); \ |
---|
885 | return typename tvresult<N>::Type(iter_.template fastRead_tv<N>(i),dim_); } \ |
---|
886 | \ |
---|
887 | T_result shift(int offset, int dim) const \ |
---|
888 | { \ |
---|
889 | iter_._bz_offsetData(offset, dim); \ |
---|
890 | T_result r = name ## _stencilop (iter_); \ |
---|
891 | iter_._bz_offsetData(-offset, dim); \ |
---|
892 | return r; \ |
---|
893 | } \ |
---|
894 | \ |
---|
895 | T_result shift(int offset1, int dim1, int offset2, int dim2) const \ |
---|
896 | { \ |
---|
897 | iter_._bz_offsetData(offset1, dim1, offset2, dim2); \ |
---|
898 | T_result r = name ## _stencilop (iter_); \ |
---|
899 | iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \ |
---|
900 | return r; \ |
---|
901 | } \ |
---|
902 | \ |
---|
903 | void prettyPrint(BZ_STD_SCOPE(string) &str, \ |
---|
904 | prettyPrintFormat& format) const \ |
---|
905 | { \ |
---|
906 | str += "name (stencil)"; \ |
---|
907 | str += "("; \ |
---|
908 | iter_.prettyPrint(str, format); \ |
---|
909 | str += ")"; \ |
---|
910 | } \ |
---|
911 | \ |
---|
912 | template<typename T1, typename T2 = nilArraySection, \ |
---|
913 | class T3 = nilArraySection, typename T4 = nilArraySection, \ |
---|
914 | class T5 = nilArraySection, typename T6 = nilArraySection, \ |
---|
915 | class T7 = nilArraySection, typename T8 = nilArraySection, \ |
---|
916 | class T9 = nilArraySection, typename T10 = nilArraySection, \ |
---|
917 | class T11 = nilArraySection> \ |
---|
918 | class SliceInfo { \ |
---|
919 | public: \ |
---|
920 | typedef name ## _et<T_expr> T_slice; \ |
---|
921 | }; \ |
---|
922 | \ |
---|
923 | template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \ |
---|
924 | typename T7, typename T8, typename T9, typename T10, typename T11> \ |
---|
925 | name ## _et \ |
---|
926 | operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \ |
---|
927 | { \ |
---|
928 | return name ## _et \ |
---|
929 | (iter_(_bz_makeRange(r1), \ |
---|
930 | _bz_makeRange(r2), \ |
---|
931 | _bz_makeRange(r3), \ |
---|
932 | _bz_makeRange(r4), \ |
---|
933 | _bz_makeRange(r5), \ |
---|
934 | _bz_makeRange(r6), \ |
---|
935 | _bz_makeRange(r7), \ |
---|
936 | _bz_makeRange(r8), \ |
---|
937 | _bz_makeRange(r9), \ |
---|
938 | _bz_makeRange(r10), \ |
---|
939 | _bz_makeRange(r11)),dim_); \ |
---|
940 | } \ |
---|
941 | \ |
---|
942 | private: \ |
---|
943 | int dim_; \ |
---|
944 | }; \ |
---|
945 | /* create ET from application to expression */ \ |
---|
946 | template<typename T1> \ |
---|
947 | inline _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \ |
---|
948 | name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1, int dim) \ |
---|
949 | { \ |
---|
950 | TinyVector<int, BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::rank_> minb(0), maxb(0); \ |
---|
951 | minb[dim]=MINB; maxb[dim]=MAXB; \ |
---|
952 | return _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \ |
---|
953 | (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),minb, maxb)), dim); \ |
---|
954 | } \ |
---|
955 | /* forward operations on arrays to main function */ \ |
---|
956 | template<typename T, int N> \ |
---|
957 | inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result> > \ |
---|
958 | name(const Array<T,N>& d1, int dim) \ |
---|
959 | { return name(d1.wrap(), dim); } \ |
---|
960 | template<typename T, int N> \ |
---|
961 | inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result> > \ |
---|
962 | name(Array<T,N>& d1, int dim) \ |
---|
963 | { return name(d1.wrap(), dim); } |
---|
964 | |
---|
965 | |
---|
/* Defines a stencil ET difference operator that operates on a
   multicomponent array<P_numtype, N_rank> and returns an
   array<P_numtype::T_element, N_rank>.

   BZ_ET_STENCIL_MULTIDIFF(name, MINB, MAXB) generates:
     - the class template name##_et_multi<P_expr>, which stores a
       component index comp_ and a dimension dim_ and evaluates
       name##_stencilop(iter_, comp_, dim_);
     - the function template name(const ETBase<T1>&, int comp, int dim),
       which shrinks the operand's domain by MINB/MAXB in dimension dim
       only;
     - overloads of name() for const and non-const Array<T,N> that
       forward through Array::wrap().
   name##_stencilop must be declared before the macro is used.

   Only block comments may be used inside the macro body: every line is
   spliced with a trailing backslash. */

#define BZ_ET_STENCIL_MULTIDIFF(name, MINB, MAXB) \
  template<typename P_expr> \
  class name ## _et_multi : public _bz_StencilExpr<P_expr, _bz_typename multicomponent_traits<typename P_expr::T_numtype>::T_element> \
  { \
  public: \
    typedef _bz_typename multicomponent_traits<typename P_expr::T_numtype>::T_element T_result; \
    typedef _bz_StencilExpr<P_expr, T_result> T_base; \
    typedef _bz_typename T_base::T_numtype T_numtype; \
    typedef _bz_typename T_base::T_expr T_expr; \
    \
    /* there is no selecting return type here. because we *know* it is \
       T_result, there's no question of whether we could be doing \
       multicomponent evaluations. */ \
    typedef T_result T_typeprop; \
    typedef T_numtype T_optype; \
    \
    /* dummy */ \
    template<int N> struct tvresult { \
      typedef name ## _et_multi<typename T_expr::template tvresult<N>::Type> Type; \
    }; \
    \
    typedef name ## _et_multi<_bz_typename P_expr::T_range_result> T_range_result; \
    \
    using T_base::iter_; \
    using T_base::rank_; \
  public: \
    name ## _et_multi(const name ## _et_multi& a) : \
      _bz_StencilExpr<P_expr, T_numtype>(a), comp_(a.comp_), dim_(a.dim_) \
    { } \
    \
    name ## _et_multi(BZ_ETPARM(T_expr) a, int comp, int dim) : \
      _bz_StencilExpr<P_expr, T_numtype>(a), \
      comp_(comp), dim_(dim) \
    { } \
    \
    name ## _et_multi(_bz_typename T_expr::T_ctorArg1 a, int comp, int dim) : \
      _bz_StencilExpr<P_expr, T_numtype>(a), \
      comp_(comp), dim_(dim) \
    { } \
    \
    /* Stencil value for (comp_, dim_) at the current position of iter_. */ \
    T_result operator*() const \
    { return name ## _stencilop(iter_, comp_, dim_); } \
    /* Moves iter_ to index i, then evaluates the stencil there. */ \
    T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \
    { iter_.moveTo(i); return name ## _stencilop(iter_, comp_, dim_); } \
    \
    /* Same stencil (same comp_, dim_) on the operand restricted to d. */ \
    T_range_result operator()(const RectDomain<rank_>& d) const \
    { return T_range_result(iter_(d), comp_, dim_); } \
    \
    T_result operator[](int i) const \
    { return name ## _stencilop(iter_[i], comp_, dim_); } \
    \
    /* Offsets iter_ by i, evaluates, then undoes the offset. */ \
    T_result fastRead(sizeType i) const \
    {/* this probably isn't very fast... */ \
      iter_._bz_offsetData(i); \
      T_result r = name ## _stencilop (iter_, comp_, dim_); \
      iter_._bz_offsetData(-i); \
      return r; \
    } \
    \
    /** This way of vectorizing won't work on stencils. */ \
    template<int N> \
    typename tvresult<N>::Type fastRead_tv(int i) const { \
      BZPRECHECK(0, "Can't vectorize stencils"); \
      return typename tvresult<N>::Type(iter_.template fastRead_tv<N>(i),comp_,dim_); } \
    \
    /* Offsets iter_ with _bz_offsetData(offset, dim), evaluates the \
       stencil, then undoes the offset.  The parameter dim only selects \
       where iter_ is moved; the stencil still uses the stored comp_ \
       and dim_, exactly as in operator*() and fastRead(). */ \
    T_result shift(int offset, int dim) const \
    { \
      iter_._bz_offsetData(offset, dim); \
      T_result r = name ## _stencilop (iter_, comp_, dim_); \
      iter_._bz_offsetData(-offset, dim); \
      return r; \
    } \
    \
    /* Two-offset variant of shift(); also uses comp_ and dim_. */ \
    T_result shift(int offset1, int dim1, int offset2, int dim2) const \
    { \
      iter_._bz_offsetData(offset1, dim1, offset2, dim2); \
      T_result r = name ## _stencilop (iter_, comp_, dim_); \
      iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \
      return r; \
    } \
    \
    /* Appends "<name> (stencil)(<operand>)" to str. */ \
    void prettyPrint(BZ_STD_SCOPE(string) &str, \
                     prettyPrintFormat& format) const \
    { \
      /* #name stringizes the macro argument; a macro parameter is not \
         replaced inside a string literal, so "name (stencil)" would \
         print the word "name" for every stencil. */ \
      str += #name " (stencil)"; \
      str += "("; \
      iter_.prettyPrint(str, format); \
      str += ")"; \
    } \
    \
    template<typename T1, typename T2 = nilArraySection, \
             class T3 = nilArraySection, typename T4 = nilArraySection, \
             class T5 = nilArraySection, typename T6 = nilArraySection, \
             class T7 = nilArraySection, typename T8 = nilArraySection, \
             class T9 = nilArraySection, typename T10 = nilArraySection, \
             class T11 = nilArraySection> \
    class SliceInfo { \
    public: \
      typedef name ## _et_multi<T_expr> T_slice; \
    }; \
    \
    /* Slices the operand with up to eleven range arguments and wraps \
       the result in the same stencil, keeping comp_ and dim_. */ \
    template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \
             typename T7, typename T8, typename T9, typename T10, typename T11> \
    name ## _et_multi \
    operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \
    { \
      return name ## _et_multi \
        (iter_(_bz_makeRange(r1), \
               _bz_makeRange(r2), \
               _bz_makeRange(r3), \
               _bz_makeRange(r4), \
               _bz_makeRange(r5), \
               _bz_makeRange(r6), \
               _bz_makeRange(r7), \
               _bz_makeRange(r8), \
               _bz_makeRange(r9), \
               _bz_makeRange(r10), \
               _bz_makeRange(r11)),comp_, dim_); \
    } \
    \
  private: \
    int comp_; /* component index handed to name##_stencilop */ \
    int dim_;  /* dimension handed to name##_stencilop */ \
  }; \
  /* create ET from application to expression */ \
  template<typename T1> \
  inline _bz_ArrayExpr<name ## _et_multi<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
  name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1, int comp, int dim) \
  { \
    /* Shrink bounds are zero everywhere except in dimension dim. */ \
    TinyVector<int, BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::rank_> minb(0), maxb(0); \
    minb[dim]=MINB; maxb[dim]=MAXB; \
    return _bz_ArrayExpr<name ## _et_multi<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \
      (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),minb, maxb)), comp, dim); \
  } \
  /* forward operations on arrays to main function */ \
  template<typename T, int N> \
  inline _bz_ArrayExpr<name ## _et_multi<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result> > \
  name(const Array<T,N>& d1, int comp, int dim) \
  { return name(d1.wrap(), comp, dim); } \
  \
  template<typename T, int N> \
  inline _bz_ArrayExpr<name ## _et_multi<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result> > \
  name(Array<T,N>& d1, int comp, int dim) \
  { return name(d1.wrap(), comp, dim); }
1114 | |
---|
1115 | |
---|
1116 | /* Defines a stencil ET double-difference operator that operates on an |
---|
1117 | array<P_numtype, N_rank> and returns a array<P_numtype, |
---|
1118 | N_rank>. (The only significance of the "difference" aspect is that |
---|
1119 | the operator is assumed to take two extra arguments which are the |
---|
1120 | dimensions to do the differences in). */ |
---|
1121 | |
---|
1122 | #define BZ_ET_STENCIL_DIFF2(name, MINB1, MAXB1, MINB2, MAXB2) \ |
---|
1123 | template<typename P_expr> \ |
---|
1124 | class name ## _et : public _bz_StencilExpr<P_expr, _bz_typename P_expr::T_numtype> \ |
---|
1125 | { \ |
---|
1126 | public: \ |
---|
1127 | typedef _bz_StencilExpr<P_expr, _bz_typename P_expr::T_numtype> T_base; \ |
---|
1128 | typedef _bz_typename T_base::T_numtype T_numtype; \ |
---|
1129 | typedef _bz_typename T_base::T_expr T_expr; \ |
---|
1130 | \ |
---|
1131 | /* select return type */ \ |
---|
1132 | typedef typename unwrapET<typename T_expr::T_result>::T_unwrapped test; \ |
---|
1133 | typedef typename selectET<typename T_expr::T_typeprop, \ |
---|
1134 | T_numtype, \ |
---|
1135 | name ## _et<test> >::T_selected T_typeprop; \ |
---|
1136 | typedef typename unwrapET<T_typeprop>::T_unwrapped T_result; \ |
---|
1137 | typedef T_numtype T_optype; \ |
---|
1138 | \ |
---|
1139 | /* dummy */ \ |
---|
1140 | template<int N> struct tvresult { \ |
---|
1141 | typedef name ## _et<typename T_expr::template tvresult<N>::Type> Type; \ |
---|
1142 | }; \ |
---|
1143 | \ |
---|
1144 | typedef name ## _et<_bz_typename P_expr::T_range_result> T_range_result; \ |
---|
1145 | \ |
---|
1146 | using T_base::iter_; \ |
---|
1147 | using T_base::rank_; \ |
---|
1148 | public: \ |
---|
1149 | name ## _et(const name ## _et& a) : \ |
---|
1150 | _bz_StencilExpr<P_expr, T_numtype>(a), \ |
---|
1151 | dim1_(a.dim1_), dim2_(a.dim2_) \ |
---|
1152 | { } \ |
---|
1153 | \ |
---|
1154 | name ## _et(BZ_ETPARM(T_expr) a, int dim1, int dim2) : \ |
---|
1155 | _bz_StencilExpr<P_expr, T_numtype>(a), \ |
---|
1156 | dim1_(dim1), dim2_(dim2) \ |
---|
1157 | { } \ |
---|
1158 | \ |
---|
1159 | name ## _et(_bz_typename T_expr::T_ctorArg1 a, \ |
---|
1160 | int dim1, int dim2) : \ |
---|
1161 | _bz_StencilExpr<P_expr, T_numtype>(a), \ |
---|
1162 | dim1_(dim1), dim2_(dim2) \ |
---|
1163 | { } \ |
---|
1164 | \ |
---|
1165 | T_result operator*() const \ |
---|
1166 | { return name ## _stencilop(iter_, dim1_, dim2_); } \ |
---|
1167 | T_result operator()(_bz_typename _bz_IndexParameter<TinyVector<int, rank_> >::type i) const \ |
---|
1168 | { iter_.moveTo(i); return name ## _stencilop(iter_, dim1_, dim2_); } \ |
---|
1169 | \ |
---|
1170 | T_range_result operator()(const RectDomain<rank_>& d) const \ |
---|
1171 | { return T_range_result(iter_(d), dim1_, dim2_); } \ |
---|
1172 | \ |
---|
1173 | T_result operator[](int i) const \ |
---|
1174 | { return name ## _stencilop(iter_[i], dim1_, dim2_); } \ |
---|
1175 | \ |
---|
1176 | T_result fastRead(sizeType i) const \ |
---|
1177 | {/* this probably isn't very fast... */ \ |
---|
1178 | iter_._bz_offsetData(i); \ |
---|
1179 | T_result r = name ## _stencilop (iter_, dim1_, dim2_); \ |
---|
1180 | iter_._bz_offsetData(-i); \ |
---|
1181 | return r; \ |
---|
1182 | } \ |
---|
1183 | \ |
---|
1184 | /** This way of vectorizing won't work on stencils. */ \ |
---|
1185 | template<int N> \ |
---|
1186 | typename tvresult<N>::Type fastRead_tv(int i) const { \ |
---|
1187 | BZPRECHECK(0, "Can't vectorize stencils"); \ |
---|
1188 | return typename tvresult<N>::Type(iter_.template fastRead_tv<N>(i),dim1_,dim2_); } \ |
---|
1189 | \ |
---|
1190 | T_result shift(int offset, int dim) const \ |
---|
1191 | { \ |
---|
1192 | iter_._bz_offsetData(offset, dim); \ |
---|
1193 | T_result r = name ## _stencilop (iter_); \ |
---|
1194 | iter_._bz_offsetData(-offset, dim); \ |
---|
1195 | return r; \ |
---|
1196 | } \ |
---|
1197 | \ |
---|
1198 | T_result shift(int offset1, int dim1, int offset2, int dim2) const \ |
---|
1199 | { \ |
---|
1200 | iter_._bz_offsetData(offset1, dim1, offset2, dim2); \ |
---|
1201 | T_result r = name ## _stencilop (iter_); \ |
---|
1202 | iter_._bz_offsetData(-offset1, dim1, -offset2, dim2); \ |
---|
1203 | return r; \ |
---|
1204 | } \ |
---|
1205 | \ |
---|
1206 | void prettyPrint(BZ_STD_SCOPE(string) &str, \ |
---|
1207 | prettyPrintFormat& format) const \ |
---|
1208 | { \ |
---|
1209 | str += "name (stencil)"; \ |
---|
1210 | str += "("; \ |
---|
1211 | iter_.prettyPrint(str, format); \ |
---|
1212 | str += ")"; \ |
---|
1213 | } \ |
---|
1214 | \ |
---|
1215 | template<typename T1, typename T2 = nilArraySection, \ |
---|
1216 | class T3 = nilArraySection, typename T4 = nilArraySection, \ |
---|
1217 | class T5 = nilArraySection, typename T6 = nilArraySection, \ |
---|
1218 | class T7 = nilArraySection, typename T8 = nilArraySection, \ |
---|
1219 | class T9 = nilArraySection, typename T10 = nilArraySection, \ |
---|
1220 | class T11 = nilArraySection> \ |
---|
1221 | class SliceInfo { \ |
---|
1222 | public: \ |
---|
1223 | typedef name ## _et<T_expr> T_slice; \ |
---|
1224 | }; \ |
---|
1225 | \ |
---|
1226 | template<typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, \ |
---|
1227 | typename T7, typename T8, typename T9, typename T10, typename T11> \ |
---|
1228 | name ## _et \ |
---|
1229 | operator()(T1 r1, T2 r2, T3 r3, T4 r4, T5 r5, T6 r6, T7 r7, T8 r8, T9 r9, T10 r10, T11 r11) const \ |
---|
1230 | { \ |
---|
1231 | return name ## _et \ |
---|
1232 | (iter_(_bz_makeRange(r1), \ |
---|
1233 | _bz_makeRange(r2), \ |
---|
1234 | _bz_makeRange(r3), \ |
---|
1235 | _bz_makeRange(r4), \ |
---|
1236 | _bz_makeRange(r5), \ |
---|
1237 | _bz_makeRange(r6), \ |
---|
1238 | _bz_makeRange(r7), \ |
---|
1239 | _bz_makeRange(r8), \ |
---|
1240 | _bz_makeRange(r9), \ |
---|
1241 | _bz_makeRange(r10), \ |
---|
1242 | _bz_makeRange(r11)), dim1_, dim2_); \ |
---|
1243 | } \ |
---|
1244 | \ |
---|
1245 | private: \ |
---|
1246 | int dim1_, dim2_; \ |
---|
1247 | }; \ |
---|
1248 | \ |
---|
1249 | /* create ET from application to expression */ \ |
---|
1250 | template<typename T1> \ |
---|
1251 | inline _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \ |
---|
1252 | name(const BZ_BLITZ_SCOPE(ETBase)<T1>& d1, int dim1, int dim2) \ |
---|
1253 | { \ |
---|
1254 | TinyVector<int, BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::rank_> minb(0), maxb(0); \ |
---|
1255 | minb[dim1]=MINB1; maxb[dim1]=MAXB1; \ |
---|
1256 | minb[dim2]=MINB2; maxb[dim2]=MAXB2; \ |
---|
1257 | return _bz_ArrayExpr<name ## _et<typename BZ_BLITZ_SCOPE(asExpr)<T1>::T_expr::T_range_result> > \ |
---|
1258 | (BZ_BLITZ_SCOPE(asExpr)<T1>::getExpr(d1.unwrap())(_bz_shrinkDomain(d1.unwrap().domain(),minb, maxb)), dim1, dim2); \ |
---|
1259 | } \ |
---|
1260 | /* forward operations on arrays to main function */ \ |
---|
1261 | template<typename T, int N> \ |
---|
1262 | inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result> > \ |
---|
1263 | name(const Array<T,N>& d1, int dim1, int dim2) \ |
---|
1264 | { return name(d1.wrap(), dim1, dim2); } \ |
---|
1265 | \ |
---|
1266 | template<typename T, int N> \ |
---|
1267 | inline _bz_ArrayExpr<name ## _et<_bz_typename BZ_BLITZ_SCOPE(asExpr)<Array<T,N> >::T_expr::T_range_result> > \ |
---|
1268 | name(Array<T,N>& d1, int dim1, int dim2) \ |
---|
1269 | { return name(d1.wrap(), dim1, dim2); } |
---|
1270 | |
---|
1271 | BZ_NAMESPACE_END |
---|
1272 | |
---|
1273 | #endif // BZ_ARRAY_STENCIL_ET_MACROS_H |
---|