source: XIOS/dev/dev_olga/src/extern/blitz/include/blitz/array/methods.cc @ 1022

Last change on this file since 1022 was 1022, checked in by mhnguyen, 7 years ago
File size: 15.2 KB
Line 
1/***************************************************************************
2 * blitz/array/methods.cc   General array class methods.
3 *
4 * $Id$
5 *
6 * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
7 *
8 * This file is a part of Blitz.
9 *
10 * Blitz is free software: you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License
12 * as published by the Free Software Foundation, either version 3
13 * of the License, or (at your option) any later version.
14 *
15 * Blitz is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18 * GNU Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with Blitz.  If not, see <http://www.gnu.org/licenses/>.
22 *
23 * Suggestions:          blitz-devel@lists.sourceforge.net
24 * Bugs:                 blitz-support@lists.sourceforge.net   
25 *
26 * For more information, please see the Blitz++ Home Page:
27 *    https://sourceforge.net/projects/blitz/
28 *
29 ****************************************************************************/
30#ifndef BZ_ARRAYMETHODS_CC
31#define BZ_ARRAYMETHODS_CC
32
33#ifndef BZ_ARRAY_H
34 #error <blitz/array/methods.cc> must be included via <blitz/array.h>
35#endif
36
37BZ_NAMESPACE(blitz)
38
39template<typename P_numtype, int N_rank> template<typename T_expr>
40Array<P_numtype,N_rank>::Array(_bz_ArrayExpr<T_expr> expr)
41{
42    // Determine extent of the array expression
43
44    TinyVector<int,N_rank> lbound, extent, ordering;
45    TinyVector<bool,N_rank> ascendingFlag;
46    TinyVector<bool,N_rank> in_ordering;
47    in_ordering = false;
48
49    int j = 0;
50    for (int i=0; i < N_rank; ++i)
51    {
52        lbound(i) = expr.lbound(i);
53        int ubound = expr.ubound(i);
54        extent(i) = ubound - lbound(i) + 1;
55        int orderingj = expr.ordering(i);
56        if (orderingj != INT_MIN && orderingj < N_rank &&
57            !in_ordering( orderingj )) { // unique value in ordering array
58            in_ordering( orderingj ) = true;
59            ordering(j++) = orderingj;
60        }
61        int ascending = expr.ascending(i);
62        ascendingFlag(i) = (ascending == 1);
63
64#ifdef BZ_DEBUG
65        if ((lbound(i) == INT_MIN) || (ubound == INT_MAX) 
66          || (ordering(i) == INT_MIN) || (ascending == INT_MIN))
67        {
68          BZPRECHECK(0,
69           "Attempted to construct an array from an expression " << endl
70           << "which does not have a shape.  To use this constructor, "
71           << endl
72           << "the expression must contain at least one array operand.");
73          return;
74        }
75#endif
76    }
77
78    // It is possible that ordering is not a permutation of 0,...,N_rank-1.
79    // In that case j will be less than N_rank. We fill in ordering with the
80    // usused values in decreasing order.
81    for (int i = N_rank-1; j < N_rank; ++j) {
82        while (in_ordering(i))
83          --i;
84        ordering(j) = i--;
85    }
86
87    Array<T_numtype,N_rank> A(lbound,extent,
88        GeneralArrayStorage<N_rank>(ordering,ascendingFlag));
89    A = expr;
90    reference(A);
91}
92
93template<typename P_numtype, int N_rank>
94Array<P_numtype,N_rank>::Array(const TinyVector<int, N_rank>& lbounds,
95    const TinyVector<int, N_rank>& extent,
96    const GeneralArrayStorage<N_rank>& storage)
97    : storage_(storage)
98{
99    length_ = extent;
100    storage_.setBase(lbounds);
101    setupStorage(N_rank - 1);
102}
103
104
105/** This routine takes the storage information for the array
106    (ascendingFlag_[], base_[], and ordering_[]) and the size of the
107    array (length_[]) and computes the stride vector (stride_[]) and
108    the zero offset (see explanation in array.h).
109 */
110template<typename P_numtype, int N_rank>
111_bz_inline2 void Array<P_numtype, N_rank>::computeStrides()
112{
113    if (N_rank > 1)
114    {
115      diffType stride = 1;
116
117      // This flag simplifies the code in the loop, encouraging
118      // compile-time computation of strides through constant folding.
119      bool allAscending = storage_.allRanksStoredAscending();
120
121      // BZ_OLD_FOR_SCOPING
122      int n;
123      for (n=0; n < N_rank; ++n)
124      {
125          int strideSign = +1;
126
127          // If this rank is stored in descending order, then the stride
128          // will be negative.
129          if (!allAscending)
130          {
131            if (!isRankStoredAscending(ordering(n)))
132                strideSign = -1;
133          }
134
135          // The stride for this rank is the product of the lengths of
136          // the ranks minor to it.
137          stride_[ordering(n)] = stride * strideSign;
138
139          if((storage_.padding()==paddedData)&&(n==0)) {
140            // The lowest rank dimension is padded to vecWidth, so this
141            // needs to be accounted for in the stride
142            stride *= simdTypes<T_numtype>::paddedLength(length_[ordering(0)]);
143          }
144          else
145            stride *= length_[ordering(n)];
146      }
147    }
148    else {
149        // Specialization for N_rank == 1
150        // This simpler calculation makes it easier for the compiler
151        // to propagate stride values.
152
153        if (isRankStoredAscending(0))
154            stride_[0] = 1;
155        else
156            stride_[0] = -1;
157    }
158
159    calculateZeroOffset();
160}
161
162template<typename P_numtype, int N_rank>
163void Array<P_numtype, N_rank>::calculateZeroOffset()
164{
165    // Calculate the offset of (0,0,...,0)
166    zeroOffset_ = 0;
167
168    // zeroOffset_ = - sum(where(ascendingFlag_, stride_ * base_,
169    //     (length_ - 1 + base_) * stride_))
170    for (int n=0; n < N_rank; ++n)
171    {
172        if (!isRankStoredAscending(n))
173            zeroOffset_ -= (length_[n] - 1 + base(n)) * stride_[n];
174        else
175            zeroOffset_ -= stride_[n] * base(n);
176    }
177}
178
179template<typename P_numtype, int N_rank>
180bool Array<P_numtype, N_rank>::isStorageContiguous() const
181{
182    // The storage is contiguous if for the set
183    // { | stride[i] * extent[i] | }, i = 0..N_rank-1,
184    // there is only one value which is not in the set
185    // of strides; and if there is one stride which is 1.
186
187    // This algorithm is quadratic in the rank.  It is hard
188    // to imagine this being a serious problem.
189
190    int numStridesMissing = 0;
191    bool haveUnitStride = false;
192
193    for (int i=0; i < N_rank; ++i)
194    {
195      diffType stride = BZ_MATHFN_SCOPE(abs)(stride_[i]);
196        if (stride == 1)
197            haveUnitStride = true;
198
199        diffType vi = stride * length_[i];
200
201        int j = 0;
202        for (j=0; j < N_rank; ++j)
203            if (BZ_MATHFN_SCOPE(abs)(stride_[j]) == vi)
204                break;
205
206        if (j == N_rank)
207        {
208            ++numStridesMissing;
209            if (numStridesMissing == 2)
210                return false;
211        }
212    }
213
214    return haveUnitStride;
215}
216
217template<typename P_numtype, int N_rank>
218void Array<P_numtype, N_rank>::dumpStructureInformation(ostream& os) const
219{
220    os << "Dump of Array<" << BZ_DEBUG_TEMPLATE_AS_STRING_LITERAL(P_numtype) 
221       << ", " << N_rank << ">:" << endl
222       << "ordering_      = " << storage_.ordering() << endl
223       << "ascendingFlag_ = " << storage_.ascendingFlag() << endl
224       << "base_          = " << storage_.base() << endl
225       << "length_        = " << length_ << endl
226       << "stride_        = " << stride_ << endl
227       << "zeroOffset_    = " << zeroOffset_ << endl
228       << "numElements()  = " << numElements() << endl
229       << "isStorageContiguous() = " << isStorageContiguous() << endl;
230}
231
232/**
233  Make this array a view of another array's data. This overrides the
234  current storage of the array.
235 */
236template<typename P_numtype, int N_rank>
237void Array<P_numtype, N_rank>::reference(const Array<P_numtype, N_rank>& array)
238{
239    storage_ = array.storage_;
240    length_ = array.length_;
241    stride_ = array.stride_;
242    zeroOffset_ = array.zeroOffset_;
243
244    T_base::changeBlock(array.noConst());
245}
246
247/** This method makes the array reference another, but it does it as a
248    "weak" reference that is not counted. If you can guarantee that
249    the array memory block containing the data is persistent, this
250    will allow reference counting to be bypassed for this array, which
251    if mutex-locking is involved is a significant overhead. */
252template<typename P_numtype, int N_rank>
253void 
254Array<P_numtype, N_rank>::weakReference(const Array<P_numtype, N_rank>& array)
255{
256    storage_ = array.storage_;
257    length_ = array.length_;
258    stride_ = array.stride_;
259    zeroOffset_ = array.zeroOffset_;
260
261    T_base::changeToNullBlock();
262    data_ = array.data_;
263}
264
265
266/**
267   Modify the Array storage.  Array must be unallocated.
268 */
269template<typename P_numtype, int N_rank>
270void Array<P_numtype, N_rank>::setStorage(GeneralArrayStorage<N_rank> x)
271{
272#ifdef BZ_DEBUG
273    if (size() != 0) {
274        BZPRECHECK(0,"Cannot modify storage format of an Array that has already been allocated!" << endl);
275        return;
276    }
277#endif
278    storage_ = x;
279    return;
280}
281
282/**
283   This method is called to allocate memory for a new array. It
284   assumes the storage_ and length_ members are already initialized.
285 */
286template<typename P_numtype, int N_rank>
287_bz_inline2 void Array<P_numtype, N_rank>::setupStorage(int lastRankInitialized)
288{
289    TAU_TYPE_STRING(p1, "Array<T,N>::setupStorage() [T="
290        + CT(P_numtype) + ",N=" + CT(N_rank) + "]");
291    TAU_PROFILE(" ", p1, TAU_BLITZ);
292
293    /*
294     * If the length of some of the ranks was unspecified, fill these
295     * in using the last specified value.
296     *
297     * e.g. Array<int,3> A(40) results in a 40x40x40 array.
298     */
299    for (int i=lastRankInitialized + 1; i < N_rank; ++i)
300    {
301        storage_.setBase(i, storage_.base(lastRankInitialized));
302        length_[i] = length_[lastRankInitialized];
303    }
304
305    // Compute strides
306    computeStrides();
307
308    // Allocate a block of memory.
309    TinyVector<int, N_rank> alloc_length = length();
310    if(storage_.padding()==paddedData) {
311      // The size of the block is NOT equal to numelements, because the
312      // lowest rank dimension is padded to vecWidth
313      alloc_length[ordering(0)] = 
314        simdTypes<T_numtype>::paddedLength(alloc_length[ordering(0)]);
315    }
316    sizeType numElem = _bz_returntype<sizeType>::product(alloc_length);
317    if (numElem==0)
318        T_base::changeToNullBlock();
319    else
320        T_base::newBlock(numElem);
321
322    // Adjust the base of the array to account for non-zero base
323    // indices and reversals
324    data_ += zeroOffset_;
325}
326
327/** Return a deep copy of an array (as opposed to the reference copy
328    done by the copy constructor. */
329template<typename P_numtype, int N_rank>
330Array<P_numtype, N_rank> Array<P_numtype, N_rank>::copy() const
331{
332    if (numElements())
333    {
334        Array<T_numtype, N_rank> z(length_, storage_);
335        z = *this;
336        return z;
337    }
338    else {
339        // Null array-- don't bother allocating an empty block.
340        return *this;
341    }
342}
343
344/** Make the array have its own memory block by making a copy if the
345    block has a reference count greater than one. */
346template<typename P_numtype, int N_rank>
347void Array<P_numtype, N_rank>::makeUnique()
348{
349    if (T_base::numReferences() > 1)
350    {
351        T_array tmp = copy();
352        reference(tmp);
353    }
354}
355
356template<typename P_numtype, int N_rank>
357Array<P_numtype, N_rank> Array<P_numtype, N_rank>::transpose(int r0, int r1, 
358    int r2, int r3, int r4, int r5, int r6, int r7, int r8, int r9, int r10) const
359{
360    T_array B(*this);
361    B.transposeSelf(r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10);
362    return B;
363}
364
365template<typename P_numtype, int N_rank>
366void Array<P_numtype, N_rank>::transposeSelf(int r0, int r1, int r2, int r3,
367    int r4, int r5, int r6, int r7, int r8, int r9, int r10)
368{
369    BZPRECHECK(r0+r1+r2+r3+r4+r5+r6+r7+r8+r9+r10 == N_rank * (N_rank-1) / 2,
370        "Invalid array transpose() arguments." << endl
371        << "Arguments must be a permutation of the numerals (0,...,"
372        << (N_rank - 1) << ")");
373
374    // Create a temporary reference copy of this array
375    Array<T_numtype, N_rank> x(*this);
376
377    // Now reorder the dimensions using the supplied permutation
378    doTranspose(0, r0, x);
379    doTranspose(1, r1, x);
380    doTranspose(2, r2, x);
381    doTranspose(3, r3, x);
382    doTranspose(4, r4, x);
383    doTranspose(5, r5, x);
384    doTranspose(6, r6, x);
385    doTranspose(7, r7, x);
386    doTranspose(8, r8, x);
387    doTranspose(9, r9, x);
388    doTranspose(10, r10, x);
389}
390
391template<typename P_numtype, int N_rank>
392void Array<P_numtype, N_rank>::doTranspose(int destRank, int sourceRank,
393    Array<T_numtype, N_rank>& array)
394{
395    // BZ_NEEDS_WORK: precondition check
396
397    if (destRank >= N_rank)
398        return;
399
400    length_[destRank] = array.length_[sourceRank];
401    stride_[destRank] = array.stride_[sourceRank];
402    storage_.setAscendingFlag(destRank, 
403        array.isRankStoredAscending(sourceRank));
404    storage_.setBase(destRank, array.base(sourceRank));
405
406    // BZ_NEEDS_WORK: Handling the storage ordering is currently O(N^2)
407    // but it can be done fairly easily in linear time by constructing
408    // the appropriate permutation.
409
410    // Find sourceRank in array.storage_.ordering_
411    int i=0;
412    for (; i < N_rank; ++i)
413        if (array.storage_.ordering(i) == sourceRank)
414            break;
415
416    storage_.setOrdering(i, destRank);
417}
418
419template<typename P_numtype, int N_rank>
420void Array<P_numtype, N_rank>::reverseSelf(int rank)
421{
422    BZPRECONDITION(rank < N_rank);
423
424    storage_.setAscendingFlag(rank, !isRankStoredAscending(rank));
425
426    diffType adjustment = static_cast<ptrdiff_t>(stride_[rank]) * (lbound(rank) + ubound(rank));
427    zeroOffset_ += adjustment;
428    data_ += adjustment;
429    stride_[rank] *= -1;
430}
431
432template<typename P_numtype, int N_rank>
433Array<P_numtype, N_rank> Array<P_numtype,N_rank>::reverse(int rank)
434{
435    T_array B(*this);
436    B.reverseSelf(rank);
437    return B;
438}
439
440template<typename P_numtype, int N_rank> template<typename P_numtype2>
441Array<P_numtype2,N_rank> Array<P_numtype,N_rank>::extractComponent(P_numtype2, 
442    int componentNumber, int numComponents) const
443{
444    BZPRECONDITION((componentNumber >= 0) 
445        && (componentNumber < numComponents));
446
447    // If P_numtype is a multicomponent type, it may have an alignment
448    // setting. For this reason it is not correct to use
449    // numComponents, we must use sizeof(P_numtype)/sizeof(P_numtype2)
450    // instead.
451    BZASSERT(sizeof(P_numtype)%sizeof(P_numtype2)==0);
452
453    TinyVector<diffType, N_rank> stride2;
454    for (int i=0; i < N_rank; ++i)
455      stride2(i) = stride_(i) * sizeof(P_numtype)/sizeof(P_numtype2);
456    const P_numtype2* dataFirst2 = 
457        ((const P_numtype2*)dataFirst()) + componentNumber;
458    return Array<P_numtype2,N_rank>(const_cast<P_numtype2*>(dataFirst2), 
459        length_, stride2, storage_);
460}
461
462/*
463 * These routines reindex the current array to use a new base vector.
464 * The first reindexes the array, the second just returns a reindex view
465 * of the current array, leaving the current array unmodified.
466 * (Contributed by Derrick Bass)
467 */
468template<typename P_numtype, int N_rank>
469_bz_inline2 void Array<P_numtype, N_rank>::reindexSelf(const 
470    TinyVector<int, N_rank>& newBase) 
471{
472  diffType delta = 0;
473    for (int i=0; i < N_rank; ++i)
474      delta += (base(i) - newBase(i)) * stride_(i);
475
476    data_ += delta;
477
478    // WAS: dot(base() - newBase, stride_);
479
480    storage_.setBase(newBase);
481    calculateZeroOffset();
482}
483
484template<typename P_numtype, int N_rank>
485_bz_inline2 Array<P_numtype, N_rank> 
486Array<P_numtype, N_rank>::reindex(const TinyVector<int, N_rank>& newBase) 
487{
488    T_array B(*this);
489    B.reindexSelf(newBase);
490    return B;
491}
492
493BZ_NAMESPACE_END
494
495#endif // BZ_ARRAY_CC
496
Note: See TracBrowser for help on using the repository browser.