1 | /*************************************************************************** |
---|
2 | * blitz/tmevaluate.h Evaluation of expressions assigned to a TinyMatrix |
---|
3 | * |
---|
4 | * $Id$ |
---|
5 | * |
---|
6 | * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org> |
---|
7 | * |
---|
8 | * This file is a part of Blitz. |
---|
9 | * |
---|
10 | * Blitz is free software: you can redistribute it and/or modify |
---|
11 | * it under the terms of the GNU Lesser General Public License |
---|
12 | * as published by the Free Software Foundation, either version 3 |
---|
13 | * of the License, or (at your option) any later version. |
---|
14 | * |
---|
15 | * Blitz is distributed in the hope that it will be useful, |
---|
16 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
---|
17 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
---|
18 | * GNU Lesser General Public License for more details. |
---|
19 | * |
---|
20 | * You should have received a copy of the GNU Lesser General Public |
---|
21 | * License along with Blitz. If not, see <http://www.gnu.org/licenses/>. |
---|
22 | * |
---|
23 | * Suggestions: blitz-devel@lists.sourceforge.net |
---|
24 | * Bugs: blitz-support@lists.sourceforge.net |
---|
25 | * |
---|
26 | * For more information, please see the Blitz++ Home Page: |
---|
27 | * https://sourceforge.net/projects/blitz/ |
---|
28 | * |
---|
29 | ***************************************************************************/ |
---|
30 | |
---|
31 | #ifndef BZ_TMEVALUATE_H |
---|
32 | #define BZ_TMEVALUATE_H |
---|
33 | |
---|
34 | #include <blitz/tinymat2.h> |
---|
35 | #include <blitz/update.h> |
---|
36 | #include <blitz/blitz.h> |
---|
37 | #include <blitz/meta/vecassign.h> |
---|
38 | |
---|
39 | BZ_NAMESPACE(blitz) |
---|
40 | |
---|
41 | |
---|
42 | /** The _tm_evaluator class has a bool template argument that is used |
---|
43 | to select code paths at compile time. */ |
---|
44 | template<bool unroll, int N_rows, int N_columns> |
---|
45 | struct _tm_evaluator { |
---|
46 | |
---|
47 | /** The select_evaluation function redirects expressions that do not |
---|
48 | contains solely TinyMatrix operands to the general evaluation |
---|
49 | function. The generic template (for unroll=false, note that |
---|
50 | "unroll" us the wrong name for this function, the template |
---|
51 | parameter in this context really means "use_full_eval") uses the |
---|
52 | TinyMatrix-only evaluation. Since TinyMatrixs can't have funny |
---|
53 | storage, ordering, stride, or anything, it's now just a matter |
---|
54 | of evaluating it like in the old vecassign. */ |
---|
55 | template<typename T, typename T_expr, typename T_update> |
---|
56 | static _bz_forceinline void |
---|
57 | select_evaluation(TinyMatrix<T, N_rows, N_columns>& dest, |
---|
58 | const T_expr& expr, T_update) { |
---|
59 | |
---|
60 | // since we can't resize tinyvectors, there are two options: all |
---|
61 | // vectors have our size or the expression is malformed. |
---|
62 | // Check that all operands have the same shape |
---|
63 | #ifdef BZ_DEBUG |
---|
64 | if (!expr.shapeCheck(dest.shape())) |
---|
65 | { |
---|
66 | if (assertFailMode == false) |
---|
67 | { |
---|
68 | cerr << "[Blitz++] Shape check failed: Module " << __FILE__ |
---|
69 | << " line " << __LINE__ << endl |
---|
70 | << " Expression: "; |
---|
71 | prettyPrintFormat format(true); // Use terse formatting |
---|
72 | BZ_STD_SCOPE(string) str; |
---|
73 | expr.prettyPrint(str, format); |
---|
74 | cerr << str << endl ; |
---|
75 | } |
---|
76 | } |
---|
77 | #endif |
---|
78 | |
---|
79 | BZPRECHECK(expr.shapeCheck(dest.shape()), |
---|
80 | "Shape check failed." << endl << "Expression:"); |
---|
81 | |
---|
82 | // now call the aligned (unrolled or not) evaluation function |
---|
83 | const bool do_unroll = N_rows*N_columns < BZ_TV_EVALUATE_UNROLL_LENGTH; |
---|
84 | _tm_evaluator<do_unroll, N_rows, N_columns>::evaluate_aligned(dest.data(), expr, T_update()); |
---|
85 | } |
---|
86 | |
---|
87 | /** This version of the evaluation function assumes that the |
---|
88 | TinyMatrixs have appropriate alignment (as will always be the |
---|
89 | case if they are actual TinyMatrix objects and not created using |
---|
90 | reinterpret_cast in the chunked_updater. */ |
---|
91 | template<typename T_numtype, typename T_expr, typename T_update> |
---|
92 | static _bz_forceinline void |
---|
93 | evaluate_aligned(T_numtype* data, const T_expr& expr, T_update) { |
---|
94 | #ifdef BZ_USE_ALIGNMENT_PRAGMAS |
---|
95 | #pragma ivdep |
---|
96 | #pragma vector aligned |
---|
97 | #endif |
---|
98 | for (int i=0; i < N_rows*N_columns; ++i) |
---|
99 | T_update::update(data[i], expr.fastRead(i)); |
---|
100 | } |
---|
101 | |
---|
102 | /** This version of the evaluation function is used when vectorizing |
---|
103 | expressions that we know can't be aligned. The only difference |
---|
104 | with evaluate_aligned is the compiler pragma that tells the |
---|
105 | compiler it is unaligned. */ |
---|
106 | template<typename T_numtype, typename T_expr, typename T_update> |
---|
107 | static _bz_forceinline void |
---|
108 | evaluate_unaligned(T_numtype* data, const T_expr& expr, T_update) { |
---|
109 | #ifdef BZ_USE_ALIGNMENT_PRAGMAS |
---|
110 | #pragma ivdep |
---|
111 | #pragma vector unaligned |
---|
112 | #endif |
---|
113 | for (int i=0; i < N_rows*N_columns; ++i) |
---|
114 | T_update::update(data[i], expr.fastRead(i)); |
---|
115 | } |
---|
116 | }; |
---|
117 | |
---|
118 | /** Specialization of the _tm_evaluator class for false template arguments. */ |
---|
119 | template<int N_rows, int N_columns> |
---|
120 | struct _tm_evaluator<true, N_rows, N_columns> { |
---|
121 | |
---|
122 | /** The false version of select_evaluation is picked for expressions |
---|
123 | that contain operands other than TinyMatrixs. It just redirects |
---|
124 | to the general evaluation function. */ |
---|
125 | template<typename T, typename T_expr, typename T_update> |
---|
126 | static _bz_forceinline void |
---|
127 | select_evaluation(TinyMatrix<T, N_rows, N_columns>& dest, |
---|
128 | const T_expr& expr, T_update) { |
---|
129 | _bz_evaluate(dest, expr, T_update()); |
---|
130 | } |
---|
131 | |
---|
132 | /** This version of the evaluation function assumes that the |
---|
133 | TinyMatrixs have appropriate alignment (as will always be the |
---|
134 | case if they are actual TinyMatrix objects and not created using |
---|
135 | reinterpret_cast in the chunked_updater. */ |
---|
136 | template<typename T_numtype, typename T_expr, typename T_update> |
---|
137 | static _bz_forceinline void |
---|
138 | evaluate_aligned(T_numtype* data, const T_expr& expr, T_update) { |
---|
139 | #ifdef BZ_USE_ALIGNMENT_PRAGMAS |
---|
140 | //#pragma ivdep |
---|
141 | //#pragma vector aligned |
---|
142 | #endif |
---|
143 | _bz_meta_vecAssign<N_rows*N_columns, 0>::fastAssign(data, expr, T_update()); |
---|
144 | } |
---|
145 | |
---|
146 | /** This version of the evaluation function is used when vectorizing |
---|
147 | expressions that we know can't be aligned. The only difference |
---|
148 | with evaluate_aligned is the compiler pragma that tells the |
---|
149 | compiler it is unaligned. */ |
---|
150 | template<typename T_numtype, typename T_expr, typename T_update> |
---|
151 | static _bz_forceinline void |
---|
152 | evaluate_unaligned(T_numtype* data, const T_expr& expr, T_update) { |
---|
153 | //#pragma ivdep |
---|
154 | //#pragma vector unaligned |
---|
155 | _bz_meta_vecAssign<N_rows*N_columns, 0>::fastAssign(data, expr, T_update()); |
---|
156 | } |
---|
157 | }; |
---|
158 | |
---|
159 | |
---|
160 | /** This function selects evaluation path by calling select_evaluation |
---|
161 | with a bool argument which is false if the expression only |
---|
162 | contains TinyMatrix operands. */ |
---|
163 | template<typename P_numtype, int N_rows, int N_columns> |
---|
164 | template<typename T_expr, typename T_update> |
---|
165 | _bz_forceinline |
---|
166 | void |
---|
167 | TinyMatrix<P_numtype,N_rows, N_columns>::_tm_evaluate(const T_expr& expr, T_update) |
---|
168 | { |
---|
169 | const bool mixed_expr = |
---|
170 | (T_expr::numArrayOperands>0) || |
---|
171 | (T_expr::numTVOperands>0) || |
---|
172 | (T_expr::numIndexPlaceholders>0); |
---|
173 | _tm_evaluator<mixed_expr, N_rows, N_columns>::select_evaluation(*this, expr, T_update()); |
---|
174 | } |
---|
175 | |
---|
176 | |
---|
177 | BZ_NAMESPACE_END |
---|
178 | |
---|
179 | #endif // BZ_TMEVALUATE_H |
---|