// -*- C++ -*-
/***************************************************************************
 * blitz/tuning.h      Platform-specific code tuning
 *
 * $Id$
 *
 * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
 *
 * This file is a part of Blitz.
 *
 * Blitz is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * Blitz is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Blitz.  If not, see <http://www.gnu.org/licenses/>.
 *
 * Suggestions:          blitz-devel@lists.sourceforge.net
 * Bugs:                 blitz-support@lists.sourceforge.net
 *
 * For more information, please see the Blitz++ Home Page:
 *    https://sourceforge.net/projects/blitz/
 *
 ***************************************************************************/

#ifndef BZ_TUNING_H
#define BZ_TUNING_H

/*
 * Cache geometry
 *
 * These estimates should be conservative (i.e. underestimate the
 * cache sizes). \todo these can be const ints instead of macros.
 */
#define BZ_L1_CACHE_ESTIMATED_SIZE 32768
#define BZ_L2_CACHE_ESTIMATED_SIZE 6291456
// This will work for 32, 16 also
#define BZ_L1_CACHE_LINE_SIZE 64
#define BZ_CACHE_LINES_TO_ALIGN 16

/*
 * Default tuning switches
 *
 * Each switch is explicitly forced on (#define) or off (#undef) here so
 * that the state below is the baseline regardless of what was defined
 * before this header was included.  The platform-specific sections
 * further down may override individual switches.
 */
#undef  BZ_PARTIAL_LOOP_UNROLL
#define BZ_PASS_EXPR_BY_VALUE
#undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
#undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE 128
#undef  BZ_INTERLACE_ARRAYS
#define BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
#define BZ_FAST_COMPILE
#define BZ_TV_EVALUATE_UNROLL_LENGTH 0
#define BZ_MAX_BITS_FOR_BINARY_UNROLL 8
#define BZ_VECTORIZED_LOOP_WIDTH 32


// The new expression templates are on unless the user opts out by
// defining BZ_DISABLE_NEW_ET before including this header.
#ifndef BZ_DISABLE_NEW_ET
  #define BZ_NEW_EXPRESSION_TEMPLATES
#endif

// BZ_FAST_COMPILE selects const-reference ET parameters (see BZ_ETPARM
// below) and turns off the inline specifier on _bz_inline_et.
#ifdef BZ_FAST_COMPILE
  #define BZ_ETPARMS_CONSTREF
  #define BZ_NO_INLINE_ET
#endif

// possibly overridden by specific compilers below
#define _bz_forceinline inline
#define _bz_inline_et inline


/*
 * Platform-specific tuning
 */

#ifdef _CRAYT3E
  // The backend compiler on the T3E does a better job of
  // loop unrolling.
  #undef BZ_PARTIAL_LOOP_UNROLL
  #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
  #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

#ifdef __INTEL_COMPILER
  // icpc does not vectorize the unrolled loop so this is def. bad
  // The #undef makes this a genuine override: re-#defining a macro
  // without it is only well-formed while both values are identical.
  #undef  BZ_TV_EVALUATE_UNROLL_LENGTH
  #define BZ_TV_EVALUATE_UNROLL_LENGTH 0

  // defines for inlining
  #undef  _bz_forceinline
  #undef  _bz_inline_et
  #define _bz_forceinline __forceinline
  #define _bz_inline_et __forceinline

#else // need this since icpc also defines __GNUC__
  #ifdef __GNUC__
    // The egcs compiler does a good job of loop unrolling, if
    // -funroll-loops is used.
    #undef BZ_PARTIAL_LOOP_UNROLL
    #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
    #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
  #endif
#endif

// User-level opt-out for the KCC copy propagation kludge.
#ifdef BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE
  #undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif

// _bz_inline1 / _bz_inline2 expand to `inline` only when the matching
// inline group is enabled; otherwise they expand to nothing.
#ifdef BZ_INLINE_GROUP1
  #define _bz_inline1 inline
#else
  #define _bz_inline1
#endif

#ifdef BZ_INLINE_GROUP2
  #define _bz_inline2 inline
#else
  #define _bz_inline2
#endif

// override definitions above (including any compiler-specific
// _bz_inline_et chosen in the platform section)
#ifdef BZ_NO_INLINE_ET
  #undef  _bz_inline_et
  #define _bz_inline_et
#endif

// BZ_ETPARM(X) spells the parameter type used for an expression
// template operand of type X: const reference or by value.
#ifdef BZ_ETPARMS_CONSTREF
  #define BZ_ETPARM(X) const X&
#else
  #define BZ_ETPARM(X) X
#endif

#ifdef __DECCXX
  // The DEC cxx compiler has problems with loop unrolling
  // because of aliasing.  Loop unrolling and anti-aliasing
  // is done by Blitz++.

  #define BZ_PARTIAL_LOOP_UNROLL
  #define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
  #define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif

/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on a variable.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default it expands to its argument unchanged; only KAI C++ with
 * BZ_USE_NO_PROPAGATE defined gets the __kai_apply form.
 */

#define BZ_NO_PROPAGATE(X) X

#ifdef __KCC
  #ifdef BZ_USE_NO_PROPAGATE
    extern "C" int __kai_apply(const char*, ...);

    // #undef takes the bare macro name, without a parameter list.
    #undef  BZ_NO_PROPAGATE
    #define BZ_NO_PROPAGATE(X) __kai_apply("(%a)",&(X))
  #endif
#endif

#endif // BZ_TUNING_H