source: XIOS/dev/dev_olga/src/extern/blitz/include/blitz/tuning.h @ 1022

Last change on this file since 1022 was 1022, checked in by mhnguyen, 7 years ago
File size: 4.7 KB
Line 
1// -*- C++ -*-
2/***************************************************************************
3 * blitz/tuning.h      Platform-specific code tuning
4 *
5 * $Id$
6 *
7 * Copyright (C) 1997-2011 Todd Veldhuizen <tveldhui@acm.org>
8 *
9 * This file is a part of Blitz.
10 *
11 * Blitz is free software: you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser General Public License
13 * as published by the Free Software Foundation, either version 3
14 * of the License, or (at your option) any later version.
15 *
16 * Blitz is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 * GNU Lesser General Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser General Public
22 * License along with Blitz.  If not, see <http://www.gnu.org/licenses/>.
23 *
24 * Suggestions:          blitz-devel@lists.sourceforge.net
25 * Bugs:                 blitz-support@lists.sourceforge.net   
26 *
27 * For more information, please see the Blitz++ Home Page:
28 *    https://sourceforge.net/projects/blitz/
29 *
30 ***************************************************************************/
31
32#ifndef BZ_TUNING_H
33#define BZ_TUNING_H
34
// Cache geometry estimates.  The size estimates are meant to be
// conservative, i.e. they should underestimate the real cache sizes.
// \todo these can be const ints instead of macros.

// This line-size value will also work for 32 and 16.
#define BZ_L1_CACHE_LINE_SIZE       64
#define BZ_CACHE_LINES_TO_ALIGN     16

#define BZ_L1_CACHE_ESTIMATED_SIZE  32768
#define BZ_L2_CACHE_ESTIMATED_SIZE  6291456
42
// --- Switches that are forced off by default -------------------------
#undef  BZ_PARTIAL_LOOP_UNROLL
#undef  BZ_PTR_INC_FASTER_THAN_INDIRECTION
#undef  BZ_KCC_COPY_PROPAGATION_KLUDGE
#undef  BZ_ALTERNATE_FORWARD_BACKWARD_TRAVERSALS
#undef  BZ_ARRAY_EXPR_PASS_INDEX_BY_VALUE
#undef  BZ_ARRAY_SPACE_FILLING_TRAVERSAL
#undef  BZ_ARRAY_FAST_TRAVERSAL_UNROLL
#undef  BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
#undef  BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#undef  BZ_INTERLACE_ARRAYS

// --- Switches that are turned on by default --------------------------
#define BZ_PASS_EXPR_BY_VALUE
#define BZ_MANUAL_VECEXPR_COPY_CONSTRUCTOR
#define BZ_INLINE_GROUP1
#define BZ_INLINE_GROUP2
#define BZ_COLLAPSE_LOOPS
#define BZ_USE_FAST_READ_ARRAY_EXPR
#define BZ_ARRAY_EXPR_USE_COMMON_STRIDE
#define BZ_ARRAY_2D_STENCIL_TILING
#define BZ_ALIGN_BLOCKS_ON_CACHELINE_BOUNDARY
#define BZ_FAST_COMPILE

// --- Numeric tuning parameters ---------------------------------------
#define BZ_ARRAY_2D_STENCIL_TILE_SIZE   128
#define BZ_TV_EVALUATE_UNROLL_LENGTH    0
#define BZ_MAX_BITS_FOR_BINARY_UNROLL   8
#define BZ_VECTORIZED_LOOP_WIDTH        32
67
68
// BZ_NEW_EXPRESSION_TEMPLATES is defined unless the client has opted
// out by defining BZ_DISABLE_NEW_ET.
#if !defined(BZ_DISABLE_NEW_ET)
#  define BZ_NEW_EXPRESSION_TEMPLATES
#endif
72
// BZ_FAST_COMPILE switches on both BZ_NO_INLINE_ET (which empties
// _bz_inline_et) and BZ_ETPARMS_CONSTREF (which makes BZ_ETPARM(X)
// expand to a const reference).
#if defined(BZ_FAST_COMPILE)
#  define BZ_NO_INLINE_ET
#  define BZ_ETPARMS_CONSTREF
#endif
77
// Baseline inlining keywords.  The compiler-specific sections below may
// replace either of them.
#define _bz_inline_et   inline
#define _bz_forceinline inline
81
82
83/*
84 * Platform-specific tuning
85 */
86
#if defined(_CRAYT3E)
 // Leave loop unrolling to the T3E backend compiler, which does a
 // better job of it.
 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
 #undef BZ_PARTIAL_LOOP_UNROLL
#endif
94
// Intel must be tested before GNU because icpc also defines __GNUC__.
#if defined(__INTEL_COMPILER)
 // icpc does not vectorize the unrolled loop, so unrolling is turned off.
 // The macro is undefined first so this override stays a well-formed
 // redefinition even if the default value set earlier is ever changed
 // (the same undef-then-define pattern is used for the inline macros).
 #undef  BZ_TV_EVALUATE_UNROLL_LENGTH
 #define BZ_TV_EVALUATE_UNROLL_LENGTH 0

 // defines for inlining
 #undef  _bz_forceinline
 #undef  _bz_inline_et
 #define _bz_forceinline __forceinline
 #define _bz_inline_et   __forceinline

#elif defined(__GNUC__)
 // The egcs compiler does a good job of loop unrolling, if
 // -funroll-loops is used.
 #undef BZ_PARTIAL_LOOP_UNROLL
 #undef BZ_ARRAY_FAST_TRAVERSAL_UNROLL
 #undef BZ_ARRAY_STACK_TRAVERSAL_UNROLL
#endif
114
// Client opt-out: defining BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE makes
// sure BZ_KCC_COPY_PROPAGATION_KLUDGE is not defined from here on.
#if defined(BZ_DISABLE_KCC_COPY_PROPAGATION_KLUDGE)
#  undef BZ_KCC_COPY_PROPAGATION_KLUDGE
#endif
118
// _bz_inline1 expands to `inline` when BZ_INLINE_GROUP1 is defined and
// to nothing otherwise.
#ifndef BZ_INLINE_GROUP1
#  define _bz_inline1
#else
#  define _bz_inline1 inline
#endif

// _bz_inline2 follows BZ_INLINE_GROUP2 in the same way.
#ifndef BZ_INLINE_GROUP2
#  define _bz_inline2
#else
#  define _bz_inline2 inline
#endif
130
// BZ_NO_INLINE_ET takes precedence over every earlier definition of
// _bz_inline_et: when it is set, the macro expands to nothing.
#if defined(BZ_NO_INLINE_ET)
#  undef  _bz_inline_et
#  define _bz_inline_et
#endif
136
// BZ_ETPARM(X) spells the parameter type for X: X itself by default,
// or `const X&` when BZ_ETPARMS_CONSTREF is defined.
#ifndef BZ_ETPARMS_CONSTREF
#  define BZ_ETPARM(X) X
#else
#  define BZ_ETPARM(X) const X&
#endif
142
#if defined(__DECCXX)
 // DEC cxx has problems unrolling loops because of aliasing, so
 // Blitz++ does the loop unrolling and anti-aliasing itself.
 #define BZ_ARRAY_STACK_TRAVERSAL_UNROLL
 #define BZ_ARRAY_STACK_TRAVERSAL_CSE_AND_ANTIALIAS
 #define BZ_PARTIAL_LOOP_UNROLL
#endif
152
/*
 * BZ_NO_PROPAGATE(X) prevents the compiler from performing
 * copy propagation on a variable.  This is used for loop
 * unrolling to prevent KAI C++ from rearranging the
 * ordering of memory accesses.
 *
 * By default the macro expands to X unchanged.  Only when compiling
 * with KAI C++ (__KCC) and BZ_USE_NO_PROPAGATE is defined does it
 * expand to a __kai_apply call on the address of X.
 */

#define BZ_NO_PROPAGATE(X)   X

#if defined(__KCC) && defined(BZ_USE_NO_PROPAGATE)
    extern "C" int __kai_apply(const char*, ...);

    // #undef takes the bare macro name only; a parameter list after the
    // name is extra tokens that compilers diagnose.
    #undef  BZ_NO_PROPAGATE
    #define BZ_NO_PROPAGATE(X)  __kai_apply("(%a)",&X)
#endif
170
171#endif // BZ_TUNING_H
Note: See TracBrowser for help on using the repository browser.