forked from tud-zih-energy/FIRESTARTER
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig.cfg
227 lines (208 loc) · 6.58 KB
/
config.cfg
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
###############################################################################
# FIRESTARTER - A Processor Stress Test Utility
# Copyright (C) 2019 TU Dresden, Center for Information Services and High
# Performance Computing
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Contact: [email protected]
###############################################################################
###############################################################################
# this file specifies which functions will be generated
###############################################################################
# the source code (<isa>_functions.c) and templates (<isa>_functions.c.py) for
# each isa have to be placed in the source_files and templates directory,
# respectively
###############################################################################
[VERSION]
# Version information and optional-feature switches.
# NOTE(review): major/minor/info are presumably combined into the reported
# version string - confirm against the code generator.
major=1
minor=7
# additional information, e.g., "BETA"
# NOTE(review): the value below is quoted; whether the quotes are stripped or
# kept literally depends on the parser that reads this file - confirm.
info="3 (github)"
# optional features
# NOTE(review): 1 presumably enables and 0 disables the respective target -
# confirm against the code generator.
enable_cuda=1
enable_win64=1
enable_mac=0
[ISA_AVX512]
# ISA definition for the avx512 template.
# template: the <isa> name; per the file header, <isa>_functions.c and
#   <isa>_functions.c.py have to exist in the source_files and templates
#   directories.
template= avx512
# NOTE(review): presumably the CPU feature that must be present to use this
# ISA - confirm.
feature_req= avx512
# The fallback names match arch/model/isa/threads of [Skylake-SP]
# (skl, xeonep, avx512, threads 1 and 2).
# NOTE(review): presumably used when the feature is available but no
# cpu_family/cpu_model entry matches - confirm.
fallback= func_skl_xeonep_avx512_1t, func_skl_xeonep_avx512_2t
# NOTE(review): looks like the compiler flag(s) used for this ISA's code -
# confirm.
flags = -mavx512f
# NOTE(review): presumably 1 = include this ISA in the win64 build
# (cf. enable_win64 in [VERSION]) - confirm.
win64_incl = 1
[ISA_FMA4]
# ISA definition for the fma4 template.
# template: the <isa> name; per the file header, <isa>_functions.c and
#   <isa>_functions.c.py have to exist in the source_files and templates
#   directories.
template= fma4
# NOTE(review): presumably the CPU feature that must be present to use this
# ISA - confirm.
feature_req= fma4
# The fallback name matches arch/model/isa/threads of [Bulldozer]
# (bld, opteron, fma4, threads 1); only a single-thread variant is listed.
fallback= func_bld_opteron_fma4_1t
# Comma-separated list with two entries.
# NOTE(review): looks like the compiler flags used for this ISA's code -
# confirm.
flags= -mavx, -mfma4
# NOTE(review): presumably 1 = include this ISA in the win64 build
# (cf. enable_win64 in [VERSION]) - confirm.
win64_incl = 1
[ISA_FMA]
# ISA definition for the fma template.
# template: the <isa> name; per the file header, <isa>_functions.c and
#   <isa>_functions.c.py have to exist in the source_files and templates
#   directories.
template= fma
# NOTE(review): presumably the CPU feature that must be present to use this
# ISA - confirm.
feature_req= fma
# The fallback names match arch/model/isa/threads of [Haswell]
# (hsw, corei, fma, threads 1 and 2).
fallback= func_hsw_corei_fma_1t, func_hsw_corei_fma_2t
# Comma-separated list with two entries.
# NOTE(review): looks like the compiler flags used for this ISA's code -
# confirm.
flags= -mavx, -mfma
# NOTE(review): presumably 1 = include this ISA in the win64 build
# (cf. enable_win64 in [VERSION]) - confirm.
win64_incl = 1
[ISA_AVX]
# ISA definition for the avx template.
# template: the <isa> name; per the file header, <isa>_functions.c and
#   <isa>_functions.c.py have to exist in the source_files and templates
#   directories.
template= avx
# NOTE(review): presumably the CPU feature that must be present to use this
# ISA - confirm.
feature_req= avx
# The fallback names match arch/model/isa/threads of [Sandy Bridge]
# (snb, corei, avx, threads 1 and 2).
fallback= func_snb_corei_avx_1t, func_snb_corei_avx_2t
# NOTE(review): looks like the compiler flag(s) used for this ISA's code -
# confirm.
flags = -mavx
# NOTE(review): presumably 1 = include this ISA in the win64 build
# (cf. enable_win64 in [VERSION]) - confirm.
win64_incl = 1
[ISA_SSE2]
# ISA definition for the sse2 template.
# template: the <isa> name; per the file header, <isa>_functions.c and
#   <isa>_functions.c.py have to exist in the source_files and templates
#   directories.
template= sse2
# NOTE(review): presumably the CPU feature that must be present to use this
# ISA - confirm.
feature_req= sse2
# The fallback names match arch/model/isa/threads of [Nehalem]
# (nhm, corei, sse2, threads 1 and 2).
fallback= func_nhm_corei_sse2_1t, func_nhm_corei_sse2_2t
# NOTE(review): looks like the compiler flag(s) used for this ISA's code -
# confirm.
flags = -msse2
# NOTE(review): presumably 1 = include this ISA in the win64 build
# (cf. enable_win64 in [VERSION]) - confirm.
win64_incl = 1
###############################################################################
# supported processors
###############################################################################
# arch/model: used as prefixes for function names in generated code
# threads: generate code sequences for listed numbers of threads
# isa: specifies the template that contains the required code snippets
# cpu_family: cpu family of the processors that may use this code path
# cpu_model: cpu models (within the cpu_family) that use this code path
# buffer_sizes: L1 cache size / L2 cache size / L3 cache size / memory per core
# - sizes are defined per core
# - SMT code paths use size/num_threads_per_core per thread
# lines: minimal number of instruction groups for assembler loops
# - SMT code will use lines/num_threads_per_core per thread
# instr_groups: defines which code snippets are used for each level
# proportion: specifies proportion of accesses to each level
# - sequence is repeated to reach the minimal number of lines
###############################################################################
# Knights Landing
# TODO: MCDRAM support:
# - increase ram_size to 945116501 (64 GiB/72)
# - cache mode: implicitly use l3_size as in other architectures
# - flat/hybrid: explicitly use l3_size/2 allocated in extra MCDRAM buffer
[Knights_Landing]
# Code path for cpu_family 6, cpu_model 87, built from the avx512 template.
# Keys are described in the "supported processors" key reference above.
arch= knl
model= xeonphi
# Only a 4-thread variant is listed for this processor.
threads= 4
isa= avx512
cpu_family= 6
cpu_model= 87
# Order: L1 / L2 / L3 / memory per core. The third (L3) value is larger than
# the fourth (memory) value here.
# NOTE(review): the L3 slot presumably stands in for MCDRAM (see the TODO
# above this section) and the unit is presumably bytes (32768 = 32 KiB) -
# confirm.
buffer_sizes= 32768,524288,236279125,26214400
lines= 1536
# Four instruction groups, four proportion values (matched by position);
# no L3 group is used in this section.
instr_groups= RAM_P,L2_S,L1_L,REG
proportion= 3,8,40,10
# Skylake / Kaby Lake desktop
# Code path for cpu_family 6, cpu_model 78 or 94. Uses the fma template
# (not avx512). Keys are described in the key reference above.
[Skylake]
arch= skl
model= corei
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= fma
cpu_family= 6
cpu_model= 78,94
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 1.5 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,1572864,104857600
lines= 1536
# Five instruction groups, five proportion values (matched by position).
# NOTE(review): the _256 suffix presumably selects 256-bit variants of the
# snippets in the fma template - confirm.
instr_groups= RAM_L,L3_LS_256,L2_LS_256,L1_2LS_256,REG
proportion= 3,5,18,78,40
# Skylake server
# Code path for cpu_family 6, cpu_model 85, built from the avx512 template.
# The [ISA_AVX512] fallback names (func_skl_xeonep_avx512_1t/_2t) match this
# section's arch/model/isa/threads.
[Skylake-SP]
arch= skl
model= xeonep
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= avx512
cpu_family= 6
cpu_model= 85
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 1 MiB / 1.375 MiB /
# 1000 MiB - confirm. The memory value is 10x the 104857600 used by the other
# family-6 desktop/server sections - confirm this is intentional.
buffer_sizes= 32768,1048576,1441792,1048576000
lines= 1536
# Ten instruction groups, ten proportion values (matched by position).
# L1_S (7th entry) has proportion 0.
# NOTE(review): a 0 presumably means the group is never emitted - confirm.
instr_groups= RAM_S,RAM_P,L3_S,L3_P,L2_S,L2_L,L1_S,L1_L,L1_BROADCAST,REG
proportion= 3,1,1,1,4,70,0,40,120,160
# Haswell/Broadwell desktop
# Code path for cpu_family 6, cpu_model 60, 61, 69, 70 or 71, built from the
# fma template. The [ISA_FMA] fallback names (func_hsw_corei_fma_1t/_2t)
# match this section's arch/model/isa/threads.
[Haswell]
arch= hsw
model= corei
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= fma
cpu_family= 6
cpu_model= 60,61,69,70,71
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 1.5 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,1572864,104857600
lines= 1536
# Five instruction groups, five proportion values (matched by position).
instr_groups= RAM_L,L3_LS,L2_LS,L1_LS,REG
proportion= 2,3,9,90,40
# Haswell/Broadwell server
# Code path for cpu_family 6, cpu_model 63 or 79, built from the fma
# template. Keys are described in the key reference above.
[Haswell-EP]
arch= hsw
model= xeonep
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= fma
cpu_family= 6
cpu_model= 63,79
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 2.5 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,2621440,104857600
lines= 1536
# Five instruction groups, five proportion values (matched by position).
instr_groups= RAM_L,L3_LS,L2_LS,L1_LS,REG
proportion= 2,1,9,79,35
# Sandy/Ivy Bridge desktop
# Code path for cpu_family 6, cpu_model 42 or 58, built from the avx
# template. The [ISA_AVX] fallback names (func_snb_corei_avx_1t/_2t) match
# this section's arch/model/isa/threads.
[Sandy Bridge]
arch= snb
model= corei
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= avx
cpu_family= 6
cpu_model= 42,58
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 1.5 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,1572864,104857600
lines= 1536
# Five instruction groups, five proportion values (matched by position).
instr_groups= RAM_L,L3_LS,L2_LS,L1_LS,REG
proportion= 2,4,10,90,45
# Sandy/Ivy Bridge server
# Code path for cpu_family 6, cpu_model 45 or 62, built from the avx
# template. Keys are described in the key reference above.
[Sandy Bridge-EP]
arch= snb
model= xeonep
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= avx
cpu_family= 6
cpu_model= 45,62
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 2.5 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,2621440,104857600
lines= 1536
# Five instruction groups, five proportion values (matched by position).
instr_groups= RAM_L,L3_LS,L2_LS,L1_LS,REG
proportion= 3,2,10,90,30
# Nehalem/Westmere desktop
# Code path for cpu_family 6, cpu_model 30, 37 or 23, built from the sse2
# template. The [ISA_SSE2] fallback names (func_nhm_corei_sse2_1t/_2t) match
# this section's arch/model/isa/threads.
[Nehalem]
arch= nhm
model= corei
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= sse2
cpu_family= 6
cpu_model= 30,37,23
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 1.5 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,1572864,104857600
# NOTE(review): "coverage" is not described in the key reference above and
# appears in no other section of this file; it has four values like
# buffer_sizes - confirm whether the code generator still reads it.
coverage= 0.5,0.8,0.8,1.0
lines= 1536
# Three instruction groups, three proportion values (matched by position);
# no L2 or L3 group is used in this section.
instr_groups= RAM_P,L1_LS,REG
proportion= 1,70,2
# Nehalem/Westmere server
# Code path for cpu_family 6, cpu_model 26 or 44, built from the sse2
# template. Keys are described in the key reference above.
[Nehalem-EP]
arch= nhm
model= xeonep
# One code sequence each for 1 and 2 threads per core.
threads= 1,2
isa= sse2
cpu_family= 6
cpu_model= 26,44
# Order: L1 / L2 / L3 / memory per core.
# NOTE(review): unit is presumably bytes, i.e. 32 KiB / 256 KiB / 2 MiB /
# 100 MiB - confirm.
buffer_sizes= 32768,262144,2097152,104857600
lines= 1536
# Three instruction groups, three proportion values (matched by position);
# no L2 or L3 group is used in this section.
instr_groups= RAM_P,L1_LS,REG
proportion= 1,60,2
# Bulldozer/Piledriver
# Code path for cpu_family 21, cpu_model 1, 2 or 3, built from the fma4
# template. The [ISA_FMA4] fallback name (func_bld_opteron_fma4_1t) matches
# this section's arch/model/isa/threads.
[Bulldozer]
arch= bld
model= opteron
# Only a single-thread variant is listed for this processor.
threads= 1
isa= fma4
cpu_family= 21
cpu_model= 1,2,3
# Order: L1 / L2 / L3 / memory per core. The L3 value is smaller than the L2
# value here.
# NOTE(review): unit is presumably bytes, i.e. 16 KiB / 1 MiB / 768 KiB /
# 100 MiB - confirm.
buffer_sizes= 16384,1048576,786432,104857600
lines= 1536
# Five instruction groups, five proportion values (matched by position).
instr_groups= RAM_L,L3_L,L2_LS,L1_L,REG
proportion= 1,1,5,90,45