forked from ridiculousfish/libdivide
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdivide_by_constants_codegen_reference.c
264 lines (209 loc) · 9.61 KB
/
divide_by_constants_codegen_reference.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
/*
Reference implementations of computing and using the "magic number" approach to dividing
by constants, including codegen instructions. The unsigned division incorporates the
"round down" optimization per ridiculous_fish.
This is free and unencumbered software. Any copyright is dedicated to the Public Domain.
*/
#include <limits.h> //for CHAR_BIT
#include <assert.h>
/* Types used in the computations below. These can be redefined to the types appropriate
for the desired division type (i.e. uint can be defined as unsigned long long).
Note that the uint type is used in compute_signed_magic_info, so the uint type must
not be smaller than the sint type.
*/
typedef unsigned int uint;
typedef signed int sint;
/* Computes "magic info" for performing signed division by a fixed integer D.
The type 'sint' is assumed to be defined as a signed integer type large enough
to hold both the dividend and the divisor.
Here >> is arithmetic (signed) shift, and >>> is logical shift.
To emit code for n/d, rounding towards zero, use the following sequence:
m = compute_signed_magic_info(D)
emit("result = (m.multiplier * n) >> SINT_BITS");
if d > 0 and m.multiplier < 0: emit("result += n")
if d < 0 and m.multiplier > 0: emit("result -= n")
if m.post_shift > 0: emit("result >>= m.shift")
emit("result += (result < 0)")
The shifts by SINT_BITS may be "free" if the high half of the full multiply
is put in a separate register.
The final add can of course be implemented via the sign bit, e.g.
result += (result >>> (SINT_BITS - 1))
or
result -= (result >> (SINT_BITS - 1))
This code is heavily indebted to Hacker's Delight by Henry Warren.
See http://www.hackersdelight.org/HDcode/magic.c.txt
Used with permission from http://www.hackersdelight.org/permissions.htm
*/
struct magics_info {
sint multiplier; // the "magic number" multiplier
unsigned shift; // shift for the dividend after multiplying
};
struct magics_info compute_signed_magic_info(sint D);
/* Computes "magic info" for performing unsigned division by a fixed positive integer D.
The type 'uint' is assumed to be defined as an unsigned integer type large enough
to hold both the dividend and the divisor. num_bits can be set appropriately if n is
known to be smaller than the largest uint; if this is not known then pass
(sizeof(uint) * CHAR_BIT) for num_bits.
Assume we have a hardware register of width UINT_BITS, a known constant D which is
not zero and not a power of 2, and a variable n of width num_bits (which may be
up to UINT_BITS). To emit code for n/d, use one of the two following sequences
(here >>> refers to a logical bitshift):
m = compute_unsigned_magic_info(D, num_bits)
if m.pre_shift > 0: emit("n >>>= m.pre_shift")
if m.increment: emit("n = saturated_increment(n)")
emit("result = (m.multiplier * n) >>> UINT_BITS")
if m.post_shift > 0: emit("result >>>= m.post_shift")
or
m = compute_unsigned_magic_info(D, num_bits)
if m.pre_shift > 0: emit("n >>>= m.pre_shift")
emit("result = m.multiplier * n")
if m.increment: emit("result = result + m.multiplier")
emit("result >>>= UINT_BITS")
if m.post_shift > 0: emit("result >>>= m.post_shift")
The shifts by UINT_BITS may be "free" if the high half of the full multiply
is put in a separate register.
saturated_increment(n) means "increment n unless it would wrap to 0," i.e.
if n == (1 << UINT_BITS)-1: result = n
else: result = n+1
A common way to implement this is with the carry bit. For example, on x86:
add 1
sbb 0
Some invariants:
1: At least one of pre_shift and increment is zero
2: multiplier is never zero
This code incorporates the "round down" optimization per ridiculous_fish.
*/
struct magicu_info {
uint multiplier; // the "magic number" multiplier
unsigned pre_shift; // shift for the dividend before multiplying
unsigned post_shift; //shift for the dividend after multiplying
int increment; // 0 or 1; if set then increment the numerator, using one of the two strategies
};
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits);
/* Implementations follow */
struct magicu_info compute_unsigned_magic_info(uint D, unsigned num_bits) {
//The numerator must fit in a uint
assert(num_bits > 0 && num_bits <= sizeof(uint) * CHAR_BIT);
// D must be larger than zero and not a power of 2
assert(D & (D-1));
// The eventual result
struct magicu_info result;
// Bits in a uint
const unsigned UINT_BITS = sizeof(uint) * CHAR_BIT;
// The extra shift implicit in the difference between UINT_BITS and num_bits
const unsigned extra_shift = UINT_BITS - num_bits;
// The initial power of 2 is one less than the first one that can possibly work
const uint initial_power_of_2 = (uint)1 << (UINT_BITS-1);
// The remainder and quotient of our power of 2 divided by d
uint quotient = initial_power_of_2 / D, remainder = initial_power_of_2 % D;
// ceil(log_2 D)
unsigned ceil_log_2_D;
// The magic info for the variant "round down" algorithm
uint down_multiplier = 0;
unsigned down_exponent = 0;
int has_magic_down = 0;
// Compute ceil(log_2 D)
ceil_log_2_D = 0;
uint tmp;
for (tmp = D; tmp > 0; tmp >>= 1)
ceil_log_2_D += 1;
// Begin a loop that increments the exponent, until we find a power of 2 that works.
unsigned exponent;
for (exponent = 0; ; exponent++) {
// Quotient and remainder is from previous exponent; compute it for this exponent.
if (remainder >= D - remainder) {
// Doubling remainder will wrap around D
quotient = quotient * 2 + 1;
remainder = remainder * 2 - D;
} else {
// Remainder will not wrap
quotient = quotient * 2;
remainder = remainder * 2;
}
// We're done if this exponent works for the round_up algorithm.
// Note that exponent may be larger than the maximum shift supported,
// so the check for >= ceil_log_2_D is critical.
if ((exponent + extra_shift >= ceil_log_2_D) || (D - remainder) <= ((uint)1 << (exponent + extra_shift)))
break;
// Set magic_down if we have not set it yet and this exponent works for the round_down algorithm
if (! has_magic_down && remainder <= ((uint)1 << (exponent + extra_shift))) {
has_magic_down = 1;
down_multiplier = quotient;
down_exponent = exponent;
}
}
if (exponent < ceil_log_2_D) {
// magic_up is efficient
result.multiplier = quotient + 1;
result.pre_shift = 0;
result.post_shift = exponent;
result.increment = 0;
} else if (D & 1) {
// Odd divisor, so use magic_down, which must have been set
assert(has_magic_down);
result.multiplier = down_multiplier;
result.pre_shift = 0;
result.post_shift = down_exponent;
result.increment = 1;
} else {
// Even divisor, so use a prefix-shifted dividend
unsigned pre_shift = 0;
uint shifted_D = D;
while ((shifted_D & 1) == 0) {
shifted_D >>= 1;
pre_shift += 1;
}
result = compute_unsigned_magic_info(shifted_D, num_bits - pre_shift);
assert(result.increment == 0 && result.pre_shift == 0); //expect no increment or pre_shift in this path
result.pre_shift = pre_shift;
}
return result;
}
struct magics_info compute_signed_magic_info(sint D) {
// D must not be zero and must not be a power of 2 (or its negative)
assert(D != 0 && (D & -D) != D && (D & -D) != -D);
// Our result
struct magics_info result;
// Bits in an sint
const unsigned SINT_BITS = sizeof(sint) * CHAR_BIT;
// Absolute value of D (we know D is not the most negative value since that's a power of 2)
const uint abs_d = (D < 0 ? -D : D);
// The initial power of 2 is one less than the first one that can possibly work
// "two31" in Warren
unsigned exponent = SINT_BITS - 1;
const uint initial_power_of_2 = (uint)1 << exponent;
// Compute the absolute value of our "test numerator,"
// which is the largest dividend whose remainder with d is d-1.
// This is called anc in Warren.
const uint tmp = initial_power_of_2 + (D < 0);
const uint abs_test_numer = tmp - 1 - tmp % abs_d;
// Initialize our quotients and remainders (q1, r1, q2, r2 in Warren)
uint quotient1 = initial_power_of_2 / abs_test_numer, remainder1 = initial_power_of_2 % abs_test_numer;
uint quotient2 = initial_power_of_2 / abs_d, remainder2 = initial_power_of_2 % abs_d;
uint delta;
// Begin our loop
do {
// Update the exponent
exponent++;
// Update quotient1 and remainder1
quotient1 *= 2;
remainder1 *= 2;
if (remainder1 >= abs_test_numer) {
quotient1 += 1;
remainder1 -= abs_test_numer;
}
// Update quotient2 and remainder2
quotient2 *= 2;
remainder2 *= 2;
if (remainder2 >= abs_d) {
quotient2 += 1;
remainder2 -= abs_d;
}
// Keep going as long as (2**exponent) / abs_d <= delta
delta = abs_d - remainder2;
} while (quotient1 < delta || (quotient1 == delta && remainder1 == 0));
result.multiplier = quotient2 + 1;
if (D < 0) result.multiplier = -result.multiplier;
result.shift = exponent - SINT_BITS;
return result;
}