-
Notifications
You must be signed in to change notification settings - Fork 31
/
Copy pathreciprocal_div64.h
68 lines (56 loc) · 1.64 KB
/
reciprocal_div64.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#ifndef RECIPROCAL_DIV64_H
#define RECIPROCAL_DIV64_H
#include <immintrin.h>
#include "helper.h"
/*
* This algorithm is based on the paper "Division by Invariant
* Integers Using Multiplication" by Torbjörn Granlund and Peter
* L. Montgomery.
*
* The assembler implementation from Agner Fog, which this code is
* based on, can be found here:
* http://www.agner.org/optimize/asmlib.zip
*
* This optimization for A/B is helpful if the divisor B is mostly
* runtime invariant. The reciprocal of B is calculated in the
* slow-path with reciprocal_value(). The fast-path can then just use
* a much faster multiplication operation with a variable dividend A
* to calculate the division A/B.
*/
struct reciprocal_value64 {
u64 m;
u8 sh1, sh2;
};
static inline struct reciprocal_value64 reciprocal_value64(u64 d)
{
struct reciprocal_value64 R;
unsigned __int128 m;
int l;
BUILD_BUG_ON((sizeof(unsigned long long)) != (sizeof(u64)));
if (unlikely(d == 1)) {
l = 0;
} else {
l = ((sizeof(unsigned long long)*8)) - __builtin_clzll(d - 1);
}
m = (((unsigned __int128)1 << 64) * ((1ULL << l) - d));
m /= d;
++m;
R.m = (u64)m;
R.sh1 = helpermin(l, 1);
R.sh2 = helpermax(l - 1, 0);
return R;
}
static inline u64 reciprocal_divide64(u64 a, struct reciprocal_value64 R)
{
u64 t = (u64)(((unsigned __int128)a * R.m) >> 64);
return (t + ((a - t) >> R.sh1)) >> R.sh2;
}
static __always_inline uint64_t reciprocal_remainder64(uint64_t A, uint64_t B, struct reciprocal_value64 R)
{
uint64_t div, mod;
div = reciprocal_divide64(A, R);
mod = A - (uint64_t) (div * B);
if (mod >= B) mod -= B;
return mod;
}
#endif /* RECIPROCAL_DIV64_H */