CppCommon  1.0.4.1
C++ Common Library
math.cpp
Go to the documentation of this file.
1 
9 #include "math/math.h"
10 
11 #if defined(_MSC_VER)
12 #include <intrin.h>
13 #endif
14 
15 namespace CppCommon {
16 
17 uint64_t Math::MulDiv64(uint64_t operant, uint64_t multiplier, uint64_t divider)
18 {
19 #if defined(__GNUC__) && defined(__SIZEOF_INT128__)
20  __uint128_t a = operant;
21  __uint128_t b = multiplier;
22  __uint128_t c = divider;
23 
24  return (uint64_t)(a * b / c);
25 #elif defined(_MSC_VER)
26 #if defined(_M_IX86)
27  // Declare 128bit storage
28  struct
29  {
30  unsigned long DW[4];
31  } var128, quotient;
32 
33  // Change semantics for intermediate results for Full Div by renaming the vars
34  #define REMAINDER quotient
35  #define QUOTIENT edi
36 
37  // Save combined sign on stack
38  _asm
39  {
40  mov eax, dword ptr[operant+4]
41  xor eax, dword ptr[multiplier+4]
42  xor eax, dword ptr[divider+4]
43  pushfd
44  }
45 
46  _asm
47  {
48  // First check divider for 0
49  mov eax, dword ptr[divider+4]
50  or eax, dword ptr[divider]
51  jnz dividerOK
52  div eax
53 dividerOK:
54  lea edi,[var128] // edi = &var128
55  // Check multiplier for 1 or 0
56  xor eax, eax
57  cmp eax, dword ptr[multiplier+4]
58  jnz startMUL
59  cmp eax, dword ptr[multiplier]
60  jnz multiNotNUL
61  xor edx, edx
62  popfd // cleanup stack
63  jmp done
64 multiNotNUL:
65  // Set result HI part to 0
66  xor eax,eax
67  mov dword ptr[edi+12], eax
68  mov dword ptr[edi+8], eax
69  mov eax, 1
70  cmp eax, dword ptr[multiplier]
71  jnz smallMUL
72  // Multiplier is 1 so just copy operant to result
73  mov eax, dword ptr[operant+4]
74  mov dword ptr[edi+4], eax
75  mov eax, dword ptr[operant]
76  mov dword ptr[edi], eax
77  jmp startDIV
78 smallMUL:
79  // Test for 32/32 bit multiplication
80  xor eax, eax
81  mov ecx, dword ptr[operant+4]
82  or ecx, eax ;test for both hiwords zero.
83  jnz startMUL
84  // Do 32/32 bit multiplication
85  mov ecx, dword ptr[multiplier]
86  mov eax, dword ptr[operant]
87  mul ecx
88  mov dword ptr[edi+4], edx
89  mov dword ptr[edi], eax
90  jmp startDIV
91 startMUL:
92  // Check signs
93  // Multiply: var128 = operant * multiplier
94  mov eax, dword ptr[multiplier] // eax = LO(multiplier)
95  mul dword ptr[operant] // edx:eax = eax * LO(operant)
96  mov dword ptr[edi], eax // var128.DW0 = eax
97  mov ecx, edx // ecx = edx
98 
99  mov eax, dword ptr[multiplier] // eax = LO(multiplier)
100  mul dword ptr[operant+4] // edx:eax = eax * HI(operant)
101  add eax, ecx // eax = eax + ecx
102  adc edx, 0 // edx = edx + 0 + carry
103  mov ebx, eax
104  mov ecx, edx
105 
106  mov eax, dword ptr[multiplier+4]
107  mul dword ptr[operant]
108  add eax, ebx
109  mov dword ptr[edi+4], eax
110  adc ecx, edx
111  pushfd
112 
113  mov eax, dword ptr[multiplier+4]
114  mul dword ptr[operant+4]
115  popfd
116  adc eax, ecx
117  adc edx, 0
118  mov dword ptr[edi+8], eax
119  mov dword ptr[edi+12], edx
120 startDIV:
121  // Divide: var128 = var128 / divider
122  //
123  // Test divider = 32bit value
124  mov eax, dword ptr[divider+4]
125  cmp eax, 0
126  jnz fullDIV
127  mov ecx, dword ptr[divider]
128  cmp ecx, 1
129  jz applySign
130 
131  // Start 128/32 bit division
132  mov eax, dword ptr[edi+12]
133  xor edx, edx
134  div ecx
135  mov dword ptr[quotient+12], eax
136 
137  mov eax, dword ptr[edi+8]
138  div ecx
139  mov dword ptr[quotient+8], eax
140 
141  mov eax, dword ptr[edi+4]
142  div ecx
143  mov dword ptr[quotient+4], eax
144 
145  mov eax, dword ptr[edi]
146  div ecx
147  mov dword ptr[quotient], eax
148 
149  // Copy the quotient to the result storage (var128)
150  mov eax, dword ptr[quotient+12]
151  mov dword ptr[edi+12], eax
152  mov eax, dword ptr[quotient+8]
153  mov dword ptr[edi+8], eax
154  mov eax, dword ptr[quotient+4]
155  mov dword ptr[edi+4], eax
156  mov eax, dword ptr[quotient]
157  mov dword ptr[edi], eax
158  // To sign correction and return
159  jmp applySign
160 
161 fullDIV:
162  // Full 128/64 bit division
163  xor eax, eax
164  mov dword ptr[REMAINDER+12], eax
165  mov dword ptr[REMAINDER+8], eax
166  mov dword ptr[REMAINDER+4], eax
167  mov dword ptr[REMAINDER], eax
168 
169  mov ecx, 128
170 loop1:
171  // Compute REMAINDER:QUOTIENT = REMAINDER:QUOTIENT shl 1
172  shl dword ptr[QUOTIENT], 1
173  rcl dword ptr[QUOTIENT+4], 1
174  rcl dword ptr[QUOTIENT+8], 1
175  rcl dword ptr[QUOTIENT+12], 1
176  rcl dword ptr[REMAINDER], 1
177  rcl dword ptr[REMAINDER+4], 1
178  rcl dword ptr[REMAINDER+8], 1
179  rcl dword ptr[REMAINDER+12], 1
180 
181  // Test (REMAINDER >= Divider)
182  xor eax, eax
183  cmp dword ptr[REMAINDER+12], eax
184  ja iftrue
185  jb iffalse
186 
187  cmp dword ptr[REMAINDER+8], eax
188  ja iftrue
189  jb iffalse
190 
191  mov eax, dword ptr[REMAINDER+4]
192  cmp eax, dword ptr[divider+4]
193  ja iftrue
194  jb iffalse
195 
196  mov eax, dword ptr[REMAINDER]
197  cmp eax, dword ptr[divider]
198  jb iffalse
199 iftrue:
200  // Remainder = remainder - divider
201  mov eax, dword ptr[divider]
202  sub dword ptr[REMAINDER], eax
203  mov eax, dword ptr[divider+4]
204  sbb dword ptr[REMAINDER+4], eax
205  xor eax, eax
206  sbb dword ptr[REMAINDER+8], eax
207  sbb dword ptr[REMAINDER+12], eax
208  // Quotient = quotient +1
209  add dword ptr[QUOTIENT], 1
210  adc dword ptr[QUOTIENT+4], 0
211  adc dword ptr[QUOTIENT+8], 0
212  adc dword ptr[QUOTIENT+12], 0
213 iffalse:
214  // Loop size = 101 bytes, is less than 127 so loop is possible
215  loop loop1
216 
217 applySign:
218  // Correct the sign of the result based on the stored combined sign
219  popfd
220  jns storeRes
221  not dword ptr[edi+12]
222  not dword ptr[edi+ 8]
223  not dword ptr[edi+ 4]
224  not dword ptr[edi]
225  add dword ptr[edi], 1
226  adc dword ptr[edi+ 4], 0
227  adc dword ptr[edi+ 8], 0
228  adc dword ptr[edi+12], 0
229 
230 storeRES:
231  // Get low order qword from var128
232  mov edx, dword ptr[edi+4]
233  mov eax, dword ptr[edi]
234 done:
235  }
236  // result is returned in edx:eax
237 #elif defined (_M_X64 )
238 #pragma warning(push)
239 #pragma warning(disable: 4244) // C4244: 'conversion' conversion from 'type1' to 'type2', possible loss of data
240  uint64_t a = operant;
241  uint64_t b = multiplier;
242  uint64_t c = divider;
243 
244  // Normalize divisor
245  unsigned long shift;
246  _BitScanReverse64(&shift, c);
247  shift = 63 - shift;
248 
249  c <<= shift;
250 
251  // Multiply
252  a = _umul128(a, b, &b);
253  if (((b << shift) >> shift) != b)
254  {
255  // Overflow
256  return 0xFFFFFFFFFFFFFFFF;
257  }
258  b = __shiftleft128(a, b, shift);
259  a <<= shift;
260 
261  uint32_t div;
262  uint32_t q0, q1;
263  uint64_t t0, t1;
264 
265  // 1st Reduction
266  div = (uint32_t)(c >> 32);
267  t0 = b / div;
268  if (t0 > 0xFFFFFFFF)
269  t0 = 0xFFFFFFFF;
270  q1 = (uint32_t)t0;
271  while (1)
272  {
273  t0 = _umul128(c, (uint64_t)q1 << 32, &t1);
274  if (t1 < b || (t1 == b && t0 <= a))
275  break;
276  q1--;
277  }
278  b -= t1;
279  if (t0 > a)
280  b--;
281  a -= t0;
282 
283  if (b > 0xFFFFFFFF)
284  {
285  // Overflow
286  return 0xFFFFFFFFFFFFFFFF;
287  }
288 
289  // 2nd reduction
290  t0 = ((b << 32) | (a >> 32)) / div;
291  if (t0 > 0xFFFFFFFF)
292  t0 = 0xFFFFFFFF;
293  q0 = (uint32_t)t0;
294 
295  while (1)
296  {
297  t0 = _umul128(c, q0, &t1);
298  if (t1 < b || (t1 == b && t0 <= a))
299  break;
300  q0--;
301  }
302 
303  return ((uint64_t)q1 << 32) | q0;
304 #pragma warning(pop)
305 #endif
306 #else
307  #error MulDiv64 is no supported!
308 #endif
309 }
310 
311 } // namespace CppCommon
static uint64_t MulDiv64(uint64_t operant, uint64_t multiplier, uint64_t divider)
Calculate (operant * multiplier / divider) with 64-bit unsigned integer values.
Definition: math.cpp:17
Math definition.
C++ Common project definitions.
Definition: token_bucket.h:15