Panda3D
pdtoa.cxx
1 /*
2 See pdtoa.h for explanation.
3 
4 Copyright (C) 2014 Milo Yip
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
11 
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
14 
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 THE SOFTWARE.
22 */
23 
24 #include "pdtoa.h"
25 #include "cmath.h"
26 
27 #include <assert.h>
28 #include <math.h>
29 #include <stdint.h>
30 
31 #if defined(_MSC_VER)
32 #include <intrin.h>
33 #include <float.h>
34 #define copysign _copysign
35 #endif
36 
37 #define UINT64_C2(h, l) ((static_cast<uint64_t>(h) << 32) | static_cast<uint64_t>(l))
38 
39 struct DiyFp {
40  DiyFp() {}
41 
42  DiyFp(uint64_t f, int e) : f(f), e(e) {}
43 
44  DiyFp(double d) {
45  union {
46  double d;
47  uint64_t u64;
48  } u = { d };
49 
50  int biased_e = (u.u64 & kDpExponentMask) >> kDpSignificandSize;
51  uint64_t significand = (u.u64 & kDpSignificandMask);
52  if (biased_e != 0) {
53  f = significand + kDpHiddenBit;
54  e = biased_e - kDpExponentBias;
55  }
56  else {
57  f = significand;
58  e = kDpMinExponent + 1;
59  }
60  }
61 
62  DiyFp operator-(const DiyFp& rhs) const {
63  assert(e == rhs.e);
64  assert(f >= rhs.f);
65  return DiyFp(f - rhs.f, e);
66  }
67 
68  DiyFp operator*(const DiyFp& rhs) const {
69 #if defined(_MSC_VER) && defined(_M_AMD64)
70  uint64_t h;
71  uint64_t l = _umul128(f, rhs.f, &h);
72  if (l & (uint64_t(1) << 63)) // rounding
73  h++;
74  return DiyFp(h, e + rhs.e + 64);
75 #elif (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6)) && defined(__x86_64__)
76  unsigned __int128 p = static_cast<unsigned __int128>(f) * static_cast<unsigned __int128>(rhs.f);
77  uint64_t h = p >> 64;
78  uint64_t l = static_cast<uint64_t>(p);
79  if (l & (uint64_t(1) << 63)) // rounding
80  h++;
81  return DiyFp(h, e + rhs.e + 64);
82 #else
83  const uint64_t M32 = 0xFFFFFFFF;
84  const uint64_t a = f >> 32;
85  const uint64_t b = f & M32;
86  const uint64_t c = rhs.f >> 32;
87  const uint64_t d = rhs.f & M32;
88  const uint64_t ac = a * c;
89  const uint64_t bc = b * c;
90  const uint64_t ad = a * d;
91  const uint64_t bd = b * d;
92  uint64_t tmp = (bd >> 32) + (ad & M32) + (bc & M32);
93  tmp += 1U << 31; /// mult_round
94  return DiyFp(ac + (ad >> 32) + (bc >> 32) + (tmp >> 32), e + rhs.e + 64);
95 #endif
96  }
97 
98  DiyFp Normalize() const {
99 #if defined(_MSC_VER) && defined(_M_AMD64)
100  unsigned long index;
101  _BitScanReverse64(&index, f);
102  return DiyFp(f << (63 - index), e - (63 - index));
103 #elif defined(__GNUC__)
104  int s = __builtin_clzll(f);
105  return DiyFp(f << s, e - s);
106 #else
107  DiyFp res = *this;
108  while (!(res.f & kDpHiddenBit)) {
109  res.f <<= 1;
110  res.e--;
111  }
112  res.f <<= (kDiySignificandSize - kDpSignificandSize - 1);
113  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 1);
114  return res;
115 #endif
116  }
117 
118  DiyFp NormalizeBoundary() const {
119 #if defined(_MSC_VER) && defined(_M_AMD64)
120  unsigned long index;
121  _BitScanReverse64(&index, f);
122  return DiyFp (f << (63 - index), e - (63 - index));
123 #else
124  DiyFp res = *this;
125  while (!(res.f & (kDpHiddenBit << 1))) {
126  res.f <<= 1;
127  res.e--;
128  }
129  res.f <<= (kDiySignificandSize - kDpSignificandSize - 2);
130  res.e = res.e - (kDiySignificandSize - kDpSignificandSize - 2);
131  return res;
132 #endif
133  }
134 
135  void NormalizedBoundaries(DiyFp* minus, DiyFp* plus) const {
136  DiyFp pl = DiyFp((f << 1) + 1, e - 1).NormalizeBoundary();
137  DiyFp mi = (f == kDpHiddenBit) ? DiyFp((f << 2) - 1, e - 2) : DiyFp((f << 1) - 1, e - 1);
138  mi.f <<= mi.e - pl.e;
139  mi.e = pl.e;
140  *plus = pl;
141  *minus = mi;
142  }
143 
144  static const int kDiySignificandSize = 64;
145  static const int kDpSignificandSize = 52;
146  static const int kDpExponentBias = 0x3FF + kDpSignificandSize;
147  static const int kDpMinExponent = -kDpExponentBias;
148  static const uint64_t kDpExponentMask = UINT64_C2(0x7FF00000, 0x00000000);
149  static const uint64_t kDpSignificandMask = UINT64_C2(0x000FFFFF, 0xFFFFFFFF);
150  static const uint64_t kDpHiddenBit = UINT64_C2(0x00100000, 0x00000000);
151 
152  uint64_t f;
153  int e;
154 };
155 
156 inline static DiyFp GetCachedPower(int e, int* K) {
157  // 10^-348, 10^-340, ..., 10^340
158  static const uint64_t kCachedPowers_F[] = {
159  UINT64_C2(0xfa8fd5a0, 0x081c0288), UINT64_C2(0xbaaee17f, 0xa23ebf76),
160  UINT64_C2(0x8b16fb20, 0x3055ac76), UINT64_C2(0xcf42894a, 0x5dce35ea),
161  UINT64_C2(0x9a6bb0aa, 0x55653b2d), UINT64_C2(0xe61acf03, 0x3d1a45df),
162  UINT64_C2(0xab70fe17, 0xc79ac6ca), UINT64_C2(0xff77b1fc, 0xbebcdc4f),
163  UINT64_C2(0xbe5691ef, 0x416bd60c), UINT64_C2(0x8dd01fad, 0x907ffc3c),
164  UINT64_C2(0xd3515c28, 0x31559a83), UINT64_C2(0x9d71ac8f, 0xada6c9b5),
165  UINT64_C2(0xea9c2277, 0x23ee8bcb), UINT64_C2(0xaecc4991, 0x4078536d),
166  UINT64_C2(0x823c1279, 0x5db6ce57), UINT64_C2(0xc2109436, 0x4dfb5637),
167  UINT64_C2(0x9096ea6f, 0x3848984f), UINT64_C2(0xd77485cb, 0x25823ac7),
168  UINT64_C2(0xa086cfcd, 0x97bf97f4), UINT64_C2(0xef340a98, 0x172aace5),
169  UINT64_C2(0xb23867fb, 0x2a35b28e), UINT64_C2(0x84c8d4df, 0xd2c63f3b),
170  UINT64_C2(0xc5dd4427, 0x1ad3cdba), UINT64_C2(0x936b9fce, 0xbb25c996),
171  UINT64_C2(0xdbac6c24, 0x7d62a584), UINT64_C2(0xa3ab6658, 0x0d5fdaf6),
172  UINT64_C2(0xf3e2f893, 0xdec3f126), UINT64_C2(0xb5b5ada8, 0xaaff80b8),
173  UINT64_C2(0x87625f05, 0x6c7c4a8b), UINT64_C2(0xc9bcff60, 0x34c13053),
174  UINT64_C2(0x964e858c, 0x91ba2655), UINT64_C2(0xdff97724, 0x70297ebd),
175  UINT64_C2(0xa6dfbd9f, 0xb8e5b88f), UINT64_C2(0xf8a95fcf, 0x88747d94),
176  UINT64_C2(0xb9447093, 0x8fa89bcf), UINT64_C2(0x8a08f0f8, 0xbf0f156b),
177  UINT64_C2(0xcdb02555, 0x653131b6), UINT64_C2(0x993fe2c6, 0xd07b7fac),
178  UINT64_C2(0xe45c10c4, 0x2a2b3b06), UINT64_C2(0xaa242499, 0x697392d3),
179  UINT64_C2(0xfd87b5f2, 0x8300ca0e), UINT64_C2(0xbce50864, 0x92111aeb),
180  UINT64_C2(0x8cbccc09, 0x6f5088cc), UINT64_C2(0xd1b71758, 0xe219652c),
181  UINT64_C2(0x9c400000, 0x00000000), UINT64_C2(0xe8d4a510, 0x00000000),
182  UINT64_C2(0xad78ebc5, 0xac620000), UINT64_C2(0x813f3978, 0xf8940984),
183  UINT64_C2(0xc097ce7b, 0xc90715b3), UINT64_C2(0x8f7e32ce, 0x7bea5c70),
184  UINT64_C2(0xd5d238a4, 0xabe98068), UINT64_C2(0x9f4f2726, 0x179a2245),
185  UINT64_C2(0xed63a231, 0xd4c4fb27), UINT64_C2(0xb0de6538, 0x8cc8ada8),
186  UINT64_C2(0x83c7088e, 0x1aab65db), UINT64_C2(0xc45d1df9, 0x42711d9a),
187  UINT64_C2(0x924d692c, 0xa61be758), UINT64_C2(0xda01ee64, 0x1a708dea),
188  UINT64_C2(0xa26da399, 0x9aef774a), UINT64_C2(0xf209787b, 0xb47d6b85),
189  UINT64_C2(0xb454e4a1, 0x79dd1877), UINT64_C2(0x865b8692, 0x5b9bc5c2),
190  UINT64_C2(0xc83553c5, 0xc8965d3d), UINT64_C2(0x952ab45c, 0xfa97a0b3),
191  UINT64_C2(0xde469fbd, 0x99a05fe3), UINT64_C2(0xa59bc234, 0xdb398c25),
192  UINT64_C2(0xf6c69a72, 0xa3989f5c), UINT64_C2(0xb7dcbf53, 0x54e9bece),
193  UINT64_C2(0x88fcf317, 0xf22241e2), UINT64_C2(0xcc20ce9b, 0xd35c78a5),
194  UINT64_C2(0x98165af3, 0x7b2153df), UINT64_C2(0xe2a0b5dc, 0x971f303a),
195  UINT64_C2(0xa8d9d153, 0x5ce3b396), UINT64_C2(0xfb9b7cd9, 0xa4a7443c),
196  UINT64_C2(0xbb764c4c, 0xa7a44410), UINT64_C2(0x8bab8eef, 0xb6409c1a),
197  UINT64_C2(0xd01fef10, 0xa657842c), UINT64_C2(0x9b10a4e5, 0xe9913129),
198  UINT64_C2(0xe7109bfb, 0xa19c0c9d), UINT64_C2(0xac2820d9, 0x623bf429),
199  UINT64_C2(0x80444b5e, 0x7aa7cf85), UINT64_C2(0xbf21e440, 0x03acdd2d),
200  UINT64_C2(0x8e679c2f, 0x5e44ff8f), UINT64_C2(0xd433179d, 0x9c8cb841),
201  UINT64_C2(0x9e19db92, 0xb4e31ba9), UINT64_C2(0xeb96bf6e, 0xbadf77d9),
202  UINT64_C2(0xaf87023b, 0x9bf0ee6b)
203  };
204  static const int16_t kCachedPowers_E[] = {
205  -1220, -1193, -1166, -1140, -1113, -1087, -1060, -1034, -1007, -980,
206  -954, -927, -901, -874, -847, -821, -794, -768, -741, -715,
207  -688, -661, -635, -608, -582, -555, -529, -502, -475, -449,
208  -422, -396, -369, -343, -316, -289, -263, -236, -210, -183,
209  -157, -130, -103, -77, -50, -24, 3, 30, 56, 83,
210  109, 136, 162, 189, 216, 242, 269, 295, 322, 348,
211  375, 402, 428, 455, 481, 508, 534, 561, 588, 614,
212  641, 667, 694, 720, 747, 774, 800, 827, 853, 880,
213  907, 933, 960, 986, 1013, 1039, 1066
214  };
215 
216  //int k = static_cast<int>(ceil((-61 - e) * 0.30102999566398114)) + 374;
217  double dk = (-61 - e) * 0.30102999566398114 + 347; // dk must be positive, so can do ceiling in positive
218  int k = static_cast<int>(dk);
219  if (k != dk)
220  k++;
221 
222  unsigned index = static_cast<unsigned>((k >> 3) + 1);
223  *K = -(-348 + static_cast<int>(index << 3)); // decimal exponent no need lookup table
224 
225  assert(index < sizeof(kCachedPowers_F) / sizeof(kCachedPowers_F[0]));
226  return DiyFp(kCachedPowers_F[index], kCachedPowers_E[index]);
227 }
228 
229 inline static void GrisuRound(char* buffer, int len, uint64_t delta, uint64_t rest, uint64_t ten_kappa, uint64_t wp_w) {
230  while (rest < wp_w && delta - rest >= ten_kappa &&
231  (rest + ten_kappa < wp_w || /// closer
232  wp_w - rest > rest + ten_kappa - wp_w)) {
233  buffer[len - 1]--;
234  rest += ten_kappa;
235  }
236 }
237 
238 inline static unsigned CountDecimalDigit32(uint32_t n) {
239  // Simple pure C++ implementation was faster than __builtin_clz version in this situation.
240  if (n < 10) return 1;
241  if (n < 100) return 2;
242  if (n < 1000) return 3;
243  if (n < 10000) return 4;
244  if (n < 100000) return 5;
245  if (n < 1000000) return 6;
246  if (n < 10000000) return 7;
247  if (n < 100000000) return 8;
248  if (n < 1000000000) return 9;
249  return 10;
250 }
251 
252 inline static void DigitGen(const DiyFp& W, const DiyFp& Mp, uint64_t delta, char* buffer, int* len, int* K) {
253  static const uint32_t kPow10[] = { 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000, 1000000000, 0, 0, 0, 0, 0 };
254  const DiyFp one(uint64_t(1) << -Mp.e, Mp.e);
255  const DiyFp wp_w = Mp - W;
256  uint32_t p1 = static_cast<uint32_t>(Mp.f >> -one.e);
257  uint64_t p2 = Mp.f & (one.f - 1);
258  int kappa = static_cast<int>(CountDecimalDigit32(p1));
259  *len = 0;
260 
261  while (kappa > 0) {
262  uint32_t d;
263  switch (kappa) {
264  case 10: d = p1 / 1000000000; p1 %= 1000000000; break;
265  case 9: d = p1 / 100000000; p1 %= 100000000; break;
266  case 8: d = p1 / 10000000; p1 %= 10000000; break;
267  case 7: d = p1 / 1000000; p1 %= 1000000; break;
268  case 6: d = p1 / 100000; p1 %= 100000; break;
269  case 5: d = p1 / 10000; p1 %= 10000; break;
270  case 4: d = p1 / 1000; p1 %= 1000; break;
271  case 3: d = p1 / 100; p1 %= 100; break;
272  case 2: d = p1 / 10; p1 %= 10; break;
273  case 1: d = p1; p1 = 0; break;
274  default:
275 #if defined(_MSC_VER)
276  __assume(0);
277 #elif __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)
278  __builtin_unreachable();
279 #else
280  d = 0;
281 #endif
282  }
283  if (d || *len)
284  buffer[(*len)++] = '0' + static_cast<char>(d);
285  kappa--;
286  uint64_t tmp = (static_cast<uint64_t>(p1) << -one.e) + p2;
287  if (tmp <= delta) {
288  *K += kappa;
289  GrisuRound(buffer, *len, delta, tmp, static_cast<uint64_t>(kPow10[kappa]) << -one.e, wp_w.f);
290  return;
291  }
292  }
293 
294  // kappa = 0
295  for (;;) {
296  p2 *= 10;
297  delta *= 10;
298  char d = static_cast<char>(p2 >> -one.e);
299  if (d || *len)
300  buffer[(*len)++] = '0' + d;
301  p2 &= one.f - 1;
302  kappa--;
303  if (p2 < delta) {
304  *K += kappa;
305  GrisuRound(buffer, *len, delta, p2, one.f, wp_w.f * kPow10[-kappa]);
306  return;
307  }
308  }
309 }
310 
311 inline static void Grisu2(double value, char* buffer, int* length, int* K) {
312  const DiyFp v(value);
313  DiyFp w_m, w_p;
314  v.NormalizedBoundaries(&w_m, &w_p);
315 
316  const DiyFp c_mk = GetCachedPower(w_p.e, K);
317  const DiyFp W = v.Normalize() * c_mk;
318  DiyFp Wp = w_p * c_mk;
319  DiyFp Wm = w_m * c_mk;
320  Wm.f++;
321  Wp.f--;
322  DigitGen(W, Wp, Wp.f - Wm.f, buffer, length, K);
323 }
324 
325 static const char cDigitsLut[200] = {
326  '0', '0', '0', '1', '0', '2', '0', '3', '0', '4', '0', '5', '0', '6', '0', '7', '0', '8', '0', '9',
327  '1', '0', '1', '1', '1', '2', '1', '3', '1', '4', '1', '5', '1', '6', '1', '7', '1', '8', '1', '9',
328  '2', '0', '2', '1', '2', '2', '2', '3', '2', '4', '2', '5', '2', '6', '2', '7', '2', '8', '2', '9',
329  '3', '0', '3', '1', '3', '2', '3', '3', '3', '4', '3', '5', '3', '6', '3', '7', '3', '8', '3', '9',
330  '4', '0', '4', '1', '4', '2', '4', '3', '4', '4', '4', '5', '4', '6', '4', '7', '4', '8', '4', '9',
331  '5', '0', '5', '1', '5', '2', '5', '3', '5', '4', '5', '5', '5', '6', '5', '7', '5', '8', '5', '9',
332  '6', '0', '6', '1', '6', '2', '6', '3', '6', '4', '6', '5', '6', '6', '6', '7', '6', '8', '6', '9',
333  '7', '0', '7', '1', '7', '2', '7', '3', '7', '4', '7', '5', '7', '6', '7', '7', '7', '8', '7', '9',
334  '8', '0', '8', '1', '8', '2', '8', '3', '8', '4', '8', '5', '8', '6', '8', '7', '8', '8', '8', '9',
335  '9', '0', '9', '1', '9', '2', '9', '3', '9', '4', '9', '5', '9', '6', '9', '7', '9', '8', '9', '9'
336 };
337 
338 inline void WriteExponent(int K, char* buffer) {
339  if (K < 0) {
340  *buffer++ = '-';
341  K = -K;
342  }
343 
344  if (K >= 100) {
345  *buffer++ = '0' + static_cast<char>(K / 100);
346  K %= 100;
347  const char* d = cDigitsLut + K * 2;
348  *buffer++ = d[0];
349  *buffer++ = d[1];
350  }
351  else if (K >= 10) {
352  const char* d = cDigitsLut + K * 2;
353  *buffer++ = d[0];
354  *buffer++ = d[1];
355  }
356  else
357  *buffer++ = '0' + static_cast<char>(K);
358 
359  *buffer = '\0';
360 }
361 
362 inline static void Prettify(char* buffer, int length, int k) {
363  const int kk = length + k; // 10^(kk-1) <= v < 10^kk
364 
365  if (length <= kk && kk <= 21) {
366  // 1234e7 -> 12340000000
367  for (int i = length; i < kk; i++)
368  buffer[i] = '0';
369  buffer[kk] = '.';
370  buffer[kk + 1] = '0';
371  buffer[kk + 2] = '\0';
372  }
373  else if (0 < kk && kk <= 21) {
374  // 1234e-2 -> 12.34
375  memmove(&buffer[kk + 1], &buffer[kk], length - kk);
376  buffer[kk] = '.';
377  buffer[length + 1] = '\0';
378  }
379  else if (-6 < kk && kk <= 0) {
380  // 1234e-6 -> 0.001234
381  const int offset = 2 - kk;
382  memmove(&buffer[offset], &buffer[0], length);
383  buffer[0] = '0';
384  buffer[1] = '.';
385  for (int i = 2; i < offset; i++)
386  buffer[i] = '0';
387  buffer[length + offset] = '\0';
388  }
389  else if (length == 1) {
390  // 1e30
391  buffer[1] = 'e';
392  WriteExponent(kk - 1, &buffer[2]);
393  }
394  else {
395  // 1234e30 -> 1.234e33
396  memmove(&buffer[2], &buffer[1], length - 1);
397  buffer[1] = '.';
398  buffer[length + 1] = 'e';
399  WriteExponent(kk - 1, &buffer[0 + length + 2]);
400  }
401 }
402 
403 void pdtoa(double value, char *buffer) {
404 #ifdef _MSC_VER
405  if (copysign(1.0, value) < 0) {
406 #else
407  if (signbit(value)) {
408 #endif
409  *buffer++ = '-';
410  value = -value;
411  }
412  if (cinf(value)) {
413  buffer[0] = 'i';
414  buffer[1] = 'n';
415  buffer[2] = 'f';
416  buffer[3] = '\0';
417  } else if (cnan(value)) {
418  buffer[0] = 'n';
419  buffer[1] = 'a';
420  buffer[2] = 'n';
421  buffer[3] = '\0';
422  } else if (value == 0.0) {
423  buffer[0] = '0';
424  buffer[1] = '.';
425  buffer[2] = '0';
426  buffer[3] = '\0';
427  } else if (value == 1.0) {
428  buffer[0] = '1';
429  buffer[1] = '.';
430  buffer[2] = '0';
431  buffer[3] = '\0';
432  } else {
433  int length, K;
434  Grisu2(value, buffer, &length, &K);
435  Prettify(buffer, length, K);
436  }
437 }
Definition: pdtoa.cxx:39