lexical_write_float/
algorithm.rs

1//! Implementation of the Dragonbox algorithm.
2//!
3//! This is modified from the Rust port of Dragonbox, available
4//! [here](https://github.com/dtolnay/dragonbox). It also uses a direct
5//! port of Dragonbox, available [here](https://github.com/jk-jeon/dragonbox/).
6//!
7//! This is therefore under an Apache 2.0/Boost Software dual-license.
8//!
9//! We use a u64 for the significant digits, even for a 32-bit integer,
10//! however, we use the proper bitshifts, etc. for the float in question,
11//! rather than clobbering the result to f64, as Rust's port does.
12//!
13//! Each one of the algorithms described here has the main implementation,
14//! according to the reference Dragonbox paper, as well as an alias for
15//! our own purposes. The existing algorithms include:
16//!
17//! 1. compute_nearest_normal
18//! 2. compute_nearest_shorter
19//! 3. compute_left_closed_directed
20//! 4. compute_right_closed_directed
21//!
22//! `compute_nearest_normal` and `compute_nearest_shorter` are used for
23//! round-nearest, tie-even and `compute_right_closed_directed` is used
24//! for round-to-zero (see below for details).
25
26#![cfg(not(feature = "compact"))]
27#![doc(hidden)]
28
29use crate::float::{ExtendedFloat80, RawFloat};
30use crate::options::{Options, RoundMode};
31use crate::shared;
32use crate::table::*;
33#[cfg(feature = "f16")]
34use lexical_util::bf16::bf16;
35#[cfg(feature = "f16")]
36use lexical_util::f16::f16;
37use lexical_util::format::{NumberFormat, STANDARD};
38use lexical_util::num::{AsPrimitive, Float};
39use lexical_write_integer::decimal::DigitCount;
40use lexical_write_integer::write::WriteInteger;
41
42/// Optimized float-to-string algorithm for decimal strings.
43///
44/// # Safety
45///
46/// Safe as long as the float isn't special (NaN or Infinity), and `bytes`
47/// is large enough to hold the significant digits.
48#[inline]
49pub unsafe fn write_float<F: RawFloat, const FORMAT: u128>(
50    float: F,
51    bytes: &mut [u8],
52    options: &Options,
53) -> usize {
54    debug_assert!(!float.is_special());
55    debug_assert!(float >= F::ZERO);
56
57    let fp = to_decimal(float);
58    let digit_count = F::digit_count(fp.mant);
59    let sci_exp = fp.exp + digit_count as i32 - 1;
60
61    // Note that for performance reasons, we write the significant digits
62    // later into the algorithms, since we can determine the right path
63    // and write the significant digits without using an intermediate buffer
64    // in most cases.
65
66    write_float!(
67        FORMAT,
68        sci_exp,
69        options,
70        write_float_scientific,
71        write_float_positive_exponent,
72        write_float_negative_exponent,
73        generic => F,
74        args => bytes, fp, sci_exp, options,
75    )
76}
77
78/// Write float to string in scientific notation.
79///
80/// # Safety
81///
82/// Safe as long as `bytes` is large enough to hold the number of digits
83/// and the scientific notation's exponent digits.
84pub unsafe fn write_float_scientific<F: DragonboxFloat, const FORMAT: u128>(
85    bytes: &mut [u8],
86    fp: ExtendedFloat80,
87    sci_exp: i32,
88    options: &Options,
89) -> usize {
90    // Config options.
91    debug_assert_eq!(count_factors(10, fp.mant), 0);
92    let format = NumberFormat::<{ FORMAT }> {};
93    assert!(format.is_valid());
94    let decimal_point = options.decimal_point();
95
96    // Write the significant digits. Write at index 1, so we can shift 1
97    // for the decimal point without intermediate buffers.
98    // SAFETY: safe, if we have enough bytes to write the significant digits.
99    let digits = unsafe { &mut index_unchecked_mut!(bytes[1..]) };
100    let digit_count = unsafe { F::write_digits(digits, fp.mant) };
101
102    // Truncate and round the significant digits.
103    // SAFETY: safe since `digit_count < digits.len()`.
104    let (digit_count, carried) =
105        unsafe { shared::truncate_and_round_decimal(digits, digit_count, options) };
106    let sci_exp = sci_exp + carried as i32;
107
108    // Determine the exact number of digits to write.
109    let exact_count = shared::min_exact_digits(digit_count, options);
110
111    // Write any trailing digits.
112    // SAFETY: safe, if we have enough bytes to write the significant digits.
113    let mut cursor: usize;
114    unsafe {
115        index_unchecked_mut!(bytes[0] = bytes[1]);
116        index_unchecked_mut!(bytes[1]) = decimal_point;
117
118        if !format.no_exponent_without_fraction() && digit_count == 1 && options.trim_floats() {
119            cursor = 1;
120        } else if digit_count < exact_count {
121            // Adjust the number of digits written, by appending zeros.
122            cursor = digit_count + 1;
123            let zeros = exact_count - digit_count;
124            unsafe {
125                slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..cursor + zeros]), b'0');
126            }
127            cursor += zeros;
128        } else if digit_count == 1 {
129            index_unchecked_mut!(bytes[2]) = b'0';
130            cursor = 3;
131        } else {
132            cursor = digit_count + 1;
133        }
134    }
135
136    // Now, write our scientific notation.
137    // SAFETY: safe since bytes must be large enough to store all digits.
138    unsafe { shared::write_exponent::<FORMAT>(bytes, &mut cursor, sci_exp, options.exponent()) };
139
140    cursor
141}
142
143/// Write negative float to string without scientific notation.
144/// Has a negative exponent (shift right) and no scientific notation.
145///
146/// # Safety
147///
148/// Safe as long as `bytes` is large enough to hold the number of
149/// significant digits and the leading zeros.
150pub unsafe fn write_float_negative_exponent<F: DragonboxFloat, const FORMAT: u128>(
151    bytes: &mut [u8],
152    fp: ExtendedFloat80,
153    sci_exp: i32,
154    options: &Options,
155) -> usize {
156    debug_assert!(sci_exp < 0);
157    debug_assert_eq!(count_factors(10, fp.mant), 0);
158
159    // Config options.
160    let decimal_point = options.decimal_point();
161    let sci_exp = sci_exp.wrapping_neg() as usize;
162
163    // Write our 0 digits.
164    let mut cursor = sci_exp + 1;
165    debug_assert!(cursor >= 2);
166    // SAFETY: safe, if we have enough bytes to write the significant digits.
167    unsafe {
168        // We write 0 digits even over the decimal point, since we might have
169        // to round carry over. This is rare, but it could happen, and would
170        // require a shift after. The good news is: if we have a shift, we
171        // only need to move 1 digit.
172        let digits = &mut index_unchecked_mut!(bytes[..cursor]);
173        slice_fill_unchecked!(digits, b'0');
174    }
175
176    // Write out our significant digits.
177    // SAFETY: safe, if we have enough bytes to write the significant digits.
178    let digits = unsafe { &mut index_unchecked_mut!(bytes[cursor..]) };
179    let digit_count = unsafe { F::write_digits(digits, fp.mant) };
180
181    // Truncate and round the significant digits.
182    // SAFETY: safe since `cursor > 0 && cursor < digits.len()`.
183    debug_assert!(cursor > 0);
184    let (digit_count, carried) =
185        unsafe { shared::truncate_and_round_decimal(digits, digit_count, options) };
186
187    // Handle any trailing digits.
188    let mut trimmed = false;
189    if carried && cursor == 2 {
190        // Rounded-up, and carried to the first byte, so instead of having
191        // 0.9999, we have 1.0.
192        // SAFETY: safe if `bytes.len() >= 3`.
193        unsafe {
194            index_unchecked_mut!(bytes[0]) = b'1';
195            if options.trim_floats() {
196                cursor = 1;
197                trimmed = true;
198            } else {
199                index_unchecked_mut!(bytes[1]) = decimal_point;
200                index_unchecked_mut!(bytes[2]) = b'0';
201                cursor = 3;
202            }
203        }
204    } else if carried {
205        // Carried, so we need to remove 1 zero before our digits.
206        // SAFETY: safe if `bytes.len() > cursor && cursor > 0`.
207        unsafe {
208            index_unchecked_mut!(bytes[1]) = decimal_point;
209            index_unchecked_mut!(bytes[cursor - 1] = bytes[cursor]);
210        }
211    } else {
212        // SAFETY: safe if `bytes.len() >= 2`.
213        unsafe { index_unchecked_mut!(bytes[1]) = decimal_point };
214        cursor += digit_count;
215    }
216
217    // Determine the exact number of digits to write.
218    let exact_count = shared::min_exact_digits(digit_count, options);
219
220    // Write any trailing digits.
221    // Cursor is 1 if we trimmed floats, in which case skip this.
222    if !trimmed && digit_count < exact_count {
223        let zeros = exact_count - digit_count;
224        // SAFETY: safe if bytes is large enough to hold the significant digits.
225        unsafe {
226            slice_fill_unchecked!(index_unchecked_mut!(bytes[cursor..cursor + zeros]), b'0');
227        }
228        cursor += zeros;
229    }
230
231    cursor
232}
233
234/// Write positive float to string without scientific notation.
235/// Has a positive exponent (shift left) and no scientific notation.
236///
237/// # Safety
238///
239/// Safe as long as `bytes` is large enough to hold the number of
240/// significant digits and the (optional) trailing zeros.
241pub unsafe fn write_float_positive_exponent<F: DragonboxFloat, const FORMAT: u128>(
242    bytes: &mut [u8],
243    fp: ExtendedFloat80,
244    sci_exp: i32,
245    options: &Options,
246) -> usize {
247    // Config options.
248    debug_assert!(sci_exp >= 0);
249    debug_assert_eq!(count_factors(10, fp.mant), 0);
250    let decimal_point = options.decimal_point();
251
252    // Write out our significant digits.
253    // Let's be optimistic and try to write without needing to move digits.
254    // This only works if the if the resulting leading digits, or `sci_exp + 1`,
255    // is greater than the written digits. If not, we have to move digits after
256    // and then adjust the decimal point. However, with truncating and remove
257    // trailing zeros, we **don't** know the exact digit count **yet**.
258    // SAFETY: safe, if we have enough bytes to write the significant digits.
259    let digit_count = unsafe { F::write_digits(bytes, fp.mant) };
260    let (mut digit_count, carried) =
261        unsafe { shared::truncate_and_round_decimal(bytes, digit_count, options) };
262
263    // Now, check if we have shift digits.
264    let leading_digits = sci_exp as usize + 1 + carried as usize;
265    let mut cursor: usize;
266    let mut trimmed = false;
267    if leading_digits >= digit_count {
268        // Great: we have more leading digits than we wrote, can write trailing zeros
269        // and an optional decimal point.
270        // SAFETY: safe if the buffer is large enough to hold the significant digits.
271        unsafe {
272            let digits = &mut index_unchecked_mut!(bytes[digit_count..leading_digits]);
273            slice_fill_unchecked!(digits, b'0');
274        }
275        cursor = leading_digits;
276        digit_count = leading_digits;
277        // Only write decimal point if we're not trimming floats.
278        if !options.trim_floats() {
279            // SAFETY: safe if `bytes.len() >= cursor + 2`.
280            unsafe { index_unchecked_mut!(bytes[cursor]) = decimal_point };
281            cursor += 1;
282            unsafe { index_unchecked_mut!(bytes[cursor]) = b'0' };
283            cursor += 1;
284            digit_count += 1;
285        } else {
286            trimmed = true;
287        }
288    } else {
289        // Need to shift digits internally, and write the decimal point.
290        // First, move the digits right by 1 after leading digits.
291        // SAFETY: safe if the buffer is large enough to hold the significant digits.
292        let count = digit_count - leading_digits;
293        unsafe {
294            let buf = &mut index_unchecked_mut!(bytes[leading_digits..digit_count + 1]);
295            safe_assert!(buf.len() > count);
296            for i in (0..count).rev() {
297                index_unchecked_mut!(buf[i + 1] = buf[i]);
298            }
299        }
300
301        // Now, write the decimal point.
302        // SAFETY: safe if the above step was safe, since `leading_digits < digit_count`.
303        unsafe { index_unchecked_mut!(bytes[leading_digits]) = decimal_point };
304        cursor = digit_count + 1;
305    }
306
307    // Determine the exact number of digits to write.
308    // Don't worry if we carried: we cannot write **MORE** digits if we've
309    // already previously truncated the input.
310    let exact_count = shared::min_exact_digits(digit_count, options);
311
312    // Change the number of digits written, if we need to add more or trim digits.
313    if !trimmed && exact_count > digit_count {
314        // Check if we need to write more trailing digits.
315        let zeros = exact_count - digit_count;
316        // SAFETY: safe if the buffer is large enough to hold the significant digits.
317        unsafe {
318            let digits = &mut index_unchecked_mut!(bytes[cursor..cursor + zeros]);
319            slice_fill_unchecked!(digits, b'0');
320        }
321        cursor += zeros;
322    }
323
324    cursor
325}
326
327// ALGORITHM
328// ---------
329
330/// Get an extended representation of the decimal float.
331///
332/// The returned float has a decimal exponent, and the significant digits
333/// returned to the nearest mantissa. For example, `1.5f32` will return
334/// `ExtendedFloat80 { mant: 15, exp: -1 }`, although trailing zeros
335/// might not be removed.
336///
337/// This algorithm **only** fails when `float == 0.0`, and we want to
338/// short-circuit anyway.
339#[inline]
340pub fn to_decimal<F: RawFloat>(float: F) -> ExtendedFloat80 {
341    let bits = float.to_bits();
342    let mantissa_bits = bits & F::MANTISSA_MASK;
343
344    if (bits & !F::SIGN_MASK).as_u64() == 0 {
345        return extended_float(0, 0);
346    }
347
348    // Shorter interval case; proceed like Schubfach.
349    // One might think this condition is wrong, since when exponent_bits == 1
350    // and two_fc == 0, the interval is actullay regular. However, it turns out
351    // that this seemingly wrong condition is actually fine, because the end
352    // result is anyway the same.
353    //
354    // [binary32]
355    // (fc-1/2) * 2^e = 1.175'494'28... * 10^-38
356    // (fc-1/4) * 2^e = 1.175'494'31... * 10^-38
357    //    fc    * 2^e = 1.175'494'35... * 10^-38
358    // (fc+1/2) * 2^e = 1.175'494'42... * 10^-38
359    //
360    // Hence, shorter_interval_case will return 1.175'494'4 * 10^-38.
361    // 1.175'494'3 * 10^-38 is also a correct shortest representation that will
362    // be rejected if we assume shorter interval, but 1.175'494'4 * 10^-38 is
363    // closer to the true value so it doesn't matter.
364    //
365    // [binary64]
366    // (fc-1/2) * 2^e = 2.225'073'858'507'201'13... * 10^-308
367    // (fc-1/4) * 2^e = 2.225'073'858'507'201'25... * 10^-308
368    //    fc    * 2^e = 2.225'073'858'507'201'38... * 10^-308
369    // (fc+1/2) * 2^e = 2.225'073'858'507'201'63... * 10^-308
370    //
371    // Hence, shorter_interval_case will return 2.225'073'858'507'201'4 *
372    // 10^-308. This is indeed of the shortest length, and it is the unique one
373    // closest to the true value among valid representations of the same length.
374
375    // Toward zero case:
376    //
377    // What we need is a compute-nearest, but with truncated digits in the
378    // truncated case. Note that we don't need the left-closed direct
379    // rounding case of I = [w,w+), or right-closed directed rounding
380    // case of I = (w−,w], since these produce the shortest intervals for
381    // a **float parser** assuming the rounding of the float-parser.
382    // The left-directed case assumes the float parser will round-down,
383    // while the right-directed case assumed the float parser will round-up.
384    //
385    // A few examples of this behavior is described here:
386    //    **compute_nearest_normal**
387    //
388    //    - `1.23456 => (123456, -5)` for binary32.
389    //    - `1.23456 => (123456, -5)` for binary64.
390    //    - `13.9999999999999982236431606 => (13999999999999998, -15)` for binary64.
391    //
392    //     **compute_left_closed_directed**
393    //
394    //    - `1.23456 => (12345601, -7)` for binary32.
395    //    - `1.23456 => (12345600000000002, -16)` for binary64.
396    //    - `13.9999999999999982236431606 => (13999999999999999, -15)` for binary64.
397    //
398    //     **compute_right_closed_directed**
399    //
400    //    - `1.23456 => (123456, -5)` for binary32.
401    //    - `1.23456 => (123456, -5)` for binary64.
402    //    - `13.9999999999999982236431606 => (13999999999999982, -15)` for binary64.
403
404    if mantissa_bits.as_u64() == 0 {
405        compute_round_short(float)
406    } else {
407        compute_round(float)
408    }
409}
410
411/// Compute for a simple case when rounding nearest, tie-even.
412#[inline(always)]
413pub fn compute_round_short<F: RawFloat>(float: F) -> ExtendedFloat80 {
414    compute_nearest_shorter(float)
415}
416
417/// Compute for a non-simple case when rounding nearest, tie-even.
418#[inline(always)]
419pub fn compute_round<F: RawFloat>(float: F) -> ExtendedFloat80 {
420    compute_nearest_normal(float)
421}
422
423/// Compute the interval I = [m−w,m+w] if even, otherwise, (m−w,m+w).
424/// This is the simple case for a finite number where only the hidden bit is set.
425pub fn compute_nearest_shorter<F: RawFloat>(float: F) -> ExtendedFloat80 {
426    // Compute k and beta.
427    let exponent = float.exponent();
428    let minus_k = floor_log10_pow2_minus_log10_4_over_3(exponent);
429    let beta = exponent + floor_log2_pow10(-minus_k);
430
431    // Compute xi and zi.
432    // SAFETY: safe, since value must be finite and therefore in the correct range.
433    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
434    let mut xi = F::compute_left_endpoint(&pow5, beta);
435    let mut zi = F::compute_right_endpoint(&pow5, beta);
436
437    // Get the interval type.
438    // Must be Round since we only use compute_round with a round-nearest direction.
439    let interval_type = IntervalType::Closed;
440
441    // If we don't accept the right endpoint and if the right endpoint is an
442    // integer, decrease it.
443    if !interval_type.include_right_endpoint() && is_right_endpoint::<F>(exponent) {
444        zi -= 1;
445    }
446
447    // If the left endpoint is not an integer, increase it.
448    if !(interval_type.include_left_endpoint() && is_left_endpoint::<F>(exponent)) {
449        xi += 1;
450    }
451
452    // Try bigger divisor.
453    let significand = zi / 10;
454
455    // If succeed, remove trailing zeros if necessary and return.
456    if significand * 10 >= xi {
457        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + 1);
458        return extended_float(mant, exp);
459    }
460
461    // Otherwise, compute the round-up of y.
462    let mut significand = F::compute_round_up(&pow5, beta);
463
464    // When tie occurs, choose one of them according to the rule.
465    let bits: i32 = F::MANTISSA_SIZE;
466    let lower_threshold: i32 = -floor_log5_pow2_minus_log5_3(bits + 4) - 2 - bits;
467    let upper_threshold: i32 = -floor_log5_pow2(bits + 2) - 2 - bits;
468
469    let round_down = RoundMode::Round.prefer_round_down(significand);
470    if round_down && exponent >= lower_threshold && exponent <= upper_threshold {
471        significand -= 1;
472    } else if significand < xi {
473        significand += 1;
474    }
475
476    // Ensure we haven't re-assigned exponent or minus_k, since this
477    // is a massive potential security vulnerability.
478    debug_assert!(float.exponent() == exponent);
479    debug_assert!(minus_k == floor_log10_pow2_minus_log10_4_over_3(exponent));
480
481    extended_float(significand, minus_k)
482}
483
484/// Compute the interval I = [m−w,m+w] if even, otherwise, (m−w,m+w).
485/// This is the normal case for a finite number with non-zero significant digits.
486#[allow(clippy::comparison_chain)]
487pub fn compute_nearest_normal<F: RawFloat>(float: F) -> ExtendedFloat80 {
488    let mantissa = float.mantissa().as_u64();
489    let exponent = float.exponent();
490    let is_even = mantissa % 2 == 0;
491
492    // Step 1: Schubfach multiplier calculation
493    // Compute k and beta.
494    let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32;
495    // SAFETY: safe, since value must be finite and therefore in the correct range.
496    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
497    let beta = exponent + floor_log2_pow10(-minus_k);
498
499    // Compute zi and deltai.
500    // 10^kappa <= deltai < 10^(kappa + 1)
501    let two_fc = mantissa << 1;
502    let deltai = F::compute_delta(&pow5, beta);
503    // For the case of binary32, the result of integer check is not correct for
504    // 29711844 * 2^-82
505    // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18
506    // and 29711844 * 2^-81
507    // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17,
508    // and they are the unique counterexamples. However, since 29711844 is even,
509    // this does not cause any problem for the endpoints calculations; it can only
510    // cause a problem when we need to perform integer check for the center.
511    // Fortunately, with these inputs, that branch is never executed, so we are fine.
512    let (zi, is_z_integer) = F::compute_mul((two_fc | 1) << beta, &pow5);
513
514    // Step 2: Try larger divisor; remove trailing zeros if necessary
515    let big_divisor = pow32(10, F::KAPPA + 1);
516    let small_divisor = pow32(10, F::KAPPA);
517
518    // Using an upper bound on zi, we might be able to optimize the division
519    // better than the compiler; we are computing zi / big_divisor here.
520    let exp = F::KAPPA + 1;
521    let n_max = (1 << (F::MANTISSA_SIZE + 1)) * big_divisor as u64 - 1;
522    let mut significand = F::divide_by_pow10(zi, exp, n_max);
523    let mut r = (zi - (big_divisor as u64).wrapping_mul(significand)) as u32;
524
525    // Get the interval type.
526    // Must be Round since we only use compute_round with a round-nearest direction.
527    let interval_type = IntervalType::Symmetric(is_even);
528
529    // Check for short-circuit.
530    // We use this, since the `goto` statements in dragonbox are unidiomatic
531    // in Rust and lead to unmaintainable code. Using a simple closure is much
532    // simpler, however, we do store a boolean in some cases to determine
533    // if we need to short-circuit.
534    let mut should_short_circuit = true;
535    if r < deltai {
536        // Exclude the right endpoint if necessary.
537        let include_right = interval_type.include_right_endpoint();
538        if r == 0 && !include_right && is_z_integer {
539            significand -= 1;
540            r = big_divisor;
541            should_short_circuit = false;
542        }
543    } else if r > deltai {
544        should_short_circuit = false;
545    } else {
546        // r == deltai; compare fractional parts.
547        // Due to the more complex logic in the new dragonbox algorithm,
548        // it's much easier logically to store if we should short circuit,
549        // the default, and only mark
550        let two_fl = two_fc - 1;
551        let include_left = interval_type.include_left_endpoint();
552
553        if !include_left || exponent < F::FC_PM_HALF_LOWER || exponent > F::DIV_BY_5_THRESHOLD {
554            // If the left endpoint is not included, the condition for
555            // success is z^(f) < delta^(f) (odd parity).
556            // Otherwise, the inequalities on exponent ensure that
557            // x is not an integer, so if z^(f) >= delta^(f) (even parity), we in fact
558            // have strict inequality.
559            let parity = F::compute_mul_parity(two_fl, &pow5, beta).0;
560            if !parity {
561                should_short_circuit = false;
562            }
563        } else {
564            let (xi_parity, x_is_integer) = F::compute_mul_parity(two_fl, &pow5, beta);
565            if !xi_parity && !x_is_integer {
566                should_short_circuit = false
567            }
568        }
569    }
570
571    if should_short_circuit {
572        // Short-circuit case.
573        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + F::KAPPA as i32 + 1);
574        extended_float(mant, exp)
575    } else {
576        // Step 3: Find the significand with the smaller divisor
577        significand *= 10;
578
579        let dist = r - (deltai / 2) + (small_divisor / 2);
580        let approx_y_parity = ((dist ^ (small_divisor / 2)) & 1) != 0;
581
582        // Is dist divisible by 10^kappa?
583        let (dist, is_dist_div_by_kappa) = F::check_div_pow10(dist);
584
585        // Add dist / 10^kappa to the significand.
586        significand += dist as u64;
587
588        if is_dist_div_by_kappa {
589            // Check z^(f) >= epsilon^(f).
590            // We have either yi == zi - epsiloni or yi == (zi - epsiloni) - 1,
591            // where yi == zi - epsiloni if and only if z^(f) >= epsilon^(f).
592            // Since there are only 2 possibilities, we only need to care about the
593            // parity. Also, zi and r should have the same parity since the divisor is
594            // an even number.
595            let (yi_parity, is_y_integer) = F::compute_mul_parity(two_fc, &pow5, beta);
596            let round_down = RoundMode::Round.prefer_round_down(significand);
597
598            if yi_parity != approx_y_parity || (is_y_integer && round_down) {
599                // If z^(f) >= epsilon^(f), we might have a tie
600                // when z^(f) == epsilon^(f), or equivalently, when y is an integer.
601                // For tie-to-up case, we can just choose the upper one.
602                //significand -= 1;
603                significand -= 1;
604            }
605        }
606
607        // Ensure we haven't re-assigned exponent or minus_k, since this
608        // is a massive potential security vulnerability.
609        debug_assert!(float.exponent() == exponent);
610        debug_assert!(minus_k == floor_log10_pow2(exponent) - F::KAPPA as i32);
611
612        extended_float(significand, minus_k + F::KAPPA as i32)
613    }
614}
615
616/// Compute the interval I = [w,w+).
617#[allow(clippy::comparison_chain)]
618pub fn compute_left_closed_directed<F: RawFloat>(float: F) -> ExtendedFloat80 {
619    let mantissa = float.mantissa().as_u64();
620    let exponent = float.exponent();
621
622    // Step 1: Schubfach multiplier calculation
623    // Compute k and beta.
624    let minus_k = floor_log10_pow2(exponent) - F::KAPPA as i32;
625    // SAFETY: safe, since value must be finite and therefore in the correct range.
626    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
627    let beta = exponent + floor_log2_pow10(-minus_k);
628
629    // Compute zi and deltai.
630    // 10^kappa <= deltai < 10^(kappa + 1)
631    let two_fc = mantissa << 1;
632    let deltai = F::compute_delta(&pow5, beta);
633    let (mut xi, mut is_x_integer) = F::compute_mul(two_fc << beta, &pow5);
634
635    // Deal with the unique exceptional cases
636    // 29711844 * 2^-82
637    // = 6.1442653300000000008655037797566933477355632930994033813476... * 10^-18
638    // and 29711844 * 2^-81
639    // = 1.2288530660000000001731007559513386695471126586198806762695... * 10^-17
640    // for binary32.
641    if F::BITS == 32 && exponent <= -80 {
642        is_x_integer = false;
643    }
644
645    if !is_x_integer {
646        xi += 1;
647    }
648
649    // Step 2: Try larger divisor; remove trailing zeros if necessary
650    let big_divisor = pow32(10, F::KAPPA + 1);
651
652    // Using an upper bound on xi, we might be able to optimize the division
653    // better than the compiler; we are computing xi / big_divisor here.
654    let exp = F::KAPPA + 1;
655    let n_max = (1 << (F::MANTISSA_SIZE + 1)) * big_divisor as u64 - 1;
656    let mut significand = F::divide_by_pow10(xi, exp, n_max);
657    let mut r = (xi - (big_divisor as u64).wrapping_mul(significand)) as u32;
658
659    if r != 0 {
660        significand += 1;
661        r = big_divisor - r;
662    }
663
664    // Check for short-circuit.
665    // We use this, since the `goto` statements in dragonbox are unidiomatic
666    // in Rust and lead to unmaintainable code. Using a simple closure is much
667    // simpler, however, we do store a boolean in some cases to determine
668    // if we need to short-circuit.
669    let mut should_short_circuit = true;
670    if r > deltai {
671        should_short_circuit = false;
672    } else if r == deltai {
673        // Compare the fractional parts.
674        // This branch is never taken for the exceptional cases
675        // 2f_c = 29711482, e = -81
676        // (6.1442649164096937243516663440523473127541365101933479309082... * 10^-18)
677        // and 2f_c = 29711482, e = -80
678        // (1.2288529832819387448703332688104694625508273020386695861816... * 10^-17).
679        let (zi_parity, is_z_integer) = F::compute_mul_parity(two_fc + 2, &pow5, beta);
680        if zi_parity || is_z_integer {
681            should_short_circuit = false;
682        }
683    }
684
685    if should_short_circuit {
686        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + F::KAPPA as i32 + 1);
687        extended_float(mant, exp)
688    } else {
689        // Step 3: Find the significand with the smaller divisor
690        significand *= 10;
691        significand -= F::div_pow10(r) as u64;
692
693        // Ensure we haven't re-assigned exponent or minus_k, since this
694        // is a massive potential security vulnerability.
695        debug_assert!(float.exponent() == exponent);
696        debug_assert!(minus_k == floor_log10_pow2(exponent) - F::KAPPA as i32);
697
698        extended_float(significand, minus_k + F::KAPPA as i32)
699    }
700}
701
702/// Compute the interval I = (w−,w]..
703#[allow(clippy::comparison_chain, clippy::if_same_then_else)]
704pub fn compute_right_closed_directed<F: RawFloat>(float: F, shorter: bool) -> ExtendedFloat80 {
705    let mantissa = float.mantissa().as_u64();
706    let exponent = float.exponent();
707
708    // Step 1: Schubfach multiplier calculation
709    // Compute k and beta.
710    let minus_k = floor_log10_pow2(exponent - shorter as i32) - F::KAPPA as i32;
711    // SAFETY: safe, since value must be finite and therefore in the correct range.
712    let pow5 = unsafe { F::dragonbox_power(-minus_k) };
713    let beta = exponent + floor_log2_pow10(-minus_k);
714
715    // Compute zi and deltai.
716    // 10^kappa <= deltai < 10^(kappa + 1)
717    let two_fc = mantissa << 1;
718    let deltai = F::compute_delta(&pow5, beta - shorter as i32);
719    let zi = F::compute_mul(two_fc << beta, &pow5).0;
720
721    // Step 2: Try larger divisor; remove trailing zeros if necessary
722    let big_divisor = pow32(10, F::KAPPA + 1);
723
724    // Using an upper bound on zi, we might be able to optimize the division better than
725    // the compiler; we are computing zi / big_divisor here.
726    let exp = F::KAPPA + 1;
727    let n_max = (1 << (F::MANTISSA_SIZE + 1)) * big_divisor as u64 - 1;
728    let mut significand = F::divide_by_pow10(zi, exp, n_max);
729    let r = (zi - (big_divisor as u64).wrapping_mul(significand)) as u32;
730
731    // Check for short-circuit.
732    // We use this, since the `goto` statements in dragonbox are unidiomatic
733    // in Rust and lead to unmaintainable code. Using a simple closure is much
734    // simpler, however, we do store a boolean in some cases to determine
735    // if we need to short-circuit.
736    let mut should_short_circuit = true;
737    if r > deltai {
738        should_short_circuit = false;
739    } else if r == deltai {
740        // Compare the fractional parts.
741        let two_f = two_fc
742            - if shorter {
743                1
744            } else {
745                2
746            };
747        if !F::compute_mul_parity(two_f, &pow5, beta).0 {
748            should_short_circuit = false;
749        }
750    }
751
752    if should_short_circuit {
753        let (mant, exp) = F::process_trailing_zeros(significand, minus_k + F::KAPPA as i32 + 1);
754        extended_float(mant, exp)
755    } else {
756        // Step 3: Find the significand with the smaller divisor
757        significand *= 10;
758        significand -= F::div_pow10(r) as u64;
759
760        // Ensure we haven't re-assigned exponent or minus_k, since this
761        // is a massive potential security vulnerability.
762        debug_assert!(float.exponent() == exponent);
763        debug_assert!(minus_k == floor_log10_pow2(exponent - shorter as i32) - F::KAPPA as i32);
764
765        extended_float(significand, minus_k + F::KAPPA as i32)
766    }
767}
768
769// DIGITS
770// ------
771
772//  NOTE:
773//      Dragonbox has a heavily-branched, dubiously optimized algorithm using
774//      fast division, that leads to no practical performance benefits in my
775//      benchmarks, and the division algorithm is at best ~3% faster. It also
776//      tries to avoid writing digits extensively, but requires division operations
777//      for each step regardless, which means the **actual** overhead of said
778//      branching likely exceeds any benefits. The code is also impossible to
779//      maintain, and in my benchmarks is slower (by a small amount) for
780//      a 32-bit mantissa, and a **lot** slower for a 64-bit mantissa,
781//      where we need to trim trailing zeros.
782
783/// Write the significant digits, when the significant digits can fit in a
784/// 32-bit integer. Returns the number of digits written. This assumes any
785/// trailing zeros have been removed.
786///
787/// # Safety
788///
789/// Safe if `bytes.len() >= 10`, since `u32::MAX` is at most 10 digits.
790#[inline]
791pub unsafe fn write_digits_u32(bytes: &mut [u8], mantissa: u32) -> usize {
792    debug_assert!(bytes.len() >= 10);
793    unsafe { mantissa.write_mantissa::<u32, { STANDARD }>(bytes) }
794}
795
796/// Write the significant digits, when the significant digits cannot fit in a
797/// 32-bit integer. Returns the number of digits written. Note that this
798/// might not be the same as the number of digits in the mantissa, since
799/// trailing zeros will be removed.
800///
801/// # Safety
802///
803/// Safe if `bytes.len() >= 20`, since `u64::MAX` is at most 20 digits.
804#[inline]
805#[allow(clippy::branches_sharing_code)]
806pub unsafe fn write_digits_u64(bytes: &mut [u8], mantissa: u64) -> usize {
807    debug_assert!(bytes.len() >= 20);
808    unsafe { mantissa.write_mantissa::<u64, { STANDARD }>(bytes) }
809}
810
811// EXTENDED
812// --------
813
814/// Create extended float from significant digits and exponent.
815#[inline(always)]
816pub const fn extended_float(mant: u64, exp: i32) -> ExtendedFloat80 {
817    ExtendedFloat80 {
818        mant,
819        exp,
820    }
821}
822
823// COMPUTE
824// -------
825
826#[inline(always)]
827pub const fn floor_log2(mut n: u64) -> i32 {
828    let mut count = -1;
829    while n != 0 {
830        count += 1;
831        n >>= 1;
832    }
833    count
834}
835
836#[inline(always)]
837pub const fn is_endpoint(exponent: i32, lower: i32, upper: i32) -> bool {
838    exponent >= lower && exponent <= upper
839}
840
841#[inline(always)]
842pub fn is_right_endpoint<F: Float>(exponent: i32) -> bool {
843    let lower_threshold = 0;
844    let factors = count_factors(5, (1u64 << (F::MANTISSA_SIZE + 1)) + 1) + 1;
845    let upper_threshold = 2 + floor_log2(pow64(10, factors) / 3);
846    is_endpoint(exponent, lower_threshold, upper_threshold)
847}
848
849#[inline(always)]
850pub fn is_left_endpoint<F: Float>(exponent: i32) -> bool {
851    let lower_threshold = 2;
852    let factors = count_factors(5, (1u64 << (F::MANTISSA_SIZE + 2)) - 1) + 1;
853    let upper_threshold = 2 + floor_log2(pow64(10, factors) / 3);
854    is_endpoint(exponent, lower_threshold, upper_threshold)
855}
856
857// MUL
858// ---
859
860#[inline(always)]
861pub const fn umul128_upper64(x: u64, y: u64) -> u64 {
862    let p = x as u128 * y as u128;
863    (p >> 64) as u64
864}
865
866#[inline(always)]
867pub const fn umul192_upper128(x: u64, hi: u64, lo: u64) -> (u64, u64) {
868    let mut r = x as u128 * hi as u128;
869    r += umul128_upper64(x, lo) as u128;
870    ((r >> 64) as u64, r as u64)
871}
872
873#[inline(always)]
874pub const fn umul192_lower128(x: u64, yhi: u64, ylo: u64) -> (u64, u64) {
875    let hi = x.wrapping_mul(yhi);
876    let hi_lo = x as u128 * ylo as u128;
877    // NOTE: This can wrap exactly to 0, and this is desired.
878    (hi.wrapping_add((hi_lo >> 64) as u64), hi_lo as u64)
879}
880
881#[inline(always)]
882pub const fn umul96_upper64(x: u64, y: u64) -> u64 {
883    umul128_upper64(x << 32, y)
884}
885
886#[inline(always)]
887pub const fn umul96_lower64(x: u64, y: u64) -> u64 {
888    x.wrapping_mul(y)
889}
890
891// LOG
892// ---
893
894/// Calculate `x * log5(2)` quickly.
895/// Generated by `etc/log.py`.
896/// Only needs to be valid for values from `[-1492, 1492]`
897#[inline(always)]
898pub const fn floor_log5_pow2(q: i32) -> i32 {
899    q.wrapping_mul(225799) >> 19
900}
901
902/// Calculate `x * log10(2)` quickly.
903/// Generated by `etc/log.py`.
904/// Only needs to be valid for values from `[-1700, 1700]`
905#[inline(always)]
906pub const fn floor_log10_pow2(q: i32) -> i32 {
907    q.wrapping_mul(315653) >> 20
908}
909
910/// Calculate `x * log2(10)` quickly.
911/// Generated by `etc/log.py`.
912/// Only needs to be valid for values from `[-1233, 1233]`
913#[inline(always)]
914pub const fn floor_log2_pow10(q: i32) -> i32 {
915    q.wrapping_mul(1741647) >> 19
916}
917
918/// Calculate `x * log5(2) - log5(3)` quickly.
919/// Generated by `etc/log.py`.
920/// Only needs to be valid for values from `[-2427, 2427]`
921#[inline(always)]
922pub const fn floor_log5_pow2_minus_log5_3(q: i32) -> i32 {
923    q.wrapping_mul(451597).wrapping_sub(715764) >> 20
924}
925
926/// Calculate `(x * log10(2) - log10(4 / 3))` quickly.
927/// Generated by `etc/log.py`.
928/// Only needs to be valid for values from `[-1700, 1700]`
929#[inline(always)]
930pub const fn floor_log10_pow2_minus_log10_4_over_3(q: i32) -> i32 {
931    // NOTE: these values aren't actually exact:
932    //      They're off for -295 and 97, so any automated way of computing
933    //      them will also be off.
934    q.wrapping_mul(1262611).wrapping_sub(524031) >> 22
935}
936
937// POW
938// ---
939
940/// const fn to calculate `radix^exp`.
941#[inline(always)]
942pub const fn pow32(radix: u32, mut exp: u32) -> u32 {
943    let mut p = 1;
944    while exp > 0 {
945        p *= radix;
946        exp -= 1;
947    }
948    p
949}
950
951/// const fn to calculate `radix^exp`.
952#[inline(always)]
953pub const fn pow64(radix: u32, mut exp: u32) -> u64 {
954    let mut p = 1;
955    while exp > 0 {
956        p *= radix as u64;
957        exp -= 1;
958    }
959    p
960}
961
962/// Counter the number of powers of radix are in `n`.
963#[inline(always)]
964pub const fn count_factors(radix: u32, mut n: u64) -> u32 {
965    let mut c = 0;
966    while n != 0 && n % radix as u64 == 0 {
967        n /= radix as u64;
968        c += 1;
969    }
970    c
971}
972
973// DIV
974// ---
975
976// Compute floor(n / 10^exp) for small exp.
977// Precondition: exp >= 0.
978#[inline(always)]
979pub const fn divide_by_pow10_32(n: u32, exp: u32) -> u32 {
980    // Specialize for 32-bit division by 100.
981    // Compiler is supposed to generate the identical code for just writing
982    // "n / 100", but for some reason MSVC generates an inefficient code
983    // (mul + mov for no apparent reason, instead of single imul),
984    // so we does this manually.
985    if exp == 2 {
986        ((n as u64 * 1374389535) >> 37) as u32
987    } else {
988        let divisor = pow32(exp as u32, 10);
989        n / divisor
990    }
991}
992
993// Compute floor(n / 10^exp) for small exp.
994// Precondition: n <= n_max
995#[inline(always)]
996pub const fn divide_by_pow10_64(n: u64, exp: u32, n_max: u64) -> u64 {
997    // Specialize for 64-bit division by 1000.
998    // Ensure that the correctness condition is met.
999    if exp == 3 && n_max <= 15534100272597517998 {
1000        umul128_upper64(n, 2361183241434822607) >> 7
1001    } else {
1002        let divisor = pow64(exp as u32, 10);
1003        n / divisor
1004    }
1005}
1006
1007// ROUNDING
1008// --------
1009
1010impl RoundMode {
1011    /// Determine if we should round down.
1012    #[inline(always)]
1013    pub const fn prefer_round_down(&self, significand: u64) -> bool {
1014        match self {
1015            RoundMode::Round => significand % 2 != 0,
1016            RoundMode::Truncate => true,
1017        }
1018    }
1019}
1020
1021// INTERVAL TYPE
1022// -------------
1023
1024/// Interval types for rounding modes to compute endpoints.
1025#[non_exhaustive]
1026pub enum IntervalType {
1027    Symmetric(bool),
1028    Asymmetric(bool),
1029    Closed,
1030    Open,
1031    LeftClosedRightOpen,
1032    RightClosedLeftOpen,
1033}
1034
1035impl IntervalType {
1036    /// Determine if the interval type is symmetric.
1037    #[inline(always)]
1038    pub fn is_symmetric(&self) -> bool {
1039        match self {
1040            Self::Symmetric(_) => true,
1041            Self::Asymmetric(_) => false,
1042            Self::Closed => true,
1043            Self::Open => true,
1044            Self::LeftClosedRightOpen => false,
1045            Self::RightClosedLeftOpen => false,
1046        }
1047    }
1048
1049    /// Determine if we include the left endpoint.
1050    #[inline(always)]
1051    pub fn include_left_endpoint(&self) -> bool {
1052        match self {
1053            Self::Symmetric(closed) => *closed,
1054            Self::Asymmetric(left_closed) => *left_closed,
1055            Self::Closed => true,
1056            Self::Open => false,
1057            Self::LeftClosedRightOpen => true,
1058            Self::RightClosedLeftOpen => false,
1059        }
1060    }
1061
1062    /// Determine if we include the right endpoint.
1063    #[inline(always)]
1064    pub fn include_right_endpoint(&self) -> bool {
1065        match self {
1066            Self::Symmetric(closed) => *closed,
1067            Self::Asymmetric(left_closed) => !*left_closed,
1068            Self::Closed => true,
1069            Self::Open => false,
1070            Self::LeftClosedRightOpen => false,
1071            Self::RightClosedLeftOpen => true,
1072        }
1073    }
1074}
1075
1076// ENDPOINTS
1077// ---------
1078
1079/// Compute the left endpoint from a 64-bit power-of-5.
1080#[inline(always)]
1081pub fn compute_left_endpoint_u64<F: DragonboxFloat>(pow5: u64, beta: i32) -> u64 {
1082    let zero_carry = pow5 >> (F::MANTISSA_SIZE as usize + 2);
1083    let mantissa_shift = 64 - F::MANTISSA_SIZE as usize - 1;
1084    (pow5 - zero_carry) >> (mantissa_shift as i32 - beta)
1085}
1086
1087#[inline(always)]
1088pub fn compute_right_endpoint_u64<F: DragonboxFloat>(pow5: u64, beta: i32) -> u64 {
1089    let zero_carry = pow5 >> (F::MANTISSA_SIZE as usize + 1);
1090    let mantissa_shift = 64 - F::MANTISSA_SIZE as usize - 1;
1091    (pow5 + zero_carry) >> (mantissa_shift as i32 - beta)
1092}
1093
1094/// Determine if we should round up for the short interval case.
1095#[inline(always)]
1096pub fn compute_round_up_u64<F: DragonboxFloat>(pow5: u64, beta: i32) -> u64 {
1097    let shift = 64 - F::MANTISSA_SIZE - 2;
1098    ((pow5 >> (shift - beta)) + 1) / 2
1099}
1100
1101// DRAGONBOX FLOAT
1102// ---------------
1103
1104/// Get the high bits from the power-of-5.
1105#[inline(always)]
1106pub const fn high(pow5: &(u64, u64)) -> u64 {
1107    pow5.0
1108}
1109
1110/// Get the low bits from the power-of-5.
1111#[inline(always)]
1112pub const fn low(pow5: &(u64, u64)) -> u64 {
1113    pow5.1
1114}
1115
1116/// ROR instruction for 32-bit type.
1117#[inline(always)]
1118pub const fn rotr32(n: u32, r: u32) -> u32 {
1119    let r = r & 31;
1120    (n >> r) | (n << (32 - r))
1121}
1122
1123/// ROR instruction for 64-bit type.
1124#[inline(always)]
1125pub const fn rotr64(n: u64, r: u64) -> u64 {
1126    let r = r & 63;
1127    (n >> r) | (n << (64 - r))
1128}
1129
1130/// Magic numbers for division by a power of 10.
1131/// Replace n by floor(n / 10^N).
1132/// Returns true if and only if n is divisible by 10^N.
1133/// Precondition: n <= 10^(N+1)
1134/// !!It takes an in-out parameter!!
1135struct Div10Info {
1136    magic_number: u32,
1137    shift_amount: i32,
1138}
1139
1140const F32_DIV10_INFO: Div10Info = Div10Info {
1141    magic_number: 6554,
1142    shift_amount: 16,
1143};
1144
1145const F64_DIV10_INFO: Div10Info = Div10Info {
1146    magic_number: 656,
1147    shift_amount: 16,
1148};
1149
1150macro_rules! check_div_pow10 {
1151    ($n:ident, $exp:literal, $float:ident, $info:ident) => {{
1152        // Make sure the computation for max_n does not overflow.
1153        debug_assert!($exp + 2 < floor_log10_pow2(31));
1154        debug_assert!($n as u64 <= pow64(10, $exp + 1));
1155
1156        let n = $n.wrapping_mul($info.magic_number);
1157        let mask = (1u32 << $info.shift_amount) - 1;
1158        let r = (n & mask) < $info.magic_number;
1159
1160        (n >> $info.shift_amount, r)
1161    }};
1162}
1163
1164// These constants are efficient because we can do it in 32-bits.
1165const MOD_INV_5_U32: u32 = 0xCCCC_CCCD;
1166const MOD_INV_25_U32: u32 = MOD_INV_5_U32.wrapping_mul(MOD_INV_5_U32);
1167const MOD_INV_5_U64: u64 = 0xCCCC_CCCC_CCCC_CCCD;
1168const MOD_INV_25_U64: u64 = MOD_INV_5_U64.wrapping_mul(MOD_INV_5_U64);
1169
1170macro_rules! div_pow10 {
1171    ($n:ident, $info:ident) => {{
1172        $n.wrapping_mul($info.magic_number) >> $info.shift_amount
1173    }};
1174}
1175
1176/// Trait with specialized methods for the Dragonbox algorithm.
1177pub trait DragonboxFloat: Float {
1178    /// Constant derived in Section 4.5 of the Dragonbox algorithm.
1179    const KAPPA: u32;
1180    /// Ceiling of the maximum number of float decimal digits + 1.
1181    /// Or, ceil((MANTISSA_SIZE + 1) / log2(10)) + 1.
1182    const DECIMAL_DIGITS: usize;
1183    const FC_PM_HALF_LOWER: i32 = -(Self::KAPPA as i32) - floor_log5_pow2(Self::KAPPA as i32);
1184    const DIV_BY_5_THRESHOLD: i32 = floor_log2_pow10(Self::KAPPA as i32 + 1);
1185
1186    type Power;
1187
1188    /// Quick calculation for the number of significant digits in the float.
1189    fn digit_count(mantissa: u64) -> usize;
1190
1191    /// Write the significant digits to a buffer.
1192    /// Does not handle rounding or truncated digits.
1193    ///
1194    /// # Safety
1195    ///
1196    /// Safe if `bytes` is large enough to hold a decimal string for mantissa.
1197    unsafe fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize;
1198
1199    /// Get the pre-computed Dragonbox power from the exponent.
1200    ///
1201    /// # Safety
1202    ///
1203    /// Safe as long as the exponent is within the valid power-of-5 range.
1204    unsafe fn dragonbox_power(exponent: i32) -> Self::Power;
1205
1206    /// Compute the left endpoint for the shorter interval case.
1207    fn compute_left_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64;
1208
1209    /// Compute the right endpoint for the shorter interval case.
1210    fn compute_right_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64;
1211
1212    /// Handle rounding-up for the short interval case.
1213    fn compute_round_up(pow5: &Self::Power, beta_minus_1: i32) -> u64;
1214
1215    fn compute_mul(u: u64, pow5: &Self::Power) -> (u64, bool);
1216    fn compute_mul_parity(two_f: u64, pow5: &Self::Power, beta_minus_1: i32) -> (bool, bool);
1217    fn compute_delta(pow5: &Self::Power, beta_minus_1: i32) -> u32;
1218
1219    /// Handle trailing zeros, conditional on the float type.
1220    fn process_trailing_zeros(mantissa: u64, exponent: i32) -> (u64, i32);
1221
1222    /// Remove trailing zeros from the float.
1223    fn remove_trailing_zeros(mantissa: u64) -> (u64, i32);
1224
1225    /// Determine if two_f is divisible by 2^exp.
1226    #[inline(always)]
1227    fn divisible_by_pow2(x: u64, exp: u32) -> bool {
1228        // Preconditions: exp >= 1 && x != 0
1229        x.trailing_zeros() >= exp
1230    }
1231
1232    // Replace n by floor(n / 10^N).
1233    // Returns true if and only if n is divisible by 10^N.
1234    // Precondition: n <= 10^(N+1)
1235    fn check_div_pow10(n: u32) -> (u32, bool);
1236
1237    // Compute floor(n / 10^N) for small n and exp.
1238    // Precondition: n <= 10^(N+1)
1239    fn div_pow10(n: u32) -> u32;
1240
1241    // Compute floor(n / 10^N) for small N.
1242    // Precondition: n <= n_max
1243    fn divide_by_pow10(n: u64, exp: u32, n_max: u64) -> u64;
1244}
1245
1246impl DragonboxFloat for f32 {
1247    const KAPPA: u32 = 1;
1248    const DECIMAL_DIGITS: usize = 9;
1249
1250    type Power = u64;
1251
1252    #[inline(always)]
1253    fn digit_count(mantissa: u64) -> usize {
1254        debug_assert!(mantissa <= u32::MAX as u64);
1255        (mantissa as u32).digit_count()
1256    }
1257
1258    #[inline(always)]
1259    unsafe fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize {
1260        debug_assert!(mantissa <= u32::MAX as u64);
1261        // SAFETY: safe is `bytes.len() >= 10`.
1262        unsafe { write_digits_u32(bytes, mantissa as u32) }
1263    }
1264
1265    #[inline(always)]
1266    unsafe fn dragonbox_power(exponent: i32) -> Self::Power {
1267        debug_assert!((SMALLEST_F32_POW5..=LARGEST_F32_POW5).contains(&exponent));
1268        let index = (exponent - SMALLEST_F32_POW5) as usize;
1269        // SAFETY: safe if the exponent is in the correct range.
1270        unsafe { index_unchecked!(DRAGONBOX32_POWERS_OF_FIVE[index]) }
1271    }
1272
1273    #[inline(always)]
1274    fn compute_left_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1275        compute_left_endpoint_u64::<Self>(*pow5, beta_minus_1)
1276    }
1277
1278    #[inline(always)]
1279    fn compute_right_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1280        compute_right_endpoint_u64::<Self>(*pow5, beta_minus_1)
1281    }
1282
1283    #[inline(always)]
1284    fn compute_round_up(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1285        compute_round_up_u64::<Self>(*pow5, beta_minus_1)
1286    }
1287
1288    #[inline(always)]
1289    fn compute_mul(u: u64, pow5: &Self::Power) -> (u64, bool) {
1290        let r = umul96_upper64(u, *pow5);
1291        (r >> 32, (r as u32) == 0)
1292    }
1293
1294    #[inline(always)]
1295    fn compute_mul_parity(two_f: u64, pow5: &Self::Power, beta: i32) -> (bool, bool) {
1296        debug_assert!((1..64).contains(&beta));
1297
1298        let r = umul96_lower64(two_f, *pow5);
1299        let parity = (r >> (64 - beta)) & 1;
1300        let is_integer = r >> (32 - beta);
1301        (parity != 0, is_integer == 0)
1302    }
1303
1304    #[inline(always)]
1305    fn compute_delta(pow5: &Self::Power, beta: i32) -> u32 {
1306        (*pow5 >> (64 - 1 - beta)) as u32
1307    }
1308
1309    #[inline(always)]
1310    fn process_trailing_zeros(mantissa: u64, exponent: i32) -> (u64, i32) {
1311        // Policy is to remove the trailing zeros.
1312        let (mantissa, trailing) = Self::remove_trailing_zeros(mantissa);
1313        (mantissa, exponent + trailing)
1314    }
1315
1316    #[inline(always)]
1317    fn remove_trailing_zeros(mantissa: u64) -> (u64, i32) {
1318        debug_assert!(mantissa <= u32::MAX as u64);
1319        debug_assert!(mantissa != 0);
1320
1321        let mut n = mantissa as u32;
1322        let mut quo: u32;
1323        let mut s: i32 = 0;
1324        loop {
1325            quo = rotr32(n.wrapping_mul(MOD_INV_25_U32), 2);
1326            if quo <= u32::MAX / 100 {
1327                n = quo;
1328                s += 2;
1329            } else {
1330                break;
1331            }
1332        }
1333
1334        quo = rotr32(n.wrapping_mul(MOD_INV_5_U32), 1);
1335        if quo <= u32::MAX / 10 {
1336            n = quo;
1337            s |= 1;
1338        }
1339        (n as u64, s)
1340    }
1341
1342    #[inline(always)]
1343    fn check_div_pow10(n: u32) -> (u32, bool) {
1344        check_div_pow10!(n, 1, f32, F32_DIV10_INFO)
1345    }
1346
1347    #[inline(always)]
1348    fn div_pow10(n: u32) -> u32 {
1349        div_pow10!(n, F32_DIV10_INFO)
1350    }
1351
1352    #[inline(always)]
1353    fn divide_by_pow10(n: u64, exp: u32, _: u64) -> u64 {
1354        divide_by_pow10_32(n as u32, exp) as u64
1355    }
1356}
1357
1358impl DragonboxFloat for f64 {
1359    const KAPPA: u32 = 2;
1360    const DECIMAL_DIGITS: usize = 17;
1361
1362    type Power = (u64, u64);
1363
1364    #[inline(always)]
1365    fn digit_count(mantissa: u64) -> usize {
1366        mantissa.digit_count()
1367    }
1368
1369    #[inline(always)]
1370    unsafe fn write_digits(bytes: &mut [u8], mantissa: u64) -> usize {
1371        // SAFETY: safe if `bytes.len() >= 20`.
1372        unsafe { write_digits_u64(bytes, mantissa) }
1373    }
1374
1375    #[inline(always)]
1376    unsafe fn dragonbox_power(exponent: i32) -> Self::Power {
1377        debug_assert!((SMALLEST_F64_POW5..=LARGEST_F64_POW5).contains(&exponent));
1378        let index = (exponent - SMALLEST_F64_POW5) as usize;
1379        // SAFETY: safe if the exponent is in the correct range.
1380        unsafe { index_unchecked!(DRAGONBOX64_POWERS_OF_FIVE[index]) }
1381    }
1382
1383    #[inline(always)]
1384    fn compute_left_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1385        compute_left_endpoint_u64::<Self>(high(pow5), beta_minus_1)
1386    }
1387
1388    #[inline(always)]
1389    fn compute_right_endpoint(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1390        compute_right_endpoint_u64::<Self>(high(pow5), beta_minus_1)
1391    }
1392
1393    #[inline(always)]
1394    fn compute_round_up(pow5: &Self::Power, beta_minus_1: i32) -> u64 {
1395        compute_round_up_u64::<Self>(high(pow5), beta_minus_1)
1396    }
1397
1398    #[inline(always)]
1399    fn compute_mul(u: u64, pow5: &Self::Power) -> (u64, bool) {
1400        let (hi, lo) = umul192_upper128(u, high(pow5), low(pow5));
1401        (hi, lo == 0)
1402    }
1403
1404    #[inline(always)]
1405    fn compute_mul_parity(two_f: u64, pow5: &Self::Power, beta: i32) -> (bool, bool) {
1406        debug_assert!((1..64).contains(&beta));
1407
1408        let (rhi, rlo) = umul192_lower128(two_f, high(pow5), low(pow5));
1409        let parity = (rhi >> (64 - beta)) & 1;
1410        let is_integer = (rhi << beta) | (rlo >> (64 - beta));
1411        (parity != 0, is_integer == 0)
1412    }
1413
1414    #[inline(always)]
1415    fn compute_delta(pow5: &Self::Power, beta: i32) -> u32 {
1416        (high(pow5) >> (64 - 1 - beta)) as u32
1417    }
1418
1419    #[inline(always)]
1420    fn process_trailing_zeros(mantissa: u64, exponent: i32) -> (u64, i32) {
1421        // Policy is to remove the trailing zeros.
1422        // This differs from dragonbox proper, but leads to faster benchmarks.
1423        let (mantissa, trailing) = Self::remove_trailing_zeros(mantissa);
1424        (mantissa, exponent + trailing)
1425    }
1426
1427    #[inline(always)]
1428    fn remove_trailing_zeros(mantissa: u64) -> (u64, i32) {
1429        debug_assert!(mantissa != 0);
1430
1431        // This magic number is ceil(2^90 / 10^8).
1432        let magic_number = 12379400392853802749u64;
1433        let nm = mantissa as u128 * magic_number as u128;
1434
1435        // Is n is divisible by 10^8?
1436        let high = (nm >> 64) as u64;
1437        let mask = (1 << (90 - 64)) - 1;
1438        let low = nm as u64;
1439        if high & mask == 0 && low < magic_number {
1440            // If yes, work with the quotient.
1441            let mut n = (high >> (90 - 64)) as u32;
1442            let mut s: i32 = 8;
1443            let mut quo: u32;
1444
1445            loop {
1446                quo = rotr32(n.wrapping_mul(MOD_INV_25_U32), 2);
1447                if quo <= u32::MAX / 100 {
1448                    n = quo;
1449                    s += 2;
1450                } else {
1451                    break;
1452                }
1453            }
1454
1455            quo = rotr32(n.wrapping_mul(MOD_INV_5_U32), 1);
1456            if quo <= u32::MAX / 10 {
1457                n = quo;
1458                s |= 1;
1459            }
1460
1461            (n as u64, s)
1462        } else {
1463            // If n is not divisible by 10^8, work with n itself.
1464            let mut n = mantissa;
1465            let mut s: i32 = 0;
1466            let mut quo: u64;
1467
1468            loop {
1469                quo = rotr64(n.wrapping_mul(MOD_INV_25_U64), 2);
1470                if quo <= u64::MAX / 100 {
1471                    n = quo;
1472                    s += 2;
1473                } else {
1474                    break;
1475                }
1476            }
1477
1478            quo = rotr64(n.wrapping_mul(MOD_INV_5_U64), 1);
1479            if quo <= u64::MAX / 10 {
1480                n = quo;
1481                s |= 1;
1482            }
1483
1484            (n, s)
1485        }
1486    }
1487
1488    #[inline(always)]
1489    fn check_div_pow10(n: u32) -> (u32, bool) {
1490        check_div_pow10!(n, 2, f64, F64_DIV10_INFO)
1491    }
1492
1493    #[inline(always)]
1494    fn div_pow10(n: u32) -> u32 {
1495        div_pow10!(n, F64_DIV10_INFO)
1496    }
1497
1498    #[inline(always)]
1499    fn divide_by_pow10(n: u64, exp: u32, n_max: u64) -> u64 {
1500        divide_by_pow10_64(n, exp, n_max)
1501    }
1502}
1503
1504#[cfg(feature = "f16")]
1505macro_rules! dragonbox_unimpl {
1506    ($($t:ident)*) => ($(
1507        impl DragonboxFloat for $t {
1508            const KAPPA: u32 = 0;
1509            const DECIMAL_DIGITS: usize = 0;
1510
1511            type Power = u64;
1512
1513            #[inline(always)]
1514            fn digit_count(_: u64) -> usize {
1515                unimplemented!()
1516            }
1517
1518            #[inline(always)]
1519            unsafe fn write_digits(_: &mut [u8], _: u64) -> usize {
1520                unimplemented!()
1521            }
1522
1523            #[inline(always)]
1524            unsafe fn dragonbox_power(_: i32) -> Self::Power {
1525                unimplemented!()
1526            }
1527
1528            #[inline(always)]
1529            fn compute_left_endpoint(_: &Self::Power, _: i32) -> u64 {
1530                unimplemented!()
1531            }
1532
1533            #[inline(always)]
1534            fn compute_right_endpoint(_: &Self::Power, _: i32) -> u64 {
1535                unimplemented!()
1536            }
1537
1538            #[inline(always)]
1539            fn compute_round_up(_: &Self::Power, _: i32) -> (u64, bool) {
1540                unimplemented!()
1541            }
1542
1543            #[inline(always)]
1544            fn compute_mul(_: u64, _: &Self::Power) -> (u64, bool) {
1545                unimplemented!()
1546            }
1547
1548            #[inline(always)]
1549            fn compute_mul_parity(_: u64, _: &Self::Power, _: i32) -> (bool, bool) {
1550                unimplemented!()
1551            }
1552
1553            #[inline(always)]
1554            fn compute_delta(_: &Self::Power, _: i32) -> u32 {
1555                unimplemented!()
1556            }
1557
1558            #[inline(always)]
1559            fn process_trailing_zeros(_: u64, _: i32) -> (u64, i32) {
1560                unimplemented!()
1561            }
1562
1563            #[inline(always)]
1564            fn remove_trailing_zeros(_: u64) -> (u64, i32) {
1565                unimplemented!()
1566            }
1567
1568            #[inline(always)]
1569            fn check_div_pow10(_: u32) -> (u32, bool) {
1570                unimplemented!()
1571            }
1572
1573            #[inline(always)]
1574            fn div_pow10(_: u32) -> u32 {
1575                unimplemented!()
1576            }
1577        }
1578    )*);
1579}
1580
1581#[cfg(feature = "f16")]
1582dragonbox_unimpl! { bf16 f16 }
lexical_write_float/algorithm.rs

lexical_write_float/
algorithm.rs