#977575 (Rust) No.2440 Accuracy of Integer Division Approximate Functions

提出ソース

結果

問題	No.2440 Accuracy of Integer Division Approximate Functions
ユーザー	👑 Mizar
提出日時	2024-04-27 01:48:30
言語	Rust (1.93.0 + proconio + num + itertools) コンパイル: `/usr/bin/rustc_custom` 実行: `./target/release/main`
結果	AC
実行時間	8 ms / 2,000 ms
コード長	6,250 bytes
記録記録タグの例: 初AC ショートコード純ショートコード純主流ショートコード最速実行時間
コンパイル時間	13,471 ms
コンパイル使用メモリ	406,640 KB
実行使用メモリ	6,820 KB
最終ジャッジ日時	2024-11-14 18:19:59
合計ジャッジ時間	15,261 ms
ジャッジサーバーID （参考情報）	judge1 / judge5

このコードへのチャレンジ
（要ログイン）

ファイルパターン	結果
sample	AC * 1
other	AC * 20

権限があれば一括ダウンロードができます

ソースコード

raw source code

use proconio::{fastout, input};

/// chmax, chmin sugar syntax
pub trait Change {
    fn chmax(&mut self, x: Self);
    fn chmin(&mut self, x: Self);
}
impl<T: PartialOrd> Change for T {
    fn chmax(&mut self, x: T) {
        if *self < x {
            *self = x;
        }
    }
    fn chmin(&mut self, x: T) {
        if *self > x {
            *self = x;
        }
    }
}

use core::num::NonZeroU64;
#[cfg(target_arch = "x86_64")]
pub unsafe fn divrem_helper(x: u128, y: NonZeroU64) -> (u64, u64) {
    debug_assert!(((x >> 64) as u64) < y.get(), "division overflow");
    let (mut quot, mut rem): (u64, u64);
    core::arch::asm!(
        "div {0}",
        in(reg) y.get(),
        inout("rax") (x as u64) => quot,
        inout("rdx") ((x >> 64) as u64) => rem,
        options(pure, nomem, nostack)
    );
    (quot, rem)
}
#[cfg(not(target_arch = "x86_64"))]
pub unsafe fn divrem_helper(a: u128, b: NonZeroU64) -> (u64, u64) {
    use core::num::NonZeroU128;
    (
        (a / NonZeroU128::from(b)) as u64,
        (a % NonZeroU128::from(b)) as u64,
    )
}

pub fn udivrem_shim(x: u128, y: u128) -> (u128, u128) {
    let (xh, xl) = ((x >> 64) as u64, x as u64);
    let (yh, yl) = ((y >> 64) as u64, y as u64);
    if yh == 0 {
        if xh >= yl {
            if yl == 0 {
                panic!()
            }
            let (qh, rh) = (xh / yl, xh % yl);
            let (ql, rl) = unsafe {
                divrem_helper(
                    ((rh as u128) << 64) + (xl as u128),
                    NonZeroU64::new_unchecked(yl),
                )
            };
            (((qh as u128) << 64) + (ql as u128), rl as u128)
        } else {
            let (q, r) = unsafe { divrem_helper(x, NonZeroU64::new_unchecked(yl)) };
            (q as u128, r as u128)
        }
    } else {
        if xh >= yh {
            let s = yh.leading_zeros();
            if s != 0 {
                let ys = y << s;
                let (ysh, _ysl) = ((ys >> 64) as u64, ys as u64);
                let xs = x >> (64 - s);
                let (q, _r) = unsafe { divrem_helper(xs, NonZeroU64::new_unchecked(ysh)) };
                match x.overflowing_sub((q as u128) * y) {
                    (r, false) => ((q as u128), r),
                    (r, true) => (((q - 1) as u128), r.wrapping_add(y)),
                }
            } else {
                if xh <= yh && xl < yl {
                    (0, x)
                } else {
                    (1, x - y)
                }
            }
        } else {
            (0, x)
        }
    }
}

// calc sum_{i=0}^{n-1} floor((ai + b) / m) (mod 2^64)
pub fn floor_sum_unsigned_mod64(mut n: u128, mut m: u128, mut a: u128, mut b: u128) -> u64 {
    if m == 0 {
        panic!();
    }
    let mut ans = 0u64;
    // 2^64 <= max(n, m, a, b) < 2^128, a * n + b < 2^128, a < 2^64
    while (n | m | a | b) >> 64 != 0 {
        if a >= m {
            let (q, r) = udivrem_shim(a, m);
            ans = ans.wrapping_add(((n * (n - 1) >> 1).wrapping_mul(q)) as u64);
            a = r;
        }
        if b >= m {
            let (q, r) = udivrem_shim(b, m);
            ans = ans.wrapping_add((n.wrapping_mul(q)) as u64);
            b = r;
        }
        let y = a * n + b;
        if y < m {
            return ans;
        }
        (n, b) = udivrem_shim(y, m);
        (m, a) = (a, m);
    }
    let (mut n, mut m, mut a, mut b) = (n as u64, m as u64, a as u64, b as u64);
    // 2^32 <= max(n, m, a, b) < 2^64
    while (n | m | a | b) >> 32 != 0 {
        if a >= m {
            ans = ans.wrapping_add(
                ((n >> 1).wrapping_mul(if (n & 1) == 0 { n - 1 } else { n })).wrapping_mul(a / m),
            );
            a %= m;
        }
        if b >= m {
            ans = ans.wrapping_add(n.wrapping_mul(b / m));
            b %= m;
        }
        let y = (a as u128) * (n as u128) + (b as u128);
        if y < (m as u128) {
            return ans;
        }
        let (q, r) = udivrem_shim(y, m as u128);
        (n, b, m, a) = (q as u64, r as u64, a, m);
    }
    // max(n, m, a, b) < 2^32
    loop {
        if a >= m {
            ans = ans.wrapping_add(
                ((n >> 1).wrapping_mul(if (n & 1) == 0 { n - 1 } else { n })).wrapping_mul(a / m),
            );
            a %= m;
        }
        if b >= m {
            ans = ans.wrapping_add(n.wrapping_mul(b / m));
            b %= m;
        }
        let y = a * n + b;
        if y < m {
            return ans;
        }
        (n, b, m, a) = (y / m, y % m, a, m);
    }
}

// calc min(floor(a * 2^s / b), 2^64 - 1)
pub fn solve_div_helper(mut a: u128, b: u128, mut s: u32) -> u64 {
    debug_assert!(b < (1u128 << 127));
    debug_assert!(s < 128);
    if b == 0 {
        // divide zero: return max_value
        return !0u64;
    }
    let mut ans = 0u64;
    loop {
        let t = s.min(a.leading_zeros());
        s -= t;
        a <<= t;
        if ans > 0 {
            if ans.leading_zeros() < t {
                return !0u64;
            }
            ans <<= t;
        }
        let (q, r) = udivrem_shim(a, b);
        ans = match u64::try_from(q).ok().and_then(|q| ans.checked_add(q)) {
            Some(ans) => ans,
            None => return !0u64,
        };
        a = r;
        if s == 0 {
            return ans;
        }
    }
}

pub fn calc(mut n: u64, d: u64, m: u64, s: u32) -> u64 {
    use std::cmp::Ordering::*;
    debug_assert!(n < (1u64 << 60));
    debug_assert!(d < (1u64 << 60) && d > 0);
    debug_assert!(m < (1u64 << 60));
    debug_assert!(s < 121);
    let (d, m) = (d as u128, m as u128);
    let (pow2s, dm) = (1u128 << s, d * m);
    n.chmin(solve_div_helper(d, dm.abs_diff(pow2s), s));
    let n1 = (n + 1) as u128;
    match pow2s.cmp(&dm) {
        Equal => n,
        Less => n
            .wrapping_sub(floor_sum_unsigned_mod64(n1, pow2s, m, 0))
            .wrapping_add(floor_sum_unsigned_mod64(n1, d, 1, 0)),
        Greater => n
            .wrapping_add(floor_sum_unsigned_mod64(n1, pow2s, m, 0))
            .wrapping_sub(floor_sum_unsigned_mod64(n1, d, 1, 0)),
    }
}

#[fastout]
fn main() {
    input! { q: u32 }
    for _ in 0..q {
        input! { n: u64, d: u64, m: u64, s: u32 }
        println!("{}", calc(n, d, m, s));
    }
}

yukicoder

結果

ソースコード