結果

問題 No.2440 Accuracy of Integer Division Approximate Functions
ユーザー 👑 MizarMizar
提出日時 2023-08-30 21:51:16
言語 Rust
(1.77.0)
結果
AC  
実行時間 8 ms / 2,000 ms
コード長 6,744 bytes
コンパイル時間 2,782 ms
コンパイル使用メモリ 153,548 KB
実行使用メモリ 4,380 KB
最終ジャッジ日時 2023-08-30 21:51:22
合計ジャッジ時間 4,255 ms
ジャッジサーバーID
(参考情報)
judge12 / judge15
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 1 ms
4,380 KB
testcase_01 AC 8 ms
4,380 KB
testcase_02 AC 7 ms
4,380 KB
testcase_03 AC 8 ms
4,376 KB
testcase_04 AC 7 ms
4,380 KB
testcase_05 AC 8 ms
4,380 KB
testcase_06 AC 6 ms
4,376 KB
testcase_07 AC 7 ms
4,380 KB
testcase_08 AC 7 ms
4,376 KB
testcase_09 AC 6 ms
4,380 KB
testcase_10 AC 7 ms
4,376 KB
testcase_11 AC 8 ms
4,380 KB
testcase_12 AC 8 ms
4,380 KB
testcase_13 AC 8 ms
4,380 KB
testcase_14 AC 7 ms
4,380 KB
testcase_15 AC 7 ms
4,380 KB
testcase_16 AC 5 ms
4,380 KB
testcase_17 AC 5 ms
4,376 KB
testcase_18 AC 5 ms
4,380 KB
testcase_19 AC 5 ms
4,380 KB
testcase_20 AC 5 ms
4,376 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

use core::num::NonZeroU64;
#[cfg(target_arch = "x86_64")]
pub unsafe fn divrem_helper(x: u128, y: NonZeroU64) -> (u64, u64) {
    debug_assert!(((x >> 64) as u64) < y.get(), "division overflow");
    let (mut quot, mut rem): (u64, u64);
    core::arch::asm!(
        "div {0}",
        in(reg) y.get(),
        inout("rax") (x as u64) => quot,
        inout("rdx") ((x >> 64) as u64) => rem,
        options(pure, nomem, nostack)
    );
    (quot, rem)
}
#[cfg(not(target_arch = "x86_64"))]
pub unsafe fn divrem_helper(a: u128, b: NonZeroU64) -> (u64, u64) {
    use core::num::NonZeroU128;
    (
        (a / NonZeroU128::from(b)) as u64,
        (a % NonZeroU128::from(b)) as u64,
    )
}

pub fn udivrem_shim(x: u128, y: u128) -> (u128, u128) {
    let (xh, xl) = ((x >> 64) as u64, x as u64);
    let (yh, yl) = ((y >> 64) as u64, y as u64);
    if yh == 0 {
        if xh >= yl {
            if yl == 0 {
                panic!()
            }
            let (qh, rh) = (xh / yl, xh % yl);
            let (ql, rl) = unsafe { divrem_helper(((rh as u128) << 64) + (xl as u128), NonZeroU64::new_unchecked(yl)) };
            (((qh as u128) << 64) + (ql as u128), rl as u128)
        } else {
            let (q, r) = unsafe { divrem_helper(x, NonZeroU64::new_unchecked(yl)) };
            (q as u128, r as u128)
        }
    } else {
        if xh >= yh {
            let s = yh.leading_zeros();
            if s != 0 {
                let ys = y << s;
                let (ysh, _ysl) = ((ys >> 64) as u64, ys as u64);
                let xs = x >> (64-s);
                let (q, _r) = unsafe { divrem_helper(xs, NonZeroU64::new_unchecked(ysh)) };
                match x.overflowing_sub((q as u128) * y) {
                    (r, false) => ((q as u128), r),
                    (r, true) => (((q-1) as u128), r.wrapping_add(y)),
                }
            } else {
                if xh <= yh && xl < yl {
                    (0, x)
                } else {
                    (1, x - y)
                }
            }
        } else {
            (0, x)
        }
    }
}

// calc sum_{i=0}^{n-1} floor((ai + b) / m) (mod 2^64)
pub fn floor_sum_unsigned_mod64(mut n: u128, mut m: u128, mut a: u128, mut b: u128) -> u64 {
    let mut ans = 0u64;
    // 2^64 <= max(n, m, a, b) < 2^128, a * n + b < 2^128, a < 2^64
    while (n | m | a | b) >> 64 != 0 {
        if a >= m {
            let (q, r) = udivrem_shim(a, m);
            ans = ans.wrapping_add(((n * (n - 1) >> 1).wrapping_mul(q)) as u64);
            a = r;
        }
        if b >= m {
            let (q, r) = udivrem_shim(b, m);
            ans = ans.wrapping_add((n.wrapping_mul(q)) as u64);
            b = r;
        }
        let y = a * n + b;
        if y < m {
            return ans;
        }
        (n, b) = udivrem_shim(y, m);
        (m, a) = (a, m);
    }
    let (mut n, mut m, mut a, mut b) = (n as u64, m as u64, a as u64, b as u64);
    // 2^32 <= max(n, m, a, b) < 2^64
    while (n | m | a | b) >> 32 != 0 {
        if a >= m {
            ans = ans.wrapping_add(
                ((n >> 1).wrapping_mul(if (n & 1) == 0 { n - 1 } else { n })).wrapping_mul(a / m),
            );
            a %= m;
        }
        if b >= m {
            ans = ans.wrapping_add(n.wrapping_mul(b / m));
            b %= m;
        }
        let y = (a as u128) * (n as u128) + (b as u128);
        if y < (m as u128) {
            return ans;
        }
        let (q, r) = udivrem_shim(y, m as u128);
        (n, b, m, a) = (q as u64, r as u64, a, m);
    }
    // max(n, m, a, b) < 2^32
    loop {
        if a >= m {
            ans = ans.wrapping_add(
                ((n >> 1).wrapping_mul(if (n & 1) == 0 { n - 1 } else { n })).wrapping_mul(a / m),
            );
            a %= m;
        }
        if b >= m {
            ans = ans.wrapping_add(n.wrapping_mul(b / m));
            b %= m;
        }
        let y = a * n + b;
        if y < m {
            return ans;
        }
        (n, b, m, a) = (y / m, y % m, a, m);
    }
}

// calc min(floor(a * 2^s / b), 2^64 - 1)
pub fn solve_div_helper(mut a: u128, b: u128, mut s: u32) -> u64 {
    debug_assert!(b < (1u128 << 127));
    debug_assert!(s < 128);
    if b == 0 {
        // divide zero: return max_value
        return !0u64;
    }
    let mut ans = 0u64;
    loop {
        let t = s.min(a.leading_zeros());
        s -= t;
        a <<= t;
        if ans > 0 {
            if ans.leading_zeros() < t {
                return !0u64;
            }
            ans <<= t;
        }
        ans = match u64::try_from(a / b).ok().and_then(|q| ans.checked_add(q)) {
            Some(ans) => ans,
            None => return !0u64,
        };
        a %= b;
        if s == 0 {
            return ans;
        }
    }
}

pub fn solve(mut n: u64, d: u64, m: u64, s: u32) -> u64 {
    use std::cmp::Ordering::*;
    debug_assert!(n < (1u64 << 60));
    debug_assert!(d < (1u64 << 60) && d > 0);
    debug_assert!(m < (1u64 << 60));
    debug_assert!(s < 121);
    let (d, m) = (d as u128, m as u128);
    let (pow2s, dm) = (1u128 << s, d * m);
    n.chmin(solve_div_helper(d, dm.abs_diff(pow2s), s));
    let n1 = (n + 1) as u128;
    match pow2s.cmp(&dm) {
        Equal => n,
        Less => n
            .wrapping_sub(floor_sum_unsigned_mod64(n1, pow2s, m, 0))
            .wrapping_add(floor_sum_unsigned_mod64(n1, d, 1, 0)),
        Greater => n
            .wrapping_add(floor_sum_unsigned_mod64(n1, pow2s, m, 0))
            .wrapping_sub(floor_sum_unsigned_mod64(n1, d, 1, 0)),
    }
}

fn main() {
    use std::io::prelude::*;
    let tins = std::time::Instant::now();
    let stdin = std::io::stdin();
    let stdout = std::io::stdout();
    let stdinlock = stdin.lock();
    let stdoutlock = stdout.lock();
    let mut bufwriter = std::io::BufWriter::new(stdoutlock);
    let mut lines = stdinlock.lines();
    let q = lines.next().unwrap().unwrap().parse::<usize>().unwrap();
    for _ in 0..q {
        let l = lines.next().unwrap().unwrap();
        let mut token = l.split_ascii_whitespace();
        let n = token.next().unwrap().parse::<u64>().unwrap();
        let d = token.next().unwrap().parse::<u64>().unwrap();
        let m = token.next().unwrap().parse::<u64>().unwrap();
        let s = token.next().unwrap().parse::<u32>().unwrap();
        writeln!(&mut bufwriter, "{}", solve(n, d, m, s)).unwrap();
    }
    eprintln!("{}us", tins.elapsed().as_micros());
}

trait Change {
    fn chmax(&mut self, x: Self);
    fn chmin(&mut self, x: Self);
}
impl<T: PartialOrd> Change for T {
    fn chmax(&mut self, x: T) {
        if *self < x {
            *self = x;
        }
    }
    fn chmin(&mut self, x: T) {
        if *self > x {
            *self = x;
        }
    }
}
0