use proconio::{fastout, input}; /// chmax, chmin sugar syntax pub trait Change { fn chmax(&mut self, x: Self); fn chmin(&mut self, x: Self); } impl Change for T { fn chmax(&mut self, x: T) { if *self < x { *self = x; } } fn chmin(&mut self, x: T) { if *self > x { *self = x; } } } use core::num::NonZeroU64; #[cfg(target_arch = "x86_64")] pub unsafe fn divrem_helper(x: u128, y: NonZeroU64) -> (u64, u64) { debug_assert!(((x >> 64) as u64) < y.get(), "division overflow"); let (mut quot, mut rem): (u64, u64); core::arch::asm!( "div {0}", in(reg) y.get(), inout("rax") (x as u64) => quot, inout("rdx") ((x >> 64) as u64) => rem, options(pure, nomem, nostack) ); (quot, rem) } #[cfg(not(target_arch = "x86_64"))] pub unsafe fn divrem_helper(a: u128, b: NonZeroU64) -> (u64, u64) { use core::num::NonZeroU128; ( (a / NonZeroU128::from(b)) as u64, (a % NonZeroU128::from(b)) as u64, ) } pub fn udivrem_shim(x: u128, y: u128) -> (u128, u128) { let (xh, xl) = ((x >> 64) as u64, x as u64); let (yh, yl) = ((y >> 64) as u64, y as u64); if yh == 0 { if xh >= yl { if yl == 0 { panic!() } let (qh, rh) = (xh / yl, xh % yl); let (ql, rl) = unsafe { divrem_helper( ((rh as u128) << 64) + (xl as u128), NonZeroU64::new_unchecked(yl), ) }; (((qh as u128) << 64) + (ql as u128), rl as u128) } else { let (q, r) = unsafe { divrem_helper(x, NonZeroU64::new_unchecked(yl)) }; (q as u128, r as u128) } } else { if xh >= yh { let s = yh.leading_zeros(); if s != 0 { let ys = y << s; let (ysh, _ysl) = ((ys >> 64) as u64, ys as u64); let xs = x >> (64 - s); let (q, _r) = unsafe { divrem_helper(xs, NonZeroU64::new_unchecked(ysh)) }; match x.overflowing_sub((q as u128) * y) { (r, false) => ((q as u128), r), (r, true) => (((q - 1) as u128), r.wrapping_add(y)), } } else { if xh <= yh && xl < yl { (0, x) } else { (1, x - y) } } } else { (0, x) } } } // calc sum_{i=0}^{n-1} floor((ai + b) / m) (mod 2^64) pub fn floor_sum_unsigned_mod64(mut n: u128, mut m: u128, mut a: u128, mut b: u128) -> u64 { if m == 0 { panic!(); } let mut ans = 0u64; // 2^64 <= max(n, m, a, b) < 2^128, a * n + b < 2^128, a < 2^64 while (n | m | a | b) >> 64 != 0 { if a >= m { let (q, r) = udivrem_shim(a, m); ans = ans.wrapping_add(((n * (n - 1) >> 1).wrapping_mul(q)) as u64); a = r; } if b >= m { let (q, r) = udivrem_shim(b, m); ans = ans.wrapping_add((n.wrapping_mul(q)) as u64); b = r; } let y = a * n + b; if y < m { return ans; } (n, b) = udivrem_shim(y, m); (m, a) = (a, m); } let (mut n, mut m, mut a, mut b) = (n as u64, m as u64, a as u64, b as u64); // 2^32 <= max(n, m, a, b) < 2^64 while (n | m | a | b) >> 32 != 0 { if a >= m { ans = ans.wrapping_add( ((n >> 1).wrapping_mul(if (n & 1) == 0 { n - 1 } else { n })).wrapping_mul(a / m), ); a %= m; } if b >= m { ans = ans.wrapping_add(n.wrapping_mul(b / m)); b %= m; } let y = (a as u128) * (n as u128) + (b as u128); if y < (m as u128) { return ans; } let (q, r) = udivrem_shim(y, m as u128); (n, b, m, a) = (q as u64, r as u64, a, m); } // max(n, m, a, b) < 2^32 loop { if a >= m { ans = ans.wrapping_add( ((n >> 1).wrapping_mul(if (n & 1) == 0 { n - 1 } else { n })).wrapping_mul(a / m), ); a %= m; } if b >= m { ans = ans.wrapping_add(n.wrapping_mul(b / m)); b %= m; } let y = a * n + b; if y < m { return ans; } (n, b, m, a) = (y / m, y % m, a, m); } } // calc min(floor(a * 2^s / b), 2^64 - 1) pub fn solve_div_helper(mut a: u128, b: u128, mut s: u32) -> u64 { debug_assert!(b < (1u128 << 127)); debug_assert!(s < 128); if b == 0 { // divide zero: return max_value return !0u64; } let mut ans = 0u64; loop { let t = s.min(a.leading_zeros()); s -= t; a <<= t; if ans > 0 { if ans.leading_zeros() < t { return !0u64; } ans <<= t; } let (q, r) = udivrem_shim(a, b); ans = match u64::try_from(q).ok().and_then(|q| ans.checked_add(q)) { Some(ans) => ans, None => return !0u64, }; a = r; if s == 0 { return ans; } } } pub fn calc(mut n: u64, d: u64, m: u64, s: u32) -> u64 { use std::cmp::Ordering::*; debug_assert!(n < (1u64 << 60)); debug_assert!(d < (1u64 << 60) && d > 0); debug_assert!(m < (1u64 << 60)); debug_assert!(s < 121); let (d, m) = (d as u128, m as u128); let (pow2s, dm) = (1u128 << s, d * m); n.chmin(solve_div_helper(d, dm.abs_diff(pow2s), s)); let n1 = (n + 1) as u128; match pow2s.cmp(&dm) { Equal => n, Less => n .wrapping_sub(floor_sum_unsigned_mod64(n1, pow2s, m, 0)) .wrapping_add(floor_sum_unsigned_mod64(n1, d, 1, 0)), Greater => n .wrapping_add(floor_sum_unsigned_mod64(n1, pow2s, m, 0)) .wrapping_sub(floor_sum_unsigned_mod64(n1, d, 1, 0)), } } #[fastout] fn main() { input! { q: u32 } for _ in 0..q { input! { n: u64, d: u64, m: u64, s: u32 } println!("{}", calc(n, d, m, s)); } }