/*
 * Primality testing of unsigned 64-bit integers.
 *
 * The program reads a query count Q followed by Q unsigned integers from
 * stdin and, for every integer x, prints "x 1" when x is reported prime by
 * the Baillie-PSW test and "x 0" otherwise.
 *
 * The file also carries the supporting toolbox: fast stdin/stdout helpers,
 * bit tricks, gcd / extended gcd, Montgomery and Barrett modular arithmetic,
 * integer square roots, Jacobi symbols and a Miller-Rabin test.
 */
#pragma GCC optimize("O3")
#pragma GCC target("avx2")
#pragma GCC optimize("fast-math")
#pragma GCC optimize("unroll-loops")
#define _GNU_SOURCE
#include <assert.h>
#include <inttypes.h>
#include <limits.h>
#include <math.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef __int128_t i128;
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef __uint128_t u128;
typedef float f32;
typedef double f64;
typedef long double f80;

#define PRIVATE static
#define PUBLIC

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

/* Exchange two lvalues of the same type.  There is deliberately no trailing
 * semicolon, so `if (c) SWAP(a, b); else ...` parses as expected. */
#define SWAP(a, b)                      \
    do {                                \
        __typeof__(a) swap_tmp_ = (a);  \
        (a) = (b);                      \
        (b) = swap_tmp_;                \
    } while (0)

/* Zero-safe wrappers: the GCC builtins are undefined for a zero argument. */
#define CTZ32(a) ((a) ? __builtin_ctz((a)) : (32))
#define CTZ64(a) ((a) ? __builtin_ctzll((a)) : (64))
#define CLZ32(a) ((a) ? __builtin_clz((a)) : (32))
#define CLZ64(a) ((a) ? __builtin_clzll((a)) : (64))
#define POPCNT32(a) ((a) ? __builtin_popcount((a)) : (0))
#define POPCNT64(a) ((a) ? __builtin_popcountll((a)) : (0))
#define BIT_WIDTH32(a) ((32) - CLZ32((a)))
#define BIT_WIDTH64(a) ((64) - CLZ64((a)))
#define BIT_FLOOR32(a) ((a) ? ((1u) << (BIT_WIDTH32((a)) - (1))) : (0))
#define BIT_FLOOR64(a) ((a) ? (((u64)1) << (BIT_WIDTH64((a)) - (1))) : (0))
/* NOTE: the result of BIT_CEIL32/64 is not representable (and the shift is
 * undefined) when the argument exceeds 2^31 / 2^63 respectively. */
#define BIT_CEIL32(a) (((a) <= 1) ? (1u) : ((1u) << BIT_WIDTH32((a) - (1))))
#define BIT_CEIL64(a) (((a) <= 1) ? ((u64)1) : (((u64)1) << BIT_WIDTH64((a) - (1))))
#define LSBit(a) ((a) & (-(a)))        /* isolate the lowest set bit */
#define CLSBit(a) ((a) & ((a) - (1)))  /* clear the lowest set bit   */
#define HAS_SINGLE_BIT32(a) (((a) != (0)) && (CLSBit((a)) == (0)))
#define HAS_SINGLE_BIT64(a) (((a) != (0)) && (CLSBit((a)) == (0)))

/* Rotations.  The shift-count arguments are fully parenthesised so that
 * expressions such as ROTL32(x, a + b) negate the whole count. */
#define _ROTL32_INNER(x, l) (((x) << (l)) | ((x) >> ((-(l)) & (31))))
#define _ROTR32_INNER(x, r) (((x) >> (r)) | ((x) << ((-(r)) & (31))))
#define _ROTL64_INNER(x, l) (((x) << (l)) | ((x) >> ((-(l)) & (63))))
#define _ROTR64_INNER(x, r) (((x) >> (r)) | ((x) << ((-(r)) & (63))))
#define ROTR32(x, r)                                                   \
    (((r) < (0)) ? (_ROTL32_INNER((x), ((u64)(-(r)) % (32))))          \
                 : (_ROTR32_INNER((x), ((r) % (32)))))
#define ROTL32(x, l) ROTR32((x), (-(l)))
#define ROTR64(x, r)                                                   \
    (((r) < (0)) ? (_ROTL64_INNER((x), ((u64)(-(r)) % (64))))          \
                 : (_ROTR64_INNER((x), ((r) % (64)))))
#define ROTL64(x, l) ROTR64((x), (-(l)))

/* ------------------------------------------------------------------------
 * Fast stdin readers
 * ---------------------------------------------------------------------- */

/* Discard input up to the next decimal digit and return that digit, or EOF
 * when the stream ends first.  Every '-' that is skipped toggles *negative
 * (when negative is non-NULL). */
static inline int in_skip_to_digit(bool *negative) {
    int c = getchar_unlocked();
    while (c != EOF && (c < '0' || c > '9')) {
        if (c == '-' && negative != NULL) *negative = !*negative;
        c = getchar_unlocked();
    }
    return c;
}

/* Read the next run of decimal digits as a u64 (wrapping on overflow).
 * Returns 0 at end of input. */
static inline u64 in_magnitude64(bool *negative) {
    u64 x = 0;
    for (int c = in_skip_to_digit(negative); c >= '0' && c <= '9'; c = getchar_unlocked())
        x = x * 10 + (u64)(c - '0');
    return x;
}

/* 128-bit variant of in_magnitude64. */
static inline u128 in_magnitude128(bool *negative) {
    u128 x = 0;
    for (int c = in_skip_to_digit(negative); c >= '0' && c <= '9'; c = getchar_unlocked())
        x = x * 10 + (u128)(c - '0');
    return x;
}

/* Signed readers: the magnitude is accumulated in the unsigned type so that
 * the most negative value parses without signed overflow.  All readers
 * return 0 once stdin is exhausted instead of spinning forever. */
i32 in_i32(void) {
    bool negative = false;
    u32 magnitude = (u32)in_magnitude64(&negative);
    return (i32)(negative ? (u32)0 - magnitude : magnitude);
}

i64 in_i64(void) {
    bool negative = false;
    u64 magnitude = in_magnitude64(&negative);
    return (i64)(negative ? (u64)0 - magnitude : magnitude);
}

i128 in_i128(void) {
    bool negative = false;
    u128 magnitude = in_magnitude128(&negative);
    return (i128)(negative ? (u128)0 - magnitude : magnitude);
}

u32 in_u32(void) { return (u32)in_magnitude64(NULL); }

u64 in_u64(void) { return in_magnitude64(NULL); }

u128 in_u128(void) { return in_magnitude128(NULL); }

/* ------------------------------------------------------------------------
 * Fast stdout writers
 * ---------------------------------------------------------------------- */

/* Print x in decimal, without any separator. */
void out_u64(u64 x) {
    char digits[20]; /* 2^64 - 1 has 20 decimal digits */
    size_t len = 0;
    do {
        digits[len++] = (char)('0' + (int)(x % 10));
        x /= 10;
    } while (x != 0);
    while (len != 0) putchar_unlocked(digits[--len]);
}

void out_u128(u128 x) {
    char digits[39]; /* 2^128 - 1 has 39 decimal digits */
    size_t len = 0;
    do {
        digits[len++] = (char)('0' + (int)(x % 10));
        x /= 10;
    } while (x != 0);
    while (len != 0) putchar_unlocked(digits[--len]);
}

void out_u32(u32 x) { out_u64((u64)x); }

/* Signed writers print '-' followed by the magnitude, which is formed in the
 * unsigned type so the most negative value is handled correctly. */
void out_i32(i32 x) {
    u32 magnitude = (u32)x;
    if (x < 0) {
        putchar_unlocked('-');
        magnitude = (u32)0 - magnitude;
    }
    out_u32(magnitude);
}

void out_i64(i64 x) {
    u64 magnitude = (u64)x;
    if (x < 0) {
        putchar_unlocked('-');
        magnitude = (u64)0 - magnitude;
    }
    out_u64(magnitude);
}

void out_i128(i128 x) {
    u128 magnitude = (u128)x;
    if (x < 0) {
        putchar_unlocked('-');
        magnitude = (u128)0 - magnitude;
    }
    out_u128(magnitude);
}

void NL(void) { putchar_unlocked('\n'); }

void SP(void) { putchar_unlocked(' '); }

/* ------------------------------------------------------------------------
 * Debug dumps (bold cyan on stderr)
 * ---------------------------------------------------------------------- */

#define DUMP_FMT(spec) "\033[1;36m%" spec "\033[0m"

/*
 * Generates, for one element type:
 *   dump_<s>(x)                        - print one value and a newline
 *   dump_<s>_array_range(len, a, l, r) - print a[l..r] (inclusive, r clamped
 *                                        to len - 1) space separated, then a
 *                                        newline; prints nothing when the
 *                                        array or the range is empty
 *   dump_<s>_array(len, a)             - print the whole array the same way
 */
#define DEFINE_DUMP_FAMILY(suffix, type, spec)                                          \
    void dump_##suffix(type x) { fprintf(stderr, DUMP_FMT(spec) "\n", x); }             \
    void dump_##suffix##_array_range(size_t a_len, const type *a, size_t l, size_t r) { \
        if (a_len == 0) return; /* a_len - 1 would wrap and read out of bounds */       \
        if (a_len <= r) r = a_len - 1;                                                  \
        if (l > r) return;                                                              \
        for (size_t i = l; i < r; i++) fprintf(stderr, DUMP_FMT(spec) " ", a[i]);       \
        fprintf(stderr, DUMP_FMT(spec) "\n", a[r]);                                     \
    }                                                                                   \
    void dump_##suffix##_array(size_t a_len, const type *a) {                           \
        dump_##suffix##_array_range(a_len, a, 0, a_len ? a_len - 1 : 0);                \
    }

DEFINE_DUMP_FAMILY(i32, i32, PRId32)
DEFINE_DUMP_FAMILY(i64, i64, PRId64)
DEFINE_DUMP_FAMILY(u32, u32, PRIu32)
DEFINE_DUMP_FAMILY(u64, u64, PRIu64)

/* Print all 32 bits of v to stdout, most significant bit first. */
void printb_32bit(u32 v) {
    u32 mask = (u32)1 << (sizeof(v) * CHAR_BIT - 1);
    do {
        putchar_unlocked(mask & v ? '1' : '0');
    } while (mask >>= 1);
}

/* Print all 64 bits of v to stdout, most significant bit first. */
void printb_64bit(u64 v) {
    u64 mask = (u64)1 << (sizeof(v) * CHAR_BIT - 1);
    do {
        putchar_unlocked(mask & v ? '1' : '0');
    } while (mask >>= 1);
}

/* ------------------------------------------------------------------------
 * gcd / extended gcd / modular inverse
 * ---------------------------------------------------------------------- */

/* Binary gcd; gcd(0, b) == b and gcd(a, 0) == a. */
u32 gcd32(u32 a, u32 b) {
    if (!a || !b) return a | b;
    u32 sh = (u32)CTZ32(a | b); /* common power of two */
    a >>= CTZ32(a);
    do {
        b >>= CTZ32(b);
        if (a > b) SWAP(a, b);
        b -= a;
    } while (b);
    return a << sh;
}

u64 gcd64(u64 a, u64 b) {
    if (!a || !b) return a | b;
    u64 sh = (u64)CTZ64(a | b);
    a >>= CTZ64(a);
    do {
        b >>= CTZ64(b);
        if (a > b) SWAP(a, b);
        b -= a;
    } while (b);
    return a << sh;
}

/* Result of the extended Euclidean algorithm: f*x + s*y == t == gcd(x, y). */
typedef struct {
    i32 f, s;
    u32 t;
} Bezout32;

typedef struct {
    i64 f, s;
    u64 t;
} Bezout64;

/* Extended Euclid on (x, y).  The coefficients are kept in signed 32-bit
 * variables; NOTE(review): confirm callers keep the operands small enough
 * (below 2^31) for the intermediate products not to overflow. */
PRIVATE Bezout32 bezout32(u32 x, u32 y) {
    bool swap = x < y;
    if (swap) SWAP(x, y);
    if (y == 0) {
        if (x == 0) return (Bezout32){0, 0, 0};
        if (swap) return (Bezout32){0, 1, x};
        return (Bezout32){1, 0, x};
    }
    i32 s0 = 1, s1 = 0, t0 = 0, t1 = 1;
    while (true) {
        u32 q = x / y, r = x % y;
        if (r == 0) {
            /* undo the initial swap so f always belongs to the first argument */
            if (swap) return (Bezout32){t1, s1, y};
            return (Bezout32){s1, t1, y};
        }
        i32 s2 = s0 - (i32)(q)*s1, t2 = t0 - (i32)(q)*t1;
        x = y, y = r;
        s0 = s1, s1 = s2, t0 = t1, t1 = t2;
    }
}

PRIVATE Bezout64 bezout64(u64 x, u64 y) {
    bool swap = x < y;
    if (swap) SWAP(x, y);
    if (y == 0) {
        if (x == 0) return (Bezout64){0, 0, 0};
        if (swap) return (Bezout64){0, 1, x};
        return (Bezout64){1, 0, x};
    }
    i64 s0 = 1, s1 = 0, t0 = 0, t1 = 1;
    while (true) {
        u64 q = x / y, r = x % y;
        if (r == 0) {
            if (swap) return (Bezout64){t1, s1, y};
            return (Bezout64){s1, t1, y};
        }
        i64 s2 = s0 - (i64)(q)*s1, t2 = t0 - (i64)(q)*t1;
        x = y, y = r;
        s0 = s1, s1 = s2, t0 = t1, t1 = t2;
    }
}

/* Inverse of x modulo mod, in [0, mod).  Asserts that gcd(x, mod) == 1. */
PUBLIC u32 mod_inverse32(u32 x, u32 mod) {
    assert(gcd32(x, mod) == 1);
    Bezout32 b = bezout32(x, mod);
    assert(b.t == 1);
    return b.f < 0 ? mod + b.f : (u32)b.f;
}

PUBLIC u64 mod_inverse64(u64 x, u64 mod) {
    assert(gcd64(x, mod) == 1);
    Bezout64 b = bezout64(x, mod);
    assert(b.t == 1);
    return b.f < 0 ? mod + b.f : (u64)b.f;
}

/* ------------------------------------------------------------------------
 * Montgomery arithmetic, 32-bit modulus (R = 2^32)
 *
 * State is global: call Montgomery32(mod) before anything else.  Values in
 * Montgomery form are produced by To32 and converted back by From32.
 * Add32/Sub32 keep values in [0, 2N); mr32-based operations return [0, N).
 * ---------------------------------------------------------------------- */

PRIVATE u32 N_32, N2_32, NI_32, R1_32, R2_32, R3_32;

/* Select the modulus.  It must be odd (Montgomery reduction needs an inverse
 * modulo 2^32) and below 2^30 (so that 2N-range products stay reducible). */
PUBLIC void Montgomery32(u32 mod) {
    assert(mod < 1073741824u);
    assert((mod & 1u) != 0);
    N_32 = mod;
    N2_32 = mod << 1;
    /* Newton iteration for mod^-1 (mod 2^32): mod is its own inverse mod 8
     * and every step doubles the number of correct bits (3 -> 48 >= 32). */
    u32 inv = mod;
    for (int step = 0; step < 4; ++step) inv *= 2 - inv * mod;
    NI_32 = inv;
    R1_32 = UINT32_MAX % mod + 1;                 /* R   mod N */
    R2_32 = (u32)(UINT64_MAX % mod + 1);          /* R^2 mod N */
    R3_32 = (u32)(((u64)R1_32 * (u64)R2_32) % mod); /* R^3 mod N */
}

PUBLIC u32 get_mod32(void) { return N_32; }

PUBLIC u32 get_dmod32(void) { return N2_32; }

/* Montgomery form of 1. */
PUBLIC u32 get_one32(void) { return R1_32; }

/* Montgomery reduction: returns A * R^-1 mod N in [0, N) for A < N * 2^32. */
PRIVATE u32 mr32(u64 A) {
    u32 m = (u32)A * NI_32;
    u32 y = (u32)(A >> 32) - (u32)(((u64)m * N_32) >> 32);
    return (i32)y < 0 ? y + N_32 : y;
}

PUBLIC u32 To32(u32 a) { return mr32((u64)a * R2_32); }

PUBLIC u32 From32(u32 A) { return mr32((u64)A); }

/* Lazy addition: operands and result live in [0, 2N). */
PUBLIC u32 Add32(u32 A, u32 B) {
    A += B - N2_32;
    A += N2_32 & -(A >> 31); /* add 2N back when the subtraction went negative */
    return A;
}

PUBLIC u32 Sub32(u32 A, u32 B) {
    A -= B;
    A += N2_32 & -(A >> 31);
    return A;
}

/* "Strict" addition/subtraction: operands are reduced first, result in [0, N). */
PUBLIC u32 SAdd32(u32 A, u32 B) {
    A = A >= N_32 ? A % N_32 : A;
    B = B >= N_32 ? B % N_32 : B;
    A += B;
    A -= (A >= N_32 ? N_32 : 0);
    return A;
}

PUBLIC u32 SSub32(u32 A, u32 B) {
    A = A >= N_32 ? A % N_32 : A;
    B = B >= N_32 ? B % N_32 : B;
    A += (A < B ? N_32 : 0);
    A -= B;
    return A;
}

/* Additive inverse. */
PUBLIC u32 Min32(u32 A) { return SSub32(0, A); }

PUBLIC u32 Mul32(u32 A, u32 B) { return mr32((u64)A * B); }

PUBLIC u32 Square32(u32 A) { return mr32((u64)A * A); }

/* 2*A mod N in [0, N).  The operand is brought into [0, N) first so that a
 * lazy [0, 2N) input (e.g. an Add32 result) cannot escape the valid range. */
PUBLIC u32 Twice32(u32 A) {
    A = A >= N_32 ? A - N_32 : A;
    A <<= 1;
    return A >= N_32 ? A - N_32 : A;
}

/* A^k by binary exponentiation; A and the result are in Montgomery form. */
PUBLIC u32 Power32(u32 A, size_t k) {
    u32 result = R1_32;
    for (; k != 0; k >>= 1) {
        if (k & 1) result = Mul32(result, A);
        A = Square32(A);
    }
    return result;
}

/* Inverse in Montgomery form: (aR)^-1 * R^3 * R^-1 == a^-1 * R. */
PUBLIC u32 Inverse32(u32 A) { return mr32((u64)R3_32 * mod_inverse32(A, N_32)); }

PUBLIC u32 Div32(u32 A, u32 B) { return Mul32(A, Inverse32(B)); }

/* A / 2 mod N (N is odd, so an odd A is halved as (A + N) / 2). */
PUBLIC u32 Half32(u32 A) { return (A & 1) ? ((A >> 1) + (N_32 >> 1) + 1) : (A >> 1); }

/* Comparisons tolerate lazy [0, 2N) representatives. */
PUBLIC int Equal32(u32 A, u32 B) {
    u32 a = (A >= N_32) ? (A - N_32) : A;
    u32 b = (B >= N_32) ? (B - N_32) : B;
    return a == b ? 1 : 0;
}

PUBLIC int NotEqual32(u32 A, u32 B) {
    u32 a = (A >= N_32) ? (A - N_32) : A;
    u32 b = (B >= N_32) ? (B - N_32) : B;
    return a != b ? 1 : 0;
}

/* Read an unsigned integer from stdin and convert it to Montgomery form. */
PUBLIC u32 In32(void) { return To32(in_u32()); }

PUBLIC void Out32(u32 A) { out_u32(From32(A)); }

/* ------------------------------------------------------------------------
 * Montgomery arithmetic, 64-bit modulus (R = 2^64); mirrors the 32-bit API.
 * ---------------------------------------------------------------------- */

PRIVATE u64 N_64, N2_64, NI_64, R1_64, R2_64, R3_64;

/* Select the modulus: odd and below 2^62. */
PUBLIC void Montgomery64(u64 mod) {
    assert(mod < 4611686018427387904ull);
    assert((mod & 1u) != 0);
    N_64 = mod;
    N2_64 = mod << 1;
    u64 inv = mod;
    for (int step = 0; step < 5; ++step) inv *= 2 - inv * mod; /* 3 -> 96 bits */
    NI_64 = inv;
    R1_64 = UINT64_MAX % mod + 1;
    R2_64 = (u64)(~(u128)0 % mod + 1);
    R3_64 = (u64)(((u128)R1_64 * (u128)R2_64) % mod);
}

PUBLIC u64 get_mod64(void) { return N_64; }

PUBLIC u64 get_dmod64(void) { return N2_64; }

PUBLIC u64 get_one64(void) { return R1_64; }

PRIVATE u64 mr64(u128 A) {
    u64 m = (u64)A * NI_64;
    u64 y = (u64)(A >> 64) - (u64)(((u128)m * N_64) >> 64);
    return (i64)y < 0 ? y + N_64 : y;
}

PUBLIC u64 To64(u64 a) { return mr64((u128)a * R2_64); }

PUBLIC u64 From64(u64 A) { return mr64((u128)A); }

PUBLIC u64 Add64(u64 A, u64 B) {
    A += B - N2_64;
    A += N2_64 & -(A >> 63);
    return A;
}

PUBLIC u64 Sub64(u64 A, u64 B) {
    A -= B;
    A += N2_64 & -(A >> 63);
    return A;
}

PUBLIC u64 SAdd64(u64 A, u64 B) {
    A = A >= N_64 ? A % N_64 : A;
    B = B >= N_64 ? B % N_64 : B;
    A += B;
    A -= (A >= N_64 ? N_64 : 0);
    return A;
}

PUBLIC u64 SSub64(u64 A, u64 B) {
    A = A >= N_64 ? A % N_64 : A;
    B = B >= N_64 ? B % N_64 : B;
    A += (A < B ? N_64 : 0);
    A -= B;
    return A;
}

PUBLIC u64 Min64(u64 A) { return SSub64(0, A); }

PUBLIC u64 Mul64(u64 A, u64 B) { return mr64((u128)A * B); }

PUBLIC u64 Square64(u64 A) { return mr64((u128)A * A); }

PUBLIC u64 Twice64(u64 A) {
    A = A >= N_64 ? A - N_64 : A;
    A <<= 1;
    return A >= N_64 ? A - N_64 : A;
}

PUBLIC u64 Power64(u64 A, size_t k) {
    u64 result = R1_64;
    for (; k != 0; k >>= 1) {
        if (k & 1) result = Mul64(result, A);
        A = Square64(A);
    }
    return result;
}

PUBLIC u64 Inverse64(u64 A) { return mr64((u128)R3_64 * mod_inverse64(A, N_64)); }

PUBLIC u64 Div64(u64 A, u64 B) { return Mul64(A, Inverse64(B)); }

PUBLIC u64 Half64(u64 A) { return (A & 1) ? ((A >> 1) + (N_64 >> 1) + 1) : (A >> 1); }

PUBLIC int Equal64(u64 A, u64 B) {
    u64 a = (A >= N_64) ? (A - N_64) : A;
    u64 b = (B >= N_64) ? (B - N_64) : B;
    return a == b ? 1 : 0;
}

PUBLIC int NotEqual64(u64 A, u64 B) {
    u64 a = (A >= N_64) ? (A - N_64) : A;
    u64 b = (B >= N_64) ? (B - N_64) : B;
    return a != b ? 1 : 0;
}

PUBLIC u64 In64(void) { return To64(in_u64()); }

PUBLIC void Out64(u64 A) { out_u64(From64(A)); }

/* ------------------------------------------------------------------------
 * Barrett reduction, 32-bit modulus (64-bit intermediates)
 *
 * State is global: call new_br64(mod) first.  Operands of add/sub are
 * expected in [0, 2*mod); results are in [0, mod).
 * ---------------------------------------------------------------------- */

PRIVATE u64 m_b64, im_b64;
PRIVATE u64 divrem64[2] = {0}; /* [0] = quotient, [1] = remainder of the last division */

/* Select the modulus (must be non-zero) and precompute floor((2^64-1)/mod). */
PUBLIC void new_br64(u32 mod) {
    assert(mod != 0);
    m_b64 = (u64)mod;
    im_b64 = UINT64_MAX / (u64)mod;
}

PUBLIC u32 get_mod_br32(void) { return (u32)m_b64; }

/* Store lhs / m and lhs % m in divrem64.  The Barrett estimate is at most
 * one below the true quotient, hence the single correction step. */
PRIVATE void div_rem_br64(u64 lhs) {
    if (m_b64 == 1) {
        divrem64[0] = lhs;
        divrem64[1] = 0;
        return;
    }
    u64 q = (u64)(((u128)lhs * (u128)im_b64) >> 64);
    u64 r = lhs - q * m_b64;
    if (m_b64 <= r) {
        r -= m_b64;
        q += 1;
    }
    divrem64[0] = q;
    divrem64[1] = r;
}

/* (a + b) mod m.  Compares against m - b instead of forming a + b, which
 * would wrap for moduli above 2^31. */
PUBLIC u32 add_br32(u32 a, u32 b) {
    const u32 m = (u32)m_b64;
    a = a >= m ? a - m : a;
    b = b >= m ? b - m : b;
    return a >= m - b ? a - (m - b) : a + b;
}

PUBLIC u32 sub_br32(u32 a, u32 b) {
    const u32 m = (u32)m_b64;
    a = a >= m ? a - m : a;
    b = b >= m ? b - m : b;
    a += (a < b ? m : 0); /* may wrap; the subtraction below wraps back */
    a -= b;
    return a;
}

PUBLIC u32 min_br32(u32 a) { return sub_br32(0, a); }

PUBLIC u32 mul_br32(u32 a, u32 b) {
    div_rem_br64((u64)a * b);
    return (u32)divrem64[1];
}

PUBLIC u32 square_br32(u32 a) {
    div_rem_br64((u64)a * a);
    return (u32)divrem64[1];
}

PUBLIC u32 twice_br32(u32 a) { return mul_br32(a, 2); }

/* a^k mod m; returns 1 for k == 0. */
PUBLIC u32 power_br32(u32 a, size_t k) {
    u32 result = 1;
    for (; k != 0; k >>= 1) {
        if (k & 1) result = mul_br32(result, a);
        a = square_br32(a);
    }
    return result;
}

/* Inverse of a modulo m, or 0 when a is not invertible. */
PUBLIC u32 inverse_br32(u32 a) {
    if (gcd32(a, (u32)m_b64) != 1) return 0;
    return mod_inverse32(a, (u32)m_b64);
}

/* a / b mod m.  Returns UINT32_MAX when b == 0. */
PUBLIC u32 div_br32(u32 a, u32 b) {
    if (b == 0) return UINT32_MAX;
    return mul_br32(a, inverse_br32(b));
}

/* a / 2 mod m for odd m. */
PUBLIC u32 half_br32(u32 a) {
    return (a & 1) ? (u32)((a >> 1) + (m_b64 >> 1) + 1) : (a >> 1);
}

/* ------------------------------------------------------------------------
 * Barrett reduction, 64-bit modulus (128-bit intermediates)
 * ---------------------------------------------------------------------- */

PRIVATE u128 m_b128, im_b128;
PRIVATE u128 divrem128[2] = {0}; /* [0] = quotient, [1] = remainder */

/* Select the modulus (must be non-zero) and precompute floor((2^128-1)/mod). */
PUBLIC void new_br128(u64 mod) {
    assert(mod != 0);
    m_b128 = (u128)mod;
    im_b128 = ~(u128)0 / (u128)mod;
}

PUBLIC u64 get_mod_br64(void) { return (u64)m_b128; }

/* Store lhs / m and lhs % m in divrem128.  The quotient estimate is the high
 * 128 bits of the 256-bit product lhs * im, assembled from 64-bit limbs. */
PRIVATE void div_rem_br128(u128 lhs) {
    if (m_b128 == 1) {
        divrem128[0] = lhs;
        divrem128[1] = 0;
        return;
    }
    const u128 limb_mask = (u128)UINT64_MAX;
    const u128 carry_unit = (u128)1 << 64;
    const u128 lhs_hi = lhs >> 64, lhs_lo = lhs & limb_mask;
    const u128 im_hi = im_b128 >> 64, im_lo = im_b128 & limb_mask;

    u128 high = lhs_hi * im_hi;           /* weight 2^128 */
    u128 middle = (lhs_lo * im_lo) >> 64; /* weight 2^64  */

    /* A wrap of `middle` is worth 2^128 at weight 2^64, i.e. 2^64 in units
     * of `high`, so the carry goes into bit 64 of `high`. */
    u128 cross = lhs_hi * im_lo;
    middle += cross;
    if (middle < cross) high += carry_unit;
    cross = lhs_lo * im_hi;
    middle += cross;
    if (middle < cross) high += carry_unit;

    u128 q = high + (middle >> 64);
    u128 r = lhs - q * m_b128;
    if (m_b128 <= r) {
        r -= m_b128;
        q += 1;
    }
    divrem128[0] = q;
    divrem128[1] = r;
}

/* (a + b) mod m without forming a + b, which would wrap for m above 2^63. */
PUBLIC u64 add_br64(u64 a, u64 b) {
    const u64 m = (u64)m_b128;
    a = a >= m ? a - m : a;
    b = b >= m ? b - m : b;
    return a >= m - b ? a - (m - b) : a + b;
}

PUBLIC u64 sub_br64(u64 a, u64 b) {
    const u64 m = (u64)m_b128;
    a = a >= m ? a - m : a;
    b = b >= m ? b - m : b;
    a += (a < b ? m : 0); /* may wrap; the subtraction below wraps back */
    a -= b;
    return a;
}

PUBLIC u64 min_br64(u64 a) { return sub_br64(0, a); }

PUBLIC u64 mul_br64(u64 a, u64 b) {
    div_rem_br128((u128)a * b);
    return (u64)divrem128[1];
}

PUBLIC u64 square_br64(u64 a) {
    div_rem_br128((u128)a * a);
    return (u64)divrem128[1];
}

PUBLIC u64 twice_br64(u64 a) { return mul_br64(a, 2); }

PUBLIC u64 power_br64(u64 a, size_t k) {
    u64 result = 1;
    for (; k != 0; k >>= 1) {
        if (k & 1) result = mul_br64(result, a);
        a = square_br64(a);
    }
    return result;
}

/* Inverse of a modulo m, or 0 when a is not invertible. */
PUBLIC u64 inverse_br64(u64 a) {
    if (gcd64(a, (u64)m_b128) != 1) return 0;
    return mod_inverse64(a, (u64)m_b128);
}

/* a / b mod m.  Returns UINT64_MAX when b == 0. */
PUBLIC u64 div_br64(u64 a, u64 b) {
    if (b == 0) return UINT64_MAX;
    return mul_br64(a, inverse_br64(b));
}

PUBLIC u64 half_br64(u64 a) {
    return (a & 1) ? (u64)((a >> 1) + (m_b128 >> 1) + 1) : (a >> 1);
}

/* ------------------------------------------------------------------------
 * Integer square roots
 * ---------------------------------------------------------------------- */

/* floor(sqrt(n)) by Newton iteration from an upper-bound power-of-two guess.
 * NOTE: the return type is int, so roots of 2^31 or more (n >= 2^62) do not
 * fit; use floor_isqrt for the full u64 range. */
int isqrt(u64 n) {
    if (n == 0) return 0;
    u64 a = n;
    u64 b = 1;
    u64 c;
    /* Binary search for the magnitude: b becomes 2^k with n ~ a * 4^k. */
    if ((c = (a >> 32)) != 0) { a = c; b <<= 16; }
    if ((c = (a >> 16)) != 0) { a = c; b <<= 8; }
    if ((c = (a >> 8)) != 0) { a = c; b <<= 4; }
    if ((c = (a >> 4)) != 0) { a = c; b <<= 2; }
    if ((c = (a >> 2)) != 0) { a = c; b <<= 1; }
    /* Push the guess above sqrt(n) so the iteration descends monotonically. */
    if (a <= 1)
        b += b >> 1;
    else
        b <<= 1;
    do {
        a = b;
        b = (b + n / b) >> 1;
    } while (b < a);
    return (int)a;
}

enum isqrt_type {
    Floor = 0,
    Ceil = 1,
    Remain = 2,
};

/* Bit-by-bit square root of x.  Depending on mode returns floor(sqrt(x)),
 * ceil(sqrt(x)) or the remainder x - floor(sqrt(x))^2.  Returns 0 for x == 0
 * in every mode. */
static inline u64 floor_ceil_remain_isqrt(u64 x, enum isqrt_type mode) {
    if (x == 0) return 0;
    const u32 lz = (u32)__builtin_clzll(x);
    const u64 n = 32 - (lz >> 1); /* number of result bits to produce */
    const u64 s = (lz >> 1) << 1; /* leading zeros rounded down to an even count */
    const u64 t = n << 1;
    u128 a = (u128)x;
    u128 b = ((u128)1ull << 62) >> s;
    const u128 c = ((u128)1ull << 64) >> s;
    const u128 d = (((u128)1ull << 64) - 1) >> s;
    const u128 e = ((((u128)1ull << 64) - 1) << 65) >> s;
    for (u64 step = 0; step < n; ++step) {
        if (a >= b) {
            a -= b;
            b = ((b + b) & e) + c + (b & d);
        } else {
            b = ((b + b) & e) + (b & d);
        }
        a <<= 2;
    }
    const u64 root = (u64)(b >> t);
    const u64 remainder = (u64)(a >> t);
    if (mode == Floor) return root;
    if (mode == Ceil) return remainder ? 1ull + root : root;
    return remainder;
}

u64 floor_isqrt(u64 n) { return floor_ceil_remain_isqrt(n, Floor); }

u64 ceil_isqrt(u64 n) { return floor_ceil_remain_isqrt(n, Ceil); }

u64 remain_isqrt(u64 n) { return floor_ceil_remain_isqrt(n, Remain); }

/* True when n is a perfect square.  Two cheap residue filters (mod 32 and
 * mod 4095) reject most non-squares before the Newton square root runs. */
bool is_square(u64 n) {
    /* Bit p of the table is set iff p is a quadratic residue modulo 4095. */
    static const u64 SQTABLE_MOD4095[64] = {
        0x2001002010213ull,     0x4200001008028001ull, 0x20000010004ull,      0x80200082010ull,
        0x1800008200044029ull,  0x120080000010ull,     0x2200000080410400ull, 0x8100041000200800ull,
        0x800004000020100ull,   0x402000400082201ull,  0x9004000040ull,       0x800002000880ull,
        0x18002000012000ull,    0x801208ull,           0x26100000804010ull,   0x80000080000002ull,
        0x108040040101045ull,   0x20c00004000102ull,   0x400000100c0010ull,   0x1300000040208ull,
        0x804000020010000ull,   0x1008402002400080ull, 0x201001000200040ull,  0x4402000000806000ull,
        0x10402000000ull,       0x1040008001200801ull, 0x4080000000020400ull, 0x10083080000002ull,
        0x8220140000040000ull,  0x800084020100000ull,  0x80010400010000ull,   0x1200020108008060ull,
        0x180000000ull,         0x400002400000018ull,  0x4241000200ull,       0x100800000000ull,
        0x10201008400483ull,    0xc008000208201000ull, 0x800420000100ull,     0x2010002000410ull,
        0x28041000000ull,       0x4010080000024ull,    0x400480010010080ull,  0x200040028000008ull,
        0x100810084020ull,      0x20c0401000080000ull, 0x1000240000220000ull, 0x4000020800ull,
        0x410000000480000ull,   0x8004008000804201ull, 0x806020000104000ull,  0x2080002000211000ull,
        0x1001008001000ull,     0x20000010024000ull,   0x480200002040000ull,  0x48200044008000ull,
        0x100000000010080ull,   0x80090400042ull,      0x41040200800200ull,   0x4000020100110ull,
        0x2000400082200010ull,  0x1008200000000040ull, 0x2004800002ull,       0x2002010000080ull};

    if (n <= 1) return true;
    /* Squares modulo 32 are 0, 1, 4, 9, 16, 17 and 25 only.  The inner
     * parentheses matter: `x & 1 != 1` would parse as `x & (1 != 1)`. */
    if (((0x02030213u >> ((u32)n & 31)) & 1u) == 0) return false;
    const size_t p = (size_t)(n % 4095);
    if (((SQTABLE_MOD4095[p >> 6] >> (p & 63)) & 1u) == 0) return false;

    /* Newton iteration for floor(sqrt(n)), started from 2^k >= sqrt(n). */
    const size_t k = 32 - ((size_t)CLZ64(n - 1) >> 1);
    u64 s = (u64)1 << k;
    u64 t = (s + (n >> k)) >> 1;
    while (t < s) {
        s = t;
        t = (s + n / s) >> 1;
    }
    return s * s == n;
}

/* ------------------------------------------------------------------------
 * Jacobi symbols
 * ---------------------------------------------------------------------- */

/* Jacobi symbol (a / n) for odd positive n; returns -1, 0 or 1.
 * NOTE: a == INT32_MIN is not supported (it cannot be negated). */
int jacobi_symbol32(i32 a, i32 n) {
    int j = 1;
    while (a) {
        if (a < 0) {
            a = -a;
            if ((n & 3) == 3) j = -j; /* (-1 / n) */
        }
        int s = CTZ32(a);
        a >>= s;
        if (((n & 7) == 3 || (n & 7) == 5) && (s & 1)) j = -j; /* (2 / n)^s */
        if ((a & n & 3) == 3) j = -j;                          /* reciprocity */
        SWAP(a, n);
        a %= n;
        if (a > n / 2) a -= n; /* keep the residue small in magnitude */
    }
    return n == 1 ? j : 0;
}

int jacobi_symbol64(i64 a, i64 n) {
    int j = 1;
    while (a) {
        if (a < 0) {
            a = -a;
            if ((n & 3) == 3) j = -j;
        }
        int s = CTZ64(a);
        a >>= s;
        if (((n & 7) == 3 || (n & 7) == 5) && (s & 1)) j = -j;
        if ((a & n & 3) == 3) j = -j;
        SWAP(a, n);
        a %= n;
        if (a > n / 2) a -= n;
    }
    return n == 1 ? j : 0;
}

/* Jacobi symbol (a / n) for an odd n > 1 that may exceed INT64_MAX.  Small n
 * is forwarded unchanged; for large n the sign, the factors of two and one
 * reciprocity step are peeled off by hand so that both arguments handed to
 * jacobi_symbol64 fit in an i64. */
PRIVATE int jacobi_i64_u64(i64 a, u64 n) {
    if (n <= (u64)INT64_MAX) return jacobi_symbol64(a, (i64)n);
    int j = 1;
    u64 m = (u64)a;
    if (a < 0) {
        m = (u64)0 - m;
        if ((n & 3) == 3) j = -j;
    }
    if (m == 0) return 0;
    const int s = CTZ64(m);
    m >>= s; /* m is now odd and at most 2^63 - 1 */
    if (((n & 7) == 3 || (n & 7) == 5) && (s & 1)) j = -j;
    if ((m & n & 3) == 3) j = -j;
    return j * jacobi_symbol64((i64)(n % m), (i64)m);
}

/* ------------------------------------------------------------------------
 * Primality tests
 * ---------------------------------------------------------------------- */

/* Miller-Rabin on odd n < 2^62 with the given witness bases (Montgomery
 * arithmetic).  Bases are reduced modulo n; a base that reduces to 0 carries
 * no information and is skipped. */
PRIVATE bool _miller_rabin(u64 n, size_t bases_len, u64 bases[]) {
    const u64 s = (u64)CTZ64(n - 1);
    const u64 d = (n - 1) >> s;
    Montgomery64(n);
    const u64 one = get_one64();
    const u64 minus_one = n - one;
    for (size_t i = 0; i < bases_len; ++i) {
        const u64 base = bases[i] % n;
        if (base == 0) continue;
        u64 a = Power64(To64(base), d);
        if (a == one) continue;
        u64 r = 1;
        while (a != minus_one) {
            if (r == s) return false;
            a = Square64(a);
            ++r;
        }
    }
    return true;
}

/* Miller-Rabin on odd n >= 2^62 with the 7-base deterministic set, using
 * Barrett arithmetic (Montgomery64 cannot take such moduli). */
PRIVATE bool _miller_rabin_br(u64 n) {
    new_br128(n);
    const u64 s = (u64)CTZ64(n - 1);
    const u64 d = (n - 1) >> s;
    const u64 bases[7] = {2ull, 325ull, 9375ull, 28178ull, 450775ull, 9780504ull, 1795265022ull};
    for (size_t i = 0; i < 7; ++i) {
        u64 a = power_br64(bases[i], d);
        if (a == 1) continue;
        u64 r = 1;
        while (a != n - 1) {
            if (r == s) return false;
            a = square_br64(a);
            ++r;
        }
    }
    return true;
}

/* Deterministic Miller-Rabin primality test for any u64. */
PUBLIC bool miller_rabin(u64 n) {
    /* Bit p of the constant is set iff p < 64 is prime. */
    if (n < 64ull) return ((1ull << n) & 2891462833508853932ull) != 0;
    if (!(n & 1)) return false;
    if (n < 1073741824ull) {
        u64 bases[3] = {2ull, 7ull, 61ull};
        return _miller_rabin(n, 3, bases);
    }
    if (n < 4611686018427387904ull) {
        u64 bases[7] = {2ull, 325ull, 9375ull, 28178ull, 450775ull, 9780504ull, 1795265022ull};
        return _miller_rabin(n, 7, bases);
    }
    return _miller_rabin_br(n);
}

/* Baillie-PSW primality test: a base-2 strong probable-prime test followed
 * by a strong Lucas test with P = 1 and D taken from 5, -7, 9, -11, ...
 * (first D with Jacobi symbol (D / n) == -1), Q = (1 - D) / 4. */
PUBLIC bool baillie_psw(u64 n) {
    if (n < 64ull) return ((1ull << n) & 2891462833508853932ull) != 0;
    if (!(n & 1)) return false;
    /* 2^64 - 1 = 3 * 5 * 17 * 257 * 641 * 65537 * 6700417; rejecting it here
     * also keeps n + 1 below from wrapping to zero. */
    if (n == UINT64_MAX) return false;
    new_br128(n);

    /* --- base-2 strong probable-prime test --- */
    {
        /* Left-align n - 1 and walk its bits below the top one; the loop ends
         * after the lowest set bit, leaving t = 2^(odd part of n - 1). */
        u64 d = (n - 1) << CLZ64(n - 1);
        u64 t = 2ull;
        for (d <<= 1; d; d <<= 1) {
            t = square_br64(t);
            if (d >> 63) t = twice_br64(t);
        }
        if (t != 1) {
            const u64 minus_one = n - 1;
            u64 x = LSBit(n - 1);
            for (x >>= 1; t != minus_one; x >>= 1) {
                if (x == 0) return false;
                t = square_br64(t);
            }
        }
    }

    /* --- strong Lucas probable-prime test --- */
    {
        i64 D = 5;
        for (int i = 0; jacobi_i64_u64(D, n) != -1 && i < 64; ++i) {
            /* A perfect square never yields (D / n) == -1; bail out once. */
            if (i == 32 && is_square(n)) return false;
            if (i & 1)
                D -= 2;
            else
                D += 2;
            D = -D;
        }
        const u64 Q = (D < 0) ? ((u64)((1 - D) / 4) % n) : (n - (u64)((D - 1) / 4) % n);
        /* D reduced into [0, n) using unsigned arithmetic, so moduli at or
         * above 2^63 are handled correctly. */
        const u64 D_magnitude = (D < 0) ? (u64)(-D) : (u64)D;
        const u64 D_mod = (D < 0) ? min_br64(D_magnitude % n) : (D_magnitude % n);

        u64 u = 1, v = 1, Qn = Q;
        u64 k = (n + 1) << CLZ64(n + 1);
        for (k <<= 1; k; k <<= 1) {
            /* index doubling: U_2k = U_k V_k, V_2k = V_k^2 - 2 Q^k */
            u = mul_br64(u, v);
            v = sub_br64(square_br64(v), twice_br64(Qn));
            Qn = square_br64(Qn);
            if (k >> 63) {
                /* index + 1 with P = 1: U' = (U + V) / 2, V' = (D U + V) / 2 */
                const u64 next_u = half_br64(add_br64(u, v));
                v = half_br64(add_br64(mul_br64(D_mod, u), v));
                u = next_u;
                Qn = mul_br64(Qn, Q);
            }
        }
        if (u == 0 || v == 0) return true;
        u64 x = (n + 1) & ~n; /* lowest set bit of n + 1 */
        for (x >>= 1; x; x >>= 1) {
            u = mul_br64(u, v);
            v = sub_br64(square_br64(v), twice_br64(Qn));
            if (v == 0) return true;
            Qn = square_br64(Qn);
        }
    }
    return false;
}

/* Reads Q, then Q unsigned integers; prints "<x> <1|0>" per line, where the
 * flag is the Baillie-PSW verdict. */
int main(int argc, char *argv[]) {
    (void)argc;
    (void)argv;
    int Q = in_i32();
    while (Q-- > 0) {
        u64 x = in_u64();
        out_u64(x);
        SP();
        putchar_unlocked(baillie_psw(x) ? '1' : '0');
        NL();
    }
    return 0;
}