#include #include #include #include #define MOD 998244353ULL uint64_t INV2, INV4, INV6, INV30; // Binary exponentiation for modular inverse uint64_t power(uint64_t base, uint64_t exp) { uint64_t res = 1; base %= MOD; while (exp > 0) { if (exp % 2 == 1) res = (res * base) % MOD; base = (base * base) % MOD; exp /= 2; } return res; } uint64_t inv(uint64_t n) { return power(n, MOD - 2); } // Precompute inverses to prevent calculating them inside the tight loop void init_inv() { INV2 = inv(2); INV4 = inv(4); INV6 = inv(6); INV30 = inv(30); } // O(1) mathematical summation of: Sum_{i=1}^{M} i * floor(sqrt(i)) uint64_t S(uint64_t M) { if (M == 0) return 0; // Accurate 80-bit integer square root protection up to 10^18 uint64_t X = (uint64_t)sqrtl((long double)M); while ((X + 1) * (X + 1) <= M) X++; while (X * X > M) X--; uint64_t sum = 0; // Compute full perfect-square blocks if (X >= 2) { uint64_t n = (X - 1) % MOD; uint64_t n_sq = (n * n) % MOD; uint64_t S2 = (n * (n + 1)) % MOD; S2 = (S2 * (2 * n + 1)) % MOD; S2 = (S2 * INV6) % MOD; uint64_t S3 = (n * (n + 1)) % MOD; S3 = (S3 * S3) % MOD; S3 = (S3 * INV4) % MOD; uint64_t S4 = (n * (n + 1)) % MOD; S4 = (S4 * (2 * n + 1)) % MOD; uint64_t poly = (3 * n_sq + 3 * n + MOD - 1) % MOD; S4 = (S4 * poly) % MOD; S4 = (S4 * INV30) % MOD; uint64_t part1 = (2 * S4) % MOD; uint64_t part2 = (3 * S3) % MOD; uint64_t part3 = S2; sum = (part1 + part2 + part3) % MOD; } // Compute the dangling remainder block up to M uint64_t count = (M - X * X + 1) % MOD; uint64_t sum_i = ((X * X % MOD) + (M % MOD)) % MOD; uint64_t term = (count * sum_i) % MOD; term = (term * INV2) % MOD; term = (term * (X % MOD)) % MOD; sum = (sum + term) % MOD; return sum; } // Safely compute x^k, returning UINT64_MAX on overflow uint64_t safe_pow(uint64_t x, int k) { uint64_t p = 1; for (int i = 0; i < k; i++) { if (__builtin_mul_overflow(p, x, &p)) return UINT64_MAX; } return p; } uint64_t cp[1500000]; int cp_sz = 0; int cmp(const void *a, const void *b) { uint64_t arg1 = *(const uint64_t *)a; uint64_t arg2 = *(const uint64_t *)b; if (arg1 < arg2) return -1; if (arg1 > arg2) return 1; return 0; } int main() { init_inv(); uint64_t N; if (scanf("%llu", &N) != 1) return 0; // Step 1: Pre-generate all distinct transition boundary points cp[cp_sz++] = 1; cp[cp_sz++] = N + 1; for (int k = 3; k <= 59; k++) { for (uint64_t x = 2; ; x++) { uint64_t p = safe_pow(x, k); if (p == UINT64_MAX || p > N) break; cp[cp_sz++] = p; } } qsort(cp, cp_sz, sizeof(uint64_t), cmp); int unique_sz = 0; for (int i = 0; i < cp_sz; i++) { if (i == 0 || cp[i] != cp[i-1]) { cp[unique_sz++] = cp[i]; } } cp_sz = unique_sz; // Step 2: Track bounds to bypass slow loop checks uint64_t v[60]; uint64_t next_pow[60]; for (int k = 3; k <= 59; k++) { v[k] = 1; next_pow[k] = safe_pow(2, k); // Next trigger point } uint64_t ans = 0; uint64_t prev_S = S(cp[0] - 1); // Caching initial state S(0) for (int j = 0; j < cp_sz - 1; j++) { uint64_t cur = cp[j]; uint64_t nxt = cp[j+1]; // Fast-forward tracking the floor evaluations only when bounds are crossed for (int k = 3; k <= 59; k++) { while (cur >= next_pow[k]) { v[k]++; next_pow[k] = safe_pow(v[k] + 1, k); } } uint64_t prod = 1; for (int k = 3; k <= 59; k++) { if (v[k] == 1) break; // Break early! Floor roots decline rapidly. prod = (prod * (v[k] % MOD)) % MOD; } // Calculate the chunk sum using the cached S value from the previous loop uint64_t curr_S = S(nxt - 1); uint64_t sum_val = (curr_S + MOD - prev_S) % MOD; uint64_t term = (prod * sum_val) % MOD; ans = (ans + term) % MOD; prev_S = curr_S; } printf("%llu\n", ans); return 0; }