結果

問題 No.3505 Sum of Prod of Root
コンテスト
ユーザー gojoxd
提出日時 2026-04-18 19:28:02
言語 C
(gcc 15.2.0)
コンパイル:
gcc-15 -O2 -DONLINE_JUDGE -o a.out _filename_ -lm
実行:
./a.out
結果
AC  
実行時間 299 ms / 2,000 ms
コード長 4,505 bytes
記録
記録タグの例:
初AC ショートコード 純ショートコード 純主流ショートコード 最速実行時間
コンパイル時間 610 ms
コンパイル使用メモリ 45,156 KB
実行使用メモリ 18,048 KB
最終ジャッジ日時 2026-04-18 19:28:23
合計ジャッジ時間 2,816 ms
ジャッジサーバーID
(参考情報)
judge3_1 / judge2_0
このコードへのチャレンジ
(要ログイン)
ファイルパターン 結果
sample AC * 1
other AC * 13
権限があれば一括ダウンロードができます

ソースコード

diff #
raw source code

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <math.h>

#define MOD 998244353ULL

uint64_t INV2, INV4, INV6, INV30;

// Binary exponentiation for modular inverse
uint64_t power(uint64_t base, uint64_t exp) {
    uint64_t res = 1;
    base %= MOD;
    while (exp > 0) {
        if (exp % 2 == 1) res = (res * base) % MOD;
        base = (base * base) % MOD;
        exp /= 2;
    }
    return res;
}

uint64_t inv(uint64_t n) {
    return power(n, MOD - 2);
}

// Precompute inverses to prevent calculating them inside the tight loop
void init_inv() {
    INV2 = inv(2);
    INV4 = inv(4);
    INV6 = inv(6);
    INV30 = inv(30);
}

// O(1) mathematical summation of: Sum_{i=1}^{M} i * floor(sqrt(i))
uint64_t S(uint64_t M) {
    if (M == 0) return 0;
    
    // Accurate 80-bit integer square root protection up to 10^18
    uint64_t X = (uint64_t)sqrtl((long double)M);
    while ((X + 1) * (X + 1) <= M) X++;
    while (X * X > M) X--;
    
    uint64_t sum = 0;
    
    // Compute full perfect-square blocks
    if (X >= 2) {
        uint64_t n = (X - 1) % MOD;
        uint64_t n_sq = (n * n) % MOD;
        
        uint64_t S2 = (n * (n + 1)) % MOD;
        S2 = (S2 * (2 * n + 1)) % MOD;
        S2 = (S2 * INV6) % MOD;
        
        uint64_t S3 = (n * (n + 1)) % MOD;
        S3 = (S3 * S3) % MOD;
        S3 = (S3 * INV4) % MOD;
        
        uint64_t S4 = (n * (n + 1)) % MOD;
        S4 = (S4 * (2 * n + 1)) % MOD;
        uint64_t poly = (3 * n_sq + 3 * n + MOD - 1) % MOD;
        S4 = (S4 * poly) % MOD;
        S4 = (S4 * INV30) % MOD;
        
        uint64_t part1 = (2 * S4) % MOD;
        uint64_t part2 = (3 * S3) % MOD;
        uint64_t part3 = S2;
        
        sum = (part1 + part2 + part3) % MOD;
    }
    
    // Compute the dangling remainder block up to M
    uint64_t count = (M - X * X + 1) % MOD;
    uint64_t sum_i = ((X * X % MOD) + (M % MOD)) % MOD;
    uint64_t term = (count * sum_i) % MOD;
    term = (term * INV2) % MOD;
    term = (term * (X % MOD)) % MOD;
    
    sum = (sum + term) % MOD;
    return sum;
}

// Safely compute x^k, returning UINT64_MAX on overflow
uint64_t safe_pow(uint64_t x, int k) {
    uint64_t p = 1;
    for (int i = 0; i < k; i++) {
        if (__builtin_mul_overflow(p, x, &p)) return UINT64_MAX;
    }
    return p;
}

uint64_t cp[1500000];
int cp_sz = 0;

int cmp(const void *a, const void *b) {
    uint64_t arg1 = *(const uint64_t *)a;
    uint64_t arg2 = *(const uint64_t *)b;
    if (arg1 < arg2) return -1;
    if (arg1 > arg2) return 1;
    return 0;
}

int main() {
    init_inv();
    
    uint64_t N;
    if (scanf("%llu", &N) != 1) return 0;
    
    // Step 1: Pre-generate all distinct transition boundary points
    cp[cp_sz++] = 1;
    cp[cp_sz++] = N + 1;
    
    for (int k = 3; k <= 59; k++) {
        for (uint64_t x = 2; ; x++) {
            uint64_t p = safe_pow(x, k);
            if (p == UINT64_MAX || p > N) break;
            cp[cp_sz++] = p;
        }
    }
    
    qsort(cp, cp_sz, sizeof(uint64_t), cmp);
    
    int unique_sz = 0;
    for (int i = 0; i < cp_sz; i++) {
        if (i == 0 || cp[i] != cp[i-1]) {
            cp[unique_sz++] = cp[i];
        }
    }
    cp_sz = unique_sz;
    
    // Step 2: Track bounds to bypass slow loop checks
    uint64_t v[60];
    uint64_t next_pow[60];
    for (int k = 3; k <= 59; k++) {
        v[k] = 1;
        next_pow[k] = safe_pow(2, k); // Next trigger point
    }
    
    uint64_t ans = 0;
    uint64_t prev_S = S(cp[0] - 1); // Caching initial state S(0)
    
    for (int j = 0; j < cp_sz - 1; j++) {
        uint64_t cur = cp[j];
        uint64_t nxt = cp[j+1];
        
        // Fast-forward tracking the floor evaluations only when bounds are crossed
        for (int k = 3; k <= 59; k++) {
            while (cur >= next_pow[k]) {
                v[k]++;
                next_pow[k] = safe_pow(v[k] + 1, k);
            }
        }
        
        uint64_t prod = 1;
        for (int k = 3; k <= 59; k++) {
            if (v[k] == 1) break; // Break early! Floor roots decline rapidly.
            prod = (prod * (v[k] % MOD)) % MOD;
        }
        
        // Calculate the chunk sum using the cached S value from the previous loop
        uint64_t curr_S = S(nxt - 1);
        uint64_t sum_val = (curr_S + MOD - prev_S) % MOD;
        
        uint64_t term = (prod * sum_val) % MOD;
        ans = (ans + term) % MOD;
        
        prev_S = curr_S;
    }
    
    printf("%llu\n", ans);
    return 0;
}
0