結果

問題 No.1145 Sums of Powers
ユーザー qwewe
提出日時 2025-05-14 13:04:58
言語 C++17
(gcc 13.3.0 + boost 1.87.0)
結果
AC  
実行時間 611 ms / 2,000 ms
コード長 10,458 bytes
コンパイル時間 1,173 ms
コンパイル使用メモリ 88,404 KB
実行使用メモリ 18,200 KB
最終ジャッジ日時 2025-05-14 13:06:37
合計ジャッジ時間 3,745 ms
ジャッジサーバーID
(参考情報)
judge3 / judge2
このコードへのチャレンジ
(要ログイン)
ファイルパターン 結果
other AC * 6
権限があれば一括ダウンロードができます

ソースコード

diff #

#include <iostream>
#include <vector>
#include <numeric>
#include <algorithm> // for std::min, std::swap

// Define the modulus
const int MOD = 998244353;
// Define a primitive root for NTT
const int PRIMITIVE_ROOT = 3;

// Modular exponentiation: (base^exp) % MOD
long long power(long long base, long long exp) {
    long long res = 1;
    base %= MOD;
    while (exp > 0) {
        if (exp % 2 == 1) res = (res * base) % MOD;
        base = (base * base) % MOD;
        exp /= 2;
    }
    return res;
}

// Modular inverse: n^(MOD-2) % MOD using Fermat's Little Theorem
long long modInverse(long long n) {
    // Assumes n is non-zero and MOD is prime
    return power(n, MOD - 2);
}

// Number Theoretic Transform (NTT)
// Performs NTT or inverse NTT in place on vector 'a'
void ntt(std::vector<long long>& a, bool invert) {
    int n = a.size();
    if (n == 1) return; // Base case for recursion/smallest unit

    // Bit-reversal permutation: rearranges elements for iterative FFT
    // This puts elements in the order required by the butterfly operations
    for (int i = 1, j = 0; i < n; i++) {
        int bit = n >> 1;
        for (; j & bit; bit >>= 1)
            j ^= bit;
        j ^= bit;
        if (i < j)
            std::swap(a[i], a[j]);
    }
    
    // Iterative Cooley-Tukey FFT algorithm
    for (int len = 2; len <= n; len <<= 1) { // len is the current size of DFT blocks
        // Calculate the principal len-th root of unity
        long long wlen_pow = (MOD - 1) / len;
        if (invert) wlen_pow = MOD - 1 - wlen_pow; // For inverse NTT, use inverse root
        long long wlen = power(PRIMITIVE_ROOT, wlen_pow);
        
        // Perform butterfly operations for each block
        for (int i = 0; i < n; i += len) {
            long long w = 1; // Current power of the root of unity
            for (int j = 0; j < len / 2; j++) {
                // Apply butterfly operation:
                // u = a[i+j], v = w * a[i+j+len/2]
                // a[i+j] = u + v
                // a[i+j+len/2] = u - v
                long long u = a[i + j];
                long long v = (w * a[i + j + len / 2]) % MOD;
                a[i + j] = (u + v) % MOD;
                a[i + j + len / 2] = (u - v + MOD) % MOD; // Add MOD to handle potential negative result
                w = (w * wlen) % MOD; // Update root of unity power for next iteration
            }
        }
    }

    // If performing inverse NTT, scale the result by 1/n
    if (invert) {
        long long n_inv = modInverse(n);
        for (long long& x : a) {
            x = (x * n_inv) % MOD;
        }
    }
}

// Polynomial multiplication using NTT
// Multiplies polynomials 'a' and 'b', stores result in 'res'
void multiply(const std::vector<long long>& a, const std::vector<long long>& b, std::vector<long long>& res) {
    // Handle trivial cases like multiplication by polynomial 1 or 0
     if ((a.size() == 1 && a[0] == 0) || (b.size() == 1 && b[0] == 0)) { res = {0}; return; } // Multiply by 0
     if (a.size() == 1 && a[0] == 1) { res = b; return; } // Multiply by 1
     if (b.size() == 1 && b[0] == 1) { res = a; return; }
     if (a.empty() || b.empty()) { // Based on problem context, empty should mean polynomial 1.
         if (a.empty()) { res = b; return; } else { res = a; return; }
     }


    std::vector<long long> fa(a.begin(), a.end()), fb(b.begin(), b.end());
    int n = 1; // NTT length (power of 2)
    // Calculate the required size for convolution result
    int target_size = a.size() + b.size() - 1; 
    if (target_size <= 0) target_size = 1; // Ensure minimum size 1 for constant polynomials
    // Find the smallest power of 2 greater than or equal to target_size
    while (n < target_size) n <<= 1; 
    
    // Resize polynomials to NTT length, padding with zeros
    fa.resize(n);
    fb.resize(n);

    // Transform polynomials to frequency domain
    ntt(fa, false); 
    ntt(fb, false); 

    // Perform pointwise multiplication in frequency domain
    res.resize(n);
    for (int i = 0; i < n; i++) {
        res[i] = (fa[i] * fb[i]) % MOD;
    }

    // Transform result back to coefficient domain
    ntt(res, true); 
    
    // Trim result to the actual polynomial degree
    res.resize(target_size);
}

// Computes the product of linear factors (1 - A_i x) for A[l..r] using divide and conquer
// Returns the resulting polynomial P(x)
std::vector<long long> compute_product(const std::vector<int>& A, int l, int r) {
    // Base case: empty range, product is polynomial 1
    if (l > r) {
       return {1}; 
    }
    // Base case: single element range, product is (1 - A[l] x)
    if (l == r) {
        long long val = A[l];
        // Compute -A[l] mod MOD. Correctly handles A[l]=0.
        val = (MOD - val % MOD) % MOD; 
        if (val == MOD) val = 0; // Ensure -0 % MOD = 0
        return {1, val}; // Polynomial 1 + (-A[l])x
    }
    
    // Recursive step: split range, compute products for sub-ranges, multiply results
    int mid = l + (r - l) / 2;
    std::vector<long long> P_L = compute_product(A, l, mid);
    std::vector<long long> P_R = compute_product(A, mid + 1, r);
    
    std::vector<long long> result;
    multiply(P_L, P_R, result); // Multiply sub-results using NTT
    
    return result;
}

// Computes polynomial inverse InvP = P^{-1} mod x^M using Newton's method
// P is the input polynomial, M is the required precision
// Result is stored in InvP
void poly_inverse(const std::vector<long long>& P, int M, std::vector<long long>& InvP) {
    // Handle edge case M=0
    if (M == 0) { InvP.clear(); return; } 
    
    // Initial guess for inverse: InvP = P[0]^{-1} mod x^1. P[0] = 1 for this problem.
    InvP.assign(1, modInverse(P[0])); 
    int current_M = 1; // Current precision level (mod x^{current_M})

    // Iteratively double the precision using Newton's iteration
    while (current_M < M) {
        current_M *= 2; // Double precision target
        // Extract the relevant segment of P mod x^{current_M}
        std::vector<long long> P_segment(P.begin(), P.begin() + std::min((int)P.size(), current_M));
        
        // Keep a copy of the current inverse estimate (mod x^{current_M/2})
        std::vector<long long> InvP_copy = InvP; 
        // Extend the copy to size current_M for calculations
        InvP_copy.resize(current_M, 0); 

        // Compute P * InvP^2 mod x^{current_M}
        std::vector<long long> InvP_squared;
        multiply(InvP, InvP, InvP_squared); // Computes InvP^2 
        // Truncate InvP_squared to mod x^{current_M}
        if ((int)InvP_squared.size() > current_M) InvP_squared.resize(current_M); 
        
        std::vector<long long> P_InvP_squared;
        multiply(P_segment, InvP_squared, P_InvP_squared); // Computes P * InvP^2
        // Truncate P_InvP_squared to mod x^{current_M}
        if ((int)P_InvP_squared.size() > current_M) P_InvP_squared.resize(current_M);

        // Update InvP using Newton's iteration formula: InvP_{new} = 2*InvP_{old} - P*InvP_{old}^2 mod x^{current_M}
        InvP.resize(current_M); // Resize InvP to store the new estimate
        for (int i = 0; i < current_M; ++i) {
            long long two_InvP_i = (2 * InvP_copy[i]) % MOD; // 2 * InvP_{old} term
            long long term_i = (i < P_InvP_squared.size()) ? P_InvP_squared[i] : 0; // P*InvP_{old}^2 term
            InvP[i] = (two_InvP_i - term_i + MOD) % MOD; // Calculate new coefficient
        }
    }
    // Final result needs to be truncated to the requested precision M
    InvP.resize(M); 
}


int main() {
    // Faster I/O operations
    std::ios_base::sync_with_stdio(false);
    std::cin.tie(NULL);

    int N_raw, M; // N_raw is the initial length from input
    std::cin >> N_raw >> M;

    std::vector<int> A; // Store non-zero elements of the input array
    A.reserve(N_raw);
    for (int i = 0; i < N_raw; ++i) {
        int val;
        std::cin >> val;
        if (val != 0) { // Filter out zeros, as they don't contribute to sums of powers
            A.push_back(val);
        }
    }
    
    int N = A.size(); // N is the effective number of non-zero elements

    // Handle edge case M=0: No sums needed, output empty line.
    if (M == 0) { 
       std::cout << std::endl;
       return 0;
    }

    // Handle edge case N=0: If all A_i were 0, all sums S_K are 0.
    if (N == 0) { 
        for (int k = 1; k <= M; ++k) {
            std::cout << 0 << (k == M ? "" : " ");
        }
        std::cout << std::endl;
        return 0;
    }
    
    // Step 1: Compute the polynomial P(x) = product_{i=1..N} (1 - A_i x)
    // P(x) will have degree N, represented by a vector of size N+1. P[k] is coefficient of x^k.
    std::vector<long long> P = compute_product(A, 0, N - 1); 
    
    // Step 2: Compute the derivative Q(x) = P'(x)
    // Q(x) will have degree N-1, represented by a vector of size N. Q[k] is coefficient of x^k.
    std::vector<long long> Q(N); 
    for (int k = 1; k <= N; ++k) {
        // The coefficient of x^k in P(x) is P[k]. Its derivative contributes k*P[k]*x^{k-1}.
        // This is the coefficient Q[k-1] of x^{k-1} in Q(x).
        if (k < P.size()) // Check index bounds for safety
            Q[k - 1] = (k * P[k]) % MOD;
    }
    
    // Compute -Q(x) needed for the formula S(x) = -Q(x)/P(x)
    std::vector<long long> negQ(N);
    for(int i=0; i<N; ++i) {
        negQ[i] = (MOD - Q[i]) % MOD; // Calculate -Q[i] mod MOD
    }

    // Step 3: Compute the polynomial inverse InvP(x) = P(x)^{-1} mod x^M
    // InvP will have size M, representing the inverse polynomial truncated to degree M-1.
    std::vector<long long> InvP;
    poly_inverse(P, M, InvP); 

    // Step 4: Compute Temp(x) = -Q(x) * InvP(x) mod x^M
    // Temp(x) corresponds to the generating function S_1 + S_2 x + ... + S_M x^{M-1}
    std::vector<long long> Temp;
    multiply(negQ, InvP, Temp); // Polynomial multiplication
    // Resize Temp to M, padding with zeros if necessary. This ensures we have M coefficients.
    Temp.resize(M, 0); 

    // Step 5: Extract and print the required sums S_1, ..., S_M
    // The coefficient of x^{k-1} in Temp(x) is S_k.
    for (int k = 1; k <= M; ++k) {
        // Temp[k-1] holds the value of S_k. Access safely.
        long long Sk = (k - 1 < Temp.size()) ? Temp[k - 1] : 0; 
        std::cout << Sk << (k == M ? "" : " "); // Print with spaces, no trailing space
    }
    std::cout << std::endl; // Final newline

    return 0;
}
0