#1084056 (C++17) No.1496 不思議な数え上げ

提出ソース
結果

問題	No.1496 不思議な数え上げ
コンテスト
ユーザー	qwewe
提出日時	2025-05-14 13:16:34
言語	C++17 (gcc 13.3.0 + boost 1.87.0)
結果	TLE
実行時間	-
コード長	10,226 bytes
コンパイル時間	1,260 ms
コンパイル使用メモリ	106,468 KB
実行使用メモリ	33,836 KB
最終ジャッジ日時	2025-05-14 13:18:09
合計ジャッジ時間	7,724 ms
ジャッジサーバーID （参考情報）	judge2 / judge3
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 3
other	AC * 3 TLE * 1 -- * 35
権限があれば一括ダウンロードができます
ソースコード

raw source code
#include <iostream>
#include <vector>
#include <numeric>
#include <algorithm>
#include <cmath>

// Using pragmas or compiler flags to increase stack size might be necessary for large N due to deep recursion.
// Example for g++: Compile with -Wl,--stack,268435456 (sets stack size to 256MB)
// Example for MSVC: #pragma comment(linker, "/STACK:268435456")

using namespace std;

// Use long long for sums, counts, and threshold values as they can exceed 32-bit integer limits.
typedef long long ll;

// Constants for array sizes and logarithm calculations.
const int MAXN = 200005; // Maximum N + a small buffer
const int LOGN = 18; // Sufficient for N up to 2^18 > 200005. ceil(log2(MAXN))

int N; // Size of the permutation.

// Use vectors with size N+1 for 1-based indexing compatibility with problem statement.
vector<ll> P; // Stores the permutation P_1...P_N. Using ll is safe, though int might suffice for P values themselves.
vector<ll> A; // Stores the threshold values A_1...A_N. Needs ll as values can be large.
vector<ll> S; // Stores prefix sums S_0...S_N. S[k] = sum(P_1...P_k). Needs ll.
vector<ll> Ans; // Stores the final counts Ans_1...Ans_N. Needs ll as counts can be large.

// Sparse table for Range Minimum Query (RMQ). 
// st[k][i] stores the index of the minimum element in the range P[i...i + 2^k - 1].
vector<vector<int>> st;
// Precomputed floor(log base 2) values. log_table[x] = floor(log2(x)).
vector<int> log_table;

// Precomputes floor(log base 2) values for i from 1 to size.
// Used for O(1) calculation of k in RMQ query.
// Time complexity: O(size)
void compute_log_table(int size) {
    log_table.resize(size + 1);
    // log2(1) = 0. Start loop from 2.
    if (size >= 1) log_table[1] = 0; 
    for (int i = 2; i <= size; ++i) {
        // Basic property of logarithms: log2(i) = log2(i/2) + 1. Integer division handles floor.
        log_table[i] = log_table[i / 2] + 1;
    }
}

// Builds the sparse table for RMQ using dynamic programming.
// Time complexity: O(N log N)
void build_sparse_table(int size) {
    // Initialize sparse table dimensions. LOGN rows, size+1 columns.
    st.assign(LOGN, vector<int>(size + 1));
    // Base case: ranges of length 1 (2^0). The minimum element is the element itself.
    for (int i = 1; i <= size; ++i) {
        st[0][i] = i; // Store the index of the minimum element.
    }
    // Build table for ranges of length 2^k based on ranges of length 2^(k-1).
    for (int k = 1; k < LOGN; ++k) {
        // Iterate through all possible starting positions i for ranges of length 2^k.
        // The range is [i, i + 2^k - 1]. Ensure the range endpoint does not exceed N.
        // Maximum starting index i is size - (1 << k) + 1.
        for (int i = 1; i <= size - (1 << k) + 1; ++i) {
            // The range [i, i + 2^k - 1] is composed of two overlapping ranges of length 2^(k-1):
            // First half: [i, i + 2^(k-1) - 1]
            // Second half: [i + 2^(k-1), i + 2^k - 1]
            int idx1 = st[k - 1][i]; // Minimum index in the first half.
            int idx2 = st[k - 1][i + (1 << (k - 1))]; // Minimum index in the second half.
            // Compare the values P[idx1] and P[idx2] to find the minimum index for the combined range.
            // Tie-breaking rule: If P[idx1] == P[idx2], prefer the smaller index idx1. This is consistent.
            if (P[idx1] <= P[idx2]) { 
                st[k][i] = idx1;
            } else {
                st[k][i] = idx2;
            }
        }
    }
}

// Queries the sparse table for the index of the minimum element in the range P[L...R].
// Time complexity: O(1)
int query_rmq(int L, int R) {
     // Handle edge cases: empty or invalid range. Though valid calls shouldn't trigger this.
     if (L > R) return 0; 
     // Base case: range with a single element.
     if (L == R) return L; 
    // Calculate k = floor(log2(length of range)). Length is R - L + 1. Use precomputed log table.
    int k = log_table[R - L + 1];
    // The range [L, R] is covered by two ranges of length 2^k that might overlap:
    // Range 1: [L, L + 2^k - 1]
    // Range 2: [R - 2^k + 1, R]
    int idx1 = st[k][L]; // Minimum index in the first range.
    int idx2 = st[k][R - (1 << k) + 1]; // Minimum index in the second range.
    // Compare the values at these indices to find the overall minimum index for the range [L, R].
     if (P[idx1] <= P[idx2]) { // Apply consistent tie-breaking rule.
        return idx1;
    } else {
        return idx2;
    }
}

// Recursive function implementing the divide and conquer strategy.
// Processes the range [start, end], finds the minimum element P[k] = i,
// counts valid subarrays crossing k, and recursively calls for left/right parts.
// Time complexity analysis: Each element is processed when it's the minimum in a range.
// The counting part takes O(M log M) where M is the size of the larger half.
// Summing over all recursive calls gives O(N log^2 N).
void solve(int start, int end) {
    if (start > end) { // Base case: If the range is empty, do nothing.
        return;
    }
    
    // Find the index k of the minimum element P[k] within the current range [start, end].
    int k = query_rmq(start, end); 
    // The value of this minimum element is i = P[k].
    ll i = P[k]; 
    
    // Calculate the lengths of the left part [start...k] and right part [k...end].
    // Note that k itself is included in both conceptual parts for the purpose of defining subarrays crossing k.
    int lenL = k - start + 1; // Number of elements from start to k, inclusive.
    int lenR = end - k + 1;   // Number of elements from k to end, inclusive.
    
    // Optimization: Iterate over the smaller half and perform queries on the data from the larger half.
    // This is known as the "smaller to larger" trick or similar to centroid decomposition logic.
    if (lenL <= lenR) { // Case 1: Left part [start...k] is smaller or equal in size.
        // Collect prefix sums S[R] for R in the right part [k...end].
        vector<ll> SR_values; 
        SR_values.reserve(lenR); // Optimize vector capacity.
        for (int R_idx = k; R_idx <= end; ++R_idx) {
            SR_values.push_back(S[R_idx]);
        }
        // Sort these values to enable efficient binary search (using std::upper_bound).
        sort(SR_values.begin(), SR_values.end());
        
        ll current_count = 0; // Accumulator for the count of valid subarrays with minimum P[k]=i crossing k.
        // Iterate through all possible start indices L_idx from start to k.
        for (int L_idx = start; L_idx <= k; ++L_idx) {
            // A subarray P[L_idx...R_idx] must satisfy: sum <= A[i].
            // Sum = S[R_idx] - S[L_idx - 1]. So, S[R_idx] - S[L_idx - 1] <= A[i].
            // Rearranging for querying S[R]: S[R_idx] <= A[i] + S[L_idx - 1].
            ll target = A[i] + S[L_idx - 1];
            // Use upper_bound to find the first element strictly greater than target.
            // The number of elements <= target is the distance from the beginning of the sorted vector.
            auto it = upper_bound(SR_values.begin(), SR_values.end(), target);
            current_count += (it - SR_values.begin());
        }
        // Add the count computed for this specific minimum value i=P[k] to the global answer array.
        Ans[i] += current_count;
        
    } else { // Case 2: Right part [k...end] is smaller (lenR < lenL).
        // Collect prefix sums S[L-1] for L in the left part [start...k].
        vector<ll> SL_minus_1_values; 
        SL_minus_1_values.reserve(lenL); // Optimize capacity.
        for (int L_idx = start; L_idx <= k; ++L_idx) {
            SL_minus_1_values.push_back(S[L_idx - 1]);
        }
         // Sort these values for binary search (using std::lower_bound).
        sort(SL_minus_1_values.begin(), SL_minus_1_values.end());
        
        ll current_count = 0; // Accumulator.
         // Iterate through all possible end indices R_idx from k to end.
        for (int R_idx = k; R_idx <= end; ++R_idx) {
             // Condition: S[R_idx] - S[L_idx - 1] <= A[i].
            // Rearranging for querying S[L-1]: S[L_idx - 1] >= S[R_idx] - A[i].
            ll target = S[R_idx] - A[i];
            // Use lower_bound to find the first element >= target.
            // The number of elements >= target is the count from this iterator to the end of the vector.
            auto it = lower_bound(SL_minus_1_values.begin(), SL_minus_1_values.end(), target);
            current_count += (SL_minus_1_values.end() - it);
        }
         // Add the count computed to the global answer array.
         Ans[i] += current_count;
    }
    
    // Recursively call 'solve' for the subproblems corresponding to ranges strictly to the left and right of k.
    // Subarrays fully contained within these ranges will be processed by these recursive calls.
    solve(start, k - 1); // Process the left subproblem.
    solve(k + 1, end);   // Process the right subproblem.
}


int main() {
    // Use fast I/O operations.
    ios_base::sync_with_stdio(false);
    cin.tie(NULL);

    // Read input N: the length of the permutation.
    cin >> N;
    
    // Resize vectors based on N. Use N+1 elements for 1-based indexing.
    P.resize(N + 1);
    A.resize(N + 1);
    S.resize(N + 1);
    Ans.resize(N + 1, 0); // Initialize answer counts to 0.

    // Read the permutation P_1...P_N.
    for (int i = 1; i <= N; ++i) {
        cin >> P[i];
    }
    // Read the threshold values A_1...A_N.
    for (int i = 1; i <= N; ++i) {
        cin >> A[i];
    }
    
    // Compute prefix sums S. S[k] = P_1 + ... + P_k. Set S[0] = 0.
    S[0] = 0;
    for (int i = 1; i <= N; ++i) {
        S[i] = S[i - 1] + P[i];
    }

    // Precompute necessary structures: log table and sparse table for RMQ.
    compute_log_table(N); // Pass N to size log_table correctly.
    build_sparse_table(N); // Pass N for correct bounds in sparse table construction.

    // Initiate the divide and conquer process starting with the full range [1, N].
    solve(1, N);

    // Output the final counts for each i from 1 to N, each on a new line.
    for (int i = 1; i <= N; ++i) {
        cout << Ans[i] << "\n";
    }

    return 0;
}
yukicoder

結果

ソースコード