#1108074 (C++23) No.5022 XOR Printer

提出ソース
結果

問題	No.5022 XOR Printer
コンテスト
ユーザー	Ang1077
提出日時	2025-07-26 16:36:58
言語	C++23 (gcc 13.3.0 + boost 1.87.0)
結果	AC
実行時間	1,983 ms / 2,000 ms
コード長	13,491 bytes
コンパイル時間	4,982 ms
コンパイル使用メモリ	343,360 KB
実行使用メモリ	7,720 KB
スコア	5,193,786,499
最終ジャッジ日時	2025-07-26 16:39:07
合計ジャッジ時間	107,122 ms
ジャッジサーバーID （参考情報）	judge4 / judge5
	純コード判定しない問題か言語
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
other	AC * 50
権限があれば一括ダウンロードができます
ソースコード

raw source code
// #pragma GCC optimize
// "-O3,omit-frame-pointer,inline,unroll-all-loops,fast-math" #pragma GCC target
// "tune=native"
#include <bits/stdc++.h>
#include <ext/pb_ds/assoc_container.hpp>

using namespace std;

// ==== Typedefs / utils ====
using u8 = uint8_t;
using u32 = uint32_t;
using u64 = uint64_t;
using i64 = long long;

struct RNG {
    uint64_t s[2];
    RNG() {
        reset((uint64_t)chrono::steady_clock::now().time_since_epoch().count());
    }
    static inline uint64_t rotl(uint64_t x, int k) {
        return (x << k) | (x >> (64 - k));
    }
    void reset(uint64_t seed) {
        auto sm = [s = seed]() mutable {
            s += 0x9E3779B97f4A7C15ull;
            u64 z = s;
            z = (z ^ (z >> 30)) * 0xBF58476D1CE4E5B9ull;
            z = (z ^ (z >> 27)) * 0x94D049BB133111EBull;
            return z ^ (z >> 31);
        };
        s[0] = sm();
        s[1] = sm();
    }
    uint64_t next() {
        uint64_t s0 = s[0], s1 = s[1];
        uint64_t r = rotl(s0 * 5, 7) * 9;
        s1 ^= s0;
        s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16);
        s[1] = rotl(s1, 37);
        return r;
    }
    double randomDouble() { return (double)(uint32_t)next() / 4294967296.0; }
    uint32_t random32(uint32_t r) {
        return (uint64_t)(uint32_t)next() * r >> 32;
    }
} rng;

struct timer {
    chrono::high_resolution_clock::time_point t0;
    timer() { t0 = chrono::high_resolution_clock::now(); }
    float elapsed() const {
        return chrono::duration<float>(chrono::high_resolution_clock::now() -
                                       t0)
            .count();
    }
};

struct EvalResult {
    u64 sum = 0;
    vector<char> ops;
    vector<u32> Aflat; // final matrix (flattened by row-major)
};

// ==== Simulator (fast eval + emitting version) ====
struct Simulator {
    int N, T, NN;       // N=10, NN=100
    vector<u32> A0;     // flattened A
    vector<int> xr, yc; // coordinates per id
    vector<int> dist;   // dist[a*NN + b] = Manhattan
    // MSB-based pruning for 1C
    array<vector<int>, 20>
        msb_bucket; // indices of cells with given MSB (0..19)
    array<array<vector<int>, 20>, 100>
        near_msb; // near_msb[target][bit] = top-L closest ids

    // working state (per simulation)
    vector<u32> A;       // working grid (flattened)
    bitset<100> written; // whether W already done
    int cur;             // current id
    u32 s;               // register
    int used;            // used turns

    inline int DID(int a, int b) const { return dist[a * NN + b]; }

    void init_precompute(int N_, int T_, const vector<vector<u32>> &A2d,
                         int L = 12) {
        N = N_;
        T = T_;
        NN = N * N;
        A0.resize(NN);
        xr.resize(NN);
        yc.resize(NN);
        int id = 0;
        for (int i = 0; i < N; i++)
            for (int j = 0; j < N; j++) {
                A0[id] = A2d[i][j];
                xr[id] = i;
                yc[id] = j;
                ++id;
            }
        // distances
        dist.assign(NN * NN, 0);
        for (int a = 0; a < NN; a++)
            for (int b = 0; b < NN; b++)
                dist[a * NN + b] = abs(xr[a] - xr[b]) + abs(yc[a] - yc[b]);
        // msb buckets
        for (int b = 0; b < 20; b++)
            msb_bucket[b].clear();
        for (int i2 = 0; i2 < NN; i2++) {
            u32 v = A0[i2];
            if (v == 0)
                continue;
            int b = 31 - __builtin_clz(v);
            if (0 <= b && b < 20)
                msb_bucket[b].push_back(i2);
        }
        // near_msb precompute
        for (int t = 0; t < NN; t++) {
            for (int b = 0; b < 20; b++) {
                auto &src = msb_bucket[b];
                auto cmp = [&](int p, int q) { return DID(p, t) < DID(q, t); };
                if (src.empty()) {
                    near_msb[t][b].clear();
                    continue;
                }
                vector<int> tmp = src;
                if ((int)tmp.size() > L) {
                    nth_element(tmp.begin(), tmp.begin() + L, tmp.end(), cmp);
                    tmp.resize(L);
                    sort(tmp.begin(), tmp.end(), cmp);
                } else {
                    sort(tmp.begin(), tmp.end(), cmp);
                }
                near_msb[t][b] = move(tmp);
            }
        }
    }

    // candidate action for a target id, using current (cur, s, used)
    struct Cand {
        long long val;
        int cost;
        int mode;
        int pr;
    }; // mode: 0=0C, 1=1C, -1=none
    inline Cand best_action_for_target(int t) const {
        Cand best{LLONG_MIN, INT_MAX, -1, -1};
        int rem = T - used;

        // 0C
        {
            int c0 = DID(cur, t) + 1;
            if (c0 <= rem) {
                u32 nv = (A[t] ^ s);
                if ((long long)nv > (long long)A[t]) {
                    best = {(long long)nv, c0, 0, -1};
                }
            }
        }

        // 1C: focus on first highest zero-bit of base
        u32 base = A[t] ^ s;
        int k = -1;
        for (int b = 19; b >= 0; --b) {
            if (((base >> b) & 1u) == 0u) {
                k = b;
                break;
            }
        }
        auto try_bucket = [&](int bit) {
            if (bit < 0)
                return;
            auto const &cand = near_msb[t][bit];
            for (int pr : cand) {
                int c1 = DID(cur, pr) + 1 + DID(pr, t) + 1;
                if (c1 > rem)
                    continue;
                u32 s2 = s ^ A[pr];
                u32 nv = (A[t] ^ s2);
                if ((long long)nv <= (long long)A[t])
                    continue;
                if (nv > (u32)best.val ||
                    (nv == (u32)best.val && c1 < best.cost)) {
                    best = {(long long)nv, c1, 1, pr};
                }
            }
        };
        if (k >= 0) {
            try_bucket(k);
            if (best.mode < 0)
                try_bucket(k - 1);
        }

        return best;
    }

    // --- FAST evaluation (no ops emission) ---
    u64 simulate_value(const vector<int> &order) {
        A = A0;
        written.reset();
        cur = 0;
        s = 0;
        used = 0;

        bool progressed = true;
        while (progressed && used < T) {
            progressed = false;
            for (int id : order) {
                if (used >= T)
                    break;
                if (written.test(id))
                    continue;

                Cand cand = best_action_for_target(id);
                if (cand.mode < 0)
                    continue;

                if (cand.mode == 0) {
                    // move -> W
                    if (used + cand.cost > T)
                        continue;
                    used += DID(cur, id);
                    cur = id;
                    used += 1; // W
                    A[id] ^= s;
                    written.set(id);
                    progressed = true;
                } else {
                    // move pr -> C -> move t -> W
                    if (used + cand.cost > T)
                        continue;
                    used += DID(cur, cand.pr);
                    cur = cand.pr;
                    used += 1; // C
                    s ^= A[cand.pr];
                    used += DID(cur, id);
                    cur = id;
                    used += 1; // W
                    A[id] ^= s;
                    written.set(id);
                    progressed = true;
                }
            }
        }
        u64 sum = 0;
        for (int i = 0; i < NN; i++)
            sum += A[i];
        return sum;
    }

    // --- Full simulation with ops emission (UDLR + C/W) ---
    inline void emit_move_path(int from, int to, vector<char> &ops_out) {
        int dr = xr[to] - xr[from];
        int dc = yc[to] - yc[from];
        if (dr >= 0)
            for (int k = 0; k < dr; ++k)
                ops_out.push_back('D');
        else
            for (int k = 0; k < -dr; ++k)
                ops_out.push_back('U');
        if (dc >= 0)
            for (int k = 0; k < dc; ++k)
                ops_out.push_back('R');
        else
            for (int k = 0; k < -dc; ++k)
                ops_out.push_back('L');
    }

    EvalResult simulate_emit(const vector<int> &order) {
        A = A0;
        written.reset();
        cur = 0;
        s = 0;
        used = 0;
        vector<char> ops_out;
        ops_out.reserve(6000);

        bool progressed = true;
        while (progressed && used < T) {
            progressed = false;
            for (int id : order) {
                if (used >= T)
                    break;
                if (written.test(id))
                    continue;

                Cand cand = best_action_for_target(id);
                if (cand.mode < 0)
                    continue;
                if (used + cand.cost > T)
                    continue;

                if (cand.mode == 0) {
                    // move to t
                    emit_move_path(cur, id, ops_out);
                    used += DID(cur, id);
                    cur = id;
                    // W
                    ops_out.push_back('W');
                    ++used;
                    A[id] ^= s;
                    written.set(id);
                    progressed = true;
                } else {
                    // move to pr
                    emit_move_path(cur, cand.pr, ops_out);
                    used += DID(cur, cand.pr);
                    cur = cand.pr;
                    // C
                    ops_out.push_back('C');
                    ++used;
                    s ^= A[cand.pr];
                    // move to t
                    emit_move_path(cur, id, ops_out);
                    used += DID(cur, id);
                    cur = id;
                    // W
                    ops_out.push_back('W');
                    ++used;
                    A[id] ^= s;
                    written.set(id);
                    progressed = true;
                }
            }
        }

        EvalResult res;
        res.ops = move(ops_out);
        res.Aflat = A;
        res.sum = 0;
        for (int i = 0; i < NN; i++)
            res.sum += A[i];
        return res;
    }
};

// ==== Main with Simulated Annealing (adjacent swap / 2-point swap / 1-point
// insertion) ====
int main() {
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    int N, T;
    if (!(cin >> N >> T))
        return 0;
    vector<vector<u32>> A2(N, vector<u32>(N));
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
            cin >> A2[i][j];
    T = 1000;
    Simulator sim;
    sim.init_precompute(N, T, A2, /*L=*/12);

    vector<int> order(sim.NN);
    iota(order.begin(), order.end(), 0);

    timer tim;
    const double LIM = 1.98;
    const double T0 = 1e8, T1 = 1e-3;

    // initial
    u64 cur_sum = sim.simulate_value(order);
    u64 best_sum = cur_sum;
    vector<int> best_order = order;
    EvalResult best_emit =
        sim.simulate_emit(order); // initial feasible sequence

    // SA loop
    while (tim.elapsed() < LIM) {
        // temperature
        double ft = tim.elapsed() / LIM;
        double temp = T0 * pow(T1 / T0, ft);

        // Neighborhood: 0=adjacent swap, 1=two-point swap, 2=one-point
        // insertion
        vector<int> cand = order;
        u32 kind = rng.random32(3);
        if (kind == 0) {
            // adjacent swap
            if ((int)cand.size() >= 2) {
                int i = rng.random32((uint32_t)max(1, (int)cand.size() - 1));
                swap(cand[i], cand[i + 1]);
            }
        } else if (kind == 1) {
            // two-point swap (i != j)
            int n = (int)cand.size();
            if (n >= 2) {
                int i = rng.random32((uint32_t)n);
                int j = rng.random32((uint32_t)(n - 1));
                if (j >= i)
                    ++j;
                swap(cand[i], cand[j]);
            }
        } else {
            // one-point insertion
            int n = (int)cand.size();
            int i = rng.random32((uint32_t)n);
            int j = rng.random32((uint32_t)(n - 1));
            if (j >= i)
                ++j; // ensure j != i
            int v = cand[i];
            if (i < j) {
                for (int k = i; k < j; ++k)
                    cand[k] = cand[k + 1];
                cand[j] = v;
            } else {
                for (int k = i; k > j; --k)
                    cand[k] = cand[k - 1];
                cand[j] = v;
            }
        }

        u64 cand_sum = sim.simulate_value(cand);
        long long delta = (long long)cand_sum - (long long)cur_sum;

        bool accept = (delta >= 0) || (rng.randomDouble() < exp(delta / temp));
        if (accept) {
            order.swap(cand);
            cur_sum = cand_sum;
            if (cur_sum > best_sum) {
                best_sum = cur_sum;
                best_order = order;
                best_emit = sim.simulate_emit(best_order); // only for the best
            }
        }
    }

    // --- Debug output to stderr ---
    cerr << best_emit.ops.size() << '\n';
    u64 out_sum = 0;
    for (int i = 0; i < sim.NN; i++) {
        cerr << best_emit.Aflat[i] << ((i % N == N - 1) ? '\n' : ' ');
        out_sum += best_emit.Aflat[i];
    }
    cerr << out_sum << '\n';

    // --- Answer (ops to stdout, one per line) ---
    for (char ch : best_emit.ops)
        cout << ch << '\n';
    return 0;
}
yukicoder

結果

ソースコード