結果
| 問題 | No.2159 Filling 4x4 array |
                    
| コンテスト | |
| ユーザー | |
                    
| 提出日時 | 2022-12-23 00:37:19 | 
| 言語 | C++17(gcc12)  (gcc 12.3.0 + boost 1.87.0)  | 
                    
| 結果 | AC |
                    
| 実行時間 | 1,276 ms / 5,000 ms | 
| コード長 | 2,781 bytes | 
| コンパイル時間 | 6,262 ms | 
| コンパイル使用メモリ | 235,624 KB | 
| 最終ジャッジ日時 | 2025-02-09 18:43:14 | 
| ジャッジサーバーID (参考情報) | judge5 / judge2 |
(要ログイン)
| ファイルパターン | 結果 | 
|---|---|
| sample | AC * 5 | 
| other | AC * 45 | 
ソースコード
#pragma GCC target("avx2")
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")
#ifdef _MSC_VER
#  include <intrin.h>
#else
#  include <x86intrin.h>
#endif
#include <array>
#include <cstdint>
#include <iostream>
#include <numeric>
#include <unordered_map>
#include <atcoder/modint>
// Modular integer type used for every count; arithmetic is modulo 998244353.
using mint = atcoder::modint998244353;
// Number of binary digit positions main() iterates over (bits 0 .. L-1 of each target).
constexpr int L = 30;
// Side length of the square array; also how many row sums and column sums are read.
constexpr int N = 4;
// Returns `b` with its 16 bits in mirrored order (bit 0 <-> bit 15, bit 1 <-> bit 14, ...).
//
// Implemented as a four-stage swap network: adjacent bits, then bit pairs,
// then nibbles, then bytes. The stages are independent permutations, so the
// order in which they run does not affect the result.
uint16_t bit_reverse(uint16_t b) {
    unsigned v = b;  // work in a wider type; every intermediate stays within 16 bits
    v = ((v >> 1) & 0x5555u) | ((v & 0x5555u) << 1);  // swap neighbouring bits
    v = ((v >> 2) & 0x3333u) | ((v & 0x3333u) << 2);  // swap neighbouring bit pairs
    v = ((v >> 4) & 0x0F0Fu) | ((v & 0x0F0Fu) << 4);  // swap neighbouring nibbles
    v = ((v >> 8) & 0x00FFu) | ((v & 0x00FFu) << 8);  // swap the two bytes
    return static_cast<uint16_t>(v);
}
int main() {
    std::array<int, N> h, w;
    for (auto&& e : h) std::cin >> e, e -= 4;
    for (auto&& e : w) std::cin >> e, e -= 4;
    if (std::accumulate(h.begin(), h.end(), 0LL) != std::accumulate(w.begin(), w.end(), 0LL)) {
        std::cout << 0 << std::endl;
        return 0;
    }
    constexpr int K = 3;
    std::array<uint32_t, 1 << (N - 1) * (N - 1)> add{};
    for (int s = 0; s < 1 << ((N - 1) * (N - 1)); ++s) {
        for (int a = 0; a < N - 1; ++a) for (int b = 0; b < N - 1; ++b) {
            int bit = (s >> (a * (N - 1) + b)) & 1;
            add[s] += bit << a * K;
            add[s] += bit << b * K << N * K;
        }
    }
    std::unordered_map<uint32_t, mint> pd{ { 0, 1 } };
    for (int i = 0; i < L; ++i) {
        uint16_t mask_lo = 0, mask_hi = 0;
        uint16_t sub_lo = 0, sub_hi = 0;
        for (int a = 0; a < N; ++a) {
            mask_lo += 1 << a * K;
            sub_lo += ((h[a] >> i) & 1) << a * K;
        }
        for (int b = 0; b < N; ++b) {
            if (b != N - 1) {
                mask_hi += 1 << b * K;
            }
            sub_hi += ((w[b] >> i) & 1) << b * K;
        }
        auto split = [](uint32_t s) {
            return std::make_pair<uint16_t, uint16_t>(s & ((1 << N * K) - 1), s >> N * K);
        };
        auto merge = [](uint32_t lo, uint32_t hi) {
            return (hi << N * K) | lo;
        };
        auto rev = [](uint16_t v) {
            return bit_reverse(v) >> (15 - (N - 1) * K);
        };
        std::unordered_map<uint32_t, mint> dp;
        for (const auto& [k, v] : pd) {
            for (uint32_t x : add) {
                auto [lo, hi] = split(k + x);
                uint32_t ahi = (hi ^ sub_hi) & mask_hi;
                hi += ahi;
                lo += _mm_popcnt_u32(ahi) << (N - 1) * K;
                uint32_t alo = (lo ^ sub_lo) & mask_lo;
                lo += alo;
                hi += _mm_popcnt_u32(alo) << (N - 1) * K;
                dp[merge(lo - sub_lo, hi - sub_hi) >> 1] += v;
            }
        }
        pd.swap(dp);
    }
    std::cout << pd[0].val() << std::endl;
    return 0;
}