#400251 (C++14) No.931 Multiplicative Convolution

提出ソース
結果

問題	No.931 Multiplicative Convolution
ユーザー	lumc_
提出日時	2019-11-22 22:28:49
言語	C++14 (gcc 13.3.0 + boost 1.87.0)
結果	AC
実行時間	769 ms / 2,000 ms
コード長	12,207 bytes
コンパイル時間	1,248 ms
コンパイル使用メモリ	119,388 KB
実行使用メモリ	14,592 KB
最終ジャッジ日時	2024-10-11 04:14:07
合計ジャッジ時間	7,221 ms
ジャッジサーバーID （参考情報）	judge2 / judge1
このコードへのチャレンジ
（要ログイン）
ファイルパターン	結果
sample	AC * 3
other	AC * 14
権限があれば一括ダウンロードができます
ソースコード

raw source code
// includes {{{
#include<iostream>
#include<iomanip>
#include<algorithm>
#include<vector>
#include<stack>
#include<queue>
#include<map>
#include<set>
#include<tuple>
#include<cmath>
#include<random>
#include<cassert>
#include<bitset>
#include<cstdlib>
// #include<deque>
// #include<multiset>
// #include<cstring>
// #include<bits/stdc++.h>
// }}}
using namespace std;
using ll = long long;

// #undef DEBUG
// #define DEBUG
// DEBUG {{{
#include <array>
#include <deque>
#include <iostream>
#include <list>
#include <queue>
#include <stack>
#include <tuple>
#include <valarray>
#include <vector>
// Tuple printing.
// __output_tuple< idx > writes elements idx, idx + 1, ... of the tuple to the
// stream (every element except the first is preceded by ", ") and the
// recursion ends once idx reaches the number of elements.
template < int idx, class... Ts >
typename std::enable_if< (idx >= sizeof...(Ts)) >::type __output_tuple(
    std::ostream &, std::tuple< Ts... > const &) {
  // idx is past the last element: nothing left to print
}
template < int idx, class... Ts >
typename std::enable_if< (idx < sizeof...(Ts)) >::type __output_tuple(
    std::ostream &os, std::tuple< Ts... > const &t) {
  const char *separator = (idx == 0) ? "" : ", ";
  os << separator << std::get< idx >(t);
  __output_tuple< idx + 1 >(os, t);
}
// Streams a tuple as "(e0, e1, ...)".
template < class... Ts >
std::ostream &operator<<(std::ostream &os, std::tuple< Ts... > const &t) {
  os << "(";
  __output_tuple< 0 >(os, t);
  return os << ")";
}
// Streams a pair as "(first, second)".
template < class First, class Second >
std::ostream &operator<<(std::ostream &os, std::pair< First, Second > const &p) {
  os << "(" << p.first;
  os << ", " << p.second;
  return os << ")";
}
// Streams a stack as "{top, ..., bottom}"; works on a copy, so the argument
// itself is left untouched.
template < class Elem >
std::ostream &operator<<(std::ostream &os, const std::stack< Elem > &a) {
  os << "{";
  std::stack< Elem > rest = a;
  bool isFirst = true;
  while(rest.size()) {
    os << (isFirst ? "" : ", ") << rest.top();
    isFirst = false;
    rest.pop();
  }
  os << "}";
  return os;
}
// Streams a priority_queue as "{ (top) e0, e1, ... }" in pop order.
// The queue is taken by value, so the caller's queue is not drained.
template < class Elem, class Container, class Compare >
std::ostream &operator<<(std::ostream &os,
    std::priority_queue< Elem, Container, Compare > a) {
  os << "{ (top) ";
  for(; a.size(); a.pop()) {
    os << a.top();
    // no separator after the final element
    os << (a.size() == 1 ? "" : ", ");
  }
  os << " }";
  return os;
}
// Streams a queue as "{ front, ..., back }".
// The queue is taken by value, so the caller's queue is not drained.
template < class Elem, class Container >
std::ostream &operator<<(std::ostream &os, std::queue< Elem, Container > a) {
  os << "{ ";
  for(; a.size(); a.pop()) {
    os << a.front();
    // no separator after the final element
    os << (a.size() == 1 ? "" : ", ");
  }
  os << " }";
  return os;
}
// Debug helpers.
// With DEBUG defined, dump / dump2D / dump1D write to DEBUG_OUT (std::cerr
// unless DEBUG_OUT was defined beforehand) and containers are streamed as
// "{a, b, c}". Without DEBUG the three helpers expand to no-op expressions and
// containers are streamed as "a b c".
#ifdef DEBUG
#if !defined(DEBUG_OUT)
#define DEBUG_OUT std::cerr
#endif
// dump(args...): prints "[<line>] <argument text> = <tuple of the values>"
// followed by std::endl.
#define dump(...)                                                                \
  [&]() {                                                                        \
    auto __debug_tap = std::make_tuple(__VA_ARGS__);                             \
    DEBUG_OUT << "[" << __LINE__ << "] " << #__VA_ARGS__ << " = " << __debug_tap \
    << std::endl;                                                      \
  }()
// dump2D(d, sizey, sizex): prints d[i][j] for i < sizey, j < sizex, one row
// per line; each row starts with a tab and its cells are tab-separated.
template < class T >
inline void dump2D(T &d, size_t sizey, size_t sizex) {
  for(size_t i = 0; i < sizey; i++) {
    DEBUG_OUT << "\t";
    for(size_t j = 0; j < sizex; j++)
      DEBUG_OUT << d[i][j] << (j + 1 == sizex ? "" : "\t");
    DEBUG_OUT << std::endl;
  }
}
// dump1D(d, sizey): prints d[0] .. d[sizey - 1] separated by single spaces,
// then std::endl.
template < class T >
inline void dump1D(T &d, size_t sizey) {
  for(size_t i = 0; i < sizey; i++) {
    DEBUG_OUT << d[i] << (i + 1 == sizey ? "" : " ");
  }
  DEBUG_OUT << std::endl;
}
// Generic stream operator for any type T for which begin(T()) is well-formed,
// except std::string. DEBUG flavour: "{e0, e1, ...}".
template <
class T, class = typename std::iterator_traits< decltype(begin(T())) >::value_type,
      class = typename std::enable_if< !std::is_same< T, std::string >::value >::type >
      std::ostream &operator<<(std::ostream &os, const T &a) {
        os << "{";
        for(auto ite = begin(a); ite != end(a); ++ite)
          os << (ite == begin(a) ? "" : ", ") << *ite;
        os << "}";
        return os;
      }
#else
// Release flavour: the debug helpers evaluate to a discarded constant.
#define dump(...) ((void) 42)
#define dump2D(...) ((void) 42)
#define dump1D(...) ((void) 42)
// Generic stream operator for any type T for which begin(T()) is well-formed,
// except std::string. Release flavour: elements separated by one space, no
// braces and no trailing separator.
template <
class T, class = typename std::iterator_traits< decltype(begin(T())) >::value_type,
      class = typename std::enable_if< !std::is_same< T, std::string >::value >::type >
      std::ostream &operator<<(std::ostream &os, const T &a) {
        for(auto ite = begin(a); ite != end(a); ++ite)
          os << (ite == begin(a) ? "" : " ") << *ite;
        return os;
      }
#endif
// }}}


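// MAX_N is the maximum size of the OUTPUT, i.e. roughly double the INPUT size.
// MAX_RES_VALUE = MAX_VALUE^2 * MAX_N
// If MAX_N > 2^20, comment out the NTT_PRIMES entries that Core's
// static_assert rejects as too small.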
// NTT {{{
#include <cassert>
#include <cstdint>
#include <vector>

namespace NTT {
  // Unsigned type used for lengths and indices throughout the NTT code.
  using uint = uint_fast32_t;

  // NTT_PRIMES {{{
  // Each row is {prime modulus, value passed as Core's `primitive` argument}.
  // The trailing comment on a row is the factorisation of the modulus.
  constexpr ll NTT_PRIMES[][2] = {
    {1224736769, 3}, // 2^24 * 73 + 1,
    {1053818881, 7}, // 2^20 * 3 * 5 * 67 + 1
    {1051721729, 6}, // 2^20 * 17 * 59 + 1
    {1045430273, 3}, // 2^20 * 997 + 1
    {1012924417, 5}, // 2^21 * 3 * 7 * 23 + 1
    {1007681537, 3}, // 2^20 * 31^2 + 1
    {1004535809, 3}, // 2^21 * 479 + 1
    {998244353, 3},  // 2^23 * 7 * 17 + 1
    {985661441, 3},  // 2^22 * 5 * 47 + 1
    {976224257, 3},  // 2^20 * 7^2 * 19 + 1
    {975175681, 17}, // 2^21 * 3 * 5 * 31 + 1
    {962592769, 7},  // 2^21 * 3^3 * 17 + 1
    {950009857, 7},  // 2^21 * 3 * 151 + 1
    {943718401, 7},  // 2^22 * 3^2 * 5^2 + 1
    {935329793, 3},  // 2^22 * 223 + 1
    {924844033, 5},  // 2^21 * 3^2 * 7^2 + 1
    {469762049, 3},  // 2^26 * 7 + 1
    {167772161, 3},  // 2^25 * 5 + 1
  };
  // }}}

  // general math {{{
  // Extended Euclid: returns g = gcd(a, b) (non-negative) and stores in x, y
  // coefficients with a * x + b * y == g.
  ll extgcd(ll a, ll b, ll &x, ll &y) {
    if(b == 0) {
      // base case: |a| = a * (+1 or -1) + b * 0
      x = (a < 0) ? -1 : 1;
      y = 0;
      return (a < 0) ? -a : a;
    }
    // solve for (b, a % b) with the roles of x and y swapped, then fix up y
    const ll g = extgcd(b, a % b, y, x);
    y -= a / b * x;
    return g;
  }
  // Returns the Bezout coefficient of a from extgcd(a, mod), reduced into
  // [0, mod); this is the inverse of a modulo mod when gcd(a, mod) == 1.
  ll modinv(ll a, ll mod) {
    ll inv, other;
    extgcd(a, mod, inv, other);
    inv %= mod;
    if(inv < 0) inv += mod;
    return inv;
  }
  // Binary exponentiation: returns a^b reduced modulo mod (b >= 0).
  ll modpow(ll a, ll b, ll mod) {
    ll result = 1;
    for(a %= mod; b != 0; b >>= 1) {
      if(b & 1) result = result * a % mod;
      a = a * a % mod;
    }
    return result;
  }
  // }}}

  // NTT Core {{{
  // Scratch buffers shared by all code instantiated with the same MAX_H.
  // Every buffer has room for 2^MAX_H values; the arrays are created by the
  // static-member initialisers below and are never released.
  template < int MAX_H >
    struct Pool {
      static ll *tmp;
      static ll *A;
      static ll *B;
    };
  template < int MAX_H > ll *Pool< MAX_H >::tmp = new ll[1 << MAX_H];
  template < int MAX_H > ll *Pool< MAX_H >::A = new ll[1 << MAX_H];
  template < int MAX_H > ll *Pool< MAX_H >::B = new ll[1 << MAX_H];

  // Number-theoretic transform / convolution modulo `mod`.
  //   MAX_H     : log2 of the largest supported transform length; it also
  //               selects the Pool< MAX_H > scratch buffers that are used
  //   mod       : modulus, must satisfy mod % 2^MAX_H == 1 (checked below)
  //   primitive : base from which the 2^MAX_H-th root of unity is derived
  // The working buffers live in Pool< MAX_H >, so every call overwrites them.
  template < int MAX_H, ll mod, ll primitive >
    class Core {
    public:
      static_assert((mod & ((1 << MAX_H) - 1)) == 1, "mod is too small; comment out");
      // ord zetaList[i] = 2^(i + 1)
      ll zetaList[MAX_H], zetaInvList[MAX_H];
      // constexpr
      // Precomputes the roots of unity (and their inverses) for every level by
      // repeatedly squaring the 2^MAX_H-th root.
      Core() {
        zetaList[MAX_H - 1] = modpow(primitive, (mod - 1) / (1 << MAX_H), mod);
        zetaInvList[MAX_H - 1] = modinv(zetaList[MAX_H - 1], mod);
        for(int ih = MAX_H - 2; ih >= 0; --ih) {
          zetaList[ih] = zetaList[ih + 1] * zetaList[ih + 1] % mod;
          zetaInvList[ih] = zetaInvList[ih + 1] * zetaInvList[ih + 1] % mod;
        }
      }
      // In-place transform of a[0..n).
      //   n       : transform length, a power of two (n - 1 is used as a mask)
      //   nh      : log2(n), i.e. the number of passes
      //   inverse : use the inverse roots and finally multiply by n^-1
      void fft(ll *a, uint n, uint nh, bool inverse) const {
        ll *tmp = Pool< MAX_H >::tmp;
        uint mask = n - 1;
        // Pass s = nh - 1 - ih (0-based) uses the root of order 2^(s + 1) and
        // writes into the other buffer; the two buffers swap after each pass.
        for(uint i = n >> 1, ih = nh - 1; i >= 1; i >>= 1, --ih) {
          ll zeta = inverse ? zetaInvList[nh - 1 - ih] : zetaList[nh - 1 - ih];
          ll powZeta = 1;
          for(uint j = 0; j < n; j += i) {
            for(uint k = 0; k < i; ++k) {
              tmp[j | k] =
                (a[((j << 1) & mask) | k] + powZeta * a[(((j << 1) | i) & mask) | k]) % mod;
            }
            powZeta = powZeta * zeta % mod;
          }
          swap(a, tmp);
        }
        // After an odd number of passes the result sits in the pool buffer:
        // swap back and copy it into the caller's array.
        if(nh & 1) {
          swap(a, tmp);
          for(uint i = 0; i < n; ++i) a[i] = tmp[i];
        }
        if(inverse) {
          ll invN = modinv(n, mod);
          for(uint i = 0; i < n; ++i) a[i] = a[i] * invN % mod;
        }
      }
      // Convolution of a and b modulo mod. The result has the padded
      // power-of-two length n >= a.size() + b.size() - 1 (it is not trimmed).
      // Two empty inputs yield an empty result.
      vector< ll > conv(const vector< ll > &a, const vector< ll > &b) const {
        // a.size() + b.size() - 1 would wrap around to the maximum unsigned
        // value here and the doubling loop below would never terminate.
        if(a.empty() && b.empty()) return vector< ll >();
        uint t = a.size() + b.size() - 1;
        uint n = 1, nh = 0;
        while(n < t) n <<= 1, ++nh;
        return convStrict(a, b, n, nh);
      }
      // Convolution with a caller-chosen transform length n == 2^nh.
      // Requires nh <= MAX_H and a.size(), b.size() <= n, otherwise the pool
      // buffers would be overrun (asserted in debug builds).
      // Returns all n entries of the cyclic convolution of length n.
      vector< ll > convStrict(const vector< ll > &a, const vector< ll > &b, uint n,
          uint nh) const {
        assert(nh <= static_cast< uint >(MAX_H) && n == (static_cast< uint >(1) << nh));
        assert(a.size() <= n && b.size() <= n);
        ll *A = Pool< MAX_H >::A, *B = Pool< MAX_H >::B;
        // zero-pad both operands up to n
        for(uint i = 0; i < n; ++i) A[i] = B[i] = 0;
        copy(a.begin(), a.end(), A);
        copy(b.begin(), b.end(), B);
        fft(A, n, nh, 0), fft(B, n, nh, 0);
        for(uint i = 0; i < n; ++i) A[i] = A[i] * B[i] % mod;
        fft(A, n, nh, 1);
        return vector< ll >(A, A + n);
      }
    };
  // }}}

  // Convolution With Garner {{{
  // Recursive worker for ConvolutionWithGarner.
  // Level I convolves a and b modulo NTT_PRIMES[I], lets the levels
  // I - 1, ..., 0 run their Garner step first, and then folds its own
  // residues into the running reconstruction:
  //   mods[j]         : j-th modulus (the last entry is the caller's modulus)
  //   coeffs[j]       : product of the primes processed so far, modulo mods[j]
  //   constants[j][i] : value reconstructed so far for index i, modulo mods[j]
  template < int MAX_H, int I >
    class ConvolutionWithGarnerCore {
    public:
      static void conv_for(uint n, uint nh, const vector< ll > &a, const vector< ll > &b,
          vector< ll > &mods, vector< ll > &coeffs,
          vector< vector< ll > > &constants) {
        static const Core< MAX_H, NTT_PRIMES[I][0], NTT_PRIMES[I][1] > ntt;
        auto c = ntt.convStrict(a, b, n, nh);
        // mods[I] must be set before recursing: the lower levels read it when
        // they update coeffs[j] / constants[j] for j > their own index.
        mods[I] = NTT_PRIMES[I][0];
        ConvolutionWithGarnerCore< MAX_H, I - 1 >::conv_for(
            n, nh, a, b, mods, coeffs, constants);
        // garner
        // coeffs[I] and mods[I] are not modified inside the loop (only entries
        // j > I are written), so the inverse is computed once.
        const ll coeffInv = modinv(coeffs[I], mods[I]);
        for(size_t i = 0; i < c.size(); ++i) {
          ll v = (c[i] - constants[I][i]) * coeffInv % mods[I];
          if(v < 0) v += mods[I];
          for(size_t j = I + 1; j < mods.size(); ++j) {
            constants[j][i] = (constants[j][i] + coeffs[j] * v) % mods[j];
          }
        }
        for(size_t j = I + 1; j < mods.size(); ++j) {
          coeffs[j] = (coeffs[j] * mods[I]) % mods[j];
        }
      }
    };

  // Recursion terminator: level -1 does nothing.
  template < int MAX_H >
    class ConvolutionWithGarnerCore< MAX_H, -1 > {
    public:
      static void conv_for(uint, uint, const vector< ll > &, const vector< ll > &,
          vector< ll > &, vector< ll > &, vector< vector< ll > > &) {}
    };

  // Convolution modulo an arbitrary `mod`, using the first USE entries of
  // NTT_PRIMES and combining the per-prime results with Garner's algorithm.
  template < int MAX_H >
    class ConvolutionWithGarner {
    public:
      // Returns the convolution of a and b modulo mod. The result has the
      // padded power-of-two length n >= a.size() + b.size() - 1 (it is not
      // trimmed). Two empty inputs yield an empty result.
      template < int USE >
        static vector< ll > conv(const vector< ll > &a, const vector< ll > &b, ll mod) {
          static_assert(USE >= 1, "USE must be positive");
          static_assert(USE <= sizeof(NTT_PRIMES) / sizeof(*NTT_PRIMES), "USE is too big");
          // a.size() + b.size() - 1 would wrap around to the maximum unsigned
          // value here and the doubling loop below would never terminate.
          if(a.empty() && b.empty()) return vector< ll >();
          uint nt = a.size() + b.size() - 1;
          uint n = 1, nh = 0;
          while(n < nt) n <<= 1, ++nh;
          // One slot per prime plus a final slot for the requested modulus.
          vector< ll > coeffs(USE + 1, 1);
          vector< vector< ll > > constants(USE + 1, vector< ll >(n));
          vector< ll > mods(USE + 1, mod);
          ConvolutionWithGarnerCore< MAX_H, USE - 1 >::conv_for(
              n, nh, a, b, mods, coeffs, constants);
          // The last slot holds the values reduced modulo `mod`.
          return constants.back();
        }
    };

  // }}}

} // namespace NTT
// }}}

// 1st param is MAX_H
// Transform over the first entry of NTT_PRIMES; not referenced by the code
// shown in this file.
NTT::Core< 18, NTT::NTT_PRIMES[0][0], NTT::NTT_PRIMES[0][1] > nttBig;
// Transform modulo 998244353 (5 is passed as the `primitive` argument);
// main() uses it for the convolution.
NTT::Core< 18, 998244353, 5 > ntt;
// Multi-prime convolution combined via Garner; usage:
using nttconv = NTT::ConvolutionWithGarner< 18 >;
// nttconv::conv< USE >(a, b, mod)

// Modulus applied to the folded convolution results in main().
constexpr ll mod = 998244353;

// a, b: input sequences, read by main() into indices 1 .. p - 1.
int a[112345];
int b[112345];
// c: not used by the code shown in this file.
int c[112345];
// Filled in main(): modlog[v] = i and modexp[i] = v, where v = g^i mod p.
int modlog[112345];
int modexp[112345];

// O(N^.5)
/// --- divisor {{{ ///
#include <vector>
// Returns all positive divisors of n by trial division up to sqrt(n).
// The result is NOT sorted: each small divisor i is immediately followed by
// its partner n / i (omitted when both are equal).
vector< ll > divisor(ll n) {
  vector< ll > res;
  ll i = 1;
  while(i * i <= n) {
    if(n % i == 0) {
      const ll partner = n / i;
      res.push_back(i);
      if(partner != i) res.push_back(partner);
    }
    ++i;
  }
  return res;
}
/// }}}--- ///
/// --- math {{{ ///
// Greatest common divisor via the Euclidean algorithm (gcd(a, 0) == a).
ll gcd(ll a, ll b) {
  while(b != 0) {
    const ll rem = a % b;
    a = b;
    b = rem;
  }
  return a;
}
// Least common multiple; divides before multiplying to keep the
// intermediate value small.
ll lcm(ll a, ll b) {
  const ll g = gcd(a, b);
  return a / g * b;
}
// Extended Euclid: returns g = gcd(a, b) (non-negative) and stores in x, y
// coefficients with a * x + b * y == g.
ll extgcd(ll a, ll b, ll &x, ll &y) {
  if(b != 0) {
    // recurse on (b, a % b) with x and y swapped, then correct y
    const ll g = extgcd(b, a % b, y, x);
    y -= a / b * x;
    return g;
  }
  // base case: |a| = a * (+1 or -1) + 0 * 0
  const bool negative = a < 0;
  x = negative ? -1 : 1;
  y = 0;
  return negative ? -a : a;
}
// Returns the Bezout coefficient of a from extgcd(a, mod), shifted up by mod
// when it is negative; this is the inverse of a modulo mod when
// gcd(a, mod) == 1.
ll modinv(ll a, ll mod) {
  ll coef, other;
  extgcd(a, mod, coef, other);
  return coef < 0 ? coef + mod : coef;
}
// Binary exponentiation: returns a^b reduced modulo mod (b >= 0).
ll modpow(ll a, ll b, ll mod) {
  ll acc = 1;
  ll base = a % mod;
  for(ll e = b; e; e >>= 1) {
    if(e & 1) acc = acc * base % mod;
    base = base * base % mod;
  }
  return acc;
}
/// }}}--- ///

// O(sqrt(p)) to enumerate the divisors of p - 1, plus one modpow per divisor
/// --- isPrimitive {{{ ///
// Returns true iff x is a primitive root modulo the prime p, i.e. x is not a
// multiple of p and x^d != 1 (mod p) for every divisor d of p - 1 other than
// p - 1 itself. Primality of p is not checked.
bool isPrimitive(ll x, ll p) {
  if(p < 2) return false;
  // Normalise x into [0, p). A multiple of p is never a primitive root, but
  // since 0^d != 1 it would otherwise pass every test below.
  x %= p;
  if(x < 0) x += p;
  if(x == 0) return false;
  auto ds = divisor(p - 1);
  for(ll d : ds)
    if(d != p - 1) {
      if(modpow(x, d, p) == 1) return false;
    }
  return true;
}
/// }}}--- ///

// Reads p (assumed prime) followed by a[1..p-1] and b[1..p-1], and prints for
// every i in 1..p-1 the sum of a[j] * b[k] over all j * k == i (mod p),
// reduced modulo `mod`.
// Using a primitive root g, index v = g^e is mapped to exponent e, which turns
// the product of indices into a sum of exponents modulo p - 1, so the answer is
// a cyclic convolution of length p - 1.
int main() {
  std::ios::sync_with_stdio(false), std::cin.tie(0);
  int p;
  cin >> p;
  for(int i = 1; i < p; i++) cin >> a[i];
  for(int i = 1; i < p; i++) cin >> b[i];
  // Any primitive root yields the same final answer, so stop at the first one
  // instead of testing every residue (each test costs O(sqrt(p))).
  int g = 0;
  for(int i = 1; i < p; i++) {
    if(isPrimitive(i, p)) {
      g = i;
      break;
    }
  }
  dump(g);
  // Discrete log / exp tables: modexp[e] = g^e mod p, modlog[g^e mod p] = e.
  for(int i = 0, now = 1; i < p - 1; i++, now = (ll) now * g % p) modlog[now] = i, modexp[i] = now;

  // Re-index both inputs by exponent.
  vector<ll> A(p), B(p);
  for(int i = 1; i < p; i++) A[modlog[i]] = a[i];
  for(int i = 1; i < p; i++) B[modlog[i]] = b[i];

  auto C = ntt.conv(A, B);
  // Fold the linear convolution into a cyclic one of length p - 1.
  for(size_t i = p - 1; i < C.size(); i++) (C[i % (p - 1)] += C[i]) %= mod;
  dump(C);

  // Map exponents back to residues; values are space-separated and the last
  // one is followed by a newline.
  for(int i = 1; i < p; i++) {
    cout << C[modlog[i] % (p - 1)] << " \n"[i == p - 1];
  }

  return 0;
}
yukicoder

結果

ソースコード