結果
問題 | No.215 素数サイコロと合成数サイコロ (3-Hard) |
ユーザー | btk |
提出日時 | 2015-06-05 13:10:25 |
言語 | C++11 (gcc 11.4.0) |
結果 |
WA
|
実行時間 | - |
コード長 | 9,167 bytes |
コンパイル時間 | 1,204 ms |
コンパイル使用メモリ | 99,784 KB |
実行使用メモリ | 6,944 KB |
最終ジャッジ日時 | 2024-07-06 14:10:47 |
合計ジャッジ時間 | 1,857 ms |
ジャッジサーバーID (参考情報) |
judge3 / judge4 |
(要ログイン)
ソースコード
#include<iostream> #include<fstream> #include<vector> #include<list> #include<algorithm> #include<utility> #include<complex> #include<functional> #include<cassert> using namespace std; #define reE(i,a,b) for(auto (i)=(a);(i)<=(b);(i)++) #define rE(i,b) reE(i,0,b) #define reT(i,a,b) for(auto (i)=(a);(i)<(b);(i)++) #define rT(i,b) reT(i,0,b) #define rep(i,a,b) reE(i,a,b); #define rev(i,a,b) for(auto (i)=(b)-1;(i)>=(a);(i)--) #define itr(i,b) for(auto (i)=(b).begin();(i)!=(b).end();++(i)) #define LL long long #define BUFFSIZE (long long)1300 #define MAX_LOGN 60 typedef long long ll; typedef pair<int, int> Pii; #define FOR(i,n) for(int i = 0; i < (n); i++) #define sz(c) ((int)(c).size()) #define ten(x) ((int)1e##x) template<class T> T extgcd(T a, T b, T& x, T& y) { for (T u = y = 1, v = x = 0; a;) { T q = b / a; swap(x -= q * u, u); swap(y -= q * v, v); swap(b -= q * a, a); } return b; } template<class T> T mod_inv(T a, T m) { T x, y; extgcd(a, m, x, y); return (m + x % m) % m; } ll mod_pow(ll a, ll n, ll mod) { ll ret = 1; ll p = a % mod; while (n) { if (n & 1) ret = ret * p % mod; p = p * p % mod; n >>= 1; } return ret; } template<int mod, int primitive_root> class NTT { public: int get_mod() const { return mod; } void _ntt(vector<ll>& a, int sign) { const int n = sz(a); assert((n ^ (n&-n)) == 0); //n = 2^k const int g = 3; //g is primitive root of mod int h = (int)mod_pow(g, (mod - 1) / n, mod); // h^n = 1 if (sign == -1) h = (int)mod_inv(h, mod); //h = h^-1 % mod //bit reverse int i = 0; for (int j = 1; j < n - 1; ++j) { for (int k = n >> 1; k >(i ^= k); k >>= 1); if (j < i) swap(a[i], a[j]); } for (int m = 1; m < n; m *= 2) { const int m2 = 2 * m; const ll base = mod_pow(h, n / m2, mod); ll w = 1; FOR(x, m) { for (int s = x; s < n; s += m2) { ll u = a[s]; ll d = a[s + m] * w % mod; a[s] = u + d; if (a[s] >= mod) a[s] -= mod; a[s + m] = u - d; if (a[s + m] < 0) a[s + m] += mod; } w = w * base % mod; } } for (auto& x : a) if (x < 0) x += mod; } void ntt(vector<ll>& input) { _ntt(input, 1); } void intt(vector<ll>& input) { _ntt(input, -1); const int n_inv = mod_inv(sz(input), mod); for (auto& x : input) x = x * n_inv % mod; } // 畳み込み演算を行う vector<ll> convolution(const vector<ll>& a, const vector<ll>& b){ int ntt_size = 1; while (ntt_size < sz(a) + sz(b)) ntt_size *= 2; vector<ll> _a = a, _b = b; _a.resize(ntt_size); _b.resize(ntt_size); ntt(_a); ntt(_b); FOR(i, ntt_size){ (_a[i] *= _b[i]) %= mod; } intt(_a); return _a; } }; ll garner(vector<Pii> mr, int mod){ mr.emplace_back(mod, 0); vector<ll> coffs(sz(mr), 1); vector<ll> constants(sz(mr), 0); FOR(i, sz(mr) - 1){ // coffs[i] * v + constants[i] == mr[i].second (mod mr[i].first) を解く ll v = (mr[i].second - constants[i]) * mod_inv<ll>(coffs[i], mr[i].first) % mr[i].first; if (v < 0) v += mr[i].first; for (int j = i + 1; j < sz(mr); j++) { (constants[j] += coffs[j] * v) %= mr[j].first; (coffs[j] *= mr[i].first) %= mr[j].first; } } return constants[sz(mr) - 1]; } typedef NTT<167772161, 3> NTT_1; typedef NTT<469762049, 3> NTT_2; typedef NTT<1224736769, 3> NTT_3; //任意のmodで畳み込み演算 O(n log n) vector<ll> int32mod_convolution(vector<ll> a, vector<ll> b, int mod){ for (auto& x : a) x %= mod; for (auto& x : b) x %= mod; NTT_1 ntt1; NTT_2 ntt2; NTT_3 ntt3; auto x = ntt1.convolution(a, b); auto y = ntt2.convolution(a, b); auto z = ntt3.convolution(a, b); vector<ll> ret(sz(x)); vector<Pii> mr(3); FOR(i, sz(x)){ mr[0].first = ntt1.get_mod(), mr[0].second = (int)x[i]; mr[1].first = ntt2.get_mod(), mr[1].second = (int)y[i]; mr[2].first = ntt3.get_mod(), mr[2].second = (int)z[i]; ret[i] = garner(mr, mod); } return ret; } // garnerのアルゴリズムを直書きしたversion,速い vector<ll> fast_int32mod_convolution(vector<ll> a, vector<ll> b, int mod){ for (auto& x : a) x %= mod; for (auto& x : b) x %= mod; NTT_1 ntt1; NTT_2 ntt2; NTT_3 ntt3; assert(ntt1.get_mod() < ntt2.get_mod() && ntt2.get_mod() < ntt3.get_mod()); auto x = ntt1.convolution(a, b); auto y = ntt2.convolution(a, b); auto z = ntt3.convolution(a, b); // garnerのアルゴリズムを極力高速化した const ll m1 = ntt1.get_mod(), m2 = ntt2.get_mod(), m3 = ntt3.get_mod(); const ll m1_inv_m2 = mod_inv<ll>(m1, m2); const ll m12_inv_m3 = mod_inv<ll>(m1 * m2, m3); const ll m12_mod = m1 * m2 % mod; vector<ll> ret(sz(x)); FOR(i, sz(x)){ ll v1 = (y[i] - x[i]) * m1_inv_m2 % m2; if (v1 < 0) v1 += m2; ll v2 = (z[i] - (x[i] + m1 * v1) % m3) * m12_inv_m3 % m3; if (v2 < 0) v2 += m3; ll constants3 = (x[i] + m1 * v1 + m12_mod * v2) % mod; if (constants3 < 0) constants3 += mod; ret[i] = constants3; } return ret; } #define MOD 1000000007 template <class T> class Mr{ public: vector<T> first; vector<T> C; vector<vector<T>> bin; vector<T> t; vector<LL> lw, rw;//M+1個の作業用 vector<LL> lu, ru;//M個の作業用 T zero, one; int M; //n(1,,,2M)をn(1,,,M)に修正、O(M^2) void form(vector<T> &n){ for (int i = 0; i < M; i++){ lu[i] = t[i].val; ru[i] = n[2 * M - i].val; } auto b = fast_int32mod_convolution(lu, ru, MOD); for (int i = 0; i < M; i++){ rw[i] = b[i]; lw[i + 1] = C[M - i - 1].val; } rw[M] = 0; lw[0] = 0; auto bb = fast_int32mod_convolution(lw, rw, MOD); n[0] = zero; for (int i = 1; i <= M; i++){ n[i] = (n[i].val + bb[2*M - i])%MOD; n[M + i] = zero; } } //lとrを足し合わせる、O(M^2) inline void add(vector<T> &l, vector<T> &r, vector<T> &ans){ lw[0] = 0; rw[0] = 0; for (int i = 1; i <= M; i++){ lw[i] = l[i].val; rw[i] = r[i].val; } auto tmp = fast_int32mod_convolution(lw, rw, MOD); reE(i, 1, 2 * M)ans[i].val = tmp[i]; form(ans); } //初期化、O(M*MAX_LOGN) Mr(const vector<T>& f, const vector<T>& c, int m, T e1, T e0){ M = m; first.reserve(M + 1); C.reserve(M); lw.resize(M + 1); rw.resize(M + 1); lu.resize(M); ru.resize(M); zero = e0, one = e1; first.push_back(zero); rT(i, M){ first.push_back(f[i]); C.push_back(c[i]); } t.resize(M); t[0] = one; for (int i = 0; i < M; i++) for (int j = i+1; j < M; j++){ t[j] = t[j] + C[M - (j - i)]*t[i]; } bin.resize(MAX_LOGN); rT(i, MAX_LOGN)bin[i].resize(2 * M + 1); rE(i, 2 * M)bin[0][i] = zero; bin[0][1] = one; reT(i, 1, MAX_LOGN){ add(bin[i - 1], bin[i - 1], bin[i]); } } //N項目の計算、戻り値がTの形であることに注意、O(M^2*logN) vector<T> calc(LL n){ n--; vector<T> tmp, result = bin[0]; for (int b = 0; n; b++, n >>= 1) if (1 & n){ tmp = result; add(tmp, bin[b], result); } //reE(i, 1, M)ans = ans + (result[i] * first[i]); return result; } }; //テンプレート、デフォルトコンストラクタのオーバーロードを忘れない struct X{ LL val; X(LL v){ val = v; } X(){ val = 0; } LL operator=(const X &another){ return val = another.val; } LL operator*(const X &another)const{ return (val*another.val) % MOD; } LL operator+(const X &another)const{ return (val + another.val) % MOD; } }; #define DPBUFF 4000 LL comb[BUFFSIZE + 1]; LL prevDP[DPBUFF+ 1][6]; LL nextDP[DPBUFF+1][6]; LL dp[2 * BUFFSIZE + 1] = { 1 }; LL sumc[BUFFSIZE + 1] = { 0 }; vector<int> sosuu = { 2, 3, 5, 7, 11, 13 }, gousei = { 4, 6, 8, 9, 10, 12 }; int p, c; vector<LL> dpcalc(vector<int>s,int pc){ for(int i=0;i<DPBUFF;i++) for(int j=0;j<6;j++) prevDP[i][j]=nextDP[i][j]=0; prevDP[0][0]=1; for(int n=0;n<pc;n++){ for(int j=0;j<6;j++) for(int i=s[j];i<DPBUFF;i++) for (int k = 0; k <= j; k++) nextDP[i][j]=(nextDP[i][j]+prevDP[i-s[j]][k])%MOD; for(int i=0;i<DPBUFF;i++) for(int j=0;j<6;j++){ prevDP[i][j]=nextDP[i][j]; nextDP[i][j]=0; } } vector<LL>res(DPBUFF,0); for(int i=0;i<DPBUFF;i++) for(int j=0;j<6;j++) res[i]=(res[i]+prevDP[i][j])%MOD; return res; } vector<X> C(BUFFSIZE + 1); int main(void){ LL N; cin >> N >> p >> c; //combの計算 auto xx=dpcalc(sosuu,p); auto yy=dpcalc(gousei,c); auto zz=fast_int32mod_convolution(xx, yy, MOD); for(int i=0;i<BUFFSIZE;i++)comb[i]=zz[i]; //漸化式の初項部分 for (LL i = 0; i <= 2 * BUFFSIZE; i++){ for (LL j = 1; j <= min(i, BUFFSIZE); j++){ dp[i] = (dp[i] + comb[j] * dp[i - j]) % MOD; } } //漸化式の係数部分 for (int i = 1; i <= BUFFSIZE; i++){ C[BUFFSIZE - i] = X(comb[i]); } for (int i = BUFFSIZE - 1; i >= 0; i--){ sumc[i] = (sumc[i + 1] + comb[i]) % MOD; } LL res = 0; return 0; if (N < BUFFSIZE + 100){ for (LL i = 0; i < min(N, BUFFSIZE); i++) res = (res + dp[i] * sumc[min(N, BUFFSIZE) - i]) % MOD; } else{ // cout << "M" << endl; Mr<X> mr(C, C, BUFFSIZE, X(1), X(0)); auto cc = mr.calc(N + 1 - BUFFSIZE); // cout << "M" << endl; for (LL i = 0; i < BUFFSIZE; i++) if (sumc[BUFFSIZE - i]){ LL x = 0; for (int j = 0; j < BUFFSIZE; j++) x = (x + dp[i + j] * cc[j + 1].val) % MOD; res = (res + x*sumc[BUFFSIZE - i]) % MOD; // cout << i << endl; } } cout << res << endl; return(0); }