結果
問題 | No.1518 Simple Combinatorics |
ユーザー | social_chamereon |
提出日時 | 2022-03-10 15:32:07 |
言語 | C++14 (gcc 12.3.0 + boost 1.83.0) |
結果 |
AC
|
実行時間 | 2 ms / 2,000 ms |
コード長 | 23,005 bytes |
コンパイル時間 | 3,443 ms |
コンパイル使用メモリ | 281,936 KB |
実行使用メモリ | 5,376 KB |
最終ジャッジ日時 | 2024-09-14 16:15:19 |
合計ジャッジ時間 | 4,377 ms |
ジャッジサーバーID (参考情報) |
judge1 / judge2 |
(要ログイン)
テストケース
テストケース表示入力 | 結果 | 実行時間 実行使用メモリ |
---|---|---|
testcase_00 | AC | 2 ms
5,248 KB |
testcase_01 | AC | 2 ms
5,248 KB |
testcase_02 | AC | 2 ms
5,248 KB |
testcase_03 | AC | 2 ms
5,376 KB |
testcase_04 | AC | 2 ms
5,376 KB |
testcase_05 | AC | 2 ms
5,376 KB |
testcase_06 | AC | 2 ms
5,376 KB |
testcase_07 | AC | 2 ms
5,376 KB |
testcase_08 | AC | 2 ms
5,376 KB |
testcase_09 | AC | 2 ms
5,376 KB |
testcase_10 | AC | 2 ms
5,376 KB |
testcase_11 | AC | 2 ms
5,376 KB |
testcase_12 | AC | 2 ms
5,376 KB |
testcase_13 | AC | 2 ms
5,376 KB |
testcase_14 | AC | 2 ms
5,376 KB |
testcase_15 | AC | 2 ms
5,376 KB |
testcase_16 | AC | 2 ms
5,376 KB |
testcase_17 | AC | 2 ms
5,376 KB |
testcase_18 | AC | 2 ms
5,376 KB |
testcase_19 | AC | 2 ms
5,376 KB |
testcase_20 | AC | 2 ms
5,376 KB |
testcase_21 | AC | 2 ms
5,376 KB |
ソースコード
/** * author: social_chameleon * created: 2022/03/09 */ #pragma region MACROS #pragma region HEADER #pragma GCC optimize("O3") #ifdef LOCAL #include "utility/debug_print.hpp" #define debug(...) debug_print::multi_print(#__VA_ARGS__, __VA_ARGS__) #else #define debug(...) (static_cast<void>(0)) #endif #include <bits/stdc++.h> #include <immintrin.h> using namespace std; #define endl '\n' #pragma endregion #pragma region TYPES using ll = long long; using pii = pair<ll, ll>; using pll = pair<ll, ll>; using pil = pair<ll, ll>; using pli = pair<ll, ll>; using ld = long double; template <typename T> using vc = vector<T>; template <typename T> using vvc = vector<vc<T>>; template <typename T> using vvvc = vector<vvc<T>>; using vi = vc<ll>; using vl = vc<ll>; using vld = vc<ld>; using vb = vc<bool>; using vpi = vc<pii>; using vpl = vc<pll>; template <class T> using pq = priority_queue<T>; template <class T> using pqg = priority_queue<T, vector<T>, greater<T>>; #pragma endregion #pragma region UTILITY_FUNCTIONS template <typename T> int si(const T& x) { return x.size(); } template <class T> inline bool chmax(T& a, const T& b) { return (a < b ? a = b, 1 : 0); } template <class T> inline bool chmin(T& a, const T& b) { return (a > b ? a = b, 1 : 0); } #define overload2(a, b, name, ...) name #define overload4(a, b, c, d, name, ...) name #define overload5(a, b, c, d, e, name, ...) name #pragma endregion #pragma region WORDS #define fi first #define se second #define pb push_back #define pf push_front #define ppb pop_back #define ppf pop_front #define eb emplace_back #define ef emplace_front #pragma endregion #pragma region LOOPS #define rep0(n) for (int _ = 0; _ < n; ++_) #define rep1(i, n) for (ll i = 0; i < (n); ++i) #define rep2(i, a, b) for (ll i = (a); i < (b); ++i) #define rep3(i, a, b, c) for (ll i = (a); i < (b); i += (c)) #define rep(...) overload4(__VA_ARGS__, rep3, rep2, rep1, rep0)(__VA_ARGS__) #define rrep0(n) for (int jidlsjf = 0; jidlsjf < n; ++jidlsjf) #define rrep1(i, n) for (ll i = (n)-1; i >= 0; --i) #define rrep2(i, a, b) for (ll i = (a)-1; i >= b; --i) #define rrep3(i, a, b, c) for (ll i = (a)-1; i >= b; i -= c) #define rrep(...) overload4(__VA_ARGS__, rrep3, rrep2, rrep1, rrep0)(__VA_ARGS__) #define fore0(v) rep(a.size()) #define fore1(a, v) for (auto&& a : v) #define fore2(a, b, v) for (auto&& [a, b] : v) #define fore3(a, b, c, v) for (auto&& [a, b, c] : v) #define fore4(a, b, c, d, v) for (auto&& [a, b, c, d] : v) #define fore(...) overload5(__VA_ARGS__, fore4, fore3, fore2, fore1, fore0)(__VA_ARGS__) #pragma endregion #pragma region CONTAINER_METHODS #define all(c) begin(c), end(c) #define rall(c) rbegin(c), rend(c) #define lb(c, x) distance((c).begin(), lower_bound(all(c), (x))) #define ub(c, x) distance((c).begin(), upper_bound(all(c), (x))) #define fi_geq(c, x) distance((c).begin(), lower_bound(all(c), (x))) #define la_l(c, x) distance((c).begin(), lower_bound(all(c), (x))) - 1 #define la_leq(c, x) distance((c).begin(), lower_bound(all(c), (x))) - 1 #define fi_g(c, x) distance((c).begin(), lower_bound(all(c), (x))) #define fi_leq(c, x) distance((c).begin(), lower_bound(all(c), (x), greater<ll>())) #define la_g(c, x) distance((c).begin(), lower_bound(all(c), (x), greater<ll>())) - 1 #define la_geq(c, x) distance((c).begin(), upper_bound(all(c), (x), greater<ll>())) - 1 #define fi_l(c, x) distance((c).begin(), upper_bound(all(c), (x), greater<ll>())) template <typename T> int closest(vector<T>& v, T x) { int n = (int)v.size(); int pos = lower_bound(v.begin(), v.end(), x) - v.begin(); if (pos == 0) return pos; if (pos == n) return n - 1; return (v[pos] - x <= x - v[pos - 1]) ? pos : pos - 1; } #define SORT(v) sort(all(v)) #define REV(v) reverse(all(v)) #define UNIQUE(x) SORT(x), x.erase(unique(all(x)), x.end()) template <typename T = ll, typename S> T SUM(const S& v) { return accumulate(all(v), T(0)); } #define MIN(v) *min_element(all(v)) #define MAX(v) *max_element(all(v)) template <typename T = int> T mex(unordered_set<T> st) { T ret = T(0); while (st.count(ret) != 0) ++ret; return ret; } #pragma endregion #pragma region VECTOR_DEFINITIONS #define vec(type, name, ...) vector<type> name(__VA_ARGS__) #define vec2(type, name1, name2, ...) vector<type> name1(__VA_ARGS__), name2(__VA_ARGS__) #define vec3(type, name1, name2, name3, ...) vector<type> name1(__VA_ARGS__), name2(__VA_ARGS__), name3(__VA_ARGS__) #define vec4(type, name1, name2, name3, name4, ...) vector<type> name1(__VA_ARGS__), name2(__VA_ARGS__), name3(__VA_ARGS__), name4(__VA_ARGS__) #define vv(type, name, a, ...) vector<vector<type>> name(a, vector<type>(__VA_ARGS__)) #define vvv(type, name, a, b, ...) vector<vector<vector<type>>> name(a, vector<vector<type>>(b, vector<type>(__VA_ARGS__))) #define vvvv(type, name, a, b, c, ...) vector<vector<vector<vector<type>>>> name(a, vector<vector<vector<type>>>(b, vector<vector<type>>(c, vector<type>(__VA_ARGS__)))) constexpr pii dx4[4] = {pii{1, 0}, pii{0, 1}, pii{-1, 0}, pii{0, -1}}; constexpr pii dx8[8] = {{1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}, {-1, -1}, {0, -1}, {1, -1}}; #pragma endregion #pragma region TYPICAL_OUTPUT const string YESNO[2] = {"NO", "YES"}; const string YesNo[2] = {"No", "Yes"}; const string yesno[2] = {"no", "yes"}; void YES(bool t = 1) { cout << YESNO[t] << endl; } void NO(bool t = 1) { YES(!t); } void Yes(bool t = 1) { cout << YesNo[t] << endl; } void No(bool t = 1) { Yes(!t); } void yes(bool t = 1) { cout << yesno[t] << endl; } void no(bool t = 1) { yes(!t); } #pragma endregion #pragma region INPUT int scan() { return getchar(); } void scan(signed& a) { cin >> a; } void scan(long long& a) { cin >> a; } void scan(char& a) { cin >> a; } void scan(double& a) { cin >> a; } void scan(string& a) { cin >> a; } template <class T, class S> void scan(pair<T, S>& p) { scan(p.first), scan(p.second); } template <class T> void scan(vector<T>& a) { for (auto& i : a) scan(i); } template <class T> void scan(T& a) { cin >> a; } void IN() {} template <class Head, class... Tail> void IN(Head& head, Tail&... tail) { scan(head); IN(tail...); } #define INT(...) \ int __VA_ARGS__; \ IN(__VA_ARGS__) #define LL(...) \ ll __VA_ARGS__; \ IN(__VA_ARGS__) #define STR(...) \ string __VA_ARGS__; \ IN(__VA_ARGS__) #define CHR(...) \ char __VA_ARGS__; \ IN(__VA_ARGS__) #define DBL(...) \ double __VA_ARGS__; \ IN(__VA_ARGS__) #define VEC(type, name, size) \ vector<type> name(size); \ IN(name) #define VEC2(type, name1, name2, size) \ vector<type> name1(size), name2(size); \ for (int i = 0; i < size; i++) IN(name1[i], name2[i]) #define VEC3(type, name1, name2, name3, size) \ vector<type> name1(size), name2(size), name3(size); \ for (int i = 0; i < size; i++) IN(name1[i], name2[i], name3[i]) #define VEC4(type, name1, name2, name3, name4, size) \ vector<type> name1(size), name2(size), name3(size), name4(size); \ for (int i = 0; i < size; i++) IN(name1[i], name2[i], name3[i], name4[i]) #define VV(type, name, h, w) \ vector<vector<type>> name(h, vector<type>(w)); \ IN(name) #pragma endregion #pragma region MATH ll max(signed a, ll b) { return max(ll(a), b); } ll max(ll a, signed b) { return max(a, ll(b)); } ll min(signed a, ll b) { return min(ll(a), b); } ll min(ll a, signed b) { return min(a, ll(b)); } static constexpr ll inf = numeric_limits<ll>::max() / 2; // static constexpr ll infll = numeric_limits<ll>::max() / 2; static constexpr double infdbl = numeric_limits<double>::max() / 2; template <typename T, typename S> T ceil(T x, S y) { assert(y); return (y < 0 ? ceil(-x, -y) : (x > 0 ? (x + y - 1) / y : x / y)); } template <typename T, typename S> T floor(T x, S y) { assert(y); return (y < 0 ? floor(-x, -y) : (x > 0 ? x / y : x / y - (x % y == 0 ? 0 : 1))); } template <class T> ll POW(T x, int n) { ll res = 1; for (; n; n >>= 1, x *= x) if (n & 1) res *= x; return res; } template <class T, class S> ll POW(T x, S n, const ll& mod) { ll res = 1; x %= mod; for (; n; n >>= 1, x = x * x % mod) if (n & 1) res = res * x % mod; return res; } template <typename T> map<T, int> factor(T n) { map<T, int> ret; for (T i = 2; i * i <= n; i++) { while (n % i == 0) { ret[i]++; n /= i; } } if (n != 1) ret[n] = 1; return ret; } template <class T> vector<T> divisor(T x) { vector<T> ret; for (T i = 1; i * i <= x; i++) if (x % i == 0) { ret.pb(i); if (i * i != x) ret.pb(x / i); } return ret; } vector<int> cumsum(const vector<bool>& v) { vector<int> ret(v.size()); for (int i = 0; i < (int)v.size(); ++i) ret[i] = (i == 0 ? 0 : ret[i - 1]) + v[i]; return ret; } template <typename T> vector<T> cumsum(const vector<T>& v) { vector<T> ret(v.size()); for (int i = 0; i < (int)v.size(); ++i) ret[i] = (i == 0 ? 0 : ret[i - 1]) + v[i]; return ret; } template <typename T> vector<vector<T>> cumsum2d(const vector<vector<T>>& v) { vector<vector<T>> ret = v; for (int i = 0; i < (int)v.size(); ++i) { for (int j = 0; j < (int)v[0].size(); ++j) { if (i > 0) ret[i][j] += ret[i - 1][j]; if (j > 0) ret[i][j] += ret[i][j - 1]; if (i > 0 && j > 0) ret[i][j] -= ret[i - 1][j - 1]; } } return ret; } template <typename T> T parsum2d(const vector<vector<T>>& cum, int i1, int i2, int j1, int j2) { if (i1 > i2 || j1 > j2) return 0; T ret = cum[i2][j2]; if (i1 > 0) ret -= cum[i1 - 1][j2]; if (j1 > 0) ret -= cum[i2][j1 - 1]; if (i1 > 0 && j1 > 0) ret += cum[i1 - 1][j1 - 1]; return ret; } template <typename T, typename S, typename U> bool inc(const T& x, const S& l, const U& r) { return l <= x and x < r; } constexpr ll ten(int n) { return n == 0 ? 1 : ten(n - 1) * 10; } template <typename T> T arith_sum_from_range(T first, T last, T diff = T(1)) { assert((last - first) % diff == 0); return ((last - first) / 2 + 1) * (first + last) / 2; } template <typename T, typename S> T arith_sum_from_term(T first, S n, T diff = T(1)) { return (2 * first + (n - 1) * diff) * n / 2; } #pragma endregion #pragma region BIT_FUNCTIONS ll pow2(int i) { return 1LL << i; } #define bit(n, k) (((n) >> (k)) & 1) int topbit(signed t) { return t == 0 ? -1 : 31 - __builtin_clz(t); } int topbit(ll t) { return t == 0 ? -1 : 63 - __builtin_clzll(t); } int lowbit(signed a) { return a == 0 ? 32 : __builtin_ctz(a); } int lowbit(ll a) { return a == 0 ? 64 : __builtin_ctzll(a); } constexpr ll mask(int n) { return (1LL << n) - 1; } int popcount(ll t) { return __builtin_popcountll(t); } bool ispow2(int i) { return i && (i & -i) == i; } template <typename T> T inv_all_bit(T a) { T ret = 0; int n = topbit(a); rep(i, n) if (((a >> i) & 1) == 0) ret += (1 << i); return ret; } template <typename T> T inv_all_bit(T a, int n) { T ret = 0; rep(i, n) if (((a >> i) & 1) == 0) ret += (1 << i); return ret; } template <typename T> T inv_bit(T a, int k) { return a ^ (1 << k); } #pragma endregion #pragma region PAIR_OPERATORS template <class T, class S> pair<T, S> operator-(const pair<T, S>& x, const pair<T, S>& y) { return pair<T, S>(x.fi - y.fi, x.se - y.se); } template <class T, class S> pair<T, S> operator+(const pair<T, S>& x, const pair<T, S>& y) { return pair<T, S>(x.fi + y.fi, x.se + y.se); } template <class T> pair<T, T> operator&(const pair<T, T>& l, const pair<T, T>& r) { return pair<T, T>(max(l.fi, r.fi), min(l.se, r.se)); } template <class T, class S> pair<T, S> operator+=(pair<T, S>& l, const pair<T, S>& r) { return l = l + r; } template <class T, class S> pair<T, S> operator-=(pair<T, S>& l, const pair<T, S>& r) { return l = l - r; } template <class T> bool intersect(const pair<T, T>& l, const pair<T, T>& r) { return (l.se < r.se ? r.fi < l.se : l.fi < r.se); } #pragma endregion #pragma region SEARCH template <class F> void bit_search(int n, const F& f) { for (int i = 0; i < (1 << n); ++i) { set<int> st; for (int j = 0; j < n; ++j) { if ((i >> j & 1) == 1) { st.insert(j); } } f(st); } } template <typename S, typename T, class F> ll bin_search(S ok, T ng, const F& f) { ll _ok = ok, _ng = ng; while (abs(_ok - _ng) > 1) { ll mid = (_ok + _ng) >> 1; (f(mid) ? _ok : _ng) = mid; } return _ok; } template <typename S, typename T, class F> double bin_search_double(S ok, T ng, const F& f, int iter = 80) { double _ok = ok, _ng = ng; while (iter--) { double mid = (_ok + _ng) / 2; (f(mid) ? _ok : _ng) = mid; } return _ok; } #pragma endregion #pragma region STRING void ERASE(string& s, char c) { s.erase(remove(all(s), c), s.end()); } void ERASE(string& s, const string& chars) { fore(c, chars) s.erase(remove(all(s), c), s.end()); } void ERASE(string& s, const vector<char>& chars) { fore(c, chars) s.erase(remove(all(s), c), s.end()); } template <typename T> void ERASE(vector<T>& v, T x) { v.erase(remove(all(v), x), v.end()); } template <typename T> void ERASE(vector<T>& v, const vector<T>& list) { fore(x, list) v.erase(remove(all(v), x), v.end()); } #pragma endregion #pragma region OUTPUT template <typename T, typename S> ostream& operator<<(ostream& os, const pair<T, S>& p) { os << p.first << " " << p.second; return os; } template <typename T> ostream& operator<<(ostream& os, const vector<T>& v) { for (int i = 0; i < (int)v.size(); i++) { os << v[i] << (((i + 1) != v.size()) ? " " : ""); } return os; } void OUT() { cout << endl; } template <class Head, class... Tail> void OUT(const Head& head, const Tail&... tail) { cout << head; if (sizeof...(tail)) cout << ' '; OUT(tail...); } template <typename T> string pad(T n, int d, char c) { ostringstream sout; sout << setfill(c) << setw(d) << n; return sout.str(); } #pragma endregion #pragma region GRAPH template <typename T = ll> struct Edge { int from, to; T cost; int id; Edge() = default; Edge(int from, int to, T cost = 1, int id = -1) : from(from), to(to), cost(cost), id(id) {} operator int() const { return to; } }; template <typename T = ll> struct Graph { vector<vector<Edge<T>>> g; int edge_id; Graph() = default; explicit Graph(int n) : g(n), edge_id(0) {} size_t size() const { return g.size(); } void add_directed_edge(int from, int to, T cost = 1) { g[from].emplace_back(from, to, cost, edge_id++); } void add_edge(int from, int to, T cost = 1) { g[from].emplace_back(from, to, cost, edge_id); g[to].emplace_back(to, from, cost, edge_id++); } void read(int m, int padding = -1, bool weighted = false, bool directed = false) { for (int i = 0; i < m; ++i) { int a, b; cin >> a >> b; a += padding; b += padding; T c = T(1); if (weighted) cin >> c; if (directed) add_directed_edge(a, b, c); else add_edge(a, b, c); } } inline vector<Edge<T>>& operator[](const int& k) { return g[k]; } inline const vector<Edge<T>>& operator[](const int& k) const { return g[k]; } }; #pragma endregion #pragma region PARSE inline int toi(char c) { return c - '0'; } int toi(string s) { return stoi(s); } ll toll(string s) { return stoll(s); } template <typename T> string tos(T x) { return to_string(x); } inline char toc(int i) { return '0' + i; } template <typename T> string tobit(T x, size_t d) { if (d <= 2) return bitset<2>(x).to_string(); else if (d <= 4) return bitset<4>(x).to_string(); else if (d <= 8) return bitset<8>(x).to_string(); else if (d <= 16) return bitset<16>(x).to_string(); else if (d <= 32) return bitset<32>(x).to_string(); else return bitset<64>(x).to_string(); } #pragma endregion #pragma region MOD template <uint32_t mod> struct LazyMontgomeryModInt { using mint = LazyMontgomeryModInt; using i32 = int32_t; using u32 = uint32_t; using u64 = uint64_t; static constexpr u32 get_r() { u32 ret = mod; for (i32 i = 0; i < 4; ++i) ret *= 2 - mod * ret; return ret; } static constexpr u32 r = get_r(); static constexpr u32 n2 = -u64(mod) % mod; static_assert(r * mod == 1, "invalid, r * mod != 1"); static_assert(mod < (1 << 30), "invalid, mod >= 2 ^ 30"); static_assert((mod & 1) == 1, "invalid, mod % 2 == 0"); u32 a; constexpr LazyMontgomeryModInt() : a(0) {} constexpr LazyMontgomeryModInt(const int64_t& b) : a(reduce(u64(b % mod + mod) * n2)){}; static constexpr u32 reduce(const u64& b) { return (b + u64(u32(b) * u32(-r)) * mod) >> 32; } constexpr mint& operator+=(const mint& b) { if (i32(a += b.a - 2 * mod) < 0) a += 2 * mod; return *this; } constexpr mint& operator-=(const mint& b) { if (i32(a -= b.a) < 0) a += 2 * mod; return *this; } constexpr mint& operator*=(const mint& b) { a = reduce(u64(a) * b.a); return *this; } constexpr mint& operator/=(const mint& b) { *this *= b.inverse(); return *this; } constexpr mint operator+(const mint& b) const { return mint(*this) += b; } constexpr mint operator-(const mint& b) const { return mint(*this) -= b; } constexpr mint operator*(const mint& b) const { return mint(*this) *= b; } constexpr mint operator/(const mint& b) const { return mint(*this) /= b; } constexpr bool operator==(const mint& b) const { return (a >= mod ? a - mod : a) == (b.a >= mod ? b.a - mod : b.a); } constexpr bool operator!=(const mint& b) const { return (a >= mod ? a - mod : a) != (b.a >= mod ? b.a - mod : b.a); } constexpr mint operator-() const { return mint() - mint(*this); } constexpr mint pow(u64 n) const { mint ret(1), mul(*this); while (n > 0) { if (n & 1) ret *= mul; mul *= mul; n >>= 1; } return ret; } constexpr mint inverse() const { return pow(mod - 2); } friend ostream& operator<<(ostream& os, const mint& b) { return os << b.get(); } friend istream& operator>>(istream& is, mint& b) { int64_t t; is >> t; b = LazyMontgomeryModInt<mod>(t); return (is); } constexpr u32 get() const { u32 ret = reduce(a); return ret >= mod ? ret - mod : ret; } static constexpr u32 get_mod() { return mod; } }; __attribute__((target("sse4.2"))) inline __m128i my128_mullo_epu32( const __m128i& a, const __m128i& b) { return _mm_mullo_epi32(a, b); } __attribute__((target("sse4.2"))) inline __m128i my128_mulhi_epu32( const __m128i& a, const __m128i& b) { __m128i a13 = _mm_shuffle_epi32(a, 0xF5); __m128i b13 = _mm_shuffle_epi32(b, 0xF5); __m128i prod02 = _mm_mul_epu32(a, b); __m128i prod13 = _mm_mul_epu32(a13, b13); __m128i prod = _mm_unpackhi_epi64(_mm_unpacklo_epi32(prod02, prod13), _mm_unpackhi_epi32(prod02, prod13)); return prod; } __attribute__((target("sse4.2"))) inline __m128i montgomery_mul_128( const __m128i& a, const __m128i& b, const __m128i& r, const __m128i& m1) { return _mm_sub_epi32( _mm_add_epi32(my128_mulhi_epu32(a, b), m1), my128_mulhi_epu32(my128_mullo_epu32(my128_mullo_epu32(a, b), r), m1)); } __attribute__((target("sse4.2"))) inline __m128i montgomery_add_128( const __m128i& a, const __m128i& b, const __m128i& m2, const __m128i& m0) { __m128i ret = _mm_sub_epi32(_mm_add_epi32(a, b), m2); return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("sse4.2"))) inline __m128i montgomery_sub_128( const __m128i& a, const __m128i& b, const __m128i& m2, const __m128i& m0) { __m128i ret = _mm_sub_epi32(a, b); return _mm_add_epi32(_mm_and_si128(_mm_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("avx2"))) inline __m256i my256_mullo_epu32( const __m256i& a, const __m256i& b) { return _mm256_mullo_epi32(a, b); } __attribute__((target("avx2"))) inline __m256i my256_mulhi_epu32( const __m256i& a, const __m256i& b) { __m256i a13 = _mm256_shuffle_epi32(a, 0xF5); __m256i b13 = _mm256_shuffle_epi32(b, 0xF5); __m256i prod02 = _mm256_mul_epu32(a, b); __m256i prod13 = _mm256_mul_epu32(a13, b13); __m256i prod = _mm256_unpackhi_epi64(_mm256_unpacklo_epi32(prod02, prod13), _mm256_unpackhi_epi32(prod02, prod13)); return prod; } __attribute__((target("avx2"))) inline __m256i montgomery_mul_256( const __m256i& a, const __m256i& b, const __m256i& r, const __m256i& m1) { return _mm256_sub_epi32( _mm256_add_epi32(my256_mulhi_epu32(a, b), m1), my256_mulhi_epu32(my256_mullo_epu32(my256_mullo_epu32(a, b), r), m1)); } __attribute__((target("avx2"))) inline __m256i montgomery_add_256( const __m256i& a, const __m256i& b, const __m256i& m2, const __m256i& m0) { __m256i ret = _mm256_sub_epi32(_mm256_add_epi32(a, b), m2); return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2), ret); } __attribute__((target("avx2"))) inline __m256i montgomery_sub_256( const __m256i& a, const __m256i& b, const __m256i& m2, const __m256i& m0) { __m256i ret = _mm256_sub_epi32(a, b); return _mm256_add_epi32(_mm256_and_si256(_mm256_cmpgt_epi32(m0, ret), m2), ret); } #pragma endregion #pragma endregion using mint = LazyMontgomeryModInt<1000000007>; // using mint = LazyMontgomeryModInt<998244353>; using vmi = vc<mint>; #define int ll template <typename T> struct Enumeration { private: static vector<T> _fact, _finv, _inv; inline static void expand(size_t sz) { if (_fact.size() < sz + 1) { int pre_sz = max((int)1, (int)_fact.size()); _fact.resize(sz + 1, T(1)); _finv.resize(sz + 1, T(1)); _inv.resize(sz + 1, T(1)); for (int i = pre_sz; i <= (int)sz; i++) { _fact[i] = _fact[i - 1] * T(i); } _finv[sz] = T(1) / _fact[sz]; for (int i = (int)sz - 1; i >= pre_sz; i--) { _finv[i] = _finv[i + 1] * T(i + 1); } for (int i = pre_sz; i <= (int)sz; i++) { _inv[i] = _finv[i] * _fact[i - 1]; } } } public: explicit Enumeration(size_t sz = 0) { expand(sz); } static inline T fact(int k) { expand(k); return _fact[k]; } static inline T finv(int k) { expand(k); return _finv[k]; } static inline T inv(int k) { expand(k); return _inv[k]; } static T P(int n, int r) { if (r < 0 || n < r) return 0; return fact(n) * finv(n - r); } static T C(int p, int q) { if (q < 0 || p < q) return 0; return fact(p) * finv(q) * finv(p - q); } static T H(int n, int r) { if (n < 0 || r < 0) return 0; return r == 0 ? 1 : C(n + r - 1, r); } }; template <typename T> vector<T> Enumeration<T>::_fact = vector<T>(); template <typename T> vector<T> Enumeration<T>::_finv = vector<T>(); template <typename T> vector<T> Enumeration<T>::_inv = vector<T>(); signed main() { ios::sync_with_stdio(false); cin.tie(nullptr); cout << fixed << setprecision(9); cerr << fixed << setprecision(9); INT(n, k); OUT(mint(n) * (mint(n).pow(k) - mint(n - 1).pow(k))); return 0; }