#ifdef DEBUG #define _GLIBCXX_DEBUG #define SIGNAL signal( SIGABRT , &AlertAbort ); #define DEXPR( LL , BOUND , VALUE , DEBUG_VALUE ) CEXPR( LL , BOUND , DEBUG_VALUE ) #define ASSERT( A , MIN , MAX ) CERR( "ASSERTチェック: " , ( MIN ) , ( ( MIN ) <= A ? "<=" : ">" ) , A , ( A <= ( MAX ) ? "<=" : ">" ) , ( MAX ) ); assert( ( MIN ) <= A && A <= ( MAX ) ) #define CERR( ... ) VariadicCout( cerr , __VA_ARGS__ ) << endl #define COUT( ... ) VariadicCout( cout << "出力: " , __VA_ARGS__ ) << endl #define CERR_A( A , N ) OUTPUT_ARRAY( cerr , A , N ) << endl #define COUT_A( A , N ) cout << "出力: "; OUTPUT_ARRAY( cout , A , N ) << endl #define CERR_ITR( A ) OUTPUT_ITR( cerr , A ) << endl #define COUT_ITR( A ) cout << "出力: "; OUTPUT_ITR( cout , A ) << endl #else #pragma GCC optimize ( "O3" ) #pragma GCC optimize ( "unroll-loops" ) #pragma GCC target ( "sse4.2,fma,avx2,popcnt,lzcnt,bmi2" ) #define SIGNAL #define DEXPR( LL , BOUND , VALUE , DEBUG_VALUE ) CEXPR( LL , BOUND , VALUE ) #define ASSERT( A , MIN , MAX ) assert( ( MIN ) <= A && A <= ( MAX ) ) #define CERR( ... ) #define COUT( ... ) VariadicCout( cout , __VA_ARGS__ ) << "\n" #define CERR_A( A , N ) #define COUT_A( A , N ) OUTPUT_ARRAY( cout , A , N ) << "\n" #define CERR_ITR( A ) #define COUT_ITR( A ) OUTPUT_ITR( cout , A ) << "\n" #endif #include using namespace std; using uint = unsigned int; using ll = long long; using ull = unsigned long long; using ld = long double; using lld = __float128; template using T2 = pair; template using T3 = tuple; template using T4 = tuple; #define REPEAT_MAIN( BOUND ) int main(){ ios_base::sync_with_stdio( false ); cin.tie( nullptr ); SIGNAL; DEXPR( int , bound_test_case_num , BOUND , min( BOUND , 100 ) ); int test_case_num = 1; if constexpr( bound_test_case_num > 1 ){ SET_ASSERT( test_case_num , 1 , bound_test_case_num ); } REPEAT( test_case_num ){ if constexpr( bound_test_case_num > 1 ){ CERR( "testcase " , VARIABLE_FOR_REPEAT_test_case_num , ":" ); } Solve(); CERR( "" ); } } #define START_WATCH chrono::system_clock::time_point watch = chrono::system_clock::now() #define CURRENT_TIME static_cast( chrono::duration_cast( chrono::system_clock::now() - watch ).count() / 1000.0 ) #define CHECK_WATCH( TL_MS ) ( CURRENT_TIME < TL_MS - 100.0 ) #define TYPE_OF( VAR ) decay_t #define CEXPR( LL , BOUND , VALUE ) constexpr LL BOUND = VALUE #define CIN( LL , ... ) LL __VA_ARGS__; VariadicCin( cin , __VA_ARGS__ ) #define SET_ASSERT( A , MIN , MAX ) cin >> A; ASSERT( A , MIN , MAX ) #define CIN_ASSERT( A , MIN , MAX ) TYPE_OF( MAX ) A; SET_ASSERT( A , MIN , MAX ) #define SET_A( A , N ) SOLVE_ONLY; FOR( VARIABLE_FOR_CIN_A , 0 , N ){ cin >> A[VARIABLE_FOR_CIN_A]; } #define CIN_A( LL , A , N ) LL A[N]; SET_A( A , N ); #define GETLINE_SEPARATE( SEPARATOR , ... ) string __VA_ARGS__; VariadicGetline( cin , SEPARATOR , __VA_ARGS__ ) #define GETLINE( ... ) GETLINE_SEPARATE( '\n' , __VA_ARGS__ ) #define FOR( VAR , INITIAL , FINAL_PLUS_ONE ) for( TYPE_OF( FINAL_PLUS_ONE ) VAR = INITIAL ; VAR < FINAL_PLUS_ONE ; VAR ++ ) #define FOREQ( VAR , INITIAL , FINAL ) for( TYPE_OF( FINAL ) VAR = INITIAL ; VAR <= FINAL ; VAR ++ ) #define FOREQINV( VAR , INITIAL , FINAL ) for( TYPE_OF( INITIAL ) VAR = INITIAL ; VAR >= FINAL ; VAR -- ) #define AUTO_ITR( ARRAY ) auto itr_ ## ARRAY = ARRAY .begin() , end_ ## ARRAY = ARRAY .end() #define FOR_ITR( ARRAY ) for( AUTO_ITR( ARRAY ) , itr = itr_ ## ARRAY ; itr_ ## ARRAY != end_ ## ARRAY ; itr_ ## ARRAY ++ , itr++ ) #define REPEAT( HOW_MANY_TIMES ) FOR( VARIABLE_FOR_REPEAT_ ## HOW_MANY_TIMES , 0 , HOW_MANY_TIMES ) #define SET_PRECISION( DECIMAL_DIGITS ) cout << fixed << setprecision( DECIMAL_DIGITS ) #define OUTPUT_ARRAY( OS , A , N ) FOR( VARIABLE_FOR_OUTPUT_ARRAY , 0 , N ){ OS << A[VARIABLE_FOR_OUTPUT_ARRAY] << (VARIABLE_FOR_OUTPUT_ARRAY==N-1?"":" "); } OS #define OUTPUT_ITR( OS , A ) { auto ITERATOR_FOR_OUTPUT_ITR = A.begin() , END_FOR_OUTPUT_ITR = A.end(); bool VARIABLE_FOR_OUTPUT_ITR = ITERATOR_FOR_COUT_ITR != END_FOR_COUT_ITR; while( VARIABLE_FOR_OUTPUT_ITR ){ OS << *ITERATOR_FOR_COUT_ITR; ( VARIABLE_FOR_OUTPUT_ITR = ++ITERATOR_FOR_COUT_ITR != END_FOR_COUT_ITR ) ? OS : OS << " "; } } OS #define RETURN( ... ) COUT( __VA_ARGS__ ); return // 入出力用 template inline basic_istream& VariadicCin( basic_istream& is ) { return is; } template inline basic_istream& VariadicCin( basic_istream& is , Arg& arg , ARGS&... args ) { return VariadicCin( is >> arg , args... ); } template inline basic_istream& VariadicGetline( basic_istream& is , const char& separator ) { return is; } template inline basic_istream& VariadicGetline( basic_istream& is , const char& separator , Arg& arg , ARGS&... args ) { return VariadicGetline( getline( is , arg , separator ) , separator , args... ); } template inline basic_ostream& VariadicCout( basic_ostream& os , const Arg& arg ) { return os << arg; } template inline basic_ostream& VariadicCout( basic_ostream& os , const Arg1& arg1 , const Arg2& arg2 , const ARGS&... args ) { return VariadicCout( os << arg1 << " " , arg2 , args... ); } // デバッグ用 #ifdef DEBUG inline void AlertAbort( int n ) { CERR( "abort関数が呼ばれました。assertマクロのメッセージが出力されていない場合はオーバーフローの有無を確認をしてください。" ); } void AutoCheck( bool& auto_checked ); #endif #define POWER_MOD( ANSWER , ARGUMENT , EXPONENT , MODULO ) \ ll ANSWER{ 1 }; \ { \ ll ARGUMENT_FOR_SQUARE_FOR_POWER = ( ( ARGUMENT ) % ( MODULO ) ) % ( MODULO ); \ ARGUMENT_FOR_SQUARE_FOR_POWER < 0 ? ARGUMENT_FOR_SQUARE_FOR_POWER += ( MODULO ) : ARGUMENT_FOR_SQUARE_FOR_POWER; \ TYPE_OF( EXPONENT ) EXPONENT_FOR_SQUARE_FOR_POWER = ( EXPONENT ); \ while( EXPONENT_FOR_SQUARE_FOR_POWER != 0 ){ \ if( EXPONENT_FOR_SQUARE_FOR_POWER % 2 == 1 ){ \ ANSWER = ( ANSWER * ARGUMENT_FOR_SQUARE_FOR_POWER ) % ( MODULO ); \ } \ ARGUMENT_FOR_SQUARE_FOR_POWER = ( ARGUMENT_FOR_SQUARE_FOR_POWER * ARGUMENT_FOR_SQUARE_FOR_POWER ) % ( MODULO ); \ EXPONENT_FOR_SQUARE_FOR_POWER /= 2; \ } \ } \ #define FACTORIAL_MOD( ANSWER , ANSWER_INV , INVERSE , MAX_INDEX , CONSTEXPR_LENGTH , MODULO ) \ ll ANSWER[CONSTEXPR_LENGTH]; \ ll ANSWER_INV[CONSTEXPR_LENGTH]; \ ll INVERSE[CONSTEXPR_LENGTH]; \ { \ ll VARIABLE_FOR_PRODUCT_FOR_FACTORIAL = 1; \ ANSWER[0] = VARIABLE_FOR_PRODUCT_FOR_FACTORIAL; \ FOREQ( i , 1 , MAX_INDEX ){ \ ANSWER[i] = ( VARIABLE_FOR_PRODUCT_FOR_FACTORIAL *= i ) %= ( MODULO ); \ } \ ANSWER_INV[0] = ANSWER_INV[1] = INVERSE[1] = VARIABLE_FOR_PRODUCT_FOR_FACTORIAL = 1; \ FOREQ( i , 2 , MAX_INDEX ){ \ ANSWER_INV[i] = ( VARIABLE_FOR_PRODUCT_FOR_FACTORIAL *= INVERSE[i] = ( MODULO ) - ( ( ( ( MODULO ) / i ) * INVERSE[ ( MODULO ) % i ] ) % ( MODULO ) ) ) %= ( MODULO ); \ } \ } \ vector Convolution( const vector& f , const vector& g , const uint& N , const ll& P ) { vector answer( N + 1 ); FOREQ( d , 0 , N ){ FOREQ( i , 0 , d ){ ( answer[d] += f[d-i] * g[i] ) %= P; } } return answer; } inline void Solve() { CEXPR( uint , bound_N , 1000 ); // 0が3個 CIN_ASSERT( N , 1 , bound_N ); CEXPR( ll , bound_P , 1000000000 ); // 0が9個 CIN_ASSERT( P , N , bound_P ); if( N <= 3 ){ RETURN( N == 1 ? 1 : N == 2 ? 3 % P : 18 % P ); } FACTORIAL_MOD( fact , fact_inv , inv , N , bound_N + 1 , P ); ll f[N + 1] = { 0 }; uint H = sqrt( N ); uint K = N / H; vector g_power[K] = { vector( N + 1 ) }; g_power[0][0] = 1; g_power[1].resize( N + 1 ); FOREQ( d , 1 , N ){ POWER_MOD( d_power , d , d == 1 ? 0 : d - 2 , P ); ( ( g_power[1][d] = f[d] = d_power * fact_inv[d] % P ) *= d ) %= P; } FOR( k , 2 , K ){ g_power[k] = Convolution( g_power[k-1] , g_power[1] , N , P ); } vector g_power2[H + 1] = { g_power[0] , Convolution( g_power[K - 1] , g_power[1] , N , P ) }; FOREQ( h , 2 , H ){ g_power2[h] = Convolution( g_power2[h-1] , g_power2[1] , N , P ); } ll answer = 0; uint k = 0; uint h = 0; uint n_max = N; vector fg_h( N + 1 ); FOREQ( d , 0 , N ){ FOREQ( n , k , n_max ){ ( fg_h[n] += f[d] * g_power[k][n] ) %= P; } if( ++k == K || d == N ){ k = 0; ( answer += Convolution( fg_h , g_power2[h++] , N , P )[N] ) < P ? answer : answer -= P; fg_h = vector( N + 1 ); n_max -= K; } } ( answer *= fact[N] ) %= P;; RETURN( answer ); } REPEAT_MAIN(1);