結果

問題 No.502 階乗を計算するだけ
ユーザー nonamaenonamae
提出日時 2021-10-25 21:42:04
言語 C
(gcc 13.3.0)
結果
WA  
実行時間 -
コード長 24,998 bytes
コンパイル時間 2,575 ms
コンパイル使用メモリ 81,252 KB
実行使用メモリ 6,824 KB
最終ジャッジ日時 2024-10-03 19:43:29
合計ジャッジ時間 5,267 ms
ジャッジサーバーID
(参考情報)
judge4 / judge5
このコードへのチャレンジ
(要ログイン)

テストケース

テストケース表示
入力 結果 実行時間
実行使用メモリ
testcase_00 AC 1 ms
6,820 KB
testcase_01 AC 1 ms
6,820 KB
testcase_02 WA -
testcase_03 WA -
testcase_04 WA -
testcase_05 WA -
testcase_06 WA -
testcase_07 WA -
testcase_08 WA -
testcase_09 WA -
testcase_10 WA -
testcase_11 WA -
testcase_12 WA -
testcase_13 WA -
testcase_14 WA -
testcase_15 WA -
testcase_16 WA -
testcase_17 WA -
testcase_18 WA -
testcase_19 WA -
testcase_20 WA -
testcase_21 WA -
testcase_22 WA -
testcase_23 WA -
testcase_24 WA -
testcase_25 WA -
testcase_26 WA -
testcase_27 WA -
testcase_28 WA -
testcase_29 WA -
testcase_30 WA -
testcase_31 WA -
testcase_32 WA -
testcase_33 WA -
testcase_34 WA -
testcase_35 WA -
testcase_36 WA -
testcase_37 WA -
testcase_38 WA -
testcase_39 WA -
testcase_40 WA -
testcase_41 WA -
testcase_42 AC 1 ms
6,816 KB
testcase_43 AC 1 ms
6,820 KB
testcase_44 AC 1 ms
6,816 KB
testcase_45 AC 1 ms
6,816 KB
testcase_46 AC 1 ms
6,816 KB
testcase_47 AC 1 ms
6,816 KB
testcase_48 AC 1 ms
6,820 KB
testcase_49 AC 1 ms
6,816 KB
testcase_50 AC 1 ms
6,820 KB
testcase_51 AC 1 ms
6,820 KB
権限があれば一括ダウンロードができます

ソースコード

diff #

#pragma region opt
#pragma GCC optimize("O3")
#pragma GCC optimize("fast-math")
#pragma GCC optimize("unroll-loops")
#pragma GCC target("avx2")
#pragma endregion opt

#pragma region header
#define _GNU_SOURCE
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <time.h>
#pragma endregion header

#pragma region type
/* signed integer */
typedef   int8_t      i8;
typedef   int16_t     i16;
typedef   int32_t     i32;
typedef   int64_t     i64;
typedef __int128_t    i128;
/* unsigned integer */
typedef   uint8_t     u8;
typedef   uint16_t    u16;
typedef   uint32_t    u32;
typedef   uint64_t    u64;
typedef __uint128_t   u128;
/* floating point number */
typedef   float       f32;
typedef   double      f64;
typedef   long double f80;
#pragma endregion type

#pragma region macro
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define SWAP(a, b) (((a) ^= (b)), ((b) ^= (a)), ((a) ^= (b)))

#define POPCNT32(a) __builtin_popcount((a))
#define POPCNT64(a) __builtin_popcountll((a))
#define CTZ32(a) __builtin_ctz((a))
#define CLZ32(a) __builtin_clz((a))
#define CTZ64(a) __builtin_ctzll((a))
#define CLZ64(a) __builtin_clzll((a))
#define HAS_SINGLE_BIT32(a) (__builtin_popcount((a)) == (1))
#define HAS_SINGLE_BIT64(a) (__builtin_popcountll((a)) == (1))
#define MSB32(a) ((31) - __builtin_clz((a)))
#define MSB64(a) ((63) - __builtin_clzll((a)))
#define BIT_WIDTH32(a) ((a) ? ((32) - __builtin_clz((a))) : (0))
#define BIT_WIDTH64(a) ((a) ? ((64) - __builtin_clzll((a))) : (0))
#define LSBit(a) ((a) & (-(a)))
#define CLSBit(a) ((a) & ((a) - (1)))
#define BIT_CEIL32(a) ((!(a)) ? (1) : ((POPCNT32(a)) == (1) ? ((1u) << ((31) - CLZ32((a)))) : ((1u) << ((32) - CLZ32(a)))))
#define BIT_CEIL64(a) ((!(a)) ? (1) : ((POPCNT64(a)) == (1) ? ((1ull) << ((63) - CLZ64((a)))) : ((1ull) << ((64) - CLZ64(a)))))
#define BIT_FLOOR32(a) ((!(a)) ? (0) : ((1u) << ((31) - CLZ32((a)))))
#define BIT_FLOOR64(a) ((!(a)) ? (0) : ((1ull) << ((63) - CLZ64((a)))))
#define _ROTL64(x, s) (((x) << ((s) % (64))) | (((x) >> ((64) - ((s) % (64))))))
#define _ROTR64(x, s) (((x) >> ((s) % (64))) | (((x) << ((64) - ((s) % (64))))))
#define ROTL64(x, s) (((s) == (0)) ? (x) : ((((i128)(s)) < (0)) ? (_ROTR64((x), -(s))) : (_ROTL64((x), (s)))))
#define ROTR64(x, s) (((s) == (0)) ? (x) : ((((i128)(s)) < (0)) ? (_ROTL64((x), -(s))) : (_ROTR64((x), (s)))))
#pragma endregion macro

#pragma region io
static inline int read_int(void) {
  // -2147483648 ~ 2147483647 (> 10 ^ 9)
  int c, x = 0, f = 1;
  while (c = getchar_unlocked(), c < 48 || c > 57) if (c == 45) f = -f;
  while (47 < c && c < 58) {
    x = x * 10 + c - 48;
    c = getchar_unlocked();
  }
  return f * x;
}
static inline i32 in_i32(void) {
  // -2147483648 ~ 2147483647 (> 10 ^ 9)
  i32 c, x = 0, f = 1;
  while (c = getchar_unlocked(), c < 48 || c > 57) if (c == 45) f = -f;
  while (47 < c && c < 58) {
    x = x * 10 + c - 48;
    c = getchar_unlocked();
  }
  return f * x;
}
static inline u32 in_u32(void) {
  // 0 ~ 4294967295 (> 10 ^ 9)
  u32 c, x = 0;
  while (c = getchar_unlocked(), c < 48 || c > 57);
  while (47 < c && c < 58) {
    x = x * 10 + c - 48;
    c = getchar_unlocked();
  }
  return x;
}
static inline i64 in_i64(void) {
  // -9223372036854775808 ~ 9223372036854775807 (> 10 ^ 18)
  i64 c, x = 0, f = 1;
  while (c = getchar_unlocked(), c < 48 || c > 57) if (c == 45) f = -f;
  while (47 < c && c < 58) {
    x = x * 10 + c - 48;
    c = getchar_unlocked();
  }
  return f * x;
}
static inline u64 in_u64(void) {
  // 0 ~ 18446744073709551615 (> 10 ^ 19)
  u64 c, x = 0;
  while (c = getchar_unlocked(), c < 48 || c > 57);
  while (47 < c && c < 58) {
    x = x * 10 + c - 48;
    c = getchar_unlocked();
  }
  return x;
}
static inline void write_int_inner(int x) {
  if (x >= 10) write_int_inner(x / 10);
  putchar_unlocked(x - x / 10 * 10 + 48);
}
static inline void write_int(int x) {
  if (x < 0) {
    putchar_unlocked('-');
    x = -x;
  }
  write_int_inner(x);
}
static inline void out_i32_inner(i32 x) {
  if (x >= 10) out_i32_inner(x / 10);
  putchar_unlocked(x - x / 10 * 10 + 48);
}
static inline void out_i32(i32 x) {
  if (x < 0) {
    putchar_unlocked('-');
    x = -x;
  }
  out_i32_inner(x);
}
static inline void out_u32(u32 x) {
  if (x >= 10) out_u32(x / 10);
  putchar_unlocked(x - x / 10 * 10 + 48);
}
static inline void out_i64_inner(i64 x) {
  if (x >= 10) out_i64_inner(x / 10);
  putchar_unlocked(x - x / 10 * 10 + 48);
}
static inline void out_i64(i64 x) {
  if (x < 0) {
    putchar_unlocked('-');
    x = -x;
  }
  out_i64_inner(x);
}
static inline void out_u64(u64 x) {
  if (x >= 10) out_u64(x / 10);
  putchar_unlocked(x - x / 10 * 10 + 48);
}
static inline void NL(void) { putchar_unlocked('\n'); }
static inline void SP(void) { putchar_unlocked(' '); }
#pragma endregion io

#pragma region dump
void dump_int(int x) { fprintf(stderr, "\033[1;31m%d\033[0m\n", x); }
void dump_i32(i32 x) { fprintf(stderr, "\033[1;32m%d\033[0m\n", x); }
void dump_i64(i64 x) { fprintf(stderr, "\033[1;33m%ld\033[0m\n", x); }
void dump_u32(u32 x) { fprintf(stderr, "\033[1;34m%u\033[0m\n", x); }
void dump_u64(u64 x) { fprintf(stderr, "\033[1;35m%lu\033[0m\n", x); }
void dump_int_array(int *a, int a_len) {
  fprintf(stderr, "\033[1;31m");
  for (int i = 0; i < a_len; i++) {
    if (i == a_len - 1) fprintf(stderr, "%d\n", a[i]);
    else fprintf(stderr, "%d ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_i32_array(i32 *a, int a_len) {
  fprintf(stderr, "\033[1;32m");
  for (int i = 0; i < a_len; i++) {
    if (i == a_len - 1) fprintf(stderr, "%d\n", a[i]);
    else fprintf(stderr, "%d ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_i64_array(i64 *a, int a_len) {
  fprintf(stderr, "\033[1;33m");
  for (int i = 0; i < a_len; i++) {
    if (i == a_len - 1) fprintf(stderr, "%ld\n", a[i]);
    else fprintf(stderr, "%ld ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_u32_array(u32 *a, int a_len) {
  fprintf(stderr, "\033[1;34m");
  for (int i = 0; i < a_len; i++) {
    if (i == a_len - 1) fprintf(stderr, "%u\n", a[i]);
    else fprintf(stderr, "%u ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_u64_array(u64 *a, int a_len) {
  fprintf(stderr, "\033[1;35m");
  for (int i = 0; i < a_len; i++) {
    if (i == a_len - 1) fprintf(stderr, "%lu\n", a[i]);
    else fprintf(stderr, "%lu ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_int_array_range(int *a, int a_len, int l, int r) {
  if (r < l || r <= 0 || l >= a_len) return;
  if (l < 0) l = 0;
  if (r >= a_len) r = a_len - 1;
  fprintf(stderr, "\033[1;31m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%d\n", a[i]);
    else fprintf(stderr, "%d ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_i32_array_range(i32 *a, int a_len, int l, int r) {
  if (r < l || r <= 0 || l >= a_len) return;
  if (l < 0) l = 0;
  if (r >= a_len) r = a_len - 1;
  fprintf(stderr, "\033[1;32m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%d\n", a[i]);
    else fprintf(stderr, "%d ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_i64_array_range(i64 *a, int a_len, int l, int r) {
  if (r < l || r <= 0 || l >= a_len) return;
  if (l < 0) l = 0;
  if (r >= a_len) r = a_len - 1;
  fprintf(stderr, "\033[1;33m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%ld\n", a[i]);
    else fprintf(stderr, "%ld ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_u32_array_range(u32 *a, int a_len, int l, int r) {
  if (r < l || r <= 0 || l >= a_len) return;
  if (l < 0) l = 0;
  if (r >= a_len) r = a_len - 1;
  fprintf(stderr, "\033[1;34m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%u\n", a[i]);
    else fprintf(stderr, "%u ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void dump_u64_array_range(u64 *a, int a_len, int l, int r) {
  if (r < l || r <= 0 || l >= a_len) return;
  if (l < 0) l = 0;
  if (r >= a_len) r = a_len - 1;
  fprintf(stderr, "\033[1;35m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%lu\n", a[i]);
    else fprintf(stderr, "%lu ", a[i]);
  }
  fprintf(stderr, "\033[0m");
}
void printb(u64 v) {
  u64 mask = (u64)1 << (sizeof(v) * CHAR_BIT - 1);
  do
    putchar(mask & v ? '1' : '0');
  while (mask >>= 1);
}
void putb(u64 v) { putchar('0'), putchar('b'), printb(v), putchar('\n'); }
#pragma endregion dump

#pragma region montgomery32
typedef uint32_t m32;
m32 _one_m32(u32 mod) {
  return -1u % mod + 1;
}
m32 _r2_m32(u32 mod) {
  return (u64)(i64)-1 % mod + 1;
}
m32 _inv_m32(u32 mod) {
  u32 u = 1, v = 0, x = 1u << 31;
  for (int i = 0; i < 32; i++) {
    if (u & 1) u = (u + mod) >> 1, v = (v >> 1) + x;
    else u >>= 1, v >>= 1;
  }
  return -v;
}
m32 _reduce_m32(u64 a, m32 inv, u32 mod) {
  i64 z = (a >> 32) - ((((u32)a * inv) * (u64)mod) >> 32);
  return z < 0 ? z + mod : z;
}
m32 to_m32(u32 a, m32 r2, m32 inv, u32 mod) { return _reduce_m32((u64)a * r2, inv, mod); }
u32 from_m32(m32 A, m32 inv, u32 mod) {
  m32 t = _reduce_m32((u64)A, inv, mod) - mod;
  return t + (mod & -(t >> 31u));
}
m32 add_m32(m32 A, m32 B, u32 mod2) {
  // assert(mod2 == (mod << 1));
  A += B - mod2;
  A += mod2 & -(A >> 31u);
  return A;
}
m32 sub_m32(m32 A, m32 B, u32 mod2) {
  // assert(mod2 == (mod << 1));
  A -= B;
  A += mod2 & -(A >> 31u);
  return A;
}
m32 min_m32(m32 A, u32 mod2) {
  // assert(mod2 == (mod << 1));
  return sub_m32(0u, A, mod2);
}
m32 mul_m32(m32 A, m32 B, m32 inv, u32 mod) { return _reduce_m32((u64)A * B, inv, mod); }
m32 pow_m32(m32 A, i64 n, m32 inv, u32 mod) {
  m32 ret = _one_m32(mod);
  while (n > 0) {
    if (n & 1) ret = mul_m32(ret, A, inv, mod);
    A = mul_m32(A, A, inv, mod);
    n >>= 1;
  }
  return ret;
}
m32 inv_m32(m32 A, m32 inv, u32 mod) { return pow_m32(A, (i64)mod - 2, inv, mod); }
m32 div_m32(m32 A, m32 B, m32 inv, u32 mod) {
  /* assert(is_prime(mod)); */
  return mul_m32(A, inv_m32(B, inv, mod), inv, mod);
}
m32 in_m32(m32 r2, m32 inv, u32 mod) {
  u32 c, a = 0;
  while (c = getchar_unlocked(), c < 48 || c > 57);
  while (47 < c && c < 58) {
    a = a * 10 + c - 48;
    c = getchar_unlocked();
  }
  return to_m32(a, r2, inv, mod);
}
void out_m32(m32 A, m32 inv, u32 mod) {
  u32 a = from_m32(A, inv, mod);
  out_u32(a);
}
void dump_m32(m32 x, m32 inv, u32 mod) { fprintf(stderr, "\033[1;34m%u\033[0m\n", from_m32(x, inv, mod)); }
void dump_m32_array(m32 *x, int x_len, m32 inv, u32 mod) {
  fprintf(stderr, "\033[1;37m");
  for (int i = 0; i < x_len; i++) {
    if (i == x_len - 1) fprintf(stderr, "%u\n", x[i]);
    else fprintf(stderr, "%u ", x[i]);
  }
  fprintf(stderr, "\033[0m");
  fprintf(stderr, "\033[1;33m");
  for (int i = 0; i < x_len; i++) {
    if (i == x_len - 1) fprintf(stderr, "%u\n", from_m32(x[i], inv, mod));
    else fprintf(stderr, "%u ", from_m32(x[i], inv, mod));
  }
  fprintf(stderr, "\033[0m");
}
void dump_m32_array_range(m32 *x, int x_len, int l, int r, m32 inv, u32 mod) {
  if (r < l || r <= 0 || l >= x_len) return;
  if (l < 0) l = 0;
  if (r >= x_len) r = x_len - 1;
  fprintf(stderr, "\033[1;31m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%u\n", x[i]);
    else fprintf(stderr, "%u ", x[i]);
  }
  fprintf(stderr, "\033[0m");
  fprintf(stderr, "\033[1;35m");
  for (int i = l; i <= r; i++) {
    if (i == r) fprintf(stderr, "%u\n", from_m32(x[i], inv, mod));
    else fprintf(stderr, "%u ", from_m32(x[i], inv, mod));
  }
  fprintf(stderr, "\033[0m");
}
#pragma endregion montgomery32

#pragma region ntt1000000007
const u32 pr1 = 3u;
const u32 pr2 = 6u;
const u32 pr3 = 7u;
const u32 m1 = 1045430273u; const u32 m1_d = 2090860546u; const m32 m1_r2 = 798281873u; const m32 m1_inv = 3249537025u; const m32 m1_one = 113246204u;
const u32 m2 = 1051721729u; const u32 m2_d = 2103443458u; const m32 m2_r2 = 748646691u; const m32 m2_inv = 3243245569u; const m32 m2_one = 88080380u;
const u32 m3 = 1053818881u; const u32 m3_d = 2107637762u; const m32 m3_r2 = 159648582u; const m32 m3_inv = 3241148417u; const m32 m3_one = 79691772u;
const m32 sum_e1[]  = { 885458361u, 900924659u, 962059782u, 415593171u,  1042963020u, 670679244u, 541100730u, 791232404u, 1017138063u, 1009790060u, 862479644u, 14783221u,  244867424u, 431174616u, 403070125u, 826977693u, 130509512u, 234433839u, 583350782u };
const m32 sum_e2[]  = { 467413740u, 850027207u, 242082110u, 469704687u,  798875242u,  948071666u, 405859818u, 794843055u, 771520840u,  176008702u,  644573208u, 715864305u, 237085256u, 357934180u, 654888890u, 961560509u, 740357787u, 329663983u, 679393590u };
const m32 sum_e3[]  = { 882957072u, 871538451u, 673579336u, 73537272u,   425574675u,  697950553u, 938595210u, 127556594u, 879998874u,  1019737367u, 353383985u, 36465421u,  901066455u, 93757427u,  979102987u, 501430327u, 40143827u,  92524426u,  698947874u };
const m32 sum_ie1[] = { 159971912u, 22701698u,  360170016u, 9164511u,    967212496u,  707952636u, 928400066u, 788102537u, 248599354u,  954265785u,  539282025u, 749013941u, 66727014u,  835563346u, 909028729u, 532555753u, 488995841u, 863413679u, 575546441u };
const m32 sum_ie2[] = { 584307989u, 414819672u, 727414907u, 1042290158u, 763418088u,  582659108u, 106169741u, 486228312u, 106468254u,  164241005u,  856035592u, 117294265u, 988587910u, 117912639u, 562619904u, 608072619u, 298409394u, 523681378u, 94283612u  };
const m32 sum_ie3[] = { 170861809u, 516220135u, 927824564u, 603681864u,  586404698u,  720550924u, 639641177u, 182451670u, 257278119u,  437349354u,  600413897u, 648372120u, 399254828u, 182276433u, 483901237u, 893329600u, 390180935u, 919465749u, 333795344u };
void ntt1(m32 *A, int A_len) {
  int h = 0;
  while (A_len > (1 << h)) h++;
  for (int ph = 1; ph <= h; ph++) {
    int w = 1 << (ph - 1);
    int p = 1 << (h - ph);
    m32 now = m1_one;
    for (int s = 0; s < w; s++) {
      int offset = s << (h - ph + 1);
      for (int i = 0; i < p; i++) {
        m32 l = A[i + offset];
        m32 r = mul_m32(A[i + offset + p], now, m1_inv, m1);
        A[i + offset] = add_m32(l, r, m1_d);
        A[i + offset + p] = sub_m32(l, r, m1_d);
      }
      now = mul_m32(now, sum_e1[CTZ32(~s)], m1_inv, m1);
    }
  }
}
void ntt2(m32 *A, int A_len) {
  int h = 0;
  while (A_len > (1 << h)) h++;  
  for (int ph = 1; ph <= h; ph++) {
    int w = 1 << (ph - 1);
    int p = 1 << (h - ph);
    m32 now = m2_one;
    for (int s = 0; s < w; s++) {
      int offset = s << (h - ph + 1);
      for (int i = 0; i < p; i++) {
        m32 l = A[i + offset];
        m32 r = mul_m32(A[i + offset + p], now, m2_inv, m2);
        A[i + offset] = add_m32(l, r, m2_d);
        A[i + offset + p] = sub_m32(l, r, m2_d);
      }
      now = mul_m32(now, sum_e2[CTZ32(~s)], m2_inv, m2);
    }
  }
}
void ntt3(m32 *A, int A_len) {
  int h = 0;
  while (A_len > (1 << h)) h++;
  for (int ph = 1; ph <= h; ph++) {
    int w = 1 << (ph - 1);
    int p = 1 << (h - ph);
    m32 now = m3_one;
    for (int s = 0; s < w; s++) {
      int offset = s << (h - ph + 1);
      for (int i = 0; i < p; i++) {
        m32 l = A[i + offset];
        m32 r = mul_m32(A[i + offset + p], now, m3_inv, m3);
        A[i + offset] = add_m32(l, r, m3_d);
        A[i + offset + p] = sub_m32(l, r, m3_d);
      }
      now = mul_m32(now, sum_e3[CTZ32(~s)], m3_inv, m3);
    }
  }
}
void intt1(m32 *A, int A_len) {
  int h = 0;
  while (A_len > (1 << h)) h++;
  for (int ph = h; ph >= 1; ph--) {
    int w = 1 << (ph - 1);
    int p = 1 << (h - ph);
    m32 inow = m1_one;
    for (int s = 0; s < w; s++) {
      int offset = s << (h - ph + 1);
      for (int i = 0; i < p; i++) {
        m32 l = A[i + offset];
        m32 r = A[i + offset + p];
        A[i + offset] = add_m32(l, r, m1_d);
        A[i + offset + p] = mul_m32(sub_m32(l, r, m1_d), inow, m1_inv, m1);
      }
      inow = mul_m32(inow, sum_ie1[CTZ32(~s)], m1_inv, m1);
    }
  }
  m32 inv2t = inv_m32(to_m32(A_len, m1_r2, m1_inv, m1), m1_inv, m1);
  for (int i = 0; i < A_len; i++) A[i] = from_m32(mul_m32(A[i], inv2t, m1_inv, m1), m1_inv, m1);
}
void intt2(m32 *A, int A_len) {
  int h = 0;
  while (A_len > (1 << h)) h++;
  for (int ph = h; ph >= 1; ph--) {
    int w = 1 << (ph - 1);
    int p = 1 << (h - ph);
    m32 inow = m2_one;
    for (int s = 0; s < w; s++) {
      int offset = s << (h - ph + 1);
      for (int i = 0; i < p; i++) {
        m32 l = A[i + offset];
        m32 r = A[i + offset + p];
        A[i + offset] = add_m32(l, r, m2_d);
        A[i + offset + p] = mul_m32(sub_m32(l, r, m2_d), inow, m2_inv, m2);
      }
      inow = mul_m32(inow, sum_ie2[CTZ32(~s)], m2_inv, m2);
    }
  }
  m32 inv2t = inv_m32(to_m32(A_len, m2_r2, m2_inv, m2), m2_inv, m2);
  for (int i = 0; i < A_len; i++) A[i] = from_m32(mul_m32(A[i], inv2t, m2_inv, m2), m2_inv, m2);
}
void intt3(m32 *A, int A_len) {
  int h = 0;
  while (A_len > (1 << h)) h++;  
  for (int ph = h; ph >= 1; ph--) {
    int w = 1 << (ph - 1);
    int p = 1 << (h - ph);
    m32 inow = m3_one;
    for (int s = 0; s < w; s++) {
      int offset = s << (h - ph + 1);
      for (int i = 0; i < p; i++) {
        m32 l = A[i + offset];
        m32 r = A[i + offset + p];
        A[i + offset] = add_m32(l, r, m3_d);
        A[i + offset + p] = mul_m32(sub_m32(l, r, m3_d), inow, m3_inv, m3);
      }
      inow = mul_m32(inow, sum_ie3[CTZ32(~s)], m3_inv, m3);
    }
  }
  m32 inv2t = inv_m32(to_m32(A_len, m3_r2, m3_inv, m3), m3_inv, m3);
  for (int i = 0; i < A_len; i++) A[i] = from_m32(mul_m32(A[i], inv2t, m3_inv, m3), m3_inv, m3);
}
u32 *convolute1(m32 *A, int A_len, m32 *B, int B_len) {
  int C_len = BIT_CEIL32(A_len + B_len - 1);
  m32 *C = (m32 *)calloc(C_len, sizeof(m32));
  m32 *D = (m32 *)calloc(C_len, sizeof(m32));
#ifdef LOCAL
  if (C == NULL || D == NULL) exit(EXIT_FAILURE);
#endif
  memcpy(C, A, sizeof(m32) * A_len);
  memcpy(D, B, sizeof(m32) * B_len);
  ntt1(C, C_len);
  ntt1(D, C_len);
  for (int i = 0; i < C_len; i++) C[i] = mul_m32(C[i], D[i], m1_inv, m1);
  free(D);
  intt1(C, C_len);
  return C;
}
u32 *convolute2(m32 *A, int A_len, m32 *B, int B_len) {
  int C_len = BIT_CEIL32(A_len + B_len - 1);
  m32 *C = (m32 *)calloc(C_len, sizeof(m32));
  m32 *D = (m32 *)calloc(C_len, sizeof(m32));
#ifdef LOCAL
  if (C == NULL || D == NULL) exit(EXIT_FAILURE);
#endif
  memcpy(C, A, sizeof(m32) * A_len);
  memcpy(D, B, sizeof(m32) * B_len);
  ntt2(C, C_len);
  ntt2(D, C_len);
  for (int i = 0; i < C_len; i++) C[i] = mul_m32(C[i], D[i], m2_inv, m2);
  free(D);
  intt2(C, C_len);
  return C;
}
u32 *convolute3(m32 *A, int A_len, m32 *B, int B_len) {
  int C_len = BIT_CEIL32(A_len + B_len - 1);
  m32 *C = (m32 *)calloc(C_len, sizeof(m32));
  m32 *D = (m32 *)calloc(C_len, sizeof(m32));
#ifdef LOCAL
  if (C == NULL || D == NULL) exit(EXIT_FAILURE);
#endif
  memcpy(C, A, sizeof(m32) * A_len);
  memcpy(D, B, sizeof(m32) * B_len);
  ntt3(C, C_len);
  ntt3(D, C_len);
  for (int i = 0; i < C_len; i++) C[i] = mul_m32(C[i], D[i], m3_inv, m3);
  free(D);
  intt3(C, C_len);
  return C;
}
u32 *arbitrary_mod_convolute(u32 *a, int a_len, u32 *b, int b_len, u32 mod) {
  u32 m0 = mod;
  u32 m0_d = m0 << 1;
  m32 m0_r2 = _r2_m32(m0);
  m32 m0_inv = _inv_m32(m0);

  m32 *A1 = (m32 *)calloc(a_len, sizeof(m32));
  m32 *A2 = (m32 *)calloc(a_len, sizeof(m32));
  m32 *A3 = (m32 *)calloc(a_len, sizeof(m32));
  m32 *B1 = (m32 *)calloc(b_len, sizeof(m32));
  m32 *B2 = (m32 *)calloc(b_len, sizeof(m32));
  m32 *B3 = (m32 *)calloc(b_len, sizeof(m32));
#ifdef LOCAL
  if (A1 == NULL || A2 == NULL || A3 == NULL || B1 == NULL || B2 == NULL || B3 == NULL)) { exit(EXIT_FAILURE); }
#endif
  for (int i = 0; i < a_len; i++) {
    A1[i] = to_m32(a[i], m1_r2, m1_inv, m1);
    A2[i] = to_m32(a[i], m2_r2, m2_inv, m2);
    A3[i] = to_m32(a[i], m3_r2, m3_inv, m3);
  }
  for (int i = 0; i < b_len; i++) {
    B1[i] = to_m32(b[i], m1_r2, m1_inv, m1);
    B2[i] = to_m32(b[i], m2_r2, m2_inv, m2);
    B3[i] = to_m32(b[i], m3_r2, m3_inv, m3);
  }
  u32 *x = convolute1(A1, a_len, B1, b_len);
  u32 *y = convolute2(A2, a_len, B2, b_len);
  u32 *z = convolute3(A3, a_len, B3, b_len);
  free(A1);
  free(A2);
  free(A3);
  free(B1);
  free(B2);
  free(B3);
  m32 m1_inv_m2 = inv_m32(to_m32(m1, m2_r2, m2_inv, m2), m2_inv, m2);
  m32 m12_inv_m3 = inv_m32(mul_m32(to_m32(m1, m3_r2, m3_inv, m3), to_m32(m2, m3_r2, m3_inv, m3), m3_inv, m3), m3_inv, m3);
  m32 m1_m3 = to_m32(m1, m3_r2, m3_inv, m3);
  m32 m1_m0 = to_m32(m1, m0_r2, m0_inv, m0);
  m32 m12_m0 = mul_m32(to_m32(m1, m0_r2, m0_inv, m0), to_m32(m2, m0_r2, m0_inv, m0), m0_inv, m0);
  u32 *ret = (u32 *)calloc(a_len + b_len - 1, sizeof(u32));
#ifdef LOCAL
  if (ret == NULL) exit(EXIT_FAILURE);
#endif
  for (u32 i = 0; i < a_len + b_len - 1; i++) {
    m32 xi_m2 = to_m32(x[i], m2_r2, m2_inv, m2);
    m32 yi_m2 = to_m32(y[i], m2_r2, m2_inv, m2);
    m32 zi_m3 = to_m32(z[i], m3_r2, m3_inv, m3);
    m32 xi_m3 = to_m32(x[i], m3_r2, m3_inv, m3);
    u32 v1 = from_m32(mul_m32(sub_m32(yi_m2, xi_m2, m2_d), m1_inv_m2, m2_inv, m2), m2_inv, m2);
    m32 v1_m3 = to_m32(v1,   m3_r2, m3_inv, m3);
    u32 v2 = from_m32(mul_m32(sub_m32(zi_m3, add_m32(xi_m3, mul_m32(m1_m3, v1_m3, m3_inv, m3), m3_d), m3_d), m12_inv_m3, m3_inv, m3), m3_inv, m3);
    m32 v2_m0 = to_m32(v2,   m0_r2, m0_inv, m0);
    m32 xi_m0 = to_m32(x[i], m0_r2, m0_inv, m0);
    m32 v1_m0 = to_m32(v1,   m0_r2, m0_inv, m0);
    ret[i] = from_m32(add_m32(add_m32(xi_m0, mul_m32(m1_m0, v1_m0, m0_inv, m0), m0_d), mul_m32(m12_m0, v2_m0, m0_inv, m0), m0_d), m0_inv, m0);
  }
  free(x);
  free(y);
  free(z);
  return ret;
}
#pragma endregion ntt1000000007

#pragma region sample point shift
m32 _fact[1<<20];
m32 _inv_fact[1<<20];
m32 _inv_table[1<<20];
void pre_fact(int n, m32 r2, m32 inv, u32 mod) {
  _fact[0] = 1;
  for (int i = 0; i <= n + 1; i++) _fact[i + 1] = mul_m32(_fact[i], to_m32(i + 1, r2, inv, mod), inv, mod);
  _inv_fact[n + 2] = inv_m32(_fact[n + 2], inv, mod);
  for (int i = n + 2; i > 0; i--) _inv_fact[i - 1] = mul_m32(_inv_fact[i], to_m32(i, r2, inv, mod), inv, mod);
  for (int i = 1; i <= n + 1; i++) _inv_table[i] = mul_m32(_inv_fact[i], _fact[i - 1], inv, mod);
}
m32 *sample_point_shift(m32 *A, int A_len, m32 m, m32 r2, m32 inv, u32 mod) {
  u32 *f = (m32 *)calloc(A_len, sizeof(m32));
  u32 *g = (m32 *)calloc((A_len << 1) - 1, sizeof(m32));
#ifdef LOCAL
  if (F == NULL || G == NULL || f == NULL || g == NULL) exit(EXIT_FAILURE);
#endif
  for (int i = 0; i < A_len; i++) {
    f[i] = mul_m32(mul_m32(A[i], _inv_fact[i], inv, mod), _inv_fact[A_len - 1 - i], inv, mod);
    if ((A_len - 1 - i) & 1) f[i] = min_m32(f[i], mod << 1);
    f[i] = from_m32(f[i], inv, mod);
  }
  m32 d_m = to_m32(A_len - 1, r2, inv, mod);
  for (int i = 0; i < (A_len << 1) - 1; i++) {
    m32 i_m = to_m32(i, r2, inv, mod);
    g[i] = inv_m32(add_m32(sub_m32(m, d_m, mod << 1), i_m, mod << 1), inv, mod);
    g[i] = from_m32(g[i], inv, mod);
  }
  u32 *h = arbitrary_mod_convolute(f, A_len, g, (A_len << 1) - 1, mod);
  m32 coef = _one_m32(mod);
  for (int i = 0; i < (A_len << 1) - 1; i++) h[i] = to_m32(h[i], r2, inv, mod);
  for (int i = 0; i < A_len; i++) {
    m32 i_m = to_m32(i, r2, inv, mod);
    m32 mdi = add_m32(sub_m32(m, d_m, mod << 1), i_m, mod << 1);
    coef = mul_m32(coef, mdi, inv, mod);
  }
  for (int i = 0; i < A_len; i++) {
    m32 i1 = to_m32(i + 1, r2, inv, mod);
    h[i + A_len - 1] = mul_m32(h[i + A_len - 1], coef, inv, mod);
    coef = mul_m32(coef, mul_m32(add_m32(m, i1, mod << 1), g[i], inv, mod), inv, mod);
  }
  for (int i = A_len - 1; i < (A_len << 1) - 1; i++) f[i - A_len + 1] = h[i];
  free(g);
  free(h);
  return f;
}
m32 factorial_mod(i64 n, m32 r2, m32 inv, u32 mod) {
  if (n <= 1) return _one_m32(mod);
  if (n >= mod) return 0;
  i64 v = 1;
  while (v * v < n) v <<= 1;
  m32 iv = inv_m32(to_m32(v, r2, inv, mod), inv, mod);
  m32 *G = (m32 *)calloc(v + 1, sizeof(m32));
  G[0] = _one_m32(mod);
  G[1] = to_m32(v + 1, r2, inv, mod);
  for (i64 d = 1; d != v; d <<= 1) {
    m32 d_m = to_m32(d, r2, inv, mod);
    m32 *g1 = sample_point_shift(G, d + 1, mul_m32(d_m, iv, inv, mod), r2, inv, mod);
    m32 *g2 = sample_point_shift(G, d + 1, mul_m32(add_m32(mul_m32(d_m, v, inv, mod), v, mod << 1), iv, inv, mod), r2, inv, mod);
    m32 *g3 = sample_point_shift(G, d + 1, mul_m32(add_m32(add_m32(mul_m32(d_m, v, inv, mod), d_m, mod << 1), v, mod << 1), iv, inv, mod), r2, inv, mod);
    for (int i = 0; i <= d; i++) {
      G[i] = mul_m32(G[i], g1[i], inv, mod);
      g2[i] = mul_m32(g2[i], g3[i], inv, mod);
    }
    for (int i = 0; i < d; i++) G[d + i] = g2[i];
  }
  m32 ret = _one_m32(mod);
  i64 i = 0;
  while (i + v <= n) ret = mul_m32(ret, G[i / v], inv, mod), i += v;
  while (i < n) ret = mul_m32(ret, to_m32(++i, r2, inv, mod), inv, mod);
  return ret;
}
#pragma endregion sample point shift

void Main(void) {
  i64 n = in_i64();
  u32 mod = 1000000007u;
  m32 r2 = _r2_m32(mod);
  m32 inv = _inv_m32(mod);
  out_m32(factorial_mod(n, r2, inv, mod), inv, mod);
  NL();
}

int main(void) {
  Main();
  return 0;
}
0