#pragma GCC target("avx2")
#pragma GCC optimize("O3")
#include <immintrin.h>
#include <cstdint>
#include <cstdio>

/*
 * Buffered integer reader over stdin.
 *
 * The buffer is refilled whenever the cursor reaches the end of the data that
 * was read. One byte is kept in reserve so the data can always be
 * NUL-terminated, which guarantees that digit scanning stops at end of input.
 */
struct FastI {
    char buf[1000000];
    char *ptr = buf, *end;
    bool eof = false;  // set once fread() delivers no more bytes

    // Refill the buffer from stdin and reset the cursor to its start.
    void read_() {
        size_t got = fread(buf, 1, sizeof(buf) - 1, stdin);
        ptr = buf;
        end = buf + got;
        *end = '\0';  // never a digit or '-', so scanners cannot run into stale bytes
        if (got == 0) eof = true;
    }

    FastI() { read_(); }

    // Advance the cursor by one byte, refilling when the buffer is exhausted.
    void inc() {
        if (++ptr == end) read_();
    }

    /*
     * Skip everything up to the next digit or '-', then parse a decimal
     * integer of type value_t. Returns 0 if the input ends before a number
     * starts. No overflow checking is performed.
     */
    template <typename value_t>
    value_t read() {
        bool neg = false;
        value_t res = 0;
        while (!eof && (*ptr < '0' || *ptr > '9') && *ptr != '-') inc();
        if (eof) return 0;
        if (*ptr == '-') neg = true, inc();
        while (*ptr >= '0' && *ptr <= '9') res = res * 10 + *ptr - '0', inc();
        return neg ? -res : res;
    }
} fasti;

#define ri fasti.read<int>
#define rs64 fasti.read<int64_t>

#define N 100030
#define MOD 1000000007
/* Number of dp entries finished per outer iteration: 14 broadcast registers
 * plus `mod2` and the accumulator make 16 vector values live in the hot loop. */
#define BLOCK 14
/* Largest index whose dp value is complete after calc0(): for every block the
 * vector loop stops at most 8 entries short of N. */
#define MAX_TARGET (N - 9)

static_assert(N % BLOCK == 0, "calc0() walks dp in whole blocks of BLOCK entries");

using vec_t = __m256i;

/* table[d] is all-ones when a step of length d is allowed, 0 otherwise, so it
 * can be used directly as an AND mask. dp[t] is the result of the recurrence
 * documented on calc0(). */
__attribute__((aligned(32))) uint32_t table[N], dp[N];

/* Broadcast the finished value dp[i + x] into every lane of a##x. */
#define DEF_A(x) vec_t a##x = _mm256_set1_epi32(dp[i + x])

/* dp[i + x] where table[j + lane - x] is set, 0 elsewhere. */
#define MASKED(x) \
    _mm256_and_si256(a##x, _mm256_loadu_si256((const vec_t *)(table + j - x)))

/*
 * Add two masked contributions to `added` and pull the lanes back below 2^31.
 *
 * `added` enters below 2^31 and gains less than 2 * MOD, so it cannot wrap
 * past 2^32. The signed max keeps `added` when bit 31 is clear and picks
 * `added - 2 * MOD` (positive, below 2^31) when bit 31 is set. The residue
 * modulo MOD is unchanged either way. Note that lanes are NOT guaranteed to be
 * below 2 * MOD afterwards, only below 2^31.
 */
#define ADD(x1, x2)                                                                \
    do {                                                                           \
        added = _mm256_add_epi32(_mm256_add_epi32(MASKED(x1), MASKED(x2)), added); \
        added = _mm256_max_epi32(added, _mm256_sub_epi32(added, mod2));            \
    } while (0)

/*
 * Fill dp with the recurrence
 *
 *     dp[0] = 1
 *     dp[t] = sum of dp[s] over all s < t with table[t - s] set   (mod MOD)
 *
 * i.e. the number of ordered sequences of allowed step lengths that sum to t.
 * Values are exact (and fully reduced) for every t <= MAX_TARGET.
 *
 * The work is done block by block: the BLOCK entries of the current block are
 * finished with scalar code, then their contributions are pushed to all later
 * entries eight at a time with AVX2.
 */
void calc0() {
    dp[0] = 1;
    const vec_t mod2 = _mm256_set1_epi32(2 * MOD);

    for (int i = 0; i < N; i += BLOCK) {
        /* Entries written by the vector loop are only bounded by 2^31, which
         * exceeds 2 * MOD, so a single conditional subtraction is not enough. */
        for (int j = i; j < i + BLOCK; j++) dp[j] %= MOD;

        /* Contributions between entries of the same block. */
        for (int j = i; j < i + BLOCK; j++) {
            for (int k = j + 1; k < i + BLOCK; k++) {
                if (table[k - j]) {
                    dp[k] += dp[j];
                    if (dp[k] >= MOD) dp[k] -= MOD;
                }
            }
        }

        DEF_A(0);
        DEF_A(1);
        DEF_A(2);
        DEF_A(3);
        DEF_A(4);
        DEF_A(5);
        DEF_A(6);
        DEF_A(7);
        DEF_A(8);
        DEF_A(9);
        DEF_A(10);
        DEF_A(11);
        DEF_A(12);
        DEF_A(13);

        /* Contributions from this block to every later entry. j is the offset
         * of the first of eight target entries relative to the block start. */
        for (int j = BLOCK; j < N - i - 8; j += 8) {
            /* dp + i + j is generally not 32-byte aligned (i is a multiple of
             * 14, j is 14 + 8m), so the unaligned load is required. */
            vec_t added = _mm256_loadu_si256((const vec_t *)(dp + i + j));
            ADD(13, 12);
            ADD(11, 10);
            ADD(9, 8);
            ADD(7, 6);
            ADD(5, 4);
            ADD(3, 2);
            ADD(1, 0);
            _mm256_storeu_si256((vec_t *)(dp + i + j), added);
        }
    }
}

/*
 * Input:  k, n, then n allowed step lengths.
 * Output: dp[k] as defined on calc0().
 */
int main() {
    int k = ri();
    int n = ri();
    for (int i = 0; i < n; i++) {
        int step = ri();
        /* Steps outside the table cannot contribute to any computed entry. */
        if (step >= 0 && step < N) table[step] = (uint32_t)-1;
    }

    if (k < 0 || k > MAX_TARGET) {
        fprintf(stderr, "k out of range: %d (expected 0..%d)\n", k, MAX_TARGET);
        return 1;
    }

    calc0();
    printf("%d\n", (int)dp[k]);
    return 0;
}