#include <iostream>
#include <thread>
using namespace std;
#pragma GCC target("avx2")
#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")

// Program summary
// ---------------
// Reads K, N and then N integers X[0..N-1] from standard input and prints
// dp[K], where
//     dp[0] = 1
//     dp[i] = (sum of dp[i - X[j]] over the leading entries X[j] <= i) mod `mod`
// Presumably this counts the ordered ways to write K as a sum of values taken
// from X -- confirm against the problem statement.
//
// The "leading entries" scan in main() only moves forward, so X is assumed to
// be given in ascending order; an unsorted X silently drops terms.

// Modulus applied to every stored dp value.
const int mod = 1000000007;

// Capacity of the fixed-size tables below; K must be < kMaxSize, N <= kMaxSize.
const int kMaxSize = 100009;

// Number of slices (and worker threads) one dp entry is split into.
const int kThreads = 8;

// With fewer usable steps than this, one dp entry is summed on the calling
// thread instead of spawning workers. Heuristic cutoff (not measured): thread
// start-up is far more expensive than a few thousand additions.
const int kParallelThreshold = 4096;

int K, N, X[kMaxSize];
// dp[i] is always stored reduced modulo `mod`; res[d] holds the unreduced
// partial sum produced by slice d (only the first kThreads slots are used).
long long dp[kMaxSize], res[15];

// Sums dp[pos - X[i]] for i in [l, r) and stores the result in res[d].
//
// d   : slot of `res` to write.
// pos : dp index currently being computed.
// l,r : half-open range of indices into X.
//
// The sum is not reduced modulo `mod`: each dp value is < mod (~1e9) and at
// most kMaxSize terms are added, so it fits comfortably in a long long.
// Safe to run concurrently with other calc() calls as long as each uses a
// distinct d: it only reads X and dp and writes res[d].
void calc(int d, int pos, int l, int r) {
    long long ans = 0;
    for (int i = l; i < r; ++i) {
        ans += dp[pos - X[i]];
    }
    res[d] = ans;
}

// Entry point. Returns 0 after printing dp[K], or 1 (printing nothing) when
// the input is malformed or does not fit the fixed-size tables.
int main() {
    cin.tie(nullptr);
    ios_base::sync_with_stdio(false);

    // Reject anything that would index outside X or dp.
    if (!(cin >> K >> N) || K < 0 || K >= kMaxSize || N < 0 || N > kMaxSize) {
        return 1;
    }
    for (int i = 0; i < N; ++i) {
        // A negative step would make calc() read dp beyond the current index.
        if (!(cin >> X[i]) || X[i] < 0) {
            return 1;
        }
    }

    dp[0] = 1;
    int ptr = 0;  // number of leading X entries that are <= i
    for (int i = 1; i <= K; ++i) {
        while (ptr != N && X[ptr] <= i) ++ptr;

        long long total = 0;
        if (ptr < kParallelThreshold) {
            // Small range: a single serial pass gives the same sum.
            calc(0, i, 0, ptr);
            total = res[0];
        } else {
            // Split [0, ptr) into kThreads contiguous slices, one per worker.
            // dp[i] depends on earlier entries, so only the inner sum can be
            // parallelised; the workers must finish before moving to i + 1.
            thread workers[kThreads];
            for (int t = 0; t < kThreads; ++t) {
                workers[t] = thread(calc, t, i,
                                    t * ptr / kThreads,
                                    (t + 1) * ptr / kThreads);
            }
            for (thread& w : workers) {
                w.join();
            }
            for (int t = 0; t < kThreads; ++t) {
                total += res[t];
            }
        }
        dp[i] = total % mod;
    }

    cout << dp[K] << '\n';
    return 0;
}