#include using namespace std; using int64 = long long; constexpr int64 CYCLES_PER_SEC = 2800000000; struct Timer { int64 start; Timer() { reset(); } void reset() { start = getCycle(); } inline double get() { return (double) (getCycle() - start) / CYCLES_PER_SEC; } inline int64 getCycle() { unsigned low, high; __asm__ volatile ("rdtsc" : "=a" (low), "=d" (high)); return ((int64) low) | ((int64) high << 32); } }; inline uint32_t xor128() { static uint32_t y = 2463534242; y = y ^ (y << 13); y = y ^ (y >> 17); return y = y ^ (y << 5); } constexpr double TL = 1.5; const int inf = (1 << 30) - 1; int main() { int64 N, K; cin >> N >> K; int64 M = N / K; if(N == 1) { cout << "Yes" << endl; cout << 1 << endl; return 0; } if(N == K) { cout << "No" << endl; return 0; } // Nが偶数かつMが奇数のとき,sum%Kが非0なので解なし if(N % 2 == 0 && M % 2 == 1) { cout << "No" << endl; return 0; } // それ以外は解を構成可能 cout << "Yes" << endl; if(M % 2 == 0) { // K個のペアを偶数個作れるので両端からとっていけばよい // abcde abcde ... dcba dcba deque< int > tap(N); iota(begin(tap), end(tap), 1); vector< vector< int > > ans(K); while(tap.size()) { for(int k = 0; k < K; k++) { ans[k].emplace_back(tap.front()); ans[k].emplace_back(tap.back()); tap.pop_front(); tap.pop_back(); } } for(auto &p : ans) { for(int k = 0; k < p.size(); k++) cout << p[k] << " "; cout << endl; } } else { // Nが奇数かつMが奇数の場合 // わかりません! // わからないのでマラソンをします int64 S = N * (N + 1) / 2 / K; deque< int > tap(N); iota(begin(tap), end(tap), 1); vector< vector< int > > ans(K); while(tap.size() >= 2 * K) { for(int k = 0; k < K; k++) { ans[k].emplace_back(tap.front()); ans[k].emplace_back(tap.back()); tap.pop_front(); tap.pop_back(); } } for(int i = 0; i < K; i++) { ans[i].emplace_back(tap.front()); tap.pop_front(); } double start_temp = 1.0; double end_temp = 0.1; vector< int64 > sum(K); int64 current_score = 0; for(int i = 0; i < K; i++) { sum[i] += accumulate(begin(ans[i]), end(ans[i]), 0LL); current_score += abs(sum[i] - S); } Timer timer; for(;;) { auto now_time = timer.get(); if(now_time >= TL) break; int x = xor128() % K; int y = xor128() % (K - 1); (y += x) %= K; int x_pos = xor128() % 3; int y_pos = x_pos; int64 pre_score = current_score; current_score -= abs(sum[x] - S); current_score -= abs(sum[y] - S); sum[x] -= ans[x][x_pos]; sum[y] -= ans[y][y_pos]; swap(ans[x][x_pos], ans[y][y_pos]); sum[x] += ans[x][x_pos]; sum[y] += ans[y][y_pos]; current_score += abs(sum[x] - S); current_score += abs(sum[y] - S); double temp = start_temp + (end_temp - start_temp) * now_time / TL; double prob = exp((pre_score - current_score) / temp); if(current_score == 0) { break; } if(prob > (xor128() % inf) / (double) inf) { } else { current_score -= abs(sum[x] - S); current_score -= abs(sum[y] - S); sum[x] -= ans[x][x_pos]; sum[y] -= ans[y][y_pos]; swap(ans[x][x_pos], ans[y][y_pos]); sum[x] += ans[x][x_pos]; sum[y] += ans[y][y_pos]; current_score += abs(sum[x] - S); current_score += abs(sum[y] - S); } } for(auto &p : ans) { for(int k = 0; k < p.size(); k++) cout << p[k] << " "; cout << endl; } } }