// Competitive-programming template followed by the solution in main().
//
// Standard headers actually needed by the template and the solution.
#include <algorithm>
#include <cstdio>
#include <functional>
#include <iomanip>
#include <iostream>
#include <queue>
#include <tuple>
#include <utility>
#include <vector>
using namespace std;

// Define
using ll = long long;
using ull = unsigned long long;
using ld = long double;
template <class T, class U>
using pvector = vector<pair<T, U>>;
// Min-heap counterpart of priority_queue.
template <class T>
using rpriority_queue = priority_queue<T, vector<T>, greater<T>>;
constexpr const ll dx[4] = {1, 0, -1, 0};
constexpr const ll dy[4] = {0, 1, 0, -1};
constexpr const ll MOD = 1e9 + 7;
constexpr const ll mod = 998244353;
constexpr const ll INF = 1LL << 60;
constexpr const ll inf = 1 << 30;
constexpr const char rt = '\n';
constexpr const char sp = ' ';
#define mp make_pair
#define mt make_tuple
#define pb push_back
#define eb emplace_back
#define elif else if
// all(algo, container, extra...) calls algo(begin(container), end(container), extra...).
#define all(a, v, ...) \
  ([&](decltype((v)) w) { return (a)(begin(w), end(w), ##__VA_ARGS__); })(v)
#define fi first
#define se second

// Raises a to b when b is larger; returns whether a was changed.
template <class T>
bool chmax(T &a, const T &b) {
  if (a < b) {
    a = b;
    return 1;
  }
  return 0;
}

// Lowers a to b when b is smaller; returns whether a was changed.
template <class T>
bool chmin(T &a, const T &b) {
  if (a > b) {
    a = b;
    return 1;
  }
  return 0;
}

// Debug
// Prints the line number, the argument text and each argument value to cerr.
#define debug(...)                                           \
  {                                                          \
    cerr << __LINE__ << ": " << #__VA_ARGS__ << " = ";       \
    for (auto &&X : {__VA_ARGS__}) cerr << "[" << X << "] "; \
    cerr << rt;                                              \
  }
// Prints the h x w prefix of a two-dimensional container to cerr.
#define dump(a, h, w)                             \
  {                                               \
    cerr << __LINE__ << ": " << #a << " = [" << rt; \
    rep(i, h) {                                   \
      rep(j, w) cerr << a[i][j] << sp;            \
      cerr << rt;                                 \
    }                                             \
    cerr << "]" << rt;                            \
  }
// Prints the first n elements of a one-dimensional container to cerr.
#define vdump(a, n)                                   \
  {                                                   \
    cerr << __LINE__ << ": " << #a << " = [";         \
    rep(i, n) cerr << a[i] << (i == n - 1 ? rt : sp); \
    cerr << "]" << rt;                                \
  }

// Loop
// inc/dec iterate over the closed range [a, n]; rep iterates over [0, n).
// The bound is evaluated once and cached in _<index name>.
#define inc(i, a, n) for (ll i = (a), _##i = (n); i <= _##i; ++i)
#define dec(i, a, n) for (ll i = (a), _##i = (n); i >= _##i; --i)
#define rep(i, n) for (ll i = 0, _##i = (n); i < _##i; ++i)
#define each(i, a) for (auto &&i : a)

// Stream
#define fout(n) cout << fixed << setprecision(n)
// Static initializer that unties cin from cout and disables C stdio syncing.
struct io {
  io() { cin.tie(nullptr), ios::sync_with_stdio(false); }
} io;

// Speed
// These pragmas are GCC/x86 specific, so they are only enabled there; other
// toolchains would reject or warn about them.
#if defined(__GNUC__) && !defined(__clang__) && \
    (defined(__x86_64__) || defined(__i386__))
#pragma GCC optimize("Ofast")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native")
#endif

// Math
// Greatest common divisor by the Euclidean algorithm.
inline constexpr ll gcd(const ll a, const ll b) {
  return b ? gcd(b, a % b) : a;
}

// Least common multiple; divides before multiplying to limit overflow.
inline constexpr ll lcm(const ll a, const ll b) { return a / gcd(a, b) * b; }

// Returns n reduced into [0, m) for positive m.
inline constexpr ll modulo(const ll n, const ll m = MOD) {
  ll k = n % m;
  return k + m * (k < 0);
}

// Reduces n into [0, m) in place (positive m) and returns the new value.
inline constexpr ll chmod(ll &n, const ll m = MOD) {
  n %= m;
  return n += m * (n < 0);
}

// Binary exponentiation: a^n modulo m, scanning all 64 bits of n.
inline constexpr ll mpow(ll a, ll n, const ll m = MOD) {
  ll r = 1;
  // Test bits on the unsigned representation so bit 63 needs no signed shift.
  const ull bits = static_cast<ull>(n);
  rep(i, 64) {
    if ((bits >> i) & 1ULL) r *= a;
    chmod(r, m);
    a *= a;
    chmod(a, m);
  }
  return r;
}

// Modular inverse of n modulo m via the extended Euclidean algorithm.
inline ll inv(const ll n, const ll m = MOD) {
  ll a = n, b = m, x = 1, y = 0;
  while (b) {
    ll t = a / b;
    a -= t * b;
    swap(a, b);
    x -= t * y;
    swap(x, y);
  }
  return modulo(x, m);
}

// Appends rows first_row .. rows-1 of the k-wide grid (cell (i, j) holds
// i * k + j + 1) to the groups in boustrophedon order: even rows go to
// groups 0..k-1 left to right, odd rows go to groups k-1..0.
static void append_snake_rows(vector<vector<ll>> &groups, const ll first_row,
                              const ll rows, const ll k) {
  for (ll row = first_row; row < rows; ++row) {
    for (ll col = 0; col < k; ++col) {
      const ll value = row * k + col + 1;
      const ll target = (row & 1) ? k - 1 - col : col;
      groups[target].push_back(value);
    }
  }
}

// Builds k groups of n / k numbers each from 1..n.
// Callers must have checked that k divides n, that n(n+1)/2 is divisible by k
// and that n / k != 1 unless n == 1.
static vector<vector<ll>> build_groups(const ll n, const ll k) {
  const ll per_group = n / k;
  vector<vector<ll>> groups(k);

  if (per_group & 1) {
    if (per_group == 1) {
      // Only reachable for n == k == 1: the single group is {1}.
      groups[0].push_back(1);
      return groups;
    }
    // Rows 2.. are laid out as a snake. Row 2 is the unpaired one, and the
    // pair drawn from 1..2k below balances it: every group receives
    // 2k - j + half from the pair plus 2k + j + 1 from row 2.
    append_snake_rows(groups, 2, per_group, k);
    const ll half = (k + 1) / 2;
    for (ll j = 0; j < k; ++j) {
      if (j < half) {
        groups[j].push_back(half + j);
        groups[j].push_back(2 * (k - j));
      } else {
        groups[j].push_back(j + 1 - half);
        groups[j].push_back(2 * (k - j + half) - 1);
      }
    }
  } else {
    // An even number of rows: consecutive snake rows cancel pairwise.
    append_snake_rows(groups, 0, per_group, k);
  }
  return groups;
}

// Reads n and k, prints "No" when the construction is rejected, otherwise
// prints "Yes" followed by k lines of n / k numbers each.
signed main() {
  ll n, k;
  cin >> n >> k;

  // The output consists of k groups of exactly n / k numbers, so k must be
  // positive and divide n. This also avoids a division by zero below.
  // NOTE(review): assumes the task requires equal-sized groups; confirm
  // against the problem statement.
  if (k <= 0 || n % k != 0) {
    cout << "No" << rt;
    return 0;
  }

  const ll total = n * (n + 1) / 2;
  const ll per_group = n / k;
  if (total % k != 0 || (n != 1 && per_group == 1)) {
    cout << "No" << rt;
    return 0;
  }

  cout << "Yes" << rt;
  const vector<vector<ll>> groups = build_groups(n, k);
  for (ll i = 0; i < k; ++i) {
    for (ll j = 0; j < per_group; ++j) {
      cout << groups[i][j] << (j == per_group - 1 ? rt : sp);
    }
  }
  return 0;
}
// Suggested debug build flags: -g -D_GLIBCXX_DEBUG -fsanitize=undefined