import sys

sys.setrecursionlimit(10**6)


def int1(x):
    """Parse *x* as an integer and return it minus one."""
    return int(x) - 1


def p2D(x):
    """Print every item of *x* on its own line."""
    print(*x, sep="\n")


def II():
    """Read one line from stdin and return it as an int."""
    return int(sys.stdin.buffer.readline())


def LI():
    """Read one line from stdin and return its whitespace-separated ints."""
    return list(map(int, sys.stdin.buffer.readline().split()))


def LI1():
    """Like LI(), but every value is decremented by one."""
    return list(map(int1, sys.stdin.buffer.readline().split()))


def LLI(rows_number):
    """Read *rows_number* lines with LI() and return them as a list of lists."""
    return [LI() for _ in range(rows_number)]


def LLI1(rows_number):
    """Read *rows_number* lines with LI1() and return them as a list of lists."""
    return [LI1() for _ in range(rows_number)]


def BI():
    """Read one line from stdin as bytes with trailing whitespace removed."""
    return sys.stdin.buffer.readline().rstrip()


def SI():
    """Read one line from stdin as str with trailing whitespace removed."""
    return sys.stdin.buffer.readline().rstrip().decode()


# Eight (di, dj) offsets; the first four entries are the axis-aligned ones.
dij = [(0, 1), (-1, 0), (0, -1), (1, 0), (1, 1), (1, -1), (-1, 1), (-1, -1)]
inf = 10**16
md = 998244353  # alternative modulus used by this template: 10**9+7


def solve(n, k):
    """Split 1..n into k groups of n // k numbers that all have the same sum.

    The numbers are viewed as a grid with h = n // k rows and k columns, where
    row r / column c holds r * k + c + 1.

    * Rows are consumed in pairs: the values a + 1 and h * k - a always add up
      to h * k + 1, so handing out such pairs round-robin keeps sums equal.
    * If h is odd, the last three rows are handled first.  Group g takes
      column g of the first row, column (g - k // 2) % k of the second row and
      whatever column makes the three columns add up to 3 * (k // 2) in the
      third row.  For odd k this third column runs over every column exactly
      once, so each group gets one value per row and all sums coincide.
      (h odd together with the divisibility check below forces k to be odd.)

    Args:
        n: largest number to distribute.
        k: number of groups.

    Returns:
        A list of k lists of ints, or None when the input is rejected.

    Raises:
        ZeroDivisionError: if k is 0 and n is not 0.
    """
    # NOTE(review): n == k is rejected unconditionally, so n == k == 1 is
    # reported as impossible too -- confirm against the problem constraints.
    if n == k or n % k or n * (n + 1) // 2 % k:
        return None

    groups = [[] for _ in range(k)]
    h = n // k

    if h & 1:
        h -= 3
        base = h * k  # value just below the first of the three top rows
        half = k // 2
        for g in range(k):
            second = (g - half) % k
            # A plain diagonal (g, g-1, g-2) only balances for k <= 3; picking
            # the third column as the complement balances for every odd k.
            third = 3 * half - g - second
            groups[g] += [
                base + g + 1,
                base + k + second + 1,
                base + 2 * k + third + 1,
            ]

    # Remaining rows: pair the a-th smallest with the a-th largest value.
    for a in range(h * k // 2):
        groups[a % k] += [a + 1, h * k - a]

    return groups


def main():
    """Read ``n k`` from stdin and print "No", or "Yes" followed by the groups."""
    n, k = LI()
    groups = solve(n, k)
    if groups is None:
        print("No")
        return
    print("Yes")
    for row in groups:
        print(*row)


if __name__ == "__main__":
    main()