#define ATCODER
#include <algorithm>
#include <array>
#include <bitset>
#include <cassert>
#include <cmath>
#include <cstdlib>
#include <deque>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <tuple>
#include <utility>
#include <vector>
using namespace std;
using ll = long long;

#define FOR(i, a, b) for(ll i=(a); i<(b);i++)
#define REP(i, n) for(ll i=0; i<(n);i++)
#define ROF(i, a, b) for(ll i=(b-1); i>=(a);i--)
#define PER(i, n) for(ll i=n-1; i>=0;i--)
#define VL vector<ll>
#define VVL vector<vector<ll>>
#define VP vector< pair<ll, ll> >
#define VVP vector<vector<pair<ll, ll>>>
#define all(i) begin(i),end(i)
#define SORT(i) sort(all(i))
#define EXISTBIT(x,i) (((x>>i) & 1) != 0)
#define MP(a,b) make_pair(a,b)

// The AtCoder Library is only pulled in when it is actually installed, so the
// file still builds on machines without it (nothing below uses mint/mint2).
#if defined(ATCODER) && __has_include(<atcoder/all>)
#include <atcoder/all>
using namespace atcoder;
using mint = modint1000000007;
using mint2 = modint998244353;
#endif

// Reads n whitespace-separated values of type T from stdin.
template<class T> vector<T> read(size_t n) {
    vector<T> ts(n);
    for (size_t i = 0; i < n; i++) cin >> ts[i];
    return ts;
}

// Recursion terminator for read_tuple_impl: no column types left.
template<class TV, const ll N> void read_tuple_impl(TV&) {}

// Reads one value of type Head from stdin into column N of ts, then recurses
// over the remaining column types.
template<class TV, const ll N, class Head, class... Tail>
void read_tuple_impl(TV& ts) {
    get<N>(ts).emplace_back(*(istream_iterator<Head>(cin)));
    read_tuple_impl<TV, N + 1, Tail...>(ts);
}

// Reads n rows of sizeof...(Ts) values each and returns them column-wise as a
// tuple of vectors (one vector per type in Ts).
template<class... Ts> decltype(auto) read_tuple(size_t n) {
    tuple<vector<Ts>...> ts;
    for (size_t i = 0; i < n; i++) read_tuple_impl<decltype(ts), 0, Ts...>(ts);
    return ts;
}

// Determinant of a 2x2 matrix stored row-major in ar.
template<typename T> T det2(array<T, 4> ar) {
    return ar[0] * ar[3] - ar[1] * ar[2];
}

// Determinant of a 3x3 matrix stored row-major in ar.
template<typename T> T det3(array<T, 9> ar) {
    return ar[0] * ar[4] * ar[8] + ar[1] * ar[5] * ar[6] + ar[2] * ar[3] * ar[7]
         - ar[0] * ar[5] * ar[7] - ar[1] * ar[3] * ar[8] - ar[2] * ar[4] * ar[6];
}

// Raises tar to src when src is larger; returns whether tar changed.
template<typename T> bool chmax(T& tar, T src) {
    if (tar < src) {
        tar = src;
        return true;
    }
    return false;
}

// Lowers tar to src when src is smaller; returns whether tar changed.
template<typename T> bool chmin(T& tar, T src) {
    if (tar > src) {
        tar = src;
        return true;
    }
    return false;
}

// Adds one to every element of ar.
template<typename T> void inc(vector<T>& ar) {
    for (auto& v : ar) v++;
}

// Subtracts one from every element of ar.
template<typename T> void dec(vector<T>& ar) {
    for (auto& v : ar) v--;
}

// Returns (value, original index) pairs of a, sorted ascending.
template<typename T> vector<pair<T, int>> id_sort(vector<T>& a) {
    vector<pair<T, int>> res(a.size());
    for (int i = 0; i < static_cast<int>(a.size()); i++) res[i] = MP(a[i], i);
    SORT(res);
    return res;
}

// Monoid / mapping callbacks in the shape a lazy segment tree expects
// (range add, range max). Not referenced by solve().
using val = ll;
using func = ll;
val op(val a, val b) { return max(a, b); }
val e() { return 0; }
val mp(func f, val a) { return a + f; }
func comp(func f, func g) { return f + g; }
func id() { return 0; }

// Rook
ll dxr[4] = { 1,0,-1,0 };
ll dyr[4] = { 0,1,0,-1 };
// Bishop (the second array is spelled djb, not dyb; the name is kept as is)
ll dxb[4] = { -1,-1,1,1 };
ll djb[4] = { -1,1,-1,1 };
// Queen
ll dxq[8] = { 0,-1,-1,-1,0,1,1,1 };
ll dyq[8] = { -1,-1,0,1,1,1,0,-1 };

// Fenwick tree over 0-based positions supporting point add and prefix sum.
class BIT {
public:
    // Allocates the smallest power-of-two capacity that is >= s, all zeros.
    BIT(ll s) {
        size = 1;
        while (size < s) size *= 2;
        bit.assign(size, 0); // def unit
    }

    // Adds var at 0-based position k.
    void update(ll k, ll var) {
        k += 1;
        while (k <= size) {
            bit[k - 1] += var; // def func
            k += k & -k;
        }
    }

    // Returns the sum over positions 0..a inclusive.
    ll query(ll a) {
        ll s = 0;
        a++;
        while (a > 0) {
            s += bit[a - 1]; // def func
            a -= a & -a;
        }
        return s;
    }

    ll size;
    vector<ll> bit;
};

// Tries to build and print an answer for one candidate triple of cells.
//
// Cells i0, i1, i2 receive the labels 0, d1, d2; every other cell receives the
// smallest label not used yet, in cell order. Callers must guarantee
// 0 <= d1, d2 < n*n. If the pair count computed below is odd, one adjacent
// pair of cells that touches none of the three chosen cells is swapped; when
// no such pair exists nothing is printed and false is returned.
//
// On success prints "Yes" followed by two n-by-n grids: the labelling after
// the chosen cells are rotated once, then after they are rotated a second
// time. Returns true.
static bool try_print_answer(ll n, ll i0, ll i1, ll i2, ll d1, ll d2) {
    const ll n2 = n * n;
    const auto chosen = [&](ll c) { return c == i0 || c == i1 || c == i2; };

    VL ans(n2);
    vector<bool> use(n2);
    use[0] = true;
    use[d1] = true;
    use[d2] = true;
    ans[i0] = 0;
    ans[i1] = d1;
    ans[i2] = d2;

    ll next_free = 0;
    REP(c, n2) {
        if (chosen(c)) continue;
        while (use[next_free]) next_free++;
        ans[c] = next_free;
        use[next_free] = true;
    }

    // For each cell, count the earlier cells whose label is not larger.
    // This must run on a fresh tree: reusing a tree that already holds other
    // insertions would fold unrelated counts into the parity.
    BIT b2(n2);
    ll t2 = 0;
    REP(c, n2) {
        t2 += b2.query(ans[c]);
        b2.update(ans[c], 1);
    }

    if (t2 % 2) {
        bool flipped = false;
        REP(c, n2 - 1) {
            if (chosen(c) || chosen(c + 1)) continue;
            swap(ans[c], ans[c + 1]);
            flipped = true;
            break;
        }
        if (!flipped) return false;
    }

    const auto rotate_chosen = [&]() {
        swap(ans[i1], ans[i2]);
        swap(ans[i0], ans[i1]);
    };
    const auto print_grid = [&]() {
        REP(c, n2) {
            cout << ans[c] << " ";
            if (c % n == n - 1) cout << "\n";
        }
    };

    cout << "Yes\n";
    rotate_chosen();
    print_grid();
    rotate_chosen();
    print_grid();
    return true;
}

// Reads n and an n-by-n grid of values from stdin, then prints either "Yes"
// with two grids (see try_print_answer) or "No".
void solve() {
    ll n;
    cin >> n;
    const ll n2 = n * n;
    const VL a = read<ll>(n2);

    // (value, cell index) pairs in ascending value order.
    VP ai(n2);
    REP(i, n2) ai[i] = MP(a[i], i);
    SORT(ai);

    if (n == 2) {
        // Unfinished special case: this table is never read.
        VVL ps = { { 0,1,2,3 },{1,0,2,3},{1,3,2,0},{1,3,0,2} };
    }

    // Author's note: "It's a sliding puzzle, so of course only the blank
    // moves (misread)."
    // NOTE(review): a pass that counted pairs over the input order used to sit
    // here; its result only ever leaked into the candidate parity check, so it
    // was dropped. Confirm against the problem statement.

    // Author's note: when N is large there is always a good interior division
    // point, so three consecutive sorted values should be enough.
    REP(i, n2 - 2) {
        const auto [v0, i0] = ai[i];
        const auto [v1, i1] = ai[i + 1];
        const auto [v2, i2] = ai[i + 2];
        if (abs(v2 - v0) < n2) {
            if (try_print_answer(n, i0, i1, i2, v1 - v0, v2 - v0)) return;
        }
    }

    // Author's note: the constant factor is small but this brute force will
    // likely TLE, and the inversion count changes unpredictably, so this is
    // probably wrong.
    REP(i, n2) {
        FOR(j, i + 1, n2) {
            FOR(k, j + 1, n2) {
                const auto [v0, i0] = ai[i];
                const auto [v1, i1] = ai[j];
                const auto [v2, i2] = ai[k];
                const ll g = gcd(v2 - v1, v1 - v0);
                // Three equal values give g == 0; skip instead of dividing by it.
                if (g == 0) continue;
                if ((v2 - v0) / g < n2) {
                    if (try_print_answer(n, i0, i1, i2, (v1 - v0) / g, (v2 - v0) / g)) return;
                }
            }
        }
    }

    cout << "No";
    return;
}

int main() {
    // Only iostreams are used, so the C stdio sync and the cin/cout tie can go.
    ios::sync_with_stdio(false);
    cin.tie(nullptr);

    ll t = 1;
    //cin >> t;
    while (t--) {
        solve();
    }
    return 0;
}