// NOTE(review): SOURCE lost everything between '<' and '>' (header names,
// template parameter lists, container element types) and was collapsed onto
// two physical lines. The header list below is a reconstruction that covers
// every standard facility used in this file plus the usual contest set; the
// original header names could not be recovered — confirm against the author's
// template if one is available.
#include <algorithm>
#include <bitset>
#include <cassert>
#include <climits>
#include <cmath>
#include <complex>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <deque>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <numeric>
#include <queue>
#include <set>
#include <stack>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#define all(a) a.begin(),a.end()
#define rep(i, n) for (ll i = 0; i < (n); i++)
#define pb push_back
#define debug(x) cerr << __LINE__ << ' ' << #x << ':' << x << '\n'

#pragma GCC optimize("O3")
#pragma GCC optimize("unroll-loops")

using namespace std;

typedef long long ll;
typedef unsigned long long ull;
typedef long double ld;
// NOTE(review): the template arguments of these two typedefs were stripped in
// SOURCE; pair<ll, ll> and complex<ld> are assumed. Neither alias is used in
// this file.
typedef pair<ll, ll> P;
typedef complex<ld> com;

constexpr int inf = 1000000010;
constexpr ll INF = 1000000000000000010;
constexpr ld eps = 1e-12;
constexpr ld pi = 3.141592653589793238;

/// Assigns b to a when a < b. Returns true iff a was updated.
template <class T, class U>
inline bool chmax(T &a, const U &b) {
    if (a < b) {
        a = b;
        return true;
    }
    return false;
}

/// Assigns b to a when a > b. Returns true iff a was updated.
template <class T, class U>
inline bool chmin(T &a, const U &b) {
    if (a > b) {
        a = b;
        return true;
    }
    return false;
}

constexpr ll mod = 1000000007;

// Lookup tables filled by modcalc(): fac[i] = i! mod p, inv[i] = i^-1 mod p,
// facinv[i] = (i!)^-1 mod p.
vector<ll> fac, inv, facinv;

/// Fills fac / inv / facinv for indices [0, max(n, 2)).
/// Indices 0 and 1 are seeded unconditionally, so the tables are never made
/// smaller than two entries (the original wrote index 1 even when n < 2).
void modcalc(int n) {
    const int size = max(n, 2);
    fac.resize(size);
    inv.resize(size);
    facinv.resize(size);
    fac[0] = 1;
    fac[1] = 1;
    inv[1] = 1;
    facinv[0] = 1;
    facinv[1] = 1;
    for (ll i = 2; i < size; i++) {
        fac[i] = fac[i - 1] * i % mod;
        // Recurrence for the inverse: inv[i] = -(mod / i) * inv[mod % i].
        inv[i] = mod - inv[mod % i] * (mod / i) % mod;
        facinv[i] = facinv[i - 1] * inv[i] % mod;
    }
}

/// Returns the multiplicative inverse of a modulo `mod`.
/// Calls abort() when a is congruent to 0. Uses the precomputed table when
/// the reduced value is inside it, otherwise the extended Euclidean algorithm.
ll modinv(ll a) {
    a %= mod;
    // A negative remainder would otherwise be used as a table index.
    if (a < 0) a += mod;
    if (a == 0) abort();
    if (a < static_cast<ll>(inv.size())) return inv[a];
    ll b = mod, u = 1, v = 0;
    while (b) {
        const ll t = a / b;
        a -= t * b;
        swap(a, b);
        u -= t * v;
        swap(u, v);
    }
    u %= mod;
    if (u < 0) u += mod;
    return u;
}

/// Returns a^b modulo m by binary exponentiation. The loop shifts b right
/// until it reaches zero, so b must be non-negative.
ll modpow(ll a, ll b, ll m = mod) {
    ll ans = 1 % m;  // yields 0 when m == 1, even for b == 0
    a %= m;
    if (a < 0) a += m;  // keep the result in [0, m) for negative bases
    while (b) {
        if (b & 1) ans = ans * a % m;
        a = a * a % m;
        b >>= 1;
    }
    return ans;
}

/// Binomial coefficient C(n, k) mod p; 0 when the arguments are out of range.
/// Requires modcalc() to have been called with a size greater than n.
ll modcomb(ll n, ll k) {
    if (n < 0 || k < 0 || n < k) return 0;
    return fac[n] * facinv[k] % mod * facinv[n - k] % mod;
}

/// Number of k-permutations n! / (n - k)! mod p; 0 when out of range.
/// Requires modcalc() to have been called with a size greater than n.
ll modperm(ll n, ll k) {
    if (n < 0 || k < 0 || n < k) return 0;
    return fac[n] * facinv[n - k] % mod;
}

/// Combinations with repetition C(n + k - 1, k) mod p.
/// Returns 0 for negative arguments or for n == 0 with k > 0, and 1 for
/// n == 0, k == 0. Requires the tables to cover index n + k - 1.
ll modhom(ll n, ll k) {
    if (n < 0 || k < 0 || (n == 0 && k > 0)) return 0;
    if (n == 0 && k == 0) return 1;
    return fac[n + k - 1] * facinv[k] % mod * facinv[n - 1] % mod;
}

/// Reads an n x m integer grid from stdin and prints a sum of binomial /
/// multiset-coefficient terms over the grid, reduced modulo `mod`.
int main() {
    cin.tie(nullptr);
    ios::sync_with_stdio(false);
    cout << fixed << setprecision(20);

    // Assumes every index used below (at most about n + m + max(a)) stays
    // under this table size — TODO confirm against the problem constraints.
    constexpr int kTableSize = 3000010;
    modcalc(kTableSize);

    int n = 0, m = 0;
    cin >> n >> m;
    // An empty grid (or a failed read) would make a[n - 1] / a[i][m - 1]
    // below out-of-bounds; every sum is empty in that case, so print 0.
    if (!cin || n <= 0 || m <= 0) {
        cout << 0 << '\n';
        return 0;
    }

    vector<vector<int>> a(n, vector<int>(m));
    for (auto &row : a) {
        for (int &cell : row) cin >> cell;
    }

    ll ans = 0;

    // Terms taken from the last row of the grid.
    for (int j = 0; j < m; j++) {
        ll c = modcomb(j + n - 1, j);
        c = c * modhom(j + n + 1, a[n - 1][j] - 1) % mod;
        ans = (ans + c) % mod;
    }

    // Terms taken from the last column of the grid.
    for (int i = 0; i < n; i++) {
        ll c = modcomb(i + m - 1, i);
        c = c * modhom(i + m + 1, a[i][m - 1] - 1) % mod;
        ans = (ans + c) % mod;
    }

    // Per-cell multinomial (i + j + s)! / (i! j! s!) with s = a[i][j] - 1.
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < m; j++) {
            const int s = a[i][j] - 1;
            // A negative s has no table entry; treat the term as 0, matching
            // what modhom() does for a negative k elsewhere in this function.
            if (s < 0) continue;
            const ll c = fac[i + j + s] * facinv[i] % mod * facinv[j] % mod * facinv[s] % mod;
            ans = (ans + c) % mod;
        }
    }

    // Difference terms between vertically adjacent cells (i - 1, j) and (i, j).
    for (int i = 1; i < n; i++) {
        for (int j = 0; j < m; j++) {
            const ll c = modcomb(i + j - 1, j);
            ll t = modhom(i + j + 1, a[i - 1][j] - 1);
            t -= modhom(i + j + 1, a[i][j] - 1);
            if (t < 0) t += mod;
            ans = (ans + c * t) % mod;
        }
    }

    // Difference terms between horizontally adjacent cells (i, j - 1) and (i, j).
    for (int i = 0; i < n; i++) {
        for (int j = 1; j < m; j++) {
            const ll c = modcomb(i + j - 1, i);
            ll t = modhom(i + j + 1, a[i][j - 1] - 1);
            t -= modhom(i + j + 1, a[i][j] - 1);
            if (t < 0) t += mod;
            ans = (ans + c * t) % mod;
        }
    }

    cout << ans % mod << '\n';
}