#include <bits/stdc++.h>
#include <atcoder/all>
using namespace std;
using namespace atcoder;
// Reads one integer from `is` and assigns it to the runtime-modulus modint `a`.
// The temporary starts at 0 so that `a` never receives an indeterminate value when
// the stream is already failed / at EOF and the extraction is skipped entirely.
istream &operator>>(istream &is, modint &a) {
    long long v = 0;
    is >> v;
    a = v;
    return is;
}
// Prints the value reported by a.val() for a runtime-modulus modint.
ostream &operator<<(ostream &os, const modint &a) {
    os << a.val();
    return os;
}
// Reads one integer from `is` and assigns it to the modint998244353 `a`.
// The temporary starts at 0 so that `a` never receives an indeterminate value when
// the stream is already failed / at EOF and the extraction is skipped entirely.
istream &operator>>(istream &is, modint998244353 &a) {
    long long v = 0;
    is >> v;
    a = v;
    return is;
}
// Prints the value reported by a.val() for a modint998244353.
ostream &operator<<(ostream &os, const modint998244353 &a) {
    os << a.val();
    return os;
}
// Reads one integer from `is` and assigns it to the modint1000000007 `a`.
// The temporary starts at 0 so that `a` never receives an indeterminate value when
// the stream is already failed / at EOF and the extraction is skipped entirely.
istream &operator>>(istream &is, modint1000000007 &a) {
    long long v = 0;
    is >> v;
    a = v;
    return is;
}
// Prints the value reported by a.val() for a modint1000000007.
ostream &operator<<(ostream &os, const modint1000000007 &a) {
    os << a.val();
    return os;
}

// Short aliases used throughout the file.
using ll = long long;
using Graph = vector<vector<int>>;  // one vector<int> per outer index
using pii = pair<int, int>;
using pll = pair<ll, ll>;
// Loop / range helper macros.
// Every macro parameter is parenthesized (and value-producing macros are wrapped in
// parentheses) so that expression arguments such as `cond ? a : b` expand correctly.
// FOR(i,l,r): int i runs over [l, r).   rep(i,n): int i runs over [0, n).
#define FOR(i,l,r) for (int i = (l); i < (int)(r); i++)
#define rep(i,n) for (int i = 0; i < (int)(n); i++)
// all / rall expand to a begin/end (resp. rbegin/rend) iterator pair, i.e. TWO arguments.
#define all(x) (x).begin(), (x).end()
#define rall(x) (x).rbegin(), (x).rend()
// Sort ascending in place; largest / smallest element (x must be non-empty).
#define my_sort(x) std::sort((x).begin(), (x).end())
#define my_max(x) (*std::max_element((x).begin(), (x).end()))
#define my_min(x) (*std::min_element((x).begin(), (x).end()))
// Raises `a` to `b` when b is strictly larger; returns whether `a` was changed.
template<class T> inline bool chmax(T& a, T b) {
    const bool grew = a < b;
    if (grew) a = b;
    return grew;
}
// Lowers `a` to `b` when b is strictly smaller; returns whether `a` was changed.
template<class T> inline bool chmin(T& a, T b) {
    const bool shrank = a > b;
    if (shrank) a = b;
    return shrank;
}
// Sentinel "infinity" values.
const int INF = 0x3fffffff;            // 2^30 - 1
const ll LINF = 0x3fffffffffffffffLL;  // 2^62 - 1
const double PI = std::acos(-1.0);
// Four (di[k], dj[k]) offset pairs: (1,0), (0,1), (-1,0), (0,-1).
// NOTE(review): presumably grid-neighbour deltas; not used in the visible code.
vector<int> di{1, 0, -1, 0};
vector<int> dj{0, 1, 0, -1};

// debug(...): when LOCAL is defined, forwards the stringified argument list together
// with the arguments to debug_print::multi_print (from debug_print.hpp); otherwise it
// expands to a no-op expression so the calls cost nothing.
#ifdef LOCAL
#  include <debug_print.hpp>
#  define debug(...) debug_print::multi_print(#__VA_ARGS__, __VA_ARGS__)
#else
#  define debug(...) (static_cast<void>(0))
#endif

// mint is atcoder's `modint`; main() chooses its modulus at runtime via mint::set_mod(P).
using mint = modint;

int main(){
    cin.tie(0);
    ios_base::sync_with_stdio(false);

    ll N, M, P; cin >> N >> M >> P;
    mint::set_mod(P);

    vector<mint> fac(N + 1, 1), finv(N + 1, 1);
    for(int i = 1; i <= N; i++) {
        fac[i] = fac[i - 1] * (mint)i;
        finv[i] = (mint)1 / (mint)fac[i];
    }

    auto nCr = [&](int n, int k){
        return fac[n] * finv[k] * finv[n-k];
    };

    auto nPr = [&](int n, int k){
        return fac[n] * finv[n-k];
    };

    mint ans = 0;
    vector<mint> prv(N + 1);
    for(int j = 1; j <= M; j++){
        vector<mint> coef(N + 1);
        if(j == 1) {
            for(ll n = 1; n <= N; n++){
                coef[n] = (n * (n + 1LL) / 2LL) % P;
            }
        }
        else{
            coef[0] = 0;
            for(ll n = 1; n <= N; n++){
                coef[n] = coef[n - 1] + prv[n - 1] * (mint)n;
            }
        }
        
        ans += coef[N] * nCr(M, j) / nPr(N, j);

        swap(prv, coef);
    }

    cout << ans << endl;
}