#!/usr/bin/python3.8
import sys
read = sys.stdin.buffer.read
readline = sys.stdin.buffer.readline
readlines = sys.stdin.buffer.readlines
from functools import lru_cache
import numpy as np

# Read the two whitespace-separated integers N and MOD from standard input.
N, MOD = (int(token) for token in read().split())


def prime_table(N):
    """Sieve of Eratosthenes over the integers 0 .. N-1.

    Args:
        N: Length of the table; only integers strictly below N are tested.

    Returns:
        A pair ``(is_prime, primes)``:
        ``is_prime`` is a boolean array of length N with ``is_prime[k]`` True
        exactly when k is prime, and ``primes`` is the ascending integer array
        of all primes below N.  For N <= 2 both contain no primes.
    """
    # Use the builtin ``bool`` as dtype: the ``np.bool`` alias is missing in
    # NumPy 1.24-1.26.
    is_prime = np.zeros(N, dtype=bool)
    if N > 2:
        is_prime[2] = True
    # Every odd number >= 3 starts as a candidate; even numbers other than 2
    # are never marked.  (An out-of-range slice is simply empty.)
    is_prime[3::2] = True
    for p in range(3, N, 2):
        if p * p >= N:
            break
        if is_prime[p]:
            # Only odd multiples can still be marked, hence the step 2 * p.
            is_prime[p * p::p + p] = False
    primes = np.where(is_prime)[0]
    return is_prime, primes


def mobius_table(N, primes):
    """Tabulate the Moebius function mu(n) for 0 <= n < N.

    Args:
        N: Length of the table.
        primes: Iterable of primes; for a correct table it must contain every
            prime below N exactly once.

    Returns:
        An int64 array ``mu`` of length N.  ``mu[0]`` is set to 0; for n >= 1
        the entry is 0 when n has a squared prime factor and otherwise
        (-1) ** (number of prime factors of n).
    """
    mu = np.ones(N, np.int64)
    if N > 0:
        mu[0] = 0
    for p in map(int, primes):
        # Each prime factor flips the sign of all of its multiples.
        mu[p::p] *= -1
        pp = p * p
        # Multiples of p^2 are not square-free.  Skip the write when p^2 is
        # already outside the table.
        if pp < N:
            mu[pp::pp] = 0
    return mu


@lru_cache(None)
def F(N, MOD):
    """Return sum(|x| + |y|) mod MOD over lattice points with x^2 + y^2 <= N.

    Args:
        N: Non-negative bound on x^2 + y^2.
        MOD: Modulus applied to the per-column terms and to the result.

    Returns:
        The sum reduced modulo MOD (a NumPy integer scalar).

    Raises:
        ValueError: If N is negative (raised by ``math.isqrt``).
    """
    import math  # local import: isqrt is only needed here

    # Exact integer square root; ``int(N ** .5)`` can be off by one for
    # large N because of float rounding.
    x_max = math.isqrt(int(N))
    x = np.arange(1, x_max + 1, dtype=np.int64)
    rest = N - x * x
    # Float estimate of floor(sqrt(rest)), then nudged by at most one step in
    # either direction so the result is the exact integer floor.
    y_max = np.sqrt(rest).astype(np.int64)
    y_max -= (y_max * y_max > rest).astype(np.int64)
    y_max += ((y_max + 1) * (y_max + 1) <= rest).astype(np.int64)
    # The column at a fixed x >= 1 holds 1 + 2 * y_max points, each
    # contributing |x| = x.
    S_xplus = (x * (1 + 2 * y_max) % MOD).sum() % MOD
    # Mirroring x -> -x doubles the |x| total, and swapping x and y shows the
    # |y| total equals the |x| total: overall factor 4.
    return 4 * S_xplus % MOD


def f(N, MOD):
    """Return the sum over n = 1..N of mu(n) * n * F(N^2 // n^2, MOD), mod MOD.

    ``mu`` is the table produced by ``mobius_table`` and ``F`` is the cached
    lattice-point sum defined in this file.
    """
    table_size = N + 10
    _, primes = prime_table(table_size)
    mu = mobius_table(table_size, primes)

    n_squared = N * N
    # Index 0 is a placeholder so that position n lines up with mu[n].
    values = [0]
    values.extend(F(n_squared // (n * n), MOD) for n in range(1, N + 1))
    F_values = np.array(values, np.int64)

    indices = np.arange(N + 1, dtype=np.int64)
    terms = mu[:N + 1] * F_values * indices % MOD
    return terms.sum() % MOD


# Evaluate f for the parsed input and write the result to stdout.
answer = f(N, MOD)
print(answer)