// #include <bits/allocator.h> // Temp fix for gcc13 global pragma
// #pragma GCC target("avx2,bmi2,popcnt,lzcnt")
// #pragma GCC optimize("O3,unroll-loops")
#include <bits/stdc++.h>
// #include <x86intrin.h>
using namespace std;
#if __cplusplus >= 202002L
using namespace numbers;
#endif
// Debug instrumentation switch.
// When LOCAL is defined, the project-local header "Debug.h" is included
// (presumably it supplies the four debug macros stubbed below -- the header is
// not part of this file, so confirm against it).
// Otherwise each debug macro expands to the literal 42, so a call such as
// `debug(x);` becomes the expression statement `42;` and its arguments are
// discarded by the preprocessor without being evaluated.
#ifdef LOCAL
	#include "Debug.h"
#else
	#define debug_endl() 42
	#define debug(...) 42
	#define debug2(...) 42
	#define debugbin(...) 42
#endif



namespace {

/// Solves a single test case.
///
/// @param k  The modulus / step size read from the input. Must be non-zero
///           (it is used as a divisor); the input is assumed to guarantee this.
/// @param a  The n values of the test case (taken by value; it is reordered).
/// @return   The value to print for this case, or std::nullopt when the case
///           has to be answered with -1.
///
/// What the code does, step by step:
///  * If the sum of `a` is not a multiple of `k`, the case is rejected.
///  * `a` is sorted by residue `a[i] % k`, smallest residue first.
///  * `use[i]` starts at `a[i] % k`; `res` is the sum of all `use[i]` and
///    `req` is `k * max(use[i])`.
///  * While `res < req`, elements are visited in sorted order and `use[i]` is
///    raised by `k` whenever that does not exceed `a[i]`; `res` and `req` are
///    updated after every raise. At most `k + 1` such sweeps are performed.
///  * If `res` still has not reached `req`, the case is rejected; otherwise
///    the result is `res / k`.
///
/// NOTE(review): the problem statement is not part of this file, so the
/// meaning of `use`, `res` and `req` is described only in terms of the code.
std::optional<long long> solve_case(const int k, std::vector<long long> a){
	if(std::accumulate(a.begin(), a.end(), 0LL) % k != 0){
		return std::nullopt;
	}
	// Same ordering criterion as before: ascending residue modulo k.
	std::sort(a.begin(), a.end(), [k](const long long x, const long long y){ return x % k < y % k; });
	const std::size_t n = a.size();
	std::vector<long long> use(n);
	long long res = 0, req = 0;
	for(std::size_t i = 0; i < n; ++ i){
		use[i] = a[i] % k;
		res += use[i];
		req = std::max(req, use[i] * k);
	}
	for(int rep = 0; rep <= k && res < req; ++ rep){
		// The sweep stops as soon as the running total catches up with req.
		for(std::size_t i = 0; i < n && res < req; ++ i){
			if(use[i] + k > a[i]){
				continue; // raising use[i] by k would exceed a[i]
			}
			use[i] += k;
			req = std::max(req, use[i] * k);
			res += k;
		}
	}
	if(res < req){
		return std::nullopt;
	}
	return res / k;
}

} // namespace

/// Reads the number of test cases, then for each case reads `n`, `k` and `n`
/// integers from standard input and prints one line per case: the value
/// computed by solve_case, or -1 when it reports no answer.
/// Stream failures (malformed or truncated input) raise std::ios::failure
/// because exceptions are enabled on std::cin.
int main(){
	std::cin.tie(nullptr)->sync_with_stdio(false);
	std::cin.exceptions(std::ios::badbit | std::ios::failbit);
	int test_count = 0;
	std::cin >> test_count;
	for(int tc = 0; tc < test_count; ++ tc){
		int n = 0, k = 0;
		std::cin >> n >> k;
		std::vector<long long> a(n);
		// Plain extraction loop: reads exactly n values. An istream_iterator
		// would already consume one token on construction, even for n == 0.
		for(auto &value: a){
			std::cin >> value;
		}
		if(const auto answer = solve_case(k, std::move(a))){
			std::cout << *answer << "\n";
		}
		else{
			std::cout << "-1\n";
		}
	}
	return 0;
}

/*
k = 105
1 1 1 1 100
*/