// #include <bits/allocator.h> // Temp fix for gcc13 global pragma
// #pragma GCC target("avx2,bmi2,popcnt,lzcnt")
// #pragma GCC optimize("O3,unroll-loops")
#include <bits/stdc++.h>
// #include <x86intrin.h>
using namespace std;
#if __cplusplus >= 202002L
using namespace numbers;
#endif
// Debug-print helpers.
// When LOCAL is defined, Debug.h is included; it is presumably what provides
// debug_endl/debug/debug2/debugbin in that configuration (header not visible
// here -- confirm).
#ifdef LOCAL
	#include "Debug.h"
#else
	// Without LOCAL every helper expands to the bare literal 42, so a call such as
	// `debug(x);` becomes the statement `42;` and its arguments are never evaluated.
	#define debug_endl() 42
	#define debug(...) 42
	#define debug2(...) 42
	#define debugbin(...) 42
#endif



int main(){
	cin.tie(0)->sync_with_stdio(0);
	cin.exceptions(ios::badbit | ios::failbit);
	auto __solve_tc = [&](auto __tc_num)->int{
		int n, k;
		cin >> n >> k;
		vector<long long> a(n);
		copy_n(istream_iterator<long long>(cin), n, a.begin());
		if(accumulate(a.begin(), a.end(), 0LL) % k){
			cout << "-1\n";
			return 0;
		}
		long long res = 0, req = 0;
		for(auto i = 0; i < n; ++ i){
			res += a[i] % k;
			req = max(req, a[i] % k * k);
		}
		ranges::sort(a, [&](auto x, auto y){ return x % k < y % k; });
		for(auto x: a){
			if(res >= req){
				break;
			}
			if(x < k){
				continue;
			}
			req = max(req, (x % k + k) * k);
			res += k;
		}
		if(res < req){
			cout << "-1\n";
			return 0;
		}
		cout << res / k << "\n";
		return 0;
	};
	int __tc_cnt;
	cin >> __tc_cnt;
	for(auto __tc_num = 0; __tc_num < __tc_cnt; ++ __tc_num){
		__solve_tc(__tc_num);
	}
	return 0;
}

/*
k = 105
1 1 1 1 100
*/