#include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef LOCAL #ifndef NDEBUG #define MEASURE_TIME #define DEBUG #endif #else #define NDEBUG // #define DEBUG #endif #include using namespace std; using u8=uint8_t; using u16=uint16_t; using u32=uint32_t; using u64=uint64_t; using i64=int64_t; using ll=int64_t; using ull=uint64_t; using vi=vector; using vvi=vector; namespace { constexpr ll TL = 4900; inline ll get_time() { struct timeval tv; gettimeofday(&tv, NULL); ll result = tv.tv_sec * 1000LL + tv.tv_usec / 1000LL; return result; } const ll start_time = get_time(); // msec inline ll get_elapsed_msec() { return get_time() - start_time; } inline bool reach_time_limit() { return get_elapsed_msec() >= TL; } struct XorShift { uint32_t x,y,z,w; static const double TO_DOUBLE; XorShift() { x = 123456789; y = 362436069; z = 521288629; w = 88675123; } uint32_t nextUInt(uint32_t n) { uint32_t t = x ^ (x << 11); x = y; y = z; z = w; w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); return w % n; } uint32_t nextUInt() { uint32_t t = x ^ (x << 11); x = y; y = z; z = w; return w = (w ^ (w >> 19)) ^ (t ^ (t >> 8)); } double nextDouble() { return nextUInt() * TO_DOUBLE; } }; const double XorShift::TO_DOUBLE = 1.0 / (1LL << 32); struct Counter { vector cnt; void add(int i) { if (i >= cnt.size()) { cnt.resize(i+1); } ++cnt[i]; } void print() { cerr << "counter:["; for (int i = 0; i < cnt.size(); ++i) { cerr << cnt[i] << ", "; if (i % 10 == 9) cerr << endl; } cerr << "]" << endl; } }; struct Timer { vector at; vector sum; void start(int i) { if (i >= at.size()) { at.resize(i+1); sum.resize(i+1); } at[i] = get_time(); } void stop(int i) { sum[i] += get_time() - at[i]; } void print() { cerr << "timer:["; for (int i = 0; i < at.size(); ++i) { cerr << sum[i] << ", "; if (i % 10 == 9) cerr << endl; } cerr << "]" << endl; } }; } #ifdef MEASURE_TIME #define START_TIMER(i) (timer.start(i)) #define STOP_TIMER(i) (timer.stop(i)) #define PRINT_TIMER() (timer.print()) #define ADD_COUNTER(i) (counter.add(i)) #define PRINT_COUNTER() (counter.print()) #else #define START_TIMER(i) #define STOP_TIMER(i) #define PRINT_TIMER() #define ADD_COUNTER(i) #define PRINT_COUNTER() #endif #ifdef DEBUG #define debug(format, ...) fprintf(stderr, format, __VA_ARGS__) #define debugStr(str) fprintf(stderr, str) #define debugln() fprintf(stderr, "\n") #else #define debug(format, ...) #define debugStr(str) #define debugln() #endif template constexpr inline T sq(T v) { return v * v; } void debug_vec(const vi& vec) { debugStr("["); for (int i = 0; i < vec.size(); ++i) { debug("%d ", vec[i]); } debugStr("]"); } XorShift rnd; Timer timer; Counter counter; //////// end of template //////// constexpr int N = 200000; constexpr int Q = 200000; constexpr ll INF = 1ll << 62; int WT, ST; array W; array WS; array L; array R; constexpr int PART = 51; constexpr int DP_BIT = 3; constexpr int DP_SKIP = -1; ll dp[Q + 1][1 << DP_BIT][DP_BIT]; ll dp_prev[Q + 1][1 << DP_BIT][DP_BIT]; ll calc_cost(int l0, int r0, int l1, int r1) { ll cost = WS[max(l0, l1)] - WS[min(l0, l1)]; cost += WS[max(r0, r1) + 1] - WS[min(r0, r1) + 1]; return cost; } vi solve_dp(const vvi& buckets) { for (int i = 0; i < Q; ++i) { for (int j = 0; j < (1 << DP_BIT); ++j) { fill(dp[i][j], dp[i][j] + DP_BIT, INF); } } vi ranges = buckets[0]; for (int i = 1; i < PART; ++i) { ranges.insert(ranges.end(), buckets[i].begin(), buckets[i].end()); } for (int i = 0; i < DP_BIT; ++i) { dp[0][1 << i][i] = WS[R[ranges[0]] + 1] - WS[L[ranges[0]]]; } for (int j = 0; j < (1 << DP_BIT); ++j) { int pk = j; while (pk) { int k = __builtin_ctz(pk); pk &= pk - 1; int left = L[ranges[k]]; int right = R[ranges[k]]; int nl = ((1 << DP_BIT) - 1) ^ j; while (nl) { int l = __builtin_ctz(nl); nl &= nl - 1; int left1 = L[ranges[l]]; int right1 = R[ranges[l]]; ll cost = WS[max(left, left1)] - WS[min(left, left1)]; cost += WS[max(right, right1) + 1] - WS[min(right, right1) + 1]; int next_bit = j | (1 << l); if (dp[0][next_bit][l] > dp[0][j][k] + cost) { dp[0][next_bit][l] = dp[0][j][k] + cost; dp_prev[0][next_bit][l] = k; if ((j & 1) && l > 0 && dp[1][next_bit >> 1][l - 1] > dp[0][next_bit][l]) { dp[1][next_bit >> 1][l - 1] = dp[0][next_bit][l]; dp_prev[1][next_bit >> 1][l - 1] = DP_SKIP; } } } } } for (int i = 1; i <= Q - DP_BIT; ++i) { for (int j = 0; j < (1 << (DP_BIT - 1)); ++j) { int pk = j; while (pk) { int k = __builtin_ctz(pk); pk &= pk - 1; if ((j & 1) && k > 0 && dp[i + 1][j >> 1][k - 1] > dp[i][j][k]) { dp[i + 1][j >> 1][k - 1] = dp[i][j][k]; dp_prev[i + 1][j >> 1][k - 1] = DP_SKIP; } int left = L[ranges[i + k]]; int right = R[ranges[i + k]]; int l = DP_BIT - 1; int left1 = L[ranges[i + l]]; int right1 = R[ranges[i + l]]; ll cost = WS[max(left, left1)] - WS[min(left, left1)]; cost += WS[max(right, right1) + 1] - WS[min(right, right1) + 1]; int next_bit = j | (1 << l); if (dp[i][next_bit][l] > dp[i][j][k] + cost) { dp[i][next_bit][l] = dp[i][j][k] + cost; dp_prev[i][next_bit][l] = k; } if ((j & 1) && dp[i + 1][next_bit >> 1][l - 1] > dp[i][next_bit][l]) { dp[i + 1][next_bit >> 1][l - 1] = dp[i][next_bit][l]; dp_prev[i + 1][next_bit >> 1][l - 1] = DP_SKIP; } } } for (int j = (1 << (DP_BIT - 1)); j < (1 << DP_BIT); ++j) { int pk = j; while (pk) { int k = __builtin_ctz(pk); pk &= pk - 1; int left = L[ranges[i + k]]; int right = R[ranges[i + k]]; int nl = ((1 << DP_BIT) - 1) ^ j; while (nl) { int l = __builtin_ctz(nl); nl &= nl - 1; int left1 = L[ranges[i + l]]; int right1 = R[ranges[i + l]]; ll cost = WS[max(left, left1)] - WS[min(left, left1)]; cost += WS[max(right, right1) + 1] - WS[min(right, right1) + 1]; int next_bit = j | (1 << l); if (dp[i][next_bit][l] > dp[i][j][k] + cost) { dp[i][next_bit][l] = dp[i][j][k] + cost; dp_prev[i][next_bit][l] = k; } if ((j & 1) && dp[i + 1][next_bit >> 1][l - 1] > dp[i][next_bit][l]) { dp[i + 1][next_bit >> 1][l - 1] = dp[i][next_bit][l]; dp_prev[i + 1][next_bit >> 1][l - 1] = DP_SKIP; } } } } // if (i > Q - DP_BIT - 3) { // debug("i:%d\n", i); // for (int j = 0; j < 1 << DP_BIT; ++j) { // for (int k = 0; k < DP_BIT; ++k) { // debug("%d %d %lld\n", j, k, dp[i][j][k]); // } // } // } } int best_k = 0; for (int i = 0; i < DP_BIT; ++i) { if (dp[Q - DP_BIT][(1 << DP_BIT) - 1][i] < dp[Q - DP_BIT][(1 << DP_BIT) - 1][best_k]) { best_k = i; } } debug("best_cost:%lld\n", dp[Q - DP_BIT][(1 << DP_BIT) - 1][best_k]); vi ans; int ci = Q - DP_BIT; int cj = (1 << DP_BIT) - 1; int k = best_k; ans.push_back(ranges[ci + k]); while (ci > 0 || cj > 0) { int prev = dp_prev[ci][cj][k]; if (prev == DP_SKIP) { ci--; cj = (cj << 1) | 1; k++; // debugln(); } else { // debug("%d\n", prev); assert(cj | (1 << prev)); ans.push_back(ranges[ci + prev]); cj ^= 1 << k; k = prev; } } ans.pop_back(); reverse(ans.begin(), ans.end()); assert(ans.size() == Q); return ans; } bool accept(ll diff, double cooler) { if (diff <= 0) return true; double v = -diff * cooler; if (v < -9) return false; return rnd.nextDouble() < exp(v); } void solve_tsp(vi& ans) { constexpr int RANGE = 300; ll diff_sum = 0; int turn = 0; double cooler = 0.0005; while (true) { for (int i = 0; i < Q; ++i) { if ((i & 0xFFF) == 0) { ll elapsed = get_time() - start_time; if (elapsed > TL) { debug("turn:%d %d diff_sum:%lld\n", turn, i, diff_sum); return; } } int j = i + 2; int l00 = L[ans[i]]; int r00 = R[ans[i]]; int l01 = L[ans[i + 1]]; int r01 = R[ans[i + 1]]; ll diff_base = -calc_cost(l00, r00, l01, r01); for (int j = i + 2; j < i + RANGE && j < Q; ++j) { int l10 = L[ans[j]]; int r10 = R[ans[j]]; int l11 = L[ans[j + 1]]; int r11 = R[ans[j + 1]]; ll diff = diff_base - calc_cost(l10, r10, l11, r11); diff += calc_cost(l00, r00, l10, r10); diff += calc_cost(l01, r01, l11, r11); if (accept(diff, cooler)) { diff_sum += diff; reverse(ans.begin() + i + 1, ans.begin() + j + 1); l01 = L[ans[i + 1]]; r01 = R[ans[i + 1]]; diff_base = -calc_cost(l00, r00, l01, r01); } } } debug("diff_sum:%lld\n", diff_sum); turn += 1; cooler *= 3; } } int main() { int _; scanf("%d %d %d %d", &_, &_, &WT, &ST); for (int i = 0; i < N; ++i) { scanf("%lld", &W[i]); WS[i + 1] = WS[i] + W[i]; } for (int i = 0; i < Q; ++i) { scanf("%d %d", &L[i], &R[i]); L[i]--; R[i]--; } vector> buckets(PART); for (int i = 0; i < Q; ++i) { buckets[(ll)L[i] * PART / N].push_back(i); } for (int i = 0; i < PART; ++i) { if (i & 1) { sort(buckets[i].begin(), buckets[i].end(), [](int p0, int p1){ return R[p0] > R[p1]; }); } else { sort(buckets[i].begin(), buckets[i].end(), [](int p0, int p1){ return R[p0] < R[p1]; }); } } vi ans = solve_dp(buckets); solve_tsp(ans); int left = L[ans[0]]; int right = R[ans[0]]; ll ans_w = WS[right + 1] - WS[left]; for (int i = 1; i < Q; ++i) { int left1 = L[ans[i]]; int right1 = R[ans[i]]; ans_w += WS[max(left, left1)] - WS[min(left, left1)]; ans_w += WS[max(right, right1) + 1] - WS[min(right, right1) + 1]; left = left1; right = right1; } debug("ans_w=%lld\n", ans_w); debug("score=%lld\n", (ll)round(1000000000000000000ll / ans_w)); for (int p : ans) { printf("%d ", p + 1); } printf("\n"); }