This is the editorial for a recent contest Teamscode. The problems are open for upsolving on this gym. Problems were prepared by oursaco, dutin, thehunterjames, Bossologist, and me.

### A. What do you do when the contest starts? Are you busy? Will you solve Bingo?

**Editorial**

**Are you busy?**

- WorldEnd/SukaSuka
- Bocchi the Rock

**No Sweep**

- Thomas

**Multiplication Table**

- Lycoris Recoil

**Greatest Common Multiple**

- Bokuben

**A Certain Scientific Tree Problem**

- A Certain Scientific Railgun

**Two and Three**

- Quintessential Quintluplets

**That Time I got Reincarnated As A String Problem**

- Tensura
- Re:Zero
- Hayate the Combat Butler
- WorldEnd/SukaSuka

**Stuck on Bricks**

- Hunter x Hunter

**The Tree Problem Is Done For**

- Onimai

**In Another World With My Range Query Problems**

- Smartphone Isekai

**Bingo**

- Tonikawa

**Code**

No code is provided. There are $$$49$$$ accepted answers to this problem. Try to find them all!

### B. Mountain Climbing Easy

**Solution**

Iterate through the mountain from left to right and maintain a counter for the number of times the altitude increases consecutively. Make sure to reset this counter whenever the altitude does not increase. Also, ensure that each sequence of consecutive altitude increases is counted only once to avoid overcounting mountains.

**Code**

```
#include <iostream>
#include <fstream>
using namespace std;
int main() {
int N;
cin >> N;
int cnt = 0;
int prev = -1;
int ans = 0;
for (int i = 0; i < N; i++)
{
int a;
cin >> a;
if (a > prev && i) cnt++;
else if (a <= prev) cnt = 0;
if (cnt == 2) ans++;
prev = a;
}
cout << ans << "\n";
return 0;
}
```

### C. No Sweep

**Solution**

There is only $$$1$$$ scenario in any game where Thomas sweeps. So we should do complementary counting and count the total number of ways to assign winners and subtract $$$1$$$ from that. Each game there $$$(k + 1)$$$ possible winners, the $$$k$$$ problemsetters and Thomas. As there are $$$n$$$ rounds, the final result is that in $$$(k + 1)^n - 1$$$ games Thomas will not sweep.

**Code**

```
n,k=input().split()
print((int(k) + 1) ** int(n) - 1)
```

### D. Multiplication Table

**Solution**

We can guess that the number is the smallest prime $$$ > n$$$. Our intuition is right but the proof is a little harder that that.

**Proof**

Bertands Postulate states that there is at least one prime $$$p$$$ between $$$n$$$ and $$$2n$$$. We can know that $$$n < p < 2n$$$ and that $$$[n+1, p)$$$ are all composite. Consider for the sake of contradiction that there existed some $$$x$$$ such that $$$n < x < p$$$ and $$$x$$$ was not present on the multiplication table. We know that $$$x$$$ is composite and its lowest prime factor $$$l$$$ is at least $$$2$$$. As $$$x < 2n$$$, $$$\max(\frac{x}{l}, l) \leq n$$$ so $$$x$$$ will be written on cell $$$(\frac{x}{l}, l)$$$. As $$$x$$$ cannot exist, the answer will have to be the smallest prime number greater than $$$n$$$.

This gives us an $$$O(72 \sqrt{n})$$$ solution. The $$$72$$$ comes from the fact that for any integer $$$n \leq 10^5$$$ there is a prime that is at most $$$n + 72$$$.

**Code**

```
#include <cstdio>
int isp(int x){
for(int i = 2; i*i <= x; i++){
if(x%i == 0) return 0;
}
return 1;
}
int main(){
int n;
scanf("%d", &n);
do{
n++;
}while(isp(n) == 0);
printf("%d\n", n);
return 0;
}
```

### E. Cyclic Shifts

**Hint**

How many elements change between two adjacent cyclic shifts?

**Solution**

First, notice that there always exists an optimal series of operations where the cycle operation, if performed at all, is done first. There are only $$$\mathcal{O}(n)$$$ possible different cycle operations, and the answer afterwards is the sum over the absolute differences between $$$a$$$ and $$$b$$$ for each index.

Consider the first and second such cyclic shift of length $$$5$$$ on an array $$$a=[1, 2, \dots, 8]$$$:

Observe that between the first and second shift, only a small number of elements change positions. In fact, for a cyclic shift of length $$$k$$$ starting at $$$i$$$, only the elements at positions $$$i$$$, $$$i+k$$$, and $$$i+k-1$$$ change positions when transitioning to the cyclic shift at $$$i+1$$$. Thus, we can loop over all possible cyclic shifts, maintaining the sum and updating the indicies accordingly. Because there are only a constant number of updates for each cyclic shift, this runs in $$$\mathcal{O}(n)$$$.

**Code**

```
#include <bits/stdc++.h>
long long calculate_cost(int n, const std::vector<long long> &a, const std::vector<long long> &b) {
long long ret = 0;
for (int i = 0; i < n; i++)
ret += llabs(a[i] - b[i]);
return ret;
}
template<typename I>
void rotate_left(I l, I r) { std::rotate(l, std::next(l), r); }
int main() {
using namespace std;
int TC;
cin >> TC;
while (TC--) {
int N, K;
cin >> N >> K;
vector<long long> A(N), B(N);
for (auto &a : A)
cin >> a;
for (auto &b : B)
cin >> b;
long long ans = calculate_cost(N, A, B);
rotate_left(A.begin(), A.begin() + K);
long long cur = calculate_cost(N, A, B);
ans = min(ans, cur + 1);
for (int i = 1; i + K - 1 < N; i++) {
cur -= llabs(A[i - 1] - B[i - 1]) +
llabs(A[i + K - 2] - B[i + K - 2]) +
llabs(A[i + K - 1] - B[i + K - 1]);
auto tem = A[i - 1];
A[i - 1] = A[i + K - 2];
A[i + K - 2] = A[i + K - 1];
A[i + K - 1] = tem;
cur += llabs(A[i - 1] - B[i - 1]) +
llabs(A[i + K - 2] - B[i + K - 2]) +
llabs(A[i + K - 1] - B[i + K - 1]);
ans = min(ans, cur + 1);
}
cout << ans << '\n';
}
}
```

### F. Great Common Multiple

**Solution**

Let's define $$$a, b, c, d$$$ as they were defined in the statement. We first must pick $$$d$$$ as a multiple of $$$l = lcm(a, b)$$$. This is to guarantee that $$$d$$$ can be divided by both $$$a$$$ and $$$b$$$. Then instead of thinking of taking some number $$$p \cdot l$$$ mod $$$c$$$, we can instead imagine it as $$$p \cdot l - q \cdot c$$$. By Bezout's identity it is always possible to represent $$$g = gcd(c, l)$$$ with some choice of $$$p$$$ and $$$q$$$. As a consequence, $$$g, 2g, 3g, \dots, (\frac{c}{g}-1)g$$$ are possible results modulus $$$c$$$ for some choice of $$$d$$$ and the maximal of these will be $$$ (\frac{c}{g}-1)g$$$.

This gives us a $$$O(\log(\max(a, b, c))$$$ sol per testcase as just outputting $$$c - gcd(c, lcm(a, b))$$$.

**Code**

```
#include <iostream>
#include <cmath>
using ll = long long;
using namespace std;
ll gcd(ll a, ll b){
if(b == 0) return a;
return gcd(b, a%b);
}
int main(){
cin.tie(0) -> sync_with_stdio(0);
int T;
cin >> T;
while(T--){
ll a, b, c;
cin >> a >> b >> c;
ll lcm = a*b/gcd(a, b);
ll step = gcd(lcm, c);
cout << c - step << "\n";
}
}
```

### G. Daggers

**Hint 1**

Can you calculate what is the complexity of simulating the process greedily?

**Hint 2**

You can use harmonic series to calculate it.

**Solution**

We will use a greedy algorithm. We will always go the farthest shield at a distance $$$\le t$$$ from where we are and stay there until a dagger is thrown. We will repeat the same process until we are at a distance $$$\le t$$$ from $$$n$$$, then we can just arrive at $$$n$$$ and pass the level, or until there is no shield at a distance $$$\le t$$$ from where we are, then we can't pass the level. This is obviously optimal because we spend the least possible amount of time waiting in the same position.

The next step is to calculate on how many shields do we actually stop at for each level. We claim that the answer is at most $$$2\cdot \lceil \frac{n}{t} \rceil$$$.

**Proof**

Let's group the points into chunks of size $$$t$$$, there are $$$\lceil \frac{n}{t} \rceil$$$ such groups. Then we can see that we will visit at most $$$2$$$ shields from each group. That is because when we are at any shield we will either go to the last shield in that same group or go to a shield in the next group, visiting at most $$$2$$$ shields from each group.

Now we know that the number of shields that we will visit after the $$$n$$$ levels is at most $$$2 \cdot \sum \limits_{t=1}^{n} \lceil \frac{n}{t} \rceil$$$. Using harmonic series we get to the conclusion that after the $$$n$$$ levels we will have stopped at around $$$n \ln n$$$ shields, which actually isn't much and can lead to fast solutions.

Everything that's left now is to be able to quickly get the location of the farthest shield at a distance $$$\le t$$$ from where we are. We can do this with sets and the function upper_bound(), which has a complexity of $$$\mathcal{O}(\log{n})$$$. That results in the complexity of the solution being $$$\mathcal{O}(n \log^2{n})$$$.

**Code**

```
#include <bits\stdc++.h>
using namespace std;
int main(){
ios_base::sync_with_stdio(0);
cin.tie(0);
int n, q; cin >> n >> q;
set<int> shields;
for(int t = 1; t <= q; t++){
int x; cin >> x;
shields.insert(x);
long long ans = 0;
int curr = 0;
bool done = 0;
while(!done){
if(n - curr <= t){
cout << ans + n - curr << "\n";
done = 1;
}else{
auto it = shields.upper_bound(curr + t);
if(it == shields.begin() || *(--it) == curr){
cout << -1 << "\n";
done = 1;
}else{
curr = *it;
ans += t;
}
}
}
}
}
```

### H. A Certain Scientific Tree Problem

There are many solutions to this problem, some simpler than others, but I'll present the intended solution which involves the distance formula between two nodes.

**Hint 1**

$$$dist(i, j) = dep[i] + dep[j] - 2 \cdot dep[lca(i, j)]$$$

**Hint 2**

How does splitting into $$$3$$$ terms help? Can we count each one individually?

**Solution**

Let's define $$$d$$$ as the depth of the tree and $$$n = 2^d - 1$$$ or the number of nodes on the tree.

We want to find

Let's try to count for each node $$$u$$$ how many times $$$dep[u]$$$ will be counted by this sum. There are $$$2n$$$ times from when $$$i = u$$$ or $$$j = u$$$ but counting $$$lca(i, j) = u$$$ will take a little more work.

For $$$lca(i, j) = u$$$ we need $$$i$$$ and $$$j$$$ to be in different subtrees of $$$u$$$ (or if $$$i = u$$$ or $$$j = u$$$). Define $$$sub[u] = 2^{(d - dep[u] + 1)} - 1$$$ or $$$sub[u] = $$$ the number of nodes in node $$$u$$$'s subtree. We can also define $$$chsub[u] = \frac{sub[u]-1}{2}$$$ or $$$chsub[u] = $$$ the number of nodes in the subtree of node $$$u$$$'s children.

Then the number of pairs of $$$(i, j)$$$ with $$$lca(i, j) = u$$$ is

chsub[u] \cdot (sub[u] - chsub[u]) + sub[u]

$$$

The final observation is that all nodes with the same depth behave the same so it suffices to find the total contribution of one node of some depth and multiply that by the number of nodes on that depth.

Our final sum will be

This formula can be evaluated in $$$O(n)$$$ or $$$O(n \log n)$$$ per test case depending on whether or not you precomputed powers of $$$2$$$.

There is another, much more beautiful solution to this problem that involves counting the contribution of each edge but I won't explain it here :).

**Code**

```
#include <iostream>
using ll = long long;
const ll mod = 1e9 + 7;
const int MX = 2e5 +10;
using namespace std;
int two[MX];
void solve(){
int d; cin >> d;
ll ans = 0;
ll n = two[d] - 1;
for(int i = 0; i<d; i++){
ll ch = (i == d-1) ? 0 : 2;
ll ch_sub = two[d-1 - i]-1;
ll sub = ch*ch_sub + 1;
ll cnt = two[i];
ans += cnt*((n*i%mod + n*i%mod + (mod - 2)*i%mod*(sub + ch*ch_sub%mod*(sub - ch_sub)%mod))%mod)%mod;
ans %= mod;
}
cout << ans << "\n";
}
signed main(){
cin.tie(0) -> sync_with_stdio(0);
two[0] = 1;
for(int i = 1; i<=1e5; i++){
two[i] = 2*two[i-1]%mod;
}
int T = 1;
cin >> T;
for(int i = 1; i<=T; i++){
solve();
}
return 0;
}
```

### I. Mountain Climbing Hard

**Solution**

Lets first solve the subtask with negligible fog to build up to our final solution. We need to find the nearest point to the left and right of a given point $$$p$$$ with an altitude at least as high as $$$p$$$. There are a couple ways of doing this, but using a stack, we can precompute these points efficiently. We iterate through the points from left to right, and at each point $$$p$$$, we remove all stack elements with altitude at most $$$p$$$ before adding $$$p$$$ to the stack. This process identifies points $$$q$$$ to the left of $$$p$$$ for which $$$p$$$ is the first non-visible point from $$$q$$$. We repeat this process, iterating from right to left, to find the nearest points to the left and right of each point $$$p$$$ with altitude greater than or equal to $$$p$$$. Note that the stack remains sorted throughout the process, ensuring each point is visited at most twice, resulting in linear complexity.

To incorporate the fog in our solution, we need to find the number of points within visible range for point $$$p$$$ excluding points below the fog altitude (given that the fog is not above $$$p$$$). Note that we are essentially finding the number of elements in a subarray which are less than a certain value. A Binary Index Tree might come to mind, but it's not entirely clear how updates would work. Here, it's important to make the observation that the queries can be done offline. If we answer each query in order of the fog altitude, we can maintain a BIT such that all points below the fog have a value of 1, while all other points have value 0. We can then query on the visible range for a point $$$p$$$ to see how many points are not visible due to the fog, which is subtracted from the result from the stack method, to get the final solution. The overall complexity is $$$O(N \log N)$$$.

**Code**

```
#include <iostream>
#include <vector>
#include <algorithm>
#include <cassert>
using namespace std;
#define MAXN 1000001
#define MAXQ 100001
vector<pair<int, int> > points;
vector<pair<pair<int, int>, int> > qs;
int N, Q, a[MAXN], BIT[MAXN], r[MAXN], l[MAXN], sols[MAXQ];
int getSum(int index)
{
int sum = 0;
index = index + 1;
while (index > 0)
{
sum += BIT[index];
index -= index & (-index);
}
return sum;
}
void updateBIT(int index)
{
index = index + 1;
while (index <= N)
{
BIT[index] += 1;
index += index & (-index);
}
}
int main()
{
ios_base::sync_with_stdio(0);
cin.tie(0);
cin >> N >> Q;
for (int i = 0; i < N; i++)
{
cin >> a[i];
points.push_back({ a[i], i }); // used to iterate in increasing altitude
}
for (int i = 0; i < Q; i++)
{
int p, f;
cin >> p >> f;
qs.push_back({ {f, p - 1}, i }); //offline queries
}
for (int i = 0; i < N; i++) r[i] = N, l[i] = -1;
vector<pair<int, int> > sr, sl;
for (int i = 0; i < N; i++) {
int alt = a[i];
while (sr.size() && sr.back().first <= alt)
{
r[sr.back().second] = i;
sr.pop_back();
}
sr.push_back(points[i]);
int ind = N - i - 1;
alt = a[ind];
while (sl.size() && sl.back().first <= alt)
{
l[sl.back().second] = ind;
sl.pop_back();
}
sl.push_back(points[ind]);
}
sort(qs.begin(), qs.end());
sort(points.begin(), points.end());
int ind = 0;
for (int i = 0; i < Q; i++)
{
int fog = qs[i].first.first;
while (ind < N && points[ind].first < fog) {
updateBIT(points[ind].second);
ind++;
}
//point index of query
int qp = qs[i].first.second;
//no fog
int ans = r[qp] - l[qp] - 1;
//amt can't see cuz fog
int sub = getSum(r[qp] - 1) - getSum(l[qp]);
if (a[qp] >= fog) ans -= sub;
sols[qs[i].second] = ans;
}
for (int i = 0; i < Q; i++) cout << sols[i] << "\n";
return 0;
}
```

### J. Two and Three

**Editorial**

Many solutions for this problem invoke Sprague Grundy, but I will present a solution that does not.

Let us first solve the subtask for $$$n = 1$$$.

**n = 1**

The first important fact is that since we are discussing a game between two players, it will come down to parity. Imagine you are the second player, the most important part of your strategy is to keep something constant that the first player can't change. In particular, you can make sure that every two turns the number will be divided by $$$6$$$.

So with this fact in mind, if the first digit of your number in base $$$6$$$ is $$$1$$$, you can always win as the second player. On the other hand, if the first digit in base $$$6$$$ is not $$$1$$$ the first player will always win. This is because the first player can divide by $$$2$$$ if the first digit is $$$2$$$ or $$$3$$$ and $$$3$$$ if the first digit is $$$4$$$ or $$$5$$$ resulting in a scenario where they are the second player with the first digit as $$$1$$$.

These two facts give a $$$O(\log a_1)$$$ solution for the $$$n = 1$$$ subtask by just checking if the first digit in base $$$6$$$ is $$$1$$$ or not.

So now to solve for an array of integers, we will have to generalize our approach quite a bit.

**First off**

First off, only the first digit in base $$$6$$$ matters for each $$$a_i$$$. This is because if the number has more than $$$1$$$ digit in base $$$6$$$ and the first player makes a move on that number, the second player is guaranteed to have a move on that number still as well. This makes it so that some array $$$c$$$ where $$$c_i = $$$ the number of elements with digit $$$i$$$ as the first digit in base $$$6$$$ is all that matters to determine the winner.

**Secondly**

Secondly, if two numbers have the digits $$$2$$$ or $$$3$$$ as their first digit in base $$$6$$$, they behave the same. Both $$$2$$$ and $$$3$$$ are one move away from becoming $$$1$$$. Similarly, $$$4$$$ and $$$5$$$ behave the same. They can either become $$$2$$$ or become $$$1$$$. So with this in mind, let's define

$$$x = \text{#}$$$ with $$$2$$$ or $$$3$$$ as first digit in base $$$6$$$ and

$$$y = \text{#}$$$ with $$$4$$$ or $$$5$$$ as first digit in base $$$6$$$.

The winner will be solely determined off what $$$x$$$ and $$$y$$$ are.

**Lastly**

Lastly, we can notice that only the parity of $$$x$$$ and parity of $$$y$$$ ($$$x$$$ and $$$y$$$ as defined from the second point) matter. This is because if $$$x \geq 2$$$ and the first player makes a move that changes $$$x$$$, the second player can mirror the move the first player made on another number and restore the parity. The same logic applies to $$$y$$$.

**Full solution**

These three observations reduce our problem down to $$$4$$$ cases, which can be solved by hand.

- odd $$$x$$$, odd $$$y$$$
- odd $$$x$$$, even $$$y$$$
- even $$$x$$$, odd $$$y$$$
- even $$$x$$$, even $$$y$$$

$$$x$$$ and $$$y$$$ are defined in the second point. We can ascertain that only in the $$$4$$$ th case the second player wins.

**Proof**

- Consider the array $$$[2, 4]$$$. If the first player divides $$$4$$$ by $$$2$$$, they can be guaranteed to win.
- Consider the array $$$[2]$$$. If the first player divides $$$2$$$ by $$$2$$$, they can be guaranteed to win.
- Consider the array $$$[4]$$$. If the first player divides $$$4$$$ by $$$3$$$, they can be guaranteed to win.
- Consider the array $$$[1]$$$. The first player will lose.

The final complexity is $$$O(n \log a_i)$$$ as we need $$$O(\log a_i)$$$ to recover the first digit in base $$$6$$$.

As a side note. This is actually the exact same resulting solution if you used Sprague Grundy (SG) to solve.

**Relationship between this solution and Sprague Grundy**

You can find out from table printing that $$$SG(x) = \left \lfloor \frac{\text{first digit in base } 6}{2} \right \rfloor$$$. Then SG asks you xor the different SG numbers from independent games which would have left you with the same result as what I arrived at above.

**Code**

```
#include <iostream>
#include <vector>
#include <array>
using namespace std;
int dig(int x){
while(x >= 6){
x /= 6;
}
return x;
}
void solve(){
int n;
cin >> n;
array<int, 6> cnt; cnt.fill(0);
for(int i = 0; i<n; i++){
int x; cin >> x;
cnt[dig(x)]++;
}
int x = (cnt[2] + cnt[3])%2;
int y = (cnt[4] + cnt[5])%2;
if((x%2)|(y%2)) cout << "Nino\n";
else cout << "Miku\n";
}
signed main(){
cin.tie(0) -> sync_with_stdio(0);
int T = 1;
cin >> T;
for(int i = 1; i<=T; i++){
solve();
}
return 0;
}
```

### K. That Time I Got Reincarnated As A String Problem

**Hint 1**

Solve the first subtask for tests $$$1 - 4$$$.

**Solution**

We will ignore the expected value for now and compute the sum of unique strings over all ways to fill in question marks.

**The Math**

The number of unique strings formed where each character appears $$$c_i$$$ times is $$$ \frac{(\sum_i c_i)!}{\prod_i c_i!}$$$. Proof by wikipedia.

Let's replace the letters with numbers and define $$$a_i$$$ as the number of elements with value $$$i$$$ initially and $$$b_i$$$ the number of question marks that were replaced to be $$$i$$$. Let's also define $$$n$$$ as the length of the string and $$$q$$$ as the number of question marks. $$$\sum_i a_i + b_i = n$$$ and $$$\sum_i b_i = q$$$ should hold. For a given choice of $$$b_i$$$

is the total contribution to the sum. The first half is the number of unique strings formed with that choice of $$$b_i$$$ and the second half is the number of ways to split the question marks to fit that choice of $$$b_i$$$.

We can factor out the $$$n!$$$ and $$$q!$$$ we can observe that the sum we desire is

**The Algorithm**

So we can imagine that we have many bags (one for each number) and each in bag there are many items. The value of a certain item with weight $$$j \geq 0$$$ in the $$$i$$$'th bag is $$$\frac{1}{(a_i + j)!\cdot j!}$$$. We are going to pick exactly one item out of each bag and set the cost of that choice of items as the product of all the values taken. We are going to consider every possible way to pick one item out of each bag such that their total weight sums to $$$n$$$ and take the sums of costs over all choices. This weird setup leads us to arrive at the sum desired as from the above equation.

There are many to model this type of problem such as dp but surprisingly this is the exact definition of polynomial multiplication. If we consider the coefficient of the $$$x^n$$$'th term of the product

that would be the exact sum we need. This is because the weights are handled by the power of $$$x$$$ and the sum of costs is handled by adding the product of all the nomials taken from each bag.

We only need to store the first $$$n$$$ terms per character, and there is no need for FFT as we can just use $$$n^2$$$ multiplication. This gives a total complexity of $$$O(26 \cdot n^2)$$$As a side note, you need to use modular inverse to divide by $$$26^q$$$ to output the EV and not the sum. You also have to make sure to trim out excess terms so that each character isn't having too long of a polynomial multiplication time.

Also, it is worth noting that using dp here is perfectly viable as well, in fact way the code will end up working is the exact same as polynomial multiplication but that is up for the reader to figure out :).

**Code**

```
//hina best girl
#include <iostream>
#include <vector>
#include <cassert>
#include <array>
#include <string>
#define ll long long
#define sz(x) ((int)(x.size()))
const int mod = 1e9 + 7;
const int MX = 1e3 +10;
using namespace std;
ll fac[MX], ifac[MX];
ll binpow(ll a, ll b = mod - 2){
ll ans = 1;
for(int i = 1; i<=b; i*=2){
if(i&b) (ans *= a) %= mod;
(a *= a) %= mod;
}
return ans;
}
void precomp(int n){
fac[0] = 1;
for(int i = 1; i<=n; i++){
fac[i] = 1ll*i*fac[i-1]%mod;
}
ifac[n] = binpow(fac[n]);
for(int i = n; i>=1; i--){
ifac[i-1] = 1ll*i*ifac[i]%mod;
}
assert(ifac[0] == 1);
}
vector<ll> mult(vector<ll> a, vector<ll> b){
vector<ll> c(sz(a) + sz(b) - 1, 0ll);
for(int i = 0; i<sz(a); i++){
for(int j = 0; j<sz(b); j++){
c[i + j] += a[i]*b[j]%mod;
}
}
for(int i = 0; i<sz(c); i++) c[i] %= mod;
return c;
}
void solve(){
string s; cin >> s;
int n = sz(s);
array<int, 26> cnt = {0};
int q = 0;
for(char c : s){
if(c != '?'){
cnt[c - 'a']++;
}else{
q++;
}
}
vector<ll> ans = {1};
for(int i = 0; i<26; i++){
vector<ll> cur;
for(int j = 0; j<=cnt[i] + q; j++){
if(j < cnt[i]) cur.push_back(0);
else cur.push_back(ifac[j] * ifac[j - cnt[i]]%mod);
}
ans = mult(ans, cur);
if(sz(ans) > n+1) ans.erase(ans.begin() + n + 1, ans.end());
}
assert(sz(ans) == n+1);
ll oup = ans[n] * fac[n]%mod *fac[q]%mod;
oup *= binpow(binpow(26, q));
oup %= mod;
cout << oup << "\n";
}
int main(){
cin.tie(0) -> sync_with_stdio(0);
precomp(1000);
int T; cin >> T;
for(int i = 0; i<T; i++){
solve();
}
return 0;
}
```

### L. Stuck on Bricks

**Solution**

Draw the line from $$$(0, 0)$$$ to $$$(x_1, y_1)$$$. Notice that any time that this line touches the boundary between two bricks our answer increases by one.

Let's count the number of times we jump from a brick to a brick that is above it. This happens $$$y_1-1$$$ times since we always move to a new brick whenever we cross some line $$$y = a$$$ for $$$1 \le a \le y_1-1$$$.

Now count the number of times we jump from a brick to a brick that is to the right. To do this, write the equation for our line. It is $$$y_1x - x_1y = 0$$$. We check if this line crosses a boundary between two bricks at $$$x = b$$$. We find the intersection that these lines have with each other, $$$(c, d)$$$. If both $$$c$$$ and $$$d$$$ are integers, we do not count the movement to the right since we already counted the movement upwards to the next brick. Now we check if $$$(c, d)$$$ is a point between two horizontally adjacent bricks or in the space inside a brick. To do this, we notice that for any odd $$$x$$$ value, the point brings us to a new brick if it's $$$y$$$ coordinate is in any range $$$[1, 2], [3, 4], \dots$$$ while for even values of $$$x$$$ the point brings us to a new brick if it's $$$y$$$ coordinate is any range $$$[0, 1], [2, 3], \dots$$$. My code checks this by looking at the parity of the x coordinate and the floor of the y coordinate of this point. If their parity is the same, it means that we move to a new brick to the right at that point. This gives an $$$O(x_1)$$$ solution.

For the complete solution, we have to deal with cases when $$$x_1$$$ is large. To do this, notice that if it is then for every range $$$y_1, y_2$$$ we cross right boundary lines multiple times. You can solve this by calculations based on the boundary points for each consecutive pairs of $$$y$$$, or by scaling down $$$x_1$$$ when you notice that if $$$x > 2y_1$$$ we are guaranteed to pass through a range of $$$(2y_1)$$$ when we move to a point 1 coordinate upwards, so we can calculate the answer by adding $$$y_1$$$ with the answer to $$$(x_1-2y_1, y_1)$$$. Through this we can make use of mods to bring the $$$x$$$ coordinate down to $$$2y$$$. Then our time complexity is $$$O(\min(x, y))$$$.

**Code**

```
#include <bits/stdc++.h>
using namespace std;
void solve() {
long long x, y; cin>>x>>y;
{
long long ans = y;
if (x > 2 * y) {
long long c = (x-1) / (2*y);
ans += c * (y);
x = ((x-1) % (2 * y)) + 1;
}
for (int i = 1; i < x; i++) {
long long y1 = (y * i)/x;
if (y1*x==y*i) continue;
if (i%2==1 && y1%2==1) {
ans++;
}
else if (i%2==0 && y1%2==0) {
ans++;
}
}
cout<<ans<<endl;
}
}
int main() {
ios_base::sync_with_stdio(0); cin.tie(0); cout.sync_with_stdio(0);
int t = 1;
cin>>t;
while (t--)
solve();
return 0;
}
```

### M. Magic labyrinth

**Hint 1**

$$$k$$$ is very large, that should guide you in your solution.

**Hint 2**

Every $$$n$$$ seconds, the array goes back to its initial position.

**Hint 3**

Have you thought of matrix exponentiation or binary lifting?

**Solution**

We can compute a $$$dp[a][b][t]$$$, which will keep track of the minimum amount of gas possible to go from $$$a$$$ to $$$b$$$ in $$$t$$$ seconds. That means that the answer is $$$\min \limits_{b=1}^{n} dp[1][b][k]$$$. The problem is that, given the size of $$$k$$$, we can't compute the $$$dp$$$ for all $$$t \le k$$$.

The key observation that we must make is that the changes on the array are cyclic, that means that every $$$n$$$ seconds the array returns to the initial position. Because of that, we can compute the matrix $$$dp[a][b][n]$$$ and elevate it to the exponent $$$\lfloor \frac{k}{n} \rfloor$$$, and that will give us $$$dp[a][b][n \cdot \lfloor \frac{k}{n} \rfloor]$$$. All that's left now is computing the matrix for the remaining seconds, which is $$$dp[a][b][k \mod n]$$$, and adding the answers.

Complexity of the solution: $$$\mathcal{O}(n^{2} \cdot (n + m) + n^{3}\log{n})$$$.

Note that if you don't know about matrix exponentiation, there is a similar solution with the same complexity using binary lifting. You could compute all $$$dp[a][b][n \cdot 2^t]$$$, and add all those such that the bit in position $$$t$$$ of the binary representation of $$$\lfloor \frac{k}{n} \rfloor$$$ is $$$1$$$.

**Code for the first method**

```
#include<bits/stdc++.h>
using namespace std;
typedef long long int ll;
const int MOD = 1e9 + 7;
void mult(vector<vector<ll>>& a, vector<vector<ll>>& b){
int n = (int)a.size();
vector<vector<ll>> nw(n, vector<ll>(n));
ll mx = 1e18;
for(int i = 0; i < n; i++){
for(int j = 0; j < n; j++){
for(int q = 0; q < n; q++){
if(q == 0) nw[i][j] = min(a[i][q] + b[q][j], mx);
else nw[i][j] = min(nw[i][j], min(a[i][q] + b[q][j], mx));
}
}
}
a = nw;
}
vector<vector<ll>> exp(vector<vector<ll>> dp, int e){
if(e == 1) return dp;
if(e % 2 == 0){
vector<vector<ll>> dp2 = exp(dp, e / 2);
mult(dp2, dp2);
return dp2;
}else{
vector<vector<ll>> dp2 = exp(dp, e / 2);
mult(dp2, dp2); mult(dp2, dp);
return dp2;
}
}
void solve(){
int n, m, k; cin >> n >> m >> k;
vector<int> a(n);
for(auto &i : a) cin >> i;
vector<vector<int>> adj(n);
for(int i = 0; i < m; i++){
int u, v; cin >> u >> v; u--; v--;
adj[u].push_back(v);
}
int additional = k % n;
k /= n;
vector<vector<ll>> ad(n);
vector<vector<ll>> dp(n);
for(int i = 0; i < n; i++){
vector<ll> curr(n, 1e18);
curr[i] = 0;
for(int t = 1; t <= n; t++){
vector<ll> lst = curr;
curr.assign(n, 1e18);
for(int j = 0; j < n; j++){
curr[j] = min(curr[j], lst[j] + a[(j + (t - 1)) % n]);
for(int x : adj[j]){
curr[x] = min(curr[x], lst[j] + a[(j + (t - 1)) % n]);
}
}
if(t == additional){
ad[i] = curr;
}
}
dp[i] = curr;
}
if(k == 0){
vector<vector<ll>> ans = ad;
ll res = 1e18;
for(int i = 0; i < n; i++) res = min(res, ans[0][i]);
cout << res << endl;
return;
}
vector<vector<ll>> ans = exp(dp, k);
if(additional > 0) mult(ans, ad);
ll res = 1e18;
for(int i = 0; i < n; i++) res = min(res, ans[0][i]);
cout << res << endl;
}
signed main(){
ios_base::sync_with_stdio(0);
cin.tie(0);
//~ int tt; cin >> tt;
int tt = 1;
for(int t = 1; t <= tt; t++){
solve();
}
}
```

**Code for the second method**

```
#include<bits/stdc++.h>
using namespace std;
typedef long long int ll;
const int MOD = 1e9 + 7;
void mult(vector<vector<ll>>& a, vector<vector<ll>>& b){
int n = (int)a.size();
vector<vector<ll>> nw(n, vector<ll>(n));
ll mx = 1e18;
for(int i = 0; i < n; i++){
for(int j = 0; j < n; j++){
for(int q = 0; q < n; q++){
if(q == 0) nw[i][j] = min(a[i][q] + b[q][j], mx);
else nw[i][j] = min(nw[i][j], min(a[i][q] + b[q][j], mx));
}
}
}
a = nw;
}
void solve(){
int n, m, k; cin >> n >> m >> k;
vector<int> a(n);
for(auto &i : a) cin >> i;
vector<vector<int>> adj(n);
for(int i = 0; i < m; i++){
int u, v; cin >> u >> v; u--; v--;
adj[u].push_back(v);
}
int additional = k % n;
k /= n;
vector<vector<ll>> ad(n);
vector<vector<ll>> dp(n);
for(int i = 0; i < n; i++){
vector<ll> curr(n, 1e18);
curr[i] = 0;
for(int t = 1; t <= n; t++){
vector<ll> lst = curr;
curr.assign(n, 1e18);
for(int j = 0; j < n; j++){
curr[j] = min(curr[j], lst[j] + a[(j + (t - 1)) % n]);
for(int x : adj[j]){
curr[x] = min(curr[x], lst[j] + a[(j + (t - 1)) % n]);
}
}
if(t == additional){
ad[i] = curr;
}
}
dp[i] = curr;
}
if(k == 0){
vector<vector<ll>> ans = ad;
ll res = 1e18;
for(int i = 0; i < n; i++) res = min(res, ans[0][i]);
cout << res << endl;
return;
}
vector<vector<ll>> ans = dp;
k--;
for(int b = 0; b < 30; b++){
if((1 << b) > k) break;
if((1<<b) & k){
mult(ans, dp);
}
mult(dp, dp);
}
if(additional > 0) mult(ans, ad);
ll res = 1e18;
for(int i = 0; i < n; i++) res = min(res, ans[0][i]);
cout << res << endl;
}
signed main(){
ios_base::sync_with_stdio(0);
cin.tie(0);
//~ int tt; cin >> tt;
int tt = 1;
for(int t = 1; t <= tt; t++){
solve();
}
}
```

### N. This Tree Problem Is Done For

**Hint 1**

The min function is hard to deal it, so how can we separate the two terms?

**Hint 2**

If we can split the tree into two subtrees and find the maximum path in each subtree, then this will be the optimal solution for that split. If we find the maximum of this over all possible splits, then we have our answer.

**Solution**

It is easy to see that all possible splitting of the tree can be obtained by removing an edge and looking at the remaining subtrees. This leads us to an $$$O(N^2)$$$ solution where we try splitting the tree over every edge, then run a dp in each of the two remaining subtrees.

We can define $$$dp[x][0] = \text{maximum path that contains node x}$$$ and $$$dp[x][1] = \text{maximum path in the subtree of x}$$$. $$$dp[x][0]$$$ can be calculated by using the maximum $$$dp[i][0]$$$, where $$$i$$$ is a child of $$$x$$$. Meanwhile, $$$dp[x][1]$$$ can be calculated by using the two maximum $$$dp[i][0]$$$ and maximum $$$dp[i][1]$$$. With these dp states, we can easily update the values when attaching or removing children from a node. If we maintain a multiset storing all of the $$$dp[i][0]$$$ and $$$dp[i][1]$$$, then attaching a child becomes inserting a value into the multiset, and removing a child becomes removing a value from the multiset. $$$dp[x][0]$$$ then transitions to the largest value in the multiset of $$$dp[i][0]$$$, while $$$dp[x][1]$$$ transitions to the two largest values of $$$dp[i][0]$$$ and the largest value of the multiset of $$$dp[i][1]$$$.

Now, if our tree is rooted at $$$x$$$, we can effectively split the tree over every edge attached to $$$x$$$ by removing and adding children. We can also use this method to efficiently reroot the tree, thus allowing us to be able to split the tree over every edge. Lets say we are rerooting our tree from node $$$a$$$ to node $$$b$$$. To do this, we can remove $$$b$$$ from being a child of $$$a$$$, then attach $$$a$$$ to be a child of $$$b$$$ using the values computed after the first removal. Using this described method yields a solution of $$$O(N \text{ log } N)$$$, which if well implemented, will pass. However, we can actually optimize this further by only storing the three maximum values for each multiset instead of all of them. It is easy to see that those are all we need to support our operations.

There also exists other solutions such as binary searching on the answer for $$$O(N \text{ log } N)$$$ complexity and a greedy solution involving diameters for $$$O(N)$$$.

**Code**

```
#pragma GCC optimize("O3")
#pragma GCC optimization ("unroll-loops")
#pragma GCC target("avx,avx2,fma")
#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,tune=native")
#include <bits/stdc++.h>
using namespace std;
#define pb push_back
#define ff first
#define ss second
typedef long long ll;
typedef pair<int, int> pii;
typedef pair<ll, ll> pll;
const int INF = 1e9;
const ll LLINF = 1e18;
const int MOD = 1e9 + 7;
vector<pii> g[500005];
ll dp[500005][2];
multiset<ll, greater<ll>> deg[500005][2];
void dfs1(int x, int p = -1){
deg[x][0].insert(0), deg[x][0].insert(0);
for(pii i : g[x]){
if(i.ff == p) continue;
dfs1(i.ff, x);
deg[x][0].insert(dp[i.ff][0] + i.ss);
deg[x][1].insert(dp[i.ff][1]);
}
deg[x][1].insert(*deg[x][0].begin() + *next(deg[x][0].begin()));
dp[x][0] = *deg[x][0].begin();
dp[x][1] = *deg[x][1].begin();
while(deg[x][0].size() > 3) deg[x][0].erase(prev(deg[x][0].end()));
while(deg[x][1].size() > 3) deg[x][1].erase(prev(deg[x][1].end()));
}
ll ans;
void rem(int x, int t, ll v){
multiset<ll, greater<ll>>::iterator it = deg[x][t].find(v);
if(it != deg[x][t].end()) deg[x][t].erase(it);
}
void ins(int x, int t, ll v){
deg[x][t].insert(v);
while(deg[x][t].size() > 3) deg[x][t].erase(prev(deg[x][t].end()));
}
void move(int a, int b, int c){
rem(a, 1, dp[b][1]);
rem(a, 1, *deg[a][0].begin() + *next(deg[a][0].begin()));
rem(a, 0, dp[b][0] + c);
ins(a, 1, *deg[a][0].begin() + *next(deg[a][0].begin()));
dp[a][0] = *deg[a][0].begin();
dp[a][1] = *deg[a][1].begin();
ans = max(ans, min(dp[a][1], dp[b][1]));
rem(b, 1, *deg[b][0].begin() + *next(deg[b][0].begin()));
ins(b, 0, dp[a][0] + c);
ins(b, 1, *deg[b][0].begin() + *next(deg[b][0].begin()));
ins(b, 1, dp[a][1]);
dp[b][0] = *deg[b][0].begin();
dp[b][1] = *deg[b][1].begin();
}
void dfs2(int x, int p = - 1){
for(pii i : g[x]){
if(i.ff == p) continue;
move(x, i.ff, i.ss);
dfs2(i.ff, x);
move(i.ff, x, i.ss);
}
}
const int BUFSIZE = 20 << 21;
char Buf[BUFSIZE + 1], *buf = Buf;
template<class T>
void scan(T &x){
int neg = 1;
for(x = 0; *buf < '0' || *buf > '9'; buf++) if(*buf == '-') neg = -1;
while(*buf >= '0' && *buf <= '9') x = x*10 + (*buf - '0'), buf++;
x *= neg;
}
void setIO(){
fread(Buf, 1, BUFSIZE, stdin);
}
int main(){
setIO();
int t;
scan(t);
for(int tt = 1; tt <= t; tt++){
int n;
scan(n);
for(int i = 0; i < n - 1; i++){
int a, b, c;
scan(a), scan(b), scan(c);
g[a].pb({b, c});
g[b].pb({a, c});
}
ans = 0;
dfs1(1);
dfs2(1);
cout << ans << endl;
for(int i = 1; i <= n; i++){
g[i].clear();
deg[i][0].clear();
deg[i][1].clear();
}
}
}
```

### O. Prefix queries

**Hint 1**

Create an array $$$b$$$ such that $$$b_i = a_i - \sum \limits_{j=1}^{i-1} a_j$$$.

**Hint 2**

Can you determine which indices satisfy the conditions using the array $$$b$$$?

**Hint 3**

How do the queries affect the array $$$b$$$?

**Solution**

Please, read all the hints if you haven't.

We can see that the $$$i$$$ which satisfy the conditions are those such that $$$2 \le i \le n$$$ and $$$b_i \ge 0$$$. Now, the key idea to solve the problem relies on the answer to the third hint. After every query, only $$$b_l$$$ can go from negative to positive. That is because the array $$$b$$$ changes in the following way:

The elements with index $$$i < l$$$ remain unchanged.

For the elements with index $$$l \le i \le r$$$, $$$b_i: =b_i -x \cdot (i - l - 1)$$$. $$$b_l$$$ increases, $$$b_{l+1}$$$ remains the same, and the rest decrease.

For the elements with index $$$i > r$$$, $$$b_i: =b_i - x \cdot (r - l + 1)$$$. They all decrease.

Keeping this in mind, we know that we can maintain a set with all the potential answers. Each time that some $$$b_i$$$ becomes positive, we add $$$i$$$ to the set, knowing that we will do at most $$$n + q$$$ insertions. Then, to answer the queries, we just traverse the set from lower to greater, erasing the elements until we find one that is positive, and we print it, or the set becomes empty, then we output $$$-1$$$. To know whether some $$$b_i$$$ is positive or not, we can use a lazy segment tree supporting range update range queries.

Complexity of the solution: $$$\mathcal{O}((n+q)\log{(n+q)})$$$.

**Code**

```
#include<bits/stdc++.h>
using namespace std;
struct segTree{
vector<long long> v;
vector<long long> upd;
int size;
void init(int n){
size = 1;
while(size < n) size *= 2;
v.assign(2 * size, 0);
upd.assign(2 * size, 0);
}
void set(int l, int r, int x, int lx, int rx, long long u){
if(lx >= l && rx <= r){
v[x] += u * (rx - lx);
upd[x] += u;
return;
}
if(lx >= r || l >= rx) return;
int m = (lx + rx) / 2;
set(l, r, 2 * x + 1, lx, m, u);
set(l, r, 2 * x + 2, m, rx, u);
v[x] = v[2 * x + 1] + v[2 * x + 2] + upd[x] * (rx - lx);
}
void set(int l, int r, long long u){
return set(l, r, 0, 0, size, u);
}
void build(vector<int>& a, int x, int lx, int rx){
if (rx - lx == 1){
if (lx < (int)a.size()){
v[x] = a[lx];
}
return;
}
int m = (lx + rx) / 2;
build(a, 2 * x + 1, lx, m);
build(a, 2 * x + 2, m, rx);
v[x] = v[2 * x + 1] + v[2 * x + 2];
}
void build(vector<int>& a){
build(a, 0, 0, size);
}
pair<long long, int> sum(int l, int r, int x, int lx, int rx){
if(lx >= l && rx <= r) return {v[x], rx - lx};
else if(lx >= r || rx <= l) return {0, 0};
int m = (lx + rx) / 2;
pair<long long, int> s1 = sum(l, r, 2 * x + 1, lx, m);
pair<long long, int> s2 = sum(l, r, 2 * x + 2, m, rx);
return {s1.first + s2.first + upd[x] * (s1.second + s2.second), (s1.second + s2.second)};
}
long long sum(int l, int r){
pair<long long, int> p = sum(l, r, 0, 0, size);
return p.first;
}
};
int main(){
ios_base::sync_with_stdio(0);
cin.tie(0);
int n, q; cin >> n >> q;
vector<int> a(n);
set<int> positive; //Contains all the indices which could be potential answers.
long long sum = 0;
for(int i = 0; i < n; i++){
int x; cin >> x;
a[i] = x;
if(i > 0 && x - sum >= 0) positive.insert(i);
sum += x;
}
segTree st; st.init(n); st.build(a);
while(q--){
int l, r, x; cin >> l >> r >> x; l--; r--;
st.set(l, r + 1, (long long)x);
if(l > 0 && st.sum(l, l + 1) - st.sum(0, l) >= 0) positive.insert(l);
bool done = 0;
while((int)positive.size() > 0 && done == 0){
int ind = *positive.begin();
if(st.sum(ind, ind + 1) - st.sum(0, ind) >= 0){
done = 1;
cout << ind + 1 << "\n";
}else{
positive.erase(positive.begin());
}
}
if(!done) cout << -1 << "\n";
}
}
```

### P. In Another World With My Range Query Problems

**Hint 1**

To count the sum of all subarrays in an array, we can count how many subarrays each element belongs to, then multiply by the element value. In other words, the sum over all subarrays is $$$\sum a_i \cdot i \cdot (N - i + 1)$$$

**Hint 2**

You can maintain this on a segtree.

**Solution**

For each $$$a_i$$$ in the range $$$[l, r]$$$, we want to add $$$a_i \cdot (r - i + 1) \cdot (i - l + 1)$$$ to the answer. Expanding this, we get $$$-a_i \cdot i^2 + a_i \cdot i \cdot l + a_i \cdot i \cdot r - a_i \cdot l \cdot r - a_i \cdot l + a_i \cdot r + a_i$$$. To maintain this on a segtree, we can just maintain $$$\sum a_i \cdot i^2$$$, $$$\sum a_i \cdot i$$$, $$$\sum a_i$$$, $$$\sum i$$$, and $$$\sum i^2$$$ for each node. Then, when querying, we can just find the sum of these values and put them in the formula to retrieve the answer. Updates can be done using lazy propagation similar to range add, using $$$\sum i$$$ and $$$\sum i^2$$$ to update $$$\sum a_i \cdot i$$$ and $$$\sum a_i \cdot i^2$$$, respectively.

**Code**

```
#pragma GCC optimize("O3,unroll-loops")
#pragma GCC target("avx,avx2,fma")
#pragma GCC target("sse4,popcnt,abm,mmx,tune=native")
#include <bits/stdc++.h>
using namespace std;
#define pb push_back
#define ff first
#define ss second
typedef long long ll;
typedef pair<int, int> pii;
typedef pair<ll, ll> pll;
const int MOD = 1e9 + 7;
void setIO() {
ios_base::sync_with_stdio(0); cin.tie(0);
}
int n, q;
int arr[200005];
struct node {
int iia, ia, a, ii, i, sz;
node(){
iia = ia = a = sz = 0;
}
};
node seg[800005];
int tag[800005];
void push_down(int x){
if(!tag[x]) return;
for(int i = x*2 + 1; i <= x*2 + 2; i++){
seg[i].a = (seg[i].a + (ll)seg[i].sz*tag[x]%MOD)%MOD;
seg[i].ia = (seg[i].ia + (ll)seg[i].i*tag[x]%MOD)%MOD;
seg[i].iia = (seg[i].iia + (ll)seg[i].ii*tag[x]%MOD)%MOD;
tag[i] = (tag[i] + tag[x])%MOD;
}
tag[x] = 0;
}
node pull(node a, node b){
node ret;
ret.iia = (a.iia + b.iia)%MOD;
ret.ia = (a.ia + b.ia)%MOD;
ret.ii = (a.ii + b.ii)%MOD;
ret.a = (a.a + b.a)%MOD;
ret.i = (a.i + b.i)%MOD;
ret.sz = (a.sz + b.sz)%MOD;
return ret;
}
void build(int l = 1, int r = n, int cur = 0){
if(l == r){
seg[cur].sz = 1;
seg[cur].i = l;
seg[cur].a = arr[l];
seg[cur].ii = (ll)l*l%MOD;
seg[cur].ia = (ll)l*arr[l]%MOD;
seg[cur].iia = (ll)l*l%MOD*arr[l]%MOD;
return;
}
int mid = (l + r)/2;
build(l, mid, cur*2 + 1);
build(mid + 1, r, cur*2 + 2);
seg[cur] = pull(seg[cur*2 + 1], seg[cur*2 + 2]);
}
int get(node x, int l, int r){
int ret = MOD - x.iia;
(ret += (ll)x.ia*l%MOD) %= MOD;
(ret += (ll)x.ia*r%MOD) %= MOD;
(ret += MOD - (ll)x.a*l%MOD*r%MOD) %= MOD;
(ret += MOD - (ll)x.a*l%MOD) %= MOD;
(ret += (ll)x.a*r%MOD) %= MOD;
(ret += x.a) %= MOD;
return ret;
}
int query(int l, int r, int ul = 1, int ur = n, int cur = 0){
if(l <= ul && ur <= r){
return get(seg[cur], l, r);
}
if(ur < l || ul > r) return 0;
push_down(cur);
int mid = (ul + ur)/2;
return (query(l, r, ul, mid, cur*2 + 1) + query(l, r, mid + 1, ur, cur*2 + 2))%MOD;
}
void update(int l, int r, int v, int ul = 1, int ur = n, int cur = 0){
if(l <= ul && ur <= r){
seg[cur].a = (seg[cur].a + (ll)seg[cur].sz*v%MOD)%MOD;
seg[cur].ia = (seg[cur].ia + (ll)seg[cur].i*v%MOD)%MOD;
seg[cur].iia = (seg[cur].iia + (ll)seg[cur].ii*v%MOD)%MOD;
tag[cur] = (tag[cur] + v)%MOD;
return;
}
push_down(cur);
int mid = (ul + ur)/2;
if(l <= mid) update(l, r, v, ul, mid, cur*2 + 1);
if(r > mid) update(l, r, v, mid + 1, ur, cur*2 + 2);
seg[cur] = pull(seg[cur*2 + 1], seg[cur*2 + 2]);
}
int main(){
setIO();
cin >> n >> q;
for(int i = 1; i <= n; i++) cin >> arr[i];
build();
while(q--){
int t, l, r;
cin >> t >> l >> r;
if(t == 1){
cout << query(l, r) << endl;
} else {
int v;
cin >> v;
update(l, r, v);
}
}
}
```

### Q. Another Floors Problem

A solution for this problem is not published yet. For now, please refer to this tester solution.

**Code**

```
# O(N*log(precision + MAXA*log(MAXA))
# ANALYZING WHY WE CAN'T EXPRESS ALL VALUES:
#
# The sum changes at x = b/z if for some i, z | a[i].
# However, there may be multiple such i, and so the sum "skips" a few values
# since multiple a transition (floor(ax) increases). This is why we can't express all integers.
#
# IDEA:
#
# The number of expressible integers = the number of x such that the sum increases.
#
# SOLUTION:
#
# Realize that for a fixed max x, we can do a dp on divisors to compute:
# transition_points[i] = # of x such that x in simplest form = b/i
#
# Then, the sum is increased to a new value at transition_points[i] x's if there exists an
# a that transitions at those points x = b/i => i | a.
#
# Thus, we can find # of expressible numbers below r and l-1 by binary searching on the maximum x
# (be careful about precision!).
# We can binary search on the integer part and then the fractional part.
# Subtract the values for the answer.
from math import floor
MAXA = 10**5
def expressible_numbers_below_bound(a, right_bound):
# Binary search to find max_x
# First binary search on the integer part of max_x
l = 0
r = 10**18
while l != r:
m = (l + r + 1) // 2
tot_sum = sum(floor(m*val) for val in a)
if tot_sum <= right_bound:
l = m
else:
r = m-1
# Now binary search on the fractional part of max_x
fractional_l = 0
fractional_r = 1
# We can stop at <= 2e-10 actually
while fractional_r - fractional_l > 1e-12:
m = (fractional_l + fractional_r) / 2
# This fails if abs(m - b/val) < 1e-15 and we have nlog(precision) calculations.
# Thus, there is a 1e-9 chance of occurring over all calculations
tot_sum = sum(l*val + floor(m*val) for val in a)
if tot_sum <= right_bound:
fractional_l = m
else:
fractional_r = m
max_x_integer = l
max_x_fractional = fractional_l
# Now we find the number of expressible integers using 0 <= x <= max_x
cnt = [0] * (MAXA + 1)
for i in a:
cnt[i] += 1
divisors = [[] for _ in range(MAXA+1)]
number_of_a_that_transition = [0] * (MAXA + 1)
for i in range(1, MAXA+1):
for j in range(i, MAXA+1, i):
divisors[j].append(i)
number_of_a_that_transition[i] += cnt[j]
transition_points = [0] * (MAXA + 1)
for i in range(1, MAXA + 1):
for div in divisors[i]:
if div != i:
transition_points[i] -= transition_points[div]
transition_points[i] += i*max_x_integer + floor(i*max_x_fractional)
return sum((number_of_a_that_transition[i] > 0) * transition_points[i] for i in range(1, MAXA + 1))
n, l, r = map(int, input().split())
a = list(map(int, input().split()))
ans = expressible_numbers_below_bound(a, r) — expressible_numbers_below_bound(a, l-1)
print(ans)
```

### R. Bingo

**Solution for k = 20 that also happens to cheese and ac**

Consider each valid row/column as a bitmask with at most $$$5$$$ bits. Then we can consider a query as an order to turn on bits in a size $$$20$$$ bitmask. If at any point a row/column appears in this bitmask we can know the game ends, and if we can determine the minimum ID that forms a working row/column we will have solved the problem.

The best algorithm to use for this is a bitmask dp/SOS dp. For each valid row/column we can write in the minimum ID that has that row/column. Then for all the other masks we can compute the minimum mask out of all of the submasks. This gives us an $$$O(k 2^k + q)$$$ solution.

I, as the problemsetter, made a mistake and did not realize that $$$25 \cdot 2^{25}$$$ runs comfortably in the time limit and watched my problem get cheesed by a subtask sol :<.

**Intended solution. Can be used to solve k = 27**

The intended solution is built off the subtask solution. We still want to use the bitmask idea. Consider splitting the $$$k$$$ possible numbers into $$$5$$$ parts with sizes $$$a_1, a_2, a_3, a_4, a_5$$$. We will build a bitmask dp that covers any mask which spans at most $$$4$$$ of these parts and we will manually enumerate the masks which span all $$$5$$$ every query. This gives us an $$$O({5 \choose 4} \cdot 2^{(k - \min(a_i))} + q \cdot \prod a_i)$$$ solution. For $$$k = 25$$$ we should pick $$$a = [5, 5, 5, 5, 5]$$$ and for $$$ k = 27$$$ we should pick $$$k = [6, 6, 5, 5, 5]$$$ which should fit comfortably within the limit.

There are also ways to ac by constant optimizing out a $$$k \choose 4$$$ per query.

**Code**

```
//misaka, hitori, and elaina will carry me to red
#include <iostream>
#include <cmath>
#include <utility>
#include <cassert>
#include <algorithm>
#include <vector>
#include <array>
#include <cstdio>
#include <cstring>
#include <functional>
#include <numeric>
#include <set>
#include <queue>
#include <map>
#include <chrono>
#include <random>
#define sz(x) ((int)(x.size()))
#define all(x) x.begin(), x.end()
#define pb push_back
#define eb emplace_back
#define kill(x, s) {if(x){ cout << s << "\n"; return ; }}
#define lsb(x) ((x)&(-(x)))
#define lg2(x) (31 — __builtin_clz(x))
#define pcnt(x) __builtin_popcount(x)
#ifndef LOCAL
#define cerr while(0) cerr
#endif
using ll = long long;
using lb = long double;
const lb eps = 1e-9;
const ll mod = 1e9 + 7, ll_max = 1e18;
//const ll mod = (1 << (23)) * 119 +1, ll_max = 1e18;
const int MX = 2e5 +10, int_max = 0x3f3f3f3f;
struct {
template<class T>
operator T() {
T x; std::cin >> x; return x;
}
} in;
using namespace std;
const int k = 5;
const int p = 5;
const int A = 25;
vector<int> mem[40];
array<int, A*A*A*A*A> val;
int cond(int x, int mask){
int ans = 0, b = 1;
for(int i = 0; i<k; i++){
if(mask&(1 << i)){
ans += ((x&(((1 << p)-1) << (i*p))) >> (i*p)) * b;
b *= (1 << p);
}
}
return ans;
}
int check(int x, int mask){
for(int i = 0; i<k; i++){
if(!(mask&(1 << i))){
if(x&(((1 << p)-1) << (i*p))) return 0;
}
}
return 1;
}
vector<int> sos(vector<int> a){
int n = lg2(sz(a));
for(int i = 0; i<n; i++){
for(int j = 0; j<(1 << n); j++){
if(j&(1 << i)){
a[j] = min(a[j], a[j — (1 << i)]);
}
}
}
return a;
}
void solve(){
int n = in;
int mx = in;
vector<pair<int, int>> small;
val.fill(int_max);
for(int i = 1; i<=n; i++){
array<array<int, k>, k> board;
for(int j = 0; j<k; j++){
for(int h = 0; h<k; h++){
board[j][h] = in;
board[j][h]--;
}
}
for(int j = 0; j<k; j++){
unsigned int r = 0, c = 0;
array<int, k> rr, cc;
for(int h = 0; h<k; h++){
r |= (1 << board[j][h]);
c |= (1 << board[h][j]);
rr[h] = board[j][h];
cc[h] = board[h][j];
}
sort(all(rr));
sort(all(cc));
int r1 = 0, c1 = 0;
int b = 1;
for(int h = 0; h<k; h++){
r1 += b*rr[h];
c1 += b*cc[h];
b *= A;
}
if(val[c1] == int_max){
small.pb({c, i});
val[c1] = i;
}
if(val[r1] == int_max){
small.pb({r, i});
val[r1] = i;
}
}
}
for(int mask = 1; mask<(1 << k)-1; mask++){
vector<int> a(cond((1 << (k*p))-1, mask)+1, int_max);
for(auto [m, id] : small){
if(check(m, mask)){
a[cond(m, mask)] = min(a[cond(m, mask)], id);
}
}
mem[mask] = sos(a);
}
int q = in;
for(int qaq = 1; qaq<=q; qaq++){
array<int, A> perm, inv = perm;
iota(all(perm), 0);
for(int i = 0; i<mx; i++){
perm[i] = in;
perm[i]--;
}
for(int i = 0; i<A; i++){
inv[perm[i]] = i;
}
int glob = 0;
pair<int, int> ans = {int_max, int_max};
for(int i = 0, wo = 0; i<A && wo == 0; i++){
if(perm[i] < p*k)
glob |= (1 << perm[i]);
for(int mask = 1; mask<(1 << k)-1; mask++){
int v = mem[mask][cond(glob, mask)];
if(v != int_max){
ans = min(ans, pair(i, v));
wo = 1;
}
}
}
for(int i = 0; i<p; i++){
for(int j = p; j<p*2; j++){
for(int h = p*2; h<p*3; h++){
for(int l = p*3; l<p*4; l++){
for(int m = p*4; m<p*5; m++){
int hsh = i + A*j + A*A*h + A*A*A*l + A*A*A*A*m;
int t = max({inv[i], inv[j], inv[h], inv[l], inv[m]});
if(val[hsh] != int_max) ans = min(ans, pair(t, val[hsh]));
}
}
}
}
}
cout << ans.second << " " << perm[ans.first]+1 << "\n";
}
}
signed main(){
cin.tie(0) -> sync_with_stdio(0);
int T = 1;
//cin >> T;
for(int i = 1; i<=T; i++){
//cout << "Case #" << i << ": ";
solve();
}
return 0;
}
```