CSES String Section Editorial

#	User	Rating
1	tourist	3880
2	jiangly	3669
3	ecnerwala	3654
4	Benq	3627
5	orzdevinwang	3612
6	Geothermal	3569
6	cnnfls_csy	3569
8	jqdai0815	3532
9	Radewoosh	3522
10	gyh20	3447

#	User	Contrib.
1	awoo	161
1	maomao90	161
3	adamant	156
4	maroonrk	153
5	-is-this-fft-	148
5	atcoder_official	148
5	SecondThread	148
8	Petr	147
9	nor	144
10	TheScrasse	142

I've seen CF tutorials for many other sections of CSES but didn't see one for strings, so I thought of writing one. Constructive criticism (or just regular criticism) is always welcome!↵
↵
Note that there are many ways to do a problem. For instance, linear time string searching can be done with hashing, KMP, Z-Algorithm, Aho-Corasick Automaton, etc. . I used the way that was most intuitive for me, but there might exist other ways that have shorter code. ↵
↵
**1. Word Combinations**↵
↵
<spoiler summary="Solution">↵
This problem reduces to knapsack if there exists a way to quickly match multiple strings. Precomputing the hashes is too slow ($\mathcal{O}(nk)$), so a faster method is needed. Aho-Corasick automaton suffices. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
#define PB push_back↵
↵
const int MOD = 1e9 + 7;↵
↵
string S;↵
int K, I = 1, DP[5005];↵
↵
vector<int> adj[500005];↵
↵
struct node {↵
 int fail, ch[26] = {};↵
 vector<int> lens;↵
 string s = "";↵
} T[500005];↵
↵
void insert(string s) {↵
 int x = 1;↵
 for (int i = 0; i < s.size(); i++) {↵
 if (T[x].ch[s[i] - 'a'] == 0)↵
 T[x].ch[s[i] - 'a'] = ++I;↵
 T[T[x].ch[s[i] - 'a']].s = T[x].s + s[i];↵
 x = T[x].ch[s[i] - 'a'];↵
 }↵
 T[x].lens.PB(s.size());↵
}↵
↵
// initializes lens↵
void dfs(int u) {↵
 T[u].lens.insert(T[u].lens.end(), T[T[u].fail].lens.begin(), T[T[u].fail].lens.end());↵
 for (int v : adj[u])↵
 dfs(v);↵
}↵
↵
void build() {↵
 queue<int> Q;↵
 int x = 1; ↵
 T[1].fail = 1;↵
 for (int i = 0; i < 26; i++) {↵
 if (T[x].ch[i])↵
 T[T[x].ch[i]].fail = x, Q.push(T[x].ch[i]);↵
 else ↵
 T[x].ch[i] = 1;↵
 }↵
 while (!Q.empty()) {↵
 x = Q.front(); Q.pop();↵
 for (int i = 0; i < 26; i++) {↵
 if (T[x].ch[i])↵
 T[T[x].ch[i]].fail = T[T[x].fail].ch[i], Q.push(T[x].ch[i]);↵
 else ↵
 T[x].ch[i] = T[T[x].fail].ch[i];↵
 }↵
 }↵
 for (int i = 2; i <= I; i++)↵
 adj[T[i].fail].PB(i);↵
 dfs(1);↵
}↵
↵
int run(string s) {↵
 DP[0] = 1;↵
 for (int i = 1, x = 1; i < s.size(); i++) {↵
 x = T[x].ch[s[i] - 'a'];↵
 for (int l : T[x].lens)↵
 if (l <= i)↵
 DP[i] = (DP[i] + DP[i - l]) % MOD;↵
 }↵
 return DP[s.size() - 1];↵
}↵
↵
int main() {↵
 cin >> S >> K;↵
 for (int i = 0; i < K; i++) {↵
 string s; cin >> s;↵
 insert(s);↵
 }↵
↵
 build();↵
 cout << run(" " + S) << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**2. String Matching**↵
↵
<spoiler summary="Solution">↵
There are many ways to solve this problem, including but not limited to: Knuth-Morris-Pratt Algorithm, Z-Algorithm, Rabin-Karp Algorithm, and Suffix Tree/Automaton. ↵
↵
The code below uses Z-Algorithm. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
string T, P, S;↵
int Z[2000005];↵
↵
int main() {↵
 ios_base::sync_with_stdio(0); cin.tie(0);↵
↵
 cin >> T >> P;↵
 S = P + "$" + T;↵
↵
 int n = (int)S.size();↵
 for (int i = 1, l = 0, r = 0; i < n; i++) {↵
 if (i <= r)↵
 Z[i] = min(Z[i - l], r - i + 1);↵
 while (i + Z[i] < n && S[Z[i]] == S[i + Z[i]])↵
 Z[i]++;↵
 if (i + Z[i] - 1 > r)↵
 l = i, r = i + Z[i] - 1;↵
 }↵
↵
 int ans = 0;↵
 for (int i = P.size() + 1; i < S.size(); i++)↵
 if (Z[i] == P.size())↵
 ans++;↵
 ↵
 cout << ans << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**3. Finding Borders**↵
↵
<spoiler summary="Solution">↵
This problem can be done with hashing. We will hash the entire string, and for each prefix we check if the corresponding suffix has the same hash value↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
const ll R = 9973, MOD = 99999989;↵
↵
string S;↵
ll H[1000005], P = 1;↵
↵
int main() {↵
cin >> S;↵
for (int i = 0; i < S.size(); i++) ↵
H[i] = ((i == 0 ? 0 : H[i - 1]) * R + (ll)S[i]) % MOD;↵
↵
for (int k = 1; k < (int)S.size(); k++) {↵
P = (P * R) % MOD;↵
ll suf = (H[S.size() - 1] - (P * H[S.size() - k - 1]) % MOD + MOD) % MOD;↵
if (H[k - 1] == suf)↵
cout << k << ' ';↵
}↵
cout << '\n';↵
}↵
~~~~~↵
</spoiler>↵
↵
**4. Finding Periods**↵
↵
<spoiler summary="Solution">↵
We can use a modified KMP/Z-Algorithm to calculate the prefix/z-function. Then it suffices for each index $i \in [0, n)$ to check if $i + Z_i \geq n$. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
string S;↵
int Z[1000005];↵
↵
int main() {↵
 cin >> S;↵
 ↵
 int n = (int)S.size();↵
 for (int i = 1, l = 0, r = 0; i < n; i++) {↵
 if (i <= r)↵
 Z[i] = min(Z[i - l], r - i + 1);↵
 while (i + Z[i] < n && S[Z[i]] == S[i + Z[i]])↵
 Z[i]++;↵
 if (i + Z[i] - 1 > r)↵
 l = i, r = i + Z[i] - 1;↵
 }↵
↵
 for (int i = 0; i < n; i++)↵
 if (i + Z[i] >= n)↵
 cout <↵
↵
**5. Minimal Rotation**↵
↵
<spoiler summary="Solution">↵
Multiple solution methods can be found [here](https://en.wikipedia.org/wiki/Lexicographically_minimal_string_rotation). Interestingly enough, $\mathcal{O}(n \log n)$ suffix array seems to TLE, but I have not tried a linear suffix array method yet. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
def least_rotation(S: str) -> int:↵
 """Booth's algorithm."""↵
 S += S # Concatenate string to it self to avoid modular arithmetic↵
 f = [-1] * len(S) # Failure function↵
 k = 0 # Least rotation of string found so far↵
 for j in range(1, len(S)):↵
 sj = S[j]↵
 i = f[j - k - 1]↵
 while i != -1 and sj != S[k + i + 1]:↵
 if sj < S[k + i + 1]:↵
 k = j - i - 1↵
 i = f[i]↵
 if sj != S[k + i + 1]: # if sj != S[k+i+1], then i == -1↵
 if sj < S[k]: # k+i+1 = k↵
 k = j↵
 f[j - k] = -1↵
 else:↵
 f[j - k] = i + 1↵
 return k↵
↵
S = input("")↵
i = least_rotation(S)↵
print(S[i:] + S[:i])↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**6. Longest Palindrome**↵
↵
<spoiler summary="Solution">↵
This is a textbook application of [Manacher's Algorithm](https://cp-algorithms.com/string/manacher.html). ↵
↵
Interestingly enough, a shorter code can be attained by simply inserting a special character between two adjacent indicies (so "baacba" would become "b#a#a#c#b#a") then running the odd case of Manacher's Algorithm. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
string S;↵
int d1[1000005], d2[1000005];↵
↵
int main() {↵
 cin >> S;↵
↵
 int n = S.size();↵
 for (int i = 0, l = 0, r = -1; i < n; i++) {↵
 int k = (i > r) ? 1 : min(d1[l + r - i], r - i + 1);↵
 while (0 <= i - k && i + k < n && S[i - k] == S[i + k]) {↵
 k++;↵
 }↵
 d1[i] = k--;↵
 if (i + k > r) {↵
 l = i - k;↵
 r = i + k;↵
 }↵
 }↵
↵
 for (int i = 0, l = 0, r = -1; i < n; i++) {↵
 int k = (i > r) ? 0 : min(d2[l + r - i + 1], r - i + 1);↵
 while (0 <= i - k - 1 && i + k < n && S[i - k - 1] == S[i + k]) {↵
 k++;↵
 }↵
 d2[i] = k--;↵
 if (i + k > r) {↵
 l = i - k - 1;↵
 r = i + k ;↵
 }↵
 }↵
↵
 int ans = 0, ind = -1;↵
 for (int i = 0; i < n; i++) {↵
 if (d1[i] * 2 - 1 > ans) ↵
 ans = d1[i] * 2 - 1, ind = i;↵
 if (d2[i] * 2 > ans)↵
 ans = d2[i] * 2, ind = i;↵
 }↵
 if (ans % 2 == 1)↵
 cout << S.substr(ind - ans / 2, ans);↵
 else ↵
 cout << S.substr(ind - ans / 2, ans);↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**7. Required Substring**↵
↵
**8. Palindrome Queries**↵
↵
<spoiler summary="Solution">↵
We will build two segment trees over the hashes: one forward and one backwards. Updates correspond to updates on the segment tree, and queries are just comparing the (adjusted) hash values on the two segment trees, as palindromes are defined to be a string that is the same forward and backwards.↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
using ll = long long;↵
const ll HASH = 257, MOD = 1e9 + 7;↵
↵
int N, Q;↵
ll powH[200005] = {1};↵
↵
namespace forward_hash {↵
 ll T[400005];↵
↵
 void update(int i, ll v) {↵
 for (T[i += N] = v; i > 1; i >>= 1)↵
 T[i >> 1] = (T[i] + T[i ^ 1]) % MOD;↵
 }↵
↵
 ll query(int l, int r) {↵
 ll res = 0;↵
 for (l += N, r += N + 1; l < r; l >>= 1, r >>= 1) {↵
 if (l & 1) res = (res + T[l++]) % MOD;↵
 if (r & 1) res = (res + T[--r]) % MOD;↵
 }↵
 return res;↵
 }↵
}↵
↵
namespace backward_hash {↵
 ll T[400005];↵
↵
 void update(int i, ll v) {↵
 for (T[i += N] = v; i > 1; i >>= 1)↵
 T[i >> 1] = (T[i] + T[i ^ 1]) % MOD;↵
 }↵
↵
 ll query(int l, int r) {↵
 ll res = 0;↵
 for (l += N, r += N + 1; l < r; l >>= 1, r >>= 1) {↵
 if (l & 1) res = (res + T[l++]) % MOD;↵
 if (r & 1) res = (res + T[--r]) % MOD;↵
 }↵
 return res;↵
 }↵
}↵
↵
int main() {↵
 cin >> N >> Q;↵
 for (int i = 1; i < N; i++) {↵
 powH[i] = (powH[i - 1] * HASH) % MOD;↵
 //cout << powH[i] << " \n"[i == N - 1];↵
 }↵
 for (int i = 0; i < N; i++) {↵
 char c; cin >> c;↵
 forward_hash::update(i, powH[i] * (ll)c % MOD);↵
 backward_hash::update(i, powH[N - i - 1] * (ll)c % MOD);↵
 }↵
↵
 while (Q--) {↵
 int t; cin >> t;↵
 if (t == 1) {↵
 int k; char c;↵
 cin >> k >> c;↵
 k--;↵
 forward_hash::update(k, powH[k] * (ll)c % MOD);↵
 backward_hash::update(k, powH[N - k - 1] * (ll)c % MOD);↵
 }↵
 else if (t == 2) {↵
 int l, r;↵
 cin >> l >> r;↵
 l--, r--;↵
 ll forward = forward_hash::query(l, r);↵
 forward = (forward * powH[N - 1 - r]) % MOD;↵
 ll backward = backward_hash::query(l, r);↵
 backward = (backward * powH[l]) % MOD;↵
 if (forward == backward)↵
 cout << "YES\n";↵
 else↵
 cout << "NO\n";↵
 }↵
 }↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**9. Finding Patterns**↵
↵
<spoiler summary="Solution">↵
See solution for Count Patterns. ↵
↵
A possible optimization for the code below is to simply remove a word if it has been found, which allows us to mitigate having to DFS through the AC Automaton. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <iostream>↵
#include <algorithm>↵
#include <queue>↵
using namespace std;↵
↵
#define PB push_back↵
↵
string S;↵
int K, I = 1, ans[500005];↵
↵
vector<int> adj[500005];↵
↵
struct node {↵
 int fail, ch[26] = {}, cnt = 0;↵
 vector<int> word;↵
} T[500005];↵
↵
void insert(string s, int i) {↵
 int x = 1;↵
 for (int i = 0; i < s.size(); i++) {↵
 if (T[x].ch[s[i] - 'a'] == 0)↵
 T[x].ch[s[i] - 'a'] = ++I;↵
 x = T[x].ch[s[i] - 'a'];↵
 }↵
 T[x].word.PB(i);↵
}↵
↵
void build() {↵
 queue<int> Q;↵
 int x = 1; ↵
 T[1].fail = 1;↵
 for (int i = 0; i < 26; i++) {↵
 if (T[x].ch[i])↵
 T[T[x].ch[i]].fail = x, Q.push(T[x].ch[i]);↵
 else ↵
 T[x].ch[i] = 1;↵
 }↵
 while (!Q.empty()) {↵
 x = Q.front(); Q.pop();↵
 for (int i = 0; i < 26; i++) {↵
 if (T[x].ch[i])↵
 T[T[x].ch[i]].fail = T[T[x].fail].ch[i], Q.push(T[x].ch[i]);↵
 else ↵
 T[x].ch[i] = T[T[x].fail].ch[i];↵
 }↵
 }↵
 for (int i = 2; i <= I; i++)↵
 adj[T[i].fail].PB(i);↵
}↵
↵
void run(string s) {↵
 for (int i = 0, x = 1; i < s.size(); i++) {↵
 x = T[x].ch[s[i] - 'a'];↵
 T[x].cnt++;↵
 }↵
}↵
↵
int dfs(int u) {↵
 int res = T[u].cnt;↵
 for (int v : adj[u])↵
 res += dfs(v);↵
 for (int w : T[u].word)↵
 ans[w] = res;↵
 return res;↵
}↵
↵
int main() {↵
 ios_base::sync_with_stdio(0); cin.tie(0);↵
 ↵
 cin >> S >> K;↵
 for (int i = 0; i < K; i++) {↵
 string s; cin >> s;↵
 insert(s, i);↵
 }↵
↵
 build();↵
 run(S);↵
 dfs(1);↵
↵
 for (int i = 0; i < K; i++)↵
 cout << (ans[i] ? "YES" : "NO") << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**10. Counting Patterns**↵
↵
<spoiler summary="Solution">↵
We will use an [Aho-Corasick Automaton](https://cp-algorithms.com/string/aho_corasick.html). However, there is one major optimization to be made. When we visit a node, we normally visit all of the nodes in its fail pointer tree. However, we can lazily process these visits by marking the number of times we visit a node. At the end, we simply DFS along the fail tree, keeping track of the number of visits to a nodes subtree. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~~↵
#include <iostream>↵
#include <algorithm>↵
#include <cassert>↵
#include <queue>↵
using namespace std;↵
↵
#define PB push_back↵
↵
string S;↵
int K, I = 1, ans[500005];↵
↵
vector<int> adj[500005];↵
↵
struct node {↵
 int fail, ch[26] = {}, cnt = 0;↵
 vector<int> word;↵
 string s = "";↵
} T[500005];↵
↵
void insert(string s, int i) {↵
 int x = 1;↵
 for (int i = 0; i < s.size(); i++) {↵
 if (T[x].ch[s[i] — 'a'] == 0)↵
 T[x].ch[s[i] — 'a'] = ++I;↵
 T[T[x].ch[s[i] — 'a']].s = T[x].s + s[i];↵
 x = T[x].ch[s[i] — 'a'];↵
 }↵
 T[x].word.PB(i);↵
}↵
↵
void build() {↵
 queue<int> Q;↵
 int x = 1; ↵
 T[1].fail = 1;↵
 for (int i = 0; i < 26; i++) {↵
 if (T[x].ch[i])↵
 T[T[x].ch[i]].fail = x, Q.push(T[x].ch[i]);↵
 else ↵
 T[x].ch[i] = 1;↵
 }↵
 while (!Q.empty()) {↵
 x = Q.front(); Q.pop();↵
 for (int i = 0; i < 26; i++) {↵
 if (T[x].ch[i])↵
 T[T[x].ch[i]].fail = T[T[x].fail].ch[i], Q.push(T[x].ch[i]);↵
 else ↵
 T[x].ch[i] = T[T[x].fail].ch[i];↵
 }↵
 }↵
 for (int i = 2; i <= I; i++)↵
 adj[T[i].fail].PB(i);↵
}↵
↵
void run(string s) {↵
 for (int i = 0, x = 1; i < s.size(); i++) {↵
 x = T[x].ch[s[i] — 'a'];↵
 // stuff depending on problem↵
 T[x].cnt++;↵
 }↵
}↵
↵
int dfs(int u) {↵
 int res = T[u].cnt;↵
 for (int v : adj[u])↵
 res += dfs(v);↵
 for (int w : T[u].word)↵
 ans[w] = res;↵
 return res;↵
}↵
↵
int main() {↵
 cin >> S >> K;↵
 for (int i = 0; i < K; i++) {↵
 string s; cin >> s;↵
 insert(s, i);↵
 }↵
↵
 build();↵
 run(S);↵
 dfs(1);↵
↵
 for (int i = 0; i < K; i++)↵
 cout << ans[i] << '\n';↵
}↵
~~~~~~↵
↵
</spoiler>↵
↵
**11. Pattern Positions**↵
↵
<spoiler summary="Solution">↵
See solution to Counting Patterns↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <iostream>↵
#include <algorithm>↵
#include <vector>↵
#include <unordered_map>↵
#include <ext/pb_ds/assoc_container.hpp>↵
using namespace std;↵
using namespace __gnu_pbds;↵
↵
#define PB push_back↵
#define EB emplace_back↵
↵
template<class K, class V> ↵
using fast_map = gp_hash_table<K, V>;↵
↵
struct node {↵
 fast_map<char, int> adj, adv;↵
 vector<int> w;↵
 int p = -1, suf = -1, ext = -1; ↵
 char c;↵
↵
 node(int p = -1, char c = '$') : p(p), c(c) {}↵
};↵
↵
vector<node> T(1);↵
↵
void insert(string s, int id) {↵
 int t = 0;↵
 for (char c : s) {↵
 if (T[t].adj.find(c) == T[t].adj.end()) {↵
 T[t].adj[c] = T.size();↵
 T.EB(t, c);↵
 }↵
 t = T[t].adj[c];↵
 }↵
 T[t].w.PB(id);↵
}↵
↵
int advance(int t, char c);↵
↵
int suffix(int t) {↵
 if (T[t].suf == -1) {↵
 if (t == 0 || T[t].p == 0)↵
 T[t].suf = 0;↵
 else {↵
 T[t].suf = advance(suffix(T[t].p), T[t].c);↵
 }↵
 }↵
 return T[t].suf;↵
}↵
↵
int advance(int t, char c) {↵
 if (T[t].adv.find(c) == T[t].adv.end()) {↵
 if (T[t].adj.find(c) != T[t].adj.end())↵
 T[t].adv[c] = T[t].adj[c];↵
 else↵
 T[t].adv[c] = t == 0 ? 0 : advance(suffix(t), c);↵
 }↵
 return T[t].adv[c];↵
}↵
↵
int K, ans[500005];↵
string S, W[500005];↵
↵
int jump(int t) {↵
 if (T[t].ext == -1) {↵
 int tt = t;↵
 do {↵
 tt = suffix(tt);↵
 } while (T[tt].w.empty() && tt != 0);↵
 T[t].ext = tt;↵
 }↵
 return T[t].ext;↵
}↵
↵
void answer(int t, int cnt) {↵
 while (t != 0) {↵
 for (int u : T[t].w)↵
 ans[u] = cnt;↵
 T[t].w.clear();↵
 t = jump(t);↵
 }↵
}↵
↵
↵
int main() {↵
 ios_base::sync_with_stdio(0); cin.tie(0);↵
 ↵
 getline(cin, S);↵
 cin >> K; ↵
 string ddd; getline(cin, ddd);↵
 for (int i = 0; i < K; i++) {↵
 getline(cin, W[i]);↵
 insert(W[i], i);↵
 }↵
↵
↵
 int t = 0, cnt = 1;↵
 for (char c : S) {↵
 t = advance(t, c);↵
 answer(t, cnt++);↵
 }↵
↵
 for (int i = 0; i < K; i++)↵
 cout << (ans[i] ? to_string(ans[i] - W[i].size() + 1) : "-1") << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**12. Distinct Substring**↵
↵
<spoiler summary="Solution">↵
A suffix array solution can be found [here](https://cp-algorithms.com/string/suffix-array.html#toc-tgt-10). My solution below uses suffix automaton, explanation is [here](https://cp-algorithms.com/string/suffix-automaton.html#toc-tgt-16). ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
struct SuffixAuto {↵
 struct State {↵
 int len, link;↵
 int next[26];↵
 State(int _len = 0, int _link = -1) : len(_len), link(_link) {↵
 memset(next, -1, sizeof(next));↵
 }↵
 };↵
↵
 vector<State> states;↵
 int last;↵
↵
 SuffixAuto() {}↵
↵
 SuffixAuto(const string &S) {↵
 init(S);↵
 }↵
↵
 inline int state(int len = 0, int link = -1) {↵
 states.emplace_back(len, link);↵
 return states.size() - 1;↵
 }↵
↵
 void init(const string &S) {↵
 states.reserve(2 * S.size());↵
 last = state();↵
 for (char c : S)↵
 extend(c);↵
 }↵
↵
 void extend(char _c) {↵
 int c = _c - 'a'; // change for different alphabet↵
 int cur = state(states[last].len + 1), P = last; ↵
 while (P != -1 && states[P].next[c] == -1) {↵
 states[P].next[c] = cur;↵
 P = states[P].link;↵
 }↵
 if (P == -1)↵
 states[cur].link = 0;↵
 else {↵
 int Q = states[P].next[c];↵
 if (states[P].len + 1 == states[Q].len)↵
 states[cur].link = Q;↵
 else {↵
 int C = state(states[P].len + 1, states[Q].link);↵
 copy(states[Q].next, states[Q].next + 26, states[C].next);↵
 while (P != -1 && states[P].next[c] == Q) {↵
 states[P].next[c] = C;↵
 P = states[P].link;↵
 }↵
 states[Q].link = states[cur].link = C;↵
 }↵
 }↵
 last = cur;↵
 }↵
};↵
↵
string S;↵
SuffixAuto sa;↵
ll dp[200005];↵
↵
ll dfs(int u) {↵
 if (dp[u] > 0)↵
 return dp[u];↵
 for (int i = 0; i < 26; i++)↵
 if (sa.states[u].next[i] != -1) ↵
 dp[u] += dfs(sa.states[u].next[i]);↵
 return dp[u] += 1;↵
}↵
↵
↵
int main() {↵
 ios_base::sync_with_stdio(0); cin.tie(0);↵
↵
 cin >> S;↵
 sa.init(S);↵
 cout << dfs(0) - 1 << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
↵
**13. Repeating Substring**↵
↵
<spoiler summary="Solution">↵
First, build a suffix array and a corresponding LCP array over the string $S$. A longest repeating substring must be between two adjacent suffixes in the suffix array, so the length of the answer is simply the maximum number in the LCP array. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
vector<int> buildSuffixArray(const string& S) {↵
 int n = S.size();↵
 const int alphabet = 256;↵
 vector<int> p(n), c(n), cnt(max(alphabet, n), 0);↵
 for (int i = 0; i < n; i++)↵
 cnt[S[i]]++;↵
 for (int i = 1; i < alphabet; i++)↵
 cnt[i] += cnt[i-1];↵
 for (int i = 0; i < n; i++)↵
 p[--cnt[S[i]]] = i;↵
 c[p[0]] = 0;↵
 int classes = 1;↵
 for (int i = 1; i < n; i++) {↵
 if (S[p[i]] != S[p[i-1]])↵
 classes++;↵
 c[p[i]] = classes - 1;↵
 }↵
 vector<int> pn(n), cn(n);↵
 for (int h = 0; (1 << h) < n; ++h) {↵
 for (int i = 0; i < n; i++) {↵
 pn[i] = p[i] - (1 << h);↵
 if (pn[i] < 0)↵
 pn[i] += n;↵
 }↵
 fill(cnt.begin(), cnt.begin() + classes, 0);↵
 for (int i = 0; i < n; i++)↵
 cnt[c[pn[i]]]++;↵
 for (int i = 1; i < classes; i++)↵
 cnt[i] += cnt[i-1];↵
 for (int i = n-1; i >= 0; i--)↵
 p[--cnt[c[pn[i]]]] = pn[i];↵
 cn[p[0]] = 0;↵
 classes = 1;↵
 for (int i = 1; i < n; i++) {↵
 pair<int, int> cur = {c[p[i]], c[(p[i] + (1 << h)) % n]};↵
 pair<int, int> prev = {c[p[i-1]], c[(p[i-1] + (1 << h)) % n]};↵
 if (cur != prev)↵
 ++classes;↵
 cn[p[i]] = classes - 1;↵
 }↵
 c.swap(cn);↵
 }↵
 return p;↵
}↵
↵
vector<int> buildLCP(const string& S, const vector<int>& P) {↵
 int N = S.size();↵
 vector<int> R (N, 0);↵
 for (int i = 0; i < N; i++)↵
 R[P[i]] = i;↵
 ↵
 int k = 0;↵
 vector<int> lcp(N - 1, 0);↵
 for (int i = 0; i < N; i++) {↵
 if (R[i] == N - 1) {↵
 k = 0;↵
 continue;↵
 }↵
 int j = P[R[i] + 1];↵
 while (i + k < N && j + k < N && S[i + k] == S[j + k])↵
 k++;↵
 lcp[R[i]] = k;↵
 if (k) k--;↵
 }↵
 return lcp;↵
}↵
↵
string S;↵
↵
int main() {↵
 ios_base::sync_with_stdio(0); cin.tie(0);↵
↵
 cin >> S;↵
 vector<int> suf = buildSuffixArray(S), lcp = buildLCP(S, suf);↵
 int ans = 0;↵
 for (int i = 0; i < S.size() - 1; i++)↵
 ans = max(ans, lcp[i]);↵
 if (ans == 0) {↵
 cout << -1 << '\n';↵
 return 0;↵
 }↵
 for (int i = 0; i < S.size() - 1; i++)↵
 if (ans == lcp[i]) {↵
 cout << S.substr(suf[i], lcp[i]) << '\n';↵
 return 0;↵
 }↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**14. String Functions**↵
↵
<spoiler summary="Solution">↵
This problem is very straightforward. Tutorials for the prefix function and z-function can be found [here](https://cp-algorithms.com/string/prefix-function.html) and [here](https://cp-algorithms.com/string/z-function.html)↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
namespace str {↵
 vector<int> pi(const string &s) {↵
 int n = (int)s.length();↵
 vector<int> _pi(n);↵
 for (int i = 1, j; i < n; i++) {↵
 for (j = _pi[i - 1]; j > 0 && s[j] != s[i]; j = _pi[j - 1]);↵
 if (s[i] == s[j])↵
 j++;↵
 _pi[i] = j;↵
 }↵
 return _pi;↵
 }↵
↵
 vector<int> z(const string &s) {↵
 int n = (int)s.size();↵
 vector<int> _z(n);↵
 for (int i = 1, l = 0, r = 0; i < n; i++) {↵
 if (i <= r)↵
 _z[i] = min(_z[i - l], r - i + 1);↵
 while (i + _z[i] < n && s[_z[i]] == s[i + _z[i]])↵
 _z[i]++;↵
 if (i + _z[i] - 1 > r)↵
 l = i, r = i + _z[i] - 1;↵
 }↵
 return _z;↵
 }↵
}↵
↵
string S;↵
↵
int main() { ↵
 cin >> S;↵
 vector<int> z = str::z(S);↵
 for (int x : z)↵
 cout << x << ' ';↵
 cout << '\n';↵
 vector<int> pi = str::pi(S);↵
 for (int x : pi)↵
 cout << x << ' ';↵
 cout << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
**15. Substring Order I**↵
↵
<spoiler summary="Solution">↵
Using suffix automata, the solution becomes quite straightforward. An in-depth look can be found [here](https://cp-algorithms.com/string/suffix-automaton.html#toc-tgt-18). ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
struct SuffixAuto {↵
 struct State {↵
 int len, link;↵
 int next[26];↵
 State(int _len = 0, int _link = -1) : len(_len), link(_link) {↵
 memset(next, -1, sizeof(next));↵
 }↵
 };↵
↵
 vector<State> states;↵
 int last;↵
↵
 SuffixAuto() {}↵
↵
 SuffixAuto(const string &S) {↵
 init(S);↵
 }↵
↵
 inline int state(int len = 0, int link = -1) {↵
 states.emplace_back(len, link);↵
 return states.size() - 1;↵
 }↵
↵
 void init(const string &S) {↵
 states.reserve(2 * S.size());↵
 last = state();↵
 for (char c : S)↵
 extend(c);↵
 }↵
↵
 void extend(char _c) {↵
 int c = _c - 'a'; // change for different alphabet↵
 int cur = state(states[last].len + 1), P = last; ↵
 while (P != -1 && states[P].next[c] == -1) {↵
 states[P].next[c] = cur;↵
 P = states[P].link;↵
 }↵
 if (P == -1)↵
 states[cur].link = 0;↵
 else {↵
 int Q = states[P].next[c];↵
 if (states[P].len + 1 == states[Q].len)↵
 states[cur].link = Q;↵
 else {↵
 int C = state(states[P].len + 1, states[Q].link);↵
 copy(states[Q].next, states[Q].next + 26, states[C].next);↵
 while (P != -1 && states[P].next[c] == Q) {↵
 states[P].next[c] = C;↵
 P = states[P].link;↵
 }↵
 states[Q].link = states[cur].link = C;↵
 }↵
 }↵
 last = cur;↵
 }↵
};↵
↵
string S;↵
SuffixAuto sa;↵
ll N, dp[200005];↵
↵
ll dfs_dp(int u = 0) {↵
 if (dp[u] > 0)↵
 return dp[u];↵
 for (int i = 0; i < 26; i++)↵
 if (sa.states[u].next[i] != -1) ↵
 dp[u] += dfs_dp(sa.states[u].next[i]);↵
 return dp[u] += 1;↵
}↵
↵
string p = "";↵
↵
void dfs(int u = 0) {↵
 if (N == 0) ↵
 return;↵
 else ↵
 N--;↵
 for (int i = 0; i < 26; i++) {↵
 int v = sa.states[u].next[i];↵
 if (v != -1) {↵
 if (N >= dp[v]) {↵
 N -= dp[v];↵
 }↵
 else {↵
 p += (char)(i + 'a');↵
 return dfs(v);↵
 }↵
 }↵
 }↵
}↵
↵
int main() {↵
 ios_base::sync_with_stdio(0); cin.tie(0);↵
↵
 cin >> S >> N;↵
 sa.init(S);↵
 dfs_dp();↵
 dfs();↵
 cout <↵
↵
**16. Substring Order II**↵
↵
<spoiler summary="Solution">↵
Build a suffix automaton on $S$. First, let $cnt$ denote the number of times a state appears in the string. Initially, we let each newly created node have $cnt = 1$ and each cloned node have $cnt = 0$. After the suffix automaton has been created, we process the nodes in order of length from largest to smallest: $$cnt[\texttt{link}(u)] += cnt[u]$$↵
We will create another array $dp$ such that $dp[u]$ is equal to the number of (not necessarily distinct) substrings that start at the current node. Note that $$dp[u] = cnt[u] + \sum_{v \in \texttt{transition}(u)} dp[v]$$. We then use an approach similar to Substring Order I where we either go down a transition, or subtract $dp[v]$ from $K$. ↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
string S;↵
ll N, cnt[200005], dp[200005];↵
↵
struct SuffixAuto {↵
 struct State {↵
 int len, link;↵
 int next[26];↵
 State(int _len = 0, int _link = -1) : len(_len), link(_link) {↵
 memset(next, -1, sizeof(next));↵
 }↵
 };↵
↵
 vector<State> states;↵
 int last;↵
↵
 SuffixAuto() {}↵
↵
 SuffixAuto(const string &S) {↵
 init(S);↵
 }↵
↵
 inline int state(int len = 0, int link = -1) {↵
 states.emplace_back(len, link);↵
 return states.size() - 1;↵
 }↵
↵
 void init(const string &S) {↵
 states.reserve(2 * S.size());↵
 last = state();↵
 for (char c : S)↵
 extend(c);↵
 }↵
↵
 void extend(char _c) {↵
 int c = _c - 'a'; // change for different alphabet↵
 int cur = state(states[last].len + 1), P = last; ↵
 cnt[cur] = 1;↵
 while (P != -1 && states[P].next[c] == -1) {↵
 states[P].next[c] = cur;↵
 P = states[P].link;↵
 }↵
 if (P == -1)↵
 states[cur].link = 0;↵
 else {↵
 int Q = states[P].next[c];↵
 if (states[P].len + 1 == states[Q].len)↵
 states[cur].link = Q;↵
 else {↵
 int C = state(states[P].len + 1, states[Q].link);↵
 cnt[C] = 0;↵
 copy(states[Q].next, states[Q].next + 26, states[C].next);↵
 while (P != -1 && states[P].next[c] == Q) {↵
 states[P].next[c] = C;↵
 P = states[P].link;↵
 }↵
 states[Q].link = states[cur].link = C;↵
 }↵
 }↵
 last = cur;↵
 }↵
↵
 void debug(int u = 0, string s = "") {↵
 cout << "state " <";↵
 for (int i = 0; i < 26; i++) ↵
 if (states[u].next[i] != -1)↵
 cout << ' ' << states[u].next[i];↵
 cout << '\n';↵
 for (int i = 0; i < 26; i++) ↵
 if (states[u].next[i] != -1)↵
 debug(states[u].next[i], s + (char)(i + 'a'));↵
 }↵
} sa;↵
↵
ll dfs_dp(int u = 0) {↵
 if (dp[u] != 0)↵
 return dp[u];↵
 for (int i = 0; i < 26; i++) {↵
 int v = sa.states[u].next[i];↵
 if (v != -1) ↵
 dp[u] += dfs_dp(v);↵
 }↵
 return dp[u] += cnt[u];↵
}↵
↵
string p = "";↵
↵
void dfs(int u = 0) {↵
 if (N < cnt[u])↵
 return;↵
 else ↵
 N -= cnt[u];↵
 for (int i = 0; i < 26; i++) {↵
 int v = sa.states[u].next[i];↵
 if (v != -1) {↵
 if (N >= dp[v])↵
 N -= dp[v];↵
 else {↵
 p += (char)(i + 'a');↵
 return dfs(v);↵
 }↵
 }↵
 }↵
}↵
↵
int main() {↵
 cin >> S >> N;↵
 sa.init(S);↵
↵
 vector<pii> v(sa.states.size());↵
 for (int i = 0; i < sa.states.size(); i++)↵
 v[i] = {sa.states[i].len, i};↵
 sort(v.begin(), v.end(), greater<pii>());↵
 for (auto [len, id] : v)↵
 if (sa.states[id].link != -1)↵
 cnt[sa.states[id].link] += cnt[id];↵
 cnt[0] = 1;↵
↵
 dfs_dp();↵
↵
 dfs();↵
↵
 cout <↵
↵
↵
**17. Substring Distribution**↵
↵
<spoiler summary="Solution">↵
Let us build a suffix automata over the string $S$. The length of a path from the initial state $s_0$ corresponds to the length of a string. By definition, all substrings of the initial string are uniquely encoded. It suffices to count the number of different ways to get to any given node for all nodes in the suffix automaton. ↵
↵
We can take advantage of the fact that each node corresponds to exactly one set of substrings, and that each substring in a node is a suffix of another (except for the largest substring). Thus, for each node, there exists some $l, r$ such that it is always possible to reach it in $d \in [l, r]$ steps and impossible if $d \notin [l, r]$. Because $r$ has already been conveniently computed when initializing the suffix automaton, we simply have to compute $l$ for each node, which is just the shortest distance, It suffices to perform a BFS on the suffix automaton↵
</spoiler>↵
↵
↵
<spoiler summary="Code">↵
↵
~~~~~↵
#include <bits/stdc++.h>↵
using namespace std;↵
↵
struct SuffixAuto {↵
 struct State {↵
 int len, link;↵
 int next[26];↵
 State(int _len = 0, int _link = -1) : len(_len), link(_link) {↵
 memset(next, -1, sizeof(next));↵
 }↵
 };↵
↵
 vector<State> states;↵
 int last;↵
↵
 SuffixAuto() {}↵
↵
 SuffixAuto(const string &S) {↵
 init(S);↵
 }↵
↵
 inline int state(int len = 0, int link = -1) {↵
 states.emplace_back(len, link);↵
 return states.size() - 1;↵
 }↵
↵
 void init(const string &S) {↵
 states.reserve(2 * S.size());↵
 last = state();↵
 for (char c : S)↵
 extend(c);↵
 }↵
↵
 void extend(char _c) {↵
 int c = _c - 'a'; // change for different alphabet↵
 int cur = state(states[last].len + 1), P = last; ↵
 while (P != -1 && states[P].next[c] == -1) {↵
 states[P].next[c] = cur;↵
 P = states[P].link;↵
 }↵
 if (P == -1)↵
 states[cur].link = 0;↵
 else {↵
 int Q = states[P].next[c];↵
 if (states[P].len + 1 == states[Q].len)↵
 states[cur].link = Q;↵
 else {↵
 int C = state(states[P].len + 1, states[Q].link);↵
 copy(states[Q].next, states[Q].next + 26, states[C].next);↵
 while (P != -1 && states[P].next[c] == Q) {↵
 states[P].next[c] = C;↵
 P = states[P].link;↵
 }↵
 states[Q].link = states[cur].link = C;↵
 }↵
 }↵
 last = cur;↵
 }↵
↵
 void debug(int u = 0, string s = "") {↵
 cout << "state " <";↵
 for (int i = 0; i < 26; i++) ↵
 if (states[u].next[i] != -1)↵
 cout << ' ' << states[u].next[i];↵
 cout << '\n';↵
 for (int i = 0; i < 26; i++) ↵
 if (states[u].next[i] != -1)↵
 debug(states[u].next[i], s + (char)(i + 'a'));↵
 }↵
};↵
↵
string S;↵
SuffixAuto sa;↵
int lb[200005], ans[100005];↵
↵
int main() {↵
 cin >> S;↵
 sa.init(S);↵
 ↵
 queue<int> q;↵
 memset(lb, -1, sizeof(lb));↵
↵
 q.push(0);↵
 lb[0] = 0;↵
 while (!q.empty()) {↵
 int u = q.front(); q.pop();↵
 int d = lb[u];↵
 for (int i = 0; i < 26; i++) {↵
 int v = sa.states[u].next[i];↵
 if (v != -1 && lb[v] == -1) {↵
 lb[v] = d + 1;↵
 q.push(v);↵
 }↵
 }↵
 }↵
↵
 for (int i = 1; i < sa.states.size(); i++) ↵
 ans[lb[i]]++, ans[sa.states[i].len + 1]--;↵
 ↵
 ans[0] = 0;↵
 for (int i = 1; i <= S.size(); i++) {↵
 ans[i] += ans[i - 1];↵
 cout << ans[i] << ' ';↵
 }↵
 cout << '\n';↵
}↵
~~~~~↵
↵
↵
</spoiler>↵
↵
Solution and Code for Required Substring coming soon!↵

Rev.	By	When	Δ	Comment
en5	dutin	2021-10-18 18:36:27	62	Tiny change: 'spoiler>\n\nSolution and Code for Required Substring coming soon!\n' -> 'spoiler>\n'
en4	dutin	2021-09-18 17:14:31	3773
en3	dutin	2021-09-17 06:27:42	52
en2	dutin	2021-09-16 21:46:09	23505	(published)
en1	dutin	2021-09-16 20:45:50	12344	Initial revision (saved to drafts)

History