字符串哈希

多项式字符串哈希:可以得到所有字串的哈希值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
// 多项式字符串哈希(方便计算子串哈希值)
// 哈希函数 hash(s) = s[0] * base^(n-1) + s[1] * base^(n-2) + ... + s[n-2] * base + s[n-1]
char[] t = target.toCharArray();
final int MOD = 1_070_777_777;
final int BASE = (int) 8e8 + new Random().nextInt((int) 1e8); // 随机 base,防止 hack
int[] powBase = new int[n + 1]; // powBase[i] = base^i
int[] preHash = new int[n + 1]; // 前缀哈希值 preHash[i] = hash(target[0] 到 target[i-1])
powBase[0] = 1;
for (int i = 0; i < n; i++) { // 计算target字符串所有前缀的哈希值和相关项
powBase[i + 1] = (int) ((long) powBase[i] * BASE % MOD);
preHash[i + 1] = (int) (((long) preHash[i] * BASE + t[i]) % MOD); // 秦九韶算法计算多项式哈希
}

// 计算子串 target[i-len:i-1] 的哈希值(计算方法类似前缀和)
int subHash = (int) (((preHash[i] - (long) preHash[i - len] * powBase[len]) % MOD + MOD) % MOD);
// 同理:target[i:i+len-1]
long subHash = (((long) preHash[i + len] - (long) preHash[i] * powBase[len]) % MOD + MOD) % MOD;

双模哈希

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
// 构建
char[] t = target.toCharArray();
int n = t.length;
final int MOD1 = 1_070_777_777;
final int MOD2 = 1_000_000_007;
final int BASE1 = (int) 8e8 + new Random().nextInt((int) 1e8);
final int BASE2 = (int) 8e8 + new Random().nextInt((int) 1e8);
int[] powBase1 = new int[n + 1];
int[] powBase2 = new int[n + 1];
int[] preHash1 = new int[n + 1];
int[] preHash2 = new int[n + 1];
powBase1[0] = powBase2[0] = 1;
for (int i = 0; i < n; i++) {
powBase1[i + 1] = (int) ((long) powBase1[i] * BASE1 % MOD1);
powBase2[i + 1] = (int) ((long) powBase2[i] * BASE2 % MOD2);
preHash1[i + 1] = (int) (((long) preHash1[i] * BASE1 + t[i]) % MOD1);
preHash2[i + 1] = (int) (((long) preHash2[i] * BASE2 + t[i]) % MOD2);
}
// 计算子串 target[i-len:i-1] 的哈希值(计算方法类似前缀和)
long subHash1 = ((preHash1[i] - (long) preHash1[i - len] * powBase1[len]) % MOD1 + MOD1) % MOD1;
long subHash2 = ((preHash2[i] - (long) preHash2[i - len] * powBase2[len]) % MOD2 + MOD2) % MOD2;
long subHash = subHash1 << 32 | subHash2;