卡后 LLL 时代哈希
cancan123456 · · 算法·理论
后 LLL 时代哈希
使用随机权值确实能规避 LLL 算法卡哈希。
但是考虑到我们有 Tree Attack 来解决单模数随机权值情况下的哈希。
所以考虑先求出几组碰撞(第一个模数意义下)然后把这几组碰撞当字符集再求一次碰撞(第二个模数意义下)。
然后就可以解决多模数随机权值下的哈希,运行 30 秒即可找到一组碰撞。
#include <iostream>
#include <algorithm>
using namespace std;
typedef long long ll;
const int mod1 = 998244353, mod2 = 1000000007, mod3 = 100000007, B = 114514;
const int LENGTH1 = 64, CNT1 = 64;
const int LENGTH2 = 64, CNT2 = 64;
const int LENGTH3 = 64;
mt19937 rnd(mod1);
int g[26];
ll pow(ll a, ll b, ll mod) {
ll ans = 1;
while (b != 0) {
if (b % 2 == 1) {
ans = ans * a % mod;
}
a = a * a % mod;
b = b / 2;
}
return ans;
}
int H(string s, const int mod) {
ll h = 0;
for (char c : s) {
h = (h * B + g[c - 'a']) % mod;
}
return h % mod;
}
ll normalize(ll x, ll mod) {
return (x % mod + mod) % mod;
}
struct Answer {
ll sum;
vector < int > id;
};
bool operator < (const Answer & a, const Answer & b) {
return a.sum < b.sum;
}
vector < int > TreeAttack(vector < int > val) {
vector < int > coe;
int n = val.size();
for (int i = 0; i < n; i++) {
coe.push_back(1);
}
vector < Answer > vec;
for (int i = 0; i < n; i++) {
Answer now;
now.sum = val[i];
now.id.push_back(i);
vec.push_back(now);
}
while ((int)vec.size() != 1) {
if (vec.size() % 2 == 1) {
for (int i : vec.back().id) {
coe[i] = 0;
}
}
int len = vec.size();
sort(vec.begin(), vec.end());
for (int i = 0; i < len; i += 2) {
if (vec[i].sum < vec[i + 1].sum) {
swap(vec[i], vec[i + 1]);
}
for (int j : vec[i + 1].id) {
coe[j] = -coe[j];
}
vec[i / 2].sum = vec[i].sum - vec[i + 1].sum;
vec[i / 2].id = vec[i].id;
vec[i / 2].id.insert(vec[i / 2].id.end(), vec[i + 1].id.begin(), vec[i + 1].id.end());
}
vec.erase(vec.begin() + len / 2, vec.end());
for (int i = 0; i < len / 2; i++) {
if (vec[i].sum == 0) {
vector < int > res;
for (int j = 0; j < n; j++) {
res.push_back(0);
}
for (int j : vec[i].id) {
res[j] = coe[j];
}
return res;
}
}
}
return vector < int > ();
}
pair < string, string > find_collision1() {
while (true) {
vector < pair < char, char > > vec;
vector < int > val;
for (int i = 0; i < LENGTH1; i++) {
char ch1 = rnd() % 26 + 'a', ch2 = rnd() % 25 + 'a';
if (ch2 >= ch1) {
ch2++;
}
vec.push_back(make_pair(ch1, ch2));
int diff = normalize(g[ch1 - 'a'] - g[ch2 - 'a'], mod1);
ll bit_val = pow(B, LENGTH1 - 1 - i, mod1);
val.push_back(diff * bit_val % mod1);
}
vector < int > r = TreeAttack(val);
if (r.empty()) {
continue;
}
string s1, s2;
for (int i = 0; i < LENGTH1; i++) {
char ch1 = vec[i].first, ch2 = vec[i].second;
if (r[i] == 0) {
s1.push_back(ch1);
s2.push_back(ch1);
} else if (r[i] == 1) {
s1.push_back(ch1);
s2.push_back(ch2);
} else {
s1.push_back(ch2);
s2.push_back(ch1);
}
}
return make_pair(s1, s2);
}
}
pair < string, string > layer1[CNT1];
ll layer1_hash_diff[CNT1];
pair < string, string > find_collision2() {
while (true) {
vector < int > vec, val;
for (int i = 0; i < LENGTH2; i++) {
int id = rnd() % CNT1;
vec.push_back(id);
ll diff = layer1_hash_diff[id];
ll bit_val = pow(B, (LENGTH2 - 1 - i) * LENGTH1, mod2);
val.push_back(diff * bit_val % mod2);
}
vector < int > r = TreeAttack(val);
if (r.empty()) {
continue;
}
string s1, s2;
for (int i = 0; i < LENGTH2; i++) {
string ch1 = layer1[vec[i]].first, ch2 = layer1[vec[i]].second;
if (r[i] == 0) {
s1 += ch1;
s2 += ch1;
} else if (r[i] == 1) {
s1 += ch1;
s2 += ch2;
} else {
s1 += ch2;
s2 += ch1;
}
}
return make_pair(s1, s2);
}
}
pair < string, string > layer2[CNT2];
ll layer2_hash_diff[CNT2];
pair < string, string > find_collision3() {
while (true) {
vector < int > vec, val;
for (int i = 0; i < LENGTH3; i++) {
int id = rnd() % CNT2;
vec.push_back(id);
ll diff = layer2_hash_diff[id];
ll bit_val = pow(B, (LENGTH3 - 1 - i) * LENGTH2 * LENGTH1, mod3);
val.push_back(diff * bit_val % mod3);
}
vector < int > r = TreeAttack(val);
if (r.empty()) {
continue;
}
string s1, s2;
for (int i = 0; i < LENGTH3; i++) {
string ch1 = layer2[vec[i]].first, ch2 = layer2[vec[i]].second;
if (r[i] == 0) {
s1 += ch1;
s2 += ch1;
} else if (r[i] == 1) {
s1 += ch1;
s2 += ch2;
} else {
s1 += ch2;
s2 += ch1;
}
}
return make_pair(s1, s2);
}
}
bool islow(string s) {
for (char c : s) {
if (!islower(c)) {
return false;
}
}
return true;
}
int main() {
for (int i = 0; i < 26; i++) {
g[i] = rnd() & 2147483647;
printf("g[%d] = %d\n", i, g[i]);
}
for (int i = 0; i < CNT1; i++) {
layer1[i] = find_collision1();
layer1_hash_diff[i] = normalize(H(layer1[i].first, mod2) - H(layer1[i].second, mod2), mod2);
printf("Layer1 i = %d finished.\n", i);
}
for (int i = 0; i < CNT2; i++) {
layer2[i] = find_collision2();
layer2_hash_diff[i] = normalize(H(layer2[i].first, mod3) - H(layer2[i].second, mod3), mod3);
printf("Layer2 i = %d finished.\n", i);
}
pair < string, string > r = find_collision3();
string s = r.first, t = r.second;
FILE * fhack = fopen("hack.txt", "w");
fprintf(fhack, "%s\n", s.c_str());
fprintf(fhack, "%s\n", t.c_str());
int n = s.size();
printf("n = %d\n", n);
if (!islow(s) || !islow(t)) {
printf("you should only contain lowercase letters\n");
} else if (n != (int)t.size()) {
printf("len not equal\n");
} else {
printf("mod %d, s : %d t : %d\n", mod1, H(s, mod1), H(t, mod1));
if (H(s, mod1) != H(t, mod1)) {
printf("different\n");
} else {
printf("mod %d, s : %d t : %d\n", mod2, H(s, mod2), H(t, mod2));
if (H(s, mod2) != H(t, mod2)) {
printf("different\n");
} else {
printf("mod %d, s : %d t : %d\n", mod3, H(s, mod3), H(t, mod3));
if (H(s, mod3) != H(t, mod3)) {
printf("different\n");
} else {
if (s == t) {
printf("s is equal to t\n");
} else {
printf("You Hack it!\n");
}
}
}
}
}
return 0;
}
这是之前脑子不清醒写的代码。
#include <iostream>
#include <algorithm>
using namespace std;
typedef long long ll;
const int mod1 = 998244353, mod2 = 1000000007, mod3 = 100000007, B = 114514;
//const int mod1 = 1145, mod2 = 1419, mod3 = 1981, B = 114514;
mt19937 rnd(mod1);
int g[26];
ll pow(ll a, ll b, ll mod) {
ll ans = 1;
while (b != 0) {
if (b % 2 == 1) {
ans = ans * a % mod;
}
a = a * a % mod;
b = b / 2;
}
return ans;
}
int H(string s, const int mod) {
ll h = 0;
for (char c : s) {
h = (h * B + g[c - 'a']) % mod;
}
return h % mod;
}
ll normalize(ll x, ll mod) {
return (x % mod + mod) % mod;
}
struct Answer {
ll sum;
vector < int > id;
};
bool operator < (const Answer & a, const Answer & b) {
return a.sum < b.sum;
}
vector < int > TreeAttack(vector < int > val) {
vector < int > coe;
int n = val.size();
for (int i = 0; i < n; i++) {
coe.push_back(1);
}
vector < Answer > vec;
for (int i = 0; i < n; i++) {
Answer now;
now.sum = val[i];
now.id.push_back(i);
vec.push_back(now);
}
while ((int)vec.size() != 1) {
if (vec.size() % 2 == 1) {
for (int i : vec.back().id) {
coe[i] = 0;
}
}
int len = vec.size();
sort(vec.begin(), vec.end());
for (int i = 0; i < len; i += 2) {
if (vec[i].sum < vec[i + 1].sum) {
swap(vec[i], vec[i + 1]);
}
for (int j : vec[i + 1].id) {
coe[j] = -coe[j];
}
vec[i / 2].sum = vec[i].sum - vec[i + 1].sum;
vec[i / 2].id = vec[i].id;
vec[i / 2].id.insert(vec[i / 2].id.end(), vec[i + 1].id.begin(), vec[i + 1].id.end());
}
vec.erase(vec.begin() + len / 2, vec.end());
for (int i = 0; i < len / 2; i++) {
if (vec[i].sum == 0) {
vector < int > res;
for (int j = 0; j < n; j++) {
res.push_back(0);
}
for (int j : vec[i].id) {
res[j] = coe[j];
}
return res;
}
}
}
return vector < int > ();
}
const int LENGTH1 = 128;
int FAILED_cnt1;
pair < string, string > find_collision1() {
while (true) {
vector < pair < char, char > > vec;
vector < int > val;
for (int i = 0; i < LENGTH1; i++) {
char ch1 = rnd() % 26 + 'a', ch2 = rnd() % 25 + 'a';
if (ch2 >= ch1) {
ch2++;
}
vec.push_back(make_pair(ch1, ch2));
int diff = normalize(g[ch1 - 'a'] - g[ch2 - 'a'], mod1);
ll bit_val = pow(B, LENGTH1 - 1 - i, mod1);
val.push_back(diff * bit_val % mod1);
}
vector < int > r = TreeAttack(val);
if (r.empty()) {
FAILED_cnt1++;
continue;
}
string s1, s2;
for (int i = 0; i < LENGTH1; i++) {
char ch1 = vec[i].first, ch2 = vec[i].second;
if (r[i] == 0) {
s1.push_back(ch1);
s2.push_back(ch1);
} else if (r[i] == 1) {
s1.push_back(ch1);
s2.push_back(ch2);
} else {
s1.push_back(ch2);
s2.push_back(ch1);
}
}
return make_pair(s1, s2);
}
}
const int LENGTH2 = 256;
int FAILED_cnt2;
pair < string, string > find_collision2() {
while (true) {
vector < pair < string, string > > vec;
vector < int > val;
for (int i = 0; i < LENGTH2; i++) {
pair < string, string > p = find_collision1();
string & ch1 = p.first;
string & ch2 = p.second;
vec.push_back(make_pair(ch1, ch2));
int diff = normalize(H(ch1, mod2) - H(ch2, mod2), mod2);
ll bit_val = pow(B, (LENGTH2 - 1 - i) * LENGTH1, mod2);
val.push_back(diff * bit_val % mod2);
}
vector < int > r = TreeAttack(val);
if (r.empty()) {
printf("Stage 2 FAILED\n");
FAILED_cnt2++;
continue;
}
string s1, s2;
for (int i = 0; i < LENGTH2; i++) {
string ch1 = vec[i].first, ch2 = vec[i].second;
if (r[i] == 0) {
s1 += ch1;
s2 += ch1;
} else if (r[i] == 1) {
s1 += ch1;
s2 += ch2;
} else {
s1 += ch2;
s2 += ch1;
}
}
return make_pair(s1, s2);
}
}
const int LENGTH3 = 512;
int FAILED_cnt3;
pair < string, string > find_collision3() {
while (true) {
vector < pair < string, string > > vec;
vector < int > val;
for (int i = 0; i < LENGTH3; i++) {
pair < string, string > p = find_collision2();
string & ch1 = p.first;
string & ch2 = p.second;
vec.push_back(make_pair(ch1, ch2));
int diff = normalize(H(ch1, mod3) - H(ch2, mod3), mod3);
ll bit_val = pow(B, (LENGTH3 - 1 - i) * LENGTH1 * LENGTH2, mod3);
val.push_back(diff * bit_val % mod3);
}
vector < int > r = TreeAttack(val);
if (r.empty()) {
printf("Stage 3 FAILED\n");
FAILED_cnt3++;
continue;
}
string s1, s2;
for (int i = 0; i < LENGTH3; i++) {
string ch1 = vec[i].first, ch2 = vec[i].second;
if (r[i] == 0) {
s1 += ch1;
s2 += ch1;
} else if (r[i] == 1) {
s1 += ch1;
s2 += ch2;
} else {
s1 += ch2;
s2 += ch1;
}
}
return make_pair(s1, s2);
}
}
bool islow(string s) {
for (char c : s) {
if (!islower(c)) {
return false;
}
}
return true;
}
int main() {
for (int i = 0; i < 26; i++) {
g[i] = rnd() & 2147483647;
printf("g[%d] = %d\n", i, g[i]);
}
pair < string, string > r = find_collision3();
string s = r.first, t = r.second;
printf("LENGTH = %d\n", LENGTH1 * LENGTH2 * LENGTH3);
FILE * fhack = fopen("hack.txt", "w");
fprintf(fhack, "%s\n", s.c_str());
fprintf(fhack, "%s\n", t.c_str());
cout << FAILED_cnt1 << ' ' << FAILED_cnt2 << ' ' << FAILED_cnt3 << endl;
int n = s.size();
if (!islow(s) || !islow(t)) {
printf("you should only contain lowercase letters\n");
} else if (n != (int)t.size()) {
printf("len not equal\n");
} else {
printf("mod %d, s : %d t : %d\n", mod1, H(s, mod1), H(t, mod1));
if (H(s, mod1) != H(t, mod1)) {
printf("different\n");
} else {
printf("mod %d, s : %d t : %d\n", mod2, H(s, mod2), H(t, mod2));
if (H(s, mod2) != H(t, mod2)) {
printf("different\n");
} else {
printf("mod %d, s : %d t : %d\n", mod3, H(s, mod3), H(t, mod3));
if (H(s, mod3) != H(t, mod3)) {
printf("different\n");
} else {
if (s == t) {
printf("s is equal to t\n");
} else {
printf("You Hack it!\n");
}
}
}
}
}
return 0;
}