卡后 LLL 时代哈希

· · 算法·理论

后 LLL 时代哈希

使用随机权值确实能规避 LLL 算法卡哈希。

但是考虑到我们有 Tree Attack 来解决单模数随机权值情况下的哈希。

所以考虑先求出几组碰撞(第一个模数意义下)然后把这几组碰撞当字符集再求一次碰撞(第二个模数意义下)。

然后就可以解决多模数随机权值下的哈希,运行 30 秒即可找到一组碰撞。

#include <iostream>
#include <algorithm>
using namespace std;
typedef long long ll;
const int mod1 = 998244353, mod2 = 1000000007, mod3 = 100000007, B = 114514;
const int LENGTH1 = 64, CNT1 = 64;
const int LENGTH2 = 64, CNT2 = 64;
const int LENGTH3 = 64;
mt19937 rnd(mod1);
int g[26];
ll pow(ll a, ll b, ll mod) {
    ll ans = 1;
    while (b != 0) {
        if (b % 2 == 1) {
            ans = ans * a % mod;
        }
        a = a * a % mod;
        b = b / 2;
    }
    return ans;
}
int H(string s, const int mod) {
    ll h = 0;
    for (char c : s) {
        h = (h * B + g[c - 'a']) % mod;
    }
    return h % mod;
}
ll normalize(ll x, ll mod) {
    return (x % mod + mod) % mod;
}
struct Answer {
    ll sum;
    vector < int > id;
};
bool operator < (const Answer & a, const Answer & b) {
    return a.sum < b.sum;
}
vector < int > TreeAttack(vector < int > val) {
    vector < int > coe;
    int n = val.size();
    for (int i = 0; i < n; i++) {
        coe.push_back(1);
    }
    vector < Answer > vec;
    for (int i = 0; i < n; i++) {
        Answer now;
        now.sum = val[i];
        now.id.push_back(i);
        vec.push_back(now);
    }
    while ((int)vec.size() != 1) {
        if (vec.size() % 2 == 1) {
            for (int i : vec.back().id) {
                coe[i] = 0;
            }
        }
        int len = vec.size();
        sort(vec.begin(), vec.end());
        for (int i = 0; i < len; i += 2) {
            if (vec[i].sum < vec[i + 1].sum) {
                swap(vec[i], vec[i + 1]);
            }
            for (int j : vec[i + 1].id) {
                coe[j] = -coe[j];
            }
            vec[i / 2].sum = vec[i].sum - vec[i + 1].sum;
            vec[i / 2].id = vec[i].id;
            vec[i / 2].id.insert(vec[i / 2].id.end(), vec[i + 1].id.begin(), vec[i + 1].id.end());
        }
        vec.erase(vec.begin() + len / 2, vec.end());
        for (int i = 0; i < len / 2; i++) {
            if (vec[i].sum == 0) {
                vector < int > res;
                for (int j = 0; j < n; j++) {
                    res.push_back(0);
                }
                for (int j : vec[i].id) {
                    res[j] = coe[j];
                }
                return res;
            }
        }
    }
    return vector < int > ();
}
pair < string, string > find_collision1() {
    while (true) {
        vector < pair < char, char > > vec;
        vector < int > val;
        for (int i = 0; i < LENGTH1; i++) {
            char ch1 = rnd() % 26 + 'a', ch2 = rnd() % 25 + 'a';
            if (ch2 >= ch1) {
                ch2++;
            }
            vec.push_back(make_pair(ch1, ch2));
            int diff = normalize(g[ch1 - 'a'] - g[ch2 - 'a'], mod1);
            ll bit_val = pow(B, LENGTH1 - 1 - i, mod1);
            val.push_back(diff * bit_val % mod1);
        }
        vector < int > r = TreeAttack(val);
        if (r.empty()) {
            continue;
        }
        string s1, s2;
        for (int i = 0; i < LENGTH1; i++) {
            char ch1 = vec[i].first, ch2 = vec[i].second;
            if (r[i] == 0) {
                s1.push_back(ch1);
                s2.push_back(ch1);
            } else if (r[i] == 1) {
                s1.push_back(ch1);
                s2.push_back(ch2);
            } else {
                s1.push_back(ch2);
                s2.push_back(ch1);
            }
        }
        return make_pair(s1, s2);
    }
}
pair < string, string > layer1[CNT1];
ll layer1_hash_diff[CNT1];
pair < string, string > find_collision2() {
    while (true) {
        vector < int > vec, val;
        for (int i = 0; i < LENGTH2; i++) {
            int id = rnd() % CNT1;
            vec.push_back(id);
            ll diff = layer1_hash_diff[id];
            ll bit_val = pow(B, (LENGTH2 - 1 - i) * LENGTH1, mod2);
            val.push_back(diff * bit_val % mod2);
        }
        vector < int > r = TreeAttack(val);
        if (r.empty()) {
            continue;
        }
        string s1, s2;
        for (int i = 0; i < LENGTH2; i++) {
            string ch1 = layer1[vec[i]].first, ch2 = layer1[vec[i]].second;
            if (r[i] == 0) {
                s1 += ch1;
                s2 += ch1;
            } else if (r[i] == 1) {
                s1 += ch1;
                s2 += ch2;
            } else {
                s1 += ch2;
                s2 += ch1;
            }
        }
        return make_pair(s1, s2);
    }
}
pair < string, string > layer2[CNT2];
ll layer2_hash_diff[CNT2];
pair < string, string > find_collision3() {
    while (true) {
        vector < int > vec, val;
        for (int i = 0; i < LENGTH3; i++) {
            int id = rnd() % CNT2;
            vec.push_back(id);
            ll diff = layer2_hash_diff[id];
            ll bit_val = pow(B, (LENGTH3 - 1 - i) * LENGTH2 * LENGTH1, mod3);
            val.push_back(diff * bit_val % mod3);
        }
        vector < int > r = TreeAttack(val);
        if (r.empty()) {
            continue;
        }
        string s1, s2;
        for (int i = 0; i < LENGTH3; i++) {
            string ch1 = layer2[vec[i]].first, ch2 = layer2[vec[i]].second;
            if (r[i] == 0) {
                s1 += ch1;
                s2 += ch1;
            } else if (r[i] == 1) {
                s1 += ch1;
                s2 += ch2;
            } else {
                s1 += ch2;
                s2 += ch1;
            }
        }
        return make_pair(s1, s2);
    }
}
bool islow(string s) {
    for (char c : s) {
        if (!islower(c)) {
            return false;
        }
    }
    return true;
}
int main() {
    for (int i = 0; i < 26; i++) {
        g[i] = rnd() & 2147483647;
        printf("g[%d] = %d\n", i, g[i]);
    }
    for (int i = 0; i < CNT1; i++) {
        layer1[i] = find_collision1();
        layer1_hash_diff[i] = normalize(H(layer1[i].first, mod2) - H(layer1[i].second, mod2), mod2);
        printf("Layer1 i = %d finished.\n", i);
    }
    for (int i = 0; i < CNT2; i++) {
        layer2[i] = find_collision2();
        layer2_hash_diff[i] = normalize(H(layer2[i].first, mod3) - H(layer2[i].second, mod3), mod3);
        printf("Layer2 i = %d finished.\n", i);
    }
    pair < string, string > r = find_collision3();
    string s = r.first, t = r.second;
    FILE * fhack = fopen("hack.txt", "w");
    fprintf(fhack, "%s\n", s.c_str());
    fprintf(fhack, "%s\n", t.c_str());
    int n = s.size();
    printf("n = %d\n", n);
    if (!islow(s) || !islow(t)) {
        printf("you should only contain lowercase letters\n");
    } else if (n != (int)t.size()) {
        printf("len not equal\n");
    } else {
        printf("mod %d, s : %d t : %d\n", mod1, H(s, mod1), H(t, mod1));
        if (H(s, mod1) != H(t, mod1)) {
            printf("different\n");
        } else {
            printf("mod %d, s : %d t : %d\n", mod2, H(s, mod2), H(t, mod2));
            if (H(s, mod2) != H(t, mod2)) {
                printf("different\n");
            } else {
                printf("mod %d, s : %d t : %d\n", mod3, H(s, mod3), H(t, mod3));
                if (H(s, mod3) != H(t, mod3)) {
                    printf("different\n");
                } else {
                    if (s == t) {
                        printf("s is equal to t\n");
                    } else {
                        printf("You Hack it!\n");
                    }
                }
            }
        }
    }
    return 0;
}

这是之前脑子不清醒写的代码。

#include <iostream>
#include <algorithm>
using namespace std;
typedef long long ll;
const int mod1 = 998244353, mod2 = 1000000007, mod3 = 100000007, B = 114514;
//const int mod1 = 1145, mod2 = 1419, mod3 = 1981, B = 114514;
mt19937 rnd(mod1);
int g[26];
ll pow(ll a, ll b, ll mod) {
    ll ans = 1;
    while (b != 0) {
        if (b % 2 == 1) {
            ans = ans * a % mod;
        }
        a = a * a % mod;
        b = b / 2;
    }
    return ans;
}
int H(string s, const int mod) {
    ll h = 0;
    for (char c : s) {
        h = (h * B + g[c - 'a']) % mod;
    }
    return h % mod;
}
ll normalize(ll x, ll mod) {
    return (x % mod + mod) % mod;
}
struct Answer {
    ll sum;
    vector < int > id;
};
bool operator < (const Answer & a, const Answer & b) {
    return a.sum < b.sum;
}
vector < int > TreeAttack(vector < int > val) {
    vector < int > coe;
    int n = val.size();
    for (int i = 0; i < n; i++) {
        coe.push_back(1);
    }
    vector < Answer > vec;
    for (int i = 0; i < n; i++) {
        Answer now;
        now.sum = val[i];
        now.id.push_back(i);
        vec.push_back(now);
    }
    while ((int)vec.size() != 1) {
        if (vec.size() % 2 == 1) {
            for (int i : vec.back().id) {
                coe[i] = 0;
            }
        }
        int len = vec.size();
        sort(vec.begin(), vec.end());
        for (int i = 0; i < len; i += 2) {
            if (vec[i].sum < vec[i + 1].sum) {
                swap(vec[i], vec[i + 1]);
            }
            for (int j : vec[i + 1].id) {
                coe[j] = -coe[j];
            }
            vec[i / 2].sum = vec[i].sum - vec[i + 1].sum;
            vec[i / 2].id = vec[i].id;
            vec[i / 2].id.insert(vec[i / 2].id.end(), vec[i + 1].id.begin(), vec[i + 1].id.end());
        }
        vec.erase(vec.begin() + len / 2, vec.end());
        for (int i = 0; i < len / 2; i++) {
            if (vec[i].sum == 0) {
                vector < int > res;
                for (int j = 0; j < n; j++) {
                    res.push_back(0);
                }
                for (int j : vec[i].id) {
                    res[j] = coe[j];
                }
                return res;
            }
        }
    }
    return vector < int > ();
}
const int LENGTH1 = 128;
int FAILED_cnt1;
pair < string, string > find_collision1() {
    while (true) {
        vector < pair < char, char > > vec;
        vector < int > val;
        for (int i = 0; i < LENGTH1; i++) {
            char ch1 = rnd() % 26 + 'a', ch2 = rnd() % 25 + 'a';
            if (ch2 >= ch1) {
                ch2++;
            }
            vec.push_back(make_pair(ch1, ch2));
            int diff = normalize(g[ch1 - 'a'] - g[ch2 - 'a'], mod1);
            ll bit_val = pow(B, LENGTH1 - 1 - i, mod1);
            val.push_back(diff * bit_val % mod1);
        }
        vector < int > r = TreeAttack(val);
        if (r.empty()) {
            FAILED_cnt1++;
            continue;
        }
        string s1, s2;
        for (int i = 0; i < LENGTH1; i++) {
            char ch1 = vec[i].first, ch2 = vec[i].second;
            if (r[i] == 0) {
                s1.push_back(ch1);
                s2.push_back(ch1);
            } else if (r[i] == 1) {
                s1.push_back(ch1);
                s2.push_back(ch2);
            } else {
                s1.push_back(ch2);
                s2.push_back(ch1);
            }
        }
        return make_pair(s1, s2);
    }
}
const int LENGTH2 = 256;
int FAILED_cnt2;
pair < string, string > find_collision2() {
    while (true) {
        vector < pair < string, string > > vec;
        vector < int > val;
        for (int i = 0; i < LENGTH2; i++) {
            pair < string, string > p = find_collision1();
            string & ch1 = p.first;
            string & ch2 = p.second;
            vec.push_back(make_pair(ch1, ch2));
            int diff = normalize(H(ch1, mod2) - H(ch2, mod2), mod2);
            ll bit_val = pow(B, (LENGTH2 - 1 - i) * LENGTH1, mod2);
            val.push_back(diff * bit_val % mod2);
        }
        vector < int > r = TreeAttack(val);
        if (r.empty()) {
            printf("Stage 2 FAILED\n");
            FAILED_cnt2++;
            continue;
        }
        string s1, s2;
        for (int i = 0; i < LENGTH2; i++) {
            string ch1 = vec[i].first, ch2 = vec[i].second;
            if (r[i] == 0) {
                s1 += ch1;
                s2 += ch1;
            } else if (r[i] == 1) {
                s1 += ch1;
                s2 += ch2;
            } else {
                s1 += ch2;
                s2 += ch1;
            }
        }
        return make_pair(s1, s2);
    }
}
const int LENGTH3 = 512;
int FAILED_cnt3;
pair < string, string > find_collision3() {
    while (true) {
        vector < pair < string, string > > vec;
        vector < int > val;
        for (int i = 0; i < LENGTH3; i++) {
            pair < string, string > p = find_collision2();
            string & ch1 = p.first;
            string & ch2 = p.second;
            vec.push_back(make_pair(ch1, ch2));
            int diff = normalize(H(ch1, mod3) - H(ch2, mod3), mod3);
            ll bit_val = pow(B, (LENGTH3 - 1 - i) * LENGTH1 * LENGTH2, mod3);
            val.push_back(diff * bit_val % mod3);
        }
        vector < int > r = TreeAttack(val);
        if (r.empty()) {
            printf("Stage 3 FAILED\n");
            FAILED_cnt3++;
            continue;
        }
        string s1, s2;
        for (int i = 0; i < LENGTH3; i++) {
            string ch1 = vec[i].first, ch2 = vec[i].second;
            if (r[i] == 0) {
                s1 += ch1;
                s2 += ch1;
            } else if (r[i] == 1) {
                s1 += ch1;
                s2 += ch2;
            } else {
                s1 += ch2;
                s2 += ch1;
            }
        }
        return make_pair(s1, s2);
    }
}
bool islow(string s) {
    for (char c : s) {
        if (!islower(c)) {
            return false;
        }
    }
    return true;
}
int main() {
    for (int i = 0; i < 26; i++) {
        g[i] = rnd() & 2147483647;
        printf("g[%d] = %d\n", i, g[i]);
    }
    pair < string, string > r = find_collision3();
    string s = r.first, t = r.second;
    printf("LENGTH = %d\n", LENGTH1 * LENGTH2 * LENGTH3);
    FILE * fhack = fopen("hack.txt", "w");
    fprintf(fhack, "%s\n", s.c_str());
    fprintf(fhack, "%s\n", t.c_str());
    cout << FAILED_cnt1 << ' ' << FAILED_cnt2 << ' ' << FAILED_cnt3 << endl;
    int n = s.size();
    if (!islow(s) || !islow(t)) {
        printf("you should only contain lowercase letters\n");
    } else if (n != (int)t.size()) {
        printf("len not equal\n");
    } else {
        printf("mod %d, s : %d t : %d\n", mod1, H(s, mod1), H(t, mod1));
        if (H(s, mod1) != H(t, mod1)) {
            printf("different\n");
        } else {
            printf("mod %d, s : %d t : %d\n", mod2, H(s, mod2), H(t, mod2));
            if (H(s, mod2) != H(t, mod2)) {
                printf("different\n");
            } else {
                printf("mod %d, s : %d t : %d\n", mod3, H(s, mod3), H(t, mod3));
                if (H(s, mod3) != H(t, mod3)) {
                    printf("different\n");
                } else {
                    if (s == t) {
                        printf("s is equal to t\n");
                    } else {
                        printf("You Hack it!\n");
                    }
                }
            }
        }
    }
    return 0;
}