题解 P4718 【【模板】Pollard-Rho算法】

皎月半洒花

2019-01-31 13:30:09

Solution

upd :多谢大佬` zhoutb2333(UID31564)`的指正,现在的题解已经是正确的了。 其实我的源程序有个很严重的bug,就是两个`Int64`类型我给直接乘起来了……于是就会导致一些奇怪的溢出。对于某些素数会被当成合数,于是无论怎样都判不出来质因子导致TLE…… 然后大家如果想知道我是怎么知道这个问题的,可以左转讨论区看看我是怎么智障的…… 反正最后呢,我发现在用龟速乘之后程序的判素性会变弱,于是又加了几个质数,开了O2就过了233 现在最新的代码放在了最下面。以下是原题解: ____ 我想说一下这个题目是怎么卡常的。 虽然我的代码也不是很快……但是不至于$94pts$…… 以下是战果: ![](https://cdn.luogu.com.cn/upload/pic/50558.png) 不着急,我们一点一点来: ## 一、$6000+\rm{ms} \to 4000+\rm{ms}$ 首先奉上在下最慢的代码: ```cpp LL Prime[11] = {2, 3, 5, 7, 13, 17, 19, 23} ; inline LL expow(LL a, LL b, LL p){ LL res = 1 ; while (b){ if (b & 1) (res *= a) %= p ; (a *= a) %= p, b >>= 1 ; } return res % p ; } inline bool Test(LL p, LL x){ LL r = 0, d = x - 1 ; while (!(d & 1)) d >>= 1, ++ r ; for (LL i = expow(p, d, x), j ; r ; -- r){ j = i * i % x ; if (j == 1){ if (i == 1 || i == x - 1) return 1 ; return 0 ; } i = j ; } return 0 ; } inline bool Miller_Rabin(LL x){ if (x == 1) return 0 ; for (int i = 0 ; i < 8 ; ++ i){ if (x == Prime[i]) return 1 ; if (!(x % Prime[i])) return 0 ; if (!Test(Prime[i], x)) return 0 ; } return 1 ; } LL res[MAXN], tot ; inline int pksrand(){ return rand() << 15 ^ rand() ; } // 2 inline LL Irand(LL x){ return (((LL)pksrand()) << 30 ^ pksrand()) % x ; //2 } inline LL guisuMul(LL a, LL b, LL x){ return (a * b - (LL) ((long double) a * b / x + 1e-9) * x) % x; } inline LL qwq(LL x){ LL A = Irand(x), C = Irand(x) ; LL t1 = A, t2 = (guisuMul(A, A, x) + C) % x ; // 1 LL dif = max(t1, t2) - min(t1, t2), G = __gcd(x, dif) ; while (G == 1){ t1 = (guisuMul(t1, t1, x) + C) % x ; t2 = (guisuMul(t2, t2, x) + C) % x ; t2 = (guisuMul(t2, t2, x) + C) % x ; dif = max(t1, t2) - min(t1, t2), G = __gcd(x, dif) ; } return G ; } inline void Pollard_Rho(LL x){ if (x == 1) return ; if (Miller_Rabin(x)) { res[++ tot] = x ; return ; } LL y = x ; while (y == x) y = qwq(x) ; Pollard_Rho(y), Pollard_Rho(x / y) ; } int main(){ cin >> T ; while (T --){ scanf("%lld", &N), tot = 0, Pollard_Rho(N) ; if (tot == 1) { puts("Prime") ; continue ; } sort(res + 1, res + tot + 1), printf("%lld\n", res[tot]) ; } } ``` 那么开始优化 * * $1$、题目中让求最大的因子,而我一开始的做法是全部保存并且排一遍序。 但这显然是很蠢的举动。于是我们修改一下: ```cpp inline void Pollard_Rho(LL x){ if (x == 1) return ; if (Miller_Rabin(x)){ Ans = max(Ans, x) ; return ; } register LL y = x ; while (y == x) y = qwq(x) ; Pollard_Rho(y), Pollard_Rho(x / y) ; } int main(){ srand(19260817) ; T =qr() ; while (T --){ N = qr(), Ans = -1, Pollard_Rho(N) ; if (Ans == N) puts("Prime") ;else printf("%lld", Ans), putchar('\n') ; } ``` ~~虽然这似乎没个卵用~~ * $2$、我们求$gcd$是很频繁的,所以我们尝试思考一种比较好的方式。大概就是二进制转化: ```cpp #define mytz __builtin_ctzll inline LL gcd(LL a, LL b){ if(!a) return b; if(!b) return a; register int t = mytz(a|b) ; a >>= mytz(a) ; do{ b >>= mytz(b) ; if(a>b){LL t=b;b=a,a=t;} b-=a; }while(b); return a<<t; } ``` 其中$\_\_builtin\_ctzll$是用来判断二进制下末尾零的数量(针对$int64$)的。原理的话大概就是二进制乱搞$233$…… * $3$、素数个数不用太多。这是实测,只需要$2$和$61$两个质数就可以卡过去。虽然质数的多少之于时间是个玄学的问题,但看起来似乎会快一些。并且,很重要的,**循环展开**,我们在$Miller\_Rabin$中完全没有必要$for$,毕竟素数个数很少,于是: ```cpp inline bool Miller_Rabin(LL x){ if (x == Prime[1] || x == Prime[0]) return 1 ; if (!(x % Prime[1]) || !(x % Prime[0])) return 0 ; if (Test(Prime[1], x) ^ 1) return 0 ; if (Test(Prime[0], x) ^ 1) return 0 ; return 1 ; } ``` 然后就喜闻乐见地拿到了$4500ms$左右的好成绩——可这似乎不能解决最后一个点仍然卡不过去的问题。 ## 二、$4000+ms \to 3000+ms$ 下面才是真正的卡常数…… 首先注意到,我们需要频繁调用的$Rho$函数和$Miller$中的$check$,都或多或少定义了一些临时变量,而对于临时变量,我们是可以`register`优化的……当你全部加上`register`修饰符之后,你会发现你的程序快了接近$300+ms$…… 但这不重要,重要的是接下来的卡常: 在$4000+ms$时,我的$rand$是这么写的: ```cpp inline LL Irand(LL x){ return ((LL)(rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2 }//2 ``` 看上去并不可以优化,但事实上,只要我们改变一下那个强制类型转换的位置: ```cpp inline LL Irand(LL x){ return 1ll * ((rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2 }//2 ``` 他就会快整整$500+ms$!! 这也从侧面印证了,强制类型转换才是最慢的运算。。。 所以,最后我们就成功地卡了过去qaq 稍微提一句,环境优化参数: ```cpp #pragma GCC diagnostic error "-std=c++14" #pragma GCC target("avx") #pragma GCC optimize(3) #pragma GCC optimize("Ofast") #pragma GCC optimize("inline") #pragma GCC optimize("-fgcse") #pragma GCC optimize("-fgcse-lm") #pragma GCC optimize("-fipa-sra") #pragma GCC optimize("-ftree-pre") #pragma GCC optimize("-ftree-vrp") #pragma GCC optimize("-fpeephole2") #pragma GCC optimize("-ffast-math") #pragma GCC optimize("-fsched-spec") #pragma GCC optimize("unroll-loops") #pragma GCC optimize("-falign-jumps") #pragma GCC optimize("-falign-loops") #pragma GCC optimize("-falign-labels") #pragma GCC optimize("-fdevirtualize") #pragma GCC optimize("-fcaller-saves") #pragma GCC optimize("-fcrossjumping") #pragma GCC optimize("-fthread-jumps") #pragma GCC optimize("-funroll-loops") #pragma GCC optimize("-fwhole-program") #pragma GCC optimize("-freorder-blocks") #pragma GCC optimize("-fschedule-insns") #pragma GCC optimize("inline-functions") #pragma GCC optimize("-ftree-tail-merge") #pragma GCC optimize("-fschedule-insns2") #pragma GCC optimize("-fstrict-aliasing") #pragma GCC optimize("-fstrict-overflow") #pragma GCC optimize("-falign-functions") #pragma GCC optimize("-fcse-skip-blocks") #pragma GCC optimize("-fcse-follow-jumps") #pragma GCC optimize("-fsched-interblock") #pragma GCC optimize("-fpartial-inlining") #pragma GCC optimize("no-stack-protector") #pragma GCC optimize("-freorder-functions") #pragma GCC optimize("-findirect-inlining") #pragma GCC optimize("-fhoist-adjacent-loads") #pragma GCC optimize("-frerun-cse-after-loop") #pragma GCC optimize("inline-small-functions") #pragma GCC optimize("-finline-small-functions") #pragma GCC optimize("-ftree-switch-conversion") #pragma GCC optimize("-foptimize-sibling-calls") #pragma GCC optimize("-fexpensive-optimizations") #pragma GCC optimize("-funsafe-loop-optimizations") #pragma GCC optimize("inline-functions-called-once") #pragma GCC optimize("-fdelete-null-pointer-checks") ``` 都加上的效果不是特别好,最好的是下面几句: ```cpp #pragma GCC diagnostic error "-std=c++14" #pragma GCC target("avx") #pragma GCC optimize(3) #pragma GCC optimize("Ofast") ``` 下面是完整版$\rm{qaq}$ ```cpp #define min my_min #define LL long long #define max(a, b) (((a) > (b)) ? (a) : (b)) using namespace std ; LL T, N ; LL Prime[11] = {2, 61, 97, 7, 13, 17, 23, 29}, Ans = -1 ; inline LL guisuMul(LL a, LL b, LL m){ LL d =((long double)a / m * b + 1e-8) ; LL r = a * b - d * m ; return r < 0 ? r + m : r ; } inline LL gsm(LL a, LL b, LL p){ register LL res = 0 ; while (b){ if (b & 1) res = (res + a) % p ; a = (a + a) % p, b >>= 1 ; } return res ; } inline LL expow(LL a, LL b, LL p){ if (a == 1) return 1 ; register LL res = 1 ; while (b){ if (b & 1) res = gsm(res, a, p) ; a = gsm(a, a, p), b >>= 1 ; } return res ; } inline LL my_max(LL a, LL b){ return a > b ? a : b ; } inline LL my_min(LL a, LL b){ return a < b ? a : b ; } inline LL qr(){ register LL k = 0 ; char c = getchar() ; while (c < '0' || c > '9') c = getchar() ; while (c <= '9' && c >= '0') k = (k << 1) + (k << 3) + c - 48, c = getchar() ; return k ; } inline bool Test(LL p, LL x){ register LL r = 0, d = x - 1 ; while (!(d & 1)) d >>= 1, ++ r ; for (register LL i = expow(p, d, x), j ; r ; -- r){ j = gsm(i, i, x) ; if (j == 1){ if (i == 1 || i == x - 1) return 1 ; return 0 ; } i = j ; } return 0 ; } #define ctz __builtin_ctzll inline LL gcd(LL a, LL b){ if(!a) return b; if(!b) return a; register int t = ctz(a|b) ; a >>= ctz(a) ; do{ b >>= ctz(b) ; if(a>b){LL t=b;b=a,a=t;} b-=a; }while(b); return a<<t; } inline bool Miller_Rabin(LL x){ if (x == Prime[1] || x == Prime[0]) return 1 ; if (!(x % Prime[1]) || !(x % Prime[0])) return 0 ; if (Test(Prime[1], x) ^ 1) return 0 ; if (Test(Prime[0], x) ^ 1) return 0 ; if (x == Prime[2] || x == Prime[3]) return 1 ; if (!(x % Prime[2]) || !(x % Prime[3])) return 0 ; if (Test(Prime[2], x) ^ 1) return 0 ; if (Test(Prime[3], x) ^ 1) return 0 ; if (x == Prime[4] || x == Prime[5]) return 1 ; if (!(x % Prime[4]) || !(x % Prime[5])) return 0 ; if (Test(Prime[4], x) ^ 1) return 0 ; if (Test(Prime[5], x) ^ 1) return 0 ; if (x == Prime[6] || x == Prime[7]) return 1 ; if (!(x % Prime[6]) || !(x % Prime[7])) return 0 ; if (Test(Prime[6], x) ^ 1) return 0 ; if (Test(Prime[7], x) ^ 1) return 0 ; return 1 ; } inline LL Irand(LL x){ return 1ll * ((rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2 }//2 inline LL qwq(LL x){ register LL C = Irand(x) ; register LL t1 = Irand(x), t2 = guisuMul(t1, t1, x) + C ; // 1 register LL dif = t1 > t2 ? (t1 - t2) : (t2 - t1), G = gcd(x, dif) ; while (G == 1){ t1 = guisuMul(t1, t1, x) + C ;if (t1 >= x) t1 %= x ; t2 = guisuMul(t2, t2, x) + C, t2 = guisuMul(t2, t2, x) + C ; if (t2 >= x) t2 %= x ; dif = t1 > t2 ? (t1 - t2) : (t2 - t1), G = gcd(x, dif) ; } return G ; } inline void Pollard_Rho(LL x){ if (x == 1) return ; if (Miller_Rabin(x)){ Ans = max(Ans, x) ; return ; } register LL y = x ; while (y == x) y = qwq(x) ; Pollard_Rho(y), Pollard_Rho(x / y) ; } int main(){ srand(19260817) ; freopen("sttd.out", "w", stdout) ; T =qr() ; while (T --){ N = qr(), Ans = -1, Pollard_Rho(N) ; if (Ans == N) puts("Prime") ;else printf("%lld", Ans), putchar('\n') ; } } ```