题解 P4718 【【模板】Pollard-Rho算法】
皎月半洒花
2019-01-31 13:30:09
upd :多谢大佬` zhoutb2333(UID31564)`的指正,现在的题解已经是正确的了。
其实我的源程序有个很严重的bug,就是两个`Int64`类型我给直接乘起来了……于是就会导致一些奇怪的溢出。对于某些素数会被当成合数,于是无论怎样都判不出来质因子导致TLE……
然后大家如果想知道我是怎么知道这个问题的,可以左转讨论区看看我是怎么智障的……
反正最后呢,我发现在用龟速乘之后程序的判素性会变弱,于是又加了几个质数,开了O2就过了233
现在最新的代码放在了最下面。以下是原题解:
____
我想说一下这个题目是怎么卡常的。
虽然我的代码也不是很快……但是不至于$94pts$……
以下是战果:
![](https://cdn.luogu.com.cn/upload/pic/50558.png)
不着急,我们一点一点来:
## 一、$6000+\rm{ms} \to 4000+\rm{ms}$
首先奉上在下最慢的代码:
```cpp
LL Prime[11] = {2, 3, 5, 7, 13, 17, 19, 23} ;
inline LL expow(LL a, LL b, LL p){
LL res = 1 ;
while (b){
if (b & 1)
(res *= a) %= p ;
(a *= a) %= p, b >>= 1 ;
}
return res % p ;
}
inline bool Test(LL p, LL x){
LL r = 0, d = x - 1 ;
while (!(d & 1)) d >>= 1, ++ r ;
for (LL i = expow(p, d, x), j ; r ; -- r){
j = i * i % x ;
if (j == 1){
if (i == 1 || i == x - 1)
return 1 ;
return 0 ;
}
i = j ;
}
return 0 ;
}
inline bool Miller_Rabin(LL x){
if (x == 1) return 0 ;
for (int i = 0 ; i < 8 ; ++ i){
if (x == Prime[i]) return 1 ;
if (!(x % Prime[i])) return 0 ;
if (!Test(Prime[i], x)) return 0 ;
}
return 1 ;
}
LL res[MAXN], tot ;
inline int pksrand(){
return rand() << 15 ^ rand() ;
} // 2
inline LL Irand(LL x){
return (((LL)pksrand()) << 30 ^ pksrand()) % x ; //2
}
inline LL guisuMul(LL a, LL b, LL x){
return (a * b - (LL) ((long double) a * b / x + 1e-9) * x) % x;
}
inline LL qwq(LL x){
LL A = Irand(x), C = Irand(x) ;
LL t1 = A, t2 = (guisuMul(A, A, x) + C) % x ; // 1
LL dif = max(t1, t2) - min(t1, t2), G = __gcd(x, dif) ;
while (G == 1){
t1 = (guisuMul(t1, t1, x) + C) % x ;
t2 = (guisuMul(t2, t2, x) + C) % x ;
t2 = (guisuMul(t2, t2, x) + C) % x ;
dif = max(t1, t2) - min(t1, t2), G = __gcd(x, dif) ;
}
return G ;
}
inline void Pollard_Rho(LL x){
if (x == 1) return ;
if (Miller_Rabin(x)) {
res[++ tot] = x ; return ;
}
LL y = x ; while (y == x) y = qwq(x) ;
Pollard_Rho(y), Pollard_Rho(x / y) ;
}
int main(){
cin >> T ;
while (T --){
scanf("%lld", &N), tot = 0, Pollard_Rho(N) ;
if (tot == 1) { puts("Prime") ; continue ; }
sort(res + 1, res + tot + 1), printf("%lld\n", res[tot]) ;
}
}
```
那么开始优化
*
* $1$、题目中让求最大的因子,而我一开始的做法是全部保存并且排一遍序。
但这显然是很蠢的举动。于是我们修改一下:
```cpp
inline void Pollard_Rho(LL x){
if (x == 1) return ;
if (Miller_Rabin(x)){
Ans = max(Ans, x) ; return ;
}
register LL y = x ;
while (y == x) y = qwq(x) ;
Pollard_Rho(y), Pollard_Rho(x / y) ;
}
int main(){
srand(19260817) ;
T =qr() ;
while (T --){
N = qr(), Ans = -1, Pollard_Rho(N) ;
if (Ans == N) puts("Prime") ;else printf("%lld", Ans), putchar('\n') ;
}
```
~~虽然这似乎没个卵用~~
* $2$、我们求$gcd$是很频繁的,所以我们尝试思考一种比较好的方式。大概就是二进制转化:
```cpp
#define mytz __builtin_ctzll
inline LL gcd(LL a, LL b){
if(!a) return b;
if(!b) return a;
register int t = mytz(a|b) ;
a >>= mytz(a) ;
do{
b >>= mytz(b) ;
if(a>b){LL t=b;b=a,a=t;}
b-=a;
}while(b);
return a<<t;
}
```
其中$\_\_builtin\_ctzll$是用来判断二进制下末尾零的数量(针对$int64$)的。原理的话大概就是二进制乱搞$233$……
* $3$、素数个数不用太多。这是实测,只需要$2$和$61$两个质数就可以卡过去。虽然质数的多少之于时间是个玄学的问题,但看起来似乎会快一些。并且,很重要的,**循环展开**,我们在$Miller\_Rabin$中完全没有必要$for$,毕竟素数个数很少,于是:
```cpp
inline bool Miller_Rabin(LL x){
if (x == Prime[1] || x == Prime[0]) return 1 ;
if (!(x % Prime[1]) || !(x % Prime[0])) return 0 ;
if (Test(Prime[1], x) ^ 1) return 0 ;
if (Test(Prime[0], x) ^ 1) return 0 ;
return 1 ;
}
```
然后就喜闻乐见地拿到了$4500ms$左右的好成绩——可这似乎不能解决最后一个点仍然卡不过去的问题。
## 二、$4000+ms \to 3000+ms$
下面才是真正的卡常数……
首先注意到,我们需要频繁调用的$Rho$函数和$Miller$中的$check$,都或多或少定义了一些临时变量,而对于临时变量,我们是可以`register`优化的……当你全部加上`register`修饰符之后,你会发现你的程序快了接近$300+ms$……
但这不重要,重要的是接下来的卡常:
在$4000+ms$时,我的$rand$是这么写的:
```cpp
inline LL Irand(LL x){
return ((LL)(rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2
}//2
```
看上去并不可以优化,但事实上,只要我们改变一下那个强制类型转换的位置:
```cpp
inline LL Irand(LL x){
return 1ll * ((rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2
}//2
```
他就会快整整$500+ms$!!
这也从侧面印证了,强制类型转换才是最慢的运算。。。
所以,最后我们就成功地卡了过去qaq
稍微提一句,环境优化参数:
```cpp
#pragma GCC diagnostic error "-std=c++14"
#pragma GCC target("avx")
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")
#pragma GCC optimize("inline")
#pragma GCC optimize("-fgcse")
#pragma GCC optimize("-fgcse-lm")
#pragma GCC optimize("-fipa-sra")
#pragma GCC optimize("-ftree-pre")
#pragma GCC optimize("-ftree-vrp")
#pragma GCC optimize("-fpeephole2")
#pragma GCC optimize("-ffast-math")
#pragma GCC optimize("-fsched-spec")
#pragma GCC optimize("unroll-loops")
#pragma GCC optimize("-falign-jumps")
#pragma GCC optimize("-falign-loops")
#pragma GCC optimize("-falign-labels")
#pragma GCC optimize("-fdevirtualize")
#pragma GCC optimize("-fcaller-saves")
#pragma GCC optimize("-fcrossjumping")
#pragma GCC optimize("-fthread-jumps")
#pragma GCC optimize("-funroll-loops")
#pragma GCC optimize("-fwhole-program")
#pragma GCC optimize("-freorder-blocks")
#pragma GCC optimize("-fschedule-insns")
#pragma GCC optimize("inline-functions")
#pragma GCC optimize("-ftree-tail-merge")
#pragma GCC optimize("-fschedule-insns2")
#pragma GCC optimize("-fstrict-aliasing")
#pragma GCC optimize("-fstrict-overflow")
#pragma GCC optimize("-falign-functions")
#pragma GCC optimize("-fcse-skip-blocks")
#pragma GCC optimize("-fcse-follow-jumps")
#pragma GCC optimize("-fsched-interblock")
#pragma GCC optimize("-fpartial-inlining")
#pragma GCC optimize("no-stack-protector")
#pragma GCC optimize("-freorder-functions")
#pragma GCC optimize("-findirect-inlining")
#pragma GCC optimize("-fhoist-adjacent-loads")
#pragma GCC optimize("-frerun-cse-after-loop")
#pragma GCC optimize("inline-small-functions")
#pragma GCC optimize("-finline-small-functions")
#pragma GCC optimize("-ftree-switch-conversion")
#pragma GCC optimize("-foptimize-sibling-calls")
#pragma GCC optimize("-fexpensive-optimizations")
#pragma GCC optimize("-funsafe-loop-optimizations")
#pragma GCC optimize("inline-functions-called-once")
#pragma GCC optimize("-fdelete-null-pointer-checks")
```
都加上的效果不是特别好,最好的是下面几句:
```cpp
#pragma GCC diagnostic error "-std=c++14"
#pragma GCC target("avx")
#pragma GCC optimize(3)
#pragma GCC optimize("Ofast")
```
下面是完整版$\rm{qaq}$
```cpp
#define min my_min
#define LL long long
#define max(a, b) (((a) > (b)) ? (a) : (b))
using namespace std ; LL T, N ;
LL Prime[11] = {2, 61, 97, 7, 13, 17, 23, 29}, Ans = -1 ;
inline LL guisuMul(LL a, LL b, LL m){
LL d =((long double)a / m * b + 1e-8) ;
LL r = a * b - d * m ;
return r < 0 ? r + m : r ;
}
inline LL gsm(LL a, LL b, LL p){
register LL res = 0 ;
while (b){
if (b & 1)
res = (res + a) % p ;
a = (a + a) % p, b >>= 1 ;
}
return res ;
}
inline LL expow(LL a, LL b, LL p){
if (a == 1) return 1 ;
register LL res = 1 ;
while (b){
if (b & 1)
res = gsm(res, a, p) ;
a = gsm(a, a, p), b >>= 1 ;
}
return res ;
}
inline LL my_max(LL a, LL b){ return a > b ? a : b ; }
inline LL my_min(LL a, LL b){ return a < b ? a : b ; }
inline LL qr(){
register LL k = 0 ; char c = getchar() ;
while (c < '0' || c > '9') c = getchar() ;
while (c <= '9' && c >= '0') k = (k << 1) + (k << 3) + c - 48, c = getchar() ;
return k ;
}
inline bool Test(LL p, LL x){
register LL r = 0, d = x - 1 ;
while (!(d & 1)) d >>= 1, ++ r ;
for (register LL i = expow(p, d, x), j ; r ; -- r){
j = gsm(i, i, x) ;
if (j == 1){
if (i == 1 || i == x - 1)
return 1 ;
return 0 ;
}
i = j ;
}
return 0 ;
}
#define ctz __builtin_ctzll
inline LL gcd(LL a, LL b){
if(!a) return b;
if(!b) return a;
register int t = ctz(a|b) ;
a >>= ctz(a) ;
do{
b >>= ctz(b) ;
if(a>b){LL t=b;b=a,a=t;}
b-=a;
}while(b);
return a<<t;
}
inline bool Miller_Rabin(LL x){
if (x == Prime[1] || x == Prime[0]) return 1 ;
if (!(x % Prime[1]) || !(x % Prime[0])) return 0 ;
if (Test(Prime[1], x) ^ 1) return 0 ;
if (Test(Prime[0], x) ^ 1) return 0 ;
if (x == Prime[2] || x == Prime[3]) return 1 ;
if (!(x % Prime[2]) || !(x % Prime[3])) return 0 ;
if (Test(Prime[2], x) ^ 1) return 0 ;
if (Test(Prime[3], x) ^ 1) return 0 ;
if (x == Prime[4] || x == Prime[5]) return 1 ;
if (!(x % Prime[4]) || !(x % Prime[5])) return 0 ;
if (Test(Prime[4], x) ^ 1) return 0 ;
if (Test(Prime[5], x) ^ 1) return 0 ;
if (x == Prime[6] || x == Prime[7]) return 1 ;
if (!(x % Prime[6]) || !(x % Prime[7])) return 0 ;
if (Test(Prime[6], x) ^ 1) return 0 ;
if (Test(Prime[7], x) ^ 1) return 0 ;
return 1 ;
}
inline LL Irand(LL x){
return 1ll * ((rand() << 15 ^ rand()) << 30 ^ (rand() << 15 ^ rand())) % x ; //2
}//2
inline LL qwq(LL x){
register LL C = Irand(x) ;
register LL t1 = Irand(x), t2 = guisuMul(t1, t1, x) + C ; // 1
register LL dif = t1 > t2 ? (t1 - t2) : (t2 - t1), G = gcd(x, dif) ;
while (G == 1){
t1 = guisuMul(t1, t1, x) + C ;if (t1 >= x) t1 %= x ;
t2 = guisuMul(t2, t2, x) + C, t2 = guisuMul(t2, t2, x) + C ;
if (t2 >= x) t2 %= x ; dif = t1 > t2 ? (t1 - t2) : (t2 - t1), G = gcd(x, dif) ;
}
return G ;
}
inline void Pollard_Rho(LL x){
if (x == 1) return ;
if (Miller_Rabin(x)){
Ans = max(Ans, x) ; return ;
}
register LL y = x ;
while (y == x) y = qwq(x) ;
Pollard_Rho(y), Pollard_Rho(x / y) ;
}
int main(){
srand(19260817) ;
freopen("sttd.out", "w", stdout) ;
T =qr() ;
while (T --){
N = qr(), Ans = -1, Pollard_Rho(N) ;
if (Ans == N) puts("Prime") ;else printf("%lld", Ans), putchar('\n') ;
}
}
```