P13844

· · 题解

逐点牛顿迭代。

G 求关于 x_n 的偏导得:

\begin{aligned} \frac{\partial}{\partial{x_n}}G&=\frac{\partial}{\partial{x_n}}\ln{F}\\ &=\frac{1}{F}\times\left(\frac{\partial}{\partial{x_n}}F\right) \end{aligned}

提取 [x_n^0] 系数,得:

[x_n^1]G=\left([x_n^0]\frac{1}{F}\right)\times\left([x_n^1]F\right)

转换为计算 H=\frac{1}{F};然后对于 k=1\to n 依次算出 G\bmod{x_1^2x_2^2\cdots x_k^2} 的值即可。复杂度 \sum\limits_{1\le k\le n}k^22^k=\mathcal{O}(n^22^n)

计算 H 是同理的,同样求偏导:

\begin{aligned} \frac{\partial}{\partial{x_n}}H&=\frac{\partial}{\partial{x_n}} \frac{1}{F}\\ &=-\frac{1}{F^2}\times\left(\frac{\partial}{\partial{x_n}}F\right)\\ &=-H^2\times\left(\frac{\partial}{\partial{x_n}}F\right) \end{aligned}

提取 [x_n^0] 的系数,得:

[x_n^1]H=-\left([x_n^0]H\right)^2\times\left([x_n^1]F\right)

同样是,对于 k=1\to n 依次算出 H\bmod{x_1^2x_2^2\cdots x_k^2} 的值。

总复杂度 \mathcal{O}(n^22^n),过程中不涉及任何求乘法逆元操作。需要注意循环顺序对常数因子得影响,下面给出一份可以通过本题的代码(删去了 FastIO 部分):

#include<bits/stdc++.h>
// #define int long long
// #pragma GCC optimize(3,"Ofast","inline")
#define debug(...) fprintf(stderr,__VA_ARGS__)
#define ll long long
#define bint __int128
#define ull unsigned long long
#define uint unsigned int
#define ld double
#define PII pair<int,int>
#define chkmax(a,b) a=max(a,b)
#define chkmin(a,b) a=min(a,b)
#define umap unordered_map
#define rep(k,l,r) for(int k=l;k<=r;++k)
#define per(k,r,l) for(int k=r;k>=l;--k)
#define cl(f,x) memset(f,x,sizeof(f))
#define pcnt(x) __builtin_popcount(x)
#define lg(x) (31-__builtin_clz(x))
using namespace std;
void file_IO() {
    freopen("test.in","r",stdin);
    freopen("test.out","w",stdout);
}
bool M1;
const int INF=0x3f3f3f3f;
const ll INFLL=0x3f3f3f3f3f3f3f3f;
const ld eps=1e-9;
const int N=21,M=1<<20;
ull _f[N][M],_g[N][M],_h[N][M];
inline void fmt(ull f[],int n,ull op) {
    n=1<<n;
    for(int d=2,w=1;d<=n;d<<=1,w<<=1) {
        for(int i=0;i<n;i+=d) {
            rep(j,0,w-1)
                f[i+j+w]+=f[i+j]*op;
        }
    }
}
inline void conv(ull f[],ull g[],ull h[],int n) {
    rep(i,0,n) {
        rep(S,0,(1<<n)-1)
            _f[i][S]=_g[i][S]=_h[i][S]=0;
    }
    rep(S,0,(1<<n)-1) {
        _f[pcnt(S)][S]=f[S];
        _g[pcnt(S)][S]=g[S];
    }
    rep(i,0,n) {
        fmt(_f[i],n,1);
        fmt(_g[i],n,1);
    }
    rep(i,0,n) {
        rep(j,0,i) {
            rep(S,0,(1<<n)-1)
                _h[i][S]+=_f[j][S]*_g[i-j][S];
        }
    }
    rep(i,0,n)
        fmt(_h[i],n,-1);
    rep(S,0,(1<<n)-1)
        h[S]=_h[pcnt(S)][S];
}
ull _g2[N][M];
inline void conv2(ull f[],ull g[],ull h[],int n) { // g^2
    rep(i,0,n) {
        rep(S,0,(1<<n)-1)
            _f[i][S]=_g[i][S]=_g2[i][S]=_h[i][S]=0;
    }
    rep(S,0,(1<<n)-1) {
        _f[pcnt(S)][S]=f[S];
        _g[pcnt(S)][S]=g[S];
    }
    rep(i,0,n) {
        fmt(_f[i],n,1);
        fmt(_g[i],n,1);
    }
    rep(i,0,n) {
        rep(j,0,i) {
            rep(S,0,(1<<n)-1)
                _g2[i][S]+=_g[j][S]*_g[i-j][S];
        }
    }
    rep(i,0,n) {
        rep(j,0,i) {
            rep(S,0,(1<<n)-1)
                _h[i][S]+=_f[j][S]*_g2[i-j][S];
        }
    }
    rep(i,0,n)
        fmt(_h[i],n,-1);
    rep(S,0,(1<<n)-1)
        h[S]=-_h[pcnt(S)][S];
}
ull _ft[M],_gt[M];
inline void inv(ull f[],int n) {
    _ft[0]=1;
    rep(i,0,n-1)
        conv2(f+(1<<i),_ft,_ft+(1<<i),i);
    rep(S,0,(1<<n)-1)
        f[S]=_ft[S];
}
inline void ln(ull f[],int n) {
    rep(S,0,(1<<n)-1)
        _gt[S]=f[S];
    inv(_gt,n);
    _ft[0]=0;
    rep(i,0,n-1)
        conv(_gt,f+(1<<i),_ft+(1<<i),i);
    rep(S,0,(1<<n)-1)
        f[S]=_ft[S];
}
ull f[M];
void solve() {
    int n=read();
    rep(i,0,(1<<n)-1)
        f[i]=read();
    ln(f,n);
    rep(i,0,(1<<n)-1)
        printf("%llu ",f[i]);
    puts("");
}
bool M2;
// g++ P13843.cpp -std=c++14 -Wall -O2 -o P13843
signed main() {
    // file_IO();
    int testcase=1;
    // scanf("%d",&testcase);
    while(testcase--)
        solve();
    debug("used time = %dms\n",(signed)(1000*clock()/CLOCKS_PER_SEC));
    debug("used memory = %dMB\n",(signed)((&M1-&M2)/1024/1024));
    return 0;
}