#pragma once
#include<cassert>
#include<functional>usingnamespacestd;#include"../modulo/binomial.hpp"
#include"formal-power-series.hpp"
#include"pow-enumerate.hpp"// f を入力として, f(g(x)) = x を満たす g(x) mod x^{deg} を返すtemplate<typenamemint>FormalPowerSeries<mint>compositional_inverse(FormalPowerSeries<mint>f,intdeg=-1){usingfps=FormalPowerSeries<mint>;assert((int)f.size()>=2andf[1]!=0);if(deg==-1)deg=f.size();if(deg<2)returnfps{0,f[1].inverse()}.pre(deg);intn=deg-1;fpsh=pow_enumerate(f)*n;for(intk=1;k<=n;k++)h[k]/=k;h=h.rev();h*=h[0].inverse();fpsg=(h.log()*mint{-n}.inverse()).exp();g*=f[1].inverse();return(g<<1).pre(deg);}// f(g(x)) = x を満たす g(x) mod x^{deg} を返す// calc_f(g, d) は f(g(x)) mod x^d を計算する関数template<typenamefps>fpscompositional_inverse(function<fps(fps,int)>calc_f,intdeg){if(deg<=2){fpsg=calc_f(fps{0,1},2);assert(g[0]==0&&g[1]!=0);g[1]=g[1].inverse();returng.pre(deg);}fpsg=compositional_inverse(calc_f,(deg+1)/2);fpsfg=calc_f(g,deg+1);fpsfdg=(fg.diff()*g.diff().inv(deg)).pre(deg);return(g-(fg-fps{0,1})*fdg.inv()).pre(deg);}/*
* @brief 逆関数
*/
#line 2 "fps/fps-compositional-inverse.hpp"
#include<cassert>
#include<functional>usingnamespacestd;#line 2 "modulo/binomial.hpp"
#line 4 "modulo/binomial.hpp"
#include<type_traits>
#include<vector>usingnamespacestd;// コンストラクタの MAX に 「C(n, r) や fac(n) でクエリを投げる最大の n 」// を入れると倍速くらいになる// mod を超えて前計算して 0 割りを踏むバグは対策済みtemplate<typenameT>structBinomial{vector<T>f,g,h;Binomial(intMAX=0){assert(T::get_mod()!=0&&"Binomial<mint>()");f.resize(1,T{1});g.resize(1,T{1});h.resize(1,T{1});if(MAX>0)extend(MAX+1);}voidextend(intm=-1){intn=f.size();if(m==-1)m=n*2;m=min<int>(m,T::get_mod());if(n>=m)return;f.resize(m);g.resize(m);h.resize(m);for(inti=n;i<m;i++)f[i]=f[i-1]*T(i);g[m-1]=f[m-1].inverse();h[m-1]=g[m-1]*f[m-2];for(inti=m-2;i>=n;i--){g[i]=g[i+1]*T(i+1);h[i]=g[i]*f[i-1];}}Tfac(inti){if(i<0)returnT(0);while(i>=(int)f.size())extend();returnf[i];}Tfinv(inti){if(i<0)returnT(0);while(i>=(int)g.size())extend();returng[i];}Tinv(inti){if(i<0)return-inv(-i);while(i>=(int)h.size())extend();returnh[i];}TC(intn,intr){if(n<0||n<r||r<0)returnT(0);returnfac(n)*finv(n-r)*finv(r);}inlineToperator()(intn,intr){returnC(n,r);}template<typenameI>Tmultinomial(constvector<I>&r){static_assert(is_integral<I>::value==true);intn=0;for(auto&x:r){if(x<0)returnT(0);n+=x;}Tres=fac(n);for(auto&x:r)res*=finv(x);returnres;}template<typenameI>Toperator()(constvector<I>&r){returnmultinomial(r);}TC_naive(intn,intr){if(n<0||n<r||r<0)returnT(0);Tret=T(1);r=min(r,n-r);for(inti=1;i<=r;++i)ret*=inv(i)*(n--);returnret;}TP(intn,intr){if(n<0||n<r||r<0)returnT(0);returnfac(n)*finv(n-r);}// [x^r] 1 / (1-x)^nTH(intn,intr){if(n<0||r<0)returnT(0);returnr==0?1:C(n+r-1,r);}};#line 2 "fps/formal-power-series.hpp"
template<typenamemint>structFormalPowerSeries:vector<mint>{usingvector<mint>::vector;usingFPS=FormalPowerSeries;FPS&operator+=(constFPS&r){if(r.size()>this->size())this->resize(r.size());for(inti=0;i<(int)r.size();i++)(*this)[i]+=r[i];return*this;}FPS&operator+=(constmint&r){if(this->empty())this->resize(1);(*this)[0]+=r;return*this;}FPS&operator-=(constFPS&r){if(r.size()>this->size())this->resize(r.size());for(inti=0;i<(int)r.size();i++)(*this)[i]-=r[i];return*this;}FPS&operator-=(constmint&r){if(this->empty())this->resize(1);(*this)[0]-=r;return*this;}FPS&operator*=(constmint&v){for(intk=0;k<(int)this->size();k++)(*this)[k]*=v;return*this;}FPS&operator/=(constFPS&r){if(this->size()<r.size()){this->clear();return*this;}intn=this->size()-r.size()+1;if((int)r.size()<=64){FPSf(*this),g(r);g.shrink();mintcoeff=g.back().inverse();for(auto&x:g)x*=coeff;intdeg=(int)f.size()-(int)g.size()+1;intgs=g.size();FPSquo(deg);for(inti=deg-1;i>=0;i--){quo[i]=f[i+gs-1];for(intj=0;j<gs;j++)f[i+j]-=quo[i]*g[j];}*this=quo*coeff;this->resize(n,mint(0));return*this;}return*this=((*this).rev().pre(n)*r.rev().inv(n)).pre(n).rev();}FPS&operator%=(constFPS&r){*this-=*this/r*r;shrink();return*this;}FPSoperator+(constFPS&r)const{returnFPS(*this)+=r;}FPSoperator+(constmint&v)const{returnFPS(*this)+=v;}FPSoperator-(constFPS&r)const{returnFPS(*this)-=r;}FPSoperator-(constmint&v)const{returnFPS(*this)-=v;}FPSoperator*(constFPS&r)const{returnFPS(*this)*=r;}FPSoperator*(constmint&v)const{returnFPS(*this)*=v;}FPSoperator/(constFPS&r)const{returnFPS(*this)/=r;}FPSoperator%(constFPS&r)const{returnFPS(*this)%=r;}FPSoperator-()const{FPSret(this->size());for(inti=0;i<(int)this->size();i++)ret[i]=-(*this)[i];returnret;}voidshrink(){while(this->size()&&this->back()==mint(0))this->pop_back();}FPSrev()const{FPSret(*this);reverse(begin(ret),end(ret));returnret;}FPSdot(FPSr)const{FPSret(min(this->size(),r.size()));for(inti=0;i<(int)ret.size();i++)ret[i]=(*this)[i]*r[i];returnret;}// 前 sz 項を取ってくる。sz に足りない項は 0 埋めするFPSpre(intsz)const{FPSret(begin(*this),begin(*this)+min((int)this->size(),sz));if((int)ret.size()<sz)ret.resize(sz);returnret;}FPSoperator>>(intsz)const{if((int)this->size()<=sz)return{};FPSret(*this);ret.erase(ret.begin(),ret.begin()+sz);returnret;}FPSoperator<<(intsz)const{FPSret(*this);ret.insert(ret.begin(),sz,mint(0));returnret;}FPSdiff()const{constintn=(int)this->size();FPSret(max(0,n-1));mintone(1),coeff(1);for(inti=1;i<n;i++){ret[i-1]=(*this)[i]*coeff;coeff+=one;}returnret;}FPSintegral()const{constintn=(int)this->size();FPSret(n+1);ret[0]=mint(0);if(n>0)ret[1]=mint(1);automod=mint::get_mod();for(inti=2;i<=n;i++)ret[i]=(-ret[mod%i])*(mod/i);for(inti=0;i<n;i++)ret[i+1]*=(*this)[i];returnret;}minteval(mintx)const{mintr=0,w=1;for(auto&v:*this)r+=w*v,w*=x;returnr;}FPSlog(intdeg=-1)const{assert(!(*this).empty()&&(*this)[0]==mint(1));if(deg==-1)deg=(int)this->size();return(this->diff()*this->inv(deg)).pre(deg-1).integral();}FPSpow(int64_tk,intdeg=-1)const{constintn=(int)this->size();if(deg==-1)deg=n;if(k==0){FPSret(deg);if(deg)ret[0]=1;returnret;}for(inti=0;i<n;i++){if((*this)[i]!=mint(0)){mintrev=mint(1)/(*this)[i];FPSret=(((*this*rev)>>i).log(deg)*k).exp(deg);ret*=(*this)[i].pow(k);ret=(ret<<(i*k)).pre(deg);if((int)ret.size()<deg)ret.resize(deg,mint(0));returnret;}if(__int128_t(i+1)*k>=deg)returnFPS(deg,mint(0));}returnFPS(deg,mint(0));}staticvoid*ntt_ptr;staticvoidset_fft();FPS&operator*=(constFPS&r);voidntt();voidintt();voidntt_doubling();staticintntt_pr();FPSinv(intdeg=-1)const;FPSexp(intdeg=-1)const;};template<typenamemint>void*FormalPowerSeries<mint>::ntt_ptr=nullptr;/**
* @brief 多項式/形式的冪級数ライブラリ
* @docs docs/fps/formal-power-series.md
*/#line 2 "fps/pow-enumerate.hpp"
#line 5 "fps/pow-enumerate.hpp"
usingnamespacestd;#line 8 "fps/pow-enumerate.hpp"
// [x^n] f(x)^i g(x) を i=0,1,...,m で列挙// n = (f の次数) - 1template<typenamemint>FormalPowerSeries<mint>pow_enumerate(FormalPowerSeries<mint>f,FormalPowerSeries<mint>g={1},intm=-1){usingfps=FormalPowerSeries<mint>;intn=f.size()-1,k=1;g.resize(n+1);if(m==-1)m=n;inth=1;while(h<n+1)h*=2;fpsP((n+1)*k),Q((n+1)*k),nP,nQ,buf,buf2;for(inti=0;i<=n;i++)P[i*k+0]=g[i];for(inti=0;i<=n;i++)Q[i*k+0]=-f[i];Q[0]+=1;while(n){mintinv2=mint{2}.inverse();mintw=mint{fps::ntt_pr()}.pow((mint::get_mod()-1)/(2*k));mintiw=w.inverse();buf2.resize(k);autontt_doubling=[&](){copy(begin(buf),end(buf),begin(buf2));buf2.intt();mintc=1;for(inti=0;i<k;i++)buf2[i]*=c,c*=w;buf2.ntt();copy(begin(buf2),end(buf2),back_inserter(buf));};nP.clear(),nQ.clear();for(inti=0;i<=n;i++){buf.resize(k);copy(begin(P)+i*k,begin(P)+(i+1)*k,begin(buf));ntt_doubling();copy(begin(buf),end(buf),back_inserter(nP));buf.resize(k);copy(begin(Q)+i*k,begin(Q)+(i+1)*k,begin(buf));if(i==0){for(intj=0;j<k;j++)buf[j]-=1;ntt_doubling();for(intj=0;j<k;j++)buf[j]+=1;for(intj=0;j<k;j++)buf[k+j]-=1;}else{ntt_doubling();}copy(begin(buf),end(buf),back_inserter(nQ));}nP.resize(2*h*2*k);nQ.resize(2*h*2*k);fpsp(2*h),q(2*h);w=mint{fps::ntt_pr()}.pow((mint::get_mod()-1)/(2*h));iw=w.inverse();vector<int>btr;if(n%2){btr.resize(h);for(inti=0,lg=__builtin_ctz(h);i<h;i++){btr[i]=(btr[i>>1]>>1)+((i&1)<<(lg-1));}}for(intj=0;j<2*k;j++){p.assign(2*h,0);q.assign(2*h,0);for(inti=0;i<h;i++){p[i]=nP[i*2*k+j],q[i]=nQ[i*2*k+j];}p.ntt(),q.ntt();for(inti=0;i<2*h;i+=2)swap(q[i],q[i+1]);for(inti=0;i<2*h;i++)p[i]*=q[i];for(inti=0;i<h;i++)q[i]=q[i*2]*q[i*2+1];if(n%2==0){for(inti=0;i<h;i++)p[i]=(p[i*2]+p[i*2+1])*inv2;}else{mintc=inv2;buf.resize(h);for(inti:btr)buf[i]=(p[i*2]-p[i*2+1])*c,c*=iw;swap(p,buf);}p.resize(h),q.resize(h);p.intt(),q.intt();for(inti=0;i<h;i++)nP[i*2*k+j]=p[i];for(inti=0;i<h;i++)nQ[i*2*k+j]=q[i];}nP.resize((n/2+1)*2*k);nQ.resize((n/2+1)*2*k);swap(P,nP),swap(Q,nQ);n/=2,h/=2,k*=2;}fpsS{begin(P),begin(P)+k};fpsT{begin(Q),begin(Q)+k};S.intt(),T.intt(),T[0]-=1;if(f[0]==0)returnS.rev().pre(m+1);return(S.rev()*(T+(fps{1}<<k)).rev().inv(m+1)).pre(m+1);}/*
// 別バージョン
// [x^n] f(x)^i g(x) を i=0,1,...,m で列挙
// n = (f の次数) - 1
FormalPowerSeries<mint> pow_enumerate(FormalPowerSeries<mint> f,
FormalPowerSeries<mint> g = {1},
int m = -1) {
using fps = FormalPowerSeries<mint>;
int n = f.size() - 1, k = 1;
g.resize(n + 1);
if (m == -1) m = n;
int h = 1;
while (h < n + 1) h *= 2;
fps P(h * k), Q(h * k), nP(4 * h * k), nQ(4 * h * k), nR(2 * h * k);
for (int i = 0; i <= n; i++) P[i] = g[i], Q[i] = -f[i];
while (n) {
nP.assign(4 * h * k, 0);
nQ.assign(4 * h * k, 0);
for (int i = 0; i < k; i++) {
copy(begin(P) + i * h, begin(P) + i * h + n + 1, begin(nP) + i * 2 * h);
copy(begin(Q) + i * h, begin(Q) + i * h + n + 1, begin(nQ) + i * 2 * h);
}
nQ[k * 2 * h] += 1;
nP.ntt(), nQ.ntt();
for (int i = 0; i < 4 * h * k; i += 2) swap(nQ[i], nQ[i + 1]);
for (int i = 0; i < 4 * h * k; i++) nP[i] *= nQ[i];
for (int i = 0; i < 2 * h * k; i++) nR[i] = nQ[i * 2] * nQ[i * 2 + 1];
nP.intt(), nR.intt();
nR[0] -= 1;
P.assign(h * k, 0), Q.assign(h * k, 0);
for (int i = 0; i < 2 * k; i++) {
for (int j = 0; j <= n / 2; j++) {
P[i * h / 2 + j] = nP[i * 2 * h + j * 2 + n % 2];
Q[i * h / 2 + j] = nR[i * h + j];
}
}
n /= 2, h /= 2, k *= 2;
}
fps S{begin(P), begin(P) + k}, T{begin(Q), begin(Q) + k};
T.push_back(1);
return (S.rev() * T.rev().inv(m + 1)).pre(m + 1);
}
*//**
* @brief pow 列挙
*/#line 10 "fps/fps-compositional-inverse.hpp"
// f を入力として, f(g(x)) = x を満たす g(x) mod x^{deg} を返すtemplate<typenamemint>FormalPowerSeries<mint>compositional_inverse(FormalPowerSeries<mint>f,intdeg=-1){usingfps=FormalPowerSeries<mint>;assert((int)f.size()>=2andf[1]!=0);if(deg==-1)deg=f.size();if(deg<2)returnfps{0,f[1].inverse()}.pre(deg);intn=deg-1;fpsh=pow_enumerate(f)*n;for(intk=1;k<=n;k++)h[k]/=k;h=h.rev();h*=h[0].inverse();fpsg=(h.log()*mint{-n}.inverse()).exp();g*=f[1].inverse();return(g<<1).pre(deg);}// f(g(x)) = x を満たす g(x) mod x^{deg} を返す// calc_f(g, d) は f(g(x)) mod x^d を計算する関数template<typenamefps>fpscompositional_inverse(function<fps(fps,int)>calc_f,intdeg){if(deg<=2){fpsg=calc_f(fps{0,1},2);assert(g[0]==0&&g[1]!=0);g[1]=g[1].inverse();returng.pre(deg);}fpsg=compositional_inverse(calc_f,(deg+1)/2);fpsfg=calc_f(g,deg+1);fpsfdg=(fg.diff()*g.diff().inv(deg)).pre(deg);return(g-(fg-fps{0,1})*fdg.inv()).pre(deg);}/*
* @brief 逆関数
*/