P-recursiveの高速計算
(fps/find-p-recursive.hpp)
- View this file on GitHub
- Last update: 2024-05-03 21:06:15+09:00
- Include:
#include "fps/find-p-recursive.hpp"
P-recursiveの高速計算
P-recursiveの第$k$項を$\mathrm{O}(\sqrt{k} \log k)$で計算出来たり出来なかったりするライブラリ。
使い方
find_p_recursive(a, d)
十分長い数列$a$および次数$d$が与えられたとき、
\[a_{i+n} f_n(i) + \ldots + a_{i}f_0(i) \equiv 0 \pmod {p}\]を満たす$d$次多項式の列$f_n(i),\ldots,f_1(i),f_0(i)$を返すライブラリ。他のライブラリとの関係上、列が降順($(f_n,\ldots,f_1,f_0)$の順番)になっているのに注意。
適切に立式すれば一次連立方程式に帰着するのでガウスの掃き出し法を使えば求まる。計算量は$\mathrm{O}((dn)^3)$程度。
kth_term_of_p_recursive(a, k, d)
十分長い数列$a$および次数$d$、整数$k$が与えられたとき、$a_k$を返すライブラリ。
計算量は数列が$n$項間漸化式で表せるときに$\mathrm{O}(n^2 \sqrt{kd} (\log p + \log kd))$程度。
アルゴリズム初出の日本語記事は削除済みだが、CF 1479E editorialに関連するアルゴリズムの解説がある。
kth_term_of_p_recursive(a, k)
エスパー用の関数。
verification codeの出力(一部加筆)
P-recursiveの例も兼ねて貼る。
Constant Function
a_{k+1} = a_{k}
Identity Function (a_i = i + 1)
(k + 1)a_{k+1} = (k + 2)a_{k}
Factorial
a_{k+1} = (k + 1)a_{k}
Inversion of Factorial
(k + 1)a_{k+1} = a_{k}
Binomial (a_i = binom(i + 3, 3))
(k + 1)a_{k+1} = (k + 4)a_{k}
Montmort Number
a_{k+2} = (k + 1)a_{k+1} + (k + 1)a_{k}
Catalan Number
(k + 2)a_{k+1} = (4k + 2)a_{k}
Motzkin Number
(k + 4)a_{k+2} = (2k + 5)a_{k+1} + (3k + 3)a_{k}
Schroder number
(k + 3)a_{k+2} = (6k + 9)a_{k+1} + (998244352k + 0)a_{k}
注意点
バグりやすい上に数列によっては正しい値を求められない。
- $d$に正しい値を指定しないと、バグる(誤った関係式を導出する)
- $i + k \leq n$について$f_{n}(i) \not \equiv 0$が必要。そうでない場合、バグる(そもそも漸化式から正しい値を求めることが不可能?)
また、P-recursiveで表せない列の第$k$項は当然求められない。
- 例えばBell Numberや$S(n,k)$は二項係数の和で表せるのでP-recursiveのような気がしてくるが(本当?)、実際は違うので高速計算できない(と思う)。
Depends on
多項式/形式的冪級数ライブラリ (fps/formal-power-series.hpp)
fps/sample-point-shift.hpp
matrix/gauss-elimination.hpp
matrix/inverse-matrix.hpp
matrix/linear-equation.hpp
行列ライブラリ (matrix/matrix.hpp)
多項式行列のprefix product (matrix/polynomial-matrix-prefix-prod.hpp)
modulo/binomial.hpp
Verified with
verify/verify-unit-test/p-recursive.test.cpp
verify/verify-yosupo-fps/yosupo-factorial-p-recursive.test.cpp
verify/verify-yuki/yuki-1533.test.cpp
Code
#pragma once
#include "../matrix/linear-equation.hpp"
#include "../matrix/polynomial-matrix-prefix-prod.hpp"
#include "formal-power-series.hpp"
// return polynomial coefficient s.t. sum_{j=k...0} f_j(i) a_{i+j} = 0
// (In more details, read verification code.)
template <typename mint>
vector<FormalPowerSeries<mint>> find_p_recursive(vector<mint>& a, int d) {
using fps = FormalPowerSeries<mint>;
int n = a.size();
int k = (n + 2) / (d + 2) - 1;
if (k <= 0) return {};
int m = (k + 1) * (d + 1);
vector<vector<mint>> M(m - 1, vector<mint>(m));
for (int i = 0; i < m - 1; i++) {
for (int j = 0; j <= k; j++) {
mint base = 1;
for (int l = 0; l <= d; l++) {
M[i][(d + 1) * j + l] = base * a[i + j];
base *= i + j;
}
}
}
auto gauss = LinearEquation<mint>(M, vector<mint>(m - 1, 0));
if (gauss.size() <= 1) return {};
auto c = gauss[1];
while (all_of(end(c) - d - 1, end(c), [](mint x) { return x == mint(0); })) {
c.erase(end(c) - d - 1, end(c));
}
k = c.size() / (d + 1) - 1;
vector<fps> res;
for (int i = 0, j = 0; i < (int)c.size(); i += d + 1, j++) {
fps f{1}, base{j, 1};
fps sm;
for (int l = 0; l <= d; l++) sm += f * c[i + l], f *= base;
res.push_back(sm);
}
reverse(begin(res), end(res));
return res;
}
template <typename mint>
mint kth_term_of_p_recursive(vector<mint>& a, long long k, int d) {
if (k < (int)a.size()) return a[k];
if (all_of(begin(a), end(a), [](mint x) { return x == mint(0); })) return 0;
auto fs = find_p_recursive(a, d);
assert(fs.empty() == false);
int deg = fs.size() - 1;
assert(deg >= 1);
Matrix<FormalPowerSeries<mint>> m(deg), denom(1);
for (int i = 0; i < deg; i++) m[0][i] = -fs[i + 1];
for (int i = 1; i < deg; i++) m[i][i - 1] = fs[0];
denom[0][0] = fs[0];
Matrix<mint> a0(deg);
for (int i = 0; i < deg; i++) a0[i][0] = a[deg - 1 - i];
mint res = (polynomial_matrix_prod(m, k - deg + 1) * a0)[0][0];
res /= polynomial_matrix_prod(denom, k - deg + 1)[0][0];
return res;
}
// K 項列挙
template <typename mint>
vector<mint> kth_term_of_p_recursive_enumerate(vector<mint>& a, long long K,
int d) {
if (K < (int)a.size()) return {begin(a), begin(a) + K};
if (all_of(begin(a), end(a), [](mint x) { return x == mint(0); }))
return vector<mint>(K, 0);
auto fs = find_p_recursive(a, d);
if (fs.empty()) return {};
int k = fs.size() - 1, is = a.size();
a.resize(K);
for (auto& f : fs) f.shrink();
reverse(begin(fs), end(fs));
for (int ipk = is; ipk < K; ipk++) {
int i = ipk - k;
mint s = 0;
for (int j = 0; j < k; j++) {
if (i + j >= 0) s += a[i + j] * fs[j].eval(i);
}
a[ipk] = -s / fs[k].eval(i);
}
return a;
}
template <typename mint>
mint kth_term_of_p_recursive(vector<mint>& a, long long k) {
if (k < (int)a.size()) return a[k];
if (all_of(begin(a), end(a), [](mint x) { return x == mint(0); })) return 0;
int n = a.size() - 1;
vector<mint> b{begin(a), end(a) - 1};
for (int d = 0;; d++) {
#ifdef NyaanLocal
cerr << "d = " << d << endl;
#endif
if ((n + 2) / (d + 2) <= 1) break;
if (kth_term_of_p_recursive(b, n, d) == a.back()) {
#ifdef NyaanLocal
cerr << "Found, d = " << d << endl;
#endif
return kth_term_of_p_recursive(a, k, d);
}
}
cerr << "Failed." << endl;
exit(1);
}
/**
* @brief P-recursiveの高速計算
* @docs docs/fps/find-p-recursive.md
*/
#line 2 "fps/find-p-recursive.hpp"
#line 2 "matrix/linear-equation.hpp"
#line 2 "matrix/gauss-elimination.hpp"
#include <utility>
#include <vector>
using namespace std;
// {rank, det(非正方行列の場合は未定義)} を返す
// 型が double や Rational でも動くはず?(未検証)
//
// pivot 候補 : [0, pivot_end)
template <typename T>
std::pair<int, T> GaussElimination(vector<vector<T>> &a, int pivot_end = -1,
bool diagonalize = false) {
if (a.empty()) return {0, 1};
int H = a.size(), W = a[0].size(), rank = 0;
if (pivot_end == -1) pivot_end = W;
T det = 1;
for (int j = 0; j < pivot_end; j++) {
int idx = -1;
for (int i = rank; i < H; i++) {
if (a[i][j] != T(0)) {
idx = i;
break;
}
}
if (idx == -1) {
det = 0;
continue;
}
if (rank != idx) det = -det, swap(a[rank], a[idx]);
det *= a[rank][j];
if (diagonalize && a[rank][j] != T(1)) {
T coeff = T(1) / a[rank][j];
for (int k = j; k < W; k++) a[rank][k] *= coeff;
}
int is = diagonalize ? 0 : rank + 1;
for (int i = is; i < H; i++) {
if (i == rank) continue;
if (a[i][j] != T(0)) {
T coeff = a[i][j] / a[rank][j];
for (int k = j; k < W; k++) a[i][k] -= a[rank][k] * coeff;
}
}
rank++;
}
return make_pair(rank, det);
}
#line 4 "matrix/linear-equation.hpp"
// 解が存在する場合は, 解が v + C_1 w_1 + ... + C_k w_k と表せるとして
// (v, w_1, ..., w_k) を返す
// 解が存在しない場合は空のベクトルを返す
//
// double や Rational でも動くはず?(未検証)
template <typename T>
vector<vector<T>> LinearEquation(vector<vector<T>> a, vector<T> b) {
int H = a.size(), W = a[0].size();
for (int i = 0; i < H; i++) a[i].push_back(b[i]);
auto p = GaussElimination(a, W, true);
int rank = p.first;
for (int i = rank; i < H; ++i) {
if (a[i][W] != 0) return vector<vector<T>>{};
}
vector<vector<T>> res(1, vector<T>(W));
vector<int> pivot(W, -1);
for (int i = 0, j = 0; i < rank; ++i) {
while (a[i][j] == 0) ++j;
res[0][j] = a[i][W], pivot[j] = i;
}
for (int j = 0; j < W; ++j) {
if (pivot[j] == -1) {
vector<T> x(W);
x[j] = 1;
for (int k = 0; k < j; ++k) {
if (pivot[k] != -1) x[k] = -a[pivot[k]][j];
}
res.push_back(x);
}
}
return res;
}
#line 2 "matrix/polynomial-matrix-prefix-prod.hpp"
#line 2 "fps/formal-power-series.hpp"
template <typename mint>
struct FormalPowerSeries : vector<mint> {
using vector<mint>::vector;
using FPS = FormalPowerSeries;
FPS &operator+=(const FPS &r) {
if (r.size() > this->size()) this->resize(r.size());
for (int i = 0; i < (int)r.size(); i++) (*this)[i] += r[i];
return *this;
}
FPS &operator+=(const mint &r) {
if (this->empty()) this->resize(1);
(*this)[0] += r;
return *this;
}
FPS &operator-=(const FPS &r) {
if (r.size() > this->size()) this->resize(r.size());
for (int i = 0; i < (int)r.size(); i++) (*this)[i] -= r[i];
return *this;
}
FPS &operator-=(const mint &r) {
if (this->empty()) this->resize(1);
(*this)[0] -= r;
return *this;
}
FPS &operator*=(const mint &v) {
for (int k = 0; k < (int)this->size(); k++) (*this)[k] *= v;
return *this;
}
FPS &operator/=(const FPS &r) {
if (this->size() < r.size()) {
this->clear();
return *this;
}
int n = this->size() - r.size() + 1;
if ((int)r.size() <= 64) {
FPS f(*this), g(r);
g.shrink();
mint coeff = g.back().inverse();
for (auto &x : g) x *= coeff;
int deg = (int)f.size() - (int)g.size() + 1;
int gs = g.size();
FPS quo(deg);
for (int i = deg - 1; i >= 0; i--) {
quo[i] = f[i + gs - 1];
for (int j = 0; j < gs; j++) f[i + j] -= quo[i] * g[j];
}
*this = quo * coeff;
this->resize(n, mint(0));
return *this;
}
return *this = ((*this).rev().pre(n) * r.rev().inv(n)).pre(n).rev();
}
FPS &operator%=(const FPS &r) {
*this -= *this / r * r;
shrink();
return *this;
}
FPS operator+(const FPS &r) const { return FPS(*this) += r; }
FPS operator+(const mint &v) const { return FPS(*this) += v; }
FPS operator-(const FPS &r) const { return FPS(*this) -= r; }
FPS operator-(const mint &v) const { return FPS(*this) -= v; }
FPS operator*(const FPS &r) const { return FPS(*this) *= r; }
FPS operator*(const mint &v) const { return FPS(*this) *= v; }
FPS operator/(const FPS &r) const { return FPS(*this) /= r; }
FPS operator%(const FPS &r) const { return FPS(*this) %= r; }
FPS operator-() const {
FPS ret(this->size());
for (int i = 0; i < (int)this->size(); i++) ret[i] = -(*this)[i];
return ret;
}
void shrink() {
while (this->size() && this->back() == mint(0)) this->pop_back();
}
FPS rev() const {
FPS ret(*this);
reverse(begin(ret), end(ret));
return ret;
}
FPS dot(FPS r) const {
FPS ret(min(this->size(), r.size()));
for (int i = 0; i < (int)ret.size(); i++) ret[i] = (*this)[i] * r[i];
return ret;
}
// 前 sz 項を取ってくる。sz に足りない項は 0 埋めする
FPS pre(int sz) const {
FPS ret(begin(*this), begin(*this) + min((int)this->size(), sz));
if ((int)ret.size() < sz) ret.resize(sz);
return ret;
}
FPS operator>>(int sz) const {
if ((int)this->size() <= sz) return {};
FPS ret(*this);
ret.erase(ret.begin(), ret.begin() + sz);
return ret;
}
FPS operator<<(int sz) const {
FPS ret(*this);
ret.insert(ret.begin(), sz, mint(0));
return ret;
}
FPS diff() const {
const int n = (int)this->size();
FPS ret(max(0, n - 1));
mint one(1), coeff(1);
for (int i = 1; i < n; i++) {
ret[i - 1] = (*this)[i] * coeff;
coeff += one;
}
return ret;
}
FPS integral() const {
const int n = (int)this->size();
FPS ret(n + 1);
ret[0] = mint(0);
if (n > 0) ret[1] = mint(1);
auto mod = mint::get_mod();
for (int i = 2; i <= n; i++) ret[i] = (-ret[mod % i]) * (mod / i);
for (int i = 0; i < n; i++) ret[i + 1] *= (*this)[i];
return ret;
}
mint eval(mint x) const {
mint r = 0, w = 1;
for (auto &v : *this) r += w * v, w *= x;
return r;
}
FPS log(int deg = -1) const {
assert(!(*this).empty() && (*this)[0] == mint(1));
if (deg == -1) deg = (int)this->size();
return (this->diff() * this->inv(deg)).pre(deg - 1).integral();
}
FPS pow(int64_t k, int deg = -1) const {
const int n = (int)this->size();
if (deg == -1) deg = n;
if (k == 0) {
FPS ret(deg);
if (deg) ret[0] = 1;
return ret;
}
for (int i = 0; i < n; i++) {
if ((*this)[i] != mint(0)) {
mint rev = mint(1) / (*this)[i];
FPS ret = (((*this * rev) >> i).log(deg) * k).exp(deg);
ret *= (*this)[i].pow(k);
ret = (ret << (i * k)).pre(deg);
if ((int)ret.size() < deg) ret.resize(deg, mint(0));
return ret;
}
if (__int128_t(i + 1) * k >= deg) return FPS(deg, mint(0));
}
return FPS(deg, mint(0));
}
static void *ntt_ptr;
static void set_fft();
FPS &operator*=(const FPS &r);
void ntt();
void intt();
void ntt_doubling();
static int ntt_pr();
FPS inv(int deg = -1) const;
FPS exp(int deg = -1) const;
};
template <typename mint>
void *FormalPowerSeries<mint>::ntt_ptr = nullptr;
/**
* @brief 多項式/形式的冪級数ライブラリ
* @docs docs/fps/formal-power-series.md
*/
#line 2 "fps/sample-point-shift.hpp"
#line 2 "modulo/binomial.hpp"
#include <cassert>
#include <type_traits>
#line 6 "modulo/binomial.hpp"
using namespace std;
// コンストラクタの MAX に 「C(n, r) や fac(n) でクエリを投げる最大の n 」
// を入れると倍速くらいになる
// mod を超えて前計算して 0 割りを踏むバグは対策済み
template <typename T>
struct Binomial {
vector<T> f, g, h;
Binomial(int MAX = 0) {
assert(T::get_mod() != 0 && "Binomial<mint>()");
f.resize(1, T{1});
g.resize(1, T{1});
h.resize(1, T{1});
if (MAX > 0) extend(MAX + 1);
}
void extend(int m = -1) {
int n = f.size();
if (m == -1) m = n * 2;
m = min<int>(m, T::get_mod());
if (n >= m) return;
f.resize(m);
g.resize(m);
h.resize(m);
for (int i = n; i < m; i++) f[i] = f[i - 1] * T(i);
g[m - 1] = f[m - 1].inverse();
h[m - 1] = g[m - 1] * f[m - 2];
for (int i = m - 2; i >= n; i--) {
g[i] = g[i + 1] * T(i + 1);
h[i] = g[i] * f[i - 1];
}
}
T fac(int i) {
if (i < 0) return T(0);
while (i >= (int)f.size()) extend();
return f[i];
}
T finv(int i) {
if (i < 0) return T(0);
while (i >= (int)g.size()) extend();
return g[i];
}
T inv(int i) {
if (i < 0) return -inv(-i);
while (i >= (int)h.size()) extend();
return h[i];
}
T C(int n, int r) {
if (n < 0 || n < r || r < 0) return T(0);
return fac(n) * finv(n - r) * finv(r);
}
inline T operator()(int n, int r) { return C(n, r); }
template <typename I>
T multinomial(const vector<I>& r) {
static_assert(is_integral<I>::value == true);
int n = 0;
for (auto& x : r) {
if (x < 0) return T(0);
n += x;
}
T res = fac(n);
for (auto& x : r) res *= finv(x);
return res;
}
template <typename I>
T operator()(const vector<I>& r) {
return multinomial(r);
}
T C_naive(int n, int r) {
if (n < 0 || n < r || r < 0) return T(0);
T ret = T(1);
r = min(r, n - r);
for (int i = 1; i <= r; ++i) ret *= inv(i) * (n--);
return ret;
}
T P(int n, int r) {
if (n < 0 || n < r || r < 0) return T(0);
return fac(n) * finv(n - r);
}
// [x^r] 1 / (1-x)^n
T H(int n, int r) {
if (n < 0 || r < 0) return T(0);
return r == 0 ? 1 : C(n + r - 1, r);
}
};
#line 5 "fps/sample-point-shift.hpp"
// input : y(0), y(1), ..., y(n - 1)
// output : y(t), y(t + 1), ..., y(t + m - 1)
// (if m is default, m = n)
template <typename mint>
FormalPowerSeries<mint> SamplePointShift(FormalPowerSeries<mint>& y, mint t,
int m = -1) {
if (m == -1) m = y.size();
long long T = t.get();
int k = (int)y.size() - 1;
T %= mint::get_mod();
if (T <= k) {
FormalPowerSeries<mint> ret(m);
int ptr = 0;
for (int64_t i = T; i <= k and ptr < m; i++) {
ret[ptr++] = y[i];
}
if (k + 1 < T + m) {
auto suf = SamplePointShift<mint>(y, k + 1, m - ptr);
for (int i = k + 1; i < T + m; i++) {
ret[ptr++] = suf[i - (k + 1)];
}
}
return ret;
}
if (T + m > mint::get_mod()) {
auto pref = SamplePointShift<mint>(y, T, mint::get_mod() - T);
auto suf = SamplePointShift<mint>(y, 0, m - pref.size());
copy(begin(suf), end(suf), back_inserter(pref));
return pref;
}
FormalPowerSeries<mint> finv(k + 1, 1), d(k + 1);
for (int i = 2; i <= k; i++) finv[k] *= i;
finv[k] = mint(1) / finv[k];
for (int i = k; i >= 1; i--) finv[i - 1] = finv[i] * i;
for (int i = 0; i <= k; i++) {
d[i] = finv[i] * finv[k - i] * y[i];
if ((k - i) & 1) d[i] = -d[i];
}
FormalPowerSeries<mint> h(m + k);
for (int i = 0; i < m + k; i++) {
h[i] = mint(1) / (T - k + i);
}
auto dh = d * h;
FormalPowerSeries<mint> ret(m);
mint cur = T;
for (int i = 1; i <= k; i++) cur *= T - i;
for (int i = 0; i < m; i++) {
ret[i] = cur * dh[k + i];
cur *= T + i + 1;
cur *= h[i];
}
return ret;
}
#line 2 "matrix/matrix.hpp"
#line 2 "matrix/inverse-matrix.hpp"
#line 4 "matrix/inverse-matrix.hpp"
template <typename mint>
vector<vector<mint>> inverse_matrix(const vector<vector<mint>>& a) {
int N = a.size();
assert(N > 0);
assert(N == (int)a[0].size());
vector<vector<mint>> m(N, vector<mint>(2 * N));
for (int i = 0; i < N; i++) {
copy(begin(a[i]), end(a[i]), begin(m[i]));
m[i][N + i] = 1;
}
auto [rank, det] = GaussElimination(m, N, true);
if (rank != N) return {};
vector<vector<mint>> b(N);
for (int i = 0; i < N; i++) {
copy(begin(m[i]) + N, end(m[i]), back_inserter(b[i]));
}
return b;
}
#line 4 "matrix/matrix.hpp"
template <class T>
struct Matrix {
vector<vector<T> > A;
Matrix() = default;
Matrix(int n, int m) : A(n, vector<T>(m, T())) {}
Matrix(int n) : A(n, vector<T>(n, T())){};
int H() const { return A.size(); }
int W() const { return A[0].size(); }
int size() const { return A.size(); }
inline const vector<T> &operator[](int k) const { return A[k]; }
inline vector<T> &operator[](int k) { return A[k]; }
static Matrix I(int n) {
Matrix mat(n);
for (int i = 0; i < n; i++) mat[i][i] = 1;
return (mat);
}
Matrix &operator+=(const Matrix &B) {
int n = H(), m = W();
assert(n == B.H() && m == B.W());
for (int i = 0; i < n; i++)
for (int j = 0; j < m; j++) (*this)[i][j] += B[i][j];
return (*this);
}
Matrix &operator-=(const Matrix &B) {
int n = H(), m = W();
assert(n == B.H() && m == B.W());
for (int i = 0; i < n; i++)
for (int j = 0; j < m; j++) (*this)[i][j] -= B[i][j];
return (*this);
}
Matrix &operator*=(const Matrix &B) {
int n = H(), m = B.W(), p = W();
assert(p == B.H());
vector<vector<T> > C(n, vector<T>(m, T{}));
for (int i = 0; i < n; i++)
for (int k = 0; k < p; k++)
for (int j = 0; j < m; j++) C[i][j] += (*this)[i][k] * B[k][j];
A.swap(C);
return (*this);
}
Matrix &operator^=(long long k) {
Matrix B = Matrix::I(H());
while (k > 0) {
if (k & 1) B *= *this;
*this *= *this;
k >>= 1LL;
}
A.swap(B.A);
return (*this);
}
Matrix operator+(const Matrix &B) const { return (Matrix(*this) += B); }
Matrix operator-(const Matrix &B) const { return (Matrix(*this) -= B); }
Matrix operator*(const Matrix &B) const { return (Matrix(*this) *= B); }
Matrix operator^(const long long k) const { return (Matrix(*this) ^= k); }
bool operator==(const Matrix &B) const {
assert(H() == B.H() && W() == B.W());
for (int i = 0; i < H(); i++)
for (int j = 0; j < W(); j++)
if (A[i][j] != B[i][j]) return false;
return true;
}
bool operator!=(const Matrix &B) const {
assert(H() == B.H() && W() == B.W());
for (int i = 0; i < H(); i++)
for (int j = 0; j < W(); j++)
if (A[i][j] != B[i][j]) return true;
return false;
}
Matrix inverse() const {
assert(H() == W());
Matrix B(H());
B.A = inverse_matrix(A);
return B;
}
friend ostream &operator<<(ostream &os, const Matrix &p) {
int n = p.H(), m = p.W();
for (int i = 0; i < n; i++) {
os << (i ? " " : "") << "[";
for (int j = 0; j < m; j++) {
os << p[i][j] << (j + 1 == m ? "]\n" : ",");
}
}
return (os);
}
T determinant() const {
Matrix B(*this);
assert(H() == W());
T ret = 1;
for (int i = 0; i < H(); i++) {
int idx = -1;
for (int j = i; j < W(); j++) {
if (B[j][i] != 0) {
idx = j;
break;
}
}
if (idx == -1) return 0;
if (i != idx) {
ret *= T(-1);
swap(B[i], B[idx]);
}
ret *= B[i][i];
T inv = T(1) / B[i][i];
for (int j = 0; j < W(); j++) {
B[i][j] *= inv;
}
for (int j = i + 1; j < H(); j++) {
T a = B[j][i];
if (a == 0) continue;
for (int k = i; k < W(); k++) {
B[j][k] -= B[i][k] * a;
}
}
}
return ret;
}
};
/**
* @brief 行列ライブラリ
*/
#line 6 "matrix/polynomial-matrix-prefix-prod.hpp"
// return m(k-1) * m(k-2) * ... * m(1) * m(0)
template <typename mint>
Matrix<mint> polynomial_matrix_prod(Matrix<FormalPowerSeries<mint>> &m,
long long k) {
using Mat = Matrix<mint>;
using fps = FormalPowerSeries<mint>;
auto shift = [](vector<Mat> &G, mint x) -> vector<Mat> {
int d = G.size(), n = G[0].size();
vector<Mat> H(d, Mat(n));
for (int i = 0; i < n; i++) {
for (int j = 0; j < n; j++) {
fps g(d);
for (int l = 0; l < d; l++) g[l] = G[l][i][j];
fps h = SamplePointShift(g, x);
for (int l = 0; l < d; l++) H[l][i][j] = h[l];
}
}
return H;
};
int n = m.size();
int deg = 1;
for (auto &_ : m.A) {
for (auto &x : _) deg = max<int>(deg, (int)x.size() - 1);
}
while (deg & (deg - 1)) deg++;
vector<Mat> G(deg + 1);
long long v = 1;
while (deg * v * v < k) v *= 2;
mint iv = mint(v).inverse();
for (int i = 0; i < (int)G.size(); i++) {
mint x = mint(v) * i;
Mat mt(n);
for (int j = 0; j < n; j++)
for (int l = 0; l < n; l++) mt[j][l] = m[j][l].eval(x);
G[i] = mt;
}
for (long long w = 1; w != v; w <<= 1) {
mint W = w;
auto G1 = shift(G, W * iv);
auto G2 = shift(G, (W * deg * v + v) * iv);
auto G3 = shift(G, (W * deg * v + v + W) * iv);
for (int i = 0; i <= w * deg; i++)
G[i] = G1[i] * G[i], G2[i] = G3[i] * G2[i];
copy(begin(G2), end(G2) - 1, back_inserter(G));
}
Mat res = Mat::I(n);
long long i = 0;
while (i + v <= k) res = G[i / v] * res, i += v;
while (i < k) {
Mat mt(n);
for (int j = 0; j < n; j++)
for (int l = 0; l < n; l++) mt[j][l] = m[j][l].eval(i);
res = mt * res;
i++;
}
return res;
}
/**
* @brief 多項式行列のprefix product
*/
#line 6 "fps/find-p-recursive.hpp"
// return polynomial coefficient s.t. sum_{j=k...0} f_j(i) a_{i+j} = 0
// (In more details, read verification code.)
template <typename mint>
vector<FormalPowerSeries<mint>> find_p_recursive(vector<mint>& a, int d) {
using fps = FormalPowerSeries<mint>;
int n = a.size();
int k = (n + 2) / (d + 2) - 1;
if (k <= 0) return {};
int m = (k + 1) * (d + 1);
vector<vector<mint>> M(m - 1, vector<mint>(m));
for (int i = 0; i < m - 1; i++) {
for (int j = 0; j <= k; j++) {
mint base = 1;
for (int l = 0; l <= d; l++) {
M[i][(d + 1) * j + l] = base * a[i + j];
base *= i + j;
}
}
}
auto gauss = LinearEquation<mint>(M, vector<mint>(m - 1, 0));
if (gauss.size() <= 1) return {};
auto c = gauss[1];
while (all_of(end(c) - d - 1, end(c), [](mint x) { return x == mint(0); })) {
c.erase(end(c) - d - 1, end(c));
}
k = c.size() / (d + 1) - 1;
vector<fps> res;
for (int i = 0, j = 0; i < (int)c.size(); i += d + 1, j++) {
fps f{1}, base{j, 1};
fps sm;
for (int l = 0; l <= d; l++) sm += f * c[i + l], f *= base;
res.push_back(sm);
}
reverse(begin(res), end(res));
return res;
}
template <typename mint>
mint kth_term_of_p_recursive(vector<mint>& a, long long k, int d) {
if (k < (int)a.size()) return a[k];
if (all_of(begin(a), end(a), [](mint x) { return x == mint(0); })) return 0;
auto fs = find_p_recursive(a, d);
assert(fs.empty() == false);
int deg = fs.size() - 1;
assert(deg >= 1);
Matrix<FormalPowerSeries<mint>> m(deg), denom(1);
for (int i = 0; i < deg; i++) m[0][i] = -fs[i + 1];
for (int i = 1; i < deg; i++) m[i][i - 1] = fs[0];
denom[0][0] = fs[0];
Matrix<mint> a0(deg);
for (int i = 0; i < deg; i++) a0[i][0] = a[deg - 1 - i];
mint res = (polynomial_matrix_prod(m, k - deg + 1) * a0)[0][0];
res /= polynomial_matrix_prod(denom, k - deg + 1)[0][0];
return res;
}
// K 項列挙
template <typename mint>
vector<mint> kth_term_of_p_recursive_enumerate(vector<mint>& a, long long K,
int d) {
if (K < (int)a.size()) return {begin(a), begin(a) + K};
if (all_of(begin(a), end(a), [](mint x) { return x == mint(0); }))
return vector<mint>(K, 0);
auto fs = find_p_recursive(a, d);
if (fs.empty()) return {};
int k = fs.size() - 1, is = a.size();
a.resize(K);
for (auto& f : fs) f.shrink();
reverse(begin(fs), end(fs));
for (int ipk = is; ipk < K; ipk++) {
int i = ipk - k;
mint s = 0;
for (int j = 0; j < k; j++) {
if (i + j >= 0) s += a[i + j] * fs[j].eval(i);
}
a[ipk] = -s / fs[k].eval(i);
}
return a;
}
template <typename mint>
mint kth_term_of_p_recursive(vector<mint>& a, long long k) {
if (k < (int)a.size()) return a[k];
if (all_of(begin(a), end(a), [](mint x) { return x == mint(0); })) return 0;
int n = a.size() - 1;
vector<mint> b{begin(a), end(a) - 1};
for (int d = 0;; d++) {
#ifdef NyaanLocal
cerr << "d = " << d << endl;
#endif
if ((n + 2) / (d + 2) <= 1) break;
if (kth_term_of_p_recursive(b, n, d) == a.back()) {
#ifdef NyaanLocal
cerr << "Found, d = " << d << endl;
#endif
return kth_term_of_p_recursive(a, k, d);
}
}
cerr << "Failed." << endl;
exit(1);
}
/**
* @brief P-recursiveの高速計算
* @docs docs/fps/find-p-recursive.md
*/