結果
問題 | No.2272 多項式乗算 mod 258280327 |
ユーザー | tayu |
提出日時 | 2023-09-26 20:49:07 |
言語 | Rust (1.77.0 + proconio) |
結果 |
AC
|
実行時間 | 101 ms / 2,000 ms |
コード長 | 64,927 bytes |
コンパイル時間 | 17,808 ms |
コンパイル使用メモリ | 401,476 KB |
実行使用メモリ | 20,280 KB |
最終ジャッジ日時 | 2024-07-19 14:39:00 |
合計ジャッジ時間 | 20,328 ms |
ジャッジサーバーID (参考情報) |
judge1 / judge3 |
(要ログイン)
テストケース
テストケース表示入力 | 結果 | 実行時間 実行使用メモリ |
---|---|---|
testcase_00 | AC | 1 ms
6,816 KB |
testcase_01 | AC | 0 ms
6,812 KB |
testcase_02 | AC | 1 ms
6,940 KB |
testcase_03 | AC | 1 ms
6,940 KB |
testcase_04 | AC | 1 ms
6,944 KB |
testcase_05 | AC | 0 ms
6,944 KB |
testcase_06 | AC | 1 ms
6,940 KB |
testcase_07 | AC | 1 ms
6,940 KB |
testcase_08 | AC | 1 ms
6,940 KB |
testcase_09 | AC | 1 ms
6,944 KB |
testcase_10 | AC | 1 ms
6,944 KB |
testcase_11 | AC | 1 ms
6,944 KB |
testcase_12 | AC | 1 ms
6,940 KB |
testcase_13 | AC | 1 ms
6,940 KB |
testcase_14 | AC | 1 ms
6,944 KB |
testcase_15 | AC | 1 ms
6,940 KB |
testcase_16 | AC | 1 ms
6,940 KB |
testcase_17 | AC | 1 ms
6,940 KB |
testcase_18 | AC | 1 ms
6,940 KB |
testcase_19 | AC | 0 ms
6,940 KB |
testcase_20 | AC | 1 ms
6,940 KB |
testcase_21 | AC | 1 ms
6,944 KB |
testcase_22 | AC | 1 ms
6,940 KB |
testcase_23 | AC | 1 ms
6,948 KB |
testcase_24 | AC | 2 ms
6,944 KB |
testcase_25 | AC | 6 ms
6,940 KB |
testcase_26 | AC | 6 ms
6,940 KB |
testcase_27 | AC | 13 ms
6,940 KB |
testcase_28 | AC | 12 ms
6,940 KB |
testcase_29 | AC | 49 ms
11,532 KB |
testcase_30 | AC | 101 ms
19,764 KB |
testcase_31 | AC | 96 ms
19,512 KB |
testcase_32 | AC | 98 ms
20,280 KB |
コンパイルメッセージ
warning: creating a mutable reference to mutable static is discouraged --> src/main.rs:40:11223 | 40 | ...)->&'static mut FastOutput<'static>{unsafe{&mut OUTPUT}}pub fn get_output_source()->&'static mut FastOutput<'static>{unsafe{STDOUTSOUR... | ^^^^^^^^^^^ mutable reference to mutable static | = note: for more information, see issue #114447 <https://github.com/rust-lang/rust/issues/114447> = note: this will be a hard error in the 2024 edition = note: this mutable reference has lifetime `'static`, but if the static gets accessed (read or written) by any other means, or any other reference is created, then any further use of this mutable reference is Undefined Behavior = note: `#[warn(static_mut_refs)]` on by default help: use `addr_of_mut!` instead to create a raw pointer | 40 | pub mod iolib {pub use crate::__cargo_equip::macros::iolib::*;mod ext{use std::ffi::c_void;extern "C"{pub fn mmap(addr:*mut c_void,length:usize,prot:i32,flags:i32,fd:i32,offset:i64,)->*mut c_void;}pub const PROT_READ:i32=1;pub const MAP_PRIVATE:i32=2;}mod input{use super::ext::{mmap,MAP_PRIVATE,PROT_READ};use super::parse_number::{parse16c,parse8c,parse8lec};use std::fs::File;use std::io::Read;use std::os::unix::io::FromRawFd;use std::ptr::{null_mut,slice_from_raw_parts_mut};pub trait Readable{fn read(src:&mut FastInput)->Self;}impl Readable for char{fn read(src:&mut FastInput)->Self{src.read_char()}}impl Readable for String{fn read(src:&mut FastInput)->Self{src.read_string()}}macro_rules!impl_readable_integer{($({$t:ty,$ut:ty})*)=>{$(impl Readable for$ut{#[inline]fn read(src:&mut FastInput)->$ut{src.read_u64()as$ut}}impl Readable for$t{#[inline]fn read(src:&mut FastInput)->$t{if src.peek()==b'-'{src.next();-(<$ut>::read(src)as$t)}else{<$ut>::read(src)as$t}}})*};}impl_readable_integer!({i8,u8}{i16,u16}{i32,u32}{i64,u64}{i128,u128}{isize,usize});macro_rules!impl_readable_float{($($t:ty)*)=>{$(impl Readable for$t{f
ソースコード
pub use __cargo_equip::prelude::*; use convolution_simd::arbitrary_convolution; use iolib::{putln, putvec_with_spaceln, scan}; const MOD: u64 = 258280327; fn main() { scan!(n: usize, f: [u64; n + 1], m: usize, g: [u64; m + 1]); let (mut f, mut g) = (f, g); for v in vec![&mut f, &mut g] { while let Some(p) = v.pop() { if p != 0 { v.push(p); break; } } } if f.is_empty() || g.is_empty() { putln!(0); putln!(0); return; } let f = f.into_iter().map(|f| (f % MOD) as u32).collect(); let g = g.into_iter().map(|g| (g % MOD) as u32).collect(); let fg = arbitrary_convolution::<MOD>(f, g); putln!(fg.len() - 1); putvec_with_spaceln!(fg); } #[cfg_attr(any(), rustfmt::skip)] #[allow(unused)] mod __cargo_equip { pub(crate) mod crates { pub mod convolution_simd {use crate::__cargo_equip::preludes::convolution_simd::*;mod arbitrary_modulo_convolution{use crate::__cargo_equip::preludes::convolution_simd::*;use super::convolution;use montgomery_modint::{Mod645922817,Mod754974721,Mod880803841,Mod897581057,Mod998244353,Modulo,MontgomeryModint};type Modint<M> =MontgomeryModint<M>;pub fn arbitrary_convolution<const M:u64>(a:Vec<u32>,b:Vec<u32>)->Vec<u32>{let c1=convolution::<Mod880803841>(a.clone(),b.clone());let c2=convolution::<Mod897581057>(a.clone(),b.clone());let c3=convolution::<Mod998244353>(a,b);const P:[u64;3]=[Mod880803841::N as u64,Mod897581057::N as u64,Mod998244353::N as u64];const P1P2:u64=P[0]*P[1]%P[2];let p1p2mod:u64=P[0]*P[1]%M;let p1i=Modint::<Mod897581057>::raw(P[0]as u32).inv().val()as u64;let p2i=Modint::<Mod998244353>::raw(P1P2 as u32).inv().val()as u64;c1.into_iter().zip(c2.into_iter().zip(c3.into_iter())).map(|(c1,(c2,c3))|{let t1=(c2 as u64+P[1]-c1 as u64)*p1i%P[1];let res2=(c1 as u64+t1*P[0])%P[2];let res3=(c1 as u64+t1*P[0])%M;let t2=(c3 as u64+P[2]-res2)*p2i%P[2];((res3+t2*p1p2mod)%M)as u32}).collect()}pub fn convolution_1e97(a:Vec<u32>,b:Vec<u32>)->Vec<u32>{let c1=convolution::<Mod880803841>(a.clone(),b.clone());let c2=convolution::<Mod897581057>(a.clone(),b.clone());let c3=convolution::<Mod998244353>(a,b);const MOD:u64=1000_000_007;const P:[u64;3]=[Mod880803841::N as u64,Mod897581057::N as u64,Mod998244353::N as u64];const P1P2:u64=P[0]*P[1]%P[2];const P1P2MOD:u64=P[0]*P[1]%MOD;let p1i=Modint::<Mod897581057>::raw(P[0]as u32).inv().val()as u64;let p2i=Modint::<Mod998244353>::raw(P1P2 as u32).inv().val()as u64;c1.into_iter().zip(c2.into_iter().zip(c3.into_iter())).map(|(c1,(c2,c3))|{let t1=(c2 as u64+P[1]-c1 as u64)*p1i%P[1];let res2=(c1 as u64+t1*P[0])%P[2];let res3=(c1 as u64+t1*P[0])%MOD;let t2=(c3 as u64+P[2]-res2)*p2i%P[2];((res3+t2*P1P2MOD)%MOD)as u32}).collect()}pub fn convolution_mod_2_64(a:Vec<u64>,b:Vec<u64>)->Vec<u64>{let c1=convolution::<Mod645922817>(a.iter().cloned().map(|a|(a%Mod645922817::N as u64)as u32).collect(),b.iter().cloned().map(|b|(b%Mod645922817::N as u64)as u32).collect(),);let c2=convolution::<Mod754974721>(a.iter().cloned().map(|a|(a%Mod754974721::N as u64)as u32).collect(),b.iter().cloned().map(|b|(b%Mod754974721::N as u64)as u32).collect(),);let c3=convolution::<Mod880803841>(a.iter().cloned().map(|a|(a%Mod880803841::N as u64)as u32).collect(),b.iter().cloned().map(|b|(b%Mod880803841::N as u64)as u32).collect(),);let c4=convolution::<Mod897581057>(a.iter().cloned().map(|a|(a%Mod897581057::N as u64)as u32).collect(),b.iter().cloned().map(|b|(b%Mod897581057::N as u64)as u32).collect(),);let c5=convolution::<Mod998244353>(a.into_iter().map(|a|(a%Mod998244353::N as u64)as u32).collect(),b.into_iter().map(|b|(b%Mod998244353::N as u64)as u32).collect(),);const P:[u64;5]=[Mod645922817::N as u64,Mod754974721::N as u64,Mod880803841::N as u64,Mod897581057::N as u64,Mod998244353::N as u64];const PROD01:u64=(P[0]as u64).wrapping_mul(P[1]);const PROD012:u64=PROD01.wrapping_mul(P[2]);const PROD0123:u64=PROD012.wrapping_mul(P[3]);const P0P1:u64=P[0]*P[1]%P[2];const P0P1P2:u64=P[0]*P[1]%P[3]*P[2]%P[3];const P0P1P2P3:u64=P[0]*P[1]%P[4]*P[2]%P[4]*P[3]%P[4];let pi=vec![Modint::<Mod754974721>::raw(P[0]as u32).inv().val()as u64,Modint::<Mod880803841>::from(P0P1).inv().val()as u64,Modint::<Mod897581057>::from(P0P1P2).inv().val()as u64,Modint::<Mod998244353>::from(P0P1P2P3).inv().val()as u64,];let mut res=vec![];for i in 0..c1.len(){let t0=c1[i]as u64;let mut w=vec![t0;5];let mut prod=vec![P[0];5];for(j,c)in vec![c2[i],c3[i],c4[i],c5[i]].into_iter().enumerate(){let t=((c+P[j+1]as u32-w[j+1]as u32)as u64*pi[j])%P[j+1];for(k,&p)in P.iter().enumerate().skip(j+2){w[k]=(w[k]+(t*prod[k]))%p;prod[k]=(prod[k]*P[j+1])%p;}w[j]=t;}res.push(t0.wrapping_add(w[0].wrapping_mul(Mod645922817::N as u64)).wrapping_add(w[1].wrapping_mul(PROD01)).wrapping_add(w[2].wrapping_mul(PROD012)).wrapping_add(w[3].wrapping_mul(PROD0123)),)}res}}pub mod common{use crate::__cargo_equip::preludes::convolution_simd::*;#[inline]pub fn bit_reverse<T>(deg:usize,a:&mut[T]){let nh=deg>>1;let nh1=nh+1;let mut i=0;for j in(0..nh).step_by(2){if j<i{a.swap(i,j);a.swap(i+nh1,j+nh1);}a.swap(j+nh,i+1);let mut k=nh>>1;i^=k;while k>i{k>>=1;i^=k;}}}}pub mod cooley_tukey{use crate::__cargo_equip::preludes::convolution_simd::*;use crate::__cargo_equip::crates::convolution_simd::fft_cache::FftCache;use montgomery_modint::{Modulo,MontgomeryModint,MontgomeryModintx8};#[cfg(target_arch="x86_64")]use std::arch::x86_64::{_mm256_permute2f128_si256 as permute2f128,_mm256_permute4x64_epi64 as permute4x64,_mm256_setr_epi32,_mm256_storeu_si256 as storeu,_mm256_unpackhi_epi64 as unpackhi64,_mm256_unpacklo_epi64 as unpacklo64,};type Modint<M> =MontgomeryModint<M>;type Modintx8<M> =MontgomeryModintx8<M>;macro_rules!radix4_inner{($c0:expr,$c1:expr,$c2:expr,$c3:expr,$rot:expr,$rot2:expr,$rot3:expr,$imag:expr)=>{{let(c0,c1,c2,c3)=($c0,$c1,$c2,$c3);let c01=c0+c1;let c01n=c0-c1;let c23=c2+c3;let c23nim=(c2-c3)*$imag;(c01+c23,(c01n+c23nim)*$rot,(c01-c23)*$rot2,(c01n-c23nim)*$rot3)}};}#[inline]#[target_feature(enable="avx2")]unsafe fn cooley_tukey_radix_2_kernel<M:Modulo>(deg:usize,width:usize,offset:usize,a:&mut[Modint<M>],rate:&[Modint<M>]){let mut rot=Modint::one();let blocks=deg/width;if offset==1&&blocks>=8{let mut r=[Modint::zero();8];let vindex=_mm256_setr_epi32(0,2,4,6,8,10,12,14);for block in(0..blocks).step_by(8){let top=block*width;for i in 0..8{r[i]=rot;rot*=rate[(!(block+i)).trailing_zeros()as usize];}let rot=Modintx8::load(r.as_ptr());let c0=Modintx8::gather(a[top..].as_ptr(),vindex);let c1=Modintx8::gather(a[top+1..].as_ptr(),vindex);let(c0,c1)=(c0+c1,(c0-c1)*rot);let r0=c0.unpacklo32(c1);let r1=c0.unpackhi32(c1);storeu(a[top..].as_mut_ptr()as _,permute2f128::<0x20>(r0.rawval(),r1.rawval()));storeu(a[top+8..].as_mut_ptr()as _,permute2f128::<0x31>(r0.rawval(),r1.rawval()));}}else{for block in 0..blocks{let top=block*width;for now in top..top+offset{let c=a[now+offset];a[now+offset]=(a[now]-c)*rot;a[now]+=c;}rot*=rate[(!block).trailing_zeros()as usize];}}}#[inline]#[target_feature(enable="avx2")]unsafe fn cooley_tukey_radix_4_kernel<M:Modulo>(deg:usize,width:usize,offset:usize,im:Modint<M>,a:&mut[Modint<M>],rate:&[Modint<M>]){let mut rot=Modint::one();let blocks=deg/width;let imag=Modintx8::splat(im);if offset==1&&blocks>=8{let mut r=[Modint::zero();8];let vindex=_mm256_setr_epi32(0,4,8,12,16,20,24,28);for block in(0..blocks).step_by(8){let top=block*width;for i in 0..8{r[i]=rot;rot*=rate[(!(block+i)).trailing_zeros()as usize];}let rot=Modintx8::load(r.as_ptr());let rot2=rot*rot;let rot3=rot*rot2;let(r0,r1,r2,r3)=radix4_inner!(Modintx8::gather(a[top..].as_ptr(),vindex),Modintx8::gather(a[top+1..].as_ptr(),vindex),Modintx8::gather(a[top+2..].as_ptr(),vindex),Modintx8::gather(a[top+3..].as_ptr(),vindex),rot,rot2,rot3,imag);let r01lo=r0.unpacklo32(r1);let r01hi=r0.unpackhi32(r1);let r23lo=r2.unpacklo32(r3);let r23hi=r2.unpackhi32(r3);let r01lo=permute4x64::<0b11_01_10_00>(r01lo.rawval());let r01hi=permute4x64::<0b11_01_10_00>(r01hi.rawval());let r23lo=permute4x64::<0b11_01_10_00>(r23lo.rawval());let r23hi=permute4x64::<0b11_01_10_00>(r23hi.rawval());storeu(a[top..].as_mut_ptr()as _,unpacklo64(r01lo,r23lo));storeu(a[top+8..].as_mut_ptr()as _,unpacklo64(r01hi,r23hi));storeu(a[top+16..].as_mut_ptr()as _,unpackhi64(r01lo,r23lo));storeu(a[top+24..].as_mut_ptr()as _,unpackhi64(r01hi,r23hi));}}else if offset==2&&blocks>=4{let mut r=[Modint::zero();8];let vindex=_mm256_setr_epi32(0,1,8,9,16,17,24,25);for block in(0..blocks).step_by(4){let top=block*width;for i in 0..4{r[i*2]=rot;r[i*2+1]=rot;rot*=rate[(!(block+i)).trailing_zeros()as usize];}let rot=Modintx8::load(r.as_ptr());let rot2=rot*rot;let rot3=rot*rot2;let(r0,r1,r2,r3)=radix4_inner!(Modintx8::gather(a[top..].as_ptr(),vindex),Modintx8::gather(a[top+2..].as_ptr(),vindex),Modintx8::gather(a[top+4..].as_ptr(),vindex),Modintx8::gather(a[top+6..].as_ptr(),vindex),rot,rot2,rot3,imag);let r01lo=r0.unpacklo64(r1);let r01hi=r0.unpackhi64(r1);let r23lo=r2.unpacklo64(r3);let r23hi=r2.unpackhi64(r3);storeu(a[top..].as_mut_ptr()as _,permute2f128::<0x20>(r01lo.rawval(),r23lo.rawval()));storeu(a[top+8..].as_mut_ptr()as _,permute2f128::<0x20>(r01hi.rawval(),r23hi.rawval()));storeu(a[top+16..].as_mut_ptr()as _,permute2f128::<0x31>(r01lo.rawval(),r23lo.rawval()));storeu(a[top+24..].as_mut_ptr()as _,permute2f128::<0x31>(r01hi.rawval(),r23hi.rawval()));}}else if offset==4&&blocks>=2{let vindex=_mm256_setr_epi32(0,1,2,3,16,17,18,19);for block in(0..blocks).step_by(2){let top=block*width;let next_rot=rot*rate[(!block).trailing_zeros()as usize];{let rot=Modintx8::load([rot,rot,rot,rot,next_rot,next_rot,next_rot,next_rot].as_ptr());let rot2=rot*rot;let rot3=rot*rot2;let(r0,r1,r2,r3)=radix4_inner!(Modintx8::gather(a[top..].as_ptr(),vindex),Modintx8::gather(a[top+4..].as_ptr(),vindex),Modintx8::gather(a[top+8..].as_ptr(),vindex),Modintx8::gather(a[top+12..].as_ptr(),vindex),rot,rot2,rot3,imag);storeu(a[top..].as_mut_ptr()as _,permute2f128::<0x20>(r0.rawval(),r1.rawval()));storeu(a[top+8..].as_mut_ptr()as _,permute2f128::<0x20>(r2.rawval(),r3.rawval()));storeu(a[top+16..].as_mut_ptr()as _,permute2f128::<0x31>(r0.rawval(),r1.rawval()));storeu(a[top+24..].as_mut_ptr()as _,permute2f128::<0x31>(r2.rawval(),r3.rawval()));}rot=next_rot*rate[(!(block+1)).trailing_zeros()as usize];}}else if offset<8{for block in 0..blocks{let top=block*width;let rot2=rot*rot;let rot3=rot*rot2;for now in top..top+offset{let(r0,r1,r2,r3)=radix4_inner!(a[now],a[now+offset],a[now+offset*2],a[now+offset*3],rot,rot2,rot3,im);a[now]=r0;a[now+offset]=r1;a[now+offset*2]=r2;a[now+offset*3]=r3;}rot*=rate[(!block).trailing_zeros()as usize];}}else{for now in(0..offset).step_by(8){let c0=Modintx8::load(a[now..].as_ptr());let c1=Modintx8::load(a[now+offset..].as_ptr());let c2=Modintx8::load(a[now+offset*2..].as_ptr());let c3=Modintx8::load(a[now+offset*3..].as_ptr());let c01=c0+c1;let c01n=c0-c1;let c23=c2+c3;let c23nim=(c2-c3)*imag;(c01+c23).store(a[now..].as_mut_ptr());(c01n+c23nim).store(a[now+offset..].as_mut_ptr());(c01-c23).store(a[now+offset*2..].as_mut_ptr());(c01n-c23nim).store(a[now+offset*3..].as_mut_ptr());}let mut rot=Modintx8::splat(rate[0]);for block in 1..blocks{let top=block*width;let rot2=rot*rot;let rot3=rot*rot2;let mut head=a[top..].as_mut_ptr();for _ in(top..top+offset).step_by(8){let(c0a,c1a,c2a,c3a)=(head,head.add(offset),head.add(offset*2),head.add(offset*3));let(r0,r1,r2,r3)=radix4_inner!(Modintx8::load(c0a),Modintx8::load(c1a),Modintx8::load(c2a),Modintx8::load(c3a),rot,rot2,rot3,imag);r0.store(c0a);r1.store(c1a);r2.store(c2a);r3.store(c3a);head=head.add(8);}rot=rot*Modintx8::splat(rate[(!block).trailing_zeros()as usize]);}}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn cooley_tukey_radix_4_butterfly<M:Modulo>(deg:usize,log:usize,a:&mut[Modint<M>],cache:&FftCache<M>){if log&1!=0{let width=1<<1;let offset=width>>1;cooley_tukey_radix_2_kernel(deg,width,offset,a,&cache.rate2);}for i in(log&1..log).step_by(2){let width=1<<(i+2);let offset=width>>2;cooley_tukey_radix_4_kernel(deg,width,offset,cache.root[2],a,&cache.rate3);}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn cooley_tukey_radix_4_butterfly_inv<M:Modulo>(deg:usize,log:usize,a:&mut[Modint<M>],cache:&FftCache<M>){if log&1!=0{let width=1<<1;let offset=width>>1;cooley_tukey_radix_2_kernel(deg,width,offset,a,&cache.irate2);}for i in(log&1..log).step_by(2){let width=1<<(i+2);let offset=width>>2;cooley_tukey_radix_4_kernel(deg,width,offset,cache.iroot[2],a,&cache.irate3);}}}pub mod fft_cache{use crate::__cargo_equip::preludes::convolution_simd::*;use montgomery_modint::{Modulo,MontgomeryModint};type Modint<M> =MontgomeryModint<M>;pub struct FftCache<M:Modulo>{pub root:[Modint<M>;35],pub iroot:[Modint<M>;35],pub rate2:[Modint<M>;35],pub irate2:[Modint<M>;35],pub rate3:[Modint<M>;35],pub irate3:[Modint<M>;35],}impl<M:Modulo>FftCache<M>{const RANK2:usize=(M::N-1).trailing_zeros()as usize;pub const fn new()->Self{let mut root=[Modint::one();35];let mut iroot=[Modint::one();35];let mut rate2=[Modint::one();35];let mut irate2=[Modint::one();35];let mut rate3=[Modint::one();35];let mut irate3=[Modint::one();35];root[Self::RANK2]=Modint::<M>::nth_root(1<<Self::RANK2);iroot[Self::RANK2]=root[Self::RANK2].inv();let mut i=Self::RANK2;while i>0{i-=1;root[i]=root[i+1].mul_const(root[i+1]);iroot[i]=iroot[i+1].mul_const(iroot[i+1]);}let mut prod=Modint::one();let mut iprod=Modint::one();let mut i=0;while i<Self::RANK2.saturating_sub(1){rate2[i]=root[i+2].mul_const(prod);irate2[i]=iroot[i+2].mul_const(iprod);prod=prod.mul_const(iroot[i+2]);iprod=iprod.mul_const(root[i+2]);i+=1;}let mut prod=Modint::one();let mut iprod=Modint::one();let mut i=0;while i<Self::RANK2.saturating_sub(2){rate3[i]=root[i+3].mul_const(prod);irate3[i]=iroot[i+3].mul_const(iprod);prod=prod.mul_const(iroot[i+3]);iprod=iprod.mul_const(root[i+3]);i+=1;}Self{root,iroot,rate2,irate2,rate3,irate3}}#[inline]pub fn gen_rate(&self,log:usize)->Vec<Modint<M>>{if log==2{return self.rate2.clone().into();}else if log==3{return self.rate3.clone().into();}let mut rate=vec![Modint::one();Self::RANK2.saturating_sub(log-1)];let mut prod=Modint::one();for i in 0..Self::RANK2.saturating_sub(log-1){rate[i]=self.root[i+log]*prod;prod*=self.iroot[i+log];}rate}#[inline]pub fn gen_irate(&self,log:usize)->Vec<Modint<M>>{if log==2{return self.irate2.clone().into();}if log==3{return self.irate3.clone().into();}let mut irate=vec![Modint::one();Self::RANK2.saturating_sub(log-1)];let mut iprod=Modint::one();for i in 0..Self::RANK2.saturating_sub(log-1){irate[i]=self.iroot[i+log]*iprod;iprod*=self.root[i+log];}irate}}}pub mod gentleman_sande{use crate::__cargo_equip::preludes::convolution_simd::*;use super::FftCache;use montgomery_modint::{Modulo,MontgomeryModint,MontgomeryModintx8};#[cfg(target_arch="x86_64")]use std::arch::x86_64::{_mm256_permute2f128_si256 as permute2f128,_mm256_permute4x64_epi64 as permute4x64,_mm256_setr_epi32,_mm256_storeu_si256 as storeu,_mm256_unpackhi_epi64 as unpackhi64,_mm256_unpacklo_epi64 as unpacklo64,};type Modint<M> =MontgomeryModint<M>;type Modintx8<M> =MontgomeryModintx8<M>;macro_rules!radix4_inner{($c0:expr,$c1:expr,$c2:expr,$c3:expr,$imag:expr)=>{{let(c0,c1,c2,c3)=($c0,$c1,$c2,$c3);let c02=c0+c2;let c02n=c0-c2;let c13=c1+c3;let c13nim=(c1-c3)*$imag;(c02+c13,c02-c13,c02n+c13nim,c02n-c13nim)}};}#[inline]#[target_feature(enable="avx2")]unsafe fn gentleman_sande_radix_2_kernel<M:Modulo>(deg:usize,width:usize,offset:usize,a:&mut[Modint<M>],rate:&[Modint<M>]){let mut rot=Modint::one();let blocks=deg/width;if offset==1&&blocks>=8{let mut r=[Modint::zero();8];let vindex=_mm256_setr_epi32(0,2,4,6,8,10,12,14);for block in(0..blocks).step_by(8){let top=block*width;for i in 0..8{r[i]=rot;rot*=rate[(!(block+i)).trailing_zeros()as usize];}let rot=Modintx8::load(r.as_ptr());let c0=Modintx8::gather(a[top..].as_ptr(),vindex);let c1=Modintx8::gather(a[top+1..].as_ptr(),vindex)*rot;let(c0,c1)=(c0+c1,c0-c1);let r0=c0.unpacklo32(c1);let r1=c0.unpackhi32(c1);storeu(a[top..].as_mut_ptr()as _,permute2f128::<0x20>(r0.rawval(),r1.rawval()));storeu(a[top+8..].as_mut_ptr()as _,permute2f128::<0x31>(r0.rawval(),r1.rawval()));}}else{for block in 0..blocks{let top=block*width;for now in top..top+offset{let c=a[now+offset]*rot;a[now+offset]=a[now]-c;a[now]+=c;}rot*=rate[(!block).trailing_zeros()as usize];}}}#[inline]#[target_feature(enable="avx2")]unsafe fn gentleman_sande_radix_4_kernel<M:Modulo>(deg:usize,width:usize,offset:usize,im:Modint<M>,a:&mut[Modint<M>],rate:&[Modint<M>]){let mut rot=Modint::one();let blocks=deg/width;let imag=Modintx8::splat(im);if offset==1&&blocks>=8{let mut r=[Modint::zero();8];let vindex=_mm256_setr_epi32(0,4,8,12,16,20,24,28);for block in(0..blocks).step_by(8){let top=block*width;for i in 0..8{r[i]=rot;rot*=rate[(!(block+i)).trailing_zeros()as usize];}let rot=Modintx8::<M>::load(r.as_ptr());let rot2=rot*rot;let rot3=rot*rot2;let(r0,r1,r2,r3)=radix4_inner!(Modintx8::gather(a[top..].as_ptr(),vindex),Modintx8::gather(a[top+1..].as_ptr(),vindex)*rot,Modintx8::gather(a[top+2..].as_ptr(),vindex)*rot2,Modintx8::gather(a[top+3..].as_ptr(),vindex)*rot3,imag);let r01lo=r0.unpacklo32(r1);let r01hi=r0.unpackhi32(r1);let r23lo=r2.unpacklo32(r3);let r23hi=r2.unpackhi32(r3);let r01lo=permute4x64::<0b11_01_10_00>(r01lo.rawval());let r01hi=permute4x64::<0b11_01_10_00>(r01hi.rawval());let r23lo=permute4x64::<0b11_01_10_00>(r23lo.rawval());let r23hi=permute4x64::<0b11_01_10_00>(r23hi.rawval());storeu(a[top..].as_mut_ptr()as _,unpacklo64(r01lo,r23lo));storeu(a[top+8..].as_mut_ptr()as _,unpacklo64(r01hi,r23hi));storeu(a[top+16..].as_mut_ptr()as _,unpackhi64(r01lo,r23lo));storeu(a[top+24..].as_mut_ptr()as _,unpackhi64(r01hi,r23hi));}}else if offset==2&&blocks>=4{let mut r=[Modint::zero();8];let vindex=_mm256_setr_epi32(0,1,8,9,16,17,24,25);for block in(0..blocks).step_by(4){let top=block*width;for i in 0..4{r[i*2]=rot;r[i*2+1]=rot;rot*=rate[(!(block+i)).trailing_zeros()as usize];}let rot=Modintx8::<M>::load(r.as_ptr());let rot2=rot*rot;let rot3=rot*rot2;let(r0,r1,r2,r3)=radix4_inner!(Modintx8::gather(a[top..].as_ptr(),vindex),Modintx8::gather(a[top+2..].as_ptr(),vindex)*rot,Modintx8::gather(a[top+4..].as_ptr(),vindex)*rot2,Modintx8::gather(a[top+6..].as_ptr(),vindex)*rot3,imag);let r01lo=r0.unpacklo64(r1);let r01hi=r0.unpackhi64(r1);let r23lo=r2.unpacklo64(r3);let r23hi=r2.unpackhi64(r3);storeu(a[top..].as_mut_ptr()as _,permute2f128::<0x20>(r01lo.rawval(),r23lo.rawval()));storeu(a[top+8..].as_mut_ptr()as _,permute2f128::<0x20>(r01hi.rawval(),r23hi.rawval()));storeu(a[top+16..].as_mut_ptr()as _,permute2f128::<0x31>(r01lo.rawval(),r23lo.rawval()));storeu(a[top+24..].as_mut_ptr()as _,permute2f128::<0x31>(r01hi.rawval(),r23hi.rawval()));}}else if offset==4&&blocks>=2{let vindex=_mm256_setr_epi32(0,1,2,3,16,17,18,19);for block in(0..blocks).step_by(2){let top=block*width;let next_rot=rot*rate[(!block).trailing_zeros()as usize];{let rot=Modintx8::load([rot,rot,rot,rot,next_rot,next_rot,next_rot,next_rot].as_ptr());let rot2=rot*rot;let rot3=rot*rot2;let(r0,r1,r2,r3)=radix4_inner!(Modintx8::gather(a[top..].as_ptr(),vindex),Modintx8::gather(a[top+4..].as_ptr(),vindex)*rot,Modintx8::gather(a[top+8..].as_ptr(),vindex)*rot2,Modintx8::gather(a[top+12..].as_ptr(),vindex)*rot3,imag);storeu(a[top..].as_mut_ptr()as _,permute2f128::<0x20>(r0.rawval(),r1.rawval()));storeu(a[top+8..].as_mut_ptr()as _,permute2f128::<0x20>(r2.rawval(),r3.rawval()));storeu(a[top+16..].as_mut_ptr()as _,permute2f128::<0x31>(r0.rawval(),r1.rawval()));storeu(a[top+24..].as_mut_ptr()as _,permute2f128::<0x31>(r2.rawval(),r3.rawval()));}rot=next_rot*rate[(!(block+1)).trailing_zeros()as usize];}}else if offset<8{for block in 0..blocks{let top=block*width;let rot2=rot*rot;let rot3=rot*rot2;for now in top..top+offset{let(r0,r1,r2,r3)=radix4_inner!(a[now],a[now+offset]*rot,a[now+offset*2]*rot2,a[now+offset*3]*rot3,im);a[now]=r0;a[now+offset]=r1;a[now+offset*2]=r2;a[now+offset*3]=r3;}rot*=rate[(!block).trailing_zeros()as usize];}}else{let mut head=a.as_mut_ptr();for _ in(0..offset).step_by(8){let(c0a,c1a,c2a,c3a)=(head,head.add(offset),head.add(offset*2),head.add(offset*3));let(r0,r1,r2,r3)=radix4_inner!(Modintx8::load(c0a),Modintx8::load(c1a),Modintx8::load(c2a),Modintx8::load(c3a),imag);r0.store(c0a);r1.store(c1a);r2.store(c2a);r3.store(c3a);head=head.add(8);}let mut rot=Modintx8::splat(rate[0]);for block in 1..blocks{let top=block*width;let rot2=rot*rot;let rot3=rot*rot2;let mut head=a[top..].as_mut_ptr();for _ in(top..top+offset).step_by(8){let(c0a,c1a,c2a,c3a)=(head,head.add(offset),head.add(offset*2),head.add(offset*3));let(r0,r1,r2,r3)=radix4_inner!(Modintx8::load(c0a),Modintx8::load(c1a)*rot,Modintx8::load(c2a)*rot2,Modintx8::load(c3a)*rot3,imag);r0.store(c0a);r1.store(c1a);r2.store(c2a);r3.store(c3a);head=head.add(8);}rot=rot*Modintx8::splat(*rate.get_unchecked((!block).trailing_zeros()as usize));}}}#[inline]pub unsafe fn gentleman_sande_radix_4_butterfly<M:Modulo>(deg:usize,log:usize,a:&mut[Modint<M>],cache:&FftCache<M>){for i in(0..log).step_by(2){let width=deg>>i;if i+1==log{let offset=width>>1;gentleman_sande_radix_2_kernel(deg,width,offset,a,&cache.rate2);}else{let offset=width>>2;gentleman_sande_radix_4_kernel(deg,width,offset,cache.root[2],a,&cache.rate3);}}}#[inline]pub unsafe fn gentleman_sande_radix_4_butterfly_inv<M:Modulo>(deg:usize,log:usize,a:&mut[MontgomeryModint<M>],cache:&FftCache<M>){for i in(0..log).step_by(2){let width=deg>>i;if i+1==log{let offset=width>>1;gentleman_sande_radix_2_kernel(deg,width,offset,a,&cache.irate2);}else{let offset=width>>2;gentleman_sande_radix_4_kernel(deg,width,offset,cache.iroot[2],a,&cache.irate3);}}}}pub mod traits{use crate::__cargo_equip::preludes::convolution_simd::*;use super::fft_cache::FftCache;use super::{dot,intt,ntt};use montgomery_modint::{Modulo,MontgomeryModint,MontgomeryModintx8};use std::arch::x86_64::_mm256_storeu_si256;use std::mem::transmute;type Modint<M> =MontgomeryModint<M>;type Modintx8<M> =MontgomeryModintx8<M>;pub trait Nttable<M:Modulo>{const CACHE:FftCache<M> =FftCache::new();fn ntt(&mut self);fn intt(&mut self);fn dot(self,rhs:&Self)->Self;fn dot_assign(&mut self,rhs:&Self);}impl<M:Modulo>Nttable<M>for Vec<Modint<M>>{fn ntt(&mut self){let len=self.len();assert_eq!(1<<len.trailing_zeros(),len);ntt(self,&Self::CACHE)}fn intt(&mut self){let len=self.len();assert_eq!(1<<len.trailing_zeros(),len);intt(self,&Self::CACHE)}fn dot(mut self,rhs:&Self)->Self{dot::<M>(&mut self,&rhs);self}fn dot_assign(&mut self,rhs:&Self){dot::<M>(self,rhs)}}impl<M:Modulo>Nttable<M>for Vec<u32>{fn ntt(&mut self){let len=self.len();assert_eq!(1<<len.trailing_zeros(),len);convert_u32_to_modint::<M>(self);unsafe{transmute::<_,&mut Vec<Modint<M>>>(self).ntt()}}fn intt(&mut self){let len=self.len();assert_eq!(1<<len.trailing_zeros(),len);let a=unsafe{transmute::<_,&mut Vec<Modint<M>>>(self)};a.intt();convert_modint_to_u32::<M>(a);}fn dot(self,rhs:&Self)->Self{unsafe{transmute(transmute::<_,Vec<Modint<M>>>(self).dot(transmute(rhs)))}}fn dot_assign(&mut self,rhs:&Self){unsafe{transmute::<_,&mut Vec<Modint<M>>>(self).dot_assign(transmute::<_,&Vec<Modint<M>>>(rhs))};}}#[inline]fn convert_u32_to_modint<M:Modulo>(a:&mut Vec<u32>){if a.len()<8{a.iter_mut().for_each(|a|*a=Modint::<M>::from(*a).val);}else{a.chunks_exact_mut(8).for_each(|v|unsafe{(Modintx8::<M>::load(v.as_ptr()as _)*Modintx8::from_rawval(M::R2X8)).store(v.as_mut_ptr()as _)});}}#[inline]fn convert_modint_to_u32<M:Modulo>(a:&mut Vec<Modint<M>>){if a.len()<8{a.iter_mut().for_each(|a|*a=Modint::from_rawval(a.val()));}else{unsafe{a.chunks_exact_mut(8).for_each(|v|_mm256_storeu_si256(v.as_mut_ptr()as _,Modintx8::<M>::load(v.as_ptr()).val()))}}}}pub use arbitrary_modulo_convolution::*;use cooley_tukey::cooley_tukey_radix_4_butterfly_inv;use fft_cache::FftCache;use gentleman_sande::gentleman_sande_radix_4_butterfly;use montgomery_modint::{Modulo,MontgomeryModint,MontgomeryModintx8};pub use traits::Nttable;type Modint<M> =MontgomeryModint<M>;type Modintx8<M> =MontgomeryModintx8<M>;#[inline]pub fn ntt<M:Modulo>(a:&mut Vec<Modint<M>>,cache:&FftCache<M>){let n=a.len();let log=n.trailing_zeros()as usize;assert_eq!(n,1<<log);unsafe{gentleman_sande_radix_4_butterfly(n,log,a,cache)}}#[inline]pub fn intt<M:Modulo>(a:&mut Vec<Modint<M>>,cache:&FftCache<M>){let n=a.len();let log=n.trailing_zeros()as usize;assert_eq!(n,1<<log);unsafe{cooley_tukey_radix_4_butterfly_inv(n,log,a,cache);let ninv=Modint::raw(n as u32).inv();if n<8{a.iter_mut().for_each(|a|*a*=ninv);}else{let ninv=Modintx8::<M>::splat(ninv);a.chunks_exact_mut(8).for_each(|v|(Modintx8::load(v.as_ptr())*ninv).store(v.as_mut_ptr()));}}}#[inline]pub fn dot<M:Modulo>(a:&mut Vec<Modint<M>>,b:&[Modint<M>]){if a.len()<8{a.iter_mut().zip(b).for_each(|(a,&b)|*a*=b);}else{unsafe{a.chunks_exact_mut(8).zip(b.chunks_exact(8)).for_each(|(v,w)|(Modintx8::load(v.as_ptr())*Modintx8::load(w.as_ptr())).store(v.as_mut_ptr()))}}}pub fn convolution<M:Modulo>(mut a:Vec<u32>,mut b:Vec<u32>)->Vec<u32>{let deg=a.len()+b.len()-1;let n=deg.next_power_of_two();a.resize(n,0);b.resize(n,0);<Vec<u32>as Nttable<M>>::ntt(&mut a);<Vec<u32>as Nttable<M>>::ntt(&mut b);Nttable::<M>::dot_assign(&mut a,&b);<Vec<u32>as Nttable<M>>::intt(&mut a);a.resize(deg,0);a}} pub mod iolib {pub use crate::__cargo_equip::macros::iolib::*;mod ext{use std::ffi::c_void;extern "C"{pub fn mmap(addr:*mut c_void,length:usize,prot:i32,flags:i32,fd:i32,offset:i64,)->*mut c_void;}pub const PROT_READ:i32=1;pub const MAP_PRIVATE:i32=2;}mod input{use super::ext::{mmap,MAP_PRIVATE,PROT_READ};use super::parse_number::{parse16c,parse8c,parse8lec};use std::fs::File;use std::io::Read;use std::os::unix::io::FromRawFd;use std::ptr::{null_mut,slice_from_raw_parts_mut};pub trait Readable{fn read(src:&mut FastInput)->Self;}impl Readable for char{fn read(src:&mut FastInput)->Self{src.read_char()}}impl Readable for String{fn read(src:&mut FastInput)->Self{src.read_string()}}macro_rules!impl_readable_integer{($({$t:ty,$ut:ty})*)=>{$(impl Readable for$ut{#[inline]fn read(src:&mut FastInput)->$ut{src.read_u64()as$ut}}impl Readable for$t{#[inline]fn read(src:&mut FastInput)->$t{if src.peek()==b'-'{src.next();-(<$ut>::read(src)as$t)}else{<$ut>::read(src)as$t}}})*};}impl_readable_integer!({i8,u8}{i16,u16}{i32,u32}{i64,u64}{i128,u128}{isize,usize});macro_rules!impl_readable_float{($($t:ty)*)=>{$(impl Readable for$t{fn read(src:&mut FastInput)->$t{src.read_string().parse().unwrap()}})*};}impl_readable_float!(f32 f64);pub struct FastInput{head:usize,_file:File,buf:Box<[u8]>,}impl FastInput{fn new(file:File,buf:Box<[u8]>)->Self{Self{head:0,_file:file,buf,}}#[inline]fn peek(&self)->u8{self.buf[self.head]}#[inline]fn next(&mut self)->Option<u8>{if self.head==self.buf.len(){None}else{self.head+=1;Some(self.buf[self.head-1])}}#[inline]pub fn read<R:Readable>(&mut self)->R{R::read(self)}#[inline]pub fn read_char(&mut self)->char{while let Some(c)=self.next(){if!c.is_ascii_whitespace(){return c as char;}}unreachable!("Error: buffer is empty. line: {}, file: {}",line!(),file!())}#[inline]pub fn read_u64(&mut self)->u64{let mut tail=self.head;while tail<self.buf.len()&&!self.buf[tail].is_ascii_whitespace(){tail+=1;}let offset=tail-self.head;let res=if offset<8{unsafe{parse8lec(&self.buf[self.head..self.head+8],offset as u8)}}else if offset==8{unsafe{parse8c(&self.buf[self.head..self.head+8])}}else if offset<16{let rem=offset-8;let upper=unsafe{parse8lec(&self.buf[self.head..self.head+8],rem as u8)};let lower=unsafe{parse8c(&self.buf[self.head+rem..self.head+offset])};upper*10000_0000+lower}else if offset==16{unsafe{parse16c(&self.buf[self.head..self.head+16])}}else{let rem=offset-16;let upper=unsafe{parse8lec(&self.buf[self.head..self.head+8],rem as u8)};let lower=unsafe{parse16c(&self.buf[self.head+rem..self.head+offset])};upper*10000_0000_0000_0000+lower};self.head=tail+1;res}#[inline]pub fn read_string(&mut self)->String{let mut tail=self.head;while tail<self.buf.len()&&!self.buf[tail].is_ascii_whitespace(){tail+=1;}let res=String::from_utf8_lossy(&self.buf[self.head..tail]).into_owned();self.head=tail+1;res}}static mut INPUT:*mut FastInput=0 as*mut FastInput;static mut STDINT_SOURCE:fn()->&'static mut FastInput=init;#[inline]fn init()->&'static mut FastInput{let mut stdin=unsafe{File::from_raw_fd(0)};let meta=stdin.metadata().unwrap();let buf=if meta.is_file(){let len=meta.len()as usize+32;let mapping=unsafe{mmap(null_mut()as _,len,PROT_READ,MAP_PRIVATE,0,0)};let res=slice_from_raw_parts_mut(mapping as*mut u8,len);unsafe{Box::from_raw(res)}}else{let mut buf=Vec::with_capacity(1<<18);stdin.read_to_end(&mut buf).unwrap();buf.resize(buf.len()+32,b' ');buf.into_boxed_slice()};let input=FastInput::new(stdin,buf);unsafe{INPUT=Box::leak(Box::new(input));STDINT_SOURCE=get_input;}get_input()}#[inline]fn get_input()->&'static mut FastInput{unsafe{INPUT.as_mut().unwrap_unchecked()}}#[inline]pub fn get_stdin_source()->&'static mut FastInput{unsafe{STDINT_SOURCE()}}}mod output{use std::{cell::RefCell,io::{StdoutLock,Write},ptr::copy_nonoverlapping,};const BUF_SIZE:usize=1<<15;pub trait Writable{fn write(&self,dest:&mut FastOutput);}impl Writable for char{fn write(&self,dest:&mut FastOutput){dest.store_byte(*self as u8)}}impl Writable for String{fn write(&self,dest:&mut FastOutput){dest.store_string(self)}}impl Writable for str{fn write(&self,dest:&mut FastOutput){dest.store_string(self)}}impl Writable for&str{fn write(&self,dest:&mut FastOutput){dest.store_string(self)}}const LUT:&'static[u8;3000]=b"000001002003004005006007008009010011012013014015016017018019020021022023024025026027028029030031032033034035036037038039040041042043044045046047048049050051052053054055056057058059060061062063064065066067068069070071072073074075076077078079080081082083084085086087088089090091092093094095096097098099100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999";impl Writable for u8{fn write(&self,dest:&mut FastOutput){if self==&0{dest.store_byte(b'0');return;}let mut head=4;let mut buf=[0u8;4];let mut now=*self;if now>=100{let rem=(now as usize%100)<<1;head-=2;buf[head..head+2].copy_from_slice(&LUT[rem..rem+2]);now/=100;}if now>=10{head-=2;let n=(now as usize)<<1;buf[head..head+2].copy_from_slice(&LUT[n..n+2]);}else{head-=1;buf[head]=now as u8+b'0';}dest.store(&buf[head..]);}}impl Writable for i8{fn write(&self,dest:&mut FastOutput){if self<&0{'-'.write(dest);}(self.abs()as u8).write(dest);}}macro_rules!impl_writable_integer{($({$t:ty,$ut:ty,$size:expr})*)=>{$(impl Writable for$ut{fn write(&self,dest:&mut FastOutput){if self==&0{dest.store_byte(b'0');return;}let mut buf=[0;$size];let mut head=$size;let mut now=*self as u64;while now>=1000_000{let rem=(now%1000_000)as usize;let upper=(rem/1000)*3;let lower=(rem%1000)*3;head-=6;buf[head..head+3].copy_from_slice(&LUT[upper..upper+3]);buf[head+3..head+6].copy_from_slice(&LUT[lower..lower+3]);now/=1000_000;}if now>=1000{let rem=(now as usize%1000)*3;head-=3;buf[head..head+3].copy_from_slice(&LUT[rem..rem+3]);now/=1000;}if now>=100{head-=3;let n=(now as usize)*3;buf[head..head+3].copy_from_slice(&LUT[n..n+3]);}else if now>=10{head-=2;let n=(now as usize)*3+1;buf[head..head+2].copy_from_slice(&LUT[n..n+2]);}else{head-=1;buf[head]=now as u8+b'0';}dest.store(&buf[head..]);}}impl Writable for$t{fn write(&self,dest:&mut FastOutput){if self<&0{'-'.write(dest);}(self.abs()as$ut).write(dest);}})*};}impl_writable_integer!({i16,u16,8}{i32,u32,12}{i64,u64,20}{i128,u128,40}{isize,usize,20});macro_rules!impl_writable_float{($($t:ty)*)=>{$(impl Writable for$t{fn write(&self,dest:&mut FastOutput){format!("{}",self).write(dest)}})*};}impl_writable_float!(f32 f64);impl<T:Clone+Writable>Writable for Vec<T>{fn write(&self,dest:&mut FastOutput){dest.store_vec(self,'\n')}}pub struct FastOutput<'a>{tail:usize,buf:[u8;BUF_SIZE],dest:*mut StdoutLock<'a>,}impl<'a>FastOutput<'a>{#[inline]pub const fn new()->Self{Self{tail:0,buf:[0;BUF_SIZE],dest:0 as*mut StdoutLock<'a>}}#[inline]fn init(&mut self){let stdout=Box::leak::<'a>(Box::new(std::io::stdout()));self.dest=Box::leak::<'a>(Box::new(stdout.lock()));}#[inline]pub fn flush(&mut self){unsafe{self.dest.as_mut().unwrap_unchecked().write_all(&self.buf[..self.tail]).unwrap_unchecked();}self.tail=0;}#[inline]pub fn store(&mut self,bytes:&[u8]){if bytes.len()<BUF_SIZE-self.tail{unsafe{copy_nonoverlapping(bytes.as_ptr(),self.buf[self.tail..].as_mut_ptr(),bytes.len())}self.tail+=bytes.len();return;}let head=BUF_SIZE-self.tail;unsafe{copy_nonoverlapping(bytes[..head].as_ptr(),self.buf[self.tail..].as_mut_ptr(),head)}self.tail=BUF_SIZE;self.flush();bytes[head..].chunks(BUF_SIZE).for_each(|v|{self.tail=if v.len()<BUF_SIZE{v.len()}else{BUF_SIZE};unsafe{copy_nonoverlapping(v.as_ptr(),self.buf[0..].as_mut_ptr(),v.len())}if v.len()==BUF_SIZE{self.flush()}});}#[inline]pub fn store_byte(&mut self,b:u8){self.buf[self.tail]=b;self.tail+=1;if self.tail==BUF_SIZE{self.flush();}}#[inline]pub fn store_string(&mut self,s:&str){let bytes=s.as_bytes();self.store(bytes);}#[inline]pub fn write<T:Writable>(&mut self,data:T){data.write(self)}#[inline]pub fn store_vec<T:Clone+Writable>(&mut self,v:&Vec<T>,delim:char){if v.is_empty(){return;}v[0].clone().write(self);for v in v.into_iter().skip(1){delim.write(self);v.clone().write(self);}}}struct DummyForFlush(u32);impl Drop for DummyForFlush{fn drop(&mut self){unsafe{OUTPUT.flush();}}}static mut OUTPUT:FastOutput<'static> =FastOutput::new();static mut STDOUTSOURCE:fn()->&'static mut FastOutput<'static> =init;thread_local!{static DUMMY:RefCell<DummyForFlush> =RefCell::new(DummyForFlush(0));}fn init()->&'static mut FastOutput<'static>{DUMMY.with(|d|{unsafe{core::ptr::write_volatile((&mut d.borrow_mut().0)as*const _ as*mut _,32)};});unsafe{STDOUTSOURCE=get_output;}let res=get_output();res.init();res}fn get_output()->&'static mut FastOutput<'static>{unsafe{&mut OUTPUT}}pub fn get_output_source()->&'static mut FastOutput<'static>{unsafe{STDOUTSOURCE()}}}mod parse_number{use core::convert::TryInto;#[allow(dead_code)]pub(in crate::__cargo_equip::crates::iolib)unsafe fn parse4c(buf:&[u8])->u64{let buf:[u8;4]=buf.try_into().unwrap();let mut chunk=u32::from_le_bytes(buf);let lower=(chunk&0x0f000f00)>>8;let upper=(chunk&0x000f000f)*10;chunk=lower+upper;let lower=(chunk&0x00ff0000)>>16;let upper=(chunk&0x000000ff)*100;(lower+upper)as u64}fn parse8c_core(mut chunk:u64)->u64{let lower=(chunk&0x0f000f000f000f00)>>8;let upper=(chunk&0x000f000f000f000f)*10;chunk=lower+upper;let lower=(chunk&0x00ff000000ff0000)>>16;let upper=(chunk&0x000000ff000000ff)*100;chunk=lower+upper;let lower=(chunk&0x0000ffff00000000)>>32;let upper=(chunk&0x000000000000ffff)*10000;lower+upper}pub(in crate::__cargo_equip::crates::iolib)unsafe fn parse8c(buf:&[u8])->u64{let buf:[u8;8]=buf.try_into().unwrap();let chunk=u64::from_le_bytes(buf);parse8c_core(chunk)}pub(in crate::__cargo_equip::crates::iolib)unsafe fn parse8lec(buf:&[u8],digits:u8)->u64{let buf:[u8;8]=buf.try_into().unwrap();let offset=(8-digits)<<3;let chunk=u64::from_le_bytes(buf).wrapping_shl(offset as u32);parse8c_core(chunk)}#[cfg(not(target_arch="x86_64"))]fn parse16c_core(mut chunk:u128)->u64{let lower=(chunk&0x0f000f000f000f000f000f000f000f00)>>8;let upper=(chunk&0x000f000f000f000f000f000f000f000f)*10;chunk=lower+upper;let lower=(chunk&0x00ff000000ff000000ff000000ff0000)>>16;let upper=(chunk&0x000000ff000000ff000000ff000000ff)*100;chunk=lower+upper;let lower=(chunk&0x0000ffff000000000000ffff00000000)>>32;let upper=(chunk&0x000000000000ffff000000000000ffff)*10000;chunk=lower+upper;let lower=(chunk>>64)as u64&0x00000000ffffffff;let upper=(chunk as u64&0x00000000ffffffff)*10000_0000;lower+upper}#[cfg(not(target_arch="x86_64"))]pub(in crate::__cargo_equip::crates::iolib)unsafe fn parse16c(buf:&[u8])->u64{let buf:[u8;16]=buf.try_into().unwrap();let chunk=u128::from_le_bytes(buf);parse16c_core(chunk)}#[cfg(not(target_arch="x86_64"))]pub(in crate::__cargo_equip::crates::iolib)unsafe fn parse16lec(buf:&[u8],digits:u8)->u64{let buf:[u8;16]=buf.try_into().unwrap();let offset=(16-digits as u32)<<3;let chunk=u128::from_le_bytes(buf).wrapping_shl(offset);parse16c_core(chunk)}#[cfg(target_arch="x86_64")]use std::arch::x86_64::{__m128i,_mm_lddqu_si128,_mm_madd_epi16,_mm_maddubs_epi16,_mm_packus_epi32,_mm_storeu_si128,_mm_sub_epi8,};union ConstSimd{arr:[u8;16],xmm:__m128i,}macro_rules!const_simd{($elem:expr)=>{unsafe{ConstSimd{arr:$elem}.xmm}};}static ZEROS:__m128i=const_simd!([b'0';16]);static TEN:__m128i=const_simd!([10,1,10,1,10,1,10,1,10,1,10,1,10,1,10,1]);static HUN:__m128i=const_simd!([100,0,1,0,100,0,1,0,100,0,1,0,100,0,1,0]);static THO:__m128i=const_simd!([16,39,1,0,16,39,1,0,0,0,0,0,0,0,0,0]);#[cfg(target_arch="x86_64")]#[target_feature(enable="sse2")]#[target_feature(enable="sse3")]#[target_feature(enable="sse4.1")]#[target_feature(enable="sse4.2")]pub(in crate::__cargo_equip::crates::iolib)unsafe fn parse16c(buf:&[u8])->u64{let mut chunk=_mm_lddqu_si128(buf.as_ptr()as _);chunk=_mm_madd_epi16(_mm_maddubs_epi16(_mm_sub_epi8(chunk,ZEROS),TEN),HUN);chunk=_mm_madd_epi16(_mm_packus_epi32(chunk,chunk),THO);let mut buf=[0u8;16];_mm_storeu_si128(buf.as_mut_ptr()as _,chunk);let res=u64::from_le_bytes(buf[..8].try_into().unwrap());((res&0xffffffff)*10000_0000)+(res>>32)}}pub use input::{get_stdin_source,FastInput,Readable};pub use output::get_output_source;#[macro_export]macro_rules!__cargo_equip_macro_def_iolib_scan{($(,)?)=>{};($v:ident:[[$($inner:tt)+];$len:expr]$(,$($rest:tt)*)?)=>{let$v=(0..$len).map(|_|{$crate::__cargo_equip::crates::iolib::scan!(w:[$($inner)+]);w}).collect::<Vec<_>>();$($crate::__cargo_equip::crates::iolib::scan!($($rest)*);)?};($v:ident:[$t:tt;$len:expr]$(,$($rest:tt)*)?)=>{let$v=(0..$len).map(|_|{$crate::__cargo_equip::crates::iolib::scan!($v:$t);$v}).collect::<Vec<_>>();$($crate::__cargo_equip::crates::iolib::scan!($($rest)*);)?};(@expandtuple,($t:tt))=>{{$crate::__cargo_equip::crates::iolib::scan!(w:$t);w}};(@expandtuple,($t:tt$(,$rest:tt)*))=>{($crate::__cargo_equip::crates::iolib::scan!(@expandtuple,($t))$(,$crate::__cargo_equip::crates::iolib::scan!(@expandtuple,($rest)))*)};($v:ident:($($rest:tt)*))=>{let$v=$crate::__cargo_equip::crates::iolib::scan!(@expandtuple,($($rest)*));};($v:ident:$t:tt$(,$($rest:tt)*)?)=>{let$v:$t=$crate::__cargo_equip::crates::iolib::get_stdin_source().read::<$t>();$($crate::__cargo_equip::crates::iolib::scan!($($rest)*);)?};($($rest:tt)*)=>{$crate::__cargo_equip::crates::iolib::scan!($($rest)*);};}macro_rules!scan{($($tt:tt)*)=>(crate::__cargo_equip_macro_def_iolib_scan!{$($tt)*})}#[macro_export]macro_rules!__cargo_equip_macro_def_iolib_put{()=>{};($t:expr)=>{$crate::__cargo_equip::crates::iolib::get_output_source().write($t);};($t:expr$(,$rest:tt)*)=>{$crate::__cargo_equip::crates::iolib::put!($t);$crate::__cargo_equip::crates::iolib::put!(' ');$crate::__cargo_equip::crates::iolib::put!($($rest)*);};}macro_rules!put{($($tt:tt)*)=>(crate::__cargo_equip_macro_def_iolib_put!{$($tt)*})}#[macro_export]macro_rules!__cargo_equip_macro_def_iolib_putln{()=>{$crate::__cargo_equip::crates::iolib::put!('\n');};($t:expr)=>{$crate::__cargo_equip::crates::iolib::put!($t);$crate::__cargo_equip::crates::iolib::put!('\n');};($t:expr$(,$rest:tt)*)=>{$crate::__cargo_equip::crates::iolib::put!($t);$crate::__cargo_equip::crates::iolib::put!(' ');$crate::__cargo_equip::crates::iolib::put!($($rest)*);$crate::__cargo_equip::crates::iolib::put!('\n');};}macro_rules!putln{($($tt:tt)*)=>(crate::__cargo_equip_macro_def_iolib_putln!{$($tt)*})}#[macro_export]macro_rules!__cargo_equip_macro_def_iolib_putvec_with_space{($t:expr)=>{$crate::__cargo_equip::crates::iolib::get_output_source().store_vec(&$t,' ');};}macro_rules!putvec_with_space{($($tt:tt)*)=>(crate::__cargo_equip_macro_def_iolib_putvec_with_space!{$($tt)*})}#[macro_export]macro_rules!__cargo_equip_macro_def_iolib_putvec_with_spaceln{($t:expr)=>{$crate::__cargo_equip::crates::iolib::putvec_with_space!($t);$crate::__cargo_equip::crates::iolib::putln!();};}macro_rules!putvec_with_spaceln{($($tt:tt)*)=>(crate::__cargo_equip_macro_def_iolib_putvec_with_spaceln!{$($tt)*})}} pub mod __modint_common_0_1_0 {mod modulo{use std::{arch::x86_64::__m256i,fmt::Debug,marker,mem::transmute};macro_rules!newtons_method{($mod:expr)=>{{let inv=$mod.wrapping_mul(2u32.wrapping_sub($mod.wrapping_mul($mod)));let inv=inv.wrapping_mul(2u32.wrapping_sub($mod.wrapping_mul(inv)));let inv=inv.wrapping_mul(2u32.wrapping_sub($mod.wrapping_mul(inv)));let inv=inv.wrapping_mul(2u32.wrapping_sub($mod.wrapping_mul(inv)));inv.wrapping_mul(2u32.wrapping_sub($mod.wrapping_mul(inv)))}};}pub trait Modulo:Clone+marker::Copy+PartialEq+Eq+Debug{const N:u32;const N2:u32=Self::N.wrapping_mul(2);const N_INV:u32=newtons_method!(Self::N);const N_PRIME:u32=Self::N_INV.wrapping_neg();const R:u32=((1u64<<32)%Self::N as u64)as u32;const R2:u32=((Self::N as u64).wrapping_neg()%Self::N as u64)as u32;const PRIM_ROOT:u32;const NX8:__m256i=unsafe{transmute([Self::N;8])};const N2X8:__m256i=unsafe{transmute([Self::N2;8])};const N_INVX8:__m256i=unsafe{transmute([Self::N_INV;8])};const N_PRIMEX8:__m256i=unsafe{transmute([Self::N_PRIME;8])};const RX8:__m256i=unsafe{transmute([Self::R;8])};const R2X8:__m256i=unsafe{transmute([Self::R2;8])};}macro_rules!impl_modulo{($({$name:ident,$modulo:literal,$prim_root:literal},)*)=>{$(#[derive(Debug,Clone,marker::Copy,PartialEq,Eq)]pub enum$name{}impl Modulo for$name{const N:u32=$modulo;const PRIM_ROOT:u32=$prim_root;})*};}impl_modulo!({Mod167772161,167772161,3},{Mod377487361,377487361,7},{Mod469762049,469762049,3},{Mod595591169,595591169,3},{Mod645922817,645922817,3},{Mod754974721,754974721,11},{Mod880803841,880803841,26},{Mod897581057,897581057,3},{Mod998244353,998244353,3},{Mod1000000007,1000000007,5},{Mod1107296257,1107296257,10},{Mod1224736769,1224736769,3},{Mod1300234241,1300234241,3},{Mod1484783617,1484783617,5},{Mod1711276033,1711276033,29},{Mod1811939329,1811939329,13},{Mod2013265921,2013265921,31},{Mod2088763393,2088763393,5},{Mod2113929217,2113929217,5},{Mod2130706433,2130706433,3},{Mod2281701377,2281701377,3},{Mod2483027969,2483027969,3},{Mod2533359617,2533359617,3},{Mod2634022913,2634022913,3},{Mod2717908993,2717908993,5},{Mod2868903937,2868903937,35},{Mod2885681153,2885681153,3},{Mod3221225473,3221225473,5},{Mod3238002689,3238002689,3},{Mod3489660929,3489660929,3},{Mod3892314113,3892314113,3},{Mod3942645761,3942645761,3},{Mod4076863489,4076863489,7},{Mod4194304001,4194304001,3},);}mod montgomery{use super::modulo::Modulo;use std::arch::x86_64::{__m256i,_mm256_add_epi32 as addi32,_mm256_add_epi64 as addi64,_mm256_and_si256 as and256,_mm256_blend_epi32 as blendi32,_mm256_cmpeq_epi32 as eqi32,_mm256_cmpgt_epi32,_mm256_max_epu32 as maxu32,_mm256_min_epu32,_mm256_mul_epu32 as mulu32,_mm256_mullo_epi32 as mulloi32,_mm256_or_si256,_mm256_setzero_si256 as zero256,_mm256_shuffle_epi32 as shufi32,_mm256_sub_epi32 as subi32,_mm256_xor_si256 as xor256,};const THRESHOLD:u32=1<<30;#[inline]pub const fn mreduce<M:Modulo>(t:u32)->u32{if M::N>THRESHOLD{let(t,f)=(((t.wrapping_mul(M::N_INV)as u64).wrapping_mul(M::N as u64)>>32)as u32).overflowing_neg();t.wrapping_add(M::N&(f as u32).wrapping_neg())}else{((t as u64+t.wrapping_mul(M::N_PRIME)as u64*M::N as u64)>>32)as u32}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn mreducex8<M:Modulo>(t:__m256i)->__m256i{if M::N>THRESHOLD{let t_ninv=mulloi32(t,M::N_INVX8);let t_ninv_n_lo=mulu32(t_ninv,M::NX8);let t_ninv_n_hi=mulu32(shufi32(t_ninv,0b10_11_00_01),M::NX8);let mr=blendi32(shufi32(t_ninv_n_lo,0b10_11_00_01),t_ninv_n_hi,0b10101010);let zero=zero256();let mask=eqi32(mr,zero);let mask=and256(M::NX8,xor256(mask,eqi32(mask,mask)));subi32(mask,mr)}else{let t_np=mulloi32(t,M::N_PRIMEX8);let res_lo=addi64(mulu32(t_np,M::NX8),blendi32(t,zero256(),0b10101010));let res_hi=addi64(mulu32(shufi32(t_np,0b10_11_00_01),M::NX8),blendi32(shufi32(t,0b10_11_00_01),zero256(),0b10101010));blendi32(shufi32(res_lo,0b10_11_00_01),res_hi,0b10101010)}}#[inline]pub const fn mmul<M:Modulo>(a:u32,b:u32)->u32{let t=a as u64*b as u64;if M::N>THRESHOLD{let(t,f)=((t>>32)as u32).overflowing_sub((((t as u32).wrapping_mul(M::N_INV)as u64).wrapping_mul(M::N as u64)>>32)as u32);t.wrapping_add(M::N&(f as u32).wrapping_neg())}else{((t+(t as u32).wrapping_mul(M::N_PRIME)as u64*M::N as u64)>>32)as u32}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn mmulx8<M:Modulo>(a:__m256i,b:__m256i)->__m256i{if M::N>THRESHOLD{let t1=mulu32(a,b);let t2=mulu32(shufi32(a,0b10_11_00_01),shufi32(b,0b10_11_00_01));let t_modinv=mulloi32(blendi32(t1,shufi32(t2,0b10_11_00_01),0b10101010),M::N_INVX8);let c=blendi32(shufi32(mulu32(t_modinv,M::NX8),0b10_11_00_01),mulu32(shufi32(t_modinv,0b10_11_00_01),M::NX8),0b10101010);let t=blendi32(shufi32(t1,0b10_11_00_01),t2,0b10101010);let one=eqi32(c,c);let mask=and256(M::NX8,xor256(one,eqi32(_mm256_min_epu32(t,c),c)));addi32(mask,subi32(t,c))}else{let t_lo=mulu32(a,b);let t_hi=mulu32(shufi32(a,0b10_11_00_01),shufi32(b,0b10_11_00_01));let t_np=mulloi32(blendi32(t_lo,shufi32(t_hi,0b10_11_00_01),0b10101010),M::N_PRIMEX8);let n64x4=blendi32(M::NX8,zero256(),0b10101010);let res_lo=addi64(t_lo,mulu32(t_np,n64x4));let res_hi=addi64(t_hi,mulu32(shufi32(t_np,0b10_11_00_01),n64x4));blendi32(shufi32(res_lo,0b10_11_00_01),res_hi,0b10101010)}}#[inline]pub const fn mrestore<M:Modulo>(t:u32)->u32{t-if M::N<=THRESHOLD{M::N&((t>=M::N)as u32).wrapping_neg()}else{0}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn mrestorex8<M:Modulo>(t:__m256i)->__m256i{if M::N>THRESHOLD{t}else{let mask=_mm256_or_si256(_mm256_cmpgt_epi32(t,M::NX8),eqi32(t,M::NX8));subi32(t,and256(mask,M::NX8))}}#[inline]pub const fn madd<M:Modulo>(a:u32,b:u32)->u32{if M::N>THRESHOLD{let(t,fa)=a.overflowing_add(b);let(u,fs)=t.overflowing_sub(M::N);let f=(fa||!fs)as u32;f*u+(1-f)*t}else{let res=a+b;res-(((res>=M::N2)as u32).wrapping_neg()&M::N2)}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn maddx8<M:Modulo>(a:__m256i,b:__m256i)->__m256i{if M::N>THRESHOLD{let diff=subi32(M::NX8,a);let mask=eqi32(diff,maxu32(diff,b));let val=addi32(subi32(a,M::NX8),b);addi32(val,and256(mask,M::NX8))}else{let res=addi32(a,b);let mask=eqi32(res,maxu32(res,M::N2X8));subi32(res,and256(M::N2X8,mask))}}#[inline]pub const fn msub<M:Modulo>(a:u32,b:u32)->u32{let(res,f)=a.overflowing_sub(b);if M::N>THRESHOLD{res.wrapping_add(M::N&(f as u32).wrapping_neg())}else{res.wrapping_add(M::N2&(f as u32).wrapping_neg())}}#[inline]#[target_feature(enable="avx2")]pub unsafe fn msubx8<M:Modulo>(a:__m256i,b:__m256i)->__m256i{if M::N>THRESHOLD{let mask=eqi32(b,maxu32(a,b));let c_neg=subi32(a,b);addi32(c_neg,and256(M::NX8,mask))}else{let mask=_mm256_cmpgt_epi32(b,a);addi32(subi32(a,b),and256(M::N2X8,mask))}}}pub use modulo::*;pub use montgomery::*;} pub mod montgomery_modint {use crate::__cargo_equip::preludes::montgomery_modint::*;mod simd{use crate::__cargo_equip::preludes::montgomery_modint::*;use super::MontgomeryModint;use modint_common::*;use numeric::{One,Zero};use std::arch::x86_64::{__m256i,_mm256_i32gather_epi32,_mm256_loadu_si256,_mm256_set1_epi32,_mm256_setzero_si256,_mm256_storeu_si256,_mm256_unpackhi_epi32,_mm256_unpackhi_epi64,_mm256_unpacklo_epi32,_mm256_unpacklo_epi64,};use std::marker::PhantomData;use std::ops::{Add,Mul,Sub};type Modint<M> =MontgomeryModint<M>;#[derive(Clone,Copy)]pub struct MontgomeryModintx8<M:Modulo>{val:__m256i,_phantom:PhantomData<fn()->M>,}type Modintx8<M> =MontgomeryModintx8<M>;impl<M:Modulo>Modintx8<M>{#[inline]pub fn splat_raw(val:u32)->Self{unsafe{Self{val:mmulx8::<M>(_mm256_set1_epi32(val as i32),M::R2X8),_phantom:PhantomData,}}}#[inline]pub fn splat(val:Modint<M>)->Self{Self::from_rawval(unsafe{_mm256_set1_epi32(val.val as i32)})}#[inline]pub fn from_rawval(val:__m256i)->Self{Self{val,_phantom:PhantomData}}#[inline]pub fn val(&self)->__m256i{unsafe{mrestorex8::<M>(mreducex8::<M>(self.val))}}#[inline]pub fn rawval(&self)->__m256i{self.val}#[inline]pub fn one()->Self{Self{val:M::RX8,_phantom:PhantomData}}#[inline]pub fn zero()->Self{Self{val:unsafe{_mm256_setzero_si256()},_phantom:PhantomData,}}#[inline]pub unsafe fn load(head:*const Modint<M>)->Self{Self{val:_mm256_loadu_si256(head as _),_phantom:PhantomData}}#[inline]pub unsafe fn store(&self,head:*mut Modint<M>){unsafe{_mm256_storeu_si256(head as _,self.val)}}#[inline]pub unsafe fn gather(head:*const Modint<M>,vindex:__m256i)->Self{Self::from_rawval(unsafe{_mm256_i32gather_epi32(head as _,vindex,4)})}#[inline]pub unsafe fn unpacklo64(self,other:Self)->Self{Self::from_rawval(_mm256_unpacklo_epi64(self.val,other.val))}#[inline]pub unsafe fn unpackhi64(self,other:Self)->Self{Self::from_rawval(_mm256_unpackhi_epi64(self.val,other.val))}#[inline]pub unsafe fn unpacklo32(self,other:Self)->Self{Self::from_rawval(_mm256_unpacklo_epi32(self.val,other.val))}#[inline]pub unsafe fn unpackhi32(self,other:Self)->Self{Self::from_rawval(_mm256_unpackhi_epi32(self.val,other.val))}}impl<M:Modulo>One for Modintx8<M>{#[inline]fn one()->Self{Self::one()}}impl<M:Modulo>Zero for Modintx8<M>{#[inline]fn zero()->Self{Self::zero()}}impl<M:Modulo>Add for Modintx8<M>{type Output=Self;#[inline(always)]fn add(self,rhs:Self)->Self::Output{Self{val:unsafe{maddx8::<M>(self.val,rhs.val)},_phantom:PhantomData,}}}impl<M:Modulo>Sub for Modintx8<M>{type Output=Self;#[inline(always)]fn sub(self,rhs:Self)->Self::Output{Self{val:unsafe{msubx8::<M>(self.val,rhs.val)},_phantom:PhantomData,}}}impl<M:Modulo>Mul for Modintx8<M>{type Output=Self;#[inline(always)]fn mul(self,rhs:Self)->Self::Output{Self{val:unsafe{mmulx8::<M>(self.val,rhs.val)},_phantom:PhantomData,}}}impl<M:Modulo>std::fmt::Debug for Modintx8<M>{fn fmt(&self,f:&mut std::fmt::Formatter<'_>)->std::fmt::Result{let mut dest=[0u32;8];unsafe{_mm256_storeu_si256(dest.as_mut_ptr()as*mut _,self.val);write!(f,"{:?}",dest)}}}}pub use modint_common::*;use numeric::{One,Zero};pub use simd::*;use std::convert::From;use std::marker::PhantomData;use std::num::ParseIntError;use std::ops::{Add,AddAssign,Div,DivAssign,Mul,MulAssign,Neg,Sub,SubAssign};use std::str::FromStr;#[doc=" MontgomeryModint"]#[derive(Clone,Copy,Eq)]pub struct MontgomeryModint<M:Modulo>{pub val:u32,_phantom:PhantomData<fn()->M>,}impl<M:Modulo>MontgomeryModint<M>{#[inline(always)]pub const fn new(val:u32)->Self{Self::raw(val.rem_euclid(M::N))}#[inline]pub const fn raw(val:u32)->Self{Self{val:mmul::<M>(val,M::R2),_phantom:PhantomData}}#[inline(always)]pub const fn from_rawval(val:u32)->Self{Self{val,_phantom:PhantomData}}#[inline]pub const fn val(&self)->u32{mrestore::<M>(mreduce::<M>(self.val))}#[inline(always)]pub const fn rawval(&self)->u32{self.val}#[inline(always)]pub const fn one()->Self{Self{val:M::R,_phantom:PhantomData}}#[inline(always)]pub const fn zero()->Self{Self{val:0,_phantom:PhantomData}}pub const fn pow(&self,mut n:u64)->Self{let mut val=self.val;let mut res=M::R;while n!=0{if n&1!=0{res=mmul::<M>(res,val);}val=mmul::<M>(val,val);n>>=1;}Self{val:res,_phantom:PhantomData}}#[inline]pub const fn nth_root(n:u32)->Self{debug_assert!(n==1<<n.trailing_zeros());MontgomeryModint::<M>::new(M::PRIM_ROOT).pow(M::N as u64-1+(M::N as u64-1)/n as u64)}#[inline(always)]pub const fn inv(&self)->Self{self.pow(M::N as u64-2)}#[inline]pub const fn add_const(self,rhs:Self)->Self{MontgomeryModint{val:madd::<M>(self.val,rhs.val),_phantom:PhantomData}}#[inline]pub const fn sub_const(self,rhs:Self)->Self{MontgomeryModint{val:msub::<M>(self.val,rhs.val),_phantom:PhantomData}}#[inline]pub const fn mul_const(self,rhs:Self)->Self{MontgomeryModint{val:mmul::<M>(self.val,rhs.val),_phantom:PhantomData}}#[inline]pub const fn div_const(self,rhs:Self)->Self{MontgomeryModint{val:self.mul_const(rhs.inv()).val,_phantom:PhantomData}}}impl<M:Modulo>One for MontgomeryModint<M>{#[inline]fn one()->Self{Self::one()}}impl<M:Modulo>Zero for MontgomeryModint<M>{#[inline]fn zero()->Self{Self::zero()}}impl<M:Modulo>Add for MontgomeryModint<M>{type Output=Self;#[inline(always)]fn add(self,rhs:Self)->Self::Output{self.add_const(rhs)}}impl<M:Modulo>Sub for MontgomeryModint<M>{type Output=Self;#[inline(always)]fn sub(self,rhs:Self)->Self::Output{self.sub_const(rhs)}}impl<M:Modulo>Mul for MontgomeryModint<M>{type Output=Self;#[inline(always)]fn mul(self,rhs:Self)->Self::Output{self.mul_const(rhs)}}impl<M:Modulo>Div for MontgomeryModint<M>{type Output=Self;#[inline(always)]fn div(self,rhs:Self)->Self::Output{self.div_const(rhs)}}impl<M:Modulo>Neg for MontgomeryModint<M>{type Output=Self;fn neg(self)->Self::Output{if self.val==0{self}else{Self{val:(M::N<<1)-self.val,_phantom:PhantomData}}}}impl<M:Modulo>PartialEq for MontgomeryModint<M>{fn eq(&self,other:&Self)->bool{mrestore::<M>(self.val)==mrestore::<M>(other.val)}fn ne(&self,other:&Self)->bool{!(self==other)}}impl<M:Modulo>AddAssign for MontgomeryModint<M>{fn add_assign(&mut self,rhs:Self){*self=*self+rhs;}}impl<M:Modulo>SubAssign for MontgomeryModint<M>{fn sub_assign(&mut self,rhs:Self){*self=*self-rhs;}}impl<M:Modulo>MulAssign for MontgomeryModint<M>{fn mul_assign(&mut self,rhs:Self){*self=*self*rhs;}}impl<M:Modulo>DivAssign for MontgomeryModint<M>{fn div_assign(&mut self,rhs:Self){*self=*self/rhs;}}impl<M:Modulo>std::fmt::Debug for MontgomeryModint<M>{fn fmt(&self,f:&mut std::fmt::Formatter<'_>)->std::fmt::Result{write!(f,"{}",self.val())}}impl<M:Modulo>std::fmt::Display for MontgomeryModint<M>{fn fmt(&self,f:&mut std::fmt::Formatter<'_>)->std::fmt::Result{write!(f,"{}",self.val())}}impl<M:Modulo>From<u32>for MontgomeryModint<M>{fn from(value:u32)->Self{Self::new(value)}}impl<M:Modulo>From<u64>for MontgomeryModint<M>{fn from(value:u64)->Self{Self::raw(value.rem_euclid(M::N as u64)as u32)}}impl<M:Modulo>From<i32>for MontgomeryModint<M>{fn from(value:i32)->Self{Self::raw(value.rem_euclid(M::N as i32)as u32)}}impl<M:Modulo>From<i64>for MontgomeryModint<M>{fn from(value:i64)->Self{Self::raw(value.rem_euclid(M::N as i64)as u32)}}impl<M:Modulo>FromStr for MontgomeryModint<M>{type Err=ParseIntError;fn from_str(s:&str)->Result<Self,Self::Err>{let neg=s.starts_with("-");let val=if neg{s[1..].bytes().fold(0u64,|s,v|s*10+(v-b'0')as u64)}else{s.bytes().fold(0u64,|s,v|s*10+(v-b'0')as u64)};if!neg&&val<M::N as u64{Ok(Self::raw(val as u32))}else if neg{Ok(Self::from(-(val as i64)))}else{Ok(Self::from(val))}}}} pub mod __numeric_0_1_0 {pub mod float{use super::Numeric;use std::ops::Neg;macro_rules!impl_numeric_trait_for_float{($($t:tt)*)=>{$(impl Numeric for$t{fn max_value()->Self{std::$t::MAX}fn min_value()->Self{std::$t::MIN}})*};}impl_numeric_trait_for_float!(f32 f64);pub trait Float:Numeric+Neg<Output=Self>{fn abs(self)->Self;fn acos(self)->Self;fn asin(self)->Self;fn atan(self)->Self;fn atan2(self,other:Self)->Self;fn cbrt(self)->Self;fn ceil(self)->Self;fn cos(self)->Self;fn floor(self)->Self;fn hypot(self,other:Self)->Self;fn is_infinite(self)->bool;fn is_nan(self)->bool;fn max(self,other:Self)->Self;fn min(self,other:Self)->Self;fn mul_add(self,a:Self,b:Self)->Self;fn powf(self,n:Self)->Self;fn powi(self,n:i32)->Self;fn round(self)->Self;fn signum(self)->Self;fn sin(self)->Self;fn sqrt(self)->Self;fn tan(self)->Self;fn to_radians(self)->Self;fn pi()->Self;}macro_rules!impl_float_trait{($($t:tt)*)=>{$(impl Float for$t{fn abs(self)->Self{self.abs()}fn acos(self)->Self{self.acos()}fn asin(self)->Self{self.asin()}fn atan(self)->Self{self.atan()}fn atan2(self,other:Self)->Self{self.atan2(other)}fn cbrt(self)->Self{self.cbrt()}fn ceil(self)->Self{self.ceil()}fn cos(self)->Self{self.cos()}fn floor(self)->Self{self.floor()}fn hypot(self,other:Self)->Self{self.hypot(other)}fn is_infinite(self)->bool{self.is_infinite()}fn is_nan(self)->bool{self.is_nan()}fn max(self,other:Self)->Self{self.max(other)}fn min(self,other:Self)->Self{self.min(other)}fn mul_add(self,a:Self,b:Self)->Self{self.mul_add(a,b)}fn powf(self,n:Self)->Self{self.powf(n)}fn powi(self,n:i32)->Self{self.powi(n)}fn round(self)->Self{self.round()}fn signum(self)->Self{self.signum()}fn sin(self)->Self{self.sin()}fn sqrt(self)->Self{self.sqrt()}fn tan(self)->Self{self.tan()}fn to_radians(self)->Self{self.to_radians()}fn pi()->Self{std::$t::consts::PI}})*};}impl_float_trait!(f32 f64);}pub mod integer{use super::Numeric;use std::ops::{BitAnd,BitAndAssign,BitOr,BitOrAssign,BitXor,BitXorAssign,Rem,RemAssign,Shl,ShlAssign,Shr,ShrAssign};macro_rules!impl_numeric_trait_for_integer{($($t:tt)*)=>{$(impl Numeric for$t{fn max_value()->Self{std::$t::MAX}fn min_value()->Self{std::$t::MIN}})*};}impl_numeric_trait_for_integer!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize);pub trait Integer:Numeric+Rem<Self,Output=Self>+RemAssign+Shl<i32,Output=Self>+Shl<i64,Output=Self>+Shl<u32,Output=Self>+Shl<u64,Output=Self>+Shl<usize,Output=Self>+Shr<i32,Output=Self>+Shr<i64,Output=Self>+Shr<u32,Output=Self>+Shr<u64,Output=Self>+Shr<usize,Output=Self>+ShlAssign<i32>+ShlAssign<i64>+ShlAssign<u32>+ShlAssign<u64>+ShlAssign<usize>+ShrAssign<i32>+ShrAssign<i64>+ShrAssign<u32>+ShrAssign<u64>+ShrAssign<usize>+BitAnd<Self,Output=Self>+BitOr<Self,Output=Self>+BitXor<Self,Output=Self>+BitAndAssign+BitOrAssign+BitXorAssign+std::hash::Hash+Eq+Ord{fn abs_diff(self,other:Self)->Self;fn count_ones(self)->u32;fn count_zeros(self)->u32;fn div_euclid(self,rhs:Self)->Self;fn leading_ones(self)->u32;fn leading_zeros(self)->u32;fn rem_euclid(self,rhs:Self)->Self;fn reverse_bits(self)->Self;fn rotate_left(self,n:u32)->Self;fn rotate_right(self,n:u32)->Self;fn trailing_ones(self)->u32;fn trailing_zeros(self)->u32;fn overflowing_add(self,rhs:Self)->(Self,bool);fn overflowing_mul(self,rhs:Self)->(Self,bool);fn overflowing_neg(self)->(Self,bool);fn overflowing_sub(self,rhs:Self)->(Self,bool);fn saturating_add(self,rhs:Self)->Self;fn saturating_mul(self,rhs:Self)->Self;fn saturating_sub(self,rhs:Self)->Self;fn wrapping_add(self,rhs:Self)->Self;fn wrapping_mul(self,rhs:Self)->Self;fn wrapping_neg(self)->Self;fn wrapping_sub(self,rhs:Self)->Self;}macro_rules!impl_integer_trait{($($t:ty)*)=>{$(impl Integer for$t{fn abs_diff(self,other:Self)->Self{std::cmp::max(self,other)-std::cmp::min(self,other)}fn count_ones(self)->u32{self.count_ones()}fn count_zeros(self)->u32{self.count_zeros()}fn div_euclid(self,rhs:Self)->Self{self.div_euclid(rhs)}fn leading_ones(self)->u32{(!self).leading_zeros()}fn leading_zeros(self)->u32{self.leading_zeros()}fn rem_euclid(self,rhs:Self)->Self{self.rem_euclid(rhs)}fn reverse_bits(self)->Self{self.reverse_bits()}fn rotate_left(self,n:u32)->Self{self.rotate_left(n)}fn rotate_right(self,n:u32)->Self{self.rotate_right(n)}fn trailing_ones(self)->u32{(!self).trailing_zeros()}fn trailing_zeros(self)->u32{self.trailing_zeros()}fn overflowing_add(self,rhs:Self)->(Self,bool){self.overflowing_add(rhs)}fn overflowing_mul(self,rhs:Self)->(Self,bool){self.overflowing_mul(rhs)}fn overflowing_neg(self)->(Self,bool){self.overflowing_neg()}fn overflowing_sub(self,rhs:Self)->(Self,bool){self.overflowing_sub(rhs)}fn saturating_add(self,rhs:Self)->Self{self.saturating_add(rhs)}fn saturating_mul(self,rhs:Self)->Self{self.saturating_mul(rhs)}fn saturating_sub(self,rhs:Self)->Self{self.saturating_sub(rhs)}fn wrapping_add(self,rhs:Self)->Self{self.wrapping_add(rhs)}fn wrapping_mul(self,rhs:Self)->Self{self.wrapping_mul(rhs)}fn wrapping_neg(self)->Self{self.wrapping_neg()}fn wrapping_sub(self,rhs:Self)->Self{self.wrapping_sub(rhs)}})*};}impl_integer_trait!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize);}pub mod one{pub trait One{fn one()->Self;}macro_rules!impl_one_integer{($($t:ty)*)=>{$(impl One for$t{fn one()->$t{1}})*};}impl_one_integer!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize);macro_rules!impl_one_float{($($t:ty)*)=>{$(impl One for$t{fn one()->$t{1.0}})*};}impl_one_float!(f32 f64);}pub mod signed{use std::ops::Neg;pub trait Signed:Neg<Output=Self>+std::marker::Sized{fn is_negative(self)->bool;fn is_positive(self)->bool;}macro_rules!impl_integer_trait{($($t:ty)*)=>{$(impl Signed for$t{fn is_negative(self)->bool{self.is_negative()}fn is_positive(self)->bool{self.is_positive()}})*};}impl_integer_trait!(i8 i16 i32 i64 i128 isize);}pub mod zero{pub trait Zero{fn zero()->Self;}macro_rules!impl_zero_integer{($($t:ty)*)=>{$(impl Zero for$t{fn zero()->$t{0}})*};}impl_zero_integer!(i8 i16 i32 i64 i128 isize u8 u16 u32 u64 u128 usize);macro_rules!impl_zero_float{($($t:ty)*)=>{$(impl Zero for$t{fn zero()->$t{0.0}})*};}impl_zero_float!(f32 f64);}pub use float::Float;pub use integer::Integer;pub use one::One;pub use signed::Signed;pub use zero::Zero;use std::ops::{Add,AddAssign,Div,DivAssign,Mul,MulAssign,Sub,SubAssign};#[derive(Debug)]pub struct Error(pub&'static str);impl std::fmt::Display for Error{fn fmt(&self,f:&mut std::fmt::Formatter<'_>)->std::fmt::Result{write!(f,"{}",self.0)}}impl std::error::Error for Error{}pub trait UnorderedNumeric:Add<Self,Output=Self>+Sub<Self,Output=Self>+Mul<Self,Output=Self>+Div<Self,Output=Self>+AddAssign+SubAssign+MulAssign+DivAssign+std::fmt::Debug+std::fmt::Display+Clone+Copy+PartialEq+Default+Zero+One{}pub trait Numeric:Add<Self,Output=Self>+Sub<Self,Output=Self>+Mul<Self,Output=Self>+Div<Self,Output=Self>+AddAssign+SubAssign+MulAssign+DivAssign+std::fmt::Debug+std::fmt::Display+Clone+Copy+PartialEq+PartialOrd+Default+Zero+One{fn max_value()->Self;fn min_value()->Self;}pub trait IntoFloat:Numeric{fn as_f64(self)->f64;fn as_f32(self)->f32;}impl IntoFloat for i64{fn as_f64(self)->f64{self as f64}fn as_f32(self)->f32{self as f32}}} } pub(crate) mod macros { pub mod convolution_simd {} pub mod iolib {pub use crate::{__cargo_equip_macro_def_iolib_put as put,__cargo_equip_macro_def_iolib_putln as putln,__cargo_equip_macro_def_iolib_putvec_with_space as putvec_with_space,__cargo_equip_macro_def_iolib_putvec_with_spaceln as putvec_with_spaceln,__cargo_equip_macro_def_iolib_scan as scan};} pub mod __modint_common_0_1_0 {} pub mod montgomery_modint {} pub mod __numeric_0_1_0 {} } pub(crate) mod prelude {pub use crate::__cargo_equip::crates::*;} mod preludes { pub mod convolution_simd {pub(in crate::__cargo_equip)use crate::__cargo_equip::crates::montgomery_modint;} pub mod iolib {} pub mod __modint_common_0_1_0 {} pub mod montgomery_modint {pub(in crate::__cargo_equip)use crate::__cargo_equip::crates::{__modint_common_0_1_0 as modint_common,__numeric_0_1_0 as numeric};} pub mod __numeric_0_1_0 {} } }