#include<immintrin.h>//structbit_vector{usingu32=uint32_t;usingi64=int64_t;usingu64=uint64_t;staticconstexpru32w=64;vector<u64>block;vector<u32>count;u32n,zeros;inlineu32get(u32i)const{returnu32(block[i/w]>>(i%w))&1u;}inlinevoidset(u32i){block[i/w]|=1LL<<(i%w);}bit_vector(){}bit_vector(int_n){init(_n);}__attribute__((optimize("O3,unroll-loops")))voidinit(int_n){n=zeros=_n;block.resize(n/w+1,0);count.resize(block.size(),0);}__attribute__((target("popcnt")))voidbuild(){for(u32i=1;i<block.size();++i)count[i]=count[i-1]+_mm_popcnt_u64(block[i-1]);zeros=rank0(n);}inlineu32rank0(u32i)const{returni-rank1(i);}__attribute__((target("bmi2,popcnt")))inlineu32rank1(u32i)const{returncount[i/w]+_mm_popcnt_u64(_bzhi_u64(block[i/w],i%w));}};template<typenameS,typenameT,T(*f)(T,T),T(*I)()>structWaveletMatrix{usingu32=uint32_t;usingi64=int64_t;usingu64=uint64_t;structSegTree{intN;intsize;vector<T>data;SegTree(int_n){init(_n);}voidinit(int_N){N=_N;size=1;while(size<N)size<<=1;data.assign(2*size,I());}voidset(intk,Tx){data[k+size]=x;}voidbuild(){for(intk=size-1;k>0;k--){data[k]=f(data[2*k],data[2*k+1]);}}voidupdate(intk,Tx){k+=size;data[k]=x;while(k>>=1){data[k]=f(data[2*k],data[2*k+1]);}}voidadd(intk,Tx){k+=size;data[k]+=x;while(k>>=1){data[k]=f(data[2*k],data[2*k+1]);}}// query to [a, b)Tquery(inta,intb)const{TL=I(),R=I();for(a+=size,b+=size;a<b;a>>=1,b>>=1){if(a&1)L=f(L,data[a++]);if(b&1)R=f(data[--b],R);}returnf(L,R);}};usingP=pair<S,S>;intn,lg;vector<bit_vector>bv;vector<SegTree>seg;vector<P>ps;vector<S>ys;WaveletMatrix(){}voidadd_point(Sx,Sy){ps.emplace_back(x,y);ys.emplace_back(y);}voidbuild(){sort(begin(ps),end(ps));ps.erase(unique(begin(ps),end(ps)),end(ps));n=ps.size();sort(begin(ys),end(ys));ys.erase(unique(begin(ys),end(ys)),end(ys));vector<u32>cur(n),nxt(n);for(inti=0;i<n;++i)cur[i]=yid(ps[i].second);lg=__lg(max(n,1))+1;bv.assign(lg,n);seg.assign(lg,n);for(inth=lg-1;h>=0;--h){for(inti=0;i<n;++i)if((cur[i]>>h)&1)bv[h].set(i);bv[h].build();array<decltype(begin(nxt)),2>it{begin(nxt),begin(nxt)+bv[h].zeros};for(inti=0;i<n;++i)*it[bv[h].get(i)]++=cur[i];swap(cur,nxt);}}intxid(Sx)const{returnlower_bound(begin(ps),end(ps),make_pair(x,S()),[](constP&a,constP&b){returna.first<b.first;})-begin(ps);}intyid(Sy)const{returnlower_bound(begin(ys),end(ys),y)-begin(ys);}voidadd(Sx,Sy,Tval){inti=lower_bound(begin(ps),end(ps),P{x,y})-begin(ps);for(inth=lg-1;h>=0;--h){inti0=bv[h].rank0(i);if(bv[h].get(i))i+=bv[h].zeros-i0;elsei=i0;seg[h].add(i,val);}}T_sum(intl,intr,u32upper)const{Tres=I();for(inth=lg;h--;){intl0=bv[h].rank0(l),r0=bv[h].rank0(r);if((upper>>h)&1){res=f(res,seg[h].query(l0,r0));l+=bv[h].zeros-l0;r+=bv[h].zeros-r0;}else{l=l0,r=r0;}}returnres;}Tsum(SL,SD,SR,SU)const{intl=xid(L),r=xid(R);return_sum(l,r,yid(U))-_sum(l,r,yid(D));}};
#line 1 "data-structure-2d/segment-tree-on-wavelet-matrix.hpp"
#include<immintrin.h>//structbit_vector{usingu32=uint32_t;usingi64=int64_t;usingu64=uint64_t;staticconstexpru32w=64;vector<u64>block;vector<u32>count;u32n,zeros;inlineu32get(u32i)const{returnu32(block[i/w]>>(i%w))&1u;}inlinevoidset(u32i){block[i/w]|=1LL<<(i%w);}bit_vector(){}bit_vector(int_n){init(_n);}__attribute__((optimize("O3,unroll-loops")))voidinit(int_n){n=zeros=_n;block.resize(n/w+1,0);count.resize(block.size(),0);}__attribute__((target("popcnt")))voidbuild(){for(u32i=1;i<block.size();++i)count[i]=count[i-1]+_mm_popcnt_u64(block[i-1]);zeros=rank0(n);}inlineu32rank0(u32i)const{returni-rank1(i);}__attribute__((target("bmi2,popcnt")))inlineu32rank1(u32i)const{returncount[i/w]+_mm_popcnt_u64(_bzhi_u64(block[i/w],i%w));}};template<typenameS,typenameT,T(*f)(T,T),T(*I)()>structWaveletMatrix{usingu32=uint32_t;usingi64=int64_t;usingu64=uint64_t;structSegTree{intN;intsize;vector<T>data;SegTree(int_n){init(_n);}voidinit(int_N){N=_N;size=1;while(size<N)size<<=1;data.assign(2*size,I());}voidset(intk,Tx){data[k+size]=x;}voidbuild(){for(intk=size-1;k>0;k--){data[k]=f(data[2*k],data[2*k+1]);}}voidupdate(intk,Tx){k+=size;data[k]=x;while(k>>=1){data[k]=f(data[2*k],data[2*k+1]);}}voidadd(intk,Tx){k+=size;data[k]+=x;while(k>>=1){data[k]=f(data[2*k],data[2*k+1]);}}// query to [a, b)Tquery(inta,intb)const{TL=I(),R=I();for(a+=size,b+=size;a<b;a>>=1,b>>=1){if(a&1)L=f(L,data[a++]);if(b&1)R=f(data[--b],R);}returnf(L,R);}};usingP=pair<S,S>;intn,lg;vector<bit_vector>bv;vector<SegTree>seg;vector<P>ps;vector<S>ys;WaveletMatrix(){}voidadd_point(Sx,Sy){ps.emplace_back(x,y);ys.emplace_back(y);}voidbuild(){sort(begin(ps),end(ps));ps.erase(unique(begin(ps),end(ps)),end(ps));n=ps.size();sort(begin(ys),end(ys));ys.erase(unique(begin(ys),end(ys)),end(ys));vector<u32>cur(n),nxt(n);for(inti=0;i<n;++i)cur[i]=yid(ps[i].second);lg=__lg(max(n,1))+1;bv.assign(lg,n);seg.assign(lg,n);for(inth=lg-1;h>=0;--h){for(inti=0;i<n;++i)if((cur[i]>>h)&1)bv[h].set(i);bv[h].build();array<decltype(begin(nxt)),2>it{begin(nxt),begin(nxt)+bv[h].zeros};for(inti=0;i<n;++i)*it[bv[h].get(i)]++=cur[i];swap(cur,nxt);}}intxid(Sx)const{returnlower_bound(begin(ps),end(ps),make_pair(x,S()),[](constP&a,constP&b){returna.first<b.first;})-begin(ps);}intyid(Sy)const{returnlower_bound(begin(ys),end(ys),y)-begin(ys);}voidadd(Sx,Sy,Tval){inti=lower_bound(begin(ps),end(ps),P{x,y})-begin(ps);for(inth=lg-1;h>=0;--h){inti0=bv[h].rank0(i);if(bv[h].get(i))i+=bv[h].zeros-i0;elsei=i0;seg[h].add(i,val);}}T_sum(intl,intr,u32upper)const{Tres=I();for(inth=lg;h--;){intl0=bv[h].rank0(l),r0=bv[h].rank0(r);if((upper>>h)&1){res=f(res,seg[h].query(l0,r0));l+=bv[h].zeros-l0;r+=bv[h].zeros-r0;}else{l=l0,r=r0;}}returnres;}Tsum(SL,SD,SR,SU)const{intl=xid(L),r=xid(R);return_sum(l,r,yid(U))-_sum(l,r,yid(D));}};