Checking pQF vs SKAT

Here we’re checking the sparse-matrix version of pQF on a really unsuitably small example with 67 markers, because it’s the one that comes with the SKAT package: see help(SKAT)

library(SKAT)   #CRAN

## Loading required package: Matrix

## Loading required package: SPAtest

## Loading required package: RSpectra

library(bigQF)  #github/tslumley
set.seed(2018-5-18)

First example: continuous phenotype, no adjustment

data(SKAT.example)
attach(SKAT.example) #look, it's not my fault, that's how they did it.

obj<-SKAT_Null_Model(y.c ~ 1, out_type="C")
skat.out1<-SKAT(Z, obj)
skat.qf1a<-SKAT.matrixfree(Z)
skat.qf1b<-SKAT.matrixfree(Z,model=lm(y.c~1))
skat.qf1c<-SKAT.matrixfree(Z,model=glm(y.c~1))

skat.out1$Q

##          [,1]
## [1,] 234803.8

skat.qf1a$Q(y.c)

##          [,1]
## [1,] 234803.8

skat.qf1b$Q()    ## phenotype used in fitting

## [1] 234803.8

skat.qf1b$Q(y.c) ## new phenotype

## [1] 234803.8

skat.out1$p.value

## [1] 0.01874576

pQF(skat.out1$Q,skat.qf1a,neig=60,convolution.method="integration" )

##            [,1]
## [1,] 0.01874576

pQF(skat.out1$Q,skat.qf1b,neig=60,convolution.method="integration" )

## Warning in pchisqsum(x, c(rep(1, n), nu), c(ee, scale), method = method, :
## Negative/fractional df removed

##            [,1]
## [1,] 0.01874551

pQF(skat.out1$Q,skat.qf1c,neig=60,convolution.method="integration" )

## Warning in pchisqsum(x, c(rep(1, n), nu), c(ee, scale), method = method, :
## Negative/fractional df removed

##            [,1]
## [1,] 0.01874551

The warning indicates the remainder term in the approximation has been dropped, which is appropriate. If you don’t specify convolution.method the default is the saddlepoint approximation – the impact is in the third decimal place.

And more systematically

set.seed(2018-5-18)
p<-lapply(1:65, function(k) pQF(skat.out1$Q, skat.qf1a, neig=k,
                                      convolution.method="integration",tr2.sample.size=1000 )
                       )
pdf<-data.frame(p=do.call(c,p),k=1:65)
plot(p~k,data=pdf,pch=19,col="orange", ylim=c(0.017,0.020))
abline(h=skat.out1$p.value,lty=2)