library(ismev) library(fExtremes) y=read.table("D:\\nasdaq_logreturn.txt") y1=as.matrix(y,nc=1) yy=-y1 #losses! summary(yy) # block maxima method b=blockMaxima(yy,20) #monthly logreturns hist(b) #Fitting distribution #goodness of fit #Diagnostics zz=gev.fit(b) gev.diag(zz) ks.test(b, "pgev",xi=zz$mle[3],mu=zz$mle[1],beta=zz$mle[2])$p.value #we accept the fit ks.test(b, "pnorm",m=mean(b),s=sd(b))$p.value #rejected b1=blockMaxima(yy,5) b2=blockMaxima(yy,10) b3=blockMaxima(yy,20) b4=blockMaxima(yy,50) b5=blockMaxima(yy,100) ff1=gev.fit(b1) ff2=gev.fit(b2) ff3=gev.fit(b3) ff4=gev.fit(b4) ff5=gev.fit(b5) ks.test(b1, "pgev",xi=ff1$mle[3],mu=ff1$mle[1],beta=ff1$mle[2])$p.value ks.test(b2, "pgev",xi=ff2$mle[3],mu=ff2$mle[1],beta=ff2$mle[2])$p.value ks.test(b3, "pgev",xi=ff3$mle[3],mu=ff3$mle[1],beta=ff3$mle[2])$p.value ks.test(b4, "pgev",xi=ff4$mle[3],mu=ff4$mle[1],beta=ff4$mle[2])$p.value ks.test(b5, "pgev",xi=ff5$mle[3],mu=ff5$mle[1],beta=ff5$mle[2])$p.value #all are accepted #but it is not exact: ism=100 ered=rep(0,times=100) for (i in 1:ism) {y=rgev(200,xi=ff$mle[3],mu=ff$mle[1],beta=ff$mle[2]) ff5=gev.fit(y) ered[i]=ks.test(y, "pgev",xi=ff5$mle[3],mu=ff5$mle[1],beta=ff5$mle[2])$p.value } hist(ered) #not uniform, because the parameters were estimated #it is OK if the parameters are known for (i in 1:ism) {y=rgev(200,xi=ff$mle[3],mu=ff$mle[1],beta=ff$mle[2]) ered[i]=ks.test(y, "pgev",xi=ff$mle[3],mu=ff$mle[1],beta=ff$mle[2])$p.value } hist(ered) #so that is why we must simulate the critical values! ism=1000 #some minutes to run n=length(b3) eredr=rep(0,times=ism) for (i in 1:ism) {y=rgev(n,xi=ff3$mle[3],mu=ff3$mle[1],beta=ff3$mle[2]) ff5=gev.fit(y) eredr[i]=ks.test(y, "pgev",xi=ff5$mle[3],mu=ff5$mle[1],beta=ff5$mle[2])$statistic } hist(eredr) #some minutes to run critn=eredr sb3=ks.test(b3, "pgev",xi=ff3$mle[3],mu=ff3$mle[1],beta=ff3$mle[2])$statistic sum(sb3uu],freq=F,breaks=20) lines(xx,dgpd(xx,pars[1],uu,pars[2]),col=2) plot(density(yy[yy>uu])) # kernel density lines(xx,dgpd(xx,pars[1],uu,pars[2]),col=2) #goodness of fit zz=gpd.fit(yy,threshold=uu) pars2=zz$mle dif[1]=abs(abs(pars[1])-abs(pars2[2])) dif[2]=abs(abs(pars[2])-abs(pars2[1])) gpd.diag(zz) # pars # xi beta # -0.072277274840058220 0.015019724528503878 # # $mle # [1] 0.01501509 -0.07229043 # # dif # 1.3151620015666721e-05 4.6334867610901831e-06 #2: threshold = 0.03 uu=0.03 xx=c(1:100)/1000 f=gpdFit(yy,u=uu) pars=f@fit$par.ests hist(yy[yy>uu],freq=F,breaks=20) lines(xx,dgpd(xx,pars[1],uu,pars[2]),col=2) plot(density(yy[yy>uu])) # kernel density lines(xx,dgpd(xx,pars[1],uu,pars[2]),col=2) #goodness of fit zz=gpd.fit(yy,threshold=uu) pars2=zz$mle dif[1]=abs(abs(pars[1])-abs(pars2[2])) dif[2]=abs(abs(pars[2])-abs(pars2[1])) gpd.diag(zz) # $mle # [1] 0.01135923 0.11162187 # # pars # xi beta # 0.111328388885567869 0.011360813265348069 # # dif # 2.9348106126209084e-04 1.5810420300882422e-06 #3: threshold = 0.01 uu=0.01 xx=c(1:100)/1000 f=gpdFit(yy,u=uu) pars=f@fit$par.ests hist(yy[yy>uu],freq=F,breaks=20) lines(xx,dgpd(xx,pars[1],uu,pars[2]),col=2) plot(density(yy[yy>uu])) # kernel density lines(xx,dgpd(xx,pars[1],uu,pars[2]),col=2) #goodness of fit zz=gpd.fit(yy,threshold=uu) pars2=zz$mle dif[1]=abs(abs(pars[1])-abs(pars2[2])) dif[2]=abs(abs(pars[2])-abs(pars2[1])) gpd.diag(zz) # $mle # [1] 0.01353422 -0.01584399 # # pars # xi beta # -0.016128713572147221 0.013538638150638936 # # dif # 2.8472557081378469e-04 4.4177397296122495e-06 #comment: u=0.03 looks the best - there are enough data and the plots #show a reasonable fit