r - R の非線形最小二乗法 - Heligman Pollard モデルパラメーターに適合する Levenberg Marquardt

Question

コスタキスの紙の解を再現しようとしています。この論文では、de Heligman-Pollard モデルを使用して、要約された死亡表を完全な生命表に拡張します。モデルには、適合する必要がある 8 つのパラメーターがあります。著者は修正された Gauss-Newton アルゴリズムを使用しました。このアルゴリズム (E04FDF) は、コンピュータプログラムの NAG ライブラリの一部です。Levenberg Marquardt は、同じ一連のパラメーターを生成するべきではありませんか? LM アルゴリズムのコードまたはアプリケーションの何が問題になっていますか?

library(minpack.lm)


## Heligman-Pollard is used to expand an abridged table.
## nonlinear least squares algorithm is used to fit the parameters on nqx observed over 5 year   intervals (5qx)
AGE <- c(0, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70)
MORTALITY <- c(0.010384069, 0.001469140, 0.001309318, 0.003814265, 0.005378395, 0.005985625,     0.006741766, 0.009325056, 0.014149626, 0.021601755, 0.034271934, 0.053836246, 0.085287751, 0.136549522, 0.215953304)

## The start parameters for de Heligman-Pollard Formula (Converged set a=0.0005893,b=0.0043836,c=0.0828424,d=0.000706,e=9.927863,f=22.197312,g=0.00004948,h=1.10003)
## I modified a random parameter "a" in order to have a start values. The converged set is listed above. 
parStart <- list(a=0.0008893,b=0.0043836,c=0.0828424,d=0.000706,e=9.927863,f=22.197312,g=0.00004948,h=1.10003)

## The Heligman-Pollard Formula (HP8) = qx/px = ...8 parameter equation
HP8 <-function(parS,x)
ifelse(x==0, parS$a^((x+parS$b)^parS$c) + parS$g*parS$h^x, 
             parS$a^((x+parS$b)^parS$c) + parS$d*exp(-parS$e*(log(x/parS$f))^2) +
                 parS$g*parS$h^x)

## Define qx = HP8/(1+HP8)
qxPred <- function(parS,x) HP8(parS,x)/(1+HP8(parS,x))

## Calculate nqx predicted by HP8 model (nqxPred(parStart,x))
nqxPred <- function(parS,x)
(1 -(1-qxPred(parS,x)) * (1-qxPred(parS,x+1)) *
    (1-qxPred(parS,x+2)) * (1-qxPred(parS,x+3)) *
    (1-qxPred(parS,x+4))) 

##Define Residual Function, the relative squared distance is minimized  
ResidFun <- function(parS, Observed,x) (nqxPred(parS,x)/Observed-1)^2

## Applying the nls.lm algo. 
nls.out <- nls.lm(par=parStart, fn = ResidFun, Observed = MORTALITY, x = AGE,
                  control = nls.lm.control(nprint=1,
                                           ftol = .Machine$double.eps,
                                           ptol = .Machine$double.eps,
                                           maxfev=10000, maxiter = 500))

summary(nls.out)


## The author used a modified Gauss-Newton algorithm, this alogorithm (E04FDF) is part of the NAG library of computer programs
## Should not Levenberg Marquardt yield the same set of parameters

score 0 · Accepted Answer

@BenBolker, fitting the parameters with the entire dataset (underlying qx) values. Still not able to reproduce parameters

library(minpack.lm)

library(ggplot2)

library(optimx)

getwd()

d <- data.frame(AGE = seq(0,74), MORTALITY=c(869,58,40,37,36,35,32,28,29,23,24,22,24,28,
                                           33,52,57,77,93,103,103,109,105,114,108,112,119,
                                           125,117,127,125,134,134,131,152,179,173,182,199,
                                           203,232,245,296,315,335,356,405,438,445,535,594,
                                           623,693,749,816,915,994,1128,1172,1294,1473,
                                           1544,1721,1967,2129,2331,2559,2901,3203,3470,
                                           3782,4348,4714,5245,5646))


d$MORTALITY <- d$MORTALITY/100000

ggplot(d,aes(AGE,MORTALITY))+geom_point()  

##Not allowed to post Images

g1 <- ggplot(d,aes(AGE,MORTALITY))+geom_point()

g1+geom_smooth()## with loess fit

Reported Parameters:

parConv <- c(a=0.0005893,b=0.0043836,c=0.0828424,d=0.000706,e=9.927863,f=22.197312,
             g=0.00004948,h=1.10003)

parStart <- parConv

parStart["a"] <- parStart["a"]+3e-4


## Define qx = HP8/(1+HP8)

HP8 <-function(parS,x)
with(as.list(parS),
ifelse(x==0, a^((x+b)^c) + g*h^x, a^((x+b)^c) + d*exp(-e*(log(x/f))^2) + g*h^x))



qxPred <- function(parS,x) {
  h <- HP8(parS,x)
  h/(1+h)
}



##Define Residual Function, the relative squared distance is minimized,
ResidFun <- function(parS, Observed,x) (qxPred(parS,x)/Observed-1)

ssqfun <- function(parS, Observed, x) {
  sum(ResidFun(parS, Observed, x)^2)
}

nls.out <- nls.lm(par=parStart, fn = ResidFun, Observed = d$MORTALITY, x = d$AGE, 
                  control = nls.lm.control(nprint=1, ftol = sqrt(.Machine$double.eps), 
                  ptol = sqrt(.Machine$double.eps), maxfev=1000, maxiter=1000))


parNLS <- coef(nls.out)

pred0 <- qxPred(as.list(parConv),d$AGE)
pred1 <- qxPred(as.list(parNLS),d$AGE)


#Binds Row wise the dataframes from pred0 and pred1
dPred <- with(d,rbind(data.frame(AGE,MORTALITY=pred0,w="conv"),
      data.frame(AGE,MORTALITY=pred1,w="nls")))


g1 + geom_line(data=dPred,aes(colour=w))

round(cbind(parNLS,parConv),7)

mvec <- c('Nelder-Mead','BFGS','CG','L-BFGS-B','nlm','nlminb','spg','ucminf')
opt1 <- optimx(par=parStart, fn = ssqfun,
    Observed = d$MORTALITY, x = d$AGE,
    itnmax=5000,
    method=mvec, control=list(all.methods=TRUE,kkt=TRUE,)
## control=list(all.methods=TRUE,kkt=TRUE)) ## Boom

get.result(opt1, attribute= c("fvalues","method", "grs", "itns",
           "conv", "KKT1", "KKT2", "xtimes"))

##       method       fvalues  grs itns conv KKT1 KKT2 xtimes
##5         nlm 8.988466e+307   NA   NA 9999   NA   NA      0
##4    L-BFGS-B 8.988466e+307 NULL NULL 9999   NA   NA      0
##2          CG 8.988466e+307 NULL NULL 9999   NA   NA   0.02
##1        BFGS 8.988466e+307 NULL NULL 9999   NA   NA      0
##3 Nelder-Mead     0.5673864   NA NULL    0   NA   NA   0.42
##6      nlminb     0.4127198  546   62    0   NA   NA   0.17


opt2 <- nlminb(start=parStart, objective = ssqfun,
    Observed = d$MORTALITY, x = d$AGE,
    control= list(eval.max=5000,iter.max=5000))

parNLM <- opt2$par

Check on parameters:

round(cbind(parNLS,parConv,parNLM),5)

##    parNLS  parConv   parNLM
##a  0.00058  0.00059  0.00058
##b  0.00369  0.00438  0.00369
##c  0.08065  0.08284  0.08065
##d  0.00070  0.00071  0.00070
##e  9.30948  9.92786  9.30970
##f 22.30769 22.19731 22.30769
##g  0.00005  0.00005  0.00005
##h  1.10084  1.10003  1.10084

SSE Review:

sapply(list(parNLS,parConv,parNLM),
  ssqfun,Observed=d$MORTALITY,x=d$AGE)  

 ##[1] 0.4127198 0.4169513 0.4127198

Not able to upload graphs but the code is here. Still appears that the parameters found in the article are not the best fit when the complete mortality data (not abridged or subset) is used

##pred2 <- qxPred(as.list(parNLM),d$AGE)

##dPred <- with(d,rbind(dPred,
    data.frame(AGE,MORTALITY=pred2,w="nlminb")))

##g1 + geom_line(data=dPred,aes(colour=w))
ggplot(data=dPred,aes(x=AGE,y=MORTALITY-d$MORTALITY,colour=w))
        + geom_line()+geom_point(aes(shape=w),alpha=0.3)

r - R の非線形最小二乗法 - Heligman Pollard モデル パラメーターに適合する Levenberg Marquardt

2 に答える 2

Related

Reference

r - R の非線形最小二乗法 - Heligman Pollard モデルパラメーターに適合する Levenberg Marquardt