Parameter estimation via monte carlo and optim function

Question:

I'm doing a study with the bivariate Birnbaum Saunders distribution. To solve my problem I need to create a code that estimates the parameters of a mix model. I would like to insert the gradient of my likelihood function into the code below via the NumDeriv package or otherwise, but I'm not getting it, could someone help me create this gradient via some package or R code? Here's the code:

#Parâmetros iniciais para geração das variáveis aleatórias T1 e T2

###################################
#Tamanho da Amostra
###############################
n<-100

#####################################
#Número de amostras
########################################
N=100
#####################################################################
#Matriz para receber os valores estimados em cada passo do Monte Carlo
#######################################################################
m=matrix(ncol=4,nrow=N)
#########################################################
#Inicio do loop Monte carlo
for (i in 1:N){
  mu1<-1.5
  phi1<-1
  mu2<-2
  phi2<-8
  rho<-0.3
  u1  <-rnorm(n)
  u2  <-rnorm(n)
  z1  <-(((sqrt(1+rho))+(sqrt(1-rho)))/2)*u1+(((sqrt(1+rho))-(sqrt(1-    rho)))/2)*u2
  z2  <-(((sqrt(1+rho))-(sqrt(1-rho)))/2)*u1+(((sqrt(1+rho))+(sqrt(1-    rho)))/2)*u2  

  #################################################################
  #Variáveis (T1,T2)~BSB(mu1,phi1,mu2,phi2,rho)
  ##########################################################
  T1<-(mu1/(1+(1/phi1)))*((1/2)*(sqrt(2/phi1))*z1+sqrt(1+((1/2)*    (sqrt(2/phi1))*z1)^2))^2
  T2<-(mu2/(1+(1/phi2)))*((1/2)*(sqrt(2/phi2))*z2+sqrt(1+((1/2)*    (sqrt(2/phi2))*z2)^2))^2
  ################################################################
  #Variável indicadora
  ######################################
  u<-1*(T2>1)
  #######################################################
  #Variável de interesse cuja densidade é obtida apartir da
  #distribuição Birnbaum Saunders bivariada, observe que esta
  #é uma variável com censura e de acordo com a minha especificação
  #dos parâmetros a censura é cerca de 10% a 20%. 
  ############################################################
  y<-T1*u 
  sum(1*(y>0))
  rm(mu1,phi1,mu2,rho)
  #######################################################
  #Função Log de Verossimilhança
  #Observe que o phi2 foi considerado fixo para garantir a
  #identificabilidade do modelo!
  ############################################################
  log.lik<-function(par){
    mu1  <-abs(par[1])
    phi1 <-abs(par[2])
    mu2  <-abs(par[3])
    arho <-par[4]
    f1<-(1/(2*sqrt(2*pi)))*exp((-phi1/4)*(sqrt(y[y>0]*(phi1+1)/(phi1*mu1))-sqrt(phi1*mu1/(y[y>0]*    (phi1+1))))^2)*    (((sqrt(phi1+1))/(sqrt(phi1*mu1*y[y>0])))+    ((sqrt(phi1*mu1))/((y[y>0]^3)*(phi1+1))))*    (sqrt(phi1/2))*pnorm(((sqrt(phi2*(phi2+1))*    (phi2*mu2-phi2-1))/(sqrt(2)*    (phi2+1)*sqrt(phi2*mu2*(1-tanh(arho)^2))))+    (((sqrt(phi1)*tanh(arho))/(sqrt(2*(1-tanh(arho)^2))))*(sqrt(y[y>0]*    (phi1+1)/(phi1*mu1))-sqrt((phi1*mu1)/(y[y>0]*    (phi1+1))))))
    f2<-pnorm(sqrt(phi2/2)*(sqrt((phi2+1)/(phi2*mu2))-sqrt((phi2*mu2)/(phi2+1))))  
    lv<--(sum(log(f1))+sum(1*(y==0)*log(f2)))
    lv
  }
  #######################################################
  #Tentativa de se criar o gradiente (Não deu certo!)
  ############################################################
  require(numDeriv)
  # grad<-function(par){
  #   mu1 <-par[1]
  #   phi1<-par[2]
  #   mu2 <-par[3]
  #   rho <-par[4]
  #   f1<-(1/(2*sqrt(2*pi)))*exp((-phi1/4)*(sqrt(y[y>0]*    (phi1+1)/(phi1*mu1))-    sqrt(phi1*mu1/(y[y>0]*(phi1+1))))^2)*(((sqrt(phi1+1))/(sqrt(phi1*mu1*y[y>0])))+    ((sqrt(phi1*mu1))/((y[y>0]^3)*(phi1+1))))*(sqrt(phi1/2))*pnorm(((sqrt(phi2*    (phi2+1))*(phi2*mu2-phi2-1))/(sqrt(2)*(phi2+1)*sqrt(phi2*mu2*(1-rho^2))))+    (((sqrt(phi1)*rho)/(sqrt(2*(1-rho^2))))*(sqrt(y[y>0]*(phi1+1)/(phi1*mu1))-    sqrt((phi1*mu1)/(y[y>0]*(phi1+1))))))
  #   f2<-pnorm(sqrt(phi2/2)*(sqrt((phi2+1)/(phi2*mu2))-        sqrt((phi2*mu2)/(phi2+1))))  
  #   lv<--(sum(log(f1))+sum(1*(y==0)*log(f2)))
  #   g1<-(sum(1*(y>0)*D(log(f1),"mu1"))+sum(1*(y==0)*D(log(f2),"mu1")))
  #   g2<-(sum(1*(y>0)*D(log(f1),"phi1"))+sum(1*(y==0)*D(log(f2),"phi1")))
  #   g3<-(sum(1*(y>0)*D(log(f1),"mu2"))+sum(1*(y==0)*D(log(f2),"mu2")))
  #   g4<-(sum(1*(y>0)*D(log(f1),"rho"))+sum(1*(y==0)*D(log(f2),"rho")))
  #   gd=cbind(eval(g1),eval(g2),eval(g3),eval(g4))
  #   colSums(gd, na.rm = TRUE)
  # }
  #######################################################
  #Chutes iniciais
  ############################################################
  mu1_0 <-1.5
  phi1_0<-1
  mu2_0 <-2
  arho_0<-atanh(0.3)

  start<-c(mu1_0,phi1_0,mu2_0,arho_0)    
  #######################################################
  #Estimação via método BFGS da função optim
  ############################################################
  op<-optim(start,log.lik,method = "BFGS")
  #######################################################
  #Como fiz uma reparametrização do parâmetro rho afim de não
  #ter problemas devido a simulação dos valores a entrada da 
  #matriz referente a rho recebe tangente hiperbólico da estimativa de rho
  #via função optim!
  ############################################################
  m[i,]<-c(op$par[1:3],tanh(op$par[4]))
} 

colMeans(m) 

Answer:

Important: I am not familiar with the Birnbaum-Saunders distribution. The entire code is based on the article by Kundu, Balakrishnan and Jamalizadeh (2010) . The article does not address the problem of censoring variable values, so it is possible that the results are not directly applicable to your problem.

Comparing your code for data simulation with the aforementioned article, I found a discrepancy in the passage of the intermediate values z1 and z2 to T1 and T2 (Eq. 8). Therefore, in the simulations below, I followed the proposal of the article, as the results differ.

In the first function, the data are simulated from the proposed parameters and a sample size N. The second function, for maximum likelihood estimation, has as argument a dataset X with two columns and a vector of initial values ​​for the parameters phi — therefore, with only two values.

The optimization function just follows the equations proposed in the article and uses the optim function to find the phi1 of phi1 and phi2 . In the end, the values ​​of mu1 , mu2 and rho are calculated from the closed-form solutions. I tried to identify the number of equations in the original article. Caution: the function does not check whether the initial values ​​are positive and the optimization algorithm employed (default of optim , Nelder-Mead) does not restrict the space of the parameters to positive numbers.

simData <- function(mu1, phi1, mu2, phi2, rho, N){
  u1  <-rnorm(N)
  u2  <-rnorm(N)
  z1  <-(((sqrt(1+rho))+(sqrt(1-rho)))/2)*u1+(((sqrt(1+rho))-(sqrt(1-rho)))/2)*u2
  z2  <-(((sqrt(1+rho))-(sqrt(1-rho)))/2)*u1+(((sqrt(1+rho))+(sqrt(1-rho)))/2)*u2  

  #################################################################
  #Variáveis (T1,T2)~BSB(mu1,phi1,mu2,phi2,rho)
  ##########################################################
  #T1.1<-(mu1/(1+(1/phi1)))*((1/2)*(sqrt(2/phi1))*z1+sqrt(1+((1/2)*    (sqrt(2/phi1))*z1)^2))^2
  #T2.1<-(mu2/(1+(1/phi2)))*((1/2)*(sqrt(2/phi2))*z2+sqrt(1+((1/2)*    (sqrt(2/phi2))*z2)^2))^2
  T1 <- phi1 * ((1/2) * mu1 * z1 + sqrt(((1/2) * mu1 * z1)^2 + 1))^2
  T2 <- phi2 * ((1/2) * mu2 * z2 + sqrt(((1/2) * mu2 * z2)^2 + 1))^2

  data.frame(T1, T2)
}

X <- simData(1.5, 1, 3, 8, 0.5, 1000)

mleBVBS <- function(X, phiInits) {
  #Funções s e r, definidas em (11)
  s <- function(data){
    apply(data, 2, mean)
  }

  r <- function(data){
    apply(data, 2, function(x) 1/mean(1/x))
  }

  # Função em forma fechada para computar mu, dado phi (9)
  muHat <- function(phi, s, r){
    sqrt(s/phi + phi/r - 2)
  }

  # Função em forma fechada para computar rho, dado phi (10)
  rhoHat <- function(phi){
    phi1 <- phi[1]
    phi2 <- phi[2]

    num <- sum(
      (sqrt(X[, 1]/phi1) - sqrt(phi1/X[, 1])) *
        (sqrt(X[, 2]/phi2) - sqrt(phi2/X[, 2]))
    )
    den <- sqrt(
      sum((sqrt(X[, 1]/phi1) - sqrt(phi1/X[, 1]))^2)
    ) * sqrt(
      sum((sqrt(X[, 2]/phi2) - sqrt(phi2/X[, 2]))^2)
    )

    num/den
  }


  sData <- s(X)
  s1 <- sData[1]
  s2 <- sData[2]

  rData <- r(X)
  r1 <- rData[1]
  r2 <- rData[2]
  n <- nrow(X)

  #Função a ser otimizada para obter phi1 e phi2 (12)

  profileLL <- function(phi, s1, s2, r1, r2, n, X){
    phi1 <- phi[1]
    phi2 <- phi[2]

    (-n)*log(muHat(phi1, s1, r1)) - n*log(phi1) -
      n*log(muHat(phi2, s2, r2)) - n*log(phi2) -
      (n/2)*log(1 - rhoHat(phi)) +
      sum(log(
        sqrt(phi1/X[, 1]) + (phi1/X[, 1])^(3/2)
      )) + sum(log(
        sqrt(phi2/X[, 2]) + (phi2/X[, 2])^(3/2)
      ))
  }

  fit <- optim(phiInits, profileLL, control=list(fnscale=-1),
               s1=s1, s2=s2, r1=r1, r2=r2, n=n, X=X)
  phi <- fit$par

  c(mu1=muHat(phi[1], s1, r1), phi1=phi[1], mu2=muHat(phi[2], s2, r2), phi2=phi[2], rho=rhoHat(phi))

}

mleBVBS(X, c(1, 5))
Scroll to Top
AllEscort