Fitting distributions

Some useful functions to fit distributions to means, confidence intervals and standard errors
Published

January 3, 2023

Fit to a mean and confidence interval

Beta distribution

## function for fitting
fit_beta <-
function (par, p = 0.95) {
  target <- par[2:3]
  p <- c(0, p) + (1 - p)/2
  f <-
  function(x, mean, p, target) {
    dev <- qbeta(p = p, shape1 = x, shape2 = x/mean - x)
    return(sum((dev - target)^2))
  }
  shape1 <-
    optimize(f, c(0, 1e4), mean = par[1], p = p, target = target)$minimum
  shape2 <- shape1 / par[1] - shape1
  return(list(shape1 = shape1, shape2 = shape2))
}

## function for simulating from fitted distribution
sim_beta <-
  function(n, ...) {
    fit <- fit_beta(...)
    return(rbeta(n, fit$shape1, fit$shape2))
  }

## disability weights for diarrhea
dw_dia_mld <- c(0.074, 0.049, 0.104)
dw_dia_mod <- c(0.188, 0.125, 0.264)
dw_dia_sev <- c(0.247, 0.164, 0.348)

## simulate from fitted Beta distribution
n <- 1e5
sim_dw_dia_mld <- sim_beta(n, dw_dia_mld)
sim_dw_dia_mod <- sim_beta(n, dw_dia_mod)
sim_dw_dia_sev <- sim_beta(n, dw_dia_sev)

## compare fitted to observed values
summarize <-
  function(x, probs = c(0.025, 0.975), ...) {
    c(mean = mean(x, ...),
      quantile(x, probs, ...))
  }

rbind(observed = dw_dia_mld, fitted = summarize(sim_dw_dia_mld))
               mean       2.5%     97.5%
observed 0.07400000 0.04900000 0.1040000
fitted   0.07403453 0.04881904 0.1040163
rbind(observed = dw_dia_mod, fitted = summarize(sim_dw_dia_mod))
              mean     2.5%     97.5%
observed 0.1880000 0.125000 0.2640000
fitted   0.1880237 0.123059 0.2625791
rbind(observed = dw_dia_sev, fitted = summarize(sim_dw_dia_sev))
             mean      2.5%     97.5%
observed 0.247000 0.1640000 0.3480000
fitted   0.246962 0.1603662 0.3455071
## visualize random values
boxplot(
  las = 1,
  ylab = "Disability Weight",
  cbind("Mild" = sim_dw_dia_mld,
        "Moderate" = sim_dw_dia_mod,
        "Severe" = sim_dw_dia_sev))

Fit to a confidence interval

Gamma distribution

## function for fitting
fit_gamma <-
function(q, p = 0.95) {
  p <- c(0, p) + (1 - p)/2

  f <-
    function(x, p, target) {
      dev <- qgamma(p = p, shape = x[1], rate = x[2])
      return(sum((dev - target) ^ 2))
    }

  fit <- optim(par = c(1, 1), fn = f, p = p, target = q)

  return(fit$par)
}

Simulate based on mean and SE

## Gamma distribution
rgamma2 <-
function(n, m, s) {
  rgamma(n, m^2/s^2, m/s^2)
}

## Beta distribution
rbeta2 <-
function(n, m, s) {
  a <- m^2 * ((1 - m) / s^2 - 1 / m)
  b <- (1/m - 1) * a
  rbeta(n, a, b)
}

R session info

sessionInfo()
R version 4.2.1 (2022-06-23 ucrt)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19044)

Matrix products: default

locale:
[1] LC_COLLATE=English_Belgium.utf8  LC_CTYPE=English_Belgium.utf8   
[3] LC_MONETARY=English_Belgium.utf8 LC_NUMERIC=C                    
[5] LC_TIME=English_Belgium.utf8    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] digest_0.6.29     jsonlite_1.8.0    magrittr_2.0.3    evaluate_0.16    
 [5] rlang_1.0.6       stringi_1.7.8     cli_3.4.1         rstudioapi_0.14  
 [9] rmarkdown_2.16    tools_4.2.1       stringr_1.4.1     htmlwidgets_1.5.4
[13] xfun_0.32         yaml_2.3.5        fastmap_1.1.0     compiler_4.2.1   
[17] htmltools_0.5.3   knitr_1.40