---
title: "Advanced: Mixed models and multi-environment trials"
author: "agriReg Authors"
date: "`r Sys.Date()`"
output:
  rmarkdown::html_vignette:
    toc: true
    fig_width: 7
    fig_height: 4.5
vignette: >
  %\VignetteIndexEntry{Advanced: Mixed models and multi-environment trials}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r setup, include=FALSE}
knitr::opts_chunk$set(collapse=TRUE, comment="#>", warning=FALSE, message=FALSE)
library(agriReg)
```

## Multi-environment mixed model

In multi-environment trials (MET) yield is affected by genotype (G),
environment (E), and their interaction (GEI). A mixed model is the
standard approach:

```{r met-data}
set.seed(2024)
# Simulate a 3-genotype × 4-environment trial with 3 reps
met <- expand.grid(
  genotype    = paste0("G", 1:3),
  environment = paste0("E", 1:4),
  rep         = 1:3,
  stringsAsFactors = FALSE
)
# True effects
g_eff <- c(G1 = 0, G2 = 0.3, G3 = -0.2)
e_eff <- c(E1 = 0, E2 = 0.5, E3 = -0.3, E4 = 0.1)
set.seed(42)
met$yield <- 4 +
  g_eff[met$genotype] +
  e_eff[met$environment] +
  rnorm(nrow(met), 0, 0.15)   # residual

head(met)
```

### Fixed genotype, random environment

```{r met-model}
# Environment as random, genotype as fixed
m_met <- fit_linear(met,
                    formula = "yield ~ genotype",
                    random  = "(1|environment) + (1|environment:rep)")
summary(m_met)
```

### Post-hoc comparisons with emmeans

```{r emmeans}
library(emmeans)
emm <- emmeans(m_met$fit, ~ genotype)
contrast(emm, method = "pairwise", adjust = "tukey")
```

---

## Fitting multiple growth curves by group

Use `lapply` to fit the logistic model separately to each treatment:

```{r by-group}
maize <- load_example_data("maize_growth")

fits_by_trt <- lapply(
  split(maize, maize$treatment),
  function(sub) {
    fit_nonlinear(sub, x_col = "days", y_col = "biomass_g",
                  model = "logistic")
  }
)

# Compare parameter estimates between treatments
lapply(fits_by_trt, function(m) round(coef(m), 2))
```

### Overlay curves on a single plot

```{r overlay, fig.height=4}
library(ggplot2)

# Build prediction ribbons for each treatment
preds <- do.call(rbind, lapply(names(fits_by_trt), function(trt) {
  m     <- fits_by_trt[[trt]]
  xseq  <- seq(1, 100, length.out = 200)
  nd    <- data.frame(days = xseq)
  data.frame(
    days      = xseq,
    biomass_g = predict(m$fit, newdata = nd),
    treatment = trt
  )
}))

raw <- maize[, c("days", "biomass_g", "treatment")]

ggplot(preds, aes(x = days, y = biomass_g, colour = treatment)) +
  geom_line(linewidth = 1.1) +
  geom_point(data = raw, alpha = 0.3, size = 1.2) +
  scale_color_manual(values = c(WW = "#1D9E75", DS = "#D85A30")) +
  labs(title = "Logistic growth by water treatment",
       x = "Days after emergence", y = "Biomass (g/plant)") +
  theme_minimal()
```

---

## Comparing dose-response curves across species

```{r multi-species}
herb <- load_example_data("herbicide_trial")

# Fit one model per species
drc_fits <- lapply(
  split(herb, herb$species),
  function(sub) {
    fit_dose_response(sub,
                      dose_col = "dose_g_ha",
                      resp_col = "weed_control_pct",
                      fct      = "LL.4")
  }
)

# ED50 per species
lapply(names(drc_fits), function(sp) {
  cat(sp, ":\n"); ed_estimates(drc_fits[[sp]], respLev = 50)
})
```

---

## Exporting a comparison table to Word / Excel

```{r export, eval=FALSE}
# Requires the openxlsx package
library(openxlsx)

m1 <- fit_linear(load_example_data("wheat_trial"), "yield ~ nitrogen")
m2 <- fit_nonlinear(load_example_data("wheat_trial"), "nitrogen","yield","quadratic")
cmp <- compare_models(linear = m1, quadratic = m2)

write.xlsx(cmp, "model_comparison.xlsx", rowNames = FALSE)
```
