diff --git a/manuscript_synthesis/notebooks/rtm_chl_models_flow_weekly_avg.Rmd b/manuscript_synthesis/notebooks/rtm_chl_models_flow_weekly_avg.Rmd
index adbe2c0..6594a80 100644
--- a/manuscript_synthesis/notebooks/rtm_chl_models_flow_weekly_avg.Rmd
+++ b/manuscript_synthesis/notebooks/rtm_chl_models_flow_weekly_avg.Rmd
@@ -95,7 +95,7 @@ df_chla_c1 <- df_chla %>%
     # Apply factor order to StationCode
     StationCode = factor(StationCode, levels = sta_order),
     # Add a column for Year as a factor for the model
-    Year_fct = factor(Year)
+    Year_fct = factor(Year, levels=c("2014", "2015", "2016", "2017", "2018", "2019"))
   ) %>% 
   arrange(StationCode, Year, Week)
 ```
@@ -118,8 +118,7 @@ Looking at the sample counts and date ranges, we'll only include years 2015-2019
 
 ```{r chla remove under sampled}
 df_chla_c2 <- df_chla_c1 %>% 
-  filter(Year %in% 2015:2019) %>% 
-  mutate(Year_fct = fct_drop(Year_fct))
+  filter(Year %in% 2015:2019) 
 
 # Create another dataframe that has up to 3 lag variables for chlorophyll to be
   # used in the linear models
@@ -138,7 +137,7 @@ df_chla_c2_lag <- df_chla_c2 %>%
 
 # Plots
 
-Let's explore the data with some plots. First, lets plot the data in scatterplots of chlorophyll and flow facetted by Station and grouping all years together.
+Let's explore the data with some plots. First, lets plot the data in scatter plots of chlorophyll and flow faceted by Station and grouping all years together.
 
 ```{r chla scatterplot all yrs, message = FALSE, fig.height = 6, fig.width = 6}
 df_chla_c2 %>% 
@@ -197,7 +196,7 @@ Box.test(residuals(m_gam3), lag = 20, type = 'Ljung-Box')
 ```
 
 
-## Model with first, second, and third order lag terms
+## Model with first and second order lag terms
 
 Now, we'll try to deal with the residual autocorrelation. 
 
@@ -206,9 +205,9 @@ Now, we'll try to deal with the residual autocorrelation.
 # Fit original model with k = 9 and three successive lagged models
 #lag1
 m_gam3_lag1 <- gam(
-  Chla_log ~ lag1 + Year_fct * Flow * StationCode + s(Week), 
+  Chla_log ~ lag1 + Year_fct * Flow * StationCode + s(Week, bs="cc"), 
   data = df_chla_c2_lag,
-  method = "REML"
+  method = "REML", knots=list(week=c(0,52))
 )
 
 summary(m_gam3_lag1)
@@ -222,9 +221,9 @@ Box.test(residuals(m_gam3_lag1), lag = 20, type = 'Ljung-Box')
 
 #lag2
 m_gam3_lag2 <- gam(
-  Chla_log ~ lag1 + lag2 + Year_fct * Flow * StationCode + s(Week), 
+  Chla_log ~ lag1 + lag2 + Year_fct * Flow * StationCode + s(Week, bs="cc"), 
   data = df_chla_c2_lag,
-  method = "REML"
+  method = "REML", knots=list(week=c(0,52))
 )
 
 summary(m_gam3_lag2)
@@ -274,3 +273,100 @@ plt_m_gam3_lag2_fit +
     labeller = labeller(.multi_line = FALSE)
   )
 ```
+
+Everything looks pretty decent from lag 2 model. Not perfect, but pretty good given the number of data points. Let's compare to a two-way interaction model.
+
+
+```{r gam2 with and without lag terms}
+
+# Fit two-way interaction model with k = 9 and three successive lagged models
+#lag1
+m_gam2 <- gam(
+  Chla_log ~ Year_fct * Flow + StationCode * Flow + Year_fct * StationCode + s(Week, bs="cc"), 
+  data = df_chla_c2_lag,
+  method = "REML", knots=list(week=c(0,52))
+)
+
+summary(m_gam2)
+appraise(m_gam2)
+shapiro.test(residuals(m_gam2))
+k.check(m_gam2)
+draw(m_gam2, select = 1, residuals = TRUE, rug = FALSE)
+plot(m_gam2, pages = 1, all.terms = TRUE)
+acf(residuals(m_gam2))
+Box.test(residuals(m_gam2), lag = 20, type = 'Ljung-Box')
+
+#lag1
+m_gam2_lag1 <- gam(
+  Chla_log ~ lag1 + Year_fct * Flow + StationCode * Flow + Year_fct * StationCode + s(Week, bs="cc"), 
+  data = df_chla_c2_lag,
+  method = "REML", knots=list(week=c(0,52))
+)
+
+summary(m_gam2_lag1)
+appraise(m_gam2_lag1)
+shapiro.test(residuals(m_gam2_lag1))
+k.check(m_gam2_lag1)
+draw(m_gam2_lag1, select = 1, residuals = TRUE, rug = FALSE)
+plot(m_gam2_lag1, pages = 1, all.terms = TRUE)
+acf(residuals(m_gam2_lag1))
+Box.test(residuals(m_gam2_lag1), lag = 20, type = 'Ljung-Box')
+
+#lag2
+m_gam2_lag2 <- gam(
+  Chla_log ~ lag1 + lag2 + Year_fct * Flow + StationCode * Flow + Year_fct * StationCode + s(Week, bs="cc"), 
+  data = df_chla_c2_lag,
+  method = "REML", knots=list(week=c(0,52))
+)
+
+summary(m_gam2_lag2)
+appraise(m_gam2_lag2)
+shapiro.test(residuals(m_gam2_lag2))
+k.check(m_gam2_lag2)
+draw(m_gam2_lag2, select = 1, residuals = TRUE, rug = FALSE)
+plot(m_gam2_lag2, pages = 1, all.terms = TRUE)
+acf(residuals(m_gam2_lag2))
+
+Box.test(residuals(m_gam2_lag2), lag = 20, type = 'Ljung-Box')
+
+#compare fit of lag1 and lag2 models from two and three-way interaction models
+AIC(m_gam2_lag1, m_gam2_lag2, m_gam3_lag1, m_gam3_lag2)
+
+```
+
+It looks like lag2 model with a two-way interaction has the best fit according to the AIC values. 
+
+Let's take a closer look at how the back-transformed fitted values from the model match the observed values.
+
+```{r gam2 lag2 obs vs fit, warning = FALSE}
+df_m_gam2_lag2_fit <- df_chla_c2_lag %>% 
+  fitted_values(m_gam2_lag2, data = .) %>% 
+  mutate(fitted_bt = exp(fitted) / 1000)
+
+plt_m_gam2_lag2_fit <- df_m_gam2_lag2_fit %>% 
+  ggplot(aes(x = fitted_bt, y = Chla)) +
+  geom_point() +
+  geom_abline(slope = 1, intercept = 0, color = "red") +
+  theme_bw() +
+  labs(
+    x = "Back-transformed Fitted Values",
+    y = "Observed Values"
+  )
+
+plt_m_gam2_lag2_fit
+```
+
+Let's look at each Station-Year combination separately.
+
+```{r gam2 lag2 obs vs fit facet sta yr, fig.height = 8, fig.width = 8}
+plt_m_gam2_lag2_fit +
+  facet_wrap(
+    vars(StationCode, Year_fct),
+    ncol = 5,
+    scales = "free",
+    labeller = labeller(.multi_line = FALSE)
+  )
+```
+
+
+Everything looks pretty decent from lag 2 two-way interaction model. Not perfect, but pretty good given the number of data points. Let's compare to a two-way interaction model with a smooth term for flow.