import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.optimize as opt

# ---------------- Paths --------------------------------------------------------
OUT = "out"
os.makedirs(OUT, exist_ok=True)

# ---------------- Data ---------------------------------------------------------
years = np.array([2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2021, 2023, 2024])
# Pew Research Center Mobile Fact Sheet (accessed 2025-06-26)
adoption = np.array([35, 45, 55, 64, 68, 72, 77, 81, 81, 85, 88, 90])

train_mask = years <= 2019
test_mask = ~train_mask

# ---------------- Richards model (Modules 2 + 5) ------------------------------
def richards(t, L, k, t0, nu):
    """Richards (generalised logistic) curve."""
    return L / (1 + np.exp(-k * (t - t0))) ** (1 / nu)

p0 = [100, 0.4, 2014, 0.8]
bounds = ([80, 0, 2010, 0.2], [120, 5, 2020, 5])

params, cov = opt.curve_fit(
    richards, years[train_mask], adoption[train_mask],
    p0=p0, bounds=bounds, maxfev=20000
)
se = np.sqrt(np.diag(cov))

param_names = ["L", "k", "t0", "nu"]
df_params = pd.DataFrame({"Parameter": param_names, "Estimate": params, "StdError": se})
df_params.to_csv(f"{OUT}/richards_params.csv", index=False)

# correlation matrix
corr = cov / np.outer(se, se)
df_corr = pd.DataFrame(corr, index=param_names, columns=param_names)
df_corr.to_csv(f"{OUT}/param_corr.csv")

# ---------------- Rolling-origin CV -------------------------------------------
records = []
for T in range(2015, 2020):
    mask_tr = years < T
    mask_te = years == T
    popt, _ = opt.curve_fit(
        richards, years[mask_tr], adoption[mask_tr],
        p0=p0, bounds=bounds, maxfev=10000
    )
    pred = richards(years[mask_te], *popt)[0]
    obs = adoption[mask_te][0]
    records.append({"TrainEnd": T - 1, "PredYear": T, "Pred": pred,
                    "Obs": obs, "AbsErr": abs(pred - obs)})

df_cv = pd.DataFrame(records)
rmse = np.sqrt(((df_cv["Pred"] - df_cv["Obs"]) ** 2).mean())
df_cv.loc[len(df_cv)] = ["—", "RMSE", "", "", rmse]
df_cv.to_csv(f"{OUT}/rolling_cv.csv", index=False)

# ---------------- Plot deterministic fit --------------------------------------
t_plot = np.linspace(years.min(), years.max(), 400)

plt.figure(figsize=(7, 4))
plt.scatter(years, adoption, marker="x", label="Data")
plt.plot(t_plot, richards(t_plot, *params), label="Richards fit", linewidth=2)
plt.axvline(2019.5, color="grey", linestyle=":")
plt.xlabel("Year")
plt.ylabel("Smartphone ownership (%)")
plt.title("Deterministic Richards fit")
plt.legend()
plt.tight_layout()
plt.savefig(f"{OUT}/richards_fit.png", dpi=300)
plt.close()

# ---------------- Stochastic Module-8 envelope --------------------------------
L_, k_, t0_, nu_ = params

residuals = adoption[train_mask] - richards(years[train_mask], *params)
denom = 2 * adoption[train_mask] * (1 - adoption[train_mask] / L_)
D_hat = max(np.mean((residuals ** 2) / denom), 1e-6)

n_sims = 20000
years_full = np.arange(years.min(), years.max() + 1)
dt = 1.0
idx_start = np.where(years_full == 2019)[0][0]

sim = np.zeros((n_sims, len(years_full)))
sim[:, :idx_start + 1] = richards(years_full[:idx_start + 1], *params)

for i in range(idx_start + 1, len(years_full)):
    x_prev = sim[:, i - 1]
    drift = k_ * (1 - x_prev / L_) * x_prev
    sigma = np.sqrt(2 * D_hat * x_prev * (1 - x_prev / L_))
    sim[:, i] = np.clip(
        x_prev + drift * dt + sigma * np.random.normal(size=n_sims) * np.sqrt(dt),
        0, L_
    )

lower = np.percentile(sim, 2.5, axis=0)
upper = np.percentile(sim, 97.5, axis=0)
mean = sim.mean(axis=0)

plt.figure(figsize=(7, 4))
plt.fill_between(years_full, lower, upper, color="orange", alpha=0.3, label="95% band")
plt.plot(years_full, mean, color="orange", label="Mean")
plt.scatter(years, adoption, color="black", marker="x", label="Data", zorder=5)
plt.axvline(2019.5, color="grey", linestyle=":")
plt.xlabel("Year")
plt.ylabel("Smartphone ownership (%)")
plt.title("Module-8 stochastic envelope")
plt.legend()
plt.tight_layout()
plt.savefig(f"{OUT}/stochastic_band.png", dpi=300)
plt.close()

print("Analysis complete. Outputs in 'out/'")
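
# ---------------- Optional holdout check (illustrative addition) --------------
# A minimal sketch of how the otherwise unused `test_mask` could be exercised:
# compare the deterministic Richards extrapolation and the simulated 95% band
# against the held-out 2021-2024 observations. The output file name
# `holdout_check.csv` is an assumption, not part of the original pipeline.
holdout_years = years[test_mask]
holdout_obs = adoption[test_mask]
holdout_pred = richards(holdout_years, *params)

# Locate the holdout years inside the simulated grid (years_full is sorted).
idx_holdout = np.searchsorted(years_full, holdout_years)

df_holdout = pd.DataFrame({
    "Year": holdout_years,
    "Obs": holdout_obs,
    "DeterministicPred": holdout_pred,
    "AbsErr": np.abs(holdout_pred - holdout_obs),
    "Band_2.5": lower[idx_holdout],
    "Band_97.5": upper[idx_holdout],
    "InBand": (holdout_obs >= lower[idx_holdout]) & (holdout_obs <= upper[idx_holdout]),
})
df_holdout.to_csv(f"{OUT}/holdout_check.csv", index=False)
print("Holdout check written to 'out/holdout_check.csv'")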