Code
import arviz as az
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import pymc as pm
import seaborn as sns
from plotnine import (
ggplot,
aes,
stat_summary,
facet_wrap,
theme_classic,
labs,
geom_density,
geom_point,
)
"figure.figsize"] = [8, 5]
plt.rcParams[%config InlineBackend.figure_format = 'retina'
= 42
RANDOM_SEED = np.random.default_rng(RANDOM_SEED)
rng
# generate data ------------------------------------------------------------------------
= 4
n_groups = [2, 4, 6, 8]
group_size
# Group-specific parameters
= rng.normal(1, 0.2, size=n_groups)
slopes = rng.normal(0, 1, size=n_groups)
intercepts
# Generate data
= []
data for i in range(n_groups):
= group_size[i]
n = np.sort(rng.uniform(0, 20, size=n))
x_vals = rng.normal(0, 1, size=n)
noise = slopes[i] * x_vals + intercepts[i] + noise
y_vals = np.full(n, i)
group_labels "x": x_vals, "y": y_vals, "group": group_labels}))
data.append(pd.DataFrame({
# Combine all groups into a single DataFrame
= pd.concat(data, ignore_index=True)
df "group"] = df["group"].astype("category")
df[
# Build a PyMC model -------------------------------------------------------------------
= 0
prior_mean = 1
prior_std
= df.x.values
x_data = df.y.values
y_data
= {"groups": df["group"].cat.categories, "obs_ind": df.index}
coords
with pm.Model(coords=coords) as _m:
= pm.Data("x", x_data, dims="obs_ind")
x = pm.Data("y", y_data, dims="obs_ind")
y = pm.Data("group", df["group"].cat.codes.values, dims="obs_ind")
group # priors
= pm.Normal("intercept", mu=0, sigma=10, dims=["groups"])
intercept = pm.Normal("beta", mu=prior_mean, sigma=prior_std, dims=["groups"])
beta = pm.HalfNormal("sigma", sigma=5)
sigma # likelihood
= pm.Deterministic("mu", intercept[group] + beta[group] * x, dims="obs_ind")
mu "obs", mu=mu, sigma=sigma, observed=y, dims="obs_ind")
pm.Normal(# sample
= pm.sample()
idata
# Generate a grid of points to evaluate on ---------------------------------------------
= 20
n_interp_points = np.concatenate(
xi
[1].x.min(), group[1].x.max(), n_interp_points)
np.linspace(group[for group in df.groupby("group")
]
)= np.concatenate([[i] * n_interp_points for i in range(n_groups)]).astype(int)
g = {"x": xi, "group": g, "y": np.zeros_like(xi)}
predict_at
# Posterior prediction on the grid of points -------------------------------------------
= {"groups": predict_at["group"], "obs_ind": np.arange(len(xi))}
coords
with _m:
=coords)
pm.set_data(predict_at, coords
idata.extend(
pm.sample_posterior_predictive(
idata,=["mu", "y"],
var_names=rng,
random_seed=False,
progressbar=True,
predictions
) )