# Mount Google Drive into the Colab filesystem at /content/drive so the
# CSV stored under "My Drive" can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive

import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
from os import rename

# Send Plotly figures to the Colab renderer.
pio.renderers.default = 'colab'

# Folder on the mounted Drive that holds the notebook's data file.
datadir = '/content/drive/My Drive/Colab Notebooks/'
filepath = f"{datadir}bchildcountandedenvironment2022-23.csv"

# Headerless read: every line of the file, preamble included, is a data row.
df_raw = pd.read_csv(filepath, header=None)

# Working frame: column names are taken from row index 4 of the file.
df = pd.read_csv(filepath, header=4)

# Quick look at the first rows and at the (rows, columns) size.
df.head()

df.shape

(16231, 53)

# The two aggregate count columns that need a numeric dtype.
num_cols = ["Age 3 to 5 (Early Childhood)", "Ages 6-21"]

# Convert both columns in one pass; entries that do not parse become NaN.
df[num_cols] = df[num_cols].apply(pd.to_numeric, errors="coerce")

# Summary statistics, rounded to whole numbers.
df[num_cols].describe().round()

# Select the rows that carry the all-disabilities, school-age total.
is_all_disabilities = df["SEA Disability Category"] == "All Disabilities"
is_school_age_total = df["SEA Education Environment"] == "Total, School Age"
mask = is_all_disabilities & is_school_age_total

df_state_total = df[mask].copy()

# Make sure the count column is numeric; unparseable entries become NaN.
df_state_total["Ages 6-21"] = pd.to_numeric(df_state_total["Ages 6-21"], errors="coerce")

# One row per state with its summed count, ordered from largest to smallest.
per_state_sum = df_state_total.groupby("State Name", as_index=False)["Ages 6-21"].sum()
state_counts = per_state_sum.dropna().sort_values("Ages 6-21", ascending=False)

# Bar chart of the per-state counts.
fig = px.bar(
    state_counts,
    x="State Name",
    y="Ages 6-21",
    labels={"Ages 6-21": "Number of Children", "State Name": "State"},
    title="Total School Age Children with Disabilities (Ages 6-21) by State - 2022-23",
)

# Tilt the state names so they do not overlap, then render.
fig.update_layout(xaxis_tickangle=-45)
fig.show()

# Choropleth Map
# Full state / territory name -> two-letter postal code used by Plotly.
state_abbrev = {
    "Alabama": "AL", "Alaska": "AK", "Arizona": "AZ", "Arkansas": "AR",
    "California": "CA", "Colorado": "CO", "Connecticut": "CT", "Delaware": "DE",
    "Florida": "FL", "Georgia": "GA", "Hawaii": "HI", "Idaho": "ID",
    "Illinois": "IL", "Indiana": "IN", "Iowa": "IA", "Kansas": "KS",
    "Kentucky": "KY", "Louisiana": "LA", "Maine": "ME", "Maryland": "MD",
    "Massachusetts": "MA", "Michigan": "MI", "Minnesota": "MN", "Mississippi": "MS",
    "Missouri": "MO", "Montana": "MT", "Nebraska": "NE", "Nevada": "NV",
    "New Hampshire": "NH", "New Jersey": "NJ", "New Mexico": "NM", "New York": "NY",
    "North Carolina": "NC", "North Dakota": "ND", "Ohio": "OH", "Oklahoma": "OK",
    "Oregon": "OR", "Pennsylvania": "PA", "Rhode Island": "RI", "South Carolina": "SC",
    "South Dakota": "SD", "Tennessee": "TN", "Texas": "TX", "Utah": "UT",
    "Vermont": "VT", "Virginia": "VA", "Washington": "WA", "West Virginia": "WV",
    "Wisconsin": "WI", "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Look up a code for every row first, then keep only the rows that got one.
looked_up_codes = state_counts["State Name"].map(state_abbrev)
has_code = looked_up_codes.notna()
state_counts_usa = state_counts[has_code].copy()
state_counts_usa["state_code"] = looked_up_codes[has_code]

# Colour each state by its count on a USA-scoped map.
fig = px.choropleth(
    state_counts_usa,
    locations="state_code",
    locationmode="USA-states",
    scope="usa",
    color="Ages 6-21",
    labels={"Ages 6-21": "Number of Children"},
    title="Total School Age Children with Disabilities (Ages 6-21) by State - 2022-23",
)

fig.show()

# Stacked Bar Chart
def stacked_bar_by_state(state):
    """Show educational environments by disability category for one state.

    Rows of the module-level ``df`` are restricted to ``state``, to the
    individual disability categories (the "All Disabilities" rows are
    excluded) and to the environments listed in ``env_keep``. Each row's
    ages 6-21 count is computed as the sum of its single-year age columns,
    the counts are totalled per (category, environment) pair, and the result
    is drawn as a bar chart stacked by environment.

    Args:
        state: Value compared against the "State Name" column.

    Returns:
        None. Prints the columns used, displays the first grouped rows and
        shows the figure.
    """
    # Spelled "Separate" so the two labels can match.
    # NOTE(review): confirm every label against the values actually present
    # in the "SEA Education Environment" column.
    env_keep = [
        "Inside regular class 80% or more of the day",
        "Inside regular class 40% through 79% of the day",
        "Inside regular class less than 40% of the day",
        "Separate Class",
        "Separate School, School Age",
        "Residential Facility, School Age",
    ]

    subset = df[
        (df["State Name"] == state)
        & (df["SEA Disability Category"] != "All Disabilities")
        & (df["SEA Education Environment"].isin(env_keep))
    ].copy()

    # Use exact single-year column names ("Age 6" ... "Age 21"). Substring
    # matching on "Age 6" / "Age 12" also pulled in the overlapping
    # "Age 6-11" / "Age 12-17" bands, which counted ages 6 and 12 twice.
    single_year_cols = [f"Age {age}" for age in range(6, 22)]
    age_cols_6_21 = [c for c in single_year_cols if c in subset.columns]
    print("Using these columns to compute Ages 6-21:", age_cols_6_21)

    # Unparseable entries become NaN and are skipped by the row sum below.
    for col in age_cols_6_21:
        subset[col] = pd.to_numeric(subset[col], errors="coerce")

    subset["Ages_6_21_calc"] = subset[age_cols_6_21].sum(axis=1)

    grouped = (
        subset
        .groupby(["SEA Disability Category", "SEA Education Environment"], as_index=False)["Ages_6_21_calc"]
        .sum()
    )

    display(grouped.head())

    fig = px.bar(
        grouped,
        x="SEA Disability Category",
        y="Ages_6_21_calc",
        color="SEA Education Environment",
        # Interpolate the state name; "[state]" was printed literally before.
        title=f"Educational Environments by Disability Category in {state} (Ages 6-21, 2022-23)",
        labels={
            # Key must be the plotted column for the axis label to apply.
            "Ages_6_21_calc": "Number of Children",
            "SEA Disability Category": "Disability Category",
            "SEA Education Environment": "Educational Environment",
        },
    )

    fig.update_layout(xaxis_tickangle=-45)
    fig.show()

# Draw the stacked bar chart for Florida.
stacked_bar_by_state("Florida")

Using these columns to compute Ages 6-21: ['Age 6', 'Age 12', 'Age 6-11', 'Age 12-17', 'Age 18-21']

# Draw the stacked bar chart for New York.
stacked_bar_by_state("New York")

Using these columns to compute Ages 6-21: ['Age 6', 'Age 12', 'Age 6-11', 'Age 12-17', 'Age 18-21']

# Box plot
# Single box over the mapped states/territories, with every point drawn too.
fig = px.box(
    state_counts_usa,
    y="Ages 6-21",
    points="all",
    labels={"Ages 6-21": "Number of Children"},
    title="Distribution of School Age Enrollment (Ages 6-21) Across States - 2022-23",
)

# Put the count axis on a log scale, then render.
fig.update_yaxes(type="log").show()

# Bubble Chart
# Both the numerator and the denominator of the inclusion rate are taken from
# the "All Disabilities" rows so that they describe the same population.
all_disabilities = df["SEA Disability Category"] == "All Disabilities"

# Denominator: the school-age total row of each state.
total_mask = all_disabilities & (df["SEA Education Environment"] == "Total, School Age")

df_total = df[total_mask].copy()
df_total["Ages 6-21"] = pd.to_numeric(df_total["Ages 6-21"], errors="coerce")

print("df_total rows:", len(df_total))
print(df_total[["State Name", "Ages 6-21"]].head())

total_by_state = (
    df_total
    .groupby("State Name", as_index=False)["Ages 6-21"]
    .sum()
    .rename(columns={"Ages 6-21": "total_6_21"})
)

# Numerator: rows whose environment label mentions "80%". Filtering on
# "All Disabilities" here keeps per-category rows out of the sum explicitly
# instead of relying on their counts coercing to NaN.
incl_mask = all_disabilities & df["SEA Education Environment"].fillna(" ").str.contains("80%")
df_incl = df[incl_mask].copy()
# Convert df_incl's own column (an earlier copy of this cell read df_total here).
df_incl["Ages 6-21"] = pd.to_numeric(df_incl["Ages 6-21"], errors="coerce")

print("df_incl rows:", len(df_incl))
print(df_incl[["State Name", "SEA Education Environment", "Ages 6-21"]].head())

incl_by_state = (
    df_incl
    .groupby("State Name", as_index=False)["Ages 6-21"]
    .sum()
    .rename(columns={"Ages 6-21": "incl_6_21"})
)

# Left merge keeps every state that has a total; a state without an
# inclusion row gets 0 rather than NaN.
bubble = total_by_state.merge(incl_by_state, on="State Name", how="left")
bubble["incl_6_21"] = bubble["incl_6_21"].fillna(0)

# Rate is undefined (NaN) where the total is not positive.
bubble["inclusion_rate"] = np.where(
    bubble["total_6_21"] > 0,
    bubble["incl_6_21"] / bubble["total_6_21"],
    np.nan
)

print("bubble rows:", len(bubble))
print(bubble.head())

df_total rows: 61
          State Name  Ages 6-21
18           Alabama    91686.0
284           Alaska    17466.0
550   American Samoa      431.0
816          Arizona   134762.0
1082        Arkansas    67505.0
df_total rows: 61
          State Name  Ages 6-21
18           Alabama    91686.0
284           Alaska    17466.0
550   American Samoa      431.0
816          Arizona   134762.0
1082        Arkansas    67505.0
df_incl rows: 854
   State Name                    SEA Education Environment  Ages 6-21
4     Alabama  Inside regular class 80% or more of the day    75905.0
23    Alabama  Inside regular class 80% or more of the day        NaN
42    Alabama  Inside regular class 80% or more of the day        NaN
61    Alabama  Inside regular class 80% or more of the day        NaN
80    Alabama  Inside regular class 80% or more of the day        NaN
bubble rows: 61
       State Name  total_6_21  incl_6_21  inclusion_rate
0         Alabama     91686.0    75905.0        0.827880
1          Alaska     17466.0    11698.0        0.669758
2  American Samoa       431.0      356.0        0.825986
3         Arizona    134762.0    92711.0        0.687961
4        Arkansas     67505.0    43714.0        0.647567

# State name -> U.S. Census region. Names absent from this table are the
# ones a lookup cannot place, so every state must be spelled exactly as it
# appears in the data ("Missouri" was misspelled and Oklahoma was missing).
region_map = {
    # Northeast
    "Maine": "Northeast", "New Hampshire": "Northeast", "Vermont": "Northeast",
    "Massachusetts": "Northeast", "Rhode Island": "Northeast", "Connecticut": "Northeast",
    "New York": "Northeast", "New Jersey": "Northeast", "Pennsylvania": "Northeast",
    # Midwest
    "Ohio": "Midwest", "Indiana": "Midwest", "Illinois": "Midwest", "Michigan": "Midwest",
    "Wisconsin": "Midwest", "Minnesota": "Midwest", "Iowa": "Midwest",
    "Missouri": "Midwest", "North Dakota": "Midwest", "South Dakota": "Midwest",
    "Nebraska": "Midwest", "Kansas": "Midwest",
    # South
    "Delaware": "South", "Maryland": "South", "District of Columbia": "South",
    "Virginia": "South", "West Virginia": "South", "North Carolina": "South",
    "South Carolina": "South", "Georgia": "South", "Florida": "South",
    "Kentucky": "South", "Tennessee": "South", "Alabama": "South", "Mississippi": "South",
    "Arkansas": "South", "Louisiana": "South", "Oklahoma": "South", "Texas": "South",
    # West
    "Montana": "West", "Idaho": "West", "Wyoming": "West", "Colorado": "West",
    "New Mexico": "West", "Arizona": "West", "Utah": "West", "Nevada": "West",
    "Washington": "West", "Oregon": "West", "California": "West", "Alaska": "West",
    "Hawaii": "West",
}

# Tag every row with its region; names not found in region_map become "Other".
bubble["region"] = bubble["State Name"].map(region_map).fillna("Other")

# Plot from a copy so the figure's input is separate from `bubble`.
bubble_plot = bubble.copy()

# Human-readable names for the plotted columns.
axis_labels = {
    "inclusion_rate": "Inclusion Rate (80%+ in Regular Class)",
    "total_6_21": "Total Enrollment (Ages 6-21)",
    "incl_6_21": "Included in Regular Class ≥ 80% of Day",
    "region": "Region",
}

# x = rate, y = total, bubble area = included count, colour = region.
fig = px.scatter(
    bubble_plot,
    x="inclusion_rate",
    y="total_6_21",
    size="incl_6_21",
    size_max=40,
    color="region",
    opacity=0.8,
    hover_name="State Name",
    labels=axis_labels,
    title="Inclusion Rate VS Total Enrollment by Region (Ages 6-21, All Disabilities, 2022-23)",
)

fig.update_layout(
    template="plotly_white",
    legend_title_text="Region",
    width=900,
    height=600,
)

fig.show()

# List every column whose name contains "Age".
list(filter(lambda name: "Age" in name, df.columns))

['Age 3',
 'Age 4',
 'Age 5 (Early Childhood)',
 'Age 3 to 5 (Early Childhood)',
 ' Age 5 (School Age)',
 'Age 6',
 'Age 7',
 'Age 8',
 'Age 9',
 'Age 10',
 'Age 11',
 'Age 12',
 'Age 13',
 'Age 14',
 'Age 15',
 'Age 16',
 'Age 17',
 'Age 18',
 'Age 19',
 'Age 20',
 'Age 21',
 'Age 5 (School Age)-11',
 'Age 6-11',
 'Age 12-17',
 'Age 18-21',
 'Age 5 (School Age)-21',
 'Ages 6-21',
 'EL Yes - School Age',
 'EL No - School Age',
 'Female - School Age',
 'Male - School Age',
 'American Indian or Alaska Native - School Age',
 'Asian - School Age',
 'Black or African American - School Age',
 'Hispanic/Latino - School Age',
 'Native Hawaiian or Other Pacific Islander - School Age',
 'Two or more races - School Age',
 'White - School Age']

# Columns whose name begins with "Age " (the trailing space is part of the test).
age_cols = list(filter(lambda name: name.startswith("Age "), df.columns))

# Discard any of those whose name contains one of these fragments.
unwanted_fragments = ("School Age", "-", "Race", "Male", "Female")
age_cols_clean = [
    name for name in age_cols
    if not any(fragment in name for fragment in unwanted_fragments)
]

age_cols_clean

['Age 3',
 'Age 4',
 'Age 5 (Early Childhood)',
 'Age 3 to 5 (Early Childhood)',
 'Age 6',
 'Age 7',
 'Age 8',
 'Age 9',
 'Age 10',
 'Age 11',
 'Age 12',
 'Age 13',
 'Age 14',
 'Age 15',
 'Age 16',
 'Age 17',
 'Age 18',
 'Age 19',
 'Age 20',
 'Age 21']

# Heat Map
# Keep the "Age ..." columns whose name contains none of these fragments.
age_cols_clean = [
    col for col in age_cols
    if not any(part in col for part in ("School Age", "-", "Race", "Male", "Female"))
]

# Unparseable entries become NaN and are ignored by the sums below.
for col in age_cols_clean:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Count each child once. The frame holds "All Disabilities" rows next to the
# per-category rows, and "Total, ..." environment rows next to the individual
# environments; summing every row of a state adds the aggregates on top of
# their own components. Only the aggregate rows are summed here.
# NOTE(review): assumes every total row's environment label starts with
# "Total" (as "Total, School Age" does) - confirm the early-childhood label.
is_total_row = (
    (df["SEA Disability Category"] == "All Disabilities")
    & df["SEA Education Environment"].fillna("").str.startswith("Total")
)

state_age_matrix = (
    df[is_total_row]
    .groupby("State Name")[age_cols_clean]
    .sum()
    .sort_index()
)

# Aggregate pseudo-states that are dropped when present in the index.
rows_to_drop = [
    "U.s. Outlying Areas and Freely Associated States",
    "United States",
]

state_age_matrix = state_age_matrix.drop(index=rows_to_drop, errors="ignore")

# log10(count + 1) keeps zero counts finite and compresses the wide range.
fig = px.imshow(
    np.log10(state_age_matrix + 1),
    x=state_age_matrix.columns,
    y=state_age_matrix.index,
    labels=dict(
        x="Age Group",
        y="State",
        color="log10(Number of Children +1)"
    ),
    aspect="auto",
    title="Log Scaled Enrollment Heatmap by Age Group and State (2022-23)",
    color_continuous_scale="Viridis"
)

fig.update_layout(
    height=900,
    xaxis_tickangle=45,
    margin=dict(l=120, r=20, t=60, b=120))
fig.show()

	Age 3 to 5 (Early Childhood)	Ages 6-21
count	8241.0	542.0
mean	517.0	50252.0
std	7769.0	361424.0
min	0.0	0.0
25%	0.0	119.0
50%	1.0	976.0
75%	17.0	15458.0
max	535392.0	6809208.0

	SEA Disability Category	SEA Education Environment	Ages_6_21_calc
0	Autism	Inside regular class 40% through 79% of the day	4920.0
1	Autism	Inside regular class 80% or more of the day	7171.0
2	Autism	Inside regular class less than 40% of the day	6782.0
3	Autism	Residential Facility, School Age	443.0
4	Deaf-blindness	Inside regular class 40% through 79% of the day	0.0

Education Access and Enrollment Patterns for Children with Disabilities (2022-2023)¶

Nina Flores & Steven Lora¶

Hypothesis:¶

Project Overview¶

Mount Drive¶

Import Libraries¶

Get Data¶

Understanding the Data Frame¶

Visualizations¶

Choropleth Map - Geographic Distribution of Students Served¶

Stacked Bar Chart - Educational Environment by Disability Category¶

Box Plot - Enrollment Distribution Across States¶

Bubble Chart - Inclusion Rate vs. Total Enrollment by Region¶

Heatmap - Demographic Representation (Age, Gender, Race, Disability Type)¶

Conclusion¶

References¶

	Year	State Name	SEA Education Environment	SEA Disability Category	Age 3	Age 4	Age 5 (Early Childhood)	American Indian or Alaska Native - Early Childhood	Asian - Early Childhood	Black or African American - Early Childhood	...	EL No - School Age	Female - School Age	Male - School Age	American Indian or Alaska Native - School Age	Asian - School Age	Black or African American - School Age	Hispanic/Latino - School Age	Native Hawaiian or Other Pacific Islander - School Age	Two or more races - School Age	White - School Age
0	2022	Alabama	Correctional Facilities	All Disabilities	-	-	-	-	-	-	...	26	2	24	0	0	21	1	0	0	4
1	2022	Alabama	Home	All Disabilities	52	60	6	1	1	21	...	-	-	-	-	-	-	-	-	-	-
2	2022	Alabama	Homebound/Hospital	All Disabilities	-	-	-	-	-	-	...	409	151	268	1	9	165	26	0	10	208
3	2022	Alabama	Inside regular class 40% through 79% of the day	All Disabilities	-	-	-	-	-	-	...	6032	2004	4332	33	79	2497	502	5	235	2985
4	2022	Alabama	Inside regular class 80% or more of the day	All Disabilities	-	-	-	-	-	-	...	74908	27634	51089	567	484	26555	6287	62	2530	42238

	SEA Disability Category	SEA Education Environment	Ages_6_21_calc
0	Autism	Inside regular class 40% through 79% of the day	2255.0
1	Autism	Inside regular class 80% or more of the day	11738.0
2	Autism	Inside regular class less than 40% of the day	7764.0
3	Autism	Residential Facility, School Age	5.0
4	Deaf-blindness	Inside regular class 40% through 79% of the day	8.0