{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "\n",
    "from great_schools import get_nearby_schools\n",
    "from distance import get_distance\n",
    "from secret import get_key\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Shaun and Daniela's Boston Public School Analysis\n",
    "#### 2021.04.10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Fetch the API key from the local filesystem."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [],
   "source": [
    "# get the API key\n",
    "api_key_file = '../keys/api.key'\n",
    "api_key = get_key(api_key_file)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Use the `nearby_schools` API endpoint to grab raw data of all schools within the maximum radius."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Some columns will be dropped immediately as pre-processing.\n",
    "drops = [\n",
    "    'nces-id',\n",
    "    'school-summary',\n",
    "    'street',\n",
    "    'fipscounty',\n",
    "    'phone',\n",
    "    'fax',\n",
    "    'web-site',\n",
    "    'overview-url',\n",
    "    'rating-description',\n",
    "    'distance',\n",
    "]\n",
    "\n",
    "\n",
    "def load_nearby_schools(csv_file, lat, lon, radius, refresh=False):\n",
    "    \"\"\"Return one metro area's nearby-schools table, indexed by `universal-id`.\n",
    "\n",
    "    Args:\n",
    "        csv_file: Path of the CSV file that caches the API results.\n",
    "        lat, lon, radius: Passed through unchanged to `get_nearby_schools`.\n",
    "        refresh: When True, query the API again and overwrite `csv_file`\n",
    "            (minus the `drops` columns) before loading it.\n",
    "\n",
    "    Returns:\n",
    "        The contents of `csv_file` as a DataFrame indexed by `universal-id`.\n",
    "    \"\"\"\n",
    "    if refresh:\n",
    "        schools = get_nearby_schools(api_key, lat, lon, radius)\n",
    "        pd.DataFrame.from_dict(schools).drop(columns=drops).to_csv(csv_file)\n",
    "\n",
    "    # Always load from the CSV, even right after a refresh, so that both paths\n",
    "    # produce the same index and columns. to_csv() writes the default integer\n",
    "    # index as an unnamed first column, which read_csv() names \"Unnamed: 0\".\n",
    "    return (\n",
    "        pd.read_csv(csv_file)\n",
    "        .set_index(keys=[\"universal-id\"], drop=True)\n",
    "        .drop(columns=[\"Unnamed: 0\"])\n",
    "    )\n",
    "\n",
    "\n",
    "# Set to True to re-query the API instead of reusing the cached CSV files.\n",
    "refresh = False\n",
    "\n",
    "# Grab data for Boston.\n",
    "boston_nearby_schools_file = '../data/nearby_schools/boston.csv'\n",
    "boston_df = load_nearby_schools(boston_nearby_schools_file, \"42.3\", \"-71.2\", \"50\", refresh)\n",
    "\n",
    "# Grab data for Buffalo.\n",
    "buffalo_nearby_schools_file = '../data/nearby_schools/buffalo.csv'\n",
    "buffalo_df = load_nearby_schools(buffalo_nearby_schools_file, \"42.9625\", \"-78.7425\", \"50\", refresh)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Process the `lat` and `lon` columns from the API output into tuples.\n",
    "\n",
    "Then create two new columns:\n",
    "- Distance to Downtown\n",
    "- Distance to Work"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Form tuple to represent coordinates\n",
    "boston_df['coordinates'] = list(zip(boston_df.lat,boston_df.lon))\n",
    "#boston_df.drop(columns=['lat', 'lon'], inplace=True)\n",
    "\n",
    "# Define coordinates of important places\n",
    "downtown=(42.3674836866797, -71.07134540735377) # Science Museum\n",
    "work=(42.47381059540949, -71.25414135292398) # Hartwell\n",
    "\n",
    "# Create new columns to tabulate distance to these important places\n",
    "boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance,p2=downtown)\n",
    "boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance,p2=work)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "We should definitely remove all schools that aren't in Massachusetts."
] },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are 1789 schools from the original API results.\n",
      "Allowing only schools from Massachusetts reduces the dataset to 1375 schools.\n"
     ]
    }
   ],
   "source": [
    "# Size of the dataset before any filtering.\n",
    "print(f'There are {len(boston_df)} schools from the original API results.')\n",
    "\n",
    "# Build a boolean mask of the Massachusetts rows, then keep just those rows.\n",
    "in_massachusetts = boston_df['state'].eq(\"MA\")\n",
    "boston_df = boston_df.loc[in_massachusetts]\n",
    "print(f'Allowing only schools from Massachusetts reduces the dataset to {len(boston_df)} schools.')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How many unique district IDs are there?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "There are 230 unique school districts.\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Distinct district ids among the remaining schools.\n",
    "district_ids = boston_df[\"district-id\"]\n",
    "districts = district_ids.unique()\n",
    "print(f'\\nThere are {len(districts)} unique school districts.\\n')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Which of these districts are close to both work and downtown Boston?"
   ]
  },
  { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "There are 116 school districts within reasonable proximity to downtown and work.\n", "\n", "There are 820 schools within these proximal districts.\n", "\n" ] }, { "data": { "text/html": [ "
| \n", " | state-id | \n", "name | \n", "type | \n", "level-codes | \n", "level | \n", "city | \n", "state | \n", "zip | \n", "county | \n", "lat | \n", "lon | \n", "district-name | \n", "district-id | \n", "rating | \n", "year | \n", "coordinates | \n", "distance-to-downtown | \n", "distance-to-work | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| universal-id | \n", "\n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " | \n", " |
| 2500363 | \n", "380013 | \n", "Spofford Pond | \n", "public | \n", "e | \n", "3,4,5,6 | \n", "Boxford | \n", "MA | \n", "1921 | \n", "Essex County | \n", "42.697018 | \n", "-71.017365 | \n", "Boxford School District | \n", "102 | \n", "7.0 | \n", "2021.0 | \n", "(42.697018, -71.017365) | \n", "22.917933 | \n", "19.554889 | \n", "
| 2506356 | \n", "100305 | \n", "Gibbs School | \n", "public | \n", "e,m | \n", "6 | \n", "Arlington | \n", "MA | \n", "2474 | \n", "Middlesex County | \n", "42.410576 | \n", "-71.145081 | \n", "Arlington Public Schools | \n", "69 | \n", "7.0 | \n", "2021.0 | \n", "(42.410576, -71.145081) | \n", "4.794958 | \n", "7.066929 | \n", "
| 2501835 | \n", "3470410 | \n", "Daniel L Joyce Middle School | \n", "public | \n", "m | \n", "6,7,8 | \n", "Woburn | \n", "MA | \n", "1801 | \n", "Middlesex County | \n", "42.477467 | \n", "-71.175484 | \n", "Woburn School District | \n", "467 | \n", "4.0 | \n", "2021.0 | \n", "(42.477467, -71.175484) | \n", "9.264922 | \n", "4.013598 | \n", "
| 2501714 | \n", "3150005 | \n", "Claypit Hill School | \n", "public | \n", "e | \n", "KG,1,2,3,4,5 | \n", "Wayland | \n", "MA | \n", "1778 | \n", "Middlesex County | \n", "42.373108 | \n", "-71.344765 | \n", "Wayland School District | \n", "434 | \n", "8.0 | \n", "2021.0 | \n", "(42.373108, -71.344765) | \n", "13.952791 | \n", "8.347379 | \n", "
| 2502631 | \n", "1810055 | \n", "Tenney Grammar School | \n", "public | \n", "p,e,m | \n", "PK,KG,1,2,3,4,5,6,7,8 | \n", "Methuen | \n", "MA | \n", "1844 | \n", "Essex County | \n", "42.732357 | \n", "-71.177345 | \n", "Methuen School District | \n", "270 | \n", "3.0 | \n", "2021.0 | \n", "(42.732357, -71.177345) | \n", "25.763243 | \n", "18.273064 | \n", "
| 2500515 | \n", "710505 | \n", "Danvers High School | \n", "public | \n", "h | \n", "9,10,11,12,UG | \n", "Danvers | \n", "MA | \n", "1923 | \n", "Essex County | \n", "42.582523 | \n", "-70.931618 | \n", "Danvers School District | \n", "141 | \n", "6.0 | \n", "2021.0 | \n", "(42.582523, -70.931618) | \n", "16.464503 | \n", "18.045917 | \n", "
| 2501498 | \n", "2740410 | \n", "Next Wave Junior High School | \n", "public | \n", "m | \n", "7,8 | \n", "Somerville | \n", "MA | \n", "2145 | \n", "Middlesex County | \n", "42.387581 | \n", "-71.087326 | \n", "Somerville School District | \n", "383 | \n", "NaN | \n", "NaN | \n", "(42.387581, -71.087326) | \n", "1.609308 | \n", "10.378716 | \n", "
| 2501384 | \n", "2430310 | \n", "Broad Meadows Middle School | \n", "public | \n", "m | \n", "6,7,8 | \n", "Quincy | \n", "MA | \n", "2169 | \n", "Norfolk County | \n", "42.259659 | \n", "-70.985237 | \n", "Quincy School District | \n", "349 | \n", "4.0 | \n", "2021.0 | \n", "(42.259659, -70.985237) | \n", "8.646003 | \n", "20.169491 | \n", "
| 2500916 | \n", "1570006 | \n", "Hanscom Primary School | \n", "public | \n", "p,e | \n", "PK,KG,1,2,3 | \n", "Hanscom Air Force Bs | \n", "MA | \n", "1731 | \n", "Middlesex County | \n", "42.456898 | \n", "-71.278549 | \n", "Lincoln School District | \n", "242 | \n", "3.0 | \n", "2021.0 | \n", "(42.456898, -71.278549) | \n", "12.234463 | \n", "1.705602 | \n", "
| 2501788 | \n", "3360065 | \n", "Lawrence W Pingree | \n", "public | \n", "e | \n", "KG,1,2,3,4 | \n", "Weymouth | \n", "MA | \n", "2189 | \n", "Norfolk County | \n", "42.217670 | \n", "-70.925240 | \n", "Weymouth School District | \n", "455 | \n", "8.0 | \n", "2021.0 | \n", "(42.21767, -70.92524) | \n", "12.754639 | \n", "24.381842 | \n", "