636 lines
22 KiB
Plaintext
636 lines
22 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 32,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# imports\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"from great_schools import get_nearby_schools\n",
|
|
"from distance import get_distance\n",
|
|
"from secret import get_key\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Shaun and Daniela's Boston Public School Analysis\n",
|
|
"#### 2021.04.10"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Fetch the API key from the local filesystem."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 33,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# get the API key\n",
|
|
"api_key_file = '../keys/api.key'\n",
|
|
"api_key = get_key(api_key_file)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Use the `nearby_schools` API endpoint to grab raw data for all schools within the maximum search radius around Boston and Buffalo."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Some columns will be dropped immediately as pre-processing.\n",
|
|
"drops = [\n",
|
|
" 'nces-id',\n",
|
|
" 'school-summary',\n",
|
|
" 'street',\n",
|
|
" 'fipscounty',\n",
|
|
" 'phone',\n",
|
|
" 'fax',\n",
|
|
" 'web-site',\n",
|
|
" 'overview-url',\n",
|
|
" 'rating-description',\n",
|
|
" 'distance',\n",
|
|
"]\n",
|
|
"\n",
|
|
"# Grab data for Boston.\n",
|
|
"refresh = False\n",
|
|
"boston_nearby_schools_file = '../data/nearby_schools/boston.csv'\n",
|
|
"if refresh:\n",
|
|
" boston_schools = get_nearby_schools(api_key,\"42.3\",\"-71.2\",\"50\")\n",
|
|
" boston_df = pd.DataFrame.from_dict(boston_schools)\n",
|
|
" boston_df.drop(columns=drops,inplace=True)\n",
|
|
" boston_df.to_csv(boston_nearby_schools_file, )\n",
|
|
"else:\n",
|
|
" boston_df = pd.read_csv(boston_nearby_schools_file)\n",
|
|
" boston_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
|
|
" boston_df.drop(columns=[\"Unnamed: 0\"], inplace=True)\n",
|
|
"\n",
|
|
"# Grab data for Buffalo.\n",
|
|
"refresh = False\n",
|
|
"buffalo_nearby_schools_file = '../data/nearby_schools/buffalo.csv'\n",
|
|
"if refresh:\n",
|
|
" buffalo_schools = get_nearby_schools(api_key,\"42.9625\",\"-78.7425\",\"50\")\n",
|
|
" buffalo_df = pd.DataFrame.from_dict(buffalo_schools)\n",
|
|
" buffalo_df.drop(columns=drops,inplace=True)\n",
|
|
" buffalo_df.to_csv(buffalo_nearby_schools_file)\n",
|
|
"else:\n",
|
|
" buffalo_df = pd.read_csv(buffalo_nearby_schools_file)\n",
|
|
" buffalo_df.set_index(keys=[\"universal-id\"], drop=True, inplace=True)\n",
|
|
" buffalo_df.drop(columns=[\"Unnamed: 0\"], inplace=True)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Process the `lat` and `lon` columns from the API output into tuples.\n",
|
|
"\n",
|
|
"Then create two new columns:\n",
|
|
"- Distance to Downtown\n",
|
|
"- Distance to Work"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 35,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Form tuple to represent coordinates\n",
|
|
"boston_df['coordinates'] = list(zip(boston_df.lat,boston_df.lon))\n",
|
|
"#boston_df.drop(columns=['lat', 'lon'], inplace=True)\n",
|
|
"\n",
|
|
"# Define coordinates of important places\n",
|
|
"downtown=(42.3674836866797, -71.07134540735377) # Science Museum\n",
|
|
"work=(42.47381059540949, -71.25414135292398) # Hartwell\n",
|
|
"\n",
|
|
"# Create new columns to tabulate distance to these important places\n",
|
|
"boston_df['distance-to-downtown'] = boston_df['coordinates'].apply(func=get_distance,p2=downtown)\n",
|
|
"boston_df['distance-to-work'] = boston_df['coordinates'].apply(func=get_distance,p2=work)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"We should definitely remove all schools that aren't in Massachusetts."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 36,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"There are 1789 schools from the original API results.\n",
|
|
"Allowing only schools from Massachusetts reduces the dataset to 1375 schools.\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"print(f'There are {len(boston_df)} schools from the original API results.')\n",
|
|
"\n",
|
|
"# only allow from MA\n",
|
|
"boston_df = boston_df[boston_df['state'] == \"MA\"]\n",
|
|
"print(f'Allowing only schools from Massachusetts reduces the dataset to {len(boston_df)} schools.')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"How many unique district IDs are there?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 37,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"\n",
|
|
"There are 230 unique school districts.\n",
|
|
"\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# get unique districts\n",
|
|
"districts = boston_df[\"district-id\"].unique()\n",
|
|
"print(f'\\nThere are {len(districts)} unique school districts.\\n')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Which of these districts are close to both work and downtown Boston?"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"There are 116 school districts within reasonable proximity to downtown and work.\n",
|
|
"\n",
|
|
"There are 820 schools within these proximal districts.\n",
|
|
"\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/html": [
|
|
"<div>\n",
|
|
"<style scoped>\n",
|
|
" .dataframe tbody tr th:only-of-type {\n",
|
|
" vertical-align: middle;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe tbody tr th {\n",
|
|
" vertical-align: top;\n",
|
|
" }\n",
|
|
"\n",
|
|
" .dataframe thead th {\n",
|
|
" text-align: right;\n",
|
|
" }\n",
|
|
"</style>\n",
|
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|
" <thead>\n",
|
|
" <tr style=\"text-align: right;\">\n",
|
|
" <th></th>\n",
|
|
" <th>state-id</th>\n",
|
|
" <th>name</th>\n",
|
|
" <th>type</th>\n",
|
|
" <th>level-codes</th>\n",
|
|
" <th>level</th>\n",
|
|
" <th>city</th>\n",
|
|
" <th>state</th>\n",
|
|
" <th>zip</th>\n",
|
|
" <th>county</th>\n",
|
|
" <th>lat</th>\n",
|
|
" <th>lon</th>\n",
|
|
" <th>district-name</th>\n",
|
|
" <th>district-id</th>\n",
|
|
" <th>rating</th>\n",
|
|
" <th>year</th>\n",
|
|
" <th>coordinates</th>\n",
|
|
" <th>distance-to-downtown</th>\n",
|
|
" <th>distance-to-work</th>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>universal-id</th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" <th></th>\n",
|
|
" </tr>\n",
|
|
" </thead>\n",
|
|
" <tbody>\n",
|
|
" <tr>\n",
|
|
" <th>2500363</th>\n",
|
|
" <td>380013</td>\n",
|
|
" <td>Spofford Pond</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>e</td>\n",
|
|
" <td>3,4,5,6</td>\n",
|
|
" <td>Boxford</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1921</td>\n",
|
|
" <td>Essex County</td>\n",
|
|
" <td>42.697018</td>\n",
|
|
" <td>-71.017365</td>\n",
|
|
" <td>Boxford School District</td>\n",
|
|
" <td>102</td>\n",
|
|
" <td>7.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.697018, -71.017365)</td>\n",
|
|
" <td>22.917933</td>\n",
|
|
" <td>19.554889</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2506356</th>\n",
|
|
" <td>100305</td>\n",
|
|
" <td>Gibbs School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>e,m</td>\n",
|
|
" <td>6</td>\n",
|
|
" <td>Arlington</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2474</td>\n",
|
|
" <td>Middlesex County</td>\n",
|
|
" <td>42.410576</td>\n",
|
|
" <td>-71.145081</td>\n",
|
|
" <td>Arlington Public Schools</td>\n",
|
|
" <td>69</td>\n",
|
|
" <td>7.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.410576, -71.145081)</td>\n",
|
|
" <td>4.794958</td>\n",
|
|
" <td>7.066929</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501835</th>\n",
|
|
" <td>3470410</td>\n",
|
|
" <td>Daniel L Joyce Middle School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>m</td>\n",
|
|
" <td>6,7,8</td>\n",
|
|
" <td>Woburn</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1801</td>\n",
|
|
" <td>Middlesex County</td>\n",
|
|
" <td>42.477467</td>\n",
|
|
" <td>-71.175484</td>\n",
|
|
" <td>Woburn School District</td>\n",
|
|
" <td>467</td>\n",
|
|
" <td>4.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.477467, -71.175484)</td>\n",
|
|
" <td>9.264922</td>\n",
|
|
" <td>4.013598</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501714</th>\n",
|
|
" <td>3150005</td>\n",
|
|
" <td>Claypit Hill School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>e</td>\n",
|
|
" <td>KG,1,2,3,4,5</td>\n",
|
|
" <td>Wayland</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1778</td>\n",
|
|
" <td>Middlesex County</td>\n",
|
|
" <td>42.373108</td>\n",
|
|
" <td>-71.344765</td>\n",
|
|
" <td>Wayland School District</td>\n",
|
|
" <td>434</td>\n",
|
|
" <td>8.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.373108, -71.344765)</td>\n",
|
|
" <td>13.952791</td>\n",
|
|
" <td>8.347379</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2502631</th>\n",
|
|
" <td>1810055</td>\n",
|
|
" <td>Tenney Grammar School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>p,e,m</td>\n",
|
|
" <td>PK,KG,1,2,3,4,5,6,7,8</td>\n",
|
|
" <td>Methuen</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1844</td>\n",
|
|
" <td>Essex County</td>\n",
|
|
" <td>42.732357</td>\n",
|
|
" <td>-71.177345</td>\n",
|
|
" <td>Methuen School District</td>\n",
|
|
" <td>270</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.732357, -71.177345)</td>\n",
|
|
" <td>25.763243</td>\n",
|
|
" <td>18.273064</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2500515</th>\n",
|
|
" <td>710505</td>\n",
|
|
" <td>Danvers High School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>h</td>\n",
|
|
" <td>9,10,11,12,UG</td>\n",
|
|
" <td>Danvers</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1923</td>\n",
|
|
" <td>Essex County</td>\n",
|
|
" <td>42.582523</td>\n",
|
|
" <td>-70.931618</td>\n",
|
|
" <td>Danvers School District</td>\n",
|
|
" <td>141</td>\n",
|
|
" <td>6.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.582523, -70.931618)</td>\n",
|
|
" <td>16.464503</td>\n",
|
|
" <td>18.045917</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501498</th>\n",
|
|
" <td>2740410</td>\n",
|
|
" <td>Next Wave Junior High School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>m</td>\n",
|
|
" <td>7,8</td>\n",
|
|
" <td>Somerville</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2145</td>\n",
|
|
" <td>Middlesex County</td>\n",
|
|
" <td>42.387581</td>\n",
|
|
" <td>-71.087326</td>\n",
|
|
" <td>Somerville School District</td>\n",
|
|
" <td>383</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>NaN</td>\n",
|
|
" <td>(42.387581, -71.087326)</td>\n",
|
|
" <td>1.609308</td>\n",
|
|
" <td>10.378716</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501384</th>\n",
|
|
" <td>2430310</td>\n",
|
|
" <td>Broad Meadows Middle School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>m</td>\n",
|
|
" <td>6,7,8</td>\n",
|
|
" <td>Quincy</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2169</td>\n",
|
|
" <td>Norfolk County</td>\n",
|
|
" <td>42.259659</td>\n",
|
|
" <td>-70.985237</td>\n",
|
|
" <td>Quincy School District</td>\n",
|
|
" <td>349</td>\n",
|
|
" <td>4.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.259659, -70.985237)</td>\n",
|
|
" <td>8.646003</td>\n",
|
|
" <td>20.169491</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2500916</th>\n",
|
|
" <td>1570006</td>\n",
|
|
" <td>Hanscom Primary School</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>p,e</td>\n",
|
|
" <td>PK,KG,1,2,3</td>\n",
|
|
" <td>Hanscom Air Force Bs</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>1731</td>\n",
|
|
" <td>Middlesex County</td>\n",
|
|
" <td>42.456898</td>\n",
|
|
" <td>-71.278549</td>\n",
|
|
" <td>Lincoln School District</td>\n",
|
|
" <td>242</td>\n",
|
|
" <td>3.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.456898, -71.278549)</td>\n",
|
|
" <td>12.234463</td>\n",
|
|
" <td>1.705602</td>\n",
|
|
" </tr>\n",
|
|
" <tr>\n",
|
|
" <th>2501788</th>\n",
|
|
" <td>3360065</td>\n",
|
|
" <td>Lawrence W Pingree</td>\n",
|
|
" <td>public</td>\n",
|
|
" <td>e</td>\n",
|
|
" <td>KG,1,2,3,4</td>\n",
|
|
" <td>Weymouth</td>\n",
|
|
" <td>MA</td>\n",
|
|
" <td>2189</td>\n",
|
|
" <td>Norfolk County</td>\n",
|
|
" <td>42.217670</td>\n",
|
|
" <td>-70.925240</td>\n",
|
|
" <td>Weymouth School District</td>\n",
|
|
" <td>455</td>\n",
|
|
" <td>8.0</td>\n",
|
|
" <td>2021.0</td>\n",
|
|
" <td>(42.21767, -70.92524)</td>\n",
|
|
" <td>12.754639</td>\n",
|
|
" <td>24.381842</td>\n",
|
|
" </tr>\n",
|
|
" </tbody>\n",
|
|
"</table>\n",
|
|
"</div>"
|
|
],
|
|
"text/plain": [
|
|
" state-id name type level-codes \\\n",
|
|
"universal-id \n",
|
|
"2500363 380013 Spofford Pond public e \n",
|
|
"2506356 100305 Gibbs School public e,m \n",
|
|
"2501835 3470410 Daniel L Joyce Middle School public m \n",
|
|
"2501714 3150005 Claypit Hill School public e \n",
|
|
"2502631 1810055 Tenney Grammar School public p,e,m \n",
|
|
"2500515 710505 Danvers High School public h \n",
|
|
"2501498 2740410 Next Wave Junior High School public m \n",
|
|
"2501384 2430310 Broad Meadows Middle School public m \n",
|
|
"2500916 1570006 Hanscom Primary School public p,e \n",
|
|
"2501788 3360065 Lawrence W Pingree public e \n",
|
|
"\n",
|
|
" level city state zip \\\n",
|
|
"universal-id \n",
|
|
"2500363 3,4,5,6 Boxford MA 1921 \n",
|
|
"2506356 6 Arlington MA 2474 \n",
|
|
"2501835 6,7,8 Woburn MA 1801 \n",
|
|
"2501714 KG,1,2,3,4,5 Wayland MA 1778 \n",
|
|
"2502631 PK,KG,1,2,3,4,5,6,7,8 Methuen MA 1844 \n",
|
|
"2500515 9,10,11,12,UG Danvers MA 1923 \n",
|
|
"2501498 7,8 Somerville MA 2145 \n",
|
|
"2501384 6,7,8 Quincy MA 2169 \n",
|
|
"2500916 PK,KG,1,2,3 Hanscom Air Force Bs MA 1731 \n",
|
|
"2501788 KG,1,2,3,4 Weymouth MA 2189 \n",
|
|
"\n",
|
|
" county lat lon \\\n",
|
|
"universal-id \n",
|
|
"2500363 Essex County 42.697018 -71.017365 \n",
|
|
"2506356 Middlesex County 42.410576 -71.145081 \n",
|
|
"2501835 Middlesex County 42.477467 -71.175484 \n",
|
|
"2501714 Middlesex County 42.373108 -71.344765 \n",
|
|
"2502631 Essex County 42.732357 -71.177345 \n",
|
|
"2500515 Essex County 42.582523 -70.931618 \n",
|
|
"2501498 Middlesex County 42.387581 -71.087326 \n",
|
|
"2501384 Norfolk County 42.259659 -70.985237 \n",
|
|
"2500916 Middlesex County 42.456898 -71.278549 \n",
|
|
"2501788 Norfolk County 42.217670 -70.925240 \n",
|
|
"\n",
|
|
" district-name district-id rating year \\\n",
|
|
"universal-id \n",
|
|
"2500363 Boxford School District 102 7.0 2021.0 \n",
|
|
"2506356 Arlington Public Schools 69 7.0 2021.0 \n",
|
|
"2501835 Woburn School District 467 4.0 2021.0 \n",
|
|
"2501714 Wayland School District 434 8.0 2021.0 \n",
|
|
"2502631 Methuen School District 270 3.0 2021.0 \n",
|
|
"2500515 Danvers School District 141 6.0 2021.0 \n",
|
|
"2501498 Somerville School District 383 NaN NaN \n",
|
|
"2501384 Quincy School District 349 4.0 2021.0 \n",
|
|
"2500916 Lincoln School District 242 3.0 2021.0 \n",
|
|
"2501788 Weymouth School District 455 8.0 2021.0 \n",
|
|
"\n",
|
|
" coordinates distance-to-downtown distance-to-work \n",
|
|
"universal-id \n",
|
|
"2500363 (42.697018, -71.017365) 22.917933 19.554889 \n",
|
|
"2506356 (42.410576, -71.145081) 4.794958 7.066929 \n",
|
|
"2501835 (42.477467, -71.175484) 9.264922 4.013598 \n",
|
|
"2501714 (42.373108, -71.344765) 13.952791 8.347379 \n",
|
|
"2502631 (42.732357, -71.177345) 25.763243 18.273064 \n",
|
|
"2500515 (42.582523, -70.931618) 16.464503 18.045917 \n",
|
|
"2501498 (42.387581, -71.087326) 1.609308 10.378716 \n",
|
|
"2501384 (42.259659, -70.985237) 8.646003 20.169491 \n",
|
|
"2500916 (42.456898, -71.278549) 12.234463 1.705602 \n",
|
|
"2501788 (42.21767, -70.92524) 12.754639 24.381842 "
|
|
]
|
|
},
|
|
"execution_count": 38,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"# approximate each district's distance to the PoIs by averaging the distances of its schools\n",
|
|
"distances_to_downtown = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-downtown']}\n",
|
|
"distances_to_work = {k: np.mean(list(v)) for k, v in boston_df.groupby('district-id')['distance-to-work']}\n",
|
|
"\n",
|
|
"df_downtown = pd.DataFrame.from_dict(distances_to_downtown, orient='index')\n",
|
|
"df_work = pd.DataFrame.from_dict(distances_to_work, orient='index')\n",
|
|
"\n",
|
|
"# merge these new columns\n",
|
|
"both_df = pd.merge(left=df_downtown, right=df_work, how='inner', left_index=True, right_index=True)\n",
|
|
"both_df.rename(columns={'0_x': \"downtown\", '0_y': \"work\"}, inplace=True)\n",
|
|
"\n",
|
|
"both_df = both_df[both_df[\"downtown\"] < 35.0]\n",
|
|
"both_df = both_df[both_df[\"work\"] < 25.0]\n",
|
|
"\n",
|
|
"print(f'There are {len(both_df)} school districts within reasonable proximity to downtown and work.\\n')\n",
|
|
"\n",
|
|
"# filter out all schools which aren't in proximal districts\n",
|
|
"proximal_district_ids = list(both_df.index)\n",
|
|
"boston_df = boston_df[boston_df['district-id'].isin(proximal_district_ids)]\n",
|
|
"\n",
|
|
"print(f'There are {len(boston_df)} schools within these proximal districts.\\n')\n",
|
|
"\n",
|
|
"boston_df.sample(10)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"Some of these districts don't have enough rating data. Those should be dropped."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f54f95addf0>"
|
|
]
|
|
},
|
|
"execution_count": 40,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"boston_df.groupby(['district-id'])"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"interpreter": {
|
|
"hash": "4fc861b332db140b7b363b167627eee6a3238262e7c99e0237067fec0875fee7"
|
|
},
|
|
"kernelspec": {
|
|
"display_name": "Python 3.8.10 ('venv': venv)",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.8.10"
|
|
},
|
|
"orig_nbformat": 4
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|