Note: The default ITS GitLab runner is a shared resource and is subject to slowdowns during heavy usage.
You can run your own GitLab runner that is dedicated just to your group if you need to avoid processing delays.

Commit ca0977cb authored by Liwen Huang
Browse files

Upload New File

parent 55669682
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Question 1\n",
"Write a function called `proportion_of_education` which returns the proportion of children in the dataset whose mother's education level was less than high school (<12), high school (12), more than high school but not a college graduate (>12), or a college degree.\n",
"\n",
"This function should return a dictionary in the following form (use the correct numbers; do not round them):\n",
"\n",
"    {\"less than high school\":0.2,\n",
"     \"high school\":0.4,\n",
"     \"more than high school but not college\":0.2,\n",
"     \"college\":0.2}"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>SEQNUMC</th>\n",
" <th>SEQNUMHH</th>\n",
" <th>PDAT</th>\n",
" <th>PROVWT_D</th>\n",
" <th>RDDWT_D</th>\n",
" <th>STRATUM</th>\n",
" <th>YEAR</th>\n",
" <th>AGECPOXR</th>\n",
" <th>HAD_CPOX</th>\n",
" <th>AGEGRP</th>\n",
" <th>...</th>\n",
" <th>XVRCTY2</th>\n",
" <th>XVRCTY3</th>\n",
" <th>XVRCTY4</th>\n",
" <th>XVRCTY5</th>\n",
" <th>XVRCTY6</th>\n",
" <th>XVRCTY7</th>\n",
" <th>XVRCTY8</th>\n",
" <th>XVRCTY9</th>\n",
" <th>INS_STAT2_I</th>\n",
" <th>INS_BREAK_I</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>128521</td>\n",
" <td>12852</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>235.916956</td>\n",
" <td>1031</td>\n",
" <td>2017</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>10741</td>\n",
" <td>1074</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>957.353840</td>\n",
" <td>1068</td>\n",
" <td>2017</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>220011</td>\n",
" <td>22001</td>\n",
" <td>2</td>\n",
" <td>NaN</td>\n",
" <td>189.611299</td>\n",
" <td>1050</td>\n",
" <td>2017</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>86131</td>\n",
" <td>8613</td>\n",
" <td>1</td>\n",
" <td>675.430817</td>\n",
" <td>333.447418</td>\n",
" <td>1040</td>\n",
" <td>2017</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>1.0</td>\n",
" <td>2.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>227141</td>\n",
" <td>22714</td>\n",
" <td>1</td>\n",
" <td>482.617748</td>\n",
" <td>278.768063</td>\n",
" <td>1008</td>\n",
" <td>2017</td>\n",
" <td>NaN</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>...</td>\n",
" <td></td>\n",
" <td></td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>2.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 453 columns</p>\n",
"</div>"
],
"text/plain": [
" SEQNUMC SEQNUMHH PDAT PROVWT_D RDDWT_D STRATUM YEAR AGECPOXR \\\n",
"1 128521 12852 2 NaN 235.916956 1031 2017 NaN \n",
"2 10741 1074 2 NaN 957.353840 1068 2017 NaN \n",
"3 220011 22001 2 NaN 189.611299 1050 2017 NaN \n",
"4 86131 8613 1 675.430817 333.447418 1040 2017 NaN \n",
"5 227141 22714 1 482.617748 278.768063 1008 2017 NaN \n",
"\n",
" HAD_CPOX AGEGRP ... XVRCTY2 XVRCTY3 XVRCTY4 XVRCTY5 XVRCTY6 \\\n",
"1 2 1 ... NaN NaN NaN \n",
"2 2 1 ... NaN NaN NaN \n",
"3 2 3 ... NaN NaN NaN \n",
"4 2 1 ... NaN NaN NaN \n",
"5 2 1 ... NaN NaN NaN \n",
"\n",
" XVRCTY7 XVRCTY8 XVRCTY9 INS_STAT2_I INS_BREAK_I \n",
"1 NaN NaN NaN NaN NaN \n",
"2 NaN NaN NaN NaN NaN \n",
"3 NaN NaN NaN NaN NaN \n",
"4 NaN NaN NaN 1.0 2.0 \n",
"5 NaN NaN NaN 2.0 1.0 \n",
"\n",
"[5 rows x 453 columns]"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Read the dataset CSV; its first column becomes the row index.\n",
"df = pd.read_csv(\n",
"    'datasets/NISPUF17.csv',\n",
"    index_col=0,\n",
")\n",
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1 4\n",
"2 3\n",
"3 3\n",
"4 4\n",
"5 1\n",
"Name: EDUC1, dtype: int64"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Pull out the EDUC1 column and preview its first rows.\n",
"edu = df.loc[:, 'EDUC1']\n",
"edu.head()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1, 1, 1, ..., 4, 4, 4], dtype=int64)"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sorted NumPy copy of the EDUC1 codes.\n",
"mum_edu = np.sort(edu.to_numpy())\n",
"mum_edu"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'less than high school': 0.0,\n",
" 'high school': 0,\n",
" 'more than high school but not college': 0,\n",
" 'college': 0}"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Result skeleton: one zeroed entry per education category, filled in by the cells below.\n",
"prop_edu = dict([\n",
"    (\"less than high school\", 0.00),\n",
"    (\"high school\", 0),\n",
"    (\"more than high school but not college\", 0),\n",
"    (\"college\", 0),\n",
"])\n",
"prop_edu"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [],
"source": [
"# Number of records; used as the denominator for every proportion.\n",
"n = mum_edu.shape[0]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'less than high school': 0.10202002459160373,\n",
" 'high school': 0.172352011241876,\n",
" 'more than high school but not college': 0.24588090637625154,\n",
" 'college': 0.47974705779026877}"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of records whose EDUC1 code is 1.\n",
"prop_edu.update({\"less than high school\": (mum_edu == 1).sum() / n})\n",
"prop_edu"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'less than high school': 0.10202002459160373,\n",
" 'high school': 0.172352011241876,\n",
" 'more than high school but not college': 0,\n",
" 'college': 0}"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fraction of records whose EDUC1 code is 2.\n",
"prop_edu.update({\"high school\": (mum_edu == 2).sum() / n})\n",
"prop_edu"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'less than high school': 0.10202002459160373,\n",
" 'high school': 0.172352011241876,\n",
" 'more than high school but not college': 0.24588090637625154,\n",
" 'college': 0.47974705779026877}"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fractions of records whose EDUC1 code is 3 and 4, respectively.\n",
"prop_edu.update({\n",
"    \"more than high school but not college\": (mum_edu == 3).sum() / n,\n",
"    \"college\": (mum_edu == 4).sum() / n,\n",
"})\n",
"prop_edu"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def proportion_of_education():\n",
"    \"\"\"Return the proportion of children in each maternal-education category.\n",
"\n",
"    Reads the EDUC1 column of datasets/NISPUF17.csv and maps its codes\n",
"    1-4 onto the four labels requested in Question 1. Each proportion is\n",
"    the count of matching rows divided by the total row count; values are\n",
"    not rounded.\n",
"\n",
"    Returns:\n",
"        dict: category label -> proportion as a plain Python float.\n",
"\n",
"    Raises:\n",
"        FileNotFoundError: if the CSV is not at the expected relative path.\n",
"        KeyError: if the file has no EDUC1 column.\n",
"    \"\"\"\n",
"    educ = pd.read_csv('datasets/NISPUF17.csv', index_col=0)['EDUC1']\n",
"    labels = {\n",
"        1: \"less than high school\",\n",
"        2: \"high school\",\n",
"        3: \"more than high school but not college\",\n",
"        4: \"college\",\n",
"    }\n",
"    total = len(educ)\n",
"    # float() turns NumPy scalars into built-in floats so the dict prints cleanly.\n",
"    return {name: float((educ == code).sum() / total) for code, name in labels.items()}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"proportion_of_education()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment