Skip to content

Commit 49c3729

Browse files
committed
Update 04_Create_Acs_Indicators.ipynb
1 parent 7b1e12c commit 49c3729

File tree

1 file changed

+41
-60
lines changed

1 file changed

+41
-60
lines changed

‎notebooks/04_Create_Acs_Indicators.ipynb‎

Lines changed: 41 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -24,28 +24,7 @@
2424
},
2525
{
2626
"cell_type": "code",
27-
"execution_count": 2,
28-
"metadata": {
29-
"id": "HhQ_FIqhjHEG"
30-
},
31-
"outputs": [],
32-
"source": [
33-
"#export \n",
34-
"\n",
35-
"#These are the libraries used by every script. The scripts WILL NOT run without them. \n",
36-
"from VitalSigns.acsDownload import retrieve_acs_data\n",
37-
"from dataplay.merge import mergeDatasets\n",
38-
"from dataplay.intaker import Intake \n",
39-
"from IPython.display import clear_output\n",
40-
"import pandas as pd\n",
41-
"import geopandas as gpd\n",
42-
"import glob\n",
43-
"import numpy"
44-
]
45-
},
46-
{
47-
"cell_type": "code",
48-
"execution_count": 25,
27+
"execution_count": 1,
4928
"metadata": {
5029
"id": "c38OYV2HZ65Y"
5130
},
@@ -88,31 +67,34 @@
8867
" fi = fi.drop(fi.index[1]) #Remove baltimore City row from fi based on index location - its index location is 1 for both 2010 and 2020 indicators.\n",
8968
" fi.loc['Baltimore City'] = bc\n",
9069
" \n",
91-
" return fi\n",
92-
"\n",
93-
"#Add 2010 CSA Column\n",
94-
"def add_CSA2010(df):\n",
95-
" fi = df\n",
96-
" CSA_Crosswalk = pd.read_csv(\"https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010_2020.csv\")\n",
97-
"\n",
98-
" fi.reset_index(inplace=True)\n",
99-
" fi = CSA_Crosswalk.merge(fi, on=\"CSA2020\", how=\"outer\")\n",
100-
" \n",
101-
" return fi\n",
102-
"\n",
103-
"def add_CSA2020(df):\n",
104-
" fi = df\n",
105-
" CSA_Crosswalk = pd.read_csv(\"https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010_2020.csv\")\n",
70+
" return fi"
71+
]
72+
},
73+
{
74+
"cell_type": "code",
75+
"execution_count": null,
76+
"metadata": {
77+
"id": "HhQ_FIqhjHEG"
78+
},
79+
"outputs": [],
80+
"source": [
81+
"#export \n",
10682
"\n",
107-
" fi.reset_index(inplace=True)\n",
108-
" fi = CSA_Crosswalk.merge(fi, on=\"CSA2010\", how=\"outer\")\n",
109-
" \n",
110-
" return fi"
83+
"#These are the libraries used by every script. The scripts WILL NOT run without them. \n",
84+
"from VitalSigns.acsDownload import retrieve_acs_data\n",
85+
"from dataplay.merge import mergeDatasets\n",
86+
"from dataplay.intaker import Intake \n",
87+
"from IPython.display import clear_output\n",
88+
"import pandas as pd\n",
89+
"import geopandas as gpd\n",
90+
"import glob\n",
91+
"import numpy\n",
92+
"import csv"
11193
]
11294
},
11395
{
11496
"cell_type": "code",
115-
"execution_count": 28,
97+
"execution_count": null,
11698
"metadata": {
11799
"id": "SyPNNQF8q9nC"
118100
},
@@ -123,7 +105,7 @@
123105
"\n",
124106
"def createAcsIndicator(state, county, tract, year, tableId,\n",
125107
" mergeUrl, merge_left_col, merge_right_col, merge_how, groupBy,\n",
126-
" aggMethod, method, columnsToInclude):\n",
108+
" aggMethod, method, columnsToInclude, finalFileName=False):\n",
127109
"\n",
128110
" # Pull the data\n",
129111
" df = retrieve_acs_data(state, county, tract, tableId, year)\n",
@@ -154,22 +136,17 @@
154136
" # Create the indicator\n",
155137
" print('Creating Indicator')\n",
156138
" resp = method( df, columnsToInclude)\n",
157-
" \n",
158-
" #Add Missing CSA Column\n",
159-
" data_year = year\n",
160-
" print(\"Adding Missing CSA Column\")\n",
161-
" if int(data_year) <= 19:\n",
162-
" resp = add_CSA2020(resp)\n",
163-
" else:\n",
164-
" resp = add_CSA2010(resp)\n",
165-
"\n",
166139
" print('Indicator Created')\n",
140+
" if finalFileName:\n",
141+
" resp.to_csv(finalFileName, quoting=csv.QUOTE_ALL)\n",
142+
" print('Indicator Saved')\n",
143+
"\n",
167144
" return resp"
168145
]
169146
},
170147
{
171148
"cell_type": "code",
172-
"execution_count": 8,
149+
"execution_count": null,
173150
"metadata": {
174151
"id": "MHRv3nlMg06s"
175152
},
@@ -271,7 +248,7 @@
271248
},
272249
{
273250
"cell_type": "code",
274-
"execution_count": 30,
251+
"execution_count": null,
275252
"metadata": {
276253
"id": "9xXphAkqg0z2"
277254
},
@@ -306,7 +283,7 @@
306283
},
307284
{
308285
"cell_type": "code",
309-
"execution_count": 19,
286+
"execution_count": null,
310287
"metadata": {
311288
"id": "QTqlJLdUg0tU"
312289
},
@@ -340,7 +317,7 @@
340317
},
341318
{
342319
"cell_type": "code",
343-
"execution_count": 32,
320+
"execution_count": null,
344321
"metadata": {
345322
"id": "9O82H26kg_L5"
346323
},
@@ -370,7 +347,8 @@
370347
" groupBy = 'CSA2010',\n",
371348
" aggMethod= 'sum', \n",
372349
" method = hisp,\n",
373-
" columnsToInclude = [])\n",
350+
" columnsToInclude = [],\n",
351+
" finalFileName=False)\n",
374352
" else:\n",
375353
" fi_hisp = createAcsIndicator(state = '24', county = '510', tract = '*' , year = chosen_year, tableId = 'B03002',\n",
376354
" mergeUrl = 'https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2020.csv', \n",
@@ -380,7 +358,8 @@
380358
" groupBy = 'CSA2020',\n",
381359
" aggMethod= 'sum', \n",
382360
" method = hisp,\n",
383-
" columnsToInclude = [])\n",
361+
" columnsToInclude = [],\n",
362+
" finalFileName=False)\n",
384363
"\n",
385364
" #Column 012E from the Hisp table has a different name on the years prior to 2019. \n",
386365
" #This code changes the name of that column automatically for every year prior to 2019.\n",
@@ -426,7 +405,7 @@
426405
},
427406
{
428407
"cell_type": "code",
429-
"execution_count": 21,
408+
"execution_count": null,
430409
"metadata": {
431410
"id": "OyxGbFfCjd2-"
432411
},
@@ -1887,6 +1866,8 @@
18871866
],
18881867
"metadata": {
18891868
"colab": {
1869+
"collapsed_sections": [],
1870+
"name": "04_Create_Acs_Indicatorsipynb",
18901871
"provenance": []
18911872
},
18921873
"kernelspec": {
@@ -1914,4 +1895,4 @@
19141895
},
19151896
"nbformat": 4,
19161897
"nbformat_minor": 0
1917-
}
1898+
}

0 commit comments

Comments
 (0)