Skip to content

Commit 7b1e12c

Browse files
committed
Update create.py
1 parent ac2d7c7 commit 7b1e12c

File tree

1 file changed

+56
-17
lines changed

1 file changed

+56
-17
lines changed

‎VitalSigns/create.py‎

Lines changed: 56 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,15 @@
1111
#Cell
1212
#@title Run This Cell: Misc Function Declarations
1313

14+
#Need these imports or the scripts WILL NOT work.
15+
import geopandas as gpd
16+
import numpy
17+
import pandas as pd
18+
from .acsDownload import retrieve_acs_data
19+
from dataplay.merge import mergeDatasets
20+
from dataplay.intaker import Intake
21+
22+
1423
# These functions right here are used in the calculations below.
1524
# Finds a column matching a substring
1625
def getColName(df, col):
    """Return the first column label of ``df`` whose name matches ``col``.

    ``col`` is handed to ``str.contains`` as its pattern, so pandas' default
    matching rules for that method apply. An ``IndexError`` is raised when no
    column label matches.
    """
    labels = df.columns
    hits = labels.str.contains(pat=col)
    return labels[hits][0]
@@ -33,21 +42,49 @@ def nullIfEqual(df, c1, c2):
3342
def sumInts(df):
    """Return the per-column sums of ``df``, counting numeric columns only."""
    totals = df.sum(numeric_only=True)
    return totals
3443

3544

36-
#Need these imports or the scripts WILL NOT work.
37-
import geopandas as gpd
38-
import numpy
39-
import pandas as pd
40-
from .acsDownload import retrieve_acs_data
41-
from dataplay.merge import mergeDatasets
42-
from dataplay.intaker import Intake
45+
#Move Baltimore City row to the bottom, and delete "Unassigned--Jail" row
46+
def baltCity(df):
    """Drop the "Unassigned--Jail" row and move Baltimore City to the bottom.

    Args:
        df: DataFrame with a ``tract`` column and a row whose index label is
            ``'Baltimore City'``.

    Returns:
        A new DataFrame without the rows whose ``tract`` equals 100300 (the
        "Unassigned--Jail" tract) and with the ``'Baltimore City'`` row placed
        last. The input DataFrame is not modified.

    Raises:
        KeyError: if no row is labelled ``'Baltimore City'``.
    """
    # Delete "Unassigned--Jail" row -> tract 100300
    fi = df[df['tract'] != 100300]

    # Locate the Baltimore City row by its label instead of assuming it sits
    # at position 1, so the right row is moved wherever it appears.
    is_city = fi.index == 'Baltimore City'
    if not is_city.any():
        raise KeyError('Baltimore City')

    # Re-assemble with the city row last; concat keeps the column dtypes and
    # avoids assigning into a filtered slice.
    return pd.concat([fi[~is_city], fi[is_city]])
58+
59+
#Add 2010 CSA Column
60+
def add_CSA2010(df):
    """Outer-merge the CSA 2010/2020 crosswalk onto ``df`` using ``CSA2020``.

    Args:
        df: indicator DataFrame. After its index is reset it must expose a
            ``CSA2020`` column (presumably the index is named ``CSA2020`` --
            confirm against the caller).

    Returns:
        A new DataFrame: the crosswalk CSV merged with the reset-index copy
        of ``df`` on ``"CSA2020"`` with ``how="outer"``. The input DataFrame
        is left untouched.
    """
    CSA_Crosswalk = pd.read_csv("https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010_2020.csv")

    # reset_index() without inplace returns a copy, so the caller's frame
    # keeps its original index instead of being silently rewritten.
    fi = df.reset_index()

    return CSA_Crosswalk.merge(fi, on="CSA2020", how="outer")
68+
69+
def add_CSA2020(df):
    """Outer-merge the CSA 2010/2020 crosswalk onto ``df`` using ``CSA2010``.

    Args:
        df: indicator DataFrame. After its index is reset it must expose a
            ``CSA2010`` column (presumably the index is named ``CSA2010`` --
            confirm against the caller).

    Returns:
        A new DataFrame: the crosswalk CSV merged with the reset-index copy
        of ``df`` on ``"CSA2010"`` with ``how="outer"``. The input DataFrame
        is left untouched.
    """
    CSA_Crosswalk = pd.read_csv("https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010_2020.csv")

    # reset_index() without inplace returns a copy, so the caller's frame
    # keeps its original index instead of being silently rewritten.
    fi = df.reset_index()

    return CSA_Crosswalk.merge(fi, on="CSA2010", how="outer")
77+
78+
79+
4380

4481

4582
#cell
4683
#@title Run This Cell: Create createIndicator()
4784

4885
def createAcsIndicator(state, county, tract, year, tableId,
4986
mergeUrl, merge_left_col, merge_right_col, merge_how, groupBy,
50-
aggMethod, method, columnsToInclude, finalFileName=False):
87+
aggMethod, method, columnsToInclude):
5188

5289
# Pull the data
5390
df = retrieve_acs_data(state, county, tract, tableId, year)
@@ -78,14 +115,18 @@ def createAcsIndicator(state, county, tract, year, tableId,
78115
# Create the indicator
79116
print('Creating Indicator')
80117
resp = method( df, columnsToInclude)
81-
print('Indicator Created')
82-
if finalFileName:
83-
resp.to_csv(finalFileName, quoting=csv.QUOTE_ALL)
84-
print('Indicator Saved')
118+
119+
#Add Missing CSA Column
120+
data_year = year
121+
print("Adding Missing CSA Column")
122+
if int(data_year) <= 19:
123+
resp = add_CSA2020(resp)
124+
else:
125+
resp = add_CSA2010(resp)
85126

127+
print('Indicator Created')
86128
return resp
87129

88-
89130
# Cell
90131
#@title Run This Cell: Create age5
91132

@@ -276,8 +317,7 @@ def racdiv(df, columnsToInclude):
276317
groupBy = 'CSA2010',
277318
aggMethod= 'sum',
278319
method = hisp,
279-
columnsToInclude = [],
280-
finalFileName=False)
320+
columnsToInclude = [])
281321
else:
282322
fi_hisp = createAcsIndicator(state = '24', county = '510', tract = '*' , year = chosen_year, tableId = 'B03002',
283323
mergeUrl = 'https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2020.csv',
@@ -287,8 +327,7 @@ def racdiv(df, columnsToInclude):
287327
groupBy = 'CSA2020',
288328
aggMethod= 'sum',
289329
method = hisp,
290-
columnsToInclude = [],
291-
finalFileName=False)
330+
columnsToInclude = [])
292331

293332
#Column 012E from the Hisp table has a different name on the years prior to 2019.
294333
#This code changes the name of that column automatically for every year prior to 2019.

0 commit comments

Comments
 (0)