Skip to content

Commit 42bd45c

Browse files
committed
Update create.py
1 parent 0d7ab26 commit 42bd45c

File tree

1 file changed

+18
-52
lines changed

1 file changed

+18
-52
lines changed

‎VitalSigns/create.py‎

Lines changed: 18 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,6 @@
88
'empl', 'nilf', 'unempl', 'elheat', 'heatgas', 'affordr', 'affordm', 'nohhint']
99

1010

11-
#Need these imports or the scripts WILL NOT work.
12-
import geopandas as gpd
13-
import numpy
14-
import pandas as pd
15-
from .acsDownload import retrieve_acs_data
16-
from dataplay.merge import mergeDatasets
17-
from dataplay.intaker import Intake
18-
1911
#Cell
2012
#@title Run This Cell: Misc Function Declarations
2113

@@ -40,46 +32,22 @@ def nullIfEqual(df, c1, c2):
4032
# I'm thinking this doesn't need to be a function.
4133
def sumInts(df):
    """Return ``df.sum(numeric_only=True)``.

    For a pandas DataFrame this yields the per-column totals, with
    non-numeric columns left out of the result.
    """
    # numeric_only=True keeps non-numeric columns out of the summation.
    totals = df.sum(numeric_only=True)
    return totals
4234

43-
#Move Baltimore City row to the bottom, and delete "Unassigned--Jail" row
44-
def baltCity(df):
45-
fi = df
4635

47-
#Delete "Unassigned--Jail" row -> tract 100300
48-
fi = fi[fi.tract != 100300]
49-
50-
#Move Baltimore City row to the bottom of the list.
51-
bc = fi.loc['Baltimore City'] #save Baltimore City row
52-
fi = fi.drop(fi.index[1]) #Remove baltimore City row from fi based on index location - its index location is 1 for both 2010 and 2020 indicators.
53-
fi.loc['Baltimore City'] = bc
54-
55-
return fi
56-
57-
#Add 2010 CSA Names Column
58-
def add_CSA2010(df):
59-
fi = df
60-
CSA_Crosswalk = pd.read_csv("https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010_2020.csv")
61-
62-
fi.reset_index(inplace=True)
63-
fi = CSA_Crosswalk.merge(fi, on="CSA2020", how="inner")
64-
65-
return fi
66-
67-
#Add 2020 CSA Names Column
68-
def add_CSA2020(df):
69-
fi = df
70-
CSA_Crosswalk = pd.read_csv("https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010_2020.csv")
71-
72-
fi.reset_index(inplace=True)
73-
fi = CSA_Crosswalk.merge(fi, on="CSA2010", how="inner")
36+
#Need these imports or the scripts WILL NOT work.
37+
import geopandas as gpd
38+
import numpy
39+
import pandas as pd
40+
from .acsDownload import retrieve_acs_data
41+
from dataplay.merge import mergeDatasets
42+
from dataplay.intaker import Intake
7443

75-
return fi
7644

7745
#cell
7846
#@title Run This Cell: Create createIndicator()
7947

8048
def createAcsIndicator(state, county, tract, year, tableId,
8149
mergeUrl, merge_left_col, merge_right_col, merge_how, groupBy,
82-
aggMethod, method, columnsToInclude):
50+
aggMethod, method, columnsToInclude, finalFileName=False):
8351

8452
# Pull the data
8553
df = retrieve_acs_data(state, county, tract, tableId, year)
@@ -110,15 +78,11 @@ def createAcsIndicator(state, county, tract, year, tableId,
11078
# Create the indicator
11179
print('Creating Indicator')
11280
resp = method( df, columnsToInclude)
113-
114-
#Append Missing CSA Column (2010 or 2020)
115-
print("Appending Missing CSA Column")
116-
if int(year) <=19:
117-
resp = add_CSA2020(resp)
118-
else:
119-
resp = add_CSA2010(resp)
120-
12181
print('Indicator Created')
82+
if finalFileName:
83+
resp.to_csv(finalFileName, quoting=csv.QUOTE_ALL)
84+
print('Indicator Saved')
85+
12286
return resp
12387

12488

@@ -302,8 +266,8 @@ def racdiv(df, columnsToInclude):
302266
#Get hisp table and Indicators
303267
#'hisp' script HAS to be imported for this script to work.
304268
#The user has to re-enter the year they want the indicator for. TODO: find a way to automate this input.
305-
chosen_year = input("Please enter your chosen year again (i.e., '17', '20'): ")
306-
if int(chosen_year) <= 19:
269+
chosen_year = int(input("Please enter your chosen year again (i.e., '17', '20'): "))
270+
if (chosen_year <= 19):
307271
fi_hisp = createAcsIndicator(state = '24', county = '510', tract = '*' , year = chosen_year, tableId = 'B03002',
308272
mergeUrl = 'https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2010.csv',
309273
merge_left_col = 'tract',
@@ -312,7 +276,8 @@ def racdiv(df, columnsToInclude):
312276
groupBy = 'CSA2010',
313277
aggMethod= 'sum',
314278
method = hisp,
315-
columnsToInclude = [],)
279+
columnsToInclude = [],
280+
finalFileName=False)
316281
else:
317282
fi_hisp = createAcsIndicator(state = '24', county = '510', tract = '*' , year = chosen_year, tableId = 'B03002',
318283
mergeUrl = 'https://raw.githubusercontent.com/BNIA/VitalSigns/main/CSA2020.csv',
@@ -322,7 +287,8 @@ def racdiv(df, columnsToInclude):
322287
groupBy = 'CSA2020',
323288
aggMethod= 'sum',
324289
method = hisp,
325-
columnsToInclude = [])
290+
columnsToInclude = [],
291+
finalFileName=False)
326292

327293
#Column 012E from the Hisp table has a different name in years prior to 2019.
328294
#This code changes the name of that column automatically for every year prior to 2019.

0 commit comments

Comments
 (0)