Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

crime_data_engineering_flow.py 1.7 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
  1. import geopandas as gpd
  2. from prefect import task, flow
  3. from typing import Any, Dict, List
  4. import pandas as pd
  5. @task
  6. def load_crimes(path: str) -> pd.DataFrame:
  7. crime_dat=pd.read_csv(path)
  8. return crime_dat
  9. @task
  10. def convert_date_crime(crime_dat: pd.DataFrame)-> pd.DataFrame:
  11. crime_dat['Date']=pd.to_datetime(crime_dat['ARREST_DATE'], format="%m/%d/%Y")
  12. crime_dat['DayOfWeekNum']=crime_dat['Date'].dt.dayofweek
  13. crime_dat['DayOfWeek'] = crime_dat['Date'].dt.day_name()
  14. crime_dat.drop('ARREST_DATE', axis=1)
  15. return crime_dat
  16. @task
  17. def add_boroughs_crime(crime_dat: pd.DataFrame)->pd.DataFrame:
  18. # Load the borough polygons
  19. gdf = gpd.read_file(gpd.datasets.get_path("nybb"))
  20. # Create a GeoDataFrame of points from the crime data
  21. point = gpd.points_from_xy(crime_dat['Longitude'], crime_dat['Latitude'], crs='EPSG:4326')
  22. gdf_point = gpd.GeoDataFrame(geometry=point)
  23. # Set the CRS of the GeoDataFrame to WGS84
  24. gdf_point = gdf_point.set_crs(epsg=4326)
  25. # Reproject the GeoDataFrame to EPSG:2263
  26. gdf_point = gdf_point.to_crs(epsg=2263)
  27. # spatial join crime coordinates to boroughs
  28. point_in_borough = gpd.tools.sjoin(gdf_point, gdf, predicate="within", how='left')
  29. crime_dat['Borough'] = point_in_borough['BoroName']
  30. return crime_dat
  31. @task
  32. def save_output_crime(crime_dat: pd.DataFrame):
  33. crime_dat.to_csv('data/crime.csv')
  34. @flow
  35. def crime_data_eng_pipeline():
  36. crime_dat=load_crimes('data/NYPD_Arrest_Data__Year_to_Date_.csv')
  37. crime_dat=convert_date_crime(crime_dat)
  38. crime_dat=add_boroughs_crime(crime_dat)
  39. save_output_crime(crime_dat)
  40. if __name__=="__main__":
  41. crime_data_eng_pipeline()
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...