knitr::opts_chunk$set(warning = FALSE, message = FALSE, cache = FALSE, fig.show = "asis",
fig.align = "center")
options(scipen=999)
library(sf)
library(ggplot2)
library(tidycensus)
library(tidyverse)
library(ggtext)
library(glue)
library(leaflet)
library(mapview)
library(patchwork)
library(lwgeom)
library(FNN)
library(kableExtra)
library(corrplot)
library(htmltools)
library(knitr)
library(rsample)
library(tidymodels)
library(spatialsample)
library(glmnet)
library(ranger)
library(xgboost)
library(stars)
library(terra)
library(nngeo)
library(finetune)
library(themis)
library(applicable)
library(vetiver)
library(bayesplot)
library(lme4)
library(RcppEigen)
library(dplyr)
library(broom)
library(tibble)
library(car)
library(units)
library(ggforce)
library(RColorBrewer)
library(grid)
library(geosphere)
library(geojsonio)
library(osmdata)
library(showtext)
library(sysfonts)
library(stargazer)
library(modelsummary)
library(Metrics)
library(googleway)
library(workflows)
library(parsnip)
library(jsonlite)
library(classInt)
palette5 <- colorRampPalette(c("#1a9988", "#eb5600"))(5)
source("https://raw.githubusercontent.com/urbanSpatial/Public-Policy-Analytics-Landing/master/functions.r")
# Change to your own workspace
# knitr::opts_knit$set(root.dir = "E:/Spring/Practicum/DataAnalysis/Chinatown") # Luming
# wd <- "E:/Spring/Practicum/DataAnalysis/Chinatown"
# Aki
#knitr::opts_knit$set(root.dir = "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown-Practicum/")
#wd <- "~/Documents/MUSA/Spring25/MUSA8010_Practicum/Chinatown-Practicum/"
# sijia
knitr::opts_knit$set(root.dir = "~/Documents/Upenn/Practicum/2025/Chinatown-Practicum")
#setwd("~/Documents/Upenn/Practicum/2025/Chinatown-Practicum/")
wd <- "~/Documents/Upenn/Practicum/2025/Chinatown-Practicum"
philly_properties <- st_read("~/Documents/Upenn/Practicum/2025/results/property_philly.geojson")
It is hard to talk about the history of the Philadelphia Chinatown without the construction of I-676, also known as the Vine Street Expressway which bisected the neighborhood. Although the highway serves as a significant thoroughfare through Center City Philadelphia, the indispensable repercussions on the adjacent neighborhoods still lingers today. Consequences of the highway such as demolition of buildings, pedestrian safety threats, traffic congestion, as well as noise and air pollution has been harming Chinatown and its surrounding neighborhoods over the past decades and are yet to be addressed.
In the last couple of years, City of Philadelphia Office of Transportation and Infrastructure Systems (OTIS) brought forth the plan to cap sections of the expressway to address its historical harms to the Chinatown region. This project, titled the Chinatown Stitch project, is aimed to create a safe and equitable green space for the public that would reconnect areas north and south to the Vine Street expressway. Receiving funds from the US department of Transportation through the Bipartisan Infrastructure Law, the construction of phase one the Chinatown Stitch will start in 2027 and is expected to be completed in early 2030.
This purpose of this study is to help OTIS understand how the property values in Philadelphia’s Chinatown region will change after the implementation of the Chinatown Stitch. The first part of the study will focus on the effects of highways on property sales prices in Philadelphia and more specifically in our study area. We also include key findings from our qualitative research on highway effects and caps in other cities. Prices will then be estimated using a predictive model which we validate with real data. We then use this model to predict how property price will change in three alternate futures: Scenario 0 - business as usual, without the implementation of the Chinatown Stitch; Scenario 1 - implementation of the Chinatown Stitch; and Scenario 2 - implementation of the Chinatown Stitch and the addition of more commercial and residential transformations in our study area.
We were tasked with building a data-driven story map to explain possible impacts of the Chinatown Stitch to residents of neighborhoods surrounding the project area (we will define this area shortly). This report includes all of the analysis that we conducted, while the story map highlights what we deemed most pressing and important to residents.
A note to the reader: please click on “SHOW” to see the code behind all of our analyses. Some code chunks may have code commented out. This usually signifies that we have already run the code chunks and have saved the data on our Github repository. We call in the pre-saved data, but provide our code for transparency.
Philly_blockgroup <- st_read("Dataset/Philly_blockgroup/Philly_blockgroup.shp") %>%
st_transform('EPSG:2272')
# philly bounds
philly_bounds <- st_union(Philly_blockgroup)
studyarea <- st_read("Dataset/studyarea/StudyArea.shp") %>%
st_transform('EPSG:2272')
studyarea_blockgroup <- st_read("Dataset/studyarea/StudyArea_blockgroup_tract.shp") %>%
st_transform('EPSG:2272')
studyarea_north <- st_read("Dataset/studyarea-sub/studyarea_north.shp") %>%
st_transform('EPSG:2272')
studyarea_south <- st_read("Dataset/studyarea-sub/studyarea_south.shp") %>%
st_transform('EPSG:2272')
I_676 <- st_read("Dataset/Highways/I_676.shp") %>%
st_transform('EPSG:2272')
nhoods_4326 <-
st_read("Dataset/philly_neighborhoods.geojson")%>%
st_transform('EPSG:4326')
discontinuity <- st_read("Dataset/studyarea-sub/discontinuity.shp") %>%
st_transform('EPSG:2272')
Chinatown_Stitch <- st_read("Dataset/Chinatown_Stitch/Chinatown_Stitch.shp") %>%
st_transform('EPSG:2272')
property_original <- st_read("Dataset/original_property_studyarea.geojson") %>%
st_transform('EPSG:2272')
property_north <- property_original %>%
st_intersection(studyarea_north)
property_south <- property_original %>%
st_intersection(studyarea_south)
# philly hydrology for mapping (bounded by philly_bounds; source: https://opendataphilly.org/datasets/hydrology/)
hydro <- st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>% st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
philly_highways <- st_read("Dataset/Highways/Casestudy_Highways.shp") %>%
st_transform('EPSG:2272')
property_studyarea <- st_read("Dataset/original_property_studyarea.geojson") %>%
st_transform('EPSG: 2272')
#property <- st_read("Dataset/property/opa_properties_public2.geojson") %>%
# st_transform('EPSG:2272')
property <- st_read("~/Documents/Upenn/Practicum/2025/Data/property/opa_properties_public2.geojson") %>%
st_transform('EPSG:2272')
property2 <- st_read("Dataset/property/property2.geojson") %>%
st_transform('EPSG:2272')
property_park <- st_read("Dataset/property/property_park.geojson")%>%
st_transform('EPSG:2272')
newpark <- st_read("Dataset/ParksModified/Park_join_I676_1.geojson")%>%
st_transform('EPSG:2272')
property2$distance_to_nearest_park <- property_park$distance_to_nearest_park
nhood <- st_read("~/Documents/Upenn/Practicum/2025/Data/philly_neighborhoods.geojson")%>%
st_transform('EPSG:2272')
landuse <- st_read("Dataset/landuse_clip/Land_Use_ClipLayer.shp")
I676_2 <- st_read("Dataset/studyarea/discontinuity.shp")%>%
st_transform('EPSG:2272')
cpi_data <- st_read("Dataset/property/cpi_data.csv")
# cleaned property data before future engineerig for study area
property_final <- st_read("Dataset/property/property_final.geojson")%>%
st_transform('EPSG:2272')
# cleaned and feature engineered property data for study area
property_studyarea_FE <- st_read("Dataset/property/property_CT_FE.geojson")
Three study areas are used in our report and they are as follows.
StudyArea_plot1 <- ggplot() +
geom_sf(data = Philly_blockgroup, fill = "transparent", color = "grey90", linewidth = .2) +
geom_sf(data = studyarea, fill = "#eb5600", color = "#eb5600") +
geom_sf(data = hydro, fill = "#DFF3E3", color = "transparent") +
geom_sf(data = philly_bounds, fill = "transparent", color = "grey60", linewidth = 1, linetype = "dashed") +
theme_void() +
labs(title = "Three Study Areas:",
subtitle = "Philadelphia | 10 Block Groups around the Chinatown Stitch | North and South Sides",
caption = "Source: ACS Census Data and OPA Data.") +
theme(plot.title = element_text(size = 16, face = "bold",margin = margin(b = 6)),
plot.subtitle = element_text(margin = margin(b = 10)),
plot.caption = element_text(hjust = 0, margin = margin(t = 10)))
StudyArea_plot2 <- ggplot() +
geom_sf(data = studyarea_north, fill = alpha("#eb5600", .1), color = "transparent") +
geom_sf(data = studyarea_south, fill = alpha("#1a9988", .1), color = "transparent") +
geom_sf(data = Chinatown_Stitch, fill = "grey", color = "transparent", linetype = "dashed", linewidth = 1) +
geom_sf(data = studyarea_blockgroup, fill = "transparent", color = "grey", linewidth = .8, linetype = "dashed") +
geom_sf(data = property_north, color = "#eb5600", size = .6) +
geom_sf(data = property_south, color = "#1a9988", size = .6) +
geom_sf(data = studyarea, fill = "transparent", color = "grey60", linewidth = 1.2, linetype = "dashed") +
theme_void()
StudyArea_plot1 | StudyArea_plot2
studyarea_4326 <- st_transform(studyarea, crs = 4326)
bbox <- as.list(st_bbox(studyarea_4326))
ChinatownStitch_4326 <- st_read("Dataset/Chinatown_Stitch/Chinatown_Stitch.shp") %>%
st_transform('EPSG:4326')
I676_4326 <- st_transform(I_676, crs = 4326)
landuse_4326 <- st_read("Dataset/landuse_clip/Land_Use_ClipLayer.shp") %>%
st_transform('EPSG:4326')
landuse_4326_plot <- landuse_4326 %>%
filter(C_DIG2DESC != 51 & C_DIG2DESC != 71)
landuse_park_ing <- landuse_4326 %>%
filter(OBJECTID_1 == 514214)
park_4326 <- st_read("Dataset/ParksModified/Parks_within_1km.shp") %>%
st_transform('EPSG:4326')
#nhoods <-
# st_read("Dataset/DataWrangle/philadelphia-neighborhoods.geojson") %>%
# st_transform('EPSG:2272')
park <- st_read("Dataset/ParksModified/Parks_within_1km.shp") %>%
st_transform('EPSG:2272')
Chinatown_Callowhill <- nhoods_4326 %>%
filter(NAME %in% c("CHINATOWN", "CALLOWHILL"))
With all the green space that the Chinatown Stitch will provide, we treat the area as a park, providing residents and visitors not only a natural and inviting space, but increasing opportunities for social interaction or even rest from the buzzing city life. In addition, OTIS is designing the Chinatown Stitch with phase two of the Rail Park (in the north (Callowhill)) in mind. There are many existing cultural and commercial resources to the south (Chinatown) of the expressway, and OTIS’s community surveys have found that residents would like to see some commercial development in addition to a community garden and picnic area on the highway cap as well.
UseCase <- leaflet() %>%
addProviderTiles(providers$CartoDB.Positron) %>%
addPolygons(data = studyarea_4326,
color = "black", # Black border
weight = 4, # Border thickness
dashArray = "8,12", # Dashed line pattern (5px on, 5px off)
fill = FALSE) %>%
addPolygons(data = landuse_4326_plot,
color = "grey",
weight = 0.8,
fillColor = "#DFF3E3",
fillOpacity = 0.4) %>%
addPolygons(data = Chinatown_Callowhill %>%
filter(grepl("CH", NAME)),
color = "#eb5600",
label = "Chinatown",
weight = 1,
opacity = 0.5,
dashArray = "5,5",
fillColor = "#eb5600",
fillOpacity = 0.1) %>%
addPolygons(data = Chinatown_Callowhill %>%
filter(grepl("CA", NAME)),
color = "#eb5600",
label = "Callowhill",
weight = 1,
opacity = 0.5,
dashArray = "5,5",
fillColor = "#eb5600",
fillOpacity = 0.1) %>%
addPolygons(data = park_4326,
color = "#1a9988",
weight = 1,
fillOpacity = 0.6,
fillColor = "#1a9988") %>%
addPolygons(data = landuse_park_ing,
color = "#1a9988",
weight = 2,
opacity = 0.8,
fill = FALSE) %>%
addPolylines(data = I676_4326,
color = "#eb5600",
opacity = 1,
weight = 2) %>%
addPolygons(data = ChinatownStitch_4326,
color = "#eb5600",
weight = 2,
opacity = 1,
fillColor = "#1a9988",
fillOpacity = 0.8
) %>%
fitBounds(
lng1 = bbox$xmin,
lat1 = bbox$ymin,
lng2 = bbox$xmax,
lat2 = bbox$ymax
)
div(class = "center-leaflet", UseCase)
|
|
|
| Callowhill | Chinatown |
With the goal of a data-driven story map, our methodology is structured around constructing a robust model construction process. By incorporating illustrative data visualizations throughout our data analysis, the story map features engaging maps and compelling narratives to guide residents through the very complex relationship between highways and property prices.
To substantiate our quantitative investigation of how property value may react to the implementation of the Chinatown Stitch, a thorough qualitative analysis was undertaken. First, we conducted a literature review on the effects of highways in urban areas, specifically focusing on how proximity to highways influences home values, heavily leaning on the works of Dr. Jeffrey Lin, an economist from the Philadelphia Federal Reserve Bank.
In addition, we conducted a series of interviews with experts with an exhaustive understanding of the highway effects on property prices (Dr. Lin), Philadelphia Chinatown and other Chinatowns across the country (Dr. Domenic Vitiello), and design perspectives that landscape architects consider when working on projects like the Chinatown Stitch (Dr. Yadan Luo). These interviews provided us with an interdisciplinary perspectives on the potential impacts of the Chinatown Stitch on local communities.
We also explored other completed highway capping projects across the United States as case studies to hypothesize how the Chinatown Stitch Project may impact the social, environmental, and economical nature of the Philadelphia Chinatown.
The core dataset of this project consists of parcel-level property records from Philadelphia Properties and Assessment History, which form the foundation for a quantitative analysis of the spatial effects of the I-676 highway. The analysis focuses on historical sale prices, property characteristics, and spatial location of parcels within Chinatown and adjacent neighborhoods. These data are used to simulate and visualize planning scenarios following the Chinatown Stitch intervention, enabling the community to better understand how property sale prices may shift under different policy changes.
Our primary variable of interest, property price, is
sourced from the Philadelphia
Properties and Assessment History dataset available on Open Data
Philly. This dataset includes variables such as sale_price,
number of bedrooms, parcel number, and property condition. To prepare
the data for analysis, we apply the following preprocessing steps
related to sale date, inflation adjustment, geographic transformation,
and handling of missing values:
Sale Date (Philadelphia Focus): To ensure price comparability, we limit our analysis to properties sold between 2020 and 2024 within Philadelphia.
Inflation Adjustment: Sale prices are adjusted for inflation using the Consumer Price Index (CPI), normalized to December 2024 values.
Price Range Filtering: We begin by removing records with implausibly low sale prices (below $100). Then, to reduce the influence of outliers and enhance generalizability, we trim both the bottom and top 5% of the price distribution.
Geographic Transformation: Since spatial proximity to highways and other features influences property values, we reproject coordinates to NAD83 / Pennsylvania South (EPSG: 2272) for accurate spatial analysis.
Missing Data Imputation: Missing values for zoning, central air, and year built are filled using current property characteristics where available.
#### clean data for city wise - sale price
colnames(cpi_data) <- c("half1", "half2", "cpi_date")
cpi_long <- cpi_data %>%
mutate(
Jan = `half1`, Feb = `half1`, Mar = `half1`, Apr = `half1`, May = `half1`, Jun = `half1`,
Jul = `half2`, Aug = `half2`, Sep = `half2`, Oct = `half2`, Nov = `half2`, Dec = `half2`
) %>%
select(-half1, -half2) %>%
pivot_longer(cols = -cpi_date, names_to = "Month", values_to = "CPI") %>%
mutate(
Month = match(Month, month.abb),
date = as.Date(paste(cpi_date, Month, "01", sep = "-"))
) %>%
select(date, CPI)
property_philly <-property%>%
mutate(sale_date = as.Date(sale_date, format = "%Y-%m-%d")) %>%
filter(sale_date >= as.Date("2020-01-01") & sale_date <= as.Date("2024-12-31"))%>%
filter(!is.na(sale_price), !is.na(total_livable_area))%>%
mutate(
sale_month = floor_date(sale_date, "month"))%>%
left_join(cpi_long, by = c("sale_month" = "date"))%>%
mutate(CPI = as.numeric(CPI))%>%
mutate(adj_sale_price = sale_price * (340.331/CPI))%>%
select(-sale_month, -recording_date, -other_building,
-assessment_date, -mailing_address_2, mailing_city_state, mailing_care_of)
#mean_price <- mean(property_philly$adj_sale_price, na.rm = TRUE)
#sd_price <- sd(property_philly$adj_sale_price, na.rm = TRUE)
#extreme_outlier <- mean_price + 5 * sd_price
price_filtered_philly <- property_philly %>%
filter(adj_sale_price >= 100)
q_philly <- quantile(price_filtered_philly$adj_sale_price, probs = c(0.05, 0.95), na.rm = TRUE)
property_philly2 <- property_philly %>%
filter(adj_sale_price >= q_philly[1], adj_sale_price <= q_philly[2])
#property_philly <- property_philly%>%
# filter(adj_sale_price > 30000, adj_sale_price < extreme_outlier) %>%
# filter(total_livable_area >= 100)
property_philly <- st_transform(property_philly2, crs = 2272)
#st_write(property_philly, "property_philly.geojson", driver = "GeoJSON", delete_dsn = TRUE)
#### clean data for study area - sale price
#study area intersection
property_studyarea <- st_intersection(property, studyarea)
#sale price clean up and inflation adjusted
colnames(cpi_data) <- c("half1", "half2", "cpi_date")
cpi_long <- cpi_data %>%
mutate(
Jan = `half1`, Feb = `half1`, Mar = `half1`, Apr = `half1`, May = `half1`, Jun = `half1`,
Jul = `half2`, Aug = `half2`, Sep = `half2`, Oct = `half2`, Nov = `half2`, Dec = `half2`
) %>%
select(-half1, -half2) %>%
pivot_longer(cols = -cpi_date, names_to = "Month", values_to = "CPI") %>%
mutate(
Month = match(Month, month.abb),
date = as.Date(paste(cpi_date, Month, "01", sep = "-"))
) %>%
select(date, CPI)
property_data <-property_studyarea%>%
mutate(sale_date = as.Date(sale_date, format = "%Y-%m-%d")) %>%
filter(!is.na(sale_price))%>%
filter(sale_date <= as.Date("2024-12-31"))%>%
mutate(
sale_date = as.Date(sale_date, format = "%Y-%m-%d"),
sale_month = floor_date(sale_date, "month"))%>%
left_join(cpi_long, by = c("sale_month" = "date"))%>%
mutate(CPI = as.numeric(CPI))%>%
mutate(adj_sale_price = sale_price * (340.331/CPI)) %>%
select(-sale_month,-recording_date, -other_building,
-assessment_date, -mailing_address_2, mailing_city_state, mailing_care_of, -state_code)%>%
relocate(adj_sale_price, sale_price, .after = 2)
price_filtered <- property_data %>%
filter(adj_sale_price >= 100)
q <- quantile(price_filtered$adj_sale_price, probs = c(0.05, 0.95), na.rm = TRUE)
property_data2 <- property_data %>%
filter(adj_sale_price >= q[1], adj_sale_price <= q[2])
property1 <- st_transform(property_data2, crs = 2272)
#join attributes - I676(NS) - nhood1 (neighborhoods in study area)
property1 <- rbind(
property1 %>% st_intersection(studyarea_north["geometry"]) %>%
mutate(I676_NS = "north"),
property1 %>% st_intersection(studyarea_south["geometry"]) %>%
mutate(I676_NS = "south")
)
nhood1 <- st_intersection(nhood, studyarea)
# Features clean up for future FE and modeling
## Zoning
property1 <- property1%>%
select(1:2, zoning, location, everything())
property1 <- property1 %>%
mutate(zoning = case_when(
location == "1310-16 VINE ST" ~ "CMX4",
location == "252-56 N 13TH ST" ~ "CMX4",
location == "255-57 N BROAD ST" ~ "CMX4",
location == "451 N 12TH ST" ~ "CMX3",
location == "453 N 12TH ST" ~"CMX3",
location == "447 N 12TH ST" ~ "CMX3",
location == "1108 BUTTONWOOD ST" ~ "CMX3",
location == "1104 BUTTONWOOD ST" ~ "CMX3",
location == "256-60 N MARVINE ST" ~ "CMX4",
location == "820 VINE ST" ~ "CMX4",
TRUE ~ zoning
))
## central air
target_locations_N <- c(
"1232 SUMMER ST",
"1030 BRANDYWINE ST",
"214 N 12TH ST",
"1021 SPRING ST")
property1 <- property1 %>%
mutate(
central_air = ifelse(location %in% target_locations_N & central_air == "N", "Y", central_air)
)
## year built - clean up values of 1 and delete the other rows which are parking plot properties that have no year_built data
valid_locations <- c(
"305-09 N 12TH ST",
"1008-16 WOOD ST",
"1004-06 WOOD ST",
"211 N CAMAC ST",
"231-33 N BROAD ST",
"225-27 N 12TH ST"
)
property1 <- property1 %>%
filter(location != "711-39 SPRING GARDEN ST") %>%
mutate(year_built = as.numeric(year_built)) %>%
mutate(
year_built = case_when(
location == "305-09 N 12TH ST" & year_built < 1750 ~ 1950,
location == "1008-16 WOOD ST" & year_built < 1750 ~ 1978,
location == "1004-06 WOOD ST" & year_built < 1750 ~ 1920,
location == "211 N CAMAC ST" & year_built < 1750 ~ 1900,
location == "231-33 N BROAD ST" & year_built < 1750 ~ 2020,
location == "225-27 N 12TH ST" & year_built < 1750 ~ 2024,
!is.na(year_built) & year_built < 1750 &
!(location %in% c(
"305-09 N 12TH ST",
"1008-16 WOOD ST",
"1004-06 WOOD ST",
"211 N CAMAC ST",
"231-33 N BROAD ST",
"225-27 N 12TH ST"
)) ~ NA_real_,
TRUE ~ year_built
)
)
#st_write(property_philly, "property_philly.geojson", driver = "GeoJSON", delete_dsn = TRUE)
### The method here for pushing data is diffenrent from the one we usually use cause the there is some issues when downloading the geometry data
# census block group
census_api_key("92998588496b9701036218b765c78d2ffb7aedcd", install = TRUE, overwrite = TRUE)
acs_variable_list.2023 <- load_variables(2023, "acs5", cache = TRUE)
local_shapefile <- "Dataset/census/tl_2023_42_bg"
bg_pa <- st_read(local_shapefile, quiet = TRUE)
bg_philly <- bg_pa %>%
filter(COUNTYFP == "101") %>%
st_transform(2272) %>%
select(GEOID, geometry)
block2023_attr <- get_acs(
geography = "block group",
variables = c("B19013_001E", "B25058_001E"),
year = 2023,
state = 42,
county = 101,
geometry = FALSE
)
block2023_attr <- block2023_attr %>%
select(GEOID, variable, estimate) %>%
pivot_wider(names_from = variable, values_from = estimate) %>%
rename(
MedHHInc = B19013_001,
MedRent = B25058_001
)
block2023 <- left_join(bg_philly, block2023_attr, by = "GEOID")
studyarea_blockgroup_df <- as.data.frame(studyarea_blockgroup) %>% select(-geometry)
block2023_studyarea <- block2023 %>% inner_join(studyarea_blockgroup_df, by = "GEOID")
#census tract
tract_pa <- st_read("Dataset/census/tl_2023_42_tract", quiet = TRUE)
tract_philly <- tract_pa %>%
filter(COUNTYFP == "101") %>%
st_transform(2272) %>%
select(GEOID, geometry)
tract_attr <- get_acs(
geography = "tract",
variables = c("B25026_001E", "B15001_050E", "B15001_009E","B06012_002E"),
year = 2023, state = 42, county = 101, geometry = FALSE
)
tract_attr_wide <- tract_attr %>%
select(GEOID, variable, estimate) %>%
pivot_wider(names_from = variable, values_from = estimate) %>%
rename(
TotalPop = B25026_001,
FemaleBachelors = B15001_050,
MaleBachelors = B15001_009,
TotalPoverty = B06012_002
)
tract2023 <- left_join(tract_philly, tract_attr_wide, by = "GEOID")
studyarea_blockgroup_df <- as.data.frame(studyarea_blockgroup) %>% select(-geometry)
tract2023 <- tract2023 %>%
mutate(GEOID_trimmed = as.numeric(str_sub(GEOID, 7, 9)))
studyarea_blockgroup_df <- studyarea_blockgroup_df %>%
mutate(GEOID_trimmed = as.numeric(str_sub(TRACT, 2, 4)))
tract2023_studyarea <- tract2023 %>%
inner_join(studyarea_blockgroup_df, by = "GEOID_trimmed")
tract_studyarea <- st_intersection(studyarea, tract2023_studyarea)
# Property1_property2_census_join
property1 <- property1 %>%
mutate(census_tract = as.numeric(census_tract))
tract_studyarea_selected <- tract_studyarea %>%
select(GEOID_trimmed, TotalPoverty, MaleBachelors, FemaleBachelors, TotalPop) %>%
st_drop_geometry()
property1_blockgroup <- st_join(property1, block2023_studyarea %>% select(GEOID, MedHHInc, MedRent),
join = st_intersects)
property1_geometry <- st_geometry(property1)
tract_studyarea_selected <- tract_studyarea_selected %>%
distinct(GEOID_trimmed, .keep_all = TRUE)
property1 <- property1_blockgroup %>%
st_drop_geometry() %>%
left_join(tract_studyarea_selected, by = c("census_tract" = "GEOID_trimmed"))
property1 <- property1 %>%
mutate(objectid = as.character(objectid))
# property2 includes additional spatial indicators for modeling,
# such as distance to nearest park, transit, hospital, and supermarket
property2 <- property2 %>%
mutate(objectid = as.character(objectid))
#Join other indicators to cleaned property1
property_combined <- property1 %>%
left_join(property2, by = "objectid") %>%
rename(sale_price = sale_price.y)%>%
select(-sale_price.x)
# POI - restaurant-drink-dessert
api_key <- "AIzaSyCa-yuy1Y3n07URlR2HHg6dp_cIEouv9hs"
keywords <- c("restaurant", "dim sum restaurant", "kitchen", "desert","tea", "bubble tea","matcha", "herbal tea")
place_types <- c("restaurant", "cafe", "bakery")
studyarea_proj <- st_transform(block2023_studyarea, 2272)
grid_pts <- st_make_grid(studyarea_proj, cellsize = 2000, what = "centers")
grid_pts <- st_sf(geometry = grid_pts) %>%
st_filter(st_buffer(studyarea_proj, 1000))
grid_pts_wgs <- st_transform(grid_pts, 4326)
coords <- st_coordinates(grid_pts_wgs)
all_responses <- list()
for (pt_idx in seq_len(nrow(coords))) {
location <- coords[pt_idx, c(2,1)] # lat, lng
cat(" Querying center point", pt_idx, "at", paste(location, collapse = ", "), "\n")
for (ptype in place_types) {
for (kw in keywords) {
Sys.sleep(5)
cat("🔎", kw, "in", ptype, "\n")
response <- google_places(
keyword = kw,
location = location,
radius = 2000,
place_type = ptype,
key = api_key
)
if (is.null(response[["results"]]) || length(response[["results"]]) == 0) {
cat("No results for:", kw, "in", ptype, "\n")
next
}
page_token <- response$next_page_token
results_list <- list(response[["results"]])
i <- 1
while (!is.null(page_token)) {
Sys.sleep(10)
response_next <- google_places(
keyword = kw,
location = location,
radius = 2000,
place_type = ptype,
key = api_key,
page_token = page_token
)
if (is.null(response_next[["results"]]) || length(response_next[["results"]]) == 0) break
results_list[[i + 1]] <- response_next[["results"]]
page_token <- response_next$next_page_token
i <- i + 1
}
all_responses[[paste(kw, ptype, pt_idx, sep = "_")]] <- bind_rows(results_list) %>%
filter(!is.na(place_id)) %>%
distinct(place_id, .keep_all = TRUE)
}
}
}
all_responses <- all_responses[lengths(all_responses) > 0]
restaurant_results <- bind_rows(all_responses) %>%
mutate(lat = geometry$location$lat, lng = geometry$location$lng) %>%
select(lat, lng, name, place_id, rating) %>%
distinct(place_id, .keep_all = TRUE)
restaurant_sf <- st_as_sf(restaurant_results, coords = c("lng", "lat"), crs = 4326)
restaurant_proj <- st_transform(restaurant_sf, 2272)
studyarea_buffer <- st_buffer(studyarea_proj, 2000)
res_poi <- st_filter(restaurant_proj, studyarea_buffer)
radius_circles <- st_buffer(st_transform(grid_pts, 2272), dist = 2000)
# POI - grocery
keywords <- c("grocery store", "supermarket", "asian grocery", "chinese supermarket",
"korean market", "japanese grocery", "international market", "food market")
place_types <- c("supermarket", "grocery_or_supermarket", "convenience_store", "store")
studyarea_proj <- st_transform(block2023_studyarea, 2272)
grid_pts <- st_make_grid(studyarea_proj, cellsize = 2000, what = "centers")
grid_pts <- st_sf(geometry = grid_pts) %>%
st_filter(st_buffer(studyarea_proj, 1000))
grid_pts_wgs <- st_transform(grid_pts, 4326)
coords <- st_coordinates(grid_pts_wgs)
all_responses <- list()
for (pt_idx in seq_len(nrow(coords))) {
location <- coords[pt_idx, c(2,1)] # lat, lng
cat(" Querying center", pt_idx, "at", paste(location, collapse = ", "), "\n")
for (ptype in place_types) {
for (kw in keywords) {
Sys.sleep(5)
cat("🔎", kw, "in", ptype, "\n")
response <- google_places(
keyword = kw,
location = location,
radius = 2000,
place_type = ptype,
key = api_key
)
if (is.null(response[["results"]]) || length(response[["results"]]) == 0) next
page_token <- response$next_page_token
results_list <- list(response[["results"]])
i <- 1
while (!is.null(page_token)) {
Sys.sleep(10)
response_next <- google_places(
keyword = kw,
location = location,
radius = 2000,
place_type = ptype,
key = api_key,
page_token = page_token
)
if (is.null(response_next[["results"]]) || length(response_next[["results"]]) == 0) break
results_list[[i + 1]] <- response_next[["results"]]
page_token <- response_next$next_page_token
i <- i + 1
}
all_responses[[paste(kw, ptype, pt_idx, sep = "_")]] <- bind_rows(results_list) %>%
filter(!is.na(place_id)) %>%
distinct(place_id, .keep_all = TRUE)
}
}
}
all_responses <- all_responses[lengths(all_responses) > 0]
grocery_results <- bind_rows(all_responses) %>%
mutate(lat = geometry$location$lat,
lng = geometry$location$lng) %>%
select(lat, lng, name, place_id, rating) %>%
distinct(place_id, .keep_all = TRUE)
grocery_sf <- st_as_sf(grocery_results, coords = c("lng", "lat"), crs = 4326)
grocery_proj <- st_transform(grocery_sf, 2272)
studyarea_buffer <- st_buffer(studyarea_proj, 2000)
grocery <- st_filter(grocery_proj, studyarea_buffer)
# property join poi
property_sf <- st_as_sf(property_combined, sf_column_name = "geometry")
property_proj <- st_transform(property_sf, 2272)
grocery_proj <- st_transform(grocery, 2272)
restaurant_proj <- st_transform(res_poi, 2272)
grocery_idx <- st_nearest_feature(property_proj, grocery_proj)
property_proj$nearest_grocery_name <- grocery_proj$name[grocery_idx]
property_proj$nearest_grocery_dist <- st_distance(
property_proj,
grocery_proj[grocery_idx, ],
by_element = TRUE
)
restaurant_idx <- st_nearest_feature(property_proj, restaurant_proj)
property_proj$nearest_restaurant_name <- restaurant_proj$name[restaurant_idx]
property_proj$nearest_restaurant_dist <- st_distance(
property_proj,
restaurant_proj[restaurant_idx, ],
by_element = TRUE
)
property_proj$nearest_grocery_dist_m <- as.numeric(property_proj$nearest_grocery_dist)
property_proj$nearest_restaurant_dist_m <- as.numeric(property_proj$nearest_restaurant_dist)
clean_property_poi_distances <- function(property_sf) {
property_sf %>%
mutate(
nearest_grocery_dist_m = as.numeric(nearest_grocery_dist),
nearest_restaurant_dist_m = as.numeric(nearest_restaurant_dist)) %>%
select(
-nearest_grocery_dist,
-nearest_restaurant_dist
)
}
property_final <- clean_property_poi_distances(property_proj)
st_write(property_final, "~/Documents/Upenn/Practicum/2025/Chinatown-Practicum-my/finalresults", driver = "GeoJSON", delete_dsn = TRUE)
To capture the neighborhood context surrounding each parcel, we integrated multiple spatial datasets from Open Data Philly, including parks, City Hall, transit stations (bus, metro and trolley), schools, hospitals, bodies of water, bike networks, and points-of-interest or POIs such as grocery stores and restaurants (via Google Places API). For each type of feature, we computed the distance to the nearest amenity for each property, generating a set of spatial accessibility indicators (i.e. distance to nearest restaurant, distance to nearest hospital, etc.).
We further spatially joined each parcel to its corresponding neighborhood name, land use category, and retail environment data (low- or high-produce supply stores per 1,000 people, abbreviated to LPSS and HPSS, respectively) to enrich contextual attributes. Crime rate was calculated using historical crime records for Philadelphia. Specifically, we applied a k-nearest-neighbor (K-NN) approach (for values of k between 1 and 5) to derive the average distance to the nearest reported incidents for each property, reflecting localized safety levels.
These variables are essential indicators in our analysis and modeling, capturing how the surrounding built and social environments may influence sale price and responses to planning interventions.
landuse <- st_read("Dataset/landuse_clip/Land_Use_ClipLayer.shp") %>%
st_transform('EPSG:2272')
landuse_rename <- landuse %>%
mutate(landuse =
case_when(
C_DIG2DESC == 11 ~ "Residential Low",
C_DIG2DESC == 12 ~ "Residential Medium",
C_DIG2DESC == 13 ~ "Residential High",
C_DIG2DESC == 21 ~ "Commercial Consumer",
C_DIG2DESC == 22 ~ "Commercial Business/Professional",
C_DIG2DESC == 23 ~ "Commercial Mixed Residential",
C_DIG2DESC == 31 ~ "Industrial",
C_DIG2DESC == 41 ~ "Civic/Institution",
C_DIG2DESC == 51 ~ "Transportation",
C_DIG2DESC == 61 ~ "Culture/Amusement",
C_DIG2DESC == 62 ~ "Active Recreation",
C_DIG2DESC == 71 ~ "Park/Open Space",
C_DIG2DESC == 72 ~ "Cemetery",
C_DIG2DESC == 81 ~ "Water",
C_DIG2DESC == 91 ~ "Vacant",
C_DIG2DESC == 92 ~ "Other/Unknown"
))
# philly park data (https://opendataphilly.org/datasets/ppr-properties/)
philly_parks <- st_read("https://opendata.arcgis.com/datasets/d52445160ab14380a673e5849203eb64_0.geojson") %>%
st_transform(crs = 'EPSG:2272') %>%
st_intersection(philly_bounds)
metro <-
st_read("https://opendata.arcgis.com/api/v3/datasets/af52d74b872045d0abb4a6bbbb249453_0/downloads/data?format=geojson&spatialRefId=4326") %>%
st_transform('EPSG:2272') %>%
mutate(Type = "metro")
city_hall <- metro[metro$Station == 'City Hall', 6]
trolley <-
st_read("https://opendata.arcgis.com/api/v3/datasets/dd2afb618d804100867dfe0669383159_0/downloads/data?format=geojson&spatialRefId=4326") %>%
st_transform('EPSG:2272')
trolley_renamed <- trolley %>%
rename(Station = StopName,
Route = LineAbbr,
Longitude = Lon,
Latitude = Lat) %>%
mutate(Type = "trolley")
# Combine both datasets into one
transit <- bind_rows(metro, trolley_renamed)
school <-
st_read("https://opendata.arcgis.com/datasets/d46a7e59e2c246c891fbee778759717e_0.geojson") %>%
st_transform('EPSG:2272')
hospital <-
st_read("Dataset/DataWrangle/DOH_Hospitals202311.geojson") %>%
st_transform('EPSG:2272') %>%
st_filter(st_union(Philly_blockgroup))
water <-
st_read("https://services.arcgis.com/fLeGjb7u4uXqeF9q/arcgis/rest/services/Hydrographic_Features_Poly/FeatureServer/1/query?outFields=*&where=1%3D1&f=geojson") %>%
st_transform('EPSG:2272')
bike_network <-
st_read("Dataset/DataWrangle/Bike_Network.geojson") %>%
st_transform('EPSG:2272')
phillyCrimes <-
st_read("Dataset/DataWrangle/phillyCrimes.geojson") %>%
st_transform('EPSG:2272')
# LPSS_PER1000: Number of low-produce supply stores per 1,000 people
retail <-
st_read("https://opendata.arcgis.com/datasets/53b8a1c653a74c92b2de23a5d7bf04a0_0.geojson") %>%
st_transform('EPSG:2272')
calculate_nearest_distance <- function(set_points, other_layer) {
nearest_idx <- st_nearest_feature(set_points, other_layer)
st_distance(set_points, other_layer[nearest_idx, ], by_element = TRUE) %>% as.numeric()
}
Luming_property <- property_studyarea %>%
dplyr::select(objectid, geometry, sale_price) %>%
rename(orig_objectid = objectid)
Luming_property <- Luming_property %>%
mutate(distance_to_city_hall = st_distance(., city_hall) %>% as.numeric()) %>%
mutate(
distance_to_nearest_transit = calculate_nearest_distance(geometry, transit),
distance_to_nearest_hospital = calculate_nearest_distance(geometry, hospital),
distance_to_nearest_school = calculate_nearest_distance(geometry, school),
distance_to_nearest_park = calculate_nearest_distance(geometry, park),
distance_to_nearest_water = calculate_nearest_distance(geometry, water),
distance_to_nearest_bikelane = calculate_nearest_distance(geometry, bike_network),
distance_to_I676 = calculate_nearest_distance(geometry, I_676)
) %>%
st_join(nhood["NAME"], left = TRUE) %>%
rename(nhoods_name = NAME) %>%
st_join(landuse_rename["landuse"], left = TRUE) %>%
st_join(retail[, c("LPSS_PER1000", "HPSS_PER1000")], left = TRUE)
Luming_property <- Luming_property %>%
mutate(
crime_nn1 = nn_function(st_coordinates(Luming_property),
st_coordinates(phillyCrimes), k = 1),
crime_nn2 = nn_function(st_coordinates(Luming_property),
st_coordinates(phillyCrimes), k = 2),
crime_nn3 = nn_function(st_coordinates(Luming_property),
st_coordinates(phillyCrimes), k = 3),
crime_nn4 = nn_function(st_coordinates(Luming_property),
st_coordinates(phillyCrimes), k = 4),
crime_nn5 = nn_function(st_coordinates(Luming_property),
st_coordinates(phillyCrimes), k = 5))
This land use map of our study area reveals several important aspects of the two neighborhoods divided by the Vine Street Expressway. Callowhill (north of I-676) and Chinatown (south of I-676) have starkly different land use patterns likely due to historical development reasons. The Callowhill neighborhood has a higher portion of industrial and vacant land in contrast with the Chinatown area that is predominantly occupied by commercial business and mixed residential use land.
park_clipped <- st_intersection(park, studyarea)
park_valid <- st_make_valid(park)
transit_valid <- st_make_valid(transit)
city_hall_valid <- st_make_valid(city_hall)
school_valid <- st_make_valid(school)
water_valid <- st_make_valid(water)
bike_network_valid <- st_make_valid(bike_network)
studyarea_buffer <- st_buffer(studyarea, dist = 1000)
# Clip each dataset to the study area
recreation_clipped <- st_intersection(park, studyarea_buffer)
transit_clipped <- st_intersection(transit_valid, studyarea_buffer)
city_hall_clipped <- st_intersection(city_hall_valid, studyarea_buffer)
school_clipped <- st_intersection(school_valid, studyarea_buffer)
water_clipped <- st_intersection(water_valid, studyarea_buffer)
bike_network_clipped <- st_intersection(bike_network_valid, studyarea_buffer)
landuse_transportation <- landuse_rename[landuse_rename$landuse == "Transportation", ]
# property_vacant <- property_EDA %>%
# filter(category_code_description %in% c("VACANT LAND","SINGLE FAMILY", "VACANT LAND - NON-RESIDENTIAL")) %>%
# mutate(category_code_description = case_when(
# category_code_description == "VACANT LAND - NON-RESIDENTIAL" ~ "VACANT LAND",
# TRUE ~ category_code_description
# ))
landuse_colors <- c(
"Residential Low" = "#FFFFB3",
"Residential Medium" = "#FFEB8B",
"Residential High" = "#FFCC80",
"Commercial Consumer" = "#FFB3B3",
"Commercial Business/Professional" = "#FF9980",
"Commercial Mixed Residential" = "#FF6666",
"Industrial" = "#D6A3FF",
"Civic/Institution" = "#66B3FF",
"Transportation" = "gray95",
"Culture/Amusement" = "#C1FFC1",
"Active Recreation" = "#66FFD9",
"Park/Open Space" = "#99FF66",
"Cemetery" = "#FF9999",
"Water" = "#99CCFF",
"Vacant" = "#D3D3D3",
"Other/Unknown" = "gray25"
)
BasicGeo_plot1 <- ggplot(data = landuse_rename %>%
filter(!is.na(landuse) & landuse != "Park/Open Space")) +
geom_sf(aes(fill = landuse), color = NA) +
geom_sf(data = park_clipped, aes(fill = "Park"), color = NA, alpha = 1) + # Clipped park layer
scale_fill_manual(values = c(landuse_colors, "Park" = "#99FF66"), na.value = "white") +
labs(title = "Land Use",
subtitle = "Properties functions in Chinatown",
fill = "Category") +
theme_void() +
theme(legend.position = "right",
plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_text(size = 10),
legend.key.size = unit(0.5, "cm"),
legend.text = element_text(size = 8))
BasicGeo_plot2 <- ggplot() +
geom_sf(data = landuse, fill = "white", color = "gray90", size = 0.05) +
geom_sf(data = recreation_clipped, aes(fill = "Park"), color = NA, alpha = 1) +
geom_sf(data = landuse_transportation, aes(fill = "Transportation"), color = NA, alpha = 0.5) +
geom_sf(data = transit_clipped, aes(color = "Transit"), size = 2) +
geom_sf(data = school_clipped, aes(color = "School"), size = 2) +
geom_sf(data = bike_network_clipped, aes(color = "Bike Network"), size = 1) +
scale_color_manual(values = c("Transit" = "blue", "School" = "orange", "Bike Network" = "purple"), name = NULL) +
scale_fill_manual(values = c("Park" = "green","Transportation" = "gray70"), name = NULL) +
labs(
title = "Amenities",
subtitle = "<span style='color:orange;'>Schools</span>, <span style='color:blue;'>Transit Stations</span>, <span style='color:green;'>Parks</span>, and <span style='color:purple;'>Bike Lanes</span>"
) +
theme_void() +
theme(
legend.position = "none",
plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = element_markdown(size = 10, hjust = 0),
legend.key.size = unit(0.5, "cm"),
legend.text = element_text(size = 8)
)
BasicGeo_plot3 <- ggplot() +
geom_sf(data = landuse, fill = "white", color = "gray85", size = 0.05) +
geom_sf(data = studyarea, fill = "transparent", color = "grey", linetype = "dashed", linewidth = 2) +
geom_sf(data = discontinuity, fill = "#eb5600", color = "transparent") +
geom_sf(data = Chinatown_Stitch, fill = "#1a9988", alpha = 0.8) +
# geom_sf(data = property_vacant, aes(color = category_code_description), size = 1) +
scale_colour_manual(values = c("black", "#B67352")) +
labs(title="Potential Lands",
subtitle = glue("<span style='color:#1a1a1a;'>Single Family & </span><span style='color:#B67352;'>Vacant Land</span>"),
# caption = "Figure "
) +
theme_void() +
theme(legend.position = "none",
plot.title = element_text(size = 16, face = "bold"),
plot.subtitle = ggtext::element_markdown(size = 10))
BasicGeo_plot1
BasicGeo_plot2 | BasicGeo_plot3