# Get the centroid or representative points of the road segments
road_centroids = np.array([geom.centroid.coords[0] for geom in road_seg.geometry])
# Build the KDTree based on the centroids of the road segments
road_tree = cKDTree(road_centroids)
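# Note: cKDTree.query(point) returns a (distance, index) pair, where the index refers to the row
# order of road_centroids (and therefore of road_seg); this is used below as a fallback for
# parcels that have no address-based match.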
# Initialize a list to store the matched road geometries
matched_road_geometries = []
# Iterate over each row in parcel
for idx, parcel_row in parcel.iterrows():
    # Only treat Found_Match as a hit when it is literally True (NaN == True is False, so missing values fall through to the else branch)
    if parcel_row['Found_Match'] == True:
match_addr = parcel_row['match_road_address']
# Filter road_seg to get rows where road_addr matches match_road_address
matching_road_segs = road_seg[road_seg['road_addr'] == match_addr]
if not matching_road_segs.empty:
            # Calculate distances between the parcel polygon and the matching road_seg geometries
            distances = matching_road_segs.geometry.distance(parcel_row.geometry)
# Find the index of the nearest road geometry
nearest_index = distances.idxmin()
# Append the nearest road geometry to the list
matched_road_geometries.append(matching_road_segs.loc[nearest_index].geometry)
else:
# If no match is found, append None or an empty geometry
matched_road_geometries.append(None)
else:
# If Found_Match is False or NaN, find the nearest road geometry
# Get the centroid of the current parcel polygon
parcel_centroid = np.array(parcel_row.geometry.centroid.coords[0])
# Query the KDTree for the nearest road segment
_, nearest_index = road_tree.query(parcel_centroid)
# Append the nearest road geometry to the list
matched_road_geometries.append(road_seg.iloc[nearest_index].geometry)
# Add the matched road geometries to parcel
parcel['road_geometry'] = matched_road_geometries
# %%
# Function to explode Polygons into individual boundary line segments
def explode_to_lines(gdf):
# Create a list to store new rows
line_list = []
for index, row in gdf.iterrows():
# Get the exterior boundary of the polygon
exterior = row['geometry'].exterior
# Convert the boundary into LineString segments
lines = [LineString([exterior.coords[i], exterior.coords[i + 1]])
for i in range(len(exterior.coords) - 1)]
# Create new rows for each line segment, retaining the original attributes
for line in lines:
new_row = row.copy()
new_row['geometry'] = line
line_list.append(new_row)
    # Build a DataFrame from the collected rows (one row per boundary segment)
    line_gdf = pd.DataFrame(line_list)
line_gdf = gpd.GeoDataFrame(line_gdf, geometry='geometry', crs=gdf.crs)
return line_gdf
# Call the function to explode the line segments
parcel_seg = explode_to_lines(parcel)
# Reset the index by group
parcel_seg['new_index'] = parcel_seg.groupby('parcel_id').cumcount()
parcel_seg.set_index('new_index', inplace=True)
parcel_seg.index.name = None
# Function to calculate the bearing of a geometry
def fun_bearing_ra(geom):
coords = np.array(geom.coords)
# Use the first and last coordinates to calculate the bearing
x1, y1 = coords[0]
x2, y2 = coords[-1]
# Calculate the bearing using atan2
bearing = math.atan2(y2 - y1, x2 - x1)
return bearing
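# Illustrative values (not used by the pipeline): a due-east segment (0, 0) -> (1, 0) has a bearing
# of 0 rad, a due-north segment (0, 0) -> (0, 1) has pi/2, and a due-west segment has pi, since
# atan2 measures the angle counter-clockwise from the positive x-axis.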
def calculate_angle_difference(line1, line2):
bearing1 = fun_bearing_ra(line1)
bearing2 = fun_bearing_ra(line2)
# Calculate the absolute angle difference and ensure it is <= 180 degrees
delta_theta = bearing2 - bearing1
# Ensure the angle is between -π and π
if delta_theta > math.pi:
delta_theta -= 2 * math.pi
elif delta_theta < -math.pi:
delta_theta += 2 * math.pi
# Convert the angle to degrees
angle_between_degrees = math.degrees(abs(delta_theta))
# Return the smaller angle difference (angle or its supplement)
return min(angle_between_degrees, 180 - angle_between_degrees)
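# Illustrative example (hypothetical coordinates): for (0, 0)->(1, 0) and (1, 0)->(0, 1) the raw
# bearings differ by 135 degrees, and the function returns min(135, 45) = 45, i.e. the acute angle
# between the two segment directions regardless of which way each segment happens to be drawn.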
# Check if two segments share a common point (i.e., their start or end point is the same)
def are_segments_connected(line1, line2):
coords1 = np.array(line1.coords)
coords2 = np.array(line2.coords)
# Check if the start or end points of the segments are the same
if np.all(coords1[0] == coords2[0]) or np.all(coords1[0] == coords2[-1]) or \
np.all(coords1[-1] == coords2[0]) or np.all(coords1[-1] == coords2[-1]):
return True
return False
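# For example, LineString([(0, 0), (1, 0)]) and LineString([(1, 0), (1, 1)]) are reported as
# connected because they share the endpoint (1, 0); segments that merely cross mid-way are not.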
# Function to reorder segments based on the turning point
def reorder_segments_by_turning_point(segments, turning_point_index):
# Reorder segments starting from the identified turning point
reordered_segments = segments[turning_point_index:] + segments[:turning_point_index]
return reordered_segments
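# For example, reorder_segments_by_turning_point(['a', 'b', 'c', 'd'], 2) returns
# ['c', 'd', 'a', 'b']: the list is rotated so the first turning point becomes the new start.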
# Main function: Process each parcel_id group and return a new GeoDataFrame
def process_parcel_segments(parcel_seg):
merged_segments = [] # List to store the reordered segments
# Group the parcel segments by parcel_id and process each group
for object_id, group in parcel_seg.groupby('parcel_id'):
segments = group['geometry'].tolist() # Get the list of line segments for the current group
original_indices = group.index.tolist() # Preserve the original indices
turning_points = []
# Loop through all adjacent segments to calculate angle differences
for i in range(1, len(segments)):
if are_segments_connected(segments[i-1], segments[i]):
angle_diff = calculate_angle_difference(segments[i-1], segments[i])
if angle_diff > 15: # If angle difference is greater than 15 degrees, mark it as a turning point
turning_points.append(i)
# If there are turning points, reorder the segments starting from the first turning point
if turning_points:
turning_point_index = turning_points[0]
reordered_segments = reorder_segments_by_turning_point(segments, turning_point_index)
reordered_original_indices = reorder_segments_by_turning_point(original_indices, turning_point_index)
else:
# If no turning points, retain the original order
reordered_segments = segments
reordered_original_indices = original_indices
# Store the reordered segments and their attributes
for j, (line, original_index) in enumerate(zip(reordered_segments, reordered_original_indices)):
row = group.iloc[0].copy() # Copy the first row's attributes
row['geometry'] = line
row['original_index'] = original_index # Preserve the original index
row['new_index'] = j # Assign the new index based on the reordered list
merged_segments.append(row)
    # Create a new GeoDataFrame for the reordered segments, keeping the CRS and active geometry
    updated_gdf = gpd.GeoDataFrame(merged_segments, columns=parcel_seg.columns.tolist() + ['original_index', 'new_index'],
                                   geometry='geometry', crs=parcel_seg.crs)
updated_gdf = updated_gdf.reset_index(drop=True)
return updated_gdf
# Run the main function and get the new GeoDataFrame
updated_parcel_seg = process_parcel_segments(parcel_seg)
parcel_seg = updated_parcel_seg
# Group parcel_seg by parcel_id and process each group
merged_segments = []
for object_id, group in parcel_seg.groupby('parcel_id'):
# Get the list of geometries in the current group
segments = group.geometry.tolist()
    # Start with the first segment
    merged_lines = [segments[0]]
for i in range(1, len(segments)):
connected = False
# Always compare the current segment with the previous one
if are_segments_connected(segments[i-1], segments[i]):
# Calculate the angle difference between the current segment and the previous one
angle_diff = calculate_angle_difference(segments[i-1], segments[i])
# If the angle difference is less than 15 degrees, merge the adjacent line segments
if angle_diff < 15:
# Merge the current and previous segments
merged_result = linemerge([merged_lines[-1], segments[i]])
                # Accept the merge only if the result is a single LineString
                if isinstance(merged_result, LineString):
                    merged_lines[-1] = merged_result
                    connected = True
                # Otherwise the pieces are not contiguous (MultiLineString); leave
                # connected as False so the current segment is kept as a new line below
# If no connected segment is found or the angle difference is too large, add the current segment as a new one
if not connected:
merged_lines.append(segments[i])
# Keep the merged results and add other attributes
for line in merged_lines:
row = group.iloc[0].copy() # Copy the first attribute row from the group
row['geometry'] = line
merged_segments.append(row)
# Create a new GeoDataFrame from the merged line segments
parcel_seg = gpd.GeoDataFrame(merged_segments, columns=parcel_seg.columns, geometry='geometry', crs=parcel_seg.crs)
# Check for MultiLineString geometries and explode them into LineString
exploded_segments = []
for index, row in parcel_seg.iterrows():
geom = row['geometry']
if isinstance(geom, MultiLineString):
# Explode the MultiLineString into individual LineStrings
        for line in geom.geoms:
new_row = row.copy()
new_row['geometry'] = line
exploded_segments.append(new_row)
else:
# Keep the original LineString geometries
exploded_segments.append(row)
# Create a new GeoDataFrame from the exploded segments
parcel_seg = gpd.GeoDataFrame(exploded_segments, columns=parcel_seg.columns, geometry='geometry', crs=parcel_seg.crs)
# extract useful columns
parcel_seg.drop(columns=['original_index', 'new_index'], inplace=True)
# Reset the index of the final GeoDataFrame
parcel_seg = parcel_seg.reset_index(drop=True)
edge_counts = parcel_seg.groupby('parcel_id').size()
parcel_seg['edge_num'] = parcel_seg['parcel_id'].map(edge_counts)
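# edge_num records how many merged boundary segments each parcel has; e.g. every row of a parcel
# whose boundary reduced to three segments carries edge_num == 3, and only that subset is passed
# to the curve-filtering step below.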
# Function to create tangent lines at both ends of a line segment
def create_tangents(line):
coords = list(line.coords)
if len(coords) < 2:
return None, None # Skip invalid geometries
# Create tangents at the start and end of the line segment
start_tangent = LineString([coords[0], coords[1]])
end_tangent = LineString([coords[-2], coords[-1]])
return start_tangent, end_tangent
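# For example, for LineString([(0, 0), (1, 0), (2, 1)]) the start tangent is the segment
# (0, 0)-(1, 0) and the end tangent is (1, 0)-(2, 1); comparing their bearings below gives a
# rough measure of how much a segment curves overall.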
# Function to filter curve segments based on angle difference of tangents > 30 degrees
def filter_curve_segments(parcel_seg, angle_threshold=30):
filtered_segments = []
non_filtered_segments = []
for idx, row in parcel_seg.iterrows():
line = row['geometry']
start_tangent, end_tangent = create_tangents(line)
if start_tangent and end_tangent:
angle_diff = calculate_angle_difference(start_tangent, end_tangent)
row_dict = row.to_dict() # Convert the entire row to a dictionary
row_dict['index'] = idx # Preserve the original index
if angle_diff > angle_threshold:
# Add the entire row to the filtered list
filtered_segments.append(row_dict)
else:
# Add the entire row to the non-filtered list
non_filtered_segments.append(row_dict)
# Create DataFrames with the filtered and non-filtered results if data exists
if filtered_segments:
filtered_df = pd.DataFrame(filtered_segments).set_index('index')
filtered_gdf = gpd.GeoDataFrame(filtered_df, crs=parcel_seg.crs, geometry=filtered_df['geometry'])
else:
# Initialize an empty GeoDataFrame with the same structure if no data
        filtered_gdf = gpd.GeoDataFrame(columns=parcel_seg.columns, geometry='geometry', crs=parcel_seg.crs)
if non_filtered_segments:
non_filtered_df = pd.DataFrame(non_filtered_segments).set_index('index')
non_filtered_gdf = gpd.GeoDataFrame(non_filtered_df, crs=parcel_seg.crs, geometry=non_filtered_df['geometry'])
else:
# Initialize an empty GeoDataFrame with the same structure if no data
        non_filtered_gdf = gpd.GeoDataFrame(columns=parcel_seg.columns, geometry='geometry', crs=parcel_seg.crs)
return filtered_gdf, non_filtered_gdf
# Call the function to filter curve segments and create two GeoDataFrames
filtered_parcel_seg, non_filtered_parcel_seg = filter_curve_segments(parcel_seg[parcel_seg['edge_num'] == 3])
# Function to create tangent lines and reverse the line if necessary
def create_tangents_with_reversal(line):
coords = list(line.coords)
if len(coords) < 2:
        return None, None, None  # Skip invalid geometries (the caller unpacks three values)
    # Orient the line from the endpoint with the smaller y-coordinate (southern end) to the one with the larger y
    if coords[0][1] < coords[-1][1]:  # The first point is already the southern endpoint
        start_point = coords[0]
        end_point = coords[-1]
    else:  # Otherwise the last point is the southern endpoint
        start_point = coords[-1]
        end_point = coords[0]
# Reverse the line if start_point is not the same as coords[0]
if start_point != coords[0]:
coords.reverse() # Reverse the order of coordinates
# Now create tangents based on the (possibly reversed) coordinates
start_tangent = LineString([coords[0], coords[1]]) # Tangent from the first to the second point
end_tangent = LineString([coords[-2], coords[-1]]) # Tangent from the second last to the last point
return start_tangent, end_tangent, LineString(coords) # Return the tangents and the (possibly reversed) line
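# For example, a line drawn from (0, 5) down to (0, 0) is reversed so that it runs south-to-north,
# and the tangents are then taken from the (possibly reversed) coordinate order.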
# Function to find the split point where the cumulative bearing change reaches a given fraction of the total angle difference
def calculate_split_point(line, start_tangent, end_tangent, angle_diff, angle_fraction=0.5):
coords = list(line.coords)
    # Walk along the line and return the first vertex at which the bearing change from the start tangent reaches angle_fraction of the total difference
for i in range(1, len(coords) - 1):
intermediate_tangent = LineString([coords[i - 1], coords[i]])
current_angle_diff = calculate_angle_difference(start_tangent, intermediate_tangent)
if current_angle_diff >= angle_diff * angle_fraction:
return coords[i] # Return the split point
return coords[-1] # If no point found, return the endpoint
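# For example, with a total angle difference of 40 degrees and the default angle_fraction of 0.5,
# the function returns the first vertex at which the local tangent has turned at least 20 degrees
# away from the start tangent (or the final endpoint if no such vertex exists).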
# Function to process each segment in filtered_parcel_seg
def process_filtered_parcel_seg(filtered_parcel_seg, angle_threshold=30, angle_fraction=0.5):
new_data = []
for idx, row in filtered_parcel_seg.iterrows():
line = row['geometry']
# Apply the tangent and reversal function
start_tangent, end_tangent, adjusted_line = create_tangents_with_reversal(line)
if start_tangent and end_tangent:
angle_diff = calculate_angle_difference(start_tangent, end_tangent)
if angle_diff > angle_threshold:
# Calculate the split point based on the angle difference and fraction
split_point = calculate_split_point(adjusted_line, start_tangent, end_tangent, angle_diff, angle_fraction)
# Add split point to row's data
row_dict = row.to_dict()
row_dict['split_point'] = Point(split_point) # Store the split point as geometry
row_dict['index'] = idx # Store the original index
new_data.append(row_dict)
else:
# If no split needed, just keep the original row
row_dict = row.to_dict()
row_dict['split_point'] = None # No split point, store None
row_dict['index'] = idx # Store the original index
new_data.append(row_dict)
# Convert the processed data back into a GeoDataFrame
new_df = pd.DataFrame(new_data).set_index('index') # Use original index
    new_gdf = gpd.GeoDataFrame(new_df, crs=filtered_parcel_seg.crs, geometry='split_point')
return new_gdf
# Check if filtered_parcel_seg is non-empty before processing
if not filtered_parcel_seg.empty:
# Call the function to process the filtered_parcel_seg
processed_parcel_seg = process_filtered_parcel_seg(filtered_parcel_seg)
else:
# Handle the case where filtered_parcel_seg is empty
processed_parcel_seg = gpd.GeoDataFrame(columns=filtered_parcel_seg.columns, crs=parcel_seg.crs)
# Function to split filtered_parcel_seg using points from processed_parcel_seg
def split_lines_with_points(filtered_parcel_seg, processed_parcel_seg):
split_segments = []
for idx, row in filtered_parcel_seg.iterrows():
line = row['geometry']
split_point_geom = processed_parcel_seg.loc[idx, 'split_point'] # Get the corresponding point geometry from split_point column
if isinstance(split_point_geom, Point):
# Check if the split point is on the line
if line.contains(split_point_geom):
# If the point is on the line, use it directly for splitting
split_lines = split(line, split_point_geom)
else:
# If the point is not on the line, find the closest point on the line
projected_distance = line.project(split_point_geom)
nearest_point = line.interpolate(projected_distance)
split_lines = split(line, nearest_point)
# Handle GeometryCollection by extracting valid LineString geometries
if isinstance(split_lines, GeometryCollection):
split_segments.extend([{
**row.to_dict(), 'geometry': geom
} for geom in split_lines.geoms if isinstance(geom, LineString)])
continue # Skip to the next iteration
# If no valid split point or GeometryCollection, add the original row
split_segments.append(row.to_dict())
# Convert split_segments to a GeoDataFrame and return
    split_gdf = gpd.GeoDataFrame(split_segments, crs=filtered_parcel_seg.crs, geometry='geometry')
return split_gdf
# Check if both filtered_parcel_seg and processed_parcel_seg are non-empty before processing
if not filtered_parcel_seg.empty and not processed_parcel_seg.empty:
# Call the function to split lines based on points
split_parcel_seg = split_lines_with_points(filtered_parcel_seg, processed_parcel_seg)
else:
# Handle the case where one or both GeoDataFrames are empty
split_parcel_seg = gpd.GeoDataFrame(columns=filtered_parcel_seg.columns, crs=parcel_seg.crs)
# Function to combine split_parcel_seg and non_filtered_parcel_seg, ensuring parcel_id proximity
def combine_parcel_segs(split_parcel_seg, non_filtered_parcel_seg):
# Ensure both datasets contain the 'parcel_id' column
if 'parcel_id' not in split_parcel_seg.columns or 'parcel_id' not in non_filtered_parcel_seg.columns:
raise ValueError("Both datasets must contain the 'parcel_id' column.")
# Convert parcel_id to string to avoid type errors during sorting
split_parcel_seg['parcel_id'] = split_parcel_seg['parcel_id'].astype(str)
non_filtered_parcel_seg['parcel_id'] = non_filtered_parcel_seg['parcel_id'].astype(str)
# Concatenate the two GeoDataFrames and ensure 'crs' and 'geometry' are set
combined_parcel_seg = gpd.GeoDataFrame(
pd.concat([split_parcel_seg, non_filtered_parcel_seg], ignore_index=True),
        crs=parcel_seg.crs,  # Reuse the CRS of the module-level parcel_seg
geometry='geometry' # Ensure the geometry column is correctly set
)
# Sort by 'parcel_id' to ensure similar parcel_id are together
combined_parcel_seg_sorted = combined_parcel_seg.sort_values(by='parcel_id')
return combined_parcel_seg_sorted
# Combine the results whenever at least one of the two GeoDataFrames has data, so no edge_num == 3 segments are dropped
if not split_parcel_seg.empty or not non_filtered_parcel_seg.empty:
    # Call the function to combine the datasets
    reconstr_seg = combine_parcel_segs(split_parcel_seg, non_filtered_parcel_seg)
else:
    # Handle the case where both GeoDataFrames are empty
reconstr_seg = gpd.GeoDataFrame(columns=split_parcel_seg.columns, crs=parcel_seg.crs)
# Check if reconstr_seg is non-empty before concatenating
if not reconstr_seg.empty:
parcel_seg = pd.concat([parcel_seg[parcel_seg['edge_num'] != 3], reconstr_seg], ignore_index=True).reset_index(drop=True)
parcel_seg = parcel_seg.drop(columns=['edge_num'])
parcel_seg = parcel_seg.set_crs(parcel.crs, allow_override=True)
# %%
def normalize_linestring(line):
# Ensure the coordinates are in a consistent direction (smallest point first)
if isinstance(line, LineString):
coords = list(line.coords)
if coords[0] > coords[-1]:
coords.reverse() # Reverse the order of coordinates to normalize the direction
return LineString(coords)
else:
return line # If it's not a LineString, keep it as is
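# For example, LineString([(2, 2), (0, 0)]) is rewritten as LineString([(0, 0), (2, 2)]), so a
# boundary shared by two parcels compares equal regardless of the direction it was digitized in.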
def check_shared_sides_normalized(parcel_seg, threshold=0.1, distance_threshold=100):
"""
Check for shared sides in parcel_seg using cKDTree for faster neighbor searches.
Parameters:
- parcel_seg: GeoDataFrame containing parcel segments.
- threshold: float, minimum proportion of line length overlap to consider as a shared side.
- distance_threshold: float, maximum distance between line segment midpoints to be considered for comparison.
Returns:
- parcel_seg: GeoDataFrame with 'shared_side' column indicating whether a side is shared.
"""
    # Work on a positional (Range) index so DataFrame labels line up with the KDTree row order used below
    parcel_seg = parcel_seg.reset_index(drop=True)
    # Normalize all the geometry objects
    parcel_seg['normalized_geom'] = parcel_seg['geometry'].apply(normalize_linestring)
# Extract the midpoints of each line segment to build the KDTree
midpoints = np.array([line.interpolate(0.5, normalized=True).coords[0] for line in parcel_seg['normalized_geom']])
# Build cKDTree with midpoints
kdtree = cKDTree(midpoints)
# Initialize the 'shared_side' column as False
parcel_seg['shared_side'] = False
# Loop over each line and find nearby lines using KDTree
for i, line1 in parcel_seg.iterrows():
# Query the KDTree for neighbors within the distance_threshold
indices = kdtree.query_ball_point(midpoints[i], r=distance_threshold)
for j in indices:
if i != j: # Avoid comparing the line with itself
line2 = parcel_seg.iloc[j]
intersection = line1['normalized_geom'].intersection(line2['normalized_geom'])
if not intersection.is_empty:
# Calculate the proportion of overlap relative to the length of line1
overlap_ratio = intersection.length / line1['normalized_geom'].length
if overlap_ratio > threshold:
# If the overlap is greater than the threshold, mark as shared side
parcel_seg.at[i, 'shared_side'] = True
parcel_seg.at[j, 'shared_side'] = True
# Remove the temporarily generated 'normalized_geom' column
parcel_seg = parcel_seg.drop(columns=['normalized_geom'])
return parcel_seg
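# Note: distance_threshold is interpreted in the units of parcel_seg's CRS (e.g. metres or feet for
# a projected CRS); threshold, by contrast, is a unitless overlap ratio.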
parcel_seg = check_shared_sides_normalized(parcel_seg)