Label Parcel Edges

Extract Parcel Edges with Labels

Through the previous steps, we obtained parcels with assigned classification labels. Next, we need to re-extract parcel edges within each parcel group.

Code
# Summarise every road segment by its centroid and index those points in a KDTree.
# The tree is only used for parcels that have no address match.
road_centroids = np.array([segment.centroid.coords[0] for segment in road_seg.geometry])
road_tree = cKDTree(road_centroids)

# One road geometry (or None) per parcel, collected in parcel row order
matched_road_geometries = []

for idx, parcel_row in parcel.iterrows():
    if not (parcel_row['Found_Match'] == True):
        # Found_Match is False or NaN: fall back to the road segment whose centroid
        # is closest to the parcel centroid.
        parcel_centroid = np.array(parcel_row.geometry.centroid.coords[0])
        _, nearest_index = road_tree.query(parcel_centroid)
        matched_road_geometries.append(road_seg.iloc[nearest_index].geometry)
        continue

    # Address matched: only road segments carrying the matched address are candidates
    match_addr = parcel_row['match_road_address']
    matching_road_segs = road_seg[road_seg['road_addr'] == match_addr]

    if matching_road_segs.empty:
        # No segment carries that address, so record a placeholder to keep the list aligned
        matched_road_geometries.append(None)
        continue

    # Among the candidates, keep the one closest to the parcel polygon itself
    distances = matching_road_segs.geometry.apply(
        lambda road_geom: parcel_row.geometry.distance(road_geom)
    )
    nearest_index = distances.idxmin()
    matched_road_geometries.append(matching_road_segs.loc[nearest_index].geometry)

# Attach the selected road geometry to each parcel
parcel['road_geometry'] = matched_road_geometries

# %%
# Break every polygon into the individual straight segments of its outer boundary
def explode_to_lines(gdf):
    """Return one row per exterior boundary segment of each polygon in *gdf*.

    Only the exterior ring is used; interior rings are not read. Every output
    row carries a copy of the source row's attributes with 'geometry' replaced
    by a two-point LineString. The CRS of *gdf* is kept.
    """
    exploded_rows = []

    for _, parcel_row in gdf.iterrows():
        vertices = list(parcel_row['geometry'].exterior.coords)

        # Consecutive vertex pairs are the boundary segments
        for seg_start, seg_end in zip(vertices[:-1], vertices[1:]):
            edge_row = parcel_row.copy()
            edge_row['geometry'] = LineString([seg_start, seg_end])
            exploded_rows.append(edge_row)

    # Each Series becomes a column in the concat; transpose to get one row per segment
    stacked = pd.concat(exploded_rows, axis=1).T
    return gpd.GeoDataFrame(stacked, geometry='geometry', crs=gdf.crs)

# Explode every parcel polygon into its boundary segments
parcel_seg = explode_to_lines(parcel)

# Number the segments 0..n-1 within each parcel and use that number as an unnamed index
parcel_seg['new_index'] = parcel_seg.groupby('parcel_id').cumcount()
parcel_seg = parcel_seg.set_index('new_index')
parcel_seg.index.name = None


# Direction of the straight line joining a geometry's first and last coordinate
def fun_bearing_ra(geom):
    """Return the bearing of *geom* in radians.

    Only the first and last coordinate are used; intermediate vertices are
    ignored. The value is math.atan2(dy, dx) of the end point relative to the
    start point.
    """
    endpoints = np.array(geom.coords)
    (x_start, y_start), (x_end, y_end) = endpoints[0], endpoints[-1]
    return math.atan2(y_end - y_start, x_end - x_start)

def calculate_angle_difference(line1, line2):
    """Return the direction-insensitive angle between two lines, in degrees.

    The bearings of both lines (see fun_bearing_ra) are subtracted, the
    difference is wrapped into [-pi, pi], and the smaller of the resulting
    angle and its supplement is returned. Two lines pointing in opposite
    directions therefore give 0, and the result never exceeds 90.
    """
    delta = fun_bearing_ra(line2) - fun_bearing_ra(line1)

    # Wrap the raw difference into [-pi, pi]
    if delta > math.pi:
        delta -= 2 * math.pi
    elif delta < -math.pi:
        delta += 2 * math.pi

    angle_deg = math.degrees(abs(delta))
    supplement_deg = 180 - angle_deg

    # Lines have no orientation here, so the angle and its supplement are equivalent
    return angle_deg if angle_deg <= supplement_deg else supplement_deg


# Two segments are connected when any endpoint of one coincides exactly with an endpoint of the other
def are_segments_connected(line1, line2):
    """Return True if *line1* and *line2* share a start or end coordinate.

    Only the first and last coordinate of each geometry are compared, and the
    comparison is exact (no tolerance).
    """
    pts_a = np.array(line1.coords)
    pts_b = np.array(line2.coords)
    ends_a = (pts_a[0], pts_a[-1])
    ends_b = (pts_b[0], pts_b[-1])

    return any(np.all(end_a == end_b) for end_a in ends_a for end_b in ends_b)

# Rotate a sequence so that it starts at the given position
def reorder_segments_by_turning_point(segments, turning_point_index):
    """Return *segments* rotated to begin at *turning_point_index*.

    Elements before the index are moved, in order, to the end.
    """
    leading = segments[:turning_point_index]
    trailing = segments[turning_point_index:]
    return trailing + leading

# Rotate each parcel's segment list so that it begins at its first turning point
def process_parcel_segments(parcel_seg):
    """Reorder the segments of every parcel_id group and return a new GeoDataFrame.

    A turning point is a position whose segment is connected to the previous
    one and differs from it in direction by more than 15 degrees. Each group
    is rotated to start at its first turning point. Groups without one keep
    their order. Output rows copy the attributes of the group's first row and
    gain two columns: 'original_index' (the segment's previous index label)
    and 'new_index' (its position after reordering).
    """
    reordered_rows = []

    for _, group in parcel_seg.groupby('parcel_id'):
        segments = group['geometry'].tolist()
        original_indices = group.index.tolist()

        # First position that qualifies as a turning point; 0 leaves the order unchanged
        pivot = next(
            (
                pos
                for pos in range(1, len(segments))
                if are_segments_connected(segments[pos - 1], segments[pos])
                and calculate_angle_difference(segments[pos - 1], segments[pos]) > 15
            ),
            0,
        )
        rotated_segments = reorder_segments_by_turning_point(segments, pivot)
        rotated_indices = reorder_segments_by_turning_point(original_indices, pivot)

        # Attributes are taken from the first row of the group for every segment
        template = group.iloc[0]
        for new_position, (line, original_index) in enumerate(zip(rotated_segments, rotated_indices)):
            row = template.copy()
            row['geometry'] = line
            row['original_index'] = original_index
            row['new_index'] = new_position
            reordered_rows.append(row)

    output_columns = parcel_seg.columns.tolist() + ['original_index', 'new_index']
    return gpd.GeoDataFrame(reordered_rows, columns=output_columns).reset_index(drop=True)

# Reorder the segments of every parcel and continue working with the reordered frame;
# the result stays reachable under both names.
parcel_seg = updated_parcel_seg = process_parcel_segments(parcel_seg)


# Merge consecutive, nearly collinear segments of each parcel into single edges
merged_segments = []

for object_id, group in parcel_seg.groupby('parcel_id'):
    # Segments of the current parcel, in their current row order
    segments = group.geometry.tolist()
    # The running list of merged edges always starts with the first segment
    merged_lines = [segments[0]]

    for i in range(1, len(segments)):
        connected = False

        # A segment is only ever compared with its immediate predecessor
        if are_segments_connected(segments[i-1], segments[i]):
            angle_diff = calculate_angle_difference(segments[i-1], segments[i])

            # Directions closer than 15 degrees are treated as the same edge
            if angle_diff < 15:
                merged_result = linemerge([merged_lines[-1], segments[i]])

                # Accept the merge only when it yields a single LineString. Any other
                # result means the two pieces could not be joined, so the segment falls
                # through to the append below and is kept as its own edge. (A `continue`
                # used to sit here, which dropped the segment from the parcel entirely.)
                if isinstance(merged_result, LineString):
                    merged_lines[-1] = merged_result
                    connected = True

        # Not connected, angle too large, or merge rejected: start a new edge
        if not connected:
            merged_lines.append(segments[i])

    # Every merged edge inherits the attributes of the group's first row
    for line in merged_lines:
        row = group.iloc[0].copy()
        row['geometry'] = line
        merged_segments.append(row)

# Create a new GeoDataFrame from the merged line segments
parcel_seg = gpd.GeoDataFrame(merged_segments, columns=parcel_seg.columns)

# Replace every MultiLineString row by one row per component LineString
exploded_segments = []

for index, row in parcel_seg.iterrows():
    geom = row['geometry']

    if isinstance(geom, MultiLineString):
        # Iterate the parts through `.geoms`: multi-part geometries are not directly
        # iterable in Shapely 2.x, while `.geoms` works in 1.x and 2.x alike.
        for line in geom.geoms:
            new_row = row.copy()
            new_row['geometry'] = line
            exploded_segments.append(new_row)
    else:
        # Any other geometry is kept unchanged
        exploded_segments.append(row)

# Create a new GeoDataFrame from the exploded segments
parcel_seg = gpd.GeoDataFrame(exploded_segments, columns=parcel_seg.columns)

# Drop the bookkeeping columns added during reordering and renumber the rows 0..n-1
parcel_seg = parcel_seg.drop(columns=['original_index', 'new_index']).reset_index(drop=True)

# Record on every segment how many edges its parcel has
edge_counts = parcel_seg.groupby('parcel_id').size()
parcel_seg['edge_num'] = parcel_seg['parcel_id'].map(edge_counts)

# Approximate the tangents at both ends of a line by its first and last two-point legs
def create_tangents(line):
    """Return (start_tangent, end_tangent) for *line*.

    The start tangent joins the first two coordinates and the end tangent
    joins the last two. (None, None) is returned when *line* has fewer than
    two coordinates.
    """
    vertices = list(line.coords)
    if len(vertices) < 2:
        return None, None

    first_leg = LineString(vertices[:2])
    last_leg = LineString(vertices[-2:])
    return first_leg, last_leg

# Separate segments whose end tangents differ by more than a threshold (curves) from the rest
def filter_curve_segments(parcel_seg, angle_threshold=30):
    """Split *parcel_seg* into curved and non-curved segments.

    A segment counts as curved when the angle between its start and end
    tangents (see create_tangents) is greater than *angle_threshold* degrees.
    Rows for which no tangents can be built are left out of both results.

    Returns:
        (filtered_gdf, non_filtered_gdf): the curved and the remaining
        segments, both indexed by the original index labels. A result with no
        rows is an empty GeoDataFrame with the columns of *parcel_seg*.
    """
    curved_records = []
    straight_records = []

    for idx, row in parcel_seg.iterrows():
        start_tangent, end_tangent = create_tangents(row['geometry'])
        if not (start_tangent and end_tangent):
            continue

        record = row.to_dict()
        record['index'] = idx  # remembered so the original index can be restored below

        bend = calculate_angle_difference(start_tangent, end_tangent)
        bucket = curved_records if bend > angle_threshold else straight_records
        bucket.append(record)

    def _records_to_gdf(records):
        # Empty input keeps the column layout of the source frame
        if not records:
            return gpd.GeoDataFrame(columns=parcel_seg.columns, crs=parcel_seg.crs)
        frame = pd.DataFrame(records).set_index('index')
        return gpd.GeoDataFrame(frame, crs=parcel_seg.crs, geometry=frame['geometry'])

    return _records_to_gdf(curved_records), _records_to_gdf(straight_records)

# Only segments of parcels with exactly three edges (edge_num == 3) are examined.
# filtered_parcel_seg receives the segments classified as curves (default 30-degree
# threshold); non_filtered_parcel_seg receives the remaining ones.
filtered_parcel_seg, non_filtered_parcel_seg = filter_curve_segments(parcel_seg[parcel_seg['edge_num'] == 3])

# Orient a line so it starts at its lower endpoint, then take the tangents at both ends
def create_tangents_with_reversal(line):
    """Return (start_tangent, end_tangent, oriented_line) for *line*.

    The line is oriented so that it starts at the endpoint with the smaller
    y-coordinate. When the first endpoint's y is not strictly smaller than the
    last endpoint's, the coordinates are reversed, unless both endpoints are
    the same point. The tangents are the first and last two-point legs of the
    oriented line.

    Returns:
        A 3-tuple in every case. For a geometry with fewer than two
        coordinates it is (None, None, None), so callers that unpack three
        values do not fail on the degenerate case.
    """
    coords = list(line.coords)
    if len(coords) < 2:
        # Three values, matching the normal return shape
        return None, None, None

    first_is_lower = coords[0][1] < coords[-1][1]
    if (not first_is_lower) and coords[-1] != coords[0]:
        coords.reverse()

    start_tangent = LineString([coords[0], coords[1]])    # first leg of the oriented line
    end_tangent = LineString([coords[-2], coords[-1]])    # last leg of the oriented line

    return start_tangent, end_tangent, LineString(coords)

# Locate the vertex at which a curve has turned through a given fraction of its total bend
def calculate_split_point(line, start_tangent, end_tangent, angle_diff, angle_fraction=0.5):
    """Return the coordinate at which *line* should be split.

    Walking the legs of *line* in order, the function returns the end vertex
    of the first leg whose angle to *start_tangent* is at least
    ``angle_diff * angle_fraction`` (half of the total bend by default). The
    final leg is not examined. If no leg qualifies, the last coordinate of the
    line is returned. *end_tangent* is accepted but not used.
    """
    vertices = list(line.coords)

    for previous_vertex, vertex in zip(vertices[:-2], vertices[1:-1]):
        leg = LineString([previous_vertex, vertex])
        if calculate_angle_difference(start_tangent, leg) >= angle_diff * angle_fraction:
            return vertex

    return vertices[-1]

# Compute a split point for every curved segment
def process_filtered_parcel_seg(filtered_parcel_seg, angle_threshold=30, angle_fraction=0.5):
    """Return *filtered_parcel_seg* with an added 'split_point' geometry column.

    For each row the line is oriented with create_tangents_with_reversal. When
    the angle between its end tangents exceeds *angle_threshold*, the split
    point from calculate_split_point is stored as a Point; otherwise
    'split_point' is None. Rows without usable tangents are omitted.

    The result keeps the original index labels and uses 'split_point' as its
    active geometry. Its CRS is read from the module-level ``parcel_seg``.
    """
    records = []

    for idx, row in filtered_parcel_seg.iterrows():
        start_tangent, end_tangent, adjusted_line = create_tangents_with_reversal(row['geometry'])
        if not (start_tangent and end_tangent):
            continue

        angle_diff = calculate_angle_difference(start_tangent, end_tangent)
        if angle_diff > angle_threshold:
            split_geom = Point(
                calculate_split_point(adjusted_line, start_tangent, end_tangent, angle_diff, angle_fraction)
            )
        else:
            # Not bent enough to be split
            split_geom = None

        record = row.to_dict()
        record['split_point'] = split_geom
        record['index'] = idx  # restored as the index below
        records.append(record)

    frame = pd.DataFrame(records).set_index('index')
    return gpd.GeoDataFrame(frame, crs=parcel_seg.crs, geometry='split_point')

# Split points are only computed when there is at least one curved segment;
# otherwise an empty frame with the same columns stands in for the result.
if filtered_parcel_seg.empty:
    processed_parcel_seg = gpd.GeoDataFrame(columns=filtered_parcel_seg.columns, crs=parcel_seg.crs)
else:
    processed_parcel_seg = process_filtered_parcel_seg(filtered_parcel_seg)

# Cut every curved segment at the split point computed for it
def split_lines_with_points(filtered_parcel_seg, processed_parcel_seg):
    """Split the lines of *filtered_parcel_seg* at their 'split_point'.

    The split point of a row is looked up in *processed_parcel_seg* by index
    label. A point that the line does not contain is first replaced by its
    projection onto the line. When the split yields a GeometryCollection, one
    output row is produced per LineString part, each with the source row's
    attributes. In every other case (no Point available, or a different
    result type) the source row is kept unchanged.

    The result's CRS is read from the module-level ``parcel_seg``.
    """
    split_segments = []

    for idx, row in filtered_parcel_seg.iterrows():
        line = row['geometry']
        candidate = processed_parcel_seg.loc[idx, 'split_point']

        pieces = None  # stays None when the row has to be kept as it is
        if isinstance(candidate, Point):
            if line.contains(candidate):
                cutter = candidate
            else:
                # Project the point onto the line to get the closest point on it
                cutter = line.interpolate(line.project(candidate))

            outcome = split(line, cutter)
            if isinstance(outcome, GeometryCollection):
                pieces = [part for part in outcome.geoms if isinstance(part, LineString)]

        if pieces is None:
            split_segments.append(row.to_dict())
        else:
            split_segments.extend({**row.to_dict(), 'geometry': part} for part in pieces)

    return gpd.GeoDataFrame(split_segments, crs=parcel_seg.crs, geometry='geometry')

# Splitting needs both the curved segments and their split points; if either is
# missing, an empty frame with the same columns is used instead.
if filtered_parcel_seg.empty or processed_parcel_seg.empty:
    split_parcel_seg = gpd.GeoDataFrame(columns=filtered_parcel_seg.columns, crs=parcel_seg.crs)
else:
    split_parcel_seg = split_lines_with_points(filtered_parcel_seg, processed_parcel_seg)

# Stack the split curve segments and the untouched segments, grouped by parcel
def combine_parcel_segs(split_parcel_seg, non_filtered_parcel_seg):
    """Concatenate both frames and sort the result by 'parcel_id'.

    'parcel_id' is converted to str in both input frames (the inputs are
    modified in place) so that sorting cannot fail on mixed types. The result
    gets a fresh 0..n-1 index before sorting and takes its CRS from the
    module-level ``parcel_seg``.

    Raises:
        ValueError: if either frame has no 'parcel_id' column.
    """
    frames = (split_parcel_seg, non_filtered_parcel_seg)

    if not all('parcel_id' in frame.columns for frame in frames):
        raise ValueError("Both datasets must contain the 'parcel_id' column.")

    # Same dtype on both sides, so the sort below compares like with like
    for frame in frames:
        frame['parcel_id'] = frame['parcel_id'].astype(str)

    stacked = pd.concat([split_parcel_seg, non_filtered_parcel_seg], ignore_index=True)
    combined = gpd.GeoDataFrame(stacked, crs=parcel_seg.crs, geometry='geometry')

    return combined.sort_values(by='parcel_id')

# Rebuild the segments of the 3-edge parcels from the split curves and the untouched segments
if not split_parcel_seg.empty and not non_filtered_parcel_seg.empty:
    reconstr_seg = combine_parcel_segs(split_parcel_seg, non_filtered_parcel_seg)
elif not split_parcel_seg.empty:
    # Every examined segment was a curve, so there is nothing to combine with. The split
    # results must still replace the original segments below; previously this case fell
    # into the empty branch and the splits were discarded.
    reconstr_seg = split_parcel_seg
else:
    # No split segments at all: the original 3-edge segments stay as they are
    reconstr_seg = gpd.GeoDataFrame(columns=split_parcel_seg.columns, crs=parcel_seg.crs)


# Swap the original 3-edge segments for the rebuilt ones when there is a replacement
if not reconstr_seg.empty:
    parcel_seg = pd.concat([parcel_seg[parcel_seg['edge_num'] != 3], reconstr_seg], ignore_index=True).reset_index(drop=True)

# edge_num was only needed to select the 3-edge parcels
parcel_seg = parcel_seg.drop(columns=['edge_num'])
# The intermediate frames were built without a CRS; take it from the parcels
parcel_seg = parcel_seg.set_crs(parcel.crs, allow_override=True)

# %%
def normalize_linestring(line):
    """Return *line* with a canonical coordinate direction.

    A LineString whose first coordinate tuple compares greater than its last
    one is rebuilt with the coordinates reversed, so that the same edge drawn
    in either direction ends up with the same coordinate order. Any
    non-LineString input is returned unchanged.
    """
    if not isinstance(line, LineString):
        return line

    ordered = list(line.coords)
    if ordered[0] > ordered[-1]:
        ordered.reverse()
    return LineString(ordered)
    
def check_shared_sides_normalized(parcel_seg, threshold=0.1, distance_threshold=100):
    """
    Flag segments that overlap a neighbouring segment, using a cKDTree to find neighbours.

    All bookkeeping is positional: KDTree results are row positions, so they are
    never mixed with the frame's index labels. The function therefore works for
    any index, not only a 0..n-1 RangeIndex. The input frame is not modified.

    Parameters:
    - parcel_seg: GeoDataFrame containing parcel segments.
    - threshold: float, minimum proportion of a segment's length that must overlap
      another segment for the pair to count as a shared side.
    - distance_threshold: float, maximum distance between segment midpoints for two
      segments to be compared.

    Returns:
    - A copy of parcel_seg with a boolean 'shared_side' column. Both segments of
      an overlapping pair are flagged. An empty input yields an empty result with
      the column present.
    """
    result = parcel_seg.copy()

    # Direction-normalized copies of the geometries, addressed by row position
    normalized = [normalize_linestring(geom) for geom in result['geometry']]
    shared = np.zeros(len(normalized), dtype=bool)

    if normalized:
        # Midpoint of every segment; the KDTree returns positions into this array
        midpoints = np.array(
            [line.interpolate(0.5, normalized=True).coords[0] for line in normalized]
        )
        kdtree = cKDTree(midpoints)

        for pos, line1 in enumerate(normalized):
            length1 = line1.length
            if length1 == 0:
                # A zero-length segment has no meaningful overlap ratio
                continue

            for other_pos in kdtree.query_ball_point(midpoints[pos], r=distance_threshold):
                if other_pos == pos:
                    continue  # never compare a segment with itself

                intersection = line1.intersection(normalized[other_pos])
                if intersection.is_empty:
                    continue

                # The overlap is measured relative to the length of line1
                if intersection.length / length1 > threshold:
                    shared[pos] = True
                    shared[other_pos] = True

    result['shared_side'] = shared
    return result

# Add the boolean 'shared_side' column, using the function's default threshold and distance_threshold
parcel_seg = check_shared_sides_normalized(parcel_seg)
Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label geometry Found_Match match_road_address shape_index 50_threshold num_edges angle_difference road_geometry shared_side
0 41229479.0 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel LINESTRING (-10830411.5423 3868003.896099999, ... True N Beach St 1.222134 False 4.0 2.491665 LINESTRING (-10830338.56773692 3867938.6826204... True
1 41229479.0 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel LINESTRING (-10830411.4757 3868124.3587, -1083... True N Beach St 1.222134 False 4.0 2.491665 LINESTRING (-10830338.56773692 3867938.6826204... False
2 41229479.0 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel LINESTRING (-10830359.4534 3868124.8385, -1083... True N Beach St 1.222134 False 4.0 2.491665 LINESTRING (-10830338.56773692 3867938.6826204... False
3 41229479.0 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel LINESTRING (-10830352.2613 3868003.210600003, ... True N Beach St 1.222134 False 4.0 2.491665 LINESTRING (-10830338.56773692 3867938.6826204... True
4 1469207.0 NaN 451 4101 CARNATION AVE R A regular inside parcel LINESTRING (-10829959.8244 3866832.624200001, ... True Carnation Ave 1.229169 False 4.0 1.016179 LINESTRING (-10830075.07559543 3866826.3706468... True
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
20362 4008812.0 NaN 593964 2400 AIRPORT FWY NaN C1 special parcel LINESTRING (-10833332.5534 3865472.222699999, ... True North Fwy 1.323138 True NaN NaN LINESTRING (-10833457.07511048 3865174.5206970... False
20363 42243091.0 NaN 829868 W LOTUS AVE R A special parcel LINESTRING (-10833129.774 3867306.161200002, -... True W Lotus Ave 1.089208 False NaN NaN LINESTRING (-10833113.98614833 3867355.5034094... True
20364 42243091.0 NaN 829868 W LOTUS AVE R A special parcel LINESTRING (-10833137.52 3867344.111199998, -1... True W Lotus Ave 1.089208 False NaN NaN LINESTRING (-10833113.98614833 3867355.5034094... False
20365 42243091.0 NaN 829868 W LOTUS AVE R A special parcel LINESTRING (-10833152.6896 3867323.676899999, ... True W Lotus Ave 1.089208 False NaN NaN LINESTRING (-10833113.98614833 3867355.5034094... False
20366 42243091.0 NaN 829868 W LOTUS AVE R A special parcel LINESTRING (-10833101.1767 3867333.099799998, ... True W Lotus Ave 1.089208 False NaN NaN LINESTRING (-10833113.98614833 3867355.5034094... True

20367 rows × 16 columns

Functions for Edge Classification

Through the previous steps, we obtained the parcel edges of parcels with assigned classification labels. Next, we classify these edges separately within each of the following parcel groups:

  • 01.Regular Inside Parcels
  • 02.Regular Corner Parcels
  • 03.Special Parcels
  • 04.Jagged Parcels
  • 05.Curve Parcels
  • 06.Cul_De_Sac Parcels
  • 07.No Match Address Parcels
  • 08.No Address Parcels
  • 09.Duplicated Address Parcels

01.Regular Inside Parcels Edges

The classification process for “Regular Inside Parcels” is completed by calculating the angular relationships and spatial distances between parcel edges and their nearest road segments with the same address name. These steps ensure a systematic and context-specific classification of parcel edges into four types: ‘front’, ‘rear’, ‘Interior side’, and ‘Exterior side’.

Select Parcels of Specific Classification

This step isolates “Regular Inside Parcels” from the entire dataset using the parcel_label column and performs the following operations:

  • Calculate the angle difference between each parcel edge and its nearest road segment with the same address using the fun_bearing_ra and calculate_angle_difference functions.
  • Calculate the distance between the centroid of each parcel edge and its nearest road segment with the same address.
Code
# Select the segments of "regular inside" parcels. An explicit copy is taken so that the
# column assignments below write to an independent frame rather than to a slice of
# parcel_seg (this avoids pandas' SettingWithCopyWarning and its ambiguous semantics).
regular_insid_parcel = parcel_seg[parcel_seg['parcel_label'] == 'regular inside parcel'].copy()

# Bearing (radians) of each parcel segment and of its associated road segment
regular_insid_parcel['parcel_bearing'] = regular_insid_parcel['geometry'].apply(fun_bearing_ra)
regular_insid_parcel['road_bearing'] = regular_insid_parcel['road_geometry'].apply(fun_bearing_ra)

# Direction-insensitive angle (degrees) between each parcel segment and its road segment
regular_insid_parcel['angle'] = regular_insid_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)

# Distance from the centroid of each parcel segment to its road segment
regular_insid_parcel['distance_to_road'] = regular_insid_parcel.apply(lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1)

Label Parcel Edges

This step classifies each edge of the “Regular Inside Parcels” into one of four categories (‘front’, ‘rear’, ‘Interior side’, and ‘Exterior side’) by defining the classify_sides function. Specifically:

  • Find the two parcel edges with the smallest angles (most parallel) and assign them as ‘front’ or ‘rear’ based on their distance from the road.
  • For the remaining parcel edges:
    • If an edge is shared with another parcel, label it as ‘Interior side.’
    • If it is not a shared edge, label it as ‘Exterior side.’
Code
# Label the edges of one parcel; intended to be applied to each 'parcel_id' group
def classify_sides(group):
    """Return a copy of *group* with a 'side' label for every edge.

    The two edges with the smallest 'angle' are the front/rear candidates: the
    one with the smaller 'distance_to_road' becomes 'front' and the other
    'rear'. If the candidates are tied on distance, the one with the smaller
    angle is 'front'. A group with a single edge gets 'front' only. Of the
    remaining edges, those with shared_side == True become 'Interior side' and
    all others 'Exterior side'.

    The input frame is left unmodified.
    """
    group = group.copy()
    group['side'] = None

    # Step 1: the two most road-parallel edges, nearest to the road first. The stable
    # sort keeps the smaller-angle edge first when the distances are equal.
    candidates = group.nsmallest(2, 'angle').sort_values(by='distance_to_road', kind='stable')
    if not candidates.empty:
        candidate_labels = candidates.index
        group.loc[candidate_labels[0], 'side'] = 'front'
        # Assign 'rear' only to a second, distinct edge so 'front' is never overwritten
        if len(candidate_labels) > 1:
            group.loc[candidate_labels[1], 'side'] = 'rear'

    # Step 2: unlabeled edges that are shared with another parcel
    unlabeled = group['side'].isnull()
    group.loc[unlabeled & (group['shared_side'] == True), 'side'] = 'Interior side'

    # Step 3: everything still unlabeled
    group.loc[group['side'].isnull(), 'side'] = 'Exterior side'
    return group

# Label the edges parcel by parcel, then drop the outermost index level of the result
regular_insid_parcel = (
    regular_insid_parcel
    .groupby('parcel_id')
    .apply(classify_sides)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 01469207 NaN 451 4101 CARNATION AVE R A regular inside parcel 1 Carnation Ave 1.229169 False 4.0 1.016179 True 1.583615 0.006137 89.617176 28.224510 Interior side LINESTRING (-10829959.824 3866832.624, -108299...
1 01469207 NaN 451 4101 CARNATION AVE R A regular inside parcel 1 Carnation Ave 1.229169 False 4.0 1.016179 True 0.011563 0.006137 0.310866 50.958238 rear LINESTRING (-10829960.406 3866877.978, -108299...
2 01469207 NaN 451 4101 CARNATION AVE R A regular inside parcel 1 Carnation Ave 1.229169 False 4.0 1.016179 True -1.588851 0.006137 88.613901 28.111379 Interior side LINESTRING (-10829939.996 3866878.214, -108299...
3 01469207 NaN 451 4101 CARNATION AVE R A regular inside parcel 1 Carnation Ave 1.229169 False 4.0 1.016179 False 3.129994 0.006137 1.016179 5.377651 front LINESTRING (-10829940.823 3866832.404, -108299...
4 01469215 NaN 452 4105 CARNATION AVE R A regular inside parcel 1 Carnation Ave 1.247158 True 4.0 1.016469 True 1.552741 0.006137 88.613901 28.111379 Interior side LINESTRING (-10829940.823 3866832.404, -108299...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
14571 42992573 NaN 2836787 BLANDIN ST R A regular inside parcel 1 Blandin St 1.135663 False 4.0 0.923995 True -3.140131 1.568591 89.789895 22.629950 Interior side LINESTRING (-10832058.964 3867719.706, -108320...
14572 06150845 NaN 2851618 804 BLANDIN ST R A regular inside parcel 1 Blandin St 1.126165 False 4.0 2.650861 False 1.557710 1.511444 2.650861 7.535385 front LINESTRING (-10832070.833 3865396.148, -108320...
14573 06150845 NaN 2851618 804 BLANDIN ST R A regular inside parcel 1 Blandin St 1.126165 False 4.0 2.650861 True -0.011118 1.511444 87.236404 20.594869 Interior side LINESTRING (-10832070.470 3865423.909, -108320...
14574 06150845 NaN 2851618 804 BLANDIN ST R A regular inside parcel 1 Blandin St 1.126165 False 4.0 2.650861 True -1.582125 1.511444 2.751522 35.027320 rear LINESTRING (-10832043.038 3865423.604, -108320...
14575 06150845 NaN 2851618 804 BLANDIN ST R A regular inside parcel 1 Blandin St 1.126165 False 4.0 2.650861 True 3.029720 1.511444 86.990798 22.291428 Interior side LINESTRING (-10832043.384 3865393.065, -108320...

14576 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

02.Regular Corner Parcels Edges

Because the previous process already accounted for address names, the same logic used for classifying “regular inside parcels” can be applied to “regular corner parcels” as well.

Select Parcels of Specific Classification

This step isolates “Regular Corner Parcels” from the entire dataset using the parcel_label column and calculates the angle and distance between each parcel edge and its nearest road segment with the same address.

Code
# Select the edges of "regular corner" parcels. The explicit .copy() yields an
# independent frame, so the column assignments below do not write into a slice
# of parcel_seg (which triggers pandas' SettingWithCopyWarning).
regular_corner_parcel = parcel_seg[parcel_seg['parcel_label'] == 'regular corner parcel'].copy()
# Bearing of each parcel edge and of its associated road segment
regular_corner_parcel['parcel_bearing'] = regular_corner_parcel['geometry'].apply(fun_bearing_ra)
regular_corner_parcel['road_bearing'] = regular_corner_parcel['road_geometry'].apply(fun_bearing_ra)
# Angle between each parcel edge and its associated road segment
regular_corner_parcel['angle'] = regular_corner_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# Distance from the centroid of each parcel edge to its associated road segment
regular_corner_parcel['distance_to_road'] = regular_corner_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

We can directly apply the same classify_sides function for “regular corner parcels”.

Code
# Label the edges parcel by parcel, then drop the outermost index level of the result
regular_corner_parcel = (
    regular_corner_parcel
    .groupby('parcel_id')
    .apply(classify_sides)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 41229479 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel 1 N Beach St 1.222134 False 4.0 2.491665 True 1.570243 1.586372 0.924085 70.951563 rear LINESTRING (-10830411.542 3868003.896, -108304...
1 41229479 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel 1 N Beach St 1.222134 False 4.0 2.491665 False 0.009223 1.586372 89.636014 43.995514 Exterior side LINESTRING (-10830411.476 3868124.359, -108303...
2 41229479 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel 1 N Beach St 1.222134 False 4.0 2.491665 False -1.511733 1.586372 2.491665 15.882870 front LINESTRING (-10830359.453 3868124.839, -108303...
3 41229479 NaN 48 2321 N BEACH ST NaN F1 regular corner parcel 1 N Beach St 1.222134 False 4.0 2.491665 True 3.130030 1.586372 88.445077 42.318456 Interior side LINESTRING (-10830352.261 3868003.211, -108304...
4 00381128 NaN 459 1700 N BEACH ST NaN F1 regular corner parcel 1 N Beach St 1.288082 True 4.0 1.305030 False 1.595988 1.573211 1.305030 12.608610 front LINESTRING (-10830328.461 3866836.895, -108303...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2763 02774291 NaN 2478161 1924 LAYTON AVE R A regular corner parcel 1 Layton St 1.317385 True 4.0 0.113170 True -3.141109 1.568827 89.859495 37.294564 Interior side LINESTRING (-10829347.937 3867607.517, -108293...
2764 02020262 NaN 2508563 1615 BLUEBONNET DR R A regular corner parcel 1 Bluebonnet Dr 1.175941 False 4.0 1.947591 True 1.840691 1.647461 11.071235 61.946614 rear LINESTRING (-10833055.844 3866717.585, -108330...
2765 02020262 NaN 2508563 1615 BLUEBONNET DR R A regular corner parcel 1 Bluebonnet Dr 1.175941 False 4.0 1.947591 False 0.302830 1.647461 77.041683 46.920508 Exterior side LINESTRING (-10833065.574 3866752.758, -108330...
2766 02020262 NaN 2508563 1615 BLUEBONNET DR R A regular corner parcel 1 Bluebonnet Dr 1.175941 False 4.0 1.947591 False -1.592794 1.647461 5.652974 21.165463 front LINESTRING (-10833011.894 3866769.530, -108330...
2767 02020262 NaN 2508563 1615 BLUEBONNET DR R A regular corner parcel 1 Bluebonnet Dr 1.175941 False 4.0 1.947591 True -2.760543 1.647461 72.560067 36.137848 Interior side LINESTRING (-10833012.656 3866734.887, -108330...

2768 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

03.Special Parcels Edges

This classification method utilized customized logic for parcels with 3 edges, 4 edges, and other configurations, ensuring accurate categorization of each parcel segment as ‘front’, ‘rear’, ‘Interior side’, or ‘Exterior side’.

Select Parcels of Specific Classification

This step isolates “Special Parcels” from the entire dataset using the parcel_label column and calculates the angle and distance between each parcel edge and its nearest road segment with the same address.

Code
# Select the edges of "special" parcels. The explicit .copy() yields an
# independent frame, so the column assignments below do not write into a slice
# of parcel_seg (which triggers pandas' SettingWithCopyWarning).
special_parcel = parcel_seg[parcel_seg['parcel_label'] == 'special parcel'].copy()
# Bearing of each parcel edge and of its associated road segment
special_parcel['parcel_bearing'] = special_parcel['geometry'].apply(fun_bearing_ra)
special_parcel['road_bearing'] = special_parcel['road_geometry'].apply(fun_bearing_ra)
# Angle between each parcel edge and its associated road segment
special_parcel['angle'] = special_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# Distance from the centroid of each parcel edge to its associated road segment
special_parcel['distance_to_road'] = special_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

Because special parcels are more complex, we defined different functions to handle them accordingly:

  • For parcels with 3 edges (num_edges == 3), handled by the classify_num_edges_3 function:
    • The nearest edge should be labeled as the “front”, and the farthest edge should be labeled as the “rear”.
    • For the remaining edges:
      • If it is shared with another parcel, label it as ‘Interior side’.
      • If it is not a shared edge, label it as ‘Exterior side’.
  • For parcels with 4 edges (num_edges == 4), handled by the classify_num_edges_4 function:
    • Identify the two edges with the smallest angles and label the nearest as front and the farthest as rear.
    • For the remaining edges:
      • If it is shared with another parcel, label it as ‘Interior side’.
      • If it is not a shared edge, label it as ‘Exterior side’.
  • For parcels with any other number of edges, handled by the classify_other_edges function:
    • Identify the edges parallel to the nearest road (angle < 20).
      • Label the edge with the smallest distance as the “front” and the edge with the greatest distance as the “rear”.
      • For the remaining edges:
        • If an edge is also parallel to the nearest road and connects to the front, it should also be labeled as “front”.
        • If an edge is also parallel to the nearest road and connects to the rear, it should be labeled as “rear”.
          • For any remaining edges:
            • If it is between two front edges or within the bounding box of front edges, label it as front.
            • If it is between two rear edges or within the bounding box of rear edges, label it as rear.
    • For the remaining edges:
      • If it is shared with another parcel, label it as “Interior side”.
      • If it is not a shared edge, label it as “Exterior side”.
  • These functions are then integrated into the process_special_parcel function, making it easier to execute the entire classification process by parcel groups.
Code
# Step 2: Handle parcels where num_edges = 3
def classify_num_edges_3(group):
    """Label the edges of one three-edge parcel.

    The edge with the smallest 'distance_to_road' becomes 'front' and the one
    with the largest becomes 'rear'. The remaining edge becomes 'Interior side'
    when shared_side == True and 'Exterior side' when shared_side == False.
    Groups that do not contain exactly three rows are returned unlabeled.

    Parameters
    ----------
    group : DataFrame
        Edges of a single parcel with the columns 'distance_to_road' and
        'shared_side'.

    Returns
    -------
    DataFrame
        A copy of ``group`` that always carries a 'side' column; the input
        frame itself is left untouched.
    """
    # pandas does not support groupby.apply functions that mutate the group
    # they receive, so all labels are written to a copy.
    group = group.copy()
    # Create the column explicitly (as the sibling classifiers do) instead of
    # relying on .loc to create it as a side effect of the first assignment.
    if 'side' not in group.columns:
        group['side'] = None
    if len(group) != 3:
        return group
    front_idx = group['distance_to_road'].idxmin()
    rear_idx = group['distance_to_road'].idxmax()
    group.loc[front_idx, 'side'] = 'front'
    # If all distances tie, idxmin and idxmax point at the same row; keep it
    # as 'front' instead of overwriting it.
    if rear_idx != front_idx:
        group.loc[rear_idx, 'side'] = 'rear'
    # Classify the remaining row(s) by shared_side
    unlabeled = group['side'].isnull()
    group.loc[unlabeled & (group['shared_side'] == True), 'side'] = 'Interior side'
    group.loc[unlabeled & (group['shared_side'] == False), 'side'] = 'Exterior side'
    return group

# Step 3: Handle parcels where num_edges = 4
def classify_num_edges_4(group):
    """Label the edges of one four-edge parcel.

    The two edges with the smallest 'angle' are the front/rear candidates: the
    candidate with the smaller 'distance_to_road' becomes 'front' and the other
    one 'rear'. Of the remaining edges, those with shared_side == True become
    'Interior side' and all others 'Exterior side'.

    Parameters
    ----------
    group : DataFrame
        Edges of a single parcel with the columns 'angle', 'distance_to_road'
        and 'shared_side'. An existing 'side' column is kept and only its
        missing entries are filled (apart from the front/rear rows).

    Returns
    -------
    DataFrame
        A labeled copy of ``group``; the input frame itself is left untouched.
    """
    # pandas does not support groupby.apply functions that mutate the group
    # they receive, so all labels are written to a copy.
    group = group.copy()
    # Step 0: make sure the 'side' column exists
    if 'side' not in group.columns:
        group['side'] = None
    # Step 1: the two most road-parallel edges are the front/rear candidates
    candidates = group.nsmallest(2, 'angle')
    if not candidates.empty:
        front_idx = candidates['distance_to_road'].idxmin()
        rear_idx = candidates['distance_to_road'].idxmax()
        group.loc[front_idx, 'side'] = 'front'
        # With a single candidate (or tied distances) idxmin and idxmax point
        # at the same row; keep it as 'front' instead of overwriting it.
        if rear_idx != front_idx:
            group.loc[rear_idx, 'side'] = 'rear'
    # Step 2: unlabeled edges shared with another parcel
    unlabeled = group['side'].isnull()
    group.loc[unlabeled & (group['shared_side'] == True), 'side'] = 'Interior side'
    # Step 3: everything still unlabeled
    group.loc[group['side'].isnull(), 'side'] = 'Exterior side'
    return group

def classify_other_edges(group):
    """Label the edges of one special parcel that has neither 3 nor 4 edges.

    Edges with 'angle' < 20 are treated as parallel to the road. Labeling
    proceeds in steps:

    1. Among the parallel edges, the one with the smallest 'distance_to_road'
       becomes 'front' and the one with the largest becomes 'rear'.
    2. Other parallel edges that touch that front (checked first) or rear edge
       inherit its label.
    3. Parallel edges still unlabeled take the label of whichever is closer:
       the first 'front' edge or the first 'rear' edge in group order.
    4. Any other unlabeled edge becomes 'front' ('rear') when it touches at
       least two front (rear) edges or lies within the bounding box of the
       front (rear) edges. Labels assigned earlier in this step are visible to
       later rows.
    5. What is left becomes 'Interior side' when shared_side == True and
       'Exterior side' when shared_side == False.

    Groups whose first 'num_edges' value is 3 or 4 are returned unlabeled.

    Parameters
    ----------
    group : GeoDataFrame
        Edges of a single parcel with the columns 'num_edges', 'angle',
        'distance_to_road', 'shared_side' and 'geometry'.

    Returns
    -------
    GeoDataFrame
        A labeled copy of ``group``; the input frame itself is left untouched.
    """
    # pandas does not support groupby.apply functions that mutate the group
    # they receive, so all labels are written to a copy.
    group = group.copy()
    # Step 0: make sure the 'side' column exists
    if 'side' not in group.columns:
        group['side'] = None
    if group['num_edges'].iloc[0] in [3, 4]:
        return group

    def _bounding_box(edges):
        # Axis-aligned box enclosing every geometry in `edges`
        bounds = edges.bounds
        return gpd.GeoSeries(
            [box(bounds.minx.min(), bounds.miny.min(), bounds.maxx.max(), bounds.maxy.max())]
        ).unary_union

    # Step 1: nearest / farthest parallel edge
    parallel = group['angle'] < 20
    candidates = group[parallel]
    front_idx = rear_idx = None
    if not candidates.empty:
        front_idx = candidates['distance_to_road'].idxmin()
        rear_idx = candidates['distance_to_road'].idxmax()
        group.loc[front_idx, 'side'] = 'front'
        # A lone parallel edge (or tied distances) makes idxmin and idxmax
        # coincide; keep that edge as 'front' rather than relabeling it 'rear',
        # which would leave the parcel without a front.
        if rear_idx != front_idx:
            group.loc[rear_idx, 'side'] = 'rear'

    # Step 2: parallel edges adjacent to the front or rear edge.
    # The loop body only runs when Step 1 found candidates, so front_idx and
    # rear_idx are set here.
    for idx in group.index[group['side'].isnull() & parallel]:
        geom = group.loc[idx, 'geometry']
        if group.loc[front_idx, 'geometry'].touches(geom):
            group.loc[idx, 'side'] = 'front'
        elif group.loc[rear_idx, 'geometry'].touches(geom):
            group.loc[idx, 'side'] = 'rear'

    # Step 3: remaining parallel edges go to the closer of the first front /
    # first rear edge
    front_geom = group.loc[group['side'] == 'front', 'geometry']
    rear_geom = group.loc[group['side'] == 'rear', 'geometry']
    for idx in group.index[group['side'].isnull() & parallel]:
        geom = group.loc[idx, 'geometry']
        distance_to_front = geom.distance(front_geom.iloc[0]) if not front_geom.empty else float('inf')
        distance_to_rear = geom.distance(rear_geom.iloc[0]) if not rear_geom.empty else float('inf')
        group.loc[idx, 'side'] = 'front' if distance_to_front < distance_to_rear else 'rear'

    # Step 4: edges between two front / two rear edges, or inside their
    # bounding box. The front/rear sets are re-read for every row on purpose,
    # so labels assigned earlier in this loop are taken into account.
    for idx in group.index[group['side'].isnull()]:
        geom = group.loc[idx, 'geometry']
        front_edges = group[group['side'] == 'front']
        rear_edges = group[group['side'] == 'rear']
        front_touch_count = sum(geom.touches(other) for other in front_edges['geometry'])
        rear_touch_count = sum(geom.touches(other) for other in rear_edges['geometry'])
        within_front_boundary = (not front_edges.empty) and geom.within(_bounding_box(front_edges))
        within_rear_boundary = (not rear_edges.empty) and geom.within(_bounding_box(rear_edges))
        if front_touch_count >= 2 or within_front_boundary:
            group.loc[idx, 'side'] = 'front'
        elif rear_touch_count >= 2 or within_rear_boundary:
            group.loc[idx, 'side'] = 'rear'

    # Step 5: fill the remaining sides based on shared_side
    group.loc[group['side'].isnull() & (group['shared_side'] == True), 'side'] = 'Interior side'
    group.loc[group['side'].isnull() & (group['shared_side'] == False), 'side'] = 'Exterior side'
    return group

# Single entry point that labels all special parcels
def process_special_parcel(special_parcel):
    """Label the edges of every special parcel.

    Rows are grouped by 'parcel_id' and each group is routed by the 'num_edges'
    value of its first row: 3 goes to classify_num_edges_3, 4 goes to
    classify_num_edges_4 and anything else to classify_other_edges. The
    combined result is returned with a fresh default index.
    """
    def _route(edges):
        # Pick the classifier that matches this parcel's edge count
        if edges['num_edges'].iloc[0] == 3:
            return classify_num_edges_3(edges)
        if edges['num_edges'].iloc[0] == 4:
            return classify_num_edges_4(edges)
        return classify_other_edges(edges)

    labeled = special_parcel.groupby('parcel_id').apply(_route)
    return labeled.reset_index(drop=True)

# Label every edge of the special parcels; the labeled result replaces special_parcel
special_parcel = process_special_parcel(special_parcel)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 40267504 NaN 5819 1225 KELPIE CT R A special parcel 1 Kelpie Ct 1.180428 False 7.0 2.361011 True 2.729854 0.207804 35.497133 31.984578 Interior side LINESTRING (-10833015.551 3866094.057, -108330...
1 40267504 NaN 5819 1225 KELPIE CT R A special parcel 1 Kelpie Ct 1.180428 False 7.0 2.361011 True -3.134620 0.207804 11.506737 6.636179 front LINESTRING (-10833027.112 3866099.106, -108330...
2 40267504 NaN 5819 1225 KELPIE CT R A special parcel 1 Kelpie Ct 1.180428 False 7.0 2.361011 True 1.561944 0.207804 77.586507 4.079854 Interior side LINESTRING (-10833066.061 3866098.835, -108330...
3 40267504 NaN 5819 1225 KELPIE CT R A special parcel 1 Kelpie Ct 1.180428 False 7.0 2.361011 True 1.958448 0.207804 79.695491 8.971980 Interior side LINESTRING (-10833065.995 3866106.358, -108330...
4 40267504 NaN 5819 1225 KELPIE CT R A special parcel 1 Kelpie Ct 1.180428 False 7.0 2.361011 True 1.573529 0.207804 78.250328 28.513129 Interior side LINESTRING (-10833066.931 3866108.651, -108330...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1315 04008812 NaN 593964 2400 AIRPORT FWY NaN C1 special parcel 1 North Fwy 1.323138 True NaN NaN False -0.806314 1.708166 35.930920 226.626260 Exterior side LINESTRING (-10833332.553 3865472.223, -108333...
1316 42243091 NaN 829868 W LOTUS AVE R A special parcel 1 W Lotus Ave 1.089208 False NaN NaN True 2.488966 -0.591348 3.511015 48.881650 rear LINESTRING (-10833129.774 3867306.161, -108331...
1317 42243091 NaN 829868 W LOTUS AVE R A special parcel 1 W Lotus Ave 1.089208 False NaN NaN False -0.294191 -0.591348 17.025814 12.701618 front LINESTRING (-10833137.520 3867344.111, -108331...
1318 42243091 NaN 829868 W LOTUS AVE R A special parcel 1 W Lotus Ave 1.089208 False NaN NaN False 0.932203 -0.591348 87.293025 38.355285 Exterior side LINESTRING (-10833152.690 3867323.677, -108331...
1319 42243091 NaN 829868 W LOTUS AVE R A special parcel 1 W Lotus Ave 1.089208 False NaN NaN True -2.386053 -0.591348 77.170973 30.611536 Interior side LINESTRING (-10833101.177 3867333.100, -108331...

1320 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

04.Jagged Parcels Edges

Compared to “special parcels”, “jagged parcels” are characterized by more irregular shapes and multiple edges. The edge identification process, implemented in the classify_jagged_edges function, is similar to that for “special parcels” but slightly simpler, given the unpredictable shapes. However, this classification has the lowest confidence level among all parcel types.

Select Parcels of Specific Classification

This step isolates “Jagged Parcels” from the entire dataset using the parcel_label column and calculates the angle and distance between each parcel edge and its nearest road segment with the same address.

Code
# Select the edges of "jagged" parcels. The explicit .copy() yields an
# independent frame, so the column assignments below do not write into a slice
# of parcel_seg (which triggers pandas' SettingWithCopyWarning).
jagged_parcel = parcel_seg[parcel_seg['parcel_label'] == 'jagged parcel'].copy()
# Bearing of each parcel edge and of its associated road segment
jagged_parcel['parcel_bearing'] = jagged_parcel['geometry'].apply(fun_bearing_ra)
jagged_parcel['road_bearing'] = jagged_parcel['road_geometry'].apply(fun_bearing_ra)
# Angle between each parcel edge and its associated road segment
jagged_parcel['angle'] = jagged_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# Distance from the centroid of each parcel edge to its associated road segment
jagged_parcel['distance_to_road'] = jagged_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

Because jagged parcels are the most complex and irregular, with widely varying shapes, we defined the classify_jagged_edges function to handle them:

  • Identify edges where angle < 20 to find edges parallel to the nearest road.
    • Label the edge with the smallest distance as the front and the edge with the greatest distance as the “rear”.
      • For the remaining edges:
        • If an edge is parallel to the nearest road and is geometrically connected to the “front” using the .touches() method, label it as “front”.
        • If an edge is parallel to the nearest road and is geometrically connected to the “rear” using the .touches() method, label it as “rear”.
        • For any remaining parallel edges:
          • If an edge is spatially between two front edges or within the bounding box of the front edges, label it as front.
          • If an edge is spatially between two rear edges or within the bounding box of the rear edges, label it as rear.
  • For the remaining edges:
    • If it is shared with another parcel, label it as “Interior side”.
    • If it is not a shared edge, label it as “Exterior side”.
Code
def classify_jagged_edges(group):
    """Label the edges of one jagged parcel.

    Edges with 'angle' < 20 are treated as parallel to the road. Labeling
    proceeds in steps:

    1. Among the parallel edges, the one with the smallest 'distance_to_road'
       becomes 'front' and the one with the largest becomes 'rear'.
    2. Other parallel edges that touch that front (checked first) or rear edge
       inherit its label.
    3. Parallel edges still unlabeled take the label of whichever is closer:
       the first 'front' edge or the first 'rear' edge in group order.
    4. Once at least two front (rear) edges exist, any other unlabeled edge
       becomes 'front' ('rear') when it touches at least two of them or lies
       within their bounding box. Labels assigned earlier in this step are
       visible to later rows.
    5. What is left becomes 'Interior side' when shared_side == True and
       'Exterior side' when shared_side == False.

    Parameters
    ----------
    group : GeoDataFrame
        Edges of a single parcel with the columns 'angle', 'distance_to_road',
        'shared_side' and 'geometry'.

    Returns
    -------
    GeoDataFrame
        A labeled copy of ``group``; the input frame itself is left untouched.
    """
    # pandas does not support groupby.apply functions that mutate the group
    # they receive, so all labels are written to a copy.
    group = group.copy()
    # Step 0: make sure the 'side' column exists
    if 'side' not in group.columns:
        group['side'] = None

    def _bounding_box(edges):
        # Axis-aligned box enclosing every geometry in `edges`
        bounds = edges.bounds
        return gpd.GeoSeries(
            [box(bounds.minx.min(), bounds.miny.min(), bounds.maxx.max(), bounds.maxy.max())]
        ).unary_union

    # Step 1: nearest / farthest parallel edge
    parallel = group['angle'] < 20
    candidates = group[parallel]
    front_idx = rear_idx = None
    if not candidates.empty:
        front_idx = candidates['distance_to_road'].idxmin()
        rear_idx = candidates['distance_to_road'].idxmax()
        group.loc[front_idx, 'side'] = 'front'
        # A lone parallel edge (or tied distances) makes idxmin and idxmax
        # coincide; keep that edge as 'front' rather than relabeling it 'rear',
        # which would leave the parcel without a front.
        if rear_idx != front_idx:
            group.loc[rear_idx, 'side'] = 'rear'

    # Step 2: parallel edges adjacent to the front or rear edge.
    # The loop body only runs when Step 1 found candidates, so front_idx and
    # rear_idx are set here.
    for idx in group.index[group['side'].isnull() & parallel]:
        geom = group.loc[idx, 'geometry']
        if group.loc[front_idx, 'geometry'].touches(geom):
            group.loc[idx, 'side'] = 'front'
        elif group.loc[rear_idx, 'geometry'].touches(geom):
            group.loc[idx, 'side'] = 'rear'

    # Step 3: remaining parallel edges go to the closer of the first front /
    # first rear edge
    front_geom = group.loc[group['side'] == 'front', 'geometry']
    rear_geom = group.loc[group['side'] == 'rear', 'geometry']
    for idx in group.index[group['side'].isnull() & parallel]:
        geom = group.loc[idx, 'geometry']
        distance_to_front = geom.distance(front_geom.iloc[0]) if not front_geom.empty else float('inf')
        distance_to_rear = geom.distance(rear_geom.iloc[0]) if not rear_geom.empty else float('inf')
        group.loc[idx, 'side'] = 'front' if distance_to_front < distance_to_rear else 'rear'

    # Step 4: edges between two front / two rear edges, or inside their
    # bounding box. The front/rear sets are re-read for every row on purpose,
    # so labels assigned earlier in this loop are taken into account.
    for idx in group.index[group['side'].isnull()]:
        front_edges = group[group['side'] == 'front']
        rear_edges = group[group['side'] == 'rear']
        enough_front = len(front_edges) >= 2
        enough_rear = len(rear_edges) >= 2
        # Only proceed if there are at least 2 front or 2 rear edges
        if not (enough_front or enough_rear):
            continue
        geom = group.loc[idx, 'geometry']
        front_touch_count = sum(geom.touches(other) for other in front_edges['geometry'])
        rear_touch_count = sum(geom.touches(other) for other in rear_edges['geometry'])
        within_front_boundary = enough_front and geom.within(_bounding_box(front_edges))
        within_rear_boundary = enough_rear and geom.within(_bounding_box(rear_edges))
        if front_touch_count >= 2 or within_front_boundary:
            group.loc[idx, 'side'] = 'front'
        elif rear_touch_count >= 2 or within_rear_boundary:
            group.loc[idx, 'side'] = 'rear'

    # Step 5: fill the remaining sides based on shared_side
    group.loc[group['side'].isnull() & (group['shared_side'] == True), 'side'] = 'Interior side'
    group.loc[group['side'].isnull() & (group['shared_side'] == False), 'side'] = 'Exterior side'
    return group

# Label the edges parcel by parcel, then drop the outermost index level of the result
jagged_parcel = (
    jagged_parcel
    .groupby('parcel_id')
    .apply(classify_jagged_edges)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 42533552 NaN 5184 2301 HIGGINS LN NaN F1 jagged parcel 1 Higgins Ln 1.240656 True 6.0 1.082932 True 1.565843 1.548487 0.994419 149.806487 rear LINESTRING (-10829918.831 3867857.171, -108299...
1 42533552 NaN 5184 2301 HIGGINS LN NaN F1 jagged parcel 1 Higgins Ln 1.240656 True 6.0 1.082932 True -0.002716 1.548487 88.877354 95.078816 Interior side LINESTRING (-10829917.996 3868025.709, -108299...
2 42533552 NaN 5184 2301 HIGGINS LN NaN F1 jagged parcel 1 Higgins Ln 1.240656 True 6.0 1.082932 False -1.574205 1.548487 1.082932 10.453269 front LINESTRING (-10829778.219 3868025.329, -108297...
3 42533552 NaN 5184 2301 HIGGINS LN NaN F1 jagged parcel 1 Higgins Ln 1.240656 True 6.0 1.082932 True 3.136596 1.548487 89.008038 44.039257 front LINESTRING (-10829778.602 3867912.889, -108297...
4 42533552 NaN 5184 2301 HIGGINS LN NaN F1 jagged parcel 1 Higgins Ln 1.240656 True 6.0 1.082932 True -1.575772 1.548487 0.993186 78.203028 front LINESTRING (-10829847.909 3867913.235, -108298...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
908 02020203 NaN 2508571 1520 OAKHURST SCENIC DR R A jagged parcel 1 Oakhurst Scenic Dr 1.275313 True 6.0 0.982940 True -0.973416 1.406463 43.643017 111.975728 Interior side LINESTRING (-10833344.364 3866621.595, -108333...
909 02020203 NaN 2508571 1520 OAKHURST SCENIC DR R A jagged parcel 1 Oakhurst Scenic Dr 1.275313 True 6.0 0.982940 True -2.046740 1.406463 17.853964 102.718355 rear LINESTRING (-10833343.015 3866619.612, -108333...
910 02020203 NaN 2508571 1520 OAKHURST SCENIC DR R A jagged parcel 1 Oakhurst Scenic Dr 1.275313 True 6.0 0.982940 True 3.116552 1.406463 82.019113 64.588715 Interior side LINESTRING (-10833376.420 3866554.807, -108334...
911 02020203 NaN 2508571 1520 OAKHURST SCENIC DR R A jagged parcel 1 Oakhurst Scenic Dr 1.275313 True 6.0 0.982940 False 1.423618 1.406463 0.982940 38.115761 front LINESTRING (-10833428.420 3866556.109, -108334...
912 02020203 NaN 2508571 1520 OAKHURST SCENIC DR R A jagged parcel 1 Oakhurst Scenic Dr 1.275313 True 6.0 0.982940 True -0.357955 1.406463 78.906323 63.550032 Interior side LINESTRING (-10833418.480 3866623.155, -108333...

913 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

05.Curve Parcels Edges

The edges of “Curve Parcels” are classified by the classify_curve_edges function, which analyzes the spatial relationship between parcel edges and their corresponding road segments.

Select Parcels of Specific Classification

This step isolates “Curve Parcels” from the entire dataset using the parcel_label column and calculates the angle and distance between each parcel edge and its nearest road segment with the same address.

Code
# Select the edges of "curve" parcels. The explicit .copy() yields an
# independent frame, so the column assignments below do not write into a slice
# of parcel_seg (which triggers pandas' SettingWithCopyWarning).
curve_parcel = parcel_seg[parcel_seg['parcel_label'] == 'curve parcel'].copy()
# Bearing of each parcel edge and of its associated road segment
curve_parcel['parcel_bearing'] = curve_parcel['geometry'].apply(fun_bearing_ra)
curve_parcel['road_bearing'] = curve_parcel['road_geometry'].apply(fun_bearing_ra)
# Angle between each parcel edge and its associated road segment
curve_parcel['angle'] = curve_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# Distance from the centroid of each parcel edge to its associated road segment
curve_parcel['distance_to_road'] = curve_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

  • Label the edge with the smallest distance as the “front” and the edge with the greatest distance as the “rear”.
    • For the remaining edges:
      • If an edge has an angle less than 20 degrees to the nearest road and is geometrically connected to the “front”, it should also be labeled as “front”.
      • If an edge has an angle less than 20 degrees to the nearest road and is geometrically connected to the “rear”, it should also be labeled as “rear”.
  • For the remaining edges:
    • If it is shared with another parcel, label it as “Interior side”.
    • If it is not a shared edge, label it as “Exterior side”.
Code
def classify_curve_edges(group):
    """
    Label the edges of one curve parcel as 'front', 'rear', 'Interior side' or 'Exterior side'.

    Rules, applied in order:
      1. The edge with the smallest 'distance_to_road' is 'front'; the edge
         with the largest is 'rear'.
      2. An unlabelled edge whose 'angle' is below 20 and whose geometry
         touches the front edge becomes 'front'; otherwise, if it touches the
         rear edge, it becomes 'rear'.
      3. Remaining edges become 'Interior side' when shared_side is True and
         'Exterior side' when shared_side is False.

    Parameters
    ----------
    group : pandas.DataFrame
        Edges of a single parcel. The columns 'distance_to_road', 'angle',
        'geometry' and 'shared_side' are read.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with a freshly computed 'side' column. The input
        frame is left untouched.
    """
    # Work on a copy and start from an empty 'side' column, as the other
    # classifiers in this notebook do. Without the reset, any non-null value
    # already present in 'side' (e.g. the string 'nan') would block rules 2-3.
    group = group.copy()
    group['side'] = None

    idx_front = group['distance_to_road'].idxmin()
    idx_rear = group['distance_to_road'].idxmax()
    # 'rear' is written first so that 'front' wins when both indices coincide
    # (single-edge group or all distances equal).
    group.loc[idx_rear, 'side'] = 'rear'
    group.loc[idx_front, 'side'] = 'front'

    # Check for angle < 20 and adjacency to the front or rear edge
    front_geom = group.loc[idx_front, 'geometry']
    rear_geom = group.loc[idx_rear, 'geometry']
    for idx, row in group.iterrows():
        if pd.isnull(row['side']) and row['angle'] < 20:
            if front_geom.touches(row['geometry']):
                group.loc[idx, 'side'] = 'front'
            elif rear_geom.touches(row['geometry']):
                group.loc[idx, 'side'] = 'rear'

    # Fill the remaining edges based on shared_side; the two masks are disjoint,
    # so the "still unlabelled" mask only needs to be computed once.
    unlabelled = group['side'].isnull()
    group.loc[unlabelled & (group['shared_side'] == True), 'side'] = 'Interior side'
    group.loc[unlabelled & (group['shared_side'] == False), 'side'] = 'Exterior side'
    return group

# Label the edges parcel by parcel, then drop the outermost index level of the result
curve_parcel = (
    curve_parcel
    .groupby('parcel_id')
    .apply(classify_curve_edges)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 06364012 NaN 5816 1208 KELPIE CT R A curve parcel 1 Kelpie Ct 1.161037 False 4.0 2.469301 True 3.110095 1.464124 85.692780 21.192388 nan LINESTRING (-10833055.060 3866062.389, -108330...
1 06364012 NaN 5816 1208 KELPIE CT R A curve parcel 1 Kelpie Ct 1.161037 False 4.0 2.469301 True 1.237052 1.464124 13.010266 6.013322 front LINESTRING (-10833101.797 3866063.862, -108331...
2 06364012 NaN 5816 1208 KELPIE CT R A curve parcel 1 Kelpie Ct 1.161037 False 4.0 2.469301 True 0.431157 1.464124 59.184652 34.893701 nan LINESTRING (-10833095.942 3866080.749, -108330...
3 06364012 NaN 5816 1208 KELPIE CT R A curve parcel 1 Kelpie Ct 1.161037 False 4.0 2.469301 True -1.565864 1.464124 6.394496 46.496151 rear LINESTRING (-10833055.242 3866099.472, -108330...
4 00011401 NaN 5821 2227 WESTBROOK AVE R A curve parcel 1 Westbrook Ave 1.224674 False 8.0 7.108440 True 2.694497 0.678219 64.475800 50.298705 nan LINESTRING (-10832902.319 3866020.390, -108329...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
248 42912677 NaN 2508572 OAKHURST SCENIC DR NaN C1 curve parcel 1 Oakhurst Scenic Dr 1.118515 False 4.0 31.919957 True -0.973398 1.421780 42.766453 91.998429 nan LINESTRING (-10833379.483 3866673.215, -108333...
249 00011398 NaN 2634961 2301 WESTBROOK AVE R A curve parcel 1 Westbrook Ave 1.219881 False 4.0 6.344358 True 1.563485 0.678219 50.722006 37.382167 nan LINESTRING (-10832870.931 3866069.051, -108328...
250 00011398 NaN 2634961 2301 WESTBROOK AVE R A curve parcel 1 Westbrook Ave 1.219881 False 4.0 6.344358 True -0.011160 0.678219 39.498493 73.935535 rear LINESTRING (-10832870.378 3866144.630, -108328...
251 00011398 NaN 2634961 2301 WESTBROOK AVE R A curve parcel 1 Westbrook Ave 1.219881 False 4.0 6.344358 True -1.575769 0.678219 50.856033 49.753339 nan LINESTRING (-10832832.485 3866144.207, -108328...
252 00011398 NaN 2634961 2301 WESTBROOK AVE R A curve parcel 1 Westbrook Ave 1.219881 False 4.0 6.344358 False -2.842809 0.678219 21.740015 9.814554 front LINESTRING (-10832832.801 3866080.795, -108328...

253 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

06.Cul_De_Sac Parcels Edges

The classification of “Cul_De_Sac Parcels” was completed using the classify_cul_de_sac_edges function, which analyzes the spatial relationship between parcel edges and their corresponding road segments based on distance, angle, and shared properties.

Select Parcels of Specific Classification

This step isolates the “Cul_De_Sac Parcels” from the entire dataset using the parcel_label column, then calculates the angle and distance between each parcel edge and the nearest road segment with the same address.

Code
# Select the edges of parcels labelled 'cul_de_sac parcel'. An explicit copy is
# taken so the column assignments below write to an independent frame rather
# than to a boolean-mask slice of parcel_seg (avoids SettingWithCopyWarning).
cul_de_sac_parcel = parcel_seg[parcel_seg['parcel_label'] == 'cul_de_sac parcel'].copy()
# fun_bearing_ra is applied to the edge geometry and to the matched road geometry
cul_de_sac_parcel['parcel_bearing'] = cul_de_sac_parcel['geometry'].apply(fun_bearing_ra)
cul_de_sac_parcel['road_bearing'] = cul_de_sac_parcel['road_geometry'].apply(fun_bearing_ra)
# calculate the angle between each parcel seg and its road geometry
cul_de_sac_parcel['angle'] = cul_de_sac_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# calculate the distance between the centroid of each parcel seg and its road geometry
cul_de_sac_parcel['distance_to_road'] = cul_de_sac_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

  • Find the edge that is farthest from the road and label it as rear.
  • Check the number of “non-shared edges”:
    • If there is only one “non-shared edge”:
      • Label it as front.
    • If there are multiple “non-shared edges”:
      • Check whether these “non-shared edges” are connected:
        • If all “non-shared edges” are connected:
          • Label them all as front.
        • If they are not all connected:
          • Select the one that is closest to the road and label it as front.
  • For the remaining edges:
    • If it is a shared edge (shared_side == True):
      • Label it as Interior side.
    • If it is a non-shared edge (shared_side == False):
      • Label it as Exterior side.
Code
def _non_shared_edges_all_connected(geometries):
    """
    Return True when every geometry can be reached from the first one by
    hopping between edges that are_segments_connected reports as connected.
    """
    reached = {0}
    frontier = [0]
    while frontier:
        current = frontier.pop()
        for other in range(len(geometries)):
            if other in reached:
                continue
            # Both argument orders are tried, as the original pairwise scan did
            if (are_segments_connected(geometries[current], geometries[other])
                    or are_segments_connected(geometries[other], geometries[current])):
                reached.add(other)
                frontier.append(other)
    return len(reached) == len(geometries)


# Modified classify_cul_de_sac_edges function
def classify_cul_de_sac_edges(group):
    """
    This function classifies cul-de-sac parcel edges based on their distance to the road and connectivity.
    It assigns each edge to either 'rear', 'front', 'Interior side', or 'Exterior side' based on certain rules.

    Parameters
    ----------
    group : pandas.DataFrame
        Edges of a single parcel. The columns 'distance_to_road',
        'shared_side' and (when more than one edge is non-shared) 'geometry'
        are read.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with a freshly computed 'side' column. The input
        frame is left untouched.
    """
    # Work on a copy and start from an empty 'side' column, as the other
    # classifiers in this notebook do, so stale non-null values cannot block
    # the fill in step 7.
    group = group.copy()
    group['side'] = None

    # 1. Mark the farthest edge from the road as 'rear'
    idx_max_distance = group['distance_to_road'].idxmax()
    group.loc[idx_max_distance, 'side'] = 'rear'
    # 2. Select edges where shared_side == False (i.e., edges that are not shared with adjacent parcels)
    non_shared_edges = group[group['shared_side'] == False]
    # NOTE(review): a 'front' assigned below overwrites 'rear' when the farthest
    # edge is itself non-shared; this ordering is kept from the original.
    if len(non_shared_edges) > 1:
        # 3./4. Only when ALL non-shared edges hang together are they all 'front'.
        # The earlier any()-over-pairs test also accepted groups in which just
        # one pair was connected, contradicting the stated rule.
        if _non_shared_edges_all_connected(list(non_shared_edges['geometry'])):
            group.loc[non_shared_edges.index, 'side'] = 'front'
        else:
            # 5. If not all are connected, select the closest one to the road and mark it as 'front'
            idx_min_distance = non_shared_edges['distance_to_road'].idxmin()
            group.loc[idx_min_distance, 'side'] = 'front'
    elif len(non_shared_edges) == 1:
        # 6. If there is only one non-shared edge, mark it as 'front'
        group.loc[non_shared_edges.index, 'side'] = 'front'
    # 7. Fill remaining unlabelled edges based on the shared_side property
    unlabelled = group['side'].isnull()
    group.loc[unlabelled & (group['shared_side'] == True), 'side'] = 'Interior side'
    group.loc[unlabelled & (group['shared_side'] == False), 'side'] = 'Exterior side'
    return group

# Label the edges parcel by parcel, then drop the outermost index level of the result
cul_de_sac_parcel = (
    cul_de_sac_parcel
    .groupby('parcel_id')
    .apply(classify_cul_de_sac_edges)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 01857177 NaN 24406 1217 BEVANS ST R A cul_de_sac parcel 1 Bevans St 1.236257 True 5.0 3.231632 False -2.125962 1.498791 27.683060 11.770061 front LINESTRING (-10832513.377 3866174.320, -108325...
1 01857177 NaN 24406 1217 BEVANS ST R A cul_de_sac parcel 1 Bevans St 1.236257 True 5.0 3.231632 True 3.141163 1.498791 85.899024 37.929210 nan LINESTRING (-10832520.626 3866162.633, -108325...
2 01857177 NaN 24406 1217 BEVANS ST R A cul_de_sac parcel 1 Bevans St 1.236257 True 5.0 3.231632 True 1.538834 1.498791 2.294263 62.058368 rear LINESTRING (-10832568.114 3866162.653, -108325...
3 01857177 NaN 24406 1217 BEVANS ST R A cul_de_sac parcel 1 Bevans St 1.236257 True 5.0 3.231632 True -0.007115 1.498791 86.282045 35.463996 nan LINESTRING (-10832567.401 3866184.934, -108325...
4 01857177 NaN 24406 1217 BEVANS ST R A cul_de_sac parcel 1 Bevans St 1.236257 True 5.0 3.231632 False -1.586399 1.498791 3.231632 8.145203 front LINESTRING (-10832513.218 3866184.549, -108325...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
180 02020211 NaN 2508566 2000 DAISY LN NaN C1 cul_de_sac parcel 1 Daisy Ln 1.673334 True 8.0 85.050229 True -2.550864 0.683790 5.332045 52.380546 nan LINESTRING (-10833277.728 3866705.277, -108333...
181 02020211 NaN 2508566 2000 DAISY LN NaN C1 cul_de_sac parcel 1 Daisy Ln 1.673334 True 8.0 85.050229 True -3.067003 0.683790 34.904639 44.388804 nan LINESTRING (-10833352.738 3866654.975, -108333...
182 02020211 NaN 2508566 2000 DAISY LN NaN C1 cul_de_sac parcel 1 Daisy Ln 1.673334 True 8.0 85.050229 True 2.168197 0.683790 85.050229 28.871136 nan LINESTRING (-10833366.380 3866653.955, -108333...
183 02020211 NaN 2508566 2000 DAISY LN NaN C1 cul_de_sac parcel 1 Daisy Ln 1.673334 True 8.0 85.050229 False 0.232059 0.683790 25.882267 56.362080 front LINESTRING (-10833379.483 3866673.215, -108333...
184 02020211 NaN 2508566 2000 DAISY LN NaN C1 cul_de_sac parcel 1 Daisy Ln 1.673334 True 8.0 85.050229 True -1.773116 0.683790 39.229653 179.760661 rear LINESTRING (-10833131.088 3866731.915, -108331...

185 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

07.No Match Address Parcels Edges

First, the parcels labeled as “No Match Address” are isolated. Because they cannot be matched to a road segment, their angle and distance to a road are left empty. Next, we use the classify_no_match_address_sides function to assign a side type to each edge.

Select Parcels of Specific Classification

This step isolates the “No Match Address Parcels” from the entire dataset using the parcel_label column. Because these parcels cannot be matched to a road centerline, road_bearing, angle and distance_to_road are set to None; only parcel_bearing is computed from the edge geometry.

Code
# Select the edges of parcels labelled 'no_match_address'. An explicit copy is
# taken so the column assignments below write to an independent frame rather
# than to a boolean-mask slice of parcel_seg (avoids SettingWithCopyWarning).
no_match_address_parcel = parcel_seg[parcel_seg['parcel_label'] == 'no_match_address'].copy()
# Only the parcel edge itself gets a bearing (fun_bearing_ra on its geometry)
no_match_address_parcel['parcel_bearing'] = no_match_address_parcel['geometry'].apply(fun_bearing_ra)
# No road-related values are computed for this class: the columns are created
# empty so the frame has the same columns as the other classes.
no_match_address_parcel['road_bearing'] = None
no_match_address_parcel['angle'] = None

no_match_address_parcel['distance_to_road'] = None

Label Parcel Edges

  • Filter the parcel edges where the angle is less than 20 degrees (indicating that the edge is nearly parallel to the road).
    • Among these filtered edges, identify the edge with the smallest distance to the road and label it as front.
    • Identify the edge with the largest distance to the road and label it as rear.
  • For the remaining edges:
    • If it is a shared edge (shared_side == True):
      • Label it as Interior side.
    • If it is a non-shared edge (shared_side == False):
      • Label it as Exterior side.
Code
def calculate_temangle_difference(bearing1, bearing2):
    """
    Return the acute angle, in degrees, between two bearings given in radians.

    The raw difference is wrapped once by a full turn when it falls outside
    [-pi, pi], converted to an absolute value in degrees, and then folded so
    that the smaller of the angle and its supplement is returned.
    """
    full_turn = 2 * math.pi
    diff = bearing2 - bearing1
    # Bring the difference back into [-pi, pi] (a single wrap is applied)
    if diff > math.pi:
        diff = diff - full_turn
    elif diff < -math.pi:
        diff = diff + full_turn
    degrees = math.degrees(abs(diff))
    supplement = 180 - degrees
    # Same choice as min(degrees, supplement)
    return degrees if degrees <= supplement else supplement

def classify_no_match_address_sides(group):
    """
    Label the edges of one 'no_match_address' parcel without using any road data.

    Rules:
      * If exactly one edge has shared_side == False, that edge is 'front'.
        Among the shared edges whose bearing differs from the front bearing
        by less than 15 degrees (calculate_temangle_difference), the one
        whose midpoint is farthest from the front midpoint is 'rear'.
      * Every edge still unlabelled becomes 'Interior side' when
        shared_side is True and 'Exterior side' otherwise.

    Parameters
    ----------
    group : pandas.DataFrame
        Edges of a single parcel. 'shared_side' is always read;
        'parcel_bearing' and 'geometry' are read only when exactly one edge
        is non-shared.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with a 'side' column. The input frame is left
        untouched and no scratch columns ('temp_angle', 'centroid_point',
        'distance_to_front') are added to the result.
    """
    # Copy first: the frame handed over by groupby.apply must not be mutated.
    group = group.copy()
    group['side'] = None

    shared = group['shared_side'] == True
    non_shared = group['shared_side'] == False

    if non_shared.sum() == 1:
        # The single non-shared edge is the front
        group.loc[non_shared, 'side'] = 'front'
        front_row = group.loc[non_shared].iloc[0]
        front_bearing = front_row['parcel_bearing']
        front_midpoint = front_row['geometry'].interpolate(0.5, normalized=True)

        # Rear candidates: shared edges within 15 degrees of the front bearing,
        # each with the distance between its midpoint and the front midpoint.
        candidate_labels = []
        candidate_distances = []
        for idx, row in group[shared].iterrows():
            if calculate_temangle_difference(front_bearing, row['parcel_bearing']) < 15:
                midpoint = row['geometry'].interpolate(0.5, normalized=True)
                candidate_labels.append(idx)
                candidate_distances.append(midpoint.distance(front_midpoint))
        if candidate_labels:
            # idxmax keeps the first-occurrence tie-breaking of the original
            rear_index = pd.Series(candidate_distances, index=candidate_labels).idxmax()
            group.loc[rear_index, 'side'] = 'rear'

    # Remaining shared edges are interior sides; whatever is left is exterior
    group.loc[group['side'].isnull() & shared, 'side'] = 'Interior side'
    group.loc[group['side'].isnull(), 'side'] = 'Exterior side'
    return group

# Label the edges parcel by parcel, drop the outermost index level of the
# result, and remove the scratch columns if the classifier left any behind
no_match_address_parcel = (
    no_match_address_parcel
    .groupby('parcel_id')
    .apply(classify_no_match_address_sides)
    .reset_index(level=0, drop=True)
    .drop(columns=['temp_angle', 'centroid_point', 'distance_to_front'], errors='ignore')
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 42369752 NaN 1061799 2212 SCENIC BLUFF DR R A no_match_address 0 NaN 1.138063 False 4.0 NaN True 1.568891 NaN NaN NaN Interior side LINESTRING (-10832963.891 3865857.217, -108329...
1 42369752 NaN 1061799 2212 SCENIC BLUFF DR R A no_match_address 0 NaN 1.138063 False 4.0 NaN False -0.002076 NaN NaN NaN front LINESTRING (-10832963.830 3865888.808, -108329...
2 42369752 NaN 1061799 2212 SCENIC BLUFF DR R A no_match_address 0 NaN 1.138063 False 4.0 NaN True -1.570049 NaN NaN NaN Interior side LINESTRING (-10832939.607 3865888.758, -108329...
3 42369752 NaN 1061799 2212 SCENIC BLUFF DR R A no_match_address 0 NaN 1.138063 False 4.0 NaN True -3.138367 NaN NaN NaN rear LINESTRING (-10832939.583 3865857.295, -108329...
Make this Notebook Trusted to load map: File -> Trust Notebook

08.No Address Parcels Edges

Select Parcels of Specific Classification

This step isolates the “No Address Parcels” from the entire dataset using the parcel_label column, then calculates the angle and distance between each parcel edge and the nearest road segment with the same address.

Code
# Select the edges of parcels labelled 'parcel without address'. An explicit
# copy is taken so the column assignments below write to an independent frame
# rather than to a boolean-mask slice of parcel_seg (avoids SettingWithCopyWarning).
no_address_parcel = parcel_seg[parcel_seg['parcel_label'] == 'parcel without address'].copy()
# fun_bearing_ra is applied to the edge geometry and to the matched road geometry
no_address_parcel['parcel_bearing'] = no_address_parcel['geometry'].apply(fun_bearing_ra)
no_address_parcel['road_bearing'] = no_address_parcel['road_geometry'].apply(fun_bearing_ra)
# calculate the angle between each parcel seg and its road geometry
no_address_parcel['angle'] = no_address_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# calculate the distance between the centroid of each parcel seg and its road geometry
no_address_parcel['distance_to_road'] = no_address_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

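We applied the angle-and-distance rule: among the edges whose angle to the road is less than 20 degrees, the one closest to the road is labeled front and the one farthest from the road is labeled rear. Remaining shared edges are labeled Interior side and all other edges Exterior side.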

Code
# Group by 'parcel_id' and perform the operations within each group
def classify_no_address_sides(group):
    """
    Label the edges of one address-less parcel as 'front', 'rear', 'Interior side' or 'Exterior side'.

    Among the edges whose 'angle' is below 20, the one with the smallest
    'distance_to_road' is 'front' and the one with the largest is 'rear'.
    Every other edge becomes 'Interior side' when shared_side is True and
    'Exterior side' otherwise.

    Parameters
    ----------
    group : pandas.DataFrame
        Edges of a single parcel. The columns 'angle', 'distance_to_road' and
        'shared_side' are read.

    Returns
    -------
    pandas.DataFrame
        A copy of ``group`` with a 'side' column. The input frame is left
        untouched.
    """
    # Copy first: the frame handed over by groupby.apply must not be mutated.
    group = group.copy()
    group['side'] = None

    # Step 1: only edges with an angle below 20 degrees can be front or rear
    near_parallel = group[group['angle'] < 20]
    if not near_parallel.empty:
        idx_front = near_parallel['distance_to_road'].idxmin()
        idx_rear = near_parallel['distance_to_road'].idxmax()
        # 'rear' is written first so that 'front' wins when there is a single
        # candidate (idx_front == idx_rear). Writing 'front' first left such
        # parcels with a rear edge and no front edge at all.
        group.loc[idx_rear, 'side'] = 'rear'
        group.loc[idx_front, 'side'] = 'front'

    # Step 2: remaining shared edges are interior sides
    group.loc[group['side'].isnull() & (group['shared_side'] == True), 'side'] = 'Interior side'

    # Step 3: everything still unlabelled is an exterior side
    group.loc[group['side'].isnull(), 'side'] = 'Exterior side'

    return group

# Label the edges parcel by parcel, then drop the outermost index level of the result
no_address_parcel = (
    no_address_parcel
    .groupby('parcel_id')
    .apply(classify_no_address_sides)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 NaN NaN 16812 NaN NaN NaN parcel without address NaN NaN 1.458189 True 4.0 NaN False -2.980462 -0.113052 15.709551 15.006462 rear LINESTRING (-10833274.714 3866274.355, -108332...
1 NaN NaN 16812 NaN NaN NaN parcel without address NaN NaN 1.458189 True 4.0 NaN True 0.779535 -0.113052 51.141482 37.141229 Interior side LINESTRING (-10833341.683 3866263.470, -108333...
2 NaN NaN 16812 NaN NaN NaN parcel without address NaN NaN 1.458189 True 4.0 NaN True -0.780332 -0.113052 38.232332 69.719233 Interior side LINESTRING (-10833268.388 3866335.910, -108332...
3 NaN NaN 16812 NaN NaN NaN parcel without address NaN NaN 1.458189 True 4.0 NaN True -1.927666 -0.113052 76.030293 38.977050 Interior side LINESTRING (-10833256.246 3866323.890, -108332...
4 NaN NaN 400063 NaN NaN NaN parcel without address NaN NaN 1.138871 False 4.0 NaN False 1.563479 1.570795 0.419200 9.885977 front LINESTRING (-10831442.243 3865858.228, -108314...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
73 NaN NaN 594555 NaN NaN NaN parcel without address NaN NaN 1.206320 False 4.0 NaN True -2.915415 0.251608 1.457034 74.157746 rear LINESTRING (-10833293.831 3867161.213, -108332...
74 NaN NaN 1061840 NaN NaN NaN parcel without address NaN NaN 1.168514 False 4.0 NaN True 2.543733 0.732755 76.238633 20.800149 Interior side LINESTRING (-10832886.360 3865766.999, -108329...
75 NaN NaN 1061840 NaN NaN NaN parcel without address NaN NaN 1.168514 False 4.0 NaN False 0.944988 0.732755 12.160033 32.958734 rear LINESTRING (-10832910.065 3865783.142, -108329...
76 NaN NaN 1061840 NaN NaN NaN parcel without address NaN NaN 1.168514 False 4.0 NaN True -0.689966 0.732755 81.515947 19.675528 Interior side LINESTRING (-10832899.472 3865797.800, -108328...
77 NaN NaN 1061840 NaN NaN NaN parcel without address NaN NaN 1.168514 False 4.0 NaN False -2.192376 0.732755 12.402345 4.454693 front LINESTRING (-10832877.366 3865779.556, -108328...

78 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook

09.Duplicated Address Parcels Edges

Select Parcels of Specific Classification

This step isolates the “Duplicated Address Parcels” from the entire dataset using the parcel_label column, then calculates the angle and distance between each parcel edge and the nearest road segment with the same address.

Code
# Select the edges of parcels labelled 'duplicated address'. An explicit copy is
# taken so the column assignments below write to an independent frame rather
# than to a boolean-mask slice of parcel_seg (avoids SettingWithCopyWarning).
duplicated_address_parcel = parcel_seg[parcel_seg['parcel_label'] == 'duplicated address'].copy()
# fun_bearing_ra is applied to the edge geometry and to the matched road geometry
duplicated_address_parcel['parcel_bearing'] = duplicated_address_parcel['geometry'].apply(fun_bearing_ra)
duplicated_address_parcel['road_bearing'] = duplicated_address_parcel['road_geometry'].apply(fun_bearing_ra)
# calculate the angle between each parcel seg and its road geometry
duplicated_address_parcel['angle'] = duplicated_address_parcel.apply(
    lambda row: calculate_angle_difference(row['geometry'], row['road_geometry']), axis=1
)
# calculate the distance between the centroid of each parcel seg and its road geometry
duplicated_address_parcel['distance_to_road'] = duplicated_address_parcel.apply(
    lambda row: row['geometry'].centroid.distance(row['road_geometry']), axis=1
)

Label Parcel Edges

We applied the same classification process as used in the “Regular Inside Parcel” identification.

Code
# Label the edges parcel by parcel with classify_sides, then drop the outermost
# index level of the result
duplicated_address_parcel = (
    duplicated_address_parcel
    .groupby('parcel_id')
    .apply(classify_sides)
    .reset_index(level=0, drop=True)
)

Visualize the Results

Prop_ID GEO_ID parcel_id parcel_addr landuse landuse_spec parcel_label Found_Match match_road_address shape_index 50_threshold num_edges angle_difference shared_side parcel_bearing road_bearing angle distance_to_road side geometry
0 40779017 NaN 36515 710 NORTH FWY NaN C1 duplicated address 1 North Fwy 2.158450 True 5.0 14.406942 True 2.035121 1.721086 17.992917 359.071163 Interior side LINESTRING (-10833018.577 3864781.252, -108331...
1 40779017 NaN 36515 710 NORTH FWY NaN C1 duplicated address 1 North Fwy 2.158450 True 5.0 14.406942 True 1.469637 1.721086 14.406942 334.926757 front LINESTRING (-10833100.160 3864944.144, -108330...
2 40779017 NaN 36515 710 NORTH FWY NaN C1 duplicated address 1 North Fwy 2.158450 True 5.0 14.406942 True -1.099249 1.721086 18.406725 366.070858 Interior side LINESTRING (-10833096.919 3864976.080, -108330...
3 40779017 NaN 36515 710 NORTH FWY NaN C1 duplicated address 1 North Fwy 2.158450 True 5.0 14.406942 True -1.410783 1.721086 0.557141 400.620556 rear LINESTRING (-10833016.704 3864818.770, -108330...
4 40779017 NaN 36515 710 NORTH FWY NaN C1 duplicated address 1 North Fwy 2.158450 True 5.0 14.406942 False -2.791773 1.721086 78.567761 400.493449 Exterior side LINESTRING (-10833011.090 3864783.984, -108330...
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
265 03385922 NaN 400107 3715 E BELKNAP ST NaN F1 duplicated address 1 E Belknap St 1.126481 False NaN NaN False 1.785254 0.706891 61.785625 39.685612 Exterior side LINESTRING (-10830735.583 3865959.149, -108307...
266 03385922 NaN 400107 3715 E BELKNAP ST NaN F1 duplicated address 1 E Belknap St 1.126481 False NaN NaN False -2.483281 0.706891 2.783410 14.307807 front LINESTRING (-10830702.939 3865984.396, -108307...
267 01076388 NaN 402711 2725 GOLDENROD AVE NaN J3 duplicated address 1 Goldenrod Ave 1.726928 True NaN NaN False 3.136816 -0.002414 0.135367 7.046898 front LINESTRING (-10831997.446 3867087.973, -108320...
268 01076388 NaN 402711 2725 GOLDENROD AVE NaN J3 duplicated address 1 Goldenrod Ave 1.726928 True NaN NaN True 0.280935 -0.002414 16.234686 16.494547 rear LINESTRING (-10832062.493 3867088.283, -108320...
269 01076388 NaN 402711 2725 GOLDENROD AVE NaN J3 duplicated address 1 Goldenrod Ave 1.726928 True NaN NaN False -1.569166 -0.002414 89.768267 16.355130 Exterior side LINESTRING (-10831997.477 3867107.045, -108319...

270 rows × 20 columns

Make this Notebook Trusted to load map: File -> Trust Notebook