Files

1010 lines
38 KiB
Python

import cv2
import random
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from dataclasses import dataclass
from typing import Any, List, Dict, Optional, Union, Tuple
import torch
import requests
import math
from sklearn.neighbors import NearestNeighbors
## detection result dataclasses
@dataclass
class BoundingBox:
    """Axis-aligned rectangle described by its two corner coordinates."""

    xmin: int
    ymin: int
    xmax: int
    ymax: int

    @property
    def xyxy(self) -> List[float]:
        """Return the corners as ``[xmin, ymin, xmax, ymax]``."""
        corners = (self.xmin, self.ymin, self.xmax, self.ymax)
        return list(corners)
@dataclass
class DetectionResult:
    """One detection: confidence score, class label, box and optional mask.

    ``from_dict`` never sets ``mask``; it stays ``None`` until some other code
    assigns it.
    """

    score: float
    label: str
    box: BoundingBox
    # ``np.ndarray`` is the array type; ``np.array`` is only the factory
    # function and is not a valid type annotation.
    mask: Optional[np.ndarray] = None

    @classmethod
    def from_dict(cls, detection_dict: Dict) -> 'DetectionResult':
        """Build a result from a dict with ``score``, ``label`` and ``box`` keys.

        ``detection_dict['box']`` must itself be a mapping that provides
        ``xmin``, ``ymin``, ``xmax`` and ``ymax``.

        Raises:
            KeyError: if any of the keys listed above is missing.
        """
        score = detection_dict['score']
        label = detection_dict['label']
        corners = detection_dict['box']
        box = BoundingBox(
            xmin=corners['xmin'],
            ymin=corners['ymin'],
            xmax=corners['xmax'],
            ymax=corners['ymax'],
        )
        return cls(score=score, label=label, box=box)
def _annotate_binarize_mask(mask: np.ndarray) -> np.ndarray:
    """Return ``mask`` as a uint8 array that only contains 0 and 255."""
    m = mask
    if np.max(m) <= 1.0:
        # Bool, float-in-[0, 1] and 0/1 integer masks (including uint8 0/1)
        # must be scaled first; otherwise the 127 threshold below would wipe
        # out every foreground pixel.
        m = (m.astype(np.float32) * 255.0).astype(np.uint8)
    elif m.dtype != np.uint8:
        m = m.astype(np.uint8)
    return (m > 127).astype(np.uint8) * 255


def _annotate_simplify_contours(mask_uint8: np.ndarray, image_width: int, image_height: int,
                                polygon_epsilon: float) -> List[np.ndarray]:
    """Extract the outer contours of a binary mask as simplified pixel polygons."""
    contours, _ = cv2.findContours(mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
    image_area = float(image_height * image_width)
    scale = np.array([image_width, image_height], dtype=np.float64)
    polygons = []
    for contour in contours:
        # Filter out tiny blobs and near-full-frame masks
        area = cv2.contourArea(contour)
        if area < 100:  # too small
            continue
        if area / image_area > 0.85:  # too big (likely combined mask)
            continue
        # Work in normalised (u, v) coordinates so epsilon is resolution independent
        uv_points = (contour.reshape(-1, 2) / scale).astype(np.float32)
        normalized_arc_length = cv2.arcLength(uv_points, True)
        eps = max(polygon_epsilon * normalized_arc_length, 1e-4)
        # Adaptive simplification: avoid 4-corner collapse by halving epsilon
        # (at most five times) while the polygon has six vertices or fewer.
        approx = cv2.approxPolyDP(uv_points, eps, True)
        tries = 0
        while approx.shape[0] <= 6 and eps > 1e-6 and tries < 5:
            eps *= 0.5
            approx = cv2.approxPolyDP(uv_points, eps, True)
            tries += 1
        # Back to pixels. Rounding (rather than truncating) undoes the float32
        # round trip exactly, so vertices do not drift by one pixel.
        polygons.append(np.rint(approx.reshape(-1, 2) * scale).astype(np.int32))
    return polygons


def annotate(image: Union[Image.Image, np.ndarray], detection_results: List[DetectionResult], parameters: Dict) -> Tuple[np.ndarray, List, List, List]:
    """
    Draw detections on an image and derive polygons, key points and composition lines.
    Args:
    - image: RGB image as a PIL image or an array; it is converted to BGR for drawing.
    - detection_results: detections to draw; a detection whose ``mask`` is None only gets a box and label.
    - parameters: dict read for the keys 'polygon_epsilon', 'point_radius' and 'line_fit_tol'.
    Returns:
    - tuple ``(annotated_bgr_image, polygon_contours, lines, points)`` where
      polygon_contours is a list of [[x, y], ...] polygons, lines is a list of
      [[x1, y1], [x2, y2]] segments and points is a list of [x, y]; all integer pixels.
    """
    # Convert PIL Image to OpenCV format
    image_cv2 = np.array(image) if isinstance(image, Image.Image) else image
    image_cv2 = cv2.cvtColor(image_cv2, cv2.COLOR_RGB2BGR)
    image_height, image_width = image_cv2.shape[:2]

    ploygon_contours = []
    ploygon_contours_list = []
    # Iterate over detections and add bounding boxes and masks
    for detection in detection_results:
        box = detection.box
        mask = detection.mask
        # Sample a random color for each detection
        color = np.random.randint(0, 256, size=3).tolist()
        # Draw bounding box
        cv2.rectangle(image_cv2, (box.xmin, box.ymin), (box.xmax, box.ymax), color, 2)
        cv2.putText(image_cv2, f'{detection.label}: {detection.score:.2f}', (box.xmin, box.ymin - 10), cv2.FONT_HERSHEY_SIMPLEX, 1.5, color, 3)
        if mask is None:
            continue
        if not isinstance(mask, np.ndarray):
            # unsupported mask type
            print(f"[Annotate] Unsupported mask type: {type(mask)}; skipping")
            continue
        mask_uint8 = _annotate_binarize_mask(mask)
        approx_contours = _annotate_simplify_contours(
            mask_uint8, image_width, image_height, parameters['polygon_epsilon'])
        ploygon_contours.extend(approx_contours)
        ploygon_contours_list.extend(polygon.tolist() for polygon in approx_contours)
        if approx_contours:
            cv2.drawContours(image_cv2, approx_contours, -1, color, 10)

    #### draw composition lines
    print("Total polygons: ", len(ploygon_contours))
    polygon_areas = [cv2.contourArea(contour) for contour in ploygon_contours]
    ## Find global shortest edge length across all polygons
    valid_edges = []  # all non-zero edge lengths
    for contour in ploygon_contours:
        vertices = contour.reshape(-1, 2)
        edge_lengths = np.linalg.norm(np.roll(vertices, -1, axis=0) - vertices, axis=1)
        valid_edges.extend(edge_lengths[edge_lengths > 0].tolist())
    if not valid_edges:
        # No usable edge: fall back to 1% of the smallest image dimension
        shortest_edge = min(image_width, image_height) * 0.01
        print(f"[Warning] No valid edges found, using default shortest_edge: {shortest_edge:.2f}")
    else:
        shortest_edge = min(valid_edges)
        print(f"[Info] Found shortest_edge: {shortest_edge:.2f} from {len(valid_edges)} valid edges")

    ## get all points, each tagged with the index of the polygon it came from
    all_points = []
    for i, contour in enumerate(ploygon_contours):
        sampled_points = sample_contour_points(contour, shortest_edge=shortest_edge)
        all_points.extend((point, i) for point in sampled_points)
    ## merge similar points
    print("Total points: ", len(all_points))
    all_points_with_index = merge_similar_points(all_points, polygon_areas, image_cv2, radius=parameters['point_radius'])
    ## find lines that connects at least 4 points
    lines = fit_lines(all_points_with_index, image_cv2, line_fit_tol=parameters['line_fit_tol'], inlier_threshold=0.05)
    print("Total lines: ", len(lines))

    ## draw the points; the lines are returned but not drawn
    points_to_draw = []
    for (point, _index) in all_points_with_index:
        center = (int(point[0]), int(point[1]))
        cv2.circle(image_cv2, center, 10, (0, 0, 255), -1)
        points_to_draw.append([center[0], center[1]])
    lines_list = []
    for line in lines:
        ## segment from the leftmost to the rightmost point
        p1, p2 = line_leftmost_to_rightmost(line)
        lines_list.append([[int(p1[0]), int(p1[1])], [int(p2[0]), int(p2[1])]])
    return image_cv2, ploygon_contours_list, lines_list, points_to_draw
def random_named_css_colors(num_colors: int) -> List[str]:
    """
    Draw distinct named CSS colors at random.
    Args:
    - num_colors (int): How many colors are wanted; capped at the size of the palette.
    Returns:
    - list: Color names, without repetition.
    """
    # Palette of named CSS colors
    palette = [
        'aliceblue', 'antiquewhite', 'aqua', 'aquamarine', 'azure', 'beige', 'bisque', 'black', 'blanchedalmond',
        'blue', 'blueviolet', 'brown', 'burlywood', 'cadetblue', 'chartreuse', 'chocolate', 'coral', 'cornflowerblue',
        'cornsilk', 'crimson', 'cyan', 'darkblue', 'darkcyan', 'darkgoldenrod', 'darkgray', 'darkgreen', 'darkgrey',
        'darkkhaki', 'darkmagenta', 'darkolivegreen', 'darkorange', 'darkorchid', 'darkred', 'darksalmon', 'darkseagreen',
        'darkslateblue', 'darkslategray', 'darkslategrey', 'darkturquoise', 'darkviolet', 'deeppink', 'deepskyblue',
        'dimgray', 'dimgrey', 'dodgerblue', 'firebrick', 'floralwhite', 'forestgreen', 'fuchsia', 'gainsboro', 'ghostwhite',
        'gold', 'goldenrod', 'gray', 'green', 'greenyellow', 'grey', 'honeydew', 'hotpink', 'indianred', 'indigo', 'ivory',
        'khaki', 'lavender', 'lavenderblush', 'lawngreen', 'lemonchiffon', 'lightblue', 'lightcoral', 'lightcyan', 'lightgoldenrodyellow',
        'lightgray', 'lightgreen', 'lightgrey', 'lightpink', 'lightsalmon', 'lightseagreen', 'lightskyblue', 'lightslategray',
        'lightslategrey', 'lightsteelblue', 'lightyellow', 'lime', 'limegreen', 'linen', 'magenta', 'maroon', 'mediumaquamarine',
        'mediumblue', 'mediumorchid', 'mediumpurple', 'mediumseagreen', 'mediumslateblue', 'mediumspringgreen', 'mediumturquoise',
        'mediumvioletred', 'midnightblue', 'mintcream', 'mistyrose', 'moccasin', 'navajowhite', 'navy', 'oldlace', 'olive',
        'olivedrab', 'orange', 'orangered', 'orchid', 'palegoldenrod', 'palegreen', 'paleturquoise', 'palevioletred', 'papayawhip',
        'peachpuff', 'peru', 'pink', 'plum', 'powderblue', 'purple', 'rebeccapurple', 'red', 'rosybrown', 'royalblue', 'saddlebrown',
        'salmon', 'sandybrown', 'seagreen', 'seashell', 'sienna', 'silver', 'skyblue', 'slateblue', 'slategray', 'slategrey',
        'snow', 'springgreen', 'steelblue', 'tan', 'teal', 'thistle', 'tomato', 'turquoise', 'violet', 'wheat', 'white',
        'whitesmoke', 'yellow', 'yellowgreen'
    ]
    # Never ask random.sample for more items than the palette holds
    sample_size = min(num_colors, len(palette))
    return random.sample(palette, sample_size)
def mask_to_polygon(mask: np.ndarray) -> List[List[int]]:
    """
    Return the vertices of the largest external contour of a mask.
    Args:
    - mask (np.ndarray): Mask image; it is cast to uint8 before contour extraction.
    Returns:
    - list: ``[[x, y], ...]`` vertices of the contour with the largest area,
      or an empty list when the mask contains no contour at all.
    """
    # Find contours in the binary mask
    contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if len(contours) == 0:
        # An all-background mask has no contour; max() on an empty sequence would raise.
        return []
    # Find the contour with the largest area and extract its vertices
    largest_contour = max(contours, key=cv2.contourArea)
    return largest_contour.reshape(-1, 2).tolist()
def polygon_to_mask(polygon: List[Tuple[int, int]], image_shape: Tuple[int, int]) -> np.ndarray:
    """
    Convert a polygon to a segmentation mask.
    Args:
    - polygon (list): List of (x, y) coordinates representing the vertices of the polygon.
    - image_shape (tuple): Shape of the image (height, width) for the mask.
    Returns:
    - np.ndarray: uint8 mask with the polygon filled with 255 and 0 elsewhere.
      An empty polygon yields an all-zero mask.
    """
    # Create an empty mask
    mask = np.zeros(image_shape, dtype=np.uint8)
    if len(polygon) == 0:
        # Nothing to fill; skip the fillPoly call for an empty point set.
        return mask
    # Convert polygon to an array of points
    pts = np.array(polygon, dtype=np.int32)
    # Fill the polygon with white color (255)
    cv2.fillPoly(mask, [pts], color=(255,))
    return mask
def load_image(image_str: str) -> Image.Image:
    """
    Load an image from an HTTP(S) URL or a local path and convert it to RGB.
    Args:
    - image_str (str): URL starting with ``http://`` or ``https://``, otherwise a file path.
    Returns:
    - Image.Image: The decoded image in RGB mode.
    Raises:
    - requests.RequestException: on network errors, timeouts or a non-2xx HTTP status.
    """
    # Match the full scheme so local files such as "http_cache/img.png" are not treated as URLs.
    if image_str.startswith(("http://", "https://")):
        import io

        response = requests.get(image_str, timeout=30)
        # Fail on HTTP errors instead of handing an error page to the image decoder.
        response.raise_for_status()
        return Image.open(io.BytesIO(response.content)).convert("RGB")
    # convert() returns a fully loaded copy, so the file handle can be closed right away.
    with Image.open(image_str) as source:
        return source.convert("RGB")
def get_boxes(results: "List[DetectionResult]") -> List[List[List[float]]]:
    """
    Collect the ``xyxy`` box of every detection.
    Args:
    - results: Iterable of detections; each item must expose ``box.xyxy``.
    Returns:
    - list: A one-element list wrapping the list of ``[xmin, ymin, xmax, ymax]`` boxes.
    """
    return [[result.box.xyxy for result in results]]
def refine_masks(masks: torch.BoolTensor, polygon_refinement: bool = False) -> List[np.ndarray]:
    """
    Turn a batch of mask tensors into a list of uint8 arrays.

    The tensor is moved to the CPU, permuted with (0, 2, 3, 1), averaged over
    the last axis and every value above zero becomes 1.
    Args:
    - masks: 4-D mask tensor.
    - polygon_refinement: When True, each mask is replaced by the mask rebuilt
      from its polygon (``mask_to_polygon`` followed by ``polygon_to_mask``).
    Returns:
    - list: One uint8 array per mask in the batch.
    """
    averaged = masks.cpu().float().permute(0, 2, 3, 1).mean(axis=-1)
    binary = (averaged > 0).int().numpy().astype(np.uint8)
    refined = list(binary)
    if not polygon_refinement:
        return refined
    for position, single_mask in enumerate(refined):
        outline = mask_to_polygon(single_mask)
        refined[position] = polygon_to_mask(outline, single_mask.shape)
    return refined
def group_consecutive(numbers):
    """
    Pair every element with its successor.
    Args:
    - numbers: Sequence of items.
    Returns:
    - list: ``[[numbers[0], numbers[1]], [numbers[1], numbers[2]], ...]``; the
      last element is not paired back with the first.
    """
    return [[numbers[idx], numbers[idx + 1]] for idx in range(len(numbers) - 1)]
## first attempt to draw composition lines
def lines_from_collinear_edges(ploygon_contours, image_cv2):
    """
    Draw edges of different polygons that are collinear, plus an extended average line.

    Every edge of polygon i is compared with every edge of each later polygon j.
    For a pair that is collinear and does not share an endpoint, both edges are
    drawn, the pair is printed, and the averaged line is extended with
    ``extend_line_to_edge`` and drawn as well.

    Args:
        ploygon_contours: Sequence of contour arrays (each must support ``reshape(-1, 2)``).
        image_cv2: Image that is drawn on in place.

    Returns:
        None; the only outputs are the drawing on ``image_cv2`` and the prints.
    """
    ## finding collinear lines
    ## not comparing against itself
    ## lines from different detections might still overlap with each other since the detections might overlap
    for i in range(len(ploygon_contours)):
        for j in range(i+1, len(ploygon_contours)):
            # print(len(ploygon_contours[i]), len(ploygon_contours[j]))
            # Edges are consecutive vertex pairs; group_consecutive does not add the closing edge.
            lines_i = group_consecutive(ploygon_contours[i].reshape(-1, 2).tolist())
            lines_j = group_consecutive(ploygon_contours[j].reshape(-1, 2).tolist())
            # NOTE(review): counter is incremented below but never read.
            counter = 0
            for line_i in lines_i:
                for line_j in lines_j:
                    if are_lines_collinear(line_i, line_j, parallel_tol=1e-4, col_tol=0.05) and not are_lines_copoint(line_i, line_j, tol=10):
                        counter += 1
                        # plot the lines
                        cv2.line(image_cv2, (line_i[0][0], line_i[0][1]), (line_i[1][0], line_i[1][1]), (0, 255, 0), 40)
                        cv2.line(image_cv2, (line_j[0][0], line_j[0][1]), (line_j[1][0], line_j[1][1]), (0, 0, 255), 30)
                        print("Collinear")
                        print(line_i, line_j)
                        # Average the two edges endpoint by endpoint, then stretch the result across the image.
                        pt_1, pt_2 = average_lines(line_i, line_j)
                        # NOTE(review): pt_1/pt_2 are NumPy arrays here - confirm extend_line_to_edge copes with them.
                        pts = extend_line_to_edge(image_cv2.shape[:2], (pt_1, pt_2))
                        cv2.line(image_cv2, (pts[0][0], pts[0][1]), (pts[1][0], pts[1][1]), (255, 0, 0), 30)
def are_lines_copoint(line1, line2, tol=1e-9):
    """
    Tell whether two segments share an endpoint.
    Args:
        line1: Pair of points of the first segment.
        line2: Pair of points of the second segment.
        tol: Distance, in pixels, below which two endpoints count as the same point.
    Returns:
    - True if any endpoint of line1 is closer than ``tol`` to any endpoint of line2, False otherwise.
    """
    first_ends = (np.array(line1[0]), np.array(line1[1]))
    second_ends = (np.array(line2[0]), np.array(line2[1]))
    return any(
        np.linalg.norm(a - b) < tol
        for a in first_ends
        for b in second_ends
    )
def are_lines_collinear(line1, line2, parallel_tol=1e-9, col_tol=1e-9):
    """
    Check if two segments with the same orientation lie on one line (2D or 3D).
    Parameters:
    - line1: Tuple of two points defining the first line (e.g., ((x1, y1), (x2, y2)) or ((x1, y1, z1), (x2, y2, z2))).
    - line2: Tuple of two points defining the second line.
    - parallel_tol: Absolute tolerance on the dot product of the two unit directions.
    - col_tol: Absolute tolerance on the alignment between line1's direction and the
      vector from line1's first point to the nearer endpoint of line2.
    Returns:
    - True if the lines are collinear, False otherwise. Segments pointing in opposite
      directions are reported as not collinear (the direction test is signed), and a
      zero-length segment is never collinear with anything.
    """
    # Extract points from the input
    p1, p2 = np.array(line1[0], dtype=float), np.array(line1[1], dtype=float)
    q1, q2 = np.array(line2[0], dtype=float), np.array(line2[1], dtype=float)
    # Compute direction vectors of both lines
    span1, span2 = p2 - p1, q2 - q1
    length1, length2 = np.linalg.norm(span1), np.linalg.norm(span2)
    if length1 == 0 or length2 == 0:
        # A degenerate segment has no direction; avoid the 0/0 division.
        return False
    dir1 = span1 / length1
    dir2 = span2 / length2
    # Same direction <=> the dot product of the unit vectors is 1
    if not np.isclose(np.dot(dir1, dir2), 1.0, atol=parallel_tol):
        return False  # Lines are not parallel
    # Check that the endpoint of line2 nearest to p1 lies on line1's carrier line
    to_q1, to_q2 = q1 - p1, q2 - p1
    offset = to_q1 if np.linalg.norm(to_q1) < np.linalg.norm(to_q2) else to_q2
    offset_length = np.linalg.norm(offset)
    if offset_length == 0:
        # The nearest endpoint coincides with p1, so it trivially lies on line1.
        return True
    # abs(): the endpoint may sit before or after p1 along the line; both are
    # on the line, so the answer must not depend on the argument order.
    alignment_check = abs(np.dot(dir1, offset / offset_length))
    return bool(np.isclose(alignment_check, 1.0, atol=col_tol))
def average_lines(line1, line2):
    """
    Average two lines endpoint by endpoint (2D or 3D).
    Parameters:
    - line1: Two points defining the first line.
    - line2: Two points defining the second line.
    Returns:
    - Tuple of two integer arrays: the midpoint of the two first points and the
      midpoint of the two second points, each truncated with ``astype(int)``.
    """
    start_a, end_a = np.array(line1[0]), np.array(line1[1])
    start_b, end_b = np.array(line2[0]), np.array(line2[1])
    mid_start = (start_a + start_b) / 2
    mid_end = (end_a + end_b) / 2
    return mid_start.astype(int), mid_end.astype(int)
def extend_line_to_edge(dimensions, line, SCALE=10):
    """
    Stretch a segment in both directions and clip it to the image borders.

    Based on https://stackoverflow.com/questions/72083896/how-to-stretch-a-line-to-fit-image-with-python-opencv

    Args:
        dimensions: Image shape, either (h, w) or (h, w, channels).
        line: Pair of (x, y) points.
        SCALE: Factor applied to the longer image side; the product is the
            distance by which each end is pushed outwards before clipping.

    Returns:
        ((x1, y1), (x2, y2)) with integer coordinates for horizontal, vertical
        and sloped input lines.

    Raises:
        Exception: from the inner ``line_intersection`` when the two lines it
            is given have a zero determinant ('lines do not intersect').
    """
    p1 = line[0]
    p2 = line[1]
    # Calculate the intersection point given (x1, y1) and (x2, y2)
    def line_intersection(line1, line2):
        x_diff = (line1[0][0] - line1[1][0], line2[0][0] - line2[1][0])
        y_diff = (line1[0][1] - line1[1][1], line2[0][1] - line2[1][1])
        # 2x2 determinant of the two pairs
        def detect(a, b):
            return a[0] * b[1] - a[1] * b[0]
        div = detect(x_diff, y_diff)
        if div == 0:
            raise Exception('lines do not intersect')
        dist = (detect(*line1), detect(*line2))
        x = detect(dist, x_diff) / div
        y = detect(dist, y_diff) / div
        return int(x), int(y)
    x1, x2 = 0, 0
    y1, y2 = 0, 0
    # Extract w and h regardless of grayscale or BGR image
    # NOTE(review): w and h stay unbound when dimensions has any other length.
    if len(dimensions) == 3:
        h, w, _ = dimensions
    elif len(dimensions) == 2:
        h, w = dimensions
    # Take longest dimension and use it as maxed out distance
    if w > h:
        distance = SCALE * w
    else:
        distance = SCALE * h
    # Reorder smaller X or Y to be the first point
    # and larger X or Y to be the second point
    try:
        # The denominator is p1.x - p2.x, i.e. this is the negated dy/dx -
        # presumably because image y grows downwards; TODO confirm.
        # NOTE(review): ZeroDivisionError is raised for plain Python numbers;
        # NumPy scalars would yield inf/nan with a warning instead - confirm
        # what callers pass.
        slope = (p2[1] - p1[1]) / (p1[0] - p2[0])
        # HORIZONTAL or DIAGONAL
        if p1[0] <= p2[0]:
            x1, y1 = p1
            x2, y2 = p2
        else:
            x1, y1 = p2
            x2, y2 = p1
    except ZeroDivisionError:
        # VERTICAL
        if p1[1] <= p2[1]:
            x1, y1 = p1
            x2, y2 = p2
        else:
            x1, y1 = p2
            x2, y2 = p1
    # Extend after end-point A
    # NOTE(review): length_A and length_B are the same value; both are zero
    # when the two input points coincide, which makes the divisions below fail.
    length_A = math.sqrt((x2 - x1)**2 + (y2 - y1)**2)
    p3_x = int(x1 + (x1 - x2) / length_A * distance)
    p3_y = int(y1 + (y1 - y2) / length_A * distance)
    # Extend after end-point B
    length_B = math.sqrt((x1 - x2)**2 + (y1 - y2)**2)
    p4_x = int(x2 + (x2 - x1) / length_B * distance)
    p4_y = int(y2 + (y2 - y1) / length_B * distance)
    # --------------------------------------
    # Limit coordinates to borders of image
    # --------------------------------------
    # HORIZONTAL
    if y1 == y2:
        if p3_x < 0:
            p3_x = 0
        if p4_x > w:
            p4_x = w
        return ((p3_x, p3_y), (p4_x, p4_y))
    # VERTICAL
    elif x1 == x2:
        if p3_y < 0:
            p3_y = 0
        if p4_y > h:
            p4_y = h
        return ((p3_x, p3_y), (p4_x, p4_y))
    # DIAGONAL
    else:
        A = (p3_x, p3_y)
        B = (p4_x, p4_y)
        C = (0, 0) # C-------D
        D = (w, 0) # |-------|
        E = (w, h) # |-------|
        F = (0, h) # F-------E
        # NOTE(review): if slope is neither > 0 nor < 0 here (e.g. NaN) no
        # branch returns and the function falls through with None.
        if slope > 0:
            # 1st point, try C-F side first, if OTB then F-E
            new_x1, new_y1 = line_intersection((A, B), (C, F))
            if new_x1 > w or new_y1 > h:
                new_x1, new_y1 = line_intersection((A, B), (F, E))
            # 2nd point, try C-D side first, if OTB then D-E
            new_x2, new_y2 = line_intersection((A, B), (C, D))
            if new_x2 > w or new_y2 > h:
                new_x2, new_y2 = line_intersection((A, B), (D, E))
            return ((new_x1, new_y1), (new_x2, new_y2))
        elif slope < 0:
            # 1st point, try C-F side first, if OTB then C-D
            new_x1, new_y1 = line_intersection((A, B), (C, F))
            if new_x1 < 0 or new_y1 < 0:
                new_x1, new_y1 = line_intersection((A, B), (C, D))
            # 2nd point, try F-E side first, if OTB then E-D
            new_x2, new_y2 = line_intersection((A, B), (F, E))
            if new_x2 > w or new_y2 > h:
                new_x2, new_y2 = line_intersection((A, B), (E, D))
            return ((new_x1, new_y1), (new_x2, new_y2))
# get the y=mx+c equation from given points
def get_slope_and_intercept(pointA, pointB):
    """
    Return ``(m, c)`` of the line ``y = m*x + c`` through two points.

    The division by the x difference is unguarded, so two points with the same
    x make it fail (ZeroDivisionError for plain Python numbers).
    """
    rise = pointB[1] - pointA[1]
    run = pointB[0] - pointA[0]
    gradient = rise / run
    offset = pointB[1] - gradient * pointB[0]
    return gradient, offset
def sample_contour_points(contour, shortest_edge=1):
    """
    Place evenly spaced points on every edge of a closed contour.

    An edge of length L receives ``max(2, int(L // (2 * shortest_edge)))``
    points (capped at 10000), both edge endpoints included, so a vertex shared
    by two edges appears twice in the output.
    Args:
        contour: NumPy array of contour points (anything ``reshape(-1, 2)`` accepts)
        shortest_edge: Reference edge length controlling the sampling density;
            non-positive, infinite or NaN values are replaced by 10.0
    Returns:
        List of sampled points [[x1,y1], [x2,y2], ...]
    """
    # Degenerate contour: nothing to interpolate
    if len(contour) < 2:
        print(f"[Warning] Contour has only {len(contour)} points, returning as-is")
        return contour.reshape(-1, 2).tolist()
    if shortest_edge <= 0 or np.isinf(shortest_edge) or np.isnan(shortest_edge):
        print(f"[Warning] Invalid shortest_edge value: {shortest_edge}, using default")
        shortest_edge = 10.0  # Fallback default

    vertices = contour.reshape(-1, 2)
    vertex_count = len(vertices)
    samples = []
    for idx, start in enumerate(vertices):
        # The last vertex connects back to the first one
        end = vertices[(idx + 1) % vertex_count]
        length = np.linalg.norm(end - start)
        if length < 1e-6:
            # Zero-length edge contributes nothing
            continue
        count = max(2, int(length // (shortest_edge * 2)))
        count = min(count, 10000)  # Prevent excessive sampling
        for fraction in np.linspace(0, 1, count):
            # Linear interpolation between the two vertices
            samples.append([
                start[0] + fraction * (end[0] - start[0]),
                start[1] + fraction * (end[1] - start[1]),
            ])
    if not samples:
        # Every edge was skipped: hand back the raw vertices
        print("[Warning] No points sampled, returning original contour")
        return vertices.tolist()
    return samples
# a simple algorithm to find the hash of slope and intercept
def get_unique_id(slope, intercept):
    """
    Build a string key by concatenating ``str(slope)`` and ``str(intercept)``.

    No separator is inserted, so different pairs can map to the same key
    (for example (1, 23) and (12, 3) both give "123").
    """
    return "".join((str(slope), str(intercept)))
def exists_slope_intercept(slope, intercept, slope_intercepts, s_tol=1e-4, i_tol=1e-4):
    """
    Tell whether a (slope, intercept) pair is already present, within tolerance.
    Args:
        slope: Slope to look for.
        intercept: Intercept to look for.
        slope_intercepts: Iterable of items whose first two entries are slope and intercept.
        s_tol: Absolute tolerance passed to ``math.isclose`` for the slope.
        i_tol: Absolute tolerance passed to ``math.isclose`` for the intercept.
    Returns:
        True if some entry matches both values, False otherwise.
    """
    return any(
        math.isclose(slope, known[0], abs_tol=s_tol)
        and math.isclose(intercept, known[1], abs_tol=i_tol)
        for known in slope_intercepts
    )
def fit_lines(all_points_with_index, image_cv2, line_fit_tol=1, inlier_threshold=0.1):
    """
    Greedily extract lines from points with a RANSAC-style search.

    Each candidate line passes through two random points taken from two
    different polygons. The candidate with the most inliers is accepted when it
    is supported by at least 4 points and by at least ``inlier_threshold`` of
    the ORIGINAL number of points; its inliers are then removed and the search
    is repeated on the remaining points until no candidate is accepted.
    Args:
        all_points_with_index: Sequence of (point, polygon_index) pairs, where
            point is [x, y] in pixels.
        image_cv2: Only ``image_cv2.shape`` is read; it is used to normalise
            the points by image width and height.
        line_fit_tol: Maximum point-to-line distance, in normalised
            coordinates, for a point to count as an inlier.
        inlier_threshold: Minimum fraction of all points a line must explain.
            Also used as the inlier probability when deriving the number of
            random trials (success probability 0.99, 2 points per sample,
            result doubled).
    Returns:
        List of lines, where each line is the list of [x, y] pixel points that fit it.
    Raises:
        ValueError: if ``inlier_threshold`` is 0, for which no finite number of
            trials can be derived.
    """
    min_points_per_line = 4
    total_points = len(all_points_with_index)
    if total_points < min_points_per_line:
        return []

    image_height, image_width = image_cv2.shape[:2]
    ## convert all points to uv coordinates
    remaining_points = np.array(
        [[entry[0][0] / image_width, entry[0][1] / image_height] for entry in all_points_with_index]
    )
    remaining_indices = np.array([entry[1] for entry in all_points_with_index])

    # Number of trials: k = log(1 - p) / log(1 - w^s), doubled for safety.
    p_success = 0.99
    sample_size = 2
    pair_inlier_prob = inlier_threshold ** sample_size
    if pair_inlier_prob <= 0:
        raise ValueError("inlier_threshold must be non-zero")
    if pair_inlier_prob >= 1:
        # The formula degenerates (log of a non-positive number); one doubled
        # trial is enough when every point is required to be an inlier.
        ransac_iterations = 2
    else:
        k = int(np.ceil(np.log(1 - p_success) / np.log(1 - pair_inlier_prob)))
        ransac_iterations = 2 * k

    lines = []
    while len(remaining_points) >= min_points_per_line:
        # The point set only changes between outer iterations, so group the
        # points by polygon once here instead of once per random trial.
        unique_polygons = np.unique(remaining_indices)
        if len(unique_polygons) < 2:
            # A sample needs points from two different polygons.
            break
        groups = [remaining_points[remaining_indices == poly] for poly in unique_polygons]

        best_inliers = None
        best_inlier_count = 0
        for _ in range(ransac_iterations):
            # Pick two different random polygons and one random point in each
            first, second = np.random.choice(len(groups), 2, replace=False)
            p1 = groups[first][np.random.randint(len(groups[first]))]
            p2 = groups[second][np.random.randint(len(groups[second]))]
            # Skip if points are too close
            if np.allclose(p1, p2):
                continue
            # Get line parameters (ax + by + c = 0) with a unit normal (a, b)
            line_vector = p2 - p1
            line_vector = line_vector / np.linalg.norm(line_vector)
            a, b = -line_vector[1], line_vector[0]
            c = -(a * p1[0] + b * p1[1])
            # Distances from all remaining points to the line
            distances = np.abs(a * remaining_points[:, 0] + b * remaining_points[:, 1] + c)
            inliers = distances < line_fit_tol
            inlier_count = np.sum(inliers)
            if inlier_count > best_inlier_count:
                best_inliers = inliers
                best_inlier_count = inlier_count

        # Check if we found a good line
        if best_inlier_count < min_points_per_line or best_inlier_count / total_points < inlier_threshold:
            # No good line found, stop
            break
        print(f"Found a good line with {best_inlier_count} inliers ({best_inlier_count/total_points*100:.1f}%)")
        # Store the inliers in pixel coordinates and drop them from the pool
        inlier_points = remaining_points[best_inliers].tolist()
        lines.append([[pt[0] * image_width, pt[1] * image_height] for pt in inlier_points])
        remaining_points = remaining_points[~best_inliers]
        remaining_indices = remaining_indices[~best_inliers]
    return lines
def merge_similar_points(points_with_index, polygon_areas, image, radius=0.00001):
    """
    Merge points that lie within ``radius`` of each other.

    Points are normalised by image width and height before the radius search.
    Going through the points in order, a point that has not been merged yet and
    has neighbours is replaced, together with those neighbours, by their mean
    position; the merged point takes the polygon index of the neighbour whose
    polygon has the largest area.
    Args:
        points_with_index: Sequence of (point, polygon_index) pairs, point being [x, y] in pixels.
        polygon_areas: Area per polygon, indexed by polygon_index.
        image: Only ``image.shape`` is read (not touched when there are no points).
        radius: Neighbourhood radius in normalised coordinates.
    Returns:
        List of (point, polygon_index) pairs; an empty list for empty input, so
        the return type is a list in every case.
    """
    if len(points_with_index) == 0:
        return []
    image_height, image_width = image.shape[:2]
    points = [entry[0] for entry in points_with_index]
    polygon_indices = [entry[1] for entry in points_with_index]
    ## normalize the points
    point_features = [[pt[0] / image_width, pt[1] / image_height] for pt in points]
    neighbors = NearestNeighbors(radius=radius)
    neighbors.fit(point_features)
    _, indices = neighbors.radius_neighbors(point_features)
    print("Total point indices: ", len(indices))
    print("Total point duplicates: ", len([i for i in indices if len(i) > 1]))
    dup_indices = set()
    unique_points = []
    for i, neighbor_ids in enumerate(indices):
        if i in dup_indices:
            continue
        if len(neighbor_ids) > 1:
            # The merged point inherits the polygon with the largest area in the cluster
            cluster_polygon_areas = [polygon_areas[polygon_indices[c_i]] for c_i in neighbor_ids]
            polygon_index = polygon_indices[neighbor_ids[np.argmax(cluster_polygon_areas)]]
            cluster_center = np.mean(np.array([points[c_i] for c_i in neighbor_ids]), axis=0)
            unique_points.append((cluster_center, polygon_index))
            dup_indices.update(neighbor_ids)
        else:
            unique_points.append((points[i], polygon_indices[i]))
    return unique_points
def merge_similar_lines(lines, image, radius=1):
    """
    Merge lines whose end segments are within ``radius`` under ``line_segment_metric``.

    Every line is reduced to its leftmost-to-rightmost segment, normalised by
    image width and height. Going through the lines in order, a line that has
    not been merged yet and has neighbours is replaced by the concatenation of
    the points of all its neighbours.
    Args:
        lines: List of lines, each a list of [x, y] points.
        image: Only ``image.shape`` is read (not touched when there are no lines).
        radius: Neighbourhood radius for the radius search.
    Returns:
        List of lines (lists of points); an empty list for empty input, so the
        return type is a list in every case.
    """
    if len(lines) == 0:
        return []
    image_height, image_width = image.shape[:2]
    line_features = []
    for line in lines:
        p1, p2 = line_leftmost_to_rightmost(line)
        ## normalize the points
        line_features.append([p1[0] / image_width, p1[1] / image_height, p2[0] / image_width, p2[1] / image_height])
    neighbors = NearestNeighbors(algorithm='ball_tree', radius=radius, metric=line_segment_metric)
    neighbors.fit(line_features)
    _, indices = neighbors.radius_neighbors(line_features)
    print("Total line indices: ", len(indices))
    print("Total line duplicates: ", len([i_line for i_line in indices if len(i_line) > 1]))
    dup_indices = set()
    unique_lines = []
    for i, neighbor_ids in enumerate(indices):
        if i in dup_indices:
            continue
        if len(neighbor_ids) > 1:
            merged_points = []
            for c_i in neighbor_ids:
                merged_points.extend(lines[c_i])
                dup_indices.add(c_i)
            unique_lines.append(merged_points)
        else:
            unique_lines.append(lines[i])
    print("Total unique lines: ", len(unique_lines))
    return unique_lines
def line_segment_metric(line1, line2):
    """
    Dissimilarity of two segments given as flat ``[x1, y1, x2, y2]`` sequences.

    The value is ``(1 - dot(unit directions)) + segments_distance(...)``.
    The directions are normalised by the segment length without a guard, so a
    zero-length segment yields a NaN direction.
    """
    start_a, end_a = np.array([line1[0], line1[1]]), np.array((line1[2], line1[3]))
    start_b, end_b = np.array([line2[0], line2[1]]), np.array((line2[2], line2[3]))
    unit_a = (end_a - start_a) / np.linalg.norm(end_a - start_a)
    unit_b = (end_b - start_b) / np.linalg.norm(end_b - start_b)
    ## 0 when the directions coincide
    direction_penalty = 1 - np.dot(unit_a, unit_b)
    ## line segment distance (line points should have already been normalized)
    gap = segments_distance(
        line1[0], line1[1], line1[2], line1[3],
        line2[0], line2[1], line2[2], line2[3],
    )
    return direction_penalty + gap
## three functions below are taken from
## https://stackoverflow.com/questions/2824478/shortest-distance-between-two-line-segments
def segments_distance(x11, y11, x12, y12, x21, y21, x22, y22):
    """ shortest distance between two segments in the plane:
    one segment is (x11, y11) to (x12, y12)
    the other is (x21, y21) to (x22, y22)
    returns 0 when segments_intersect reports an intersection
    """
    if segments_intersect(x11, y11, x12, y12, x21, y21, x22, y22):
        return 0
    # otherwise the minimum is reached at one of the 4 endpoints
    return min(
        point_segment_distance(x11, y11, x21, y21, x22, y22),
        point_segment_distance(x12, y12, x21, y21, x22, y22),
        point_segment_distance(x21, y21, x11, y11, x12, y12),
        point_segment_distance(x22, y22, x11, y11, x12, y12),
    )
def segments_intersect(x11, y11, x12, y12, x21, y21, x22, y22):
    """ whether two segments in the plane cross:
    one segment is (x11, y11) to (x12, y12)
    the other is (x21, y21) to (x22, y22)
    parallel segments (zero determinant) are always reported as not intersecting
    """
    ux, uy = x12 - x11, y12 - y11
    vx, vy = x22 - x21, y22 - y21
    denom = vx * uy - vy * ux
    if denom == 0:
        return False  # parallel segments
    s = (ux * (y21 - y11) + uy * (x11 - x21)) / denom
    t = (vx * (y11 - y21) + vy * (x21 - x11)) / (-denom)
    # both parameters must fall inside [0, 1]
    return (0 <= s <= 1) and (0 <= t <= 1)
def point_segment_distance(px, py, x1, y1, x2, y2):
    """ distance from the point (px, py) to the segment (x1, y1)-(x2, y2) """
    seg_x = x2 - x1
    seg_y = y2 - y1
    if seg_x == seg_y == 0:  # the segment's just a point
        return math.hypot(px - x1, py - y1)
    # Parameter of the orthogonal projection of the point on the carrier line
    t = ((px - x1) * seg_x + (py - y1) * seg_y) / (seg_x * seg_x + seg_y * seg_y)
    if t < 0:
        # projection falls before the first endpoint
        off_x, off_y = px - x1, py - y1
    elif t > 1:
        # projection falls after the second endpoint
        off_x, off_y = px - x2, py - y2
    else:
        # projection lies inside the segment
        foot_x = x1 + t * seg_x
        foot_y = y1 + t * seg_y
        off_x, off_y = px - foot_x, py - foot_y
    return math.hypot(off_x, off_y)
def line_leftmost_to_rightmost(line):
    """
    Reduce a set of points to one segment along their fitted line.

    The direction comes from ``cv2.fitLine``. For a non-vertical fit the
    segment spans from the smallest to the largest x among the points and is
    anchored at the leftmost point (first after sorting by (x, y)). For a
    vertical fit, which has no finite slope, the segment runs from the smallest
    to the largest y at the x position of the fitted line.
    Args:
        line (list): a list of points in the form of [[x1, y1], [x2, y2], ...]
    Returns:
        Two (x, y) tuples: the segment's start and end.
    """
    fit = np.asarray(cv2.fitLine(np.array(line), cv2.DIST_L2, 0, 0.01, 0.01)).ravel()
    # Scalars are read element-wise: float() on a 1-element array is deprecated in NumPy.
    vx, vy, x0 = float(fit[0]), float(fit[1]), float(fit[2])
    points = sorted(line, key=lambda point: (point[0], point[1]))
    if abs(vx) < 1e-6:
        # Vertical line: vy / vx would be infinite and every x is (nearly) the
        # same, so the slope/intercept form would collapse to a single point.
        ys = [float(point[1]) for point in points]
        return (x0, min(ys)), (x0, max(ys))
    leftmost_x = points[0][0]
    rightmost_x = points[-1][0]
    slope = vy / vx + 0.00001
    intercept = float(points[0][1]) - slope * float(points[0][0])
    leftmost_y = slope * leftmost_x + intercept
    rightmost_y = slope * rightmost_x + intercept
    return (leftmost_x, leftmost_y), (rightmost_x, rightmost_y)
def process_image_direct(image, detections, polygon_epsilon):
    """
    Direct processing without server - ANNOTATION ONLY.
    Detection and segmentation should be done by the caller.

    The annotated image is also written to ``temp/krita_temp_detection_res.png``
    (relative to the current working directory); the directory is created when
    missing and a failed write is reported with a printed warning.
    Args:
        image: PIL Image object
        detections: List of DetectionResult objects (already detected and segmented)
        polygon_epsilon: Epsilon for polygon approximation; multiplied by 1e-3 before use
    Returns:
        Dictionary with the keys "ploygon_contours" (sic), "composition_lines" and "points"
    """
    import os
    import time

    import cv2

    t0 = time.time()
    # Annotate
    image_array = np.array(image)
    visualization_parameters = {
        "polygon_epsilon": polygon_epsilon * 1e-3,
        "point_radius": 1e-2,
        "line_fit_tol": 0.04,
        "line_radius": 1e-1
    }
    annotated_image, polygon_contours_list, lines_list, points_to_draw = annotate(
        image_array, detections, visualization_parameters
    )
    t_annotate = time.time()

    # Save annotated image. cv2.imwrite does not create directories and only
    # signals failure through its return value, so handle both explicitly.
    output_path = "temp/krita_temp_detection_res.png"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    if not cv2.imwrite(output_path, img=annotated_image):
        print(f"[Warning] Could not write annotated image to {output_path}")

    # Timing summary
    print(f"[Timing] annotate={t_annotate - t0:.2f}s total={t_annotate - t0:.2f}s")
    return {
        "ploygon_contours": polygon_contours_list,
        "composition_lines": lines_list,
        "points": points_to_draw
    }
def regenerate_lines_direct(points, polygon_contours, image_shape=None):
    """
    Regenerate composition lines from manually adjusted points
    without calling the detection models again.
    Args:
        points: List of [x, y] coordinates
        polygon_contours: List of polygon contours (each is a list of [x, y] coordinates)
        image_shape: Optional (height, width) used to normalise coordinates
            during line fitting. When omitted, the extent is derived from the
            largest x and y found in ``polygon_contours``.
    Returns:
        List of composition lines [[[x1, y1], [x2, y2]], ...]
    Raises:
        ValueError: if ``points`` or ``polygon_contours`` is None or empty.
    """
    import time

    # len() based checks also work for NumPy arrays, whose truth value is ambiguous.
    if points is None or len(points) == 0:
        raise ValueError('Points are required')
    if polygon_contours is None or len(polygon_contours) == 0:
        raise ValueError('Polygon contours are required')
    print(f"[Direct] Regenerating lines from {len(points)} manually adjusted points")
    t0 = time.time()
    # Assign points to polygons
    points_with_index = assign_points_to_polygons(points, polygon_contours)

    if image_shape is None:
        max_x = max(max(p[0] for p in contour) for contour in polygon_contours)
        max_y = max(max(p[1] for p in contour) for contour in polygon_contours)
        height, width = int(max_y) + 1, int(max_x) + 1
    else:
        height, width = int(image_shape[0]), int(image_shape[1])
    # Line fitting only reads ``.shape``; a broadcast view provides it without
    # allocating a full image.
    shape_carrier = np.broadcast_to(np.uint8(0), (height, width, 3))

    # Use the line fitting logic
    line_fit_tol = 0.04
    inlier_threshold = 0.05
    lines = fit_lines(points_with_index, shape_carrier, line_fit_tol=line_fit_tol, inlier_threshold=inlier_threshold)
    t_generate = time.time()
    print(f"[Direct] Generated {len(lines)} composition lines in {t_generate - t0:.2f}s")
    # Convert lines to the same format as process_image
    lines_list = []
    for line in lines:
        p1, p2 = line_leftmost_to_rightmost(line)
        lines_list.append([[int(p1[0]), int(p1[1])], [int(p2[0]), int(p2[1])]])
    return lines_list
def assign_points_to_polygons(points, polygon_contours):
    """
    Tag each point with the index of the polygon that has the nearest vertex.

    Distances are measured to polygon vertices only (not to edges); on a tie
    the polygon with the lower index wins.
    Args:
        points: List of [x, y] coordinates
        polygon_contours: List of polygon contours (each is a list of [x, y] coordinates)
    Returns:
        List of tuples: [(point, polygon_index), ...]
    """
    # The vertex arrays do not depend on the point, so build them once
    vertex_sets = [
        np.array(contour, dtype=np.float32).reshape(-1, 2)
        for contour in polygon_contours
    ]
    tagged = []
    for point in points:
        target = np.array(point, dtype=np.float32)
        best_idx, best_dist = 0, float('inf')
        for poly_idx, vertices in enumerate(vertex_sets):
            nearest = np.min(np.linalg.norm(vertices - target, axis=1))
            if nearest < best_dist:
                best_idx, best_dist = poly_idx, nearest
        tagged.append((point, best_idx))
    print(f"[Direct] Assigned {len(points)} points to {len(polygon_contours)} polygons")
    return tagged