Mothbox/AI/Mothbot/Mothbot_GenThumbnails.py
2025-08-07 13:59:46 -05:00

204 lines
6.4 KiB
Python

#!/usr/bin/env python3
import json
import os
# Root directory handed to find_date_folders() by the __main__ entry point.
# Hard-coded to one machine's drive layout; edit before running elsewhere.
INPUT_PATH = r'F:\Panama\PEA_PeaPorch_AdeptTurca_2024-09-01\2024-09-01'
from pathlib import Path
import cv2
import numpy as np
import re
def crop_rect(img, rect, interpolation=cv2.INTER_LINEAR):
    """Cut a rotated rectangle out of an image as an upright patch.

    The whole image is rotated about the rectangle's centre by the
    rectangle's angle, and a patch of the rectangle's size is then sampled
    around that same centre.

    Args:
        img: Image array; only its first two dimensions (rows, columns) are
            read here.
        rect: Indexable ``(center, size, angle)`` triple, such as the value
            returned by ``cv2.minAreaRect``. Centre and size are truncated
            to whole pixels.
        interpolation: OpenCV interpolation flag used for the rotation
            (for example ``cv2.INTER_LINEAR``, ``cv2.INTER_CUBIC`` or
            ``cv2.INTER_LANCZOS4``).

    Returns:
        The patch returned by ``cv2.getRectSubPix``.
    """
    angle = rect[2]
    # Truncate the float centre and size to integer pixel values.
    centre = tuple(int(value) for value in rect[0])
    patch_size = tuple(int(value) for value in rect[1])

    n_rows, n_cols = img.shape[0], img.shape[1]

    # Rotate the full frame so the rectangle becomes axis-aligned; the output
    # canvas keeps the source dimensions, given as (width, height).
    rotation = cv2.getRotationMatrix2D(centre, angle, 1)
    rotated = cv2.warpAffine(img, rotation, (n_cols, n_rows), flags=interpolation)

    return cv2.getRectSubPix(rotated, patch_size, centre)
# TODO: also save each patch image's width and height along with its file path.
def generateThumbnailPatches_JSON(image_path, json_data, patch_folder, skip_existing=True):
    """Write one thumbnail per labelled shape and record its relative path.

    For every entry in ``json_data["shapes"]`` a patch file named
    ``<image stem>_<shape index>_<model name><image extension>`` is expected
    in ``patch_folder``. Missing patches are cropped from the image using the
    minimum-area rectangle around the shape's points.

    Args:
        image_path: Path of the source image.
        json_data: Parsed annotation dict. ``"version"`` selects the model
            name (anything not starting with ``"Mothbot"`` becomes
            ``"HumanDetection"``); ``"shapes"`` is a list of dicts that each
            carry a ``"points"`` list.
        patch_folder: Existing directory that receives the patch files.
        skip_existing: When true, patches already on disk are not rewritten.

    Returns:
        The same ``json_data`` object, with ``"patch_path"`` set on every
        shape to ``"patches/<patch file name>"``. The path is recorded even
        when the patch could not be written because the image is unreadable.
    """
    model_name = json_data.get("version")
    # "version" can be missing or non-string; only a real "Mothbot..." tag counts.
    if not isinstance(model_name, str) or not model_name.startswith("Mothbot"):
        model_name = "HumanDetection"

    # splitext keeps dots inside the stem ("cam.2024.jpg" -> "cam.2024", ".jpg").
    stem, extension = os.path.splitext(os.path.basename(image_path))

    image = None  # decoded lazily, only if at least one patch must be written
    image_unreadable = False
    updated_shapes = []
    for shape_index, shape in enumerate(json_data.get("shapes") or []):
        patchfilename = f"{stem}_{shape_index}_{model_name}{extension}"
        patchfullpath = Path(patch_folder) / patchfilename
        shape["patch_path"] = f"patches/{patchfilename}"
        updated_shapes.append(shape)

        if skip_existing and patchfullpath.exists():
            print("Thumbnail exists, skipping")
            continue
        if image_unreadable:
            continue

        if image is None:
            image = cv2.imread(str(image_path))
            if image is None:
                # cv2.imread signals failure by returning None, not by raising.
                image_unreadable = True
                print(f"Could not read image, skipping thumbnail generation: {image_path}")
                continue

        points = np.array(shape["points"], dtype=np.float32)
        rect = cv2.minAreaRect(points)
        img_crop = crop_rect(image, rect)
        # Pass a plain string: not every OpenCV build accepts pathlib.Path here.
        cv2.imwrite(str(patchfullpath), img_crop)

    json_data["shapes"] = updated_shapes
    return json_data
def generateThumbnailPatches(img, thefilepath, rectangle, detnum, modelname):
    """Crop one detection from an image and save it beside the source file.

    The patch is written to a ``patches`` directory next to ``thefilepath``
    (created if needed) as
    ``<image stem>_<detnum>_<modelname><image extension>``.

    Args:
        img: Already-loaded image array to crop from.
        thefilepath: Path of the image file ``img`` was read from; used only
            to derive the output directory and file name.
        rectangle: ``(center, size, angle)`` rotated rectangle passed on to
            ``crop_rect``.
        detnum: Detection index embedded in the patch file name.
        modelname: Model label embedded in the patch file name.

    Returns:
        The patch location relative to the image's directory, as
        ``"patches/<patch file name>"``.
    """
    # splitext keeps dots inside the stem and copes with extension-less names.
    stem, extension = os.path.splitext(os.path.basename(thefilepath))

    # Joining with Path keeps a bare file name relative; string concatenation
    # of "" + "/patches" would have pointed at the filesystem root.
    patch_folder_path = Path(os.path.dirname(thefilepath)) / "patches"
    patch_folder_path.mkdir(parents=True, exist_ok=True)

    patchfilename = f"{stem}_{detnum}_{modelname}{extension}"
    img_crop = crop_rect(img, rectangle)
    # Pass a plain string: not every OpenCV build accepts pathlib.Path here.
    cv2.imwrite(str(patch_folder_path / patchfilename), img_crop)

    return f"patches/{patchfilename}"
def scan_for_images(date_folder_path):
    """Return the paths of the ``.jpg`` files directly inside one folder.

    Only the given folder is listed (no recursion), and the match on the
    ``.jpg`` suffix is case-sensitive.

    Args:
        date_folder_path: Directory to list.

    Returns:
        A list of ``os.path.join(date_folder_path, name)`` strings, in the
        order ``os.listdir`` reports them.
    """
    # Use the argument rather than a module-level folder list, so each call
    # returns only the images that belong to the requested folder.
    return [
        os.path.join(date_folder_path, filename)
        for filename in os.listdir(date_folder_path)
        if filename.endswith(".jpg")
    ]
def find_date_folders(directory):
    """Collect every folder named like ``YYYY-MM-DD`` at or below a directory.

    The input directory itself is included (first, and exactly as given) when
    its own name matches. Date folders nested inside other date folders are
    reported as well.

    Args:
        directory: The directory to search.

    Returns:
        A list of paths to the found folders, in ``os.walk`` top-down order.
    """
    date_pattern = re.compile(r"^\d{4}-\d{2}-\d{2}$")
    folders = []

    # normpath drops a trailing separator, which would otherwise make
    # basename() return "" and hide a matching input directory.
    if date_pattern.match(os.path.basename(os.path.normpath(directory))):
        folders.append(directory)

    for root, dirs, _files in os.walk(directory):
        folders.extend(
            os.path.join(root, dir_name)
            for dir_name in dirs
            if date_pattern.match(dir_name)
        )
    return folders
def process_images(img_files, date_folder):
    """Generate thumbnails for every image that has a sidecar JSON file.

    A ``patches`` directory is created inside ``date_folder``. Each image is
    then checked in turn: missing or zero-byte images are skipped, images
    without a ``<image stem>.json`` next to them are skipped, and unreadable
    JSON is reported and skipped. For the rest, the parsed JSON is passed to
    ``generateThumbnailPatches_JSON``. Its return value is discarded, so the
    JSON file on disk is not rewritten here.

    Args:
        img_files: Image file names or paths; each is joined onto
            ``date_folder`` (an absolute path is used as-is).
        date_folder: Directory holding the images and their JSON files.

    Returns:
        None. Progress and skip reasons are printed.
    """
    total_img_files = len(img_files)
    patch_folder_path = Path(date_folder) / "patches"
    patch_folder_path.mkdir(parents=True, exist_ok=True)

    for idx, filename in enumerate(img_files):
        image_path = os.path.join(date_folder, filename)
        # Swap the image extension for ".json" without assuming its length.
        json_path = os.path.splitext(image_path)[0] + ".json"

        progress = ((idx + 1) / total_img_files) * 100
        print(f"({progress:.2f}%) Processing: {filename} ")

        # Check 0: the image must exist and contain data.
        if not os.path.isfile(image_path) or os.path.getsize(image_path) == 0:
            print(f"Skipping {filename}: Image file is missing or empty.")
            continue

        # Check 1: only images with a JSON annotation file get thumbnails.
        if not os.path.isfile(json_path):
            continue
        print(json_path)
        print("Json exists for this image file")

        # Keep the try body to the parse itself so only parse errors are
        # reported as corruption.
        try:
            with open(json_path, "r", encoding="utf-8") as json_file:
                json_data = json.load(json_file)
        except (json.JSONDecodeError, UnicodeDecodeError):
            print(f"{filename}: Corrupted JSON file.")
            continue

        print("creating thumbnails for img+json pair")
        generateThumbnailPatches_JSON(image_path, json_data, patch_folder_path)
# Script entry point: skipped entirely when this module is imported.
if __name__ == "__main__":
    # Bound at module level on purpose; code elsewhere in this file may read
    # this name as a global.
    date_folders = find_date_folders(INPUT_PATH)

    # Handle each dated folder in turn: list its images, then build thumbnails.
    for date_folder_path in date_folders:
        print(date_folders)
        images = scan_for_images(date_folder_path)
        process_images(images, date_folder_path)