# Source code for c4dynamics.detectors.yolo3_opencv

import os, sys 
import cv2

import numpy as np
sys.path.append('.')
import c4dynamics as c4d 
from c4dynamics import pixelpoint 
from typing import Optional

# YOLOv3 network input size: frames are resized to 416x416 (RGB) before the forward pass 
MODEL_SIZE = (416, 416, 3)


class yolov3: 
  ''' 
  YOLO: Real-Time Object Detection. 

  :class:`yolov3` is a YOLOv3 (You Only Look Once) object detection model. 

  Though it is no longer the most accurate object detection algorithm, 
  YOLOv3 is still a very good choice when you need real-time detection 
  while maintaining excellent accuracy. 

  YOLOv3 processes an entire image in a single forward pass, making it 
  efficient for dynamic scenes. Its key strength lies in its ability to 
  simultaneously predict bounding box coordinates and class probabilities 
  for multiple objects within an image. 

  Parameters 
  ========== 
  weights_path : str, optional 
      Path to the YOLOv3 weights file. Defaults to None. 

  See Also 
  ======== 
  .filters 
  .pixelpoint 

  **Classes** 

  Using YOLOv3 means object detection capability with the 80 pre-trained 
  classes that come with the COCO dataset. 

  The following 80 classes are available using COCO's pre-trained weights: 

  .. admonition:: COCO dataset 

    person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, 
    traffic light, fire hydrant, stop sign, parking meter, bench, bird, 
    cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, backpack, 
    umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, 
    sports ball, kite, baseball bat, baseball glove, skateboard, surfboard, 
    tennis racket, bottle, wine glass, cup, fork, knife, spoon, bowl, 
    banana, apple, sandwich, orange, broccoli, carrot, hot dog, pizza, 
    donut, cake, chair, couch, potted plant, bed, dining table, toilet, 
    tv, laptop, mouse, remote, keyboard, cell phone, microwave, oven, 
    toaster, sink, refrigerator, book, clock, vase, scissors, teddy bear, 
    hair drier, toothbrush 

  .. figure:: /_architecture/yolo-object-detection.jpg 

  *Figure 1*: 
  Object Detection with YOLO using COCO pre-trained classes 
  'dog', 'bicycle', 'truck'. 
  Read more at: `darknet-yolo <https://pjreddie.com/darknet/yolo/>`_. 

  **Implementation (c4dynamics)** 

  The :class:`yolov3` class abstracts the complexities of model 
  initialization, input preprocessing, and output parsing. 
  The :meth:`detect` method returns a 
  :class:`pixelpoint <c4dynamics.states.lib.pixelpoint.pixelpoint>` 
  for each detected object. 
  The `pixelpoint` is a 
  :mod:`predefined state class <c4dynamics.states.lib>` representing a 
  data point in a video frame with an associated bounding box. 
  Its methods and properties enhance the YOLOv3 output structure, 
  providing a convenient data structure for handling tracking missions. 

  **Installation** 

  c4dynamics downloads the YOLOv3 weights file once, at the first call to 
  :class:`yolov3`, and saves it in the cache. 
  For further details see :mod:`datasets <c4dynamics.datasets>`. 
  Alternatively, the user can provide a path to a custom weights file 
  using the parameter `weights_path`. 
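
  For example, a construction with a user-supplied weights file 
  (a sketch only; the path below is hypothetical and should point at a 
  Darknet-format YOLOv3 weights file): 

  .. code:: 

    from c4dynamics.detectors import yolov3 
    # hypothetical path to a locally stored weights file 
    yolo3 = yolov3(weights_path = '/path/to/yolov3.weights') 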

  **Construction** 

  A YOLOv3 detector instance is created by making a direct call to the 
  `yolov3` constructor: 

  .. code:: 

    >>> from c4dynamics.detectors import yolov3 
    >>> yolo3 = yolov3() 
    Fetched successfully 

  Initialization of the instance does not require any mandatory parameters. 

  Example 
  ======= 

  The following snippet initializes the YOLOv3 model and runs the 
  `detect()` method on an image containing four airplanes. 
  The example uses the `datasets` module from `c4dynamics` to fetch an 
  image. For further details, see :mod:`c4dynamics.datasets`. 

  Import required packages: 

  .. code:: 

    >>> import cv2 
    >>> import c4dynamics as c4d 
    >>> from matplotlib import pyplot as plt 

  Load YOLOv3 detector: 

  .. code:: 

    >>> yolo3 = c4d.detectors.yolov3() 
    Fetched successfully 

  Fetch and read the image: 

  .. code:: 

    >>> imagepath = c4d.datasets.image('planes') 
    Fetched successfully 
    >>> img = cv2.imread(imagepath) 

  Run the YOLOv3 detector on the image: 

  .. code:: 

    >>> pts = yolo3.detect(img) 

  Now `pts` consists of a 
  :class:`pixelpoint <c4dynamics.states.lib.pixelpoint.pixelpoint>` 
  instance for each object detected in the frame. 
  Let's use the properties and methods of the `pixelpoint` class to view 
  the attributes of the detected objects: 

  .. code:: 

    >>> def ptup(n): return '(' + str(n[0]) + ', ' + str(n[1]) + ')' 
    >>> print('{:^10} | {:^10} | {:^16} | {:^16} | {:^10} | {:^14}'.format(
    ...   'center x', 'center y', 'box top-left', 'box bottom-right', 'class', 'frame size'))  # doctest: +IGNORE_OUTPUT 
    >>> for p in pts: 
    ...   print('{:^10d} | {:^10d} | {:^16} | {:^16} | {:^10} | {:^14}'.format(
    ...     p.x, p.y, ptup(p.box[0]), ptup(p.box[1]), p.class_id, ptup(p.fsize)))  # doctest: +IGNORE_OUTPUT 
    ...   cv2.rectangle(img, p.box[0], p.box[1], [0, 0, 0], 2)  # doctest: +IGNORE_OUTPUT 
    ...   point = (int((p.box[0][0] + p.box[1][0]) / 2 - 75), p.box[1][1] + 22) 
    ...   cv2.putText(img, p.class_id, point, cv2.FONT_HERSHEY_SIMPLEX, 1, [0, 0, 0], 2)  # doctest: +IGNORE_OUTPUT 
    center x  |  center y  |   box top-left   | box bottom-right |   class    |   frame size 
      615     |    295     |    (562, 259)    |    (668, 331)    | aeroplane  |  (1280, 720) 
      779     |    233     |    (720, 199)    |    (838, 267)    | aeroplane  |  (1280, 720) 
      635     |    189     |    (578, 153)    |    (692, 225)    | aeroplane  |  (1280, 720) 
      793     |    575     |    (742, 540)    |    (844, 610)    | aeroplane  |  (1280, 720) 

  .. code:: 

    >>> plt.figure()  # doctest: +IGNORE_OUTPUT 
    >>> plt.axis(False)  # doctest: +IGNORE_OUTPUT 
    >>> plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # doctest: +IGNORE_OUTPUT 

  .. figure:: /_examples/yolov3/intro.png 

  ''' 

  # the 80 COCO class names, in darknet (coco.names) order 
  class_names = (
    'person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 
    'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 
    'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 
    'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 
    'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 
    'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 
    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 
    'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 
    'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 
    'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 
    'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 
    'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 
    'scissors', 'teddy bear', 'hair drier', 'toothbrush') 

  _nms_th = 0.5 
  _confidence_th = 0.5 

  def __init__(self, weights_path: Optional[str] = None) -> None: 

    errormsg = '' 
    if weights_path is None: 
      weights_path = c4d.datasets.nn_model('YOLOv3') 
      errormsg = "Try to clear the cache by 'c4dynamics.datasets.clear_cache()'" 

    if not os.path.exists(weights_path): 
      raise FileNotFoundError(f"The file 'yolov3.weights' does not " 
                                f"exist in: '{weights_path}'. {errormsg}") 

    cfg_path = os.path.join(os.path.dirname(__file__), 'yolov3.cfg') 
    # cfg_path = 'yolov3.cfg' 
    # coconames = os.path.join(yolodir, 'coco.names') 

    self.net = cv2.dnn.readNetFromDarknet(cfg_path, weights_path) 
    self.net.setPreferableBackend(cv2.dnn.DNN_BACKEND_OPENCV) 

    # the output layers are the unconnected layers of the network 
    ln = self.net.getLayerNames() 
    self.ln = [ln[i - 1] for i in self.net.getUnconnectedOutLayers()] 

    # with open(coconames, 'r') as f: 
    #   self.class_names = f.read().strip().split('\n') 
    # self.__dict__.update(kwargs) 

  @property 
  def nms_th(self) -> float: 
    ''' 
    Gets and sets the Non-Maximum Suppression (NMS) threshold. 

    Boxes that overlap a higher-confidence box by more than this 
    threshold are suppressed. 

    Parameters 
    ---------- 
    nms_th : float 
        The new threshold value for NMS during object detection. 
        Defaults: `nms_th = 0.5`. 

    Returns 
    ------- 
    nms_th : float 
        The threshold value used for NMS during object detection. 
        Boxes that overlap a higher-confidence box by more than this 
        threshold are suppressed. 
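
    Conceptually, NMS keeps the highest-confidence box and discards any 
    remaining box whose intersection-over-union (IoU) with it exceeds 
    `nms_th`. A minimal sketch of that criterion (illustration only; the 
    names are placeholders, and the class itself delegates to 
    `cv2.dnn.NMSBoxes`): 

    .. code:: 

      # IoU of two boxes given as (top-left x, top-left y, width, height) 
      def iou(a, b): 
        ix = max(0, min(a[0] + a[2], b[0] + b[2]) - max(a[0], b[0])) 
        iy = max(0, min(a[1] + a[3], b[1] + b[3]) - max(a[1], b[1])) 
        inter = ix * iy 
        return inter / (a[2] * a[3] + b[2] * b[3] - inter) 

      # box_j is suppressed when it overlaps the kept box_i by more than nms_th 
      suppress = iou(box_i, box_j) > nms_th 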

    Example 
    ------- 

    Import required packages: 

    .. code:: 

      >>> import c4dynamics as c4d 
      >>> from matplotlib import pyplot as plt 
      >>> import cv2 

    Fetch 'planes.png' using the c4dynamics' datasets module 
    (see :mod:`c4dynamics.datasets`): 

    .. code:: 

      >>> impath = c4d.datasets.image('planes') 
      Fetched successfully 

    Load YOLOv3 detector and set 3 NMS threshold values to compare: 

    .. code:: 

      >>> yolo3 = c4d.detectors.yolov3() 
      Fetched successfully 
      >>> nms_thresholds = [0.1, 0.5, 0.9] 

    Run the detector on each threshold: 

    .. code:: 

      >>> _, axs = plt.subplots(1, 3) 
      >>> for i, nms_threshold in enumerate(nms_thresholds): 
      ...   yolo3.nms_th = nms_threshold 
      ...   img = cv2.imread(impath) 
      ...   pts = yolo3.detect(img) 
      ...   for p in pts: 
      ...     cv2.rectangle(img, p.box[0], p.box[1], [0, 255, 0], 2)  # doctest: +IGNORE_OUTPUT 
      ...   axs[i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # doctest: +IGNORE_OUTPUT 
      ...   axs[i].set_title(f"NMS Threshold: {nms_threshold}", fontsize = 6)  # doctest: +IGNORE_OUTPUT 
      ...   axs[i].axis('off')  # doctest: +IGNORE_OUTPUT 

    .. figure:: /_examples/yolov3/nms_th.png 

    A high NMS threshold (0.9) here leads to an increased number of 
    bounding boxes around a single object: when the threshold is high, a 
    significant overlap is required before two bounding boxes are 
    considered redundant and one of them is suppressed. 
    To address this, choose an NMS threshold based on the characteristics 
    of your dataset and the typical overlap between objects. A lower 
    threshold (e.g., 0.4 or 0.5) is commonly used to suppress redundant 
    boxes effectively while retaining accurate detections. Experimenting 
    with different threshold values and observing their impact on the 
    results is crucial for optimizing the performance of object detection 
    models. 
    ''' 
    return self._nms_th 

  @nms_th.setter 
  def nms_th(self, val: float) -> None: 
    self._nms_th = val 

  @property 
  def confidence_th(self) -> float: 
    ''' 
    Gets and sets the confidence threshold used in the object detection. 

    Detected objects with confidence scores below this threshold are 
    filtered out. 

    Parameters 
    ---------- 
    confidence_th : float 
        The new confidence threshold for object detection. 
        Defaults: `confidence_th = 0.5`. 

    Returns 
    ------- 
    confidence_th : float 
        The confidence threshold for object detection. Detected objects 
        with confidence scores below this threshold are filtered out. 
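
    In :meth:`detect`, this threshold gates each candidate detection: the 
    class with the highest score is taken, and the candidate is kept only 
    if that score exceeds the threshold. Schematically (`d` stands for a 
    single row of the YOLOv3 output): 

    .. code:: 

      scores = d[5:]                           # the 80 per-class scores 
      class_id = np.argmax(scores)             # the most probable class 
      keep = scores[class_id] > confidence_th  # the confidence gate 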

    Example 
    ------- 

    Import required packages: 

    .. code:: 

      >>> import c4dynamics as c4d 
      >>> from matplotlib import pyplot as plt 
      >>> import cv2 

    Fetch 'planes.png' using the c4dynamics' datasets module 
    (see :mod:`c4dynamics.datasets`): 

    .. code:: 

      >>> impath = c4d.datasets.image('planes') 
      Fetched successfully 

    Load YOLOv3 detector and set 3 confidence threshold values to compare: 

    .. code:: 

      >>> yolo3 = c4d.detectors.yolov3() 
      Fetched successfully 
      >>> confidence_thresholds = [0.9, 0.95, 0.99] 

    Run the detector on each threshold: 

    .. code:: 

      >>> _, axs = plt.subplots(1, 3) 
      >>> for i, confidence_threshold in enumerate(confidence_thresholds): 
      ...   yolo3.confidence_th = confidence_threshold 
      ...   img = cv2.imread(impath) 
      ...   pts = yolo3.detect(img) 
      ...   for p in pts: 
      ...     cv2.rectangle(img, p.box[0], p.box[1], [0, 255, 0], 2)  # doctest: +IGNORE_OUTPUT 
      ...   axs[i].imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # doctest: +IGNORE_OUTPUT 
      ...   axs[i].set_title(f"Confidence Threshold: {confidence_threshold}", fontsize = 6)  # doctest: +IGNORE_OUTPUT 
      ...   axs[i].axis('off')  # doctest: +IGNORE_OUTPUT 

    .. figure:: /_examples/yolov3/confidence_th.png 

    Only a single object is missed, even when the confidence threshold is 
    raised to 0.99, which suggests that the model is highly confident in 
    its predictions. This level of performance is typically achievable 
    when the model has been trained on a diverse and representative 
    dataset, encompassing a wide variety of object instances, backgrounds, 
    and conditions. 
    ''' 
    return self._confidence_th 

  @confidence_th.setter 
  def confidence_th(self, val: float) -> None: 
    self._confidence_th = val 
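
  # note: the two thresholds are independent and can be tuned together 
  # before calling detect(), e.g.: 
  #   yolo3.confidence_th = 0.7   # stricter score gate 
  #   yolo3.nms_th = 0.4          # more aggressive duplicate suppression 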

  def detect(self, frame: np.ndarray) -> list[pixelpoint]: 
    ''' 
    Detects objects in a frame using the YOLOv3 model. 

    At each call, the detector performs the following steps: 

    1. Preprocesses the frame by creating a blob, normalizing pixel 
       values, and swapping Red and Blue channels. 
    2. Sets the blob as input to the YOLOv3 model and performs a forward 
       pass to obtain detections. 
    3. Extracts detected objects based on a confidence threshold, 
       calculates bounding box coordinates, and filters results using 
       Non-Maximum Suppression (NMS). 

    Parameters 
    ---------- 
    frame : numpy.ndarray 
        An input frame for object detection. 

    Returns 
    ------- 
    out : list[pixelpoint] 
        A list of :class:`pixelpoint <c4dynamics.states.pixelpoint.pixelpoint>` 
        objects representing the detected objects, each containing 
        bounding box coordinates and a class label. 

    Examples 
    -------- 

    **Setup** 

    Import required packages: 

    .. code:: 

      >>> import cv2  # opencv-python 
      >>> import c4dynamics as c4d 
      >>> from matplotlib import pyplot as plt 

    Fetch 'planes.png' and 'aerobatics.mp4' using the c4dynamics' datasets 
    module (see :mod:`c4dynamics.datasets`): 

    .. code:: 

      >>> impath = c4d.datasets.image('planes') 
      Fetched successfully 
      >>> vidpath = c4d.datasets.video('aerobatics') 
      Fetched successfully 

    Load YOLOv3 detector: 

    .. code:: 

      >>> yolo3 = c4d.detectors.yolov3() 
      Fetched successfully 

    Define an auxiliary function: 

    .. code:: 

      >>> def ptup(n): return '(' + str(n[0]) + ', ' + str(n[1]) + ')' 

    **Object detection in a single frame** 

    .. code:: 

      >>> img = cv2.imread(impath) 
      >>> pts = yolo3.detect(img) 
      >>> for p in pts: 
      ...   cv2.rectangle(img, p.box[0], p.box[1], [0, 255, 0], 2)  # doctest: +IGNORE_OUTPUT 

    .. code:: 

      >>> plt.figure()  # doctest: +IGNORE_OUTPUT 
      >>> plt.axis(False)  # doctest: +IGNORE_OUTPUT 
      >>> plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # doctest: +IGNORE_OUTPUT 

    .. figure:: /_examples/yolov3/single_image.png 

    **Object detection in a video** 

    .. code:: 

      >>> video_cap = cv2.VideoCapture(vidpath) 
      >>> while video_cap.isOpened(): 
      ...   ret, frame = video_cap.read() 
      ...   if not ret: break 
      ...   pts = yolo3.detect(frame) 
      ...   for p in pts: 
      ...     cv2.rectangle(frame, p.box[0], p.box[1], [0, 255, 0], 2)  # doctest: +IGNORE_OUTPUT 
      ...   cv2.imshow('YOLOv3', frame)  # doctest: +IGNORE_OUTPUT 
      ...   cv2.waitKey(10)  # doctest: +IGNORE_OUTPUT 

    .. figure:: /_examples/yolov3/aerobatics.gif 

    **The output structure** 

    The output of the detect() method is a list of 
    :class:`pixelpoint <c4dynamics.states.pixelpoint.pixelpoint>` objects. 
    The :class:`pixelpoint <c4dynamics.states.pixelpoint.pixelpoint>` has 
    unique attributes to manipulate the detected object class and 
    bounding box. 

    .. code:: 

      >>> print('{:^10} | {:^10} | {:^10} | {:^16} | {:^16} | {:^10} | {:^14}'  # doctest: +IGNORE_OUTPUT 
      ...   .format('# object', 'center x', 'center y', 'box top-left', 'box bottom-right', 'class', 'frame size')) 
      >>> # main loop: 
      >>> for i, p in enumerate(pts): 
      ...   print('{:^10d} | {:^10.3f} | {:^10.3f} | {:^16} | {:^16} | {:^10} | {:^14}' 
      ...     .format(i, p.x, p.y, ptup(p.box[0]), ptup(p.box[1]), p.class_id, ptup(p.fsize)))  # doctest: +IGNORE_OUTPUT 
      ...   cv2.rectangle(img, p.box[0], p.box[1], [0, 0, 0], 2)  # doctest: +IGNORE_OUTPUT 
      ...   point = (int((p.box[0][0] + p.box[1][0]) / 2 - 75), p.box[1][1] + 22) 
      ...   cv2.putText(img, p.class_id, point, cv2.FONT_HERSHEY_SIMPLEX, 1, [0, 0, 0], 2)  # doctest: +IGNORE_OUTPUT 
      # object  |  center x  |  center y  |   box top-left   | box bottom-right |   class    |   frame size 
         0      |   0.584    |   0.376    |    (691, 234)    |    (802, 306)    | aeroplane  |  (1280, 720) 
         1      |   0.457    |   0.473    |    (528, 305)    |    (642, 376)    | aeroplane  |  (1280, 720) 
         2      |   0.471    |   0.322    |    (542, 196)    |    (661, 267)    | aeroplane  |  (1280, 720) 
         3      |   0.546    |   0.873    |    (645, 588)    |    (752, 668)    | aeroplane  |  (1280, 720) 

    .. code:: 

      >>> plt.figure()  # doctest: +IGNORE_OUTPUT 
      >>> plt.axis(False)  # doctest: +IGNORE_OUTPUT 
      >>> plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # doctest: +IGNORE_OUTPUT 

    .. figure:: /_examples/yolov3/outformat.png 

    ''' 

    # 
    # Step 1: Preprocess the Frame 
    #   - Create a blob (binary large object) from the input frame with 
    #     the specified dimensions 
    #   - Normalize pixel values to a range of 0 to 1 
    #   - Specify the dimensions of the input layer of the YOLOv3 model 
    #   - Swap Red and Blue channels (BGR to RGB) 
    #   - Set crop to False to preserve the original aspect ratio 
    ## 
    blob = cv2.dnn.blobFromImage(frame, 1 / 255.0, (MODEL_SIZE[0], MODEL_SIZE[1]), 
                                    swapRB = True, crop = False) 
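    # note: blobFromImage returns an NCHW array; regardless of the input 
    # frame size, the blob shape here is (1, 3, 416, 416), with pixel 
    # values scaled to [0, 1] and channels reordered BGR -> RGB. 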

    # 
    # Step 2: Set Input to the YOLOv3 Model and Perform Forward Pass 
    #   - Set the blob as the input to the YOLOv3 model 
    #   - Get the names of the output layers of the model 
    #   - Perform a forward pass through the model to obtain detections 
    # 
    # The returning detection structure: 
    #   1) x_center 
    #   2) y_center 
    #   3) width 
    #   4) height 
    #   5) confidence score 
    #   6:end) probabilities for each class 
    ## 
    self.net.setInput(blob) 
    detections = self.net.forward(self.ln) 

    # 
    # Step 3: Extract Detected Objects 
    #   - Iterate through the detected objects in the forward pass results 
    #   - Filter objects based on confidence threshold 
    #   - Calculate bounding box coordinates and convert to integers 
    #   - Append bounding box coordinates and class labels to respective lists 
    ## 
    raw = []            # xc, yc, w, h (normalized) 
    boxes = []          # top-left x, top-left y, width, height (pixels) 
    classIDs = [] 
    confidences = [] 

    fheight, fwidth = frame.shape[:2] 

    for detection in detections: 
      for d in detection: 

        scores = d[5:] 
        classID = np.argmax(scores) 
        confidence = scores[classID] 

        if scores[classID] > self._confidence_th:   # adjust the confidence threshold as needed 
          # relative (xc, yc, w, h) to pixels 
          box = d[:4] * [fwidth, fheight, fwidth, fheight] 
          x = box[0] - box[2] / 2   # top-left x 
          y = box[1] - box[3] / 2   # top-left y 
          boxes.append([x, y, box[2], box[3]]) 
          confidences.append(float(confidence)) 
          classIDs.append(classID) 
          raw.append(d[:4]) 

    indices = np.array(cv2.dnn.NMSBoxes(boxes, confidences, 
                          self._confidence_th, self._nms_th)) 

    points_out = [] 

    if len(indices) > 0: 
      for i in indices.ravel(): 
        pp = pixelpoint(x = int(raw[i][0] * fwidth), y = int(raw[i][1] * fheight), 
                          w = int(raw[i][2] * fwidth), h = int(raw[i][3] * fheight)) 
        pp.fsize = (fwidth, fheight) 
        pp.class_id = self.class_names[classIDs[i]] 
        points_out.append(pp) 

    return points_out 
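

# A minimal usage sketch (illustration only, not part of the module API): 
# run the detector on a video stream and draw each detection. The function 
# and its default path are hypothetical; any OpenCV-readable source works. 
# It mirrors the examples in the detect() docstring. 
def _video_demo(video_path: str = 'aerobatics.mp4') -> None: 
  # hypothetical demo helper; not invoked anywhere in this module 
  yolo = yolov3() 
  cap = cv2.VideoCapture(video_path) 
  while cap.isOpened(): 
    ret, frame = cap.read() 
    if not ret: 
      break 
    # detect() returns a pixelpoint per object: draw its box and class label 
    for p in yolo.detect(frame): 
      cv2.rectangle(frame, p.box[0], p.box[1], [0, 255, 0], 2) 
      cv2.putText(frame, p.class_id, p.box[0], cv2.FONT_HERSHEY_SIMPLEX, 
                    1, [0, 255, 0], 2) 
    cv2.imshow('yolov3', frame) 
    if cv2.waitKey(10) & 0xff == ord('q'):   # press 'q' to quit 
      break 
  cap.release() 
  cv2.destroyAllWindows() 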

if __name__ == "__main__": 

  import doctest, contextlib 
  from c4dynamics import IgnoreOutputChecker, cprint 

  # Register the custom OutputChecker 
  doctest.OutputChecker = IgnoreOutputChecker 

  tofile = False 
  optionflags = doctest.FAIL_FAST 

  if tofile: 
    with open(os.path.join('tests', '_out', 'output.txt'), 'w') as f: 
      with contextlib.redirect_stdout(f), contextlib.redirect_stderr(f): 
        result = doctest.testmod(optionflags = optionflags) 
  else: 
    result = doctest.testmod(optionflags = optionflags) 

  if result.failed == 0: 
    cprint(os.path.basename(__file__) + ": all tests passed!", 'g') 
  else: 
    print(f"{result.failed} tests failed") 