Source code for c4dynamics.states.lib.pixelpoint

import numpy as np
import sys 
sys.path.append('.')
from c4dynamics.states.state import state 


# from enum import Enum



[docs]
class pixelpoint(state):
  ''' 
    A data point in a video frame with a bounding box. 

    :class:`pixelpoint` is a data structure for managing 
    object detections in tracking missions. 
    It provides properties and methods to 
    conveniently interact with computer vision modules. 

    The `pixelpoint` state variables are 
    the object center and the bounding box: 
    
    .. math:: 

      X = [x, y, w, h]^T 

    - Center pixel, bounding box size. 



    
    **Arguments** 

    x : float or int 
        The x-coordinate of the center of the object.

    y : float or int 
        The y-coordinate of the center of the object.

    w : float or int
        The width of the bounding box.
    
    h : float or int
        The height of the bounding box.
    

    These variables use for streamlined operations through 
    the state vector :attr:`X <c4dynamics.states.state.state.X>`.

    Parameters define the object and are not part of the state. 
    For the `pixelpoint` class these are the frame size and the object classification. 


    Parameters
    ==========

    fsize : (int, int)
        Frame size in pixels. 

    class_id : str
        Class label associated with the object.

        
    **Construction** 
    

    A `pixelpoint` instance is typically created 
    when a new object is detected.  

    For a given detection `d` with the following indices: 

    - [0] : x-center, pixels
    - [1] : y-center, pixels
    - [2] : width, pixels
    - [3] : height, pixels
    - [4:end] : probabilities for each class of the list `class_names` 
    
    and for frames with dimensions `(f_width, f_height)`, the following snippet 
    constructs a `pixelpoint` and updates its properties. 
    
    Import packages: 

    .. code:: 

      >>> from c4dynamics import pixelpoint 
      >>> import numpy as np 


    Given a detection with arbitrary values: 
    
    .. code:: 

      >>> d = [50, 50, 15, 25, 0.8, 0.1, 0.0, 0.05, 0.89]
      >>> f_width, f_height = 100, 100
      >>> class_names = ['dog', 'cat', 'horse', 'fox']
      
    Initialize a `pixelpoint` instance with the input detection: 
    
    .. code:: 

      >>> pp = pixelpoint(x = d[0], y = d[1], w = d[2], h = d[3])
      >>> pp.fsize = (f_width, f_height)
      >>> pp.class_id = class_names[np.argmax(d[5:])]

        
    See Also
    ========
    .lib
    .state 

    Examples
    ========

    **Setup and Preliminaries** 


    Import required packages:

    .. code:: 

      >>> import cv2  # opencv-python 
      >>> import numpy as np 
      >>> import c4dynamics as c4d 
      >>> from matplotlib import pyplot as plt



    Fetch 'planes.png' and 'triangle.png' using the c4dynamics' 
    datasets module (see :mod:`c4dynamics.datasets`):         

    .. code::

      >>> tripath = c4d.datasets.image('triangle')
      Fetched successfully
      >>> planspath = c4d.datasets.image('planes')
      Fetched successfully



    Define two auxiliary functions. 
    The first, `tridetect()`, returns bounding boxes of the detected triangles:

    .. code::

      >>> def tridetect(img):
      ...   _, thresh = cv2.threshold(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 50, 255, 0)
      ...   contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
      ...   bbox = []
      ...   for contour in contours:
      ...     approx = cv2.approxPolyDP(contour, 0.01 * cv2.arcLength(contour, True), True)    
      ...     if len(approx) == 3:
      ...       bbox.append(cv2.boundingRect(contour))
      ...   return bbox


    The second function, `ptup`, converts a tuple of two numbers into a formatted string: 

    .. code::

      >>> def ptup(n): 
      ...   return '(' + str(n[0]) + ', ' + str(n[1]) + ')'




      
    **Construction from image** 

    .. code:: 

      >>> img = cv2.imread(tripath)
      >>> pp = c4d.pixelpoint(x = int(img.shape[1] / 2), y = int(img.shape[0] / 2), w = 100, h = 100)
      >>> pp.fsize = img.shape[:2]
      >>> pp.class_id = 'triangle' 


      
    **Construction from detection** 

    Given a frame with dimensions `(f_width, f_height)` and a detection `d` from an object detector: 

    .. code:: 

      >>> pp = c4d.pixelpoint(x = d[0], y = d[1], w = d[2], h = d[3])
      >>> pp.fsize = (f_width, f_height)
      >>> pp.class_id = class_names[np.argmax(d[5:])]




    **Triangles detection**

    Run a triangles detector and create a `pixelpoint` object 
    per each detected triangle. 
    Use :attr:`box <c4dynamics.states.lib.pixelpoint.pixelpoint.box>` 
    to draw bounding boxes:

    .. code:: 

      >>> img = cv2.imread(tripath)
      >>> triangles = tridetect(img)
    
    
    .. code:: 

      >>> print('{:^10} | {:^10} | {:^16} | {:^16} | {:^10} | {:^14}'.format('center x', 'center y', 'box top-left', 'box bottom-right', 'class', 'frame size')) # doctest: +IGNORE_OUTPUT 
      >>> # iterate over the detected triangles: 
      >>> for tri in triangles: # doctest: +IGNORE_OUTPUT 
      ...   pp = c4d.pixelpoint(x = int(tri[0] + tri[2] / 2), y = int(tri[1] + tri[3] / 2), w = tri[2], h = tri[3])
      ...   pp.fsize = img.shape[:2]
      ...   pp.class_id = 'triangle'
      ...   print('{:^10d} | {:^10d} | {:^16} | {:^16} | {:^10} | {:^14}'.format(pp.x, pp.y, ptup(pp.box[0]), ptup(pp.box[1]), pp.class_id, ptup(pp.fsize))) 
      ...   cv2.rectangle(img, pp.box[0], pp.box[1], [0, 255, 0], 2)
      center x  |  center y  |   box top-left   | box bottom-right |   class    |   frame size  
        399     |    274     |    (184, 117)    |    (614, 431)    |  triangle  |   (600, 800)
      
    .. code:: 

      >>> plt.figure() # doctest: +IGNORE_OUTPUT 
      >>> plt.axis(False) # doctest: +IGNORE_OUTPUT 
      >>> plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB)) # doctest: +IGNORE_OUTPUT 


    .. figure:: /_examples/pixelpoint/triangle.png

      





    **C4dynamics' YOLOv3 detector** 

    The method :meth:`detect <c4dynamics.detectors.yolo3_opencv.yolov3.detect>` 
    of the class :class:`yolov3 <c4dynamics.detectors.yolo3_opencv.yolov3>` 
    returns a list of `pixelpoint` for the detected objects in an image. 
    Print the output per a detected object and view the final image: 

    
    .. code:: 

      >>> img = cv2.imread(planspath)
      >>> yolo3 = c4d.detectors.yolov3()    
      Fetched successfully
      >>> pts = yolo3.detect(img)
    
    
    .. code:: 

      >>> # prepare for printing properties:  
      >>> print('{:^10} | {:^10} | {:^16} | {:^16} | {:^10} | {:^14}'.format('center x', 'center y', 'box top-left', 'box bottom-right', 'class', 'frame size'))  # doctest: +IGNORE_OUTPUT 
      >>> for p in pts:   # doctest: +IGNORE_OUTPUT 
      ...   print('{:^10d} | {:^10d} | {:^16} | {:^16} | {:^10} | {:^14}'.format(p.x, p.y, ptup(p.box[0]), ptup(p.box[1]), p.class_id, ptup(p.fsize)))
      ...   cv2.rectangle(img, p.box[0], p.box[1], [0, 255, 0], 2)
      ...   point = (int((p.box[0][0] + p.box[1][0]) / 2 - 75), p.box[1][1] + 22)
      ...   cv2.putText(img, p.class_id, point, cv2.FONT_HERSHEY_SIMPLEX, 1, [0, 255, 0], 2)
      center x  |  center y  |   box top-left   | box bottom-right |   class    |  frame size
        615     |    295     |    (562, 259)    |    (668, 331)    | aeroplane  |  (1280, 720)
        779     |    233     |    (720, 199)    |    (838, 267)    | aeroplane  |  (1280, 720)
        635     |    189     |    (578, 153)    |    (692, 225)    | aeroplane  |  (1280, 720)
        793     |    575     |    (742, 540)    |    (844, 610)    | aeroplane  |  (1280, 720)


    .. code:: 

      >>> plt.figure()  # doctest: +IGNORE_OUTPUT 
      >>> plt.axis(False)  # doctest: +IGNORE_OUTPUT 
      >>> plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))  # doctest: +IGNORE_OUTPUT 


    .. figure:: /_examples/pixelpoint/yolov3.png

  '''  
  x: float
  y: float
  w: float
  h: float

  def __init__(self, x = 0, y = 0, w = 0, h = 0):
      
 
    ppargs = {}
    ppargs.setdefault('x', x) 
    ppargs.setdefault('y', y) 
    ppargs.setdefault('w', w) 
    ppargs.setdefault('h', h) 
     

    # super().__init__(x = bbox[0], y = bbox[1], w = bbox[2], h = bbox[3])
    super().__init__(**ppargs)
    # NOTE what's the ultimate state? with velocities or not? 
    #       they can be added later. 
    # but if in anyway they are added, why not to introduce them here?
    #       

    # self._units = 'pixels'


    # NOTE still not sure whats the best idea to init of these properties: 
    # on the one hand they are not part of he state and shoudlnt be provdied as variables
    # on the other they are necessary for methods like box. 
    # also the filters inherit from the state class i.e. they are states 
    # nontheless they are initialized not only with the state vairables but also with parameters. 
    self._frameheight = None
    self._framewidth = None
    self._class = None 

    # - x (float): X-coordinate of the center of the bounding box in relative coordinates.
    # - y (float): Y-coordinate of the center of the bounding box in relative coordinates.


    # self._boxwidth = bbox[2]
    # ''' float; Width of the bounding box in a normalized format 
    # (0 = left image edge, 1 = right image edge. '''

    # self._boxheight = bbox[3]
    # ''' float; Height of the bounding box in a normalized format
    # (0 = upper image edge, 1 = bottom image edge. '''


    # self._framewidth = framesize[0]
    # ''' int; Width of the frame in pixels. '''

    # self._frameheight = framesize[1]
    # ''' int; Height of the frame in pixels. '''


    # self.class_id = class_id 
    # ''' string; Class label associated with the data point. '''
    


  # @property
  # def X(self): 
  #   return super().X.astype(np.int64)
  

  # parameters 

  @property
  def fsize(self):
    '''

      Gets and sets the frame size. 

      Parameters
      ----------
      fsize : tuple 
          Size of the frame in pixels (width, height).
          - (width)  int : Frame width in pixels. 
          - (height) int : Frame height in pixels. 


      Returns
      -------
      out : tuple 
          A tuple of the frame size in pixels (width, height). 
          

      Raises
      ------
      ValueError
          If `fsize` doesn't have exactly two elements, a ValueError is raised.

      Examples
      --------
      For detailed usage, 
      see the examples in the introduction to the :class:`pixelpoint` class.
    '''

    # TODO any way add even line example to show how to use given the objects defined.  
    # for all the other methods too. 
    return (self._framewidth, self._frameheight)

  @fsize.setter
  def fsize(self, fsize):
    if len(fsize) != 2:
      raise ValueError('fsize must have exactly two elements.')

    self._framewidth  = fsize[0]
    self._frameheight = fsize[1]
    
  
  @property  
  def class_id(self):
    '''
      Gets and sets the object classification. 

      
      Parameters
      ----------
      class_id : str 
          Class label associated with the data point.

          
      Returns 
      -------
      out : str 
          The current class label associated with the data point.


      Raises
      ------
      ValueError
          If `class_id` is not str, a ValueError is raised.
    
      
      Examples
      --------
      For detailed usage, 
      see the examples in the introduction to the :class:`pixelpoint` class.

    ''' 
    return self._class 

  @class_id.setter 
  def class_id(self, class_id):
    if not isinstance(class_id, str):
      raise TypeError('class_id must be an str object')

    self._class = class_id 
  


  # properties 

  @property
  def box(self):
    '''
      Returns the box coordinates.
      
      The box coordinates are given by: 
      `[(x top left, y top left), (x bottom right, y bottom right)]` 

      Returns
      -------
      out : list[tuple] 
          List containing two tuples representing 
          top-left and bottom-right coordinates (in pixels).

      Examples
      --------
      For detailed usage, 
      see the examples in the introduction to the :class:`pixelpoint` class.
      
    ''' 
    # .. include:: ../../states.lib.pixelpoint.examples.rst
        
    # if self._units == 'normalized':
    #   if self._frameheight is None or self._framewidth is None: 
    #     raise ValueError('When ''pixelpoint'' units are ''normalized'', the property ''fsize'' '
    #                         'must be set first with the frame width and height.')

    #   x = int(self.x * self._framewidth)
    #   y = int(self.y * self._frameheight)

    #   w = self.w * self._framewidth
    #   h = self.h * self._frameheight

    # else: # units = pixels. 

    x = self.x
    y = self.y

    w = self.w  
    h = self.h      


    # top left
    xtl = int(x - w / 2)
    ytl = int(y - h / 2)

    # bottom right 
    xbr = int(x + w / 2)
    ybr = int(y + h / 2)

    return [(xtl, ytl), (xbr, ybr)]



  @property
  def Xpixels(self): 
    '''
      Returns the state vector in pixel coordinates.  

      When pixelpoint.units are set to `normalized`, the method `Xpixels` 
      is used to return the state vector in pixels. 

      Returns
      -------
      out : numpy.int32
        A numpy array of the normalized coordinates :math:`[x, y, v_x, v_y]` transformed
        to pixel coordinates considering the specific dimensions of the image. 
          
          
      Examples
      --------

      Import required packages: 
      
      .. code:: 

        >>> import c4dynamics as c4d
        >>> import cv2 


      Settings and initialization: 

      .. code:: 
 
        >>> imgpath = c4d.datasets.image('planes')
        Fetched successfully
        >>> img = cv2.imread(imgpath)
        >>> yolo3 = c4d.detectors.yolov3()
        Fetched successfully
        >>> pts = yolo3.detect(img)
        
      Main loop: 

      .. code:: 

        >>> print('{:^10} | {:^12} | {:^12} | {:^12} | {:^12}'.format('# object', 'X normalized', 'Y normalized', 'X pixels', 'Y pixels'))  # doctest: +IGNORE_OUTPUT
        >>> for i, p in enumerate(pts):  # doctest: +IGNORE_OUTPUT 
        ...     X = p.Xpixels
        ...     print('{:^10d} | {:^12.3f} | {:^12.3f} | {:^12d} | {:^12d}'.format(i, p.x, p.y, X[0], X[1]))
        # object | X normalized | Y normalized |   X pixels   |   Y pixels  
          0      |    0.427     |    0.339     |     503      |     232     
          1      |    0.411     |    0.491     |     484      |     336     
          2      |    0.550     |    0.397     |     648      |     272     
          3      |    0.507     |    0.916     |     598      |     627     

    '''
    # TODO complete with full state vector. 

    # superx = super().X
    return np.array([self.x * self._framewidth        # x   # type: ignore 
                      , self.y * self._frameheight      # y   # type: ignore 
                        , self.w * self._framewidth       # w   # type: ignore 
                          , self.h * self._frameheight]      # h  # type: ignore   
                            , dtype = np.int32)
  

  
  @staticmethod
  def boxcenter(box):
    # XXX seems like useless function and indeed is not in use anywhere. 
    '''
    
      Calculates the center coordinates of bounding boxes.

      Given a list of bounding boxes, this static method computes the center
      coordinates for each box.



      Parameters
      ----------
      out : list[box] 
        List containing one pixelpoint.box or more. where  
        every pixelpoint.box has two tuples 
        representing top-left and bottom-right coordinates.

      Returns
      -------
      out : numpy.ndarray
          An array containing center coordinates for each bounding box in the
          format [[center_x1, center_y1], [center_x2, center_y2], ...].

    '''

    return np.array([[(b[0] + b[2]) / 2, (b[1] + b[3]) / 2] for b in box]) 


  @staticmethod
  def video_detections(vidpath, tf = None, storepath = False):
    import c4dynamics as c4d 
    import pickle 
    import zlib 
    import cv2 
    import os 

    cap = cv2.VideoCapture(vidpath)
    fps = cap.get(cv2.CAP_PROP_FPS)
    dt = 1 / fps # 1 / frame per second = the length of a single frame
    if tf is None: 
      N = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # total frames count 
      tf = N * dt 
    f_frames = int(tf / dt)

    yolo3 = c4d.detectors.yolov3()
    yolo3.nms_th = .45

    detections = {}

    for _ in range(f_frames + 1):
      print(_)
      ret, frame = cap.read()
      if not ret: break
      crc32 = zlib.crc32(frame.tobytes())
      detections[crc32] = yolo3.detect(frame)
      

    if storepath:
      pklname = os.path.join(storepath, os.path.basename(vidpath)[:-4] + '.pkl') # type: ignore 
      with open(pklname, 'wb') as file:
        pickle.dump(detections, file)
      print(f'detections stored at {pklname}')

        
    cap.release()

    return detections





  # def set_box_size(self, width, height):
  #   # TODO document! 
  #   '''
  #   Sets the box size (box width, box height) 
  #   without changing the center. 


  #   Parameters
  #   ----------
  #   b : tuple(width, height)
  #     A tuple containing two integers representing width and height (in pixels).

      
  #   Note
  #   ----
  #   This function sets the box width and height without
  #   chaning the box center. 
  #   The center of the box is modified only by 
  #   direct substitution to the state variables 
  #   or by setting the state vector (:attr:`X <datapoint.X>`). 

      
        
  #   Examples
  #   --------

  #   .. code:: 

  #       >>> width  = 800
  #       >>> height = 600
  #       >>> radius = 50
  #       >>> img = np.zeros((height, width, 3), dtype = np.uint8)
  #       >>> cv2.circle(img, (width // 2, height // 2), radius, (255, 0, 0), -1)
  #       >>> fdp = c4d.pixelpoint(bbox = (0, 0, 0, 0), class_id = 'ball', framesize = (width, height))
  #       >>> fdp.x = 0.5 
  #       >>> fdp.y = 0.5 
  #       >>> fdp.set_box_size(2 * radius + 2, 2 * radius + 2)
  #       >>> cv2.rectangle(img, fdp.box[0], fdp.box[1], [255, 255, 255], 2)
  #       >>> _, ax3 = plt.subplots()
  #       >>> ax3.axis('off')
  #       >>> ax3.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

  #   .. figure:: /_architecture/images/fdp_setboxsize.png
    


  #   '''

  #   # self._boxwidth  = width  / self._framewidth
  #   # self._boxheight = height / self._frameheight










  # class ppunits(Enum):
  #   pixels = 'pixels'
  #   normalized = 'normalized'


  # @property  
  # def units(self):
  #   '''
  #   Gets and sets the image coordinates units. 

  #   Select between two modes of units: 
  #   - pixels (default): the image coordinates range between `[0 : width - 1]` horizontally and `[0 : height - 1]` vertically. 
  #   - normalized: the image coordinates range between `[0 : 1]` in both axes where `0` represents the top and the left edges and `1` the opposite.  

  #   Note 
  #   ---- 
  #   Setting `units` as 'normalized' must be preceded by setting the frame size by the `fsize` property.


  #   Parameters
  #   ----------
  #   units : str
  #       Required units. 
  #       - 'pixels' (default) : image coordinates are `[0 : width - 1]` in the horizontal plane and `[0 : height - 1]` vertical plane.
  #       - 'normalized' : coordinates are `[0 : 1]` in the horizontal plane and `[0 : 1]` vertical plane. `0` represents the top and the left edges. 

  #   Returns
  #   -------
  #   out : str
  #       Current coordinates units. 


  #   Raises
  #   ------
  #   ValueError
  #       - If `units` is not in 'pixels' or 'normalized', a ValueError is raised.
  #       - If `units` is 'normalized' and the `fsize` property is not set, a ValueError is raised.
            
  #   Example
  #   -------    
    
  #   '''
  #   return self._units 

  # @units.setter 
  # def units(self, units):
  #   if not units in pixelpoint.ppunits:
  #     raise ValueError(f'Invalid units. Choose from {[value for option in pixelpoint.ppunits]}')
  #   if units == 'normalized' and self.fsize is None: 
  #     raise ValueError(f'`fsize` property must be set before `units = ''normalized''` is selected')
    
  #   # if self._units == 'normalized' and (self._frameheight is None or self._framewidth is None): 
  #   #   raise ValueError('When pixelpoint units are ''normalized'', the property ''fsize'' '
  #   #                         'must be set first with the frame width and height.')

  #   #
  #   # currently leaving it because it's too complicated to track after the updates
  #   # of the state and verify it's normalized. it probably invloves overriding X which 
  #   # i dont want to do right now.  
  #   ## 
    
  #   self._units = units 
  
  
  # Note
  # ----
  # The pixelpoint has two modes to represent the state coordinates 
  # (:attr:`X <c4dynamics.states.state.state.X>`); pixels (default) and normalized, 
  # controlled by the property 
  # (:attr:`units <c4dynamics.states.lib.pixelpoint.pixelpoint.units>`). 
  # In the `pixels` mode, the coordinates are directly represented by the pixel dimensions. 
  # The `normalized` mode represents the image by normalized coordinates, 
  # ranging from `0` to `1`, where `0` represents 
  # the left or the upper edge, and `1` represents the right or the bottom edge. 


if __name__ == "__main__":

  import doctest, contextlib, os
  from c4dynamics import IgnoreOutputChecker, cprint
  
  # Register the custom OutputChecker
  doctest.OutputChecker = IgnoreOutputChecker

  tofile = False 
  optionflags = doctest.FAIL_FAST

  if tofile: 
    with open(os.path.join('tests', '_out', 'output.txt'), 'w') as f:
      with contextlib.redirect_stdout(f), contextlib.redirect_stderr(f):
        result = doctest.testmod(optionflags = optionflags) 
  else: 
    result = doctest.testmod(optionflags = optionflags)

  if result.failed == 0:
    cprint(os.path.basename(__file__) + ": all tests passed!", 'g')
  else:
    print(f"{result.failed}")