"""
Provides image processing and AI vision features.
Class:
:meth:`~openpibo.vision_detect.putTextPIL`
:meth:`~openpibo.vision_detect.vision_api`
:obj:`~openpibo.vision_detect.Detect`
"""
import cv2,dlib,requests
import os,pickle,math
import numpy as np
from pyzbar import pyzbar
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
from ultralytics import YOLO
from .modules.pose.movenet import Movenet
from .modules.pose.utils import visualize_pose
from .modules.card.decode_card import get_card
from PIL import Image,ImageDraw,ImageFont
import openpibo_dlib_models
import openpibo_models
import openpibo_detect_models
import logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow C++ logs
os.environ['LIBCAMERA_LOG_LEVELS'] = '3'
# Silence TensorFlow and Ultralytics internal debug messages
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("ultralytics").setLevel(logging.ERROR)
def putTextPIL(img, text, points, size=30, colors=(255,255,255)):
"""
Draws text on an image (supports Korean and English, via Pillow).
:param numpy.ndarray img: image object (OpenCV)
:param str text: string to draw
:param tuple(int, int) points: top-left coordinate (x, y) of the text block
:param int size: font size
:param tuple(int, int, int) colors: text color as a (b, g, r) tuple or a hex string such as '#ffffff'
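example (a minimal sketch; 'sample.jpg' is a placeholder for any OpenCV BGR image)::
img = cv2.imread('sample.jpg')
putTextPIL(img, '안녕하세요', (10, 10), size=30, colors=(255, 255, 255))
cv2.imwrite('out.jpg', img)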
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
if type(points) is not tuple:
raise Exception(f'"{points}" must be tuple type')
if len(points) != 2:
raise Exception(f'len({points}) must be 2')
if type(colors) is str:
colors = (int(colors[5:7], 16), int(colors[3:5], 16), int(colors[1:3], 16))
if type(colors) is not tuple:
raise Exception(f'"{colors}" must be tuple type')
if len(colors) != 3:
raise Exception(f'len({colors}) must be 3')
font = ImageFont.truetype(openpibo_models.filepath("KDL.ttf"), size)
pil = Image.fromarray(img) # CV to PIL
ImageDraw.Draw(pil).text(points, text, font=font, fill=colors) # putText
img[:] = np.array(pil) # PIL to CV
return img
def vision_api(mode, image, params={}):
"""
Calls the AI vision API.
:param str mode: vision API endpoint to call (https://o-vapi.circul.us/guide)
:param str/numpy.ndarray image: image to analyze (file path or OpenCV image)
:param dict params: extra query parameters passed along with the request
:returns: result data as ``Json``
example::
{ 'type': 'caption', 'result': 'ok',
'data': {
caption: "사람에게 로봇을 과시하는 사람",
caption_en: "a person showing off a robot to a person",
raw: [
"a person showing off a robot to a person",
"a robot that is sitting on top of a table",
"a very cute white robot that is sitting in front of a table"
]
}
}
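usage example (a minimal sketch; the 'caption' mode is taken from the response above, other modes are listed at the guide URL)::
img = cv2.imread('sample.jpg')  # 'sample.jpg' is a placeholder; a file path also works
res = vision_api('caption', img)
print(res['data']['caption_en'])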
"""
if type(image) is np.ndarray:
return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile':cv2.imencode('.jpg', image)[1].tobytes()}, params=params).json()
else:
return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile':open(image, 'rb')}, params=params).json()
class Detect:
"""
Functions:
:meth:`~openpibo.vision_detect.Detect.load_hand_gesture_model`
:meth:`~openpibo.vision_detect.Detect.detect_object`
:meth:`~openpibo.vision_detect.Detect.detect_qr`
:meth:`~openpibo.vision_detect.Detect.detect_pose`
:meth:`~openpibo.vision_detect.Detect.analyze_pose`
:meth:`~openpibo.vision_detect.Detect.classify_image`
:meth:`~openpibo.vision_detect.Detect.object_tracker_init`
:meth:`~openpibo.vision_detect.Detect.track_object`
:meth:`~openpibo.vision_detect.Detect.detect_marker`
:meth:`~openpibo.vision_detect.Detect.detect_marker_vis`
A class that provides various recognition features.
* Object detection (YOLO)
* QR/barcode recognition (pyzbar)
* Pose estimation (MoveNet)
* Image classification
example::
from openpibo.vision_detect import Detect
detect = Detect()
# Run the code above once before each of the examples below.
"""
def __init__(self):
#self.object_class = ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
# 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
# 'None', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
# 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'None',
# 'backpack', 'umbrella', 'None', 'None', 'handbag', 'tie', 'suitcase', 'frisbee',
# 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
# 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'None', 'wine glass', 'cup',
# 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
# 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
# 'potted plant', 'bed', 'None', 'dining table', 'None', 'None', 'toilet', 'None', 'tv',
# 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
# 'toaster', 'sink', 'refrigerator', 'None', 'book', 'clock', 'vase', 'scissors',
# 'teddy bear', 'hair drier']
#self.mobilenet = cv2.dnn.readNet(
# openpibo_detect_models.filepath("frozen_inference_graph.pb"),
# openpibo_detect_models.filepath("ssd_mobilenet_v2_coco_2018_03_29.pbtxt")
# )
self.object_detector = YOLO("/home/pi/.model/object/yolo11s.onnx", task="detect")
self.pose_detector = Movenet(openpibo_detect_models.filepath("movenet_lightning.tflite"))
# marker
self.camera_matrix = np.array([
[1.42068235e+03,0.00000000e+00,9.49208512e+02],
[0.00000000e+00,1.37416685e+03,5.39622051e+02],
[0.00000000e+00,0.00000000e+00,1.00000000e+00]])
self.distortion_coeff = np.array([1.69926613e-01,-7.40003491e-01,-7.45655262e-03,-1.79442353e-03, 2.46650225e+00])
#self.dictionary = cv2.aruco.Dictionary_get(cv2.aruco.DICT_4X4_50)
self.dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
self.parameters = cv2.aruco.DetectorParameters()
self.tracker = None
self.hand_gesture_recognizer = None
def load_hand_gesture_model(self, modelpath='/home/pi/.model/hand/gesture_recognizer.task'):
"""
Loads the hand gesture recognition model.
:param str modelpath: path to the hand gesture recognition model (.task)
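example (a minimal sketch; the alternative rps_recognizer path comes from the comment below)::
detect.load_hand_gesture_model()  # default gesture model
# detect.load_hand_gesture_model('/home/pi/.model/hand/rps_recognizer.task')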
"""
# Available models: '/home/pi/.model/hand/gesture_recognizer.task', '/home/pi/.model/hand/rps_recognizer.task'
self.hand_gesture_recognizer = mp_vision.GestureRecognizer.create_from_options(
mp_vision.GestureRecognizerOptions(
base_options=mp_python.BaseOptions(model_asset_path=modelpath),
running_mode=mp_vision.RunningMode.IMAGE,
num_hands= 2,
min_hand_detection_confidence= 0.5,
min_hand_presence_confidence= 0.5,
min_tracking_confidence= 0.5,
)
)
def recognize_hand_gesture(self, image):
"""
Recognizes hand gestures.
:param numpy.ndarray image: image object (OpenCV)
:returns: list of recognized hands, e.g. ``[{"point": [(x, y), ...], "name": gesture name, "score": confidence}, ...]``
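example (a minimal sketch; assumes the model was loaded with load_hand_gesture_model)::
detect.load_hand_gesture_model()
img = camera.read()
items = detect.recognize_hand_gesture(img)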
"""
if self.hand_gesture_recognizer is None:
raise Exception('"load_hand_gesture_model" must be called')
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
recognition_result = self.hand_gesture_recognizer.recognize(mp_image)
hands_data = [] # list of per-hand results
if recognition_result and recognition_result.hand_landmarks:
height, width, _ = image.shape
for hand_index, hand_landmarks in enumerate(recognition_result.hand_landmarks):
# Extract pixel coordinates of the 21 landmarks
hpoints = []
for landmark in hand_landmarks:
px = int(landmark.x * width)
py = int(landmark.y * height)
hpoints.append((px, py))
name = ""
score = 0
if recognition_result.gestures:
gesture = recognition_result.gestures[hand_index]
name = gesture[0].category_name
score = round(gesture[0].score, 2)
hands_data.append({"point": hpoints, "name": name, "score": score})
return hands_data
def recognize_hand_gesture_vis(self, img, items):
"""
Draws hand gesture recognition results on the image.
:param numpy.ndarray img: image object (OpenCV)
:param array items: hand gesture recognition results (from recognize_hand_gesture)
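example (a minimal sketch; continues from recognize_hand_gesture)::
items = detect.recognize_hand_gesture(img)
detect.recognize_hand_gesture_vis(img, items)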
"""
for item in items:
# Visualize each hand
hpoints = item["point"]
name = item["name"]
score = item["score"]
# Draw the 21 landmark points as white dots
for px, py in hpoints:
cv2.circle(img, (px, py), 3, (255, 255, 255), -1)
# Draw the gesture label (name/score) near the first landmark
putTextPIL(img, f'{name}/{score}', (hpoints[0][0], hpoints[0][1] - 50), 30, (255, 0, 0))
def load_object_model(self, modelpath='/home/pi/.model/object/yolo11s.onnx'):
"""
Loads an object detection model.
:param str modelpath: path to the object detection model (YOLO .onnx)
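example (a minimal sketch; the default yolo11s.onnx path is shown)::
detect.load_object_model('/home/pi/.model/object/yolo11s.onnx')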
"""
del self.object_detector
self.object_detector = YOLO(modelpath, task="detect")
def detect_object(self, img):
"""
Detects objects in the image.
:param numpy.ndarray img: image object (OpenCV)
:returns: list of detections, e.g. ``[{"name": class name, "score": 0~100, "box": (x1, y1, x2, y2)}, ...]``
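example (a minimal sketch)::
img = camera.read()
items = detect.detect_object(img)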
"""
if not isinstance(img, np.ndarray):
raise ValueError('"img" must be a valid OpenCV image (np.ndarray).')
# Run inference (confidence threshold 0.5, IoU 0.4, input size 320)
results = self.object_detector.predict(img, conf=0.5, iou=0.4, imgsz=320, verbose=False, device='cpu')
# YOLO returns a list of Results objects; process the first one
data = []
if len(results) > 0:
# Each `results[0]` has .boxes attribute containing all detections
for box in results[0].boxes:
cls_id = int(box.cls[0]) # class index
score = float(box.conf[0]) # confidence score
# box.xyxy gives (x1, y1, x2, y2)
x1, y1, x2, y2 = map(int, box.xyxy[0])
if isinstance(self.object_detector.names, dict):
obj_name = self.object_detector.names.get(cls_id, "Unknown")
else:
obj_name = self.object_detector.names[cls_id]
# Keep only detections with at least 50% confidence
if score >= 0.5:
data.append({ "name": obj_name, "score": int(score * 100), "box": (x1, y1, x2, y2) })
return data
def detect_object_vis(self, img, items):
"""
Draws object detection results on the image.
:param numpy.ndarray img: image object (OpenCV)
:param array items: object detection results (from detect_object)
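example (a minimal sketch; continues from detect_object)::
items = detect.detect_object(img)
detect.detect_object_vis(img, items)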
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
for item in items:
x1,y1,x2,y2 = item['box']
name = item['name']
cv2.rectangle(img, (x1,y1), (x2,y2), (0,255,0), 2)
putTextPIL(img, name, (x1, y1-30), 30, (0,255,0))
def detect_qr(self, img):
"""
Recognizes QR codes and barcodes in the image.
example::
img = camera.read()
detect.detect_qr(img)
:param numpy.ndarray img: image object (OpenCV)
:returns: ``[{"data": decoded content, "type": barcode/QR type (or "CARD"), "box": (x1, y1, x2, y2)}, ...]``
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
results = []
barcodes = pyzbar.decode(img)
for barcode in barcodes:
x,y,w,h = barcode.rect
_type = barcode.type
_data = barcode.data.decode("utf-8")
res = get_card(_data)
if res != None:
_type, _data = "CARD", res
results.append({"data":_data, "type":_type, "box":(x,y,x+w,y+h)})
return results
def detect_qr_vis(self, img, items):
"""
Draws QR/barcode recognition results on the image.
:param numpy.ndarray img: image object (OpenCV)
:param array items: QR recognition results (from detect_qr)
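example (a minimal sketch; continues from detect_qr)::
items = detect.detect_qr(img)
detect.detect_qr_vis(img, items)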
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
for item in items:
x1,y1,x2,y2 = item['box']
data = item['data']
cv2.rectangle(img, (x1,y1), (x2,y2), (255,50,255), 2)
putTextPIL(img, str(data), (x1, y1-30), 30, (255,50,255))
def detect_pose(self, img):
"""
Estimates human pose in the image.
example::
img = camera.read()
detect.detect_pose(img)
:param numpy.ndarray img: image object (OpenCV)
:returns: pose estimation result (list of detected persons)
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
list_persons = [self.pose_detector.detect(img)]
return list_persons
def detect_pose_vis(self, img, items):
"""
Draws pose estimation results on the image.
:param numpy.ndarray img: image object (OpenCV)
:param array items: pose data (from detect_pose)
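example (a minimal sketch; continues from detect_pose)::
items = detect.detect_pose(img)
detect.detect_pose_vis(img, items)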
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
visualize_pose(img, items)
def analyze_pose(self, data):
"""
Analyzes the result of detect_pose.
example::
img = camera.read()
result = detect.detect_pose(img)
detect.analyze_pose(result)
:param list data: result returned by detect_pose
:returns: list of recognized poses among ``['left_hand_up', 'right_hand_up', 'clap']``
"""
def distance(p1, p2):
return math.sqrt((p1.x-p2.x)**2 + (p1.y-p2.y)**2)
NOSE, LEFT_EYE, RIGHT_EYE, LEFT_EAR, RIGHT_EAR = 0,1,2,3,4
LEFT_SHOULDER, RIGHT_SHOULDER, LEFT_ELBOW, RIGHT_ELBOW, LEFT_WRIST, RIGHT_WRIST = 5,6,7,8,9,10
LEFT_HIP, RIGHT_HIP, LEFT_KNEE, RIGHT_KNEE, LEFT_ANKLE, RIGHT_ANKLE = 11,12,13,14,15,16
res = []
data = data[0].keypoints
if data[LEFT_WRIST].coordinate.y < data[LEFT_ELBOW].coordinate.y:
res.append("left_hand_up")
if data[RIGHT_WRIST].coordinate.y < data[RIGHT_ELBOW].coordinate.y:
res.append("right_hand_up")
if distance(data[LEFT_WRIST].coordinate, data[RIGHT_WRIST].coordinate) < 75:
res.append("clap")
return res
def object_tracker_init(self, img, p):
"""
Initializes the object tracker on the image.
example::
img = camera.read()
detect.object_tracker_init(img, (10,10,100,100))
:param numpy.ndarray img: image object (OpenCV)
:param tuple(int, int, int, int) p: initial bounding box (x1, y1, x2, y2)
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
self.tracker = dlib.correlation_tracker()
x1,y1,x2,y2 = p
self.tracker.start_track(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), dlib.rectangle(x1,y1,x2,y2))
def track_object(self, img):
"""
Updates the object tracker with a new image.
example::
img = camera.read()
detect.object_tracker_init(img, (10,10,100,100))
position = detect.track_object(img)
:param numpy.ndarray img: image object (OpenCV)
:returns: updated object position (x1, y1, x2, y2)
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
self.tracker.update(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
box = self.tracker.get_position()
x1 = int(box.left())
y1 = int(box.top())
x2 = int(box.right())
y2 = int(box.bottom())
height, width = img.shape[:2]
x1 = max(0, int(x1))
y1 = max(0, int(y1))
x2 = min(width - 1, int(x2))
y2 = min(height - 1, int(y2))
return (x1, y1, x2, y2)
def track_object_vis(self, img, item):
"""
Draws the tracked object box on the image.
:param numpy.ndarray img: image object (OpenCV)
:param tuple item: tracked object position (x1, y1, x2, y2) from track_object
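example (a minimal sketch; continues from track_object)::
position = detect.track_object(img)
detect.track_object_vis(img, position)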
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
x1,y1,x2,y2 = item
cv2.rectangle(img, (x1,y1), (x2,y2), (255,50,255), 2)
def detect_marker(self, img, marker_length=2):
"""
Detects ArUco markers (cv2.aruco.DICT_4X4_50) in the image.
example::
img = camera.read()
result = detect.detect_marker(img)
:param numpy.ndarray img: image object (OpenCV)
:param int marker_length: physical marker side length used for distance estimation (default 2)
:returns: ``[{"id": marker id, "center": (cX, cY), "box": [topLeft, topRight, bottomRight, bottomLeft], "distance": distance in cm}, ...]``
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
marker_length /= 250
corners, ids, _ = cv2.aruco.detectMarkers(img, self.dictionary, parameters=self.parameters)
res = []
if len(corners) > 0:
# img = cv2.aruco.drawDetectedMarkers(img, corners, ids)
ids = ids.flatten()
for (corner, markerID) in zip(corners, ids):
rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corner, marker_length, self.camera_matrix, self.distortion_coeff)
(topLeft, topRight, bottomRight, bottomLeft) = corner.reshape((4, 2))
topRight = (int(topRight[0]), int(topRight[1]))
topLeft = (int(topLeft[0]), int(topLeft[1]))
bottomRight = (int(bottomRight[0]), int(bottomRight[1]))
bottomLeft = (int(bottomLeft[0]), int(bottomLeft[1]))
cX = int((topLeft[0] + bottomRight[0]) / 2.0)
cY = int((topLeft[1] + bottomRight[1]) / 2.0)
distance = round(tvec[0][0][2] * 100, 1) #[cm]
# cv2.line(img, topLeft, topRight, (255, 0, 0), 4)
# cv2.line(img, topRight, bottomRight, (255, 0, 0), 4)
# cv2.line(img, bottomRight, bottomLeft, (255, 0, 0), 4)
# cv2.line(img, bottomLeft, topLeft, (255, 0, 0), 4)
# cv2.circle(img, (cX, cY), 4, (0, 0, 255), -1)
# putTextPIL(img, str(markerID), (topLeft[0], topLeft[1] - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
res.append({"id":markerID, "center": (cX, cY), "box": [topLeft, topRight, bottomRight, bottomLeft], "distance":distance})
return res
def detect_marker_vis(self, img, items):
"""
Draws marker detection results on the image.
:param numpy.ndarray img: image object (OpenCV)
:param array items: marker detection results (from detect_marker)
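example (a minimal sketch; continues from detect_marker)::
items = detect.detect_marker(img)
detect.detect_marker_vis(img, items)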
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
for item in items:
cX, cY = item['center']
topLeft, topRight, bottomRight, bottomLeft = item['box']
markerID = item['id']
distance = item['distance']
cv2.line(img, topLeft, topRight, (255, 0, 0), 4)
cv2.line(img, topRight, bottomRight, (255, 0, 0), 4)
cv2.line(img, bottomRight, bottomLeft, (255, 0, 0), 4)
cv2.line(img, bottomLeft, topLeft, (255, 0, 0), 4)
cv2.circle(img, (cX, cY), 4, (0, 0, 255), -1)
putTextPIL(img, f'{markerID}/{distance}cm', (topLeft[0], topLeft[1] - 15), 15, (0, 255, 0))