Source code for openpibo.vision_detect

"""
Provides image processing and AI vision features.

Functions:
:func:`~openpibo.vision_detect.putTextPIL`
:func:`~openpibo.vision_detect.vision_api`

Class:
:obj:`~openpibo.vision_detect.Detect`
"""
import cv2,dlib,requests
import os,pickle,math
import numpy as np
from pyzbar import pyzbar
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
from ultralytics import YOLO
from .modules.pose.movenet import Movenet
from .modules.pose.utils import visualize_pose
from .modules.card.decode_card import get_card
from PIL import Image,ImageDraw,ImageFont
import openpibo_dlib_models
import openpibo_models
import openpibo_detect_models
import logging

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # suppress TensorFlow C++ logs
os.environ['LIBCAMERA_LOG_LEVELS'] = '3'

# Fully suppress TensorFlow / Ultralytics internal debug messages
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("ultralytics").setLevel(logging.ERROR)

def putTextPIL(img, text, points, size=30, colors=(255,255,255)):
    """
    Draws text on an image (Korean/English supported, using Pillow).

    :param numpy.ndarray img: image object
    :param str text: string to draw
    :param tuple(int, int) points: top-left coordinate (x, y) of the text block
    :param int size: font size
    :param tuple(int, int, int) colors: text color as a (b, g, r) tuple or a hex string such as '#ffffff'
    """
    if not type(img) is np.ndarray:
        raise Exception('"img" must be image data from opencv')

    if type(points) is not tuple:
        raise Exception(f'"{points}" must be tuple type')

    if len(points) != 2:
        raise Exception(f'len({points}) must be 2')

    if type(colors) is str:
        colors = (int(colors[5:7], 16), int(colors[3:5], 16), int(colors[1:3], 16))

    if type(colors) is not tuple:
        raise Exception(f'"{colors}" must be tuple type')

    if len(colors) != 3:
        raise Exception(f'len({colors}) must be 3')

    font = ImageFont.truetype(openpibo_models.filepath("KDL.ttf"), size)
    pil = Image.fromarray(img)  # CV to PIL
    ImageDraw.Draw(pil).text(points, text, font=font, fill=colors)  # putText
    img[:] = np.array(pil)  # PIL to CV
    return img

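# Usage sketch for putTextPIL, assuming an OpenCV-style BGR array; the blank
# 640x480 canvas below is just an illustrative stand-in for a camera frame:
#
#   canvas = np.zeros((480, 640, 3), dtype=np.uint8)
#   putTextPIL(canvas, '안녕, 파이보!', (10, 10), 40, '#00ff00')
#   cv2.imwrite('text.jpg', canvas)
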
def vision_api(mode, image, params={}):
    """
    Calls the AI vision API.

    :param str mode: vision API to call (https://o-vapi.circul.us/guide)
    :param str/numpy.ndarray image: image to analyze (file path or cv image)
    :returns: result data as ``Json``

    example::

      {
        'type': 'caption',
        'result': 'ok',
        'data': {
          'caption': "사람에게 로봇을 과시하는 사람",
          'caption_en': "a person showing off a robot to a person",
          'raw': [
            "a person showing off a robot to a person",
            "a robot that is sitting on top of a table",
            "a very cute white robot that is sitting in front of a table"
          ]
        }
      }
    """
    if type(image) is np.ndarray:
        return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile': cv2.imencode('.jpg', image)[1].tobytes()}, params=params).json()
    else:
        return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile': open(image, 'rb')}, params=params).json()

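# Usage sketch for vision_api, assuming network access; 'caption' is the mode shown
# in the docstring example above, and 'sample.jpg' is a hypothetical local file:
#
#   res = vision_api('caption', 'sample.jpg')
#   print(res['result'], res['data'])
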
class Detect:
    """
    Functions:
    :meth:`~openpibo.vision_detect.Detect.load_hand_gesture_model`
    :meth:`~openpibo.vision_detect.Detect.recognize_hand_gesture`
    :meth:`~openpibo.vision_detect.Detect.recognize_hand_gesture_vis`
    :meth:`~openpibo.vision_detect.Detect.load_object_model`
    :meth:`~openpibo.vision_detect.Detect.detect_object`
    :meth:`~openpibo.vision_detect.Detect.detect_object_vis`
    :meth:`~openpibo.vision_detect.Detect.detect_qr`
    :meth:`~openpibo.vision_detect.Detect.detect_qr_vis`
    :meth:`~openpibo.vision_detect.Detect.detect_pose`
    :meth:`~openpibo.vision_detect.Detect.detect_pose_vis`
    :meth:`~openpibo.vision_detect.Detect.analyze_pose`
    :meth:`~openpibo.vision_detect.Detect.object_tracker_init`
    :meth:`~openpibo.vision_detect.Detect.track_object`
    :meth:`~openpibo.vision_detect.Detect.track_object_vis`
    :meth:`~openpibo.vision_detect.Detect.detect_marker`
    :meth:`~openpibo.vision_detect.Detect.detect_marker_vis`

    A class that provides various recognition features.

    * Object detection over the COCO classes (YOLO)
    * QR code / barcode recognition (pyzbar)
    * Pose detection (MoveNet)
    * Hand gesture recognition (MediaPipe)
    * ArUco marker detection

    example::

      from openpibo.vision_detect import Detect

      detect = Detect()
      # Run the code above before any of the examples below.
    """

    def __init__(self):
        #self.object_class = ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        #                     'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        #                     'None', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
        #                     'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'None',
        #                     'backpack', 'umbrella', 'None', 'None', 'handbag', 'tie', 'suitcase', 'frisbee',
        #                     'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        #                     'skateboard', 'surfboard', 'tennis racket', 'bottle', 'None', 'wine glass', 'cup',
        #                     'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange',
        #                     'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
        #                     'potted plant', 'bed', 'None', 'dining table', 'None', 'None', 'toilet', 'None', 'tv',
        #                     'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
        #                     'toaster', 'sink', 'refrigerator', 'None', 'book', 'clock', 'vase', 'scissors',
        #                     'teddy bear', 'hair drier']
        #self.mobilenet = cv2.dnn.readNet(
        #  openpibo_detect_models.filepath("frozen_inference_graph.pb"),
        #  openpibo_detect_models.filepath("ssd_mobilenet_v2_coco_2018_03_29.pbtxt")
        #)
        self.object_detector = YOLO("/home/pi/.model/object/yolo11s.onnx", task="detect")
        self.pose_detector = Movenet(openpibo_detect_models.filepath("movenet_lightning.tflite"))

        # marker
        self.camera_matrix = np.array([
            [1.42068235e+03, 0.00000000e+00, 9.49208512e+02],
            [0.00000000e+00, 1.37416685e+03, 5.39622051e+02],
            [0.00000000e+00, 0.00000000e+00, 1.00000000e+00]])
        self.distortion_coeff = np.array([1.69926613e-01, -7.40003491e-01, -7.45655262e-03, -1.79442353e-03, 2.46650225e+00])

        #self.dictionary = cv2.aruco.Dictionary_get(cv2.aruco.DICT_4X4_50)
        self.dictionary = cv2.aruco.getPredefinedDictionary(cv2.aruco.DICT_4X4_50)
        self.parameters = cv2.aruco.DetectorParameters()
        self.tracker = None
        self.hand_gesture_recognizer = None

    def load_hand_gesture_model(self, modelpath='/home/pi/.model/hand/gesture_recognizer.task'):
        """
        Loads the hand gesture recognition model.

        :param str modelpath: path to the hand gesture recognition model
        """
        # available models: '/home/pi/.model/hand/gesture_recognizer.task', '/home/pi/.model/hand/rps_recognizer.task'
        self.hand_gesture_recognizer = mp_vision.GestureRecognizer.create_from_options(
            mp_vision.GestureRecognizerOptions(
                base_options=mp_python.BaseOptions(model_asset_path=modelpath),
                running_mode=mp_vision.RunningMode.IMAGE,
                num_hands=2,
                min_hand_detection_confidence=0.5,
                min_hand_presence_confidence=0.5,
                min_tracking_confidence=0.5,
            )
        )

    def recognize_hand_gesture(self, image):
        """
        Recognizes hand gestures.

        :param numpy.ndarray image: image object
        :returns: hand gesture recognition result, depending on the loaded model
        """
        if self.hand_gesture_recognizer == None:
            raise Exception('"load_hand_gesture_model" must be called')

        rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
        recognition_result = self.hand_gesture_recognizer.recognize(mp_image)

        hands_data = []  # list that will hold per-hand information
        if recognition_result and recognition_result.hand_landmarks:
            height, width, _ = image.shape
            for hand_index, hand_landmarks in enumerate(recognition_result.hand_landmarks):
                # extract pixel coordinates for the 21 landmarks
                hpoints = []
                for landmark in hand_landmarks:
                    px = int(landmark.x * width)
                    py = int(landmark.y * height)
                    hpoints.append((px, py))

                name = ""
                score = 0
                if recognition_result.gestures:
                    gesture = recognition_result.gestures[hand_index]
                    name = gesture[0].category_name
                    score = round(gesture[0].score, 2)

                hands_data.append({"point": hpoints, "name": name, "score": score})
        return hands_data

    def recognize_hand_gesture_vis(self, img, items):
        """
        Displays hand gesture recognition results.

        :param numpy.ndarray img: image object
        :param array items: hand gesture recognition result
        """
        for item in items:  # visualize each hand
            hpoints = item["point"]
            name = item["name"]
            score = item["score"]

            # draw the 21 landmarks as small circles
            for px, py in hpoints:
                cv2.circle(img, (px, py), 3, (255, 255, 255), -1)

            # label the hand (gesture name / score) near the first landmark
            putTextPIL(img, f'{name}/{score}', (hpoints[0][0], hpoints[0][1] - 50), 30, (255, 0, 0))

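    # Usage sketch for the hand gesture pipeline, assuming the gesture model exists at
    # the default path and `camera` is an openpibo.vision Camera instance returning BGR frames:
    #
    #   from openpibo.vision import Camera
    #   from openpibo.vision_detect import Detect
    #   camera, detect = Camera(), Detect()
    #   detect.load_hand_gesture_model()
    #   img = camera.read()
    #   items = detect.recognize_hand_gesture(img)
    #   detect.recognize_hand_gesture_vis(img, items)
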
    def load_object_model(self, modelpath='/home/pi/.model/object/yolo11s.onnx'):
        """
        Loads the object detection model.

        :param str modelpath: path to the object detection model
        """
        del self.object_detector
        self.object_detector = YOLO(modelpath, task="detect")

    def detect_object(self, img):
        """
        Detects objects in an image.

        :param numpy.ndarray img: image object
        :returns: ``[{"name": object name, "score": confidence (%), "box": (x1, y1, x2, y2)}, ...]``
        """
        if not isinstance(img, np.ndarray):
            raise ValueError('"img" must be a valid OpenCV image (np.ndarray).')

        # Run inference; conf=0.5, iou=0.4, imgsz=320 can be adjusted as needed.
        results = self.object_detector.predict(img, conf=0.5, iou=0.4, imgsz=320, verbose=False, device='cpu')

        # YOLO returns a list of Results objects; only the first one is processed here.
        data = []
        if len(results) > 0:
            # results[0].boxes contains all detections
            for box in results[0].boxes:
                cls_id = int(box.cls[0])    # class index
                score = float(box.conf[0])  # confidence score
                x1, y1, x2, y2 = map(int, box.xyxy[0])  # box.xyxy gives (x1, y1, x2, y2)

                if isinstance(self.object_detector.names, dict):
                    obj_name = self.object_detector.names.get(cls_id, "Unknown")
                else:
                    obj_name = self.object_detector.names[cls_id]

                # keep only detections above 50% confidence
                if score >= 0.5:
                    data.append({
                        "name": obj_name,
                        "score": int(score * 100),
                        "box": (x1, y1, x2, y2)
                    })
        return data

    def detect_object_vis(self, img, items):
        """
        Displays object detection results.

        :param numpy.ndarray img: image object
        :param array items: object detection result
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        for item in items:
            x1, y1, x2, y2 = item['box']
            name = item['name']
            cv2.rectangle(img, (x1, y1), (x2, y2), (0, 255, 0), 2)
            putTextPIL(img, name, (x1, y1 - 30), 30, (0, 255, 0))

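    # Usage sketch for object detection, assuming `camera` and `detect` as in the class
    # docstring example; each item carries "name", "score" (%), and "box":
    #
    #   img = camera.read()
    #   items = detect.detect_object(img)
    #   for item in items:
    #       print(item['name'], item['score'], item['box'])
    #   detect.detect_object_vis(img, items)
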
    def detect_qr(self, img):
        """
        Recognizes QR codes and barcodes in an image.

        example::

          img = camera.read()
          detect.detect_qr(img)

        :param numpy.ndarray img: image object
        :returns: ``[{"data": decoded content, "type": barcode / QR code type, "box": (x1, y1, x2, y2)}, ...]``
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        results = []
        barcodes = pyzbar.decode(img)
        for barcode in barcodes:
            x, y, w, h = barcode.rect
            _type = barcode.type
            _data = barcode.data.decode("utf-8")
            res = get_card(_data)
            if res != None:
                _type, _data = "CARD", res
            results.append({"data": _data, "type": _type, "box": (x, y, x + w, y + h)})
        return results

    def detect_qr_vis(self, img, items):
        """
        Displays QR code / barcode recognition results.

        :param numpy.ndarray img: image object
        :param array items: QR recognition result
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        for item in items:
            x1, y1, x2, y2 = item['box']
            data = item['data']
            cv2.rectangle(img, (x1, y1), (x2, y2), (255, 50, 255), 2)
            putTextPIL(img, str(data), (x1, y1 - 30), 30, (255, 50, 255))

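    # Usage sketch for QR/barcode recognition, assuming `camera` and `detect` as above;
    # a decoded Pibo card is reported with type "CARD":
    #
    #   img = camera.read()
    #   items = detect.detect_qr(img)
    #   detect.detect_qr_vis(img, items)
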
    def detect_pose(self, img):
        """
        Detects poses in an image.

        example::

          img = camera.read()
          detect.detect_pose(img)

        :param numpy.ndarray img: image object
        :returns: ``detection result``
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        list_persons = [self.pose_detector.detect(img)]
        return list_persons

    def detect_pose_vis(self, img, items):
        """
        Displays pose detection results.

        :param numpy.ndarray img: image object
        :param array items: pose data
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        visualize_pose(img, items)

    def analyze_pose(self, data):
        """
        Analyzes the result of the detect_pose function.

        example::

          img = camera.read()
          result = detect.detect_pose(img)
          detect.analyze_pose(result)

        :param dict data: result of detect_pose
        :returns: ``list of recognized poses ['left_hand_up', 'right_hand_up', 'clap']``
        """
        def distance(p1, p2):
            return math.sqrt((p1.x - p2.x)**2 + (p1.y - p2.y)**2)

        NOSE, LEFT_EYE, RIGHT_EYE, LEFT_EAR, RIGHT_EAR = 0, 1, 2, 3, 4
        LEFT_SHOULDER, RIGHT_SHOULDER, LEFT_ELBOW, RIGHT_ELBOW, LEFT_WRIST, RIGHT_WRIST = 5, 6, 7, 8, 9, 10
        LEFT_HIP, RIGHT_HIP, LEFT_KNEE, RIGHT_KNEE, LEFT_ANKLE, RIGHT_ANKLE = 11, 12, 13, 14, 15, 16

        res = []
        data = data[0].keypoints

        if data[LEFT_WRIST].coordinate.y < data[LEFT_ELBOW].coordinate.y:
            res.append("left_hand_up")
        if data[RIGHT_WRIST].coordinate.y < data[RIGHT_ELBOW].coordinate.y:
            res.append("right_hand_up")
        if distance(data[LEFT_WRIST].coordinate, data[RIGHT_WRIST].coordinate) < 75:
            res.append("clap")

        return res

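    # Usage sketch for the pose pipeline, assuming `camera` and `detect` as above;
    # analyze_pose consumes the raw detect_pose output directly:
    #
    #   img = camera.read()
    #   result = detect.detect_pose(img)
    #   print(detect.analyze_pose(result))  # e.g. ['left_hand_up']
    #   detect.detect_pose_vis(img, result)
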
    def object_tracker_init(self, img, p):
        """
        Initializes the object tracker for an image.

        example::

          img = camera.read()
          detect.object_tracker_init(img, (10,10,100,100))

        :param numpy.ndarray img: image object
        :param tuple(int, int, int, int) p: initial object region (x1, y1, x2, y2)
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        self.tracker = dlib.correlation_tracker()
        x1, y1, x2, y2 = p
        self.tracker.start_track(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), dlib.rectangle(x1, y1, x2, y2))

    def track_object(self, img):
        """
        Updates the object tracker with a new image.

        example::

          img = camera.read()
          detect.object_tracker_init(img, (10,10,100,100))
          position = detect.track_object(img)

        :param numpy.ndarray img: image object
        :returns: updated object position (x1, y1, x2, y2)
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        self.tracker.update(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        box = self.tracker.get_position()

        x1 = int(box.left())
        y1 = int(box.top())
        x2 = int(box.right())
        y2 = int(box.bottom())

        # clamp the box to the image boundaries
        height, width = img.shape[:2]
        x1 = max(0, int(x1))
        y1 = max(0, int(y1))
        x2 = min(width - 1, int(x2))
        y2 = min(height - 1, int(y2))
        return (x1, y1, x2, y2)

    def track_object_vis(self, img, item):
        """
        Displays the object tracking result.

        :param numpy.ndarray img: image object
        :param tuple(int, int, int, int) item: tracked object position (x1, y1, x2, y2)
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        x1, y1, x2, y2 = item
        cv2.rectangle(img, (x1, y1), (x2, y2), (255, 50, 255), 2)

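    # Usage sketch for object tracking, assuming `camera` and `detect` as above;
    # the initial box (10, 10, 100, 100) is an arbitrary example region:
    #
    #   img = camera.read()
    #   detect.object_tracker_init(img, (10, 10, 100, 100))
    #   img = camera.read()
    #   position = detect.track_object(img)
    #   detect.track_object_vis(img, position)
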
    def detect_marker(self, img, marker_length=2):
        """
        Detects ArUco markers (cv2.aruco.DICT_4X4_50) in an image.

        example::

          img = camera.read()
          result = detect.detect_marker(img)

        :param numpy.ndarray img: image object
        :returns: ``[{"id": marker id, "center": (cX, cY), "box": [topLeft, topRight, bottomRight, bottomLeft], "distance": distance in cm}, ...]``
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        marker_length /= 250
        corners, ids, _ = cv2.aruco.detectMarkers(img, self.dictionary, parameters=self.parameters)

        res = []
        if len(corners) > 0:
            # img = cv2.aruco.drawDetectedMarkers(img, corners, ids)
            ids = ids.flatten()
            for (corner, markerID) in zip(corners, ids):
                rvec, tvec, _ = cv2.aruco.estimatePoseSingleMarkers(corner, marker_length, self.camera_matrix, self.distortion_coeff)
                (topLeft, topRight, bottomRight, bottomLeft) = corner.reshape((4, 2))

                topRight = (int(topRight[0]), int(topRight[1]))
                topLeft = (int(topLeft[0]), int(topLeft[1]))
                bottomRight = (int(bottomRight[0]), int(bottomRight[1]))
                bottomLeft = (int(bottomLeft[0]), int(bottomLeft[1]))

                cX = int((topLeft[0] + bottomRight[0]) / 2.0)
                cY = int((topLeft[1] + bottomRight[1]) / 2.0)
                distance = round(tvec[0][0][2] * 100, 1)  # [cm]

                # cv2.line(img, topLeft, topRight, (255, 0, 0), 4)
                # cv2.line(img, topRight, bottomRight, (255, 0, 0), 4)
                # cv2.line(img, bottomRight, bottomLeft, (255, 0, 0), 4)
                # cv2.line(img, bottomLeft, topLeft, (255, 0, 0), 4)
                # cv2.circle(img, (cX, cY), 4, (0, 0, 255), -1)
                # putTextPIL(img, str(markerID), (topLeft[0], topLeft[1] - 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

                res.append({"id": markerID, "center": (cX, cY), "box": [topLeft, topRight, bottomRight, bottomLeft], "distance": distance})
        return res

    def detect_marker_vis(self, img, items):
        """
        Displays marker detection results.

        :param numpy.ndarray img: image object
        :param array items: marker detection result
        """
        if not type(img) is np.ndarray:
            raise Exception('"img" must be image data from opencv')

        for item in items:
            cX, cY = item['center']
            topLeft, topRight, bottomRight, bottomLeft = item['box']
            markerID = item['id']
            distance = item['distance']

            cv2.line(img, topLeft, topRight, (255, 0, 0), 4)
            cv2.line(img, topRight, bottomRight, (255, 0, 0), 4)
            cv2.line(img, bottomRight, bottomLeft, (255, 0, 0), 4)
            cv2.line(img, bottomLeft, topLeft, (255, 0, 0), 4)
            cv2.circle(img, (cX, cY), 4, (0, 0, 255), -1)
            putTextPIL(img, f'{markerID}/{distance}cm', (topLeft[0], topLeft[1] - 15), 15, (0, 255, 0))
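
    # Usage sketch for marker detection, assuming `camera` and `detect` as above and a
    # printed DICT_4X4_50 marker in view; distance is reported in cm:
    #
    #   img = camera.read()
    #   items = detect.detect_marker(img)
    #   detect.detect_marker_vis(img, items)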