"""
Image processing and AI vision features.
Class:
:func:`~openpibo.vision_detect.putTextPIL`
:func:`~openpibo.vision_detect.vision_api`
:obj:`~openpibo.vision_detect.Face`
"""
import cv2,dlib
import os,pickle,math
import requests
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
from openvino.runtime import Core
from PIL import Image,ImageDraw,ImageFont
import openpibo_dlib_models
import openpibo_models
import logging
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # suppress TensorFlow C++ logging
os.environ['LIBCAMERA_LOG_LEVELS'] = '3'
# Silence TensorFlow and Ultralytics internal debug messages
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("ultralytics").setLevel(logging.ERROR)
def putTextPIL(img, text, points, size=30, colors=(255,255,255)):
"""
Draws text on an image (Korean/English supported, via Pillow).
:param numpy.ndarray img: image object
:param str text: string to draw
:param tuple(int, int) points: top-left coordinate (x, y) of the text block
:param int size: font size
:param tuple(int, int, int) colors: text color as (b, g, r) values, or a hex string such as '#ffffff'
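example (a minimal sketch; assumes ``camera`` provides an OpenCV image via ``read()``, as in the other examples in this module, and an illustrative output path)::
img = camera.read()
putTextPIL(img, '안녕하세요', (10, 10), size=30, colors='#ffff00')
cv2.imwrite('/home/pi/result.jpg', img)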
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
if type(points) is not tuple:
raise Exception(f'"{points}" must be tuple type')
if len(points) != 2:
raise Exception(f'len({points}) must be 2')
if type(colors) is str:
colors = (int(colors[5:7], 16), int(colors[3:5], 16), int(colors[1:3], 16))
if type(colors) is not tuple:
raise Exception(f'"{colors}" must be tuple type')
if len(colors) != 3:
raise Exception(f'len({colors}) must be 3')
font = ImageFont.truetype(openpibo_models.filepath("KDL.ttf"), size)
pil = Image.fromarray(img) # CV to PIL
ImageDraw.Draw(pil).text(points, text, font=font, fill=colors) # putText
img[:] = np.array(pil) # PIL to CV
return img
def vision_api(mode, image, params={}):
"""
Calls the AI vision API.
:param str mode: vision API endpoint to call (https://o-vapi.circul.us/guide)
:param str/numpy.ndarray image: image to analyze (file path or cv image)
:param dict params: extra query parameters passed to the API (optional)
:returns: result data as ``Json``
example::
{ 'type': 'caption', 'result': 'ok',
'data': {
'caption': "사람에게 로봇을 과시하는 사람",
'caption_en': "a person showing off a robot to a person",
'raw': [
"a person showing off a robot to a person",
"a robot that is sitting on top of a table",
"a very cute white robot that is sitting in front of a table"
]
}
}
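usage example (a minimal sketch; assumes network access to https://o-vapi.circul.us and a ``camera`` object as in the other examples)::
img = camera.read()
res = vision_api('caption', img)
print(res['data']['caption_en'])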
"""
if type(image) is np.ndarray:
return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile':cv2.imencode('.jpg', image)[1].tobytes()}, params=params).json()
else:
return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile':open(image, 'rb')}, params=params).json()
class Face:
"""
Functions:
:meth:`~openpibo.vision_detect.Face.detect_face`
:meth:`~openpibo.vision_detect.Face.detect_face_vis`
:meth:`~openpibo.vision_detect.Face.landmark_face`
:meth:`~openpibo.vision_detect.Face.landmark_face_vis`
:meth:`~openpibo.vision_detect.Face.analyze_face`
:meth:`~openpibo.vision_detect.Face.analyze_face_vis`
:meth:`~openpibo.vision_detect.Face.init_db`
:meth:`~openpibo.vision_detect.Face.train_face`
:meth:`~openpibo.vision_detect.Face.delete_face`
:meth:`~openpibo.vision_detect.Face.recognize`
:meth:`~openpibo.vision_detect.Face.get_db`
:meth:`~openpibo.vision_detect.Face.save_db`
:meth:`~openpibo.vision_detect.Face.load_db`
:meth:`~openpibo.vision_detect.Face.detect_mesh`
:meth:`~openpibo.vision_detect.Face.detect_mesh_vis`
A class providing various face-related features. It can:
* Detect faces.
* Train, save, and delete faces.
* Recognize trained faces.
* Estimate age, gender, and emotion from a face.
:Face database: refers to the instance variable **facedb**, where face data is registered, loaded, and deleted.
The face database format is a nested list ``[[], []]``: the first list holds face names and the second holds the corresponding encoded (trained) face data.
It can also be saved to a file so the face data is kept after the instance is destroyed.
example::
from openpibo.vision_detect import Face
face = Face()
# Run the code above before any of the examples below.
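A minimal end-to-end sketch of the face-database workflow (the name 'honggildong' and path '/home/pi/facedb' are only illustrative)::
img = camera.read()
items = face.detect_face(img)
face.train_face(img, items[0], 'honggildong')
print(face.recognize(img, items[0]))  # -> {'name': ..., 'score': ..., 'max': ...}
face.save_db('/home/pi/facedb')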
"""
def __init__(self):
self.facedb = [[],[]]
self.threshold = 0.4
# self.face_detector = dlib.get_frontal_face_detector()
self.predictor = dlib.shape_predictor(openpibo_dlib_models.filepath("shape_predictor_68_face_landmarks.dat"))
self.face_encoder = dlib.face_recognition_model_v1(openpibo_dlib_models.filepath("dlib_face_recognition_resnet_model_v1.dat"))
# Load OpenVINO models
ie = Core()
self.face_detection_compiled = ie.compile_model(ie.read_model("/home/pi/.model/face/detection/face-detection-retail-0004.xml"), "CPU")
self.age_gender_compiled = ie.compile_model(ie.read_model("/home/pi/.model/face/age-gender/age-gender-recognition-retail-0013.xml"), "CPU")
self.emotion_compiled = ie.compile_model(ie.read_model("/home/pi/.model/face/emotion/emotions-recognition-retail-0003.xml"), "CPU")
# Get input and output names for models
self.face_output_name = self.face_detection_compiled.output(0).any_name
self.gender_output_name = list(self.age_gender_compiled.outputs)[0].any_name
self.age_output_name = list(self.age_gender_compiled.outputs)[1].any_name
self.emotion_output_name = self.emotion_compiled.output(0).any_name
self.emotions = ['neutral', 'happy', 'sad', 'surprise', 'anger']
# mediapipe model
self.mesh_detector = mp_vision.FaceLandmarker.create_from_options(
mp_vision.FaceLandmarkerOptions(
base_options=mp_python.BaseOptions(model_asset_path='/home/pi/.model/face/landmark/face_landmarker.task'),
running_mode=mp_vision.RunningMode.IMAGE,
num_faces=2,
min_face_detection_confidence=0.5,
min_face_presence_confidence=0.5,
min_tracking_confidence=0.5,
output_face_blendshapes=True,
)
)
self.IRIS_REAL_DIAMETER_MM = 11.7
self.FOCAL_LENGTH_MM = 3.6
self.PIXEL_PITCH_MM = 0.0014 * 2592 / 640
def detect_face(self, img):
"""
Detects faces.
example::
img = camera.read()
face.detect_face(img)
:param numpy.ndarray img: image object
:returns: list of detected face boxes as [x1, y1, x2, y2].
If several faces are detected in one image, the coordinates of every detected face are included.
example::
[[10, 10, 40, 50], [120, 30, 160, 70], [130, 140, 200, 260]]
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
h, w = img.shape[:2]
input_frame = cv2.resize(img, (300, 300))
input_frame = input_frame.transpose(2, 0, 1)[np.newaxis, :]
input_frame = input_frame.astype(np.float32)
detections = self.face_detection_compiled([input_frame])[self.face_output_name]
items = []
for detection in detections[0][0]:
confidence = detection[2]
if confidence > 0.5: # Threshold
xmin = int(detection[3] * w)
ymin = int(detection[4] * h)
xmax = int(detection[5] * w)
ymax = int(detection[6] * h)
if img[ymin:ymax, xmin:xmax].size == 0:
continue
items.append([xmin, ymin, xmax, ymax])
return items
#return [(d.left(), d.top(), d.right()-d.left(), d.bottom()-d.top()) for d in self.face_detector(img)]
#return self.face_detector.detectMultiScale(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 1.1, 5) # [(x,y,w,h), ...]
def detect_face_vis(self, img, items):
"""
Draws boxes around detected faces.
:param numpy.ndarray img: image object
:param list items: list of face boxes (x1, y1, x2, y2) from :meth:`detect_face`
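example::
img = camera.read()
items = face.detect_face(img)
face.detect_face_vis(img, items)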
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
for item in items:
x1, y1, x2, y2 = item
cv2.rectangle(img, (x1,y1), (x2,y2), (50,255,50), 2)
def landmark_face(self, img, item):
"""
Detects facial landmarks (68 points).
example::
img = camera.read()
items = face.detect_face(img)
face.landmark_face(img, items[0])
:param numpy.ndarray img: image object
:param list item: face box (x1, y1, x2, y2)
:returns: numpy array of 68 (x, y) landmark coordinates
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
if len(item) != 4:
raise Exception('"item" must be [x1,y1,x2,y2]')
x1, y1, x2, y2 = item
face_img = img[y1:y2, x1:x2].copy()
rect = dlib.rectangle(int(x1), int(y1), int(x2), int(y2))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
shape = self.predictor(gray, rect)
coords = np.zeros((shape.num_parts, 2), dtype="int")
for i in range(0, shape.num_parts):
coords[i] = (shape.part(i).x, shape.part(i).y)
return coords
def landmark_face_vis(self, img, coords):
"""
Draws facial landmarks on the image.
example::
img = camera.read()
items = face.detect_face(img)
coords = face.landmark_face(img, items[0])
face.landmark_face_vis(img, coords)
:param numpy.ndarray img: image object
:param coords: landmark coordinates from :meth:`landmark_face`
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
for i, coord in enumerate(coords):
x, y = coord
cv2.circle(img, (x, y), 2, (50, 200, 50), -1)
putTextPIL(img, str(i+1), (x, y-15), 15, (255, 255, 255))
def analyze_face(self, img, item):
"""
Estimates the age, gender, and emotion of a face.
example::
img = camera.read()
items = face.detect_face(img)
item = items[0] # one element of items
face.analyze_face(img, item)
:param numpy.ndarray img: image object
:param list item: face box (x1, y1, x2, y2)
:returns: ``{"age": 0~100, "gender": "Male" or "Female", "emotion": one of "neutral", "happy", "sad", "surprise", "anger", "box": (x1, y1, x2, y2)}``
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
if len(item) != 4:
raise Exception('"item" must be [x1,y1,x2,y2]')
x1, y1, x2, y2 = item
face_img = img[y1:y2, x1:x2].copy()
# Preprocess face for age/gender
face_resized_age_gender = cv2.resize(face_img, (62, 62)) # 62x62 크기로 조정
face_input_age_gender = face_resized_age_gender.transpose(2, 0, 1)[np.newaxis, :]
face_input_age_gender = face_input_age_gender.astype(np.float32)
# Age and gender prediction
age_gender_result = self.age_gender_compiled([face_input_age_gender])
age = age_gender_result[self.age_output_name].squeeze() * 100
gender = "Male" if age_gender_result[self.gender_output_name].squeeze()[1] > 0.5 else "Female"
# Preprocess face for emotion
face_resized_emotion = cv2.resize(face_img, (64, 64)) # 64x64 크기로 조정
face_input_emotion = face_resized_emotion.transpose(2, 0, 1)[np.newaxis, :]
face_input_emotion = face_input_emotion.astype(np.float32)
# Emotion prediction
emotion_result = self.emotion_compiled([face_input_emotion])[self.emotion_output_name]
emotion = self.emotions[np.argmax(emotion_result)]
return {"age":int(age), "gender":gender, "emotion":emotion, "box": (x1, y1, x2, y2)}
def analyze_face_vis(self, img, item):
"""
Draws the face-analysis result (age/gender/emotion) on the image.
:param numpy.ndarray img: image object
:param dict item: analysis result from :meth:`analyze_face`
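example::
img = camera.read()
items = face.detect_face(img)
result = face.analyze_face(img, items[0])
face.analyze_face_vis(img, result)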
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
x1, y1, x2, y2 = item['box']
age, gender, emotion = item['age'], item['gender'], item['emotion']
putTextPIL(img, f'{age}/{gender}/{emotion}', (x1, y1-30), 30, (255, 255, 255))
def init_db(self):
"""
Initializes the face database.
The initialized database is an empty nested list ``[[], []]``.
example::
face.init_db()
"""
self.facedb = [[], []]
def train_face(self, img, item, name):
"""
Trains a face and stores it in the face database.
example::
img = camera.read()
items = face.detect_face(img)
item = items[0] # one element of items
face.train_face(img, item, 'honggildong')
:param numpy.ndarray img: image object
:param list item: top-left and bottom-right points of the detected face box (x1, y1, x2, y2)
:param str name: name to attach to the detected face
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
if len(item) != 4:
raise Exception('"item" must be [x1,y1,x2,y2]')
x1, y1, x2, y2 = item
face_img = img[y1:y2, x1:x2].copy()
rect = dlib.rectangle(int(x1), int(y1), int(x2), int(y2))
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
shape = self.predictor(gray, rect)
face_encoding = np.array(self.face_encoder.compute_face_descriptor(img, shape, 1))
self.facedb[0].append(name)
self.facedb[1].append(face_encoding)
#cv2.imwrite(self.data_path+"/{}.jpg".format(name), img[y+3:y+h-3, x+3:x+w-3]);
def delete_face(self, name):
"""
Deletes a registered face.
example::
face.delete_face('honggildong')
:param str name: name of the face to delete
:returns: ``True`` / ``False``
"""
ret = name in self.facedb[0]
if ret:
idx = self.facedb[0].index(name)
#os.remove(self.data_path +"/" + name + ".jpg")
for item in self.facedb:
del item[idx]
return ret
def recognize(self, img, item):
"""
Recognizes a registered face.
example::
img = camera.read()
items = face.detect_face(img)
item = items[0] # one element of items
face.recognize(img, item)
:param numpy.ndarray img: image object
:param list item: face box (x1, y1, x2, y2)
:returns: ``{"name": name, "score": score}``
The more similar the face, the lower the score (encoding distance).
A face is judged to be the same person when the score is below 0.4.
"""
if not type(img) is np.ndarray:
raise Exception('"img" must be image data from opencv')
if len(item) != 4:
raise Exception('"item" must be [x1,y1,x2,y2]')
if len(self.facedb[0]) < 1:
return {"name":"Guest", "score":0}
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
data={"name":"Guest", "score":0, "max":""}
x1, y1, x2, y2 = item
rect = dlib.rectangle(int(x1), int(y1), int(x2), int(y2))
shape = self.predictor(gray, rect)
face_encoding = np.array(self.face_encoder.compute_face_descriptor(img, shape, 1))
matches = list(np.linalg.norm(self.facedb[1] - face_encoding, axis=1))
data["score"] = round(min(matches), 2)
if min(matches) < self.threshold:
data["name"] = self.facedb[0][matches.index(min(matches))]
data["max"] = self.facedb[0][matches.index(min(matches))]
return data
def get_db(self):
"""
Returns the face database currently in use.
example::
face.get_db()
:returns: **facedb** (``list(list, list)`` type)
example::
[
['honggildong'],
[array([-0.06423206, 0.12474005, 0.0511112 , -0.05676335, -0.07211345,
-0.03123881, -0.04119622, -0.12800875, 0.11717855, -0.11079554,
0.22952782, -0.02007426, -0.17457265, -0.13562854, -0.04972655,
0.15810637, -0.12785575, -0.16479518, -0.07002968, -0.00208595,
0.169218 , 0.03144928, -0.01074579, 0.04103286, -0.09245337,
...
-0.00706697, 0.06025593, -0.0049719 ])]
]
"""
return self.facedb
def save_db(self, filename):
"""
Saves the face database to a file.
example::
face.save_db('/home/pi/facedb')
:param str filename: path of the face-database file to save
"""
with open(filename, "w+b") as f:
pickle.dump(self.facedb, f)
def load_db(self, filename):
"""
Loads a face-database file.
example::
face.load_db('/home/pi/facedb')
:param str filename: path of the ``facedb`` file to load
"""
if not os.path.isfile(filename):
raise Exception(f'"{filename}" does not exist')
with open(filename, "rb") as f :
self.facedb = pickle.load(f)
# face mesh
def calculate_head_orientation(self, keypoints):
"""
(internal) Computes the head orientation from face-landmark keypoints.
Returns a two-letter direction string: vertical T/C/B (top/center/bottom) followed by horizontal L/C/R (left/center/right); e.g. 'CC' means facing the camera.
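example (internal use; ``face_landmarks``, ``w`` and ``h`` are the per-face mediapipe landmarks and image size, as built inside :meth:`detect_mesh`)::
keypoints = [{"x": lm.x * w, "y": lm.y * h, "z": lm.z * w} for lm in face_landmarks]
direction = face.calculate_head_orientation(keypoints)  # e.g. 'CC' = facing the camera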
"""
# Keypoints for calculations
nose_tip = keypoints[1]
left_nose = keypoints[279]
right_nose = keypoints[49]
# Calculate midpoint
midpoint = {
"x": (left_nose["x"] + right_nose["x"]) / 2,
"y": (left_nose["y"] + right_nose["y"]) / 2,
"z": (left_nose["z"] + right_nose["z"]) / 2,
}
# Perpendicular point above midpoint
perpendicular_up = {
"x": midpoint["x"],
"y": midpoint["y"]-50, # Offset
"z": midpoint["z"],
}
# Calculate yaw and turn
yaw = self.get_angle_between_lines(midpoint, nose_tip, perpendicular_up)
turn = self.get_angle_between_lines(midpoint, right_nose, nose_tip)
# Debug yaw and turn
# print(f"[DEBUG] Yaw: {yaw:.2f}, Turn: {turn:.2f}")
# Determine direction based on angles
direction = ""
if yaw > 105: # Adjusted threshold
direction += "B" # Bottom
elif yaw < 75: # Adjusted threshold
direction += "T" # Top
else:
direction += "C" # Center (vertical)
if turn > 93: # Adjusted threshold
direction += "R" # Right
elif turn < 87: # Adjusted threshold
direction += "L" # Left
else:
direction += "C" # Center (horizontal)
return direction
def get_angle_between_lines(self, start, point1, point2):
"""Calculate angle between two lines defined by three points."""
# Vector 1: start -> point1
vector1 = (point1["x"] - start["x"], point1["y"] - start["y"], point1["z"] - start["z"])
# Vector 2: start -> point2
vector2 = (point2["x"] - start["x"], point2["y"] - start["y"], point2["z"] - start["z"])
# Dot product and magnitude
dot_product = sum(v1 * v2 for v1, v2 in zip(vector1, vector2))
magnitude1 = math.sqrt(sum(v**2 for v in vector1))
magnitude2 = math.sqrt(sum(v**2 for v in vector2))
# Calculate angle in degrees
angle = math.degrees(math.acos(dot_product / (magnitude1 * magnitude2 + 1e-8)))
return angle
def detect_mesh_vis(self, image, items):
"""Draw connections between landmarks based on Mediapipe's face mesh."""
for item in items:
face_landmarks = item['landmark']
distance = item['distance']
direction = item['direction']
if len(face_landmarks) > 0:
connections = mp.solutions.face_mesh.FACEMESH_TESSELATION
image_height, image_width, _ = image.shape
for start, end in connections:
x1, y1 = int(face_landmarks[start].x * image_width), int(face_landmarks[start].y * image_height)
x2, y2 = int(face_landmarks[end].x * image_width), int(face_landmarks[end].y * image_height)
cv2.line(image, (x1, y1), (x2, y2), (255, 255, 255), 1)
x, y = int(face_landmarks[103].x * image_width), int(face_landmarks[103].y * image_height)
putTextPIL(image, f'{distance}cm/{direction}' , (x, y-40), 30, (255,255,255))
def detect_mesh(self, image):
"""Detect mesh and return distance, direction, and image with landmarks."""
"""
얼굴의 랜드마크를 인식하고, 거리, 방향을 추정합니다
:param numpy.ndarray img: 이미지 객체
:returns: {얼굴의 거리, 방향, 랜드마크}의 리스트 (얼굴 2개까지 인식)
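example::
img = camera.read()
items = face.detect_mesh(img)
face.detect_mesh_vis(img, items)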
"""
# Convert the image from BGR to RGB
rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)
mesh_data = []
# Perform inference
mesh_result = self.mesh_detector.detect(mp_image)
if mesh_result and mesh_result.face_landmarks:
h, w, _ = image.shape
for _, face_landmarks in enumerate(mesh_result.face_landmarks):
# Convert landmarks to a dictionary-like structure
keypoints = [{"x": lm.x * w, "y": lm.y * h, "z": lm.z * w} for lm in face_landmarks]
# Calculate head orientation
direction = self.calculate_head_orientation(keypoints)
# Calculate iris-based distance
left_iris_idx = [474, 475, 476, 477]
x_vals, y_vals = [], []
for idx in left_iris_idx:
x_vals.append(face_landmarks[idx].x * w)
y_vals.append(face_landmarks[idx].y * h)
iris_diameter_px = max(max(x_vals) - min(x_vals), max(y_vals) - min(y_vals))
if iris_diameter_px > 0:
distance_mm = (self.FOCAL_LENGTH_MM * self.IRIS_REAL_DIAMETER_MM) / (iris_diameter_px * self.PIXEL_PITCH_MM)
distance_cm = int(distance_mm / 10.0)
mesh_data.append({"distance":distance_cm, "direction":direction, "landmark": face_landmarks})
return mesh_data