Source code for openpibo.vision_face

"""
Uses image processing and AI vision technology.

Class:
:meth:`~openpibo.vision_detect.putTextPIL`
:obj:`~openpibo.vision_detect.Face`
"""
import cv2,dlib
import os,pickle,math
import requests
import numpy as np
import mediapipe as mp
from mediapipe.tasks import python as mp_python
from mediapipe.tasks.python import vision as mp_vision
from openvino.runtime import Core
from PIL import Image,ImageDraw,ImageFont
import openpibo_dlib_models
import openpibo_models
import logging

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow C++ logs
os.environ['LIBCAMERA_LOG_LEVELS'] = '3'

# Fully suppress internal debug messages from TensorFlow and Ultralytics
logging.getLogger("tensorflow").setLevel(logging.ERROR)
logging.getLogger("ultralytics").setLevel(logging.ERROR)

def putTextPIL(img, text, points, size=30, colors=(255,255,255)):
  """
  Draws text on an image. (Supports Korean/English - uses Pillow)

  :param numpy.ndarray img: image object
  :param str text: text to display
  :param tuple(int, int) points: top-left coordinates of the text block (x, y)
  :param int size: font size
  :param tuple(int, int, int) colors: text color as (b, g, r) values or a hex string '#ffffff'
  """
  if not type(img) is np.ndarray:
    raise Exception('"img" must be image data from opencv')

  if type(points) is not tuple:
    raise Exception(f'"{points}" must be tuple type')

  if len(points) != 2:
    raise Exception(f'len({points}) must be 2')

  if type(colors) is str:
    colors = (int(colors[5:7], 16), int(colors[3:5], 16), int(colors[1:3], 16))

  if type(colors) is not tuple:
    raise Exception(f'"{colors}" must be tuple type')

  if len(colors) != 3:
    raise Exception(f'len({colors}) must be 3')

  font = ImageFont.truetype(openpibo_models.filepath("KDL.ttf"), size)
  pil = Image.fromarray(img)  # CV to PIL
  ImageDraw.Draw(pil).text(points, text, font=font, fill=colors)  # putText
  img[:] = np.array(pil)  # PIL to CV
  return img
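# A minimal usage sketch for putTextPIL, assuming an OpenCV BGR image is available
# (the file names below are illustrative only):
#
#   img = cv2.imread('sample.jpg')
#   putTextPIL(img, '안녕하세요', (10, 10), size=40, colors='#00ff00')
#   cv2.imwrite('sample_out.jpg', img)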
def vision_api(mode, image, params={}):
  """
  Calls the AI vision API.

  :param str mode: vision API to call (https://o-vapi.circul.us/guide)
  :param str/numpy.ndarray image: image to analyze (file path or cv image)
  :returns: result data in ``Json`` format

  example::

    {
      'type': 'caption', 'result': 'ok',
      'data': {
        caption: "사람에게 로봇을 과시하는 사람",
        caption_en: "a person showing off a robot to a person",
        raw: [
          "a person showing off a robot to a person",
          "a robot that is sitting on top of a table",
          "a very cute white robot that is sitting in front of a table"
        ]
      }
    }
  """
  if type(image) is np.ndarray:
    return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile':cv2.imencode('.jpg', image)[1].tobytes()}, params=params).json()
  else:
    return requests.post(f"https://o-vapi.circul.us/{mode}", files={'uploadFile':open(image, 'rb')}, params=params).json()
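# A hedged usage sketch for vision_api, assuming network access to o-vapi.circul.us and a
# camera object such as openpibo.vision.Camera (see the linked guide for available modes):
#
#   from openpibo.vision import Camera
#   camera = Camera()
#   img = camera.read()
#   res = vision_api('caption', img)
#   print(res['data'])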
class Face:
  """
  Functions:
  :meth:`~openpibo.vision_detect.Face.detect_face`
  :meth:`~openpibo.vision_detect.Face.detect_face_vis`
  :meth:`~openpibo.vision_detect.Face.landmark_face`
  :meth:`~openpibo.vision_detect.Face.landmark_face_vis`
  :meth:`~openpibo.vision_detect.Face.analyze_face`
  :meth:`~openpibo.vision_detect.Face.analyze_face_vis`
  :meth:`~openpibo.vision_detect.Face.init_db`
  :meth:`~openpibo.vision_detect.Face.train_face`
  :meth:`~openpibo.vision_detect.Face.delete_face`
  :meth:`~openpibo.vision_detect.Face.recognize`
  :meth:`~openpibo.vision_detect.Face.get_db`
  :meth:`~openpibo.vision_detect.Face.save_db`
  :meth:`~openpibo.vision_detect.Face.load_db`
  :meth:`~openpibo.vision_detect.Face.detect_mesh`
  :meth:`~openpibo.vision_detect.Face.detect_mesh_vis`

  A class that provides various face-related features:

  * Detects faces.
  * Trains, saves, and deletes faces.
  * Recognizes trained faces.
  * Estimates age, gender, and emotion from a face.

  :Face database:
    Refers to the instance variable **facedb**, where face data is registered, loaded, and deleted.

    The face database format is a nested list ``[[], []]``: the first list holds face names,
    and the second list holds the corresponding encoded face data.

    It can also be saved to a file so that face data survives after the instance is destroyed.

  example::

    from openpibo.vision_detect import Face

    face = Face()
    # All examples below assume the code above has been run first.
  """

  def __init__(self):
    self.facedb = [[],[]]
    self.threshold = 0.4
    # self.face_detector = dlib.get_frontal_face_detector()
    self.predictor = dlib.shape_predictor(openpibo_dlib_models.filepath("shape_predictor_68_face_landmarks.dat"))
    self.face_encoder = dlib.face_recognition_model_v1(openpibo_dlib_models.filepath("dlib_face_recognition_resnet_model_v1.dat"))

    # Load OpenVINO models
    ie = Core()
    self.face_detection_compiled = ie.compile_model(ie.read_model("/home/pi/.model/face/detection/face-detection-retail-0004.xml"), "CPU")
    self.age_gender_compiled = ie.compile_model(ie.read_model("/home/pi/.model/face/age-gender/age-gender-recognition-retail-0013.xml"), "CPU")
    self.emotion_compiled = ie.compile_model(ie.read_model("/home/pi/.model/face/emotion/emotions-recognition-retail-0003.xml"), "CPU")

    # Get input and output names for models
    self.face_output_name = self.face_detection_compiled.output(0).any_name
    self.gender_output_name = list(self.age_gender_compiled.outputs)[0].any_name
    self.age_output_name = list(self.age_gender_compiled.outputs)[1].any_name
    self.emotion_output_name = self.emotion_compiled.output(0).any_name
    self.emotions = ['neutral', 'happy', 'sad', 'surprise', 'anger']

    # mediapipe model
    self.mesh_detector = mp_vision.FaceLandmarker.create_from_options(
      mp_vision.FaceLandmarkerOptions(
        base_options=mp_python.BaseOptions(model_asset_path='/home/pi/.model/face/landmark/face_landmarker.task'),
        running_mode=mp_vision.RunningMode.IMAGE,
        num_faces=2,
        min_face_detection_confidence=0.5,
        min_face_presence_confidence=0.5,
        min_tracking_confidence=0.5,
        output_face_blendshapes=True,
      )
    )

    self.IRIS_REAL_DIAMETER_MM = 11.7
    self.FOCAL_LENGTH_MM = 3.6
    self.PIXEL_PITCH_MM = 0.0014 * 2592 / 640
  def detect_face(self, img):
    """
    Detects faces.

    example::

      img = camera.read()
      face.detect_face(img)

    :param numpy.ndarray img: image object
    :returns: list of detected faces as (x1, y1, x2, y2) coordinates.

      If multiple faces are detected in one image, the coordinates of every detected face are included.

      example::

        [(10, 10, 40, 50), (120, 30, 160, 70), (130, 140, 200, 260)]
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    h, w = img.shape[:2]
    input_frame = cv2.resize(img, (300, 300))
    input_frame = input_frame.transpose(2, 0, 1)[np.newaxis, :]
    input_frame = input_frame.astype(np.float32)

    detections = self.face_detection_compiled([input_frame])[self.face_output_name]

    items = []
    for detection in detections[0][0]:
      confidence = detection[2]
      if confidence > 0.5:  # Threshold
        xmin = int(detection[3] * w)
        ymin = int(detection[4] * h)
        xmax = int(detection[5] * w)
        ymax = int(detection[6] * h)
        if img[ymin:ymax, xmin:xmax].size == 0:
          continue
        items.append([xmin, ymin, xmax, ymax])
    return items
    #return [(d.left(), d.top(), d.right()-d.left(), d.bottom()-d.top()) for d in self.face_detector(img)]
    #return self.face_detector.detectMultiScale(cv2.cvtColor(img, cv2.COLOR_BGR2GRAY), 1.1, 5) # [(x,y,w,h), ...]
  def detect_face_vis(self, img, items):
    """
    Draws boxes around detected faces.

    :param numpy.ndarray img: image object
    :param list items: list of face coordinates (x1, y1, x2, y2)
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    for item in items:
      x1, y1, x2, y2 = item
      cv2.rectangle(img, (x1,y1), (x2,y2), (50,255,50), 2)
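  # Usage sketch for detect_face/detect_face_vis (assumes a `camera` object with a
  # read() method returning an OpenCV image, as in the docstring examples):
  #
  #   img = camera.read()
  #   items = face.detect_face(img)     # e.g. [[120, 80, 260, 240]]
  #   face.detect_face_vis(img, items)  # draws green boxes on img in place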
  def landmark_face(self, img, item):
    """
    Finds the facial landmarks of a face.

    example::

      img = camera.read()
      items = face.detect_face(img)
      face.landmark_face(img, items[0])

    :param numpy.ndarray img: image object
    :param list item: face coordinates (x1, y1, x2, y2)
    :returns: list of landmark coordinates
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    if len(item) != 4:
      raise Exception('"item" must be [x1,y1,x2,y2]')

    x1, y1, x2, y2 = item
    face_img = img[y1:y2, x1:x2].copy()
    rect = dlib.rectangle(int(x1), int(y1), int(x2), int(y2))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    shape = self.predictor(gray, rect)
    coords = np.zeros((shape.num_parts, 2), dtype="int")
    for i in range(0, shape.num_parts):
      coords[i] = (shape.part(i).x, shape.part(i).y)
    return coords
  def landmark_face_vis(self, img, coords):
    """
    Draws the facial landmarks on the image.

    example::

      img = camera.read()
      items = face.detect_face(img)
      coords = face.landmark_face(img, items[0])
      face.landmark_face_vis(img, coords)

    :param numpy.ndarray img: image object
    :param list coords: list of landmark coordinates
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    for i, coord in enumerate(coords):
      x, y = coord
      cv2.circle(img, (x, y), 2, (50, 200, 50), -1)
      putTextPIL(img, str(i+1), (x, y-15), 15, (255, 255, 255))
  def analyze_face(self, img, item):
    """
    Estimates the age, gender, and emotion of a face.

    example::

      img = camera.read()
      items = face.detect_face(img)
      item = items[0]  # item is one element of items
      face.analyze_face(img, item)

    :param numpy.ndarray img: image object
    :param list item: face coordinates (x1, y1, x2, y2)
    :returns: {age: 0~100, gender: Male or Female, emotion: ``neutral``, ``happy``, ``sad``, ``surprise``, ``anger``, box: coordinates}
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    if len(item) != 4:
      raise Exception('"item" must be [x1,y1,x2,y2]')

    x1, y1, x2, y2 = item
    face_img = img[y1:y2, x1:x2].copy()

    # Preprocess face for age/gender
    face_resized_age_gender = cv2.resize(face_img, (62, 62))  # resize to 62x62
    face_input_age_gender = face_resized_age_gender.transpose(2, 0, 1)[np.newaxis, :]
    face_input_age_gender = face_input_age_gender.astype(np.float32)

    # Age and gender prediction
    age_gender_result = self.age_gender_compiled([face_input_age_gender])
    age = age_gender_result[self.age_output_name].squeeze() * 100
    gender = "Male" if age_gender_result[self.gender_output_name].squeeze()[1] > 0.5 else "Female"

    # Preprocess face for emotion
    face_resized_emotion = cv2.resize(face_img, (64, 64))  # resize to 64x64
    face_input_emotion = face_resized_emotion.transpose(2, 0, 1)[np.newaxis, :]
    face_input_emotion = face_input_emotion.astype(np.float32)

    # Emotion prediction
    emotion_result = self.emotion_compiled([face_input_emotion])[self.emotion_output_name]
    emotion = self.emotions[np.argmax(emotion_result)]

    return {"age":int(age), "gender":gender, "emotion":emotion, "box": (x1, y1, x2, y2)}
  def analyze_face_vis(self, img, item):
    """
    Displays a face analysis result (age/gender/emotion) on the image.

    :param numpy.ndarray img: image object
    :param dict item: face analysis result returned by ``analyze_face``
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    x1, y1, x2, y2 = item['box']
    age, gender, emotion = item['age'], item['gender'], item['emotion']
    putTextPIL(img, f'{age}/{gender}/{emotion}', (x1, y1-30), 30, (255, 255, 255))
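  # Usage sketch chaining detection, analysis, and visualization
  # (assumes a `camera` object as in the docstring examples):
  #
  #   img = camera.read()
  #   for item in face.detect_face(img):
  #     info = face.analyze_face(img, item)
  #     face.analyze_face_vis(img, info)  # overlays 'age/gender/emotion' above each box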
  def init_db(self):
    """
    Initializes the face database.

    The initialized database is an empty nested list ``[[], []]``.

    example::

      face.init_db()
    """
    self.facedb = [[], []]
  def train_face(self, img, item, name):
    """
    Trains a face and stores it in the face database.

    example::

      img = camera.read()
      items = face.detect_face(img)
      item = items[0]  # item is one element of items
      face.train_face(img, item, 'honggildong')

    :param numpy.ndarray img: image object
    :param list item: top-left and bottom-right points of the detected face box (x1, y1, x2, y2)
    :param str name: name to assign to the detected face
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    if len(item) != 4:
      raise Exception('"item" must be [x1,y1,x2,y2]')

    x1, y1, x2, y2 = item
    face_img = img[y1:y2, x1:x2].copy()
    rect = dlib.rectangle(int(x1), int(y1), int(x2), int(y2))
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    shape = self.predictor(gray, rect)
    face_encoding = np.array(self.face_encoder.compute_face_descriptor(img, shape, 1))

    self.facedb[0].append(name)
    self.facedb[1].append(face_encoding)
    #cv2.imwrite(self.data_path+"/{}.jpg".format(name), img[y+3:y+h-3, x+3:x+w-3]);
  def delete_face(self, name):
    """
    Deletes a registered face.

    example::

      face.delete_face('honggildong')

    :param str name: name of the face to delete
    :returns: ``True`` / ``False``
    """
    ret = name in self.facedb[0]
    if ret:
      idx = self.facedb[0].index(name)
      #os.remove(self.data_path +"/" + name + ".jpg")
      for item in self.facedb:
        del item[idx]
    return ret
  def recognize(self, img, item):
    """
    Recognizes a registered face.

    example::

      img = camera.read()
      items = face.detect_face(img)
      item = items[0]  # item is one element of items
      face.recognize(img, item)

    :param numpy.ndarray img: image object
    :param list item: face coordinates (x1, y1, x2, y2)
    :returns: ``{"name": name, "score": distance}``

      The more similar the face, the lower the distance.
      A face is judged to be the same person when the distance is 0.4 or less.
    """
    if not type(img) is np.ndarray:
      raise Exception('"img" must be image data from opencv')

    if len(item) != 4:
      raise Exception('"item" must be [x1,y1,x2,y2]')

    if len(self.facedb[0]) < 1:
      return {"name":"Guest", "score":0}

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    data = {"name":"Guest", "score":0, "max":""}
    x1, y1, x2, y2 = item
    rect = dlib.rectangle(int(x1), int(y1), int(x2), int(y2))
    shape = self.predictor(gray, rect)
    face_encoding = np.array(self.face_encoder.compute_face_descriptor(img, shape, 1))

    matches = list(np.linalg.norm(self.facedb[1] - face_encoding, axis=1))
    data["score"] = round(min(matches), 2)

    if min(matches) < self.threshold:
      data["name"] = self.facedb[0][matches.index(min(matches))]
      data["max"] = self.facedb[0][matches.index(min(matches))]
    return data
  def get_db(self):
    """
    Returns the face database currently in use.

    example::

      face.get_db()

    :returns: **facedb** (``list(list, list)`` type)

      example::

        [
          ['honggildong'],
          [array([-0.06423206,  0.12474005,  0.0511112 , -0.05676335, -0.07211345,
                  -0.03123881, -0.04119622, -0.12800875,  0.11717855, -0.11079554,
                   0.22952782, -0.02007426, -0.17457265, -0.13562854, -0.04972655,
                   0.15810637, -0.12785575, -0.16479518, -0.07002968, -0.00208595,
                   0.169218  ,  0.03144928, -0.01074579,  0.04103286, -0.09245337,
                   ...
                  -0.00706697,  0.06025593, -0.0049719 ])]
        ]
    """
    return self.facedb
  def save_db(self, filename):
    """
    Saves the face database to a file.

    example::

      face.save_db('/home/pi/facedb')

    :param str filename: path of the face database file to save
    """
    with open(filename, "w+b") as f:
      pickle.dump(self.facedb, f)
  def load_db(self, filename):
    """
    Loads a face database file.

    example::

      face.load_db('/home/pi/facedb')

    :param str filename: path of the ``facedb`` file to load
    """
    if not os.path.isfile(filename):
      raise Exception(f'"{filename}" does not exist')

    with open(filename, "rb") as f:
      self.facedb = pickle.load(f)
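  # Workflow sketch combining training, persistence, and recognition
  # (assumes a `camera` object as in the docstring examples; the file path is illustrative):
  #
  #   img = camera.read()
  #   items = face.detect_face(img)
  #   if items:
  #     face.train_face(img, items[0], 'honggildong')
  #     face.save_db('/home/pi/facedb')
  #
  #   # Later, in a new session:
  #   face2 = Face()
  #   face2.load_db('/home/pi/facedb')
  #   img2 = camera.read()
  #   for item in face2.detect_face(img2):
  #     print(face2.recognize(img2, item))  # e.g. {'name': 'honggildong', 'score': 0.31, 'max': 'honggildong'}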
# face mesh
  def calculate_head_orientation(self, keypoints):
    """
    (Internal) Calculates the head orientation from face landmark data.
    """
    # Keypoints for calculations
    nose_tip = keypoints[1]
    left_nose = keypoints[279]
    right_nose = keypoints[49]

    # Calculate midpoint
    midpoint = {
      "x": (left_nose["x"] + right_nose["x"]) / 2,
      "y": (left_nose["y"] + right_nose["y"]) / 2,
      "z": (left_nose["z"] + right_nose["z"]) / 2,
    }

    # Perpendicular point above midpoint
    perpendicular_up = {
      "x": midpoint["x"],
      "y": midpoint["y"] - 50,  # Offset
      "z": midpoint["z"],
    }

    # Calculate yaw and turn
    yaw = self.get_angle_between_lines(midpoint, nose_tip, perpendicular_up)
    turn = self.get_angle_between_lines(midpoint, right_nose, nose_tip)

    # Debug yaw and turn
    # print(f"[DEBUG] Yaw: {yaw:.2f}, Turn: {turn:.2f}")

    # Determine direction based on angles
    direction = ""
    if yaw > 105:  # Adjusted threshold
      direction += "B"  # Bottom
    elif yaw < 75:  # Adjusted threshold
      direction += "T"  # Top
    else:
      direction += "C"  # Center (vertical)

    if turn > 93:  # Adjusted threshold
      direction += "R"  # Right
    elif turn < 87:  # Adjusted threshold
      direction += "L"  # Left
    else:
      direction += "C"  # Center (horizontal)

    return direction
  def get_angle_between_lines(self, start, point1, point2):
    """Calculate angle between two lines defined by three points."""
    # Vector 1: start -> point1
    vector1 = (point1["x"] - start["x"], point1["y"] - start["y"], point1["z"] - start["z"])
    # Vector 2: start -> point2
    vector2 = (point2["x"] - start["x"], point2["y"] - start["y"], point2["z"] - start["z"])

    # Dot product and magnitude
    dot_product = sum(v1 * v2 for v1, v2 in zip(vector1, vector2))
    magnitude1 = math.sqrt(sum(v**2 for v in vector1))
    magnitude2 = math.sqrt(sum(v**2 for v in vector2))

    # Calculate angle in degrees
    angle = math.degrees(math.acos(dot_product / (magnitude1 * magnitude2 + 1e-8)))
    return angle
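  # Worked example for get_angle_between_lines: with start at the origin and the two
  # points on the x- and y-axes, the vectors are orthogonal, so the angle is ~90 degrees:
  #
  #   p0 = {"x": 0, "y": 0, "z": 0}
  #   p1 = {"x": 1, "y": 0, "z": 0}
  #   p2 = {"x": 0, "y": 1, "z": 0}
  #   face.get_angle_between_lines(p0, p1, p2)  # ≈ 90.0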
  def detect_mesh_vis(self, image, items):
    """Draw connections between landmarks based on Mediapipe's face mesh."""
    for item in items:
      face_landmarks = item['landmark']
      distance = item['distance']
      direction = item['direction']

      if len(face_landmarks) > 0:
        connections = mp.solutions.face_mesh.FACEMESH_TESSELATION
        image_height, image_width, _ = image.shape

        for start, end in connections:
          x1, y1 = int(face_landmarks[start].x * image_width), int(face_landmarks[start].y * image_height)
          x2, y2 = int(face_landmarks[end].x * image_width), int(face_landmarks[end].y * image_height)
          cv2.line(image, (x1, y1), (x2, y2), (255, 255, 255), 1)

        x, y = int(face_landmarks[103].x * image_width), int(face_landmarks[103].y * image_height)
        putTextPIL(image, f'{distance}cm/{direction}', (x, y-40), 30, (255,255,255))
  def detect_mesh(self, image):
    """
    Detects the face mesh and estimates the distance and head direction.

    :param numpy.ndarray image: image object
    :returns: list of {distance, direction, landmark} per face (up to 2 faces)
    """
    # Convert the image from BGR to RGB
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    mp_image = mp.Image(image_format=mp.ImageFormat.SRGB, data=rgb_image)

    mesh_data = []

    # Perform inference
    mesh_result = self.mesh_detector.detect(mp_image)

    if mesh_result and mesh_result.face_landmarks:
      h, w, _ = image.shape
      for _, face_landmarks in enumerate(mesh_result.face_landmarks):
        # Convert landmarks to a dictionary-like structure
        keypoints = [{"x": lm.x * w, "y": lm.y * h, "z": lm.z * w} for lm in face_landmarks]

        # Calculate head orientation
        direction = self.calculate_head_orientation(keypoints)

        # Calculate iris-based distance
        left_iris_idx = [474, 475, 476, 477]
        x_vals, y_vals = [], []
        for idx in left_iris_idx:
          x_vals.append(face_landmarks[idx].x * w)
          y_vals.append(face_landmarks[idx].y * h)
        iris_diameter_px = max(max(x_vals) - min(x_vals), max(y_vals) - min(y_vals))

        if iris_diameter_px > 0:
          distance_mm = (self.FOCAL_LENGTH_MM * self.IRIS_REAL_DIAMETER_MM) / (iris_diameter_px * self.PIXEL_PITCH_MM)
          distance_cm = int(distance_mm / 10.0)
          mesh_data.append({"distance":distance_cm, "direction":direction, "landmark": face_landmarks})

    return mesh_data
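  # End-to-end mesh sketch (assumes a `camera` object as in the docstring examples);
  # distance is an iris-based estimate in cm and direction is a two-letter code
  # (vertical T/B/C + horizontal L/R/C, e.g. 'CC', 'TL', 'BR'):
  #
  #   img = camera.read()
  #   items = face.detect_mesh(img)
  #   face.detect_mesh_vis(img, items)
  #   for item in items:
  #     print(item['distance'], item['direction'])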