Skip to content
Snippets Groups Projects
Commit b3016d86 authored by Formanek Balázs István's avatar Formanek Balázs István
Browse files

mediapipe library hand detection exploration

parent 16e83a10
No related branches found
No related tags found
No related merge requests found
import cv2
import random
import mediapipe as mp
def write_landmark_positions_on_frame(frame, landmark_list):
    """Annotate *frame* with the pixel coordinates of each hand landmark.

    Args:
        frame: BGR/RGB image array; only its height/width are read.
        landmark_list: sequence of normalized (x, y) pairs in [0, 1].

    Returns:
        The same frame object, with one "x : y" label drawn next to each
        landmark (unchanged when landmark_list is empty).
    """
    if not landmark_list:
        return frame

    height, width = frame.shape[:2]
    text_color = (250, 0, 0)

    for point in landmark_list:
        # Convert normalized coordinates to pixel coordinates.
        px = int(width * point[0])
        py = int(height * point[1])
        cv2.putText(
            img=frame,
            text=f'{px} : {py}',
            org=(px - 20, py - 10),  # offset so the label sits above-left of the point
            fontFace=cv2.FONT_HERSHEY_DUPLEX,
            fontScale=.5,
            color=text_color,
            thickness=1,
        )
    return frame
def transform_to_relative(landmark_list):
    """Translate landmarks so the first entry (the wrist) becomes the origin.

    Args:
        landmark_list: sequence of (x, y) pairs; may be empty. By MediaPipe
            hand-landmark convention, index 0 is the wrist.

    Returns:
        A list of (x, y) pairs expressed relative to landmark_list[0]
        (the first pair is therefore (0, 0)). Returns the input unchanged
        when it is empty.
    """
    if len(landmark_list) == 0:
        return landmark_list
    origin = landmark_list[0]
    relative_landmark_list = []
    for lm in landmark_list:
        # Shift every point by the wrist position.
        relative_landmark_list.append((lm[0] - origin[0], lm[1] - origin[1]))
    # BUG FIX: the original built this list but never returned it, so every
    # non-empty call yielded None. (A stray debug print was removed as well.)
    return relative_landmark_list
## main: open video and do hand detection
def main():
    """Run MediaPipe hand tracking on the default webcam until 'q' is pressed.

    Opens camera 0, detects at most one hand per frame, draws the landmark
    skeleton plus per-landmark pixel coordinates, and shows the annotated
    stream in an OpenCV window.
    """
    # MediaPipe hand-detection helpers.
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # Open the default camera and warn if it is unavailable.
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Warning: cannot reach camera")
    else:
        print("Program is running, push 'q' to quit.")

    # Single-hand detector with high confidence thresholds.
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9,
                        min_tracking_confidence=0.9) as hands:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Warning: cannot read camera input")
                break

            # Mirror the image so it behaves like a mirror, then convert
            # BGR -> RGB as MediaPipe expects RGB input.
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # Hand detection on the current frame.
            results = hands.process(frameRGB)

            landmark_list = []
            if results.multi_hand_landmarks:
                # multi_hand_landmarks could hold several hands if
                # max_num_hands > 1; here only the first entry exists.
                hand_landmarks = results.multi_hand_landmarks[0]

                # Draw the landmark skeleton onto the RGB frame.
                mp_drawing.draw_landmarks(
                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
                )

                # Collect normalized (x, y) positions; indices follow
                # https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

            # Wrist-relative coordinates (computed but not used further here).
            relative_landmark_list = transform_to_relative(landmark_list)

            # Overlay pixel coordinates next to each landmark.
            frameRGB = write_landmark_positions_on_frame(frameRGB, landmark_list)

            # Convert back to BGR and display the annotated frame.
            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
            cv2.imshow('Hand tracking', frame_annotated)
            # Alternatively, show the raw frame without annotation:
            # cv2.imshow('Hand tracking', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Quit camera")
                break

    cap.release()
    cv2.destroyAllWindows()
    print("Program closed")


if __name__ == '__main__':
    main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment