import cv2
import random  # kept: present in the original file (currently unused)
import mediapipe as mp


def write_landmark_positions_on_frame(frame, landmark_list):
    """Annotate *frame* with the pixel coordinates of each landmark.

    Each entry of *landmark_list* is an (x, y) pair normalized to [0, 1];
    it is scaled to the frame size and drawn as "x : y" text next to the
    point. The frame is mutated in place and also returned.
    """
    if not landmark_list:
        return frame

    height, width = frame.shape[:2]
    color = (250, 0, 0)
    for lm in landmark_list:
        # landmark position in pixels
        pos_px = (int(width * lm[0]), int(height * lm[1]))
        text = f'{pos_px[0]} : {pos_px[1]}'
        # offset so the label sits above-left of the landmark point
        org = (pos_px[0] - 20, pos_px[1] - 10)
        cv2.putText(img=frame, text=text, org=org,
                    fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=.5,
                    color=color, thickness=1)

    return frame


def transform_to_relative(landmark_list):
    """Translate landmarks so the first one (the wrist) becomes the origin.

    Returns a new list of (dx, dy) tuples relative to landmark_list[0];
    an empty input is returned unchanged.

    BUG FIX: the original built the relative list but never returned it
    (the function implicitly returned None) and printed every landmark as
    a leftover debug side effect. Both are corrected here.
    """
    if not landmark_list:
        return landmark_list

    origin = landmark_list[0]
    return [(lm[0] - origin[0], lm[1] - origin[1]) for lm in landmark_list]


## main: open video and do hand detection
def main():
    """Open the default webcam and run single-hand landmark detection.

    Shows an annotated mirror view until the user presses 'q'.
    """
    # create hand detection object
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open video
    cap = cv2.VideoCapture(0)

    # if cannot open video give warning
    if not cap.isOpened():
        print("Warning: cannot reach camera")
    else:
        print("Program is running, push 'q' to quit.")

    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9,
                        min_tracking_confidence=0.9) as hands:

        # read frames from webcamera
        while cap.isOpened():
            ret, frame = cap.read()

            if not ret:
                print("Warning: cannot read camera input")
                break

            # flip frame to appear as a mirror
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            ## hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            if results.multi_hand_landmarks:
                # multi_hand_landmarks can store two hands if max_num_hands=2,
                # in which case we would iterate:
                # for num, hand in enumerate(results.multi_hand_landmarks):

                # one hand is detected, because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]

                # draw landmarks on frame
                mp_drawing.draw_landmarks(
                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
                )

                # get landmark list with indices described in
                # https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # relate landmarks to the first (wrist) position
                # (computed but not yet consumed downstream)
                relative_landmark_list = transform_to_relative(landmark_list)

                # write positions on frame
                frameRGB = write_landmark_positions_on_frame(frameRGB, landmark_list)

            # transform back RGB and show frame with annotation
            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
            cv2.imshow('Hand tracking', frame_annotated)

            # or show original frame without annotation
            # cv2.imshow('Hand tracking', frame)

            if cv2.waitKey(1) & 0xFF == ord('q'):
                print("Quit camera")
                break

    cap.release()
    cv2.destroyAllWindows()
    print("Program closed")


if __name__ == '__main__':
    main()