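"""Gesture-controlled mouse (main.py).

Reads webcam frames, detects a single hand with MediaPipe Hands, classifies
the landmark pose with a pre-trained model (loaded via tools.load_model), and
forwards the predicted command to the Mouse controller. The annotated video
feed is displayed in a Tkinter window.
"""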
import cv2
import mediapipe as mp
import numpy as np
from sklearn.ensemble import RandomForestClassifier  # noqa: F401 (unused here; presumably the class of the pickled gesture model)
from tkinter import Tk, Label
from PIL import Image, ImageTk
from mouse_class import Mouse
from hand_detection import normalise_landmarks
from tools import load_model, set_camera_window


def main():
    # define mouse controller
    mouse = Mouse()

    # load MOUSE model
    model = load_model(device="mouse")

    # create hand detection objects
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open video; warn and quit if the camera cannot be reached
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Warning: Cannot reach camera")
        return

    # set up Tkinter window
    root, video_label = set_camera_window()

    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
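        # update_frame re-schedules itself with root.after, so frames are
        # processed inside Tkinter's event loop rather than a blocking cv2 loop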
        def update_frame():
            ret, frame = cap.read()
            if not ret:
                print("Warning: Cannot read camera input")
                root.destroy()
                return

            # flip frame horizontally and convert BGR -> RGB for mediapipe
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            mouse_command = None
            if results.multi_hand_landmarks:
                # at most one hand is detected because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]

                # draw landmarks on the frame
                mp_drawing.draw_landmarks(
                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
                )

                # collect (x, y) landmark coordinates; indices are described in
                # https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # normalise landmarks (the same normalisation used for training)
                normalised_landmark_list = normalise_landmarks(landmark_list)

                # apply the model to get a gesture prediction
                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                mouse_command = pred[0]

                hand_size = (landmark_list[0][0] - landmark_list[12][0],
                             landmark_list[0][1] - landmark_list[12][1])
                cv2.putText(
                    img=frameRGB,
                    text=f"{pred[0]} pos {landmark_list[8][0]:.2f}, {landmark_list[8][1]:.2f}",
                    org=(30, 30), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
                )
                cv2.putText(
                    img=frameRGB,
                    text=f"hand size: {hand_size[0]:.2f}, {hand_size[1]:.2f}",
                    org=(30, 60), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(0, 255, 0), thickness=1
                )

            # feed the prediction to the mouse controller (None if no hand was seen)
            mouse.add_prediction(mouse_command)
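            # landmark 0 is the wrist, 8 the index-finger tip, 12 the middle-finger tip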
            if mouse_command in ("move cursor", "grab"):
                mouse.get_hand_size(landmark_list[12], landmark_list[0])
                mouse.get_hand_pos(landmark_list[8])
            # convert frame to a Tkinter-compatible format and display it
            frameRGB_resized = cv2.resize(frameRGB, (root.winfo_width(), root.winfo_height()))
            img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
            video_label.config(image=img)
            video_label.image = img  # keep a reference so the image is not garbage-collected

            # refresh frame
            root.after(10, update_frame)
        # start updating frames
        update_frame()

        # quit the program properly
        root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
        root.mainloop()

    cap.release()
    print("Program closed")


if __name__ == '__main__':
    main()