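"""Gesture-controlled virtual mouse and keyboard.

Captures webcam frames, detects hands with MediaPipe, classifies right-hand
gestures as mouse or keyboard commands (switched with a "change the model"
gesture) and left-hand gestures as special keys, and shows the annotated feed
in a Tkinter window.
"""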
import cv2
import mediapipe as mp
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from tkinter import Tk, Label
from PIL import Image, ImageTk
from keyboard_class import Keyboard
from specialkeys_class import Specialkeys
# assumed import: `mouse` is used below but never created in the visible code;
# a Mouse class analogous to Keyboard/Specialkeys is the likely source
from mouse_class import Mouse
from hand_detection import normalise_landmarks, landmarks_from_results
from tools import load_model, set_camera_window
# hide the MediaPipe deprecation warning: "SymbolDatabase.GetPrototype() is
# deprecated. Please use message_factory.GetMessageClass() instead."
import warnings
warnings.filterwarnings("ignore", category=UserWarning)
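
# MOUSE_ACTIVE selects which model handles right-hand gestures (mouse or keyboard);
# FREEZE_CHANGE_MODEL latches the "change the model" gesture so that holding it
# does not toggle the active model on every frame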
MOUSE_ACTIVE = True
FREEZE_CHANGE_MODEL = False


def main():
    mouse = Mouse()
    keyboard = Keyboard()
    specialkeys = Specialkeys()

    # load the trained gesture classifiers for each virtual device
    model_mouse = load_model(device="mouse")
    model_keyboard = load_model(device="keyboard")
    model_specialkeys = load_model(device="specialkeys")
    # create hand detection objects
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open the camera; warn and exit if it cannot be reached
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Warning: Cannot reach camera")
        return

    # set up Tkinter window
    root, video_label = set_camera_window()
    with mp_hands.Hands(max_num_hands=2, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
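
        # update_frame is rescheduled via root.after below, so the Tkinter event
        # loop drives the capture -> classify -> draw cycle instead of a while loop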
        def update_frame():
            global MOUSE_ACTIVE
            global FREEZE_CHANGE_MODEL

            # read a frame; stop cleanly if the camera feed is gone
            ret, frame = cap.read()
            if not ret:
                print("Warning: Cannot read camera input")
                root.destroy()
                return
            # flip the frame (mirror view) and convert BGR -> RGB for MediaPipe
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # hand detection
            right_landmark_list = []
            left_landmark_list = []
            command = None
            # run MediaPipe hand detection on the RGB frame (assumed step: the
            # original lines here were lost, but `results` is required below)
            results = hands.process(frameRGB)
            if results.multi_hand_landmarks:
                # two hands may be detected, so split them into left and right
                left_hand_landmarks, right_hand_landmarks = landmarks_from_results(results)

                # if the right hand is detected, process it
                if right_hand_landmarks is not None:
                    # draw the right-hand landmarks on the frame
                    mp_drawing.draw_landmarks(
                        frameRGB, right_hand_landmarks, mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
                    )

                    # collect (x, y) landmark coordinates; indices are documented in
                    # https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                    for lm in right_hand_landmarks.landmark:
                        right_landmark_list.append((lm.x, lm.y))

                    # normalise landmarks so the classifier is invariant to hand position and scale
                    normalised_right_landmark_list = normalise_landmarks(right_landmark_list)

                    # apply the model for the currently active device
                    if MOUSE_ACTIVE:
                        pred = model_mouse.predict(np.asarray(normalised_right_landmark_list).reshape(1, -1))
                        command = pred[0]
                        mouse.add_prediction(command)
                        if command == "move cursor" or command == "grab":
                            mouse.get_hand_size(right_landmark_list[12], right_landmark_list[0])
                            mouse.get_hand_pos(right_landmark_list[9])
                        elif command == "change the model":
                            if not FREEZE_CHANGE_MODEL:
                                MOUSE_ACTIVE = False
                                FREEZE_CHANGE_MODEL = True
                        else:
                            # any other gesture releases the model-change latch
                            FREEZE_CHANGE_MODEL = False
                    else:
                        pred = model_keyboard.predict(np.asarray(normalised_right_landmark_list).reshape(1, -1))
                        command = pred[0]
                        keyboard.add_prediction(command)
                        if command == "change the model":
                            if not FREEZE_CHANGE_MODEL:
                                MOUSE_ACTIVE = True
                                FREEZE_CHANGE_MODEL = True
                        else:
                            FREEZE_CHANGE_MODEL = False

                    cv2.putText(
                        img=frameRGB,
                        text=f"{pred[0]} pos {right_landmark_list[8][0]:.2f}, {right_landmark_list[8][1]:.2f}, {MOUSE_ACTIVE}",
                        org=(30, 30), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
                    )
                # if the left hand is detected, process it
                if left_hand_landmarks is not None:
                    # draw the left-hand landmarks on the frame
                    mp_drawing.draw_landmarks(
                        frameRGB, left_hand_landmarks, mp_hands.HAND_CONNECTIONS,
                        mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 120, 120), thickness=2, circle_radius=2)
                    )

                    # collect (x, y) landmark coordinates (same indexing as the right hand)
                    for lm in left_hand_landmarks.landmark:
                        left_landmark_list.append((lm.x, lm.y))

                    # normalise landmarks so the classifier is invariant to hand position and scale
                    normalised_left_landmark_list = normalise_landmarks(left_landmark_list)

                    # apply the special-keys model
                    pred = model_specialkeys.predict(np.asarray(normalised_left_landmark_list).reshape(1, -1))
                    command = pred[0]
                    # drawn one line below the right-hand label so the two do not overlap
                    cv2.putText(
                        img=frameRGB, text=pred[0], org=(30, 60),
                        fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(0, 255, 0), thickness=1
                    )
                    specialkeys.add_prediction(command)
            # convert the frame to a Tkinter-compatible image and display it
            frameRGB_resized = cv2.resize(frameRGB, (root.winfo_width(), root.winfo_height()))
            img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
            video_label.config(image=img)
            video_label.image = img  # keep a reference so Tkinter does not garbage-collect it

            # schedule the next frame in 10 ms
            root.after(10, update_frame)

        # start updating frames
        update_frame()

        # quit the program properly: release the camera before closing the window
        root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
        root.mainloop()

    cap.release()
    print("Program closed")
if __name__ == '__main__':
    main()