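"""Gesture-controlled mouse: MediaPipe detects hand landmarks in the webcam
feed, a pickled scikit-learn classifier maps them to commands, and pyautogui
executes the corresponding cursor, click, and scroll actions."""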
import cv2
import random
import mediapipe as mp
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pyautogui
import time
from collections import Counter
from screeninfo import get_monitors
MONITOR = get_monitors()[0]
WIDTH, HEIGHT = MONITOR.width, MONITOR.height
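# normalized MediaPipe landmark coordinates (0..1) are scaled by these screen
# dimensions to map hand positions to pixels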
class Mouse:
    def __init__(self) -> None:
        self.predictions = []
        self.previous_action = None
        self.freeze_action = False
        # parameters to fine-tune
        self.action_length = 5      # number of frame predictions pooled into one action
        self.move_distance = 10
        self.scroll_distance = 10

    def get_hand_pos(self, hand_pos):
        self.hand_pos_x = hand_pos[0]
        self.hand_pos_y = hand_pos[1]

    def add_prediction(self, prediction):
        self.predictions.append(prediction)
        if len(self.predictions) == self.action_length:
            self.make_action()

    def make_action(self):
        action = self.get_major_element(self.predictions)
        if self.freeze_action and action == self.previous_action:
            # a frozen (click-type) action must not repeat on consecutive windows
            self.update_init(action)
        else:
            self.mouse_control(action)
            self.update_init(action)

    def update_init(self, action):
        self.predictions = []
        self.previous_action = action
        self.freeze_action = action in {"left click", "right click", "double click"}  # maybe extend with keyboard and drop gestures

    def mouse_hand_parameters(self):
        pass
    def mouse_control(self, prediction):
        if prediction == "stop execution" or prediction is None:
            pass  # stop movement
        elif prediction == "move cursor":
            hand_x = np.clip(int(self.hand_pos_x * WIDTH), 0, WIDTH - 1)
            hand_y = np.clip(int(self.hand_pos_y * HEIGHT), 0, HEIGHT - 1)
            pyautogui.moveTo(hand_x, hand_y)
        elif prediction == "stop moving":
            pyautogui.move(0, 0)  # stop cursor
        elif prediction == "left click":
            pyautogui.click()  # left click
        elif prediction == "right click":
            pyautogui.click(button='right')  # right click
        elif prediction == "double click":
            pyautogui.click(clicks=2)  # double click
        elif prediction == "scrolling up":
            pyautogui.scroll(self.scroll_distance)  # scroll up
        elif prediction == "scrolling down":
            pyautogui.scroll(-self.scroll_distance)  # scroll down
        elif prediction == "scrolling right":
            # NOTE: hscroll does not work on Windows
            pyautogui.hscroll(self.scroll_distance)  # scroll right
        elif prediction == "scrolling left":
            # NOTE: hscroll does not work on Windows
            pyautogui.hscroll(-self.scroll_distance)  # scroll left
        elif prediction == "grab":
            # cursor follows the hand while grabbing
            hand_x = np.clip(int(self.hand_pos_x * WIDTH), 0, WIDTH - 1)
            hand_y = np.clip(int(self.hand_pos_y * HEIGHT), 0, HEIGHT - 1)
            pyautogui.moveTo(hand_x, hand_y)
        elif prediction == "multiple item selection grab":
            pass
        elif prediction == "multiple item selection drop":
            pass
        elif prediction == "change to keyboard":
            pass
        # time.sleep(self.time_checking)  # adjust speed of movement
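    # NOTE: pyautogui's fail-safe stays enabled (FAILSAFE defaults to True): moving
    # the cursor into the upper-left screen corner raises FailSafeException, which
    # doubles as an emergency stop for runaway gestures.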
    def get_major_element(self, string_list):
        counts = Counter(string_list)
        # find the element with the maximum count
        major_element, _ = counts.most_common(1)[0]
        return major_element
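    # Example: with action_length = 5, predictions like
    # ["move cursor", "move cursor", "left click", "move cursor", "scrolling up"]
    # vote to "move cursor"; Counter.most_common orders ties by first occurrence.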
def normalise_landmarks(landmark_list):
    if len(landmark_list) == 0:
        return landmark_list

    x = [lm[0] for lm in landmark_list]
    y = [lm[1] for lm in landmark_list]
    min_x, max_x = min(x), max(x)
    min_y, max_y = min(y), max(y)

    # min-max scale each coordinate into [0, 1] within the hand's bounding box
    # (assumes a detected hand spans a box of nonzero width and height)
    normalised_landmarks = []
    for lm in landmark_list:
        x_norm = (lm[0] - min_x) / (max_x - min_x)
        y_norm = (lm[1] - min_y) / (max_y - min_y)
        normalised_landmarks.append((x_norm, y_norm))

    return normalised_landmarks
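# Example: normalise_landmarks([(0.2, 0.4), (0.4, 0.8), (0.3, 0.6)])
# returns [(0.0, 0.0), (1.0, 1.0), (0.5, 0.5)]: coordinates are rescaled to the
# hand's own bounding box, making the classifier indifferent to where on the
# screen the hand appears and how large it is.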
## main: open video and do hand detection
def main():
    # define Mouse
    mouse = Mouse()

    model_dict = pickle.load(open('./trained_Moni_data.p', 'rb'))
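    # the pickle is expected to hold a dict with the trained classifier under 'model'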
    model = model_dict['model']

    # create hand detection object
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open video
    cap = cv2.VideoCapture(0)

    # if cannot open video give warning
    if not cap.isOpened():
        print("Warning: cannot reach camera")
    else:
        print("Program is running, push 'q' to quit.")

    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
        # read frames from webcamera
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Warning: cannot read camera input")
                break

            # flip frame to appear as a mirror
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            ## hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            if results.multi_hand_landmarks:
                # multi_hand_landmarks can store two hands, if max_num_hands=2, in which case we have to iterate through the hands with
                # for num, hand in enumerate(results.multi_hand_landmarks):
                # here exactly one hand is detected, because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]

                # draw landmarks on frame
                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
                                          )

                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # normalise landmarks so they match the representation used in training
                normalised_landmark_list = normalise_landmarks(landmark_list)

                # apply model
                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                mouse_command = pred[0]
                cv2.putText(img=frameRGB, text=mouse_command, org=(30, 30),
                            fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1)

                mouse.add_prediction(mouse_command)
                if mouse_command in ("move cursor", "grab"):
                    mouse.get_hand_pos(landmark_list[8])  # index fingertip drives the cursor

            # transform back RGB and show frame with annotation
            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
            cv2.imshow('Hand tracking', frame_annotated)
            # or show original frame without annotation
            # cv2.imshow('Hand tracking', frame)

            # check for key presses
            key = cv2.waitKey(1) & 0xFF
            if key == ord('n'):
                label = ""  # appears to be a leftover from the data-collection script
            elif key == ord('q'):
                print("Quit camera")
                break

    cap.release()
    cv2.destroyAllWindows()
    print("Program closed")
if __name__ == '__main__':
    main()