Commit 14a8891f authored by Vajay Mónika

mouse control under development

parent 38c052ca
import cv2
import random
import mediapipe as mp
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pyautogui
import time
from collections import Counter


class Mouse:
    def __init__(self) -> None:
        self.predictions = []
        self.previous_action = None
        self.freeze_action = False
        self.action_length = 11     # number of predictions collected before one action is made
        self.move_distance = 10     # number of interpolation steps for a cursor move
        self.scroll_distance = 10   # scroll amount per gesture
        self.time_checking = 0.05   # delay after each action, in seconds

    def get_hand_pos(self, hand_pos_x, hand_pos_y):
        self.hand_pos_x = hand_pos_x
        self.hand_pos_y = hand_pos_y

    def add_prediction(self, prediction):
        self.predictions.append(prediction)
        if len(self.predictions) == self.action_length:
            self.make_action()

    def make_action(self):
        action = self.get_major_element(self.predictions)
        if self.freeze_action and action == self.previous_action:
            # do not repeat a frozen action (e.g. a click) while the same gesture is still held
            self.update_init(action)
        else:
            self.mouse_control(action)
            self.update_init(action)

    def update_init(self, action):
        self.predictions = []
        self.previous_action = action
        self.freeze_action = action in {"left click", "right click", "double click"}  # maybe change to keyboard and drops

    def mouse_hand_parameters(self):
        pass

    def mouse_control(self, prediction):
        if prediction == "stop execution":
            pass  # Stop movement
        elif prediction == "move cursor":
            current_x, current_y = pyautogui.position()
            delta_x = (self.hand_pos_x - current_x) / self.move_distance
            delta_y = (self.hand_pos_y - current_y) / self.move_distance
            for i in range(self.move_distance):
                pyautogui.moveTo(current_x + delta_x * (i + 1), current_y + delta_y * (i + 1))
                time.sleep(0.01)  # Short delay for smooth movement
            # if the current action is different, change? Or update mouse as well?
        elif prediction == "stop moving":
            pyautogui.move(0, 0)  # Stop cursor
        elif prediction == "left click":
            pyautogui.click()  # Left click
        elif prediction == "right click":
            pyautogui.click(button='right')  # Right click
        elif prediction == "double click":
            pass  # Double click
        elif prediction == "scrolling up":
            pyautogui.scroll(self.scroll_distance)  # Scroll up
        elif prediction == "scrolling down":
            pyautogui.scroll(-self.scroll_distance)  # Scroll down
        elif prediction == "scrolling right":
            pass  # Scroll right
        elif prediction == "scrolling left":
            pass  # Scroll left
        elif prediction == "drag":
            pass
        elif prediction == "drop":
            pass
        elif prediction == "multiple item selection grab":
            pass
        elif prediction == "multiple item selection drop":
            pass
        elif prediction == "change to keyboard":
            pass

        time.sleep(self.time_checking)  # Adjust speed of movement
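
    # The branches above that are still `pass` could presumably use pyautogui.doubleClick(),
    # pyautogui.hscroll() for horizontal scrolling, and pyautogui.mouseDown()/pyautogui.mouseUp()
    # for drag and drop; they are left empty while mouse control is under development.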

    def get_major_element(self, string_list):
        counts = Counter(string_list)
        # Find the element with the maximum count
        major_element, _ = counts.most_common(1)[0]
        return major_element
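
# Possible wiring of the Mouse class into main() below (a sketch only: main() does not
# drive Mouse yet, and the hand-to-screen coordinate mapping is an assumption):
#
#     mouse = Mouse()
#     screen_w, screen_h = pyautogui.size()
#     ...
#     # inside the while loop, once `pred` is computed:
#     wrist = hand_landmarks.landmark[0]                     # landmark 0 is the wrist
#     mouse.get_hand_pos(wrist.x * screen_w, wrist.y * screen_h)
#     mouse.add_prediction(pred[0])   # every action_length (11) frames the majority label triggers one action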


def normalise_landmarks(landmark_list):
    if len(landmark_list) == 0:
        return landmark_list

    x = [lm[0] for lm in landmark_list]
    y = [lm[1] for lm in landmark_list]

    min_x = min(x)
    max_x = max(x)
    min_y = min(y)
    max_y = max(y)

    normalised_landmarks = []
    for lm in landmark_list:
        x_norm = (lm[0] - min_x) / (max_x - min_x)
        y_norm = (lm[1] - min_y) / (max_y - min_y)
        lm_norm = (x_norm, y_norm)
        normalised_landmarks.append(lm_norm)

    return normalised_landmarks
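
# Worked example (assumed values): landmarks [(0.2, 0.4), (0.6, 0.8), (0.4, 0.6)] normalise
# to [(0.0, 0.0), (1.0, 1.0), (0.5, 0.5)] - each hand is rescaled to its own bounding box,
# so the classifier sees the hand shape independent of where it sits in the frame.
# Note that a degenerate box (max_x == min_x or max_y == min_y) would divide by zero here.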


## main: open video and do hand detection
def main():
    # load model
    model_dict = pickle.load(open('./numbers_model.p', 'rb'))
    model = model_dict['model']

    # create hand detection object
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open video
    cap = cv2.VideoCapture(0)

    # if cannot open video give warning
    if not cap.isOpened():
        print("Warning: cannot reach camera")
    else:
        print("Program is running, push 'q' to quit.")

    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:

        # read frames from the webcam
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Warning: cannot read camera input")
                break

            # flip frame to appear as a mirror
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            ## hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            if results.multi_hand_landmarks:
                # multi_hand_landmarks can store two hands if max_num_hands=2, in which case we would have to iterate through the hands with
                # for num, hand in enumerate(results.multi_hand_landmarks):
                # here exactly one hand is detected, because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]

                # draw landmarks on frame
                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
                                          )

                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # normalise landmarks for more robust training
                normalised_landmark_list = normalise_landmarks(landmark_list)

                # apply model (21 landmarks flattened to a 1 x 42 feature vector)
                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                print(pred[0])
                cv2.putText(img=frameRGB, text=pred[0], org=(30, 30),
                            fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1)

            # transform back to BGR and show frame with annotation
            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
            cv2.imshow('Hand tracking', frame_annotated)
            # or show original frame without annotation
            # cv2.imshow('Hand tracking', frame)

            # Check for key presses
            key = cv2.waitKey(1) & 0xFF
            if key == ord('n'):
                label = ""
            elif key == ord('q'):
                print("Quit camera")
                break

    cap.release()
    cv2.destroyAllWindows()
    print("Program closed")


if __name__ == '__main__':
    main()
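
# Assumed prerequisites (not created by this script): a pickled gesture classifier at
# ./numbers_model.p exposing a 'model' key, plus the opencv-python, mediapipe, numpy,
# scikit-learn and pyautogui packages.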