From 14a8891f077869243e3706f0035fa11cc3a5caa4 Mon Sep 17 00:00:00 2001
From: Moni <vajay.monika@hallgato.ppke.hu>
Date: Sat, 9 Nov 2024 17:30:57 +0100
Subject: [PATCH] mouse control under development

---
 control_mouse.py | 209 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 control_mouse.py

diff --git a/control_mouse.py b/control_mouse.py
new file mode 100644
index 0000000..da0ea73
--- /dev/null
+++ b/control_mouse.py
@@ -0,0 +1,209 @@
+import cv2
+import random
+import mediapipe as mp
+import pickle
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+import pyautogui
+import time
+from collections import Counter
+
+class Mouse:
+    def __init__(self) -> None:
+        # rolling buffer of classifier outputs; an action is executed once it is full
+        self.predictions = []
+        self.previous_action = None
+        self.freeze_action = False
+
+        self.action_length = 11      # number of predictions collected before acting
+        self.move_distance = 10      # interpolation steps for one cursor move
+        self.scroll_distance = 10    # pyautogui scroll amount per action
+        self.time_checking = 0.05    # delay after each executed action
+
+    def get_hand_pos(self, hand_pos_x, hand_pos_y):
+        self.hand_pos_x = hand_pos_x
+        self.hand_pos_y = hand_pos_y
+
+    def add_prediction(self, prediction):
+        self.predictions.append(prediction)
+        if len(self.predictions) == self.action_length:
+            self.make_action()
+
+    def make_action(self):
+        action = self.get_major_element(self.predictions)
+        if self.freeze_action and action == self.previous_action:
+            self.update_init(action)
+        else:
+            self.mouse_control(action)
+            self.update_init(action)
+
+    def update_init(self, action):
+        self.predictions = []
+        self.previous_action = action
+        # do not repeat click-like actions on consecutive windows; maybe change to keyboard and drops
+        self.freeze_action = action in {"left click", "right click", "double click"}
+
+    def mouse_hand_parameters(self):
+        pass
+
+    def mouse_control(self, prediction):
+        if prediction == "stop execution":
+            pass  # Stop movement
+        elif prediction == "move cursor":
+            current_x, current_y = pyautogui.position()
+            delta_x = (self.hand_pos_x - current_x) / self.move_distance
+            delta_y = (self.hand_pos_y - current_y) / self.move_distance
+
+            for i in range(self.move_distance):
+                pyautogui.moveTo(current_x + delta_x * (i + 1), current_y + delta_y * (i + 1))
+                time.sleep(0.01)  # Short delay for smooth movement
+            # if the current action is different, change? Or update the mouse position as well?
+
+        elif prediction == "stop moving":
+            pyautogui.move(0, 0)  # Stop cursor
+        elif prediction == "left click":
+            pyautogui.click()  # Left click
+        elif prediction == "right click":
+            pyautogui.click(button='right')  # Right click
+        elif prediction == "double click":
+            pass  # Double click
+        elif prediction == "scrolling up":
+            pyautogui.scroll(self.scroll_distance)  # Scroll up
+        elif prediction == "scrolling down":
+            pyautogui.scroll(-self.scroll_distance)  # Scroll down
+        elif prediction == "scrolling right":
+            pass  # Scroll right
+        elif prediction == "scrolling left":
+            pass  # Scroll left
+        elif prediction == "drag":
+            pass
+        elif prediction == "drop":
+            pass
+        elif prediction == "multiple item selection grab":
+            pass
+        elif prediction == "multiple item selection drop":
+            pass
+        elif prediction == "change to keyboard":
+            pass
+
+        time.sleep(self.time_checking)  # Adjust speed of movement
+
+    def get_major_element(self, string_list):
+        counts = Counter(string_list)
+        # Find the element with the maximum count
+        major_element, _ = counts.most_common(1)[0]
+
+        return major_element
+
+
+def normalise_landmarks(landmark_list):
+    if len(landmark_list) == 0:
+        return landmark_list
+
+    x = [lm[0] for lm in landmark_list]
+    y = [lm[1] for lm in landmark_list]
+
+    min_x = min(x)
+    max_x = max(x)
+    min_y = min(y)
+    max_y = max(y)
+
+    # scale both coordinates into [0, 1] within the hand's bounding box
+    normalised_landmarks = []
+    for lm in landmark_list:
+        x_norm = (lm[0] - min_x) / (max_x - min_x)
+        y_norm = (lm[1] - min_y) / (max_y - min_y)
+        lm_norm = (x_norm, y_norm)
+
+        normalised_landmarks.append(lm_norm)
+
+    return normalised_landmarks
+
+## main: open video and do hand detection
+def main():
+    # load model
+    model_dict = pickle.load(open('./numbers_model.p', 'rb'))
+    model = model_dict['model']
+
+    # create hand detection object
+    mp_hands = mp.solutions.hands
+    mp_drawing = mp.solutions.drawing_utils
+
+    # open video
+    cap = cv2.VideoCapture(0)
+
+    # if the camera cannot be opened, give a warning
+    if not cap.isOpened():
+        print("Warning: cannot reach camera")
+    else:
+        print("Program is running, push 'q' to quit.")
+
+    # mediapipe hand object
+    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
+                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
+
+        # read frames from the webcam
+        while cap.isOpened():
+            ret, frame = cap.read()
+
+            if not ret:
+                print("Warning: cannot read camera input")
+                break
+
+            # flip frame to appear as a mirror
+            frame = cv2.flip(frame, 1)
+            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            ## hand detection
+            results = hands.process(frameRGB)
+
+            landmark_list = []
+            if results.multi_hand_landmarks:
+                # multi_hand_landmarks can store two hands if max_num_hands=2, in which case we would iterate with
+                # for num, hand in enumerate(results.multi_hand_landmarks):
+
+                # only one hand is detected, because max_num_hands=1
+                hand_landmarks = results.multi_hand_landmarks[0]
+
+                # draw landmarks on frame
+                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
+                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
+                                          )
+
+                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
+                for lm in hand_landmarks.landmark:
+                    landmark_list.append((lm.x, lm.y))
+
+                # normalise landmarks for more robust training
+                normalised_landmark_list = normalise_landmarks(landmark_list)
+
+                # apply model
+                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
+                print(pred[0])
+                # putText needs a string, so cast the predicted label
+                cv2.putText(img=frameRGB, text=str(pred[0]), org=(30, 30),
+                            fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1)
+
+            # transform back from RGB to BGR and show the frame with annotation
+            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
+            cv2.imshow('Hand tracking', frame_annotated)
+
+            # or show the original frame without annotation
+            # cv2.imshow('Hand tracking', frame)
+
+            # Check for key presses
+            key = cv2.waitKey(1) & 0xFF
+
+            if key == ord('n'):
+                label = ""
+            elif key == ord('q'):
+                print("Quit camera")
+                break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+    print("Program closed")
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
--
GitLab
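
A possible next step, sketched here rather than part of the patch: wiring the Mouse class into the main() detection loop. The use of pred[0] as the gesture label, the wrist landmark (index 0) as the cursor anchor, and the scaling of normalised MediaPipe coordinates to screen pixels via pyautogui.size() are assumptions, not something the patch already implements.

    # hypothetical glue code, reusing the loop variables from main() above
    mouse = Mouse()                              # created once, before the while loop
    screen_w, screen_h = pyautogui.size()        # screen size in pixels

    # inside the per-frame loop, after pred has been computed:
    wrist = hand_landmarks.landmark[0]           # assumed anchor: MediaPipe wrist landmark
    mouse.get_hand_pos(wrist.x * screen_w, wrist.y * screen_h)
    mouse.add_prediction(pred[0])                # triggers make_action() every action_length frames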