Commit 14a8891f authored by Vajay Mónika

mouse control under development

parent 38c052ca
import cv2
import random
import mediapipe as mp
import pickle
import numpy as np
from sklearn.ensemble import RandomForestClassifier
import pyautogui
import time
from collections import Counter


class Mouse:
    def __init__(self) -> None:
        self.predictions = []
        self.previous_action = None
        self.freeze_action = False
        self.action_length = 11     # number of predictions collected before one action is made
        self.move_distance = 10     # number of interpolation steps for a cursor move
        self.scroll_distance = 10   # scroll amount per gesture
        self.time_checking = 0.05   # delay after each action, in seconds

    def get_hand_pos(self, hand_pos_x, hand_pos_y):
        self.hand_pos_x = hand_pos_x
        self.hand_pos_y = hand_pos_y

    def add_prediction(self, prediction):
        self.predictions.append(prediction)
        if len(self.predictions) == self.action_length:
            self.make_action()

    def make_action(self):
        action = self.get_major_element(self.predictions)
        if self.freeze_action and action == self.previous_action:
            # do not repeat a frozen action (e.g. a click) while the same gesture is still held
            self.update_init(action)
        else:
            self.mouse_control(action)
            self.update_init(action)

    def update_init(self, action):
        self.predictions = []
        self.previous_action = action
        self.freeze_action = action in {"left click", "right click", "double click"}  # maybe change to keyboard and drops

    def mouse_hand_parameters(self):
        pass

    def mouse_control(self, prediction):
        if prediction == "stop execution":
            pass  # Stop movement
        elif prediction == "move cursor":
            current_x, current_y = pyautogui.position()
            delta_x = (self.hand_pos_x - current_x) / self.move_distance
            delta_y = (self.hand_pos_y - current_y) / self.move_distance
            for i in range(self.move_distance):
                pyautogui.moveTo(current_x + delta_x * (i + 1), current_y + delta_y * (i + 1))
                time.sleep(0.01)  # Short delay for smooth movement
            # if the current action is different, change? Or update mouse as well?
        elif prediction == "stop moving":
            pyautogui.move(0, 0)  # Stop cursor
        elif prediction == "left click":
            pyautogui.click()  # Left click
        elif prediction == "right click":
            pyautogui.click(button='right')  # Right click
        elif prediction == "double click":
            pass  # Double click
        elif prediction == "scrolling up":
            pyautogui.scroll(self.scroll_distance)  # Scroll up
        elif prediction == "scrolling down":
            pyautogui.scroll(-self.scroll_distance)  # Scroll down
        elif prediction == "scrolling right":
            pass  # Scroll right
        elif prediction == "scrolling left":
            pass  # Scroll left
        elif prediction == "drag":
            pass
        elif prediction == "drop":
            pass
        elif prediction == "multiple item selection grab":
            pass
        elif prediction == "multiple item selection drop":
            pass
        elif prediction == "change to keyboard":
            pass

        time.sleep(self.time_checking)  # Adjust speed of movement
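
    # The branches above that are still `pass` could presumably use pyautogui.doubleClick(),
    # pyautogui.hscroll() for horizontal scrolling, and pyautogui.mouseDown()/pyautogui.mouseUp()
    # for drag and drop; they are left empty while mouse control is under development.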

    def get_major_element(self, string_list):
        counts = Counter(string_list)
        # Find the element with the maximum count
        major_element, _ = counts.most_common(1)[0]
        return major_element
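
# Possible wiring of the Mouse class into main() below (a sketch only: main() does not
# drive Mouse yet, and the hand-to-screen coordinate mapping is an assumption):
#
#     mouse = Mouse()
#     screen_w, screen_h = pyautogui.size()
#     ...
#     # inside the while loop, once `pred` is computed:
#     wrist = hand_landmarks.landmark[0]                     # landmark 0 is the wrist
#     mouse.get_hand_pos(wrist.x * screen_w, wrist.y * screen_h)
#     mouse.add_prediction(pred[0])   # every action_length (11) frames the majority label triggers one action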


def normalise_landmarks(landmark_list):
    if len(landmark_list) == 0:
        return landmark_list

    x = [lm[0] for lm in landmark_list]
    y = [lm[1] for lm in landmark_list]

    min_x = min(x)
    max_x = max(x)
    min_y = min(y)
    max_y = max(y)

    normalised_landmarks = []
    for lm in landmark_list:
        x_norm = (lm[0] - min_x) / (max_x - min_x)
        y_norm = (lm[1] - min_y) / (max_y - min_y)
        lm_norm = (x_norm, y_norm)
        normalised_landmarks.append(lm_norm)

    return normalised_landmarks
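
# Worked example (assumed values): landmarks [(0.2, 0.4), (0.6, 0.8), (0.4, 0.6)] normalise
# to [(0.0, 0.0), (1.0, 1.0), (0.5, 0.5)] - each hand is rescaled to its own bounding box,
# so the classifier sees the hand shape independent of where it sits in the frame.
# Note that a degenerate box (max_x == min_x or max_y == min_y) would divide by zero here.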


## main: open video and do hand detection
def main():
    # load model
    model_dict = pickle.load(open('./numbers_model.p', 'rb'))
    model = model_dict['model']

    # create hand detection object
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open video
    cap = cv2.VideoCapture(0)

    # if cannot open video give warning
    if not cap.isOpened():
        print("Warning: cannot reach camera")
    else:
        print("Program is running, push 'q' to quit.")

    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:

        # read frames from the webcam
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                print("Warning: cannot read camera input")
                break

            # flip frame to appear as a mirror
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            ## hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            if results.multi_hand_landmarks:
                # multi_hand_landmarks can store two hands if max_num_hands=2, in which case we would have to iterate through the hands with
                # for num, hand in enumerate(results.multi_hand_landmarks):
                # here exactly one hand is detected, because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]

                # draw landmarks on frame
                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
                                          )

                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # normalise landmarks for more robust training
                normalised_landmark_list = normalise_landmarks(landmark_list)

                # apply model (21 landmarks flattened to a 1 x 42 feature vector)
                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                print(pred[0])
                cv2.putText(img=frameRGB, text=pred[0], org=(30, 30),
                            fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1)

            # transform back to BGR and show frame with annotation
            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
            cv2.imshow('Hand tracking', frame_annotated)
            # or show original frame without annotation
            # cv2.imshow('Hand tracking', frame)

            # Check for key presses
            key = cv2.waitKey(1) & 0xFF
            if key == ord('n'):
                label = ""
            elif key == ord('q'):
                print("Quit camera")
                break

    cap.release()
    cv2.destroyAllWindows()
    print("Program closed")


if __name__ == '__main__':
    main()
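
# Assumed prerequisites (not created by this script): a pickled gesture classifier at
# ./numbers_model.p exposing a 'model' key, plus the opencv-python, mediapipe, numpy,
# scikit-learn and pyautogui packages.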