From 14a8891f077869243e3706f0035fa11cc3a5caa4 Mon Sep 17 00:00:00 2001
From: Moni <vajay.monika@hallgato.ppke.hu>
Date: Sat, 9 Nov 2024 17:30:57 +0100
Subject: [PATCH] mouse control under development

---
 control_mouse.py | 209 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 control_mouse.py

diff --git a/control_mouse.py b/control_mouse.py
new file mode 100644
index 0000000..da0ea73
--- /dev/null
+++ b/control_mouse.py
@@ -0,0 +1,209 @@
+import cv2
+import random
+import mediapipe as mp
+import pickle
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+import pyautogui
+import time
+from collections import Counter
+
+class Mouse:
+    def __init__(self) -> None:
+        # rolling buffer of classifier outputs; an action is executed once it is full
+        self.predictions = []
+        self.previous_action = None
+        self.freeze_action = False
+
+        self.action_length = 11      # number of predictions collected before acting
+        self.move_distance = 10      # interpolation steps for one cursor move
+        self.scroll_distance = 10    # pyautogui scroll amount per action
+        self.time_checking = 0.05    # delay after each executed action
+
+    def get_hand_pos(self, hand_pos_x, hand_pos_y):
+        self.hand_pos_x = hand_pos_x
+        self.hand_pos_y = hand_pos_y
+
+    def add_prediction(self, prediction):
+        self.predictions.append(prediction)
+        if len(self.predictions) == self.action_length:
+            self.make_action()
+
+    def make_action(self):
+        action = self.get_major_element(self.predictions)
+        if self.freeze_action and action == self.previous_action:
+            self.update_init(action)
+        else:
+            self.mouse_control(action)
+            self.update_init(action)
+
+    def update_init(self, action):
+        self.predictions = []
+        self.previous_action = action
+        # do not repeat click-like actions on consecutive windows; maybe change to keyboard and drops
+        self.freeze_action = action in {"left click", "right click", "double click"}
+
+    def mouse_hand_parameters(self):
+        pass
+
+    def mouse_control(self, prediction):
+        if prediction == "stop execution":
+            pass  # Stop movement
+        elif prediction == "move cursor":
+            current_x, current_y = pyautogui.position()
+            delta_x = (self.hand_pos_x - current_x) / self.move_distance
+            delta_y = (self.hand_pos_y - current_y) / self.move_distance
+
+            for i in range(self.move_distance):
+                pyautogui.moveTo(current_x + delta_x * (i + 1), current_y + delta_y * (i + 1))
+                time.sleep(0.01)  # Short delay for smooth movement
+            # if the current action is different, change? Or update the mouse position as well?
+
+        elif prediction == "stop moving":
+            pyautogui.move(0, 0)  # Stop cursor
+        elif prediction == "left click":
+            pyautogui.click()  # Left click
+        elif prediction == "right click":
+            pyautogui.click(button='right')  # Right click
+        elif prediction == "double click":
+            pass  # Double click
+        elif prediction == "scrolling up":
+            pyautogui.scroll(self.scroll_distance)  # Scroll up
+        elif prediction == "scrolling down":
+            pyautogui.scroll(-self.scroll_distance)  # Scroll down
+        elif prediction == "scrolling right":
+            pass  # Scroll right
+        elif prediction == "scrolling left":
+            pass  # Scroll left
+        elif prediction == "drag":
+            pass
+        elif prediction == "drop":
+            pass
+        elif prediction == "multiple item selection grab":
+            pass
+        elif prediction == "multiple item selection drop":
+            pass
+        elif prediction == "change to keyboard":
+            pass
+
+        time.sleep(self.time_checking)  # Adjust speed of movement
+
+    def get_major_element(self, string_list):
+        counts = Counter(string_list)
+        # Find the element with the maximum count
+        major_element, _ = counts.most_common(1)[0]
+
+        return major_element
+
+
+def normalise_landmarks(landmark_list):
+    if len(landmark_list) == 0:
+        return landmark_list
+
+    x = [lm[0] for lm in landmark_list]
+    y = [lm[1] for lm in landmark_list]
+
+    min_x = min(x)
+    max_x = max(x)
+    min_y = min(y)
+    max_y = max(y)
+
+    # scale both coordinates into [0, 1] within the hand's bounding box
+    normalised_landmarks = []
+    for lm in landmark_list:
+        x_norm = (lm[0] - min_x) / (max_x - min_x)
+        y_norm = (lm[1] - min_y) / (max_y - min_y)
+        lm_norm = (x_norm, y_norm)
+
+        normalised_landmarks.append(lm_norm)
+
+    return normalised_landmarks
+
+## main: open video and do hand detection
+def main():
+    # load model
+    model_dict = pickle.load(open('./numbers_model.p', 'rb'))
+    model = model_dict['model']
+
+    # create hand detection object
+    mp_hands = mp.solutions.hands
+    mp_drawing = mp.solutions.drawing_utils
+
+    # open video
+    cap = cv2.VideoCapture(0)
+
+    # if the camera cannot be opened, give a warning
+    if not cap.isOpened():
+        print("Warning: cannot reach camera")
+    else:
+        print("Program is running, push 'q' to quit.")
+
+    # mediapipe hand object
+    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
+                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
+
+        # read frames from the webcam
+        while cap.isOpened():
+            ret, frame = cap.read()
+
+            if not ret:
+                print("Warning: cannot read camera input")
+                break
+
+            # flip frame to appear as a mirror
+            frame = cv2.flip(frame, 1)
+            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            ## hand detection
+            results = hands.process(frameRGB)
+
+            landmark_list = []
+            if results.multi_hand_landmarks:
+                # multi_hand_landmarks can store two hands if max_num_hands=2, in which case we would iterate with
+                # for num, hand in enumerate(results.multi_hand_landmarks):
+
+                # only one hand is detected, because max_num_hands=1
+                hand_landmarks = results.multi_hand_landmarks[0]
+
+                # draw landmarks on frame
+                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
+                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
+                                          )
+
+                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
+                for lm in hand_landmarks.landmark:
+                    landmark_list.append((lm.x, lm.y))
+
+                # normalise landmarks for more robust training
+                normalised_landmark_list = normalise_landmarks(landmark_list)
+
+                # apply model
+                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
+                print(pred[0])
+                # putText needs a string, so cast the predicted label
+                cv2.putText(img=frameRGB, text=str(pred[0]), org=(30, 30),
+                            fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1)
+
+            # transform back from RGB to BGR and show the frame with annotation
+            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
+            cv2.imshow('Hand tracking', frame_annotated)
+
+            # or show the original frame without annotation
+            # cv2.imshow('Hand tracking', frame)
+
+            # Check for key presses
+            key = cv2.waitKey(1) & 0xFF
+
+            if key == ord('n'):
+                label = ""
+            elif key == ord('q'):
+                print("Quit camera")
+                break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+    print("Program closed")
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
--
GitLab
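
A possible next step, sketched here rather than part of the patch: wiring the Mouse class into the main() detection loop. The use of pred[0] as the gesture label, the wrist landmark (index 0) as the cursor anchor, and the scaling of normalised MediaPipe coordinates to screen pixels via pyautogui.size() are assumptions, not something the patch already implements.

    # hypothetical glue code, reusing the loop variables from main() above
    mouse = Mouse()                              # created once, before the while loop
    screen_w, screen_h = pyautogui.size()        # screen size in pixels

    # inside the per-frame loop, after pred has been computed:
    wrist = hand_landmarks.landmark[0]           # assumed anchor: MediaPipe wrist landmark
    mouse.get_hand_pos(wrist.x * screen_w, wrist.y * screen_h)
    mouse.add_prediction(pred[0])                # triggers make_action() every action_length frames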