From 7bb4bc58236e56c40260efb77e794c4f90a3eef2 Mon Sep 17 00:00:00 2001
From: V Moni <vajay.monika@hallgato.ppke.hu>
Date: Fri, 22 Nov 2024 09:40:37 +0100
Subject: [PATCH] Split control_mouse.py into hand_detection, mouse_class and main modules

---
 final_project/control_mouse.py  | 226 --------------------------------
 final_project/hand_detection.py |  23 ++++
 final_project/main.py           | 110 ++++++++++++++++
 final_project/mouse_class.py    | 101 ++++++++++++++
 4 files changed, 234 insertions(+), 226 deletions(-)
 delete mode 100644 final_project/control_mouse.py
 create mode 100644 final_project/hand_detection.py
 create mode 100644 final_project/main.py
 create mode 100644 final_project/mouse_class.py

diff --git a/final_project/control_mouse.py b/final_project/control_mouse.py
deleted file mode 100644
index cbd798f..0000000
--- a/final_project/control_mouse.py
+++ /dev/null
@@ -1,226 +0,0 @@
-import cv2
-import random
-import mediapipe as mp
-import pickle
-import numpy as np
-from sklearn.ensemble import RandomForestClassifier
-import pyautogui
-import time
-from collections import Counter
-from screeninfo import get_monitors
-
-MONITOR = get_monitors()[0]
-WIDTH, HEIGHT = MONITOR.width, MONITOR.height
-
-class Mouse:
-    def __init__(self) -> None:
-        
-        self.predictions = []
-        self.previous_action = None
-        self.freeze_action = False
-
-        # parameters to fine-tune
-        self.action_length = 5
-        #self.move_distance = 10
-        self.scroll_distance = 50   
-        #self.time_checking = 0.5
-        self.stop_pos = None
-
-    def get_hand_pos(self, hand_pos):
-        self.hand_pos_x = hand_pos[0]
-        self.hand_pos_y = hand_pos[1]
-
-    def add_prediction(self, prediction):
-        self.predictions.append(prediction)
-        if len(self.predictions) == self.action_length:
-            self.make_action()
-
-    def make_action(self):
-        action = self.get_major_element(self.predictions)
-        if self.freeze_action and action == self.previous_action:
-            self.update_init(action)
-        else:
-            self.mouse_control(action)
-            self.update_init(action)
-
-    def update_init(self, action):
-        self.predictions = []
-        self.previous_action = action
-
-        self.freeze_action = action in {"left click", "right click", "double click"} # maybe change to keyboard and drops
-
-    def mouse_hand_parameters(self):
-        pass
-
-    def mouse_control(self, prediction):
-        if prediction == "stop execution" or None:
-            pass  # Stop movement
-        elif prediction == "move cursor":
-            
-            #hand_point = ([int(self.hand_pos_x*WIDTH), int(self.hand_pos_y*HEIGHT)])
-            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
-            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
-            pyautogui.moveTo(hand_x, hand_y)
-
-        elif prediction == "stop moving":
-            pyautogui.move(0, 0)  # Stop cursor
-            self.stop_pos = pyautogui.position()
-        elif prediction == "left click":
-            pyautogui.click()  # Left click 
-        elif prediction == "right click":
-            pyautogui.click(button='right')  # Right click
-        elif prediction == "double click":
-            pyautogui.click(clicks=2)    # Double click
-        elif prediction == "scrolling up":
-            pyautogui.scroll(self.scroll_distance)  # Scroll up
-        elif prediction == "scrolling down":
-            pyautogui.scroll(-self.scroll_distance)  # Scroll down
-        elif prediction == "scrolling right":
-            pyautogui.hscroll(self.scroll_distance)    # Scroll right
-            # THIS FUNCTION NOT WORKS ON WINDOWS
-        elif prediction == "scrolling left":
-            pyautogui.hscroll(self.scroll_distance)    # Scroll left
-            # THIS FUNCTION NOT WORKS ON WINDOWS
-        elif prediction == "drag":
-            if self.previous_action == "stop moving":
-                pyautogui.moveTo(*self.stop_pos)
-            pyautogui.mouseDown()  
-            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
-            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
-            pyautogui.moveTo(hand_x, hand_y)
-        elif prediction == "drop":
-            pyautogui.mouseUp()
-        elif prediction == "multiple item selection grab":
-            pyautogui.mouseDown()
-        elif prediction == "multiple item selection drop":
-            pyautogui.mouseUp()
-        elif prediction == "change to keyboard":
-            pass
-        
-        #time.sleep(self.time_checking)  # Adjust speed of movement
-    
-    def get_major_element(self, string_list):
-        counts = Counter(string_list)
-        # Find the element with the maximum count
-        major_element, _ = counts.most_common(1)[0]
-        
-        return major_element
-
-
-def normalise_landmarks(landmark_list):
-    if len(landmark_list) == 0:
-        return landmark_list
-    
-    x = [lm[0] for lm in landmark_list]
-    y = [lm[1] for lm in landmark_list]
-
-    min_x = min(x)
-    max_x = max(x)
-    min_y = min(y)
-    max_y = max(y)
-    
-    normalised_landmarks = []
-    for lm in landmark_list:
-        x_norm = (lm[0] - min_x) / (max_x - min_x)
-        y_norm = (lm[1] - min_y) / (max_y - min_y)
-        lm_norm = (x_norm, y_norm)
-        
-        normalised_landmarks.append(lm_norm)
-    
-    return normalised_landmarks    
-
-## main: open video and do hand detection
-def main():
-    #define Mouse
-    mouse = Mouse()
-
-    # load model
-    model_dict = pickle.load(open('./trained_Moni_data.p', 'rb'))
-    model = model_dict['model']
-    
-    # create hand detection object
-    mp_hands = mp.solutions.hands
-    mp_drawing = mp.solutions.drawing_utils
-    
-    # open video
-    cap = cv2.VideoCapture(0)
-    
-    # if cannot open video give warning
-    if not cap.isOpened():
-        print("Warning: cannot reach camera")
-    else:
-        print("Program is running, push 'q' to quit.")
-        
-    # mediapipe hand object
-    with mp_hands.Hands( max_num_hands=1, model_complexity=1,
-                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
-        
-        # read frames from webcamera
-        while cap.isOpened():        
-            ret, frame = cap.read()
-            
-            if not ret:
-                print("Warning: cannot read camera input")
-                break
-                
-            # flip frame to appear as a mirror
-            frame = cv2.flip(frame, 1)
-            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            
-            ## hand detection
-            results = hands.process(frameRGB)
-            
-            landmark_list = []
-            mouse_command = None
-            if results.multi_hand_landmarks:
-                # multi_hand_landmarks can store two hands, if max_num_hands=2, in which case we have to iterate through the hands with
-                # for num, hand in enumerate(results.multi_hand_landmarks): 
-                
-                # one hand is detected, because max_num_hands=1
-                hand_landmarks = results.multi_hand_landmarks[0]  
-
-                # draw landmarks on frame
-                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS, 
-                                            mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
-                                            mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
-                                            )
-                
-                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
-                for lm in hand_landmarks.landmark:
-                    landmark_list.append((lm.x, lm.y))
-            
-                # normalise landmarks for mor powerful training
-                normalised_landmark_list = normalise_landmarks(landmark_list)
-            
-                # apply model
-                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
-                mouse_command = pred[0]
-                cv2.putText(img = frameRGB, text = pred[0], org = (30,30), 
-                    fontFace = cv2.FONT_HERSHEY_DUPLEX, fontScale = 1, color = (255, 0, 0), thickness = 1)
-
-                mouse.add_prediction(mouse_command)
-                if mouse_command == "move cursor" or "grab":
-                    mouse.get_hand_pos(landmark_list[8])
-            # transform back RGB and show frame with annotation
-            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
-            cv2.imshow('Hand tracking', frame_annotated)
-            
-            # or show original frame without annotation
-            # cv2.imshow('Hand tracking', frame)
-            
-            # Check for key presses
-            key = cv2.waitKey(1) & 0xFF
-            
-            if key == ord('n'):
-                label = ""
-            elif key == ord('q'):
-                print("Quit camera")
-                break
-
-    cap.release()
-    cv2.destroyAllWindows()
-    
-    print("Program closed")
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/final_project/hand_detection.py b/final_project/hand_detection.py
new file mode 100644
index 0000000..5cafec4
--- /dev/null
+++ b/final_project/hand_detection.py
@@ -0,0 +1,23 @@
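+"""Helper functions for pre-processing MediaPipe hand landmarks."""
+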
+def normalise_landmarks(landmark_list):
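+    """Min-max normalise (x, y) landmarks into [0, 1] within the hand's bounding box."""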
+    if len(landmark_list) == 0:
+        return landmark_list
+    
+    x = [lm[0] for lm in landmark_list]
+    y = [lm[1] for lm in landmark_list]
+
+    min_x = min(x)
+    max_x = max(x)
+    min_y = min(y)
+    max_y = max(y)
+    
+    # guard against a degenerate bounding box (all landmarks on one line)
+    range_x = (max_x - min_x) or 1.0
+    range_y = (max_y - min_y) or 1.0
+
+    normalised_landmarks = []
+    for lm in landmark_list:
+        x_norm = (lm[0] - min_x) / range_x
+        y_norm = (lm[1] - min_y) / range_y
+        normalised_landmarks.append((x_norm, y_norm))
+    
+    return normalised_landmarks
\ No newline at end of file
diff --git a/final_project/main.py b/final_project/main.py
new file mode 100644
index 0000000..3002b94
--- /dev/null
+++ b/final_project/main.py
@@ -0,0 +1,110 @@
+import cv2
+import mediapipe as mp
+import pickle
+import numpy as np
+import os
+
+from mouse_class import Mouse
+from hand_detection import normalise_landmarks
+
+## main: open video and do hand detection
+def main():
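+    """Run the webcam loop: detect one hand, classify the gesture, drive the mouse."""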
+    # create the mouse controller
+    mouse = Mouse()
+
+    # load model
+    current_dir = os.path.dirname(__file__)
+    model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'trained_Moni_data.p'))
+    model_dict = pickle.load(open(model_path, 'rb'))
+    model = model_dict['model']
+    
+    # create hand detection object
+    mp_hands = mp.solutions.hands
+    mp_drawing = mp.solutions.drawing_utils
+    
+    # open video
+    cap = cv2.VideoCapture(0)
+    
+    # if cannot open video give warning
+    if not cap.isOpened():
+        print("Warning: cannot access camera")
+    else:
+        print("Program is running, press 'q' to quit.")
+        
+    # mediapipe hand object
+    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
+                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
+        
+        # read frames from webcamera
+        while cap.isOpened():        
+            ret, frame = cap.read()
+            
+            if not ret:
+                print("Warning: cannot read camera input")
+                break
+                
+            # flip frame to appear as a mirror
+            frame = cv2.flip(frame, 1)
+            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            
+            ## hand detection
+            results = hands.process(frameRGB)
+            
+            landmark_list = []
+            mouse_command = None
+            if results.multi_hand_landmarks:
+                # multi_hand_landmarks holds one entry per detected hand; with
+                # max_num_hands=2 we would iterate over it with
+                # for num, hand in enumerate(results.multi_hand_landmarks),
+                # but max_num_hands=1 guarantees at most one hand here
+                hand_landmarks = results.multi_hand_landmarks[0]
+
+                # draw landmarks on frame
+                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS, 
+                                            mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
+                                            mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
+                                            )
+                
+                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
+                for lm in hand_landmarks.landmark:
+                    landmark_list.append((lm.x, lm.y))
+            
+                # normalise landmarks the same way as during training
+                normalised_landmark_list = normalise_landmarks(landmark_list)
+            
+                # apply model
+                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
+                mouse_command = pred[0]
+                cv2.putText(img=frameRGB, text=pred[0], org=(30, 30),
+                            fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1)
+
+                mouse.add_prediction(mouse_command)
+                if mouse_command in ("move cursor", "grab"):
+                    # the index fingertip (landmark 8) drives the cursor
+                    mouse.get_hand_pos(landmark_list[8])
+            # convert back to BGR and show the annotated frame
+            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
+            cv2.imshow('Hand tracking', frame_annotated)
+            
+            # or show original frame without annotation
+            # cv2.imshow('Hand tracking', frame)
+            
+            # check for key presses; quit on 'q'
+            key = cv2.waitKey(1) & 0xFF
+            if key == ord('q'):
+                print("Quit camera")
+                break
+
+    cap.release()
+    cv2.destroyAllWindows()
+    
+    print("Program closed")
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/final_project/mouse_class.py b/final_project/mouse_class.py
new file mode 100644
index 0000000..0af223b
--- /dev/null
+++ b/final_project/mouse_class.py
@@ -0,0 +1,101 @@
+import numpy as np
+import pyautogui
+from collections import Counter
+from screeninfo import get_monitors
+
+MONITOR = get_monitors()[0]
+WIDTH, HEIGHT = MONITOR.width, MONITOR.height
+
+class Mouse:
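+    """Turn a stream of per-frame gesture predictions into pyautogui mouse actions.
+
+    Predictions are buffered, and the majority gesture over the last
+    `action_length` frames is executed, smoothing single-frame misclassifications.
+    """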
+    def __init__(self) -> None:
+        self.predictions = []
+        self.previous_action = None
+        self.freeze_action = False
+
+        # last known normalised hand position, updated via get_hand_pos()
+        self.hand_pos_x = 0.5
+        self.hand_pos_y = 0.5
+
+        # parameters to fine-tune
+        self.action_length = 5      # number of frames aggregated per action
+        self.scroll_distance = 50
+        self.stop_pos = None
+
+    def get_hand_pos(self, hand_pos):
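+        """Store the tracked fingertip position (normalised x, y in [0, 1])."""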
+        self.hand_pos_x = hand_pos[0]
+        self.hand_pos_y = hand_pos[1]
+
+    def add_prediction(self, prediction):
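+        """Buffer a prediction; after `action_length` frames, execute the majority action."""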
+        self.predictions.append(prediction)
+        if len(self.predictions) == self.action_length:
+            self.make_action()
+
+    def make_action(self):
+        action = self.get_major_element(self.predictions)
+        # while frozen, skip re-triggering the same one-shot action
+        if not (self.freeze_action and action == self.previous_action):
+            self.mouse_control(action)
+        self.update_init(action)
+
+    def update_init(self, action):
+        self.predictions = []
+        self.previous_action = action
+        # freeze one-shot actions so a held gesture does not re-trigger them
+        # TODO: maybe also freeze "change to keyboard" and the drop actions
+        self.freeze_action = action in {"left click", "right click", "double click"}
+
+    def mouse_hand_parameters(self):
+        pass
+
+    def mouse_control(self, prediction):
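+        """Map a gesture label to the corresponding pyautogui call; unknown labels are ignored."""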
+        if prediction == "stop execution" or None:
+            pass  # Stop movement
+        elif prediction == "move cursor":
+            
+            #hand_point = ([int(self.hand_pos_x*WIDTH), int(self.hand_pos_y*HEIGHT)])
+            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
+            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
+            pyautogui.moveTo(hand_x, hand_y)
+
+        elif prediction == "stop moving":
+            pyautogui.move(0, 0)  # Stop cursor
+            self.stop_pos = pyautogui.position()
+        elif prediction == "left click":
+            pyautogui.click()  # Left click 
+        elif prediction == "right click":
+            pyautogui.click(button='right')  # Right click
+        elif prediction == "double click":
+            pyautogui.click(clicks=2)    # Double click
+        elif prediction == "scrolling up":
+            pyautogui.scroll(self.scroll_distance)  # Scroll up
+        elif prediction == "scrolling down":
+            pyautogui.scroll(-self.scroll_distance)  # Scroll down
+        elif prediction == "scrolling right":
+            pyautogui.hscroll(self.scroll_distance)    # Scroll right
+            # THIS FUNCTION NOT WORKS ON WINDOWS
+        elif prediction == "scrolling left":
+            pyautogui.hscroll(self.scroll_distance)    # Scroll left
+            # THIS FUNCTION NOT WORKS ON WINDOWS
+        elif prediction == "drag":
+            if self.previous_action == "stop moving":
+                pyautogui.moveTo(*self.stop_pos)
+            pyautogui.mouseDown()  
+            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
+            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
+            pyautogui.moveTo(hand_x, hand_y)
+        elif prediction == "drop":
+            pyautogui.mouseUp()
+        elif prediction == "multiple item selection grab":
+            pyautogui.mouseDown()
+        elif prediction == "multiple item selection drop":
+            pyautogui.mouseUp()
+        elif prediction == "change to keyboard":
+            pass
+    
+    def get_major_element(self, string_list):
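+        """Return the most frequent element of the buffered predictions (majority vote)."""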
+        counts = Counter(string_list)
+        # Find the element with the maximum count
+        major_element, _ = counts.most_common(1)[0]
+        
+        return major_element
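+
+
+if __name__ == '__main__':
+    # minimal smoke test (a sketch, assuming a desktop session is available):
+    # feed five "move cursor" votes, which should move the cursor to the
+    # centre of the primary monitor
+    m = Mouse()
+    m.get_hand_pos((0.5, 0.5))
+    for _ in range(m.action_length):
+        m.add_prediction("move cursor")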
-- 
GitLab