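    """control_mouse.py

    Control the mouse with hand gestures: webcam frames are processed with
    MediaPipe Hands, the normalised landmarks are classified by a pre-trained
    scikit-learn model loaded from a pickle file, and the predicted gesture is
    mapped to pyautogui mouse actions (move, click, scroll, drag and drop).
    """
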
    import cv2
    import random
    import mediapipe as mp
    import pickle
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    import pyautogui
    import time
    from collections import Counter
    
    from screeninfo import get_monitors
    
    MONITOR = get_monitors()[0]
    WIDTH, HEIGHT = MONITOR.width, MONITOR.height
    
    
    class Mouse:
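        """Buffer per-frame gesture predictions and turn them into mouse actions.

        Predictions are collected until action_length of them are available; the
        majority gesture is then executed through pyautogui.
        """
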
        def __init__(self) -> None:
            
            self.predictions = []
            self.previous_action = None
            self.freeze_action = False
    
    
            # parameters to fine-tune
            self.action_length = 5
    
            self.move_distance = 10
            self.scroll_distance = 10
    
            self.time_checking = 0.5
    
        def get_hand_pos(self, hand_pos):
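            """Store the hand position (normalised 0-1 coordinates) used for cursor movement."""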
            self.hand_pos_x = hand_pos[0]
            self.hand_pos_y = hand_pos[1]
    
    
        def add_prediction(self, prediction):
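            """Buffer a prediction and trigger an action once action_length predictions are collected."""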
            self.predictions.append(prediction)
            if len(self.predictions) == self.action_length:
                self.make_action()
    
        def make_action(self):
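            """Execute the majority gesture of the buffered predictions, unless it is a frozen (already executed) click action."""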
            action = self.get_major_element(self.predictions)
            if self.freeze_action and action == self.previous_action:
    
                self.update_init(action)
    
            else:
                self.mouse_control(action)
    
                self.update_init(action)
    
    
        def update_init(self, action):
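            """Clear the prediction buffer, remember the last action and freeze click actions so they are not repeated."""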
            self.predictions = []
            self.previous_action = action
    
            self.freeze_action = action in {"left click", "right click", "double click"} # maybe change to keyboard and drops
    
        def mouse_hand_parameters(self):
            pass
    
        def mouse_control(self, prediction):
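            """Map a predicted gesture label to the corresponding pyautogui call."""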
    
            if prediction == "stop execution" or prediction is None:

                pass  # Stop movement
            elif prediction == "move cursor":
                
    
                #hand_point = ([int(self.hand_pos_x*WIDTH), int(self.hand_pos_y*HEIGHT)])
                hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
                hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
                pyautogui.moveTo(hand_x, hand_y)
    
    
            elif prediction == "stop moving":
                pyautogui.move(0, 0)  # Stop cursor
            elif prediction == "left click":
                pyautogui.click()  # Left click 
            elif prediction == "right click":
                pyautogui.click(button='right')  # Right click
            elif prediction == "double click":
    
                pyautogui.click(clicks=2)    # Double click
    
            elif prediction == "scrolling up":
    
                pyautogui.scroll(self.scroll_distance)  # Scroll up
    
            elif prediction == "scrolling down":
                pyautogui.scroll(-self.scroll_distance)  # Scroll down
            elif prediction == "scrolling right":
    
                pyautogui.hscroll(self.scroll_distance)    # Scroll right
                # NOTE: pyautogui.hscroll() does not work on Windows
    
            elif prediction == "scrolling left":
    
                pyautogui.hscroll(-self.scroll_distance)    # Scroll left
                # NOTE: pyautogui.hscroll() does not work on Windows
    
            elif prediction == "drag":
    
                pyautogui.mouseDown()
    
                hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
                hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
                pyautogui.moveTo(hand_x, hand_y)
    
            elif prediction == "drop":
    
                pyautogui.mouseUp()
    
            elif prediction == "multiple item selection grab":
    
                pyautogui.mouseDown()
    
            elif prediction == "multiple item selection drop":
    
                pyautogui.mouseUp()
    
            elif prediction == "change to keyboard":
                pass
            
    
            #time.sleep(self.time_checking)  # Adjust speed of movement
    
        
        def get_major_element(self, string_list):
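            """Return the most common element of string_list (majority vote)."""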
            counts = Counter(string_list)
            # Find the element with the maximum count
            major_element, _ = counts.most_common(1)[0]
            
            return major_element
    
    
    def normalise_landmarks(landmark_list):
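        """Min-max normalise (x, y) landmarks to the hand's bounding box.

        This makes the coordinates independent of where the hand is in the frame
        and of its apparent size, matching the preprocessing used for training.
        """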
        if len(landmark_list) == 0:
            return landmark_list
        
        x = [lm[0] for lm in landmark_list]
        y = [lm[1] for lm in landmark_list]
    
        min_x = min(x)
        max_x = max(x)
        min_y = min(y)
        max_y = max(y)
        
        normalised_landmarks = []
        for lm in landmark_list:
            x_norm = (lm[0] - min_x) / (max_x - min_x)
            y_norm = (lm[1] - min_y) / (max_y - min_y)
            lm_norm = (x_norm, y_norm)
            
            normalised_landmarks.append(lm_norm)
        
        return normalised_landmarks    
    
    ## main: open video and do hand detection
    def main():
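        """Open the webcam, detect one hand per frame, classify the gesture and drive the mouse."""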
    
        # create the Mouse controller
        mouse = Mouse()
    
    
        # load the trained gesture classifier
        with open('./trained_Moni_data.p', 'rb') as f:
            model_dict = pickle.load(f)
        model = model_dict['model']
        
        # create hand detection object
        mp_hands = mp.solutions.hands
        mp_drawing = mp.solutions.drawing_utils
        
        # open video
        cap = cv2.VideoCapture(0)
        
        # warn if the camera cannot be opened
        if not cap.isOpened():
            print("Warning: cannot reach camera")
        else:
            print("Program is running, push 'q' to quit.")
            
        # mediapipe hand object
        with mp_hands.Hands( max_num_hands=1, model_complexity=1,
                            min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
            
            # read frames from the webcam
            while cap.isOpened():        
                ret, frame = cap.read()
                
                if not ret:
                    print("Warning: cannot read camera input")
                    break
                    
                # flip frame to appear as a mirror
                frame = cv2.flip(frame, 1)
                frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                
                ## hand detection
                results = hands.process(frameRGB)
                
                landmark_list = []
    
                mouse_command = None
    
                if results.multi_hand_landmarks:
                    # multi_hand_landmarks can hold several hands if max_num_hands > 1; in that case iterate with
                    # for num, hand in enumerate(results.multi_hand_landmarks):

                    # only one hand is detected here because max_num_hands=1
                    hand_landmarks = results.multi_hand_landmarks[0]  
    
                    # draw landmarks on frame
                    mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS, 
                                                mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                                                mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
                                                )
                    
                    # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                    for lm in hand_landmarks.landmark:
                        landmark_list.append((lm.x, lm.y))
                
                    # normalise landmarks the same way as during training
                    normalised_landmark_list = normalise_landmarks(landmark_list)
                
                    # apply model
                    pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
    
                    mouse_command = pred[0]
    
                    cv2.putText(img = frameRGB, text = pred[0], org = (30,30), 
                        fontFace = cv2.FONT_HERSHEY_DUPLEX, fontScale = 1, color = (255, 0, 0), thickness = 1)
    
                    mouse.add_prediction(mouse_command)
                    # these gestures use the index-fingertip position (landmark 8)
                    if mouse_command in ("move cursor", "drag", "grab"):
                        mouse.get_hand_pos(landmark_list[8])
    
                # transform back RGB and show frame with annotation
                frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
                cv2.imshow('Hand tracking', frame_annotated)
                
                # or show original frame without annotation
                # cv2.imshow('Hand tracking', frame)
                
                # Check for key presses
                key = cv2.waitKey(1) & 0xFF
                
                if key == ord('q'):
                    print("Quit camera")
                    break
    
        cap.release()
        cv2.destroyAllWindows()
        
        print("Program closed")
    
    if __name__ == '__main__':
        main()