From 7bb4bc58236e56c40260efb77e794c4f90a3eef2 Mon Sep 17 00:00:00 2001
From: V Moni <vajay.monika@hallgato.ppke.hu>
Date: Fri, 22 Nov 2024 09:40:37 +0100
Subject: [PATCH] divide into classes

---
 final_project/control_mouse.py  | 226 --------------------------------
 final_project/hand_detection.py |  23 ++++
 final_project/main.py           | 110 ++++++++++++++++
 final_project/mouse_class.py    | 101 ++++++++++++++
 4 files changed, 234 insertions(+), 226 deletions(-)
 delete mode 100644 final_project/control_mouse.py
 create mode 100644 final_project/hand_detection.py
 create mode 100644 final_project/main.py
 create mode 100644 final_project/mouse_class.py

diff --git a/final_project/control_mouse.py b/final_project/control_mouse.py
deleted file mode 100644
index cbd798f..0000000
--- a/final_project/control_mouse.py
+++ /dev/null
@@ -1,226 +0,0 @@
-import cv2
-import random
-import mediapipe as mp
-import pickle
-import numpy as np
-from sklearn.ensemble import RandomForestClassifier
-import pyautogui
-import time
-from collections import Counter
-from screeninfo import get_monitors
-
-MONITOR = get_monitors()[0]
-WIDTH, HEIGHT = MONITOR.width, MONITOR.height
-
-class Mouse:
-    def __init__(self) -> None:
-
-        self.predictions = []
-        self.previous_action = None
-        self.freeze_action = False
-
-        # parameters to fine-tune
-        self.action_length = 5
-        #self.move_distance = 10
-        self.scroll_distance = 50
-        #self.time_checking = 0.5
-        self.stop_pos = None
-
-    def get_hand_pos(self, hand_pos):
-        self.hand_pos_x = hand_pos[0]
-        self.hand_pos_y = hand_pos[1]
-
-    def add_prediction(self, prediction):
-        self.predictions.append(prediction)
-        if len(self.predictions) == self.action_length:
-            self.make_action()
-
-    def make_action(self):
-        action = self.get_major_element(self.predictions)
-        if self.freeze_action and action == self.previous_action:
-            self.update_init(action)
-        else:
-            self.mouse_control(action)
-            self.update_init(action)
-
-    def update_init(self, action):
-        self.predictions = []
-        self.previous_action = action
-
-        self.freeze_action = action in {"left click", "right click", "double click"} # maybe change to keyboard and drops
-
-    def mouse_hand_parameters(self):
-        pass
-
-    def mouse_control(self, prediction):
-        if prediction == "stop execution" or None:
-            pass # Stop movement
-        elif prediction == "move cursor":
-
-            #hand_point = ([int(self.hand_pos_x*WIDTH), int(self.hand_pos_y*HEIGHT)])
-            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
-            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
-            pyautogui.moveTo(hand_x, hand_y)
-
-        elif prediction == "stop moving":
-            pyautogui.move(0, 0) # Stop cursor
-            self.stop_pos = pyautogui.position()
-        elif prediction == "left click":
-            pyautogui.click() # Left click
-        elif prediction == "right click":
-            pyautogui.click(button='right') # Right click
-        elif prediction == "double click":
-            pyautogui.click(clicks=2) # Double click
-        elif prediction == "scrolling up":
-            pyautogui.scroll(self.scroll_distance) # Scroll up
-        elif prediction == "scrolling down":
-            pyautogui.scroll(-self.scroll_distance) # Scroll down
-        elif prediction == "scrolling right":
-            pyautogui.hscroll(self.scroll_distance) # Scroll right
-            # THIS FUNCTION NOT WORKS ON WINDOWS
-        elif prediction == "scrolling left":
-            pyautogui.hscroll(self.scroll_distance) # Scroll left
-            # THIS FUNCTION NOT WORKS ON WINDOWS
-        elif prediction == "drag":
-            if self.previous_action == "stop moving":
-                pyautogui.moveTo(*self.stop_pos)
-            pyautogui.mouseDown()
-            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
-            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
-            pyautogui.moveTo(hand_x, hand_y)
-        elif prediction == "drop":
-            pyautogui.mouseUp()
-        elif prediction == "multiple item selection grab":
-            pyautogui.mouseDown()
-        elif prediction == "multiple item selection drop":
-            pyautogui.mouseUp()
-        elif prediction == "change to keyboard":
-            pass
-
-        #time.sleep(self.time_checking) # Adjust speed of movement
-
-    def get_major_element(self, string_list):
-        counts = Counter(string_list)
-        # Find the element with the maximum count
-        major_element, _ = counts.most_common(1)[0]
-
-        return major_element
-
-
-def normalise_landmarks(landmark_list):
-    if len(landmark_list) == 0:
-        return landmark_list
-
-    x = [lm[0] for lm in landmark_list]
-    y = [lm[1] for lm in landmark_list]
-
-    min_x = min(x)
-    max_x = max(x)
-    min_y = min(y)
-    max_y = max(y)
-
-    normalised_landmarks = []
-    for lm in landmark_list:
-        x_norm = (lm[0] - min_x) / (max_x - min_x)
-        y_norm = (lm[1] - min_y) / (max_y - min_y)
-        lm_norm = (x_norm, y_norm)
-
-        normalised_landmarks.append(lm_norm)
-
-    return normalised_landmarks
-
-## main: open video and do hand detection
-def main():
-    #define Mouse
-    mouse = Mouse()
-
-    # load model
-    model_dict = pickle.load(open('./trained_Moni_data.p', 'rb'))
-    model = model_dict['model']
-
-    # create hand detection object
-    mp_hands = mp.solutions.hands
-    mp_drawing = mp.solutions.drawing_utils
-
-    # open video
-    cap = cv2.VideoCapture(0)
-
-    # if cannot open video give warning
-    if not cap.isOpened():
-        print("Warning: cannot reach camera")
-    else:
-        print("Program is running, push 'q' to quit.")
-
-    # mediapipe hand object
-    with mp_hands.Hands( max_num_hands=1, model_complexity=1,
-                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
-
-        # read frames from webcamera
-        while cap.isOpened():
-            ret, frame = cap.read()
-
-            if not ret:
-                print("Warning: cannot read camera input")
-                break
-
-            # flip frame to appear as a mirror
-            frame = cv2.flip(frame, 1)
-            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-
-            ## hand detection
-            results = hands.process(frameRGB)
-
-            landmark_list = []
-            mouse_command = None
-            if results.multi_hand_landmarks:
-                # multi_hand_landmarks can store two hands, if max_num_hands=2, in which case we have to iterate through the hands with
-                # for num, hand in enumerate(results.multi_hand_landmarks):
-
-                # one hand is detected, because max_num_hands=1
-                hand_landmarks = results.multi_hand_landmarks[0]
-
-                # draw landmarks on frame
-                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
-                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
-                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
-                                          )
-
-                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
-                for lm in hand_landmarks.landmark:
-                    landmark_list.append((lm.x, lm.y))
-
-                # normalise landmarks for mor powerful training
-                normalised_landmark_list = normalise_landmarks(landmark_list)
-
-                # apply model
-                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
-                mouse_command = pred[0]
-                cv2.putText(img = frameRGB, text = pred[0], org = (30,30),
-                            fontFace = cv2.FONT_HERSHEY_DUPLEX, fontScale = 1, color = (255, 0, 0), thickness = 1)
-
-            mouse.add_prediction(mouse_command)
-            if mouse_command == "move cursor" or "grab":
-                mouse.get_hand_pos(landmark_list[8])
-            # transform back RGB and show frame with annotation
-            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
-            cv2.imshow('Hand tracking', frame_annotated)
-
-            # or show original frame without annotation
-            # cv2.imshow('Hand tracking', frame)
-
-            # Check for key presses
-            key = cv2.waitKey(1) & 0xFF
-
-            if key == ord('n'):
-                label = ""
-            elif key == ord('q'):
-                print("Quit camera")
-                break
-
-    cap.release()
-    cv2.destroyAllWindows()
-
-    print("Program closed")
-
-if __name__ == '__main__':
-    main()
\ No newline at end of file
diff --git a/final_project/hand_detection.py b/final_project/hand_detection.py
new file mode 100644
index 0000000..5cafec4
--- /dev/null
+++ b/final_project/hand_detection.py
@@ -0,0 +1,23 @@
+
+
+def normalise_landmarks(landmark_list):
+    if len(landmark_list) == 0:
+        return landmark_list
+
+    x = [lm[0] for lm in landmark_list]
+    y = [lm[1] for lm in landmark_list]
+
+    min_x = min(x)
+    max_x = max(x)
+    min_y = min(y)
+    max_y = max(y)
+
+    normalised_landmarks = []
+    for lm in landmark_list:
+        x_norm = (lm[0] - min_x) / (max_x - min_x)
+        y_norm = (lm[1] - min_y) / (max_y - min_y)
+        lm_norm = (x_norm, y_norm)
+
+        normalised_landmarks.append(lm_norm)
+
+    return normalised_landmarks
\ No newline at end of file
diff --git a/final_project/main.py b/final_project/main.py
new file mode 100644
index 0000000..3002b94
--- /dev/null
+++ b/final_project/main.py
@@ -0,0 +1,110 @@
+import cv2
+import random
+import mediapipe as mp
+import pickle
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+import time
+import os
+
+from mouse_class import Mouse
+from hand_detection import normalise_landmarks
+
+
+## main: open video and do hand detection
+def main():
+    #define Mouse
+    mouse = Mouse()
+
+    # load model
+    current_dir = os.path.dirname(__file__)
+    model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'trained_Moni_data.p'))
+    model_dict = pickle.load(open(model_path, 'rb'))
+    model = model_dict['model']
+
+    # create hand detection object
+    mp_hands = mp.solutions.hands
+    mp_drawing = mp.solutions.drawing_utils
+
+    # open video
+    cap = cv2.VideoCapture(0)
+
+    # if cannot open video give warning
+    if not cap.isOpened():
+        print("Warning: cannot reach camera")
+    else:
+        print("Program is running, push 'q' to quit.")
+
+    # mediapipe hand object
+    with mp_hands.Hands( max_num_hands=1, model_complexity=1,
+                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
+
+        # read frames from webcamera
+        while cap.isOpened():
+            ret, frame = cap.read()
+
+            if not ret:
+                print("Warning: cannot read camera input")
+                break
+
+            # flip frame to appear as a mirror
+            frame = cv2.flip(frame, 1)
+            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+            ## hand detection
+            results = hands.process(frameRGB)
+
+            landmark_list = []
+            mouse_command = None
+            if results.multi_hand_landmarks:
+                # multi_hand_landmarks can store two hands, if max_num_hands=2, in which case we have to iterate through the hands with
+                # for num, hand in enumerate(results.multi_hand_landmarks):
+
+                # one hand is detected, because max_num_hands=1
+                hand_landmarks = results.multi_hand_landmarks[0]
+
+                # draw landmarks on frame
+                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                                          mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
+                                          mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
+                                          )
+
+                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
+                for lm in hand_landmarks.landmark:
+                    landmark_list.append((lm.x, lm.y))
+
+                # normalise landmarks for more powerful training
+                normalised_landmark_list = normalise_landmarks(landmark_list)
+
+                # apply model
+                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
+                mouse_command = pred[0]
+                cv2.putText(img = frameRGB, text = pred[0], org = (30,30),
+                            fontFace = cv2.FONT_HERSHEY_DUPLEX, fontScale = 1, color = (255, 0, 0), thickness = 1)
+
+            mouse.add_prediction(mouse_command)
+            if mouse_command in ("move cursor", "grab"):
+                mouse.get_hand_pos(landmark_list[8])
+            # transform back RGB and show frame with annotation
+            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
+            cv2.imshow('Hand tracking', frame_annotated)
+
+            # or show original frame without annotation
+            # cv2.imshow('Hand tracking', frame)
+
+            # Check for key presses
+            key = cv2.waitKey(1) & 0xFF
+
+            if key == ord('n'):
+                label = ""
+            elif key == ord('q'):
+                print("Quit camera")
+                break
+
+    cap.release()
+    cv2.destroyAllWindows()
+
+    print("Program closed")
+
+if __name__ == '__main__':
+    main()
\ No newline at end of file
diff --git a/final_project/mouse_class.py b/final_project/mouse_class.py
new file mode 100644
index 0000000..0af223b
--- /dev/null
+++ b/final_project/mouse_class.py
@@ -0,0 +1,101 @@
+import numpy as np
+import pyautogui
+from collections import Counter
+from screeninfo import get_monitors
+
+MONITOR = get_monitors()[0]
+WIDTH, HEIGHT = MONITOR.width, MONITOR.height
+
+class Mouse:
+    def __init__(self) -> None:
+
+        self.predictions = []
+        self.previous_action = None
+        self.freeze_action = False
+
+        # parameters to fine-tune
+        self.action_length = 5
+        #self.move_distance = 10
+        self.scroll_distance = 50
+        #self.time_checking = 0.5
+        self.stop_pos = None
+
+    def get_hand_pos(self, hand_pos):
+        self.hand_pos_x = hand_pos[0]
+        self.hand_pos_y = hand_pos[1]
+
+    def add_prediction(self, prediction):
+        self.predictions.append(prediction)
+        if len(self.predictions) == self.action_length:
+            self.make_action()
+
+    def make_action(self):
+        action = self.get_major_element(self.predictions)
+        if self.freeze_action and action == self.previous_action:
+            self.update_init(action)
+        else:
+            self.mouse_control(action)
+            self.update_init(action)
+
+    def update_init(self, action):
+        self.predictions = []
+        self.previous_action = action
+
+        self.freeze_action = action in {"left click", "right click", "double click"} # maybe change to keyboard and drops
+
+    def mouse_hand_parameters(self):
+        pass
+
+    def mouse_control(self, prediction):
+        if prediction == "stop execution" or prediction is None:
+            pass # Stop movement
+        elif prediction == "move cursor":
+
+            #hand_point = ([int(self.hand_pos_x*WIDTH), int(self.hand_pos_y*HEIGHT)])
+            hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1)
+            hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1)
+            pyautogui.moveTo(hand_x, hand_y)
+
+        elif prediction == "stop moving":
+            pyautogui.move(0, 0) # Stop cursor
+            self.stop_pos = pyautogui.position()
+        elif prediction == "left click":
+            pyautogui.click() # Left click
+        elif prediction == "right click":
+            pyautogui.click(button='right') # Right click
+        elif prediction == "double click":
+            pyautogui.click(clicks=2) # Double click
+        elif prediction == "scrolling up":
+            pyautogui.scroll(self.scroll_distance) # Scroll up
+        elif prediction == "scrolling down":
+            pyautogui.scroll(-self.scroll_distance) # Scroll down
+        elif prediction == "scrolling right":
+            pyautogui.hscroll(self.scroll_distance) # Scroll right
+            # THIS FUNCTION DOES NOT WORK ON WINDOWS
+        elif prediction == "scrolling left":
"scrolling left": + pyautogui.hscroll(self.scroll_distance) # Scroll left + # THIS FUNCTION NOT WORKS ON WINDOWS + elif prediction == "drag": + if self.previous_action == "stop moving": + pyautogui.moveTo(*self.stop_pos) + pyautogui.mouseDown() + hand_x = np.clip(int(self.hand_pos_x*WIDTH), 0, WIDTH-1) + hand_y = np.clip(int(self.hand_pos_y*HEIGHT), 0, HEIGHT-1) + pyautogui.moveTo(hand_x, hand_y) + elif prediction == "drop": + pyautogui.mouseUp() + elif prediction == "multiple item selection grab": + pyautogui.mouseDown() + elif prediction == "multiple item selection drop": + pyautogui.mouseUp() + elif prediction == "change to keyboard": + pass + + #time.sleep(self.time_checking) # Adjust speed of movement + + def get_major_element(self, string_list): + counts = Counter(string_list) + # Find the element with the maximum count + major_element, _ = counts.most_common(1)[0] + + return major_element -- GitLab