diff --git a/final_project/hand_detection.py b/final_project/hand_detection.py
index 5cafec425c32286b1300254d3f68b47a1f98d120..185dc8167c8d35697a557285c30306ea9294aa93 100644
--- a/final_project/hand_detection.py
+++ b/final_project/hand_detection.py
@@ -1,5 +1,3 @@
-
-
 def normalise_landmarks(landmark_list):
     if len(landmark_list) == 0:
         return landmark_list
@@ -20,4 +18,24 @@ def normalise_landmarks(landmark_list):
         normalised_landmarks.append(lm_norm)
 
-    return normalised_landmarks
\ No newline at end of file
+    return normalised_landmarks
+
+def landmarks_from_results(results):
+
+    left_hand_saved = False
+    right_hand_saved = False
+
+    left_hand_landmarks = None
+    right_hand_landmarks = None
+
+    for i in range(len(results.multi_handedness)):
+        multihand = results.multi_handedness[i]
+
+        if multihand.classification[0].label == "Right" and not right_hand_saved:
+            right_hand_landmarks = results.multi_hand_landmarks[i]
+            right_hand_saved = True
+        elif multihand.classification[0].label == "Left" and not left_hand_saved:
+            left_hand_landmarks = results.multi_hand_landmarks[i]
+            left_hand_saved = True
+
+    return left_hand_landmarks, right_hand_landmarks
\ No newline at end of file
diff --git a/final_project/main_copy_keyboard.py b/final_project/main_copy_keyboard.py
index 0090268fcb2bd87439eddd0359a55fee170153b0..9be8027be7c671426d963c15eb185408399c6851 100644
--- a/final_project/main_copy_keyboard.py
+++ b/final_project/main_copy_keyboard.py
@@ -11,7 +11,8 @@ from PIL import Image, ImageTk
 from mouse_class import Mouse
 from keyboard_class import Keyboard
-from hand_detection import normalise_landmarks
+from specialkeys_class import Specialkeys
+from hand_detection import normalise_landmarks, landmarks_from_results
 
 # hide mediapipe warning :UserWarning: SymbolDatabase.GetPrototype() is deprecated. Please use message_factory.GetMessageClass() instead. SymbolDatabase.GetPrototype() will be removed soon.
 import warnings
@@ -21,13 +22,18 @@ def main():
     #define Mouse
     mouse = Mouse()
     keyboard = Keyboard()
+    specialkeys = Specialkeys()
 
-    # load model
+    # load models
     current_dir = os.path.dirname(__file__)
-    # model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'trained_Moni_data.p'))
-    model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'alfabet_v1.p'))
-    model_dict = pickle.load(open(model_path, 'rb'))
-    model = model_dict['model']
+    # mouse_model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'trained_Moni_data.p'))
+    alfabet_model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'alfabet_v1.p'))
+    alfabet_model_dict = pickle.load(open(alfabet_model_path, 'rb'))
+    alfabet_model = alfabet_model_dict['model']
+
+    special_model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'specialkeys_v1.p'))
+    special_model_dict = pickle.load(open(special_model_path, 'rb'))
+    special_model = special_model_dict['model']
 
     # create hand detection object
     mp_hands = mp.solutions.hands
@@ -54,15 +60,15 @@ def main():
     screen_height = root.winfo_screenheight()
 
     # Define window size and position (e.g., 320x240 window at bottom-right corner)
-    window_width = 160
-    window_height = 120
+    window_width = 160*2
+    window_height = 120*2
     x_position = screen_width - window_width - 10 # 10px margin from the right
    y_position = screen_height - window_height - 70 # 70px margin from the bottom
 
     # Set window geometry
     root.geometry(f"{window_width}x{window_height}+{x_position}+{y_position}")
 
     # mediapipe hand object
-    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
+    with mp_hands.Hands(max_num_hands=2, model_complexity=1,
                         min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
 
         def update_frame():
@@ -79,40 +85,69 @@ def main():
             # Hand detection
             results = hands.process(frameRGB)
 
-            landmark_list = []
+            right_landmark_list = []
+            left_landmark_list = []
             mouse_command = None
             if results.multi_hand_landmarks:
-                # one hand is detected, because max_num_hands=1
-                hand_landmarks = results.multi_hand_landmarks[0]
-
-                # Draw landmarks on frame
-                mp_drawing.draw_landmarks(
-                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
-                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
-                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
-                )
+                # two hands are detected, so we split left and right
+                left_hand_landmarks, right_hand_landmarks = landmarks_from_results(results)
 
-                # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
-                for lm in hand_landmarks.landmark:
-                    landmark_list.append((lm.x, lm.y))
+                # if right hand detected, process
+                if right_hand_landmarks is not None:
+                    # Draw landmarks on frame
+                    mp_drawing.draw_landmarks(
+                        frameRGB, right_hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                        mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
+                        mp_drawing.DrawingSpec(color=(120, 0, 120), thickness=2, circle_radius=2)
+                    )
+
+                    # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
+                    for lm in right_hand_landmarks.landmark:
+                        right_landmark_list.append((lm.x, lm.y))
 
-                # normalise landmarks for more powerful training
-                normalised_landmark_list = normalise_landmarks(landmark_list)
+                    # normalise landmarks for more powerful training
+                    normalised_right_landmark_list = normalise_landmarks(right_landmark_list)
 
-                # apply model
-                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
-                command = pred[0]
-                cv2.putText(
-                    img=frameRGB, text=pred[0], org=(30, 30),
-                    fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
-                )
+                    # apply model
+                    pred = alfabet_model.predict(np.asarray(normalised_right_landmark_list).reshape(1, -1))
+                    command = pred[0]
+                    cv2.putText(
+                        img=frameRGB, text=pred[0], org=(int(frameRGB.shape[1]/2)+30, 30),
+                        fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
+                    )
 
-                """mouse.add_prediction(mouse_command)
-                if mouse_command == "move cursor" or "grab":
-                    mouse.get_hand_pos(landmark_list[8])
-                """
-                keyboard.add_prediction(command)
+                    """mouse.add_prediction(mouse_command)
+                    if mouse_command == "move cursor" or "grab":
+                        mouse.get_hand_pos(right_landmark_list[8])
+                    """
+                    keyboard.add_prediction(command)
+
+                # similarly if left hand detected, process
+                if left_hand_landmarks is not None:
+                    # Draw landmarks on frame
+                    mp_drawing.draw_landmarks(
+                        frameRGB, left_hand_landmarks, mp_hands.HAND_CONNECTIONS,
+                        mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=4),
+                        mp_drawing.DrawingSpec(color=(0, 120, 120), thickness=2, circle_radius=2)
+                    )
+
+                    # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
+                    for lm in left_hand_landmarks.landmark:
+                        left_landmark_list.append((lm.x, lm.y))
+
+                    # normalise landmarks for more powerful training
+                    normalised_left_landmark_list = normalise_landmarks(left_landmark_list)
+                    # apply model
+                    pred = special_model.predict(np.asarray(normalised_left_landmark_list).reshape(1, -1))
+                    command = pred[0]
+                    cv2.putText(
+                        img=frameRGB, text=pred[0], org=(30, 30),
+                        fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(0, 255, 0), thickness=1
+                    )
+
+                    specialkeys.add_prediction(command)
+
             # Convert frame to Tkinter-compatible format and display
             frameRGB_resized = cv2.resize(frameRGB, (root.winfo_width(), root.winfo_height()))
             img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
diff --git a/final_project/specialkeys_class.py b/final_project/specialkeys_class.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f2a39f78a71633788197662e2ca4cd956127e0a
--- /dev/null
+++ b/final_project/specialkeys_class.py
@@ -0,0 +1,78 @@
+import numpy as np
+import pyautogui
+from collections import Counter
+# from screeninfo import get_monitors
+
+# MONITOR = get_monitors()[0]
+# WIDTH, HEIGHT = MONITOR.width, MONITOR.height
+
+class Specialkeys:
+    def __init__(self) -> None:
+
+        self.predictions = []
+        self.previous_action = None
+        self.freeze_action = False
+
+        # parameters to fine-tune
+        self.action_length = 10
+        #self.time_checking = 0.5
+
+    def get_hand_pos(self, hand_pos):
+        self.hand_pos_x = hand_pos[0]
+        self.hand_pos_y = hand_pos[1]
+
+    def add_prediction(self, prediction):
+        self.predictions.append(prediction)
+        # once the number of predictions reaches the defined action length, trigger an action
+        if len(self.predictions) == self.action_length:
+            self.make_action()
+
+    def make_action(self):
+        action = self.get_major_element(self.predictions)
+        if self.freeze_action and action == self.previous_action:
+            self.update_init(action)
+        else:
+            self.keyboard_control(action)
+            self.update_init(action)
+
+    def update_init(self, action):
+        self.predictions = []
+        self.previous_action = action
+
+        self.freeze_action = action in {"win", "printscreen"}
+
+    def keyboard_hand_parameters(self):
+        pass
+
+    def keyboard_control(self, prediction):
+        pyautogui.keyUp('shift')
+        pyautogui.keyUp('ctrl')
+        pyautogui.keyUp('alt')
+        if prediction == "stop execution" or prediction is None:
+            pass # Stop movement
+        elif prediction == "tab":
+            pyautogui.press('tab')
+        elif prediction == "shift":
+            pyautogui.keyDown('shift')
+            print("press shift")
+        elif prediction == "ctrl":
+            pyautogui.keyDown('ctrl')
+            print("press ctrl")
+        elif prediction == "alt":
+            pyautogui.keyDown('alt')
+            print("press alt")
+        elif prediction == "win":
+            pyautogui.press('win')
+        elif prediction == "printscreen":
+            pyautogui.press('printscreen')
+        else:
+            pass
+
+        #time.sleep(self.time_checking) # Adjust speed of movement
+
+    def get_major_element(self, string_list):
+        counts = Counter(string_list)
+        # Find the element with the maximum count
+        major_element, _ = counts.most_common(1)[0]
+
+        return major_element
diff --git a/merge_pickles.py b/merge_pickles.py
index d5b794baaaab4f300c0927ed18fcfa11f6c1d230..0b23aa18c5b74fab2ada0457f35223a6083b5e9f 100644
--- a/merge_pickles.py
+++ b/merge_pickles.py
@@ -3,8 +3,8 @@ import os
 from pprint import pprint
 
-FOLDER_PATH = "./train_data/keyboard/"
-OUTPUT_FILE = 'alfabet_v1.pickle' # Output merged file
+FOLDER_PATH = "./train_data/specialkeys/"
+OUTPUT_FILE = 'specialkeys_v1.pickle' # Output merged file
 
 current_dir = os.path.dirname(__file__)
 OUTPUT_FOLDER = os.path.abspath(os.path.join(current_dir, 'merged_training_data', OUTPUT_FILE))
diff --git a/merged_training_data/specialkeys_v1.pickle b/merged_training_data/specialkeys_v1.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..aae9cd289a91e1551911ebbd97a855cab2958006
Binary files /dev/null and b/merged_training_data/specialkeys_v1.pickle differ
diff --git a/train_data/specialkeys/left_alt_Bazsi.pickle b/train_data/specialkeys/left_alt_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..0ea020ab37c256085e18d5d3220874b27e993752
Binary files /dev/null and b/train_data/specialkeys/left_alt_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_caraculo_Bazsi.pickle b/train_data/specialkeys/left_caraculo_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..1637b2c2c10209053a626fd9c1d7657665ef406d
Binary files /dev/null and b/train_data/specialkeys/left_caraculo_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_ctrl_Bazsi.pickle b/train_data/specialkeys/left_ctrl_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..009f3738b3694601ca07eba4289b5b5e1a2be664
Binary files /dev/null and b/train_data/specialkeys/left_ctrl_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_printscreen_Bazsi.pickle b/train_data/specialkeys/left_printscreen_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..b89a5b1f116482521787c0237fafbb41886b4a65
Binary files /dev/null and b/train_data/specialkeys/left_printscreen_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_shift_Bazsi.pickle b/train_data/specialkeys/left_shift_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..62ab06485c5b6ce22892c43fe9df37ec989363f0
Binary files /dev/null and b/train_data/specialkeys/left_shift_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_stop_execution_Bazsi.pickle b/train_data/specialkeys/left_stop_execution_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..ccc57e8800e7b58ffd82c9f5802d3df6fbe58a67
Binary files /dev/null and b/train_data/specialkeys/left_stop_execution_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_tab_Bazsi.pickle b/train_data/specialkeys/left_tab_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..18d7b563453a4eea7a3707c1df5016a7f49ef0a0
Binary files /dev/null and b/train_data/specialkeys/left_tab_Bazsi.pickle differ
diff --git a/train_data/specialkeys/left_win_Bazsi.pickle b/train_data/specialkeys/left_win_Bazsi.pickle
new file mode 100644
index 0000000000000000000000000000000000000000..56cd42f5c663ccb47f25e04c90c25c0620009fba
Binary files /dev/null and b/train_data/specialkeys/left_win_Bazsi.pickle differ
diff --git a/trained_models/specialkeys_v1.p b/trained_models/specialkeys_v1.p
new file mode 100644
index 0000000000000000000000000000000000000000..37fe2e77f17a503404550b15a02d8615d20a1e70
Binary files /dev/null and b/trained_models/specialkeys_v1.p differ
diff --git a/training/data_collector.py b/training/data_collector.py
index 4e72090866906eaf127bf6147102a95d7155b6c2..5dcca2831710e13b5a17cea791293dde0c3a85ca 100644
--- a/training/data_collector.py
+++ b/training/data_collector.py
@@ -166,7 +166,7 @@ def main():
     current_dir = os.path.dirname(__file__)
 
     filename = input("give filename: ")
-    folder_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'train_data/keyboard', filename))
+    folder_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'train_data/specialkeys', filename))
 
     f = open(folder_path + '.pickle', 'wb')
     pickle.dump({'data': data, 'label':labels}, f)
     f.close()
diff --git a/training/train_model.py b/training/train_model.py
index 3723f1a169f799206b244642e69aed1c45745693..0bf3c550cb1e53ba01720ca46373da50a2b373ce 100644
--- a/training/train_model.py
+++ b/training/train_model.py
@@ -6,7 +6,7 @@ from sklearn.model_selection import train_test_split
 from sklearn.metrics import accuracy_score
 import os
 
-filename = 'alfabet_v1'
+filename = 'specialkeys_v1'
 
 current_dir = os.path.dirname(__file__)
 file_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'merged_training_data', filename))
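
Reviewer note (not part of the patch): the new landmarks_from_results helper can be sanity-checked without a camera or trained models. The minimal sketch below fakes the shape of the mp.solutions.hands results object with types.SimpleNamespace; the fake_results object and its string payloads are illustrative stand-ins for MediaPipe's landmark protos, and it assumes it is run from final_project/ so hand_detection.py is importable.

    from types import SimpleNamespace
    from hand_detection import landmarks_from_results

    # Mimic results.multi_handedness / results.multi_hand_landmarks:
    # parallel lists where entry i classifies hand i as "Left" or "Right".
    fake_results = SimpleNamespace(
        multi_handedness=[
            SimpleNamespace(classification=[SimpleNamespace(label="Left")]),
            SimpleNamespace(classification=[SimpleNamespace(label="Right")]),
        ],
        multi_hand_landmarks=["left-landmarks", "right-landmarks"],
    )

    left, right = landmarks_from_results(fake_results)
    assert (left, right) == ("left-landmarks", "right-landmarks")

This also documents the contract the main loop now relies on: either return value may be None when that hand is not detected, which is why both branches in update_frame guard with "is not None" before drawing and predicting.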