Skip to content
Snippets Groups Projects
main.py 5 KiB
Newer Older
  • Learn to ignore specific revisions
  • Vajay Mónika's avatar
    Vajay Mónika committed
    import cv2
    import random
    import mediapipe as mp
    import pickle
    import numpy as np
    from sklearn.ensemble import RandomForestClassifier
    import time
    import os
    
    from tkinter import Tk, Label
    from PIL import Image, ImageTk
    
    Vajay Mónika's avatar
    Vajay Mónika committed
    
    from mouse_class import Mouse
    from hand_detection import normalise_landmarks
    
    def main():
        #define Mouse
        mouse = Mouse()
    
        # load model
        current_dir = os.path.dirname(__file__)
        model_path = os.path.abspath(os.path.join(current_dir, os.pardir, 'trained_models', 'trained_Moni_data.p'))
        model_dict = pickle.load(open(model_path, 'rb'))
        model = model_dict['model']
        
        # create hand detection object
        mp_hands = mp.solutions.hands
        mp_drawing = mp.solutions.drawing_utils
        
        # open video
        cap = cv2.VideoCapture(0)
        
        # if cannot open video give warning
        if not cap.isOpened():
    
            print("Warning: Cannot reach camera")
            return
    
        # set up Tkinter window
        root = Tk()
        root.title("Hand Tracking - Always on Top")
        root.attributes("-topmost", True)
        video_label = Label(root)
        video_label.pack()
    
        # adjust window geometry
        # Get the screen width and height
        screen_width = root.winfo_screenwidth()
        screen_height = root.winfo_screenheight()
        
        # Define window size and position (e.g., 320x240 window at bottom-right corner)
    
    Vajay Mónika's avatar
    Vajay Mónika committed
        window_width = 160*4
        window_height = 120*4
    
        x_position = screen_width - window_width - 10  # 10px margin from the right
        y_position = screen_height - window_height - 70  # 50px margin from the bottom
    
        # Set window geometry
        root.geometry(f"{window_width}x{window_height}+{x_position}+{y_position}")
    
    Vajay Mónika's avatar
    Vajay Mónika committed
        # mediapipe hand object
    
        with mp_hands.Hands(max_num_hands=1, model_complexity=1,
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                            min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
            
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                ret, frame = cap.read()
                if not ret:
    
                    print("Warning: Cannot read camera input")
                    root.destroy()
                    return
                
                # flip frame and process it
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                frame = cv2.flip(frame, 1)
                frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                results = hands.process(frameRGB)
                
                landmark_list = []
                mouse_command = None
                if results.multi_hand_landmarks:
                    # one hand is detected, because max_num_hands=1
    
                    hand_landmarks = results.multi_hand_landmarks[0]
    
    Vajay Mónika's avatar
    Vajay Mónika committed
    
    
                    # Draw landmarks on frame
                    mp_drawing.draw_landmarks(
                        frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS, 
                        mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                        mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
                    )
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                    
                    # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                    for lm in hand_landmarks.landmark:
                        landmark_list.append((lm.x, lm.y))
    
                    
                    # normalise landmarks for more powerful training
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                    normalised_landmark_list = normalise_landmarks(landmark_list)
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                    # apply model
                    pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                    mouse_command = pred[0]
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                    hand_size = landmark_list[0][0] - landmark_list[12][0], landmark_list[0][1] - landmark_list[12][1]
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                        img=frameRGB, 
                        text=f"{pred[0]} pos {landmark_list[8][0]:.2f}, {landmark_list[8][1]:.2f}",
                        org=(30, 30), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
                    )
                    cv2.putText(
                        img=frameRGB, 
                        text=f"hand size: {hand_size[0]:.2f}, {hand_size[1]:.2f}",
                        org=(30, 60), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(0, 255, 0), thickness=1
    
    Vajay Mónika's avatar
    Vajay Mónika committed
    
                    mouse.add_prediction(mouse_command)
                    if mouse_command == "move cursor" or "grab":
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                        mouse.get_hand_size(landmark_list[12], landmark_list[0])
    
    Vajay Mónika's avatar
    Vajay Mónika committed
                        mouse.get_hand_pos(landmark_list[8])
                
    
                # Convert frame to Tkinter-compatible format and display
                frameRGB_resized = cv2.resize(frameRGB, (root.winfo_width(), root.winfo_height()))
                img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
                video_label.config(image=img)
                video_label.image = img
    
                # Refresh frame
                root.after(10, update_frame)
    
            # Start updating frames
            update_frame()
    
            # Quit the program properly
            root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
            root.mainloop()
    
    Vajay Mónika's avatar
    Vajay Mónika committed
    
        cap.release()
        print("Program closed")
    
    if __name__ == '__main__':