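"""Gesture-controlled mouse (main.py).

Reads webcam frames, detects a single hand with MediaPipe Hands, classifies
the landmark pose with a pre-trained model (loaded via tools.load_model), and
forwards the predicted command to the Mouse controller. The annotated video
feed is displayed in a Tkinter window.
"""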
import cv2
import mediapipe as mp
import numpy as np
from sklearn.ensemble import RandomForestClassifier  # noqa: F401 (unused here; presumably the class of the pickled gesture model)
from tkinter import Tk, Label
from PIL import Image, ImageTk
from mouse_class import Mouse
from hand_detection import normalise_landmarks
from tools import load_model, set_camera_window


def main():
    # define mouse controller
    mouse = Mouse()

    # load MOUSE model
    model = load_model(device="mouse")

    # create hand detection objects
    mp_hands = mp.solutions.hands
    mp_drawing = mp.solutions.drawing_utils

    # open video; warn and quit if the camera cannot be reached
    cap = cv2.VideoCapture(0)
    if not cap.isOpened():
        print("Warning: Cannot reach camera")
        return

    # set up Tkinter window
    root, video_label = set_camera_window()

    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
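        # update_frame re-schedules itself with root.after, so frames are
        # processed inside Tkinter's event loop rather than a blocking cv2 loop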
        def update_frame():
            ret, frame = cap.read()
            if not ret:
                print("Warning: Cannot read camera input")
                root.destroy()
                return

            # flip frame horizontally and convert BGR -> RGB for mediapipe
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

            # hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            mouse_command = None
            if results.multi_hand_landmarks:
                # at most one hand is detected because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]

                # draw landmarks on the frame
                mp_drawing.draw_landmarks(
                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
                )

                # collect (x, y) landmark coordinates; indices are described in
                # https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # normalise landmarks (the same normalisation used for training)
                normalised_landmark_list = normalise_landmarks(landmark_list)

                # apply the model to get a gesture prediction
                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                mouse_command = pred[0]

                hand_size = (landmark_list[0][0] - landmark_list[12][0],
                             landmark_list[0][1] - landmark_list[12][1])
                cv2.putText(
                    img=frameRGB,
                    text=f"{pred[0]} pos {landmark_list[8][0]:.2f}, {landmark_list[8][1]:.2f}",
                    org=(30, 30), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
                )
                cv2.putText(
                    img=frameRGB,
                    text=f"hand size: {hand_size[0]:.2f}, {hand_size[1]:.2f}",
                    org=(30, 60), fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(0, 255, 0), thickness=1
                )

            # feed the prediction to the mouse controller (None if no hand was seen)
            mouse.add_prediction(mouse_command)
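            # landmark 0 is the wrist, 8 the index-finger tip, 12 the middle-finger tip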
            if mouse_command in ("move cursor", "grab"):
                mouse.get_hand_size(landmark_list[12], landmark_list[0])
                mouse.get_hand_pos(landmark_list[8])
            # convert frame to a Tkinter-compatible format and display it
            frameRGB_resized = cv2.resize(frameRGB, (root.winfo_width(), root.winfo_height()))
            img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
            video_label.config(image=img)
            video_label.image = img  # keep a reference so the image is not garbage-collected

            # refresh frame
            root.after(10, update_frame)
        # start updating frames
        update_frame()

        # quit the program properly
        root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
        root.mainloop()

    cap.release()
    print("Program closed")


if __name__ == '__main__':
    main()