Commit 34a2e87b authored by Formanek Balázs István

main.py - display window on top of other applications

parent 05c9f2a4
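
The change pins the camera preview above other applications by setting Tkinter's "-topmost" window attribute and anchoring a small window in the bottom-right corner with a geometry string of the form "<width>x<height>+<x>+<y>". A minimal standalone sketch of that idea (window size, margins, and label text are illustrative assumptions, not code from this repository):

from tkinter import Tk, Label

root = Tk()
root.title("Always on Top demo")
root.attributes("-topmost", True)   # ask the window manager to keep this window above others

Label(root, text="pinned preview").pack()

# place a 160x120 window 10 px from the right edge and 70 px above the bottom edge
w, h = 160, 120
x = root.winfo_screenwidth() - w - 10
y = root.winfo_screenheight() - h - 70
root.geometry(f"{w}x{h}+{x}+{y}")   # geometry string: "<width>x<height>+<x>+<y>"

root.mainloop()
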
@@ -6,12 +6,12 @@ import numpy as np
from sklearn.ensemble import RandomForestClassifier
import time
import os
from tkinter import Tk, Label
from PIL import Image, ImageTk
from mouse_class import Mouse
from hand_detection import normalise_landmarks
## main: open video and do hand detection
def main():
    # define Mouse
    mouse = Mouse()
@@ -31,81 +31,99 @@ def main():
    # if cannot open video give warning
    if not cap.isOpened():
        print("Warning: Cannot reach camera")
        return
    else:
        print("Program is running, close the window to quit.")
    # set up Tkinter window
    root = Tk()
    root.title("Hand Tracking - Always on Top")
    root.attributes("-topmost", True)

    video_label = Label(root)
    video_label.pack()

    # adjust window geometry
    # get the screen width and height
    screen_width = root.winfo_screenwidth()
    screen_height = root.winfo_screenheight()

    # define window size and position (a 160x120 window at the bottom-right corner)
    window_width = 160
    window_height = 120
    x_position = screen_width - window_width - 10     # 10 px margin from the right
    y_position = screen_height - window_height - 70   # 70 px margin from the bottom

    # set window geometry
    root.geometry(f"{window_width}x{window_height}+{x_position}+{y_position}")
    # mediapipe hand object
    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                        min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:

        # read frames from the webcam and update the Tkinter window
        def update_frame():
            ret, frame = cap.read()
            if not ret:
                print("Warning: Cannot read camera input")
                root.destroy()
                return

            # flip frame to appear as a mirror, then convert to RGB for processing
            frame = cv2.flip(frame, 1)
            frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # hand detection
            results = hands.process(frameRGB)

            landmark_list = []
            mouse_command = None
            if results.multi_hand_landmarks:
                # multi_hand_landmarks can hold two hands if max_num_hands=2, in which case we would
                # iterate with: for num, hand in enumerate(results.multi_hand_landmarks)
                # here only one hand is detected, because max_num_hands=1
                hand_landmarks = results.multi_hand_landmarks[0]
                # draw landmarks on frame
                mp_drawing.draw_landmarks(
                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS,
                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
                )
                # get landmark list with indices described in
                # https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                for lm in hand_landmarks.landmark:
                    landmark_list.append((lm.x, lm.y))

                # normalise landmarks for more robust training
                normalised_landmark_list = normalise_landmarks(landmark_list)

                # apply model
                pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                mouse_command = pred[0]
                cv2.putText(
                    img=frameRGB, text=pred[0], org=(30, 30),
                    fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
                )

            mouse.add_prediction(mouse_command)
            if mouse_command in ("move cursor", "grab"):
                mouse.get_hand_pos(landmark_list[8])
            # convert frame to a Tkinter-compatible format and display it
            frameRGB_resized = cv2.resize(frameRGB, (root.winfo_width(), root.winfo_height()))
            img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
            video_label.config(image=img)
            video_label.image = img  # keep a reference so the image is not garbage-collected

            # schedule the next frame update
            root.after(10, update_frame)

        # start updating frames
        update_frame()
        # quit the program properly when the window is closed
        root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
        root.mainloop()

    cap.release()
    cv2.destroyAllWindows()
    print("Program closed")

if __name__ == '__main__':
    main()
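
For context, the commit also replaces the earlier blocking while cap.isOpened() / cv2.waitKey loop with Tkinter's after() scheduling, so frame capture and the GUI share a single event loop. A minimal standalone sketch of that pattern (camera index, the 10 ms delay, and the cleanup wiring are assumptions; the MediaPipe and classifier steps are omitted):

import cv2
from tkinter import Tk, Label
from PIL import Image, ImageTk

cap = cv2.VideoCapture(0)
root = Tk()
video_label = Label(root)
video_label.pack()

def update_frame():
    ret, frame = cap.read()
    if not ret:
        root.destroy()
        return
    frame = cv2.cvtColor(cv2.flip(frame, 1), cv2.COLOR_BGR2RGB)
    img = ImageTk.PhotoImage(Image.fromarray(frame))
    video_label.config(image=img)
    video_label.image = img          # keep a reference so Tkinter does not drop the image
    root.after(10, update_frame)     # schedule the next capture without blocking the GUI

update_frame()
root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
root.mainloop()
cap.release()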