diff --git a/final_project/main.py b/final_project/main.py
index 9be031afa7a71fc4a96b084e886ac3d8e0f23cef..cde4c48271153e92ce8bcd0d5670787a03481380 100644
--- a/final_project/main.py
+++ b/final_project/main.py
@@ -6,12 +6,12 @@ import numpy as np
 from sklearn.ensemble import RandomForestClassifier
 import time
 import os
+from tkinter import Tk, Label
+from PIL import Image, ImageTk
 
 from mouse_class import Mouse
 from hand_detection import normalise_landmarks
-  
 
-## main: open video and do hand detection
 def main():
     #define Mouse
     mouse = Mouse()
@@ -31,81 +31,99 @@ def main():
     
     # if cannot open video give warning
     if not cap.isOpened():
-        print("Warning: cannot reach camera")
+        print("Warning: Cannot reach camera")
+        return
     else:
-        print("Program is running, push 'q' to quit.")
-        
+        print("Program is running, press 'q' to quit.")
+
+    # set up Tkinter window
+    root = Tk()
+    root.title("Hand Tracking - Always on Top")
+    root.attributes("-topmost", True)
+    video_label = Label(root)
+    video_label.pack()
+
+    # adjust window geometry
+    # Get the screen width and height
+    screen_width = root.winfo_screenwidth()
+    screen_height = root.winfo_screenheight()
+    
+    # Define window size and position (e.g., a 160x120 window at the bottom-right corner)
+    window_width = 160
+    window_height = 120
+    x_position = screen_width - window_width - 10  # 10px margin from the right
+    y_position = screen_height - window_height - 70  # 70px margin from the bottom
+
+    # Set window geometry
+    root.geometry(f"{window_width}x{window_height}+{x_position}+{y_position}")
     # mediapipe hand object
-    with mp_hands.Hands( max_num_hands=1, model_complexity=1,
+    with mp_hands.Hands(max_num_hands=1, model_complexity=1,
                         min_detection_confidence=0.9, min_tracking_confidence=0.9) as hands:
         
-        # read frames from webcamera
-        while cap.isOpened():        
+        def update_frame():
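+            # read a single frame from the webcam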
             ret, frame = cap.read()
-            
             if not ret:
-                print("Warning: cannot read camera input")
-                break
-                
-            # flip frame to appear as a mirror
+                print("Warning: Cannot read camera input")
+                root.destroy()
+                return
+            
+            # flip frame and process it
             frame = cv2.flip(frame, 1)
             frameRGB = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
-            
-            ## hand detection
+
+            # Hand detection
             results = hands.process(frameRGB)
             
             landmark_list = []
             mouse_command = None
             if results.multi_hand_landmarks:
-                # multi_hand_landmarks can store two hands, if max_num_hands=2, in which case we have to iterate through the hands with
-                # for num, hand in enumerate(results.multi_hand_landmarks): 
-                
                 # one hand is detected, because max_num_hands=1
-                hand_landmarks = results.multi_hand_landmarks[0]  
+                hand_landmarks = results.multi_hand_landmarks[0]
 
-                # draw landmarks on frame
-                mp_drawing.draw_landmarks(frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS, 
-                                            mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
-                                            mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2),
-                                            )
+                # Draw landmarks on frame
+                mp_drawing.draw_landmarks(
+                    frameRGB, hand_landmarks, mp_hands.HAND_CONNECTIONS, 
+                    mp_drawing.DrawingSpec(color=(250, 0, 0), thickness=2, circle_radius=4),
+                    mp_drawing.DrawingSpec(color=(0, 250, 0), thickness=2, circle_radius=2)
+                )
                 
                 # get landmark list with indices described in https://github.com/google-ai-edge/mediapipe/blob/master/mediapipe/python/solutions/hands.py
                 for lm in hand_landmarks.landmark:
                     landmark_list.append((lm.x, lm.y))
-            
-                # normalise landmarks for mor powerful training
+                
+                # normalise landmarks to match the model's training preprocessing
                 normalised_landmark_list = normalise_landmarks(landmark_list)
-            
+                
                 # apply model
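+                # reshape the normalised landmarks into a single-sample 2-D array for the classifier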
                 pred = model.predict(np.asarray(normalised_landmark_list).reshape(1, -1))
                 mouse_command = pred[0]
-                cv2.putText(img = frameRGB, text = pred[0], org = (30,30), 
-                    fontFace = cv2.FONT_HERSHEY_DUPLEX, fontScale = 1, color = (255, 0, 0), thickness = 1)
+                cv2.putText(
+                    img=frameRGB, text=pred[0], org=(30, 30), 
+                    fontFace=cv2.FONT_HERSHEY_DUPLEX, fontScale=1, color=(255, 0, 0), thickness=1
+                )
 
                 mouse.add_prediction(mouse_command)
-                if mouse_command == "move cursor" or "grab":
+                if mouse_command in ("move cursor", "grab"):
                     mouse.get_hand_pos(landmark_list[8])
-                    
-            # transform back RGB and show frame with annotation
-            frame_annotated = cv2.cvtColor(frameRGB, cv2.COLOR_RGB2BGR)
-            cv2.imshow('Hand tracking', frame_annotated)
-            
-            # or show original frame without annotation
-            # cv2.imshow('Hand tracking', frame)
             
-            # Check for key presses
-            key = cv2.waitKey(1) & 0xFF
-            
-            if key == ord('n'):
-                label = ""
-            elif key == ord('q'):
-                print("Quit camera")
-                break
+            # Convert frame to Tkinter-compatible format and display
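+            # resize to the configured window size (winfo_width()/winfo_height() report 1 until the window is mapped)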
+            frameRGB_resized = cv2.resize(frameRGB, (window_width, window_height))
+            img = ImageTk.PhotoImage(Image.fromarray(frameRGB_resized))
+            video_label.config(image=img)
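+            # keep a reference so the PhotoImage is not garbage-collected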
+            video_label.image = img
+
+            # Schedule the next frame update after 10 ms
+            root.after(10, update_frame)
+
+        # Start updating frames
+        update_frame()
+
+        # Release the camera and close the window when the user quits
+        root.protocol("WM_DELETE_WINDOW", lambda: (cap.release(), root.destroy()))
+        root.mainloop()
 
     cap.release()
-    cv2.destroyAllWindows()
-    
     print("Program closed")
 
 if __name__ == '__main__':
-    main()
\ No newline at end of file
+    main()