diff --git a/debug_viewer.py b/debug_viewer.py index 82175a9..7fad30c 100644 --- a/debug_viewer.py +++ b/debug_viewer.py @@ -2,141 +2,87 @@ import mss import cv2 import numpy as np import time +from typing import List, Tuple -# ========= Dein Spielausschnitt ========= +# ===== User config ===== monitor_area = {"top": 120, "left": 330, "width": 1900, "height": 1263} +# monitor_area = {"top": 121, "left": 27, "width": 672-27, "height": 549-121} -# ========= HSV-Grenzen ========= -yellow_lower = np.array([15, 40, 200], dtype=np.uint8) -yellow_upper = np.array([25, 120, 255], dtype=np.uint8) -white_lower = np.array([0, 0, 220], dtype=np.uint8) -white_upper = np.array([180, 50, 255], dtype=np.uint8) +# Einträge dürfen Pixel (x0,y0,x1,y1) oder normiert [0..1] sein (identisch zur Env) +ui_exclude_rects: List[Tuple[float, float, float, float]] = [ + # (0.0, 0.0, 1.0, 0.08), # Beispiel: oberer HUD-Streifen (normiert) +] +# Feste Referenz (muss zur Env passen!) +REF_SIZE = (1900, 1263) -black_lower = np.array([0, 0, 0], dtype=np.uint8) -black_upper = np.array([180, 80, 60], dtype=np.uint8) - -green1_lower = np.array([30, 80, 80], dtype=np.uint8) -green1_upper = np.array([45, 255, 255], dtype=np.uint8) -green2_lower = np.array([65, 100, 80], dtype=np.uint8) -green2_upper = np.array([90, 255, 255], dtype=np.uint8) - -kernel = np.ones((3,3), np.uint8) - -# Radien -EAT_RADIUS = 95 -COLL_RADIUS = 115 - -# Bomben-Filter -BOMB_MIN_AREA = 400 # angepasst! -BOMB_CIRC_MIN = 0.60 -BOMB_ASPECT_TOL = 0.35 -BOMB_EXTENT_MIN = 0.60 -BOMB_SOLIDITY_MIN = 0.85 - -# Fenster-Skalierung (0.7 = 70 % Größe) +# Anzeige-Skalierung (nur fürs Fenster) WINDOW_SCALE = 0.8 - -# Anzeige-Modi -MODE_OVERLAY, MODE_FLOWER_MASK, MODE_BOMB_MASK, MODE_TURTLE_MASK = 0,1,2,3 +MODE_OVERLAY, MODE_TEXT_ONLY = 0, 1 mode = MODE_OVERLAY +# ===== Implementation ===== +# Wichtig: Die Detection/Parameter kommen aus der Env, um Drift zu vermeiden. 
def _draw_overlay(frame_bgr, det, fps: float):
    """Draw detections, reward zones and a text HUD onto ``frame_bgr``.

    Zone sizes and thresholds are read from the module-level ``env`` so the
    viewer displays the same per-axis values the environment works with.

    Args:
        frame_bgr: Image to draw on; it is modified in place.
        det: Detection dict as produced by ``env._detect_entities``. Keys
            read here: ``frame_hw``, ``turtle_xy``, ``flower_xy``,
            ``bombs_xy`` and ``state_norm``.
        fps: Frames-per-second value shown in the HUD.

    Returns:
        The same ``frame_bgr`` object after drawing.
    """
    h, w = det["frame_hw"]

    # The *_nd thresholds describe the full rectangle width/height as a
    # fraction of the frame; halve them to get offsets from the turtle.
    eat_half_w_px = int(round((env.eat_x_nd * w) * 0.5))
    eat_half_h_px = int(round((env.eat_y_nd * h) * 0.5))
    coll_half_w_px = int(round((env.collision_x_nd * w) * 0.5))
    coll_half_h_px = int(round((env.collision_y_nd * h) * 0.5))

    tx, ty = det["turtle_xy"]
    fx, fy = det["flower_xy"]
    bombs = det["bombs_xy"]

    # state_norm is [tx, ty, fx, fy, bx, by], already normalised to the frame.
    n_tx, n_ty, n_fx, n_fy, nbx, nby = det["state_norm"]

    # Only mark a "nearest bomb" when the state actually carries a position.
    # An all-zero (bx, by) is treated as "no bomb selected"; drawing it would
    # put a bogus red circle in the top-left corner.
    # NOTE(review): the selection code is not visible from here - confirm
    # that (0, 0) is the only placeholder the env uses.
    nb_px = None
    if len(bombs) > 0 and (nbx > 0.0 or nby > 0.0):
        nb_px = (int(round(nbx * w)), int(round(nby * h)))

    font = cv2.FONT_HERSHEY_SIMPLEX

    # Flower marker
    if fx is not None:
        cv2.circle(frame_bgr, (fx, fy), 8, (0, 255, 255), 2)
        cv2.putText(frame_bgr, "Flower", (fx + 10, fy - 10), font, 0.6,
                    (0, 255, 255), 2)

    # Bombs: every detection in grey, the nearest one highlighted in red
    for (bx, by, _a) in bombs:
        cv2.circle(frame_bgr, (bx, by), 10, (60, 60, 60), 2)
    if nb_px is not None:
        cv2.circle(frame_bgr, nb_px, 12, (0, 0, 255), 3)

    # Turtle marker plus the eat (green) and collision (red) rectangles
    if tx is not None:
        cv2.circle(frame_bgr, (tx, ty), 8, (0, 200, 0), 2)
        cv2.putText(frame_bgr, "Turtle", (tx + 10, ty - 10), font, 0.6,
                    (0, 200, 0), 2)
        cv2.rectangle(frame_bgr,
                      (tx - eat_half_w_px, ty - eat_half_h_px),
                      (tx + eat_half_w_px, ty + eat_half_h_px),
                      (0, 255, 0), 1)
        cv2.rectangle(frame_bgr,
                      (tx - coll_half_w_px, ty - coll_half_h_px),
                      (tx + coll_half_w_px, ty + coll_half_h_px),
                      (0, 0, 255), 1)

    # HUD. Apply the same lower bound of 1 px that the env's detection uses
    # when it converts the area fraction, so the displayed value matches.
    min_area_px = max(1, int(round(env.bomb_min_area_frac * h * w)))
    # ASCII only: the Hershey fonts render unsupported symbols as '?'.
    hud_lines = [
        f"FPS: {fps:.1f}",
        f"state = [tx={n_tx:.3f}, ty={n_ty:.3f}, fx={n_fx:.3f}, fy={n_fy:.3f}, bx={nbx:.3f}, by={nby:.3f}]",
        f"eat_x_nd={env.eat_x_nd:.5f} eat_y_nd={env.eat_y_nd:.5f} coll_x_nd={env.collision_x_nd:.5f} coll_y_nd={env.collision_y_nd:.5f}",
        f"bomb_min_area_frac={env.bomb_min_area_frac:.7f} (-> min_area_px~{min_area_px}) ref_size={env.w_ref}x{env.h_ref}",
    ]
    for row, line in enumerate(hud_lines):
        cv2.putText(frame_bgr, line, (16, 28 + 24 * row), font, 0.55,
                    (255, 255, 255), 2)

    return frame_bgr
MODE_OVERLAY: - out = draw_overlay(frame.copy(), flower, bombs, turtle, fps) - elif mode == MODE_FLOWER_MASK: - out = colorize(masks["flower"]) - elif mode == MODE_BOMB_MASK: - out = colorize(masks["bomb"]) - elif mode == MODE_TURTLE_MASK: - out = colorize(masks["turtle"]) - # --- hier skalieren --- + out = _draw_overlay(frame.copy(), det, fps) + else: + h, w = det["frame_hw"] + out = np.zeros((h, w, 3), dtype=np.uint8) + out = _draw_overlay(out, det, fps) + if WINDOW_SCALE != 1.0: - out = cv2.resize(out, (int(out.shape[1]*WINDOW_SCALE), int(out.shape[0]*WINDOW_SCALE))) - cv2.imshow("Debug Viewer", out) + out = cv2.resize(out, (int(out.shape[1] * WINDOW_SCALE), int(out.shape[0] * WINDOW_SCALE))) + cv2.imshow("Env-State Viewer (axes-normalized)", out) + key = cv2.waitKey(1) & 0xFF if key == ord('q'): break elif key == ord('0'): mode = MODE_OVERLAY - elif key == ord('1'): mode = MODE_FLOWER_MASK - elif key == ord('2'): mode = MODE_BOMB_MASK - elif key == ord('3'): mode = MODE_TURTLE_MASK + elif key == ord('1'): mode = MODE_TEXT_ONLY + cv2.destroyAllWindows() diff --git a/flower_game_env.py b/flower_game_env.py index 19c2358..9868d67 100644 --- a/flower_game_env.py +++ b/flower_game_env.py @@ -8,6 +8,7 @@ import time from typing import Tuple, Optional, List +# ---------------- Hilfsfunktionen ---------------- def _centroid_from_mask(mask: np.ndarray) -> Tuple[Optional[int], Optional[int], int]: cnt = int(cv2.countNonZero(mask)) if cnt == 0: @@ -15,39 +16,58 @@ def _centroid_from_mask(mask: np.ndarray) -> Tuple[Optional[int], Optional[int], M = cv2.moments(mask) if M["m00"] == 0: return None, None, cnt - return int(M["m10"]/M["m00"]), int(M["m01"]/M["m00"]), cnt + return int(M["m10"] / M["m00"]), int(M["m01"] / M["m00"]), cnt -def _centroids_from_contours(mask: np.ndarray, - ui_exclude_rects: List[Tuple[int,int,int,int]], - min_area: int, - circ_min: float, - aspect_tol: float, - extent_min: float, - solidity_min: float) -> List[Tuple[int,int,int]]: +def 
_centroids_from_contours( + mask: np.ndarray, + ui_exclude_rects: List[Tuple[float, float, float, float]], + min_area_px: int, + circ_min: float, + aspect_tol: float, + extent_min: float, + solidity_min: float, +) -> List[Tuple[int, int, int]]: """ - Liefert (cx,cy,area) für Konturen, die Bomben-Formkriterien erfüllen. - ui_exclude_rects: Liste von (x0,y0,x1,y1) in Pixeln relativ zum monitor_area. + Liefert (cx, cy, area) für Konturen, die Bomben-Formkriterien erfüllen. + + ui_exclude_rects akzeptiert Einträge entweder in Pixeln (x0,y0,x1,y1) + oder normiert (0..1). Normierte Werte werden auf die aktuelle Framegröße + umgerechnet. """ - # UI-Zonen ausmastern + h, w = mask.shape + + # UI-Zonen ausmaskieren (Pixel- oder Normalformate unterstützen) if ui_exclude_rects: - h, w = mask.shape - for (x0,y0,x1,y1) in ui_exclude_rects: - x0 = max(0, min(w, x0)); x1 = max(0, min(w, x1)) - y0 = max(0, min(h, y0)); y1 = max(0, min(h, y1)) - mask[y0:y1, x0:x1] = 0 + for (x0, y0, x1, y1) in ui_exclude_rects: + # Wenn alle Koordinaten in [0,1], als normierte Eingaben interpretieren + if 0.0 <= x0 <= 1.0 and 0.0 <= x1 <= 1.0 and 0.0 <= y0 <= 1.0 and 0.0 <= y1 <= 1.0: + px0 = int(round(x0 * w)) + px1 = int(round(x1 * w)) + py0 = int(round(y0 * h)) + py1 = int(round(y1 * h)) + else: + px0, py0, px1, py1 = int(x0), int(y0), int(x1), int(y1) + + # clamp + px0 = max(0, min(w, px0)) + px1 = max(0, min(w, px1)) + py0 = max(0, min(h, py0)) + py1 = max(0, min(h, py1)) + if py0 < py1 and px0 < px1: + mask[py0:py1, px0:px1] = 0 contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) out = [] for c in contours: area = float(cv2.contourArea(c)) - if area < min_area: + if area < float(min_area_px): continue - x, y, w, h = cv2.boundingRect(c) - if h == 0 or w == 0: + x, y, w_b, h_b = cv2.boundingRect(c) + if h_b == 0 or w_b == 0: continue - aspect = w / float(h) + aspect = w_b / float(h_b) if not (1.0 - aspect_tol <= aspect <= 1.0 + aspect_tol): continue @@ -63,7 +83,7 @@ 
def _centroids_from_contours(mask: np.ndarray, if hull_area <= 0: continue solidity = area / hull_area - extent = area / float(w * h) + extent = area / float(w_b * h_b) if solidity < solidity_min or extent < extent_min: continue @@ -77,70 +97,112 @@ def _centroids_from_contours(mask: np.ndarray, return out +# ---------------- Environment ---------------- class FlowerGameEnv(gym.Env): """ - Observation = Dict: - "image": (84,84,1) - "state": [tx,ty, fx,fy, bx,by] (bx,by = nächstgelegene gültige Bombe) + Beobachtung (nur positionsbasiert, kein Bild im Learning-Interface!): + obs = {"state": [tx, ty, fx, fy, bx, by]} + Alle Werte in [0,1] relativ zur aktuellen Framebreite/-höhe. + (bx,by) ist die nächste gültige Bombe relativ zur Turtle, sonst 0. Actions: 0=W, 1=A, 2=S, 3=D - Rewards: - +0.6 bei Kontakt (<= 95 px) mit Blume - +0.10 * Distanzverkleinerung zur Blume (auf Bilddiagonale normiert) - -5.0 wenn Distanz zur nächsten Bombe <= 115 px + Rewards (größeninvariant, **achsenweise normiert**): + +1.0 bei Kontakt, definiert als Rechtecktest um die Turtle: + |tx - fx|/w <= eat_x_nd/2 UND |ty - fy|/h <= eat_y_nd/2 + +shaping_gain * (eukl. Distanzabnahme zur Blume) [Distanz/diag] + -5.0 wenn irgendeine Bombe im Kollisionsrechteck liegt: + |tx - bx|/w <= collision_x_nd/2 UND |ty - by|/h <= collision_y_nd/2 + + Größeninvarianz: + - Schwellen sind feste Bruchteile der **Breite** bzw. **Höhe** eines + festen Referenzmaßes (ref_size). Standardmäßig (1900,1263), passend + zu deinem Setup; kann überschrieben werden. + - Bomben-Minimalfläche als Anteil an der Referenzfläche (w_ref*h_ref) + und pro Frame in Pixel umgerechnet. + - UI-Exclude-Rects: optional normiert (0..1) oder in Pixeln. 
""" metadata = {"render_modes": []} - def __init__(self, monitor_area, ui_exclude_rects: Optional[List[Tuple[int,int,int,int]]] = None): + def __init__( + self, + monitor_area, + ui_exclude_rects: Optional[List[Tuple[float, float, float, float]]] = None, + ref_size: Optional[Tuple[int, int]] = (1900, 1263), # feste Baseline + ): super().__init__() self.monitor_area = monitor_area self.sct = mss.mss() - # --- Observation & Actions --- - self.observation_space = spaces.Dict({ - "image": spaces.Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8), - "state": spaces.Box(low=0.0, high=1.0, shape=(6,), dtype=np.float32), - }) + # --- Observation & Actions (nur STATE) --- + self.observation_space = spaces.Dict( + { + "state": spaces.Box(low=0.0, high=1.0, shape=(6,), dtype=np.float32) + } + ) self.action_space = spaces.Discrete(4) # --- HSV-Grenzen --- self.yellow_lower = np.array([15, 40, 200], dtype=np.uint8) self.yellow_upper = np.array([25, 120, 255], dtype=np.uint8) - self.white_lower = np.array([0, 0, 220], dtype=np.uint8) - self.white_upper = np.array([180, 50, 255], dtype=np.uint8) - self.black_lower = np.array([0, 0, 0], dtype=np.uint8) - self.black_upper = np.array([180, 80, 60], dtype=np.uint8) - self.green1_lower = np.array([30, 80, 80], dtype=np.uint8) + self.white_lower = np.array([0, 0, 220], dtype=np.uint8) + self.white_upper = np.array([180, 50, 255], dtype=np.uint8) + self.black_lower = np.array([0, 0, 0], dtype=np.uint8) + self.black_upper = np.array([180, 80, 60], dtype=np.uint8) + self.green1_lower = np.array([30, 80, 80], dtype=np.uint8) self.green1_upper = np.array([45, 255, 255], dtype=np.uint8) - self.green2_lower = np.array([65, 100, 80], dtype=np.uint8) + self.green2_lower = np.array([65, 100, 80], dtype=np.uint8) self.green2_upper = np.array([90, 255, 255], dtype=np.uint8) self.kernel = np.ones((3, 3), np.uint8) - # --- Reward-/Heuristik-Parameter --- - self.eat_radius_px = 95 - self.collision_dist_px = 115 - self.shaping_scale = 0.20 - 
self.eat_reward = 1 + # --- Rechteckige Default-Schwellen (volle Breite/Höhe in Pixel) --- + self._eat_x_px_default = 320 + self._eat_y_px_default = 220 + self._collision_x_px_default = 320 + self._collision_y_px_default = 220 + self._bomb_min_area_px_default = 400 + + # --- Feste Referenzgröße (größeninvariante Bruchteile) --- + if ref_size is not None: + w_ref, h_ref = int(ref_size[0]), int(ref_size[1]) + else: + w_ref = int(self.monitor_area.get("width", 1)) + h_ref = int(self.monitor_area.get("height", 1)) + self.w_ref = max(1, w_ref) + self.h_ref = max(1, h_ref) + + # --- Bruchteile relativ zu w_ref/h_ref (achsenweise Normierung) --- + # Semantik: Werte beziehen sich auf die **volle** Rechteckbreite/-höhe; + # in den Tests werden Halbachsen (= */2) verwendet. + self.eat_x_nd = float(self._eat_x_px_default) / float(self.w_ref) + self.eat_y_nd = float(self._eat_y_px_default) / float(self.h_ref) + self.collision_x_nd = float(self._collision_x_px_default) / float(self.w_ref) + self.collision_y_nd = float(self._collision_y_px_default) / float(self.h_ref) + self.bomb_min_area_frac = float(self._bomb_min_area_px_default) / float(self.w_ref * self.h_ref) + + # Reward-/Heuristik-Parameter (dimensionslos) + self.shaping_gain = 1.0 + self.eat_reward = 1.0 self.collision_penalty = 5.0 + # Event-Cooldown self.contact_cooldown_frames = 8 self._cooldown = 0 - self.prev_dist_to_flower = None + self.prev_dist_to_flower_nd = None self.flowers_eaten = 0 - # --- Bomben-Filterparameter (gegen Score-Schrift) --- - self.bomb_min_area = 400 # <— Text-Glyphen sind meist kleiner - self.bomb_circ_min = 0.60 # Kreisförmigkeit (1.0 ist perfekter Kreis) - self.bomb_aspect_tol = 0.35 # erlaubt 0.65–1.35 Seitenverhältnis - self.bomb_extent_min = 0.60 # Füllgrad im Bounding-Rect - self.bomb_solidity_min = 0.85 # gegen ring-/schriftartige Konturen + # Bomben-Filter (konstant, bis auf min_area -> wird aus frac abgeleitet) + self.bomb_circ_min = 0.60 + self.bomb_aspect_tol = 0.35 + 
def step(self, action):
    """Press the key for ``action``, then observe and score the new frame.

    Actions 0..3 map to the keys W, A, S, D; any other value presses
    nothing. After a short pause the observation is rebuilt, the reward is
    computed from it and the contact cooldown counts down by one.

    Returns:
        ``(obs, reward, terminated, truncated, info)``; both flags are
        always ``False``. ``info`` carries counters and the thresholds
        currently in use.
    """
    # Equality tests (not a dict lookup) so array-like actions behave
    # exactly as with an if/elif chain.
    for index, key in enumerate(("w", "a", "s", "d")):
        if action == index:
            pyautogui.press(key)
            break

    time.sleep(0.01)
    observation = self._build_observation()
    step_reward = self._calculate_reward()

    # The cooldown is consumed only after the reward has been evaluated.
    if self._cooldown > 0:
        self._cooldown -= 1

    info = dict(
        flowers_eaten=self.flowers_eaten,
        bombs_expected=self._bombs_expected(self.flowers_eaten),
        bombs_detected=len(self._last_cache["bombs_xy"]),
        # *_nd values are fractions of frame width / height, not diagonal
        eat_x_nd=self.eat_x_nd,
        eat_y_nd=self.eat_y_nd,
        collision_x_nd=self.collision_x_nd,
        collision_y_nd=self.collision_y_nd,
        # area fraction; converted to pixels per frame as frac * (w * h)
        bomb_min_area_frac=self.bomb_min_area_frac,
        ref_size=(self.w_ref, self.h_ref),
    )
    return observation, step_reward, False, False, info
def _build_observation(self):
    """Grab a frame, run detection on it and return the state observation.

    The full detection result is stored in ``self._last_cache``; only its
    ``state_norm`` entry is exposed, under the ``"state"`` key.
    """
    self._last_cache = self._detect_entities(self._grab_bgr())
    return {"state": self._last_cache["state_norm"]}
dist - reward += self.shaping_scale * (delta / max(1.0, np.hypot(h, w))) - self.prev_dist_to_flower = dist + dist_px = float(np.hypot(tx - fx, ty - fy)) + dist_nd = dist_px / diag + if self.prev_dist_to_flower_nd is not None: + delta = self.prev_dist_to_flower_nd - dist_nd + reward += self.shaping_gain * delta + self.prev_dist_to_flower_nd = dist_nd else: - self.prev_dist_to_flower = None + self.prev_dist_to_flower_nd = None - # Eat-Event mit Cooldown + # Eat-Event (achsenweise Rechtecktest, Halbachsen = */2 von w bzw. h) if self._cooldown == 0 and tf and ff: - if np.linalg.norm(np.array([tx - fx, ty - fy], dtype=np.float32)) <= self.eat_radius_px: + dx_nw = abs(tx - fx) / float(w if w > 0 else 1) + dy_nh = abs(ty - fy) / float(h if h > 0 else 1) + if (dx_nw <= (self.eat_x_nd * 0.5)) and (dy_nh <= (self.eat_y_nd * 0.5)): + print("Blume gegessen!") reward += self.eat_reward self._cooldown = self.contact_cooldown_frames self.flowers_eaten += 1 - # Kollision mit nächster Bombe - if tf and bombs_xy: - min_dist = min([np.hypot(tx - bx, ty - by) for (bx, by, _a) in bombs_xy]) - if min_dist <= self.collision_dist_px: - reward -= self.collision_penalty + # Überprüfung auf Kollision mit Bombe / Game Over Screen Farben + rgb_data = self.raw[:, :, :3] + target_color = np.array([113, 110, 83]) + tolerance = 10 + found_blue = np.any(np.all(np.abs(rgb_data - target_color) <= tolerance, axis=-1)) + if found_blue: + print("In Bombe gelaufen!") + reward -= self.collision_penalty + time.sleep(0.5) + pyautogui.hotkey("ctrl", "p") + time.sleep(0.5) + + print(reward) return float(reward) diff --git a/train_bot.py b/train_bot.py index 777f256..c387cf1 100644 --- a/train_bot.py +++ b/train_bot.py @@ -4,18 +4,25 @@ from stable_baselines3 import PPO from stable_baselines3.common.callbacks import BaseCallback from flower_game_env import FlowerGameEnv - -# ---- Dein Spielbereich (anpassen!) 
---- +# ---- Spielbereich ---- monitor_area = {"top": 120, "left": 330, "width": 1900, "height": 1263} -env = FlowerGameEnv(monitor_area) +ui_exclude_rects = [] # optional: Pixel oder [0..1]-normiert + +time.sleep(3) + +# Env mit fester Referenzgröße (Baseline) +env = FlowerGameEnv( + monitor_area, + ui_exclude_rects=ui_exclude_rects, + ref_size=(1900, 1263), +) + saved_model_name = "flower_bot" -zip_file = saved_model_name + ".zip" +zip_file = f"{saved_model_name}.zip" class TimeBasedCheckpoint(BaseCallback): - """ - Speichert das Modell alle 'save_every_secs' Sekunden in 'save_prefix' + Timestamp. - """ + """Speichert das Modell alle 'save_every_secs' Sekunden nach save_prefix.zip""" def __init__(self, save_every_secs=60, save_prefix=saved_model_name, verbose=1): super().__init__(verbose) self.save_every_secs = save_every_secs @@ -25,7 +32,7 @@ class TimeBasedCheckpoint(BaseCallback): def _on_step(self) -> bool: now = time.time() if now - self._last_save >= self.save_every_secs: - fname = f"{self.save_prefix}" + fname = self.save_prefix if self.verbose: print(f"[Autosave] Saving model to {fname}.zip") self.model.save(fname) @@ -33,17 +40,16 @@ class TimeBasedCheckpoint(BaseCallback): return True -# --- Laden, falls Datei vorhanden --- +# --- Laden/Starten --- if os.path.exists(zip_file): print(f"Lade existierendes Modell aus {zip_file}") - model = PPO.load(zip_file, env=env) # Weitertrainieren mit neuem Env + model = PPO.load(zip_file, env=env) # weitertrainieren else: print("Starte neues Modell") - # CNN + Dict-Observation → Verwende 'MultiInputPolicy' model = PPO("MultiInputPolicy", env, verbose=2) -# Trainieren mit Autosave (jede Minute) -model.learn(total_timesteps=500_000, callback=TimeBasedCheckpoint(100, "flower_bot")) +# Trainieren mit Autosave +model.learn(total_timesteps=500_000, callback=TimeBasedCheckpoint(100, saved_model_name)) # Abschluss-Speicherstand model.save("flower_bot_final")